VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 77807

Last change on this file since 77807 was 76980, checked in by vboxsync, 6 years ago

NAT/Net: Fix another fallout from r94443. This should address elusive
natnet crash from ticketref:13899.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 69.2 KB
1/* $Id: pxtcp.c 76980 2019-01-24 16:34:23Z vboxsync $ */
2/** @file
3 * NAT Network - TCP proxy.
4 */
5
6/*
7 * Copyright (C) 2013-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#define LOG_GROUP LOG_GROUP_NAT_SERVICE
19
20#include "winutils.h"
21
22#include "pxtcp.h"
23
24#include "proxy.h"
25#include "proxy_pollmgr.h"
26#include "pxremap.h"
27#include "portfwd.h" /* fwspec */
28
29#ifndef RT_OS_WINDOWS
30#include <sys/types.h>
31#include <sys/socket.h>
32#include <sys/ioctl.h>
33#ifdef RT_OS_SOLARIS
34#include <sys/filio.h> /* FIONREAD is BSD'ism */
35#endif
36#include <stdlib.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <string.h>
40#include <poll.h>
41
42#include <err.h> /* BSD'ism */
43#else
44#include <stdlib.h>
45#include <stdio.h>
46#include <string.h>
47
48#include <iprt/stdint.h>
49#include "winpoll.h"
50#endif
51
52#include "lwip/opt.h"
53
54#include "lwip/sys.h"
55#include "lwip/tcpip.h"
56#include "lwip/netif.h"
57#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
58#include "lwip/icmp.h"
59#include "lwip/icmp6.h"
60
61/*
62 * Different OSes have different quirks in reporting POLLHUP for TCP
63 * sockets.
64 *
65 * Using shutdown(2) "how" values here would be more readable, but
66 * since SHUT_RD is 0, we can't use 0 for "none", unfortunately.
67 */
68#if defined(RT_OS_NETBSD) || defined(RT_OS_SOLARIS)
69# define HAVE_TCP_POLLHUP 0 /* not reported */
70#elif defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS)
71# define HAVE_TCP_POLLHUP POLLIN /* reported when remote closes */
72#else
73# define HAVE_TCP_POLLHUP (POLLIN|POLLOUT) /* reported when both directions are closed */
74#endif
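/*
 * (Illustration.)  The value is meant to be tested as a bit mask rather than
 * as a plain boolean: the "#if !(HAVE_TCP_POLLHUP & POLLOUT)" checks further
 * down select hosts where POLLHUP does NOT imply that the outbound direction
 * is closed as well, so full close has to be detected by other means there.
 */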
75
76
77/**
78 * Ring buffer for inbound data. Filled with data from the host
79 * socket on the poll manager thread. Data is consumed by scheduling
80 * tcp_write() to the pcb on the lwIP thread.
81 *
82 * NB: There is actually a third party present, the lwIP stack itself.
83 * Thus the buffer doesn't have the usual two-way free vs. data split,
84 * but rather a three-way split: free / sent-but-unACKed data / unsent data.
85 */
86struct ringbuf {
87 char *buf;
88 size_t bufsize;
89
90 /*
91 * Start of free space, producer writes here (up till "unacked").
92 */
93 volatile size_t vacant;
94
95 /*
96 * Start of sent but unacknowledged data. The data are "owned" by
97 * the stack as it may need to retransmit. This is the free space
98 * limit for producer.
99 */
100 volatile size_t unacked;
101
102 /*
103 * Start of unsent data, consumer reads/sends from here (up till
104 * "vacant"). Not declared volatile since it's only accessed from
105 * the consumer thread.
106 */
107 size_t unsent;
108};
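/*
 * A sketch of that three-way split (positions purely illustrative):
 *
 *              unacked        unsent         vacant
 *                 v              v              v
 *   ...free...  |---unACKed----|----unsent----|  ...free...
 *
 * [unacked, unsent)  - passed to tcp_write(), owned by lwIP until ACKed
 * [unsent,  vacant)  - read from the socket, not yet passed to tcp_write()
 * [vacant,  unacked) - free space the producer may fill (wrapping around)
 */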
109
110
111/**
112 * A proxied TCP connection: the guest-side lwIP pcb paired with the host-side socket. */
113struct pxtcp {
114 /**
115 * Our poll manager handler. Must be first, strong/weak
116 * references depend on this "inheritance".
117 */
118 struct pollmgr_handler pmhdl;
119
120 /**
121 * lwIP (internal/guest) side of the proxied connection.
122 */
123 struct tcp_pcb *pcb;
124
125 /**
126 * Host (external) side of the proxied connection.
127 */
128 SOCKET sock;
129
130 /**
131 * Socket events we are currently polling for.
132 */
133 int events;
134
135 /**
136 * Socket error. Currently used to save connect(2) errors so that
137 * we can decide if we need to send ICMP error.
138 */
139 int sockerr;
140
141 /**
142 * Interface that we have got the SYN from. Needed to send ICMP
143 * with correct source address.
144 */
145 struct netif *netif;
146
147 /**
148 * For tentatively accepted connections for which we are in
149 * process of connecting to the real destination this is the
150 * initial pbuf that we might need to build ICMP error.
151 *
152 * When connection is established this is used to hold outbound
153 * pbuf chain received by pxtcp_pcb_recv() but not yet completely
154 * forwarded over the socket. We cannot "return" it to lwIP since
155 * the head of the chain is already sent and freed.
156 */
157 struct pbuf *unsent;
158
159 /**
160 * Guest has closed its side. Reported to pxtcp_pcb_recv() only
161 * once and we might not be able to forward it immediately if we
162 * have unsent pbuf.
163 */
164 int outbound_close;
165
166 /**
167 * Outbound half-close has been done on the socket.
168 */
169 int outbound_close_done;
170
171 /**
172 * External has closed its side. We might not be able to forward
173 * it immediately if we have unforwarded data.
174 */
175 int inbound_close;
176
177 /**
178 * Inbound half-close has been done on the pcb.
179 */
180 int inbound_close_done;
181
182 /**
183 * On systems that report POLLHUP as soon as the final FIN is
184 * received on a socket we cannot continue polling for the rest of
185 * input, so we have to read (pull) last data from the socket on
186 * the lwIP thread instead of polling/pushing it from the poll
187 * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
188 */
189 int inbound_pull;
190
191
192 /**
193 * When poll manager schedules delete we may not be able to delete
194 * a pxtcp immediately if not all inbound data has been acked by
195 * the guest: lwIP may need to resend and the data are in pxtcp's
196 * inbuf::buf. We defer delete until all data are acked to
197 * pxtcp_pcb_sent().
198 */
199 int deferred_delete;
200
201 /**
202 * Ring-buffer for inbound data.
203 */
204 struct ringbuf inbuf;
205
206 /**
207 * lwIP thread's strong reference to us.
208 */
209 struct pollmgr_refptr *rp;
210
211
212 /*
213 * We use static messages to call functions on the lwIP thread to
214 * avoid malloc/free overhead.
215 */
216 struct tcpip_msg msg_delete; /* delete pxtcp */
217 struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
218 struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
219 struct tcpip_msg msg_outbound; /* trigger send of outbound data */
220 struct tcpip_msg msg_inbound; /* trigger send of inbound data */
221#if HAVE_TCP_POLLHUP
222 struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
223#endif
224};
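/*
 * Rough division of labour for a pxtcp, as described by the comments
 * throughout this file: the poll manager thread owns the host socket and
 * fills "inbuf"; the lwIP thread owns the pcb and drains "inbuf" via
 * tcp_write().  The two sides talk via the static tcpip_msg callbacks above
 * (poll manager -> lwIP) and via the pollmgr channels (lwIP -> poll manager).
 */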
225
226
227
228static struct pxtcp *pxtcp_allocate(void);
229static void pxtcp_free(struct pxtcp *);
230
231static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
232static void pxtcp_pcb_dissociate(struct pxtcp *);
233
234/* poll manager callbacks for pxtcp related channels */
235static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
236static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
237static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
238#if !(HAVE_TCP_POLLHUP & POLLOUT)
239static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
240#endif
241static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
242
243/* helper functions for sending/receiving pxtcp over poll manager channels */
244static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
245static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
246static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
247static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
248
249/* poll manager callbacks for individual sockets */
250static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
251static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
252
253/* get incoming traffic into ring buffer */
254static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
255static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */
256
257/* convenience functions for poll manager callbacks */
258static int pxtcp_schedule_delete(struct pxtcp *);
259static int pxtcp_schedule_reset(struct pxtcp *);
260static int pxtcp_schedule_reject(struct pxtcp *);
261
262/* lwip thread callbacks called via proxy_lwip_post() */
263static void pxtcp_pcb_delete_pxtcp(void *);
264static void pxtcp_pcb_reset_pxtcp(void *);
265static void pxtcp_pcb_accept_refuse(void *);
266static void pxtcp_pcb_accept_confirm(void *);
267static void pxtcp_pcb_write_outbound(void *);
268static void pxtcp_pcb_write_inbound(void *);
269#if HAVE_TCP_POLLHUP
270static void pxtcp_pcb_pull_inbound(void *);
271#endif
272
273/* tcp pcb callbacks */
274static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, struct pbuf *); /* global */
275static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
276static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
277static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
278static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
279static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
280static void pxtcp_pcb_err(void *, err_t);
281
282static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
283static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
284
285static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t);
286
287static void pxtcp_pcb_forward_inbound(struct pxtcp *);
288static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
289DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
290static void pxtcp_pcb_schedule_poll(struct pxtcp *);
291static void pxtcp_pcb_cancel_poll(struct pxtcp *);
292
293static void pxtcp_pcb_reject(struct tcp_pcb *, int, struct netif *, struct pbuf *);
294DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
295
296/* poll manager handlers for pxtcp channels */
297static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
298static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
299static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
300#if !(HAVE_TCP_POLLHUP & POLLOUT)
301static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
302#endif
303static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
304
305
306/**
307 * Init PXTCP - must be run before either the lwIP tcpip thread or the
308 * poll manager threads have been created.
309 */
310void
311pxtcp_init(void)
312{
313 /*
314 * Create channels.
315 */
316#define CHANNEL(SLOT, NAME) do { \
317 NAME##_hdl.callback = NAME; \
318 NAME##_hdl.data = NULL; \
319 NAME##_hdl.slot = -1; \
320 pollmgr_add_chan(SLOT, &NAME##_hdl); \
321 } while (0)
322
323 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
324 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
325 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
326#if !(HAVE_TCP_POLLHUP & POLLOUT)
327 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
328#endif
329 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
330
331#undef CHANNEL
332
333 /*
334 * Listen to outgoing connection from guest(s).
335 */
336 tcp_proxy_accept(pxtcp_pcb_heard);
337}
338
339
340/**
341 * Syntactic sugar for sending pxtcp pointer over poll manager
342 * channel. Used by lwip thread functions.
343 */
344static ssize_t
345pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
346{
347 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
348}
349
350
351/**
352 * Syntactic sugar for sending weak reference to pxtcp over poll
353 * manager channel. Used by lwip thread functions.
354 */
355static ssize_t
356pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
357{
358 pollmgr_refptr_weak_ref(pxtcp->rp);
359 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
360}
361
362
363/**
364 * Counterpart of pxtcp_chan_send().
365 */
366static struct pxtcp *
367pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
368{
369 struct pxtcp *pxtcp;
370
371 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
372 return pxtcp;
373}
374
375
376/**
377 * Counterpart of pxtcp_chan_send_weak().
378 */
379static struct pxtcp *
380pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
381{
382 struct pollmgr_refptr *rp;
383 struct pollmgr_handler *base;
384 struct pxtcp *pxtcp;
385
386 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
387 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
388 pxtcp = (struct pxtcp *)base;
389
390 return pxtcp;
391}
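/*
 * A minimal sketch of how the weak-reference channels above are used by the
 * callbacks below: the lwIP thread sends a weak reference, the poll manager
 * callback tries to upgrade it, and a NULL result simply means the pxtcp has
 * already been deleted in the meantime:
 *
 *   pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);   // lwIP thread
 *   ...
 *   pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);     // poll manager
 *   if (pxtcp == NULL) return POLLIN;                         // already gone
 */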
392
393
394/**
395 * Register pxtcp with poll manager.
396 *
397 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
398 * error handling is different in these two cases, we leave it up to
399 * the caller.
400 */
401int
402pxtcp_pmgr_add(struct pxtcp *pxtcp)
403{
404 int status;
405
406 LWIP_ASSERT1(pxtcp != NULL);
407#ifdef RT_OS_WINDOWS
408 LWIP_ASSERT1(pxtcp->sock != INVALID_SOCKET);
409#else
410 LWIP_ASSERT1(pxtcp->sock >= 0);
411#endif
412 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
413 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
414 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
415
416 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
417 return status;
418}
419
420
421/**
422 * Unregister pxtcp with poll manager.
423 *
424 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
425 * leg).
426 */
427void
428pxtcp_pmgr_del(struct pxtcp *pxtcp)
429{
430 LWIP_ASSERT1(pxtcp != NULL);
431
432 pollmgr_del_slot(pxtcp->pmhdl.slot);
433}
434
435
436/**
437 * POLLMGR_CHAN_PXTCP_ADD handler.
438 *
439 * Get new pxtcp from lwip thread and start polling its socket.
440 */
441static int
442pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
443{
444 struct pxtcp *pxtcp;
445 int status;
446
447 pxtcp = pxtcp_chan_recv(handler, fd, revents);
448 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
449 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
450
451 status = pxtcp_pmgr_add(pxtcp);
452 if (status < 0) {
453 (void) pxtcp_schedule_reset(pxtcp);
454 }
455
456 return POLLIN;
457}
458
459
460/**
461 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
462 *
463 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
464 * and failed, it now requests us to poll the socket for POLLOUT and
465 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
466 */
467static int
468pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
469{
470 struct pxtcp *pxtcp;
471
472 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
473 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
474
475 if (pxtcp == NULL) {
476 return POLLIN;
477 }
478
479 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
480 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
481
482 pxtcp->events |= POLLOUT;
483 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
484
485 return POLLIN;
486}
487
488
489/**
490 * POLLMGR_CHAN_PXTCP_POLLIN handler.
491 */
492static int
493pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
494{
495 struct pxtcp *pxtcp;
496
497 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
498 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
499
500 if (pxtcp == NULL) {
501 return POLLIN;
502 }
503
504 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
505 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
506
507 if (pxtcp->inbound_close) {
508 return POLLIN;
509 }
510
511 pxtcp->events |= POLLIN;
512 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
513
514 return POLLIN;
515}
516
517
518#if !(HAVE_TCP_POLLHUP & POLLOUT)
519/**
520 * POLLMGR_CHAN_PXTCP_DEL handler.
521 *
522 * Schedule pxtcp deletion. We only need this if the host system doesn't
523 * report POLLHUP for fully closed TCP sockets.
524 */
525static int
526pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
527{
528 struct pxtcp *pxtcp;
529
530 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
531 if (pxtcp == NULL) {
532 return POLLIN;
533 }
534
535 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
536 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
537
538 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
539 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
540
541 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
542 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
543
544 pxtcp_pmgr_del(pxtcp);
545 (void) pxtcp_schedule_delete(pxtcp);
546
547 return POLLIN;
548}
549#endif /* !(HAVE_TCP_POLLHUP & POLLOUT) */
550
551
552/**
553 * POLLMGR_CHAN_PXTCP_RESET handler.
554 *
555 * Close the socket with RST and delete pxtcp.
556 */
557static int
558pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
559{
560 struct pxtcp *pxtcp;
561
562 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
563 if (pxtcp == NULL) {
564 return POLLIN;
565 }
566
567 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
568 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
569
570 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
571 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
572
573 pxtcp_pmgr_del(pxtcp);
574
575 proxy_reset_socket(pxtcp->sock);
576 pxtcp->sock = INVALID_SOCKET;
577
578 (void) pxtcp_schedule_reset(pxtcp);
579
580 return POLLIN;
581}
582
583
584static struct pxtcp *
585pxtcp_allocate(void)
586{
587 struct pxtcp *pxtcp;
588
589 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
590 if (pxtcp == NULL) {
591 return NULL;
592 }
593
594 pxtcp->pmhdl.callback = NULL;
595 pxtcp->pmhdl.data = (void *)pxtcp;
596 pxtcp->pmhdl.slot = -1;
597
598 pxtcp->pcb = NULL;
599 pxtcp->sock = INVALID_SOCKET;
600 pxtcp->events = 0;
601 pxtcp->sockerr = 0;
602 pxtcp->netif = NULL;
603 pxtcp->unsent = NULL;
604 pxtcp->outbound_close = 0;
605 pxtcp->outbound_close_done = 0;
606 pxtcp->inbound_close = 0;
607 pxtcp->inbound_close_done = 0;
608 pxtcp->inbound_pull = 0;
609 pxtcp->deferred_delete = 0;
610
611 pxtcp->inbuf.bufsize = 64 * 1024;
612 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
613 if (pxtcp->inbuf.buf == NULL) {
614 free(pxtcp);
615 return NULL;
616 }
617 pxtcp->inbuf.vacant = 0;
618 pxtcp->inbuf.unacked = 0;
619 pxtcp->inbuf.unsent = 0;
620
621 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
622 if (pxtcp->rp == NULL) {
623 free(pxtcp->inbuf.buf);
624 free(pxtcp);
625 return NULL;
626 }
627
628#define CALLBACK_MSG(MSG, FUNC) \
629 do { \
630 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
631 pxtcp->MSG.sem = NULL; \
632 pxtcp->MSG.msg.cb.function = FUNC; \
633 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
634 } while (0)
635
636 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
637 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
638 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
639 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
640 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
641#if HAVE_TCP_POLLHUP
642 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
643#endif
644
645#undef CALLBACK_MSG
646
647 return pxtcp;
648}
649
650
651/**
652 * Exported to fwtcp to create pxtcp for incoming port-forwarded
653 * connections. Completed with pcb in pxtcp_pcb_connect().
654 */
655struct pxtcp *
656pxtcp_create_forwarded(SOCKET sock)
657{
658 struct pxtcp *pxtcp;
659
660 pxtcp = pxtcp_allocate();
661 if (pxtcp == NULL) {
662 return NULL;
663 }
664
665 pxtcp->sock = sock;
666 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
667 pxtcp->events = 0;
668
669 return pxtcp;
670}
671
672
673static void
674pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
675{
676 LWIP_ASSERT1(pxtcp != NULL);
677 LWIP_ASSERT1(pcb != NULL);
678
679 pxtcp->pcb = pcb;
680
681 tcp_arg(pcb, pxtcp);
682
683 tcp_recv(pcb, pxtcp_pcb_recv);
684 tcp_sent(pcb, pxtcp_pcb_sent);
685 tcp_poll(pcb, NULL, 255);
686 tcp_err(pcb, pxtcp_pcb_err);
687}
688
689
690static void
691pxtcp_free(struct pxtcp *pxtcp)
692{
693 if (pxtcp->unsent != NULL) {
694 pbuf_free(pxtcp->unsent);
695 }
696 if (pxtcp->inbuf.buf != NULL) {
697 free(pxtcp->inbuf.buf);
698 }
699 free(pxtcp);
700}
701
702
703/**
704 * Counterpart to pxtcp_create_forwarded(): destroy a pxtcp that fwtcp
705 * failed to register with the poll manager, and so could never be posted
706 * to the lwIP thread to perform the connect.
707 */
708void
709pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
710{
711 LWIP_ASSERT1(pxtcp->pcb == NULL);
712 pxtcp_pcb_reset_pxtcp(pxtcp);
713}
714
715
716static void
717pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
718{
719 if (pxtcp == NULL || pxtcp->pcb == NULL) {
720 return;
721 }
722
723 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
724 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
725
726 /*
727 * We must have dissociated from a fully closed pcb immediately
728 * since lwIP recycles them and we don't want to mess with what
729 * would be someone else's pcb that we happen to have a stale
730 * pointer to.
731 */
732 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
733
734 tcp_recv(pxtcp->pcb, NULL);
735 tcp_sent(pxtcp->pcb, NULL);
736 tcp_poll(pxtcp->pcb, NULL, 255);
737 tcp_err(pxtcp->pcb, NULL);
738 tcp_arg(pxtcp->pcb, NULL);
739 pxtcp->pcb = NULL;
740}
741
742
743/**
744 * Lwip thread callback invoked via pxtcp::msg_delete
745 *
746 * Since we use static messages to communicate to the lwip thread, we
747 * cannot delete pxtcp without making sure there are no unprocessed
748 * messages in the lwip thread mailbox.
749 *
750 * The easiest way to ensure that is to send this "delete" message as
751 * the last one; when it's processed we know there are no more, and
752 * it's safe to delete the pxtcp.
753 *
754 * Poll manager handlers should use pxtcp_schedule_delete()
755 * convenience function.
756 */
757static void
758pxtcp_pcb_delete_pxtcp(void *ctx)
759{
760 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
761
762 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
763 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
764 (pxtcp->deferred_delete && !pxtcp->inbound_pull
765 ? " (was deferred)" : "")));
766
767 LWIP_ASSERT1(pxtcp != NULL);
768 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
769 LWIP_ASSERT1(pxtcp->outbound_close_done);
770 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
771
772
773 /*
774 * pxtcp is no longer registered with poll manager, so it's safe
775 * to close the socket.
776 */
777 if (pxtcp->sock != INVALID_SOCKET) {
778 closesocket(pxtcp->sock);
779 pxtcp->sock = INVALID_SOCKET;
780 }
781
782 /*
783 * We might have already dissociated from a fully closed pcb, or
784 * guest might have sent us a reset while msg_delete was in
785 * transit. If there's no pcb, we are done.
786 */
787 if (pxtcp->pcb == NULL) {
788 pollmgr_refptr_unref(pxtcp->rp);
789 pxtcp_free(pxtcp);
790 return;
791 }
792
793 /*
794 * Have we completely forwarded all inbound traffic to the guest?
795 *
796 * We may still be waiting for ACKs. We may have failed to send
797 * some of the data (tcp_write() failed with ERR_MEM). We may
798 * have failed to send the FIN (tcp_shutdown() failed with
799 * ERR_MEM).
800 */
801 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
802 pxtcp_pcb_dissociate(pxtcp);
803 pollmgr_refptr_unref(pxtcp->rp);
804 pxtcp_free(pxtcp);
805 }
806 else {
807 DPRINTF2(("delete: pxtcp %p; pcb %p:"
808 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
809 (void *)pxtcp, (void *)pxtcp->pcb,
810 (int)pxtcp->inbuf.unacked,
811 (int)pxtcp->inbuf.unsent,
812 (int)pxtcp->inbuf.vacant,
813 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
814
815 LWIP_ASSERT1(!pxtcp->deferred_delete);
816 pxtcp->deferred_delete = 1;
817 }
818}
819
820
821/**
822 * If we couldn't delete pxtcp right away in the msg_delete callback
823 * from the poll manager thread, we repeat the check at the end of
824 * relevant pcb callbacks.
825 */
826DECLINLINE(void)
827pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
828{
829 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
830 pxtcp_pcb_delete_pxtcp(pxtcp);
831 }
832}
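/*
 * The delete dance, in short: the poll manager deregisters the socket and
 * posts msg_delete; pxtcp_pcb_delete_pxtcp() frees the pxtcp immediately if
 * the guest has ACKed everything in inbuf, otherwise it only sets
 * deferred_delete, and pxtcp_pcb_sent() / pxtcp_pcb_poll() finish the job
 * via pxtcp_pcb_maybe_deferred_delete() once inbound forwarding is done.
 */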
833
834
835/**
836 * Poll manager callbacks should use this convenience wrapper to
837 * schedule pxtcp deletion on the lwip thread and to deregister from
838 * the poll manager.
839 */
840static int
841pxtcp_schedule_delete(struct pxtcp *pxtcp)
842{
843 /*
844 * If pollmgr_refptr_get() is called by any channel before
845 * scheduled deletion happens, let them know we are gone.
846 */
847 pxtcp->pmhdl.slot = -1;
848
849 /*
850 * Schedule deletion. Since poll manager thread may be pre-empted
851 * right after we send the message, the deletion may actually
852 * happen on the lwip thread before we return from this function,
853 * so it's not safe to refer to pxtcp after this call.
854 */
855 proxy_lwip_post(&pxtcp->msg_delete);
856
857 /* tell poll manager to deregister us */
858 return -1;
859}
860
861
862/**
863 * Lwip thread callback invoked via pxtcp::msg_reset
864 *
865 * Like pxtcp_pcb_delete_pxtcp(), but sends RST to the guest before
866 * deleting this pxtcp.
867 */
868static void
869pxtcp_pcb_reset_pxtcp(void *ctx)
870{
871 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
872 LWIP_ASSERT1(pxtcp != NULL);
873
874 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
875 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
876
877 if (pxtcp->sock != INVALID_SOCKET) {
878 proxy_reset_socket(pxtcp->sock);
879 pxtcp->sock = INVALID_SOCKET;
880 }
881
882 if (pxtcp->pcb != NULL) {
883 struct tcp_pcb *pcb = pxtcp->pcb;
884 pxtcp_pcb_dissociate(pxtcp);
885 tcp_abort(pcb);
886 }
887
888 pollmgr_refptr_unref(pxtcp->rp);
889 pxtcp_free(pxtcp);
890}
891
892
893
894/**
895 * Poll manager callbacks should use this convenience wrapper to
896 * schedule pxtcp reset and deletion on the lwip thread and to
897 * deregister from the poll manager.
898 *
899 * See pxtcp_schedule_delete() for additional comments.
900 */
901static int
902pxtcp_schedule_reset(struct pxtcp *pxtcp)
903{
904 pxtcp->pmhdl.slot = -1;
905 proxy_lwip_post(&pxtcp->msg_reset);
906 return -1;
907}
908
909
910/**
911 * Reject proxy connection attempt. Depending on the cause (sockerr)
912 * we may just drop the pcb silently, generate an ICMP datagram or
913 * send TCP reset.
914 */
915static void
916pxtcp_pcb_reject(struct tcp_pcb *pcb, int sockerr,
917 struct netif *netif, struct pbuf *p)
918{
919 int reset = 0;
920
921 if (sockerr == ECONNREFUSED) {
922 reset = 1;
923 }
924 else if (p != NULL) {
925 struct netif *oif;
926
927 LWIP_ASSERT1(netif != NULL);
928
929 oif = ip_current_netif();
930 ip_current_netif() = netif;
931
932 if (PCB_ISIPV6(pcb)) {
933 if (sockerr == EHOSTDOWN) {
934 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
935 }
936 else if (sockerr == EHOSTUNREACH
937 || sockerr == ENETDOWN
938 || sockerr == ENETUNREACH)
939 {
940 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
941 }
942 }
943 else {
944 if (sockerr == EHOSTDOWN
945 || sockerr == EHOSTUNREACH
946 || sockerr == ENETDOWN
947 || sockerr == ENETUNREACH)
948 {
949 icmp_dest_unreach(p, ICMP_DUR_HOST);
950 }
951 }
952
953 ip_current_netif() = oif;
954 }
955
956 tcp_abandon(pcb, reset);
957}
958
959
960/**
961 * Called from poll manager thread via pxtcp::msg_accept when proxy
962 * failed to connect to the destination. Also called when we failed
963 * to register pxtcp with poll manager.
964 *
965 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
966 * how this unestablished connection is terminated.
967 */
968static void
969pxtcp_pcb_accept_refuse(void *ctx)
970{
971 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
972
973 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n",
974 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
975 pxtcp->sock, pxtcp->sockerr));
976
977 LWIP_ASSERT1(pxtcp != NULL);
978 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
979
980 if (pxtcp->pcb != NULL) {
981 struct tcp_pcb *pcb = pxtcp->pcb;
982 pxtcp_pcb_dissociate(pxtcp);
983 pxtcp_pcb_reject(pcb, pxtcp->sockerr, pxtcp->netif, pxtcp->unsent);
984 }
985
986 pollmgr_refptr_unref(pxtcp->rp);
987 pxtcp_free(pxtcp);
988}
989
990
991/**
992 * Convenience wrapper for poll manager connect callback to reject
993 * connection attempt.
994 *
995 * Like pxtcp_schedule_reset(), but the callback is more discriminate
996 * in how this unestablished connection is terminated.
997 */
998static int
999pxtcp_schedule_reject(struct pxtcp *pxtcp)
1000{
1001 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
1002 pxtcp->pmhdl.slot = -1;
1003 proxy_lwip_post(&pxtcp->msg_accept);
1004 return -1;
1005}
1006
1007
1008/**
1009 * Global tcp_proxy_accept() callback for proxied outgoing TCP
1010 * connections from guest(s).
1011 */
1012static err_t
1013pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn)
1014{
1015 LWIP_UNUSED_ARG(arg);
1016
1017 return pxtcp_pcb_accept_outbound(newpcb, syn,
1018 PCB_ISIPV6(newpcb), &newpcb->local_ip, newpcb->local_port);
1019}
1020
1021
1022err_t
1023pxtcp_pcb_accept_outbound(struct tcp_pcb *newpcb, struct pbuf *p,
1024 int is_ipv6, ipX_addr_t *dst_addr, u16_t dst_port)
1025{
1026 struct pxtcp *pxtcp;
1027 ipX_addr_t mapped_dst_addr;
1028 int sdom;
1029 SOCKET sock;
1030 ssize_t nsent;
1031 int sockerr = 0;
1032
1033 /*
1034 * TCP first calls the accept callback when it receives the first SYN
1035 * and "tentatively accepts" the new proxied connection attempt. When
1036 * the proxy "confirms" the SYN and sends SYN|ACK, and the guest
1037 * replies with ACK, the accept callback is called again, this time
1038 * with the established connection.
1039 */
1040 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
1041 tcp_accept(newpcb, pxtcp_pcb_accept);
1042 tcp_arg(newpcb, NULL);
1043
1044 tcp_setprio(newpcb, TCP_PRIO_MAX);
1045
1046 pxremap_outbound_ipX(is_ipv6, &mapped_dst_addr, dst_addr);
1047
1048 sdom = is_ipv6 ? PF_INET6 : PF_INET;
1049 sock = proxy_connected_socket(sdom, SOCK_STREAM,
1050 &mapped_dst_addr, dst_port);
1051 if (sock == INVALID_SOCKET) {
1052 sockerr = SOCKERRNO();
1053 goto abort;
1054 }
1055
1056 pxtcp = pxtcp_allocate();
1057 if (pxtcp == NULL) {
1058 proxy_reset_socket(sock);
1059 goto abort;
1060 }
1061
1062 /* save initial datagram in case we need to reply with ICMP */
1063 if (p != NULL) {
1064 pbuf_ref(p);
1065 pxtcp->unsent = p;
1066 pxtcp->netif = ip_current_netif();
1067 }
1068
1069 pxtcp_pcb_associate(pxtcp, newpcb);
1070 pxtcp->sock = sock;
1071
1072 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1073 pxtcp->events = POLLOUT;
1074
1075 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1076 if (nsent < 0) {
1077 pxtcp->sock = INVALID_SOCKET;
1078 proxy_reset_socket(sock);
1079 pxtcp_pcb_accept_refuse(pxtcp);
1080 return ERR_ABRT;
1081 }
1082
1083 return ERR_OK;
1084
1085 abort:
1086 DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n",
1087 __func__, (void *)newpcb, sock, sockerr));
1088 pxtcp_pcb_reject(newpcb, sockerr, ip_current_netif(), p);
1089 return ERR_ABRT;
1090}
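/*
 * The outbound accept path in a nutshell, pieced together from the code
 * above and below: pxtcp_pcb_heard() -> pxtcp_pcb_accept_outbound() creates
 * the host socket with connect(2) in progress and hands the pxtcp to the
 * poll manager; pxtcp_pmgr_connect() waits for that connect to complete and
 * posts msg_accept; pxtcp_pcb_accept_confirm() then sends SYN|ACK to the
 * guest, and pxtcp_pcb_accept() fires once the guest ACKs it.
 */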
1091
1092
1093/**
1094 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1095 * connections from guest(s). This is the "real" accept, with the
1096 * three-way handshake completed.
1097 */
1098static err_t
1099pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1100{
1101 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1102
1103 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1104 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1105
1106 LWIP_ASSERT1(pxtcp != NULL);
1107 LWIP_ASSERT1(pxtcp->pcb == pcb);
1108 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1109
1110 /* send any inbound data that are already queued */
1111 pxtcp_pcb_forward_inbound(pxtcp);
1112 return ERR_OK;
1113}
1114
1115
1116/**
1117 * Initial poll manager callback for proxied outgoing TCP connections.
1118 * pxtcp_pcb_accept_outbound() sets pxtcp::pmhdl::callback to this.
1119 *
1120 * Waits for connect(2) to the destination to complete. On success
1121 * replaces itself with pxtcp_pmgr_pump() callback common to all
1122 * established TCP connections.
1123 */
1124static int
1125pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1126{
1127 struct pxtcp *pxtcp;
1128 RT_NOREF(fd);
1129
1130 pxtcp = (struct pxtcp *)handler->data;
1131 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1132 LWIP_ASSERT1(fd == pxtcp->sock);
1133 LWIP_ASSERT1(pxtcp->sockerr == 0);
1134
1135 if (revents & POLLNVAL) {
1136 pxtcp->sock = INVALID_SOCKET;
1137 pxtcp->sockerr = ETIMEDOUT;
1138 return pxtcp_schedule_reject(pxtcp);
1139 }
1140
1141 /*
1142 * Solaris and NetBSD don't report either POLLERR or POLLHUP when
1143 * connect(2) fails, just POLLOUT. In that case we always need to
1144 * check SO_ERROR.
1145 */
1146#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD)
1147# define CONNECT_CHECK_ERROR POLLOUT
1148#else
1149# define CONNECT_CHECK_ERROR (POLLERR | POLLHUP)
1150#endif
1151
1152 /*
1153 * Check the cause of the failure so that pxtcp_pcb_reject() may
1154 * behave accordingly.
1155 */
1156 if (revents & CONNECT_CHECK_ERROR) {
1157 socklen_t optlen = (socklen_t)sizeof(pxtcp->sockerr);
1158 int status;
1159 SOCKET s;
1160
1161 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1162 (char *)&pxtcp->sockerr, &optlen);
1163 if (RT_UNLIKELY(status == SOCKET_ERROR)) { /* should not happen */
1164 DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
1165 __func__, fd, SOCKERRNO()));
1166 pxtcp->sockerr = ETIMEDOUT;
1167 }
1168 else {
1169 /* don't spam this log on successful connect(2) */
1170 if ((revents & (POLLERR | POLLHUP)) /* we were told it's failed */
1171 || pxtcp->sockerr != 0) /* we determined it's failed */
1172 {
1173 DPRINTF(("%s: sock %d: connect: %R[sockerr]\n",
1174 __func__, fd, pxtcp->sockerr));
1175 }
1176
1177 if ((revents & (POLLERR | POLLHUP))
1178 && RT_UNLIKELY(pxtcp->sockerr == 0))
1179 {
1180 /* if we're told it's failed, make sure it's marked as such */
1181 pxtcp->sockerr = ETIMEDOUT;
1182 }
1183 }
1184
1185 if (pxtcp->sockerr != 0) {
1186 s = pxtcp->sock;
1187 pxtcp->sock = INVALID_SOCKET;
1188 closesocket(s);
1189 return pxtcp_schedule_reject(pxtcp);
1190 }
1191 }
1192
1193 if (revents & POLLOUT) { /* connect is successful */
1194 /* confirm accept to the guest */
1195 proxy_lwip_post(&pxtcp->msg_accept);
1196
1197 /*
1198 * Switch to common callback used for all established proxied
1199 * connections.
1200 */
1201 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1202
1203 /*
1204 * Initially we poll for incoming traffic only. Outgoing
1205 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1206 * it will ask us to poll for POLLOUT too.
1207 */
1208 pxtcp->events = POLLIN;
1209 return pxtcp->events;
1210 }
1211
1212 /* should never get here */
1213 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1214 __func__, (void *)pxtcp, fd, revents));
1215 return pxtcp_schedule_reset(pxtcp);
1216}
1217
1218
1219/**
1220 * Called from poll manager thread via pxtcp::msg_accept when proxy
1221 * connected to the destination. Finalize accept by sending SYN|ACK
1222 * to the guest.
1223 */
1224static void
1225pxtcp_pcb_accept_confirm(void *ctx)
1226{
1227 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1228 err_t error;
1229
1230 LWIP_ASSERT1(pxtcp != NULL);
1231 if (pxtcp->pcb == NULL) {
1232 return;
1233 }
1234
1235 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1236 if (pxtcp->unsent != NULL) {
1237 pbuf_free(pxtcp->unsent);
1238 pxtcp->unsent = NULL;
1239 }
1240
1241 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1242
1243 /*
1244 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1245 * abandons the pcb. Retrying that is not very easy, since it
1246 * would require keeping "fractional state". From the guest's point
1247 * of view there is no reply to its SYN, so it will either resend
1248 * the SYN (effectively triggering a full connection retry for us),
1249 * or it will eventually time out.
1250 */
1251 if (error == ERR_ABRT) {
1252 pxtcp->pcb = NULL; /* pcb is gone */
1253 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1254 }
1255
1256 /*
1257 * else if (error != ERR_OK): even if tcp_output() failed with
1258 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1259 * retransmitted eventually.
1260 */
1261}
1262
1263
1264/**
1265 * Entry point for port-forwarding.
1266 *
1267 * fwtcp accepts a new incoming connection, creates a pxtcp for the socket
1268 * (with no pcb yet) and adds it to the poll manager (polling for
1269 * errors only). Then it calls this function to construct the pcb and
1270 * perform the connection to the guest.
1271 */
1272void
1273pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1274{
1275 struct sockaddr_storage ss;
1276 socklen_t sslen;
1277 struct tcp_pcb *pcb;
1278 ipX_addr_t src_addr, dst_addr;
1279 u16_t src_port, dst_port;
1280 int status;
1281 err_t error;
1282
1283 LWIP_ASSERT1(pxtcp != NULL);
1284 LWIP_ASSERT1(pxtcp->pcb == NULL);
1285 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1286
1287 pcb = tcp_new();
1288 if (pcb == NULL) {
1289 goto reset;
1290 }
1291
1292 tcp_setprio(pcb, TCP_PRIO_MAX);
1293 pxtcp_pcb_associate(pxtcp, pcb);
1294
1295 sslen = sizeof(ss);
1296 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1297 if (status == SOCKET_ERROR) {
1298 goto reset;
1299 }
1300
1301 /* nit: compares PF and AF, but they are the same everywhere */
1302 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1303
1304 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1305 if (status == PXREMAP_FAILED) {
1306 goto reset;
1307 }
1308
1309 if (ss.ss_family == PF_INET) {
1310 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1311
1312 src_port = peer4->sin_port;
1313
1314 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1315 dst_port = fwspec->dst.sin.sin_port;
1316 }
1317 else { /* PF_INET6 */
1318 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1319 ip_set_v6(pcb, 1);
1320
1321 src_port = peer6->sin6_port;
1322
1323 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1324 dst_port = fwspec->dst.sin6.sin6_port;
1325 }
1326
1327 /* lwip port arguments are in host order */
1328 src_port = ntohs(src_port);
1329 dst_port = ntohs(dst_port);
1330
1331 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1332 if (error != ERR_OK) {
1333 goto reset;
1334 }
1335
1336 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1337 /* callback: */ pxtcp_pcb_connected);
1338 if (error != ERR_OK) {
1339 goto reset;
1340 }
1341
1342 return;
1343
1344 reset:
1345 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1346}
1347
1348
1349/**
1350 * Port-forwarded connection to guest is successful, pump data.
1351 */
1352static err_t
1353pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1354{
1355 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1356
1357 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1358 LWIP_UNUSED_ARG(error);
1359
1360 LWIP_ASSERT1(pxtcp != NULL);
1361 LWIP_ASSERT1(pxtcp->pcb == pcb);
1362 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1363 LWIP_UNUSED_ARG(pcb);
1364
1365 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1366 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1367
1368 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1369 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1370
1371 return ERR_OK;
1372}
1373
1374
1375/**
1376 * tcp_recv() callback.
1377 */
1378static err_t
1379pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1380{
1381 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1382
1383 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1384 LWIP_UNUSED_ARG(error);
1385
1386 LWIP_ASSERT1(pxtcp != NULL);
1387 LWIP_ASSERT1(pxtcp->pcb == pcb);
1388 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1389 LWIP_UNUSED_ARG(pcb);
1390
1391
1392 /*
1393 * Have we done sending previous batch?
1394 */
1395 if (pxtcp->unsent != NULL) {
1396 if (p != NULL) {
1397 /*
1398 * Return an error to tell TCP to hold onto that pbuf.
1399 * It will be presented to us later from tcp_fasttmr().
1400 */
1401 return ERR_WOULDBLOCK;
1402 }
1403 else {
1404 /*
1405 * Unlike data, p == NULL indicating orderly shutdown is
1406 * NOT presented to us again
1407 */
1408 pxtcp->outbound_close = 1;
1409 return ERR_OK;
1410 }
1411 }
1412
1413
1414 /*
1415 * Guest closed?
1416 */
1417 if (p == NULL) {
1418 pxtcp->outbound_close = 1;
1419 pxtcp_pcb_forward_outbound_close(pxtcp);
1420 return ERR_OK;
1421 }
1422
1423
1424 /*
1425 * Got data, send what we can without blocking.
1426 */
1427 return pxtcp_pcb_forward_outbound(pxtcp, p);
1428}
1429
1430
1431/**
1432 * Guest half-closed its TX side of the connection.
1433 *
1434 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1435 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1436 * previously unsent data and sees pxtcp::outbound_close flag saved by
1437 * pxtcp_pcb_recv().
1438 */
1439static void
1440pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1441{
1442 struct tcp_pcb *pcb;
1443
1444 LWIP_ASSERT1(pxtcp != NULL);
1445 LWIP_ASSERT1(pxtcp->outbound_close);
1446 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1447
1448 pcb = pxtcp->pcb;
1449 LWIP_ASSERT1(pcb != NULL);
1450
1451 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1452 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1453
1454
1455 /* set the flag first, since shutdown() may trigger POLLHUP */
1456 pxtcp->outbound_close_done = 1;
1457 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1458
1459#if !(HAVE_TCP_POLLHUP & POLLOUT)
1460 /*
1461 * We need to nudge poll manager manually, since OS will not
1462 * report POLLHUP.
1463 */
1464 if (pxtcp->inbound_close) {
1465 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1466 }
1467#endif
1468
1469
1470 /* no more outbound data coming to us */
1471 tcp_recv(pcb, NULL);
1472
1473 /*
1474 * If we have already done inbound close previously (active close
1475 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1476 * state since those will be recycled by lwip when it runs out of
1477 * free pcbs in the pool.
1478 *
1479 * The test is true also for a pcb in CLOSING state that waits
1480 * just for the ACK of its FIN (to transition to TIME_WAIT).
1481 */
1482 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1483 pxtcp_pcb_dissociate(pxtcp);
1484 }
1485}
1486
1487
1488/**
1489 * Forward outbound data from pcb to socket.
1490 *
1491 * Called by pxtcp_pcb_recv() to forward new data and by the callout
1492 * triggered by POLLOUT on the socket to send previously unsent data.
1493 *
1494 * (Re)schedules a one-time callout if not all data are sent.
1495 */
1496static err_t
1497pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1498{
1499 struct pbuf *qs, *q;
1500 size_t qoff;
1501 size_t forwarded;
1502 int sockerr;
1503
1504 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1505
1506 forwarded = 0;
1507 sockerr = 0;
1508
1509 q = NULL;
1510 qoff = 0;
1511
1512 qs = p;
1513 while (qs != NULL) {
1514 IOVEC iov[8];
1515 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1516 size_t fwd1;
1517 ssize_t nsent;
1518 size_t i;
1519
1520 fwd1 = 0;
1521 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1522 LWIP_ASSERT1(q->len > 0);
1523 IOVEC_SET_BASE(iov[i], q->payload);
1524 IOVEC_SET_LEN(iov[i], q->len);
1525 fwd1 += q->len;
1526 }
1527
1528 /*
1529 * TODO: This is where an application-level proxy can hook in
1530 * to process outbound traffic.
1531 */
1532 nsent = pxtcp_sock_send(pxtcp, iov, i);
1533
1534 if (nsent == (ssize_t)fwd1) {
1535 /* successfully sent this chain fragment completely */
1536 forwarded += nsent;
1537 qs = q;
1538 }
1539 else if (nsent >= 0) {
1540 /* successfully sent only some data */
1541 forwarded += nsent;
1542
1543 /* find the first pbuf that was not completely forwarded */
1544 qoff = nsent;
1545 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1546 if (qoff < q->len) {
1547 break;
1548 }
1549 qoff -= q->len;
1550 }
1551 LWIP_ASSERT1(q != NULL);
1552 LWIP_ASSERT1(qoff < q->len);
1553 break;
1554 }
1555 else {
1556 sockerr = -nsent;
1557
1558 /*
1559 * Some errors are really not errors - if we get them,
1560 * it's not different from getting nsent == 0, so filter
1561 * them out here.
1562 */
1563 if (proxy_error_is_transient(sockerr)) {
1564 sockerr = 0;
1565 }
1566 q = qs;
1567 qoff = 0;
1568 break;
1569 }
1570 }
1571
1572 if (forwarded > 0) {
1573 DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: sent %d bytes\n",
1574 (void *)pxtcp, (void *)pxtcp->pcb, (int)forwarded));
1575 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1576 }
1577
1578 if (q == NULL) { /* everything is forwarded? */
1579 LWIP_ASSERT1(sockerr == 0);
1580 LWIP_ASSERT1(forwarded == p->tot_len);
1581
1582 pxtcp->unsent = NULL;
1583 pbuf_free(p);
1584 if (pxtcp->outbound_close) {
1585 pxtcp_pcb_forward_outbound_close(pxtcp);
1586 }
1587 }
1588 else {
1589 if (q != p) {
1590 /* free forwarded pbufs at the beginning of the chain */
1591 pbuf_ref(q);
1592 pbuf_free(p);
1593 }
1594 if (qoff > 0) {
1595 /* advance payload pointer past the forwarded part */
1596 pbuf_header(q, -(s16_t)qoff);
1597 }
1598 pxtcp->unsent = q;
1599 DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: kept %d bytes\n",
1600 (void *)pxtcp, (void *)pxtcp->pcb, (int)q->tot_len));
1601
1602 /*
1603 * Have sendmsg() failed?
1604 *
1605 * Connection reset will be detected by poll and
1606 * pxtcp_schedule_reset() will be called.
1607 *
1608 * Otherwise something *really* unexpected must have happened,
1609 * so we'd better abort.
1610 */
1611 if (sockerr != 0 && sockerr != ECONNRESET) {
1612 struct tcp_pcb *pcb = pxtcp->pcb;
1613 DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: %R[sockerr]\n",
1614 (void *)pxtcp, (void *)pcb, sockerr));
1615
1616 pxtcp_pcb_dissociate(pxtcp);
1617
1618 tcp_abort(pcb);
1619
1620 /* call error callback manually since we've already dissociated */
1621 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1622 return ERR_ABRT;
1623 }
1624
1625 /* schedule one-shot POLLOUT on the socket */
1626 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1627 }
1628 return ERR_OK;
1629}
1630
1631
1632#if !defined(RT_OS_WINDOWS)
1633static ssize_t
1634pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1635{
1636 struct msghdr mh;
1637 ssize_t nsent;
1638
1639#ifdef MSG_NOSIGNAL
1640 const int send_flags = MSG_NOSIGNAL;
1641#else
1642 const int send_flags = 0;
1643#endif
1644
1645 memset(&mh, 0, sizeof(mh));
1646
1647 mh.msg_iov = iov;
1648 mh.msg_iovlen = iovlen;
1649
1650 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1651 if (nsent < 0) {
1652 nsent = -SOCKERRNO();
1653 }
1654
1655 return nsent;
1656}
1657#else /* RT_OS_WINDOWS */
1658static ssize_t
1659pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1660{
1661 DWORD nsent;
1662 int status;
1663
1664 status = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &nsent,
1665 0, NULL, NULL);
1666 if (status == SOCKET_ERROR) {
1667 return -SOCKERRNO();
1668 }
1669
1670 return nsent;
1671}
1672#endif /* RT_OS_WINDOWS */
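/*
 * NB: IOVEC (from winutils.h) is presumably struct iovec on POSIX hosts and
 * WSABUF on Windows, which is why the same IOVEC_SET_BASE()/IOVEC_SET_LEN()
 * call sites can feed either sendmsg(2) above or WSASend() here.
 */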
1673
1674
1675/**
1676 * Callback from poll manager (on POLLOUT) to send data from
1677 * pxtcp::unsent pbuf to socket.
1678 */
1679static void
1680pxtcp_pcb_write_outbound(void *ctx)
1681{
1682 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1683 LWIP_ASSERT1(pxtcp != NULL);
1684
1685 if (pxtcp->pcb == NULL) {
1686 return;
1687 }
1688
1689 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1690}
1691
1692
1693/**
1694 * Common poll manager callback used by both outgoing and incoming
1695 * (port-forwarded) connections that have a connected socket.
1696 */
1697static int
1698pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1699{
1700 struct pxtcp *pxtcp;
1701 int status;
1702 int sockerr;
1703 RT_NOREF(fd);
1704
1705 pxtcp = (struct pxtcp *)handler->data;
1706 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1707 LWIP_ASSERT1(fd == pxtcp->sock);
1708
1709 if (revents & POLLNVAL) {
1710 pxtcp->sock = INVALID_SOCKET;
1711 return pxtcp_schedule_reset(pxtcp);
1712 }
1713
1714 if (revents & POLLERR) {
1715 socklen_t optlen = (socklen_t)sizeof(sockerr);
1716
1717 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1718 (char *)&sockerr, &optlen);
1719 if (status == SOCKET_ERROR) { /* should not happen */
1720 DPRINTF(("sock %d: POLLERR: SO_ERROR failed: %R[sockerr]\n",
1721 fd, SOCKERRNO()));
1722 }
1723 else {
1724 DPRINTF0(("sock %d: POLLERR: %R[sockerr]\n", fd, sockerr));
1725 }
1726 return pxtcp_schedule_reset(pxtcp);
1727 }
1728
1729 if (revents & POLLOUT) {
1730 pxtcp->events &= ~POLLOUT;
1731 proxy_lwip_post(&pxtcp->msg_outbound);
1732 }
1733
1734 if (revents & POLLIN) {
1735 ssize_t nread;
1736 int stop_pollin;
1737
1738 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1739 if (nread < 0) {
1740 sockerr = -(int)nread;
1741 DPRINTF0(("sock %d: POLLIN: %R[sockerr]\n", fd, sockerr));
1742 return pxtcp_schedule_reset(pxtcp);
1743 }
1744
1745 if (stop_pollin) {
1746 pxtcp->events &= ~POLLIN;
1747 }
1748
1749 if (nread > 0) {
1750 proxy_lwip_post(&pxtcp->msg_inbound);
1751#if !HAVE_TCP_POLLHUP
1752 /*
1753 * If the host does not report POLLHUP for closed sockets
1754 * (e.g. NetBSD) we should check for full close manually.
1755 */
1756 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1757 LWIP_ASSERT1((revents & POLLHUP) == 0);
1758 return pxtcp_schedule_delete(pxtcp);
1759 }
1760#endif
1761 }
1762 }
1763
1764#if !HAVE_TCP_POLLHUP
1765 LWIP_ASSERT1((revents & POLLHUP) == 0);
1766#else
1767 if (revents & POLLHUP) {
1768 DPRINTF(("sock %d: HUP\n", fd));
1769
1770#if HAVE_TCP_POLLHUP == POLLIN
1771 /*
1772 * XXX: OSX reports POLLHUP once more when inbound is already
1773 * half-closed (which has already been reported as a "normal"
1774 * POLLHUP, handled below), the socket is polled for POLLOUT
1775 * (guest sends a lot of data that we can't push out fast
1776 * enough), and remote sends a reset - e.g. an http client
1777 * that half-closes after request and then aborts the transfer.
1778 *
1779 * It really should have been reported as POLLERR, but it
1780 * seems OSX never reports POLLERR for sockets.
1781 */
1782#if defined(RT_OS_DARWIN)
1783 {
1784 socklen_t optlen = (socklen_t)sizeof(sockerr);
1785
1786 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1787 (char *)&sockerr, &optlen);
1788 if (status == SOCKET_ERROR) { /* should not happen */
1789 DPRINTF(("sock %d: POLLHUP: SO_ERROR failed: %R[sockerr]\n",
1790 fd, SOCKERRNO()));
1791 sockerr = ECONNRESET;
1792 }
1793 else if (sockerr != 0) {
1794 DPRINTF0(("sock %d: POLLHUP: %R[sockerr]\n", fd, sockerr));
1795 }
1796
1797 if (sockerr != 0) { /* XXX: should have been POLLERR */
1798 return pxtcp_schedule_reset(pxtcp);
1799 }
1800 }
1801#endif /* RT_OS_DARWIN */
1802
1803 /*
1804 * Remote closed inbound.
1805 */
1806 if (!pxtcp->outbound_close_done) {
1807 /*
1808 * We might still need to poll for POLLOUT, but we can not
1809 * poll for POLLIN anymore (even if not all data are read)
1810 * because we will be spammed by POLLHUP.
1811 */
1812 pxtcp->events &= ~POLLIN;
1813 if (!pxtcp->inbound_close) {
1814 /* the rest of the input has to be pulled */
1815 proxy_lwip_post(&pxtcp->msg_inpull);
1816 }
1817 }
1818 else
1819#endif
1820 /*
1821 * Both directions are closed.
1822 */
1823 {
1824 LWIP_ASSERT1(pxtcp->outbound_close_done);
1825
1826 if (pxtcp->inbound_close) {
1827 /* there's no unread data, we are done */
1828 return pxtcp_schedule_delete(pxtcp);
1829 }
1830 else {
1831 /* pull the rest of the input first (deferred_delete) */
1832 pxtcp->pmhdl.slot = -1;
1833 proxy_lwip_post(&pxtcp->msg_inpull);
1834 return -1;
1835 }
1836 /* NOTREACHED */
1837 }
1838
1839 }
1840#endif /* HAVE_TCP_POLLHUP */
1841
1842 return pxtcp->events;
1843}
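/*
 * Roughly, the POLLHUP cases above break down per host family (see the
 * HAVE_TCP_POLLHUP definition at the top of the file):
 *
 *  - 0 (NetBSD, Solaris): POLLHUP is never reported; full close is detected
 *    manually after reads, or nudged via the POLLMGR_CHAN_PXTCP_DEL channel.
 *  - POLLIN (Darwin, Windows): POLLHUP arrives as soon as the remote sends
 *    its FIN, so any remaining input has to be pulled on the lwIP thread
 *    via msg_inpull instead of being pushed from here.
 *  - POLLIN|POLLOUT (other hosts): POLLHUP is only reported once both
 *    directions are closed, at which point deletion is scheduled (after
 *    pulling any still-unread input).
 */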
1844
1845
1846/**
1847 * Read data from socket to ringbuf. This may be used both on lwip
1848 * and poll manager threads.
1849 *
1850 * The flag pointed to by pstop is set when further reading is impossible,
1851 * either temporarily when the buffer is full, or permanently when EOF is
1852 * received.
1853 *
1854 * Returns number of bytes read. NB: EOF is reported as 1!
1855 *
1856 * Returns zero if nothing was read, either because buffer is full, or
1857 * if no data is available (EWOULDBLOCK, EINTR &c).
1858 *
1859 * Returns -errno on real socket errors.
1860 */
1861static ssize_t
1862pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1863{
1864 IOVEC iov[2];
1865 size_t iovlen;
1866 ssize_t nread;
1867
1868 const size_t sz = pxtcp->inbuf.bufsize;
1869 size_t beg, lim, wrnew;
1870
1871 *pstop = 0;
1872
1873 beg = pxtcp->inbuf.vacant;
1874 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1875
1876 /* lim is the index we can NOT write to */
1877 lim = pxtcp->inbuf.unacked;
1878 if (lim == 0) {
1879 lim = sz - 1; /* empty slot at the end */
1880 }
1881 else if (lim == 1 && beg != 0) {
1882 lim = sz; /* empty slot at the beginning */
1883 }
1884 else {
1885 --lim;
1886 }
1887
1888 if (beg == lim) {
1889 /*
1890 * Buffer is full, stop polling for POLLIN.
1891 *
1892 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1893 * data, freeing space in the ring buffer.
1894 */
1895 *pstop = 1;
1896 return 0;
1897 }
1898
1899 if (beg < lim) {
1900 /* free space in one chunk */
1901 iovlen = 1;
1902 IOVEC_SET_LEN(iov[0], lim - beg);
1903 }
1904 else {
1905 /* free space in two chunks */
1906 iovlen = 2;
1907 IOVEC_SET_LEN(iov[0], sz - beg);
1908 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1909 IOVEC_SET_LEN(iov[1], lim);
1910 }
1911
1912 /*
1913 * TODO: This is where an application-level proxy can hook in to
1914 * process inbound traffic.
1915 */
1916 nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
1917
1918 if (nread > 0) {
1919 wrnew = beg + nread;
1920 if (wrnew >= sz) {
1921 wrnew -= sz;
1922 }
1923 pxtcp->inbuf.vacant = wrnew;
1924 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1925 (void *)pxtcp, pxtcp->sock, (int)nread));
1926 return nread;
1927 }
1928 else if (nread == 0) {
1929 *pstop = 1;
1930 pxtcp->inbound_close = 1;
1931 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1932 (void *)pxtcp, pxtcp->sock));
1933 return 1;
1934 }
1935 else {
1936 int sockerr = -nread;
1937
1938 if (proxy_error_is_transient(sockerr)) {
1939 /* haven't read anything, just return */
1940 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1941 (void *)pxtcp, pxtcp->sock));
1942 return 0;
1943 }
1944 else {
1945 /* socket error! */
1946 DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n",
1947 (void *)pxtcp, pxtcp->sock, sockerr));
1948 return -sockerr;
1949 }
1950 }
1951}
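/*
 * Worked example of the free-space ("lim") computation above, with a purely
 * illustrative bufsize of 8: say unacked == 3 and vacant == 6.  Then
 * lim = unacked - 1 = 2, so one slot (index 2) is kept as a sentinel gap and
 * the free space is the two chunks [6, 8) and [0, 2) - i.e. exactly the
 * "beg > lim" two-iovec case.  The sentinel gap is what lets
 * "vacant == unacked" always mean "empty" rather than "full".
 */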
1952
1953
1954#if !defined(RT_OS_WINDOWS)
1955static ssize_t
1956pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1957{
1958 struct msghdr mh;
1959 ssize_t nread;
1960
1961 memset(&mh, 0, sizeof(mh));
1962
1963 mh.msg_iov = iov;
1964 mh.msg_iovlen = iovlen;
1965
1966 nread = recvmsg(pxtcp->sock, &mh, 0);
1967 if (nread < 0) {
1968 nread = -SOCKERRNO();
1969 }
1970
1971 return nread;
1972}
1973#else /* RT_OS_WINDOWS */
1974static ssize_t
1975pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1976{
1977 DWORD flags;
1978 DWORD nread;
1979 int status;
1980
1981 flags = 0;
1982 status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread,
1983 &flags, NULL, NULL);
1984 if (status == SOCKET_ERROR) {
1985 return -SOCKERRNO();
1986 }
1987
1988 return (ssize_t)nread;
1989}
1990#endif /* RT_OS_WINDOWS */
1991
1992
1993/**
1994 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1995 * from ringbuf to guest.
1996 */
1997static void
1998pxtcp_pcb_write_inbound(void *ctx)
1999{
2000 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2001 LWIP_ASSERT1(pxtcp != NULL);
2002
2003 if (pxtcp->pcb == NULL) {
2004 return;
2005 }
2006
2007 pxtcp_pcb_forward_inbound(pxtcp);
2008}
2009
2010
2011/**
2012 * tcp_poll() callback
2013 *
2014 * We switch it on when tcp_write() or tcp_shutdown() fail with
2015 * ERR_MEM to prevent the connection from stalling. If there are ACKs or
2016 * more inbound data then pxtcp_pcb_forward_inbound() will be
2017 * triggered again, but if neither happens, tcp_poll() comes to the
2018 * rescue.
2019 */
2020static err_t
2021pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
2022{
2023 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2024 LWIP_UNUSED_ARG(pcb);
2025
2026 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
2027 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2028
2029 pxtcp_pcb_forward_inbound(pxtcp);
2030
2031 /*
2032 * If the last thing holding up deletion of the pxtcp was failed
2033 * tcp_shutdown() and it succeeded, we may be the last callback.
2034 */
2035 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2036
2037 return ERR_OK;
2038}
2039
2040
2041static void
2042pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
2043{
2044 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
2045}
2046
2047
2048static void
2049pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
2050{
2051 tcp_poll(pxtcp->pcb, NULL, 255);
2052}
2053
2054
2055/**
2056 * Forward inbound data from ring buffer to the guest.
2057 *
2058 * Scheduled by poll manager thread after it receives more data into
2059 * the ring buffer (we have more data to send).
2060 *
2061 * Also called from tcp_sent() callback when guest ACKs some data,
2062 * increasing pcb->snd_buf (we are permitted to send more data).
2063 *
2064 * Also called from tcp_poll() callback if previous attempt to forward
2065 * inbound data failed with ERR_MEM (we need to try again).
2066 */
2067static void
2068pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
2069{
2070 struct tcp_pcb *pcb;
2071 size_t sndbuf;
2072 size_t beg, lim, sndlim;
2073 size_t toeob, tolim;
2074 size_t nsent;
2075 err_t error;
2076
2077 LWIP_ASSERT1(pxtcp != NULL);
2078 pcb = pxtcp->pcb;
2079 if (pcb == NULL) {
2080 return;
2081 }
2082
2083 if (/* __predict_false */ pcb->state < ESTABLISHED) {
2084 /*
2085 * If we have just confirmed accept of this connection, the
2086 * pcb is in SYN_RCVD state and we still haven't received the
2087 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
2088 * transition that lwip decrements pcb->acked so that that ACK
2089 * is not reported to pxtcp_pcb_sent(). If we send something
2090 * now and immediately close (think "daytime", e.g.) while
2091 * still in SYN_RCVD state, we will move directly to
2092 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
2093 * report it to pxtcp_pcb_sent().
2094 */
2095 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
2096 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2097 return;
2098 }
2099
2100
2101 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
2102 lim = pxtcp->inbuf.vacant;
2103
2104 if (beg == lim) {
2105 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
2106 pxtcp_pcb_forward_inbound_close(pxtcp);
2107 tcp_output(pcb);
2108 return;
2109 }
2110
2111 /*
2112 * Else, there's no data to send.
2113 *
2114 * If there is free space in the buffer, the producer will
2115 * reschedule us as it receives more data and vacant (lim)
2116 * advances.
2117 *
2118 * If the buffer is full because all data have been passed to
2119 * tcp_write() but not yet acknowledged, we will advance
2120 * unacked on ACK, freeing some space for the producer to write
2121 * to (then see above).
2122 */
2123 return;
2124 }
2125
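/*
 * tcp_sndbuf() reports how many bytes lwip is currently willing to
 * accept via tcp_write().
 */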
2126 sndbuf = tcp_sndbuf(pcb);
2127 if (sndbuf == 0) {
2128 /*
2129 * Can't send anything now. As guest ACKs some data, TCP will
2130 * call pxtcp_pcb_sent() callback and we will come here again.
2131 */
2132 return;
2133 }
2134
2135 nsent = 0;
2136
2137 /*
2138 * We have three limits to consider:
2139 * - how much data we have in the ringbuf
2140 * - how much data we are allowed to send
2141 * - ringbuf size
2142 */
2143 toeob = pxtcp->inbuf.bufsize - beg;
2144 if (lim < beg) { /* lim wrapped */
2145 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2146 /* so beg is not going to wrap, treat sndbuf as lim */
2147 lim = beg + sndbuf; /* ... and proceed to the simple case */
2148 }
2149 else { /* we are limited by the end of the buffer, beg will wrap */
2150 u8_t maybemore;
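/*
 * Set TCP_WRITE_FLAG_MORE only if a second tcp_write() for the
 * wrapped chunk will follow, i.e. there is still send buffer space
 * left and there is data after the wrap.
 */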
2151 if (toeob == sndbuf || lim == 0) {
2152 maybemore = 0;
2153 }
2154 else {
2155 maybemore = TCP_WRITE_FLAG_MORE;
2156 }
2157
2158 Assert(toeob == (u16_t)toeob);
2159 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)toeob, maybemore);
2160 if (error != ERR_OK) {
2161 goto writeerr;
2162 }
2163 nsent += toeob;
2164 pxtcp->inbuf.unsent = 0; /* wrap */
2165
2166 if (maybemore) {
2167 beg = 0;
2168 sndbuf -= toeob;
2169 }
2170 else {
2171 /* we are done sending, but ... */
2172 goto check_inbound_close;
2173 }
2174 }
2175 }
2176
2177 LWIP_ASSERT1(beg < lim);
2178 sndlim = beg + sndbuf;
2179 if (lim > sndlim) {
2180 lim = sndlim;
2181 }
2182 tolim = lim - beg;
2183 if (tolim > 0) {
2184 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2185 if (error != ERR_OK) {
2186 goto writeerr;
2187 }
2188 nsent += tolim;
2189 pxtcp->inbuf.unsent = lim;
2190 }
2191
2192 check_inbound_close:
2193 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2194 pxtcp_pcb_forward_inbound_close(pxtcp);
2195 }
2196
2197 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2198 (void *)pxtcp, (void *)pcb, (int)nsent));
2199 tcp_output(pcb);
2200 pxtcp_pcb_cancel_poll(pxtcp);
2201 return;
2202
2203 writeerr:
2204 if (error == ERR_MEM) {
2205 if (nsent > 0) { /* first write succeeded, second failed */
2206 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2207 (void *)pxtcp, (void *)pcb, (int)nsent));
2208 tcp_output(pcb);
2209 }
2210 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2211 (void *)pxtcp, (void *)pcb));
2212 pxtcp_pcb_schedule_poll(pxtcp);
2213 }
2214 else {
2215 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2216 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2217
2218 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2219 LWIP_ASSERT1(error == ERR_MEM);
2220 }
2221}
2222
2223
2224static void
2225pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2226{
2227 struct tcp_pcb *pcb;
2228 err_t error;
2229
2230 LWIP_ASSERT1(pxtcp != NULL);
2231 LWIP_ASSERT1(pxtcp->inbound_close);
2232 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2233 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2234
2235 pcb = pxtcp->pcb;
2236 LWIP_ASSERT1(pcb != NULL);
2237
2238 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2239 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2240
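/*
 * Shut down only our TX side: this queues a FIN to the guest while
 * we can still receive outbound data from it.
 */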
2241 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2242 if (error != ERR_OK) {
2243 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2244 " tcp_shutdown: error=%s\n",
2245 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2246 pxtcp_pcb_schedule_poll(pxtcp);
2247 return;
2248 }
2249
2250 pxtcp_pcb_cancel_poll(pxtcp);
2251 pxtcp->inbound_close_done = 1;
2252
2253
2254 /*
2255 * If we have already done outbound close previously (passive
2256 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2257 * state since those will be deleted by lwip when that last ack
2258 * comes from the guest.
2259 *
2260 * NB: We do NOT check for deferred delete here, even though we
2261 * have just set one of its conditions, inbound_close_done. We
2262 * let pcb callbacks that called us do that. It's simpler and
2263 * cleaner that way.
2264 */
2265 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2266 pxtcp_pcb_dissociate(pxtcp);
2267 }
2268}
2269
2270
2271/**
2272 * Check that all forwarded inbound data is sent and acked, and that
2273 * inbound close is scheduled (we aren't called back when it's acked).
2274 */
2275DECLINLINE(int)
2276pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2277{
2278 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2279 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2280}
2281
2282
2283/**
2284 * tcp_sent() callback - guest acknowledged len bytes.
2285 *
2286 * We can advance inbuf::unacked index, making more free space in the
2287 * ringbuf and wake up producer on poll manager thread.
2288 *
2289 * We can also try to send more data if we have any since pcb->snd_buf
2290 * was increased and we are now permitted to send more.
2291 */
2292static err_t
2293pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2294{
2295 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2296 size_t unacked;
2297
2298 LWIP_ASSERT1(pxtcp != NULL);
2299 LWIP_ASSERT1(pxtcp->pcb == pcb);
2300 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2301 LWIP_UNUSED_ARG(pcb); /* only in assert */
2302
2303 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2304 " unacked %d, unsent %d, vacant %d\n",
2305 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2306 (int)pxtcp->inbuf.unacked,
2307 (int)pxtcp->inbuf.unsent,
2308 (int)pxtcp->inbuf.vacant));
2309
2310 if (/* __predict_false */ len == 0) {
2311 /* we are notified to start pulling */
2312 LWIP_ASSERT1(!pxtcp->inbound_close);
2313 LWIP_ASSERT1(pxtcp->inbound_pull);
2314
2315 unacked = pxtcp->inbuf.unacked;
2316 }
2317 else {
2318 /*
2319 * Advance unacked index. Guest acknowledged the data, so it
2320 * won't be needed again for potential retransmits.
2321 */
2322 unacked = pxtcp->inbuf.unacked + len;
2323 if (unacked > pxtcp->inbuf.bufsize) {
2324 unacked -= pxtcp->inbuf.bufsize;
2325 }
2326 pxtcp->inbuf.unacked = unacked;
2327 }
2328
2329 /* arrange for more inbound data */
2330 if (!pxtcp->inbound_close) {
2331 if (!pxtcp->inbound_pull) {
2332 /* wake up producer, in case it has stopped polling for POLLIN */
2333 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2334#ifdef RT_OS_WINDOWS
2335 /**
2336 * We haven't got enough room in the ring buffer to read at the
2337 * moment, but we don't want to lose the notification from WSAW4ME
2338 * when space becomes available, so we reset the event with an empty recv.
2339 */
2340 recv(pxtcp->sock, NULL, 0, 0);
2341#endif
2342 }
2343 else {
2344 ssize_t nread;
2345 int stop_pollin; /* ignored */
2346
2347 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2348
2349 if (nread < 0) {
2350 int sockerr = -(int)nread;
2351 LWIP_UNUSED_ARG(sockerr);
2352 DPRINTF0(("%s: sock %d: %R[sockerr]\n",
2353 __func__, pxtcp->sock, sockerr));
2354
2355#if HAVE_TCP_POLLHUP == POLLIN /* see counterpart in pxtcp_pmgr_pump() */
2356 /*
2357 * It may still be registered with poll manager for POLLOUT.
2358 */
2359 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2360 return ERR_OK;
2361#else
2362 /*
2363 * It is no longer registered with poll manager so we
2364 * can kill it directly.
2365 */
2366 pxtcp_pcb_reset_pxtcp(pxtcp);
2367 return ERR_ABRT;
2368#endif
2369 }
2370 }
2371 }
2372
2373 /* forward more data if we can */
2374 if (!pxtcp->inbound_close_done) {
2375 pxtcp_pcb_forward_inbound(pxtcp);
2376
2377 /*
2378 * NB: we might have dissociated from a pcb that transitioned
2379 * to LAST_ACK state, so don't refer to pcb below.
2380 */
2381 }
2382
2383
2384 /* have we got all the acks? */
2385 if (pxtcp->inbound_close /* no more new data */
2386 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2387 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2388 {
2389 char *buf;
2390
2391 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2392 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2393
2394 /* no more retransmits, so buf is not needed */
2395 buf = pxtcp->inbuf.buf;
2396 pxtcp->inbuf.buf = NULL;
2397 free(buf);
2398
2399 /* no more acks, so no more callbacks */
2400 if (pxtcp->pcb != NULL) {
2401 tcp_sent(pxtcp->pcb, NULL);
2402 }
2403
2404 /*
2405 * We may be the last callback for this pcb if we have also
2406 * successfully forwarded inbound_close.
2407 */
2408 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2409 }
2410
2411 return ERR_OK;
2412}
2413
2414
2415#if HAVE_TCP_POLLHUP
2416/**
2417 * Callback from poll manager (pxtcp::msg_inpull) to switch
2418 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2419 * POLLHUP comment in pxtcp_pmgr_pump().
2420 *
2421 * pxtcp::sock is deregistered from poll manager after this callback
2422 * is scheduled.
2423 */
2424static void
2425pxtcp_pcb_pull_inbound(void *ctx)
2426{
2427 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2428 LWIP_ASSERT1(pxtcp != NULL);
2429
2430 if (pxtcp->pcb == NULL) {
2431 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2432 pxtcp_pcb_reset_pxtcp(pxtcp);
2433 return;
2434 }
2435
2436 pxtcp->inbound_pull = 1;
2437 if (pxtcp->pmhdl.slot < 0) {
2438 DPRINTF(("%s: pxtcp %p: pcb %p (deferred delete)\n",
2439 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2440 pxtcp->deferred_delete = 1;
2441 }
2442 else {
2443 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2444 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2445 }
2446
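/* len == 0 is the "start pulling" notification understood by pxtcp_pcb_sent() */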
2447 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2448}
2449#endif /* HAVE_TCP_POLLHUP */
2450
2451
2452/**
2453 * tcp_err() callback.
2454 *
2455 * pcb is not passed to this callback since it may be already
2456 * deallocated by the stack, but we can't do anything useful with it
2457 * anyway since connection is gone.
2458 */
2459static void
2460pxtcp_pcb_err(void *arg, err_t error)
2461{
2462 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2463 LWIP_ASSERT1(pxtcp != NULL);
2464
2465 /*
2466 * ERR_CLSD is special - it is reported here when:
2467 *
2468 * . guest has already half-closed
2469 * . we send FIN to guest when external half-closes
2470 * . guest acks that FIN
2471 *
2472 * Since the connection is closed but receive has already been
2473 * closed, lwip can only report this via tcp_err. At this point the pcb
2474 * is still alive, so we can peek at it if need be.
2475 *
2476 * The interesting twist is when the ACK from the guest that acks our
2477 * FIN also acks some data. In this scenario lwip will NOT call the
2478 * tcp_sent() callback with the ACK for that last bit of data but
2479 * instead will call tcp_err with ERR_CLSD right away. Since that
2480 * ACK also acknowledges all the data, we should run some of
2481 * pxtcp_pcb_sent() logic here.
2482 */
2483 if (error == ERR_CLSD) {
2484 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2485
2486 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2487 " pcb->acked %d;"
2488 " unacked %d, unsent %d, vacant %d\n",
2489 (void *)pxtcp, (void *)pcb,
2490 pcb->acked,
2491 (int)pxtcp->inbuf.unacked,
2492 (int)pxtcp->inbuf.unsent,
2493 (int)pxtcp->inbuf.vacant));
2494
2495 LWIP_ASSERT1(pxtcp->pcb == pcb);
2496 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2497
2498 if (pcb->acked > 0) {
2499 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2500 }
2501 return;
2502 }
2503
2504 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2505 (void *)pxtcp, proxy_lwip_strerr(error)));
2506
2507 pxtcp->pcb = NULL; /* pcb is gone */
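/*
 * With deferred delete the socket is no longer registered with the
 * poll manager, so tear it down directly; otherwise notify the poll
 * manager thread to perform the reset.
 */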
2508 if (pxtcp->deferred_delete) {
2509 pxtcp_pcb_reset_pxtcp(pxtcp);
2510 }
2511 else {
2512 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2513 }
2514}