VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 52934

Last change on this file since 52934 was 52650, checked in by vboxsync, 10 years ago

NAT/Net: pxtcp_pmgr_connect: Solaris doesn't report either POLLERR or
POLLHUP for failed connect(2). Refactor code to always check SO_ERROR
on Solaris so that we can reject failed connection attempt immediately
instead of accepting it only to reset on the first socket operation.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 65.9 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2#define LOG_GROUP LOG_GROUP_NAT_SERVICE
3
4#include "winutils.h"
5
6#include "pxtcp.h"
7
8#include "proxy.h"
9#include "proxy_pollmgr.h"
10#include "pxremap.h"
11#include "portfwd.h" /* fwspec */
12
13#ifndef RT_OS_WINDOWS
14#include <sys/types.h>
15#include <sys/socket.h>
16#include <sys/ioctl.h>
17#ifdef RT_OS_SOLARIS
18#include <sys/filio.h> /* FIONREAD is BSD'ism */
19#endif
20#include <stdlib.h>
21#include <stdint.h>
22#include <stdio.h>
23#include <string.h>
24#include <poll.h>
25
26#include <err.h> /* BSD'ism */
27#else
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31
32#include <iprt/stdint.h>
33#include "winpoll.h"
34#endif
35
36#include "lwip/opt.h"
37
38#include "lwip/sys.h"
39#include "lwip/tcpip.h"
40#include "lwip/netif.h"
41#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
42#include "lwip/icmp.h"
43#include "lwip/icmp6.h"
44
45/*
46 * Different OSes have different quirks in reporting POLLHUP for TCP
47 * sockets.
48 *
49 * Using shutdown(2) "how" values here would be more readable, but
50 * since SHUT_RD is 0, we can't use 0 for "none", unfortunately.
51 */
52#if defined(RT_OS_NETBSD) || defined(RT_OS_SOLARIS)
53# define HAVE_TCP_POLLHUP 0 /* not reported */
54#elif defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS)
55# define HAVE_TCP_POLLHUP POLLIN /* reported when remote closes */
56#else
57# define HAVE_TCP_POLLHUP (POLLIN|POLLOUT) /* reported when both directions are closed */
58#endif
59
60
/**
 * Ring buffer for inbound data.  Filled with data from the host
 * socket on poll manager thread.  Data consumed by scheduling
 * tcp_write() to the pcb on the lwip thread.
 *
 * NB: There is actually a third party present, the lwip stack itself.
 * Thus the buffer doesn't have a two-way free vs. data split, but
 * rather a three-way split: free / sent but unACKed data / unsent
 * data.
 */
struct ringbuf {
    /* Backing storage and its size in bytes. */
    char *buf;
    size_t bufsize;

    /*
     * Start of free space, producer writes here (up till "unacked").
     */
    volatile size_t vacant;

    /*
     * Start of sent but unacknowledged data.  The data are "owned" by
     * the stack as it may need to retransmit.  This is the free space
     * limit for producer.
     */
    volatile size_t unacked;

    /*
     * Start of unsent data, consumer reads/sends from here (up till
     * "vacant").  Not declared volatile since it's only accessed from
     * the consumer thread.
     */
    size_t unsent;
};
93
94
/**
 * State of a single proxied TCP connection.  Ties together the lwIP
 * pcb (guest side), the host socket (external side), the poll manager
 * handler for that socket, the ring buffer for inbound data and the
 * static messages used to call functions on the lwIP thread.
 */
struct pxtcp {
    /**
     * Our poll manager handler.  Must be first, strong/weak
     * references depend on this "inheritance".
     */
    struct pollmgr_handler pmhdl;

    /**
     * lwIP (internal/guest) side of the proxied connection.
     */
    struct tcp_pcb *pcb;

    /**
     * Host (external) side of the proxied connection.
     */
    SOCKET sock;

    /**
     * Socket events we are currently polling for.
     */
    int events;

    /**
     * Socket error.  Currently used to save connect(2) errors so that
     * we can decide if we need to send ICMP error.
     */
    int sockerr;

    /**
     * Interface that we have got the SYN from.  Needed to send ICMP
     * with correct source address.
     */
    struct netif *netif;

    /**
     * For tentatively accepted connections for which we are in
     * process of connecting to the real destination this is the
     * initial pbuf that we might need to build ICMP error.
     *
     * When connection is established this is used to hold outbound
     * pbuf chain received by pxtcp_pcb_recv() but not yet completely
     * forwarded over the socket.  We cannot "return" it to lwIP since
     * the head of the chain is already sent and freed.
     */
    struct pbuf *unsent;

    /**
     * Guest has closed its side.  Reported to pxtcp_pcb_recv() only
     * once and we might not be able to forward it immediately if we
     * have unsent pbuf.
     */
    int outbound_close;

    /**
     * Outbound half-close has been done on the socket.
     */
    int outbound_close_done;

    /**
     * External has closed its side.  We might not be able to forward
     * it immediately if we have unforwarded data.
     */
    int inbound_close;

    /**
     * Inbound half-close has been done on the pcb.
     */
    int inbound_close_done;

    /**
     * On systems that report POLLHUP as soon as the final FIN is
     * received on a socket we cannot continue polling for the rest of
     * input, so we have to read (pull) last data from the socket on
     * the lwIP thread instead of polling/pushing it from the poll
     * manager thread.  See comment in pxtcp_pmgr_pump() POLLHUP case.
     */
    int inbound_pull;


    /**
     * When poll manager schedules delete we may not be able to delete
     * a pxtcp immediately if not all inbound data has been acked by
     * the guest: lwIP may need to resend and the data are in pxtcp's
     * inbuf::buf.  We defer delete until all data are acked to
     * pxtcp_pcb_sent().
     */
    int deferred_delete;

    /**
     * Ring-buffer for inbound data.
     */
    struct ringbuf inbuf;

    /**
     * lwIP thread's strong reference to us.
     */
    struct pollmgr_refptr *rp;


    /*
     * We use static messages to call functions on the lwIP thread to
     * avoid malloc/free overhead.
     */
    struct tcpip_msg msg_delete;   /* delete pxtcp */
    struct tcpip_msg msg_reset;    /* reset connection and delete pxtcp */
    struct tcpip_msg msg_accept;   /* confirm accept of proxied connection */
    struct tcpip_msg msg_outbound; /* trigger send of outbound data */
    struct tcpip_msg msg_inbound;  /* trigger send of inbound data */
    struct tcpip_msg msg_inpull;   /* trigger pull of last inbound data */
};
207
208
209
/* allocation and destruction of pxtcp */
static struct pxtcp *pxtcp_allocate(void);
static void pxtcp_free(struct pxtcp *);

/* binding pxtcp to / unbinding pxtcp from its lwIP pcb */
static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
static void pxtcp_pcb_dissociate(struct pxtcp *);

/* poll manager callbacks for pxtcp related channels */
static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
#if !(HAVE_TCP_POLLHUP & POLLOUT)
/* only needed if the host doesn't report POLLHUP for fully closed sockets */
static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
#endif
static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);

/* helper functions for sending/receiving pxtcp over poll manager channels */
static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);

/* poll manager callbacks for individual sockets */
static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);

/* get incoming traffic into ring buffer */
static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */

/* convenience functions for poll manager callbacks */
static int pxtcp_schedule_delete(struct pxtcp *);
static int pxtcp_schedule_reset(struct pxtcp *);
static int pxtcp_schedule_reject(struct pxtcp *);

/* lwip thread callbacks called via proxy_lwip_post() */
static void pxtcp_pcb_delete_pxtcp(void *);
static void pxtcp_pcb_reset_pxtcp(void *);
static void pxtcp_pcb_accept_refuse(void *);
static void pxtcp_pcb_accept_confirm(void *);
static void pxtcp_pcb_write_outbound(void *);
static void pxtcp_pcb_write_inbound(void *);
static void pxtcp_pcb_pull_inbound(void *);

/* tcp pcb callbacks */
static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
static void pxtcp_pcb_err(void *, err_t);

/* outbound direction: guest (pcb) -> host socket */
static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);

static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t);

/* inbound direction: host socket -> guest (pcb) */
static void pxtcp_pcb_forward_inbound(struct pxtcp *);
static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
static void pxtcp_pcb_schedule_poll(struct pxtcp *);
static void pxtcp_pcb_cancel_poll(struct pxtcp *);

/* connection rejection and deferred deletion */
static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);

/* poll manager handlers for pxtcp channels */
static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
#if !(HAVE_TCP_POLLHUP & POLLOUT)
static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
#endif
static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
284
285
286/**
287 * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll
288 * manager threads haven't been created yet.
289 */
290void
291pxtcp_init(void)
292{
293 /*
294 * Create channels.
295 */
296#define CHANNEL(SLOT, NAME) do { \
297 NAME##_hdl.callback = NAME; \
298 NAME##_hdl.data = NULL; \
299 NAME##_hdl.slot = -1; \
300 pollmgr_add_chan(SLOT, &NAME##_hdl); \
301 } while (0)
302
303 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
304 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
305 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
306#if !(HAVE_TCP_POLLHUP & POLLOUT)
307 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
308#endif
309 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
310
311#undef CHANNEL
312
313 /*
314 * Listen to outgoing connection from guest(s).
315 */
316 tcp_proxy_accept(pxtcp_pcb_heard);
317}
318
319
320/**
321 * Syntactic sugar for sending pxtcp pointer over poll manager
322 * channel. Used by lwip thread functions.
323 */
324static ssize_t
325pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
326{
327 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
328}
329
330
331/**
332 * Syntactic sugar for sending weak reference to pxtcp over poll
333 * manager channel. Used by lwip thread functions.
334 */
335static ssize_t
336pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
337{
338 pollmgr_refptr_weak_ref(pxtcp->rp);
339 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
340}
341
342
343/**
344 * Counterpart of pxtcp_chan_send().
345 */
346static struct pxtcp *
347pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
348{
349 struct pxtcp *pxtcp;
350
351 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
352 return pxtcp;
353}
354
355
356/**
357 * Counterpart of pxtcp_chan_send_weak().
358 */
359static struct pxtcp *
360pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
361{
362 struct pollmgr_refptr *rp;
363 struct pollmgr_handler *base;
364 struct pxtcp *pxtcp;
365
366 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
367 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
368 pxtcp = (struct pxtcp *)base;
369
370 return pxtcp;
371}
372
373
374/**
375 * Register pxtcp with poll manager.
376 *
377 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
378 * error handling is different in these two cases, we leave it up to
379 * the caller.
380 */
381int
382pxtcp_pmgr_add(struct pxtcp *pxtcp)
383{
384 int status;
385
386 LWIP_ASSERT1(pxtcp != NULL);
387 LWIP_ASSERT1(pxtcp->sock >= 0);
388 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
389 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
390 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
391
392 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
393 return status;
394}
395
396
/**
 * Unregister pxtcp with poll manager.
 *
 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
 * leg).
 *
 * Only removes the poll manager slot; the socket and the pxtcp itself
 * are left untouched.
 */
void
pxtcp_pmgr_del(struct pxtcp *pxtcp)
{
    LWIP_ASSERT1(pxtcp != NULL);

    pollmgr_del_slot(pxtcp->pmhdl.slot);
}
410
411
412/**
413 * POLLMGR_CHAN_PXTCP_ADD handler.
414 *
415 * Get new pxtcp from lwip thread and start polling its socket.
416 */
417static int
418pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
419{
420 struct pxtcp *pxtcp;
421 int status;
422
423 pxtcp = pxtcp_chan_recv(handler, fd, revents);
424 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
425 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
426
427 status = pxtcp_pmgr_add(pxtcp);
428 if (status < 0) {
429 (void) pxtcp_schedule_reset(pxtcp);
430 }
431
432 return POLLIN;
433}
434
435
436/**
437 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
438 *
439 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
440 * and failed, it now requests us to poll the socket for POLLOUT and
441 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
442 */
443static int
444pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
445{
446 struct pxtcp *pxtcp;
447
448 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
449 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
450
451 if (pxtcp == NULL) {
452 return POLLIN;
453 }
454
455 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
456 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
457
458 pxtcp->events |= POLLOUT;
459 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
460
461 return POLLIN;
462}
463
464
465/**
466 * POLLMGR_CHAN_PXTCP_POLLIN handler.
467 */
468static int
469pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
470{
471 struct pxtcp *pxtcp;
472
473 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
474 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
475
476 if (pxtcp == NULL) {
477 return POLLIN;
478 }
479
480 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
481 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
482
483 if (pxtcp->inbound_close) {
484 return POLLIN;
485 }
486
487 pxtcp->events |= POLLIN;
488 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
489
490 return POLLIN;
491}
492
493
494#if !(HAVE_TCP_POLLHUP & POLLOUT)
495/**
496 * POLLMGR_CHAN_PXTCP_DEL handler.
497 *
498 * Schedule pxtcp deletion. We only need this if host system doesn't
499 * report POLLHUP for fully closed tcp sockets.
500 */
501static int
502pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
503{
504 struct pxtcp *pxtcp;
505
506 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
507 if (pxtcp == NULL) {
508 return POLLIN;
509 }
510
511 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
512 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
513
514 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
515 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
516
517 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
518 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
519
520 pxtcp_pmgr_del(pxtcp);
521 (void) pxtcp_schedule_delete(pxtcp);
522
523 return POLLIN;
524}
525#endif /* !(HAVE_TCP_POLLHUP & POLLOUT) */
526
527
528/**
529 * POLLMGR_CHAN_PXTCP_RESET handler.
530 *
531 * Close the socket with RST and delete pxtcp.
532 */
533static int
534pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
535{
536 struct pxtcp *pxtcp;
537
538 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
539 if (pxtcp == NULL) {
540 return POLLIN;
541 }
542
543 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
544 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
545
546 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
547 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
548
549 pxtcp_pmgr_del(pxtcp);
550
551 proxy_reset_socket(pxtcp->sock);
552 pxtcp->sock = INVALID_SOCKET;
553
554 (void) pxtcp_schedule_reset(pxtcp);
555
556 return POLLIN;
557}
558
559
560static struct pxtcp *
561pxtcp_allocate(void)
562{
563 struct pxtcp *pxtcp;
564
565 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
566 if (pxtcp == NULL) {
567 return NULL;
568 }
569
570 pxtcp->pmhdl.callback = NULL;
571 pxtcp->pmhdl.data = (void *)pxtcp;
572 pxtcp->pmhdl.slot = -1;
573
574 pxtcp->pcb = NULL;
575 pxtcp->sock = INVALID_SOCKET;
576 pxtcp->events = 0;
577 pxtcp->sockerr = 0;
578 pxtcp->netif = NULL;
579 pxtcp->unsent = NULL;
580 pxtcp->outbound_close = 0;
581 pxtcp->outbound_close_done = 0;
582 pxtcp->inbound_close = 0;
583 pxtcp->inbound_close_done = 0;
584 pxtcp->inbound_pull = 0;
585 pxtcp->deferred_delete = 0;
586
587 pxtcp->inbuf.bufsize = 64 * 1024;
588 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
589 if (pxtcp->inbuf.buf == NULL) {
590 free(pxtcp);
591 return NULL;
592 }
593 pxtcp->inbuf.vacant = 0;
594 pxtcp->inbuf.unacked = 0;
595 pxtcp->inbuf.unsent = 0;
596
597 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
598 if (pxtcp->rp == NULL) {
599 free(pxtcp->inbuf.buf);
600 free(pxtcp);
601 return NULL;
602 }
603
604#define CALLBACK_MSG(MSG, FUNC) \
605 do { \
606 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
607 pxtcp->MSG.sem = NULL; \
608 pxtcp->MSG.msg.cb.function = FUNC; \
609 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
610 } while (0)
611
612 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
613 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
614 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
615 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
616 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
617 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
618
619#undef CALLBACK_MSG
620
621 return pxtcp;
622}
623
624
625/**
626 * Exported to fwtcp to create pxtcp for incoming port-forwarded
627 * connections. Completed with pcb in pxtcp_pcb_connect().
628 */
629struct pxtcp *
630pxtcp_create_forwarded(SOCKET sock)
631{
632 struct pxtcp *pxtcp;
633
634 pxtcp = pxtcp_allocate();
635 if (pxtcp == NULL) {
636 return NULL;
637 }
638
639 pxtcp->sock = sock;
640 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
641 pxtcp->events = 0;
642
643 return pxtcp;
644}
645
646
647static void
648pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
649{
650 LWIP_ASSERT1(pxtcp != NULL);
651 LWIP_ASSERT1(pcb != NULL);
652
653 pxtcp->pcb = pcb;
654
655 tcp_arg(pcb, pxtcp);
656
657 tcp_recv(pcb, pxtcp_pcb_recv);
658 tcp_sent(pcb, pxtcp_pcb_sent);
659 tcp_poll(pcb, NULL, 255);
660 tcp_err(pcb, pxtcp_pcb_err);
661}
662
663
664static void
665pxtcp_free(struct pxtcp *pxtcp)
666{
667 if (pxtcp->unsent != NULL) {
668 pbuf_free(pxtcp->unsent);
669 }
670 if (pxtcp->inbuf.buf != NULL) {
671 free(pxtcp->inbuf.buf);
672 }
673 free(pxtcp);
674}
675
676
/**
 * Counterpart to pxtcp_create_forwarded(): destroy a pxtcp that fwtcp
 * failed to register with the poll manager or failed to post to the
 * lwip thread for doing connect.
 *
 * The pxtcp must not have a pcb yet.  The work is done by
 * pxtcp_pcb_reset_pxtcp(), which also resets the socket and frees the
 * pxtcp, so the pxtcp must not be used after this call.
 */
void
pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
{
    LWIP_ASSERT1(pxtcp->pcb == NULL);
    pxtcp_pcb_reset_pxtcp(pxtcp);
}
688
689
690static void
691pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
692{
693 if (pxtcp == NULL || pxtcp->pcb == NULL) {
694 return;
695 }
696
697 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
698 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
699
700 /*
701 * We must have dissociated from a fully closed pcb immediately
702 * since lwip recycles them and we don't wan't to mess with what
703 * would be someone else's pcb that we happen to have a stale
704 * pointer to.
705 */
706 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
707
708 tcp_recv(pxtcp->pcb, NULL);
709 tcp_sent(pxtcp->pcb, NULL);
710 tcp_poll(pxtcp->pcb, NULL, 255);
711 tcp_err(pxtcp->pcb, NULL);
712 tcp_arg(pxtcp->pcb, NULL);
713 pxtcp->pcb = NULL;
714}
715
716
717/**
718 * Lwip thread callback invoked via pxtcp::msg_delete
719 *
720 * Since we use static messages to communicate to the lwip thread, we
721 * cannot delete pxtcp without making sure there are no unprocessed
722 * messages in the lwip thread mailbox.
723 *
724 * The easiest way to ensure that is to send this "delete" message as
725 * the last one and when it's processed we know there are no more and
726 * it's safe to delete pxtcp.
727 *
728 * Poll manager handlers should use pxtcp_schedule_delete()
729 * convenience function.
730 */
731static void
732pxtcp_pcb_delete_pxtcp(void *ctx)
733{
734 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
735
736 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
737 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
738 (pxtcp->deferred_delete && !pxtcp->inbound_pull
739 ? " (was deferred)" : "")));
740
741 LWIP_ASSERT1(pxtcp != NULL);
742 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
743 LWIP_ASSERT1(pxtcp->outbound_close_done);
744 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
745
746
747 /*
748 * pxtcp is no longer registered with poll manager, so it's safe
749 * to close the socket.
750 */
751 if (pxtcp->sock != INVALID_SOCKET) {
752 closesocket(pxtcp->sock);
753 pxtcp->sock = INVALID_SOCKET;
754 }
755
756 /*
757 * We might have already dissociated from a fully closed pcb, or
758 * guest might have sent us a reset while msg_delete was in
759 * transit. If there's no pcb, we are done.
760 */
761 if (pxtcp->pcb == NULL) {
762 pollmgr_refptr_unref(pxtcp->rp);
763 pxtcp_free(pxtcp);
764 return;
765 }
766
767 /*
768 * Have we completely forwarded all inbound traffic to the guest?
769 *
770 * We may still be waiting for ACKs. We may have failed to send
771 * some of the data (tcp_write() failed with ERR_MEM). We may
772 * have failed to send the FIN (tcp_shutdown() failed with
773 * ERR_MEM).
774 */
775 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
776 pxtcp_pcb_dissociate(pxtcp);
777 pollmgr_refptr_unref(pxtcp->rp);
778 pxtcp_free(pxtcp);
779 }
780 else {
781 DPRINTF2(("delete: pxtcp %p; pcb %p:"
782 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
783 (void *)pxtcp, (void *)pxtcp->pcb,
784 (int)pxtcp->inbuf.unacked,
785 (int)pxtcp->inbuf.unsent,
786 (int)pxtcp->inbuf.vacant,
787 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
788
789 LWIP_ASSERT1(!pxtcp->deferred_delete);
790 pxtcp->deferred_delete = 1;
791 }
792}
793
794
795/**
796 * If we couldn't delete pxtcp right away in the msg_delete callback
797 * from the poll manager thread, we repeat the check at the end of
798 * relevant pcb callbacks.
799 */
800DECLINLINE(void)
801pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
802{
803 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
804 pxtcp_pcb_delete_pxtcp(pxtcp);
805 }
806}
807
808
/**
 * Poll manager callbacks should use this convenience wrapper to
 * schedule pxtcp deletion on the lwip thread and to deregister from
 * the poll manager.
 *
 * Always returns -1, which the calling poll manager callback is
 * expected to return in turn so that the poll manager deregisters
 * the handler.
 */
static int
pxtcp_schedule_delete(struct pxtcp *pxtcp)
{
    /*
     * If pollmgr_refptr_get() is called by any channel before
     * scheduled deletion happens, let them know we are gone.
     */
    pxtcp->pmhdl.slot = -1;

    /*
     * Schedule deletion.  Since poll manager thread may be pre-empted
     * right after we send the message, the deletion may actually
     * happen on the lwip thread before we return from this function,
     * so it's not safe to refer to pxtcp after this call.
     */
    proxy_lwip_post(&pxtcp->msg_delete);

    /* tell poll manager to deregister us */
    return -1;
}
834
835
836/**
837 * Lwip thread callback invoked via pxtcp::msg_reset
838 *
839 * Like pxtcp_pcb_delete(), but sends RST to the guest before
840 * deleting this pxtcp.
841 */
842static void
843pxtcp_pcb_reset_pxtcp(void *ctx)
844{
845 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
846 LWIP_ASSERT1(pxtcp != NULL);
847
848 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
849 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
850
851 if (pxtcp->sock != INVALID_SOCKET) {
852 proxy_reset_socket(pxtcp->sock);
853 pxtcp->sock = INVALID_SOCKET;
854 }
855
856 if (pxtcp->pcb != NULL) {
857 struct tcp_pcb *pcb = pxtcp->pcb;
858 pxtcp_pcb_dissociate(pxtcp);
859 tcp_abort(pcb);
860 }
861
862 pollmgr_refptr_unref(pxtcp->rp);
863 pxtcp_free(pxtcp);
864}
865
866
867
/**
 * Poll manager callbacks should use this convenience wrapper to
 * schedule pxtcp reset and deletion on the lwip thread and to
 * deregister from the poll manager.
 *
 * See pxtcp_schedule_delete() for additional comments.
 *
 * Always returns -1, for the calling poll manager callback to return
 * in turn.
 */
static int
pxtcp_schedule_reset(struct pxtcp *pxtcp)
{
    /* mark as no longer registered before the message is posted */
    pxtcp->pmhdl.slot = -1;
    proxy_lwip_post(&pxtcp->msg_reset);
    return -1;
}
882
883
884/**
885 * Reject proxy connection attempt. Depending on the cause (sockerr)
886 * we may just drop the pcb silently, generate an ICMP datagram or
887 * send TCP reset.
888 */
889static void
890pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
891 struct pbuf *p, int sockerr)
892{
893 struct netif *oif;
894 int reset = 0;
895
896 oif = ip_current_netif();
897 ip_current_netif() = netif;
898
899 if (sockerr == ECONNREFUSED) {
900 reset = 1;
901 }
902 else if (PCB_ISIPV6(pcb)) {
903 if (sockerr == EHOSTDOWN) {
904 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
905 }
906 else if (sockerr == EHOSTUNREACH
907 || sockerr == ENETDOWN
908 || sockerr == ENETUNREACH)
909 {
910 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
911 }
912 }
913 else {
914 if (sockerr == EHOSTDOWN
915 || sockerr == EHOSTUNREACH
916 || sockerr == ENETDOWN
917 || sockerr == ENETUNREACH)
918 {
919 icmp_dest_unreach(p, ICMP_DUR_HOST);
920 }
921 }
922
923 ip_current_netif() = oif;
924
925 tcp_abandon(pcb, reset);
926}
927
928
929/**
930 * Called from poll manager thread via pxtcp::msg_accept when proxy
931 * failed to connect to the destination. Also called when we failed
932 * to register pxtcp with poll manager.
933 *
934 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
935 * how this unestablished connection is terminated.
936 */
937static void
938pxtcp_pcb_accept_refuse(void *ctx)
939{
940 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
941
942 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n",
943 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
944 pxtcp->sock, pxtcp->sockerr));
945
946 LWIP_ASSERT1(pxtcp != NULL);
947 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
948
949 if (pxtcp->pcb != NULL) {
950 struct tcp_pcb *pcb = pxtcp->pcb;
951 pxtcp_pcb_dissociate(pxtcp);
952 pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
953 }
954
955 pollmgr_refptr_unref(pxtcp->rp);
956 pxtcp_free(pxtcp);
957}
958
959
/**
 * Convenience wrapper for poll manager connect callback to reject
 * connection attempt.
 *
 * Like pxtcp_schedule_reset(), but the callback is more discriminate
 * in how this unestablished connection is terminated.
 *
 * Always returns -1, for the calling poll manager callback to return
 * in turn.
 */
static int
pxtcp_schedule_reject(struct pxtcp *pxtcp)
{
    /*
     * msg_accept is set up to call pxtcp_pcb_accept_confirm() by
     * pxtcp_allocate(); redirect it to the refuse callback.
     */
    pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
    pxtcp->pmhdl.slot = -1;
    proxy_lwip_post(&pxtcp->msg_accept);
    return -1;
}
975
976
/**
 * Global tcp_proxy_accept() callback for proxied outgoing TCP
 * connections from guest(s).
 *
 * arg is the pbuf with the initial datagram.  Starts connecting a
 * host socket to the (remapped) destination, creates a pxtcp for the
 * new pcb and hands it over to the poll manager, which waits for the
 * connect to complete in pxtcp_pmgr_connect().
 *
 * Returns ERR_OK on success.  On failure the connection attempt is
 * rejected and ERR_ABRT is returned.
 */
static err_t
pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
{
    struct pbuf *p = (struct pbuf *)arg;
    struct pxtcp *pxtcp;
    ipX_addr_t dst_addr;
    int sdom;
    SOCKET sock;
    ssize_t nsent;
    int sockerr = 0;

    LWIP_UNUSED_ARG(error);     /* always ERR_OK */

    /*
     * TCP first calls accept callback when it receives the first SYN
     * and "tentatively accepts" new proxied connection attempt.  When
     * proxy "confirms" the SYN and sends SYN|ACK and the guest
     * replies with ACK the accept callback is called again, this time
     * with the established connection.
     */
    LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
    tcp_accept(newpcb, pxtcp_pcb_accept);
    tcp_arg(newpcb, NULL);

    tcp_setprio(newpcb, TCP_PRIO_MAX);

    pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);

    sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
    sock = proxy_connected_socket(sdom, SOCK_STREAM,
                                  &dst_addr, newpcb->local_port);
    if (sock == INVALID_SOCKET) {
        sockerr = SOCKERRNO();
        goto abort;
    }

    pxtcp = pxtcp_allocate();
    if (pxtcp == NULL) {
        /* sockerr is still 0 on this leg */
        proxy_reset_socket(sock);
        goto abort;
    }

    /* save initial datagram in case we need to reply with ICMP */
    pbuf_ref(p);
    pxtcp->unsent = p;
    pxtcp->netif = ip_current_netif();

    pxtcp_pcb_associate(pxtcp, newpcb);
    pxtcp->sock = sock;

    /* the poll manager waits for the connect to complete */
    pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
    pxtcp->events = POLLOUT;

    nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
    if (nsent < 0) {
        /*
         * pxtcp_pcb_accept_refuse() expects the socket to be gone
         * already; it rejects the pcb and frees pxtcp.
         */
        pxtcp->sock = INVALID_SOCKET;
        proxy_reset_socket(sock);
        pxtcp_pcb_accept_refuse(pxtcp);
        return ERR_ABRT;
    }

    return ERR_OK;

  abort:
    /* no pxtcp exists on this leg, reject the pcb directly */
    DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n",
              __func__, (void *)newpcb, sock, sockerr));
    pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
    return ERR_ABRT;
}
1050
1051
1052/**
1053 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1054 * connections from guest(s). This is "real" accept with three-way
1055 * handshake completed.
1056 */
1057static err_t
1058pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1059{
1060 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1061
1062 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1063 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1064
1065 LWIP_ASSERT1(pxtcp != NULL);
1066 LWIP_ASSERT1(pxtcp->pcb = pcb);
1067 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1068
1069 /* send any inbound data that are already queued */
1070 pxtcp_pcb_forward_inbound(pxtcp);
1071 return ERR_OK;
1072}
1073
1074
/**
 * Initial poll manager callback for proxied outgoing TCP connections.
 * pxtcp_pcb_heard() sets pxtcp::pmhdl::callback to this.
 *
 * Waits for connect(2) to the destination to complete.  On success
 * replaces itself with pxtcp_pmgr_pump() callback common to all
 * established TCP connections.
 *
 * Returns the events to poll for next (POLLIN) on success.  On
 * failure returns the result of pxtcp_schedule_reject() or
 * pxtcp_schedule_reset(), i.e. -1.
 */
static int
pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
    struct pxtcp *pxtcp;

    pxtcp = (struct pxtcp *)handler->data;
    LWIP_ASSERT1(handler == &pxtcp->pmhdl);
    LWIP_ASSERT1(fd == pxtcp->sock);
    LWIP_ASSERT1(pxtcp->sockerr == 0);

    if (revents & POLLNVAL) {
        /* forget the socket without closing it and reject */
        pxtcp->sock = INVALID_SOCKET;
        pxtcp->sockerr = ETIMEDOUT;
        return pxtcp_schedule_reject(pxtcp);
    }

    /*
     * Solaris and NetBSD don't report either POLLERR or POLLHUP when
     * connect(2) fails, just POLLOUT.  In that case we always need to
     * check SO_ERROR.
     */
#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD)
# define CONNECT_CHECK_ERROR POLLOUT
#else
# define CONNECT_CHECK_ERROR (POLLERR | POLLHUP)
#endif

    /*
     * Check the cause of the failure so that pxtcp_pcb_reject() may
     * behave accordingly.
     */
    if (revents & CONNECT_CHECK_ERROR) {
        socklen_t optlen = (socklen_t)sizeof(pxtcp->sockerr);
        int status;
        SOCKET s;

        status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
                            (char *)&pxtcp->sockerr, &optlen);
        if (RT_UNLIKELY(status == SOCKET_ERROR)) { /* should not happen */
            DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
                     __func__, fd, SOCKERRNO()));
            pxtcp->sockerr = ETIMEDOUT;
        }
        else {
            /* don't spam this log on successful connect(2) */
            if ((revents & (POLLERR | POLLHUP)) /* we were told it's failed */
                || pxtcp->sockerr != 0)         /* we determined it's failed */
            {
                DPRINTF(("%s: sock %d: connect: %R[sockerr]\n",
                         __func__, fd, pxtcp->sockerr));
            }

            if ((revents & (POLLERR | POLLHUP))
                && RT_UNLIKELY(pxtcp->sockerr == 0))
            {
                /* if we're told it's failed, make sure it's marked as such */
                pxtcp->sockerr = ETIMEDOUT;
            }
        }

        if (pxtcp->sockerr != 0) {
            /* connect failed: close the socket here, reject on lwip thread */
            s = pxtcp->sock;
            pxtcp->sock = INVALID_SOCKET;
            closesocket(s);
            return pxtcp_schedule_reject(pxtcp);
        }
    }

    if (revents & POLLOUT) { /* connect is successful */
        /* confirm accept to the guest */
        proxy_lwip_post(&pxtcp->msg_accept);

        /*
         * Switch to common callback used for all established proxied
         * connections.
         */
        pxtcp->pmhdl.callback = pxtcp_pmgr_pump;

        /*
         * Initially we poll for incoming traffic only.  Outgoing
         * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
         * it will ask us to poll for POLLOUT too.
         */
        pxtcp->events = POLLIN;
        return pxtcp->events;
    }

    /* should never get here */
    DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
              __func__, (void *)pxtcp, fd, revents));
    return pxtcp_schedule_reset(pxtcp);
}
1175
1176
1177/**
1178 * Called from poll manager thread via pxtcp::msg_accept when proxy
1179 * connected to the destination. Finalize accept by sending SYN|ACK
1180 * to the guest.
1181 */
1182static void
1183pxtcp_pcb_accept_confirm(void *ctx)
1184{
1185 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1186 err_t error;
1187
1188 LWIP_ASSERT1(pxtcp != NULL);
1189 if (pxtcp->pcb == NULL) {
1190 return;
1191 }
1192
1193 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1194 LWIP_ASSERT1(pxtcp->unsent != NULL);
1195 pbuf_free(pxtcp->unsent);
1196 pxtcp->unsent = NULL;
1197
1198 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1199
1200 /*
1201 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1202 * abandons the pcb. Retrying that is not very easy, since it
1203 * would require keeping "fractional state". From guest's point
1204 * of view there is no reply to its SYN so it will either resend
1205 * the SYN (effetively triggering full connection retry for us),
1206 * or it will eventually time out.
1207 */
1208 if (error == ERR_ABRT) {
1209 pxtcp->pcb = NULL; /* pcb is gone */
1210 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1211 }
1212
1213 /*
1214 * else if (error != ERR_OK): even if tcp_output() failed with
1215 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1216 * retransmitted eventually.
1217 */
1218}
1219
1220
1221/**
1222 * Entry point for port-forwarding.
1223 *
1224 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1225 * (with no pcb yet) and adds it to the poll manager (polling for
1226 * errors only). Then it calls this function to construct the pcb and
1227 * perform connection to the guest.
1228 */
1229void
1230pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1231{
1232 struct sockaddr_storage ss;
1233 socklen_t sslen;
1234 struct tcp_pcb *pcb;
1235 ipX_addr_t src_addr, dst_addr;
1236 u16_t src_port, dst_port;
1237 int status;
1238 err_t error;
1239
1240 LWIP_ASSERT1(pxtcp != NULL);
1241 LWIP_ASSERT1(pxtcp->pcb == NULL);
1242 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1243
1244 pcb = tcp_new();
1245 if (pcb == NULL) {
1246 goto reset;
1247 }
1248
1249 tcp_setprio(pcb, TCP_PRIO_MAX);
1250 pxtcp_pcb_associate(pxtcp, pcb);
1251
1252 sslen = sizeof(ss);
1253 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1254 if (status == SOCKET_ERROR) {
1255 goto reset;
1256 }
1257
1258 /* nit: comapres PF and AF, but they are the same everywhere */
1259 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1260
1261 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1262 if (status == PXREMAP_FAILED) {
1263 goto reset;
1264 }
1265
1266 if (ss.ss_family == PF_INET) {
1267 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1268
1269 src_port = peer4->sin_port;
1270
1271 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1272 dst_port = fwspec->dst.sin.sin_port;
1273 }
1274 else { /* PF_INET6 */
1275 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1276 ip_set_v6(pcb, 1);
1277
1278 src_port = peer6->sin6_port;
1279
1280 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1281 dst_port = fwspec->dst.sin6.sin6_port;
1282 }
1283
1284 /* lwip port arguments are in host order */
1285 src_port = ntohs(src_port);
1286 dst_port = ntohs(dst_port);
1287
1288 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1289 if (error != ERR_OK) {
1290 goto reset;
1291 }
1292
1293 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1294 /* callback: */ pxtcp_pcb_connected);
1295 if (error != ERR_OK) {
1296 goto reset;
1297 }
1298
1299 return;
1300
1301 reset:
1302 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1303}
1304
1305
1306/**
1307 * Port-forwarded connection to guest is successful, pump data.
1308 */
1309static err_t
1310pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1311{
1312 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1313
1314 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1315 LWIP_UNUSED_ARG(error);
1316
1317 LWIP_ASSERT1(pxtcp != NULL);
1318 LWIP_ASSERT1(pxtcp->pcb == pcb);
1319 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1320 LWIP_UNUSED_ARG(pcb);
1321
1322 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1323 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1324
1325 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1326 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1327
1328 return ERR_OK;
1329}
1330
1331
1332/**
1333 * tcp_recv() callback.
1334 */
1335static err_t
1336pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1337{
1338 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1339
1340 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1341 LWIP_UNUSED_ARG(error);
1342
1343 LWIP_ASSERT1(pxtcp != NULL);
1344 LWIP_ASSERT1(pxtcp->pcb == pcb);
1345 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1346 LWIP_UNUSED_ARG(pcb);
1347
1348
1349 /*
1350 * Have we done sending previous batch?
1351 */
1352 if (pxtcp->unsent != NULL) {
1353 if (p != NULL) {
1354 /*
1355 * Return an error to tell TCP to hold onto that pbuf.
1356 * It will be presented to us later from tcp_fasttmr().
1357 */
1358 return ERR_WOULDBLOCK;
1359 }
1360 else {
1361 /*
1362 * Unlike data, p == NULL indicating orderly shutdown is
1363 * NOT presented to us again
1364 */
1365 pxtcp->outbound_close = 1;
1366 return ERR_OK;
1367 }
1368 }
1369
1370
1371 /*
1372 * Guest closed?
1373 */
1374 if (p == NULL) {
1375 pxtcp->outbound_close = 1;
1376 pxtcp_pcb_forward_outbound_close(pxtcp);
1377 return ERR_OK;
1378 }
1379
1380
1381 /*
1382 * Got data, send what we can without blocking.
1383 */
1384 return pxtcp_pcb_forward_outbound(pxtcp, p);
1385}
1386
1387
1388/**
1389 * Guest half-closed its TX side of the connection.
1390 *
1391 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1392 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1393 * previously unsent data and sees pxtcp::outbound_close flag saved by
1394 * pxtcp_pcb_recv().
1395 */
1396static void
1397pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1398{
1399 struct tcp_pcb *pcb;
1400
1401 LWIP_ASSERT1(pxtcp != NULL);
1402 LWIP_ASSERT1(pxtcp->outbound_close);
1403 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1404
1405 pcb = pxtcp->pcb;
1406 LWIP_ASSERT1(pcb != NULL);
1407
1408 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1409 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1410
1411
1412 /* set the flag first, since shutdown() may trigger POLLHUP */
1413 pxtcp->outbound_close_done = 1;
1414 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1415
1416#if !(HAVE_TCP_POLLHUP & POLLOUT)
1417 /*
1418 * We need to nudge poll manager manually, since OS will not
1419 * report POLLHUP.
1420 */
1421 if (pxtcp->inbound_close) {
1422 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1423 }
1424#endif
1425
1426
1427 /* no more outbound data coming to us */
1428 tcp_recv(pcb, NULL);
1429
1430 /*
1431 * If we have already done inbound close previously (active close
1432 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1433 * state since those will be recycled by lwip when it runs out of
1434 * free pcbs in the pool.
1435 *
1436 * The test is true also for a pcb in CLOSING state that waits
1437 * just for the ACK of its FIN (to transition to TIME_WAIT).
1438 */
1439 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1440 pxtcp_pcb_dissociate(pxtcp);
1441 }
1442}
1443
1444
1445/**
1446 * Forward outbound data from pcb to socket.
1447 *
1448 * Called by pxtcp_pcb_recv() to forward new data and by callout
1449 * triggered by POLLOUT on the socket to send previously unsent data.
1450 *
1451 * (Re)scehdules one-time callout if not all data are sent.
1452 */
1453static err_t
1454pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1455{
1456 struct pbuf *qs, *q;
1457 size_t qoff;
1458 size_t forwarded;
1459 int sockerr;
1460
1461 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1462
1463 forwarded = 0;
1464 sockerr = 0;
1465
1466 q = NULL;
1467 qoff = 0;
1468
1469 qs = p;
1470 while (qs != NULL) {
1471 IOVEC iov[8];
1472 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1473 size_t fwd1;
1474 ssize_t nsent;
1475 size_t i;
1476
1477 fwd1 = 0;
1478 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1479 LWIP_ASSERT1(q->len > 0);
1480 IOVEC_SET_BASE(iov[i], q->payload);
1481 IOVEC_SET_LEN(iov[i], q->len);
1482 fwd1 += q->len;
1483 }
1484
1485 /*
1486 * TODO: This is where application-level proxy can hook into
1487 * to process outbound traffic.
1488 */
1489 nsent = pxtcp_sock_send(pxtcp, iov, i);
1490
1491 if (nsent == (ssize_t)fwd1) {
1492 /* successfully sent this chain fragment completely */
1493 forwarded += nsent;
1494 qs = q;
1495 }
1496 else if (nsent >= 0) {
1497 /* successfully sent only some data */
1498 forwarded += nsent;
1499
1500 /* find the first pbuf that was not completely forwarded */
1501 qoff = nsent;
1502 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1503 if (qoff < q->len) {
1504 break;
1505 }
1506 qoff -= q->len;
1507 }
1508 LWIP_ASSERT1(q != NULL);
1509 LWIP_ASSERT1(qoff < q->len);
1510 break;
1511 }
1512 else {
1513 sockerr = -nsent;
1514
1515 /*
1516 * Some errors are really not errors - if we get them,
1517 * it's not different from getting nsent == 0, so filter
1518 * them out here.
1519 */
1520 if (proxy_error_is_transient(sockerr)) {
1521 sockerr = 0;
1522 }
1523 q = qs;
1524 qoff = 0;
1525 break;
1526 }
1527 }
1528
1529 if (forwarded > 0) {
1530 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1531 }
1532
1533 if (q == NULL) { /* everything is forwarded? */
1534 LWIP_ASSERT1(sockerr == 0);
1535 LWIP_ASSERT1(forwarded == p->tot_len);
1536
1537 pxtcp->unsent = NULL;
1538 pbuf_free(p);
1539 if (pxtcp->outbound_close) {
1540 pxtcp_pcb_forward_outbound_close(pxtcp);
1541 }
1542 }
1543 else {
1544 if (q != p) {
1545 /* free forwarded pbufs at the beginning of the chain */
1546 pbuf_ref(q);
1547 pbuf_free(p);
1548 }
1549 if (qoff > 0) {
1550 /* advance payload pointer past the forwarded part */
1551 pbuf_header(q, -(s16_t)qoff);
1552 }
1553 pxtcp->unsent = q;
1554
1555 /*
1556 * Have sendmsg() failed?
1557 *
1558 * Connection reset will be detected by poll and
1559 * pxtcp_schedule_reset() will be called.
1560 *
1561 * Otherwise something *really* unexpected must have happened,
1562 * so we'd better abort.
1563 */
1564 if (sockerr != 0 && sockerr != ECONNRESET) {
1565 struct tcp_pcb *pcb = pxtcp->pcb;
1566 pxtcp_pcb_dissociate(pxtcp);
1567
1568 tcp_abort(pcb);
1569
1570 /* call error callback manually since we've already dissociated */
1571 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1572 return ERR_ABRT;
1573 }
1574
1575 /* schedule one-shot POLLOUT on the socket */
1576 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1577 }
1578 return ERR_OK;
1579}
1580
1581
1582#if !defined(RT_OS_WINDOWS)
1583static ssize_t
1584pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1585{
1586 struct msghdr mh;
1587 ssize_t nsent;
1588
1589#ifdef MSG_NOSIGNAL
1590 const int send_flags = MSG_NOSIGNAL;
1591#else
1592 const int send_flags = 0;
1593#endif
1594
1595 memset(&mh, 0, sizeof(mh));
1596
1597 mh.msg_iov = iov;
1598 mh.msg_iovlen = iovlen;
1599
1600 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1601 if (nsent < 0) {
1602 nsent = -SOCKERRNO();
1603 }
1604
1605 return nsent;
1606}
1607#else /* RT_OS_WINDOWS */
1608static ssize_t
1609pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1610{
1611 DWORD nsent;
1612 int status;
1613
1614 status = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &nsent,
1615 0, NULL, NULL);
1616 if (status == SOCKET_ERROR) {
1617 return -SOCKERRNO();
1618 }
1619
1620 return nsent;
1621}
1622#endif /* RT_OS_WINDOWS */
1623
1624
1625/**
1626 * Callback from poll manager (on POLLOUT) to send data from
1627 * pxtcp::unsent pbuf to socket.
1628 */
1629static void
1630pxtcp_pcb_write_outbound(void *ctx)
1631{
1632 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1633 LWIP_ASSERT1(pxtcp != NULL);
1634
1635 if (pxtcp->pcb == NULL) {
1636 return;
1637 }
1638
1639 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1640}
1641
1642
1643/**
1644 * Common poll manager callback used by both outgoing and incoming
1645 * (port-forwarded) connections that has connected socket.
1646 */
1647static int
1648pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1649{
1650 struct pxtcp *pxtcp;
1651 int status;
1652 int sockerr;
1653
1654 pxtcp = (struct pxtcp *)handler->data;
1655 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1656 LWIP_ASSERT1(fd == pxtcp->sock);
1657
1658 if (revents & POLLNVAL) {
1659 pxtcp->sock = INVALID_SOCKET;
1660 return pxtcp_schedule_reset(pxtcp);
1661 }
1662
1663 if (revents & POLLERR) {
1664 socklen_t optlen = (socklen_t)sizeof(sockerr);
1665
1666 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1667 (char *)&sockerr, &optlen);
1668 if (status == SOCKET_ERROR) { /* should not happen */
1669 DPRINTF(("sock %d: SO_ERROR failed: %R[sockerr]\n",
1670 fd, SOCKERRNO()));
1671 }
1672 else {
1673 DPRINTF0(("sock %d: %R[sockerr]\n", fd, sockerr));
1674 }
1675 return pxtcp_schedule_reset(pxtcp);
1676 }
1677
1678 if (revents & POLLOUT) {
1679 pxtcp->events &= ~POLLOUT;
1680 proxy_lwip_post(&pxtcp->msg_outbound);
1681 }
1682
1683 if (revents & POLLIN) {
1684 ssize_t nread;
1685 int stop_pollin;
1686
1687 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1688 if (nread < 0) {
1689 sockerr = -(int)nread;
1690 DPRINTF0(("sock %d: %R[sockerr]\n", fd, sockerr));
1691 return pxtcp_schedule_reset(pxtcp);
1692 }
1693
1694 if (stop_pollin) {
1695 pxtcp->events &= ~POLLIN;
1696 }
1697
1698 if (nread > 0) {
1699 proxy_lwip_post(&pxtcp->msg_inbound);
1700#if !HAVE_TCP_POLLHUP
1701 /*
1702 * If host does not report POLLHUP for closed sockets
1703 * (e.g. NetBSD) we should check for full close manually.
1704 */
1705 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1706 LWIP_ASSERT1((revents & POLLHUP) == 0);
1707 return pxtcp_schedule_delete(pxtcp);
1708 }
1709#endif
1710 }
1711 }
1712
1713#if !HAVE_TCP_POLLHUP
1714 LWIP_ASSERT1((revents & POLLHUP) == 0);
1715#else
1716 if (revents & POLLHUP) {
1717 DPRINTF(("sock %d: HUP\n", fd));
1718#if HAVE_TCP_POLLHUP == POLLIN
1719 /*
1720 * Remote closed inbound.
1721 */
1722 if (!pxtcp->outbound_close_done) {
1723 /*
1724 * We might still need to poll for POLLOUT, but we can not
1725 * poll for POLLIN anymore (even if not all data are read)
1726 * because we will be spammed by POLLHUP.
1727 */
1728 pxtcp->events &= ~POLLIN;
1729 if (!pxtcp->inbound_close) {
1730 /* the rest of the input has to be pulled */
1731 proxy_lwip_post(&pxtcp->msg_inpull);
1732 }
1733 }
1734 else
1735#endif
1736 /*
1737 * Both directions are closed.
1738 */
1739 {
1740 LWIP_ASSERT1(pxtcp->outbound_close_done);
1741
1742 if (pxtcp->inbound_close) {
1743 /* there's no unread data, we are done */
1744 return pxtcp_schedule_delete(pxtcp);
1745 }
1746 else {
1747 /* pull the rest of the input first (deferred_delete) */
1748 pxtcp->pmhdl.slot = -1;
1749 proxy_lwip_post(&pxtcp->msg_inpull);
1750 return -1;
1751 }
1752 /* NOTREACHED */
1753 }
1754
1755 }
1756#endif /* HAVE_TCP_POLLHUP */
1757
1758 return pxtcp->events;
1759}
1760
1761
1762/**
1763 * Read data from socket to ringbuf. This may be used both on lwip
1764 * and poll manager threads.
1765 *
1766 * Flag pointed to by pstop is set when further reading is impossible,
1767 * either temporary when buffer is full, or permanently when EOF is
1768 * received.
1769 *
1770 * Returns number of bytes read. NB: EOF is reported as 1!
1771 *
1772 * Returns zero if nothing was read, either because buffer is full, or
1773 * if no data is available (EWOULDBLOCK, EINTR &c).
1774 *
1775 * Returns -errno on real socket errors.
1776 */
1777static ssize_t
1778pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1779{
1780 IOVEC iov[2];
1781 size_t iovlen;
1782 ssize_t nread;
1783
1784 const size_t sz = pxtcp->inbuf.bufsize;
1785 size_t beg, lim, wrnew;
1786
1787 *pstop = 0;
1788
1789 beg = pxtcp->inbuf.vacant;
1790 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1791
1792 /* lim is the index we can NOT write to */
1793 lim = pxtcp->inbuf.unacked;
1794 if (lim == 0) {
1795 lim = sz - 1; /* empty slot at the end */
1796 }
1797 else if (lim == 1 && beg != 0) {
1798 lim = sz; /* empty slot at the beginning */
1799 }
1800 else {
1801 --lim;
1802 }
1803
1804 if (beg == lim) {
1805 /*
1806 * Buffer is full, stop polling for POLLIN.
1807 *
1808 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1809 * data, freeing space in the ring buffer.
1810 */
1811 *pstop = 1;
1812 return 0;
1813 }
1814
1815 if (beg < lim) {
1816 /* free space in one chunk */
1817 iovlen = 1;
1818 IOVEC_SET_LEN(iov[0], lim - beg);
1819 }
1820 else {
1821 /* free space in two chunks */
1822 iovlen = 2;
1823 IOVEC_SET_LEN(iov[0], sz - beg);
1824 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1825 IOVEC_SET_LEN(iov[1], lim);
1826 }
1827
1828 /*
1829 * TODO: This is where application-level proxy can hook into to
1830 * process inbound traffic.
1831 */
1832 nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
1833
1834 if (nread > 0) {
1835 wrnew = beg + nread;
1836 if (wrnew >= sz) {
1837 wrnew -= sz;
1838 }
1839 pxtcp->inbuf.vacant = wrnew;
1840 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1841 (void *)pxtcp, pxtcp->sock, (int)nread));
1842 return nread;
1843 }
1844 else if (nread == 0) {
1845 *pstop = 1;
1846 pxtcp->inbound_close = 1;
1847 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1848 (void *)pxtcp, pxtcp->sock));
1849 return 1;
1850 }
1851 else {
1852 int sockerr = -nread;
1853
1854 if (proxy_error_is_transient(sockerr)) {
1855 /* haven't read anything, just return */
1856 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1857 (void *)pxtcp, pxtcp->sock));
1858 return 0;
1859 }
1860 else {
1861 /* socket error! */
1862 DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n",
1863 (void *)pxtcp, pxtcp->sock, sockerr));
1864 return -sockerr;
1865 }
1866 }
1867}
1868
1869
1870#if !defined(RT_OS_WINDOWS)
1871static ssize_t
1872pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1873{
1874 struct msghdr mh;
1875 ssize_t nread;
1876
1877 memset(&mh, 0, sizeof(mh));
1878
1879 mh.msg_iov = iov;
1880 mh.msg_iovlen = iovlen;
1881
1882 nread = recvmsg(pxtcp->sock, &mh, 0);
1883 if (nread < 0) {
1884 nread = -SOCKERRNO();
1885 }
1886
1887 return nread;
1888}
1889#else /* RT_OS_WINDOWS */
1890static ssize_t
1891pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1892{
1893 DWORD flags;
1894 DWORD nread;
1895 int status;
1896
1897 flags = 0;
1898 status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread,
1899 &flags, NULL, NULL);
1900 if (status == SOCKET_ERROR) {
1901 return -SOCKERRNO();
1902 }
1903
1904 return (ssize_t)nread;
1905}
1906#endif /* RT_OS_WINDOWS */
1907
1908
1909/**
1910 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1911 * from ringbuf to guest.
1912 */
1913static void
1914pxtcp_pcb_write_inbound(void *ctx)
1915{
1916 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1917 LWIP_ASSERT1(pxtcp != NULL);
1918
1919 if (pxtcp->pcb == NULL) {
1920 return;
1921 }
1922
1923 pxtcp_pcb_forward_inbound(pxtcp);
1924}
1925
1926
1927/**
1928 * tcp_poll() callback
1929 *
1930 * We swtich it on when tcp_write() or tcp_shutdown() fail with
1931 * ERR_MEM to prevent connection from stalling. If there are ACKs or
1932 * more inbound data then pxtcp_pcb_forward_inbound() will be
1933 * triggered again, but if neither happens, tcp_poll() comes to the
1934 * rescue.
1935 */
1936static err_t
1937pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
1938{
1939 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1940 LWIP_UNUSED_ARG(pcb);
1941
1942 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
1943 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
1944
1945 pxtcp_pcb_forward_inbound(pxtcp);
1946
1947 /*
1948 * If the last thing holding up deletion of the pxtcp was failed
1949 * tcp_shutdown() and it succeeded, we may be the last callback.
1950 */
1951 pxtcp_pcb_maybe_deferred_delete(pxtcp);
1952
1953 return ERR_OK;
1954}
1955
1956
1957static void
1958pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
1959{
1960 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
1961}
1962
1963
1964static void
1965pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
1966{
1967 tcp_poll(pxtcp->pcb, NULL, 255);
1968}
1969
1970
1971/**
1972 * Forward inbound data from ring buffer to the guest.
1973 *
1974 * Scheduled by poll manager thread after it receives more data into
1975 * the ring buffer (we have more data to send).
1976
1977 * Also called from tcp_sent() callback when guest ACKs some data,
1978 * increasing pcb->snd_buf (we are permitted to send more data).
1979 *
1980 * Also called from tcp_poll() callback if previous attempt to forward
1981 * inbound data failed with ERR_MEM (we need to try again).
1982 */
1983static void
1984pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
1985{
1986 struct tcp_pcb *pcb;
1987 size_t sndbuf;
1988 size_t beg, lim, sndlim;
1989 size_t toeob, tolim;
1990 size_t nsent;
1991 err_t error;
1992
1993 LWIP_ASSERT1(pxtcp != NULL);
1994 pcb = pxtcp->pcb;
1995 if (pcb == NULL) {
1996 return;
1997 }
1998
1999 if (/* __predict_false */ pcb->state < ESTABLISHED) {
2000 /*
2001 * If we have just confirmed accept of this connection, the
2002 * pcb is in SYN_RCVD state and we still haven't received the
2003 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
2004 * transition that lwip decrements pcb->acked so that that ACK
2005 * is not reported to pxtcp_pcb_sent(). If we send something
2006 * now and immediately close (think "daytime", e.g.) while
2007 * still in SYN_RCVD state, we will move directly to
2008 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
2009 * report it to pxtcp_pcb_sent().
2010 */
2011 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
2012 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2013 return;
2014 }
2015
2016
2017 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
2018 lim = pxtcp->inbuf.vacant;
2019
2020 if (beg == lim) {
2021 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
2022 pxtcp_pcb_forward_inbound_close(pxtcp);
2023 tcp_output(pcb);
2024 return;
2025 }
2026
2027 /*
2028 * Else, there's no data to send.
2029 *
2030 * If there is free space in the buffer, producer will
2031 * reschedule us as it receives more data and vacant (lim)
2032 * advances.
2033 *
2034 * If buffer is full when all data have been passed to
2035 * tcp_write() but not yet acknowledged, we will advance
2036 * unacked on ACK, freeing some space for producer to write to
2037 * (then see above).
2038 */
2039 return;
2040 }
2041
2042 sndbuf = tcp_sndbuf(pcb);
2043 if (sndbuf == 0) {
2044 /*
2045 * Can't send anything now. As guest ACKs some data, TCP will
2046 * call pxtcp_pcb_sent() callback and we will come here again.
2047 */
2048 return;
2049 }
2050
2051 nsent = 0;
2052
2053 /*
2054 * We have three limits to consider:
2055 * - how much data we have in the ringbuf
2056 * - how much data we are allowed to send
2057 * - ringbuf size
2058 */
2059 toeob = pxtcp->inbuf.bufsize - beg;
2060 if (lim < beg) { /* lim wrapped */
2061 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2062 /* so beg is not going to wrap, treat sndbuf as lim */
2063 lim = beg + sndbuf; /* ... and proceed to the simple case */
2064 }
2065 else { /* we are limited by the end of the buffer, beg will wrap */
2066 u8_t maybemore;
2067 if (toeob == sndbuf || lim == 0) {
2068 maybemore = 0;
2069 }
2070 else {
2071 maybemore = TCP_WRITE_FLAG_MORE;
2072 }
2073
2074 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
2075 if (error != ERR_OK) {
2076 goto writeerr;
2077 }
2078 nsent += toeob;
2079 pxtcp->inbuf.unsent = 0; /* wrap */
2080
2081 if (maybemore) {
2082 beg = 0;
2083 sndbuf -= toeob;
2084 }
2085 else {
2086 /* we are done sending, but ... */
2087 goto check_inbound_close;
2088 }
2089 }
2090 }
2091
2092 LWIP_ASSERT1(beg < lim);
2093 sndlim = beg + sndbuf;
2094 if (lim > sndlim) {
2095 lim = sndlim;
2096 }
2097 tolim = lim - beg;
2098 if (tolim > 0) {
2099 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2100 if (error != ERR_OK) {
2101 goto writeerr;
2102 }
2103 nsent += tolim;
2104 pxtcp->inbuf.unsent = lim;
2105 }
2106
2107 check_inbound_close:
2108 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2109 pxtcp_pcb_forward_inbound_close(pxtcp);
2110 }
2111
2112 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2113 (void *)pxtcp, (void *)pcb, (int)nsent));
2114 tcp_output(pcb);
2115 pxtcp_pcb_cancel_poll(pxtcp);
2116 return;
2117
2118 writeerr:
2119 if (error == ERR_MEM) {
2120 if (nsent > 0) { /* first write succeeded, second failed */
2121 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2122 (void *)pxtcp, (void *)pcb, (int)nsent));
2123 tcp_output(pcb);
2124 }
2125 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2126 (void *)pxtcp, (void *)pcb));
2127 pxtcp_pcb_schedule_poll(pxtcp);
2128 }
2129 else {
2130 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2131 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2132
2133 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2134 LWIP_ASSERT1(error == ERR_MEM);
2135 }
2136}
2137
2138
2139static void
2140pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2141{
2142 struct tcp_pcb *pcb;
2143 err_t error;
2144
2145 LWIP_ASSERT1(pxtcp != NULL);
2146 LWIP_ASSERT1(pxtcp->inbound_close);
2147 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2148 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2149
2150 pcb = pxtcp->pcb;
2151 LWIP_ASSERT1(pcb != NULL);
2152
2153 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2154 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2155
2156 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2157 if (error != ERR_OK) {
2158 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2159 " tcp_shutdown: error=%s\n",
2160 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2161 pxtcp_pcb_schedule_poll(pxtcp);
2162 return;
2163 }
2164
2165 pxtcp_pcb_cancel_poll(pxtcp);
2166 pxtcp->inbound_close_done = 1;
2167
2168
2169 /*
2170 * If we have already done outbound close previously (passive
2171 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2172 * state since those will be deleted by lwip when that last ack
2173 * comes from the guest.
2174 *
2175 * NB: We do NOT check for deferred delete here, even though we
2176 * have just set one of its conditions, inbound_close_done. We
2177 * let pcb callbacks that called us do that. It's simpler and
2178 * cleaner that way.
2179 */
2180 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2181 pxtcp_pcb_dissociate(pxtcp);
2182 }
2183}
2184
2185
2186/**
2187 * Check that all forwarded inbound data is sent and acked, and that
2188 * inbound close is scheduled (we aren't called back when it's acked).
2189 */
2190DECLINLINE(int)
2191pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2192{
2193 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2194 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2195}
2196
2197
2198/**
2199 * tcp_sent() callback - guest acknowledged len bytes.
2200 *
2201 * We can advance inbuf::unacked index, making more free space in the
2202 * ringbuf and wake up producer on poll manager thread.
2203 *
2204 * We can also try to send more data if we have any since pcb->snd_buf
2205 * was increased and we are now permitted to send more.
2206 */
2207static err_t
2208pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2209{
2210 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2211 size_t unacked;
2212
2213 LWIP_ASSERT1(pxtcp != NULL);
2214 LWIP_ASSERT1(pxtcp->pcb == pcb);
2215 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2216 LWIP_UNUSED_ARG(pcb); /* only in assert */
2217
2218 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2219 " unacked %d, unsent %d, vacant %d\n",
2220 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2221 (int)pxtcp->inbuf.unacked,
2222 (int)pxtcp->inbuf.unsent,
2223 (int)pxtcp->inbuf.vacant));
2224
2225 if (/* __predict_false */ len == 0) {
2226 /* we are notified to start pulling */
2227 LWIP_ASSERT1(!pxtcp->inbound_close);
2228 LWIP_ASSERT1(pxtcp->inbound_pull);
2229
2230 unacked = pxtcp->inbuf.unacked;
2231 }
2232 else {
2233 /*
2234 * Advance unacked index. Guest acknowledged the data, so it
2235 * won't be needed again for potential retransmits.
2236 */
2237 unacked = pxtcp->inbuf.unacked + len;
2238 if (unacked > pxtcp->inbuf.bufsize) {
2239 unacked -= pxtcp->inbuf.bufsize;
2240 }
2241 pxtcp->inbuf.unacked = unacked;
2242 }
2243
2244 /* arrange for more inbound data */
2245 if (!pxtcp->inbound_close) {
2246 if (!pxtcp->inbound_pull) {
2247 /* wake up producer, in case it has stopped polling for POLLIN */
2248 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2249#ifdef RT_OS_WINDOWS
2250 /**
2251 * We have't got enought room in ring buffer to read atm,
2252 * but we don't want to lose notification from WSAW4ME when
2253 * space would be available, so we reset event with empty recv
2254 */
2255 recv(pxtcp->sock, NULL, 0, 0);
2256#endif
2257 }
2258 else {
2259 ssize_t nread;
2260 int stop_pollin; /* ignored */
2261
2262 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2263
2264 if (nread < 0) {
2265 int sockerr = -(int)nread;
2266 LWIP_UNUSED_ARG(sockerr);
2267 DPRINTF0(("%s: sock %d: %R[sockerr]\n",
2268 __func__, pxtcp->sock, sockerr));
2269
2270 /*
2271 * Since we are pulling, pxtcp is no longer registered
2272 * with poll manager so we can kill it directly.
2273 */
2274 pxtcp_pcb_reset_pxtcp(pxtcp);
2275 return ERR_ABRT;
2276 }
2277 }
2278 }
2279
2280 /* forward more data if we can */
2281 if (!pxtcp->inbound_close_done) {
2282 pxtcp_pcb_forward_inbound(pxtcp);
2283
2284 /*
2285 * NB: we might have dissociated from a pcb that transitioned
2286 * to LAST_ACK state, so don't refer to pcb below.
2287 */
2288 }
2289
2290
2291 /* have we got all the acks? */
2292 if (pxtcp->inbound_close /* no more new data */
2293 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2294 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2295 {
2296 char *buf;
2297
2298 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2299 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2300
2301 /* no more retransmits, so buf is not needed */
2302 buf = pxtcp->inbuf.buf;
2303 pxtcp->inbuf.buf = NULL;
2304 free(buf);
2305
2306 /* no more acks, so no more callbacks */
2307 if (pxtcp->pcb != NULL) {
2308 tcp_sent(pxtcp->pcb, NULL);
2309 }
2310
2311 /*
2312 * We may be the last callback for this pcb if we have also
2313 * successfully forwarded inbound_close.
2314 */
2315 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2316 }
2317
2318 return ERR_OK;
2319}
2320
2321
2322/**
2323 * Callback from poll manager (pxtcp::msg_inpull) to switch
2324 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2325 * POLLHUP comment in pxtcp_pmgr_pump().
2326 *
2327 * pxtcp::sock is deregistered from poll manager after this callback
2328 * is scheduled.
2329 */
2330static void
2331pxtcp_pcb_pull_inbound(void *ctx)
2332{
2333 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2334 LWIP_ASSERT1(pxtcp != NULL);
2335
2336 if (pxtcp->pcb == NULL) {
2337 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2338 pxtcp_pcb_reset_pxtcp(pxtcp);
2339 return;
2340 }
2341
2342 pxtcp->inbound_pull = 1;
2343 if (pxtcp->outbound_close_done) {
2344 DPRINTF(("%s: pxtcp %p: pcb %p (deferred delete)\n",
2345 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2346 pxtcp->deferred_delete = 1;
2347 }
2348 else {
2349 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2350 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2351 }
2352
2353 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2354}
2355
2356
2357/**
2358 * tcp_err() callback.
2359 *
2360 * pcb is not passed to this callback since it may be already
2361 * deallocated by the stack, but we can't do anything useful with it
2362 * anyway since connection is gone.
2363 */
2364static void
2365pxtcp_pcb_err(void *arg, err_t error)
2366{
2367 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2368 LWIP_ASSERT1(pxtcp != NULL);
2369
2370 /*
2371 * ERR_CLSD is special - it is reported here when:
2372 *
2373 * . guest has already half-closed
2374 * . we send FIN to guest when external half-closes
2375 * . guest acks that FIN
2376 *
2377 * Since connection is closed but receive has been already closed
2378 * lwip can only report this via tcp_err. At this point the pcb
2379 * is still alive, so we can peek at it if need be.
2380 *
2381 * The interesting twist is when the ACK from guest that akcs our
2382 * FIN also acks some data. In this scenario lwip will NOT call
2383 * tcp_sent() callback with the ACK for that last bit of data but
2384 * instead will call tcp_err with ERR_CLSD right away. Since that
2385 * ACK also acknowledges all the data, we should run some of
2386 * pxtcp_pcb_sent() logic here.
2387 */
2388 if (error == ERR_CLSD) {
2389 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2390
2391 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2392 " pcb->acked %d;"
2393 " unacked %d, unsent %d, vacant %d\n",
2394 (void *)pxtcp, (void *)pcb,
2395 pcb->acked,
2396 (int)pxtcp->inbuf.unacked,
2397 (int)pxtcp->inbuf.unsent,
2398 (int)pxtcp->inbuf.vacant));
2399
2400 LWIP_ASSERT1(pxtcp->pcb == pcb);
2401 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2402
2403 if (pcb->acked > 0) {
2404 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2405 }
2406 return;
2407 }
2408
2409 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2410 (void *)pxtcp, proxy_lwip_strerr(error)));
2411
2412 pxtcp->pcb = NULL; /* pcb is gone */
2413 if (pxtcp->deferred_delete) {
2414 pxtcp_pcb_reset_pxtcp(pxtcp);
2415 }
2416 else {
2417 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2418 }
2419}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette