VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 49707

Last change on this file since 49707 was 49399, checked in by vboxsync, 11 years ago

Fix indentation.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 66.0 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2#include "winutils.h"
3
4#include "pxtcp.h"
5
6#include "proxy.h"
7#include "proxy_pollmgr.h"
8#include "pxremap.h"
9#include "portfwd.h" /* fwspec */
10
11#ifndef RT_OS_WINDOWS
12#include <sys/types.h>
13#include <sys/socket.h>
14#include <sys/ioctl.h>
15#ifdef RT_OS_SOLARIS
16#include <sys/filio.h> /* FIONREAD is BSD'ism */
17#endif
18#include <stdlib.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <string.h>
22#include <poll.h>
23
24#include <err.h> /* BSD'ism */
25#else
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29
30#include <iprt/stdint.h>
31#include "winpoll.h"
32#endif
33
34#include "lwip/opt.h"
35
36#include "lwip/sys.h"
37#include "lwip/tcpip.h"
38#include "lwip/netif.h"
39#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
40#include "lwip/icmp.h"
41#include "lwip/icmp6.h"
42
43/* NetBSD doesn't report POLLHUP for TCP sockets */
44#ifdef __NetBSD__
45# define HAVE_TCP_POLLHUP 0
46#else
47# define HAVE_TCP_POLLHUP 1
48#endif
49
50
/**
 * Ring buffer for inbound data.
 *
 * The poll manager thread is the producer: it fills the buffer with
 * data read from the host socket.  The lwIP thread is the consumer:
 * it schedules tcp_write() of that data to the pcb.
 *
 * NB: the lwIP stack itself is a third party here, so instead of the
 * usual two-way free / data split the buffer has a three-way split:
 * free space / sent but unACKed data / unsent data.
 */
struct ringbuf {
    char *buf;
    size_t bufsize;

    /* Start of the free space.  The producer writes here, up to "unacked". */
    volatile size_t vacant;

    /*
     * Start of the data that have been sent but not yet acknowledged.
     * The stack "owns" these data since it may have to retransmit
     * them, so this index is the limit of free space for the producer.
     */
    volatile size_t unacked;

    /*
     * Start of the data that have not been sent yet.  The consumer
     * reads/sends from here, up to "vacant".  Only the consumer thread
     * touches this index, hence it is not declared volatile.
     */
    size_t unsent;
};
83
84
85/**
86 */
87struct pxtcp {
88 /**
89 * Our poll manager handler. Must be first, strong/weak
90 * references depend on this "inheritance".
91 */
92 struct pollmgr_handler pmhdl;
93
94 /**
95 * lwIP (internal/guest) side of the proxied connection.
96 */
97 struct tcp_pcb *pcb;
98
99 /**
100 * Host (external) side of the proxied connection.
101 */
102 SOCKET sock;
103
104 /**
105 * Socket events we are currently polling for.
106 */
107 int events;
108
109 /**
110 * Socket error. Currently used to save connect(2) errors so that
111 * we can decide if we need to send ICMP error.
112 */
113 int sockerr;
114
115 /**
116 * Interface that we have got the SYN from. Needed to send ICMP
117 * with correct source address.
118 */
119 struct netif *netif;
120
121 /**
122 * For tentatively accepted connections for which we are in
123 * process of connecting to the real destination this is the
124 * initial pbuf that we might need to build ICMP error.
125 *
126 * When connection is established this is used to hold outbound
127 * pbuf chain received by pxtcp_pcb_recv() but not yet completely
128 * forwarded over the socket. We cannot "return" it to lwIP since
129 * the head of the chain is already sent and freed.
130 */
131 struct pbuf *unsent;
132
133 /**
134 * Guest has closed its side. Reported to pxtcp_pcb_recv() only
135 * once and we might not be able to forward it immediately if we
136 * have unsent pbuf.
137 */
138 int outbound_close;
139
140 /**
141 * Outbound half-close has been done on the socket.
142 */
143 int outbound_close_done;
144
145 /**
146 * External has closed its side. We might not be able to forward
147 * it immediately if we have unforwarded data.
148 */
149 int inbound_close;
150
151 /**
152 * Inbound half-close has been done on the pcb.
153 */
154 int inbound_close_done;
155
156 /**
157 * On systems that report POLLHUP as soon as the final FIN is
158 * received on a socket we cannot continue polling for the rest of
159 * input, so we have to read (pull) last data from the socket on
160 * the lwIP thread instead of polling/pushing it from the poll
161 * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
162 */
163 int inbound_pull;
164
165
166 /**
167 * When poll manager schedules delete we may not be able to delete
168 * a pxtcp immediately if not all inbound data has been acked by
169 * the guest: lwIP may need to resend and the data are in pxtcp's
170 * inbuf::buf. We defer delete until all data are acked to
171 * pxtcp_pcb_sent().
172 *
173 * It's also implied by inbound_pull. It probably means that
174 * "deferred" is not a very fortunate name.
175 */
176 int deferred_delete;
177
178 /**
179 * Ring-buffer for inbound data.
180 */
181 struct ringbuf inbuf;
182
183 /**
184 * lwIP thread's strong reference to us.
185 */
186 struct pollmgr_refptr *rp;
187
188
189 /*
190 * We use static messages to call functions on the lwIP thread to
191 * void malloc/free overhead.
192 */
193 struct tcpip_msg msg_delete; /* delete pxtcp */
194 struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
195 struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
196 struct tcpip_msg msg_outbound; /* trigger send of outbound data */
197 struct tcpip_msg msg_inbound; /* trigger send of inbound data */
198 struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
199};
200
201
202
203static struct pxtcp *pxtcp_allocate(void);
204static void pxtcp_free(struct pxtcp *);
205
206static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
207static void pxtcp_pcb_dissociate(struct pxtcp *);
208
209/* poll manager callbacks for pxtcp related channels */
210static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
211static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
212static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
213#if !HAVE_TCP_POLLHUP
214static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
215#endif
216static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
217
218/* helper functions for sending/receiving pxtcp over poll manager channels */
219static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
220static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
221static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
222static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
223
224/* poll manager callbacks for individual sockets */
225static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
226static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
227
228static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
229
230/* convenience functions for poll manager callbacks */
231static int pxtcp_schedule_delete(struct pxtcp *);
232static int pxtcp_schedule_reset(struct pxtcp *);
233static int pxtcp_schedule_reject(struct pxtcp *);
234
235/* lwip thread callbacks called via proxy_lwip_post() */
236static void pxtcp_pcb_delete_pxtcp(void *);
237static void pxtcp_pcb_reset_pxtcp(void *);
238static void pxtcp_pcb_accept_refuse(void *);
239static void pxtcp_pcb_accept_confirm(void *);
240static void pxtcp_pcb_write_outbound(void *);
241static void pxtcp_pcb_write_inbound(void *);
242static void pxtcp_pcb_pull_inbound(void *);
243
244/* tcp pcb callbacks */
245static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
246static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
247static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
248static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
249static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
250static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
251static void pxtcp_pcb_err(void *, err_t);
252
253static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
254static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
255
256static void pxtcp_pcb_forward_inbound(struct pxtcp *);
257static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
258DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
259static void pxtcp_pcb_schedule_poll(struct pxtcp *);
260static void pxtcp_pcb_cancel_poll(struct pxtcp *);
261
262static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
263DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
264
265/* poll manager handlers for pxtcp channels */
266static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
267static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
268static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
269#if !HAVE_TCP_POLLHUP
270static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
271#endif
272static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
273
274
275/**
276 * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll
277 * manager threads haven't been created yet.
278 */
279void
280pxtcp_init(void)
281{
282 /*
283 * Create channels.
284 */
285#define CHANNEL(SLOT, NAME) do { \
286 NAME##_hdl.callback = NAME; \
287 NAME##_hdl.data = NULL; \
288 NAME##_hdl.slot = -1; \
289 pollmgr_add_chan(SLOT, &NAME##_hdl); \
290 } while (0)
291
292 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
293 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
294 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
295#if !HAVE_TCP_POLLHUP
296 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
297#endif
298 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
299
300#undef CHANNEL
301
302 /*
303 * Listen to outgoing connection from guest(s).
304 */
305 tcp_proxy_accept(pxtcp_pcb_heard);
306}
307
308
309/**
310 * Syntactic sugar for sending pxtcp pointer over poll manager
311 * channel. Used by lwip thread functions.
312 */
313static ssize_t
314pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
315{
316 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
317}
318
319
320/**
321 * Syntactic sugar for sending weak reference to pxtcp over poll
322 * manager channel. Used by lwip thread functions.
323 */
324static ssize_t
325pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
326{
327 pollmgr_refptr_weak_ref(pxtcp->rp);
328 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
329}
330
331
332/**
333 * Counterpart of pxtcp_chan_send().
334 */
335static struct pxtcp *
336pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
337{
338 struct pxtcp *pxtcp;
339
340 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
341 return pxtcp;
342}
343
344
345/**
346 * Counterpart of pxtcp_chan_send_weak().
347 */
348static struct pxtcp *
349pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
350{
351 struct pollmgr_refptr *rp;
352 struct pollmgr_handler *base;
353 struct pxtcp *pxtcp;
354
355 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
356 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
357 pxtcp = (struct pxtcp *)base;
358
359 return pxtcp;
360}
361
362
363/**
364 * Register pxtcp with poll manager.
365 *
366 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
367 * error handling is different in these two cases, we leave it up to
368 * the caller.
369 */
370int
371pxtcp_pmgr_add(struct pxtcp *pxtcp)
372{
373 int status;
374
375 LWIP_ASSERT1(pxtcp != NULL);
376 LWIP_ASSERT1(pxtcp->sock >= 0);
377 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
378 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
379 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
380
381 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
382 return status;
383}
384
385
386/**
387 * Unregister pxtcp with poll manager.
388 *
389 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
390 * leg).
391 */
392void
393pxtcp_pmgr_del(struct pxtcp *pxtcp)
394{
395 LWIP_ASSERT1(pxtcp != NULL);
396
397 pollmgr_del_slot(pxtcp->pmhdl.slot);
398}
399
400
401/**
402 * POLLMGR_CHAN_PXTCP_ADD handler.
403 *
404 * Get new pxtcp from lwip thread and start polling its socket.
405 */
406static int
407pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
408{
409 struct pxtcp *pxtcp;
410 int status;
411
412 pxtcp = pxtcp_chan_recv(handler, fd, revents);
413 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
414 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
415
416 status = pxtcp_pmgr_add(pxtcp);
417 if (status < 0) {
418 (void) pxtcp_schedule_reset(pxtcp);
419 }
420
421 return POLLIN;
422}
423
424
425/**
426 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
427 *
428 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
429 * and failed, it now requests us to poll the socket for POLLOUT and
430 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
431 */
432static int
433pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
434{
435 struct pxtcp *pxtcp;
436
437 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
438 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
439
440 if (pxtcp == NULL) {
441 return POLLIN;
442 }
443
444 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
445 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
446
447 pxtcp->events |= POLLOUT;
448 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
449
450 return POLLIN;
451}
452
453
454/**
455 * POLLMGR_CHAN_PXTCP_POLLIN handler.
456 */
457static int
458pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
459{
460 struct pxtcp *pxtcp;
461
462 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
463 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
464
465 if (pxtcp == NULL) {
466 return POLLIN;
467 }
468
469 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
470 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
471
472 if (pxtcp->inbound_close) {
473 return POLLIN;
474 }
475
476 pxtcp->events |= POLLIN;
477 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
478
479 return POLLIN;
480}
481
482
483#if !HAVE_TCP_POLLHUP
484/**
485 * POLLMGR_CHAN_PXTCP_DEL handler.
486 *
487 * Schedule pxtcp deletion. We only need this if host system doesn't
488 * report POLLHUP for fully closed tcp sockets.
489 */
490static int
491pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
492{
493 struct pxtcp *pxtcp;
494
495 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
496 if (pxtcp == NULL) {
497 return POLLIN;
498 }
499
500 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
501 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
502
503 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
504 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
505
506 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
507 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
508
509 pxtcp_pmgr_del(pxtcp);
510 (void) pxtcp_schedule_delete(pxtcp);
511
512 return POLLIN;
513}
514#endif /* !HAVE_TCP_POLLHUP */
515
516
517/**
518 * POLLMGR_CHAN_PXTCP_RESET handler.
519 *
520 * Close the socket with RST and delete pxtcp.
521 */
522static int
523pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
524{
525 struct pxtcp *pxtcp;
526
527 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
528 if (pxtcp == NULL) {
529 return POLLIN;
530 }
531
532 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
533 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
534
535 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
536 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
537
538 pxtcp_pmgr_del(pxtcp);
539
540 proxy_reset_socket(pxtcp->sock);
541 pxtcp->sock = INVALID_SOCKET;
542
543 (void) pxtcp_schedule_reset(pxtcp);
544
545 return POLLIN;
546}
547
548
549static struct pxtcp *
550pxtcp_allocate(void)
551{
552 struct pxtcp *pxtcp;
553
554 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
555 if (pxtcp == NULL) {
556 return NULL;
557 }
558
559 pxtcp->pmhdl.callback = NULL;
560 pxtcp->pmhdl.data = (void *)pxtcp;
561 pxtcp->pmhdl.slot = -1;
562
563 pxtcp->pcb = NULL;
564 pxtcp->sock = INVALID_SOCKET;
565 pxtcp->events = 0;
566 pxtcp->sockerr = 0;
567 pxtcp->netif = NULL;
568 pxtcp->unsent = NULL;
569 pxtcp->outbound_close = 0;
570 pxtcp->outbound_close_done = 0;
571 pxtcp->inbound_close = 0;
572 pxtcp->inbound_close_done = 0;
573 pxtcp->inbound_pull = 0;
574 pxtcp->deferred_delete = 0;
575
576 pxtcp->inbuf.bufsize = 64 * 1024;
577 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
578 if (pxtcp->inbuf.buf == NULL) {
579 free(pxtcp);
580 return NULL;
581 }
582 pxtcp->inbuf.vacant = 0;
583 pxtcp->inbuf.unacked = 0;
584 pxtcp->inbuf.unsent = 0;
585
586 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
587 if (pxtcp->rp == NULL) {
588 free(pxtcp->inbuf.buf);
589 free(pxtcp);
590 return NULL;
591 }
592
593#define CALLBACK_MSG(MSG, FUNC) \
594 do { \
595 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
596 pxtcp->MSG.sem = NULL; \
597 pxtcp->MSG.msg.cb.function = FUNC; \
598 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
599 } while (0)
600
601 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
602 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
603 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
604 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
605 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
606 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
607
608#undef CALLBACK_MSG
609
610 return pxtcp;
611}
612
613
614/**
615 * Exported to fwtcp to create pxtcp for incoming port-forwarded
616 * connections. Completed with pcb in pxtcp_pcb_connect().
617 */
618struct pxtcp *
619pxtcp_create_forwarded(SOCKET sock)
620{
621 struct pxtcp *pxtcp;
622
623 pxtcp = pxtcp_allocate();
624 if (pxtcp == NULL) {
625 return NULL;
626 }
627
628 pxtcp->sock = sock;
629 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
630 pxtcp->events = 0;
631
632 return pxtcp;
633}
634
635
636static void
637pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
638{
639 LWIP_ASSERT1(pxtcp != NULL);
640 LWIP_ASSERT1(pcb != NULL);
641
642 pxtcp->pcb = pcb;
643
644 tcp_arg(pcb, pxtcp);
645
646 tcp_recv(pcb, pxtcp_pcb_recv);
647 tcp_sent(pcb, pxtcp_pcb_sent);
648 tcp_poll(pcb, NULL, 255);
649 tcp_err(pcb, pxtcp_pcb_err);
650}
651
652
653static void
654pxtcp_free(struct pxtcp *pxtcp)
655{
656 if (pxtcp->unsent != NULL) {
657 pbuf_free(pxtcp->unsent);
658 }
659 if (pxtcp->inbuf.buf != NULL) {
660 free(pxtcp->inbuf.buf);
661 }
662 free(pxtcp);
663}
664
665
666/**
667 * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that
668 * fwtcp failed to register with poll manager to post to lwip thread
669 * for doing connect.
670 */
671void
672pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
673{
674 LWIP_ASSERT1(pxtcp->pcb == NULL);
675 pxtcp_pcb_reset_pxtcp(pxtcp);
676}
677
678
679static void
680pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
681{
682 if (pxtcp == NULL || pxtcp->pcb == NULL) {
683 return;
684 }
685
686 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
687 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
688
689 /*
690 * We must have dissociated from a fully closed pcb immediately
691 * since lwip recycles them and we don't wan't to mess with what
692 * would be someone else's pcb that we happen to have a stale
693 * pointer to.
694 */
695 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
696
697 tcp_recv(pxtcp->pcb, NULL);
698 tcp_sent(pxtcp->pcb, NULL);
699 tcp_poll(pxtcp->pcb, NULL, 255);
700 tcp_err(pxtcp->pcb, NULL);
701 tcp_arg(pxtcp->pcb, NULL);
702 pxtcp->pcb = NULL;
703}
704
705
706/**
707 * Lwip thread callback invoked via pxtcp::msg_delete
708 *
709 * Since we use static messages to communicate to the lwip thread, we
710 * cannot delete pxtcp without making sure there are no unprocessed
711 * messages in the lwip thread mailbox.
712 *
713 * The easiest way to ensure that is to send this "delete" message as
714 * the last one and when it's processed we know there are no more and
715 * it's safe to delete pxtcp.
716 *
717 * Poll manager handlers should use pxtcp_schedule_delete()
718 * convenience function.
719 */
720static void
721pxtcp_pcb_delete_pxtcp(void *ctx)
722{
723 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
724
725 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
726 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
727 (pxtcp->deferred_delete && !pxtcp->inbound_pull
728 ? " (was deferred)" : "")));
729
730 LWIP_ASSERT1(pxtcp != NULL);
731 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
732 LWIP_ASSERT1(pxtcp->outbound_close_done);
733 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
734
735
736 /*
737 * pxtcp is no longer registered with poll manager, so it's safe
738 * to close the socket.
739 */
740 if (pxtcp->sock != INVALID_SOCKET) {
741 closesocket(pxtcp->sock);
742 pxtcp->sock = INVALID_SOCKET;
743 }
744
745 /*
746 * We might have already dissociated from a fully closed pcb, or
747 * guest might have sent us a reset while msg_delete was in
748 * transit. If there's no pcb, we are done.
749 */
750 if (pxtcp->pcb == NULL) {
751 pollmgr_refptr_unref(pxtcp->rp);
752 pxtcp_free(pxtcp);
753 return;
754 }
755
756 /*
757 * Have we completely forwarded all inbound traffic to the guest?
758 *
759 * We may still be waiting for ACKs. We may have failed to send
760 * some of the data (tcp_write() failed with ERR_MEM). We may
761 * have failed to send the FIN (tcp_shutdown() failed with
762 * ERR_MEM).
763 */
764 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
765 pxtcp_pcb_dissociate(pxtcp);
766 pollmgr_refptr_unref(pxtcp->rp);
767 pxtcp_free(pxtcp);
768 }
769 else {
770 DPRINTF2(("delete: pxtcp %p; pcb %p:"
771 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
772 (void *)pxtcp, (void *)pxtcp->pcb,
773 (int)pxtcp->inbuf.unacked,
774 (int)pxtcp->inbuf.unsent,
775 (int)pxtcp->inbuf.vacant,
776 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
777
778 LWIP_ASSERT1(!pxtcp->deferred_delete);
779 pxtcp->deferred_delete = 1;
780 }
781}
782
783
784/**
785 * If we couldn't delete pxtcp right away in the msg_delete callback
786 * from the poll manager thread, we repeat the check at the end of
787 * relevant pcb callbacks.
788 */
789DECLINLINE(void)
790pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
791{
792 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
793 pxtcp_pcb_delete_pxtcp(pxtcp);
794 }
795}
796
797
798/**
799 * Poll manager callbacks should use this convenience wrapper to
800 * schedule pxtcp deletion on the lwip thread and to deregister from
801 * the poll manager.
802 */
803static int
804pxtcp_schedule_delete(struct pxtcp *pxtcp)
805{
806 /*
807 * If pollmgr_refptr_get() is called by any channel before
808 * scheduled deletion happens, let them know we are gone.
809 */
810 pxtcp->pmhdl.slot = -1;
811
812 /*
813 * Schedule deletion. Since poll manager thread may be pre-empted
814 * right after we send the message, the deletion may actually
815 * happen on the lwip thread before we return from this function,
816 * so it's not safe to refer to pxtcp after this call.
817 */
818 proxy_lwip_post(&pxtcp->msg_delete);
819
820 /* tell poll manager to deregister us */
821 return -1;
822}
823
824
825/**
826 * Lwip thread callback invoked via pxtcp::msg_reset
827 *
828 * Like pxtcp_pcb_delete(), but sends RST to the guest before
829 * deleting this pxtcp.
830 */
831static void
832pxtcp_pcb_reset_pxtcp(void *ctx)
833{
834 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
835 LWIP_ASSERT1(pxtcp != NULL);
836
837 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
838 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
839
840 if (pxtcp->sock != INVALID_SOCKET) {
841 proxy_reset_socket(pxtcp->sock);
842 pxtcp->sock = INVALID_SOCKET;
843 }
844
845 if (pxtcp->pcb != NULL) {
846 struct tcp_pcb *pcb = pxtcp->pcb;
847 pxtcp_pcb_dissociate(pxtcp);
848 tcp_abort(pcb);
849 }
850
851 pollmgr_refptr_unref(pxtcp->rp);
852 pxtcp_free(pxtcp);
853}
854
855
856
857/**
858 * Poll manager callbacks should use this convenience wrapper to
859 * schedule pxtcp reset and deletion on the lwip thread and to
860 * deregister from the poll manager.
861 *
862 * See pxtcp_schedule_delete() for additional comments.
863 */
864static int
865pxtcp_schedule_reset(struct pxtcp *pxtcp)
866{
867 pxtcp->pmhdl.slot = -1;
868 proxy_lwip_post(&pxtcp->msg_reset);
869 return -1;
870}
871
872
873/**
874 * Reject proxy connection attempt. Depending on the cause (sockerr)
875 * we may just drop the pcb silently, generate an ICMP datagram or
876 * send TCP reset.
877 */
878static void
879pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
880 struct pbuf *p, int sockerr)
881{
882 struct netif *oif;
883 int reset = 0;
884
885 oif = ip_current_netif();
886 ip_current_netif() = netif;
887
888 if (sockerr == ECONNREFUSED) {
889 reset = 1;
890 }
891 else if (PCB_ISIPV6(pcb)) {
892 if (sockerr == EHOSTDOWN) {
893 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
894 }
895 else if (sockerr == EHOSTUNREACH
896 || sockerr == ENETDOWN
897 || sockerr == ENETUNREACH)
898 {
899 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
900 }
901 }
902 else {
903 if (sockerr == EHOSTDOWN
904 || sockerr == EHOSTUNREACH
905 || sockerr == ENETDOWN
906 || sockerr == ENETUNREACH)
907 {
908 icmp_dest_unreach(p, ICMP_DUR_HOST);
909 }
910 }
911
912 ip_current_netif() = oif;
913
914 tcp_abandon(pcb, reset);
915}
916
917
918/**
919 * Called from poll manager thread via pxtcp::msg_accept when proxy
920 * failed to connect to the destination. Also called when we failed
921 * to register pxtcp with poll manager.
922 *
923 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
924 * how this unestablished connection is terminated.
925 */
926static void
927pxtcp_pcb_accept_refuse(void *ctx)
928{
929 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
930
931 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: errno %d\n",
932 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
933 pxtcp->sock, pxtcp->sockerr));
934
935 LWIP_ASSERT1(pxtcp != NULL);
936 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
937
938 if (pxtcp->pcb != NULL) {
939 struct tcp_pcb *pcb = pxtcp->pcb;
940 pxtcp_pcb_dissociate(pxtcp);
941 pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
942 }
943
944 pollmgr_refptr_unref(pxtcp->rp);
945 pxtcp_free(pxtcp);
946}
947
948
949/**
950 * Convenience wrapper for poll manager connect callback to reject
951 * connection attempt.
952 *
953 * Like pxtcp_schedule_reset(), but the callback is more discriminate
954 * in how this unestablished connection is terminated.
955 */
956static int
957pxtcp_schedule_reject(struct pxtcp *pxtcp)
958{
959 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
960 pxtcp->pmhdl.slot = -1;
961 proxy_lwip_post(&pxtcp->msg_accept);
962 return -1;
963}
964
965
966/**
967 * Global tcp_proxy_accept() callback for proxied outgoing TCP
968 * connections from guest(s).
969 */
970static err_t
971pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
972{
973 struct pbuf *p = (struct pbuf *)arg;
974 struct pxtcp *pxtcp;
975 ipX_addr_t dst_addr;
976 int sdom;
977 SOCKET sock;
978 ssize_t nsent;
979 int sockerr = 0;
980
981 LWIP_UNUSED_ARG(error); /* always ERR_OK */
982
983 /*
984 * TCP first calls accept callback when it receives the first SYN
985 * and "tentatively accepts" new proxied connection attempt. When
986 * proxy "confirms" the SYN and sends SYN|ACK and the guest
987 * replies with ACK the accept callback is called again, this time
988 * with the established connection.
989 */
990 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
991 tcp_accept(newpcb, pxtcp_pcb_accept);
992 tcp_arg(newpcb, NULL);
993
994 tcp_setprio(newpcb, TCP_PRIO_MAX);
995
996 pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
997
998 sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
999 sock = proxy_connected_socket(sdom, SOCK_STREAM,
1000 &dst_addr, newpcb->local_port);
1001 if (sock == INVALID_SOCKET) {
1002 sockerr = errno;
1003 goto abort;
1004 }
1005
1006 pxtcp = pxtcp_allocate();
1007 if (pxtcp == NULL) {
1008 proxy_reset_socket(sock);
1009 goto abort;
1010 }
1011
1012 /* save initial datagram in case we need to reply with ICMP */
1013 pbuf_ref(p);
1014 pxtcp->unsent = p;
1015 pxtcp->netif = ip_current_netif();
1016
1017 pxtcp_pcb_associate(pxtcp, newpcb);
1018 pxtcp->sock = sock;
1019
1020 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1021 pxtcp->events = POLLOUT;
1022
1023 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1024 if (nsent < 0) {
1025 pxtcp->sock = INVALID_SOCKET;
1026 proxy_reset_socket(sock);
1027 pxtcp_pcb_accept_refuse(pxtcp);
1028 return ERR_ABRT;
1029 }
1030
1031 return ERR_OK;
1032
1033 abort:
1034 DPRINTF0(("%s: pcb %p, sock %d: errno %d\n",
1035 __func__, (void *)newpcb, sock, sockerr));
1036 pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
1037 return ERR_ABRT;
1038}
1039
1040
1041/**
1042 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1043 * connections from guest(s). This is "real" accept with three-way
1044 * handshake completed.
1045 */
1046static err_t
1047pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1048{
1049 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1050
1051 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1052 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1053
1054 LWIP_ASSERT1(pxtcp != NULL);
1055 LWIP_ASSERT1(pxtcp->pcb = pcb);
1056 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1057
1058 /* send any inbound data that are already queued */
1059 pxtcp_pcb_forward_inbound(pxtcp);
1060 return ERR_OK;
1061}
1062
1063
1064/**
1065 * Initial poll manager callback for proxied outgoing TCP connections.
1066 * pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this.
1067 *
1068 * Waits for connect(2) to the destination to complete. On success
1069 * replaces itself with pxtcp_pmgr_pump() callback common to all
1070 * established TCP connections.
1071 */
1072static int
1073pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1074{
1075 struct pxtcp *pxtcp;
1076 int sockerr;
1077
1078 pxtcp = (struct pxtcp *)handler->data;
1079 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1080 LWIP_ASSERT1(fd == pxtcp->sock);
1081
1082 if (revents & (POLLNVAL | POLLHUP | POLLERR)) {
1083 if (revents & POLLNVAL) {
1084 pxtcp->sock = INVALID_SOCKET;
1085 pxtcp->sockerr = ETIMEDOUT;
1086 }
1087 else {
1088 socklen_t optlen = (socklen_t)sizeof(sockerr);
1089 int status;
1090 SOCKET s;
1091
1092 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1093 (char *)&pxtcp->sockerr, &optlen);
1094 if (status < 0) { /* should not happen */
1095 sockerr = errno; /* ??? */
1096 perror("connect: getsockopt");
1097 }
1098 else {
1099#ifndef RT_OS_WINDOWS
1100 errno = pxtcp->sockerr; /* to avoid strerror_r */
1101#else
1102 /* see winutils.h */
1103 WSASetLastError(pxtcp->sockerr);
1104#endif
1105 perror("connect");
1106 }
1107 s = pxtcp->sock;
1108 pxtcp->sock = INVALID_SOCKET;
1109 closesocket(s);
1110 }
1111 return pxtcp_schedule_reject(pxtcp);
1112 }
1113
1114 if (revents & POLLOUT) { /* connect is successful */
1115 /* confirm accept to the guest */
1116 proxy_lwip_post(&pxtcp->msg_accept);
1117
1118 /*
1119 * Switch to common callback used for all established proxied
1120 * connections.
1121 */
1122 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1123
1124 /*
1125 * Initially we poll for incoming traffic only. Outgoing
1126 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1127 * it will ask us to poll for POLLOUT too.
1128 */
1129 pxtcp->events = POLLIN;
1130 return pxtcp->events;
1131 }
1132
1133 /* should never get here */
1134 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1135 __func__, (void *)pxtcp, fd, revents));
1136 return pxtcp_schedule_reset(pxtcp);
1137}
1138
1139
1140/**
1141 * Called from poll manager thread via pxtcp::msg_accept when proxy
1142 * connected to the destination. Finalize accept by sending SYN|ACK
1143 * to the guest.
1144 */
1145static void
1146pxtcp_pcb_accept_confirm(void *ctx)
1147{
1148 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1149 err_t error;
1150
1151 LWIP_ASSERT1(pxtcp != NULL);
1152 if (pxtcp->pcb == NULL) {
1153 return;
1154 }
1155
1156 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1157 LWIP_ASSERT1(pxtcp->unsent != NULL);
1158 pbuf_free(pxtcp->unsent);
1159 pxtcp->unsent = NULL;
1160
1161 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1162
1163 /*
1164 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1165 * abandons the pcb. Retrying that is not very easy, since it
1166 * would require keeping "fractional state". From guest's point
1167 * of view there is no reply to its SYN so it will either resend
1168 * the SYN (effetively triggering full connection retry for us),
1169 * or it will eventually time out.
1170 */
1171 if (error == ERR_ABRT) {
1172 pxtcp->pcb = NULL; /* pcb is gone */
1173 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1174 }
1175
1176 /*
1177 * else if (error != ERR_OK): even if tcp_output() failed with
1178 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1179 * retransmitted eventually.
1180 */
1181}
1182
1183
1184/**
1185 * Entry point for port-forwarding.
1186 *
1187 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1188 * (with no pcb yet) and adds it to the poll manager (polling for
1189 * errors only). Then it calls this function to construct the pcb and
1190 * perform connection to the guest.
1191 */
1192void
1193pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1194{
1195 struct sockaddr_storage ss;
1196 socklen_t sslen;
1197 struct tcp_pcb *pcb;
1198 ipX_addr_t src_addr, dst_addr;
1199 u16_t src_port, dst_port;
1200 int status;
1201 err_t error;
1202
1203 LWIP_ASSERT1(pxtcp != NULL);
1204 LWIP_ASSERT1(pxtcp->pcb == NULL);
1205 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1206
1207 pcb = tcp_new();
1208 if (pcb == NULL) {
1209 goto reset;
1210 }
1211
1212 tcp_setprio(pcb, TCP_PRIO_MAX);
1213 pxtcp_pcb_associate(pxtcp, pcb);
1214
1215 sslen = sizeof(ss);
1216 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1217 if (status == SOCKET_ERROR) {
1218 goto reset;
1219 }
1220
1221 /* nit: comapres PF and AF, but they are the same everywhere */
1222 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1223
1224 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1225 if (status == PXREMAP_FAILED) {
1226 goto reset;
1227 }
1228
1229 if (ss.ss_family == PF_INET) {
1230 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1231
1232 src_port = peer4->sin_port;
1233
1234 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1235 dst_port = fwspec->dst.sin.sin_port;
1236 }
1237 else { /* PF_INET6 */
1238 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1239 ip_set_v6(pcb, 1);
1240
1241 src_port = peer6->sin6_port;
1242
1243 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1244 dst_port = fwspec->dst.sin6.sin6_port;
1245 }
1246
1247 /* lwip port arguments are in host order */
1248 src_port = ntohs(src_port);
1249 dst_port = ntohs(dst_port);
1250
1251 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1252 if (error != ERR_OK) {
1253 goto reset;
1254 }
1255
1256 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1257 /* callback: */ pxtcp_pcb_connected);
1258 if (error != ERR_OK) {
1259 goto reset;
1260 }
1261
1262 return;
1263
1264 reset:
1265 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1266}
1267
1268
1269/**
1270 * Port-forwarded connection to guest is successful, pump data.
1271 */
1272static err_t
1273pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1274{
1275 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1276
1277 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1278 LWIP_UNUSED_ARG(error);
1279
1280 LWIP_ASSERT1(pxtcp != NULL);
1281 LWIP_ASSERT1(pxtcp->pcb == pcb);
1282 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1283 LWIP_UNUSED_ARG(pcb);
1284
1285 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1286 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1287
1288 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1289 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1290
1291 return ERR_OK;
1292}
1293
1294
1295/**
1296 * tcp_recv() callback.
1297 */
1298static err_t
1299pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1300{
1301 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1302
1303 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1304 LWIP_UNUSED_ARG(error);
1305
1306 LWIP_ASSERT1(pxtcp != NULL);
1307 LWIP_ASSERT1(pxtcp->pcb == pcb);
1308 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1309 LWIP_UNUSED_ARG(pcb);
1310
1311
1312 /*
1313 * Have we done sending previous batch?
1314 */
1315 if (pxtcp->unsent != NULL) {
1316 if (p != NULL) {
1317 /*
1318 * Return an error to tell TCP to hold onto that pbuf.
1319 * It will be presented to us later from tcp_fasttmr().
1320 */
1321 return ERR_WOULDBLOCK;
1322 }
1323 else {
1324 /*
1325 * Unlike data, p == NULL indicating orderly shutdown is
1326 * NOT presented to us again
1327 */
1328 pxtcp->outbound_close = 1;
1329 return ERR_OK;
1330 }
1331 }
1332
1333
1334 /*
1335 * Guest closed?
1336 */
1337 if (p == NULL) {
1338 pxtcp->outbound_close = 1;
1339 pxtcp_pcb_forward_outbound_close(pxtcp);
1340 return ERR_OK;
1341 }
1342
1343
1344 /*
1345 * Got data, send what we can without blocking.
1346 */
1347 return pxtcp_pcb_forward_outbound(pxtcp, p);
1348}
1349
1350
1351/**
1352 * Guest half-closed its TX side of the connection.
1353 *
1354 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1355 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1356 * previously unsent data and sees pxtcp::outbound_close flag saved by
1357 * pxtcp_pcb_recv().
1358 */
1359static void
1360pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1361{
1362 struct tcp_pcb *pcb;
1363
1364 LWIP_ASSERT1(pxtcp != NULL);
1365 LWIP_ASSERT1(pxtcp->outbound_close);
1366 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1367
1368 pcb = pxtcp->pcb;
1369 LWIP_ASSERT1(pcb != NULL);
1370
1371 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1372 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1373
1374
1375 /*
1376 * NB: set the flag first, since shutdown() will trigger POLLHUP
1377 * if inbound is already closed, and poll manager asserts
1378 * outbound_close_done (may be it should not?).
1379 */
1380 pxtcp->outbound_close_done = 1;
1381 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1382
1383#if !HAVE_TCP_POLLHUP
1384 /*
1385 * On NetBSD POLLHUP is not reported for TCP sockets, so we need
1386 * to nudge poll manager manually.
1387 */
1388 if (pxtcp->inbound_close) {
1389 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1390 }
1391#endif
1392
1393
1394 /* no more outbound data coming to us */
1395 tcp_recv(pcb, NULL);
1396
1397 /*
1398 * If we have already done inbound close previously (active close
1399 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1400 * state since those will be recycled by lwip when it runs out of
1401 * free pcbs in the pool.
1402 *
1403 * The test is true also for a pcb in CLOSING state that waits
1404 * just for the ACK of its FIN (to transition to TIME_WAIT).
1405 */
1406 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1407 pxtcp_pcb_dissociate(pxtcp);
1408 }
1409}
1410
1411
1412/**
1413 * Forward outbound data from pcb to socket.
1414 *
1415 * Called by pxtcp_pcb_recv() to forward new data and by callout
1416 * triggered by POLLOUT on the socket to send previously unsent data.
1417 *
1418 * (Re)scehdules one-time callout if not all data are sent.
1419 */
1420static err_t
1421pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1422{
1423 struct pbuf *qs, *q;
1424 size_t qoff;
1425 size_t forwarded;
1426 int sockerr;
1427
1428#if defined(MSG_NOSIGNAL)
1429 const int send_flags = MSG_NOSIGNAL;
1430#else
1431 const int send_flags = 0;
1432#endif
1433
1434
1435 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1436
1437 forwarded = 0;
1438 sockerr = 0;
1439
1440 q = NULL;
1441 qoff = 0;
1442
1443 qs = p;
1444 while (qs != NULL) {
1445#ifndef RT_OS_WINDOWS
1446 struct msghdr mh;
1447#else
1448 int rc;
1449#endif
1450 IOVEC iov[8];
1451 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1452 size_t fwd1;
1453 ssize_t nsent;
1454 size_t i;
1455
1456 fwd1 = 0;
1457 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1458 LWIP_ASSERT1(q->len > 0);
1459 IOVEC_SET_BASE(iov[i], q->payload);
1460 IOVEC_SET_LEN(iov[i], q->len);
1461 fwd1 += q->len;
1462 }
1463
1464#ifndef RT_OS_WINDOWS
1465 memset(&mh, 0, sizeof(mh));
1466 mh.msg_iov = iov;
1467 mh.msg_iovlen = i;
1468
1469 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1470#else
1471 /**
1472 * WSASend(,,,DWORD *,,,) - takes SSIZE_T (64bit value) ... so all nsent's
1473 * bits should be zeroed before passing to WSASent.
1474 */
1475 nsent = 0;
1476 rc = WSASend(pxtcp->sock, iov, (DWORD)i, (DWORD *)&nsent, 0, NULL, NULL);
1477 if (rc == SOCKET_ERROR) {
1478 /* WSASent reports SOCKET_ERROR and updates error accessible with
1479 * WSAGetLastError(). We assign nsent to -1, enforcing code below
1480 * to access error in BSD style.
1481 */
1482 warn("pxtcp_pcb_forward_outbound:WSASend error:%d nsent:%d\n",
1483 WSAGetLastError(),
1484 nsent);
1485 nsent = -1;
1486 }
1487#endif
1488
1489 if (nsent == (ssize_t)fwd1) {
1490 /* successfully sent this chain fragment completely */
1491 forwarded += nsent;
1492 qs = q;
1493 }
1494 else if (nsent >= 0) {
1495 /* successfully sent only some data */
1496 forwarded += nsent;
1497
1498 /* find the first pbuf that was not completely forwarded */
1499 qoff = nsent;
1500 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1501 if (qoff < q->len) {
1502 break;
1503 }
1504 qoff -= q->len;
1505 }
1506 LWIP_ASSERT1(q != NULL);
1507 LWIP_ASSERT1(qoff < q->len);
1508 break;
1509 }
1510 else {
1511 /*
1512 * Some errors are really not errors - if we get them,
1513 * it's not different from getting nsent == 0, so filter
1514 * them out here.
1515 */
1516 if (errno != EWOULDBLOCK
1517 && errno != EAGAIN
1518 && errno != ENOBUFS
1519 && errno != ENOMEM
1520 && errno != EINTR)
1521 {
1522 sockerr = errno;
1523 }
1524 q = qs;
1525 qoff = 0;
1526 break;
1527 }
1528 }
1529
1530 if (forwarded > 0) {
1531 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1532 }
1533
1534 if (q == NULL) { /* everything is forwarded? */
1535 LWIP_ASSERT1(sockerr == 0);
1536 LWIP_ASSERT1(forwarded == p->tot_len);
1537
1538 pxtcp->unsent = NULL;
1539 pbuf_free(p);
1540 if (pxtcp->outbound_close) {
1541 pxtcp_pcb_forward_outbound_close(pxtcp);
1542 }
1543 }
1544 else {
1545 if (q != p) {
1546 /* free forwarded pbufs at the beginning of the chain */
1547 pbuf_ref(q);
1548 pbuf_free(p);
1549 }
1550 if (qoff > 0) {
1551 /* advance payload pointer past the forwarded part */
1552 pbuf_header(q, -(s16_t)qoff);
1553 }
1554 pxtcp->unsent = q;
1555
1556 /*
1557 * Have sendmsg() failed?
1558 *
1559 * Connection reset will be detected by poll and
1560 * pxtcp_schedule_reset() will be called.
1561 *
1562 * Otherwise something *really* unexpected must have happened,
1563 * so we'd better abort.
1564 */
1565 if (sockerr != 0 && sockerr != ECONNRESET) {
1566 struct tcp_pcb *pcb = pxtcp->pcb;
1567 pxtcp_pcb_dissociate(pxtcp);
1568
1569 tcp_abort(pcb);
1570
1571 /* call error callback manually since we've already dissociated */
1572 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1573 return ERR_ABRT;
1574 }
1575
1576 /* schedule one-shot POLLOUT on the socket */
1577 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1578 }
1579 return ERR_OK;
1580}
1581
1582
1583/**
1584 * Callback from poll manager (on POLLOUT) to send data from
1585 * pxtcp::unsent pbuf to socket.
1586 */
1587static void
1588pxtcp_pcb_write_outbound(void *ctx)
1589{
1590 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1591 LWIP_ASSERT1(pxtcp != NULL);
1592
1593 if (pxtcp->pcb == NULL) {
1594 return;
1595 }
1596
1597 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1598}
1599
1600
1601/**
1602 * Common poll manager callback used by both outgoing and incoming
1603 * (port-forwarded) connections that has connected socket.
1604 */
1605static int
1606pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1607{
1608 struct pxtcp *pxtcp;
1609 int status;
1610 int sockerr;
1611
1612 pxtcp = (struct pxtcp *)handler->data;
1613 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1614 LWIP_ASSERT1(fd == pxtcp->sock);
1615
1616 if (revents & POLLNVAL) {
1617 pxtcp->sock = INVALID_SOCKET;
1618 return pxtcp_schedule_reset(pxtcp);
1619 }
1620
1621 if (revents & POLLERR) {
1622 socklen_t optlen = (socklen_t)sizeof(sockerr);
1623
1624 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1625 (char *)&sockerr, &optlen);
1626 if (status < 0) { /* should not happen */
1627 perror("getsockopt");
1628 sockerr = ECONNRESET;
1629 }
1630
1631 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1632 return pxtcp_schedule_reset(pxtcp);
1633 }
1634
1635 if (revents & POLLOUT) {
1636 pxtcp->events &= ~POLLOUT;
1637 proxy_lwip_post(&pxtcp->msg_outbound);
1638 }
1639
1640 if (revents & POLLIN) {
1641 ssize_t nread;
1642 int stop_pollin;
1643
1644 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1645 if (nread < 0) {
1646 sockerr = -(int)nread;
1647 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1648 return pxtcp_schedule_reset(pxtcp);
1649 }
1650
1651 if (stop_pollin) {
1652 pxtcp->events &= ~POLLIN;
1653 }
1654
1655 if (nread > 0) {
1656 proxy_lwip_post(&pxtcp->msg_inbound);
1657#if !HAVE_TCP_POLLHUP
1658 /*
1659 * If host does not report POLLHUP for closed sockets
1660 * (e.g. NetBSD) we should check for full close manually.
1661 */
1662 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1663 LWIP_ASSERT1((revents & POLLHUP) == 0);
1664 return pxtcp_schedule_delete(pxtcp);
1665 }
1666#endif
1667 }
1668 }
1669
1670#if !HAVE_TCP_POLLHUP
1671 LWIP_ASSERT1((revents & POLLHUP) == 0);
1672#else
1673 if (revents & POLLHUP) {
1674 /*
1675 * Linux and Darwin seems to report POLLHUP when both
1676 * directions are shut down. And they do report POLLHUP even
1677 * when there's unread data (which they aslo report as POLLIN
1678 * along with that POLLHUP).
1679 *
1680 * FreeBSD (from source inspection) seems to follow Linux,
1681 * reporting POLLHUP when both directions are shut down, but
1682 * POLLHUP is always accompanied with POLLIN.
1683 *
1684 * NetBSD never reports POLLHUP for sockets.
1685 *
1686 * ---
1687 *
1688 * If external half-closes first, we don't get POLLHUP, we
1689 * recv 0 bytes from the socket as EOF indicator, stop polling
1690 * for POLLIN and poll with events == 0 (with occasional
1691 * one-shot POLLOUT). When guest eventually closes, we get
1692 * POLLHUP.
1693 *
1694 * If guest half-closes first things are more tricky. As soon
1695 * as host sees the FIN from external it will spam POLLHUP,
1696 * even when there's unread data. The problem is that we
1697 * might have stopped polling for POLLIN because the ring
1698 * buffer is full or we were polling POLLIN but can't read all
1699 * of the data becuase buffer doesn't have enough space.
1700 * Either way, there's unread data but we can't keep polling
1701 * the socket.
1702 */
1703 DPRINTF(("sock %d: HUP\n", fd));
1704 LWIP_ASSERT1(pxtcp->outbound_close_done);
1705
1706 if (pxtcp->inbound_close) {
1707 /* there's no unread data, we are done */
1708 return pxtcp_schedule_delete(pxtcp);
1709 }
1710 else {
1711 /* DPRINTF */ {
1712#ifndef RT_OS_WINDOWS
1713 int unread;
1714#else
1715 u_long unread;
1716#endif
1717 status = ioctlsocket(fd, FIONREAD, &unread);
1718 if (status == SOCKET_ERROR) {
1719 perror("FIONREAD");
1720 }
1721 else {
1722 DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread));
1723 }
1724 }
1725
1726 /*
1727 * We cannot just set a flag here and let pxtcp_pcb_sent()
1728 * notice and start pulling, because if we are preempted
1729 * before setting the flag and all data in inbuf is ACKed
1730 * there will be no more calls to pxtcp_pcb_sent() to
1731 * notice the flag.
1732 *
1733 * We cannot set a flag and then send a message to make
1734 * sure it noticed, because if it has and it has read all
1735 * data while the message is in transit it will delete
1736 * pxtcp.
1737 *
1738 * In a sense this message is like msg_delete (except we
1739 * ask to pull some data first).
1740 */
1741 proxy_lwip_post(&pxtcp->msg_inpull);
1742 pxtcp->pmhdl.slot = -1;
1743 return -1;
1744 }
1745 /* NOTREACHED */
1746 } /* POLLHUP */
1747#endif /* HAVE_TCP_POLLHUP */
1748
1749 return pxtcp->events;
1750}
1751
1752
1753/**
1754 * Read data from socket to ringbuf. This may be used both on lwip
1755 * and poll manager threads.
1756 *
1757 * Flag pointed to by pstop is set when further reading is impossible,
1758 * either temporary when buffer is full, or permanently when EOF is
1759 * received.
1760 *
1761 * Returns number of bytes read. NB: EOF is reported as 1!
1762 *
1763 * Returns zero if nothing was read, either because buffer is full, or
1764 * if no data is available (EAGAIN, EINTR &c).
1765 *
1766 * Returns -errno on real socket errors.
1767 */
1768static ssize_t
1769pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1770{
1771 IOVEC iov[2];
1772#ifndef RT_OS_WINDOWS
1773 struct msghdr mh;
1774#else
1775 DWORD dwFlags;
1776 int rc;
1777#endif
1778 int iovlen;
1779 ssize_t nread;
1780
1781 const size_t sz = pxtcp->inbuf.bufsize;
1782 size_t beg, lim, wrnew;
1783
1784 *pstop = 0;
1785
1786#ifndef RT_OS_WINDOWS
1787 memset(&mh, 0, sizeof(mh));
1788 mh.msg_iov = iov;
1789#endif
1790
1791 beg = pxtcp->inbuf.vacant;
1792 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1793
1794 /* lim is the index we can NOT write to */
1795 lim = pxtcp->inbuf.unacked;
1796 if (lim == 0) {
1797 lim = sz - 1; /* empty slot at the end */
1798 }
1799 else if (lim == 1) {
1800 lim = sz; /* empty slot at the beginning */
1801 }
1802 else {
1803 --lim;
1804 }
1805
1806 if (beg == lim) {
1807 /*
1808 * Buffer is full, stop polling for POLLIN.
1809 *
1810 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1811 * data, freeing space in the ring buffer.
1812 */
1813 *pstop = 1;
1814 return 0;
1815 }
1816
1817 if (beg < lim) {
1818 /* free space in one chunk */
1819 iovlen = 1;
1820 IOVEC_SET_LEN(iov[0], lim - beg);
1821 }
1822 else {
1823 /* free space in two chunks */
1824 iovlen = 2;
1825 IOVEC_SET_LEN(iov[0], sz - beg);
1826 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1827 IOVEC_SET_LEN(iov[1], lim);
1828 }
1829
1830#ifndef RT_OS_WINDOWS
1831 mh.msg_iovlen = iovlen;
1832 nread = recvmsg(pxtcp->sock, &mh, 0);
1833#else
1834 dwFlags = 0;
1835 /* We can't assign nread to -1 expecting, that we'll got it back in case of error,
1836 * instead, WSARecv(,,,DWORD *,,,) will rewrite only half of the 64bit value.
1837 */
1838 nread = 0;
1839 rc = WSARecv(pxtcp->sock, iov, iovlen, (DWORD *)&nread, &dwFlags, NULL, NULL);
1840 if (rc == SOCKET_ERROR) {
1841 warn("pxtcp_sock_read:WSARecv(%d) error:%d nread:%d\n",
1842 pxtcp->sock,
1843 WSAGetLastError(),
1844 nread);
1845 nread = -1;
1846 }
1847
1848 if (dwFlags) {
1849 warn("pxtcp_sock_read:WSARecv(%d) dwFlags:%x nread:%d\n",
1850 pxtcp->sock,
1851 WSAGetLastError(),
1852 nread);
1853 }
1854#endif
1855
1856 if (nread > 0) {
1857 wrnew = beg + nread;
1858 if (wrnew >= sz) {
1859 wrnew -= sz;
1860 }
1861 pxtcp->inbuf.vacant = wrnew;
1862 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1863 (void *)pxtcp, pxtcp->sock, (int)nread));
1864 return nread;
1865 }
1866 else if (nread == 0) {
1867 *pstop = 1;
1868 pxtcp->inbound_close = 1;
1869 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1870 (void *)pxtcp, pxtcp->sock));
1871 return 1;
1872 }
1873 else if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) {
1874 /* haven't read anything, just return */
1875 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1876 (void *)pxtcp, pxtcp->sock));
1877 return 0;
1878 }
1879 else {
1880 /* socket error! */
1881 DPRINTF0(("pxtcp %p: sock %d read errno %d\n",
1882 (void *)pxtcp, pxtcp->sock, errno));
1883 return -errno;
1884 }
1885}
1886
1887
1888/**
1889 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1890 * from ringbuf to guest.
1891 */
1892static void
1893pxtcp_pcb_write_inbound(void *ctx)
1894{
1895 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1896 LWIP_ASSERT1(pxtcp != NULL);
1897
1898 if (pxtcp->pcb == NULL) {
1899 return;
1900 }
1901
1902 pxtcp_pcb_forward_inbound(pxtcp);
1903}
1904
1905
1906/**
1907 * tcp_poll() callback
1908 *
1909 * We swtich it on when tcp_write() or tcp_shutdown() fail with
1910 * ERR_MEM to prevent connection from stalling. If there are ACKs or
1911 * more inbound data then pxtcp_pcb_forward_inbound() will be
1912 * triggered again, but if neither happens, tcp_poll() comes to the
1913 * rescue.
1914 */
1915static err_t
1916pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
1917{
1918 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1919 LWIP_UNUSED_ARG(pcb);
1920
1921 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
1922 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
1923
1924 pxtcp_pcb_forward_inbound(pxtcp);
1925
1926 /*
1927 * If the last thing holding up deletion of the pxtcp was failed
1928 * tcp_shutdown() and it succeeded, we may be the last callback.
1929 */
1930 pxtcp_pcb_maybe_deferred_delete(pxtcp);
1931
1932 return ERR_OK;
1933}
1934
1935
1936static void
1937pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
1938{
1939 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
1940}
1941
1942
1943static void
1944pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
1945{
1946 tcp_poll(pxtcp->pcb, NULL, 255);
1947}
1948
1949
/**
 * Forward inbound data from ring buffer to the guest.
 *
 * Scheduled by poll manager thread after it receives more data into
 * the ring buffer (we have more data to send).
 *
 * Also called from tcp_sent() callback when guest ACKs some data,
 * increasing pcb->snd_buf (we are permitted to send more data).
 *
 * Also called from tcp_poll() callback if previous attempt to forward
 * inbound data failed with ERR_MEM (we need to try again).
 *
 * The data to send is the part of the ring buffer between
 * inbuf::unsent and inbuf::vacant.  If it wraps around the end of
 * the buffer it takes two calls to tcp_write().  inbuf::unsent is
 * advanced past whatever tcp_write() has accepted.
 */
static void
pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
{
    struct tcp_pcb *pcb;
    size_t sndbuf;
    size_t beg, lim, sndlim;
    size_t toeob, tolim;
    size_t nsent;
    err_t error;

    LWIP_ASSERT1(pxtcp != NULL);
    pcb = pxtcp->pcb;
    if (pcb == NULL) {
        return;
    }

    if (/* __predict_false */ pcb->state < ESTABLISHED) {
        /*
         * If we have just confirmed accept of this connection, the
         * pcb is in SYN_RCVD state and we still haven't received the
         * ACK of our SYN.  It's only in SYN_RCVD -> ESTABLISHED
         * transition that lwip decrements pcb->acked so that that ACK
         * is not reported to pxtcp_pcb_sent().  If we send something
         * now and immediately close (think "daytime", e.g.) while
         * still in SYN_RCVD state, we will move directly to
         * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
         * report it to pxtcp_pcb_sent().
         */
        DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
                  (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
        return;
    }


    beg = pxtcp->inbuf.unsent;  /* private to lwip thread */
    lim = pxtcp->inbuf.vacant;

    if (beg == lim) {
        /* all data is sent, but we may still have a close to forward */
        if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
            pxtcp_pcb_forward_inbound_close(pxtcp);
            tcp_output(pcb);
            return;
        }

        /*
         * Else, there's no data to send.
         *
         * If there is free space in the buffer, producer will
         * reschedule us as it receives more data and vacant (lim)
         * advances.
         *
         * If buffer is full when all data have been passed to
         * tcp_write() but not yet acknowledged, we will advance
         * unacked on ACK, freeing some space for producer to write to
         * (then see above).
         */
        return;
    }

    sndbuf = tcp_sndbuf(pcb);
    if (sndbuf == 0) {
        /*
         * Can't send anything now.  As guest ACKs some data, TCP will
         * call pxtcp_pcb_sent() callback and we will come here again.
         */
        return;
    }

    /* nsent is used for the log messages and to tell partial success */
    nsent = 0;

    /*
     * We have three limits to consider:
     * - how much data we have in the ringbuf
     * - how much data we are allowed to send
     * - ringbuf size
     */
    toeob = pxtcp->inbuf.bufsize - beg; /* bytes from beg to end of buffer */
    if (lim < beg) {            /* lim wrapped */
        if (sndbuf < toeob) {   /* but we are limited by sndbuf */
            /* so beg is not going to wrap, treat sndbuf as lim */
            lim = beg + sndbuf; /* ... and proceed to the simple case */
        }
        else { /* we are limited by the end of the buffer, beg will wrap */
            u8_t maybemore;
            if (toeob == sndbuf || lim == 0) {
                /* nothing will follow: no room to send or no data */
                maybemore = 0;
            }
            else {
                maybemore = TCP_WRITE_FLAG_MORE;
            }

            error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
            if (error != ERR_OK) {
                goto writeerr;
            }
            nsent += toeob;
            pxtcp->inbuf.unsent = 0; /* wrap */

            if (maybemore) {
                /* go on with the data at the beginning of the buffer */
                beg = 0;
                sndbuf -= toeob;
            }
            else {
                /* we are done sending, but ... */
                goto check_inbound_close;
            }
        }
    }

    /* the simple case: data in [beg, lim) is contiguous */
    LWIP_ASSERT1(beg < lim);
    sndlim = beg + sndbuf;
    if (lim > sndlim) {
        /* not allowed to send that much */
        lim = sndlim;
    }
    tolim = lim - beg;
    if (tolim > 0) {
        error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
        if (error != ERR_OK) {
            goto writeerr;
        }
        nsent += tolim;
        pxtcp->inbuf.unsent = lim;
    }

  check_inbound_close:
    /* if that was the last of the data, forward the close too */
    if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
        pxtcp_pcb_forward_inbound_close(pxtcp);
    }

    DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
              (void *)pxtcp, (void *)pcb, (int)nsent));
    tcp_output(pcb);
    pxtcp_pcb_cancel_poll(pxtcp);
    return;

  writeerr:
    if (error == ERR_MEM) {
        if (nsent > 0) { /* first write succeeded, second failed */
            DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
                      (void *)pxtcp, (void *)pcb, (int)nsent));
            tcp_output(pcb);
        }
        DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
                 (void *)pxtcp, (void *)pcb));
        /* have tcp_poll() call us to try again */
        pxtcp_pcb_schedule_poll(pxtcp);
    }
    else {
        DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
                 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));

        /* XXX: We shouldn't get ERR_ARG.  Check ERR_CONN conditions early? */
        /* NB: this assert always fails here, flagging the unexpected error */
        LWIP_ASSERT1(error == ERR_MEM);
    }
}
2116
2117
2118static void
2119pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2120{
2121 struct tcp_pcb *pcb;
2122 err_t error;
2123
2124 LWIP_ASSERT1(pxtcp != NULL);
2125 LWIP_ASSERT1(pxtcp->inbound_close);
2126 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2127 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2128
2129 pcb = pxtcp->pcb;
2130 LWIP_ASSERT1(pcb != NULL);
2131
2132 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2133 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2134
2135 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2136 if (error != ERR_OK) {
2137 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2138 " tcp_shutdown: error=%s\n",
2139 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2140 pxtcp_pcb_schedule_poll(pxtcp);
2141 return;
2142 }
2143
2144 pxtcp_pcb_cancel_poll(pxtcp);
2145 pxtcp->inbound_close_done = 1;
2146
2147
2148 /*
2149 * If we have already done outbound close previously (passive
2150 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2151 * state since those will be deleted by lwip when that last ack
2152 * comes from the guest.
2153 *
2154 * NB: We do NOT check for deferred delete here, even though we
2155 * have just set one of its conditions, inbound_close_done. We
2156 * let pcb callbacks that called us do that. It's simpler and
2157 * cleaner that way.
2158 */
2159 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2160 pxtcp_pcb_dissociate(pxtcp);
2161 }
2162}
2163
2164
2165/**
2166 * Check that all forwarded inbound data is sent and acked, and that
2167 * inbound close is scheduled (we aren't called back when it's acked).
2168 */
2169DECLINLINE(int)
2170pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2171{
2172 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2173 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2174}
2175
2176
2177/**
2178 * tcp_sent() callback - guest acknowledged len bytes.
2179 *
2180 * We can advance inbuf::unacked index, making more free space in the
2181 * ringbuf and wake up producer on poll manager thread.
2182 *
2183 * We can also try to send more data if we have any since pcb->snd_buf
2184 * was increased and we are now permitted to send more.
2185 */
2186static err_t
2187pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2188{
2189 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2190 size_t unacked;
2191
2192 LWIP_ASSERT1(pxtcp != NULL);
2193 LWIP_ASSERT1(pxtcp->pcb == pcb);
2194 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2195 LWIP_UNUSED_ARG(pcb); /* only in assert */
2196
2197 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2198 " unacked %d, unsent %d, vacant %d\n",
2199 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2200 (int)pxtcp->inbuf.unacked,
2201 (int)pxtcp->inbuf.unsent,
2202 (int)pxtcp->inbuf.vacant));
2203
2204 if (/* __predict_false */ len == 0) {
2205 /* we are notified to start pulling */
2206 LWIP_ASSERT1(pxtcp->outbound_close_done);
2207 LWIP_ASSERT1(!pxtcp->inbound_close);
2208 LWIP_ASSERT1(pxtcp->inbound_pull);
2209
2210 unacked = pxtcp->inbuf.unacked;
2211 }
2212 else {
2213 /*
2214 * Advance unacked index. Guest acknowledged the data, so it
2215 * won't be needed again for potential retransmits.
2216 */
2217 unacked = pxtcp->inbuf.unacked + len;
2218 if (unacked > pxtcp->inbuf.bufsize) {
2219 unacked -= pxtcp->inbuf.bufsize;
2220 }
2221 pxtcp->inbuf.unacked = unacked;
2222 }
2223
2224 /* arrange for more inbound data */
2225 if (!pxtcp->inbound_close) {
2226 if (!pxtcp->inbound_pull) {
2227 /* wake up producer, in case it has stopped polling for POLLIN */
2228 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2229#ifdef RT_OS_WINDOWS
2230 /**
2231 * We have't got enought room in ring buffer to read atm,
2232 * but we don't want to lose notification from WSAW4ME when
2233 * space would be available, so we reset event with empty recv
2234 */
2235 recv(pxtcp->sock, NULL, 0, 0);
2236#endif
2237 }
2238 else {
2239 ssize_t nread;
2240 int stop_pollin; /* ignored */
2241
2242 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2243
2244 if (nread < 0) {
2245 int sockerr = -(int)nread;
2246 LWIP_UNUSED_ARG(sockerr);
2247 DPRINTF0(("%s: sock %d: errno %d\n",
2248 __func__, pxtcp->sock, sockerr));
2249
2250 /*
2251 * Since we are pulling, pxtcp is no longer registered
2252 * with poll manager so we can kill it directly.
2253 */
2254 pxtcp_pcb_reset_pxtcp(pxtcp);
2255 return ERR_ABRT;
2256 }
2257 }
2258 }
2259
2260 /* forward more data if we can */
2261 if (!pxtcp->inbound_close_done) {
2262 pxtcp_pcb_forward_inbound(pxtcp);
2263
2264 /*
2265 * NB: we might have dissociated from a pcb that transitioned
2266 * to LAST_ACK state, so don't refer to pcb below.
2267 */
2268 }
2269
2270
2271 /* have we got all the acks? */
2272 if (pxtcp->inbound_close /* no more new data */
2273 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2274 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2275 {
2276 char *buf;
2277
2278 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2279 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2280
2281 /* no more retransmits, so buf is not needed */
2282 buf = pxtcp->inbuf.buf;
2283 pxtcp->inbuf.buf = NULL;
2284 free(buf);
2285
2286 /* no more acks, so no more callbacks */
2287 if (pxtcp->pcb != NULL) {
2288 tcp_sent(pxtcp->pcb, NULL);
2289 }
2290
2291 /*
2292 * We may be the last callback for this pcb if we have also
2293 * successfully forwarded inbound_close.
2294 */
2295 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2296 }
2297
2298 return ERR_OK;
2299}
2300
2301
2302/**
2303 * Callback from poll manager (pxtcp::msg_inpull) to switch
2304 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2305 * POLLHUP comment in pxtcp_pmgr_pump().
2306 *
2307 * pxtcp::sock is deregistered from poll manager after this callback
2308 * is scheduled.
2309 */
2310static void
2311pxtcp_pcb_pull_inbound(void *ctx)
2312{
2313 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2314 LWIP_ASSERT1(pxtcp != NULL);
2315
2316 if (pxtcp->pcb == NULL) {
2317 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2318 pxtcp_pcb_reset_pxtcp(pxtcp);
2319 return;
2320 }
2321
2322 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2323 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2324 pxtcp->inbound_pull = 1;
2325 pxtcp->deferred_delete = 1;
2326 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2327}
2328
2329
2330/**
2331 * tcp_err() callback.
2332 *
2333 * pcb is not passed to this callback since it may be already
2334 * deallocated by the stack, but we can't do anything useful with it
2335 * anyway since connection is gone.
2336 */
2337static void
2338pxtcp_pcb_err(void *arg, err_t error)
2339{
2340 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2341 LWIP_ASSERT1(pxtcp != NULL);
2342
2343 /*
2344 * ERR_CLSD is special - it is reported here when:
2345 *
2346 * . guest has already half-closed
2347 * . we send FIN to guest when external half-closes
2348 * . guest acks that FIN
2349 *
2350 * Since connection is closed but receive has been already closed
2351 * lwip can only report this via tcp_err. At this point the pcb
2352 * is still alive, so we can peek at it if need be.
2353 *
2354 * The interesting twist is when the ACK from guest that akcs our
2355 * FIN also acks some data. In this scenario lwip will NOT call
2356 * tcp_sent() callback with the ACK for that last bit of data but
2357 * instead will call tcp_err with ERR_CLSD right away. Since that
2358 * ACK also acknowledges all the data, we should run some of
2359 * pxtcp_pcb_sent() logic here.
2360 */
2361 if (error == ERR_CLSD) {
2362 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2363
2364 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2365 " pcb->acked %d;"
2366 " unacked %d, unsent %d, vacant %d\n",
2367 (void *)pxtcp, (void *)pcb,
2368 pcb->acked,
2369 (int)pxtcp->inbuf.unacked,
2370 (int)pxtcp->inbuf.unsent,
2371 (int)pxtcp->inbuf.vacant));
2372
2373 LWIP_ASSERT1(pxtcp->pcb == pcb);
2374 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2375
2376 if (pcb->acked > 0) {
2377 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2378 }
2379 return;
2380 }
2381
2382 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2383 (void *)pxtcp, proxy_lwip_strerr(error)));
2384
2385 pxtcp->pcb = NULL; /* pcb is gone */
2386 if (pxtcp->deferred_delete) {
2387 pxtcp_pcb_reset_pxtcp(pxtcp);
2388 }
2389 else {
2390 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2391 }
2392}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette