VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 63281

Last change on this file since 63281 was 63281, checked in by vboxsync, 8 years ago

NetworkServices: warnings

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 69.0 KB
Line 
1/* $Id: pxtcp.c 63281 2016-08-10 14:52:51Z vboxsync $ */
2/** @file
3 * NAT Network - TCP proxy.
4 */
5
6/*
7 * Copyright (C) 2013-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#define LOG_GROUP LOG_GROUP_NAT_SERVICE
19
20#include "winutils.h"
21
22#include "pxtcp.h"
23
24#include "proxy.h"
25#include "proxy_pollmgr.h"
26#include "pxremap.h"
27#include "portfwd.h" /* fwspec */
28
29#ifndef RT_OS_WINDOWS
30#include <sys/types.h>
31#include <sys/socket.h>
32#include <sys/ioctl.h>
33#ifdef RT_OS_SOLARIS
34#include <sys/filio.h> /* FIONREAD is BSD'ism */
35#endif
36#include <stdlib.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <string.h>
40#include <poll.h>
41
42#include <err.h> /* BSD'ism */
43#else
44#include <stdlib.h>
45#include <stdio.h>
46#include <string.h>
47
48#include <iprt/stdint.h>
49#include "winpoll.h"
50#endif
51
52#include "lwip/opt.h"
53
54#include "lwip/sys.h"
55#include "lwip/tcpip.h"
56#include "lwip/netif.h"
57#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
58#include "lwip/icmp.h"
59#include "lwip/icmp6.h"
60
/*
 * Different OSes have different quirks in reporting POLLHUP for TCP
 * sockets.
 *
 * HAVE_TCP_POLLHUP is a mask of poll(2) direction bits.  It is tested
 * in this file as (HAVE_TCP_POLLHUP & POLLOUT) to decide whether the
 * explicit POLLMGR_CHAN_PXTCP_DEL channel has to be compiled in.
 *
 * Using shutdown(2) "how" values here would be more readable, but
 * since SHUT_RD is 0, we can't use 0 for "none", unfortunately.
 */
#if defined(RT_OS_NETBSD) || defined(RT_OS_SOLARIS)
# define HAVE_TCP_POLLHUP 0 /* not reported */
#elif defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS)
# define HAVE_TCP_POLLHUP POLLIN /* reported when remote closes */
#else
# define HAVE_TCP_POLLHUP (POLLIN|POLLOUT) /* reported when both directions are closed */
#endif
75
76
/**
 * Ring buffer for inbound data.  Filled with data from the host
 * socket on the poll manager thread.  Data are consumed by scheduling
 * tcp_write() to the pcb on the lwIP thread.
 *
 * NB: There is actually a third party present, the lwIP stack itself.
 * Thus the buffer doesn't have a dual free vs. data split, but rather
 * a three-way split: free / sent-but-unACKed data / unsent data.
 */
struct ringbuf {
    /** Backing storage and its size in bytes. */
    char *buf;
    size_t bufsize;

    /*
     * Start of free space, producer writes here (up till "unacked").
     */
    volatile size_t vacant;

    /*
     * Start of sent but unacknowledged data.  The data are "owned" by
     * the stack as it may need to retransmit.  This is the free space
     * limit for producer.
     */
    volatile size_t unacked;

    /*
     * Start of unsent data, consumer reads/sends from here (up till
     * "vacant").  Not declared volatile since it's only accessed from
     * the consumer thread.
     */
    size_t unsent;
};
109
110
/**
 * State of one proxied TCP connection: pairs the lwIP (guest side)
 * pcb with the host (external side) socket and carries the bookkeeping
 * shared between the lwIP thread and the poll manager thread.
 */
struct pxtcp {
    /**
     * Our poll manager handler.  Must be first, strong/weak
     * references depend on this "inheritance".
     */
    struct pollmgr_handler pmhdl;

    /**
     * lwIP (internal/guest) side of the proxied connection.
     */
    struct tcp_pcb *pcb;

    /**
     * Host (external) side of the proxied connection.
     */
    SOCKET sock;

    /**
     * Socket events we are currently polling for.
     */
    int events;

    /**
     * Socket error.  Currently used to save connect(2) errors so that
     * we can decide if we need to send ICMP error.
     */
    int sockerr;

    /**
     * Interface that we have got the SYN from.  Needed to send ICMP
     * with correct source address.
     */
    struct netif *netif;

    /**
     * For tentatively accepted connections for which we are in
     * process of connecting to the real destination this is the
     * initial pbuf that we might need to build ICMP error.
     *
     * When connection is established this is used to hold outbound
     * pbuf chain received by pxtcp_pcb_recv() but not yet completely
     * forwarded over the socket.  We cannot "return" it to lwIP since
     * the head of the chain is already sent and freed.
     */
    struct pbuf *unsent;

    /**
     * Guest has closed its side.  Reported to pxtcp_pcb_recv() only
     * once and we might not be able to forward it immediately if we
     * have unsent pbuf.
     */
    int outbound_close;

    /**
     * Outbound half-close has been done on the socket.
     */
    int outbound_close_done;

    /**
     * External has closed its side.  We might not be able to forward
     * it immediately if we have unforwarded data.
     */
    int inbound_close;

    /**
     * Inbound half-close has been done on the pcb.
     */
    int inbound_close_done;

    /**
     * On systems that report POLLHUP as soon as the final FIN is
     * received on a socket we cannot continue polling for the rest of
     * input, so we have to read (pull) last data from the socket on
     * the lwIP thread instead of polling/pushing it from the poll
     * manager thread.  See comment in pxtcp_pmgr_pump() POLLHUP case.
     */
    int inbound_pull;


    /**
     * When poll manager schedules delete we may not be able to delete
     * a pxtcp immediately if not all inbound data has been acked by
     * the guest: lwIP may need to resend and the data are in pxtcp's
     * inbuf::buf.  We defer delete until all data are acked to
     * pxtcp_pcb_sent().
     */
    int deferred_delete;

    /**
     * Ring-buffer for inbound data.
     */
    struct ringbuf inbuf;

    /**
     * lwIP thread's strong reference to us.
     */
    struct pollmgr_refptr *rp;


    /*
     * We use static messages to call functions on the lwIP thread to
     * avoid malloc/free overhead.
     */
    struct tcpip_msg msg_delete; /* delete pxtcp */
    struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
    struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
    struct tcpip_msg msg_outbound; /* trigger send of outbound data */
    struct tcpip_msg msg_inbound; /* trigger send of inbound data */
    struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
};
223
224
225
/* pxtcp object life cycle */
static struct pxtcp *pxtcp_allocate(void);
static void pxtcp_free(struct pxtcp *);

/* attach/detach a pxtcp to/from its lwIP pcb */
static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
static void pxtcp_pcb_dissociate(struct pxtcp *);

/* poll manager callbacks for pxtcp related channels */
static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
#if !(HAVE_TCP_POLLHUP & POLLOUT)
static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
#endif
static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);

/* helper functions for sending/receiving pxtcp over poll manager channels */
static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);

/* poll manager callbacks for individual sockets */
static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);

/* get incoming traffic into ring buffer */
static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */

/* convenience functions for poll manager callbacks */
static int pxtcp_schedule_delete(struct pxtcp *);
static int pxtcp_schedule_reset(struct pxtcp *);
static int pxtcp_schedule_reject(struct pxtcp *);

/* lwip thread callbacks called via proxy_lwip_post() */
static void pxtcp_pcb_delete_pxtcp(void *);
static void pxtcp_pcb_reset_pxtcp(void *);
static void pxtcp_pcb_accept_refuse(void *);
static void pxtcp_pcb_accept_confirm(void *);
static void pxtcp_pcb_write_outbound(void *);
static void pxtcp_pcb_write_inbound(void *);
static void pxtcp_pcb_pull_inbound(void *);

/* tcp pcb callbacks */
static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, struct pbuf *); /* global */
static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
static void pxtcp_pcb_err(void *, err_t);

/* outbound direction: guest pcb -> host socket */
static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);

static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t);

/* inbound direction: host socket -> guest pcb */
static void pxtcp_pcb_forward_inbound(struct pxtcp *);
static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
static void pxtcp_pcb_schedule_poll(struct pxtcp *);
static void pxtcp_pcb_cancel_poll(struct pxtcp *);

static void pxtcp_pcb_reject(struct tcp_pcb *, int, struct netif *, struct pbuf *);
DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);

/* poll manager handlers for pxtcp channels, filled in by pxtcp_init() */
static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
#if !(HAVE_TCP_POLLHUP & POLLOUT)
static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
#endif
static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
300
301
302/**
303 * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll
304 * manager threads haven't been created yet.
305 */
306void
307pxtcp_init(void)
308{
309 /*
310 * Create channels.
311 */
312#define CHANNEL(SLOT, NAME) do { \
313 NAME##_hdl.callback = NAME; \
314 NAME##_hdl.data = NULL; \
315 NAME##_hdl.slot = -1; \
316 pollmgr_add_chan(SLOT, &NAME##_hdl); \
317 } while (0)
318
319 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
320 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
321 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
322#if !(HAVE_TCP_POLLHUP & POLLOUT)
323 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
324#endif
325 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
326
327#undef CHANNEL
328
329 /*
330 * Listen to outgoing connection from guest(s).
331 */
332 tcp_proxy_accept(pxtcp_pcb_heard);
333}
334
335
336/**
337 * Syntactic sugar for sending pxtcp pointer over poll manager
338 * channel. Used by lwip thread functions.
339 */
340static ssize_t
341pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
342{
343 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
344}
345
346
347/**
348 * Syntactic sugar for sending weak reference to pxtcp over poll
349 * manager channel. Used by lwip thread functions.
350 */
351static ssize_t
352pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
353{
354 pollmgr_refptr_weak_ref(pxtcp->rp);
355 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
356}
357
358
359/**
360 * Counterpart of pxtcp_chan_send().
361 */
362static struct pxtcp *
363pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
364{
365 struct pxtcp *pxtcp;
366
367 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
368 return pxtcp;
369}
370
371
372/**
373 * Counterpart of pxtcp_chan_send_weak().
374 */
375static struct pxtcp *
376pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
377{
378 struct pollmgr_refptr *rp;
379 struct pollmgr_handler *base;
380 struct pxtcp *pxtcp;
381
382 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
383 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
384 pxtcp = (struct pxtcp *)base;
385
386 return pxtcp;
387}
388
389
390/**
391 * Register pxtcp with poll manager.
392 *
393 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
394 * error handling is different in these two cases, we leave it up to
395 * the caller.
396 */
397int
398pxtcp_pmgr_add(struct pxtcp *pxtcp)
399{
400 int status;
401
402 LWIP_ASSERT1(pxtcp != NULL);
403 LWIP_ASSERT1(pxtcp->sock >= 0);
404 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
405 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
406 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
407
408 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
409 return status;
410}
411
412
413/**
414 * Unregister pxtcp with poll manager.
415 *
416 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
417 * leg).
418 */
419void
420pxtcp_pmgr_del(struct pxtcp *pxtcp)
421{
422 LWIP_ASSERT1(pxtcp != NULL);
423
424 pollmgr_del_slot(pxtcp->pmhdl.slot);
425}
426
427
428/**
429 * POLLMGR_CHAN_PXTCP_ADD handler.
430 *
431 * Get new pxtcp from lwip thread and start polling its socket.
432 */
433static int
434pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
435{
436 struct pxtcp *pxtcp;
437 int status;
438
439 pxtcp = pxtcp_chan_recv(handler, fd, revents);
440 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
441 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
442
443 status = pxtcp_pmgr_add(pxtcp);
444 if (status < 0) {
445 (void) pxtcp_schedule_reset(pxtcp);
446 }
447
448 return POLLIN;
449}
450
451
452/**
453 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
454 *
455 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
456 * and failed, it now requests us to poll the socket for POLLOUT and
457 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
458 */
459static int
460pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
461{
462 struct pxtcp *pxtcp;
463
464 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
465 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
466
467 if (pxtcp == NULL) {
468 return POLLIN;
469 }
470
471 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
472 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
473
474 pxtcp->events |= POLLOUT;
475 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
476
477 return POLLIN;
478}
479
480
481/**
482 * POLLMGR_CHAN_PXTCP_POLLIN handler.
483 */
484static int
485pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
486{
487 struct pxtcp *pxtcp;
488
489 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
490 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
491
492 if (pxtcp == NULL) {
493 return POLLIN;
494 }
495
496 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
497 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
498
499 if (pxtcp->inbound_close) {
500 return POLLIN;
501 }
502
503 pxtcp->events |= POLLIN;
504 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
505
506 return POLLIN;
507}
508
509
510#if !(HAVE_TCP_POLLHUP & POLLOUT)
511/**
512 * POLLMGR_CHAN_PXTCP_DEL handler.
513 *
514 * Schedule pxtcp deletion. We only need this if host system doesn't
515 * report POLLHUP for fully closed tcp sockets.
516 */
517static int
518pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
519{
520 struct pxtcp *pxtcp;
521
522 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
523 if (pxtcp == NULL) {
524 return POLLIN;
525 }
526
527 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
528 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
529
530 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
531 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
532
533 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
534 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
535
536 pxtcp_pmgr_del(pxtcp);
537 (void) pxtcp_schedule_delete(pxtcp);
538
539 return POLLIN;
540}
541#endif /* !(HAVE_TCP_POLLHUP & POLLOUT) */
542
543
544/**
545 * POLLMGR_CHAN_PXTCP_RESET handler.
546 *
547 * Close the socket with RST and delete pxtcp.
548 */
549static int
550pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
551{
552 struct pxtcp *pxtcp;
553
554 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
555 if (pxtcp == NULL) {
556 return POLLIN;
557 }
558
559 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
560 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
561
562 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
563 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
564
565 pxtcp_pmgr_del(pxtcp);
566
567 proxy_reset_socket(pxtcp->sock);
568 pxtcp->sock = INVALID_SOCKET;
569
570 (void) pxtcp_schedule_reset(pxtcp);
571
572 return POLLIN;
573}
574
575
576static struct pxtcp *
577pxtcp_allocate(void)
578{
579 struct pxtcp *pxtcp;
580
581 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
582 if (pxtcp == NULL) {
583 return NULL;
584 }
585
586 pxtcp->pmhdl.callback = NULL;
587 pxtcp->pmhdl.data = (void *)pxtcp;
588 pxtcp->pmhdl.slot = -1;
589
590 pxtcp->pcb = NULL;
591 pxtcp->sock = INVALID_SOCKET;
592 pxtcp->events = 0;
593 pxtcp->sockerr = 0;
594 pxtcp->netif = NULL;
595 pxtcp->unsent = NULL;
596 pxtcp->outbound_close = 0;
597 pxtcp->outbound_close_done = 0;
598 pxtcp->inbound_close = 0;
599 pxtcp->inbound_close_done = 0;
600 pxtcp->inbound_pull = 0;
601 pxtcp->deferred_delete = 0;
602
603 pxtcp->inbuf.bufsize = 64 * 1024;
604 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
605 if (pxtcp->inbuf.buf == NULL) {
606 free(pxtcp);
607 return NULL;
608 }
609 pxtcp->inbuf.vacant = 0;
610 pxtcp->inbuf.unacked = 0;
611 pxtcp->inbuf.unsent = 0;
612
613 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
614 if (pxtcp->rp == NULL) {
615 free(pxtcp->inbuf.buf);
616 free(pxtcp);
617 return NULL;
618 }
619
620#define CALLBACK_MSG(MSG, FUNC) \
621 do { \
622 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
623 pxtcp->MSG.sem = NULL; \
624 pxtcp->MSG.msg.cb.function = FUNC; \
625 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
626 } while (0)
627
628 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
629 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
630 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
631 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
632 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
633 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
634
635#undef CALLBACK_MSG
636
637 return pxtcp;
638}
639
640
641/**
642 * Exported to fwtcp to create pxtcp for incoming port-forwarded
643 * connections. Completed with pcb in pxtcp_pcb_connect().
644 */
645struct pxtcp *
646pxtcp_create_forwarded(SOCKET sock)
647{
648 struct pxtcp *pxtcp;
649
650 pxtcp = pxtcp_allocate();
651 if (pxtcp == NULL) {
652 return NULL;
653 }
654
655 pxtcp->sock = sock;
656 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
657 pxtcp->events = 0;
658
659 return pxtcp;
660}
661
662
663static void
664pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
665{
666 LWIP_ASSERT1(pxtcp != NULL);
667 LWIP_ASSERT1(pcb != NULL);
668
669 pxtcp->pcb = pcb;
670
671 tcp_arg(pcb, pxtcp);
672
673 tcp_recv(pcb, pxtcp_pcb_recv);
674 tcp_sent(pcb, pxtcp_pcb_sent);
675 tcp_poll(pcb, NULL, 255);
676 tcp_err(pcb, pxtcp_pcb_err);
677}
678
679
680static void
681pxtcp_free(struct pxtcp *pxtcp)
682{
683 if (pxtcp->unsent != NULL) {
684 pbuf_free(pxtcp->unsent);
685 }
686 if (pxtcp->inbuf.buf != NULL) {
687 free(pxtcp->inbuf.buf);
688 }
689 free(pxtcp);
690}
691
692
693/**
694 * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that
695 * fwtcp failed to register with poll manager to post to lwip thread
696 * for doing connect.
697 */
698void
699pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
700{
701 LWIP_ASSERT1(pxtcp->pcb == NULL);
702 pxtcp_pcb_reset_pxtcp(pxtcp);
703}
704
705
706static void
707pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
708{
709 if (pxtcp == NULL || pxtcp->pcb == NULL) {
710 return;
711 }
712
713 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
714 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
715
716 /*
717 * We must have dissociated from a fully closed pcb immediately
718 * since lwip recycles them and we don't wan't to mess with what
719 * would be someone else's pcb that we happen to have a stale
720 * pointer to.
721 */
722 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
723
724 tcp_recv(pxtcp->pcb, NULL);
725 tcp_sent(pxtcp->pcb, NULL);
726 tcp_poll(pxtcp->pcb, NULL, 255);
727 tcp_err(pxtcp->pcb, NULL);
728 tcp_arg(pxtcp->pcb, NULL);
729 pxtcp->pcb = NULL;
730}
731
732
733/**
734 * Lwip thread callback invoked via pxtcp::msg_delete
735 *
736 * Since we use static messages to communicate to the lwip thread, we
737 * cannot delete pxtcp without making sure there are no unprocessed
738 * messages in the lwip thread mailbox.
739 *
740 * The easiest way to ensure that is to send this "delete" message as
741 * the last one and when it's processed we know there are no more and
742 * it's safe to delete pxtcp.
743 *
744 * Poll manager handlers should use pxtcp_schedule_delete()
745 * convenience function.
746 */
747static void
748pxtcp_pcb_delete_pxtcp(void *ctx)
749{
750 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
751
752 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
753 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
754 (pxtcp->deferred_delete && !pxtcp->inbound_pull
755 ? " (was deferred)" : "")));
756
757 LWIP_ASSERT1(pxtcp != NULL);
758 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
759 LWIP_ASSERT1(pxtcp->outbound_close_done);
760 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
761
762
763 /*
764 * pxtcp is no longer registered with poll manager, so it's safe
765 * to close the socket.
766 */
767 if (pxtcp->sock != INVALID_SOCKET) {
768 closesocket(pxtcp->sock);
769 pxtcp->sock = INVALID_SOCKET;
770 }
771
772 /*
773 * We might have already dissociated from a fully closed pcb, or
774 * guest might have sent us a reset while msg_delete was in
775 * transit. If there's no pcb, we are done.
776 */
777 if (pxtcp->pcb == NULL) {
778 pollmgr_refptr_unref(pxtcp->rp);
779 pxtcp_free(pxtcp);
780 return;
781 }
782
783 /*
784 * Have we completely forwarded all inbound traffic to the guest?
785 *
786 * We may still be waiting for ACKs. We may have failed to send
787 * some of the data (tcp_write() failed with ERR_MEM). We may
788 * have failed to send the FIN (tcp_shutdown() failed with
789 * ERR_MEM).
790 */
791 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
792 pxtcp_pcb_dissociate(pxtcp);
793 pollmgr_refptr_unref(pxtcp->rp);
794 pxtcp_free(pxtcp);
795 }
796 else {
797 DPRINTF2(("delete: pxtcp %p; pcb %p:"
798 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
799 (void *)pxtcp, (void *)pxtcp->pcb,
800 (int)pxtcp->inbuf.unacked,
801 (int)pxtcp->inbuf.unsent,
802 (int)pxtcp->inbuf.vacant,
803 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
804
805 LWIP_ASSERT1(!pxtcp->deferred_delete);
806 pxtcp->deferred_delete = 1;
807 }
808}
809
810
811/**
812 * If we couldn't delete pxtcp right away in the msg_delete callback
813 * from the poll manager thread, we repeat the check at the end of
814 * relevant pcb callbacks.
815 */
816DECLINLINE(void)
817pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
818{
819 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
820 pxtcp_pcb_delete_pxtcp(pxtcp);
821 }
822}
823
824
825/**
826 * Poll manager callbacks should use this convenience wrapper to
827 * schedule pxtcp deletion on the lwip thread and to deregister from
828 * the poll manager.
829 */
830static int
831pxtcp_schedule_delete(struct pxtcp *pxtcp)
832{
833 /*
834 * If pollmgr_refptr_get() is called by any channel before
835 * scheduled deletion happens, let them know we are gone.
836 */
837 pxtcp->pmhdl.slot = -1;
838
839 /*
840 * Schedule deletion. Since poll manager thread may be pre-empted
841 * right after we send the message, the deletion may actually
842 * happen on the lwip thread before we return from this function,
843 * so it's not safe to refer to pxtcp after this call.
844 */
845 proxy_lwip_post(&pxtcp->msg_delete);
846
847 /* tell poll manager to deregister us */
848 return -1;
849}
850
851
852/**
853 * Lwip thread callback invoked via pxtcp::msg_reset
854 *
855 * Like pxtcp_pcb_delete(), but sends RST to the guest before
856 * deleting this pxtcp.
857 */
858static void
859pxtcp_pcb_reset_pxtcp(void *ctx)
860{
861 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
862 LWIP_ASSERT1(pxtcp != NULL);
863
864 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
865 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
866
867 if (pxtcp->sock != INVALID_SOCKET) {
868 proxy_reset_socket(pxtcp->sock);
869 pxtcp->sock = INVALID_SOCKET;
870 }
871
872 if (pxtcp->pcb != NULL) {
873 struct tcp_pcb *pcb = pxtcp->pcb;
874 pxtcp_pcb_dissociate(pxtcp);
875 tcp_abort(pcb);
876 }
877
878 pollmgr_refptr_unref(pxtcp->rp);
879 pxtcp_free(pxtcp);
880}
881
882
883
884/**
885 * Poll manager callbacks should use this convenience wrapper to
886 * schedule pxtcp reset and deletion on the lwip thread and to
887 * deregister from the poll manager.
888 *
889 * See pxtcp_schedule_delete() for additional comments.
890 */
891static int
892pxtcp_schedule_reset(struct pxtcp *pxtcp)
893{
894 pxtcp->pmhdl.slot = -1;
895 proxy_lwip_post(&pxtcp->msg_reset);
896 return -1;
897}
898
899
900/**
901 * Reject proxy connection attempt. Depending on the cause (sockerr)
902 * we may just drop the pcb silently, generate an ICMP datagram or
903 * send TCP reset.
904 */
905static void
906pxtcp_pcb_reject(struct tcp_pcb *pcb, int sockerr,
907 struct netif *netif, struct pbuf *p)
908{
909 int reset = 0;
910
911 if (sockerr == ECONNREFUSED) {
912 reset = 1;
913 }
914 else if (p != NULL) {
915 struct netif *oif;
916
917 LWIP_ASSERT1(netif != NULL);
918
919 oif = ip_current_netif();
920 ip_current_netif() = netif;
921
922 if (PCB_ISIPV6(pcb)) {
923 if (sockerr == EHOSTDOWN) {
924 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
925 }
926 else if (sockerr == EHOSTUNREACH
927 || sockerr == ENETDOWN
928 || sockerr == ENETUNREACH)
929 {
930 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
931 }
932 }
933 else {
934 if (sockerr == EHOSTDOWN
935 || sockerr == EHOSTUNREACH
936 || sockerr == ENETDOWN
937 || sockerr == ENETUNREACH)
938 {
939 icmp_dest_unreach(p, ICMP_DUR_HOST);
940 }
941 }
942
943 ip_current_netif() = oif;
944 }
945
946 tcp_abandon(pcb, reset);
947}
948
949
950/**
951 * Called from poll manager thread via pxtcp::msg_accept when proxy
952 * failed to connect to the destination. Also called when we failed
953 * to register pxtcp with poll manager.
954 *
955 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
956 * how this unestablished connection is terminated.
957 */
958static void
959pxtcp_pcb_accept_refuse(void *ctx)
960{
961 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
962
963 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n",
964 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
965 pxtcp->sock, pxtcp->sockerr));
966
967 LWIP_ASSERT1(pxtcp != NULL);
968 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
969
970 if (pxtcp->pcb != NULL) {
971 struct tcp_pcb *pcb = pxtcp->pcb;
972 pxtcp_pcb_dissociate(pxtcp);
973 pxtcp_pcb_reject(pcb, pxtcp->sockerr, pxtcp->netif, pxtcp->unsent);
974 }
975
976 pollmgr_refptr_unref(pxtcp->rp);
977 pxtcp_free(pxtcp);
978}
979
980
981/**
982 * Convenience wrapper for poll manager connect callback to reject
983 * connection attempt.
984 *
985 * Like pxtcp_schedule_reset(), but the callback is more discriminate
986 * in how this unestablished connection is terminated.
987 */
988static int
989pxtcp_schedule_reject(struct pxtcp *pxtcp)
990{
991 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
992 pxtcp->pmhdl.slot = -1;
993 proxy_lwip_post(&pxtcp->msg_accept);
994 return -1;
995}
996
997
998/**
999 * Global tcp_proxy_accept() callback for proxied outgoing TCP
1000 * connections from guest(s).
1001 */
1002static err_t
1003pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, struct pbuf *syn)
1004{
1005 LWIP_UNUSED_ARG(arg);
1006
1007 return pxtcp_pcb_accept_outbound(newpcb, syn,
1008 PCB_ISIPV6(newpcb), &newpcb->local_ip, newpcb->local_port);
1009}
1010
1011
1012err_t
1013pxtcp_pcb_accept_outbound(struct tcp_pcb *newpcb, struct pbuf *p,
1014 int is_ipv6, ipX_addr_t *dst_addr, u16_t dst_port)
1015{
1016 struct pxtcp *pxtcp;
1017 ipX_addr_t mapped_dst_addr;
1018 int sdom;
1019 SOCKET sock;
1020 ssize_t nsent;
1021 int sockerr = 0;
1022
1023 /*
1024 * TCP first calls accept callback when it receives the first SYN
1025 * and "tentatively accepts" new proxied connection attempt. When
1026 * proxy "confirms" the SYN and sends SYN|ACK and the guest
1027 * replies with ACK the accept callback is called again, this time
1028 * with the established connection.
1029 */
1030 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
1031 tcp_accept(newpcb, pxtcp_pcb_accept);
1032 tcp_arg(newpcb, NULL);
1033
1034 tcp_setprio(newpcb, TCP_PRIO_MAX);
1035
1036 pxremap_outbound_ipX(is_ipv6, &mapped_dst_addr, dst_addr);
1037
1038 sdom = is_ipv6 ? PF_INET6 : PF_INET;
1039 sock = proxy_connected_socket(sdom, SOCK_STREAM,
1040 &mapped_dst_addr, dst_port);
1041 if (sock == INVALID_SOCKET) {
1042 sockerr = SOCKERRNO();
1043 goto abort;
1044 }
1045
1046 pxtcp = pxtcp_allocate();
1047 if (pxtcp == NULL) {
1048 proxy_reset_socket(sock);
1049 goto abort;
1050 }
1051
1052 /* save initial datagram in case we need to reply with ICMP */
1053 if (p != NULL) {
1054 pbuf_ref(p);
1055 pxtcp->unsent = p;
1056 pxtcp->netif = ip_current_netif();
1057 }
1058
1059 pxtcp_pcb_associate(pxtcp, newpcb);
1060 pxtcp->sock = sock;
1061
1062 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1063 pxtcp->events = POLLOUT;
1064
1065 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1066 if (nsent < 0) {
1067 pxtcp->sock = INVALID_SOCKET;
1068 proxy_reset_socket(sock);
1069 pxtcp_pcb_accept_refuse(pxtcp);
1070 return ERR_ABRT;
1071 }
1072
1073 return ERR_OK;
1074
1075 abort:
1076 DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n",
1077 __func__, (void *)newpcb, sock, sockerr));
1078 pxtcp_pcb_reject(newpcb, sockerr, ip_current_netif(), p);
1079 return ERR_ABRT;
1080}
1081
1082
1083/**
1084 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1085 * connections from guest(s). This is "real" accept with three-way
1086 * handshake completed.
1087 */
1088static err_t
1089pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1090{
1091 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1092
1093 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1094 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1095
1096 LWIP_ASSERT1(pxtcp != NULL);
1097 LWIP_ASSERT1(pxtcp->pcb = pcb);
1098 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1099
1100 /* send any inbound data that are already queued */
1101 pxtcp_pcb_forward_inbound(pxtcp);
1102 return ERR_OK;
1103}
1104
1105
1106/**
1107 * Initial poll manager callback for proxied outgoing TCP connections.
 * pxtcp_pcb_accept_outbound() sets pxtcp::pmhdl::callback to this.
1109 *
1110 * Waits for connect(2) to the destination to complete. On success
1111 * replaces itself with pxtcp_pmgr_pump() callback common to all
1112 * established TCP connections.
1113 */
1114static int
1115pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1116{
1117 struct pxtcp *pxtcp;
1118 RT_NOREF(fd);
1119
1120 pxtcp = (struct pxtcp *)handler->data;
1121 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1122 LWIP_ASSERT1(fd == pxtcp->sock);
1123 LWIP_ASSERT1(pxtcp->sockerr == 0);
1124
1125 if (revents & POLLNVAL) {
1126 pxtcp->sock = INVALID_SOCKET;
1127 pxtcp->sockerr = ETIMEDOUT;
1128 return pxtcp_schedule_reject(pxtcp);
1129 }
1130
1131 /*
1132 * Solaris and NetBSD don't report either POLLERR or POLLHUP when
1133 * connect(2) fails, just POLLOUT. In that case we always need to
1134 * check SO_ERROR.
1135 */
1136#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD)
1137# define CONNECT_CHECK_ERROR POLLOUT
1138#else
1139# define CONNECT_CHECK_ERROR (POLLERR | POLLHUP)
1140#endif
1141
1142 /*
1143 * Check the cause of the failure so that pxtcp_pcb_reject() may
1144 * behave accordingly.
1145 */
1146 if (revents & CONNECT_CHECK_ERROR) {
1147 socklen_t optlen = (socklen_t)sizeof(pxtcp->sockerr);
1148 int status;
1149 SOCKET s;
1150
1151 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1152 (char *)&pxtcp->sockerr, &optlen);
1153 if (RT_UNLIKELY(status == SOCKET_ERROR)) { /* should not happen */
1154 DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
1155 __func__, fd, SOCKERRNO()));
1156 pxtcp->sockerr = ETIMEDOUT;
1157 }
1158 else {
1159 /* don't spam this log on successful connect(2) */
1160 if ((revents & (POLLERR | POLLHUP)) /* we were told it's failed */
1161 || pxtcp->sockerr != 0) /* we determined it's failed */
1162 {
1163 DPRINTF(("%s: sock %d: connect: %R[sockerr]\n",
1164 __func__, fd, pxtcp->sockerr));
1165 }
1166
1167 if ((revents & (POLLERR | POLLHUP))
1168 && RT_UNLIKELY(pxtcp->sockerr == 0))
1169 {
1170 /* if we're told it's failed, make sure it's marked as such */
1171 pxtcp->sockerr = ETIMEDOUT;
1172 }
1173 }
1174
1175 if (pxtcp->sockerr != 0) {
1176 s = pxtcp->sock;
1177 pxtcp->sock = INVALID_SOCKET;
1178 closesocket(s);
1179 return pxtcp_schedule_reject(pxtcp);
1180 }
1181 }
1182
1183 if (revents & POLLOUT) { /* connect is successful */
1184 /* confirm accept to the guest */
1185 proxy_lwip_post(&pxtcp->msg_accept);
1186
1187 /*
1188 * Switch to common callback used for all established proxied
1189 * connections.
1190 */
1191 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1192
1193 /*
1194 * Initially we poll for incoming traffic only. Outgoing
1195 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1196 * it will ask us to poll for POLLOUT too.
1197 */
1198 pxtcp->events = POLLIN;
1199 return pxtcp->events;
1200 }
1201
1202 /* should never get here */
1203 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1204 __func__, (void *)pxtcp, fd, revents));
1205 return pxtcp_schedule_reset(pxtcp);
1206}
1207
1208
1209/**
1210 * Called from poll manager thread via pxtcp::msg_accept when proxy
1211 * connected to the destination. Finalize accept by sending SYN|ACK
1212 * to the guest.
1213 */
1214static void
1215pxtcp_pcb_accept_confirm(void *ctx)
1216{
1217 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1218 err_t error;
1219
1220 LWIP_ASSERT1(pxtcp != NULL);
1221 if (pxtcp->pcb == NULL) {
1222 return;
1223 }
1224
1225 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1226 if (pxtcp->unsent != NULL) {
1227 pbuf_free(pxtcp->unsent);
1228 pxtcp->unsent = NULL;
1229 }
1230
1231 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1232
1233 /*
1234 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1235 * abandons the pcb. Retrying that is not very easy, since it
1236 * would require keeping "fractional state". From guest's point
1237 * of view there is no reply to its SYN so it will either resend
1238 * the SYN (effetively triggering full connection retry for us),
1239 * or it will eventually time out.
1240 */
1241 if (error == ERR_ABRT) {
1242 pxtcp->pcb = NULL; /* pcb is gone */
1243 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1244 }
1245
1246 /*
1247 * else if (error != ERR_OK): even if tcp_output() failed with
1248 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1249 * retransmitted eventually.
1250 */
1251}
1252
1253
1254/**
1255 * Entry point for port-forwarding.
1256 *
1257 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1258 * (with no pcb yet) and adds it to the poll manager (polling for
1259 * errors only). Then it calls this function to construct the pcb and
1260 * perform connection to the guest.
1261 */
1262void
1263pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1264{
1265 struct sockaddr_storage ss;
1266 socklen_t sslen;
1267 struct tcp_pcb *pcb;
1268 ipX_addr_t src_addr, dst_addr;
1269 u16_t src_port, dst_port;
1270 int status;
1271 err_t error;
1272
1273 LWIP_ASSERT1(pxtcp != NULL);
1274 LWIP_ASSERT1(pxtcp->pcb == NULL);
1275 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1276
1277 pcb = tcp_new();
1278 if (pcb == NULL) {
1279 goto reset;
1280 }
1281
1282 tcp_setprio(pcb, TCP_PRIO_MAX);
1283 pxtcp_pcb_associate(pxtcp, pcb);
1284
1285 sslen = sizeof(ss);
1286 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1287 if (status == SOCKET_ERROR) {
1288 goto reset;
1289 }
1290
1291 /* nit: compares PF and AF, but they are the same everywhere */
1292 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1293
1294 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1295 if (status == PXREMAP_FAILED) {
1296 goto reset;
1297 }
1298
1299 if (ss.ss_family == PF_INET) {
1300 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1301
1302 src_port = peer4->sin_port;
1303
1304 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1305 dst_port = fwspec->dst.sin.sin_port;
1306 }
1307 else { /* PF_INET6 */
1308 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1309 ip_set_v6(pcb, 1);
1310
1311 src_port = peer6->sin6_port;
1312
1313 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1314 dst_port = fwspec->dst.sin6.sin6_port;
1315 }
1316
1317 /* lwip port arguments are in host order */
1318 src_port = ntohs(src_port);
1319 dst_port = ntohs(dst_port);
1320
1321 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1322 if (error != ERR_OK) {
1323 goto reset;
1324 }
1325
1326 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1327 /* callback: */ pxtcp_pcb_connected);
1328 if (error != ERR_OK) {
1329 goto reset;
1330 }
1331
1332 return;
1333
1334 reset:
1335 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1336}
1337
1338
1339/**
1340 * Port-forwarded connection to guest is successful, pump data.
1341 */
1342static err_t
1343pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1344{
1345 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1346
1347 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1348 LWIP_UNUSED_ARG(error);
1349
1350 LWIP_ASSERT1(pxtcp != NULL);
1351 LWIP_ASSERT1(pxtcp->pcb == pcb);
1352 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1353 LWIP_UNUSED_ARG(pcb);
1354
1355 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1356 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1357
1358 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1359 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1360
1361 return ERR_OK;
1362}
1363
1364
1365/**
1366 * tcp_recv() callback.
1367 */
1368static err_t
1369pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1370{
1371 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1372
1373 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1374 LWIP_UNUSED_ARG(error);
1375
1376 LWIP_ASSERT1(pxtcp != NULL);
1377 LWIP_ASSERT1(pxtcp->pcb == pcb);
1378 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1379 LWIP_UNUSED_ARG(pcb);
1380
1381
1382 /*
1383 * Have we done sending previous batch?
1384 */
1385 if (pxtcp->unsent != NULL) {
1386 if (p != NULL) {
1387 /*
1388 * Return an error to tell TCP to hold onto that pbuf.
1389 * It will be presented to us later from tcp_fasttmr().
1390 */
1391 return ERR_WOULDBLOCK;
1392 }
1393 else {
1394 /*
1395 * Unlike data, p == NULL indicating orderly shutdown is
1396 * NOT presented to us again
1397 */
1398 pxtcp->outbound_close = 1;
1399 return ERR_OK;
1400 }
1401 }
1402
1403
1404 /*
1405 * Guest closed?
1406 */
1407 if (p == NULL) {
1408 pxtcp->outbound_close = 1;
1409 pxtcp_pcb_forward_outbound_close(pxtcp);
1410 return ERR_OK;
1411 }
1412
1413
1414 /*
1415 * Got data, send what we can without blocking.
1416 */
1417 return pxtcp_pcb_forward_outbound(pxtcp, p);
1418}
1419
1420
1421/**
1422 * Guest half-closed its TX side of the connection.
1423 *
1424 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1425 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1426 * previously unsent data and sees pxtcp::outbound_close flag saved by
1427 * pxtcp_pcb_recv().
1428 */
1429static void
1430pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1431{
1432 struct tcp_pcb *pcb;
1433
1434 LWIP_ASSERT1(pxtcp != NULL);
1435 LWIP_ASSERT1(pxtcp->outbound_close);
1436 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1437
1438 pcb = pxtcp->pcb;
1439 LWIP_ASSERT1(pcb != NULL);
1440
1441 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1442 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1443
1444
1445 /* set the flag first, since shutdown() may trigger POLLHUP */
1446 pxtcp->outbound_close_done = 1;
1447 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1448
1449#if !(HAVE_TCP_POLLHUP & POLLOUT)
1450 /*
1451 * We need to nudge poll manager manually, since OS will not
1452 * report POLLHUP.
1453 */
1454 if (pxtcp->inbound_close) {
1455 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1456 }
1457#endif
1458
1459
1460 /* no more outbound data coming to us */
1461 tcp_recv(pcb, NULL);
1462
1463 /*
1464 * If we have already done inbound close previously (active close
1465 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1466 * state since those will be recycled by lwip when it runs out of
1467 * free pcbs in the pool.
1468 *
1469 * The test is true also for a pcb in CLOSING state that waits
1470 * just for the ACK of its FIN (to transition to TIME_WAIT).
1471 */
1472 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1473 pxtcp_pcb_dissociate(pxtcp);
1474 }
1475}
1476
1477
1478/**
1479 * Forward outbound data from pcb to socket.
1480 *
1481 * Called by pxtcp_pcb_recv() to forward new data and by callout
1482 * triggered by POLLOUT on the socket to send previously unsent data.
1483 *
1484 * (Re)scehdules one-time callout if not all data are sent.
1485 */
1486static err_t
1487pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1488{
1489 struct pbuf *qs, *q;
1490 size_t qoff;
1491 size_t forwarded;
1492 int sockerr;
1493
1494 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1495
1496 forwarded = 0;
1497 sockerr = 0;
1498
1499 q = NULL;
1500 qoff = 0;
1501
1502 qs = p;
1503 while (qs != NULL) {
1504 IOVEC iov[8];
1505 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1506 size_t fwd1;
1507 ssize_t nsent;
1508 size_t i;
1509
1510 fwd1 = 0;
1511 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1512 LWIP_ASSERT1(q->len > 0);
1513 IOVEC_SET_BASE(iov[i], q->payload);
1514 IOVEC_SET_LEN(iov[i], q->len);
1515 fwd1 += q->len;
1516 }
1517
1518 /*
1519 * TODO: This is where application-level proxy can hook into
1520 * to process outbound traffic.
1521 */
1522 nsent = pxtcp_sock_send(pxtcp, iov, i);
1523
1524 if (nsent == (ssize_t)fwd1) {
1525 /* successfully sent this chain fragment completely */
1526 forwarded += nsent;
1527 qs = q;
1528 }
1529 else if (nsent >= 0) {
1530 /* successfully sent only some data */
1531 forwarded += nsent;
1532
1533 /* find the first pbuf that was not completely forwarded */
1534 qoff = nsent;
1535 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1536 if (qoff < q->len) {
1537 break;
1538 }
1539 qoff -= q->len;
1540 }
1541 LWIP_ASSERT1(q != NULL);
1542 LWIP_ASSERT1(qoff < q->len);
1543 break;
1544 }
1545 else {
1546 sockerr = -nsent;
1547
1548 /*
1549 * Some errors are really not errors - if we get them,
1550 * it's not different from getting nsent == 0, so filter
1551 * them out here.
1552 */
1553 if (proxy_error_is_transient(sockerr)) {
1554 sockerr = 0;
1555 }
1556 q = qs;
1557 qoff = 0;
1558 break;
1559 }
1560 }
1561
1562 if (forwarded > 0) {
1563 DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: sent %d bytes\n",
1564 (void *)pxtcp, (void *)pxtcp->pcb, (int)forwarded));
1565 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1566 }
1567
1568 if (q == NULL) { /* everything is forwarded? */
1569 LWIP_ASSERT1(sockerr == 0);
1570 LWIP_ASSERT1(forwarded == p->tot_len);
1571
1572 pxtcp->unsent = NULL;
1573 pbuf_free(p);
1574 if (pxtcp->outbound_close) {
1575 pxtcp_pcb_forward_outbound_close(pxtcp);
1576 }
1577 }
1578 else {
1579 if (q != p) {
1580 /* free forwarded pbufs at the beginning of the chain */
1581 pbuf_ref(q);
1582 pbuf_free(p);
1583 }
1584 if (qoff > 0) {
1585 /* advance payload pointer past the forwarded part */
1586 pbuf_header(q, -(s16_t)qoff);
1587 }
1588 pxtcp->unsent = q;
1589 DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: kept %d bytes\n",
1590 (void *)pxtcp, (void *)pxtcp->pcb, (int)q->tot_len));
1591
1592 /*
1593 * Have sendmsg() failed?
1594 *
1595 * Connection reset will be detected by poll and
1596 * pxtcp_schedule_reset() will be called.
1597 *
1598 * Otherwise something *really* unexpected must have happened,
1599 * so we'd better abort.
1600 */
1601 if (sockerr != 0 && sockerr != ECONNRESET) {
1602 struct tcp_pcb *pcb = pxtcp->pcb;
1603 DPRINTF2(("forward_outbound: pxtcp %p, pcb %p: %R[sockerr]\n",
1604 (void *)pxtcp, (void *)pcb, sockerr));
1605
1606 pxtcp_pcb_dissociate(pxtcp);
1607
1608 tcp_abort(pcb);
1609
1610 /* call error callback manually since we've already dissociated */
1611 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1612 return ERR_ABRT;
1613 }
1614
1615 /* schedule one-shot POLLOUT on the socket */
1616 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1617 }
1618 return ERR_OK;
1619}
1620
1621
1622#if !defined(RT_OS_WINDOWS)
1623static ssize_t
1624pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1625{
1626 struct msghdr mh;
1627 ssize_t nsent;
1628
1629#ifdef MSG_NOSIGNAL
1630 const int send_flags = MSG_NOSIGNAL;
1631#else
1632 const int send_flags = 0;
1633#endif
1634
1635 memset(&mh, 0, sizeof(mh));
1636
1637 mh.msg_iov = iov;
1638 mh.msg_iovlen = iovlen;
1639
1640 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1641 if (nsent < 0) {
1642 nsent = -SOCKERRNO();
1643 }
1644
1645 return nsent;
1646}
1647#else /* RT_OS_WINDOWS */
1648static ssize_t
1649pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1650{
1651 DWORD nsent;
1652 int status;
1653
1654 status = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &nsent,
1655 0, NULL, NULL);
1656 if (status == SOCKET_ERROR) {
1657 return -SOCKERRNO();
1658 }
1659
1660 return nsent;
1661}
1662#endif /* RT_OS_WINDOWS */
1663
1664
1665/**
1666 * Callback from poll manager (on POLLOUT) to send data from
1667 * pxtcp::unsent pbuf to socket.
1668 */
1669static void
1670pxtcp_pcb_write_outbound(void *ctx)
1671{
1672 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1673 LWIP_ASSERT1(pxtcp != NULL);
1674
1675 if (pxtcp->pcb == NULL) {
1676 return;
1677 }
1678
1679 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1680}
1681
1682
1683/**
1684 * Common poll manager callback used by both outgoing and incoming
1685 * (port-forwarded) connections that has connected socket.
1686 */
1687static int
1688pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1689{
1690 struct pxtcp *pxtcp;
1691 int status;
1692 int sockerr;
1693 RT_NOREF(fd);
1694
1695 pxtcp = (struct pxtcp *)handler->data;
1696 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1697 LWIP_ASSERT1(fd == pxtcp->sock);
1698
1699 if (revents & POLLNVAL) {
1700 pxtcp->sock = INVALID_SOCKET;
1701 return pxtcp_schedule_reset(pxtcp);
1702 }
1703
1704 if (revents & POLLERR) {
1705 socklen_t optlen = (socklen_t)sizeof(sockerr);
1706
1707 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1708 (char *)&sockerr, &optlen);
1709 if (status == SOCKET_ERROR) { /* should not happen */
1710 DPRINTF(("sock %d: POLLERR: SO_ERROR failed: %R[sockerr]\n",
1711 fd, SOCKERRNO()));
1712 }
1713 else {
1714 DPRINTF0(("sock %d: POLLERR: %R[sockerr]\n", fd, sockerr));
1715 }
1716 return pxtcp_schedule_reset(pxtcp);
1717 }
1718
1719 if (revents & POLLOUT) {
1720 pxtcp->events &= ~POLLOUT;
1721 proxy_lwip_post(&pxtcp->msg_outbound);
1722 }
1723
1724 if (revents & POLLIN) {
1725 ssize_t nread;
1726 int stop_pollin;
1727
1728 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1729 if (nread < 0) {
1730 sockerr = -(int)nread;
1731 DPRINTF0(("sock %d: POLLIN: %R[sockerr]\n", fd, sockerr));
1732 return pxtcp_schedule_reset(pxtcp);
1733 }
1734
1735 if (stop_pollin) {
1736 pxtcp->events &= ~POLLIN;
1737 }
1738
1739 if (nread > 0) {
1740 proxy_lwip_post(&pxtcp->msg_inbound);
1741#if !HAVE_TCP_POLLHUP
1742 /*
1743 * If host does not report POLLHUP for closed sockets
1744 * (e.g. NetBSD) we should check for full close manually.
1745 */
1746 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1747 LWIP_ASSERT1((revents & POLLHUP) == 0);
1748 return pxtcp_schedule_delete(pxtcp);
1749 }
1750#endif
1751 }
1752 }
1753
1754#if !HAVE_TCP_POLLHUP
1755 LWIP_ASSERT1((revents & POLLHUP) == 0);
1756#else
1757 if (revents & POLLHUP) {
1758 DPRINTF(("sock %d: HUP\n", fd));
1759
1760#if HAVE_TCP_POLLHUP == POLLIN
1761 /*
1762 * XXX: OSX reports POLLHUP once more when inbound is already
1763 * half-closed (which has already been reported as a "normal"
1764 * POLLHUP, handled below), the socket is polled for POLLOUT
1765 * (guest sends a lot of data that we can't push out fast
1766 * enough), and remote sends a reset - e.g. an http client
1767 * that half-closes after request and then aborts the transfer.
1768 *
1769 * It really should have been reported as POLLERR, but it
1770 * seems OSX never reports POLLERR for sockets.
1771 */
1772#if defined(RT_OS_DARWIN)
1773 {
1774 socklen_t optlen = (socklen_t)sizeof(sockerr);
1775
1776 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1777 (char *)&sockerr, &optlen);
1778 if (status == SOCKET_ERROR) { /* should not happen */
1779 DPRINTF(("sock %d: POLLHUP: SO_ERROR failed: %R[sockerr]\n",
1780 fd, SOCKERRNO()));
1781 sockerr = ECONNRESET;
1782 }
1783 else if (sockerr != 0) {
1784 DPRINTF0(("sock %d: POLLHUP: %R[sockerr]\n", fd, sockerr));
1785 }
1786
1787 if (sockerr != 0) { /* XXX: should have been POLLERR */
1788 return pxtcp_schedule_reset(pxtcp);
1789 }
1790 }
1791#endif /* RT_OS_DARWIN */
1792
1793 /*
1794 * Remote closed inbound.
1795 */
1796 if (!pxtcp->outbound_close_done) {
1797 /*
1798 * We might still need to poll for POLLOUT, but we can not
1799 * poll for POLLIN anymore (even if not all data are read)
1800 * because we will be spammed by POLLHUP.
1801 */
1802 pxtcp->events &= ~POLLIN;
1803 if (!pxtcp->inbound_close) {
1804 /* the rest of the input has to be pulled */
1805 proxy_lwip_post(&pxtcp->msg_inpull);
1806 }
1807 }
1808 else
1809#endif
1810 /*
1811 * Both directions are closed.
1812 */
1813 {
1814 LWIP_ASSERT1(pxtcp->outbound_close_done);
1815
1816 if (pxtcp->inbound_close) {
1817 /* there's no unread data, we are done */
1818 return pxtcp_schedule_delete(pxtcp);
1819 }
1820 else {
1821 /* pull the rest of the input first (deferred_delete) */
1822 pxtcp->pmhdl.slot = -1;
1823 proxy_lwip_post(&pxtcp->msg_inpull);
1824 return -1;
1825 }
1826 /* NOTREACHED */
1827 }
1828
1829 }
1830#endif /* HAVE_TCP_POLLHUP */
1831
1832 return pxtcp->events;
1833}
1834
1835
1836/**
1837 * Read data from socket to ringbuf. This may be used both on lwip
1838 * and poll manager threads.
1839 *
1840 * Flag pointed to by pstop is set when further reading is impossible,
1841 * either temporary when buffer is full, or permanently when EOF is
1842 * received.
1843 *
1844 * Returns number of bytes read. NB: EOF is reported as 1!
1845 *
1846 * Returns zero if nothing was read, either because buffer is full, or
1847 * if no data is available (EWOULDBLOCK, EINTR &c).
1848 *
1849 * Returns -errno on real socket errors.
1850 */
1851static ssize_t
1852pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1853{
1854 IOVEC iov[2];
1855 size_t iovlen;
1856 ssize_t nread;
1857
1858 const size_t sz = pxtcp->inbuf.bufsize;
1859 size_t beg, lim, wrnew;
1860
1861 *pstop = 0;
1862
1863 beg = pxtcp->inbuf.vacant;
1864 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1865
1866 /* lim is the index we can NOT write to */
1867 lim = pxtcp->inbuf.unacked;
1868 if (lim == 0) {
1869 lim = sz - 1; /* empty slot at the end */
1870 }
1871 else if (lim == 1 && beg != 0) {
1872 lim = sz; /* empty slot at the beginning */
1873 }
1874 else {
1875 --lim;
1876 }
1877
1878 if (beg == lim) {
1879 /*
1880 * Buffer is full, stop polling for POLLIN.
1881 *
1882 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1883 * data, freeing space in the ring buffer.
1884 */
1885 *pstop = 1;
1886 return 0;
1887 }
1888
1889 if (beg < lim) {
1890 /* free space in one chunk */
1891 iovlen = 1;
1892 IOVEC_SET_LEN(iov[0], lim - beg);
1893 }
1894 else {
1895 /* free space in two chunks */
1896 iovlen = 2;
1897 IOVEC_SET_LEN(iov[0], sz - beg);
1898 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1899 IOVEC_SET_LEN(iov[1], lim);
1900 }
1901
1902 /*
1903 * TODO: This is where application-level proxy can hook into to
1904 * process inbound traffic.
1905 */
1906 nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
1907
1908 if (nread > 0) {
1909 wrnew = beg + nread;
1910 if (wrnew >= sz) {
1911 wrnew -= sz;
1912 }
1913 pxtcp->inbuf.vacant = wrnew;
1914 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1915 (void *)pxtcp, pxtcp->sock, (int)nread));
1916 return nread;
1917 }
1918 else if (nread == 0) {
1919 *pstop = 1;
1920 pxtcp->inbound_close = 1;
1921 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1922 (void *)pxtcp, pxtcp->sock));
1923 return 1;
1924 }
1925 else {
1926 int sockerr = -nread;
1927
1928 if (proxy_error_is_transient(sockerr)) {
1929 /* haven't read anything, just return */
1930 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1931 (void *)pxtcp, pxtcp->sock));
1932 return 0;
1933 }
1934 else {
1935 /* socket error! */
1936 DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n",
1937 (void *)pxtcp, pxtcp->sock, sockerr));
1938 return -sockerr;
1939 }
1940 }
1941}
1942
1943
1944#if !defined(RT_OS_WINDOWS)
1945static ssize_t
1946pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1947{
1948 struct msghdr mh;
1949 ssize_t nread;
1950
1951 memset(&mh, 0, sizeof(mh));
1952
1953 mh.msg_iov = iov;
1954 mh.msg_iovlen = iovlen;
1955
1956 nread = recvmsg(pxtcp->sock, &mh, 0);
1957 if (nread < 0) {
1958 nread = -SOCKERRNO();
1959 }
1960
1961 return nread;
1962}
1963#else /* RT_OS_WINDOWS */
1964static ssize_t
1965pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1966{
1967 DWORD flags;
1968 DWORD nread;
1969 int status;
1970
1971 flags = 0;
1972 status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread,
1973 &flags, NULL, NULL);
1974 if (status == SOCKET_ERROR) {
1975 return -SOCKERRNO();
1976 }
1977
1978 return (ssize_t)nread;
1979}
1980#endif /* RT_OS_WINDOWS */
1981
1982
1983/**
1984 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1985 * from ringbuf to guest.
1986 */
1987static void
1988pxtcp_pcb_write_inbound(void *ctx)
1989{
1990 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1991 LWIP_ASSERT1(pxtcp != NULL);
1992
1993 if (pxtcp->pcb == NULL) {
1994 return;
1995 }
1996
1997 pxtcp_pcb_forward_inbound(pxtcp);
1998}
1999
2000
2001/**
2002 * tcp_poll() callback
2003 *
2004 * We swtich it on when tcp_write() or tcp_shutdown() fail with
2005 * ERR_MEM to prevent connection from stalling. If there are ACKs or
2006 * more inbound data then pxtcp_pcb_forward_inbound() will be
2007 * triggered again, but if neither happens, tcp_poll() comes to the
2008 * rescue.
2009 */
2010static err_t
2011pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
2012{
2013 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2014 LWIP_UNUSED_ARG(pcb);
2015
2016 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
2017 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2018
2019 pxtcp_pcb_forward_inbound(pxtcp);
2020
2021 /*
2022 * If the last thing holding up deletion of the pxtcp was failed
2023 * tcp_shutdown() and it succeeded, we may be the last callback.
2024 */
2025 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2026
2027 return ERR_OK;
2028}
2029
2030
2031static void
2032pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
2033{
2034 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
2035}
2036
2037
2038static void
2039pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
2040{
2041 tcp_poll(pxtcp->pcb, NULL, 255);
2042}
2043
2044
2045/**
2046 * Forward inbound data from ring buffer to the guest.
2047 *
2048 * Scheduled by poll manager thread after it receives more data into
2049 * the ring buffer (we have more data to send).
2050
2051 * Also called from tcp_sent() callback when guest ACKs some data,
2052 * increasing pcb->snd_buf (we are permitted to send more data).
2053 *
2054 * Also called from tcp_poll() callback if previous attempt to forward
2055 * inbound data failed with ERR_MEM (we need to try again).
2056 */
2057static void
2058pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
2059{
2060 struct tcp_pcb *pcb;
2061 size_t sndbuf;
2062 size_t beg, lim, sndlim;
2063 size_t toeob, tolim;
2064 size_t nsent;
2065 err_t error;
2066
2067 LWIP_ASSERT1(pxtcp != NULL);
2068 pcb = pxtcp->pcb;
2069 if (pcb == NULL) {
2070 return;
2071 }
2072
2073 if (/* __predict_false */ pcb->state < ESTABLISHED) {
2074 /*
2075 * If we have just confirmed accept of this connection, the
2076 * pcb is in SYN_RCVD state and we still haven't received the
2077 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
2078 * transition that lwip decrements pcb->acked so that that ACK
2079 * is not reported to pxtcp_pcb_sent(). If we send something
2080 * now and immediately close (think "daytime", e.g.) while
2081 * still in SYN_RCVD state, we will move directly to
2082 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
2083 * report it to pxtcp_pcb_sent().
2084 */
2085 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
2086 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2087 return;
2088 }
2089
2090
2091 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
2092 lim = pxtcp->inbuf.vacant;
2093
2094 if (beg == lim) {
2095 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
2096 pxtcp_pcb_forward_inbound_close(pxtcp);
2097 tcp_output(pcb);
2098 return;
2099 }
2100
2101 /*
2102 * Else, there's no data to send.
2103 *
2104 * If there is free space in the buffer, producer will
2105 * reschedule us as it receives more data and vacant (lim)
2106 * advances.
2107 *
2108 * If buffer is full when all data have been passed to
2109 * tcp_write() but not yet acknowledged, we will advance
2110 * unacked on ACK, freeing some space for producer to write to
2111 * (then see above).
2112 */
2113 return;
2114 }
2115
2116 sndbuf = tcp_sndbuf(pcb);
2117 if (sndbuf == 0) {
2118 /*
2119 * Can't send anything now. As guest ACKs some data, TCP will
2120 * call pxtcp_pcb_sent() callback and we will come here again.
2121 */
2122 return;
2123 }
2124
2125 nsent = 0;
2126
2127 /*
2128 * We have three limits to consider:
2129 * - how much data we have in the ringbuf
2130 * - how much data we are allowed to send
2131 * - ringbuf size
2132 */
2133 toeob = pxtcp->inbuf.bufsize - beg;
2134 if (lim < beg) { /* lim wrapped */
2135 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2136 /* so beg is not going to wrap, treat sndbuf as lim */
2137 lim = beg + sndbuf; /* ... and proceed to the simple case */
2138 }
2139 else { /* we are limited by the end of the buffer, beg will wrap */
2140 u8_t maybemore;
2141 if (toeob == sndbuf || lim == 0) {
2142 maybemore = 0;
2143 }
2144 else {
2145 maybemore = TCP_WRITE_FLAG_MORE;
2146 }
2147
2148 Assert(toeob == (u16_t)toeob);
2149 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)toeob, maybemore);
2150 if (error != ERR_OK) {
2151 goto writeerr;
2152 }
2153 nsent += toeob;
2154 pxtcp->inbuf.unsent = 0; /* wrap */
2155
2156 if (maybemore) {
2157 beg = 0;
2158 sndbuf -= toeob;
2159 }
2160 else {
2161 /* we are done sending, but ... */
2162 goto check_inbound_close;
2163 }
2164 }
2165 }
2166
2167 LWIP_ASSERT1(beg < lim);
2168 sndlim = beg + sndbuf;
2169 if (lim > sndlim) {
2170 lim = sndlim;
2171 }
2172 tolim = lim - beg;
2173 if (tolim > 0) {
2174 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2175 if (error != ERR_OK) {
2176 goto writeerr;
2177 }
2178 nsent += tolim;
2179 pxtcp->inbuf.unsent = lim;
2180 }
2181
2182 check_inbound_close:
2183 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2184 pxtcp_pcb_forward_inbound_close(pxtcp);
2185 }
2186
2187 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2188 (void *)pxtcp, (void *)pcb, (int)nsent));
2189 tcp_output(pcb);
2190 pxtcp_pcb_cancel_poll(pxtcp);
2191 return;
2192
2193 writeerr:
2194 if (error == ERR_MEM) {
2195 if (nsent > 0) { /* first write succeeded, second failed */
2196 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2197 (void *)pxtcp, (void *)pcb, (int)nsent));
2198 tcp_output(pcb);
2199 }
2200 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2201 (void *)pxtcp, (void *)pcb));
2202 pxtcp_pcb_schedule_poll(pxtcp);
2203 }
2204 else {
2205 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2206 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2207
2208 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2209 LWIP_ASSERT1(error == ERR_MEM);
2210 }
2211}
2212
2213
2214static void
2215pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2216{
2217 struct tcp_pcb *pcb;
2218 err_t error;
2219
2220 LWIP_ASSERT1(pxtcp != NULL);
2221 LWIP_ASSERT1(pxtcp->inbound_close);
2222 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2223 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2224
2225 pcb = pxtcp->pcb;
2226 LWIP_ASSERT1(pcb != NULL);
2227
2228 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2229 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2230
2231 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2232 if (error != ERR_OK) {
2233 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2234 " tcp_shutdown: error=%s\n",
2235 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2236 pxtcp_pcb_schedule_poll(pxtcp);
2237 return;
2238 }
2239
2240 pxtcp_pcb_cancel_poll(pxtcp);
2241 pxtcp->inbound_close_done = 1;
2242
2243
2244 /*
2245 * If we have already done outbound close previously (passive
2246 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2247 * state since those will be deleted by lwip when that last ack
2248 * comes from the guest.
2249 *
2250 * NB: We do NOT check for deferred delete here, even though we
2251 * have just set one of its conditions, inbound_close_done. We
2252 * let pcb callbacks that called us do that. It's simpler and
2253 * cleaner that way.
2254 */
2255 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2256 pxtcp_pcb_dissociate(pxtcp);
2257 }
2258}
2259
2260
2261/**
2262 * Check that all forwarded inbound data is sent and acked, and that
2263 * inbound close is scheduled (we aren't called back when it's acked).
2264 */
2265DECLINLINE(int)
2266pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2267{
2268 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2269 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2270}
2271
2272
2273/**
2274 * tcp_sent() callback - guest acknowledged len bytes.
2275 *
2276 * We can advance inbuf::unacked index, making more free space in the
2277 * ringbuf and wake up producer on poll manager thread.
2278 *
2279 * We can also try to send more data if we have any since pcb->snd_buf
2280 * was increased and we are now permitted to send more.
2281 */
2282static err_t
2283pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2284{
2285 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2286 size_t unacked;
2287
2288 LWIP_ASSERT1(pxtcp != NULL);
2289 LWIP_ASSERT1(pxtcp->pcb == pcb);
2290 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2291 LWIP_UNUSED_ARG(pcb); /* only in assert */
2292
2293 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2294 " unacked %d, unsent %d, vacant %d\n",
2295 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2296 (int)pxtcp->inbuf.unacked,
2297 (int)pxtcp->inbuf.unsent,
2298 (int)pxtcp->inbuf.vacant));
2299
2300 if (/* __predict_false */ len == 0) {
2301 /* we are notified to start pulling */
2302 LWIP_ASSERT1(!pxtcp->inbound_close);
2303 LWIP_ASSERT1(pxtcp->inbound_pull);
2304
2305 unacked = pxtcp->inbuf.unacked;
2306 }
2307 else {
2308 /*
2309 * Advance unacked index. Guest acknowledged the data, so it
2310 * won't be needed again for potential retransmits.
2311 */
2312 unacked = pxtcp->inbuf.unacked + len;
2313 if (unacked > pxtcp->inbuf.bufsize) {
2314 unacked -= pxtcp->inbuf.bufsize;
2315 }
2316 pxtcp->inbuf.unacked = unacked;
2317 }
2318
2319 /* arrange for more inbound data */
2320 if (!pxtcp->inbound_close) {
2321 if (!pxtcp->inbound_pull) {
2322 /* wake up producer, in case it has stopped polling for POLLIN */
2323 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2324#ifdef RT_OS_WINDOWS
2325 /**
2326 * We have't got enought room in ring buffer to read atm,
2327 * but we don't want to lose notification from WSAW4ME when
2328 * space would be available, so we reset event with empty recv
2329 */
2330 recv(pxtcp->sock, NULL, 0, 0);
2331#endif
2332 }
2333 else {
2334 ssize_t nread;
2335 int stop_pollin; /* ignored */
2336
2337 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2338
2339 if (nread < 0) {
2340 int sockerr = -(int)nread;
2341 LWIP_UNUSED_ARG(sockerr);
2342 DPRINTF0(("%s: sock %d: %R[sockerr]\n",
2343 __func__, pxtcp->sock, sockerr));
2344
2345#if HAVE_TCP_POLLHUP == POLLIN /* see counterpart in pxtcp_pmgr_pump() */
2346 /*
2347 * It may still be registered with poll manager for POLLOUT.
2348 */
2349 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2350 return ERR_OK;
2351#else
2352 /*
2353 * It is no longer registered with poll manager so we
2354 * can kill it directly.
2355 */
2356 pxtcp_pcb_reset_pxtcp(pxtcp);
2357 return ERR_ABRT;
2358#endif
2359 }
2360 }
2361 }
2362
2363 /* forward more data if we can */
2364 if (!pxtcp->inbound_close_done) {
2365 pxtcp_pcb_forward_inbound(pxtcp);
2366
2367 /*
2368 * NB: we might have dissociated from a pcb that transitioned
2369 * to LAST_ACK state, so don't refer to pcb below.
2370 */
2371 }
2372
2373
2374 /* have we got all the acks? */
2375 if (pxtcp->inbound_close /* no more new data */
2376 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2377 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2378 {
2379 char *buf;
2380
2381 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2382 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2383
2384 /* no more retransmits, so buf is not needed */
2385 buf = pxtcp->inbuf.buf;
2386 pxtcp->inbuf.buf = NULL;
2387 free(buf);
2388
2389 /* no more acks, so no more callbacks */
2390 if (pxtcp->pcb != NULL) {
2391 tcp_sent(pxtcp->pcb, NULL);
2392 }
2393
2394 /*
2395 * We may be the last callback for this pcb if we have also
2396 * successfully forwarded inbound_close.
2397 */
2398 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2399 }
2400
2401 return ERR_OK;
2402}
2403
2404
2405/**
2406 * Callback from poll manager (pxtcp::msg_inpull) to switch
2407 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2408 * POLLHUP comment in pxtcp_pmgr_pump().
2409 *
2410 * pxtcp::sock is deregistered from poll manager after this callback
2411 * is scheduled.
2412 */
2413static void
2414pxtcp_pcb_pull_inbound(void *ctx)
2415{
2416 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2417 LWIP_ASSERT1(pxtcp != NULL);
2418
2419 if (pxtcp->pcb == NULL) {
2420 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2421 pxtcp_pcb_reset_pxtcp(pxtcp);
2422 return;
2423 }
2424
2425 pxtcp->inbound_pull = 1;
2426 if (pxtcp->outbound_close_done) {
2427 DPRINTF(("%s: pxtcp %p: pcb %p (deferred delete)\n",
2428 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2429 pxtcp->deferred_delete = 1;
2430 }
2431 else {
2432 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2433 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2434 }
2435
2436 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2437}
2438
2439
2440/**
2441 * tcp_err() callback.
2442 *
2443 * pcb is not passed to this callback since it may be already
2444 * deallocated by the stack, but we can't do anything useful with it
2445 * anyway since connection is gone.
2446 */
2447static void
2448pxtcp_pcb_err(void *arg, err_t error)
2449{
2450 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2451 LWIP_ASSERT1(pxtcp != NULL);
2452
2453 /*
2454 * ERR_CLSD is special - it is reported here when:
2455 *
2456 * . guest has already half-closed
2457 * . we send FIN to guest when external half-closes
2458 * . guest acks that FIN
2459 *
2460 * Since connection is closed but receive has been already closed
2461 * lwip can only report this via tcp_err. At this point the pcb
2462 * is still alive, so we can peek at it if need be.
2463 *
2464 * The interesting twist is when the ACK from guest that akcs our
2465 * FIN also acks some data. In this scenario lwip will NOT call
2466 * tcp_sent() callback with the ACK for that last bit of data but
2467 * instead will call tcp_err with ERR_CLSD right away. Since that
2468 * ACK also acknowledges all the data, we should run some of
2469 * pxtcp_pcb_sent() logic here.
2470 */
2471 if (error == ERR_CLSD) {
2472 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2473
2474 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2475 " pcb->acked %d;"
2476 " unacked %d, unsent %d, vacant %d\n",
2477 (void *)pxtcp, (void *)pcb,
2478 pcb->acked,
2479 (int)pxtcp->inbuf.unacked,
2480 (int)pxtcp->inbuf.unsent,
2481 (int)pxtcp->inbuf.vacant));
2482
2483 LWIP_ASSERT1(pxtcp->pcb == pcb);
2484 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2485
2486 if (pcb->acked > 0) {
2487 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2488 }
2489 return;
2490 }
2491
2492 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2493 (void *)pxtcp, proxy_lwip_strerr(error)));
2494
2495 pxtcp->pcb = NULL; /* pcb is gone */
2496 if (pxtcp->deferred_delete) {
2497 pxtcp_pcb_reset_pxtcp(pxtcp);
2498 }
2499 else {
2500 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2501 }
2502}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette