VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/proxy.c@ 55993

Last change on this file since 55993 was 54900, checked in by vboxsync, 10 years ago

NAT/Network: proxy_create_socket - bump up SNDBUF for TCP sockets on Windows.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 18.2 KB
Line 
1/* $Id: proxy.c 54900 2015-03-23 03:35:41Z vboxsync $ */
2/** @file
3 * NAT Network - proxy setup and utilities.
4 */
5
6/*
7 * Copyright (C) 2013-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#define LOG_GROUP LOG_GROUP_NAT_SERVICE
19
20#include "winutils.h"
21
22#include "proxy.h"
23#include "proxy_pollmgr.h"
24#include "portfwd.h"
25
26#include "lwip/opt.h"
27
28#include "lwip/sys.h"
29#include "lwip/tcpip.h"
30
31#ifndef RT_OS_WINDOWS
32#include <sys/poll.h>
33#include <sys/socket.h>
34#include <netinet/in.h>
35#include <arpa/inet.h>
36#include <fcntl.h>
37#include <stdio.h>
38#include <iprt/string.h>
39#include <unistd.h>
40#include <err.h>
41#else
42# include <iprt/string.h>
43#endif
44
45#if defined(SOCK_NONBLOCK) && defined(RT_OS_NETBSD) /* XXX: PR kern/47569 */
46# undef SOCK_NONBLOCK
47#endif
48
/*
 * Fallback for platforms whose headers do not already provide
 * __arraycount: number of elements in a true array (must not be used
 * on a pointer).  The argument is parenthesised so that any array
 * expression can be passed safely.
 */
#ifndef __arraycount
# define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif
52
53static FNRTSTRFORMATTYPE proxy_sockerr_rtstrfmt;
54
55static SOCKET proxy_create_socket(int, int);
56
57volatile struct proxy_options *g_proxy_options;
58static sys_thread_t pollmgr_tid;
59
60/* XXX: for mapping loopbacks to addresses in our network (ip4) */
61struct netif *g_proxy_netif;
62
63
64/*
65 * Called on the lwip thread (aka tcpip thread) from tcpip_init() via
66 * its "tcpip_init_done" callback. Raw API is ok to use here
67 * (e.g. rtadvd), but netconn API is not.
68 */
69void
70proxy_init(struct netif *proxy_netif, struct proxy_options *opts)
71{
72 int status;
73
74 LWIP_ASSERT1(opts != NULL);
75 LWIP_UNUSED_ARG(proxy_netif);
76
77 status = RTStrFormatTypeRegister("sockerr", proxy_sockerr_rtstrfmt, NULL);
78 AssertRC(status);
79
80 g_proxy_options = opts;
81 g_proxy_netif = proxy_netif;
82
83#if 1
84 proxy_rtadvd_start(proxy_netif);
85#endif
86
87 /*
88 * XXX: We use stateless DHCPv6 only to report IPv6 address(es) of
89 * nameserver(s). Since we don't yet support IPv6 addresses in
90 * HostDnsService, there's no point in running DHCPv6.
91 */
92#if 0
93 dhcp6ds_init(proxy_netif);
94#endif
95
96 if (opts->tftp_root != NULL) {
97 tftpd_init(proxy_netif, opts->tftp_root);
98 }
99
100 status = pollmgr_init();
101 if (status < 0) {
102 errx(EXIT_FAILURE, "failed to initialize poll manager");
103 /* NOTREACHED */
104 }
105
106 pxtcp_init();
107 pxudp_init();
108
109 portfwd_init();
110
111 pxdns_init(proxy_netif);
112
113 pxping_init(proxy_netif, opts->icmpsock4, opts->icmpsock6);
114
115 pollmgr_tid = sys_thread_new("pollmgr_thread",
116 pollmgr_thread, NULL,
117 DEFAULT_THREAD_STACKSIZE,
118 DEFAULT_THREAD_PRIO);
119 if (!pollmgr_tid) {
120 errx(EXIT_FAILURE, "failed to create poll manager thread");
121 /* NOTREACHED */
122 }
123}
124
125
126#if !defined(RT_OS_WINDOWS)
127/**
128 * Formatter for %R[sockerr] - unix strerror_r() version.
129 */
130static DECLCALLBACK(size_t)
131proxy_sockerr_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
132 const char *pszType, const void *pvValue,
133 int cchWidth, int cchPrecision, unsigned int fFlags,
134 void *pvUser)
135{
136 const int error = (int)(intptr_t)pvValue;
137 size_t cb = 0;
138
139 const char *msg = NULL;
140 char buf[128];
141
142 NOREF(cchWidth);
143 NOREF(cchPrecision);
144 NOREF(fFlags);
145 NOREF(pvUser);
146
147 AssertReturn(strcmp(pszType, "sockerr") == 0, 0);
148
149 /* make sure return type mismatch is caught */
150#if defined(RT_OS_LINUX) && defined(_GNU_SOURCE)
151 msg = strerror_r(error, buf, sizeof(buf));
152#else
153 {
154 int status = strerror_r(error, buf, sizeof(buf));
155 msg = buf;
156 }
157#endif
158 return RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, "%s", msg);
159}
160
161#else /* RT_OS_WINDOWS */
162
163/**
164 * Formatter for %R[sockerr] - windows FormatMessage() version.
165 */
166static DECLCALLBACK(size_t)
167proxy_sockerr_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
168 const char *pszType, const void *pvValue,
169 int cchWidth, int cchPrecision, unsigned int fFlags,
170 void *pvUser)
171{
172 const int error = (int)(intptr_t)pvValue;
173 size_t cb = 0;
174
175 NOREF(cchWidth);
176 NOREF(cchPrecision);
177 NOREF(fFlags);
178 NOREF(pvUser);
179
180 AssertReturn(strcmp(pszType, "sockerr") == 0, 0);
181
182 /*
183 * XXX: Windows strerror() doesn't handle posix error codes, but
184 * since winsock uses its own, it shouldn't be much of a problem.
185 * If you see a strange error message, it's probably from
186 * FormatMessage() for an error from <WinError.h> that has the
187 * same numeric value.
188 */
189 if (error < _sys_nerr) {
190 char buf[128] = "";
191 int status;
192
193 status = strerror_s(buf, sizeof(buf), error);
194 if (status == 0) {
195 if (strcmp(buf, "Unknown error") == 0) {
196 /* windows strerror() doesn't add the numeric value */
197 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
198 "Unknown error: %d", error);
199 }
200 else {
201 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
202 "%s", buf);
203 }
204 }
205 else {
206 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
207 "Unknown error: %d", error);
208 }
209 }
210 else {
211 DWORD nchars;
212 char *msg = NULL;
213
214 nchars = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM
215 | FORMAT_MESSAGE_ALLOCATE_BUFFER,
216 NULL, error, LANG_NEUTRAL,
217 (LPSTR)&msg, 0,
218 NULL);
219 if (nchars == 0 || msg == NULL) {
220 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
221 "Unknown error: %d", error);
222 }
223 else {
224 /* FormatMessage() "helpfully" adds newline; get rid of it */
225 char *crpos = strchr(msg, '\r');
226 if (crpos != NULL) {
227 *crpos = '\0';
228 }
229
230 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
231 "%s", msg);
232 }
233
234 if (msg != NULL) {
235 LocalFree(msg);
236 }
237 }
238
239 return cb;
240}
241#endif /* RT_OS_WINDOWS */
242
243
244/**
245 * Send static callback message from poll manager thread to lwip
246 * thread, scheduling a function call in lwip thread context.
247 *
248 * XXX: Existing lwip api only provides non-blocking version for this.
249 * It may fail when lwip thread is not running (mbox invalid) or if
250 * post failed (mbox full). How to handle these?
251 */
252void
253proxy_lwip_post(struct tcpip_msg *msg)
254{
255 struct tcpip_callback_msg *m;
256 err_t error;
257
258 LWIP_ASSERT1(msg != NULL);
259
260 /*
261 * lwip plays games with fake incomplete struct tag to enforce API
262 */
263 m = (struct tcpip_callback_msg *)msg;
264 error = tcpip_callbackmsg(m);
265
266 if (error == ERR_VAL) {
267 /* XXX: lwip thread is not running (mbox invalid) */
268 LWIP_ASSERT1(error != ERR_VAL);
269 }
270
271 LWIP_ASSERT1(error == ERR_OK);
272}
273
274
275/**
276 * Create a non-blocking socket. Disable SIGPIPE for TCP sockets if
277 * possible. On Linux it's not possible and should be disabled for
278 * each send(2) individually.
279 */
280static SOCKET
281proxy_create_socket(int sdom, int stype)
282{
283 SOCKET s;
284 int stype_and_flags;
285 int status;
286
287 LWIP_UNUSED_ARG(status); /* depends on ifdefs */
288
289
290 stype_and_flags = stype;
291
292#if defined(SOCK_NONBLOCK)
293 stype_and_flags |= SOCK_NONBLOCK;
294#endif
295
296 /*
297 * Disable SIGPIPE on disconnected socket. It might be easier to
298 * forgo it and just use MSG_NOSIGNAL on each send*(2), since we
299 * have to do it for Linux anyway, but Darwin does NOT have that
300 * flag (but has SO_NOSIGPIPE socket option).
301 */
302#if !defined(SOCK_NOSIGPIPE) && !defined(SO_NOSIGPIPE) && !defined(MSG_NOSIGNAL)
303#if 0 /* XXX: Solaris has neither, the program should ignore SIGPIPE globally */
304#error Need a way to disable SIGPIPE on connection oriented sockets!
305#endif
306#endif
307
308#if defined(SOCK_NOSIGPIPE)
309 if (stype == SOCK_STREAM) {
310 stype_and_flags |= SOCK_NOSIGPIPE;
311 }
312#endif
313
314 s = socket(sdom, stype_and_flags, 0);
315 if (s == INVALID_SOCKET) {
316 DPRINTF(("socket: %R[sockerr]\n", SOCKERRNO()));
317 return INVALID_SOCKET;
318 }
319
320#if defined(RT_OS_WINDOWS)
321 {
322 u_long mode = 1;
323 status = ioctlsocket(s, FIONBIO, &mode);
324 if (status == SOCKET_ERROR) {
325 DPRINTF(("FIONBIO: %R[sockerr]\n", SOCKERRNO()));
326 closesocket(s);
327 return INVALID_SOCKET;
328 }
329 }
330#elif !defined(SOCK_NONBLOCK)
331 {
332 int sflags;
333
334 sflags = fcntl(s, F_GETFL, 0);
335 if (sflags < 0) {
336 DPRINTF(("F_GETFL: %R[sockerr]\n", SOCKERRNO()));
337 closesocket(s);
338 return INVALID_SOCKET;
339 }
340
341 status = fcntl(s, F_SETFL, sflags | O_NONBLOCK);
342 if (status < 0) {
343 DPRINTF(("O_NONBLOCK: %R[sockerr]\n", SOCKERRNO()));
344 closesocket(s);
345 return INVALID_SOCKET;
346 }
347 }
348#endif
349
350#if !defined(SOCK_NOSIGPIPE) && defined(SO_NOSIGPIPE)
351 if (stype == SOCK_STREAM) {
352 int on = 1;
353 const socklen_t onlen = sizeof(on);
354
355 status = setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &on, onlen);
356 if (status < 0) {
357 DPRINTF(("SO_NOSIGPIPE: %R[sockerr]\n", SOCKERRNO()));
358 closesocket(s);
359 return INVALID_SOCKET;
360 }
361 }
362#endif
363
364#if defined(RT_OS_WINDOWS)
365 /*
366 * lwIP only holds one packet of "refused data" for us. Proxy
367 * relies on OS socket send buffer and doesn't do its own
368 * buffering. Unfortunately on Windows send buffer is very small
369 * (8K by default) and is not dynamically adpated by the OS it
370 * seems. So a single large write will fill it up and that will
371 * make lwIP drop segments, causing guest TCP into pathologic
372 * resend patterns. As a quick and dirty fix just bump it up.
373 */
374 if (stype == SOCK_STREAM) {
375 int sndbuf;
376 socklen_t optlen = sizeof(sndbuf);
377
378 status = getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &optlen);
379 if (status == 0) {
380 if (sndbuf < 64 * 1024) {
381 sndbuf = 64 * 1024;
382 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF,
383 (char *)&sndbuf, optlen);
384 if (status != 0) {
385 DPRINTF(("SO_SNDBUF: setsockopt: %R[sockerr]\n", SOCKERRNO()));
386 }
387 }
388 }
389 else {
390 DPRINTF(("SO_SNDBUF: getsockopt: %R[sockerr]\n", SOCKERRNO()));
391 }
392 }
393#endif
394
395 return s;
396}
397
398
399/**
400 * Create a socket for outbound connection to dst_addr:dst_port.
401 *
402 * The socket is non-blocking and TCP sockets has SIGPIPE disabled if
403 * possible. On Linux it's not possible and should be disabled for
404 * each send(2) individually.
405 */
406SOCKET
407proxy_connected_socket(int sdom, int stype,
408 ipX_addr_t *dst_addr, u16_t dst_port)
409{
410 struct sockaddr_in6 dst_sin6;
411 struct sockaddr_in dst_sin;
412 struct sockaddr *pdst_sa;
413 socklen_t dst_sa_len;
414 void *pdst_addr;
415 const struct sockaddr *psrc_sa;
416 socklen_t src_sa_len;
417 int status;
418 int sockerr;
419 SOCKET s;
420
421 LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6);
422 LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM);
423
424 DPRINTF(("---> %s ", stype == SOCK_STREAM ? "TCP" : "UDP"));
425 if (sdom == PF_INET6) {
426 pdst_sa = (struct sockaddr *)&dst_sin6;
427 pdst_addr = (void *)&dst_sin6.sin6_addr;
428
429 memset(&dst_sin6, 0, sizeof(dst_sin6));
430#if HAVE_SA_LEN
431 dst_sin6.sin6_len =
432#endif
433 dst_sa_len = sizeof(dst_sin6);
434 dst_sin6.sin6_family = AF_INET6;
435 memcpy(&dst_sin6.sin6_addr, &dst_addr->ip6, sizeof(ip6_addr_t));
436 dst_sin6.sin6_port = htons(dst_port);
437
438 DPRINTF(("[%RTnaipv6]:%d ", &dst_sin6.sin6_addr, dst_port));
439 }
440 else { /* sdom = PF_INET */
441 pdst_sa = (struct sockaddr *)&dst_sin;
442 pdst_addr = (void *)&dst_sin.sin_addr;
443
444 memset(&dst_sin, 0, sizeof(dst_sin));
445#if HAVE_SA_LEN
446 dst_sin.sin_len =
447#endif
448 dst_sa_len = sizeof(dst_sin);
449 dst_sin.sin_family = AF_INET;
450 dst_sin.sin_addr.s_addr = dst_addr->ip4.addr; /* byte-order? */
451 dst_sin.sin_port = htons(dst_port);
452
453 DPRINTF(("%RTnaipv4:%d ", dst_sin.sin_addr.s_addr, dst_port));
454 }
455
456 s = proxy_create_socket(sdom, stype);
457 if (s == INVALID_SOCKET) {
458 return INVALID_SOCKET;
459 }
460 DPRINTF(("socket %d\n", s));
461
462 /* TODO: needs locking if dynamic modifyvm is allowed */
463 if (sdom == PF_INET6) {
464 psrc_sa = (const struct sockaddr *)g_proxy_options->src6;
465 src_sa_len = sizeof(struct sockaddr_in6);
466 }
467 else {
468 psrc_sa = (const struct sockaddr *)g_proxy_options->src4;
469 src_sa_len = sizeof(struct sockaddr_in);
470 }
471 if (psrc_sa != NULL) {
472 status = bind(s, psrc_sa, src_sa_len);
473 if (status == SOCKET_ERROR) {
474 sockerr = SOCKERRNO();
475 DPRINTF(("socket %d: bind: %R[sockerr]\n", s, sockerr));
476 closesocket(s);
477 SET_SOCKERRNO(sockerr);
478 return INVALID_SOCKET;
479 }
480 }
481
482 status = connect(s, pdst_sa, dst_sa_len);
483 if (status == SOCKET_ERROR
484#if !defined(RT_OS_WINDOWS)
485 && SOCKERRNO() != EINPROGRESS
486#else
487 && SOCKERRNO() != EWOULDBLOCK
488#endif
489 )
490 {
491 sockerr = SOCKERRNO();
492 DPRINTF(("socket %d: connect: %R[sockerr]\n", s, sockerr));
493 closesocket(s);
494 SET_SOCKERRNO(sockerr);
495 return INVALID_SOCKET;
496 }
497
498 return s;
499}
500
501
502/**
503 * Create a socket for inbound (port-forwarded) connections to
504 * src_addr (port is part of sockaddr, so not a separate argument).
505 *
506 * The socket is non-blocking and TCP sockets has SIGPIPE disabled if
507 * possible. On Linux it's not possible and should be disabled for
508 * each send(2) individually.
509 *
510 * TODO?: Support v6-mapped v4 so that user can specify she wants
511 * "udp" and get both versions?
512 */
513SOCKET
514proxy_bound_socket(int sdom, int stype, struct sockaddr *src_addr)
515{
516 SOCKET s;
517 int on;
518 const socklen_t onlen = sizeof(on);
519 int status;
520 int sockerr;
521
522 s = proxy_create_socket(sdom, stype);
523 if (s == INVALID_SOCKET) {
524 return INVALID_SOCKET;
525 }
526 DPRINTF(("socket %d\n", s));
527
528 on = 1;
529 status = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&on, onlen);
530 if (status < 0) { /* not good, but not fatal */
531 DPRINTF(("SO_REUSEADDR: %R[sockerr]\n", SOCKERRNO()));
532 }
533
534 status = bind(s, src_addr,
535 sdom == PF_INET ?
536 sizeof(struct sockaddr_in)
537 : sizeof(struct sockaddr_in6));
538 if (status == SOCKET_ERROR) {
539 sockerr = SOCKERRNO();
540 DPRINTF(("bind: %R[sockerr]\n", sockerr));
541 closesocket(s);
542 SET_SOCKERRNO(sockerr);
543 return INVALID_SOCKET;
544 }
545
546 if (stype == SOCK_STREAM) {
547 status = listen(s, 5);
548 if (status == SOCKET_ERROR) {
549 sockerr = SOCKERRNO();
550 DPRINTF(("listen: %R[sockerr]\n", sockerr));
551 closesocket(s);
552 SET_SOCKERRNO(sockerr);
553 return INVALID_SOCKET;
554 }
555 }
556
557 return s;
558}
559
560
561void
562proxy_reset_socket(SOCKET s)
563{
564 struct linger linger;
565
566 linger.l_onoff = 1;
567 linger.l_linger = 0;
568
569 /* On Windows we can run into issue here, perhaps SO_LINGER isn't enough, and
570 * we should use WSA{Send,Recv}Disconnect instead.
571 *
572 * Links for the reference:
573 * http://msdn.microsoft.com/en-us/library/windows/desktop/ms738547%28v=vs.85%29.aspx
574 * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4468997
575 */
576 setsockopt(s, SOL_SOCKET, SO_LINGER, (char *)&linger, sizeof(linger));
577
578 closesocket(s);
579}
580
581
582int
583proxy_sendto(SOCKET sock, struct pbuf *p, void *name, size_t namelen)
584{
585 struct pbuf *q;
586 size_t i, clen;
587#ifndef RT_OS_WINDOWS
588 struct msghdr mh;
589 ssize_t nsent;
590#else
591 DWORD nsent;
592#endif
593 int rc;
594 IOVEC fixiov[8]; /* fixed size (typical case) */
595 const size_t fixiovsize = sizeof(fixiov)/sizeof(fixiov[0]);
596 IOVEC *dyniov; /* dynamically sized */
597 IOVEC *iov;
598 int error = 0;
599
600 /*
601 * Static iov[] is usually enough since UDP protocols use small
602 * datagrams to avoid fragmentation, but be prepared.
603 */
604 clen = pbuf_clen(p);
605 if (clen > fixiovsize) {
606 /*
607 * XXX: TODO: check that clen is shorter than IOV_MAX
608 */
609 dyniov = (IOVEC *)malloc(clen * sizeof(*dyniov));
610 if (dyniov == NULL) {
611 error = -errno; /* sic: not a socket error */
612 goto out;
613 }
614 iov = dyniov;
615 }
616 else {
617 dyniov = NULL;
618 iov = fixiov;
619 }
620
621
622 for (q = p, i = 0; i < clen; q = q->next, ++i) {
623 LWIP_ASSERT1(q != NULL);
624
625 IOVEC_SET_BASE(iov[i], q->payload);
626 IOVEC_SET_LEN(iov[i], q->len);
627 }
628
629#ifndef RT_OS_WINDOWS
630 memset(&mh, 0, sizeof(mh));
631 mh.msg_name = name;
632 mh.msg_namelen = namelen;
633 mh.msg_iov = iov;
634 mh.msg_iovlen = clen;
635
636 nsent = sendmsg(sock, &mh, 0);
637 rc = (nsent >= 0) ? 0 : SOCKET_ERROR;
638#else
639 rc = WSASendTo(sock, iov, (DWORD)clen, &nsent, 0,
640 name, (int)namelen, NULL, NULL);
641#endif
642 if (rc == SOCKET_ERROR) {
643 error = SOCKERRNO();
644 DPRINTF(("%s: socket %d: sendmsg: %R[sockerr]\n",
645 __func__, sock, error));
646 error = -error;
647 }
648
649 out:
650 if (dyniov != NULL) {
651 free(dyniov);
652 }
653 return error;
654}
655
656
/*
 * Symbolic names for lwIP error codes.  proxy_lwip_strerr() indexes
 * this table with the negated err_t value, so the order of the
 * entries matters.  The table is never modified, hence both the
 * strings and the pointers to them are const.
 */
static const char * const lwiperr[] = {
    "ERR_OK",
    "ERR_MEM",
    "ERR_BUF",
    "ERR_TIMEOUT",
    "ERR_RTE",
    "ERR_INPROGRESS",
    "ERR_VAL",
    "ERR_WOULDBLOCK",
    "ERR_USE",
    "ERR_ISCONN",
    "ERR_ABRT",
    "ERR_RST",
    "ERR_CLSD",
    "ERR_CONN",
    "ERR_ARG",
    "ERR_IF"
};
675
676
677const char *
678proxy_lwip_strerr(err_t error)
679{
680 static char buf[32];
681 int e = -error;
682
683 if (0 <= e && e < (int)__arraycount(lwiperr)) {
684 return lwiperr[e];
685 }
686 else {
687 RTStrPrintf(buf, sizeof(buf), "unknown error %d", error);
688 return buf;
689 }
690}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette