VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/slirp.c@ 15890

Last change on this file since 15890 was 15890, checked in by vboxsync, 16 years ago

NAT: 1. The code path without the sync enhancement is functional again (it was broken by using the ICMP file handle in select(1)).

  1. After sending, the send queue no longer needs to synchronize with the NAT thread to free the mbuf; instead, the NAT queue is used to call the slirp freeing routine.
  2. No more copying when slirp sends data to the guest.


  • Property svn:eol-style set to native
File size: 36.6 KB
Line 
1#include "slirp.h"
2#ifdef RT_OS_OS2
3# include <paths.h>
4#endif
5
6/* disable these counters for the final release */
7/* #define VBOX_WITHOUT_RELEASE_STATISTICS */
8
9#include <VBox/err.h>
10#include <VBox/pdmdrv.h>
11#include <iprt/assert.h>
12
/*
 * Platform abstraction for "register interest in a socket" (ENGAGE),
 * "fetch what happened on a socket" (POLL) and "did event X happen" (CHECK).
 *
 * Two implementations exist:
 *  - select() based: used everywhere except Windows builds with
 *    VBOX_WITH_SIMPLIFIED_SLIRP_SYNC.
 *  - WSAEventSelect()/WSAEnumNetworkEvents() based: Windows builds with
 *    VBOX_WITH_SIMPLIFIED_SLIRP_SYNC.
 *
 * The macros rely on names from the expanding function: the select() variant
 * needs UPD_NFDS (and therefore a local 'nfds'); the Winsock variant needs
 * locals 'rc' and 'error' and must be expanded inside a loop body.
 */
#if !defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) || !defined(RT_OS_WINDOWS)

/* Add the socket to one fd_set and raise nfds if necessary ('label' unused). */
# define DO_ENGAGE_EVENT1(so, fdset, label) \
    do { \
        FD_SET((so)->s, (fdset)); \
        UPD_NFDS((so)->s); \
    } while(0)


/* Add the socket to two fd_sets and raise nfds if necessary ('label' unused). */
# define DO_ENGAGE_EVENT2(so, fdset1, fdset2, label) \
    do { \
        FD_SET((so)->s, (fdset1)); \
        FD_SET((so)->s, (fdset2)); \
        UPD_NFDS((so)->s); \
    } while(0)

/* Nothing to fetch: select() has already filled in the fd_sets. */
# define DO_POLL_EVENTS(rc, error, so, events, label) do {} while (0)

# define DO_CHECK_FD_SET(so, events, fdset) (FD_ISSET((so)->s, (fdset)))

# define DO_WIN_CHECK_FD_SET(so, events, fdset ) 0 /* specific for Windows Winsock API */

# ifndef RT_OS_WINDOWS
/* Only watch the ICMP socket if it has been opened (icmp_socket.s != -1). */
#  define ICMP_ENGAGE_EVENT(so, fdset) \
    do { \
        if (pData->icmp_socket.s != -1) \
            DO_ENGAGE_EVENT1((so), (fdset), ICMP); \
    } while (0)
# else /* RT_OS_WINDOWS */
#  define ICMP_ENGAGE_EVENT(so, fdset) do {} while(0)
# endif /* RT_OS_WINDOWS */

#else /* defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS) */

/*
 * On Windows, we will be notified by IcmpSendEcho2() when the response arrives.
 * So no call to WSAEventSelect necessary.
 */
# define ICMP_ENGAGE_EVENT(so, fdset) do {} while(0)

/*
 * Associates the socket with VBOX_SOCKET_EVENT for all network events.
 *
 * NOTE: the expansion ends with a bare 'continue', i.e. it moves on to the
 * next socket of the enclosing loop. It must therefore be the last statement
 * of a braced block inside a loop and cannot be wrapped in do/while(0).
 *
 * The log arguments are cast so that they match their format specifiers:
 * the socket handle is an integer, not a string, and the pointers are
 * printed with %p.
 */
# define DO_ENGAGE_EVENT1(so, fdset1, label) \
    do { \
        rc = WSAEventSelect((so)->s, VBOX_SOCKET_EVENT, FD_ALL_EVENTS); \
        if (rc == SOCKET_ERROR) \
        { \
            /* This should not happen */ \
            error = WSAGetLastError(); \
            LogRel(("WSAEventSelector (" #label ") error %d (so=%p, socket=%llu, event=%p)\n", \
                    error, (void *)(so), (unsigned long long)(so)->s, (void *)VBOX_SOCKET_EVENT)); \
        } \
    } while(0); \
    continue

# define DO_ENGAGE_EVENT2(so, fdset1, fdset2, label) \
    DO_ENGAGE_EVENT1((so), (fdset1), label)

/*
 * Fetches (and resets) the pending network events of the socket.
 * NOTE: multi-statement macro containing 'continue'; use it only as a
 * stand-alone statement directly inside a loop body.
 */
# define DO_POLL_EVENTS(rc, error, so, events, label) \
    (rc) = WSAEnumNetworkEvents((so)->s, VBOX_SOCKET_EVENT, (events)); \
    if ((rc) == SOCKET_ERROR) \
    { \
        (error) = WSAGetLastError(); \
        LogRel(("WSAEnumNetworkEvents " #label " error %d\n", (error))); \
        continue; \
    }

/* Map the fd_set names used by the callers onto Winsock event flags/bits. */
# define acceptds_win FD_ACCEPT
# define acceptds_win_bit FD_ACCEPT_BIT

# define readfds_win FD_READ
# define readfds_win_bit FD_READ_BIT

# define writefds_win FD_WRITE
# define writefds_win_bit FD_WRITE_BIT

# define xfds_win FD_OOB
# define xfds_win_bit FD_OOB_BIT

/* 'fdset' is token-pasted, so it must be passed as a bare identifier. */
# define DO_CHECK_FD_SET(so, events, fdset) \
    (((events).lNetworkEvents & fdset ## _win) && ((events).iErrorCode[fdset ## _win_bit] == 0))

# define DO_WIN_CHECK_FD_SET(so, events, fdset ) DO_CHECK_FD_SET((so), (events), fdset)

#endif /* defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS) */
96
/*
 * Protocol-labelled front ends for the platform macros. The label (TCP/UDP)
 * is only used for log messages by the Winsock implementation.
 *
 * 'set' is deliberately forwarded without parentheses by the CHECK macros:
 * the Winsock implementation token-pastes it.
 */
#define TCP_ENGAGE_EVENT1(so, fdset)            DO_ENGAGE_EVENT1((so), (fdset), TCP)
#define TCP_ENGAGE_EVENT2(so, fdset1, fdset2)   DO_ENGAGE_EVENT2((so), (fdset1), (fdset2), TCP)
#define UDP_ENGAGE_EVENT(so, fdset)             DO_ENGAGE_EVENT1((so), (fdset), UDP)

#define POLL_TCP_EVENTS(rc, error, so, events)  DO_POLL_EVENTS((rc), (error), (so), (events), TCP)
#define POLL_UDP_EVENTS(rc, error, so, events)  DO_POLL_EVENTS((rc), (error), (so), (events), UDP)

#define CHECK_FD_SET(so, events, set)           (DO_CHECK_FD_SET((so), (events), set))
#define WIN_CHECK_FD_SET(so, events, set)       (DO_WIN_CHECK_FD_SET((so), (events), set))
117
/*
 * Logging macros.
 *
 * LOG_NAT_SOCK writes one release-log line per socket describing which
 * events are pending. It expands to nothing unless
 * VBOX_WITH_DEBUG_NAT_SOCKETS evaluates to non-zero.
 */
#if !VBOX_WITH_DEBUG_NAT_SOCKETS
# define DO_LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) do {} while (0)
#else /* VBOX_WITH_DEBUG_NAT_SOCKETS */
# if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
/* Winsock variant: logs the network event record, the fd_sets are ignored. */
#  define DO_LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) \
    do { \
        LogRel((" " #proto "%R[natsock] %R[natwinnetevents]\n", (so), (winevent))); \
    } while (0)
# else
/* select() variant: logs membership in the three fd_sets, winevent is ignored. */
#  define DO_LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) \
    do { \
        LogRel((" " #proto " %R[natsock] %s %s %s\n", (so), FD_ISSET((so)->s, (r_fdset))?"READ":"",\
                FD_ISSET((so)->s, (w_fdset))?"WRITE":"", FD_ISSET((so)->s, (x_fdset))?"OOB":"")); \
    } while (0)
# endif /* !(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC && RT_OS_WINDOWS) */
#endif /* VBOX_WITH_DEBUG_NAT_SOCKETS */

#define LOG_NAT_SOCK(so, proto, winevent, r_fdset, w_fdset, x_fdset) DO_LOG_NAT_SOCK((so), proto, (winevent), (r_fdset), (w_fdset), (x_fdset))
139
/*
 * Ethernet address template used for the NAT side of the link; the users in
 * this file overwrite the last octet as needed.
 */
static const uint8_t special_ethaddr[6] = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x00 };
144
145#ifdef RT_OS_WINDOWS
146
147static int get_dns_addr_domain(PNATState pData, bool fVerbose,
148 struct in_addr *pdns_addr,
149 const char **ppszDomain)
150{
151 int rc = 0;
152 FIXED_INFO *FixedInfo = NULL;
153 ULONG BufLen;
154 DWORD ret;
155 IP_ADDR_STRING *pIPAddr;
156 struct in_addr tmp_addr;
157
158 FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO));
159 BufLen = sizeof(FIXED_INFO);
160
161 /** @todo: this API returns all DNS servers, no matter whether the
162 * corresponding network adapter is disabled or not. Maybe replace
163 * this by GetAdapterAddresses(), which is XP/Vista only though. */
164 if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen))
165 {
166 if (FixedInfo)
167 {
168 GlobalFree(FixedInfo);
169 FixedInfo = NULL;
170 }
171 FixedInfo = GlobalAlloc(GPTR, BufLen);
172 }
173
174 if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS)
175 {
176 Log(("GetNetworkParams failed. ret = %08x\n", (u_int)ret ));
177 if (FixedInfo)
178 {
179 GlobalFree(FixedInfo);
180 FixedInfo = NULL;
181 }
182 rc = -1;
183 goto get_dns_prefix;
184 }
185
186 pIPAddr = &(FixedInfo->DnsServerList);
187 inet_aton(pIPAddr->IpAddress.String, &tmp_addr);
188 Log(("nat: DNS Servers:\n"));
189 if (fVerbose || pdns_addr->s_addr != tmp_addr.s_addr)
190 LogRel(("NAT: DNS address: %s\n", pIPAddr->IpAddress.String));
191 *pdns_addr = tmp_addr;
192
193 pIPAddr = FixedInfo -> DnsServerList.Next;
194 while (pIPAddr)
195 {
196 if (fVerbose)
197 LogRel(("NAT: ignored DNS address: %s\n", pIPAddr ->IpAddress.String));
198 pIPAddr = pIPAddr ->Next;
199 }
200 if (FixedInfo)
201 {
202 GlobalFree(FixedInfo);
203 FixedInfo = NULL;
204 }
205
206get_dns_prefix:
207 if (ppszDomain)
208 {
209 OSVERSIONINFO ver;
210 char szDnsDomain[256];
211 DWORD dwSize = sizeof(szDnsDomain);
212
213 *ppszDomain = NULL;
214 GetVersionEx(&ver);
215 if (ver.dwMajorVersion >= 5)
216 {
217 /* GetComputerNameEx exists in Windows versions starting with 2000. */
218 if (GetComputerNameEx(ComputerNameDnsDomain, szDnsDomain, &dwSize))
219 {
220 if (szDnsDomain[0])
221 {
222 /* Just non-empty strings are valid. */
223 *ppszDomain = RTStrDup(szDnsDomain);
224 if (pData->fPassDomain)
225 {
226 if (fVerbose)
227 LogRel(("NAT: passing domain name %s\n", szDnsDomain));
228 }
229 else
230 Log(("nat: ignoring domain %s\n", szDnsDomain));
231 }
232 }
233 else
234 Log(("nat: GetComputerNameEx failed (%d)\n", GetLastError()));
235 }
236 }
237 return rc;
238}
239
240#else
241
242static int get_dns_addr_domain(PNATState pData, bool fVerbose,
243 struct in_addr *pdns_addr,
244 const char **ppszDomain)
245{
246 char buff[512];
247 char buff2[256];
248 FILE *f;
249 int found = 0;
250 struct in_addr tmp_addr;
251
252#ifdef RT_OS_OS2
253 /* Try various locations. */
254 char *etc = getenv("ETC");
255 f = NULL;
256 if (etc)
257 {
258 snprintf(buff, sizeof(buff), "%s/RESOLV2", etc);
259 f = fopen(buff, "rt");
260 }
261 if (!f)
262 {
263 snprintf(buff, sizeof(buff), "%s/RESOLV2", _PATH_ETC);
264 f = fopen(buff, "rt");
265 }
266 if (!f)
267 {
268 snprintf(buff, sizeof(buff), "%s/resolv.conf", _PATH_ETC);
269 f = fopen(buff, "rt");
270 }
271#else
272 f = fopen("/etc/resolv.conf", "r");
273#endif
274 if (!f)
275 return -1;
276
277 if (ppszDomain)
278 *ppszDomain = NULL;
279 Log(("nat: DNS Servers:\n"));
280 while (fgets(buff, 512, f) != NULL)
281 {
282 if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1)
283 {
284 if (!inet_aton(buff2, &tmp_addr))
285 continue;
286 if (tmp_addr.s_addr == loopback_addr.s_addr)
287 tmp_addr = our_addr;
288 /* If it's the first one, set it to dns_addr */
289 if (!found)
290 {
291 if (fVerbose || pdns_addr->s_addr != tmp_addr.s_addr)
292 LogRel(("NAT: DNS address: %s\n", buff2));
293 *pdns_addr = tmp_addr;
294 }
295 else
296 {
297 if (fVerbose)
298 LogRel(("NAT: ignored DNS address: %s\n", buff2));
299 }
300 found++;
301 }
302 if ( ppszDomain
303 && (!strncmp(buff, "domain", 6) || !strncmp(buff, "search", 6)))
304 {
305 /* Domain name/search list present. Pick first entry */
306 if (*ppszDomain == NULL)
307 {
308 char *tok;
309 char *saveptr;
310 tok = strtok_r(&buff[6], " \t\n", &saveptr);
311 if (tok)
312 {
313 *ppszDomain = RTStrDup(tok);
314 if (pData->fPassDomain)
315 {
316 if (fVerbose)
317 LogRel(("NAT: passing domain name %s\n", tok));
318 }
319 else
320 Log(("nat: ignoring domain %s\n", tok));
321 }
322 }
323 }
324 }
325 fclose(f);
326 if (!found)
327 return -1;
328 return 0;
329}
330
331#endif
332
333int get_dns_addr(PNATState pData, struct in_addr *pdns_addr)
334{
335 return get_dns_addr_domain(pData, false, pdns_addr, NULL);
336}
337
338int slirp_init(PNATState *ppData, const char *pszNetAddr, uint32_t u32Netmask,
339 bool fPassDomain, const char *pszTFTPPrefix,
340 const char *pszBootFile, void *pvUser)
341{
342 int fNATfailed = 0;
343 PNATState pData = RTMemAlloc(sizeof(NATState));
344 *ppData = pData;
345 if (!pData)
346 return VERR_NO_MEMORY;
347 if (u32Netmask & 0x1f)
348 /* CTL is x.x.x.15, bootp passes up to 16 IPs (15..31) */
349 return VERR_INVALID_PARAMETER;
350 memset(pData, '\0', sizeof(NATState));
351 pData->fPassDomain = fPassDomain;
352 pData->pvUser = pvUser;
353 tftp_prefix = pszTFTPPrefix;
354 bootp_filename = pszBootFile;
355 pData->netmask = u32Netmask;
356
357#ifdef RT_OS_WINDOWS
358 {
359 WSADATA Data;
360 WSAStartup(MAKEWORD(2,0), &Data);
361 }
362# if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC)
363 pData->phEvents[VBOX_SOCKET_EVENT_INDEX] = CreateEvent(NULL, FALSE, FALSE, NULL);
364# endif
365#endif
366
367 link_up = 1;
368
369 debug_init();
370 if_init(pData);
371 ip_init(pData);
372 icmp_init(pData);
373
374 /* Initialise mbufs *after* setting the MTU */
375 m_init(pData);
376
377 /* set default addresses */
378 inet_aton("127.0.0.1", &loopback_addr);
379 inet_aton("127.0.0.1", &dns_addr);
380
381 if (get_dns_addr_domain(pData, true, &dns_addr, &pData->pszDomain) < 0)
382 fNATfailed = 1;
383
384 inet_aton(pszNetAddr, &special_addr);
385 alias_addr.s_addr = special_addr.s_addr | htonl(CTL_ALIAS);
386 getouraddr(pData);
387 return fNATfailed ? VINF_NAT_DNS : VINF_SUCCESS;
388}
389
390/**
391 * Statistics counters.
392 */
393void slirp_register_timers(PNATState pData, PPDMDRVINS pDrvIns)
394{
395#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
396 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatFill, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
397 STAMUNIT_TICKS_PER_CALL, "Profiling slirp fills", "/Drivers/NAT%d/Fill", pDrvIns->iInstance);
398 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatPoll, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
399 STAMUNIT_TICKS_PER_CALL, "Profiling slirp polls", "/Drivers/NAT%d/Poll", pDrvIns->iInstance);
400 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatFastTimer, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
401 STAMUNIT_TICKS_PER_CALL, "Profiling slirp fast timer", "/Drivers/NAT%d/TimerFast", pDrvIns->iInstance);
402 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatSlowTimer, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
403 STAMUNIT_TICKS_PER_CALL, "Profiling slirp slow timer", "/Drivers/NAT%d/TimerSlow", pDrvIns->iInstance);
404 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatTCP, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
405 STAMUNIT_COUNT, "TCP sockets", "/Drivers/NAT%d/SockTCP", pDrvIns->iInstance);
406 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatTCPHot, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
407 STAMUNIT_COUNT, "TCP sockets active", "/Drivers/NAT%d/SockTCPHot", pDrvIns->iInstance);
408 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatUDP, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
409 STAMUNIT_COUNT, "UDP sockets", "/Drivers/NAT%d/SockUDP", pDrvIns->iInstance);
410 PDMDrvHlpSTAMRegisterF(pDrvIns, &pData->StatUDPHot, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
411 STAMUNIT_COUNT, "UDP sockets active", "/Drivers/NAT%d/SockUDPHot", pDrvIns->iInstance);
412#endif /* VBOX_WITHOUT_RELEASE_STATISTICS */
413}
414
415/**
416 * Marks the link as up, making it possible to establish new connections.
417 */
418void slirp_link_up(PNATState pData)
419{
420 link_up = 1;
421}
422
423/**
424 * Marks the link as down and cleans up the current connections.
425 */
426void slirp_link_down(PNATState pData)
427{
428 struct socket *so;
429
430 while ((so = tcb.so_next) != &tcb)
431 {
432 if (so->so_state & SS_NOFDREF || so->s == -1)
433 sofree(pData, so);
434 else
435 tcp_drop(pData, sototcpcb(so), 0);
436 }
437
438 while ((so = udb.so_next) != &udb)
439 udp_detach(pData, so);
440
441 link_up = 0;
442}
443
444/**
445 * Terminates the slirp component.
446 */
447void slirp_term(PNATState pData)
448{
449 if (pData->pszDomain)
450 RTStrFree((char *)(void *)pData->pszDomain);
451
452#ifdef RT_OS_WINDOWS
453 pData->pfIcmpCloseHandle(pData->icmp_socket.sh);
454 FreeLibrary(pData->hmIcmpLibrary);
455 RTMemFree(pData->pvIcmpBuffer);
456# else
457 closesocket(pData->icmp_socket.s);
458#endif
459
460 slirp_link_down(pData);
461#ifdef RT_OS_WINDOWS
462 WSACleanup();
463#endif
464#ifdef LOG_ENABLED
465 Log(("\n"
466 "NAT statistics\n"
467 "--------------\n"
468 "\n"));
469 ipstats(pData);
470 tcpstats(pData);
471 udpstats(pData);
472 icmpstats(pData);
473 mbufstats(pData);
474 sockstats(pData);
475 Log(("\n"
476 "\n"
477 "\n"));
478#endif
479 RTMemFree(pData);
480}
481
482
/* True if the socket is connected and sending has not been shut down. */
#define CONN_CANFSEND(so) (((so)->so_state & (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
/* True if the socket is connected and receiving has not been shut down. */
#define CONN_CANFRCV(so) (((so)->so_state & (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
/*
 * Raises the local variable 'nfds' of the expanding function to x if x is
 * larger. Wrapped in do/while(0) so that it behaves like a single statement
 * (no dangling-else surprises when used in an unbraced if/else). Note that x
 * is evaluated twice.
 */
#define UPD_NFDS(x) do { if (nfds < (x)) nfds = (x); } while (0)
486
487/*
488 * curtime kept to an accuracy of 1ms
489 */
490#ifdef RT_OS_WINDOWS
491static void updtime(PNATState pData)
492{
493 struct _timeb tb;
494
495 _ftime(&tb);
496 curtime = (u_int)tb.time * (u_int)1000;
497 curtime += (u_int)tb.millitm;
498}
499#else
500static void updtime(PNATState pData)
501{
502 gettimeofday(&tt, 0);
503
504 curtime = (u_int)tt.tv_sec * (u_int)1000;
505 curtime += (u_int)tt.tv_usec / (u_int)1000;
506
507 if ((tt.tv_usec % 1000) >= 500)
508 curtime++;
509}
510#endif
511
512void slirp_select_fill(PNATState pData, int *pnfds,
513 fd_set *readfds, fd_set *writefds, fd_set *xfds)
514{
515 struct socket *so, *so_next;
516 int nfds;
517#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
518 int rc;
519 int error;
520#endif
521 int i;
522
523 STAM_REL_PROFILE_START(&pData->StatFill, a);
524
525 nfds = *pnfds;
526
527 /*
528 * First, TCP sockets
529 */
530 do_slowtimo = 0;
531 if (link_up)
532 {
533 /*
534 * *_slowtimo needs calling if there are IP fragments
535 * in the fragment queue, or there are TCP connections active
536 */
537 /* XXX:
538 * triggering of fragment expiration should be the same but use new macroses
539 */
540 do_slowtimo = (tcb.so_next != &tcb);
541 if (!do_slowtimo)
542 {
543 for (i = 0; i < IPREASS_NHASH; i++)
544 {
545 if (!TAILQ_EMPTY(&ipq[i]))
546 {
547 do_slowtimo = 1;
548 break;
549 }
550 }
551 }
552 ICMP_ENGAGE_EVENT(&pData->icmp_socket, readfds);
553
554 STAM_REL_COUNTER_RESET(&pData->StatTCP);
555 STAM_REL_COUNTER_RESET(&pData->StatTCPHot);
556
557 for (so = tcb.so_next; so != &tcb; so = so_next)
558 {
559 so_next = so->so_next;
560
561 STAM_REL_COUNTER_INC(&pData->StatTCP);
562
563 /*
564 * See if we need a tcp_fasttimo
565 */
566 if (time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK)
567 time_fasttimo = curtime; /* Flag when we want a fasttimo */
568
569 /*
570 * NOFDREF can include still connecting to local-host,
571 * newly socreated() sockets etc. Don't want to select these.
572 */
573 if (so->so_state & SS_NOFDREF || so->s == -1)
574 continue;
575
576 /*
577 * Set for reading sockets which are accepting
578 */
579 if (so->so_state & SS_FACCEPTCONN)
580 {
581 STAM_REL_COUNTER_INC(&pData->StatTCPHot);
582 TCP_ENGAGE_EVENT1(so, readfds);
583 continue;
584 }
585
586 /*
587 * Set for writing sockets which are connecting
588 */
589 if (so->so_state & SS_ISFCONNECTING)
590 {
591 Log2(("connecting %R[natsock] engaged\n",so));
592 STAM_REL_COUNTER_INC(&pData->StatTCPHot);
593 TCP_ENGAGE_EVENT1(so, writefds);
594 }
595
596 /*
597 * Set for writing if we are connected, can send more, and
598 * we have something to send
599 */
600 if (CONN_CANFSEND(so) && so->so_rcv.sb_cc)
601 {
602 STAM_REL_COUNTER_INC(&pData->StatTCPHot);
603 TCP_ENGAGE_EVENT1(so, writefds);
604 }
605
606 /*
607 * Set for reading (and urgent data) if we are connected, can
608 * receive more, and we have room for it XXX /2 ?
609 */
610 if (CONN_CANFRCV(so) && (so->so_snd.sb_cc < (so->so_snd.sb_datalen/2)))
611 {
612 STAM_REL_COUNTER_INC(&pData->StatTCPHot);
613 TCP_ENGAGE_EVENT2(so, readfds, xfds);
614 }
615 }
616
617 /*
618 * UDP sockets
619 */
620 STAM_REL_COUNTER_RESET(&pData->StatUDP);
621 STAM_REL_COUNTER_RESET(&pData->StatUDPHot);
622
623 for (so = udb.so_next; so != &udb; so = so_next)
624 {
625 so_next = so->so_next;
626
627 STAM_REL_COUNTER_INC(&pData->StatUDP);
628
629 /*
630 * See if it's timed out
631 */
632 if (so->so_expire)
633 {
634 if (so->so_expire <= curtime)
635 {
636 udp_detach(pData, so);
637 continue;
638 }
639 else
640 do_slowtimo = 1; /* Let socket expire */
641 }
642
643 /*
644 * When UDP packets are received from over the link, they're
645 * sendto()'d straight away, so no need for setting for writing
646 * Limit the number of packets queued by this session to 4.
647 * Note that even though we try and limit this to 4 packets,
648 * the session could have more queued if the packets needed
649 * to be fragmented.
650 *
651 * (XXX <= 4 ?)
652 */
653 if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4)
654 {
655 STAM_REL_COUNTER_INC(&pData->StatUDPHot);
656 UDP_ENGAGE_EVENT(so, readfds);
657 }
658 }
659
660 }
661
662#if !defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) || !defined(RT_OS_WINDOWS)
663 *pnfds = nfds;
664#else
665 *pnfds = VBOX_EVENT_COUNT;
666#endif
667
668 STAM_REL_PROFILE_STOP(&pData->StatFill, a);
669}
670
671#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
672void slirp_select_poll(PNATState pData, int fTimeout, int fIcmp)
673#else
674void slirp_select_poll(PNATState pData, fd_set *readfds, fd_set *writefds, fd_set *xfds)
675#endif
676{
677 struct socket *so, *so_next;
678 int ret;
679#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
680 WSANETWORKEVENTS NetworkEvents;
681 int rc;
682 int error;
683#endif
684
685 STAM_REL_PROFILE_START(&pData->StatPoll, a);
686
687 /* Update time */
688 updtime(pData);
689
690 /*
691 * See if anything has timed out
692 */
693 if (link_up)
694 {
695 if (time_fasttimo && ((curtime - time_fasttimo) >= 2))
696 {
697 STAM_REL_PROFILE_START(&pData->StatFastTimer, a);
698 tcp_fasttimo(pData);
699 time_fasttimo = 0;
700 STAM_REL_PROFILE_STOP(&pData->StatFastTimer, a);
701 }
702 if (do_slowtimo && ((curtime - last_slowtimo) >= 499))
703 {
704 STAM_REL_PROFILE_START(&pData->StatSlowTimer, a);
705 ip_slowtimo(pData);
706 tcp_slowtimo(pData);
707 last_slowtimo = curtime;
708 STAM_REL_PROFILE_STOP(&pData->StatSlowTimer, a);
709 }
710 }
711#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
712 if (fTimeout)
713 return; /* only timer update */
714#endif
715
716 /*
717 * Check sockets
718 */
719 if (link_up)
720 {
721#if defined(RT_OS_WINDOWS)
722 /*XXX: before renaming please make see define
723 * fIcmp in slirp_state.h
724 */
725 if (fIcmp)
726 sorecvfrom(pData, &pData->icmp_socket);
727#else
728 if (pData->icmp_socket.s != -1 && FD_ISSET(pData->icmp_socket.s, readfds))
729 sorecvfrom(pData, &pData->icmp_socket);
730#endif
731 /*
732 * Check TCP sockets
733 */
734 for (so = tcb.so_next; so != &tcb; so = so_next)
735 {
736 so_next = so->so_next;
737
738 /*
739 * FD_ISSET is meaningless on these sockets
740 * (and they can crash the program)
741 */
742 if (so->so_state & SS_NOFDREF || so->s == -1)
743 continue;
744
745 POLL_TCP_EVENTS(rc, error, so, &NetworkEvents);
746
747 LOG_NAT_SOCK(so, TCP, &NetworkEvents, readfds, writefds, xfds);
748
749 /*
750 * Check for URG data
751 * This will soread as well, so no need to
752 * test for readfds below if this succeeds
753 */
754
755 /* out-of-band data */
756 if (CHECK_FD_SET(so, NetworkEvents, xfds))
757 {
758 sorecvoob(pData, so);
759 }
760
761 /*
762 * Check sockets for reading
763 */
764 else if ( CHECK_FD_SET(so, NetworkEvents, readfds)
765 || WIN_CHECK_FD_SET(so, NetworkEvents, acceptds))
766 {
767 /*
768 * Check for incoming connections
769 */
770 if (so->so_state & SS_FACCEPTCONN)
771 {
772 tcp_connect(pData, so);
773#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
774 if (!(NetworkEvents.lNetworkEvents & FD_CLOSE))
775#endif
776 continue;
777 }
778
779 ret = soread(pData, so, /*fCloseIfNothingRead=*/false);
780 /* Output it if we read something */
781 if (ret > 0)
782 tcp_output(pData, sototcpcb(so));
783 }
784
785#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
786 /*
787 * Check for FD_CLOSE events.
788 */
789 if (NetworkEvents.lNetworkEvents & FD_CLOSE)
790 {
791 /*
792 * drain the socket
793 */
794 for (;;)
795 {
796 ret = soread(pData, so, /*fCloseIfNothingRead=*/true);
797 if (ret > 0)
798 tcp_output(pData, sototcpcb(so));
799 else
800 break;
801 }
802 }
803#endif
804
805 /*
806 * Check sockets for writing
807 */
808 if (CHECK_FD_SET(so, NetworkEvents, writefds))
809 {
810 /*
811 * Check for non-blocking, still-connecting sockets
812 */
813 if (so->so_state & SS_ISFCONNECTING)
814 {
815 Log2(("connecting %R[natsock] catched\n", so));
816 /* Connected */
817 so->so_state &= ~SS_ISFCONNECTING;
818
819 /*
820 * This should be probably guarded by PROBE_CONN too. Anyway,
821 * we disable it on OS/2 because the below send call returns
822 * EFAULT which causes the opened TCP socket to close right
823 * after it has been opened and connected.
824 */
825#ifndef RT_OS_OS2
826 ret = send(so->s, (const char *)&ret, 0, 0);
827 if (ret < 0)
828 {
829 /* XXXXX Must fix, zero bytes is a NOP */
830 if ( errno == EAGAIN
831 || errno == EWOULDBLOCK
832 || errno == EINPROGRESS
833 || errno == ENOTCONN)
834 continue;
835
836 /* else failed */
837 so->so_state = SS_NOFDREF;
838 }
839 /* else so->so_state &= ~SS_ISFCONNECTING; */
840#endif
841
842 /*
843 * Continue tcp_input
844 */
845 tcp_input(pData, (struct mbuf *)NULL, sizeof(struct ip), so);
846 /* continue; */
847 }
848 else
849 ret = sowrite(pData, so);
850 /*
851 * XXX If we wrote something (a lot), there could be the need
852 * for a window update. In the worst case, the remote will send
853 * a window probe to get things going again.
854 */
855 }
856
857 /*
858 * Probe a still-connecting, non-blocking socket
859 * to check if it's still alive
860 */
861#ifdef PROBE_CONN
862 if (so->so_state & SS_ISFCONNECTING)
863 {
864 ret = recv(so->s, (char *)&ret, 0, 0);
865
866 if (ret < 0)
867 {
868 /* XXX */
869 if ( errno == EAGAIN
870 || errno == EWOULDBLOCK
871 || errno == EINPROGRESS
872 || errno == ENOTCONN)
873 {
874 continue; /* Still connecting, continue */
875 }
876
877 /* else failed */
878 so->so_state = SS_NOFDREF;
879
880 /* tcp_input will take care of it */
881 }
882 else
883 {
884 ret = send(so->s, &ret, 0, 0);
885 if (ret < 0)
886 {
887 /* XXX */
888 if ( errno == EAGAIN
889 || errno == EWOULDBLOCK
890 || errno == EINPROGRESS
891 || errno == ENOTCONN)
892 {
893 continue;
894 }
895 /* else failed */
896 so->so_state = SS_NOFDREF;
897 }
898 else
899 so->so_state &= ~SS_ISFCONNECTING;
900
901 }
902 tcp_input((struct mbuf *)NULL, sizeof(struct ip),so);
903 } /* SS_ISFCONNECTING */
904#endif
905 }
906
907 /*
908 * Now UDP sockets.
909 * Incoming packets are sent straight away, they're not buffered.
910 * Incoming UDP data isn't buffered either.
911 */
912 for (so = udb.so_next; so != &udb; so = so_next)
913 {
914 so_next = so->so_next;
915
916 POLL_UDP_EVENTS(rc, error, so, &NetworkEvents);
917
918 LOG_NAT_SOCK(so, UDP, &NetworkEvents, readfds, writefds, xfds);
919
920 if (so->s != -1 && CHECK_FD_SET(so, NetworkEvents, readfds))
921 {
922 sorecvfrom(pData, so);
923 }
924 }
925
926 }
927
928 /*
929 * See if we can start outputting
930 */
931 if (if_queued && link_up)
932 if_start(pData);
933
934 STAM_REL_PROFILE_STOP(&pData->StatPoll, a);
935}
936
#define ETH_ALEN 6      /* octets in an Ethernet address */
#define ETH_HLEN 14     /* octets in an Ethernet header */

#define ARPOP_REQUEST 1 /* ARP request */
#define ARPOP_REPLY 2   /* ARP reply */

/* Ethernet frame header. */
struct ethhdr
{
    unsigned char h_dest[ETH_ALEN];     /* destination eth addr */
    unsigned char h_source[ETH_ALEN];   /* source ether addr */
    unsigned short h_proto;             /* packet type ID field */
};

/* ARP packet for IPv4 over Ethernet (fixed part followed by the addresses). */
struct arphdr
{
    unsigned short ar_hrd;              /* format of hardware address */
    unsigned short ar_pro;              /* format of protocol address */
    unsigned char ar_hln;               /* length of hardware address */
    unsigned char ar_pln;               /* length of protocol address */
    unsigned short ar_op;               /* ARP opcode (command) */

    /*
     * The address part is variable sized in general; this is the layout
     * for Ethernet hardware addresses and IPv4 protocol addresses.
     */
    unsigned char ar_sha[ETH_ALEN];     /* sender hardware address */
    unsigned char ar_sip[4];            /* sender IP address */
    unsigned char ar_tha[ETH_ALEN];     /* target hardware address */
    unsigned char ar_tip[4];            /* target IP address */
};
966
967static
968#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
969void arp_input(PNATState pData, struct mbuf *m)
970#else
971void arp_input(PNATState pData, const uint8_t *pkt, int pkt_len)
972#endif
973{
974 struct ethhdr *eh;
975 struct ethhdr *reh;
976 struct arphdr *ah;
977 struct arphdr *rah;
978 int ar_op;
979 struct ex_list *ex_ptr;
980 uint32_t htip;
981#ifndef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
982 uint8_t arp_reply[sizeof(struct arphdr) + ETH_HLEN];
983 eh = (struct ethhdr *)pkt;
984#else
985 struct mbuf *mr;
986 eh = mtod(m, struct ethhdr *);
987#endif
988 ah = (struct arphdr *)&eh[1];
989 htip = ntohl(*(uint32_t*)ah->ar_tip);
990
991#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
992 mr = m_get(pData);
993 mr->m_data += if_maxlinkhdr;
994 mr->m_len = sizeof(struct arphdr);
995 rah = mtod(mr, struct arphdr *);
996#else
997 reh = (struct ethhdr *)arp_reply;
998 rah = (struct arphdr *)&reh[1];
999#endif
1000
1001 ar_op = ntohs(ah->ar_op);
1002 switch(ar_op)
1003 {
1004 case ARPOP_REQUEST:
1005 if ((htip & pData->netmask) == ntohl(special_addr.s_addr))
1006 {
1007 if ( (htip & ~pData->netmask) == CTL_DNS
1008 || (htip & ~pData->netmask) == CTL_ALIAS)
1009 goto arp_ok;
1010 for (ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next)
1011 {
1012 if ((htip & ~pData->netmask) == ex_ptr->ex_addr)
1013 goto arp_ok;
1014 }
1015 return;
1016 arp_ok:
1017
1018#ifndef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
1019 memcpy(reh->h_dest, eh->h_source, ETH_ALEN);
1020 memcpy(reh->h_source, &special_addr, ETH_ALEN);
1021 reh->h_source[5] = ah->ar_tip[3];
1022 reh->h_proto = htons(ETH_P_ARP);
1023#endif
1024 rah->ar_hrd = htons(1);
1025 rah->ar_pro = htons(ETH_P_IP);
1026 rah->ar_hln = ETH_ALEN;
1027 rah->ar_pln = 4;
1028 rah->ar_op = htons(ARPOP_REPLY);
1029 memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN);
1030
1031 switch (htip & ~pData->netmask)
1032 {
1033 case CTL_DNS:
1034 case CTL_ALIAS:
1035 rah->ar_sha[5] = (uint8_t)(htip & ~pData->netmask);
1036 break;
1037 default:;
1038 }
1039
1040 memcpy(rah->ar_sip, ah->ar_tip, 4);
1041 memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN);
1042 memcpy(rah->ar_tip, ah->ar_sip, 4);
1043#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
1044 if_encap(pData, ETH_P_ARP, mr);
1045 m_free(pData, m);
1046#else
1047 slirp_output(pData->pvUser, arp_reply, sizeof(arp_reply));
1048#endif
1049 }
1050 break;
1051 default:
1052 break;
1053 }
1054}
1055
1056void slirp_input(PNATState pData, const uint8_t *pkt, int pkt_len)
1057{
1058 struct mbuf *m;
1059 int proto;
1060
1061 if (pkt_len < ETH_HLEN)
1062 {
1063 LogRel(("packet having size %d has been ingnored\n", pkt_len));
1064 return;
1065 }
1066
1067 m = m_get(pData);
1068 if (m == NULL)
1069 {
1070 LogRel(("can't allocate new mbuf\n"));
1071 }
1072 /* Note: we add to align the IP header */
1073
1074 if (M_FREEROOM(m) < pkt_len + 2)
1075 {
1076 m_inc(m, pkt_len + 2);
1077 }
1078 m->m_len = pkt_len + 2;
1079 memcpy(m->m_data + 2, pkt, pkt_len);
1080
1081 proto = ntohs(*(uint16_t *)(pkt + 12));
1082 switch(proto)
1083 {
1084 case ETH_P_ARP:
1085#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
1086 arp_input(pData, m);
1087#else
1088 arp_input(pData, pkt, pkt_len);
1089 m_free(pData, m);
1090#endif
1091 break;
1092 case ETH_P_IP:
1093 /* Update time. Important if the network is very quiet, as otherwise
1094 * the first outgoing connection gets an incorrect timestamp. */
1095 updtime(pData);
1096 m->m_data += ETH_HLEN + 2;
1097 m->m_len -= ETH_HLEN + 2;
1098 ip_input(pData, m);
1099 break;
1100 default:
1101 LogRel(("Unsupported protocol %x\n", proto));
1102 m_free(pData, m);
1103 break;
1104 }
1105}
1106
1107/* output the IP packet to the ethernet device */
1108#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
1109void if_encap(PNATState pData, uint16_t eth_proto, struct mbuf *m)
1110#else
1111void if_encap(PNATState pData, uint8_t *ip_data, int ip_data_len)
1112#endif
1113{
1114#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
1115 struct ethhdr *eh;
1116 m->m_data -= if_maxlinkhdr;
1117 m->m_len += ETH_HLEN;
1118 eh = mtod(m, struct ethhdr *);
1119#else
1120 uint8_t buf[1600];
1121 struct ethhdr *eh = (struct ethhdr *)buf;
1122
1123 if (ip_data_len + ETH_HLEN > sizeof(buf))
1124 return;
1125
1126 memcpy(buf + sizeof(struct ethhdr), ip_data, ip_data_len);
1127#endif
1128
1129
1130 memcpy(eh->h_dest, client_ethaddr, ETH_ALEN);
1131 memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 1);
1132 /* XXX: not correct */
1133 eh->h_source[5] = CTL_ALIAS;
1134#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
1135 eh->h_proto = htons(eth_proto);
1136 slirp_output(pData->pvUser, m, mtod(m, uint8_t *), m->m_len);
1137#else
1138 eh->h_proto = htons(ETH_P_IP);
1139 slirp_output(pData->pvUser, buf, ip_data_len + ETH_HLEN);
1140#endif
1141}
1142
1143int slirp_redir(PNATState pData, int is_udp, int host_port,
1144 struct in_addr guest_addr, int guest_port)
1145{
1146 if (is_udp)
1147 {
1148 if (!udp_listen(pData, htons(host_port), guest_addr.s_addr,
1149 htons(guest_port), 0))
1150 return -1;
1151 }
1152 else
1153 {
1154 if (!solisten(pData, htons(host_port), guest_addr.s_addr,
1155 htons(guest_port), 0))
1156 return -1;
1157 }
1158 return 0;
1159}
1160
1161int slirp_add_exec(PNATState pData, int do_pty, const char *args, int addr_low_byte,
1162 int guest_port)
1163{
1164 return add_exec(&exec_list, do_pty, (char *)args,
1165 addr_low_byte, htons(guest_port));
1166}
1167
1168void slirp_set_ethaddr(PNATState pData, const uint8_t *ethaddr)
1169{
1170 memcpy(client_ethaddr, ethaddr, ETH_ALEN);
1171}
1172
1173#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
1174HANDLE *slirp_get_events(PNATState pData)
1175{
1176 return pData->phEvents;
1177}
1178void slirp_register_external_event(PNATState pData, HANDLE hEvent, int index)
1179{
1180 pData->phEvents[index] = hEvent;
1181}
1182#endif
1183
1184unsigned int slirp_get_timeout_ms(PNATState pData)
1185{
1186 if (link_up)
1187 {
1188 if (time_fasttimo)
1189 return 2;
1190 if (do_slowtimo)
1191 return 500; /* see PR_SLOWHZ */
1192 }
1193 return 0;
1194}
1195
1196/*
1197 * this function called from NAT thread
1198 */
1199void slirp_post_sent(PNATState pData, void *pvArg)
1200{
1201 struct socket *so = 0;
1202 struct tcpcb *tp = 0;
1203 struct mbuf *m = (struct mbuf *)pvArg;
1204 m_free(pData, m);
1205}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette