VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 69298

Last change on this file since 69298 was 65849, checked in by vboxsync, 8 years ago

Devices: trailing spaces

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.0 KB
Line 
1/* $Id: socket.c 65849 2017-02-23 09:37:26Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#include <slirp.h>
28#include "ip_icmp.h"
29#include "main.h"
30#ifdef __sun__
31#include <sys/filio.h>
32#endif
33#include <VBox/vmm/pdmdrv.h>
34#if defined (RT_OS_WINDOWS)
35#include <iprt/win/iphlpapi.h>
36#include <icmpapi.h>
37#endif
38
39#if defined(DECLARE_IOVEC) && defined(RT_OS_WINDOWS)
40AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, WSABUF, buf);
41AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, WSABUF, len);
42#endif
43
44#ifdef VBOX_WITH_NAT_UDP_SOCKET_CLONE
45/**
46 *
47 */
48struct socket * soCloneUDPSocketWithForegnAddr(PNATState pData, bool fBindSocket, struct socket *pSo, uint32_t u32ForeignAddr)
49{
50 struct socket *pNewSocket = NULL;
51 LogFlowFunc(("Enter: fBindSocket:%RTbool, so:%R[natsock], u32ForeignAddr:%RTnaipv4\n", fBindSocket, pSo, u32ForeignAddr));
52 pNewSocket = socreate();
53 if (!pNewSocket)
54 {
55 LogFunc(("Can't create socket\n"));
56 LogFlowFunc(("Leave: NULL\n"));
57 return NULL;
58 }
59 if (fBindSocket)
60 {
61 if (udp_attach(pData, pNewSocket, 0) <= 0)
62 {
63 sofree(pData, pNewSocket);
64 LogFunc(("Can't attach fresh created socket\n"));
65 return NULL;
66 }
67 }
68 else
69 {
70 pNewSocket->so_cloneOf = (struct socket *)pSo;
71 pNewSocket->s = pSo->s;
72 insque(pData, pNewSocket, &udb);
73 }
74 pNewSocket->so_laddr = pSo->so_laddr;
75 pNewSocket->so_lport = pSo->so_lport;
76 pNewSocket->so_faddr.s_addr = u32ForeignAddr;
77 pNewSocket->so_fport = pSo->so_fport;
78 pSo->so_cCloneCounter++;
79 LogFlowFunc(("Leave: %R[natsock]\n", pNewSocket));
80 return pNewSocket;
81}
82
83struct socket *soLookUpClonedUDPSocket(PNATState pData, const struct socket *pcSo, uint32_t u32ForeignAddress)
84{
85 struct socket *pSoClone = NULL;
86 LogFlowFunc(("Enter: pcSo:%R[natsock], u32ForeignAddress:%RTnaipv4\n", pcSo, u32ForeignAddress));
87 for (pSoClone = udb.so_next; pSoClone != &udb; pSoClone = pSoClone->so_next)
88 {
89 if ( pSoClone->so_cloneOf
90 && pSoClone->so_cloneOf == pcSo
91 && pSoClone->so_lport == pcSo->so_lport
92 && pSoClone->so_fport == pcSo->so_fport
93 && pSoClone->so_laddr.s_addr == pcSo->so_laddr.s_addr
94 && pSoClone->so_faddr.s_addr == u32ForeignAddress)
95 goto done;
96 }
97 pSoClone = NULL;
98done:
99 LogFlowFunc(("Leave: pSoClone: %R[natsock]\n", pSoClone));
100 return pSoClone;
101}
102#endif
103
104#ifdef VBOX_WITH_NAT_SEND2HOME
105DECLINLINE(bool) slirpSend2Home(PNATState pData, struct socket *pSo, const void *pvBuf, uint32_t cbBuf, int iFlags)
106{
107 int idxAddr;
108 int ret = 0;
109 bool fSendDone = false;
110 LogFlowFunc(("Enter pSo:%R[natsock] pvBuf: %p, cbBuf: %d, iFlags: %d\n", pSo, pvBuf, cbBuf, iFlags));
111 for (idxAddr = 0; idxAddr < pData->cInHomeAddressSize; ++idxAddr)
112 {
113
114 struct socket *pNewSocket = soCloneUDPSocketWithForegnAddr(pData, pSo, pData->pInSockAddrHomeAddress[idxAddr].sin_addr);
115 AssertReturn((pNewSocket, false));
116 pData->pInSockAddrHomeAddress[idxAddr].sin_port = pSo->so_fport;
117 /** @todo more verbose on errors,
118 * @note: we shouldn't care if this send fail or not (we're in broadcast).
119 */
120 LogFunc(("send %d bytes to %RTnaipv4 from %R[natsock]\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr, pNewSocket));
121 ret = sendto(pNewSocket->s, pvBuf, cbBuf, iFlags, (struct sockaddr *)&pData->pInSockAddrHomeAddress[idxAddr], sizeof(struct sockaddr_in));
122 if (ret < 0)
123 LogFunc(("Failed to send %d bytes to %RTnaipv4\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr));
124 fSendDone |= ret > 0;
125 }
126 LogFlowFunc(("Leave %RTbool\n", fSendDone));
127 return fSendDone;
128}
129#endif /* !VBOX_WITH_NAT_SEND2HOME */
130
131#if !defined(RT_OS_WINDOWS)
132static void send_icmp_to_guest(PNATState, char *, size_t, const struct sockaddr_in *);
133static void sorecvfrom_icmp_unix(PNATState, struct socket *);
134#endif /* !RT_OS_WINDOWS */
135
/**
 * Socket module initialisation hook; currently there is nothing to set up.
 */
void so_init(void)
{
    /* Intentionally empty. */
}
140
141struct socket *
142solookup(struct socket *head, struct in_addr laddr,
143 u_int lport, struct in_addr faddr, u_int fport)
144{
145 struct socket *so;
146
147 for (so = head->so_next; so != head; so = so->so_next)
148 {
149 if ( so->so_lport == lport
150 && so->so_laddr.s_addr == laddr.s_addr
151 && so->so_faddr.s_addr == faddr.s_addr
152 && so->so_fport == fport)
153 return so;
154 }
155
156 return (struct socket *)NULL;
157}
158
159/*
160 * Create a new socket, initialise the fields
161 * It is the responsibility of the caller to
162 * insque() it into the correct linked-list
163 */
164struct socket *
165socreate(void)
166{
167 struct socket *so;
168
169 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
170 if (so)
171 {
172 so->so_state = SS_NOFDREF;
173 so->s = -1;
174#if !defined(RT_OS_WINDOWS)
175 so->so_poll_index = -1;
176#endif
177 }
178 return so;
179}
180
181/*
182 * remque and free a socket, clobber cache
183 */
184void
185sofree(PNATState pData, struct socket *so)
186{
187 LogFlowFunc(("ENTER:%R[natsock]\n", so));
188 /*
189 * We should not remove socket when polling routine do the polling
190 * instead we mark it for deletion.
191 */
192 if (so->fUnderPolling)
193 {
194 so->fShouldBeRemoved = 1;
195 LogFlowFunc(("LEAVE:%R[natsock] postponed deletion\n", so));
196 return;
197 }
198 /**
199 * Check that we don't freeng socket with tcbcb
200 */
201 Assert(!sototcpcb(so));
202 /* udp checks */
203 Assert(!so->so_timeout);
204 Assert(!so->so_timeout_arg);
205 if (so == tcp_last_so)
206 tcp_last_so = &tcb;
207 else if (so == udp_last_so)
208 udp_last_so = &udb;
209
210 /* check if mbuf haven't been already freed */
211 if (so->so_m != NULL)
212 {
213 m_freem(pData, so->so_m);
214 so->so_m = NULL;
215 }
216
217 if (so->so_ohdr != NULL)
218 {
219 RTMemFree(so->so_ohdr);
220 so->so_ohdr = NULL;
221 }
222
223 if (so->so_next && so->so_prev)
224 {
225 remque(pData, so); /* crashes if so is not in a queue */
226 NSOCK_DEC();
227 }
228
229 RTMemFree(so);
230 LogFlowFuncLeave();
231}
232
233/*
234 * Read from so's socket into sb_snd, updating all relevant sbuf fields
235 * NOTE: This will only be called if it is select()ed for reading, so
236 * a read() of 0 (or less) means it's disconnected
237 */
238int
239soread(PNATState pData, struct socket *so)
240{
241 int n, nn, lss, total;
242 struct sbuf *sb = &so->so_snd;
243 u_int len = sb->sb_datalen - sb->sb_cc;
244 struct iovec iov[2];
245 int mss = so->so_tcpcb->t_maxseg;
246 int sockerr;
247
248 STAM_PROFILE_START(&pData->StatIOread, a);
249 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
250 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
251
252 QSOCKET_LOCK(tcb);
253 SOCKET_LOCK(so);
254 QSOCKET_UNLOCK(tcb);
255
256 LogFlow(("soread: so = %R[natsock]\n", so));
257 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
258
259 /*
260 * No need to check if there's enough room to read.
261 * soread wouldn't have been called if there weren't
262 */
263
264 len = sb->sb_datalen - sb->sb_cc;
265
266 iov[0].iov_base = sb->sb_wptr;
267 iov[1].iov_base = 0;
268 iov[1].iov_len = 0;
269 if (sb->sb_wptr < sb->sb_rptr)
270 {
271 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
272 /* Should never succeed, but... */
273 if (iov[0].iov_len > len)
274 iov[0].iov_len = len;
275 if (iov[0].iov_len > mss)
276 iov[0].iov_len -= iov[0].iov_len%mss;
277 n = 1;
278 }
279 else
280 {
281 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
282 /* Should never succeed, but... */
283 if (iov[0].iov_len > len)
284 iov[0].iov_len = len;
285 len -= iov[0].iov_len;
286 if (len)
287 {
288 iov[1].iov_base = sb->sb_data;
289 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
290 if (iov[1].iov_len > len)
291 iov[1].iov_len = len;
292 total = iov[0].iov_len + iov[1].iov_len;
293 if (total > mss)
294 {
295 lss = total % mss;
296 if (iov[1].iov_len > lss)
297 {
298 iov[1].iov_len -= lss;
299 n = 2;
300 }
301 else
302 {
303 lss -= iov[1].iov_len;
304 iov[0].iov_len -= lss;
305 n = 1;
306 }
307 }
308 else
309 n = 2;
310 }
311 else
312 {
313 if (iov[0].iov_len > mss)
314 iov[0].iov_len -= iov[0].iov_len%mss;
315 n = 1;
316 }
317 }
318
319#ifdef HAVE_READV
320 nn = readv(so->s, (struct iovec *)iov, n);
321#else
322 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
323#endif
324 if (nn < 0)
325 sockerr = errno; /* save it, as it may be clobbered by logging */
326 else
327 sockerr = 0;
328
329 Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
330 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
331 if (nn <= 0)
332 {
333#ifdef RT_OS_WINDOWS
334 /*
335 * Windows reports ESHUTDOWN after SHUT_RD (SD_RECEIVE)
336 * instead of just returning EOF indication.
337 */
338 if (nn < 0 && sockerr == ESHUTDOWN)
339 {
340 nn = 0;
341 sockerr = 0;
342 }
343#endif
344
345 if (nn == 0) /* XXX: should this be inside #if defined(RT_OS_WINDOWS)? */
346 {
347 /*
348 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
349 * _could_ mean that the connection is closed. But we will receive an
350 * FD_CLOSE event later if the connection was _really_ closed. With
351 * www.youtube.com I see this very often. Closing the socket too early
352 * would be dangerous.
353 */
354 int status;
355 unsigned long pending = 0;
356 status = ioctlsocket(so->s, FIONREAD, &pending);
357 if (status < 0)
358 Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno));
359 if (pending != 0)
360 {
361 SOCKET_UNLOCK(so);
362 STAM_PROFILE_STOP(&pData->StatIOread, a);
363 return 0;
364 }
365 }
366
367 if ( nn < 0
368 && soIgnorableErrorCode(sockerr))
369 {
370 SOCKET_UNLOCK(so);
371 STAM_PROFILE_STOP(&pData->StatIOread, a);
372 return 0;
373 }
374 else
375 {
376 int fUninitializedTemplate = 0;
377 int shuterr;
378
379 fUninitializedTemplate = RT_BOOL(( sototcpcb(so)
380 && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY
381 || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY)));
382 /* nn == 0 means peer has performed an orderly shutdown */
383 Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n",
384 RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sockerr, strerror(sockerr)));
385
386 shuterr = sofcantrcvmore(so);
387 if (!sockerr && !shuterr && !fUninitializedTemplate)
388 tcp_sockclosed(pData, sototcpcb(so));
389 else
390 {
391 LogRel2(("NAT: sockerr %d, shuterr %d - %R[natsock]\n", sockerr, shuterr, so));
392 tcp_drop(pData, sototcpcb(so), sockerr);
393 }
394 SOCKET_UNLOCK(so);
395 STAM_PROFILE_STOP(&pData->StatIOread, a);
396 return -1;
397 }
398 }
399 STAM_STATS(
400 if (n == 1)
401 {
402 STAM_COUNTER_INC(&pData->StatIORead_in_1);
403 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
404 }
405 else
406 {
407 STAM_COUNTER_INC(&pData->StatIORead_in_2);
408 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
409 }
410 );
411
412#ifndef HAVE_READV
413 /*
414 * If there was no error, try and read the second time round
415 * We read again if n = 2 (ie, there's another part of the buffer)
416 * and we read as much as we could in the first read
417 * We don't test for <= 0 this time, because there legitimately
418 * might not be any more data (since the socket is non-blocking),
419 * a close will be detected on next iteration.
420 * A return of -1 wont (shouldn't) happen, since it didn't happen above
421 */
422 if (n == 2 && (unsigned)nn == iov[0].iov_len)
423 {
424 int ret;
425 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
426 if (ret > 0)
427 nn += ret;
428 STAM_STATS(
429 if (ret > 0)
430 {
431 STAM_COUNTER_INC(&pData->StatIORead_in_2);
432 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
433 }
434 );
435 }
436
437 Log2(("%s: read(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
438#endif
439
440 /* Update fields */
441 sb->sb_cc += nn;
442 sb->sb_wptr += nn;
443 Log2(("%s: update so_snd (readed nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
444 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
445 {
446 sb->sb_wptr -= sb->sb_datalen;
447 Log2(("%s: alter sb_wptr so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
448 }
449 STAM_PROFILE_STOP(&pData->StatIOread, a);
450 SOCKET_UNLOCK(so);
451 return nn;
452}
453
454/*
455 * Get urgent data
456 *
457 * When the socket is created, we set it SO_OOBINLINE,
458 * so when OOB data arrives, we soread() it and everything
459 * in the send buffer is sent as urgent data
460 */
461void
462sorecvoob(PNATState pData, struct socket *so)
463{
464 struct tcpcb *tp = sototcpcb(so);
465 ssize_t ret;
466
467 LogFlowFunc(("sorecvoob: so = %R[natsock]\n", so));
468
469 /*
470 * We take a guess at how much urgent data has arrived.
471 * In most situations, when urgent data arrives, the next
472 * read() should get all the urgent data. This guess will
473 * be wrong however if more data arrives just after the
474 * urgent data, or the read() doesn't return all the
475 * urgent data.
476 */
477 ret = soread(pData, so);
478 if (RT_LIKELY(ret > 0))
479 {
480 tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd);
481 tp->t_force = 1;
482 tcp_output(pData, tp);
483 tp->t_force = 0;
484 }
485}
486
487/*
488 * Send urgent data
489 * There's a lot duplicated code here, but...
490 */
491int
492sosendoob(struct socket *so)
493{
494 struct sbuf *sb = &so->so_rcv;
495 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
496
497 int n, len;
498
499 LogFlowFunc(("sosendoob so = %R[natsock]\n", so));
500
501 if (so->so_urgc > sizeof(buff))
502 so->so_urgc = sizeof(buff); /* XXX */
503
504 if (sb->sb_rptr < sb->sb_wptr)
505 {
506 /* We can send it directly */
507 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
508 so->so_urgc -= n;
509
510 Log2((" --- sent %d bytes urgent data, %d urgent bytes left\n",
511 n, so->so_urgc));
512 }
513 else
514 {
515 /*
516 * Since there's no sendv or sendtov like writev,
517 * we must copy all data to a linear buffer then
518 * send it all
519 */
520 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
521 if (len > so->so_urgc)
522 len = so->so_urgc;
523 memcpy(buff, sb->sb_rptr, len);
524 so->so_urgc -= len;
525 if (so->so_urgc)
526 {
527 n = sb->sb_wptr - sb->sb_data;
528 if (n > so->so_urgc)
529 n = so->so_urgc;
530 memcpy(buff + len, sb->sb_data, n);
531 so->so_urgc -= n;
532 len += n;
533 }
534 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
535#ifdef DEBUG
536 if (n != len)
537 Log(("Didn't send all data urgently XXXXX\n"));
538#endif
539 Log2((" ---2 sent %d bytes urgent data, %d urgent bytes left\n",
540 n, so->so_urgc));
541 }
542
543 sb->sb_cc -= n;
544 sb->sb_rptr += n;
545 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
546 sb->sb_rptr -= sb->sb_datalen;
547
548 return n;
549}
550
551/*
552 * Write data from so_rcv to so's socket,
553 * updating all sbuf field as necessary
554 */
555int
556sowrite(PNATState pData, struct socket *so)
557{
558 int n, nn;
559 struct sbuf *sb = &so->so_rcv;
560 u_int len = sb->sb_cc;
561 struct iovec iov[2];
562
563 STAM_PROFILE_START(&pData->StatIOwrite, a);
564 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
565 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
566 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
567 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
568 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
569 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
570 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
571 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
572 LogFlowFunc(("so = %R[natsock]\n", so));
573 Log2(("%s: so = %R[natsock] so->so_rcv = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
574 QSOCKET_LOCK(tcb);
575 SOCKET_LOCK(so);
576 QSOCKET_UNLOCK(tcb);
577 if (so->so_urgc)
578 {
579 sosendoob(so);
580 if (sb->sb_cc == 0)
581 {
582 SOCKET_UNLOCK(so);
583 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
584 return 0;
585 }
586 }
587
588 /*
589 * No need to check if there's something to write,
590 * sowrite wouldn't have been called otherwise
591 */
592
593 len = sb->sb_cc;
594
595 iov[0].iov_base = sb->sb_rptr;
596 iov[1].iov_base = 0;
597 iov[1].iov_len = 0;
598 if (sb->sb_rptr < sb->sb_wptr)
599 {
600 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
601 /* Should never succeed, but... */
602 if (iov[0].iov_len > len)
603 iov[0].iov_len = len;
604 n = 1;
605 }
606 else
607 {
608 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
609 if (iov[0].iov_len > len)
610 iov[0].iov_len = len;
611 len -= iov[0].iov_len;
612 if (len)
613 {
614 iov[1].iov_base = sb->sb_data;
615 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
616 if (iov[1].iov_len > len)
617 iov[1].iov_len = len;
618 n = 2;
619 }
620 else
621 n = 1;
622 }
623 STAM_STATS({
624 if (n == 1)
625 {
626 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
627 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
628 }
629 else
630 {
631 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
632 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
633 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
634 }
635 });
636 /* Check if there's urgent data to send, and if so, send it */
637#ifdef HAVE_READV
638 nn = writev(so->s, (const struct iovec *)iov, n);
639#else
640 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
641#endif
642 Log2(("%s: wrote(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
643 /* This should never happen, but people tell me it does *shrug* */
644 if ( nn < 0
645 && soIgnorableErrorCode(errno))
646 {
647 SOCKET_UNLOCK(so);
648 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
649 return 0;
650 }
651
652 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
653 {
654 Log2(("%s: disconnected, so->so_state = %x, errno = %d\n",
655 RT_GCC_EXTENSION __PRETTY_FUNCTION__, so->so_state, errno));
656 sofcantsendmore(so);
657 tcp_sockclosed(pData, sototcpcb(so));
658 SOCKET_UNLOCK(so);
659 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
660 return -1;
661 }
662
663#ifndef HAVE_READV
664 if (n == 2 && (unsigned)nn == iov[0].iov_len)
665 {
666 int ret;
667 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
668 if (ret > 0)
669 nn += ret;
670# ifdef VBOX_WITH_STATISTICS
671 if (ret > 0 && ret != (ssize_t)iov[1].iov_len)
672 {
673 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
674 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret));
675 }
676#endif
677 }
678 Log2(("%s: wrote(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
679#endif
680
681 /* Update sbuf */
682 sb->sb_cc -= nn;
683 sb->sb_rptr += nn;
684 Log2(("%s: update so_rcv (written nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
685 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
686 {
687 sb->sb_rptr -= sb->sb_datalen;
688 Log2(("%s: alter sb_rptr of so_rcv %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
689 }
690
691 /*
692 * If in DRAIN mode, and there's no more data, set
693 * it CANTSENDMORE
694 */
695 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
696 sofcantsendmore(so);
697
698 SOCKET_UNLOCK(so);
699 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
700 return nn;
701}
702
703/*
704 * recvfrom() a UDP socket
705 */
706void
707sorecvfrom(PNATState pData, struct socket *so)
708{
709 LogFlowFunc(("sorecvfrom: so = %p\n", so));
710
711#ifdef RT_OS_WINDOWS
712 /* ping is handled with ICMP API in ip_icmpwin.c */
713 Assert(so->so_type == IPPROTO_UDP);
714#else
715 if (so->so_type == IPPROTO_ICMP)
716 {
717 /* This is a "ping" reply */
718 sorecvfrom_icmp_unix(pData, so);
719 udp_detach(pData, so);
720 }
721 else
722#endif /* !RT_OS_WINDOWS */
723 {
724 static char achBuf[64 * 1024];
725
726 /* A "normal" UDP packet */
727 struct sockaddr_in addr;
728 socklen_t addrlen = sizeof(struct sockaddr_in);
729 struct iovec iov[2];
730 ssize_t nread;
731 struct mbuf *m;
732
733 QSOCKET_LOCK(udb);
734 SOCKET_LOCK(so);
735 QSOCKET_UNLOCK(udb);
736
737 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData));
738 if (m == NULL)
739 {
740 SOCKET_UNLOCK(so);
741 return;
742 }
743
744 m->m_data += ETH_HLEN;
745 m->m_pkthdr.header = mtod(m, void *);
746
747 m->m_data += sizeof(struct udpiphdr);
748
749 /* small packets will fit without copying */
750 iov[0].iov_base = mtod(m, char *);
751 iov[0].iov_len = M_TRAILINGSPACE(m);
752
753 /* large packets will spill into a temp buffer */
754 iov[1].iov_base = achBuf;
755 iov[1].iov_len = sizeof(achBuf);
756
757#if !defined(RT_OS_WINDOWS)
758 {
759 struct msghdr mh;
760 memset(&mh, 0, sizeof(mh));
761
762 mh.msg_iov = iov;
763 mh.msg_iovlen = 2;
764 mh.msg_name = &addr;
765 mh.msg_namelen = addrlen;
766
767 nread = recvmsg(so->s, &mh, 0);
768 }
769#else /* RT_OS_WINDOWS */
770 {
771 DWORD nbytes; /* NB: can't use nread b/c of different size */
772 DWORD flags = 0;
773 int status;
774 AssertCompile(sizeof(WSABUF) == sizeof(struct iovec));
775 AssertCompileMembersSameSizeAndOffset(WSABUF, len, struct iovec, iov_len);
776 AssertCompileMembersSameSizeAndOffset(WSABUF, buf, struct iovec, iov_base);
777 status = WSARecvFrom(so->s, (WSABUF *)&iov[0], 2, &nbytes, &flags,
778 (struct sockaddr *)&addr, &addrlen,
779 NULL, NULL);
780 if (status != SOCKET_ERROR)
781 nread = nbytes;
782 else
783 nread = -1;
784 }
785#endif
786 if (nread >= 0)
787 {
788 if (nread <= iov[0].iov_len)
789 m->m_len = nread;
790 else
791 {
792 m->m_len = iov[0].iov_len;
793 m_append(pData, m, nread - iov[0].iov_len, iov[1].iov_base);
794 }
795 Assert(m_length(m, NULL) == (size_t)nread);
796
797 /*
798 * Hack: domain name lookup will be used the most for UDP,
799 * and since they'll only be used once there's no need
800 * for the 4 minute (or whatever) timeout... So we time them
801 * out much quicker (10 seconds for now...)
802 */
803 if (so->so_expire)
804 {
805 if (so->so_fport != RT_H2N_U16_C(53))
806 so->so_expire = curtime + SO_EXPIRE;
807 }
808
809 /*
810 * DNS proxy requests are forwarded to the real resolver,
811 * but its socket's so_faddr is that of the DNS proxy
812 * itself.
813 *
814 * last argument should be changed if Slirp will inject IP attributes
815 */
816 if ( pData->fUseDnsProxy
817 && so->so_fport == RT_H2N_U16_C(53)
818 && CTL_CHECK(so->so_faddr.s_addr, CTL_DNS))
819 dnsproxy_answer(pData, so, m);
820
821 /* packets definetly will be fragmented, could confuse receiver peer. */
822 if (nread > if_mtu)
823 m->m_flags |= M_SKIP_FIREWALL;
824
825 /*
826 * If this packet was destined for CTL_ADDR,
827 * make it look like that's where it came from, done by udp_output
828 */
829 udp_output(pData, so, m, &addr);
830 }
831 else
832 {
833 m_freem(pData, m);
834
835 if (!soIgnorableErrorCode(errno))
836 {
837 u_char code;
838 if (errno == EHOSTUNREACH)
839 code = ICMP_UNREACH_HOST;
840 else if (errno == ENETUNREACH)
841 code = ICMP_UNREACH_NET;
842 else
843 code = ICMP_UNREACH_PORT;
844
845 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
846 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
847 so->so_m = NULL;
848 }
849 }
850
851 SOCKET_UNLOCK(so);
852 }
853}
854
855/*
856 * sendto() a socket
857 */
858int
859sosendto(PNATState pData, struct socket *so, struct mbuf *m)
860{
861 int ret;
862 struct sockaddr_in *paddr;
863 struct sockaddr addr;
864#if 0
865 struct sockaddr_in host_addr;
866#endif
867 caddr_t buf = 0;
868 int mlen;
869
870 LogFlowFunc(("sosendto: so = %R[natsock], m = %p\n", so, m));
871
872 memset(&addr, 0, sizeof(struct sockaddr));
873#ifdef RT_OS_DARWIN
874 addr.sa_len = sizeof(struct sockaddr_in);
875#endif
876 paddr = (struct sockaddr_in *)&addr;
877 paddr->sin_family = AF_INET;
878 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
879 {
880 /* It's an alias */
881 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
882 switch(last_byte)
883 {
884#if 0
885 /* handle this case at 'default:' */
886 case CTL_BROADCAST:
887 addr.sin_addr.s_addr = INADDR_BROADCAST;
888 /* Send the packet to host to fully emulate broadcast */
889 /** @todo r=klaus: on Linux host this causes the host to receive
890 * the packet twice for some reason. And I cannot find any place
891 * in the man pages which states that sending a broadcast does not
892 * reach the host itself. */
893 host_addr.sin_family = AF_INET;
894 host_addr.sin_port = so->so_fport;
895 host_addr.sin_addr = our_addr;
896 sendto(so->s, m->m_data, m->m_len, 0,
897 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
898 break;
899#endif
900 case CTL_DNS:
901 case CTL_ALIAS:
902 default:
903 if (last_byte == ~pData->netmask)
904 paddr->sin_addr.s_addr = INADDR_BROADCAST;
905 else
906 paddr->sin_addr = loopback_addr;
907 break;
908 }
909 }
910 else
911 paddr->sin_addr = so->so_faddr;
912 paddr->sin_port = so->so_fport;
913
914 Log2((" sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
915 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
916
917 /* Don't care what port we get */
918 /*
919 * > nmap -sV -T4 -O -A -v -PU3483 255.255.255.255
920 * generates bodyless messages, annoying memmory management system.
921 */
922 mlen = m_length(m, NULL);
923 if (mlen > 0)
924 {
925 buf = RTMemAlloc(mlen);
926 if (buf == NULL)
927 {
928 return -1;
929 }
930 m_copydata(m, 0, mlen, buf);
931 }
932 ret = sendto(so->s, buf, mlen, 0,
933 (struct sockaddr *)&addr, sizeof (struct sockaddr));
934#ifdef VBOX_WITH_NAT_SEND2HOME
935 if (slirpIsWideCasting(pData, so->so_faddr.s_addr))
936 {
937 slirpSend2Home(pData, so, buf, mlen, 0);
938 }
939#endif
940 if (buf)
941 RTMemFree(buf);
942 if (ret < 0)
943 {
944 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
945 return -1;
946 }
947
948 /*
949 * Kill the socket if there's no reply in 4 minutes,
950 * but only if it's an expirable socket
951 */
952 if (so->so_expire)
953 so->so_expire = curtime + SO_EXPIRE;
954 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
955 return 0;
956}
957
958/*
959 * XXX This should really be tcp_listen
960 */
961struct socket *
962solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
963{
964 struct sockaddr_in addr;
965 struct socket *so;
966 socklen_t addrlen = sizeof(addr);
967 int s, opt = 1;
968 int status;
969
970 LogFlowFunc(("solisten: port = %d, laddr = %x, lport = %d, flags = %x\n", port, laddr, lport, flags));
971
972 if ((so = socreate()) == NULL)
973 {
974 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
975 return NULL;
976 }
977
978 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
979 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
980 {
981 RTMemFree(so);
982 return NULL;
983 }
984
985 SOCKET_LOCK_CREATE(so);
986 SOCKET_LOCK(so);
987 QSOCKET_LOCK(tcb);
988 insque(pData, so,&tcb);
989 NSOCK_INC();
990 QSOCKET_UNLOCK(tcb);
991
992 /*
993 * SS_FACCEPTONCE sockets must time out.
994 */
995 if (flags & SS_FACCEPTONCE)
996 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
997
998 so->so_state = (SS_FACCEPTCONN|flags);
999 so->so_lport = lport; /* Kept in network format */
1000 so->so_laddr.s_addr = laddr; /* Ditto */
1001
1002 memset(&addr, 0, sizeof(addr));
1003#ifdef RT_OS_DARWIN
1004 addr.sin_len = sizeof(addr);
1005#endif
1006 addr.sin_family = AF_INET;
1007 addr.sin_addr.s_addr = bind_addr;
1008 addr.sin_port = port;
1009
1010 /**
1011 * changing listen(,1->SOMAXCONN) shouldn't be harmful for NAT's TCP/IP stack,
1012 * kernel will choose the optimal value for requests queue length.
1013 * @note: MSDN recommends low (2-4) values for bluetooth networking devices.
1014 */
1015 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
1016 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
1017 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
1018 || (listen(s, pData->soMaxConn) < 0))
1019 {
1020#ifdef RT_OS_WINDOWS
1021 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
1022 closesocket(s);
1023 QSOCKET_LOCK(tcb);
1024 sofree(pData, so);
1025 QSOCKET_UNLOCK(tcb);
1026 /* Restore the real errno */
1027 WSASetLastError(tmperrno);
1028#else
1029 int tmperrno = errno; /* Don't clobber the real reason we failed */
1030 close(s);
1031 if (sototcpcb(so))
1032 tcp_close(pData, sototcpcb(so));
1033 else
1034 sofree(pData, so);
1035 /* Restore the real errno */
1036 errno = tmperrno;
1037#endif
1038 return NULL;
1039 }
1040 fd_nonblock(s);
1041 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
1042
1043 getsockname(s,(struct sockaddr *)&addr,&addrlen);
1044 so->so_fport = addr.sin_port;
1045 /* set socket buffers */
1046 opt = pData->socket_rcv;
1047 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
1048 if (status < 0)
1049 {
1050 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
1051 goto no_sockopt;
1052 }
1053 opt = pData->socket_snd;
1054 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
1055 if (status < 0)
1056 {
1057 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
1058 goto no_sockopt;
1059 }
1060no_sockopt:
1061 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
1062 so->so_faddr = alias_addr;
1063 else
1064 so->so_faddr = addr.sin_addr;
1065
1066 so->s = s;
1067 SOCKET_UNLOCK(so);
1068 return so;
1069}
1070
/**
 * Notification that data is available in so_rcv.
 *
 * Currently a no-op: the disabled original would have written the data out
 * with sowrite() and cleared the handle from the write set.
 *
 * @todo do we really need this function, what it's intended to do?
 * @param   so  The socket; unused.
 */
void
sorwakeup(struct socket *so)
{
    NOREF(so);
}
1086
/**
 * Notification that data has been freed in so_snd, i.e. there is room for a
 * read if we want one.
 *
 * Nothing is read here; for now that is left to the main loop.
 *
 * @param   so  The socket; unused.
 */
void
sowwakeup(struct socket *so)
{
    NOREF(so);
}
1097
/*
 * Various session state calls
 * XXX Should be #define's
 * The socket state stuff needs work, these often get called 2 or 3
 * times each when only 1 was needed
 */
1104void
1105soisfconnecting(struct socket *so)
1106{
1107 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
1108 SS_FCANTSENDMORE|SS_FWDRAIN);
1109 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1110}
1111
1112void
1113soisfconnected(struct socket *so)
1114{
1115 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1116 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1117 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1118 LogFlowFunc(("LEAVE: so:%R[natsock]\n", so));
1119}
1120
1121int
1122sofcantrcvmore(struct socket *so)
1123{
1124 int err = 0;
1125
1126 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1127 if ((so->so_state & SS_NOFDREF) == 0)
1128 {
1129 /*
1130 * If remote closes first and then sends an RST, the recv() in
1131 * soread() will keep reporting EOF without any error
1132 * indication. As far as I can tell the only way to detect
1133 * this on Linux is to check if shutdown() succeeds here (but
1134 * see below).
1135 *
1136 * OTOH on OS X shutdown() "helpfully" checks if remote has
1137 * already closed and then always returns ENOTCONN
1138 * immediately.
1139 */
1140 int status = shutdown(so->s, SHUT_RD);
1141#if defined(RT_OS_LINUX)
1142 if (status < 0)
1143 err = errno;
1144#else
1145 RT_NOREF(status);
1146#endif
1147 }
1148 so->so_state &= ~(SS_ISFCONNECTING);
1149 if (so->so_state & SS_FCANTSENDMORE)
1150 {
1151#if defined(RT_OS_LINUX)
1152 /*
1153 * If we have closed first, and remote closes, shutdown will
1154 * return ENOTCONN, but this is expected. Don't tell the
1155 * caller there was an error.
1156 */
1157 if (err == ENOTCONN)
1158 err = 0;
1159#endif
1160 so->so_state = SS_NOFDREF; /* Don't select it */
1161 /* XXX close() here as well? */
1162 }
1163 else
1164 so->so_state |= SS_FCANTRCVMORE;
1165
1166 LogFlowFunc(("LEAVE: %d\n", err));
1167 return err;
1168}
1169
1170void
1171sofcantsendmore(struct socket *so)
1172{
1173 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1174 if ((so->so_state & SS_NOFDREF) == 0)
1175 shutdown(so->s, 1); /* send FIN to fhost */
1176
1177 so->so_state &= ~(SS_ISFCONNECTING);
1178 if (so->so_state & SS_FCANTRCVMORE)
1179 so->so_state = SS_NOFDREF; /* as above */
1180 else
1181 so->so_state |= SS_FCANTSENDMORE;
1182 LogFlowFuncLeave();
1183}
1184
/*
 * Placeholder for "foreign side disconnected" handling.
 *
 * The real body (clear the connecting/connected flags, close the host
 * socket, set SS_ISFDISCONNECTED) is compiled out, so the function
 * currently only references its argument.
 */
void soisfdisconnected(struct socket *so)
{
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#else
    NOREF(so);
#endif
}
1198
1199/*
1200 * Set write drain mode
1201 * Set CANTSENDMORE once all data has been write()n
1202 */
1203void
1204sofwdrain(struct socket *so)
1205{
1206 if (SBUF_LEN(&so->so_rcv))
1207 so->so_state |= SS_FWDRAIN;
1208 else
1209 sofcantsendmore(so);
1210}
1211
1212#if !defined(RT_OS_WINDOWS)
1213static void
1214send_icmp_to_guest(PNATState pData, char *buff, size_t len, const struct sockaddr_in *addr)
1215{
1216 struct ip *ip;
1217 uint32_t dst, src;
1218 char ip_copy[256];
1219 struct icmp *icp;
1220 int old_ip_len = 0;
1221 int hlen, original_hlen = 0;
1222 struct mbuf *m;
1223 struct icmp_msg *icm;
1224 uint8_t proto;
1225 int type = 0;
1226
1227 ip = (struct ip *)buff;
1228 /* Fix ip->ip_len to contain the total packet length including the header
1229 * in _host_ byte order for all OSes. On Darwin, that value already is in
1230 * host byte order. Solaris and Darwin report only the payload. */
1231#ifndef RT_OS_DARWIN
1232 ip->ip_len = RT_N2H_U16(ip->ip_len);
1233#endif
1234 hlen = (ip->ip_hl << 2);
1235#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1236 ip->ip_len += hlen;
1237#endif
1238 if (ip->ip_len < hlen + ICMP_MINLEN)
1239 {
1240 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1241 return;
1242 }
1243 icp = (struct icmp *)((char *)ip + hlen);
1244
1245 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1246 if ( icp->icmp_type != ICMP_ECHOREPLY
1247 && icp->icmp_type != ICMP_TIMXCEED
1248 && icp->icmp_type != ICMP_UNREACH)
1249 {
1250 return;
1251 }
1252
1253 /*
1254 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1255 * ICMP_ECHOREPLY assuming data 0
1256 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1257 */
1258 if (ip->ip_len < hlen + 8)
1259 {
1260 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1261 return;
1262 }
1263
1264 type = icp->icmp_type;
1265 if ( type == ICMP_TIMXCEED
1266 || type == ICMP_UNREACH)
1267 {
1268 /*
1269 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1270 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1271 */
1272 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1273 {
1274 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1275 return;
1276 }
1277 ip = &icp->icmp_ip;
1278 }
1279
1280 icm = icmp_find_original_mbuf(pData, ip);
1281 if (icm == NULL)
1282 {
1283 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1284 return;
1285 }
1286
1287 m = icm->im_m;
1288 if (!m)
1289 {
1290 LogFunc(("%R[natsock] hasn't stored it's mbuf on sent\n", icm->im_so));
1291 goto done;
1292 }
1293
1294 src = addr->sin_addr.s_addr;
1295 if (type == ICMP_ECHOREPLY)
1296 {
1297 struct ip *ip0 = mtod(m, struct ip *);
1298 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1299 if (icp0->icmp_type != ICMP_ECHO)
1300 {
1301 Log(("NAT: we haven't found echo for this reply\n"));
1302 goto done;
1303 }
1304 /*
1305 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1306 * IP header combined by OS network stack, our local copy of IP header contians values
1307 * in host byte order so no byte order conversion is required. IP headers fields are converting
1308 * in ip_output0 routine only.
1309 */
1310 if ( (ip->ip_len - hlen)
1311 != (ip0->ip_len - (ip0->ip_hl << 2)))
1312 {
1313 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1314 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1315 goto done;
1316 }
1317 }
1318
1319 /* ip points on origianal ip header */
1320 ip = mtod(m, struct ip *);
1321 proto = ip->ip_p;
1322 /* Now ip is pointing on header we've sent from guest */
1323 if ( icp->icmp_type == ICMP_TIMXCEED
1324 || icp->icmp_type == ICMP_UNREACH)
1325 {
1326 old_ip_len = (ip->ip_hl << 2) + 64;
1327 if (old_ip_len > sizeof(ip_copy))
1328 old_ip_len = sizeof(ip_copy);
1329 memcpy(ip_copy, ip, old_ip_len);
1330 }
1331
1332 /* source address from original IP packet*/
1333 dst = ip->ip_src.s_addr;
1334
1335 /* overide ther tail of old packet */
1336 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1337 original_hlen = ip->ip_hl << 2;
1338 /* saves original ip header and options */
1339 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1340 ip->ip_len = m_length(m, NULL);
1341 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1342
1343 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1344 type = icp->icmp_type;
1345 if ( type == ICMP_TIMXCEED
1346 || type == ICMP_UNREACH)
1347 {
1348 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1349 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1350
1351 /* undo byte order conversions done in ip_input() */
1352 HTONS(icp->icmp_ip.ip_len);
1353 HTONS(icp->icmp_ip.ip_id);
1354 HTONS(icp->icmp_ip.ip_off);
1355
1356 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1357 }
1358
1359 ip->ip_src.s_addr = src;
1360 ip->ip_dst.s_addr = dst;
1361 icmp_reflect(pData, m);
1362 /* m was freed */
1363 icm->im_m = NULL;
1364
1365 done:
1366 icmp_msg_delete(pData, icm);
1367}
1368
1369static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1370{
1371 struct sockaddr_in addr;
1372 socklen_t addrlen = sizeof(struct sockaddr_in);
1373 struct ip ip;
1374 char *buff;
1375 int len = 0;
1376
1377 /* 1- step: read the ip header */
1378 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1379 (struct sockaddr *)&addr, &addrlen);
1380 if ( len < 0
1381 && ( soIgnorableErrorCode(errno)
1382 || errno == ENOTCONN))
1383 {
1384 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1385 return;
1386 }
1387
1388 if ( len < sizeof(struct ip)
1389 || len < 0
1390 || len == 0)
1391 {
1392 u_char code;
1393 code = ICMP_UNREACH_PORT;
1394
1395 if (errno == EHOSTUNREACH)
1396 code = ICMP_UNREACH_HOST;
1397 else if (errno == ENETUNREACH)
1398 code = ICMP_UNREACH_NET;
1399
1400 LogRel(("NAT: UDP ICMP rx errno=%d (%s)\n", errno, strerror(errno)));
1401 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1402 so->so_m = NULL;
1403 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n"));
1404 return;
1405 }
1406 /* basic check of IP header */
1407 if ( ip.ip_v != IPVERSION
1408# ifndef RT_OS_DARWIN
1409 || ip.ip_p != IPPROTO_ICMP
1410# endif
1411 )
1412 {
1413 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n"));
1414 return;
1415 }
1416# ifndef RT_OS_DARWIN
1417 /* Darwin reports the IP length already in host byte order. */
1418 ip.ip_len = RT_N2H_U16(ip.ip_len);
1419# endif
1420# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1421 /* Solaris and Darwin report the payload only */
1422 ip.ip_len += (ip.ip_hl << 2);
1423# endif
1424 /* Note: ip->ip_len in host byte order (all OS) */
1425 len = ip.ip_len;
1426 buff = RTMemAlloc(len);
1427 if (buff == NULL)
1428 {
1429 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1430 return;
1431 }
1432 /* 2 - step: we're reading rest of the datagramm to the buffer */
1433 addrlen = sizeof(struct sockaddr_in);
1434 memset(&addr, 0, addrlen);
1435 len = recvfrom(so->s, buff, len, 0,
1436 (struct sockaddr *)&addr, &addrlen);
1437 if ( len < 0
1438 && ( soIgnorableErrorCode(errno)
1439 || errno == ENOTCONN))
1440 {
1441 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1442 ip.ip_len));
1443 RTMemFree(buff);
1444 return;
1445 }
1446 if ( len < 0
1447 || len == 0)
1448 {
1449 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1450 errno, len, (ip.ip_len - sizeof(struct ip))));
1451 RTMemFree(buff);
1452 return;
1453 }
1454 /* len is modified in 2nd read, when the rest of the datagramm was read */
1455 send_icmp_to_guest(pData, buff, len, &addr);
1456 RTMemFree(buff);
1457}
1458#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette