VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 28502

Last change on this file since 28502 was 28502, checked in by vboxsync, 14 years ago

NAT: correct handling end of zone's resources.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.1 KB
Line 
1/* $Id: socket.c 28502 2010-04-20 07:16:46Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*
23 * This code is based on:
24 *
25 * Copyright (c) 1995 Danny Gasparovski.
26 *
27 * Please read the file COPYRIGHT for the
28 * terms and conditions of the copyright.
29 */
30
31#define WANT_SYS_IOCTL_H
32#include <slirp.h>
33#include "ip_icmp.h"
34#include "main.h"
35#ifdef __sun__
36#include <sys/filio.h>
37#endif
38#include <VBox/pdmdrv.h>
39#if defined (RT_OS_WINDOWS)
40#include <iphlpapi.h>
41#include <icmpapi.h>
42#endif
43
44
45static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
46#ifdef RT_OS_WINDOWS
47static void sorecvfrom_icmp_win(PNATState, struct socket *);
48#else /* RT_OS_WINDOWS */
49static void sorecvfrom_icmp_unix(PNATState, struct socket *);
50#endif /* !RT_OS_WINDOWS */
51
/*
 * Initialisation hook for the socket layer.
 * There is currently nothing to set up, so the body is empty.
 */
void
so_init(void)
{
}
56
57struct socket *
58solookup(struct socket *head, struct in_addr laddr,
59 u_int lport, struct in_addr faddr, u_int fport)
60{
61 struct socket *so;
62
63 for (so = head->so_next; so != head; so = so->so_next)
64 {
65 if ( so->so_lport == lport
66 && so->so_laddr.s_addr == laddr.s_addr
67 && so->so_faddr.s_addr == faddr.s_addr
68 && so->so_fport == fport)
69 return so;
70 }
71
72 return (struct socket *)NULL;
73}
74
75/*
76 * Create a new socket, initialise the fields
77 * It is the responsibility of the caller to
78 * insque() it into the correct linked-list
79 */
80struct socket *
81socreate()
82{
83 struct socket *so;
84
85 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
86 if (so)
87 {
88 so->so_state = SS_NOFDREF;
89 so->s = -1;
90#if !defined(RT_OS_WINDOWS)
91 so->so_poll_index = -1;
92#endif
93 }
94 return so;
95}
96
97/*
98 * remque and free a socket, clobber cache
99 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
100 * in sofree we don't know from which queue item beeing removed.
101 */
102void
103sofree(PNATState pData, struct socket *so)
104{
105 struct socket *so_prev = NULL;
106 if (so == tcp_last_so)
107 tcp_last_so = &tcb;
108 else if (so == udp_last_so)
109 udp_last_so = &udb;
110
111 /* check if mbuf haven't been already freed */
112 if (so->so_m != NULL)
113 m_freem(pData, so->so_m);
114#ifndef VBOX_WITH_SLIRP_MT
115 if (so->so_next && so->so_prev)
116 {
117 remque(pData, so); /* crashes if so is not in a queue */
118 NSOCK_DEC();
119 }
120
121 RTMemFree(so);
122#else
123 so->so_deleted = 1;
124#endif
125}
126
#ifdef VBOX_WITH_SLIRP_MT
/*
 * Variant of soread() for the multi-threaded build: performs the read and
 * stores soread()'s return value through ret instead of returning it.
 */
void
soread_queue(PNATState pData, struct socket *so, int *ret)
{
    int result = soread(pData, so);

    *ret = result;
}
#endif
134
135/*
136 * Read from so's socket into sb_snd, updating all relevant sbuf fields
137 * NOTE: This will only be called if it is select()ed for reading, so
138 * a read() of 0 (or less) means it's disconnected
139 */
140int
141soread(PNATState pData, struct socket *so)
142{
143 int n, nn, lss, total;
144 struct sbuf *sb = &so->so_snd;
145 size_t len = sb->sb_datalen - sb->sb_cc;
146 struct iovec iov[2];
147 int mss = so->so_tcpcb->t_maxseg;
148
149 STAM_PROFILE_START(&pData->StatIOread, a);
150 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
151 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
152
153 QSOCKET_LOCK(tcb);
154 SOCKET_LOCK(so);
155 QSOCKET_UNLOCK(tcb);
156
157 DEBUG_CALL("soread");
158 DEBUG_ARG("so = %lx", (long)so);
159
160 /*
161 * No need to check if there's enough room to read.
162 * soread wouldn't have been called if there weren't
163 */
164
165 len = sb->sb_datalen - sb->sb_cc;
166
167 iov[0].iov_base = sb->sb_wptr;
168 iov[1].iov_base = 0;
169 iov[1].iov_len = 0;
170 if (sb->sb_wptr < sb->sb_rptr)
171 {
172 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
173 /* Should never succeed, but... */
174 if (iov[0].iov_len > len)
175 iov[0].iov_len = len;
176 if (iov[0].iov_len > mss)
177 iov[0].iov_len -= iov[0].iov_len%mss;
178 n = 1;
179 }
180 else
181 {
182 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
183 /* Should never succeed, but... */
184 if (iov[0].iov_len > len)
185 iov[0].iov_len = len;
186 len -= iov[0].iov_len;
187 if (len)
188 {
189 iov[1].iov_base = sb->sb_data;
190 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
191 if (iov[1].iov_len > len)
192 iov[1].iov_len = len;
193 total = iov[0].iov_len + iov[1].iov_len;
194 if (total > mss)
195 {
196 lss = total % mss;
197 if (iov[1].iov_len > lss)
198 {
199 iov[1].iov_len -= lss;
200 n = 2;
201 }
202 else
203 {
204 lss -= iov[1].iov_len;
205 iov[0].iov_len -= lss;
206 n = 1;
207 }
208 }
209 else
210 n = 2;
211 }
212 else
213 {
214 if (iov[0].iov_len > mss)
215 iov[0].iov_len -= iov[0].iov_len%mss;
216 n = 1;
217 }
218 }
219
220#ifdef HAVE_READV
221 nn = readv(so->s, (struct iovec *)iov, n);
222 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
223#else
224 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
225#endif
226 if (nn <= 0)
227 {
228 /*
229 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
230 * _could_ mean that the connection is closed. But we will receive an
231 * FD_CLOSE event later if the connection was _really_ closed. With
232 * www.youtube.com I see this very often. Closing the socket too early
233 * would be dangerous.
234 */
235 int status;
236 unsigned long pending = 0;
237 status = ioctlsocket(so->s, FIONREAD, &pending);
238 if (status < 0)
239 LogRel(("NAT:error in WSAIoctl: %d\n", errno));
240 if (nn == 0 && (pending != 0))
241 {
242 SOCKET_UNLOCK(so);
243 STAM_PROFILE_STOP(&pData->StatIOread, a);
244 return 0;
245 }
246 if ( nn < 0
247 && ( errno == EINTR
248 || errno == EAGAIN
249 || errno == EWOULDBLOCK))
250 {
251 SOCKET_UNLOCK(so);
252 STAM_PROFILE_STOP(&pData->StatIOread, a);
253 return 0;
254 }
255 else
256 {
257 /* nn == 0 means peer has performed an orderly shutdown */
258 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
259 nn, errno, strerror(errno)));
260 sofcantrcvmore(so);
261 tcp_sockclosed(pData, sototcpcb(so));
262 SOCKET_UNLOCK(so);
263 STAM_PROFILE_STOP(&pData->StatIOread, a);
264 return -1;
265 }
266 }
267 STAM_STATS(
268 if (n == 1)
269 {
270 STAM_COUNTER_INC(&pData->StatIORead_in_1);
271 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
272 }
273 else
274 {
275 STAM_COUNTER_INC(&pData->StatIORead_in_2);
276 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
277 }
278 );
279
280#ifndef HAVE_READV
281 /*
282 * If there was no error, try and read the second time round
283 * We read again if n = 2 (ie, there's another part of the buffer)
284 * and we read as much as we could in the first read
285 * We don't test for <= 0 this time, because there legitimately
286 * might not be any more data (since the socket is non-blocking),
287 * a close will be detected on next iteration.
288 * A return of -1 wont (shouldn't) happen, since it didn't happen above
289 */
290 if (n == 2 && nn == iov[0].iov_len)
291 {
292 int ret;
293 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
294 if (ret > 0)
295 nn += ret;
296 STAM_STATS(
297 if (ret > 0)
298 {
299 STAM_COUNTER_INC(&pData->StatIORead_in_2);
300 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
301 }
302 );
303 }
304
305 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
306#endif
307
308 /* Update fields */
309 sb->sb_cc += nn;
310 sb->sb_wptr += nn;
311 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
312 sb->sb_wptr -= sb->sb_datalen;
313 STAM_PROFILE_STOP(&pData->StatIOread, a);
314 SOCKET_UNLOCK(so);
315 return nn;
316}
317
318/*
319 * Get urgent data
320 *
321 * When the socket is created, we set it SO_OOBINLINE,
322 * so when OOB data arrives, we soread() it and everything
323 * in the send buffer is sent as urgent data
324 */
325void
326sorecvoob(PNATState pData, struct socket *so)
327{
328 struct tcpcb *tp = sototcpcb(so);
329 ssize_t ret;
330
331 DEBUG_CALL("sorecvoob");
332 DEBUG_ARG("so = %lx", (long)so);
333
334 /*
335 * We take a guess at how much urgent data has arrived.
336 * In most situations, when urgent data arrives, the next
337 * read() should get all the urgent data. This guess will
338 * be wrong however if more data arrives just after the
339 * urgent data, or the read() doesn't return all the
340 * urgent data.
341 */
342 ret = soread(pData, so);
343 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
344 tp->t_force = 1;
345 tcp_output(pData, tp);
346 tp->t_force = 0;
347}
348
349/*
350 * Send urgent data
351 * There's a lot duplicated code here, but...
352 */
353int
354sosendoob(struct socket *so)
355{
356 struct sbuf *sb = &so->so_rcv;
357 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
358
359 int n, len;
360
361 DEBUG_CALL("sosendoob");
362 DEBUG_ARG("so = %lx", (long)so);
363 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
364
365 if (so->so_urgc > sizeof(buff))
366 so->so_urgc = sizeof(buff); /* XXX */
367
368 if (sb->sb_rptr < sb->sb_wptr)
369 {
370 /* We can send it directly */
371 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
372 so->so_urgc -= n;
373
374 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
375 n, so->so_urgc));
376 }
377 else
378 {
379 /*
380 * Since there's no sendv or sendtov like writev,
381 * we must copy all data to a linear buffer then
382 * send it all
383 */
384 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
385 if (len > so->so_urgc)
386 len = so->so_urgc;
387 memcpy(buff, sb->sb_rptr, len);
388 so->so_urgc -= len;
389 if (so->so_urgc)
390 {
391 n = sb->sb_wptr - sb->sb_data;
392 if (n > so->so_urgc)
393 n = so->so_urgc;
394 memcpy(buff + len, sb->sb_data, n);
395 so->so_urgc -= n;
396 len += n;
397 }
398 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
399#ifdef DEBUG
400 if (n != len)
401 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
402#endif
403 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
404 n, so->so_urgc));
405 }
406
407 sb->sb_cc -= n;
408 sb->sb_rptr += n;
409 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
410 sb->sb_rptr -= sb->sb_datalen;
411
412 return n;
413}
414
415/*
416 * Write data from so_rcv to so's socket,
417 * updating all sbuf field as necessary
418 */
419int
420sowrite(PNATState pData, struct socket *so)
421{
422 int n, nn;
423 struct sbuf *sb = &so->so_rcv;
424 size_t len = sb->sb_cc;
425 struct iovec iov[2];
426
427 STAM_PROFILE_START(&pData->StatIOwrite, a);
428 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
429 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
430 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
431 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
432 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
433 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
434 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
435 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
436 DEBUG_CALL("sowrite");
437 DEBUG_ARG("so = %lx", (long)so);
438 QSOCKET_LOCK(tcb);
439 SOCKET_LOCK(so);
440 QSOCKET_UNLOCK(tcb);
441 if (so->so_urgc)
442 {
443 sosendoob(so);
444 if (sb->sb_cc == 0)
445 {
446 SOCKET_UNLOCK(so);
447 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
448 return 0;
449 }
450 }
451
452 /*
453 * No need to check if there's something to write,
454 * sowrite wouldn't have been called otherwise
455 */
456
457 len = sb->sb_cc;
458
459 iov[0].iov_base = sb->sb_rptr;
460 iov[1].iov_base = 0;
461 iov[1].iov_len = 0;
462 if (sb->sb_rptr < sb->sb_wptr)
463 {
464 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
465 /* Should never succeed, but... */
466 if (iov[0].iov_len > len)
467 iov[0].iov_len = len;
468 n = 1;
469 }
470 else
471 {
472 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
473 if (iov[0].iov_len > len)
474 iov[0].iov_len = len;
475 len -= iov[0].iov_len;
476 if (len)
477 {
478 iov[1].iov_base = sb->sb_data;
479 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
480 if (iov[1].iov_len > len)
481 iov[1].iov_len = len;
482 n = 2;
483 }
484 else
485 n = 1;
486 }
487 STAM_STATS({
488 if (n == 1)
489 {
490 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
491 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
492 }
493 else
494 {
495 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
496 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
497 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
498 }
499 });
500 /* Check if there's urgent data to send, and if so, send it */
501#ifdef HAVE_READV
502 nn = writev(so->s, (const struct iovec *)iov, n);
503 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
504#else
505 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
506#endif
507 /* This should never happen, but people tell me it does *shrug* */
508 if ( nn < 0
509 && ( errno == EAGAIN
510 || errno == EINTR
511 || errno == EWOULDBLOCK))
512 {
513 SOCKET_UNLOCK(so);
514 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
515 return 0;
516 }
517
518 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
519 {
520 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
521 so->so_state, errno));
522 sofcantsendmore(so);
523 tcp_sockclosed(pData, sototcpcb(so));
524 SOCKET_UNLOCK(so);
525 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
526 return -1;
527 }
528
529#ifndef HAVE_READV
530 if (n == 2 && nn == iov[0].iov_len)
531 {
532 int ret;
533 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
534 if (ret > 0)
535 nn += ret;
536 STAM_STATS({
537 if (ret > 0 && ret != iov[1].iov_len)
538 {
539 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
540 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (ret - iov[1].iov_len));
541 }
542 });
543 }
544 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
545#endif
546
547 /* Update sbuf */
548 sb->sb_cc -= nn;
549 sb->sb_rptr += nn;
550 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
551 sb->sb_rptr -= sb->sb_datalen;
552
553 /*
554 * If in DRAIN mode, and there's no more data, set
555 * it CANTSENDMORE
556 */
557 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
558 sofcantsendmore(so);
559
560 SOCKET_UNLOCK(so);
561 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
562 return nn;
563}
564
565/*
566 * recvfrom() a UDP socket
567 */
568void
569sorecvfrom(PNATState pData, struct socket *so)
570{
571 ssize_t ret = 0;
572 struct sockaddr_in addr;
573 socklen_t addrlen = sizeof(struct sockaddr_in);
574
575 DEBUG_CALL("sorecvfrom");
576 DEBUG_ARG("so = %lx", (long)so);
577
578 if (so->so_type == IPPROTO_ICMP)
579 {
580 /* This is a "ping" reply */
581#ifdef RT_OS_WINDOWS
582 sorecvfrom_icmp_win(pData, so);
583#else /* RT_OS_WINDOWS */
584 sorecvfrom_icmp_unix(pData, so);
585#endif /* !RT_OS_WINDOWS */
586 udp_detach(pData, so);
587 }
588 else
589 {
590 /* A "normal" UDP packet */
591 struct mbuf *m;
592 ssize_t len;
593 u_long n = 0;
594#ifdef VBOX_WITH_SLIRP_BSD_MBUF
595 int size;
596#endif
597 int rc = 0;
598 static int signalled = 0;
599
600 QSOCKET_LOCK(udb);
601 SOCKET_LOCK(so);
602 QSOCKET_UNLOCK(udb);
603
604#ifndef VBOX_WITH_SLIRP_BSD_MBUF
605 if (!(m = m_get(pData)))
606 {
607 SOCKET_UNLOCK(so);
608 return;
609 }
610 /* adjust both parameters to maks M_FREEROOM calculate correct */
611 m->m_data += if_maxlinkhdr + sizeof(struct udphdr) + sizeof(struct ip);
612
613 /*
614 * XXX Shouldn't FIONREAD packets destined for port 53,
615 * but I don't know the max packet size for DNS lookups
616 */
617 len = M_FREEROOM(m);
618 /* if (so->so_fport != RT_H2N_U16_C(53)) */
619 rc = ioctlsocket(so->s, FIONREAD, &n);
620 if ( rc == -1
621 && ( errno == EAGAIN
622 || errno == EWOULDBLOCK
623 || errno == EINPROGRESS
624 || errno == ENOTCONN))
625 {
626 m_freem(pData, m);
627 return;
628 }
629
630 Log2(("NAT: %R[natsock] ioctlsocket before read "
631 "(rc:%d errno:%d, n:%d)\n", so, rc, errno, n));
632
633 if (rc == -1 && signalled == 0)
634 {
635 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
636 signalled = 1;
637 m_freem(pData, m);
638 return;
639 }
640
641 if (rc != -1 && n > len)
642 {
643 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
644 m_inc(m, n);
645 len = M_FREEROOM(m);
646 }
647 ret = recvfrom(so->s, m->m_data, len, 0,
648 (struct sockaddr *)&addr, &addrlen);
649 Log2(("NAT: %R[natsock] ioctlsocket after read "
650 "(rc:%d errno:%d, n:%d) ret:%d, len:%d\n", so,
651 rc, errno, n, ret, len));
652#else
653 /*How many data has been received ?*/
654 /*
655 * 1. calculate how much we can read
656 * 2. read as much as possible
657 * 3. attach buffer to allocated header mbuf
658 */
659 rc = ioctlsocket(so->s, FIONREAD, &n);
660 if (rc == -1 && signalled == 0)
661 {
662 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
663 signalled = 1;
664 }
665
666 len = sizeof(struct udpiphdr) + ETH_HLEN;
667 if (n > (if_mtu - len))
668 {
669 n = if_mtu - len; /* can't read than we can put in the mbuf*/
670 }
671 len += n;
672
673 size = MCLBYTES;
674 if (len < MSIZE)
675 size = MCLBYTES;
676 else if (len < MCLBYTES)
677 size = MCLBYTES;
678 else if (len < MJUM9BYTES)
679 size = MJUM9BYTES;
680 else if (len < MJUM16BYTES)
681 size = MJUM16BYTES;
682 else
683 AssertMsgFailed(("Unsupported size"));
684
685 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
686 if (m == NULL)
687 return;
688 m->m_data += ETH_HLEN;
689 m->m_pkthdr.header = mtod(m, void *);
690 m->m_data += sizeof(struct udpiphdr);
691 ret = recvfrom(so->s, mtod(m, char *), n, 0,
692 (struct sockaddr *)&addr, &addrlen);
693 /* @todo (vvl) check which flags and type should be passed */
694#endif
695 m->m_len = ret;
696 if (ret < 0)
697 {
698 u_char code = ICMP_UNREACH_PORT;
699
700 if (errno == EHOSTUNREACH)
701 code = ICMP_UNREACH_HOST;
702 else if (errno == ENETUNREACH)
703 code = ICMP_UNREACH_NET;
704
705 m_freem(pData, m);
706 if ( errno == EAGAIN
707 || errno == EWOULDBLOCK
708 || errno == EINPROGRESS
709 || errno == ENOTCONN)
710 {
711 return;
712 }
713
714 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
715 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
716 so->so_m = NULL;
717 }
718 else
719 {
720 /*
721 * Hack: domain name lookup will be used the most for UDP,
722 * and since they'll only be used once there's no need
723 * for the 4 minute (or whatever) timeout... So we time them
724 * out much quicker (10 seconds for now...)
725 */
726 if (so->so_expire)
727 {
728 if (so->so_fport != RT_H2N_U16_C(53))
729 so->so_expire = curtime + SO_EXPIRE;
730 }
731 /*
732 * last argument should be changed if Slirp will inject IP attributes
733 * Note: Here we can't check if dnsproxy's sent initial request
734 */
735#ifndef VBOX_WITH_SLIRP_BSD_MBUF
736 if (so->so_fport == RT_H2N_U16_C(53))
737 dnsproxy_answer(pData, so, m);
738#endif
739
740#if 0
741 if (m->m_len == len)
742 {
743 m_inc(m, MINCSIZE);
744 m->m_len = 0;
745 }
746#endif
747
748 /*
749 * If this packet was destined for CTL_ADDR,
750 * make it look like that's where it came from, done by udp_output
751 */
752 udp_output(pData, so, m, &addr);
753 SOCKET_UNLOCK(so);
754 } /* rx error */
755 } /* if ping packet */
756}
757
758/*
759 * sendto() a socket
760 */
761int
762sosendto(PNATState pData, struct socket *so, struct mbuf *m)
763{
764 int ret;
765 struct sockaddr_in *paddr;
766 struct sockaddr addr;
767#if 0
768 struct sockaddr_in host_addr;
769#endif
770#ifdef VBOX_WITH_SLIRP_BSD_MBUF
771 caddr_t buf;
772 int mlen;
773#endif
774
775 DEBUG_CALL("sosendto");
776 DEBUG_ARG("so = %lx", (long)so);
777 DEBUG_ARG("m = %lx", (long)m);
778
779 memset(&addr, 0, sizeof(struct sockaddr));
780#ifdef RT_OS_DARWIN
781 addr.sa_len = sizeof(struct sockaddr_in);
782#endif
783 paddr = (struct sockaddr_in *)&addr;
784 paddr->sin_family = AF_INET;
785 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
786 {
787 /* It's an alias */
788 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
789 switch(last_byte)
790 {
791#if 0
792 /* handle this case at 'default:' */
793 case CTL_BROADCAST:
794 addr.sin_addr.s_addr = INADDR_BROADCAST;
795 /* Send the packet to host to fully emulate broadcast */
796 /** @todo r=klaus: on Linux host this causes the host to receive
797 * the packet twice for some reason. And I cannot find any place
798 * in the man pages which states that sending a broadcast does not
799 * reach the host itself. */
800 host_addr.sin_family = AF_INET;
801 host_addr.sin_port = so->so_fport;
802 host_addr.sin_addr = our_addr;
803 sendto(so->s, m->m_data, m->m_len, 0,
804 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
805 break;
806#endif
807 case CTL_DNS:
808 case CTL_ALIAS:
809 default:
810 if (last_byte == ~pData->netmask)
811 paddr->sin_addr.s_addr = INADDR_BROADCAST;
812 else
813 paddr->sin_addr = loopback_addr;
814 break;
815 }
816 }
817 else
818 paddr->sin_addr = so->so_faddr;
819 paddr->sin_port = so->so_fport;
820
821 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
822 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
823
824 /* Don't care what port we get */
825#ifndef VBOX_WITH_SLIRP_BSD_MBUF
826 ret = sendto(so->s, m->m_data, m->m_len, 0, &addr, sizeof (struct sockaddr_in));
827#else
828 mlen = m_length(m, NULL);
829 buf = RTMemAlloc(mlen);
830 if (buf == NULL)
831 {
832 return -1;
833 }
834 m_copydata(m, 0, mlen, buf);
835 ret = sendto(so->s, buf, mlen, 0,
836 (struct sockaddr *)&addr, sizeof (struct sockaddr));
837#endif
838 if (ret < 0)
839 {
840 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
841 return -1;
842 }
843
844 /*
845 * Kill the socket if there's no reply in 4 minutes,
846 * but only if it's an expirable socket
847 */
848 if (so->so_expire)
849 so->so_expire = curtime + SO_EXPIRE;
850 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
851 return 0;
852}
853
854/*
855 * XXX This should really be tcp_listen
856 */
857struct socket *
858solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
859{
860 struct sockaddr_in addr;
861 struct socket *so;
862 socklen_t addrlen = sizeof(addr);
863 int s, opt = 1;
864 int status;
865
866 DEBUG_CALL("solisten");
867 DEBUG_ARG("port = %d", port);
868 DEBUG_ARG("laddr = %x", laddr);
869 DEBUG_ARG("lport = %d", lport);
870 DEBUG_ARG("flags = %x", flags);
871
872 if ((so = socreate()) == NULL)
873 {
874 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
875 return NULL;
876 }
877
878 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
879 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
880 {
881 RTMemFree(so);
882 return NULL;
883 }
884
885 SOCKET_LOCK_CREATE(so);
886 SOCKET_LOCK(so);
887 QSOCKET_LOCK(tcb);
888 insque(pData, so,&tcb);
889 NSOCK_INC();
890 QSOCKET_UNLOCK(tcb);
891
892 /*
893 * SS_FACCEPTONCE sockets must time out.
894 */
895 if (flags & SS_FACCEPTONCE)
896 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
897
898 so->so_state = (SS_FACCEPTCONN|flags);
899 so->so_lport = lport; /* Kept in network format */
900 so->so_laddr.s_addr = laddr; /* Ditto */
901
902 memset(&addr, 0, sizeof(addr));
903#ifdef RT_OS_DARWIN
904 addr.sin_len = sizeof(addr);
905#endif
906 addr.sin_family = AF_INET;
907 addr.sin_addr.s_addr = bind_addr;
908 addr.sin_port = port;
909
910 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
911 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
912 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
913 || (listen(s, 1) < 0))
914 {
915#ifdef RT_OS_WINDOWS
916 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
917 closesocket(s);
918 QSOCKET_LOCK(tcb);
919 sofree(pData, so);
920 QSOCKET_UNLOCK(tcb);
921 /* Restore the real errno */
922 WSASetLastError(tmperrno);
923#else
924 int tmperrno = errno; /* Don't clobber the real reason we failed */
925 close(s);
926 QSOCKET_LOCK(tcb);
927 sofree(pData, so);
928 QSOCKET_UNLOCK(tcb);
929 /* Restore the real errno */
930 errno = tmperrno;
931#endif
932 return NULL;
933 }
934 fd_nonblock(s);
935 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
936
937 getsockname(s,(struct sockaddr *)&addr,&addrlen);
938 so->so_fport = addr.sin_port;
939 /* set socket buffers */
940 opt = pData->socket_rcv;
941 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
942 if (status < 0)
943 {
944 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
945 goto no_sockopt;
946 }
947 opt = pData->socket_snd;
948 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
949 if (status < 0)
950 {
951 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
952 goto no_sockopt;
953 }
954no_sockopt:
955 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
956 so->so_faddr = alias_addr;
957 else
958 so->so_faddr = addr.sin_addr;
959
960 so->s = s;
961 SOCKET_UNLOCK(so);
962 return so;
963}
964
/*
 * Data is available in so_rcv.
 *
 * The idea is to write() that data to the socket right away, but this is
 * not done yet ("XXX not yet..."): the disabled draft called sowrite(so)
 * and cleared so->s from writefds.  For now this is a no-op.
 */
void
sorwakeup(struct socket *so)
{
    /* Intentionally empty. */
}
978
/*
 * Data has been freed in so_snd, so there is room for a read() if we
 * want one.  Nothing is read here; that is left to the main loop.
 */
void
sowwakeup(struct socket *so)
{
    /* Intentionally empty. */
}
988
989/*
990 * Various session state calls
991 * XXX Should be #define's
992 * The socket state stuff needs work, these often get call 2 or 3
993 * times each when only 1 was needed
994 */
995void
996soisfconnecting(struct socket *so)
997{
998 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
999 SS_FCANTSENDMORE|SS_FWDRAIN);
1000 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1001}
1002
1003void
1004soisfconnected(struct socket *so)
1005{
1006 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1007 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1008}
1009
1010void
1011sofcantrcvmore(struct socket *so)
1012{
1013 if ((so->so_state & SS_NOFDREF) == 0)
1014 {
1015 shutdown(so->s, 0);
1016 }
1017 so->so_state &= ~(SS_ISFCONNECTING);
1018 if (so->so_state & SS_FCANTSENDMORE)
1019 so->so_state = SS_NOFDREF; /* Don't select it */
1020 /* XXX close() here as well? */
1021 else
1022 so->so_state |= SS_FCANTRCVMORE;
1023}
1024
1025void
1026sofcantsendmore(struct socket *so)
1027{
1028 if ((so->so_state & SS_NOFDREF) == 0)
1029 shutdown(so->s, 1); /* send FIN to fhost */
1030
1031 so->so_state &= ~(SS_ISFCONNECTING);
1032 if (so->so_state & SS_FCANTRCVMORE)
1033 so->so_state = SS_NOFDREF; /* as above */
1034 else
1035 so->so_state |= SS_FCANTSENDMORE;
1036}
1037
/*
 * Placeholder for the "foreign side disconnected" transition.
 *
 * Currently does nothing ("XXX Do nothing ... ?").  The disabled draft
 * cleared SS_ISFCONNECTING|SS_ISFCONNECTED, close()d so->s and set the
 * state to SS_ISFDISCONNECTED.
 */
void
soisfdisconnected(struct socket *so)
{
    /* Intentionally empty. */
}
1050
1051/*
1052 * Set write drain mode
1053 * Set CANTSENDMORE once all data has been write()n
1054 */
1055void
1056sofwdrain(struct socket *so)
1057{
1058 if (so->so_rcv.sb_cc)
1059 so->so_state |= SS_FWDRAIN;
1060 else
1061 sofcantsendmore(so);
1062}
1063
1064static void
1065send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
1066{
1067 struct ip *ip;
1068 uint32_t dst, src;
1069 char ip_copy[256];
1070 struct icmp *icp;
1071 int old_ip_len = 0;
1072 int hlen, original_hlen = 0;
1073 struct mbuf *m;
1074 struct icmp_msg *icm;
1075 uint8_t proto;
1076 int type = 0;
1077#ifndef VBOX_WITH_SLIRP_BSD_MBUF
1078 int m_room;
1079#endif
1080
1081 ip = (struct ip *)buff;
1082 /* Fix ip->ip_len to contain the total packet length including the header
1083 * in _host_ byte order for all OSes. On Darwin, that value already is in
1084 * host byte order. Solaris and Darwin report only the payload. */
1085#ifndef RT_OS_DARWIN
1086 ip->ip_len = RT_N2H_U16(ip->ip_len);
1087#endif
1088 hlen = (ip->ip_hl << 2);
1089#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1090 ip->ip_len += hlen;
1091#endif
1092 if (ip->ip_len < hlen + ICMP_MINLEN)
1093 {
1094 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1095 return;
1096 }
1097 icp = (struct icmp *)((char *)ip + hlen);
1098
1099 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1100 if ( icp->icmp_type != ICMP_ECHOREPLY
1101 && icp->icmp_type != ICMP_TIMXCEED
1102 && icp->icmp_type != ICMP_UNREACH)
1103 {
1104 return;
1105 }
1106
1107 /*
1108 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1109 * ICMP_ECHOREPLY assuming data 0
1110 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1111 */
1112 if (ip->ip_len < hlen + 8)
1113 {
1114 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1115 return;
1116 }
1117
1118 type = icp->icmp_type;
1119 if ( type == ICMP_TIMXCEED
1120 || type == ICMP_UNREACH)
1121 {
1122 /*
1123 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1124 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1125 */
1126 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1127 {
1128 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1129 return;
1130 }
1131 ip = &icp->icmp_ip;
1132 }
1133
1134 icm = icmp_find_original_mbuf(pData, ip);
1135 if (icm == NULL)
1136 {
1137 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1138 return;
1139 }
1140
1141 m = icm->im_m;
1142 Assert(m != NULL);
1143
1144 src = addr->sin_addr.s_addr;
1145 if (type == ICMP_ECHOREPLY)
1146 {
1147 struct ip *ip0 = mtod(m, struct ip *);
1148 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1149 if (icp0->icmp_type != ICMP_ECHO)
1150 {
1151 Log(("NAT: we haven't found echo for this reply\n"));
1152 return;
1153 }
1154 /*
1155 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1156 * IP header combined by OS network stack, our local copy of IP header contians values
1157 * in host byte order so no byte order conversion is required. IP headers fields are converting
1158 * in ip_output0 routine only.
1159 */
1160 if ( (ip->ip_len - hlen)
1161 != (ip0->ip_len - (ip0->ip_hl << 2)))
1162 {
1163 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1164 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1165 return;
1166 }
1167 }
1168
1169 /* ip points on origianal ip header */
1170 ip = mtod(m, struct ip *);
1171 proto = ip->ip_p;
1172 /* Now ip is pointing on header we've sent from guest */
1173 if ( icp->icmp_type == ICMP_TIMXCEED
1174 || icp->icmp_type == ICMP_UNREACH)
1175 {
1176 old_ip_len = (ip->ip_hl << 2) + 64;
1177 if (old_ip_len > sizeof(ip_copy))
1178 old_ip_len = sizeof(ip_copy);
1179 memcpy(ip_copy, ip, old_ip_len);
1180 }
1181
1182 /* source address from original IP packet*/
1183 dst = ip->ip_src.s_addr;
1184
1185 /* overide ther tail of old packet */
1186 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1187 original_hlen = ip->ip_hl << 2;
1188 /* saves original ip header and options */
1189#ifdef VBOX_WITH_SLIRP_BSD_MBUF
1190 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1191 ip->ip_len = m_length(m, NULL);
1192#else
1193 /* m_room space in the saved m buffer */
1194 m_room = M_ROOM(m);
1195 if (m_room < len - hlen + original_hlen)
1196 {
1197 /* we need involve ether header length into new buffer buffer calculation */
1198 m_inc(m, if_maxlinkhdr + len - hlen + original_hlen);
1199 if (m->m_size < if_maxlinkhdr + len - hlen + original_hlen)
1200 {
1201 Log(("send_icmp_to_guest: extending buffer was failed (packet is dropped)\n"));
1202 return;
1203 }
1204 }
1205 memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
1206 m->m_len = len - hlen + original_hlen;
1207 ip->ip_len = m->m_len;
1208#endif
1209 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1210
1211 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1212 type = icp->icmp_type;
1213 if ( type == ICMP_TIMXCEED
1214 || type == ICMP_UNREACH)
1215 {
1216 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1217 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1218 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1219 }
1220
1221 ip->ip_src.s_addr = src;
1222 ip->ip_dst.s_addr = dst;
1223 icmp_reflect(pData, m);
1224 LIST_REMOVE(icm, im_list);
1225 /* Don't call m_free here*/
1226
1227 if ( type == ICMP_TIMXCEED
1228 || type == ICMP_UNREACH)
1229 {
1230 icm->im_so->so_m = NULL;
1231 switch (proto)
1232 {
1233 case IPPROTO_UDP:
1234 /*XXX: so->so_m already freed so we shouldn't call sofree */
1235 udp_detach(pData, icm->im_so);
1236 break;
1237 case IPPROTO_TCP:
1238 /*close tcp should be here */
1239 break;
1240 default:
1241 /* do nothing */
1242 break;
1243 }
1244 }
1245 RTMemFree(icm);
1246}
1247
1248#ifdef RT_OS_WINDOWS
1249static void
1250sorecvfrom_icmp_win(PNATState pData, struct socket *so)
1251{
1252 int len;
1253 int i;
1254 struct ip *ip;
1255 struct mbuf *m;
1256 struct icmp *icp;
1257 struct icmp_msg *icm;
1258 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
1259 uint32_t src;
1260 ICMP_ECHO_REPLY *icr;
1261 int hlen = 0;
1262 int data_len = 0;
1263 int nbytes = 0;
1264 u_char code = ~0;
1265 int out_len;
1266 int size;
1267
1268 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
1269 if (len < 0)
1270 {
1271 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
1272 return;
1273 }
1274 if (len == 0)
1275 return; /* no error */
1276
1277 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
1278 for (i = 0; i < len; ++i)
1279 {
1280 switch(icr[i].Status)
1281 {
1282 case IP_DEST_HOST_UNREACHABLE:
1283 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1284 case IP_DEST_NET_UNREACHABLE:
1285 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1286 case IP_DEST_PROT_UNREACHABLE:
1287 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1288 /* UNREACH error inject here */
1289 case IP_DEST_PORT_UNREACHABLE:
1290 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1291 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1292 so->so_m = NULL;
1293 break;
1294 case IP_SUCCESS: /* echo replied */
1295# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1296 m = m_get(pData);
1297# else
1298 out_len = ETH_HLEN + sizeof(struct ip) + 8;
1299 size;
1300 size = MCLBYTES;
1301 if (out_len < MSIZE)
1302 size = MCLBYTES;
1303 else if (out_len < MCLBYTES)
1304 size = MCLBYTES;
1305 else if (out_len < MJUM9BYTES)
1306 size = MJUM9BYTES;
1307 else if (out_len < MJUM16BYTES)
1308 size = MJUM16BYTES;
1309 else
1310 AssertMsgFailed(("Unsupported size"));
1311
1312 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
1313 if (m == NULL)
1314 return;
1315# endif
1316 m->m_len = 0;
1317 m->m_data += if_maxlinkhdr;
1318 ip = mtod(m, struct ip *);
1319 ip->ip_src.s_addr = icr[i].Address;
1320 ip->ip_p = IPPROTO_ICMP;
1321 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1322 data_len = sizeof(struct ip);
1323 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1324 ip->ip_ttl = icr[i].Options.Ttl;
1325
1326 icp = (struct icmp *)&ip[1]; /* no options */
1327 icp->icmp_type = ICMP_ECHOREPLY;
1328 icp->icmp_code = 0;
1329 icp->icmp_id = so->so_icmp_id;
1330 icp->icmp_seq = so->so_icmp_seq;
1331
1332 data_len += ICMP_MINLEN;
1333
1334# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1335 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
1336 memcpy(icp->icmp_data, icr[i].Data, nbytes);
1337# else
1338 hlen = (ip->ip_hl << 2);
1339 m->m_pkthdr.header = mtod(m, void *);
1340 m->m_len = data_len;
1341
1342 m_copyback(pData, m, hlen + 8, icr[i].DataSize, icr[i].Data);
1343# endif
1344
1345 data_len += icr[i].DataSize;
1346
1347 ip->ip_len = data_len;
1348 m->m_len = ip->ip_len;
1349
1350 icmp_reflect(pData, m);
1351 break;
1352 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1353
1354 ip_broken = icr[i].Data;
1355 icm = icmp_find_original_mbuf(pData, ip_broken);
1356 if (icm == NULL) {
1357 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1358 return;
1359 }
1360 m = icm->im_m;
1361 ip = mtod(m, struct ip *);
1362 ip->ip_ttl = icr[i].Options.Ttl;
1363 src = ip->ip_src.s_addr;
1364 ip->ip_dst.s_addr = src;
1365 ip->ip_dst.s_addr = icr[i].Address;
1366
1367 hlen = (ip->ip_hl << 2);
1368 icp = (struct icmp *)((char *)ip + hlen);
1369 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1370 data_len = (ip_broken->ip_hl << 2) + 64;
1371
1372#ifndef VBOX_WITH_SLIRP_BSD_MBUF
1373 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
1374 memcpy(icp->icmp_data, ip_broken, nbytes);
1375#else
1376 m->m_len = data_len;
1377 m->m_pkthdr.header = mtod(m, void *);
1378 m_copyback(pData, m, ip->ip_hl >> 2, icr[i].DataSize, icr[i].Data);
1379#endif
1380 icmp_reflect(pData, m);
1381 break;
1382 default:
1383 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1384 break;
1385 }
1386 }
1387}
1388#else /* !RT_OS_WINDOWS */
1389static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1390{
1391 struct sockaddr_in addr;
1392 socklen_t addrlen = sizeof(struct sockaddr_in);
1393 struct ip ip;
1394 char *buff;
1395 int len = 0;
1396
1397 /* 1- step: read the ip header */
1398 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1399 (struct sockaddr *)&addr, &addrlen);
1400 if ( len < 0
1401 && ( errno == EAGAIN
1402 || errno == EWOULDBLOCK
1403 || errno == EINPROGRESS
1404 || errno == ENOTCONN))
1405 {
1406 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1407 return;
1408 }
1409
1410 if ( len < sizeof(struct ip)
1411 || len < 0
1412 || len == 0)
1413 {
1414 u_char code;
1415 code = ICMP_UNREACH_PORT;
1416
1417 if (errno == EHOSTUNREACH)
1418 code = ICMP_UNREACH_HOST;
1419 else if (errno == ENETUNREACH)
1420 code = ICMP_UNREACH_NET;
1421
1422 LogRel((" udp icmp rx errno = %d-%s\n",
1423 errno, strerror(errno)));
1424 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1425 so->so_m = NULL;
1426 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm \n"));
1427 return;
1428 }
1429 /* basic check of IP header */
1430 if ( ip.ip_v != IPVERSION
1431# ifndef RT_OS_DARWIN
1432 || ip.ip_p != IPPROTO_ICMP
1433# endif
1434 )
1435 {
1436 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4 \n"));
1437 return;
1438 }
1439# ifndef RT_OS_DARWIN
1440 /* Darwin reports the IP length already in host byte order. */
1441 ip.ip_len = RT_N2H_U16(ip.ip_len);
1442# endif
1443# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1444 /* Solaris and Darwin report the payload only */
1445 ip.ip_len += (ip.ip_hl << 2);
1446# endif
1447 /* Note: ip->ip_len in host byte order (all OS) */
1448 len = ip.ip_len;
1449 buff = RTMemAlloc(len);
1450 if (buff == NULL)
1451 {
1452 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1453 return;
1454 }
1455 /* 2 - step: we're reading rest of the datagramm to the buffer */
1456 addrlen = sizeof(struct sockaddr_in);
1457 memset(&addr, 0, addrlen);
1458 len = recvfrom(so->s, buff, len, 0,
1459 (struct sockaddr *)&addr, &addrlen);
1460 if ( len < 0
1461 && ( errno == EAGAIN
1462 || errno == EWOULDBLOCK
1463 || errno == EINPROGRESS
1464 || errno == ENOTCONN))
1465 {
1466 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1467 ip.ip_len));
1468 RTMemFree(buff);
1469 return;
1470 }
1471 if ( len < 0
1472 || len == 0)
1473 {
1474 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1475 errno, len, (ip.ip_len - sizeof(struct ip))));
1476 RTMemFree(buff);
1477 return;
1478 }
1479 /* len is modified in 2nd read, when the rest of the datagramm was read */
1480 send_icmp_to_guest(pData, buff, len, so, &addr);
1481 RTMemFree(buff);
1482}
1483#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette