VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 28915

Last change on this file since 28915 was 28800, checked in by vboxsync, 14 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 42.9 KB
Line 
1/* $Id: socket.c 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#define WANT_SYS_IOCTL_H
28#include <slirp.h>
29#include "ip_icmp.h"
30#include "main.h"
31#ifdef __sun__
32#include <sys/filio.h>
33#endif
34#include <VBox/pdmdrv.h>
35#if defined (RT_OS_WINDOWS)
36#include <iphlpapi.h>
37#include <icmpapi.h>
38#endif
39
40
41static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
42#ifdef RT_OS_WINDOWS
43static void sorecvfrom_icmp_win(PNATState, struct socket *);
44#else /* RT_OS_WINDOWS */
45static void sorecvfrom_icmp_unix(PNATState, struct socket *);
46#endif /* !RT_OS_WINDOWS */
47
/**
 * Initialisation hook of the socket layer.
 *
 * Deliberately a no-op: the function performs no work at all.
 */
void
so_init()
{
    /* nothing to set up */
}
52
53struct socket *
54solookup(struct socket *head, struct in_addr laddr,
55 u_int lport, struct in_addr faddr, u_int fport)
56{
57 struct socket *so;
58
59 for (so = head->so_next; so != head; so = so->so_next)
60 {
61 if ( so->so_lport == lport
62 && so->so_laddr.s_addr == laddr.s_addr
63 && so->so_faddr.s_addr == faddr.s_addr
64 && so->so_fport == fport)
65 return so;
66 }
67
68 return (struct socket *)NULL;
69}
70
71/*
72 * Create a new socket, initialise the fields
73 * It is the responsibility of the caller to
74 * insque() it into the correct linked-list
75 */
76struct socket *
77socreate()
78{
79 struct socket *so;
80
81 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
82 if (so)
83 {
84 so->so_state = SS_NOFDREF;
85 so->s = -1;
86#if !defined(RT_OS_WINDOWS)
87 so->so_poll_index = -1;
88#endif
89 }
90 return so;
91}
92
93/*
94 * remque and free a socket, clobber cache
95 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
96 * in sofree we don't know from which queue item beeing removed.
97 */
98void
99sofree(PNATState pData, struct socket *so)
100{
101 struct socket *so_prev = NULL;
102 if (so == tcp_last_so)
103 tcp_last_so = &tcb;
104 else if (so == udp_last_so)
105 udp_last_so = &udb;
106
107 /* check if mbuf haven't been already freed */
108 if (so->so_m != NULL)
109 m_freem(pData, so->so_m);
110#ifndef VBOX_WITH_SLIRP_MT
111 if (so->so_next && so->so_prev)
112 {
113 remque(pData, so); /* crashes if so is not in a queue */
114 NSOCK_DEC();
115 }
116
117 RTMemFree(so);
118#else
119 so->so_deleted = 1;
120#endif
121}
122
123#ifdef VBOX_WITH_SLIRP_MT
124void
125soread_queue(PNATState pData, struct socket *so, int *ret)
126{
127 *ret = soread(pData, so);
128}
129#endif
130
131/*
132 * Read from so's socket into sb_snd, updating all relevant sbuf fields
133 * NOTE: This will only be called if it is select()ed for reading, so
134 * a read() of 0 (or less) means it's disconnected
135 */
136int
137soread(PNATState pData, struct socket *so)
138{
139 int n, nn, lss, total;
140 struct sbuf *sb = &so->so_snd;
141 size_t len = sb->sb_datalen - sb->sb_cc;
142 struct iovec iov[2];
143 int mss = so->so_tcpcb->t_maxseg;
144
145 STAM_PROFILE_START(&pData->StatIOread, a);
146 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
147 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
148
149 QSOCKET_LOCK(tcb);
150 SOCKET_LOCK(so);
151 QSOCKET_UNLOCK(tcb);
152
153 DEBUG_CALL("soread");
154 DEBUG_ARG("so = %lx", (long)so);
155
156 /*
157 * No need to check if there's enough room to read.
158 * soread wouldn't have been called if there weren't
159 */
160
161 len = sb->sb_datalen - sb->sb_cc;
162
163 iov[0].iov_base = sb->sb_wptr;
164 iov[1].iov_base = 0;
165 iov[1].iov_len = 0;
166 if (sb->sb_wptr < sb->sb_rptr)
167 {
168 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
169 /* Should never succeed, but... */
170 if (iov[0].iov_len > len)
171 iov[0].iov_len = len;
172 if (iov[0].iov_len > mss)
173 iov[0].iov_len -= iov[0].iov_len%mss;
174 n = 1;
175 }
176 else
177 {
178 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
179 /* Should never succeed, but... */
180 if (iov[0].iov_len > len)
181 iov[0].iov_len = len;
182 len -= iov[0].iov_len;
183 if (len)
184 {
185 iov[1].iov_base = sb->sb_data;
186 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
187 if (iov[1].iov_len > len)
188 iov[1].iov_len = len;
189 total = iov[0].iov_len + iov[1].iov_len;
190 if (total > mss)
191 {
192 lss = total % mss;
193 if (iov[1].iov_len > lss)
194 {
195 iov[1].iov_len -= lss;
196 n = 2;
197 }
198 else
199 {
200 lss -= iov[1].iov_len;
201 iov[0].iov_len -= lss;
202 n = 1;
203 }
204 }
205 else
206 n = 2;
207 }
208 else
209 {
210 if (iov[0].iov_len > mss)
211 iov[0].iov_len -= iov[0].iov_len%mss;
212 n = 1;
213 }
214 }
215
216#ifdef HAVE_READV
217 nn = readv(so->s, (struct iovec *)iov, n);
218 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
219#else
220 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
221#endif
222 if (nn <= 0)
223 {
224 /*
225 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
226 * _could_ mean that the connection is closed. But we will receive an
227 * FD_CLOSE event later if the connection was _really_ closed. With
228 * www.youtube.com I see this very often. Closing the socket too early
229 * would be dangerous.
230 */
231 int status;
232 unsigned long pending = 0;
233 status = ioctlsocket(so->s, FIONREAD, &pending);
234 if (status < 0)
235 LogRel(("NAT:error in WSAIoctl: %d\n", errno));
236 if (nn == 0 && (pending != 0))
237 {
238 SOCKET_UNLOCK(so);
239 STAM_PROFILE_STOP(&pData->StatIOread, a);
240 return 0;
241 }
242 if ( nn < 0
243 && ( errno == EINTR
244 || errno == EAGAIN
245 || errno == EWOULDBLOCK))
246 {
247 SOCKET_UNLOCK(so);
248 STAM_PROFILE_STOP(&pData->StatIOread, a);
249 return 0;
250 }
251 else
252 {
253 /* nn == 0 means peer has performed an orderly shutdown */
254 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
255 nn, errno, strerror(errno)));
256 sofcantrcvmore(so);
257 tcp_sockclosed(pData, sototcpcb(so));
258 SOCKET_UNLOCK(so);
259 STAM_PROFILE_STOP(&pData->StatIOread, a);
260 return -1;
261 }
262 }
263 STAM_STATS(
264 if (n == 1)
265 {
266 STAM_COUNTER_INC(&pData->StatIORead_in_1);
267 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
268 }
269 else
270 {
271 STAM_COUNTER_INC(&pData->StatIORead_in_2);
272 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
273 }
274 );
275
276#ifndef HAVE_READV
277 /*
278 * If there was no error, try and read the second time round
279 * We read again if n = 2 (ie, there's another part of the buffer)
280 * and we read as much as we could in the first read
281 * We don't test for <= 0 this time, because there legitimately
282 * might not be any more data (since the socket is non-blocking),
283 * a close will be detected on next iteration.
284 * A return of -1 wont (shouldn't) happen, since it didn't happen above
285 */
286 if (n == 2 && nn == iov[0].iov_len)
287 {
288 int ret;
289 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
290 if (ret > 0)
291 nn += ret;
292 STAM_STATS(
293 if (ret > 0)
294 {
295 STAM_COUNTER_INC(&pData->StatIORead_in_2);
296 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
297 }
298 );
299 }
300
301 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
302#endif
303
304 /* Update fields */
305 sb->sb_cc += nn;
306 sb->sb_wptr += nn;
307 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
308 sb->sb_wptr -= sb->sb_datalen;
309 STAM_PROFILE_STOP(&pData->StatIOread, a);
310 SOCKET_UNLOCK(so);
311 return nn;
312}
313
314/*
315 * Get urgent data
316 *
317 * When the socket is created, we set it SO_OOBINLINE,
318 * so when OOB data arrives, we soread() it and everything
319 * in the send buffer is sent as urgent data
320 */
321void
322sorecvoob(PNATState pData, struct socket *so)
323{
324 struct tcpcb *tp = sototcpcb(so);
325 ssize_t ret;
326
327 DEBUG_CALL("sorecvoob");
328 DEBUG_ARG("so = %lx", (long)so);
329
330 /*
331 * We take a guess at how much urgent data has arrived.
332 * In most situations, when urgent data arrives, the next
333 * read() should get all the urgent data. This guess will
334 * be wrong however if more data arrives just after the
335 * urgent data, or the read() doesn't return all the
336 * urgent data.
337 */
338 ret = soread(pData, so);
339 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
340 tp->t_force = 1;
341 tcp_output(pData, tp);
342 tp->t_force = 0;
343}
344
345/*
346 * Send urgent data
347 * There's a lot duplicated code here, but...
348 */
349int
350sosendoob(struct socket *so)
351{
352 struct sbuf *sb = &so->so_rcv;
353 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
354
355 int n, len;
356
357 DEBUG_CALL("sosendoob");
358 DEBUG_ARG("so = %lx", (long)so);
359 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
360
361 if (so->so_urgc > sizeof(buff))
362 so->so_urgc = sizeof(buff); /* XXX */
363
364 if (sb->sb_rptr < sb->sb_wptr)
365 {
366 /* We can send it directly */
367 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
368 so->so_urgc -= n;
369
370 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
371 n, so->so_urgc));
372 }
373 else
374 {
375 /*
376 * Since there's no sendv or sendtov like writev,
377 * we must copy all data to a linear buffer then
378 * send it all
379 */
380 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
381 if (len > so->so_urgc)
382 len = so->so_urgc;
383 memcpy(buff, sb->sb_rptr, len);
384 so->so_urgc -= len;
385 if (so->so_urgc)
386 {
387 n = sb->sb_wptr - sb->sb_data;
388 if (n > so->so_urgc)
389 n = so->so_urgc;
390 memcpy(buff + len, sb->sb_data, n);
391 so->so_urgc -= n;
392 len += n;
393 }
394 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
395#ifdef DEBUG
396 if (n != len)
397 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
398#endif
399 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
400 n, so->so_urgc));
401 }
402
403 sb->sb_cc -= n;
404 sb->sb_rptr += n;
405 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
406 sb->sb_rptr -= sb->sb_datalen;
407
408 return n;
409}
410
411/*
412 * Write data from so_rcv to so's socket,
413 * updating all sbuf field as necessary
414 */
415int
416sowrite(PNATState pData, struct socket *so)
417{
418 int n, nn;
419 struct sbuf *sb = &so->so_rcv;
420 size_t len = sb->sb_cc;
421 struct iovec iov[2];
422
423 STAM_PROFILE_START(&pData->StatIOwrite, a);
424 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
425 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
426 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
427 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
428 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
429 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
430 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
431 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
432 DEBUG_CALL("sowrite");
433 DEBUG_ARG("so = %lx", (long)so);
434 QSOCKET_LOCK(tcb);
435 SOCKET_LOCK(so);
436 QSOCKET_UNLOCK(tcb);
437 if (so->so_urgc)
438 {
439 sosendoob(so);
440 if (sb->sb_cc == 0)
441 {
442 SOCKET_UNLOCK(so);
443 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
444 return 0;
445 }
446 }
447
448 /*
449 * No need to check if there's something to write,
450 * sowrite wouldn't have been called otherwise
451 */
452
453 len = sb->sb_cc;
454
455 iov[0].iov_base = sb->sb_rptr;
456 iov[1].iov_base = 0;
457 iov[1].iov_len = 0;
458 if (sb->sb_rptr < sb->sb_wptr)
459 {
460 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
461 /* Should never succeed, but... */
462 if (iov[0].iov_len > len)
463 iov[0].iov_len = len;
464 n = 1;
465 }
466 else
467 {
468 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
469 if (iov[0].iov_len > len)
470 iov[0].iov_len = len;
471 len -= iov[0].iov_len;
472 if (len)
473 {
474 iov[1].iov_base = sb->sb_data;
475 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
476 if (iov[1].iov_len > len)
477 iov[1].iov_len = len;
478 n = 2;
479 }
480 else
481 n = 1;
482 }
483 STAM_STATS({
484 if (n == 1)
485 {
486 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
487 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
488 }
489 else
490 {
491 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
492 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
493 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
494 }
495 });
496 /* Check if there's urgent data to send, and if so, send it */
497#ifdef HAVE_READV
498 nn = writev(so->s, (const struct iovec *)iov, n);
499 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
500#else
501 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
502#endif
503 /* This should never happen, but people tell me it does *shrug* */
504 if ( nn < 0
505 && ( errno == EAGAIN
506 || errno == EINTR
507 || errno == EWOULDBLOCK))
508 {
509 SOCKET_UNLOCK(so);
510 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
511 return 0;
512 }
513
514 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
515 {
516 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
517 so->so_state, errno));
518 sofcantsendmore(so);
519 tcp_sockclosed(pData, sototcpcb(so));
520 SOCKET_UNLOCK(so);
521 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
522 return -1;
523 }
524
525#ifndef HAVE_READV
526 if (n == 2 && nn == iov[0].iov_len)
527 {
528 int ret;
529 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
530 if (ret > 0)
531 nn += ret;
532 STAM_STATS({
533 if (ret > 0 && ret != iov[1].iov_len)
534 {
535 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
536 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (ret - iov[1].iov_len));
537 }
538 });
539 }
540 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
541#endif
542
543 /* Update sbuf */
544 sb->sb_cc -= nn;
545 sb->sb_rptr += nn;
546 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
547 sb->sb_rptr -= sb->sb_datalen;
548
549 /*
550 * If in DRAIN mode, and there's no more data, set
551 * it CANTSENDMORE
552 */
553 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
554 sofcantsendmore(so);
555
556 SOCKET_UNLOCK(so);
557 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
558 return nn;
559}
560
561/*
562 * recvfrom() a UDP socket
563 */
564void
565sorecvfrom(PNATState pData, struct socket *so)
566{
567 ssize_t ret = 0;
568 struct sockaddr_in addr;
569 socklen_t addrlen = sizeof(struct sockaddr_in);
570
571 DEBUG_CALL("sorecvfrom");
572 DEBUG_ARG("so = %lx", (long)so);
573
574 if (so->so_type == IPPROTO_ICMP)
575 {
576 /* This is a "ping" reply */
577#ifdef RT_OS_WINDOWS
578 sorecvfrom_icmp_win(pData, so);
579#else /* RT_OS_WINDOWS */
580 sorecvfrom_icmp_unix(pData, so);
581#endif /* !RT_OS_WINDOWS */
582 udp_detach(pData, so);
583 }
584 else
585 {
586 /* A "normal" UDP packet */
587 struct mbuf *m;
588 ssize_t len;
589 u_long n = 0;
590#ifdef VBOX_WITH_SLIRP_BSD_MBUF
591 int size;
592#endif
593 int rc = 0;
594 static int signalled = 0;
595
596 QSOCKET_LOCK(udb);
597 SOCKET_LOCK(so);
598 QSOCKET_UNLOCK(udb);
599
600#ifndef VBOX_WITH_SLIRP_BSD_MBUF
601 if (!(m = m_get(pData)))
602 {
603 SOCKET_UNLOCK(so);
604 return;
605 }
606 /* adjust both parameters to maks M_FREEROOM calculate correct */
607 m->m_data += if_maxlinkhdr + sizeof(struct udphdr) + sizeof(struct ip);
608
609 /*
610 * XXX Shouldn't FIONREAD packets destined for port 53,
611 * but I don't know the max packet size for DNS lookups
612 */
613 len = M_FREEROOM(m);
614 /* if (so->so_fport != RT_H2N_U16_C(53)) */
615 rc = ioctlsocket(so->s, FIONREAD, &n);
616 if ( rc == -1
617 && ( errno == EAGAIN
618 || errno == EWOULDBLOCK
619 || errno == EINPROGRESS
620 || errno == ENOTCONN))
621 {
622 m_freem(pData, m);
623 return;
624 }
625
626 Log2(("NAT: %R[natsock] ioctlsocket before read "
627 "(rc:%d errno:%d, n:%d)\n", so, rc, errno, n));
628
629 if (rc == -1 && signalled == 0)
630 {
631 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
632 signalled = 1;
633 m_freem(pData, m);
634 return;
635 }
636
637 if (rc != -1 && n > len)
638 {
639 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
640 m_inc(m, n);
641 len = M_FREEROOM(m);
642 }
643 ret = recvfrom(so->s, m->m_data, len, 0,
644 (struct sockaddr *)&addr, &addrlen);
645 Log2(("NAT: %R[natsock] ioctlsocket after read "
646 "(rc:%d errno:%d, n:%d) ret:%d, len:%d\n", so,
647 rc, errno, n, ret, len));
648#else
649 /*How many data has been received ?*/
650 /*
651 * 1. calculate how much we can read
652 * 2. read as much as possible
653 * 3. attach buffer to allocated header mbuf
654 */
655 rc = ioctlsocket(so->s, FIONREAD, &n);
656 if (rc == -1 && signalled == 0)
657 {
658 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
659 signalled = 1;
660 }
661
662 len = sizeof(struct udpiphdr) + ETH_HLEN;
663 if (n > (if_mtu - len))
664 {
665 n = if_mtu - len; /* can't read than we can put in the mbuf*/
666 }
667 len += n;
668
669 size = MCLBYTES;
670 if (len < MSIZE)
671 size = MCLBYTES;
672 else if (len < MCLBYTES)
673 size = MCLBYTES;
674 else if (len < MJUM9BYTES)
675 size = MJUM9BYTES;
676 else if (len < MJUM16BYTES)
677 size = MJUM16BYTES;
678 else
679 AssertMsgFailed(("Unsupported size"));
680
681 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
682 if (m == NULL)
683 return;
684 m->m_data += ETH_HLEN;
685 m->m_pkthdr.header = mtod(m, void *);
686 m->m_data += sizeof(struct udpiphdr);
687 ret = recvfrom(so->s, mtod(m, char *), n, 0,
688 (struct sockaddr *)&addr, &addrlen);
689 /* @todo (vvl) check which flags and type should be passed */
690#endif
691 m->m_len = ret;
692 if (ret < 0)
693 {
694 u_char code = ICMP_UNREACH_PORT;
695
696 if (errno == EHOSTUNREACH)
697 code = ICMP_UNREACH_HOST;
698 else if (errno == ENETUNREACH)
699 code = ICMP_UNREACH_NET;
700
701 m_freem(pData, m);
702 if ( errno == EAGAIN
703 || errno == EWOULDBLOCK
704 || errno == EINPROGRESS
705 || errno == ENOTCONN)
706 {
707 return;
708 }
709
710 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
711 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
712 so->so_m = NULL;
713 }
714 else
715 {
716 /*
717 * Hack: domain name lookup will be used the most for UDP,
718 * and since they'll only be used once there's no need
719 * for the 4 minute (or whatever) timeout... So we time them
720 * out much quicker (10 seconds for now...)
721 */
722 if (so->so_expire)
723 {
724 if (so->so_fport != RT_H2N_U16_C(53))
725 so->so_expire = curtime + SO_EXPIRE;
726 }
727 /*
728 * last argument should be changed if Slirp will inject IP attributes
729 * Note: Here we can't check if dnsproxy's sent initial request
730 */
731#ifndef VBOX_WITH_SLIRP_BSD_MBUF
732 if (so->so_fport == RT_H2N_U16_C(53))
733 dnsproxy_answer(pData, so, m);
734#endif
735
736#if 0
737 if (m->m_len == len)
738 {
739 m_inc(m, MINCSIZE);
740 m->m_len = 0;
741 }
742#endif
743
744 /*
745 * If this packet was destined for CTL_ADDR,
746 * make it look like that's where it came from, done by udp_output
747 */
748 udp_output(pData, so, m, &addr);
749 SOCKET_UNLOCK(so);
750 } /* rx error */
751 } /* if ping packet */
752}
753
754/*
755 * sendto() a socket
756 */
757int
758sosendto(PNATState pData, struct socket *so, struct mbuf *m)
759{
760 int ret;
761 struct sockaddr_in *paddr;
762 struct sockaddr addr;
763#if 0
764 struct sockaddr_in host_addr;
765#endif
766#ifdef VBOX_WITH_SLIRP_BSD_MBUF
767 caddr_t buf;
768 int mlen;
769#endif
770
771 DEBUG_CALL("sosendto");
772 DEBUG_ARG("so = %lx", (long)so);
773 DEBUG_ARG("m = %lx", (long)m);
774
775 memset(&addr, 0, sizeof(struct sockaddr));
776#ifdef RT_OS_DARWIN
777 addr.sa_len = sizeof(struct sockaddr_in);
778#endif
779 paddr = (struct sockaddr_in *)&addr;
780 paddr->sin_family = AF_INET;
781 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
782 {
783 /* It's an alias */
784 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
785 switch(last_byte)
786 {
787#if 0
788 /* handle this case at 'default:' */
789 case CTL_BROADCAST:
790 addr.sin_addr.s_addr = INADDR_BROADCAST;
791 /* Send the packet to host to fully emulate broadcast */
792 /** @todo r=klaus: on Linux host this causes the host to receive
793 * the packet twice for some reason. And I cannot find any place
794 * in the man pages which states that sending a broadcast does not
795 * reach the host itself. */
796 host_addr.sin_family = AF_INET;
797 host_addr.sin_port = so->so_fport;
798 host_addr.sin_addr = our_addr;
799 sendto(so->s, m->m_data, m->m_len, 0,
800 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
801 break;
802#endif
803 case CTL_DNS:
804 case CTL_ALIAS:
805 default:
806 if (last_byte == ~pData->netmask)
807 paddr->sin_addr.s_addr = INADDR_BROADCAST;
808 else
809 paddr->sin_addr = loopback_addr;
810 break;
811 }
812 }
813 else
814 paddr->sin_addr = so->so_faddr;
815 paddr->sin_port = so->so_fport;
816
817 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
818 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
819
820 /* Don't care what port we get */
821#ifndef VBOX_WITH_SLIRP_BSD_MBUF
822 ret = sendto(so->s, m->m_data, m->m_len, 0, &addr, sizeof (struct sockaddr_in));
823#else
824 mlen = m_length(m, NULL);
825 buf = RTMemAlloc(mlen);
826 if (buf == NULL)
827 {
828 return -1;
829 }
830 m_copydata(m, 0, mlen, buf);
831 ret = sendto(so->s, buf, mlen, 0,
832 (struct sockaddr *)&addr, sizeof (struct sockaddr));
833#endif
834 if (ret < 0)
835 {
836 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
837 return -1;
838 }
839
840 /*
841 * Kill the socket if there's no reply in 4 minutes,
842 * but only if it's an expirable socket
843 */
844 if (so->so_expire)
845 so->so_expire = curtime + SO_EXPIRE;
846 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
847 return 0;
848}
849
850/*
851 * XXX This should really be tcp_listen
852 */
853struct socket *
854solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
855{
856 struct sockaddr_in addr;
857 struct socket *so;
858 socklen_t addrlen = sizeof(addr);
859 int s, opt = 1;
860 int status;
861
862 DEBUG_CALL("solisten");
863 DEBUG_ARG("port = %d", port);
864 DEBUG_ARG("laddr = %x", laddr);
865 DEBUG_ARG("lport = %d", lport);
866 DEBUG_ARG("flags = %x", flags);
867
868 if ((so = socreate()) == NULL)
869 {
870 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
871 return NULL;
872 }
873
874 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
875 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
876 {
877 RTMemFree(so);
878 return NULL;
879 }
880
881 SOCKET_LOCK_CREATE(so);
882 SOCKET_LOCK(so);
883 QSOCKET_LOCK(tcb);
884 insque(pData, so,&tcb);
885 NSOCK_INC();
886 QSOCKET_UNLOCK(tcb);
887
888 /*
889 * SS_FACCEPTONCE sockets must time out.
890 */
891 if (flags & SS_FACCEPTONCE)
892 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
893
894 so->so_state = (SS_FACCEPTCONN|flags);
895 so->so_lport = lport; /* Kept in network format */
896 so->so_laddr.s_addr = laddr; /* Ditto */
897
898 memset(&addr, 0, sizeof(addr));
899#ifdef RT_OS_DARWIN
900 addr.sin_len = sizeof(addr);
901#endif
902 addr.sin_family = AF_INET;
903 addr.sin_addr.s_addr = bind_addr;
904 addr.sin_port = port;
905
906 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
907 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
908 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
909 || (listen(s, 1) < 0))
910 {
911#ifdef RT_OS_WINDOWS
912 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
913 closesocket(s);
914 QSOCKET_LOCK(tcb);
915 sofree(pData, so);
916 QSOCKET_UNLOCK(tcb);
917 /* Restore the real errno */
918 WSASetLastError(tmperrno);
919#else
920 int tmperrno = errno; /* Don't clobber the real reason we failed */
921 close(s);
922 QSOCKET_LOCK(tcb);
923 sofree(pData, so);
924 QSOCKET_UNLOCK(tcb);
925 /* Restore the real errno */
926 errno = tmperrno;
927#endif
928 return NULL;
929 }
930 fd_nonblock(s);
931 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
932
933 getsockname(s,(struct sockaddr *)&addr,&addrlen);
934 so->so_fport = addr.sin_port;
935 /* set socket buffers */
936 opt = pData->socket_rcv;
937 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
938 if (status < 0)
939 {
940 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
941 goto no_sockopt;
942 }
943 opt = pData->socket_snd;
944 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
945 if (status < 0)
946 {
947 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
948 goto no_sockopt;
949 }
950no_sockopt:
951 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
952 so->so_faddr = alias_addr;
953 else
954 so->so_faddr = addr.sin_addr;
955
956 so->s = s;
957 SOCKET_UNLOCK(so);
958 return so;
959}
960
/**
 * Notification that data is available in so_rcv.
 *
 * The idea is to just write() the data to the socket right away, but that
 * is not done yet (XXX); the disabled code below shows the intent.  The
 * function currently does nothing.
 *
 * @param   so  Socket whose receive buffer got data (unused).
 */
void
sorwakeup(struct socket *so)
{
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
    (void)so;
}
974
/**
 * Notification that data has been freed in so_snd.
 *
 * There would be room for a read() now, but for the time being no read is
 * done here; it is left to the main loop.  The function does nothing.
 *
 * @param   so  Socket whose send buffer got room (unused).
 */
void
sowwakeup(struct socket *so)
{
    (void)so;
}
984
985/*
986 * Various session state calls
987 * XXX Should be #define's
988 * The socket state stuff needs work, these often get called 2 or 3
989 * times each when only 1 was needed
990 */
991void
992soisfconnecting(struct socket *so)
993{
994 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
995 SS_FCANTSENDMORE|SS_FWDRAIN);
996 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
997}
998
999void
1000soisfconnected(struct socket *so)
1001{
1002 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1003 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1004}
1005
1006void
1007sofcantrcvmore(struct socket *so)
1008{
1009 if ((so->so_state & SS_NOFDREF) == 0)
1010 {
1011 shutdown(so->s, 0);
1012 }
1013 so->so_state &= ~(SS_ISFCONNECTING);
1014 if (so->so_state & SS_FCANTSENDMORE)
1015 so->so_state = SS_NOFDREF; /* Don't select it */
1016 /* XXX close() here as well? */
1017 else
1018 so->so_state |= SS_FCANTRCVMORE;
1019}
1020
1021void
1022sofcantsendmore(struct socket *so)
1023{
1024 if ((so->so_state & SS_NOFDREF) == 0)
1025 shutdown(so->s, 1); /* send FIN to fhost */
1026
1027 so->so_state &= ~(SS_ISFCONNECTING);
1028 if (so->so_state & SS_FCANTRCVMORE)
1029 so->so_state = SS_NOFDREF; /* as above */
1030 else
1031 so->so_state |= SS_FCANTSENDMORE;
1032}
1033
/**
 * Hook for "foreign side disconnected".
 *
 * The real handling is compiled out, so the function currently does nothing.
 *
 * @param   so  Socket that got disconnected (unused).
 */
void
soisfdisconnected(struct socket *so)
{
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
    (void)so;
}
1046
1047/*
1048 * Set write drain mode
1049 * Set CANTSENDMORE once all data has been write()n
1050 */
1051void
1052sofwdrain(struct socket *so)
1053{
1054 if (so->so_rcv.sb_cc)
1055 so->so_state |= SS_FWDRAIN;
1056 else
1057 sofcantsendmore(so);
1058}
1059
1060static void
1061send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
1062{
1063 struct ip *ip;
1064 uint32_t dst, src;
1065 char ip_copy[256];
1066 struct icmp *icp;
1067 int old_ip_len = 0;
1068 int hlen, original_hlen = 0;
1069 struct mbuf *m;
1070 struct icmp_msg *icm;
1071 uint8_t proto;
1072 int type = 0;
1073#ifndef VBOX_WITH_SLIRP_BSD_MBUF
1074 int m_room;
1075#endif
1076
1077 ip = (struct ip *)buff;
1078 /* Fix ip->ip_len to contain the total packet length including the header
1079 * in _host_ byte order for all OSes. On Darwin, that value already is in
1080 * host byte order. Solaris and Darwin report only the payload. */
1081#ifndef RT_OS_DARWIN
1082 ip->ip_len = RT_N2H_U16(ip->ip_len);
1083#endif
1084 hlen = (ip->ip_hl << 2);
1085#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1086 ip->ip_len += hlen;
1087#endif
1088 if (ip->ip_len < hlen + ICMP_MINLEN)
1089 {
1090 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1091 return;
1092 }
1093 icp = (struct icmp *)((char *)ip + hlen);
1094
1095 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1096 if ( icp->icmp_type != ICMP_ECHOREPLY
1097 && icp->icmp_type != ICMP_TIMXCEED
1098 && icp->icmp_type != ICMP_UNREACH)
1099 {
1100 return;
1101 }
1102
1103 /*
1104 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1105 * ICMP_ECHOREPLY assuming data 0
1106 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1107 */
1108 if (ip->ip_len < hlen + 8)
1109 {
1110 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1111 return;
1112 }
1113
1114 type = icp->icmp_type;
1115 if ( type == ICMP_TIMXCEED
1116 || type == ICMP_UNREACH)
1117 {
1118 /*
1119 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1120 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1121 */
1122 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1123 {
1124 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1125 return;
1126 }
1127 ip = &icp->icmp_ip;
1128 }
1129
1130 icm = icmp_find_original_mbuf(pData, ip);
1131 if (icm == NULL)
1132 {
1133 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1134 return;
1135 }
1136
1137 m = icm->im_m;
1138 Assert(m != NULL);
1139
1140 src = addr->sin_addr.s_addr;
1141 if (type == ICMP_ECHOREPLY)
1142 {
1143 struct ip *ip0 = mtod(m, struct ip *);
1144 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1145 if (icp0->icmp_type != ICMP_ECHO)
1146 {
1147 Log(("NAT: we haven't found echo for this reply\n"));
1148 return;
1149 }
1150 /*
1151 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1152 * IP header combined by OS network stack, our local copy of IP header contians values
1153 * in host byte order so no byte order conversion is required. IP headers fields are converting
1154 * in ip_output0 routine only.
1155 */
1156 if ( (ip->ip_len - hlen)
1157 != (ip0->ip_len - (ip0->ip_hl << 2)))
1158 {
1159 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1160 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1161 return;
1162 }
1163 }
1164
1165 /* ip points on origianal ip header */
1166 ip = mtod(m, struct ip *);
1167 proto = ip->ip_p;
1168 /* Now ip is pointing on header we've sent from guest */
1169 if ( icp->icmp_type == ICMP_TIMXCEED
1170 || icp->icmp_type == ICMP_UNREACH)
1171 {
1172 old_ip_len = (ip->ip_hl << 2) + 64;
1173 if (old_ip_len > sizeof(ip_copy))
1174 old_ip_len = sizeof(ip_copy);
1175 memcpy(ip_copy, ip, old_ip_len);
1176 }
1177
1178 /* source address from original IP packet*/
1179 dst = ip->ip_src.s_addr;
1180
1181 /* overide ther tail of old packet */
1182 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1183 original_hlen = ip->ip_hl << 2;
1184 /* saves original ip header and options */
1185#ifdef VBOX_WITH_SLIRP_BSD_MBUF
1186 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1187 ip->ip_len = m_length(m, NULL);
1188#else
1189 /* m_room space in the saved m buffer */
1190 m_room = M_ROOM(m);
1191 if (m_room < len - hlen + original_hlen)
1192 {
1193 /* we need involve ether header length into new buffer buffer calculation */
1194 m_inc(m, if_maxlinkhdr + len - hlen + original_hlen);
1195 if (m->m_size < if_maxlinkhdr + len - hlen + original_hlen)
1196 {
1197 Log(("send_icmp_to_guest: extending buffer was failed (packet is dropped)\n"));
1198 return;
1199 }
1200 }
1201 memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
1202 m->m_len = len - hlen + original_hlen;
1203 ip->ip_len = m->m_len;
1204#endif
1205 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1206
1207 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1208 type = icp->icmp_type;
1209 if ( type == ICMP_TIMXCEED
1210 || type == ICMP_UNREACH)
1211 {
1212 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1213 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1214 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1215 }
1216
1217 ip->ip_src.s_addr = src;
1218 ip->ip_dst.s_addr = dst;
1219 icmp_reflect(pData, m);
1220 LIST_REMOVE(icm, im_list);
1221 /* Don't call m_free here*/
1222
1223 if ( type == ICMP_TIMXCEED
1224 || type == ICMP_UNREACH)
1225 {
1226 icm->im_so->so_m = NULL;
1227 switch (proto)
1228 {
1229 case IPPROTO_UDP:
1230 /*XXX: so->so_m already freed so we shouldn't call sofree */
1231 udp_detach(pData, icm->im_so);
1232 break;
1233 case IPPROTO_TCP:
1234 /*close tcp should be here */
1235 break;
1236 default:
1237 /* do nothing */
1238 break;
1239 }
1240 }
1241 RTMemFree(icm);
1242}
1243
1244#ifdef RT_OS_WINDOWS
1245static void
1246sorecvfrom_icmp_win(PNATState pData, struct socket *so)
1247{
1248 int len;
1249 int i;
1250 struct ip *ip;
1251 struct mbuf *m;
1252 struct icmp *icp;
1253 struct icmp_msg *icm;
1254 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
1255 uint32_t src;
1256 ICMP_ECHO_REPLY *icr;
1257 int hlen = 0;
1258 int data_len = 0;
1259 int nbytes = 0;
1260 u_char code = ~0;
1261 int out_len;
1262 int size;
1263
1264 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
1265 if (len < 0)
1266 {
1267 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
1268 return;
1269 }
1270 if (len == 0)
1271 return; /* no error */
1272
1273 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
1274 for (i = 0; i < len; ++i)
1275 {
1276 switch(icr[i].Status)
1277 {
1278 case IP_DEST_HOST_UNREACHABLE:
1279 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1280 case IP_DEST_NET_UNREACHABLE:
1281 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1282 case IP_DEST_PROT_UNREACHABLE:
1283 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1284 /* UNREACH error inject here */
1285 case IP_DEST_PORT_UNREACHABLE:
1286 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1287 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1288 so->so_m = NULL;
1289 break;
1290 case IP_SUCCESS: /* echo replied */
1291# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1292 m = m_get(pData);
1293# else
1294 out_len = ETH_HLEN + sizeof(struct ip) + 8;
1295 size;
1296 size = MCLBYTES;
1297 if (out_len < MSIZE)
1298 size = MCLBYTES;
1299 else if (out_len < MCLBYTES)
1300 size = MCLBYTES;
1301 else if (out_len < MJUM9BYTES)
1302 size = MJUM9BYTES;
1303 else if (out_len < MJUM16BYTES)
1304 size = MJUM16BYTES;
1305 else
1306 AssertMsgFailed(("Unsupported size"));
1307
1308 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
1309 if (m == NULL)
1310 return;
1311# endif
1312 m->m_len = 0;
1313 m->m_data += if_maxlinkhdr;
1314 ip = mtod(m, struct ip *);
1315 ip->ip_src.s_addr = icr[i].Address;
1316 ip->ip_p = IPPROTO_ICMP;
1317 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1318 data_len = sizeof(struct ip);
1319 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1320 ip->ip_ttl = icr[i].Options.Ttl;
1321
1322 icp = (struct icmp *)&ip[1]; /* no options */
1323 icp->icmp_type = ICMP_ECHOREPLY;
1324 icp->icmp_code = 0;
1325 icp->icmp_id = so->so_icmp_id;
1326 icp->icmp_seq = so->so_icmp_seq;
1327
1328 data_len += ICMP_MINLEN;
1329
1330# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1331 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
1332 memcpy(icp->icmp_data, icr[i].Data, nbytes);
1333# else
1334 hlen = (ip->ip_hl << 2);
1335 m->m_pkthdr.header = mtod(m, void *);
1336 m->m_len = data_len;
1337
1338 m_copyback(pData, m, hlen + 8, icr[i].DataSize, icr[i].Data);
1339# endif
1340
1341 data_len += icr[i].DataSize;
1342
1343 ip->ip_len = data_len;
1344 m->m_len = ip->ip_len;
1345
1346 icmp_reflect(pData, m);
1347 break;
1348 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1349
1350 ip_broken = icr[i].Data;
1351 icm = icmp_find_original_mbuf(pData, ip_broken);
1352 if (icm == NULL) {
1353 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1354 return;
1355 }
1356 m = icm->im_m;
1357 ip = mtod(m, struct ip *);
1358 ip->ip_ttl = icr[i].Options.Ttl;
1359 src = ip->ip_src.s_addr;
1360 ip->ip_dst.s_addr = src;
1361 ip->ip_dst.s_addr = icr[i].Address;
1362
1363 hlen = (ip->ip_hl << 2);
1364 icp = (struct icmp *)((char *)ip + hlen);
1365 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1366 data_len = (ip_broken->ip_hl << 2) + 64;
1367
1368#ifndef VBOX_WITH_SLIRP_BSD_MBUF
1369 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
1370 memcpy(icp->icmp_data, ip_broken, nbytes);
1371#else
1372 m->m_len = data_len;
1373 m->m_pkthdr.header = mtod(m, void *);
1374 m_copyback(pData, m, ip->ip_hl >> 2, icr[i].DataSize, icr[i].Data);
1375#endif
1376 icmp_reflect(pData, m);
1377 break;
1378 default:
1379 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1380 break;
1381 }
1382 }
1383}
1384#else /* !RT_OS_WINDOWS */
1385static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1386{
1387 struct sockaddr_in addr;
1388 socklen_t addrlen = sizeof(struct sockaddr_in);
1389 struct ip ip;
1390 char *buff;
1391 int len = 0;
1392
1393 /* 1- step: read the ip header */
1394 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1395 (struct sockaddr *)&addr, &addrlen);
1396 if ( len < 0
1397 && ( errno == EAGAIN
1398 || errno == EWOULDBLOCK
1399 || errno == EINPROGRESS
1400 || errno == ENOTCONN))
1401 {
1402 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1403 return;
1404 }
1405
1406 if ( len < sizeof(struct ip)
1407 || len < 0
1408 || len == 0)
1409 {
1410 u_char code;
1411 code = ICMP_UNREACH_PORT;
1412
1413 if (errno == EHOSTUNREACH)
1414 code = ICMP_UNREACH_HOST;
1415 else if (errno == ENETUNREACH)
1416 code = ICMP_UNREACH_NET;
1417
1418 LogRel((" udp icmp rx errno = %d-%s\n",
1419 errno, strerror(errno)));
1420 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1421 so->so_m = NULL;
1422 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm \n"));
1423 return;
1424 }
1425 /* basic check of IP header */
1426 if ( ip.ip_v != IPVERSION
1427# ifndef RT_OS_DARWIN
1428 || ip.ip_p != IPPROTO_ICMP
1429# endif
1430 )
1431 {
1432 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4 \n"));
1433 return;
1434 }
1435# ifndef RT_OS_DARWIN
1436 /* Darwin reports the IP length already in host byte order. */
1437 ip.ip_len = RT_N2H_U16(ip.ip_len);
1438# endif
1439# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1440 /* Solaris and Darwin report the payload only */
1441 ip.ip_len += (ip.ip_hl << 2);
1442# endif
1443 /* Note: ip->ip_len in host byte order (all OS) */
1444 len = ip.ip_len;
1445 buff = RTMemAlloc(len);
1446 if (buff == NULL)
1447 {
1448 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1449 return;
1450 }
1451 /* 2 - step: we're reading rest of the datagramm to the buffer */
1452 addrlen = sizeof(struct sockaddr_in);
1453 memset(&addr, 0, addrlen);
1454 len = recvfrom(so->s, buff, len, 0,
1455 (struct sockaddr *)&addr, &addrlen);
1456 if ( len < 0
1457 && ( errno == EAGAIN
1458 || errno == EWOULDBLOCK
1459 || errno == EINPROGRESS
1460 || errno == ENOTCONN))
1461 {
1462 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1463 ip.ip_len));
1464 RTMemFree(buff);
1465 return;
1466 }
1467 if ( len < 0
1468 || len == 0)
1469 {
1470 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1471 errno, len, (ip.ip_len - sizeof(struct ip))));
1472 RTMemFree(buff);
1473 return;
1474 }
1475 /* len is modified in 2nd read, when the rest of the datagramm was read */
1476 send_icmp_to_guest(pData, buff, len, so, &addr);
1477 RTMemFree(buff);
1478}
1479#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette