VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxping.c@ 51287

Last change on this file since 51287 was 50106, checked in by vboxsync, 11 years ago

G/c temporary local redefinitions of DPRINTF*.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 52.3 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2
3#include "winutils.h"
4#include "proxy.h"
5#include "proxy_pollmgr.h"
6#include "pxremap.h"
7
8#ifndef RT_OS_WINDOWS
9#include <sys/types.h>
10#include <sys/socket.h>
11#ifdef RT_OS_DARWIN
12# define __APPLE_USE_RFC_3542
13#endif
14#include <netinet/in.h>
15#include <arpa/inet.h> /* XXX: inet_ntop */
16#include <poll.h>
17#include <stdint.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#else
22#include <iprt/stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include "winpoll.h"
27#endif
28
29#include "lwip/opt.h"
30
31#include "lwip/sys.h"
32#include "lwip/tcpip.h"
33#include "lwip/inet_chksum.h"
34#include "lwip/ip.h"
35#include "lwip/icmp.h"
36
37#if defined(RT_OS_LINUX) && !defined(__USE_GNU)
38#if __GLIBC_PREREQ(2, 8)
39/*
40 * XXX: This is gross. in6_pktinfo is now hidden behind _GNU_SOURCE
41 * https://sourceware.org/bugzilla/show_bug.cgi?id=6775
42 *
43 * But in older glibc versions, e.g. RHEL5, it is not! I don't want
44 * to deal with _GNU_SOURCE now, so as a kludge check for glibc
45 * version. It seems the __USE_GNU guard was introduced in 2.8.
46 */
/*
 * Local declaration of the RFC 3542 packet information structure, used
 * only when glibc hides its own definition (see the comment above).
 * Per RFC 3542, ipi6_addr is the IPv6 address and ipi6_ifindex the
 * interface index associated with a packet.
 */
47struct in6_pktinfo {
48 struct in6_addr ipi6_addr;
49 unsigned int ipi6_ifindex;
50};
51#endif /* __GLIBC_PREREQ */
52#endif /* RT_OS_LINUX && !__USE_GNU */
53
54
55/* forward */
56struct ping_pcb;
57
58
59/**
60 * Global state for ping proxy collected in one entity to minimize
61 * globals. There's only one instance of this structure.
62 *
63 * Raw ICMP sockets are promiscuous, so it doesn't make sense to have
64 * multiple. If this code ever needs to support multiple netifs, the
65 * netif member should be exiled into "pcb".
66 */
67struct pxping {
/* Socket used for IPv4 pings; INVALID_SOCKET if pxping_init() was given none. */
68 SOCKET sock4;
69
70#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS)
71# define DF_WITH_IP_HDRINCL
/* Last IP_HDRINCL value set on sock4 by pxping_recv4(); -1 until first set. */
72 int hdrincl;
73#else
/* Last "don't fragment" option value set on sock4; -1 until first set. */
74 int df;
75#endif
/*
 * Last IP_TTL and IP_TOS values set on sock4, cached so that
 * pxping_recv4() only calls setsockopt() when the value changes.
 */
76 int ttl;
77 int tos;
78
/* Socket used for IPv6 pings; INVALID_SOCKET if unavailable. */
79 SOCKET sock6;
80#ifdef RT_OS_WINDOWS
/* WSARecvMsg entry point obtained by pxping_init_windows(). */
81 LPFN_WSARECVMSG pfWSARecvMsg6;
82#endif
/* Last IPV6_UNICAST_HOPS value set on sock6; -1 until first set. */
83 int hopl;
84
/* Poll manager handlers registered for sock4 and sock6 in pxping_init(). */
85 struct pollmgr_handler pmhdl4;
86 struct pollmgr_handler pmhdl6;
87
/* Interface passed to pxping_init(). */
88 struct netif *netif;
89
90 /**
91 * Protect lwIP and pmgr accesses to the list of pcbs.
92 */
93 sys_mutex_t lock;
94
95 /*
96 * We need to find pcbs both from the guest side and from the host
97 * side. If we need to support industrial grade ping throughput,
98 * we will need two pcb hashes. For now, a short linked list
99 * should be enough. Cf. pxping_pcb_for_request() and
100 * pxping_pcb_for_reply().
101 */
/* Upper bound on npcbs, enforced by pxping_pcb_allocate(). */
102#define PXPING_MAX_PCBS 8
103 size_t npcbs;
104 struct ping_pcb *pcbs;
105
/*
 * Number of timeout slots.  pxping_timer() runs once a second while
 * pcbs exist and advances timeout_slot by one (wrapping to 0); a pcb
 * whose own timeout_slot equals the new current slot is expired.
 */
106#define TIMEOUT 5
/* Non-zero while a pxping_timer() callback is scheduled. */
107 int timer_active;
/* Current slot; new and refreshed pcbs are stamped with this value. */
108 size_t timeout_slot;
/* Per-slot chains of pcbs, linked through ping_pcb::next_timeout. */
109 struct ping_pcb *timeout_list[TIMEOUT];
110};
111
112
113/**
114 * Quasi PCB for ping.
115 */
116struct ping_pcb {
/* Source and destination addresses of the guest's echo request. */
117 ipX_addr_t src;
118 ipX_addr_t dst;
119
/*
 * is_ipv6: non-zero for ICMPv6 pings.
 * is_mapped: non-zero when the outbound remap of dst returned
 * PXREMAP_MAPPED; in that case TTL/hop limit is not decremented.
 */
120 u8_t is_ipv6;
121 u8_t is_mapped;
122
/*
 * Echo identifier used by the guest and the replacement identifier
 * written into requests sent on the host socket.  Both are stored as
 * they appear in the ICMP header (callers apply ntohs() for printing).
 */
123 u16_t guest_id;
124 u16_t host_id;
125
126 /**
127 * Desired slot in pxping::timeout_list. See pxping_timer().
128 */
129 size_t timeout_slot;
130
131 /**
132 * Chaining for pxping::timeout_list
133 */
/*
 * pprev_timeout points at the pointer that references this pcb (the
 * slot head or the previous pcb's next_timeout); NULL when the pcb is
 * not on any timeout chain.
 */
134 struct ping_pcb **pprev_timeout;
135 struct ping_pcb *next_timeout;
136
137 /**
138 * Chaining for pxping::pcbs
139 */
140 struct ping_pcb *next;
141
/* Host socket address requests are sent to; filled in by pxping_pcb_for_request(). */
142 union {
143 struct sockaddr_in sin;
144 struct sockaddr_in6 sin6;
145 } peer;
146};
147
148
149/**
150 * lwIP thread callback message for IPv4 ping.
151 *
152 * We pass raw IP datagram for ip_output_if() so we only need pbuf and
153 * netif (from pxping).
154 */
155struct ping_msg {
/* lwIP callback message embedded in this structure. */
156 struct tcpip_msg msg;
/* Proxy state; supplies the netif (see the comment above). */
157 struct pxping *pxping;
/* pbuf holding the raw IP datagram to output. */
158 struct pbuf *p;
159};
160
161
162/**
163 * lwIP thread callback message for IPv6 ping.
164 *
165 * We cannot obtain raw IPv6 datagram from host without extra trouble,
166 * so we pass ICMPv6 payload in pbuf and also other parameters to
167 * ip6_output_if().
168 */
169struct ping6_msg {
/* lwIP callback message embedded in this structure. */
170 struct tcpip_msg msg;
/* Proxy state; supplies the netif. */
171 struct pxping *pxping;
/* pbuf holding the ICMPv6 payload (no IPv6 header, see the comment above). */
172 struct pbuf *p;
/* Remaining parameters handed to ip6_output_if(): addresses, hop limit, traffic class. */
173 ip6_addr_t src, dst;
174 int hopl, tclass;
175};
176
177
178#ifdef RT_OS_WINDOWS
179static int pxping_init_windows(struct pxping *pxping);
180#endif
181static void pxping_recv4(void *arg, struct pbuf *p);
182static void pxping_recv6(void *arg, struct pbuf *p);
183
184static void pxping_timer(void *arg);
185static void pxping_timer_needed(struct pxping *pxping);
186
187static struct ping_pcb *pxping_pcb_for_request(struct pxping *pxping,
188 int is_ipv6,
189 ipX_addr_t *src, ipX_addr_t *dst,
190 u16_t guest_id);
191static struct ping_pcb *pxping_pcb_for_reply(struct pxping *pxping, int is_ipv6,
192 ipX_addr_t *dst, u16_t host_id);
193
194static struct ping_pcb *pxping_pcb_allocate(struct pxping *pxping);
195static void pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb);
196static void pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb);
197static void pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb);
198static void pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb);
199static void pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb);
200static void pxping_pcb_debug_print(struct ping_pcb *pcb);
201
202static int pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents);
203
204static void pxping_pmgr_icmp4(struct pxping *pxping);
205static void pxping_pmgr_icmp4_echo(struct pxping *pxping,
206 u16_t iplen, struct sockaddr_in *peer);
207static void pxping_pmgr_icmp4_error(struct pxping *pxping,
208 u16_t iplen, struct sockaddr_in *peer);
209static void pxping_pmgr_icmp6(struct pxping *pxping);
210static void pxping_pmgr_icmp6_echo(struct pxping *pxping,
211 ip6_addr_t *src, ip6_addr_t *dst,
212 int hopl, int tclass, u16_t icmplen);
213static void pxping_pmgr_icmp6_error(struct pxping *pxping,
214 ip6_addr_t *src, ip6_addr_t *dst,
215 int hopl, int tclass, u16_t icmplen);
216
217static void pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen);
218static void pxping_pcb_forward_inbound(void *arg);
219
220static void pxping_pmgr_forward_inbound6(struct pxping *pxping,
221 ip6_addr_t *src, ip6_addr_t *dst,
222 u8_t hopl, u8_t tclass,
223 u16_t icmplen);
224static void pxping_pcb_forward_inbound6(void *arg);
225
226/*
 * NB: This behaviour is not documented anywhere except in the lwIP
 * sources.
 *
 * If ip_output_if() is passed dest == NULL then it treats p as a
 * complete IP packet with payload pointing to the IP header. It does
 * not build an IP header, ignores all header-related arguments, fetches
 * the real destination from the header in the pbuf and outputs the pbuf
 * to the specified netif.
234 */
235#define ip_raw_output_if(p, netif) \
236 (ip_output_if((p), NULL, NULL, 0, 0, 0, (netif)))
237
238
239
240static struct pxping g_pxping;
241
242
243err_t
244pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6)
245{
246 const int on = 1;
247 int status;
248
249 if (sock4 == INVALID_SOCKET && sock6 == INVALID_SOCKET) {
250 return ERR_VAL;
251 }
252
253 g_pxping.netif = netif;
254 sys_mutex_new(&g_pxping.lock);
255
256 g_pxping.sock4 = sock4;
257 if (g_pxping.sock4 != INVALID_SOCKET) {
258#ifdef DF_WITH_IP_HDRINCL
259 g_pxping.hdrincl = -1;
260#else
261 g_pxping.df = -1;
262#endif
263 g_pxping.ttl = -1;
264 g_pxping.tos = 0;
265
266#ifdef RT_OS_LINUX
267 {
268 const int dont = IP_PMTUDISC_DONT;
269 status = setsockopt(sock4, IPPROTO_IP, IP_MTU_DISCOVER,
270 &dont, sizeof(dont));
271 if (status != 0) {
272 perror("IP_MTU_DISCOVER");
273 }
274 }
275#endif /* RT_OS_LINUX */
276
277 g_pxping.pmhdl4.callback = pxping_pmgr_pump;
278 g_pxping.pmhdl4.data = (void *)&g_pxping;
279 g_pxping.pmhdl4.slot = -1;
280 pollmgr_add(&g_pxping.pmhdl4, g_pxping.sock4, POLLIN);
281
282 ping_proxy_accept(pxping_recv4, &g_pxping);
283 }
284
285 g_pxping.sock6 = sock6;
286#ifdef RT_OS_WINDOWS
287 /* we need recvmsg */
288 if (g_pxping.sock6 != INVALID_SOCKET) {
289 status = pxping_init_windows(&g_pxping);
290 if (status == SOCKET_ERROR) {
291 g_pxping.sock6 = INVALID_SOCKET;
292 /* close(sock6); */
293 }
294 }
295#endif
296 if (g_pxping.sock6 != INVALID_SOCKET) {
297 g_pxping.hopl = -1;
298
299#if !defined(IPV6_RECVPKTINFO)
300#define IPV6_RECVPKTINFO (IPV6_PKTINFO)
301#endif
302 status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVPKTINFO,
303 (const char *)&on, sizeof(on));
304 if (status < 0) {
305 perror("IPV6_RECVPKTINFO");
306 /* XXX: for now this is fatal */
307 }
308
309#if !defined(IPV6_RECVHOPLIMIT)
310#define IPV6_RECVHOPLIMIT (IPV6_HOPLIMIT)
311#endif
312 status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
313 (const char *)&on, sizeof(on));
314 if (status < 0) {
315 perror("IPV6_RECVHOPLIMIT");
316 }
317
318#ifdef IPV6_RECVTCLASS /* new in RFC 3542, there's no RFC 2292 counterpart */
319 /* TODO: IPV6_RECVTCLASS */
320#endif
321
322 g_pxping.pmhdl6.callback = pxping_pmgr_pump;
323 g_pxping.pmhdl6.data = (void *)&g_pxping;
324 g_pxping.pmhdl6.slot = -1;
325 pollmgr_add(&g_pxping.pmhdl6, g_pxping.sock6, POLLIN);
326
327 ping6_proxy_accept(pxping_recv6, &g_pxping);
328 }
329
330 return ERR_OK;
331}
332
333
334#ifdef RT_OS_WINDOWS
335static int
336pxping_init_windows(struct pxping *pxping)
337{
338 GUID WSARecvMsgGUID = WSAID_WSARECVMSG;
339 DWORD nread;
340 int status;
341
342 pxping->pfWSARecvMsg6 = NULL;
343 status = WSAIoctl(pxping->sock6,
344 SIO_GET_EXTENSION_FUNCTION_POINTER,
345 &WSARecvMsgGUID, sizeof(WSARecvMsgGUID),
346 &pxping->pfWSARecvMsg6, sizeof(pxping->pfWSARecvMsg6),
347 &nread,
348 NULL, NULL);
349 return status;
350}
351#endif /* RT_OS_WINDOWS */
352
353
354static u32_t
355chksum_delta_16(u16_t oval, u16_t nval)
356{
357 u32_t sum = (u16_t)~oval;
358 sum += nval;
359 return sum;
360}
361
362
363static u32_t
364chksum_update_16(u16_t *oldp, u16_t nval)
365{
366 u32_t sum = chksum_delta_16(*oldp, nval);
367 *oldp = nval;
368 return sum;
369}
370
371
372static u32_t
373chksum_delta_32(u32_t oval, u32_t nval)
374{
375 u32_t sum = ~oval;
376 sum = FOLD_U32T(sum);
377 sum += FOLD_U32T(nval);
378 return sum;
379}
380
381
382static u32_t
383chksum_update_32(u32_t *oldp, u32_t nval)
384{
385 u32_t sum = chksum_delta_32(*oldp, nval);
386 *oldp = nval;
387 return sum;
388}
389
390
391static u32_t
392chksum_delta_ipv6(const ip6_addr_t *oldp, const ip6_addr_t *newp)
393{
394 u32_t sum;
395
396 sum = chksum_delta_32(oldp->addr[0], newp->addr[0]);
397 sum += chksum_delta_32(oldp->addr[1], newp->addr[1]);
398 sum += chksum_delta_32(oldp->addr[2], newp->addr[2]);
399 sum += chksum_delta_32(oldp->addr[3], newp->addr[3]);
400
401 return sum;
402}
403
404
405static u32_t
406chksum_update_ipv6(ip6_addr_t *oldp, const ip6_addr_t *newp)
407{
408 u32_t sum;
409
410 sum = chksum_update_32(&oldp->addr[0], newp->addr[0]);
411 sum += chksum_update_32(&oldp->addr[1], newp->addr[1]);
412 sum += chksum_update_32(&oldp->addr[2], newp->addr[2]);
413 sum += chksum_update_32(&oldp->addr[3], newp->addr[3]);
414
415 return sum;
416}
417
418
419/**
420 * ICMP Echo Request in pbuf "p" is to be proxied.
 *
 * Looks up (or creates) the pcb for this guest ping, applies the
 * guest's DF/TTL/TOS to the host socket (or to the IP header itself
 * when IP_HDRINCL is in use), replaces the echo identifier with the
 * pcb's host_id and sends the datagram with proxy_sendto().  If the
 * send fails, the original headers are restored and, for selected
 * errno values, an ICMP Destination Unreachable is generated via lwIP.
 *
 * The pbuf is always freed here.
 *
 * @param arg  the struct pxping registered in pxping_init().
 * @param p    pbuf whose payload points at the ICMP echo header.
421 */
422static void
423pxping_recv4(void *arg, struct pbuf *p)
424{
425 struct pxping *pxping = (struct pxping *)arg;
426 struct ping_pcb *pcb;
427#ifdef DF_WITH_IP_HDRINCL
428 struct ip_hdr iph_orig;
429#endif
430 struct icmp_echo_hdr icmph_orig;
431 struct ip_hdr *iph;
432 struct icmp_echo_hdr *icmph;
433 int df, ttl, tos;
434 u32_t sum;
435 u16_t iphlen;
436 int status;
437
438 iphlen = ip_current_header_tot_len();
439 if (iphlen != IP_HLEN) { /* we don't do options */
440 pbuf_free(p);
441 return;
442 }
443
444 iph = (/* UNCONST */ struct ip_hdr *)ip_current_header();
445 icmph = (struct icmp_echo_hdr *)p->payload;
446
/* find the existing pcb for this (src, dst, id) or create a new one */
447 pcb = pxping_pcb_for_request(pxping, 0,
448 ipX_current_src_addr(),
449 ipX_current_dest_addr(),
450 icmph->id);
451 if (pcb == NULL) {
452 pbuf_free(p);
453 return;
454 }
455
456 pxping_pcb_debug_print(pcb); /* XXX */
457 DPRINTF((" seq %d len %u ttl %d\n",
458 ntohs(icmph->seqno), (unsigned int)p->tot_len,
459 IPH_TTL(iph)));
460
/*
 * For destinations that are not remapped the proxy counts as a hop:
 * a request arriving with TTL 1 is answered with Time Exceeded
 * instead of being forwarded, otherwise the TTL is decremented.
 */
461 ttl = IPH_TTL(iph);
462 if (!pcb->is_mapped) {
463 if (RT_UNLIKELY(ttl == 1)) {
464 status = pbuf_header(p, iphlen); /* back to IP header */
465 if (RT_LIKELY(status == 0)) {
466 icmp_time_exceeded(p, ICMP_TE_TTL);
467 }
468 pbuf_free(p);
469 return;
470 }
471 --ttl;
472 }
473
474 /*
475 * OS X doesn't provide a socket option to control fragmentation.
476 * Solaris doesn't provide IP_DONTFRAG on all releases we support.
477 * In this case we have to use IP_HDRINCL. We don't want to use
478 * it always since it doesn't handle fragmentation (but that's ok
479 * for DF) and Windows doesn't do automatic source address
480 * selection with IP_HDRINCL.
481 */
482 df = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0;
483
484#ifdef DF_WITH_IP_HDRINCL
/* switch IP_HDRINCL on only for datagrams that carry DF */
485 if (df != pxping->hdrincl) {
486 status = setsockopt(pxping->sock4, IPPROTO_IP, IP_HDRINCL,
487 &df, sizeof(df));
488 if (RT_LIKELY(status == 0)) {
489 pxping->hdrincl = df;
490 }
491 else {
492 perror("IP_HDRINCL");
493 }
494 }
495
496 if (pxping->hdrincl) {
497 status = pbuf_header(p, iphlen); /* back to IP header */
498 if (RT_UNLIKELY(status != 0)) {
499 pbuf_free(p);
500 return;
501 }
502
503 /* we will overwrite IP header, save original for ICMP errors */
504 memcpy(&iph_orig, iph, iphlen);
505
506 if (g_proxy_options->src4 != NULL) {
507 memcpy(&iph->src, &g_proxy_options->src4->sin_addr,
508 sizeof(g_proxy_options->src4->sin_addr));
509 }
510 else {
511 /* let the kernel select suitable source address */
512 memset(&iph->src, 0, sizeof(iph->src));
513 }
514
515 IPH_TTL_SET(iph, ttl); /* already decremented */
516 IPH_ID_SET(iph, 0); /* kernel will set one */
517#ifdef RT_OS_DARWIN
518 /* wants ip_offset and ip_len fields in host order */
519 IPH_OFFSET_SET(iph, ntohs(IPH_OFFSET(iph)));
520 IPH_LEN_SET(iph, ntohs(IPH_LEN(iph)));
521 /* wants checksum of everything (sic!), in host order */
522 sum = inet_chksum_pbuf(p);
523 IPH_CHKSUM_SET(iph, sum);
524#else /* !RT_OS_DARWIN */
525 IPH_CHKSUM_SET(iph, 0); /* kernel will recalculate */
526#endif
527 }
528 else /* !pxping->hdrincl */
529#endif /* DF_WITH_IP_HDRINCL */
530 {
/*
 * The kernel builds the IP header: pass DF, TTL and TOS through
 * socket options, calling setsockopt() only when a value differs
 * from the one cached in pxping.
 */
531#if !defined(DF_WITH_IP_HDRINCL)
532 /* control DF flag via setsockopt(2) */
533#define USE_DF_OPTION(_Optname) \
534 const int dfopt = _Optname; \
535 const char * const dfoptname = #_Optname;
536#if defined(RT_OS_LINUX)
537 USE_DF_OPTION(IP_MTU_DISCOVER);
538 df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
539#elif defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
540 USE_DF_OPTION(IP_DONTFRAG);
541#elif defined(RT_OS_WINDOWS)
542 USE_DF_OPTION(IP_DONTFRAGMENT);
543#endif
544 if (df != pxping->df) {
545 status = setsockopt(pxping->sock4, IPPROTO_IP, dfopt,
546 (char *)&df, sizeof(df));
547 if (RT_LIKELY(status == 0)) {
548 pxping->df = df;
549 }
550 else {
551 perror(dfoptname);
552 }
553 }
554#endif /* !DF_WITH_IP_HDRINCL */
555
556 if (ttl != pxping->ttl) {
557 status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TTL,
558 (char *)&ttl, sizeof(ttl));
559 if (RT_LIKELY(status == 0)) {
560 pxping->ttl = ttl;
561 }
562 else {
563 perror("IP_TTL");
564 }
565 }
566
567 tos = IPH_TOS(iph);
568 if (tos != pxping->tos) {
569 status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TOS,
570 (char *)&tos, sizeof(tos));
571 if (RT_LIKELY(status == 0)) {
572 pxping->tos = tos;
573 }
574 else {
575 perror("IP_TOS");
576 }
577 }
578 }
579
580 /* rewrite ICMP echo header */
/*
 * Keep a copy for the error path, then substitute host_id and adjust
 * the ICMP checksum incrementally instead of recomputing it.
 */
581 memcpy(&icmph_orig, icmph, sizeof(*icmph));
582 sum = (u16_t)~icmph->chksum;
583 sum += chksum_update_16(&icmph->id, pcb->host_id);
584 sum = FOLD_U32T(sum);
585 icmph->chksum = ~sum;
586
587 status = proxy_sendto(pxping->sock4, p,
588 &pcb->peer.sin, sizeof(pcb->peer.sin));
589 if (status != 0) {
/* proxy_sendto() reports failure as a negated errno value */
590 int error = -status;
591 DPRINTF(("%s: sendto errno %d\n", __func__, error));
592
593#ifdef DF_WITH_IP_HDRINCL
594 if (pxping->hdrincl) {
595 /* restore original IP header */
596 memcpy(iph, &iph_orig, iphlen);
597 }
598 else
599#endif
600 {
601 status = pbuf_header(p, iphlen); /* back to IP header */
602 if (RT_UNLIKELY(status != 0)) {
603 pbuf_free(p);
604 return;
605 }
606 }
607
608 /* restore original ICMP header */
609 memcpy(icmph, &icmph_orig, sizeof(*icmph));
610
611 /*
612 * Some ICMP errors may be generated by the kernel and we read
613 * them from the socket and forward them normally, hence the
614 * ifdefs below.
615 */
616 switch (error) {
617
618#if !( defined(RT_OS_SOLARIS) \
619 || (defined(RT_OS_LINUX) && !defined(DF_WITH_IP_HDRINCL)) \
620 )
621 case EMSGSIZE:
622 icmp_dest_unreach(p, ICMP_DUR_FRAG);
623 break;
624#endif
625
626 case ENETDOWN:
627 case ENETUNREACH:
628 icmp_dest_unreach(p, ICMP_DUR_NET);
629 break;
630
631 case EHOSTDOWN:
632 case EHOSTUNREACH:
633 icmp_dest_unreach(p, ICMP_DUR_HOST);
634 break;
635 }
636 }
637
638 pbuf_free(p);
639}
640
641
642/**
643 * ICMPv6 Echo Request in pbuf "p" is to be proxied.
644 */
645static void
646pxping_recv6(void *arg, struct pbuf *p)
647{
648 struct pxping *pxping = (struct pxping *)arg;
649 struct ping_pcb *pcb;
650 struct ip6_hdr *iph;
651 struct icmp6_echo_hdr *icmph;
652 int hopl;
653 u16_t iphlen;
654 u16_t id, seq;
655 int status;
656
657 iph = (/* UNCONST */ struct ip6_hdr *)ip6_current_header();
658 iphlen = ip_current_header_tot_len();
659
660 icmph = (struct icmp6_echo_hdr *)p->payload;
661
662 id = icmph->id;
663 seq = icmph->seqno;
664
665 pcb = pxping_pcb_for_request(pxping, 1,
666 ipX_current_src_addr(),
667 ipX_current_dest_addr(),
668 id);
669 if (pcb == NULL) {
670 pbuf_free(p);
671 return;
672 }
673
674 pxping_pcb_debug_print(pcb); /* XXX */
675 DPRINTF((" seq %d len %u hopl %d\n",
676 ntohs(seq), (unsigned int)p->tot_len,
677 IP6H_HOPLIM(iph)));
678
679 hopl = IP6H_HOPLIM(iph);
680 if (!pcb->is_mapped) {
681 if (hopl == 1) {
682 status = pbuf_header(p, iphlen); /* back to IP header */
683 if (RT_LIKELY(status == 0)) {
684 icmp6_time_exceeded(p, ICMP6_TE_HL);
685 }
686 pbuf_free(p);
687 return;
688 }
689 --hopl;
690 }
691
692 /*
693 * Rewrite ICMPv6 echo header. We don't need to recompute the
694 * checksum since, unlike IPv4, checksum includes pseudo-header.
695 * OS computes checksum for us on send() since it needs to select
696 * source address.
697 */
698 icmph->id = pcb->host_id;
699
700 /* TODO: use control messages to save a syscall? */
701 if (hopl != pxping->hopl) {
702 status = setsockopt(pxping->sock6, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
703 (char *)&hopl, sizeof(hopl));
704 if (status == 0) {
705 pxping->hopl = hopl;
706 }
707 else {
708 perror("IPV6_HOPLIMIT");
709 }
710 }
711
712 status = proxy_sendto(pxping->sock6, p,
713 &pcb->peer.sin6, sizeof(pcb->peer.sin6));
714 if (status != 0) {
715 int error = -status;
716 DPRINTF(("%s: sendto errno %d\n", __func__, error));
717
718 status = pbuf_header(p, iphlen); /* back to IP header */
719 if (RT_UNLIKELY(status != 0)) {
720 pbuf_free(p);
721 return;
722 }
723
724 /* restore original ICMP header */
725 icmph->id = pcb->guest_id;
726
727 switch (error) {
728 case EACCES:
729 icmp6_dest_unreach(p, ICMP6_DUR_PROHIBITED);
730 break;
731
732#ifdef ENONET
733 case ENONET:
734#endif
735 case ENETDOWN:
736 case ENETUNREACH:
737 case EHOSTDOWN:
738 case EHOSTUNREACH:
739 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
740 break;
741 }
742 }
743
744 pbuf_free(p);
745}
746
747
748static void
749pxping_pcb_debug_print(struct ping_pcb *pcb)
750{
751 char addrbuf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
752 const char *addrstr;
753 int sdom = pcb->is_ipv6 ? AF_INET6 : AF_INET;
754
755 DPRINTF(("ping %p:", (void *)pcb));
756
757 addrstr = inet_ntop(sdom, (void *)&pcb->src, addrbuf, sizeof(addrbuf));
758 DPRINTF((" %s", addrstr));
759
760 DPRINTF((" ->"));
761
762 addrstr = inet_ntop(sdom, (void *)&pcb->dst, addrbuf, sizeof(addrbuf));
763 DPRINTF((" %s", addrstr));
764
765 DPRINTF((" id %04x->%04x", ntohs(pcb->guest_id), ntohs(pcb->host_id)));
766}
767
768
769static struct ping_pcb *
770pxping_pcb_allocate(struct pxping *pxping)
771{
772 struct ping_pcb *pcb;
773
774 if (pxping->npcbs >= PXPING_MAX_PCBS) {
775 return NULL;
776 }
777
778 pcb = (struct ping_pcb *)malloc(sizeof(*pcb));
779 if (pcb == NULL) {
780 return NULL;
781 }
782
783 ++pxping->npcbs;
784 return pcb;
785}
786
787
788static void
789pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb)
790{
791 LWIP_ASSERT1(pxping->npcbs > 0);
792 LWIP_ASSERT1(pcb->next == NULL);
793 LWIP_ASSERT1(pcb->pprev_timeout == NULL);
794
795 DPRINTF(("%s: ping %p\n", __func__, (void *)pcb));
796
797 --pxping->npcbs;
798 free(pcb);
799}
800
801
802static void
803pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb)
804{
805 struct ping_pcb **chain;
806
807 LWIP_ASSERT1(pcb->pprev_timeout == NULL);
808
809 chain = &pxping->timeout_list[pcb->timeout_slot];
810 if ((pcb->next_timeout = *chain) != NULL) {
811 (*chain)->pprev_timeout = &pcb->next_timeout;
812 }
813 *chain = pcb;
814 pcb->pprev_timeout = chain;
815}
816
817
818static void
819pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb)
820{
821 LWIP_UNUSED_ARG(pxping);
822
823 LWIP_ASSERT1(pcb->pprev_timeout != NULL);
824 if (pcb->next_timeout != NULL) {
825 pcb->next_timeout->pprev_timeout = pcb->pprev_timeout;
826 }
827 *pcb->pprev_timeout = pcb->next_timeout;
828 pcb->pprev_timeout = NULL;
829 pcb->next_timeout = NULL;
830}
831
832
833static void
834pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb)
835{
836 pcb->next = pxping->pcbs;
837 pxping->pcbs = pcb;
838
839 pxping_timeout_add(pxping, pcb);
840}
841
842
843static void
844pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb)
845{
846 struct ping_pcb **p;
847
848 for (p = &pxping->pcbs; *p != NULL; p = &(*p)->next) {
849 if (*p == pcb) {
850 *p = pcb->next;
851 pcb->next = NULL;
852 break;
853 }
854 }
855
856 pxping_timeout_del(pxping, pcb);
857}
858
859
860static struct ping_pcb *
861pxping_pcb_for_request(struct pxping *pxping,
862 int is_ipv6, ipX_addr_t *src, ipX_addr_t *dst,
863 u16_t guest_id)
864{
865 struct ping_pcb *pcb;
866
867 /* on lwip thread, so no concurrent updates */
868 for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) {
869 if (pcb->guest_id == guest_id
870 && pcb->is_ipv6 == is_ipv6
871 && ipX_addr_cmp(is_ipv6, &pcb->dst, dst)
872 && ipX_addr_cmp(is_ipv6, &pcb->src, src))
873 {
874 break;
875 }
876 }
877
878 if (pcb == NULL) {
879 int mapped;
880
881 pcb = pxping_pcb_allocate(pxping);
882 if (pcb == NULL) {
883 return NULL;
884 }
885
886 pcb->is_ipv6 = is_ipv6;
887 ipX_addr_copy(is_ipv6, pcb->src, *src);
888 ipX_addr_copy(is_ipv6, pcb->dst, *dst);
889
890 pcb->guest_id = guest_id;
891#ifdef RT_OS_WINDOWS
892# define random() (rand())
893#endif
894 pcb->host_id = random() & 0xffffUL;
895
896 pcb->pprev_timeout = NULL;
897 pcb->next_timeout = NULL;
898
899 if (is_ipv6) {
900 pcb->peer.sin6.sin6_family = AF_INET6;
901#if HAVE_SA_LEN
902 pcb->peer.sin6.sin6_len = sizeof(pcb->peer.sin6);
903#endif
904 pcb->peer.sin6.sin6_port = htons(IPPROTO_ICMPV6);
905 pcb->peer.sin6.sin6_flowinfo = 0;
906 mapped = pxremap_outbound_ip6((ip6_addr_t *)&pcb->peer.sin6.sin6_addr,
907 ipX_2_ip6(&pcb->dst));
908 }
909 else {
910 pcb->peer.sin.sin_family = AF_INET;
911#if HAVE_SA_LEN
912 pcb->peer.sin.sin_len = sizeof(pcb->peer.sin);
913#endif
914 pcb->peer.sin.sin_port = htons(IPPROTO_ICMP);
915 mapped = pxremap_outbound_ip4((ip_addr_t *)&pcb->peer.sin.sin_addr,
916 ipX_2_ip(&pcb->dst));
917 }
918
919 if (mapped == PXREMAP_FAILED) {
920 free(pcb);
921 return NULL;
922 }
923 else {
924 pcb->is_mapped = (mapped == PXREMAP_MAPPED);
925 }
926
927 pcb->timeout_slot = pxping->timeout_slot;
928
929 sys_mutex_lock(&pxping->lock);
930 pxping_pcb_register(pxping, pcb);
931 sys_mutex_unlock(&pxping->lock);
932
933 pxping_pcb_debug_print(pcb); /* XXX */
934 DPRINTF((" - created\n"));
935
936 pxping_timer_needed(pxping);
937 }
938 else {
939 /* just bump up expiration timeout lazily */
940 pxping_pcb_debug_print(pcb); /* XXX */
941 DPRINTF((" - slot %d -> %d\n",
942 (unsigned int)pcb->timeout_slot,
943 (unsigned int)pxping->timeout_slot));
944 pcb->timeout_slot = pxping->timeout_slot;
945 }
946
947 return pcb;
948}
949
950
951/**
952 * Called on pollmgr thread. Caller must do the locking since caller
953 * is going to use the returned pcb, which needs to be protected from
954 * being expired by pxping_timer() on lwip thread.
955 */
956static struct ping_pcb *
957pxping_pcb_for_reply(struct pxping *pxping,
958 int is_ipv6, ipX_addr_t *dst, u16_t host_id)
959{
960 struct ping_pcb *pcb;
961
962 for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) {
963 if (pcb->host_id == host_id
964 && pcb->is_ipv6 == is_ipv6
965 /* XXX: allow broadcast pings? */
966 && ipX_addr_cmp(is_ipv6, &pcb->dst, dst))
967 {
968 return pcb;
969 }
970 }
971
972 return NULL;
973}
974
975
976static void
977pxping_timer(void *arg)
978{
979 struct pxping *pxping = (struct pxping *)arg;
980 struct ping_pcb **chain, *pcb;
981
982 pxping->timer_active = 0;
983
984 /*
985 * New slot points to the list of pcbs to check for expiration.
986 */
987 LWIP_ASSERT1(pxping->timeout_slot < TIMEOUT);
988 if (++pxping->timeout_slot == TIMEOUT) {
989 pxping->timeout_slot = 0;
990 }
991
992 chain = &pxping->timeout_list[pxping->timeout_slot];
993 pcb = *chain;
994
995 /* protect from pollmgr concurrent reads */
996 sys_mutex_lock(&pxping->lock);
997
998 while (pcb != NULL) {
999 struct ping_pcb *xpcb = pcb;
1000 pcb = pcb->next_timeout;
1001
1002 if (xpcb->timeout_slot == pxping->timeout_slot) {
1003 /* expired */
1004 pxping_pcb_deregister(pxping, xpcb);
1005 pxping_pcb_delete(pxping, xpcb);
1006 }
1007 else {
1008 /*
1009 * If there was another request, we updated timeout_slot
1010 * but delayed actually moving the pcb until now.
1011 */
1012 pxping_timeout_del(pxping, xpcb); /* from current slot */
1013 pxping_timeout_add(pxping, xpcb); /* to new slot */
1014 }
1015 }
1016
1017 sys_mutex_unlock(&pxping->lock);
1018 pxping_timer_needed(pxping);
1019}
1020
1021
1022static void
1023pxping_timer_needed(struct pxping *pxping)
1024{
1025 if (!pxping->timer_active && pxping->pcbs != NULL) {
1026 pxping->timer_active = 1;
1027 sys_timeout(1 * 1000, pxping_timer, pxping);
1028 }
1029}
1030
1031
1032static int
1033pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1034{
1035 struct pxping *pxping;
1036
1037 pxping = (struct pxping *)handler->data;
1038 LWIP_ASSERT1(fd == pxping->sock4 || fd == pxping->sock6);
1039
1040 if (revents & ~(POLLIN|POLLERR)) {
1041 DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents));
1042 return POLLIN;
1043 }
1044
1045 if (revents & POLLERR) {
1046 int sockerr = -1;
1047 socklen_t optlen = (socklen_t)sizeof(sockerr);
1048 int status;
1049
1050 status = getsockopt(fd, SOL_SOCKET,
1051 SO_ERROR, (char *)&sockerr, &optlen);
1052 if (status < 0) {
1053 DPRINTF(("%s: sock %d: SO_ERROR failed with errno %d\n",
1054 __func__, fd, errno));
1055 }
1056 else {
1057 DPRINTF(("%s: sock %d: errno %d\n",
1058 __func__, fd, sockerr));
1059 }
1060 }
1061
1062 if ((revents & POLLIN) == 0) {
1063 return POLLIN;
1064 }
1065
1066 if (fd == pxping->sock4) {
1067 pxping_pmgr_icmp4(pxping);
1068 }
1069 else /* fd == pxping->sock6 */ {
1070 pxping_pmgr_icmp6(pxping);
1071 }
1072
1073 return POLLIN;
1074}
1075
1076
1077/**
1078 * Process incoming ICMP message for the host.
1079 * NB: we will get a lot of spam here and have to sift through it.
 *
 * Reads one datagram from sock4 into pollmgr_udpbuf and validates the
 * outer IP header: version 4, not a fragment, no options, protocol
 * ICMP, and a total length that fits both the bytes read and an ICMP
 * header.  Echo replies go to pxping_pmgr_icmp4_echo(), Destination
 * Unreachable and Time Exceeded go to pxping_pmgr_icmp4_error();
 * everything else is ignored.
1080 */
1081static void
1082pxping_pmgr_icmp4(struct pxping *pxping)
1083{
1084 struct sockaddr_in sin;
1085 socklen_t salen = sizeof(sin);
1086 ssize_t nread;
1087 struct ip_hdr *iph;
1088 struct icmp_echo_hdr *icmph;
1089 u16_t iplen;
1090
1091 memset(&sin, 0, sizeof(sin));
1092
1093 /*
1094 * Reads from raw IPv4 sockets deliver complete IP datagrams with
1095 * IP header included.
1096 */
1097 nread = recvfrom(pxping->sock4, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0,
1098 (struct sockaddr *)&sin, &salen);
1099 if (nread < 0) {
1100 perror(__func__);
1101 return;
1102 }
1103
1104 if (nread < IP_HLEN) {
1105 DPRINTF2(("%s: read %d bytes, IP header truncated\n",
1106 __func__, (unsigned int)nread));
1107 return;
1108 }
1109
1110 iph = (struct ip_hdr *)pollmgr_udpbuf;
1111
1112 /* match version */
1113 if (IPH_V(iph) != 4) {
1114 DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(iph)));
1115 return;
1116 }
1117
1118 /* no fragmentation */
1119 if ((IPH_OFFSET(iph) & PP_HTONS(IP_OFFMASK | IP_MF)) != 0) {
1120 DPRINTF2(("%s: dropping fragmented datagram\n", __func__));
1121 return;
1122 }
1123
1124 /* no options */
1125 if (IPH_HL(iph) * 4 != IP_HLEN) {
1126 DPRINTF2(("%s: dropping datagram with options (IP header length %d)\n",
1127 __func__, IPH_HL(iph) * 4));
1128 return;
1129 }
1130
1131 if (IPH_PROTO(iph) != IP_PROTO_ICMP) {
1132 DPRINTF2(("%s: unexpected protocol %d\n", __func__, IPH_PROTO(iph)));
1133 return;
1134 }
1135
/*
 * Normalize the total length: after this block iplen is in host order
 * and covers header plus payload, and the header field holds the same
 * value in network order on the platforms that altered it.
 */
1136 iplen = IPH_LEN(iph);
1137#if !defined(RT_OS_DARWIN)
1138 /* darwin reports IPH_LEN in host byte order */
1139 iplen = ntohs(iplen);
1140#endif
1141#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS)
1142 /* darwin and solaris change IPH_LEN to payload length only */
1143 iplen += IP_HLEN; /* we verified there are no options */
1144 IPH_LEN(iph) = htons(iplen);
1145#endif
1146 if (nread < iplen) {
1147 DPRINTF2(("%s: read %d bytes but total length is %d bytes\n",
1148 __func__, (unsigned int)nread, (unsigned int)iplen));
1149 return;
1150 }
1151
1152 if (iplen < IP_HLEN + ICMP_HLEN) {
1153 DPRINTF2(("%s: IP length %d bytes, ICMP header truncated\n",
1154 __func__, iplen));
1155 return;
1156 }
1157
/* dispatch on ICMP type; the handlers re-read the headers from pollmgr_udpbuf */
1158 icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
1159 if (ICMPH_TYPE(icmph) == ICMP_ER) {
1160 pxping_pmgr_icmp4_echo(pxping, iplen, &sin);
1161 }
1162 else if (ICMPH_TYPE(icmph) == ICMP_DUR || ICMPH_TYPE(icmph) == ICMP_TE) {
1163 pxping_pmgr_icmp4_error(pxping, iplen, &sin);
1164 }
1165#if 1
1166 else {
1167 DPRINTF2(("%s: ignoring ICMP type %d\n", __func__, ICMPH_TYPE(icmph)));
1168 }
1169#endif
1170}
1171
1172
1173/**
1174 * Check if this incoming ICMP echo reply is for one of our pings and
1175 * forward it to the guest.
 *
 * Operates on the datagram in pollmgr_udpbuf, which the caller has
 * already validated.  On a pcb match the echo identifier and the IP
 * addresses are rewritten in place with incremental checksum updates,
 * and the datagram is passed to pxping_pmgr_forward_inbound().
 *
 * @param pxping  proxy state.
 * @param iplen   total length of the IP datagram in the buffer.
 * @param peer    address the datagram was received from (only used
 *                for the debug message).
1176 */
1177static void
1178pxping_pmgr_icmp4_echo(struct pxping *pxping,
1179 u16_t iplen, struct sockaddr_in *peer)
1180{
1181 struct ip_hdr *iph;
1182 struct icmp_echo_hdr *icmph;
1183 u16_t id, seq;
1184 ip_addr_t guest_ip, target_ip;
1185 int mapped;
1186 struct ping_pcb *pcb;
1187 u16_t guest_id;
1188 u32_t sum;
1189
1190 iph = (struct ip_hdr *)pollmgr_udpbuf;
1191 icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
1192
1193 id = icmph->id;
1194 seq = icmph->seqno;
1195
1196 {
1197 char addrbuf[sizeof "255.255.255.255"];
1198 const char *addrstr;
1199
1200 addrstr = inet_ntop(AF_INET, &peer->sin_addr, addrbuf, sizeof(addrbuf));
1201 DPRINTF(("<--- PING %s id 0x%x seq %d\n",
1202 addrstr, ntohs(id), ntohs(seq)));
1203 }
1204
1205
1206 /*
1207 * Is this a reply to one of our pings?
1208 */
1209
/*
 * Translate the reply's source to the address the guest pinged; pcbs
 * are keyed by the guest-visible destination.
 */
1210 ip_addr_copy(target_ip, iph->src);
1211 mapped = pxremap_inbound_ip4(&target_ip, &target_ip);
1212 if (mapped == PXREMAP_FAILED) {
1213 return;
1214 }
/* an unmapped reply has its TTL decremented below, so TTL 1 cannot be forwarded */
1215 if (mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) {
1216 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1217 return;
1218 }
1219
1220 sys_mutex_lock(&pxping->lock);
1221 pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id);
1222 if (pcb == NULL) {
1223 sys_mutex_unlock(&pxping->lock);
1224 DPRINTF2(("%s: no match\n", __func__));
1225 return;
1226 }
1227
1228 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1229
1230 /* save info before unlocking since pcb may expire */
1231 ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src));
1232 guest_id = pcb->guest_id;
1233
1234 sys_mutex_unlock(&pxping->lock);
1235
1236
1237 /*
1238 * Rewrite headers and forward to guest.
1239 */
1240
1241 /* rewrite ICMP echo header */
1242 sum = (u16_t)~icmph->chksum;
1243 sum += chksum_update_16(&icmph->id, guest_id);
1244 sum = FOLD_U32T(sum);
1245 icmph->chksum = ~sum;
1246
1247 /* rewrite IP header */
1248 sum = (u16_t)~IPH_CHKSUM(iph);
1249 sum += chksum_update_32((u32_t *)&iph->dest,
1250 ip4_addr_get_u32(&guest_ip));
1251 if (mapped == PXREMAP_MAPPED) {
1252 sum += chksum_update_32((u32_t *)&iph->src,
1253 ip4_addr_get_u32(&target_ip));
1254 }
1255 else {
/*
 * NOTE(review): the added constant appears to be the checksum
 * adjustment for the TTL decrement (the 16-bit header word holding
 * TTL in its high byte goes down by 0x0100) - confirm.
 */
1256 IPH_TTL_SET(iph, IPH_TTL(iph) - 1);
1257 sum += PP_NTOHS(~0x0100);
1258 }
1259 sum = FOLD_U32T(sum);
1260 IPH_CHKSUM_SET(iph, ~sum);
1261
1262 pxping_pmgr_forward_inbound(pxping, iplen);
1263}
1264
1265
1266/**
1267 * Check if this incoming ICMP error (destination unreachable or time
1268 * exceeded) is about one of our pings and forward it to the guest.
1269 */
1270static void
1271pxping_pmgr_icmp4_error(struct pxping *pxping,
1272 u16_t iplen, struct sockaddr_in *peer)
1273{
1274 struct ip_hdr *iph, *oiph;
1275 struct icmp_echo_hdr *icmph, *oicmph;
1276 u16_t oipoff, oiphlen, oiplen;
1277 u16_t id, seq;
1278 ip_addr_t guest_ip, target_ip, error_ip;
1279 int target_mapped, error_mapped;
1280 struct ping_pcb *pcb;
1281 u16_t guest_id;
1282 u32_t sum;
1283
1284 iph = (struct ip_hdr *)pollmgr_udpbuf;
1285 icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
1286
1287 /*
1288 * Inner IP datagram is not checked by the kernel and may be
1289 * anything, possibly malicious.
1290 */
1291
1292 oipoff = IP_HLEN + ICMP_HLEN;
1293 oiplen = iplen - oipoff; /* NB: truncated length, not IPH_LEN(oiph) */
1294 if (oiplen < IP_HLEN) {
1295 DPRINTF2(("%s: original datagram truncated to %d bytes\n",
1296 __func__, oiplen));
1297 }
1298
1299 /* IP header of the original message */
1300 oiph = (struct ip_hdr *)(pollmgr_udpbuf + oipoff);
1301
1302 /* match version */
1303 if (IPH_V(oiph) != 4) {
1304 DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(oiph)));
1305 return;
1306 }
1307
1308 /* can't match fragments except the first one */
1309 if ((IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK)) != 0) {
1310 DPRINTF2(("%s: ignoring fragment with offset %d\n",
1311 __func__, ntohs(IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK))));
1312 return;
1313 }
1314
1315 if (IPH_PROTO(oiph) != IP_PROTO_ICMP) {
1316#if 0
1317 /* don't spam with every "destination unreachable" in the system */
1318 DPRINTF2(("%s: ignoring protocol %d\n", __func__, IPH_PROTO(oiph)));
1319#endif
1320 return;
1321 }
1322
1323 oiphlen = IPH_HL(oiph) * 4;
1324 if (oiplen < oiphlen + ICMP_HLEN) {
1325 DPRINTF2(("%s: original datagram truncated to %d bytes\n",
1326 __func__, oiplen));
1327 return;
1328 }
1329
1330 oicmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + oipoff + oiphlen);
1331 if (ICMPH_TYPE(oicmph) != ICMP_ECHO) {
1332 DPRINTF2(("%s: ignoring ICMP error for original ICMP type %d\n",
1333 __func__, ICMPH_TYPE(oicmph)));
1334 return;
1335 }
1336
1337 id = oicmph->id;
1338 seq = oicmph->seqno;
1339
1340 {
1341 char addrbuf[sizeof "255.255.255.255"];
1342 const char *addrstr;
1343
1344 addrstr = inet_ntop(AF_INET, &oiph->dest, addrbuf, sizeof(addrbuf));
1345 DPRINTF2(("%s: ping %s id 0x%x seq %d",
1346 __func__, addrstr, ntohs(id), ntohs(seq)));
1347 if (ICMPH_TYPE(icmph) == ICMP_DUR) {
1348 DPRINTF2((" unreachable (code %d)\n", ICMPH_CODE(icmph)));
1349 }
1350 else {
1351 DPRINTF2((" time exceeded\n"));
1352 }
1353 }
1354
1355
1356 /*
1357 * Is the inner (failed) datagram one of our pings?
1358 */
1359
1360 ip_addr_copy(target_ip, oiph->dest); /* inner (failed) */
1361 target_mapped = pxremap_inbound_ip4(&target_ip, &target_ip);
1362 if (target_mapped == PXREMAP_FAILED) {
1363 return;
1364 }
1365
1366 sys_mutex_lock(&pxping->lock);
1367 pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id);
1368 if (pcb == NULL) {
1369 sys_mutex_unlock(&pxping->lock);
1370 DPRINTF2(("%s: no match\n", __func__));
1371 return;
1372 }
1373
1374 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1375
1376 /* save info before unlocking since pcb may expire */
1377 ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src));
1378 guest_id = pcb->guest_id;
1379
1380 sys_mutex_unlock(&pxping->lock);
1381
1382
1383 /*
1384 * Rewrite both inner and outer headers and forward to guest.
1385 * Note that the checksum of the outer ICMP error message is
1386 * preserved by the changes we do to inner headers.
1387 */
1388
1389 ip_addr_copy(error_ip, iph->src); /* node that reports the error */
1390 error_mapped = pxremap_inbound_ip4(&error_ip, &error_ip);
1391 if (error_mapped == PXREMAP_FAILED) {
1392 return;
1393 }
1394 if (error_mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) {
1395 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1396 return;
1397 }
1398
1399 /* rewrite inner ICMP echo header */
1400 sum = (u16_t)~oicmph->chksum;
1401 sum += chksum_update_16(&oicmph->id, guest_id);
1402 sum = FOLD_U32T(sum);
1403 oicmph->chksum = ~sum;
1404
1405 /* rewrite inner IP header */
1406 sum = (u16_t)~IPH_CHKSUM(oiph);
1407 sum += chksum_update_32((u32_t *)&oiph->src, ip4_addr_get_u32(&guest_ip));
1408 if (target_mapped == PXREMAP_MAPPED) {
1409 sum += chksum_update_32((u32_t *)&oiph->dest, ip4_addr_get_u32(&target_ip));
1410 }
1411 sum = FOLD_U32T(sum);
1412 IPH_CHKSUM_SET(oiph, ~sum);
1413
1414 /* rewrite outer IP header */
1415 sum = (u16_t)~IPH_CHKSUM(iph);
1416 sum += chksum_update_32((u32_t *)&iph->dest, ip4_addr_get_u32(&guest_ip));
1417 if (error_mapped == PXREMAP_MAPPED) {
1418 sum += chksum_update_32((u32_t *)&iph->src, ip4_addr_get_u32(&error_ip));
1419 }
1420 else {
1421 IPH_TTL_SET(iph, IPH_TTL(iph) - 1);
1422 sum += PP_NTOHS(~0x0100);
1423 }
1424 sum = FOLD_U32T(sum);
1425 IPH_CHKSUM_SET(iph, ~sum);
1426
1427 pxping_pmgr_forward_inbound(pxping, iplen);
1428}
1429
1430
1431/**
1432 * Process incoming ICMPv6 message for the host.
1433 * NB: we will get a lot of spam here and have to sift through it.
1434 */
1435static void
1436pxping_pmgr_icmp6(struct pxping *pxping)
1437{
1438#ifndef RT_OS_WINDOWS
1439 struct msghdr mh;
1440 ssize_t nread;
1441#else
1442 WSAMSG mh;
1443 DWORD nread;
1444#endif
1445 IOVEC iov[1];
1446 static u8_t cmsgbuf[128];
1447 struct cmsghdr *cmh;
1448 struct sockaddr_in6 sin6;
1449 socklen_t salen = sizeof(sin6);
1450 struct icmp6_echo_hdr *icmph;
1451 struct in6_pktinfo *pktinfo;
1452 int hopl, tclass;
1453 int status;
1454
1455 char addrbuf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
1456 const char *addrstr;
1457
1458 /*
1459 * Reads from raw IPv6 sockets deliver only the payload. Full
1460 * headers are available via recvmsg(2)/cmsg(3).
1461 */
1462 IOVEC_SET_BASE(iov[0], pollmgr_udpbuf);
1463 IOVEC_SET_LEN(iov[0], sizeof(pollmgr_udpbuf));
1464
1465 memset(&mh, 0, sizeof(mh));
1466#ifndef RT_OS_WINDOWS
1467 mh.msg_name = &sin6;
1468 mh.msg_namelen = sizeof(sin6);
1469 mh.msg_iov = iov;
1470 mh.msg_iovlen = 1;
1471 mh.msg_control = cmsgbuf;
1472 mh.msg_controllen = sizeof(cmsgbuf);
1473 mh.msg_flags = 0;
1474
1475 nread = recvmsg(pxping->sock6, &mh, 0);
1476 if (nread < 0) {
1477 perror(__func__);
1478 return;
1479 }
1480#else /* RT_OS_WINDOWS */
1481 mh.name = (LPSOCKADDR)&sin6;
1482 mh.namelen = sizeof(sin6);
1483 mh.lpBuffers = iov;
1484 mh.dwBufferCount = 1;
1485 mh.Control.buf = cmsgbuf;
1486 mh.Control.len = sizeof(cmsgbuf);
1487 mh.dwFlags = 0;
1488
1489 status = (*pxping->pfWSARecvMsg6)(pxping->sock6, &mh, &nread, NULL, NULL);
1490 if (status == SOCKET_ERROR) {
1491 DPRINTF2(("%s: error %d\n", __func__, WSAGetLastError()));
1492 return;
1493 }
1494#endif
1495
1496 icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf;
1497
1498 addrstr = inet_ntop(AF_INET6, (void *)&sin6.sin6_addr,
1499 addrbuf, sizeof(addrbuf));
1500 DPRINTF2(("%s: %s ICMPv6: ", __func__, addrstr));
1501
1502 if (icmph->type == ICMP6_TYPE_EREP) {
1503 DPRINTF2(("echo reply %04x %u\n",
1504 (unsigned int)icmph->id, (unsigned int)icmph->seqno));
1505 }
1506 else { /* XXX */
1507 if (icmph->type == ICMP6_TYPE_EREQ) {
1508 DPRINTF2(("echo request %04x %u\n",
1509 (unsigned int)icmph->id, (unsigned int)icmph->seqno));
1510 }
1511 else if (icmph->type == ICMP6_TYPE_DUR) {
1512 DPRINTF2(("destination unreachable\n"));
1513 }
1514 else if (icmph->type == ICMP6_TYPE_PTB) {
1515 DPRINTF2(("packet too big\n"));
1516 }
1517 else if (icmph->type == ICMP6_TYPE_TE) {
1518 DPRINTF2(("time exceeded\n"));
1519 }
1520 else if (icmph->type == ICMP6_TYPE_PP) {
1521 DPRINTF2(("parameter problem\n"));
1522 }
1523 else {
1524 DPRINTF2(("type %d len %u\n", icmph->type, (unsigned int)nread));
1525 }
1526
1527 if (icmph->type >= ICMP6_TYPE_EREQ) {
1528 return; /* informational message */
1529 }
1530 }
1531
1532 pktinfo = NULL;
1533 hopl = -1;
1534 tclass = -1;
1535 for (cmh = CMSG_FIRSTHDR(&mh); cmh != NULL; cmh = CMSG_NXTHDR(&mh, cmh)) {
1536 if (cmh->cmsg_len == 0)
1537 break;
1538
1539 if (cmh->cmsg_level == IPPROTO_IPV6
1540 && cmh->cmsg_type == IPV6_HOPLIMIT
1541 && cmh->cmsg_len == CMSG_LEN(sizeof(int)))
1542 {
1543 hopl = *(int *)CMSG_DATA(cmh);
1544 DPRINTF2(("hoplimit = %d\n", hopl));
1545 }
1546
1547 if (cmh->cmsg_level == IPPROTO_IPV6
1548 && cmh->cmsg_type == IPV6_PKTINFO
1549 && cmh->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo)))
1550 {
1551 pktinfo = (struct in6_pktinfo *)CMSG_DATA(cmh);
1552 DPRINTF2(("pktinfo found\n"));
1553 }
1554 }
1555
1556 if (pktinfo == NULL) {
1557 /*
1558 * ip6_output_if() doesn't do checksum for us so we need to
1559 * manually recompute it - for this we must know the
1560 * destination address of the pseudo-header that we will
1561 * rewrite with guest's address. (TODO: yeah, yeah, we can
1562 * compute it from scratch...)
1563 */
1564 DPRINTF2(("%s: unable to get pktinfo\n", __func__));
1565 return;
1566 }
1567
1568 if (hopl < 0) {
1569 hopl = LWIP_ICMP6_HL;
1570 }
1571
1572 if (icmph->type == ICMP6_TYPE_EREP) {
1573 pxping_pmgr_icmp6_echo(pxping,
1574 (ip6_addr_t *)&sin6.sin6_addr,
1575 (ip6_addr_t *)&pktinfo->ipi6_addr,
1576 hopl, tclass, (u16_t)nread);
1577 }
1578 else if (icmph->type < ICMP6_TYPE_EREQ) {
1579 pxping_pmgr_icmp6_error(pxping,
1580 (ip6_addr_t *)&sin6.sin6_addr,
1581 (ip6_addr_t *)&pktinfo->ipi6_addr,
1582 hopl, tclass, (u16_t)nread);
1583 }
1584}
1585
1586
1587/**
1588 * Check if this incoming ICMPv6 echo reply is for one of our pings
1589 * and forward it to the guest.
1590 */
1591static void
1592pxping_pmgr_icmp6_echo(struct pxping *pxping,
1593 ip6_addr_t *src, ip6_addr_t *dst,
1594 int hopl, int tclass, u16_t icmplen)
1595{
1596 struct icmp6_echo_hdr *icmph;
1597 ip6_addr_t guest_ip, target_ip;
1598 int mapped;
1599 struct ping_pcb *pcb;
1600 u16_t id, guest_id;
1601 u32_t sum;
1602
1603 ip6_addr_copy(target_ip, *src);
1604 mapped = pxremap_inbound_ip6(&target_ip, &target_ip);
1605 if (mapped == PXREMAP_FAILED) {
1606 return;
1607 }
1608 else if (mapped == PXREMAP_ASIS) {
1609 if (hopl == 1) {
1610 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1611 return;
1612 }
1613 --hopl;
1614 }
1615
1616 icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf;
1617 id = icmph->id;
1618
1619 sys_mutex_lock(&pxping->lock);
1620 pcb = pxping_pcb_for_reply(pxping, 1, ip6_2_ipX(&target_ip), id);
1621 if (pcb == NULL) {
1622 sys_mutex_unlock(&pxping->lock);
1623 DPRINTF2(("%s: no match\n", __func__));
1624 return;
1625 }
1626
1627 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1628
1629 /* save info before unlocking since pcb may expire */
1630 ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src));
1631 guest_id = pcb->guest_id;
1632
1633 sys_mutex_unlock(&pxping->lock);
1634
1635 /* rewrite ICMPv6 echo header */
1636 sum = (u16_t)~icmph->chksum;
1637 sum += chksum_update_16(&icmph->id, guest_id);
1638 sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */
1639 if (mapped) {
1640 sum += chksum_delta_ipv6(src, &target_ip); /* pseudo */
1641 }
1642 sum = FOLD_U32T(sum);
1643 icmph->chksum = ~sum;
1644
1645 pxping_pmgr_forward_inbound6(pxping,
1646 &target_ip, /* echo reply src */
1647 &guest_ip, /* echo reply dst */
1648 hopl, tclass, icmplen);
1649}
1650
1651
1652/**
1653 * Check if this incoming ICMPv6 error is about one of our pings and
1654 * forward it to the guest.
1655 */
1656static void
1657pxping_pmgr_icmp6_error(struct pxping *pxping,
1658 ip6_addr_t *src, ip6_addr_t *dst,
1659 int hopl, int tclass, u16_t icmplen)
1660{
1661 struct icmp6_hdr *icmph;
1662 u8_t *bufptr;
1663 size_t buflen, hlen;
1664 int proto;
1665 struct ip6_hdr *oiph;
1666 struct icmp6_echo_hdr *oicmph;
1667 struct ping_pcb *pcb;
1668 ip6_addr_t guest_ip, target_ip, error_ip;
1669 int target_mapped, error_mapped;
1670 u16_t guest_id;
1671 u32_t sum;
1672
1673 icmph = (struct icmp6_hdr *)pollmgr_udpbuf;
1674
1675 /*
1676 * Inner IP datagram is not checked by the kernel and may be
1677 * anything, possibly malicious.
1678 */
1679 oiph = NULL;
1680 oicmph = NULL;
1681
1682 bufptr = pollmgr_udpbuf;
1683 buflen = icmplen;
1684
1685 hlen = sizeof(*icmph);
1686 proto = IP6_NEXTH_ENCAPS; /* i.e. IPv6, lwIP's name is unfortuate */
1687 for (;;) {
1688 if (hlen > buflen) {
1689 DPRINTF2(("truncated datagram inside ICMPv6 error message is too short\n"));
1690 return;
1691 }
1692 buflen -= hlen;
1693 bufptr += hlen;
1694
1695 if (proto == IP6_NEXTH_ENCAPS && oiph == NULL) { /* outermost IPv6 */
1696 oiph = (struct ip6_hdr *)bufptr;
1697 if (IP6H_V(oiph) != 6) {
1698 DPRINTF2(("%s: unexpected IP version %d\n", __func__, IP6H_V(oiph)));
1699 return;
1700 }
1701
1702 proto = IP6H_NEXTH(oiph);
1703 hlen = IP6_HLEN;
1704 }
1705 else if (proto == IP6_NEXTH_ICMP6) {
1706 oicmph = (struct icmp6_echo_hdr *)bufptr;
1707 break;
1708 }
1709 else if (proto == IP6_NEXTH_ROUTING
1710 || proto == IP6_NEXTH_HOPBYHOP
1711 || proto == IP6_NEXTH_DESTOPTS)
1712 {
1713 proto = bufptr[0];
1714 hlen = (bufptr[1] + 1) * 8;
1715 }
1716 else {
1717 DPRINTF2(("%s: stopping at protocol %d\n", __func__, proto));
1718 break;
1719 }
1720 }
1721
1722 if (oiph == NULL || oicmph == NULL) {
1723 return;
1724 }
1725
1726 if (buflen < sizeof(*oicmph)) {
1727 DPRINTF2(("%s: original ICMPv6 is truncated too short\n", __func__));
1728 return;
1729 }
1730
1731 if (oicmph->type != ICMP6_TYPE_EREQ) {
1732 DPRINTF2(("%s: ignoring original ICMPv6 type %d\n", __func__, oicmph->type));
1733 return;
1734 }
1735
1736 memcpy(&target_ip, &oiph->dest, sizeof(target_ip)); /* inner (failed) */
1737 target_mapped = pxremap_inbound_ip6(&target_ip, &target_ip);
1738 if (target_mapped == PXREMAP_FAILED) {
1739 return;
1740 }
1741
1742 sys_mutex_lock(&pxping->lock);
1743 pcb = pxping_pcb_for_reply(pxping, 1, ip_2_ipX(&target_ip), oicmph->id);
1744 if (pcb == NULL) {
1745 sys_mutex_unlock(&pxping->lock);
1746 DPRINTF2(("%s: no match\n", __func__));
1747 return;
1748 }
1749
1750 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1751
1752 /* save info before unlocking since pcb may expire */
1753 ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src));
1754 guest_id = pcb->guest_id;
1755
1756 sys_mutex_unlock(&pxping->lock);
1757
1758
1759 /*
1760 * Rewrite inner and outer headers and forward to guest. Note
1761 * that IPv6 has no IP header checksum, but uses pseudo-header for
1762 * ICMPv6, so we update both in one go, adjusting ICMPv6 checksum
1763 * as we rewrite IP header.
1764 */
1765
1766 ip6_addr_copy(error_ip, *src); /* node that reports the error */
1767 error_mapped = pxremap_inbound_ip6(&error_ip, &error_ip);
1768 if (error_mapped == PXREMAP_FAILED) {
1769 return;
1770 }
1771 if (error_mapped == PXREMAP_ASIS && hopl == 1) {
1772 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1773 return;
1774 }
1775
1776 /* rewrite inner ICMPv6 echo header and inner IPv6 header */
1777 sum = (u16_t)~oicmph->chksum;
1778 sum += chksum_update_16(&oicmph->id, guest_id);
1779 sum += chksum_update_ipv6((ip6_addr_t *)&oiph->src, &guest_ip);
1780 if (target_mapped) {
1781 sum += chksum_delta_ipv6((ip6_addr_t *)&oiph->dest, &target_ip);
1782 }
1783 sum = FOLD_U32T(sum);
1784 oicmph->chksum = ~sum;
1785
1786 /* rewrite outer ICMPv6 error header */
1787 sum = (u16_t)~icmph->chksum;
1788 sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */
1789 if (error_mapped) {
1790 sum += chksum_delta_ipv6(src, &error_ip); /* pseudo */
1791 }
1792 sum = FOLD_U32T(sum);
1793 icmph->chksum = ~sum;
1794
1795 pxping_pmgr_forward_inbound6(pxping,
1796 &error_ip, /* error src */
1797 &guest_ip, /* error dst */
1798 hopl, tclass, icmplen);
1799}
1800
1801
1802/**
1803 * Hand off ICMP datagram to the lwip thread where it will be
1804 * forwarded to the guest.
1805 *
1806 * We no longer need ping_pcb. The pcb may get expired on the lwip
1807 * thread, but we have already patched necessary information into the
1808 * datagram.
1809 */
1810static void
1811pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen)
1812{
1813 struct pbuf *p;
1814 struct ping_msg *msg;
1815 err_t error;
1816
1817 p = pbuf_alloc(PBUF_LINK, iplen, PBUF_RAM);
1818 if (p == NULL) {
1819 DPRINTF(("%s: pbuf_alloc(%d) failed\n",
1820 __func__, (unsigned int)iplen));
1821 return;
1822 }
1823
1824 error = pbuf_take(p, pollmgr_udpbuf, iplen);
1825 if (error != ERR_OK) {
1826 DPRINTF(("%s: pbuf_take(%d) failed\n",
1827 __func__, (unsigned int)iplen));
1828 pbuf_free(p);
1829 return;
1830 }
1831
1832 msg = (struct ping_msg *)malloc(sizeof(*msg));
1833 if (msg == NULL) {
1834 pbuf_free(p);
1835 return;
1836 }
1837
1838 msg->msg.type = TCPIP_MSG_CALLBACK_STATIC;
1839 msg->msg.sem = NULL;
1840 msg->msg.msg.cb.function = pxping_pcb_forward_inbound;
1841 msg->msg.msg.cb.ctx = (void *)msg;
1842
1843 msg->pxping = pxping;
1844 msg->p = p;
1845
1846 proxy_lwip_post(&msg->msg);
1847}
1848
1849
1850static void
1851pxping_pcb_forward_inbound(void *arg)
1852{
1853 struct ping_msg *msg = (struct ping_msg *)arg;
1854 err_t error;
1855
1856 LWIP_ASSERT1(msg != NULL);
1857 LWIP_ASSERT1(msg->pxping != NULL);
1858 LWIP_ASSERT1(msg->p != NULL);
1859
1860 error = ip_raw_output_if(msg->p, msg->pxping->netif);
1861 if (error != ERR_OK) {
1862 DPRINTF(("%s: ip_output_if: %s\n",
1863 __func__, proxy_lwip_strerr(error)));
1864 }
1865 pbuf_free(msg->p);
1866 free(msg);
1867}
1868
1869
1870static void
1871pxping_pmgr_forward_inbound6(struct pxping *pxping,
1872 ip6_addr_t *src, ip6_addr_t *dst,
1873 u8_t hopl, u8_t tclass,
1874 u16_t icmplen)
1875{
1876 struct pbuf *p;
1877 struct ping6_msg *msg;
1878
1879 err_t error;
1880
1881 p = pbuf_alloc(PBUF_IP, icmplen, PBUF_RAM);
1882 if (p == NULL) {
1883 DPRINTF(("%s: pbuf_alloc(%d) failed\n",
1884 __func__, (unsigned int)icmplen));
1885 return;
1886 }
1887
1888 error = pbuf_take(p, pollmgr_udpbuf, icmplen);
1889 if (error != ERR_OK) {
1890 DPRINTF(("%s: pbuf_take(%d) failed\n",
1891 __func__, (unsigned int)icmplen));
1892 pbuf_free(p);
1893 return;
1894 }
1895
1896 msg = (struct ping6_msg *)malloc(sizeof(*msg));
1897 if (msg == NULL) {
1898 pbuf_free(p);
1899 return;
1900 }
1901
1902 msg->msg.type = TCPIP_MSG_CALLBACK_STATIC;
1903 msg->msg.sem = NULL;
1904 msg->msg.msg.cb.function = pxping_pcb_forward_inbound6;
1905 msg->msg.msg.cb.ctx = (void *)msg;
1906
1907 msg->pxping = pxping;
1908 msg->p = p;
1909 ip6_addr_copy(msg->src, *src);
1910 ip6_addr_copy(msg->dst, *dst);
1911 msg->hopl = hopl;
1912 msg->tclass = tclass;
1913
1914 proxy_lwip_post(&msg->msg);
1915}
1916
1917
1918static void
1919pxping_pcb_forward_inbound6(void *arg)
1920{
1921 struct ping6_msg *msg = (struct ping6_msg *)arg;
1922 err_t error;
1923
1924 LWIP_ASSERT1(msg != NULL);
1925 LWIP_ASSERT1(msg->pxping != NULL);
1926 LWIP_ASSERT1(msg->p != NULL);
1927
1928 error = ip6_output_if(msg->p,
1929 &msg->src, &msg->dst, msg->hopl, msg->tclass,
1930 IP6_NEXTH_ICMP6, msg->pxping->netif);
1931 if (error != ERR_OK) {
1932 DPRINTF(("%s: ip6_output_if: %s\n",
1933 __func__, proxy_lwip_strerr(error)));
1934 }
1935 pbuf_free(msg->p);
1936 free(msg);
1937}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette