VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/ip_icmpwin.c@ 88525

Last change on this file since 88525 was 88525, checked in by vboxsync, 4 years ago

NAT: Handle IP options in the ping echo request. bugref:9986.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Data Id Revision
File size: 14.6 KB
Line 
1/* $Id: ip_icmpwin.c 88525 2021-04-15 11:40:05Z vboxsync $ */
2/** @file
3 * NAT - Windows ICMP API based ping proxy.
4 */
5
6/*
7 * Copyright (C) 2006-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#include "slirp.h"
19#include "ip_icmp.h"
20
21#include <winternl.h> /* for PIO_APC_ROUTINE &c */
22#ifndef PIO_APC_ROUTINE_DEFINED
23# define PIO_APC_ROUTINE_DEFINED 1
24#endif
25#include <iprt/win/iphlpapi.h>
26#include <icmpapi.h>
27
/*
 * A header of ICMP ECHO.  Intended for storage, unlike struct icmp
 * which is intended to be overlayed onto a buffer.
 *
 * The code in this file stores icmp_echo_id and icmp_echo_seq exactly
 * as copied from the request and only converts them with RT_N2H_U16()
 * for logging.
 */
struct icmp_echo {
    uint8_t  icmp_type;         /* ICMP message type, e.g. ICMP_ECHO */
    uint8_t  icmp_code;         /* ICMP message code */
    uint16_t icmp_cksum;        /* ICMP checksum */
    uint16_t icmp_echo_id;      /* echo identifier */
    uint16_t icmp_echo_seq;     /* echo sequence number */
};

/* the structure is copied to/from packets verbatim, so no padding allowed */
AssertCompileSize(struct icmp_echo, 8);
41
42
/*
 * Bookkeeping for one outstanding echo request.  Allocated in
 * icmpwin_ping() with enough trailing storage for "bufsize" bytes
 * of "buf", and handed to IcmpSendEcho2() as the callback context.
 */
struct pong {
    /*
     * Owning NAT instance.  icmpwin_finit() sets this to NULL for
     * requests that are still pending, which tells icmpwin_callback()
     * to just free the structure.
     */
    PNATState pData;

    /* linkage for the pongs_expected / pongs_received queues */
    TAILQ_ENTRY(pong) queue_entry;

    /* copy of the request's IP header, options included (up to 60 bytes) */
    union {
        struct ip ip;
        uint8_t au[60];
    } reqiph;
    /* copy of the request's ICMP echo header */
    struct icmp_echo reqicmph;

    /* size of the reply buffer that follows */
    size_t bufsize;
    /* reply buffer for IcmpSendEcho2(); really "bufsize" bytes long */
    uint8_t buf[1];
};
57
58
/* IcmpSendEcho2() completion callbacks; which one is used is decided in icmpwin_init() */
static VOID WINAPI icmpwin_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved);
static VOID WINAPI icmpwin_callback_old(void *ctx);

/* common callback code and the deferred reply processing */
static void icmpwin_callback(struct pong *pong);
static void icmpwin_pong(struct pong *pong);

/* helpers that build the packets sent back to the guest */
static struct mbuf *icmpwin_get_error(struct pong *pong, int type, int code);
static struct mbuf *icmpwin_get_mbuf(PNATState pData, size_t reqsize);
67
68
/*
 * On Windows XP and Windows Server 2003 IcmpSendEcho2() callback
 * is FARPROC, but starting from Vista it's PIO_APC_ROUTINE with
 * two extra arguments.  Callbacks use WINAPI (stdcall) calling
 * convention with callee responsible for popping the arguments,
 * so to avoid stack corruption we check windows version at run
 * time and provide correct callback.
 *
 * The pointer is set once, by the first icmpwin_init() call that
 * finds it NULL, and is then passed to every IcmpSendEcho2() call.
 *
 * XXX: this is system-wide, but what about multiple NAT threads?
 */
static PIO_APC_ROUTINE g_pfnIcmpCallback;
80
81
82int
83icmpwin_init(PNATState pData)
84{
85 if (g_pfnIcmpCallback == NULL)
86 {
87 OSVERSIONINFO osvi;
88 int status;
89
90 ZeroMemory(&osvi, sizeof(OSVERSIONINFO));
91 osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
92 status = GetVersionEx(&osvi);
93 if (status == 0)
94 return 1;
95
96 if (osvi.dwMajorVersion >= 6)
97 g_pfnIcmpCallback = icmpwin_callback_apc;
98 else
99 g_pfnIcmpCallback = (PIO_APC_ROUTINE)icmpwin_callback_old;
100 }
101
102 TAILQ_INIT(&pData->pongs_expected);
103 TAILQ_INIT(&pData->pongs_received);
104
105 pData->icmp_socket.sh = IcmpCreateFile();
106 pData->phEvents[VBOX_ICMP_EVENT_INDEX] = CreateEvent(NULL, FALSE, FALSE, NULL);
107
108 return 0;
109}
110
111
112void
113icmpwin_finit(PNATState pData)
114{
115 IcmpCloseHandle(pData->icmp_socket.sh);
116
117 while (!TAILQ_EMPTY(&pData->pongs_received)) {
118 struct pong *pong = TAILQ_FIRST(&pData->pongs_received);
119 TAILQ_REMOVE(&pData->pongs_received, pong, queue_entry);
120 RTMemFree(pong);
121 }
122
123 /* this should be empty */
124 while (!TAILQ_EMPTY(&pData->pongs_expected)) {
125 struct pong *pong = TAILQ_FIRST(&pData->pongs_expected);
126 TAILQ_REMOVE(&pData->pongs_expected, pong, queue_entry);
127 pong->pData = NULL;
128 }
129}
130
131
132/*
133 * Outgoing ping from guest.
134 */
135void
136icmpwin_ping(PNATState pData, struct mbuf *m, int hlen)
137{
138 struct ip *ip = mtod(m, struct ip *);
139 size_t reqsize, pongsize;
140 uint8_t ttl;
141 size_t bufsize;
142 struct pong *pong;
143 IPAddr dst;
144 IP_OPTION_INFORMATION opts;
145 void *reqdata;
146 int status;
147
148 ttl = ip->ip_ttl;
149 AssertReturnVoid(ttl > 0);
150
151 size_t hdrsize = hlen + sizeof(struct icmp_echo);
152 reqsize = ip->ip_len - hdrsize;
153
154 bufsize = sizeof(ICMP_ECHO_REPLY);
155 if (reqsize < sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo))
156 bufsize += sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo);
157 else
158 bufsize += reqsize;
159 bufsize += 16; /* whatever that is; empirically at least XP needs it */
160
161 pongsize = RT_UOFFSETOF(struct pong, buf) + bufsize;
162 if (pData->cbIcmpPending + pongsize > 1024 * 1024)
163 return;
164
165 pong = RTMemAlloc(pongsize);
166 if (RT_UNLIKELY(pong == NULL))
167 return;
168
169 pong->pData = pData;
170 pong->bufsize = bufsize;
171 m_copydata(m, 0, hlen, (caddr_t)&pong->reqiph);
172 m_copydata(m, hlen, sizeof(struct icmp_echo), (caddr_t)&pong->reqicmph);
173 AssertReturnVoid(pong->reqicmph.icmp_type == ICMP_ECHO);
174
175 if (m->m_next == NULL)
176 {
177 /* already in single contiguous buffer */
178 reqdata = mtod(m, char *) + hdrsize;
179 }
180 else
181 {
182 /* use reply buffer as temporary storage */
183 reqdata = pong->buf;
184 m_copydata(m, (int)hdrsize, (int)reqsize, reqdata);
185 }
186
187 dst = ip->ip_dst.s_addr;
188
189 opts.Ttl = ttl;
190 opts.Tos = ip->ip_tos; /* affected by DisableUserTOSSetting key */
191 opts.Flags = (ip->ip_off & IP_DF) != 0 ? IP_FLAG_DF : 0;
192 opts.OptionsSize = 0;
193 opts.OptionsData = 0;
194
195
196 status = IcmpSendEcho2(pData->icmp_socket.sh, NULL,
197 g_pfnIcmpCallback, pong,
198 dst, reqdata, (WORD)reqsize, &opts,
199 pong->buf, (DWORD)pong->bufsize,
200 5 * 1000 /* ms */);
201
202 if (RT_UNLIKELY(status != 0))
203 {
204 Log2(("NAT: IcmpSendEcho2: unexpected status %d\n", status));
205 }
206 else if ((status = GetLastError()) != ERROR_IO_PENDING)
207 {
208 int code;
209
210 Log2(("NAT: IcmpSendEcho2: error %d\n", status));
211 switch (status) {
212 case ERROR_NETWORK_UNREACHABLE:
213 code = ICMP_UNREACH_NET;
214 break;
215 case ERROR_HOST_UNREACHABLE:
216 code = ICMP_UNREACH_HOST;
217 break;
218 default:
219 code = -1;
220 break;
221 }
222
223 if (code != -1) /* send icmp error */
224 {
225 struct mbuf *em = icmpwin_get_error(pong, ICMP_UNREACH, code);
226 if (em != NULL)
227 {
228 struct ip *eip = mtod(em, struct ip *);
229 eip->ip_src = alias_addr;
230 ip_output(pData, NULL, em);
231 }
232 }
233 }
234 else /* success */
235 {
236 Log2(("NAT: pong %p for ping %RTnaipv4 id 0x%04x seq %d len %zu (%zu)\n",
237 pong, dst,
238 RT_N2H_U16(pong->reqicmph.icmp_echo_id),
239 RT_N2H_U16(pong->reqicmph.icmp_echo_seq),
240 pongsize, reqsize));
241
242 pData->cbIcmpPending += pongsize;
243 TAILQ_INSERT_TAIL(&pData->pongs_expected, pong, queue_entry);
244 pong = NULL; /* callback owns it now */
245 }
246
247 if (pong != NULL)
248 RTMemFree(pong);
249}
250
251
252static VOID WINAPI
253icmpwin_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved)
254{
255 struct pong *pong = (struct pong *)ctx;
256 if (pong != NULL)
257 icmpwin_callback(pong);
258 RT_NOREF2(iob, reserved);
259}
260
261
262static VOID WINAPI
263icmpwin_callback_old(void *ctx)
264{
265 struct pong *pong = (struct pong *)ctx;
266 if (pong != NULL)
267 icmpwin_callback(pong);
268}
269
270
/*
 * Actual callback code for IcmpSendEcho2(), called via the OS
 * version specific trampolines above.
 *
 * Since async callback can be called anytime the thread is alertable,
 * it's not safe to do any processing here.  Instead queue it and
 * notify the main loop.
 *
 * Ownership of "pong": a pong that was orphaned by icmpwin_finit()
 * (pData is NULL) is freed right here.  Otherwise it is moved from
 * pongs_expected to pongs_received and is freed by icmpwin_process()
 * once it has been handled.
 */
static void
icmpwin_callback(struct pong *pong)
{
    PNATState pData = pong->pData;

    /* NAT instance is gone, nobody is interested in the reply */
    if (pData == NULL)
    {
        RTMemFree(pong);
        return;
    }

#ifdef DEBUG
    /* sanity: must be on the expected queue and not yet on the received one */
    {
        struct pong *expected, *already;

        TAILQ_FOREACH(expected, &pData->pongs_expected, queue_entry)
        {
            if (expected == pong)
                break;
        }
        Assert(expected);

        TAILQ_FOREACH(already, &pData->pongs_received, queue_entry)
        {
            if (already == pong)
                break;
        }
        Assert(!already);
    }
#endif

    TAILQ_REMOVE(&pData->pongs_expected, pong, queue_entry);
    TAILQ_INSERT_TAIL(&pData->pongs_received, pong, queue_entry);

    /* wake up the main loop so that it calls icmpwin_process() */
    WSASetEvent(pData->phEvents[VBOX_ICMP_EVENT_INDEX]);
}
315
316
317void
318icmpwin_process(PNATState pData)
319{
320 struct pong_tailq pongs;
321
322 if (TAILQ_EMPTY(&pData->pongs_received))
323 return;
324
325 TAILQ_INIT(&pongs);
326 TAILQ_CONCAT(&pongs, &pData->pongs_received, queue_entry);
327
328 while (!TAILQ_EMPTY(&pongs)) {
329 struct pong *pong = TAILQ_FIRST(&pongs);
330 size_t sz;
331
332 sz = RT_UOFFSETOF(struct pong, buf) + pong->bufsize;
333 Assert(pData->cbIcmpPending >= sz);
334 pData->cbIcmpPending -= sz;
335
336 icmpwin_pong(pong);
337
338 TAILQ_REMOVE(&pongs, pong, queue_entry);
339 RTMemFree(pong);
340 }
341}
342
343
344void
345icmpwin_pong(struct pong *pong)
346{
347 PNATState pData;
348 DWORD nreplies;
349 ICMP_ECHO_REPLY *reply;
350 struct mbuf *m;
351 struct ip *ip;
352 struct icmp_echo *icmp;
353 size_t reqsize;
354
355 pData = pong->pData; /* to make slirp_state.h macro hackery work */
356
357 nreplies = IcmpParseReplies(pong->buf, (DWORD)pong->bufsize);
358 if (nreplies == 0)
359 {
360 DWORD error = GetLastError();
361 if (error == IP_REQ_TIMED_OUT)
362 Log2(("NAT: ping %p timed out\n", (void *)pong));
363 else
364 Log2(("NAT: ping %p: IcmpParseReplies: error %d\n",
365 (void *)pong, error));
366 return;
367 }
368
369 reply = (ICMP_ECHO_REPLY *)pong->buf;
370
371 if (reply->Status == IP_SUCCESS)
372 {
373 if (reply->Options.OptionsSize != 0) /* don't do options */
374 return;
375
376 /* need to remap &reply->Address ? */
377 if (/* not a mapped loopback */ 1)
378 {
379 if (reply->Options.Ttl <= 1)
380 return;
381 --reply->Options.Ttl;
382 }
383
384 reqsize = reply->DataSize;
385 if ( (reply->Options.Flags & IP_FLAG_DF) != 0
386 && sizeof(struct ip) + sizeof(struct icmp_echo) + reqsize > (size_t)if_mtu)
387 return;
388
389 m = icmpwin_get_mbuf(pData, reqsize);
390 if (m == NULL)
391 return;
392
393 ip = mtod(m, struct ip *);
394 icmp = (struct icmp_echo *)(mtod(m, char *) + sizeof(*ip));
395
396 /* fill in ip (ip_output0() does the boilerplate for us) */
397 ip->ip_tos = reply->Options.Tos;
398 ip->ip_len = sizeof(*ip) + sizeof(*icmp) + (int)reqsize;
399 ip->ip_off = 0;
400 ip->ip_ttl = reply->Options.Ttl;
401 ip->ip_p = IPPROTO_ICMP;
402 ip->ip_src.s_addr = reply->Address;
403 ip->ip_dst = pong->reqiph.ip.ip_src;
404
405 icmp->icmp_type = ICMP_ECHOREPLY;
406 icmp->icmp_code = 0;
407 icmp->icmp_cksum = 0;
408 icmp->icmp_echo_id = pong->reqicmph.icmp_echo_id;
409 icmp->icmp_echo_seq = pong->reqicmph.icmp_echo_seq;
410
411 m_append(pData, m, (int)reqsize, reply->Data);
412
413 icmp->icmp_cksum = in_cksum_skip(m, ip->ip_len, sizeof(*ip));
414 }
415 else {
416 uint8_t type, code;
417
418 switch (reply->Status) {
419 case IP_DEST_NET_UNREACHABLE:
420 type = ICMP_UNREACH; code = ICMP_UNREACH_NET;
421 break;
422 case IP_DEST_HOST_UNREACHABLE:
423 type = ICMP_UNREACH; code = ICMP_UNREACH_HOST;
424 break;
425 case IP_DEST_PROT_UNREACHABLE:
426 type = ICMP_UNREACH; code = ICMP_UNREACH_PROTOCOL;
427 break;
428 case IP_PACKET_TOO_BIG:
429 type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG;
430 break;
431 case IP_SOURCE_QUENCH:
432 type = ICMP_SOURCEQUENCH; code = 0;
433 break;
434 case IP_TTL_EXPIRED_TRANSIT:
435 type = ICMP_TIMXCEED; code = ICMP_TIMXCEED_INTRANS;
436 break;
437 case IP_TTL_EXPIRED_REASSEM:
438 type = ICMP_TIMXCEED; code = ICMP_TIMXCEED_REASS;
439 break;
440 default:
441 Log2(("NAT: ping reply status %d, dropped\n", reply->Status));
442 return;
443 }
444
445 Log2(("NAT: ping status %d -> type %d/code %d\n",
446 reply->Status, type, code));
447
448 /*
449 * XXX: we don't know the TTL of the request at the time this
450 * ICMP error was generated (we can guess it was 1 for ttl
451 * exceeded, but don't bother faking it).
452 */
453 m = icmpwin_get_error(pong, type, code);
454 if (m == NULL)
455 return;
456
457 ip = mtod(m, struct ip *);
458
459 ip->ip_tos = reply->Options.Tos;
460 ip->ip_ttl = reply->Options.Ttl; /* XXX: decrement */
461 ip->ip_src.s_addr = reply->Address;
462 }
463
464 Assert(ip->ip_len == m_length(m, NULL));
465 ip_output(pData, NULL, m);
466}
467
468
469/*
470 * Prepare mbuf with ICMP error type/code.
471 * IP source must be filled by the caller.
472 */
473static struct mbuf *
474icmpwin_get_error(struct pong *pong, int type, int code)
475{
476 PNATState pData = pong->pData;
477 struct mbuf *m;
478 struct ip *ip;
479 struct icmp_echo *icmp;
480 size_t reqsize;
481
482 Log2(("NAT: ping error type %d/code %d\n", type, code));
483
484 size_t reqhlen = pong->reqiph.ip.ip_hl << 2;
485 reqsize = reqhlen + sizeof(pong->reqicmph);
486
487 m = icmpwin_get_mbuf(pData, reqsize);
488 if (m == NULL)
489 return NULL;
490
491 ip = mtod(m, struct ip *);
492 icmp = (struct icmp_echo *)(mtod(m, char *) + sizeof(*ip));
493
494 ip->ip_tos = 0;
495 ip->ip_len = sizeof(*ip) + sizeof(*icmp) + (int)reqsize;
496 ip->ip_off = 0;
497 ip->ip_ttl = IPDEFTTL;
498 ip->ip_p = IPPROTO_ICMP;
499 ip->ip_src.s_addr = 0; /* NB */
500 ip->ip_dst = pong->reqiph.ip.ip_src;
501
502 icmp->icmp_type = type;
503 icmp->icmp_code = code;
504 icmp->icmp_cksum = 0;
505 icmp->icmp_echo_id = 0;
506 icmp->icmp_echo_seq = 0;
507
508 /* payload: the IP and ICMP headers of the original request */
509 m_append(pData, m, (int)reqhlen, (caddr_t)&pong->reqiph);
510 m_append(pData, m, sizeof(pong->reqicmph), (caddr_t)&pong->reqicmph);
511
512 icmp->icmp_cksum = in_cksum_skip(m, ip->ip_len, sizeof(*ip));
513
514 return m;
515}
516
517
518/*
519 * Replacing original simple slirp mbufs with real mbufs from freebsd
520 * was a bit messy since assumption are different. This leads to
521 * rather ugly code at times. Hide the gore here.
522 */
523static struct mbuf *
524icmpwin_get_mbuf(PNATState pData, size_t reqsize)
525{
526 struct mbuf *m;
527
528 reqsize += if_maxlinkhdr;
529 reqsize += sizeof(struct ip) + sizeof(struct icmp_echo);
530
531 if (reqsize <= MHLEN)
532 /* good pings come in small packets */
533 m = m_gethdr(pData, M_NOWAIT, MT_HEADER);
534 else
535 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, (int)slirp_size(pData));
536
537 if (m == NULL)
538 return NULL;
539
540 m->m_flags |= M_SKIP_FIREWALL;
541 m->m_data += if_maxlinkhdr; /* reserve leading space for ethernet header */
542
543 m->m_pkthdr.header = mtod(m, void *);
544 m->m_len = sizeof(struct ip) + sizeof(struct icmp_echo);
545
546 return m;
547}
548
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette