VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/ip_input.c@ 93115

Last change on this file since 93115 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.1 KB
Line 
1/* $Id: ip_input.c 93115 2022-01-01 11:31:46Z vboxsync $ */
2/** @file
3 * NAT - IP input.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1982, 1986, 1988, 1993
22 * The Regents of the University of California. All rights reserved.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 * 3. All advertising materials mentioning features or use of this software
33 * must display the following acknowledgement:
34 * This product includes software developed by the University of
35 * California, Berkeley and its contributors.
36 * 4. Neither the name of the University nor the names of its contributors
37 * may be used to endorse or promote products derived from this software
38 * without specific prior written permission.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
53 * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp
54 */
55
56/*
57 * Changes and additions relating to SLiRP are
58 * Copyright (c) 1995 Danny Gasparovski.
59 *
60 * Please read the file COPYRIGHT for the
61 * terms and conditions of the copyright.
62 */
63
64#include <slirp.h>
65#include "ip_icmp.h"
66#include "alias.h"
67
68
69/*
70 * IP initialization: fill in IP protocol switch table.
71 * All protocols not implemented in kernel go to raw IP protocol handler.
72 */
73void
74ip_init(PNATState pData)
75{
76 int i = 0;
77 for (i = 0; i < IPREASS_NHASH; ++i)
78 TAILQ_INIT(&ipq[i]);
79 maxnipq = 100; /* ??? */
80 maxfragsperpacket = 16;
81 nipq = 0;
82 ip_currid = tt.tv_sec & 0xffff;
83 udp_init(pData);
84 tcp_init(pData);
85}
86
/*
 * IP input routine.  Checksum and byte swap header.  If fragmented
 * try to reassemble.  Process options.  Pass to next level.
 *
 * Validates the IP header of @a m, optionally "forwards" (TTL decrement
 * plus incremental checksum fix-up) packets not addressed to us, runs the
 * packet through libalias, reassembles fragments, and finally dispatches
 * to the TCP/UDP/ICMP input routines.  Consumes @a m on every path
 * (either freed here or handed on to the next layer).
 */
void
ip_input(PNATState pData, struct mbuf *m)
{
    register struct ip *ip;
    int hlen = 0;   /* IP header length in bytes (ip_hl * 4) */
    int mlen = 0;   /* bytes actually present in the mbuf */
    int iplen = 0;  /* total length claimed by the IP header */

    STAM_PROFILE_START(&pData->StatIP_input, a);

    LogFlowFunc(("ENTER: m = %p\n", m));
    ip = mtod(m, struct ip *);
    Log2(("ip_dst=%RTnaipv4(len:%d) m_len = %d\n", ip->ip_dst, RT_N2H_U16(ip->ip_len), m->m_len));

    ipstat.ips_total++;

    mlen = m->m_len;

    /* Must at least hold a minimal IP header before we touch any field. */
    if (mlen < sizeof(struct ip))
    {
        ipstat.ips_toosmall++;
        goto bad_free_m;
    }

    ip = mtod(m, struct ip *);
    if (ip->ip_v != IPVERSION)
    {
        ipstat.ips_badvers++;
        goto bad_free_m;
    }

    /* Header length is in 32-bit words; must fit within the data we have. */
    hlen = ip->ip_hl << 2;
    if (   hlen < sizeof(struct ip)
        || hlen > mlen)
    {
        /* min header length */
        ipstat.ips_badhlen++; /* or packet too short */
        goto bad_free_m;
    }

    /* keep ip header intact for ICMP reply
     * ip->ip_sum = cksum(m, hlen);
     * if (ip->ip_sum) {
     */
    /* A correct header checksums to zero; non-zero means corruption. */
    if (cksum(m, hlen))
    {
        ipstat.ips_badsum++;
        goto bad_free_m;
    }

    /* Total length (still in network order in the header) must cover the header. */
    iplen = RT_N2H_U16(ip->ip_len);
    if (iplen < hlen)
    {
        ipstat.ips_badlen++;
        goto bad_free_m;
    }

    /*
     * Check that the amount of data in the buffers
     * is as at least much as the IP header would have us expect.
     * Trim mbufs if longer than we expect.
     * Drop packet if shorter than we expect.
     */
    if (mlen < iplen)
    {
        ipstat.ips_tooshort++;
        goto bad_free_m;
    }

    /* Should drop packet if mbuf too long? hmmm... */
    if (mlen > iplen)
    {
        /* Negative count to m_adj() trims trailing bytes. */
        m_adj(m, iplen - mlen);
        mlen = m->m_len;
    }

    /* source must be unicast */
    if ((ip->ip_src.s_addr & RT_N2H_U32_C(0xe0000000)) == RT_N2H_U32_C(0xe0000000))
        goto free_m;

    /*
     * Drop multicast (class d) and reserved (class e) here.  The rest
     * of the code is not yet prepared to deal with it.  IGMP is not
     * implemented either.  (Limited broadcast 255.255.255.255 is let through.)
     */
    if (   (ip->ip_dst.s_addr & RT_N2H_U32_C(0xe0000000)) == RT_N2H_U32_C(0xe0000000)
        && ip->ip_dst.s_addr != 0xffffffff)
    {
        goto free_m;
    }


    /* do we need to "forward" this packet? */
    if (!CTL_CHECK_MINE(ip->ip_dst.s_addr))
    {
        if (ip->ip_ttl <= 1)
        {
            /* icmp_error expects these in host order */
            NTOHS(ip->ip_len);
            NTOHS(ip->ip_id);
            NTOHS(ip->ip_off);

            icmp_error(pData, m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl");
            /* icmp_error() took ownership of m, so don't free it here. */
            goto no_free_m;
        }

        /* ignore packets to other nodes from our private network */
        if (   CTL_CHECK_NETWORK(ip->ip_dst.s_addr)
            && !CTL_CHECK_BROADCAST(ip->ip_dst.s_addr))
        {
            /* XXX: send ICMP_REDIRECT_HOST to be pedantic? */
            goto free_m;
        }

        /*
         * Decrement TTL and patch the checksum incrementally instead of
         * recomputing it (RFC 1141 style).  TTL occupies the high byte of
         * its 16-bit word, hence the (1 << 8) adjustment; the extra +1
         * handles the one's-complement wrap-around case.
         */
        ip->ip_ttl--;
        if (ip->ip_sum > RT_H2N_U16_C(0xffffU - (1 << 8)))
            ip->ip_sum += RT_H2N_U16_C(1 << 8) + 1;
        else
            ip->ip_sum += RT_H2N_U16_C(1 << 8);
    }

    /* run it through libalias */
    {
        int rc;
        if (!(m->m_flags & M_SKIP_FIREWALL))
        {
            STAM_PROFILE_START(&pData->StatALIAS_input, b);
            rc = LibAliasIn(pData->proxy_alias, mtod(m, char *), mlen);
            STAM_PROFILE_STOP(&pData->StatALIAS_input, b);
            Log2(("NAT: LibAlias return %d\n", rc));
        }
        else
            /* One-shot flag: clear it so a requeued packet gets aliased next time. */
            m->m_flags &= ~M_SKIP_FIREWALL;

#if 0 /* disabled: no module we use does it in this direction */
        /*
         * XXX: spooky action at a distance - libalias may modify the
         * packet and will update ip_len to reflect the new length.
         */
        if (iplen != RT_N2H_U16(ip->ip_len))
        {
            iplen = RT_N2H_U16(ip->ip_len);
            m->m_len = iplen;
            mlen = m->m_len;
        }
#endif
    }

    /*
     * Convert fields to host representation.
     */
    NTOHS(ip->ip_len);
    NTOHS(ip->ip_id);
    NTOHS(ip->ip_off);

    /*
     * If offset or IP_MF are set, must reassemble.
     * Otherwise, nothing need be done.
     * (We could look in the reassembly queue to see
     * if the packet was previously fragmented,
     * but it's not worth the time; just let them time out.)
     *
     */
    if (ip->ip_off & (IP_MF | IP_OFFMASK))
    {
        m = ip_reass(pData, m);
        if (m == NULL)
            goto no_free_m;   /* fragment queued or dropped by ip_reass() */
        /* Reassembly may return a different mbuf chain; re-derive header info. */
        ip = mtod(m, struct ip *);
        hlen = ip->ip_hl << 2;
    }
    else
        /* Protocol input routines expect ip_len to exclude the IP header. */
        ip->ip_len -= hlen;

    /*
     * Switch out to protocol's input routine.
     */
    ipstat.ips_delivered++;
    switch (ip->ip_p)
    {
        case IPPROTO_TCP:
            tcp_input(pData, m, hlen, (struct socket *)NULL);
            break;
        case IPPROTO_UDP:
            udp_input(pData, m, hlen);
            break;
        case IPPROTO_ICMP:
            icmp_input(pData, m, hlen);
            break;
        default:
            ipstat.ips_noproto++;
            m_freem(pData, m);
    }
    goto no_free_m;

bad_free_m:
    Log2(("NAT: IP datagram to %RTnaipv4 with size(%d) claimed as bad\n",
          ip->ip_dst, ip->ip_len));
free_m:
    m_freem(pData, m);
no_free_m:
    STAM_PROFILE_STOP(&pData->StatIP_input, a);
    LogFlowFuncLeave();
    return;
}
296
/**
 * Take incoming datagram fragment and try to reassemble it into a whole
 * datagram (classic BSD reassembly, hashed per (src, id)).
 *
 * @returns The fully reassembled datagram mbuf, or NULL if the fragment
 *          was queued awaiting siblings or was dropped.  In either NULL
 *          case this function has taken ownership of @a m.
 *
 * On entry ip_len/ip_id/ip_off in @a m are already in host byte order
 * (converted by ip_input()).
 */
struct mbuf *
ip_reass(PNATState pData, struct mbuf* m)
{
    struct ip *ip;
    struct mbuf *p, *q, *nq;    /* predecessor / cursor / next in fragment list */
    struct ipq_t *fp = NULL;    /* reassembly queue for this datagram, if any */
    struct ipqhead *head;       /* hash bucket the queue lives on */
    int i, hlen, next;
    u_short hash;

    /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
    LogFlowFunc(("ENTER: m:%p\n", m));
    if (   maxnipq == 0
        || maxfragsperpacket == 0)
    {
        ipstat.ips_fragments++;
        ipstat.ips_fragdropped++;
        m_freem(pData, m);
        LogFlowFunc(("LEAVE: NULL\n"));
        return (NULL);
    }

    ip = mtod(m, struct ip *);
    hlen = ip->ip_hl << 2;

    hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
    head = &ipq[hash];

    /*
     * Look for queue of fragments
     * of this datagram.  A queue matches on the (id, src, dst, proto)
     * 4-tuple that identifies one original datagram.
     */
    TAILQ_FOREACH(fp, head, ipq_list)
        if (ip->ip_id == fp->ipq_id &&
            ip->ip_src.s_addr == fp->ipq_src.s_addr &&
            ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
            ip->ip_p == fp->ipq_p)
            goto found;

    fp = NULL;

    /*
     * Attempt to trim the number of allocated fragment queues if it
     * exceeds the administrative limit.
     */
    if ((nipq > maxnipq) && (maxnipq > 0))
    {
        /*
         * drop something from the tail of the current queue
         * before proceeding further
         */
        struct ipq_t *pHead = TAILQ_LAST(head, ipqhead);
        if (pHead == NULL)
        {
            /* gak -- this bucket is empty; evict from the first non-empty one */
            for (i = 0; i < IPREASS_NHASH; i++)
            {
                struct ipq_t *pTail = TAILQ_LAST(&ipq[i], ipqhead);
                if (pTail)
                {
                    ipstat.ips_fragtimeout += pTail->ipq_nfrags;
                    ip_freef(pData, &ipq[i], pTail);
                    break;
                }
            }
        }
        else
        {
            ipstat.ips_fragtimeout += pHead->ipq_nfrags;
            ip_freef(pData, head, pHead);
        }
    }

found:
    /*
     * Adjust ip_len to not reflect header,
     * convert offset of this to bytes.
     */
    ip->ip_len -= hlen;
    if (ip->ip_off & IP_MF)
    {
        /*
         * Make sure that fragments have a data length
         * that's a non-zero multiple of 8 bytes.
         */
        if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0)
        {
            ipstat.ips_toosmall++; /* XXX */
            goto dropfrag;
        }
        m->m_flags |= M_FRAG;
    }
    else
        m->m_flags &= ~M_FRAG;
    /* ip_off is in 8-byte units on the wire; convert to a byte offset. */
    ip->ip_off <<= 3;


    /*
     * Attempt reassembly; if it succeeds, proceed.
     * ip_reass() will return a different mbuf.
     */
    ipstat.ips_fragments++;

    /* Previous ip_reass() started here. */
    /*
     * Presence of header sizes in mbufs
     * would confuse code below.  Strip the IP header from the data;
     * GETIP() below still reaches it via m_pkthdr.header.
     */
    m->m_data += hlen;
    m->m_len -= hlen;

    /*
     * If first fragment to arrive, create a reassembly queue.
     */
    if (fp == NULL)
    {
        fp = RTMemAlloc(sizeof(struct ipq_t));
        if (fp == NULL)
            goto dropfrag;
        TAILQ_INSERT_HEAD(head, fp, ipq_list);
        nipq++;
        fp->ipq_nfrags = 1;
        fp->ipq_ttl = IPFRAGTTL;
        fp->ipq_p = ip->ip_p;
        fp->ipq_id = ip->ip_id;
        fp->ipq_src = ip->ip_src;
        fp->ipq_dst = ip->ip_dst;
        fp->ipq_frags = m;
        m->m_nextpkt = NULL;
        goto done;
    }
    else
    {
        fp->ipq_nfrags++;
    }

/* Reach a fragment's IP header (saved in the packet header) after m_data
 * has been advanced past it.  NB: shadows the function parameter 'm'. */
#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))

    /*
     * Find a segment which begins after this one does.
     */
    for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
        if (GETIP(q)->ip_off > ip->ip_off)
            break;

    /*
     * If there is a preceding segment, it may provide some of
     * our data already.  If so, drop the data from the incoming
     * segment.  If it provides all of our data, drop us, otherwise
     * stick new segment in the proper place.
     *
     * If some of the data is dropped from the preceding
     * segment, then it's checksum is invalidated.
     */
    if (p)
    {
        /* i = number of bytes of overlap with the predecessor. */
        i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
        if (i > 0)
        {
            if (i >= ip->ip_len)
                goto dropfrag;      /* fully contained in predecessor */
            m_adj(m, i);            /* trim overlap from our front */
            ip->ip_off += i;
            ip->ip_len -= i;
        }
        m->m_nextpkt = p->m_nextpkt;
        p->m_nextpkt = m;
    }
    else
    {
        /* We start earliest so far: insert at the head of the list. */
        m->m_nextpkt = fp->ipq_frags;
        fp->ipq_frags = m;
    }

    /*
     * While we overlap succeeding segments trim them or,
     * if they are completely covered, dequeue them.
     */
    for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
         q = nq)
    {
        i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
        if (i < GETIP(q)->ip_len)
        {
            /* Partial overlap: trim the successor's front and stop. */
            GETIP(q)->ip_len -= i;
            GETIP(q)->ip_off += i;
            m_adj(q, i);
            break;
        }
        /* Successor fully covered by us: unlink and free it. */
        nq = q->m_nextpkt;
        m->m_nextpkt = nq;
        ipstat.ips_fragdropped++;
        fp->ipq_nfrags--;
        m_freem(pData, q);
    }

    /*
     * Check for complete reassembly and perform frag per packet
     * limiting.
     *
     * Frag limiting is performed here so that the nth frag has
     * a chance to complete the packet before we drop the packet.
     * As a result, n+1 frags are actually allowed per packet, but
     * only n will ever be stored. (n = maxfragsperpacket.)
     *
     */
    next = 0;   /* expected byte offset of the next fragment */
    for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
    {
        if (GETIP(q)->ip_off != next)
        {
            /* Gap found: datagram incomplete; enforce per-packet frag limit. */
            if (fp->ipq_nfrags > maxfragsperpacket)
            {
                ipstat.ips_fragdropped += fp->ipq_nfrags;
                ip_freef(pData, head, fp);
            }
            goto done;
        }
        next += GETIP(q)->ip_len;
    }
    /* Make sure the last packet didn't have the IP_MF flag */
    if (p->m_flags & M_FRAG)
    {
        if (fp->ipq_nfrags > maxfragsperpacket)
        {
            ipstat.ips_fragdropped += fp->ipq_nfrags;
            ip_freef(pData, head, fp);
        }
        goto done;
    }

    /*
     * Reassembly is complete.  Make sure the packet is a sane size.
     */
    q = fp->ipq_frags;
    ip = GETIP(q);
    hlen = ip->ip_hl << 2;
    if (next + hlen > IP_MAXPACKET)
    {
        ipstat.ips_fragdropped += fp->ipq_nfrags;
        ip_freef(pData, head, fp);
        goto done;
    }

    /*
     * Concatenate fragments.  Each fragment temporarily exposes its own
     * IP header so m_cat() sees contiguous payload boundaries, then hides
     * it again; the first fragment's header becomes the datagram's.
     */
    m = q;
    nq = q->m_nextpkt;
    q->m_nextpkt = NULL;
    for (q = nq; q != NULL; q = nq)
    {
        nq = q->m_nextpkt;
        q->m_nextpkt = NULL;
        m_cat(pData, m, q);

        m->m_len += hlen;
        m->m_data -= hlen;
        ip = mtod(m, struct ip *); /*update ip pointer */
        hlen = ip->ip_hl << 2;
        m->m_len -= hlen;
        m->m_data += hlen;
    }
    /* Finally make the (first fragment's) IP header visible again. */
    m->m_len += hlen;
    m->m_data -= hlen;

    /*
     * Create header for new ip packet by modifying header of first
     * packet; dequeue and discard fragment reassembly header.
     * Make header visible.
     */

    ip->ip_len = next;      /* payload length, excluding the IP header */
    ip->ip_src = fp->ipq_src;
    ip->ip_dst = fp->ipq_dst;
    TAILQ_REMOVE(head, fp, ipq_list);
    nipq--;
    RTMemFree(fp);

    Assert((ip->ip_len == next));
    /* some debugging cruft by sklower, below, will go away soon */
#if 0
    if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
        m_fixhdr(m);
#endif
    ipstat.ips_reassembled++;
    LogFlowFunc(("LEAVE: %p\n", m));
    return (m);

dropfrag:
    ipstat.ips_fragdropped++;
    if (fp != NULL)
        fp->ipq_nfrags--;
    m_freem(pData, m);

done:
    LogFlowFunc(("LEAVE: NULL\n"));
    return NULL;

#undef GETIP
}
598
599void
600ip_freef(PNATState pData, struct ipqhead *fhp, struct ipq_t *fp)
601{
602 struct mbuf *q;
603
604 while (fp->ipq_frags)
605 {
606 q = fp->ipq_frags;
607 fp->ipq_frags = q->m_nextpkt;
608 m_freem(pData, q);
609 }
610 TAILQ_REMOVE(fhp, fp, ipq_list);
611 RTMemFree(fp);
612 nipq--;
613}
614
615/*
616 * IP timer processing;
617 * if a timer expires on a reassembly
618 * queue, discard it.
619 */
620void
621ip_slowtimo(PNATState pData)
622{
623 register struct ipq_t *fp;
624
625 /* XXX: the fragment expiration is the same but requier
626 * additional loop see (see ip_input.c in FreeBSD tree)
627 */
628 int i;
629 LogFlow(("ip_slowtimo:\n"));
630 for (i = 0; i < IPREASS_NHASH; i++)
631 {
632 for(fp = TAILQ_FIRST(&ipq[i]); fp;)
633 {
634 struct ipq_t *fpp;
635
636 fpp = fp;
637 fp = TAILQ_NEXT(fp, ipq_list);
638 if(--fpp->ipq_ttl == 0)
639 {
640 ipstat.ips_fragtimeout += fpp->ipq_nfrags;
641 ip_freef(pData, &ipq[i], fpp);
642 }
643 }
644 }
645 /*
646 * If we are over the maximum number of fragments
647 * (due to the limit being lowered), drain off
648 * enough to get down to the new limit.
649 */
650 if (maxnipq >= 0 && nipq > maxnipq)
651 {
652 for (i = 0; i < IPREASS_NHASH; i++)
653 {
654 while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i]))
655 {
656 ipstat.ips_fragdropped += TAILQ_FIRST(&ipq[i])->ipq_nfrags;
657 ip_freef(pData, &ipq[i], TAILQ_FIRST(&ipq[i]));
658 }
659 }
660 }
661}
662
663
664/*
665 * Strip out IP options, at higher
666 * level protocol in the kernel.
667 * Second argument is buffer to which options
668 * will be moved, and return value is their length.
669 * (XXX) should be deleted; last arg currently ignored.
670 */
671void
672ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
673{
674 register int i;
675 struct ip *ip = mtod(m, struct ip *);
676 register caddr_t opts;
677 int olen;
678 NOREF(mopt); /** @todo do we really will need this options buffer? */
679
680 olen = (ip->ip_hl<<2) - sizeof(struct ip);
681 opts = (caddr_t)(ip + 1);
682 i = m->m_len - (sizeof(struct ip) + olen);
683 memcpy(opts, opts + olen, (unsigned)i);
684 m->m_len -= olen;
685
686 ip->ip_hl = sizeof(struct ip) >> 2;
687}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette