VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 57358

Last change on this file since 57358 was 56957, checked in by vboxsync, 9 years ago

NAT: undo byte order conversions on the saved IP header when relaying
back an ICMP error.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 41.0 KB
Line 
1/* $Id: socket.c 56957 2015-07-16 16:43:20Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#include <slirp.h>
28#include "ip_icmp.h"
29#include "main.h"
30#ifdef __sun__
31#include <sys/filio.h>
32#endif
33#include <VBox/vmm/pdmdrv.h>
34#if defined (RT_OS_WINDOWS)
35#include <iphlpapi.h>
36#include <icmpapi.h>
37#endif
38
39#if defined(DECLARE_IOVEC) && defined(RT_OS_WINDOWS)
40AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, WSABUF, buf);
41AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, WSABUF, len);
42#endif
43
44#ifdef VBOX_WITH_NAT_UDP_SOCKET_CLONE
45/**
 * Clone an existing guest UDP socket so the same guest flow can be
 * relayed to another foreign address.
 *
 * @param   pData           The NAT state.
 * @param   fBindSocket     When true, attach a fresh host socket via
 *                          udp_attach(); when false, share the host fd of
 *                          @a pSo and queue the clone on udb.
 * @param   pSo             The socket being cloned; its so_cCloneCounter is
 *                          incremented on success.
 * @param   u32ForeignAddr  The foreign IPv4 address for the clone (stored
 *                          directly into so_faddr.s_addr, so presumably in
 *                          network byte order -- TODO confirm with callers).
 *
 * @returns The newly created clone, or NULL on failure.
47 */
48struct socket * soCloneUDPSocketWithForegnAddr(PNATState pData, bool fBindSocket, struct socket *pSo, uint32_t u32ForeignAddr)
49{
50 struct socket *pNewSocket = NULL;
51 LogFlowFunc(("Enter: fBindSocket:%RTbool, so:%R[natsock], u32ForeignAddr:%RTnaipv4\n", fBindSocket, pSo, u32ForeignAddr));
52 pNewSocket = socreate();
53 if (!pNewSocket)
54 {
55 LogFunc(("Can't create socket\n"));
56 LogFlowFunc(("Leave: NULL\n"));
57 return NULL;
58 }
59 if (fBindSocket)
60 {
61 if (udp_attach(pData, pNewSocket, 0) <= 0)
62 {
63 sofree(pData, pNewSocket);
64 LogFunc(("Can't attach fresh created socket\n"));
65 return NULL;
66 }
67 }
68 else
69 {
 /* Share the host descriptor of the original socket instead of
  * opening a new one. */
70 pNewSocket->so_cloneOf = (struct socket *)pSo;
71 pNewSocket->s = pSo->s;
72 insque(pData, pNewSocket, &udb);
73 }
 /* The clone inherits the guest (local) side of the original connection. */
74 pNewSocket->so_laddr = pSo->so_laddr;
75 pNewSocket->so_lport = pSo->so_lport;
76 pNewSocket->so_faddr.s_addr = u32ForeignAddr;
77 pNewSocket->so_fport = pSo->so_fport;
78 pSo->so_cCloneCounter++;
79 LogFlowFunc(("Leave: %R[natsock]\n", pNewSocket));
80 return pNewSocket;
81}
82
83struct socket *soLookUpClonedUDPSocket(PNATState pData, const struct socket *pcSo, uint32_t u32ForeignAddress)
84{
85 struct socket *pSoClone = NULL;
86 LogFlowFunc(("Enter: pcSo:%R[natsock], u32ForeignAddress:%RTnaipv4\n", pcSo, u32ForeignAddress));
87 for (pSoClone = udb.so_next; pSoClone != &udb; pSoClone = pSoClone->so_next)
88 {
89 if ( pSoClone->so_cloneOf
90 && pSoClone->so_cloneOf == pcSo
91 && pSoClone->so_lport == pcSo->so_lport
92 && pSoClone->so_fport == pcSo->so_fport
93 && pSoClone->so_laddr.s_addr == pcSo->so_laddr.s_addr
94 && pSoClone->so_faddr.s_addr == u32ForeignAddress)
95 goto done;
96 }
97 pSoClone = NULL;
98done:
99 LogFlowFunc(("Leave: pSoClone: %R[natsock]\n", pSoClone));
100 return pSoClone;
101}
102#endif
103
104#ifdef VBOX_WITH_NAT_SEND2HOME
105DECLINLINE(bool) slirpSend2Home(PNATState pData, struct socket *pSo, const void *pvBuf, uint32_t cbBuf, int iFlags)
106{
107 int idxAddr;
108 int ret = 0;
109 bool fSendDone = false;
110 LogFlowFunc(("Enter pSo:%R[natsock] pvBuf: %p, cbBuf: %d, iFlags: %d\n", pSo, pvBuf, cbBuf, iFlags));
111 for (idxAddr = 0; idxAddr < pData->cInHomeAddressSize; ++idxAddr)
112 {
113
114 struct socket *pNewSocket = soCloneUDPSocketWithForegnAddr(pData, pSo, pData->pInSockAddrHomeAddress[idxAddr].sin_addr);
115 AssertReturn((pNewSocket, false));
116 pData->pInSockAddrHomeAddress[idxAddr].sin_port = pSo->so_fport;
117 /* @todo: more verbose on errors,
118 * @note: we shouldn't care if this send fail or not (we're in broadcast).
119 */
120 LogFunc(("send %d bytes to %RTnaipv4 from %R[natsock]\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr, pNewSocket));
121 ret = sendto(pNewSocket->s, pvBuf, cbBuf, iFlags, (struct sockaddr *)&pData->pInSockAddrHomeAddress[idxAddr], sizeof(struct sockaddr_in));
122 if (ret < 0)
123 LogFunc(("Failed to send %d bytes to %RTnaipv4\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr));
124 fSendDone |= ret > 0;
125 }
126 LogFlowFunc(("Leave %RTbool\n", fSendDone));
127 return fSendDone;
128}
129#endif /* !VBOX_WITH_NAT_SEND2HOME */
130
131#if !defined(RT_OS_WINDOWS)
132static void send_icmp_to_guest(PNATState, char *, size_t, const struct sockaddr_in *);
133static void sorecvfrom_icmp_unix(PNATState, struct socket *);
134#endif /* !RT_OS_WINDOWS */
135
/*
 * One-time initialization hook for the socket layer.
 * There is currently nothing to set up; the empty body is kept so the
 * call site stays uniform with the other subsystem init routines.
 * (Prototype tightened from K&R-style "()" to "(void)".)
 */
void
so_init(void)
{
}
140
141struct socket *
142solookup(struct socket *head, struct in_addr laddr,
143 u_int lport, struct in_addr faddr, u_int fport)
144{
145 struct socket *so;
146
147 for (so = head->so_next; so != head; so = so->so_next)
148 {
149 if ( so->so_lport == lport
150 && so->so_laddr.s_addr == laddr.s_addr
151 && so->so_faddr.s_addr == faddr.s_addr
152 && so->so_fport == fport)
153 return so;
154 }
155
156 return (struct socket *)NULL;
157}
158
159/*
160 * Create a new socket, initialise the fields
161 * It is the responsibility of the caller to
162 * insque() it into the correct linked-list
163 */
164struct socket *
165socreate()
166{
167 struct socket *so;
168
169 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
170 if (so)
171 {
172 so->so_state = SS_NOFDREF;
173 so->s = -1;
174#if !defined(RT_OS_WINDOWS)
175 so->so_poll_index = -1;
176#endif
177 }
178 return so;
179}
180
181/*
182 * remque and free a socket, clobber cache
183 */
184void
185sofree(PNATState pData, struct socket *so)
186{
 /* Release a NAT socket: free its pending mbuf and saved IP header,
  * unlink it from its queue (when queued) and free the structure. */
187 LogFlowFunc(("ENTER:%R[natsock]\n", so));
188 /*
 * We must not remove the socket while the polling routine is walking
 * the socket lists; instead mark it and let the poller delete it later.
191 */
192 if (so->fUnderPolling)
193 {
194 so->fShouldBeRemoved = 1;
195 LogFlowFunc(("LEAVE:%R[natsock] postponed deletion\n", so));
196 return;
197 }
 /*
 * Check that we are not freeing a socket that still has a TCP control
 * block attached (it must have been detached/closed first).
 */
201 Assert(!sototcpcb(so));
202 /* udp checks */
203 Assert(!so->so_timeout);
204 Assert(!so->so_timeout_arg);
 /* Invalidate the last-socket lookup caches if they point at us. */
205 if (so == tcp_last_so)
206 tcp_last_so = &tcb;
207 else if (so == udp_last_so)
208 udp_last_so = &udb;
209
210 /* check if mbuf haven't been already freed */
211 if (so->so_m != NULL)
212 {
213 m_freem(pData, so->so_m);
214 so->so_m = NULL;
215 }
216
 /* so_ohdr: saved original IP header (used when relaying ICMP errors
  * back) -- presumably heap-allocated per socket; TODO(review): confirm. */
217 if (so->so_ohdr != NULL)
218 {
219 RTMemFree(so->so_ohdr);
220 so->so_ohdr = NULL;
221 }
222
 /* Only unlink when the socket is actually queued (both links set). */
223 if (so->so_next && so->so_prev)
224 {
225 remque(pData, so); /* crashes if so is not in a queue */
226 NSOCK_DEC();
227 }
228
229 RTMemFree(so);
230 LogFlowFuncLeave();
231}
232
233/*
234 * Read from so's socket into sb_snd, updating all relevant sbuf fields
235 * NOTE: This will only be called if it is select()ed for reading, so
236 * a read() of 0 (or less) means it's disconnected
237 */
238int
239soread(PNATState pData, struct socket *so)
240{
241 int n, nn, lss, total;
242 struct sbuf *sb = &so->so_snd;
243 size_t len = sb->sb_datalen - sb->sb_cc;
244 struct iovec iov[2];
245 int mss = so->so_tcpcb->t_maxseg;
246
247 STAM_PROFILE_START(&pData->StatIOread, a);
248 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
249 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
250
251 QSOCKET_LOCK(tcb);
252 SOCKET_LOCK(so);
253 QSOCKET_UNLOCK(tcb);
254
255 LogFlow(("soread: so = %R[natsock]\n", so));
256 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
257
258 /*
259 * No need to check if there's enough room to read.
260 * soread wouldn't have been called if there weren't
261 */
262
 /* Free space left in the circular send buffer. */
263 len = sb->sb_datalen - sb->sb_cc;
264
 /* Describe the writable region(s) of the circular buffer with up to
 * two iovecs; the region may wrap at sb_data + sb_datalen. */
265 iov[0].iov_base = sb->sb_wptr;
266 iov[1].iov_base = 0;
267 iov[1].iov_len = 0;
268 if (sb->sb_wptr < sb->sb_rptr)
269 {
270 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
271 /* Should never succeed, but... */
272 if (iov[0].iov_len > len)
273 iov[0].iov_len = len;
 /* Trim to a whole number of TCP segments. */
274 if (iov[0].iov_len > mss)
275 iov[0].iov_len -= iov[0].iov_len%mss;
276 n = 1;
277 }
278 else
279 {
280 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
281 /* Should never succeed, but... */
282 if (iov[0].iov_len > len)
283 iov[0].iov_len = len;
284 len -= iov[0].iov_len;
285 if (len)
286 {
287 iov[1].iov_base = sb->sb_data;
288 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
289 if (iov[1].iov_len > len)
290 iov[1].iov_len = len;
291 total = iov[0].iov_len + iov[1].iov_len;
292 if (total > mss)
293 {
 /* Trim the combined length to a whole number of segments,
 * shaving from the second (then first) iovec. */
294 lss = total % mss;
295 if (iov[1].iov_len > lss)
296 {
297 iov[1].iov_len -= lss;
298 n = 2;
299 }
300 else
301 {
302 lss -= iov[1].iov_len;
303 iov[0].iov_len -= lss;
304 n = 1;
305 }
306 }
307 else
308 n = 2;
309 }
310 else
311 {
312 if (iov[0].iov_len > mss)
313 iov[0].iov_len -= iov[0].iov_len%mss;
314 n = 1;
315 }
316 }
317
318#ifdef HAVE_READV
319 nn = readv(so->s, (struct iovec *)iov, n);
320#else
321 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
322#endif
323 Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
324 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
325 if (nn <= 0)
326 {
327 /*
328 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
329 * _could_ mean that the connection is closed. But we will receive an
330 * FD_CLOSE event later if the connection was _really_ closed. With
331 * www.youtube.com I see this very often. Closing the socket too early
332 * would be dangerous.
333 */
334 int status;
335 unsigned long pending = 0;
 /* NOTE(review): this ioctl may overwrite the errno left by the
 * failed recv() above before it is inspected below -- verify. */
336 status = ioctlsocket(so->s, FIONREAD, &pending);
337 if (status < 0)
338 Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno));
339 if (nn == 0 && (pending != 0))
340 {
341 SOCKET_UNLOCK(so);
342 STAM_PROFILE_STOP(&pData->StatIOread, a);
343 return 0;
344 }
345 if ( nn < 0
346 && soIgnorableErrorCode(errno))
347 {
348 SOCKET_UNLOCK(so);
349 STAM_PROFILE_STOP(&pData->StatIOread, a);
350 return 0;
351 }
352 else
353 {
 /* A template with INADDR_ANY on either side means the tcpcb was
 * never fully initialized; drop it instead of a normal close. */
354 int fUninitiolizedTemplate = 0;
355 fUninitiolizedTemplate = RT_BOOL(( sototcpcb(so)
356 && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY
357 || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY)));
358 /* nn == 0 means peer has performed an orderly shutdown */
359 Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n",
360 RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, errno, strerror(errno)));
361 sofcantrcvmore(so);
362 if (!fUninitiolizedTemplate)
363 tcp_sockclosed(pData, sototcpcb(so));
364 else
365 tcp_drop(pData, sototcpcb(so), errno);
366 SOCKET_UNLOCK(so);
367 STAM_PROFILE_STOP(&pData->StatIOread, a);
368 return -1;
369 }
370 }
371 STAM_STATS(
372 if (n == 1)
373 {
374 STAM_COUNTER_INC(&pData->StatIORead_in_1);
375 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
376 }
377 else
378 {
379 STAM_COUNTER_INC(&pData->StatIORead_in_2);
380 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
381 }
382 );
383
384#ifndef HAVE_READV
385 /*
386 * If there was no error, try and read the second time round
387 * We read again if n = 2 (ie, there's another part of the buffer)
388 * and we read as much as we could in the first read
389 * We don't test for <= 0 this time, because there legitimately
390 * might not be any more data (since the socket is non-blocking),
391 * a close will be detected on next iteration.
392 * A return of -1 wont (shouldn't) happen, since it didn't happen above
393 */
394 if (n == 2 && nn == iov[0].iov_len)
395 {
396 int ret;
397 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
398 if (ret > 0)
399 nn += ret;
400 STAM_STATS(
401 if (ret > 0)
402 {
403 STAM_COUNTER_INC(&pData->StatIORead_in_2);
404 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
405 }
406 );
407 }
408
409 Log2(("%s: read(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
410#endif
411
412 /* Update fields */
413 sb->sb_cc += nn;
414 sb->sb_wptr += nn;
415 Log2(("%s: update so_snd (readed nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
 /* Wrap the write pointer back to the start of the circular buffer. */
416 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
417 {
418 sb->sb_wptr -= sb->sb_datalen;
419 Log2(("%s: alter sb_wptr so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
420 }
421 STAM_PROFILE_STOP(&pData->StatIOread, a);
422 SOCKET_UNLOCK(so);
423 return nn;
424}
425
426/*
427 * Get urgent data
428 *
429 * When the socket is created, we set it SO_OOBINLINE,
430 * so when OOB data arrives, we soread() it and everything
431 * in the send buffer is sent as urgent data
432 */
433void
434sorecvoob(PNATState pData, struct socket *so)
435{
436 struct tcpcb *tp = sototcpcb(so);
437 ssize_t ret;
438
439 LogFlowFunc(("sorecvoob: so = %R[natsock]\n", so));
440
441 /*
442 * We take a guess at how much urgent data has arrived.
443 * In most situations, when urgent data arrives, the next
444 * read() should get all the urgent data. This guess will
445 * be wrong however if more data arrives just after the
446 * urgent data, or the read() doesn't return all the
447 * urgent data.
448 */
449 ret = soread(pData, so);
450 if (RT_LIKELY(ret > 0))
451 {
 /* Mark everything currently buffered as urgent and push it out
 * to the guest immediately with t_force set. */
452 tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd);
453 tp->t_force = 1;
454 tcp_output(pData, tp);
455 tp->t_force = 0;
456 }
457}
458
459/*
460 * Send urgent data
461 * There's a lot duplicated code here, but...
462 */
463int
464sosendoob(struct socket *so)
465{
466 struct sbuf *sb = &so->so_rcv;
467 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
468
469 int n, len;
470
471 LogFlowFunc(("sosendoob so = %R[natsock]\n", so));
472
473 if (so->so_urgc > sizeof(buff))
474 so->so_urgc = sizeof(buff); /* XXX */
475
476 if (sb->sb_rptr < sb->sb_wptr)
477 {
478 /* We can send it directly */
479 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
480 so->so_urgc -= n;
481
482 Log2((" --- sent %d bytes urgent data, %d urgent bytes left\n",
483 n, so->so_urgc));
484 }
485 else
486 {
487 /*
488 * Since there's no sendv or sendtov like writev,
489 * we must copy all data to a linear buffer then
490 * send it all
491 */
492 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
493 if (len > so->so_urgc)
494 len = so->so_urgc;
495 memcpy(buff, sb->sb_rptr, len);
496 so->so_urgc -= len;
497 if (so->so_urgc)
498 {
499 n = sb->sb_wptr - sb->sb_data;
500 if (n > so->so_urgc)
501 n = so->so_urgc;
502 memcpy(buff + len, sb->sb_data, n);
503 so->so_urgc -= n;
504 len += n;
505 }
506 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
507#ifdef DEBUG
508 if (n != len)
509 Log(("Didn't send all data urgently XXXXX\n"));
510#endif
511 Log2((" ---2 sent %d bytes urgent data, %d urgent bytes left\n",
512 n, so->so_urgc));
513 }
514
515 sb->sb_cc -= n;
516 sb->sb_rptr += n;
517 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
518 sb->sb_rptr -= sb->sb_datalen;
519
520 return n;
521}
522
523/*
524 * Write data from so_rcv to so's socket,
525 * updating all sbuf field as necessary
526 */
527int
528sowrite(PNATState pData, struct socket *so)
529{
530 int n, nn;
531 struct sbuf *sb = &so->so_rcv;
532 size_t len = sb->sb_cc;
533 struct iovec iov[2];
534
535 STAM_PROFILE_START(&pData->StatIOwrite, a);
536 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
537 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
538 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
539 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
540 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
541 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
542 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
543 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
544 LogFlowFunc(("so = %R[natsock]\n", so));
545 Log2(("%s: so = %R[natsock] so->so_rcv = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
546 QSOCKET_LOCK(tcb);
547 SOCKET_LOCK(so);
548 QSOCKET_UNLOCK(tcb);
 /* Urgent data is flushed first; bail out if that emptied the buffer. */
549 if (so->so_urgc)
550 {
551 sosendoob(so);
552 if (sb->sb_cc == 0)
553 {
554 SOCKET_UNLOCK(so);
555 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
556 return 0;
557 }
558 }
559
560 /*
561 * No need to check if there's something to write,
562 * sowrite wouldn't have been called otherwise
563 */
564
565 len = sb->sb_cc;
566
 /* Describe the readable region(s) of the circular buffer with up to
 * two iovecs; the region may wrap at sb_data + sb_datalen. */
567 iov[0].iov_base = sb->sb_rptr;
568 iov[1].iov_base = 0;
569 iov[1].iov_len = 0;
570 if (sb->sb_rptr < sb->sb_wptr)
571 {
572 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
573 /* Should never succeed, but... */
574 if (iov[0].iov_len > len)
575 iov[0].iov_len = len;
576 n = 1;
577 }
578 else
579 {
580 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
581 if (iov[0].iov_len > len)
582 iov[0].iov_len = len;
583 len -= iov[0].iov_len;
584 if (len)
585 {
586 iov[1].iov_base = sb->sb_data;
587 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
588 if (iov[1].iov_len > len)
589 iov[1].iov_len = len;
590 n = 2;
591 }
592 else
593 n = 1;
594 }
595 STAM_STATS({
596 if (n == 1)
597 {
598 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
599 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
600 }
601 else
602 {
603 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
604 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
605 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
606 }
607 });
608 /* Check if there's urgent data to send, and if so, send it */
609#ifdef HAVE_READV
610 nn = writev(so->s, (const struct iovec *)iov, n);
611#else
612 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
613#endif
614 Log2(("%s: wrote(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
615 /* This should never happen, but people tell me it does *shrug* */
616 if ( nn < 0
617 && soIgnorableErrorCode(errno))
618 {
619 SOCKET_UNLOCK(so);
620 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
621 return 0;
622 }
623
 /* Hard error or 0-byte write with data pending: treat as disconnect. */
624 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
625 {
626 Log2(("%s: disconnected, so->so_state = %x, errno = %d\n",
627 RT_GCC_EXTENSION __PRETTY_FUNCTION__, so->so_state, errno));
628 sofcantsendmore(so);
629 tcp_sockclosed(pData, sototcpcb(so));
630 SOCKET_UNLOCK(so);
631 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
632 return -1;
633 }
634
635#ifndef HAVE_READV
 /* First chunk fully written: try the wrapped-around remainder too. */
636 if (n == 2 && nn == iov[0].iov_len)
637 {
638 int ret;
639 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
640 if (ret > 0)
641 nn += ret;
642 STAM_STATS({
643 if (ret > 0 && ret != iov[1].iov_len)
644 {
645 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
646 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret));
647 }
648 });
649 }
650 Log2(("%s: wrote(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
651#endif
652
653 /* Update sbuf */
654 sb->sb_cc -= nn;
655 sb->sb_rptr += nn;
656 Log2(("%s: update so_rcv (written nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
657 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
658 {
659 sb->sb_rptr -= sb->sb_datalen;
660 Log2(("%s: alter sb_rptr of so_rcv %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
661 }
662
663 /*
664 * If in DRAIN mode, and there's no more data, set
665 * it CANTSENDMORE
666 */
667 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
668 sofcantsendmore(so);
669
670 SOCKET_UNLOCK(so);
671 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
672 return nn;
673}
674
675/*
676 * recvfrom() a UDP socket
677 */
678void
679sorecvfrom(PNATState pData, struct socket *so)
680{
681 LogFlowFunc(("sorecvfrom: so = %lx\n", (long)so));
682
683#ifdef RT_OS_WINDOWS
684 /* ping is handled with ICMP API in ip_icmpwin.c */
685 Assert(so->so_type == IPPROTO_UDP);
686#else
687 if (so->so_type == IPPROTO_ICMP)
688 {
689 /* This is a "ping" reply */
690 sorecvfrom_icmp_unix(pData, so);
691 udp_detach(pData, so);
692 }
693 else
694#endif /* !RT_OS_WINDOWS */
695 {
 /* NOTE(review): static overflow buffer appears to assume a
 * single-threaded receive path -- confirm before any concurrent use. */
696 static uint8_t au8Buf[64 * 1024];
697
698 /* A "normal" UDP packet */
699 struct sockaddr_in addr;
700 socklen_t addrlen = sizeof(struct sockaddr_in);
701 struct iovec iov[2];
702 ssize_t nread;
703 struct mbuf *m;
704
705 QSOCKET_LOCK(udb);
706 SOCKET_LOCK(so);
707 QSOCKET_UNLOCK(udb);
708
709 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData));
710 if (m == NULL)
711 {
712 SOCKET_UNLOCK(so);
713 return;
714 }
715
 /* Leave room for the ethernet and UDP/IP headers that will be
 * prepended before the packet is handed to the guest. */
716 m->m_data += ETH_HLEN;
717 m->m_pkthdr.header = mtod(m, void *);
718
719 m->m_data += sizeof(struct udpiphdr);
720
721 /* small packets will fit without copying */
722 iov[0].iov_base = mtod(m, char *);
723 iov[0].iov_len = M_TRAILINGSPACE(m);
724
725 /* large packets will spill into a temp buffer */
726 iov[1].iov_base = au8Buf;
727 iov[1].iov_len = sizeof(au8Buf);
728
729#if !defined(RT_OS_WINDOWS)
730 {
731 struct msghdr mh;
732 memset(&mh, 0, sizeof(mh));
733
734 mh.msg_iov = iov;
735 mh.msg_iovlen = 2;
736 mh.msg_name = &addr;
737 mh.msg_namelen = addrlen;
738
739 nread = recvmsg(so->s, &mh, 0);
740 }
741#else /* RT_OS_WINDOWS */
742 {
743 DWORD nbytes; /* NB: can't use nread b/c of different size */
744 DWORD flags;
745 int status;
746
747 flags = 0;
748 status = WSARecvFrom(so->s, iov, 2, &nbytes, &flags,
749 (struct sockaddr *)&addr, &addrlen,
750 NULL, NULL);
751 if (status != SOCKET_ERROR)
752 nread = nbytes;
753 else
754 nread = -1;
755 }
756#endif
757 if (nread >= 0)
758 {
 /* Append the part that spilled into the temp buffer, if any. */
759 if (nread <= iov[0].iov_len)
760 m->m_len = nread;
761 else
762 {
763 m->m_len = iov[0].iov_len;
764 m_append(pData, m, nread - iov[0].iov_len, iov[1].iov_base);
765 }
766 Assert((m_length(m, NULL) == nread));
767
768 /*
769 * Hack: domain name lookup will be used the most for UDP,
770 * and since they'll only be used once there's no need
771 * for the 4 minute (or whatever) timeout... So we time them
772 * out much quicker (10 seconds for now...)
773 */
774 if (so->so_expire)
775 {
776 if (so->so_fport != RT_H2N_U16_C(53))
777 so->so_expire = curtime + SO_EXPIRE;
778 }
779
780 /*
781 * DNS proxy requests are forwarded to the real resolver,
782 * but its socket's so_faddr is that of the DNS proxy
783 * itself.
784 *
785 * last argument should be changed if Slirp will inject IP attributes
786 */
787 if ( pData->fUseDnsProxy
788 && so->so_fport == RT_H2N_U16_C(53)
789 && CTL_CHECK(so->so_faddr.s_addr, CTL_DNS))
790 dnsproxy_answer(pData, so, m);
791
792 /* packets definitely will be fragmented, could confuse receiver peer. */
793 if (nread > if_mtu)
794 m->m_flags |= M_SKIP_FIREWALL;
795
796 /*
797 * If this packet was destined for CTL_ADDR,
798 * make it look like that's where it came from, done by udp_output
799 */
800 udp_output(pData, so, m, &addr);
801 }
802 else
803 {
804 m_freem(pData, m);
805
 /* Translate the host errno into an ICMP unreachable for the guest. */
806 if (!soIgnorableErrorCode(errno))
807 {
808 u_char code;
809 if (errno == EHOSTUNREACH)
810 code = ICMP_UNREACH_HOST;
811 else if (errno == ENETUNREACH)
812 code = ICMP_UNREACH_NET;
813 else
814 code = ICMP_UNREACH_PORT;
815
816 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
817 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
818 so->so_m = NULL;
819 }
820 }
821
822 SOCKET_UNLOCK(so);
823 }
824}
825
826/*
827 * sendto() a socket
828 */
829int
830sosendto(PNATState pData, struct socket *so, struct mbuf *m)
831{
832 int ret;
833 struct sockaddr_in *paddr;
834 struct sockaddr addr;
835#if 0
836 struct sockaddr_in host_addr;
837#endif
838 caddr_t buf = 0;
839 int mlen;
840
841 LogFlowFunc(("sosendto: so = %R[natsock], m = %lx\n", so, (long)m));
842
843 memset(&addr, 0, sizeof(struct sockaddr));
844#ifdef RT_OS_DARWIN
845 addr.sa_len = sizeof(struct sockaddr_in);
846#endif
847 paddr = (struct sockaddr_in *)&addr;
848 paddr->sin_family = AF_INET;
 /* Destinations on the NAT network itself are aliases for host-side
 * addresses (loopback, broadcast, ...). */
849 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
850 {
851 /* It's an alias */
852 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
853 switch(last_byte)
854 {
855#if 0
856 /* handle this case at 'default:' */
857 case CTL_BROADCAST:
858 addr.sin_addr.s_addr = INADDR_BROADCAST;
859 /* Send the packet to host to fully emulate broadcast */
860 /** @todo r=klaus: on Linux host this causes the host to receive
861 * the packet twice for some reason. And I cannot find any place
862 * in the man pages which states that sending a broadcast does not
863 * reach the host itself. */
864 host_addr.sin_family = AF_INET;
865 host_addr.sin_port = so->so_fport;
866 host_addr.sin_addr = our_addr;
867 sendto(so->s, m->m_data, m->m_len, 0,
868 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
869 break;
870#endif
871 case CTL_DNS:
872 case CTL_ALIAS:
873 default:
874 if (last_byte == ~pData->netmask)
875 paddr->sin_addr.s_addr = INADDR_BROADCAST;
876 else
877 paddr->sin_addr = loopback_addr;
878 break;
879 }
880 }
881 else
882 paddr->sin_addr = so->so_faddr;
883 paddr->sin_port = so->so_fport;
884
885 Log2((" sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
886 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
887
888 /* Don't care what port we get */
889 /*
890 * > nmap -sV -T4 -O -A -v -PU3483 255.255.255.255
891 * generates bodyless messages, annoying memory management system.
892 */
 /* Linearize the mbuf chain; a zero-length datagram goes out with a
 * NULL buffer. */
893 mlen = m_length(m, NULL);
894 if (mlen > 0)
895 {
896 buf = RTMemAlloc(mlen);
897 if (buf == NULL)
898 {
 /* NOTE(review): the mbuf is not freed here -- presumably the
 * caller still owns it on failure; confirm against udp_output. */
899 return -1;
900 }
901 m_copydata(m, 0, mlen, buf);
902 }
903 ret = sendto(so->s, buf, mlen, 0,
904 (struct sockaddr *)&addr, sizeof (struct sockaddr));
905#ifdef VBOX_WITH_NAT_SEND2HOME
906 if (slirpIsWideCasting(pData, so->so_faddr.s_addr))
907 {
908 slirpSend2Home(pData, so, buf, mlen, 0);
909 }
910#endif
911 if (buf)
912 RTMemFree(buf);
913 if (ret < 0)
914 {
915 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
916 return -1;
917 }
918
919 /*
920 * Kill the socket if there's no reply in 4 minutes,
921 * but only if it's an expirable socket
922 */
923 if (so->so_expire)
924 so->so_expire = curtime + SO_EXPIRE;
925 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
926 return 0;
927}
928
929/*
930 * XXX This should really be tcp_listen
931 */
932struct socket *
933solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
934{
935 struct sockaddr_in addr;
936 struct socket *so;
937 socklen_t addrlen = sizeof(addr);
938 int s, opt = 1;
939 int status;
940
941 LogFlowFunc(("solisten: port = %d, laddr = %x, lport = %d, flags = %x\n", port, laddr, lport, flags));
942
943 if ((so = socreate()) == NULL)
944 {
945 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
946 return NULL;
947 }
948
949 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
950 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
951 {
952 RTMemFree(so);
953 return NULL;
954 }
955
956 SOCKET_LOCK_CREATE(so);
957 SOCKET_LOCK(so);
958 QSOCKET_LOCK(tcb);
959 insque(pData, so,&tcb);
960 NSOCK_INC();
961 QSOCKET_UNLOCK(tcb);
962
963 /*
964 * SS_FACCEPTONCE sockets must time out.
965 */
966 if (flags & SS_FACCEPTONCE)
967 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
968
969 so->so_state = (SS_FACCEPTCONN|flags);
970 so->so_lport = lport; /* Kept in network format */
971 so->so_laddr.s_addr = laddr; /* Ditto */
972
973 memset(&addr, 0, sizeof(addr));
974#ifdef RT_OS_DARWIN
975 addr.sin_len = sizeof(addr);
976#endif
977 addr.sin_family = AF_INET;
978 addr.sin_addr.s_addr = bind_addr;
979 addr.sin_port = port;
980
981 /**
982 * changing listen(,1->SOMAXCONN) shouldn't be harmful for NAT's TCP/IP stack,
983 * kernel will choose the optimal value for requests queue length.
984 * @note: MSDN recommends low (2-4) values for bluetooth networking devices.
985 */
986 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
987 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
988 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
989 || (listen(s, pData->soMaxConn) < 0))
990 {
991#ifdef RT_OS_WINDOWS
992 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
993 closesocket(s);
994 QSOCKET_LOCK(tcb);
995 sofree(pData, so);
996 QSOCKET_UNLOCK(tcb);
997 /* Restore the real errno */
998 WSASetLastError(tmperrno);
999#else
1000 int tmperrno = errno; /* Don't clobber the real reason we failed */
1001 close(s);
1002 if (sototcpcb(so))
1003 tcp_close(pData, sototcpcb(so));
1004 else
1005 sofree(pData, so);
1006 /* Restore the real errno */
1007 errno = tmperrno;
1008#endif
1009 return NULL;
1010 }
1011 fd_nonblock(s);
1012 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
1013
 /* Recover the port the kernel actually assigned (port may have been 0). */
1014 getsockname(s,(struct sockaddr *)&addr,&addrlen);
1015 so->so_fport = addr.sin_port;
1016 /* set socket buffers */
1017 opt = pData->socket_rcv;
1018 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
1019 if (status < 0)
1020 {
1021 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
1022 goto no_sockopt;
1023 }
1024 opt = pData->socket_snd;
1025 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
1026 if (status < 0)
1027 {
1028 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
1029 goto no_sockopt;
1030 }
1031no_sockopt:
 /* Wildcard or loopback binds are presented to the guest as the alias
 * address of the NAT network. */
1032 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
1033 so->so_faddr = alias_addr;
1034 else
1035 so->so_faddr = addr.sin_addr;
1036
1037 so->s = s;
1038 SOCKET_UNLOCK(so);
1039 return so;
1040}
1041
1042/*
1043 * Data is available in so_rcv
1044 * Just write() the data to the socket
1045 * XXX not yet...
1046 * @todo do we really need this function, what it's intended to do?
1047 */
1048void
1049sorwakeup(struct socket *so)
1050{
 /* Intentional no-op: flushing so_rcv to the host socket is driven by
 * the main loop, not by a wakeup callback (see disabled code below). */
1051 NOREF(so);
1052#if 0
1053 sowrite(so);
1054 FD_CLR(so->s,&writefds);
1055#endif
1056}
1057
1058/*
1059 * Data has been freed in so_snd
1060 * We have room for a read() if we want to
1061 * For now, don't read, it'll be done in the main loop
1062 */
1063void
1064sowwakeup(struct socket *so)
1065{
 /* Intentional no-op: reading from the host socket happens in the main
 * loop when select() reports readiness. */
1066 NOREF(so);
1067}
1068
1069/*
1070 * Various session state calls
1071 * XXX Should be #define's
1072 * The socket state stuff needs work, these often get call 2 or 3
1073 * times each when only 1 was needed
1074 */
1075void
1076soisfconnecting(struct socket *so)
1077{
1078 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
1079 SS_FCANTSENDMORE|SS_FWDRAIN);
1080 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1081}
1082
1083void
1084soisfconnected(struct socket *so)
1085{
1086 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1087 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1088 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1089 LogFlowFunc(("LEAVE: so:%R[natsock]\n", so));
1090}
1091
1092void
1093sofcantrcvmore(struct socket *so)
1094{
1095 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1096 if ((so->so_state & SS_NOFDREF) == 0)
1097 {
 /* 0 == SHUT_RD: stop receiving on the host socket. */
1098 shutdown(so->s, 0);
1099 }
1100 so->so_state &= ~(SS_ISFCONNECTING);
 /* Once both directions are shut, stop selecting the socket at all. */
1101 if (so->so_state & SS_FCANTSENDMORE)
1102 so->so_state = SS_NOFDREF; /* Don't select it */
1103 /* XXX close() here as well? */
1104 else
1105 so->so_state |= SS_FCANTRCVMORE;
1106 LogFlowFuncLeave();
1107}
1108
1109void
1110sofcantsendmore(struct socket *so)
1111{
1112 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1113 if ((so->so_state & SS_NOFDREF) == 0)
1114 shutdown(so->s, 1); /* send FIN to fhost */
1115
1116 so->so_state &= ~(SS_ISFCONNECTING);
 /* Once both directions are shut, stop selecting the socket at all. */
1117 if (so->so_state & SS_FCANTRCVMORE)
1118 so->so_state = SS_NOFDREF; /* as above */
1119 else
1120 so->so_state |= SS_FCANTSENDMORE;
1121 LogFlowFuncLeave();
1122}
1123
1124void
1125soisfdisconnected(struct socket *so)
1126{
 /* Currently a no-op; the disabled code below shows the intended
 * disconnect handling. */
1127 NOREF(so);
1128#if 0
1129 so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
1130 close(so->s);
1131 so->so_state = SS_ISFDISCONNECTED;
1132 /*
1133 * XXX Do nothing ... ?
1134 */
1135#endif
1136}
1137
1138/*
1139 * Set write drain mode
1140 * Set CANTSENDMORE once all data has been write()n
1141 */
1142void
1143sofwdrain(struct socket *so)
1144{
1145 if (SBUF_LEN(&so->so_rcv))
1146 so->so_state |= SS_FWDRAIN;
1147 else
1148 sofcantsendmore(so);
1149}
1150
1151#if !defined(RT_OS_WINDOWS)
/**
 * Relay an ICMP datagram received on the host ICMP socket back to the guest.
 *
 * Only ICMP_ECHOREPLY, ICMP_TIMXCEED and ICMP_UNREACH are handled; anything
 * else is silently dropped.  For the two error types the IP header embedded
 * in the ICMP payload identifies the original request.  The matching request
 * mbuf (looked up via icmp_find_original_mbuf) is rewritten in place and
 * reflected back to the guest.
 *
 * @param pData     NAT state.
 * @param buff      Raw datagram as read from the socket, IP header included.
 * @param len       Number of valid bytes in @a buff.
 * @param addr      Source address the datagram was received from.
 */
static void
send_icmp_to_guest(PNATState pData, char *buff, size_t len, const struct sockaddr_in *addr)
{
    struct ip *ip;
    uint32_t dst, src;
    char ip_copy[256];
    struct icmp *icp;
    int old_ip_len = 0;
    int hlen, original_hlen = 0;
    struct mbuf *m;
    struct icmp_msg *icm;
    uint8_t proto;
    int type = 0;

    ip = (struct ip *)buff;
    /* Fix ip->ip_len to contain the total packet length including the header
     * in _host_ byte order for all OSes. On Darwin, that value already is in
     * host byte order. Solaris and Darwin report only the payload. */
#ifndef RT_OS_DARWIN
    ip->ip_len = RT_N2H_U16(ip->ip_len);
#endif
    hlen = (ip->ip_hl << 2);
#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
    ip->ip_len += hlen;
#endif
    if (ip->ip_len < hlen + ICMP_MINLEN)
    {
        Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
        return;
    }
    icp = (struct icmp *)((char *)ip + hlen);

    Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
    if (   icp->icmp_type != ICMP_ECHOREPLY
        && icp->icmp_type != ICMP_TIMXCEED
        && icp->icmp_type != ICMP_UNREACH)
    {
        return;
    }

    /*
     * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
     * ICMP_ECHOREPLY assuming data 0:
     * icmp_{type(8), code(8), cksum(16), identifier(16), seqnum(16)}
     */
    if (ip->ip_len < hlen + 8)
    {
        Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
        return;
    }

    type = icp->icmp_type;
    if (   type == ICMP_TIMXCEED
        || type == ICMP_UNREACH)
    {
        /*
         * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
         * icmp_{type(8), code(8), cksum(16), unused(32)} + IP header + 64 bits of original datagram
         */
        if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
        {
            Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
            return;
        }
        /* For error messages, match on the IP header embedded in the ICMP payload. */
        ip = &icp->icmp_ip;
    }

    icm = icmp_find_original_mbuf(pData, ip);
    if (icm == NULL)
    {
        Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
        return;
    }

    m = icm->im_m;
    if (!m)
    {
        LogFunc(("%R[natsock] hasn't stored it's mbuf on sent\n", icm->im_so));
        goto done;
    }

    src = addr->sin_addr.s_addr;
    if (type == ICMP_ECHOREPLY)
    {
        struct ip *ip0 = mtod(m, struct ip *);
        struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
        if (icp0->icmp_type != ICMP_ECHO)
        {
            Log(("NAT: we haven't found echo for this reply\n"));
            goto done;
        }
        /*
         * While combining the buffer to send (see ip_icmp.c) we control the ICMP
         * header only; the IP header is assembled by the OS network stack.  Our
         * local copy of the IP header contains values in host byte order, so no
         * byte order conversion is required here.  IP header fields are converted
         * in the ip_output0 routine only.
         */
        if (   (ip->ip_len - hlen)
            != (ip0->ip_len - (ip0->ip_hl << 2)))
        {
            Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
                (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
            goto done;
        }
    }

    /* ip points at the original IP header */
    ip = mtod(m, struct ip *);
    proto = ip->ip_p;
    /* Now ip is pointing at the header we've sent from the guest */
    if (   icp->icmp_type == ICMP_TIMXCEED
        || icp->icmp_type == ICMP_UNREACH)
    {
        /* Snapshot the guest's original IP header (+ 64 bits of payload) before
         * the mbuf is overwritten below; it is re-embedded into the ICMP error. */
        old_ip_len = (ip->ip_hl << 2) + 64;
        if (old_ip_len > sizeof(ip_copy))
            old_ip_len = sizeof(ip_copy);
        memcpy(ip_copy, ip, old_ip_len);
    }

    /* source address from original IP packet */
    dst = ip->ip_src.s_addr;

    /* override the tail of the old packet */
    ip = mtod(m, struct ip *); /* ip is from the mbuf we've overridden */
    original_hlen = ip->ip_hl << 2;
    /* keeps the original IP header and options, appends the received ICMP payload */
    m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
    ip->ip_len = m_length(m, NULL);
    ip->ip_p = IPPROTO_ICMP; /* the original packet could be whatever, but we respond via ICMP */

    icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
    type = icp->icmp_type;
    if (   type == ICMP_TIMXCEED
        || type == ICMP_UNREACH)
    {
        /* According to RFC 792, error messages require a copy of the initial IP header + 64 bits. */
        memcpy(&icp->icmp_ip, ip_copy, old_ip_len);

        /* undo byte order conversions done in ip_input() */
        HTONS(icp->icmp_ip.ip_len);
        HTONS(icp->icmp_ip.ip_id);
        HTONS(icp->icmp_ip.ip_off);

        ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
    }

    ip->ip_src.s_addr = src;
    ip->ip_dst.s_addr = dst;
    icmp_reflect(pData, m);
    /* m was freed */
    icm->im_m = NULL;

  done:
    icmp_msg_delete(pData, icm);
}
1307
/**
 * Read one ICMP datagram from the host ICMP socket (non-Windows hosts) and
 * forward it to the guest.
 *
 * The datagram is read in two steps: first only the IP header is peeked
 * (MSG_PEEK leaves the datagram queued) to learn the total length, then the
 * whole datagram is read into a temporary buffer which is handed to
 * send_icmp_to_guest().
 *
 * @param pData     NAT state.
 * @param so        ICMP socket to read from.
 */
static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
{
    struct sockaddr_in addr;
    socklen_t addrlen = sizeof(struct sockaddr_in);
    struct ip ip;
    char *buff;
    int len = 0;

    /* 1st step: peek at the IP header only to learn the datagram length */
    len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
                   (struct sockaddr *)&addr, &addrlen);
    if (   len < 0
        && (   soIgnorableErrorCode(errno)
            || errno == ENOTCONN))
    {
        /* transient condition (EAGAIN & friends / not yet connected) - retry later */
        Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
        return;
    }

    if (   len < sizeof(struct ip)
        || len < 0
        || len == 0)
    {
        /* Hard failure or truncated header: report unreachable back to the guest. */
        u_char code;
        code = ICMP_UNREACH_PORT;

        if (errno == EHOSTUNREACH)
            code = ICMP_UNREACH_HOST;
        else if (errno == ENETUNREACH)
            code = ICMP_UNREACH_NET;

        LogRel((" udp icmp rx errno = %d (%s)\n", errno, strerror(errno)));
        icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
        so->so_m = NULL;
        Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n"));
        return;
    }
    /* basic check of IP header */
    if (   ip.ip_v != IPVERSION
# ifndef RT_OS_DARWIN
        || ip.ip_p != IPPROTO_ICMP
# endif
       )
    {
        Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n"));
        return;
    }
# ifndef RT_OS_DARWIN
    /* Darwin reports the IP length already in host byte order. */
    ip.ip_len = RT_N2H_U16(ip.ip_len);
# endif
# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
    /* Solaris and Darwin report the payload only */
    ip.ip_len += (ip.ip_hl << 2);
# endif
    /* Note: ip->ip_len in host byte order (all OS) */
    len = ip.ip_len;
    buff = RTMemAlloc(len);
    if (buff == NULL)
    {
        Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
        return;
    }
    /* 2nd step: read the complete datagram into the freshly allocated buffer */
    addrlen = sizeof(struct sockaddr_in);
    memset(&addr, 0, addrlen);
    len = recvfrom(so->s, buff, len, 0,
                   (struct sockaddr *)&addr, &addrlen);
    if (   len < 0
        && (   soIgnorableErrorCode(errno)
            || errno == ENOTCONN))
    {
        Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
            ip.ip_len));
        RTMemFree(buff);
        return;
    }
    if (   len < 0
        || len == 0)
    {
        Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
            errno, len, (ip.ip_len - sizeof(struct ip))));
        RTMemFree(buff);
        return;
    }
    /* len was updated by the 2nd read and holds the number of bytes actually received */
    send_icmp_to_guest(pData, buff, len, &addr);
    RTMemFree(buff);
}
1397#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette