VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/SrvIntNetR0.cpp@ 56743

Last change on this file since 56743 was 56316, checked in by vboxsync, 10 years ago

whitespace

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 238.3 KB
Line 
1/* $Id: SrvIntNetR0.cpp 56316 2015-06-09 22:52:56Z vboxsync $ */
2/** @file
3 * Internal networking - The ring 0 service.
4 *
5 * @remarks No lazy code changes. If you don't understand exactly what you're
6 * doing, get an understanding or forget it.
7 * All changes shall be reviewed by bird before commit. If not around,
8 * email and let Frank and/or Klaus OK the changes before committing.
9 */
10
11/*
12 * Copyright (C) 2006-2015 Oracle Corporation
13 *
14 * This file is part of VirtualBox Open Source Edition (OSE), as
15 * available from http://www.virtualbox.org. This file is free software;
16 * you can redistribute it and/or modify it under the terms of the GNU
17 * General Public License (GPL) as published by the Free Software
18 * Foundation, in version 2 as it comes in the "COPYING" file of the
19 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
20 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
21 */
22
23
24/*******************************************************************************
25* Header Files *
26*******************************************************************************/
27#define LOG_GROUP LOG_GROUP_SRV_INTNET
28#include <VBox/intnet.h>
29#include <VBox/intnetinline.h>
30#include <VBox/vmm/pdmnetinline.h>
31#include <VBox/sup.h>
32#include <VBox/vmm/pdm.h>
33#include <VBox/log.h>
34
35#include <iprt/asm.h>
36#include <iprt/assert.h>
37#include <iprt/handletable.h>
38#include <iprt/mp.h>
39#include <iprt/mem.h>
40#include <iprt/net.h>
41#include <iprt/semaphore.h>
42#include <iprt/spinlock.h>
43#include <iprt/string.h>
44#include <iprt/thread.h>
45#include <iprt/time.h>
46
47
48/*******************************************************************************
49* Defined Constants And Macros *
50*******************************************************************************/
/** @def INTNET_WITH_DHCP_SNOOPING
 * Enables DHCP snooping when in shared-mac-on-the-wire mode. */
#define INTNET_WITH_DHCP_SNOOPING

/** The maximum number of interfaces in a network. */
#define INTNET_MAX_IFS (1023 + 1 + 16)

/** The number of entries to grow the destination tables with. */
#if 0
# define INTNET_GROW_DSTTAB_SIZE 16
#else
# define INTNET_GROW_DSTTAB_SIZE 1
#endif

/** The wakeup bit in the INTNETIF::cBusy and INTNETTRUNKIF::cBusy counters. */
#define INTNET_BUSY_WAKEUP_MASK RT_BIT_32(30)
67
68
69/*******************************************************************************
70* Structures and Typedefs *
71*******************************************************************************/
/**
 * MAC address lookup table entry.
 */
typedef struct INTNETMACTABENTRY
{
    /** The MAC address of this entry. */
    RTMAC MacAddr;
    /** Whether it is effectively in promiscuous mode. */
    bool fPromiscuousEff;
    /** Whether it is promiscuous and should see unrelated trunk traffic. */
    bool fPromiscuousSeeTrunk;
    /** Whether it is active.
     * We ignore the entry if this is clear and may end up sending packets addressed
     * to this interface onto the trunk. The reasoning for this is that this could
     * be the interface of a VM that just has been teleported to a different host. */
    bool fActive;
    /** Pointer to the network interface. */
    struct INTNETIF *pIf;
} INTNETMACTABENTRY;
/** Pointer to a MAC address lookup table entry. */
typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
93
/**
 * MAC address lookup table.
 *
 * @todo Having this in a separate structure didn't work out as well as it
 *       should. Consider merging it into INTNETNETWORK.
 */
typedef struct INTNETMACTAB
{
    /** The current number of entries. */
    uint32_t cEntries;
    /** The number of entries we've allocated space for. */
    uint32_t cEntriesAllocated;
    /** Table entries. */
    PINTNETMACTABENTRY paEntries;

    /** The number of interface entries currently in promiscuous mode. */
    uint32_t cPromiscuousEntries;
    /** The number of interface entries currently in promiscuous mode that
     * shall not see unrelated trunk traffic. */
    uint32_t cPromiscuousNoTrunkEntries;

    /** The host MAC address (reported). */
    RTMAC HostMac;
    /** The effective host promiscuous setting (reported). */
    bool fHostPromiscuousEff;
    /** The real host promiscuous setting (reported). */
    bool fHostPromiscuousReal;
    /** Whether the host is active. */
    bool fHostActive;

    /** Whether the wire is promiscuous (config). */
    bool fWirePromiscuousEff;
    /** Whether the wire is promiscuous (config).
     * (Shadows INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE in
     * INTNETNETWORK::fFlags.) */
    bool fWirePromiscuousReal;
    /** Whether the wire is active. */
    bool fWireActive;

    /** Pointer to the trunk interface. */
    struct INTNETTRUNKIF *pTrunk;
} INTNETMACTAB;
/** Pointer to a MAC address lookup table. */
typedef INTNETMACTAB *PINTNETMACTAB;
138
/**
 * Destination table.
 */
typedef struct INTNETDSTTAB
{
    /** The trunk destinations. */
    uint32_t fTrunkDst;
    /** Pointer to the trunk interface (referenced) if fTrunkDst is non-zero. */
    struct INTNETTRUNKIF *pTrunk;
    /** The number of destination interfaces. */
    uint32_t cIfs;
    /** The interfaces (referenced). Variable sized array; the declared
     * dimension of 1 is only a placeholder. */
    struct
    {
        /** The destination interface. */
        struct INTNETIF *pIf;
        /** Whether to replace the destination MAC address.
         * This is used when sharing MAC address with the host on the wire(less). */
        bool fReplaceDstMac;
    } aIfs[1];
} INTNETDSTTAB;
/** Pointer to a destination table. */
typedef INTNETDSTTAB *PINTNETDSTTAB;
/** Pointer to a const destination table. */
typedef INTNETDSTTAB const *PCINTNETDSTTAB;
164
/**
 * Address and type.
 */
typedef struct INTNETADDR
{
    /** The address type. */
    INTNETADDRTYPE enmType;
    /** The address. */
    RTNETADDRU Addr;
} INTNETADDR;
/** Pointer to an address. */
typedef INTNETADDR *PINTNETADDR;
/** Pointer to a const address. */
typedef INTNETADDR const *PCINTNETADDR;
179
180
/**
 * Address cache for a specific network layer.
 */
typedef struct INTNETADDRCACHE
{
    /** Pointer to the table of addresses. */
    uint8_t *pbEntries;
    /** The number of valid address entries. */
    uint8_t cEntries;
    /** The number of allocated address entries. */
    uint8_t cEntriesAlloc;
    /** The address size. */
    uint8_t cbAddress;
    /** The size of an entry. */
    uint8_t cbEntry;
} INTNETADDRCACHE;
/** Pointer to an address cache. */
typedef INTNETADDRCACHE *PINTNETADDRCACHE;
/** Pointer to a const address cache. */
typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
201
202
/**
 * A network interface.
 *
 * Unless explicitly stated, all members are protected by the network semaphore.
 */
typedef struct INTNETIF
{
    /** The MAC address.
     * This is shadowed by INTNETMACTABENTRY::MacAddr. */
    RTMAC MacAddr;
    /** Set if the INTNETIF::MacAddr member has been explicitly set. */
    bool fMacSet;
    /** Tracks the desired promiscuous setting of the interface. */
    bool fPromiscuousReal;
    /** Whether the interface is active or not.
     * This is shadowed by INTNETMACTABENTRY::fActive. */
    bool fActive;
    /** Whether someone is currently in the destructor or has indicated that
     * the end is nigh by means of IntNetR0IfAbortWait. */
    bool volatile fDestroying;
    /** The flags specified when opening this interface. */
    uint32_t fOpenFlags;
    /** Number of yields done to try make the interface read pending data.
     * We will stop yielding when this reaches a threshold assuming that the VM is
     * paused or that it simply isn't worth all the delay. It is cleared when a
     * successful send has been done. */
    uint32_t cYields;
    /** Pointer to the current exchange buffer (ring-0). */
    PINTNETBUF pIntBuf;
    /** Pointer to ring-3 mapping of the current exchange buffer. */
    R3PTRTYPE(PINTNETBUF) pIntBufR3;
    /** Pointer to the default exchange buffer for the interface. */
    PINTNETBUF pIntBufDefault;
    /** Pointer to ring-3 mapping of the default exchange buffer. */
    R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
    /** Event semaphore which a receiver/consumer thread will sleep on while
     * waiting for data to arrive. */
    RTSEMEVENT volatile hRecvEvent;
    /** Number of threads sleeping on the event semaphore. */
    uint32_t cSleepers;
    /** The interface handle.
     * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
     * should return with the appropriate error condition. */
    INTNETIFHANDLE volatile hIf;
    /** Pointer to the network this interface is connected to.
     * This is protected by the INTNET::hMtxCreateOpenDestroy. */
    struct INTNETNETWORK *pNetwork;
    /** The session this interface is associated with. */
    PSUPDRVSESSION pSession;
    /** The SUPR0 object id. */
    void *pvObj;
    /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
     * This is protected by the address spinlock of the network. */
    INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
    /** Spinlock protecting the input (producer) side of the receive ring. */
    RTSPINLOCK hRecvInSpinlock;
    /** Busy count for tracking destination table references and active sends.
     * Usually incremented while owning the switch table spinlock. Bit 30
     * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
    uint32_t volatile cBusy;
    /** The preallocated destination table.
     * This is NULL when it's in use as a precaution against unserialized
     * transmitting. This is grown when new interfaces are added to the network. */
    PINTNETDSTTAB volatile pDstTab;
    /** Pointer to the trunk's per interface data. Can be NULL. */
    void *pvIfData;
    /** Header buffer for when we're carving GSO frames. */
    uint8_t abGsoHdrs[256];
} INTNETIF;
/** Pointer to an internal network interface. */
typedef INTNETIF *PINTNETIF;
274
275
/**
 * A trunk interface.
 */
typedef struct INTNETTRUNKIF
{
    /** The port interface we present to the component. */
    INTNETTRUNKSWPORT SwitchPort;
    /** The port interface we get from the component. */
    PINTNETTRUNKIFPORT pIfPort;
    /** Pointer to the network we're connected to.
     * This may be NULL if we're orphaned? */
    struct INTNETNETWORK *pNetwork;
    /** The current MAC address for the interface. (reported)
     * Updated while owning the switch table spinlock. */
    RTMAC MacAddr;
    /** Whether to supply physical addresses with the outbound SGs. (reported) */
    bool fPhysSG;
    /** Explicit alignment. */
    bool fUnused;
    /** Busy count for tracking destination table references and active sends.
     * Usually incremented while owning the switch table spinlock. Bit 30
     * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
    uint32_t volatile cBusy;
    /** Mask of destinations that pfnXmit can cope with disabled preemption for. */
    uint32_t fNoPreemptDsts;
    /** The GSO capabilities of the wire destination. (reported) */
    uint32_t fWireGsoCapabilites;
    /** The GSO capabilities of the host destination. (reported)
     * This is a bit map where each bit represents the GSO type with the same
     * number. */
    uint32_t fHostGsoCapabilites;
    /** The destination table spinlock, interrupt safe.
     * Protects apTaskDstTabs and apIntDstTabs. */
    RTSPINLOCK hDstTabSpinlock;
    /** The number of entries in apIntDstTabs. */
    uint32_t cIntDstTabs;
    /** The task time destination tables.
     * @remarks intnetR0NetworkEnsureTabSpace and others ASSUMES this immediately
     *          precedes apIntDstTabs so that these two tables can be used as one
     *          contiguous one. */
    PINTNETDSTTAB apTaskDstTabs[2];
    /** The interrupt / disabled-preemption time destination tables.
     * This is a variable sized array. */
    PINTNETDSTTAB apIntDstTabs[1];
} INTNETTRUNKIF;
/** Pointer to a trunk interface. */
typedef INTNETTRUNKIF *PINTNETTRUNKIF;

/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF.
 * This is a plain cast, so it depends on SwitchPort being the first member of
 * INTNETTRUNKIF. */
#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
326
327
/**
 * Internal representation of a network.
 */
typedef struct INTNETNETWORK
{
    /** The next network in the chain.
     * This is protected by the INTNET::hMtxCreateOpenDestroy. */
    struct INTNETNETWORK *pNext;

    /** The spinlock protecting MacTab, aAddrBlacklist and INTNETIF::aAddrCache.
     * Interrupt safe. */
    RTSPINLOCK hAddrSpinlock;
    /** MAC address table.
     * This doubles as interface collection. */
    INTNETMACTAB MacTab;

    /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
     * Contains host addresses. We don't let guests spoof them. */
    INTNETADDRCACHE aAddrBlacklist[kIntNetAddrType_End];

    /** Wait for an interface to stop being busy so it can be removed or have its
     * destination table replaced. We have to wait upon this while owning the
     * network mutex. Will only ever have one waiter because of the big mutex. */
    RTSEMEVENT hEvtBusyIf;
    /** Pointer to the instance data. */
    struct INTNET *pIntNet;
    /** The SUPR0 object id. */
    void *pvObj;
    /** Pointer to the temporary buffer that is used when snooping fragmented packets.
     * This is allocated after this structure if we're sharing the MAC address with
     * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundary. */
    uint8_t *pbTmp;
    /** Network creation flags (INTNET_OPEN_FLAGS_*). */
    uint32_t fFlags;
    /** Any restrictive policies required as a minimum by some interface.
     * (INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES) */
    uint32_t fMinFlags;
    /** The number of active interfaces (excluding the trunk). */
    uint32_t cActiveIFs;
    /** The length of the network name. */
    uint8_t cchName;
    /** The network name. */
    char szName[INTNET_MAX_NETWORK_NAME];
    /** The trunk type. */
    INTNETTRUNKTYPE enmTrunkType;
    /** The trunk name. */
    char szTrunk[INTNET_MAX_TRUNK_NAME];
} INTNETNETWORK;
/** Pointer to an internal network. */
typedef INTNETNETWORK *PINTNETNETWORK;
/** Pointer to a const internal network. */
typedef const INTNETNETWORK *PCINTNETNETWORK;

/** The size (in bytes) of the buffer INTNETNETWORK::pbTmp points at. */
#define INTNETNETWORK_TMP_SIZE 2048
383
384
/**
 * Internal networking instance.
 */
typedef struct INTNET
{
    /** Magic number (INTNET_MAGIC). */
    uint32_t volatile u32Magic;
    /** Mutex protecting the creation, opening and destruction of both networks and
     * interfaces. (This means all operations affecting the pNetworks list.) */
    RTSEMMUTEX hMtxCreateOpenDestroy;
    /** List of networks (chained via INTNETNETWORK::pNext).
     * Protected by hMtxCreateOpenDestroy, as stated in that member's
     * description; this structure has no spinlock member. */
    PINTNETNETWORK volatile pNetworks;
    /** Handle table for the interfaces. */
    RTHANDLETABLE hHtIfs;
} INTNET;
/** Pointer to an internal network ring-0 instance. */
typedef struct INTNET *PINTNET;

/** Magic number for the internal network instance data (Hayao Miyazaki). */
#define INTNET_MAGIC UINT32_C(0x19410105)
405
406
407/*******************************************************************************
408* Global Variables *
409*******************************************************************************/
/** Pointer to the internal network instance data. */
static PINTNET volatile g_pIntNet = NULL;

/**
 * One open-network policy: the restrictive and relaxed flag of a pair, the
 * flag marking the policy as fixed, and the mask covering both pair flags.
 */
static const struct INTNETOPENNETWORKFLAGS
{
    uint32_t fRestrictive; /**< The restrictive flag (deny/disabled). */
    uint32_t fRelaxed; /**< The relaxed flag (allow/enabled). */
    uint32_t fFixed; /**< The config-fixed flag. */
    uint32_t fPair; /**< The pair of restrictive and relaxed flags. */
}
/** Open network policy flags relating to the network. */
g_afIntNetOpenNetworkNetFlags[] =
{
    { INTNET_OPEN_FLAGS_ACCESS_RESTRICTED, INTNET_OPEN_FLAGS_ACCESS_PUBLIC, INTNET_OPEN_FLAGS_ACCESS_FIXED, INTNET_OPEN_FLAGS_ACCESS_RESTRICTED | INTNET_OPEN_FLAGS_ACCESS_PUBLIC },
    { INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS | INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS },
    { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST },
    { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE },
    { INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED, INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED },
    { INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE },
    { INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED, INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED },
    { INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE },
},
/** Open network policy flags relating to the new interface. */
g_afIntNetOpenNetworkIfFlags[] =
{
    { INTNET_OPEN_FLAGS_IF_PROMISC_DENY, INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_DENY | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW },
    { INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK, INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK },
};
438
439
440/*******************************************************************************
441* Forward Declarations *
442*******************************************************************************/
443static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork);
444
445
446/**
447 * Checks if a pointer belongs to the list of known networks without
448 * accessing memory it points to.
449 *
450 * @returns true, if such network is in the list.
451 * @param pIntNet The pointer to the internal network instance (global).
452 * @param pNetwork The pointer that must be validated.
453 */
454DECLINLINE(bool) intnetR0NetworkIsValid(PINTNET pIntNet, PINTNETNETWORK pNetwork)
455{
456 for (PINTNETNETWORK pCurr = pIntNet->pNetworks; pCurr; pCurr = pCurr->pNext)
457 if (pCurr == pNetwork)
458 return true;
459 return false;
460}
461
462
463/**
464 * Worker for intnetR0SgWritePart that deals with the case where the
465 * request doesn't fit into the first segment.
466 *
467 * @returns true, unless the request or SG invalid.
468 * @param pSG The SG list to write to.
469 * @param off Where to start writing (offset into the SG).
470 * @param cb How much to write.
471 * @param pvBuf The buffer to containing the bits to write.
472 */
473static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
474{
475 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
476 return false;
477
478 /*
479 * Skip ahead to the segment where off starts.
480 */
481 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
482 unsigned iSeg = 0;
483 while (off > pSG->aSegs[iSeg].cb)
484 {
485 off -= pSG->aSegs[iSeg++].cb;
486 AssertReturn(iSeg < cSegs, false);
487 }
488
489 /*
490 * Copy the data, hoping that it's all from one segment...
491 */
492 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
493 if (cbCanCopy >= cb)
494 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
495 else
496 {
497 /* copy the portion in the current segment. */
498 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
499 cb -= cbCanCopy;
500
501 /* copy the portions in the other segments. */
502 do
503 {
504 pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
505 iSeg++;
506 AssertReturn(iSeg < cSegs, false);
507
508 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
509 memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
510
511 cb -= cbCanCopy;
512 } while (cb > 0);
513 }
514
515 return true;
516}
517
518
519/**
520 * Writes to a part of an SG.
521 *
522 * @returns true on success, false on failure (out of bounds).
523 * @param pSG The SG list to write to.
524 * @param off Where to start writing (offset into the SG).
525 * @param cb How much to write.
526 * @param pvBuf The buffer to containing the bits to write.
527 */
528DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
529{
530 Assert(off + cb > off);
531
532 /* The optimized case. */
533 if (RT_LIKELY( pSG->cSegsUsed == 1
534 || pSG->aSegs[0].cb >= off + cb))
535 {
536 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
537 memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
538 return true;
539 }
540 return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
541}
542
543
544/**
545 * Reads a byte from a SG list.
546 *
547 * @returns The byte on success. 0xff on failure.
548 * @param pSG The SG list to read.
549 * @param off The offset (into the SG) off the byte.
550 */
551DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
552{
553 if (RT_LIKELY(pSG->aSegs[0].cb > off))
554 return ((uint8_t const *)pSG->aSegs[0].pv)[off];
555
556 off -= pSG->aSegs[0].cb;
557 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
558 for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
559 {
560 if (pSG->aSegs[iSeg].cb > off)
561 return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
562 off -= pSG->aSegs[iSeg].cb;
563 }
564 return false;
565}
566
567
568/**
569 * Worker for intnetR0SgReadPart that deals with the case where the
570 * requested data isn't in the first segment.
571 *
572 * @returns true, unless the SG is invalid.
573 * @param pSG The SG list to read.
574 * @param off Where to start reading (offset into the SG).
575 * @param cb How much to read.
576 * @param pvBuf The buffer to read into.
577 */
578static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
579{
580 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
581 return false;
582
583 /*
584 * Skip ahead to the segment where off starts.
585 */
586 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
587 unsigned iSeg = 0;
588 while (off > pSG->aSegs[iSeg].cb)
589 {
590 off -= pSG->aSegs[iSeg++].cb;
591 AssertReturn(iSeg < cSegs, false);
592 }
593
594 /*
595 * Copy the data, hoping that it's all from one segment...
596 */
597 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
598 if (cbCanCopy >= cb)
599 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
600 else
601 {
602 /* copy the portion in the current segment. */
603 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
604 cb -= cbCanCopy;
605
606 /* copy the portions in the other segments. */
607 do
608 {
609 pvBuf = (uint8_t *)pvBuf + cbCanCopy;
610 iSeg++;
611 AssertReturn(iSeg < cSegs, false);
612
613 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
614 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
615
616 cb -= cbCanCopy;
617 } while (cb > 0);
618 }
619
620 return true;
621}
622
623
624/**
625 * Reads a part of an SG into a buffer.
626 *
627 * @returns true on success, false on failure (out of bounds).
628 * @param pSG The SG list to read.
629 * @param off Where to start reading (offset into the SG).
630 * @param cb How much to read.
631 * @param pvBuf The buffer to read into.
632 */
633DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
634{
635 Assert(off + cb > off);
636
637 /* The optimized case. */
638 if (RT_LIKELY( pSG->cSegsUsed == 1
639 || pSG->aSegs[0].cb >= off + cb))
640 {
641 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
642 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
643 return true;
644 }
645 return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
646}
647
648
649/**
650 * Wait for a busy counter to reach zero.
651 *
652 * @param pNetwork The network.
653 * @param pcBusy The busy counter.
654 */
655static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
656{
657 if (ASMAtomicReadU32(pcBusy) == 0)
658 return;
659
660 /*
661 * We have to be a bit cautious here so we don't destroy the network or the
662 * semaphore before intnetR0BusyDec has signalled us.
663 */
664
665 /* Reset the semaphore and flip the wakeup bit. */
666 RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
667 uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
668 do
669 {
670 if (cCurBusy == 0)
671 return;
672 AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
673 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
674 } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));
675
676 /* Wait for the count to reach zero. */
677 do
678 {
679 int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
680 //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
681 cCurBusy = ASMAtomicReadU32(pcBusy);
682 AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
683 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
684 } while ( cCurBusy != INTNET_BUSY_WAKEUP_MASK
685 || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
686}
687
688
689/**
690 * Decrements the busy counter and maybe wakes up any threads waiting for it to
691 * reach zero.
692 *
693 * @param pNetwork The network.
694 * @param pcBusy The busy counter.
695 */
696DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
697{
698 uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
699 if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
700 && pNetwork))
701 RTSemEventSignal(pNetwork->hEvtBusyIf);
702 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
703}
704
705
706/**
707 * Increments the busy count of the specified interface.
708 *
709 * The caller must own the MAC address table spinlock.
710 *
711 * @param pIf The interface.
712 */
713DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
714{
715 intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
716}
717
718
719/**
720 * Increments the busy count of the specified interface.
721 *
722 * The caller must own the MAC address table spinlock or an explicity reference.
723 *
724 * @param pTrunk The trunk.
725 */
726DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
727{
728 if (pTrunk)
729 intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
730}
731
732
733/**
734 * Increments the busy count of the specified interface.
735 *
736 * The caller must own the MAC address table spinlock or an explicity reference.
737 *
738 * @param pIf The interface.
739 */
740DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
741{
742 uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
743 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
744 NOREF(cNewBusy);
745}
746
747
748/**
749 * Increments the busy count of the specified interface.
750 *
751 * The caller must own the MAC address table spinlock or an explicity reference.
752 *
753 * @param pTrunk The trunk.
754 */
755DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
756{
757 if (!pTrunk) return;
758 uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
759 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
760 NOREF(cNewBusy);
761}
762
763
764/**
765 * Retain an interface.
766 *
767 * @returns VBox status code, can assume success in most situations.
768 * @param pIf The interface instance.
769 * @param pSession The current session.
770 */
771DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
772{
773 int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
774 AssertRCReturn(rc, rc);
775 return VINF_SUCCESS;
776}
777
778
779/**
780 * Release an interface previously retained by intnetR0IfRetain or
781 * by handle lookup/freeing.
782 *
783 * @returns true if destroyed, false if not.
784 * @param pIf The interface instance.
785 * @param pSession The current session.
786 */
787DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
788{
789 int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
790 AssertRC(rc);
791 return rc == VINF_OBJECT_DESTROYED;
792}
793
794
795/**
796 * RTHandleCreateEx callback that retains an object in the
797 * handle table before returning it.
798 *
799 * (Avoids racing the freeing of the handle.)
800 *
801 * @returns VBox status code.
802 * @param hHandleTable The handle table (ignored).
803 * @param pvObj The object (INTNETIF).
804 * @param pvCtx The context (SUPDRVSESSION).
805 * @param pvUser The user context (ignored).
806 */
807static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
808{
809 NOREF(pvUser);
810 NOREF(hHandleTable);
811 PINTNETIF pIf = (PINTNETIF)pvObj;
812 if (pIf->hIf != INTNET_HANDLE_INVALID) /* Don't try retain it if called from intnetR0IfDestruct. */
813 return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
814 return VINF_SUCCESS;
815}
816
817
818
819/**
820 * Checks if the interface has a usable MAC address or not.
821 *
822 * @returns true if MacAddr is usable, false if not.
823 * @param pIf The interface.
824 */
825DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
826{
827 return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
828}
829
830
831/**
832 * Locates the MAC address table entry for the given interface.
833 *
834 * The caller holds the MAC address table spinlock, obviously.
835 *
836 * @returns Pointer to the entry on if found, NULL if not.
837 * @param pNetwork The network.
838 * @param pIf The interface.
839 */
840DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
841{
842 uint32_t iIf = pNetwork->MacTab.cEntries;
843 while (iIf-- > 0)
844 {
845 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
846 return &pNetwork->MacTab.paEntries[iIf];
847 }
848 return NULL;
849}
850
851
852/**
853 * Checks if the IPv6 address is a good interface address.
854 * @returns true/false.
855 * @param addr The address, network endian.
856 */
857DECLINLINE(bool) intnetR0IPv6AddrIsGood(RTNETADDRIPV6 addr)
858{
859 return !( ( addr.QWords.qw0 == 0 && addr.QWords.qw1 == 0) /* :: */
860 || ( (addr.Words.w0 & RT_H2BE_U16(0xff00)) == RT_H2BE_U16(0xff00)) /* multicast */
861 || ( addr.Words.w0 == 0 && addr.Words.w1 == 0
862 && addr.Words.w2 == 0 && addr.Words.w3 == 0
863 && addr.Words.w4 == 0 && addr.Words.w5 == 0
864 && addr.Words.w6 == 0 && addr.Words.w7 == RT_H2BE_U16(0x0001))); /* ::1 */
865}
866
867
868/**
869 * Checks if the IPv4 address is a broadcast address.
870 * @returns true/false.
871 * @param Addr The address, network endian.
872 */
873DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
874{
875 /* Just check for 255.255.255.255 atm. */
876 return Addr.u == UINT32_MAX;
877}
878
879
880/**
881 * Checks if the IPv4 address is a good interface address.
882 * @returns true/false.
883 * @param Addr The address, network endian.
884 */
885DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
886{
887 /* Usual suspects. */
888 if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
889 || Addr.au8[0] == 0) /* Current network, can be used as source address. */
890 return false;
891
892 /* Unusual suspects. */
893 if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
894 || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
895 ))
896 return false;
897 return true;
898}
899
900
901/**
902 * Gets the address size of a network layer type.
903 *
904 * @returns size in bytes.
905 * @param enmType The type.
906 */
907DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
908{
909 switch (enmType)
910 {
911 case kIntNetAddrType_IPv4: return 4;
912 case kIntNetAddrType_IPv6: return 16;
913 case kIntNetAddrType_IPX: return 4 + 6;
914 default: AssertFailedReturn(0);
915 }
916}
917
918
919/**
920 * Compares two address to see if they are equal, assuming naturally align structures.
921 *
922 * @returns true if equal, false if not.
923 * @param pAddr1 The first address.
924 * @param pAddr2 The second address.
925 * @param cbAddr The address size.
926 */
927DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
928{
929 switch (cbAddr)
930 {
931 case 4: /* IPv4 */
932 return pAddr1->au32[0] == pAddr2->au32[0];
933 case 16: /* IPv6 */
934 return pAddr1->au64[0] == pAddr2->au64[0]
935 && pAddr1->au64[1] == pAddr2->au64[1];
936 case 10: /* IPX */
937 return pAddr1->au64[0] == pAddr2->au64[0]
938 && pAddr1->au16[4] == pAddr2->au16[4];
939 default:
940 AssertFailedReturn(false);
941 }
942}
943
944
945/**
946 * Worker for intnetR0IfAddrCacheLookup that performs the lookup
947 * in the remaining cache entries after the caller has check the
948 * most likely ones.
949 *
950 * @returns -1 if not found, the index of the cache entry if found.
951 * @param pCache The cache.
952 * @param pAddr The address.
953 * @param cbAddr The address size (optimization).
954 */
955static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
956{
957 unsigned i = pCache->cEntries - 2;
958 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
959 while (i >= 1)
960 {
961 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
962 return i;
963 pbEntry -= pCache->cbEntry;
964 i--;
965 }
966
967 return -1;
968}
969
970/**
971 * Lookup an address in a cache without any expectations.
972 *
973 * @returns -1 if not found, the index of the cache entry if found.
974 * @param pCache The cache.
975 * @param pAddr The address.
976 * @param cbAddr The address size (optimization).
977 */
978DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
979{
980 Assert(pCache->cbAddress == cbAddr);
981
982 /*
983 * The optimized case is when there is one cache entry and
984 * it doesn't match.
985 */
986 unsigned i = pCache->cEntries;
987 if ( i > 0
988 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
989 return 0;
990 if (i <= 1)
991 return -1;
992
993 /*
994 * Check the last entry.
995 */
996 i--;
997 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
998 return i;
999 if (i <= 1)
1000 return -1;
1001
1002 return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
1003}
1004
1005
1006/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
1007DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1008{
1009 /** @todo implement this. */
1010 return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1011}
1012
1013
1014/**
1015 * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
1016 * the lookup in the remaining cache entries after the caller
1017 * has check the most likely ones.
1018 *
1019 * The routine is expecting not to find the address.
1020 *
1021 * @returns -1 if not found, the index of the cache entry if found.
1022 * @param pCache The cache.
1023 * @param pAddr The address.
1024 * @param cbAddr The address size (optimization).
1025 */
1026static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1027{
1028 /*
1029 * Perform a full table lookup.
1030 */
1031 unsigned i = pCache->cEntries - 2;
1032 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1033 while (i >= 1)
1034 {
1035 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1036 return i;
1037 pbEntry -= pCache->cbEntry;
1038 i--;
1039 }
1040
1041 return -1;
1042}
1043
1044
1045/**
1046 * Lookup an address in a cache expecting not to find it.
1047 *
1048 * @returns -1 if not found, the index of the cache entry if found.
1049 * @param pCache The cache.
1050 * @param pAddr The address.
1051 * @param cbAddr The address size (optimization).
1052 */
1053DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1054{
1055 Assert(pCache->cbAddress == cbAddr);
1056
1057 /*
1058 * The optimized case is when there is one cache entry and
1059 * it doesn't match.
1060 */
1061 unsigned i = pCache->cEntries;
1062 if (RT_UNLIKELY( i > 0
1063 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
1064 return 0;
1065 if (RT_LIKELY(i <= 1))
1066 return -1;
1067
1068 /*
1069 * Then check the last entry and return if there are just two cache entries.
1070 */
1071 i--;
1072 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
1073 return i;
1074 if (i <= 1)
1075 return -1;
1076
1077 return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
1078}
1079
1080
1081/**
1082 * Deletes a specific cache entry.
1083 *
1084 * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
1085 *
1086 * @param pIf The interface (for logging).
1087 * @param pCache The cache.
1088 * @param iEntry The entry to delete.
1089 * @param pszMsg Log message.
1090 */
1091static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
1092{
1093 AssertReturnVoid(iEntry < pCache->cEntries);
1094 AssertReturnVoid(iEntry >= 0);
1095#ifdef LOG_ENABLED
1096 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1097 PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
1098 switch (enmAddrType)
1099 {
1100 case kIntNetAddrType_IPv4:
1101 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 deleted #%d %RTnaipv4 %s\n",
1102 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv4, pszMsg));
1103 break;
1104 case kIntNetAddrType_IPv6:
1105 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv6 deleted #%d %RTnaipv6 %s\n",
1106 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv6, pszMsg));
1107 break;
1108 default:
1109 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
1110 pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
1111 break;
1112 }
1113#endif
1114
1115 pCache->cEntries--;
1116 if (iEntry < pCache->cEntries)
1117 memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
1118 pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
1119 (pCache->cEntries - iEntry) * pCache->cbEntry);
1120}
1121
1122
1123/**
1124 * Deletes an address from the cache, assuming it isn't actually in the cache.
1125 *
1126 * May or may not own the spinlock when calling this.
1127 *
1128 * @param pIf The interface (for logging).
1129 * @param pCache The cache.
1130 * @param pAddr The address.
1131 * @param cbAddr The address size (optimization).
1132 */
1133DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1134{
1135 int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1136 if (RT_UNLIKELY(i >= 0))
1137 intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
1138}
1139
1140
1141/**
1142 * Deletes the address from all the interface caches.
1143 *
1144 * This is used to remove stale entries that has been reassigned to
1145 * other machines on the network.
1146 *
1147 * @param pNetwork The network.
1148 * @param pAddr The address.
1149 * @param enmType The address type.
1150 * @param cbAddr The address size (optimization).
1151 * @param pszMsg Log message.
1152 */
1153DECLINLINE(void) intnetR0NetworkAddrCacheDeleteLocked(PINTNETNETWORK pNetwork,
1154 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType,
1155 uint8_t const cbAddr,
1156 const char *pszMsg)
1157{
1158 uint32_t iIf = pNetwork->MacTab.cEntries;
1159 while (iIf--)
1160 {
1161 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1162
1163 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1164 if (RT_UNLIKELY(i >= 0))
1165 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1166 }
1167}
1168
1169
1170/**
1171 * Deletes the address from all the interface caches.
1172 *
1173 * This is used to remove stale entries that has been reassigned to
1174 * other machines on the network.
1175 *
1176 * @param pNetwork The network.
1177 * @param pAddr The address.
1178 * @param enmType The address type.
1179 * @param cbAddr The address size (optimization).
1180 * @param pszMsg Log message.
1181 */
1182DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
1183 uint8_t const cbAddr, const char *pszMsg)
1184{
1185 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1186
1187 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, pszMsg);
1188
1189 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1190}
1191
1192
1193/**
1194 * Deletes the address from all the interface caches except the specified one.
1195 *
1196 * This is used to remove stale entries that has been reassigned to
1197 * other machines on the network.
1198 *
1199 * @param pNetwork The network.
1200 * @param pAddr The address.
1201 * @param enmType The address type.
1202 * @param cbAddr The address size (optimization).
1203 * @param pszMsg Log message.
1204 */
1205DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
1206 INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
1207{
1208 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1209
1210 uint32_t iIf = pNetwork->MacTab.cEntries;
1211 while (iIf--)
1212 {
1213 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1214 if (pIf != pIfSender)
1215 {
1216 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1217 if (RT_UNLIKELY(i >= 0))
1218 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1219 }
1220 }
1221
1222 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1223}
1224
1225
1226/**
1227 * Lookup an address on the network, returning the (first) interface having it
1228 * in its address cache.
1229 *
1230 * @returns Pointer to the interface on success, NULL if not found. The caller
1231 * must release the interface by calling intnetR0BusyDecIf.
1232 * @param pNetwork The network.
1233 * @param pAddr The address to lookup.
1234 * @param enmType The address type.
1235 * @param cbAddr The size of the address.
1236 */
1237DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
1238{
1239 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1240
1241 uint32_t iIf = pNetwork->MacTab.cEntries;
1242 while (iIf--)
1243 {
1244 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1245 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1246 if (i >= 0)
1247 {
1248 intnetR0BusyIncIf(pIf);
1249 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1250 return pIf;
1251 }
1252 }
1253
1254 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1255 return NULL;
1256}
1257
1258
1259/**
1260 * Look up specified address in the network's blacklist.
1261 *
1262 * @param pNetwork The network.
1263 * @param enmType The address type.
1264 * @param pAddr The address.
1265 */
1266static bool intnetR0NetworkBlacklistLookup(PINTNETNETWORK pNetwork,
1267 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1268{
1269 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1270
1271 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1272 return false;
1273
1274 const uint8_t cbAddr = pCache->cbAddress;
1275 Assert(cbAddr == intnetR0AddrSize(enmType));
1276
1277 for (unsigned i = 0; i < pCache->cEntries; ++i)
1278 {
1279 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1280 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1281 return true;
1282 }
1283
1284 return false;
1285}
1286
1287
1288/**
1289 * Deletes specified address from network's blacklist.
1290 *
1291 * @param pNetwork The network.
1292 * @param enmType The address type.
1293 * @param pAddr The address.
1294 */
1295static void intnetR0NetworkBlacklistDelete(PINTNETNETWORK pNetwork,
1296 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1297{
1298 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1299
1300 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1301 return;
1302
1303 const uint8_t cbAddr = pCache->cbAddress;
1304 Assert(cbAddr == intnetR0AddrSize(enmType));
1305
1306 for (unsigned i = 0; i < pCache->cEntries; ++i)
1307 {
1308 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1309 if (!intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1310 continue;
1311
1312 --pCache->cEntries;
1313 memmove(pCache->pbEntries + i * pCache->cbEntry,
1314 pCache->pbEntries + (i + 1) * pCache->cbEntry,
1315 (pCache->cEntries - i) * pCache->cbEntry);
1316 return;
1317 }
1318}
1319
1320
1321/**
1322 * Adds specified address from network's blacklist.
1323 *
1324 * @param pNetwork The network.
1325 * @param enmType The address type.
1326 * @param pAddr The address.
1327 */
1328static void intnetR0NetworkBlacklistAdd(PINTNETNETWORK pNetwork,
1329 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1330{
1331 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1332
1333 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1334 return;
1335
1336 const uint8_t cbAddr = pCache->cbAddress;
1337 Assert(cbAddr == intnetR0AddrSize(enmType));
1338
1339 /* lookup */
1340 for (unsigned i = 0; i < pCache->cEntries; ++i)
1341 {
1342 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1343 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1344 return; /* already exists */
1345 }
1346
1347 if (pCache->cEntries >= pCache->cEntriesAlloc)
1348 {
1349 /* shift */
1350 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry,
1351 pCache->cbEntry * (pCache->cEntries - 1));
1352 --pCache->cEntries;
1353 }
1354
1355 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1356
1357 /* push */
1358 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1359 memcpy(pbEntry, pAddr, cbAddr);
1360 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - cbAddr);
1361 ++pCache->cEntries;
1362
1363 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1364}
1365
1366
1367/**
1368 * Adds an address to the cache, the caller is responsible for making sure it's
1369 * not already in the cache.
1370 *
1371 * The caller must not
1372 *
1373 * @param pIf The interface (for logging).
1374 * @param pCache The address cache.
1375 * @param pAddr The address.
1376 * @param pszMsg log message.
1377 */
1378static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1379 const char *pszMsg)
1380{
1381 PINTNETNETWORK pNetwork = pIf->pNetwork;
1382 AssertReturnVoid(pNetwork);
1383
1384 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1385
1386 const uint8_t cbAddr = pCache->cbAddress;
1387 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1388
1389 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1390
1391 bool fBlacklisted = intnetR0NetworkBlacklistLookup(pNetwork, pAddr, enmAddrType);
1392 if (fBlacklisted)
1393 {
1394 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1395
1396#ifdef LOG_ENABLED
1397 switch (enmAddrType)
1398 {
1399 case kIntNetAddrType_IPv4:
1400 Log(("%s: spoofing attempt for %RTnaipv4\n",
1401 __FUNCTION__, pAddr->IPv4));
1402 break;
1403 case kIntNetAddrType_IPv6:
1404 Log(("%s: spoofing attempt for %RTnaipv6\n",
1405 __FUNCTION__, &pAddr->IPv6));
1406 break;
1407 default:
1408 Log(("%s: spoofing attempt for %.*Rhxs (type %d)\n",
1409 __FUNCTION__, cbAddr, pAddr, enmAddrType));
1410 break;
1411 }
1412#endif
1413 return;
1414 }
1415
1416 if (RT_UNLIKELY(!pCache->cEntriesAlloc))
1417 {
1418 /* This shouldn't happen*/
1419 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1420 return;
1421 }
1422
1423 /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
1424 if (pCache->cEntries >= pCache->cEntriesAlloc)
1425 {
1426 Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
1427 (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
1428 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
1429 pCache->cEntries--;
1430 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1431 }
1432
1433 /*
1434 * Add the new entry to the end of the array.
1435 */
1436 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1437 memcpy(pbEntry, pAddr, pCache->cbAddress);
1438 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
1439
1440#ifdef LOG_ENABLED
1441 switch (enmAddrType)
1442 {
1443 case kIntNetAddrType_IPv4:
1444 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %RTnaipv4 %s\n",
1445 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv4, pszMsg));
1446 break;
1447 case kIntNetAddrType_IPv6:
1448 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv6 added #%d %RTnaipv6 %s\n",
1449 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv6, pszMsg));
1450 break;
1451 default:
1452 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
1453 pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
1454 break;
1455 }
1456#endif
1457 pCache->cEntries++;
1458 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1459
1460 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1461}
1462
1463
1464/**
1465 * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
1466 *
1467 * @param pIf The interface (for logging).
1468 * @param pCache The address cache.
1469 * @param pAddr The address.
1470 * @param cbAddr The size of the address (optimization).
1471 * @param pszMsg Log message.
1472 */
1473static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1474 const char *pszMsg)
1475{
1476 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1477
1478 const uint8_t cbAddr = pCache->cbAddress;
1479 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1480
1481 /*
1482 * Check all but the first and last entries, the caller
1483 * has already checked those.
1484 */
1485 int i = pCache->cEntries - 2;
1486 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
1487 while (i >= 1)
1488 {
1489 if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1490 return;
1491 pbEntry += pCache->cbEntry;
1492 i--;
1493 }
1494
1495 /*
1496 * Not found, add it.
1497 */
1498 intnetR0IfAddrCacheAddIt(pIf, enmAddrType, pAddr, pszMsg);
1499}
1500
1501
1502/**
1503 * Adds an address to the cache if it's not already there.
1504 *
1505 * Must not own any spinlocks when calling this function.
1506 *
1507 * @param pIf The interface (for logging).
1508 * @param pCache The address cache.
1509 * @param pAddr The address.
1510 * @param cbAddr The size of the address (optimization).
1511 * @param pszMsg Log message.
1512 */
1513DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1514 const char *pszMsg)
1515{
1516 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1517
1518 const uint8_t cbAddr = pCache->cbAddress;
1519 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1520
1521 /*
1522 * The optimized case is when the address the first or last cache entry.
1523 */
1524 unsigned i = pCache->cEntries;
1525 if (RT_LIKELY( i > 0
1526 && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
1527 || (i > 1
1528 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))) ))
1529 return;
1530
1531 intnetR0IfAddrCacheAddSlow(pIf, enmAddrType, pAddr, pszMsg);
1532}
1533
1534
1535/**
1536 * Destroys the specified address cache.
1537 * @param pCache The address cache.
1538 */
1539static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
1540{
1541 void *pvFree = pCache->pbEntries;
1542 pCache->pbEntries = NULL;
1543 pCache->cEntries = 0;
1544 pCache->cEntriesAlloc = 0;
1545 RTMemFree(pvFree);
1546}
1547
1548
1549/**
1550 * Initialize the address cache for the specified address type.
1551 *
1552 * The cache storage is preallocated and fixed size so that we can handle
1553 * inserts from problematic contexts.
1554 *
1555 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1556 * @param pCache The cache to initialize.
1557 * @param enmAddrType The address type.
1558 * @param fEnabled Whether the address cache is enabled or not.
1559 */
1560static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
1561{
1562 pCache->cEntries = 0;
1563 pCache->cbAddress = intnetR0AddrSize(enmAddrType);
1564 pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
1565 if (fEnabled)
1566 {
1567 pCache->cEntriesAlloc = 32;
1568 pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
1569 if (!pCache->pbEntries)
1570 return VERR_NO_MEMORY;
1571 }
1572 else
1573 {
1574 pCache->cEntriesAlloc = 0;
1575 pCache->pbEntries = NULL;
1576 }
1577 return VINF_SUCCESS;
1578}
1579
1580
1581/**
1582 * Is it a multicast or broadcast MAC address?
1583 *
1584 * @returns true if multicast, false if not.
1585 * @param pMacAddr The address to inspect.
1586 */
1587DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
1588{
1589 return !!(pMacAddr->au8[0] & 0x01);
1590}
1591
1592
1593/**
1594 * Is it a dummy MAC address?
1595 *
1596 * We use dummy MAC addresses for interfaces which we don't know the MAC
1597 * address of because they haven't sent anything (learning) or explicitly set
1598 * it.
1599 *
1600 * @returns true if dummy, false if not.
1601 * @param pMacAddr The address to inspect.
1602 */
1603DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
1604{
1605 /* The dummy address are broadcast addresses, don't bother check it all. */
1606 return pMacAddr->au16[0] == 0xffff;
1607}
1608
1609
1610/**
1611 * Compares two MAC addresses.
1612 *
1613 * @returns true if equal, false if not.
1614 * @param pDstAddr1 Address 1.
1615 * @param pDstAddr2 Address 2.
1616 */
1617DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
1618{
1619 return pDstAddr1->au16[2] == pDstAddr2->au16[2]
1620 && pDstAddr1->au16[1] == pDstAddr2->au16[1]
1621 && pDstAddr1->au16[0] == pDstAddr2->au16[0];
1622}
1623
1624
1625/**
1626 * Switch a unicast frame based on the network layer address (OSI level 3) and
1627 * return a destination table.
1628 *
1629 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1630 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1631 * @param pNetwork The network to switch on.
1632 * @param pDstMacAddr The destination MAC address.
1633 * @param enmL3AddrType The level-3 destination address type.
1634 * @param pL3Addr The level-3 destination address.
1635 * @param cbL3Addr The size of the level-3 destination address.
1636 * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
1637 * @param pDstTab The destination output table.
1638 */
1639static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
1640 INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
1641 uint32_t fSrc, PINTNETDSTTAB pDstTab)
1642{
1643 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1644
1645 /*
1646 * Grab the spinlock first and do the switching.
1647 */
1648 PINTNETMACTAB pTab = &pNetwork->MacTab;
1649 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1650
1651 pDstTab->fTrunkDst = 0;
1652 pDstTab->pTrunk = 0;
1653 pDstTab->cIfs = 0;
1654
1655 /* Find exactly matching or promiscuous interfaces. */
1656 uint32_t cExactHits = 0;
1657 uint32_t iIfMac = pTab->cEntries;
1658 while (iIfMac-- > 0)
1659 {
1660 if (pTab->paEntries[iIfMac].fActive)
1661 {
1662 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1663 bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
1664 if (fExact || pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1665 {
1666 cExactHits += fExact;
1667
1668 uint32_t iIfDst = pDstTab->cIfs++;
1669 pDstTab->aIfs[iIfDst].pIf = pIf;
1670 pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
1671 intnetR0BusyIncIf(pIf);
1672
1673 if (fExact)
1674 pDstMacAddr = &pIf->MacAddr; /* Avoids duplicates being sent to the host. */
1675 }
1676 }
1677 }
1678
1679 /* Network only promicuous mode ifs should see related trunk traffic. */
1680 if ( cExactHits
1681 && fSrc
1682 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1683 {
1684 iIfMac = pTab->cEntries;
1685 while (iIfMac-- > 0)
1686 {
1687 if ( pTab->paEntries[iIfMac].fActive
1688 && pTab->paEntries[iIfMac].fPromiscuousEff
1689 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1690 {
1691 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1692 if (intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) < 0)
1693 {
1694 uint32_t iIfDst = pDstTab->cIfs++;
1695 pDstTab->aIfs[iIfDst].pIf = pIf;
1696 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1697 intnetR0BusyIncIf(pIf);
1698 }
1699 }
1700 }
1701 }
1702
1703 /* Does it match the host, or is the host promiscuous? */
1704 if (pTab->fHostActive)
1705 {
1706 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
1707 if ( fExact
1708 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1709 || pTab->fHostPromiscuousEff)
1710 {
1711 cExactHits += fExact;
1712 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1713 }
1714 }
1715
1716 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1717 if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuousEff))
1718 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1719 pDstTab->fTrunkDst &= ~fSrc;
1720 if (pDstTab->fTrunkDst)
1721 {
1722 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1723 pDstTab->pTrunk = pTrunk;
1724 intnetR0BusyIncTrunk(pTrunk);
1725 }
1726
1727 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1728 return pDstTab->cIfs
1729 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1730 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1731}
1732
1733
1734/**
1735 * Pre-switch a unicast MAC address.
1736 *
1737 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1738 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1739 * @param pNetwork The network to switch on.
1740 * @param fSrc The frame source.
1741 * @param pSrcAddr The source address of the frame.
1742 * @param pDstAddr The destination address of the frame.
1743 */
1744static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
1745 PCRTMAC pDstAddr)
1746{
1747 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1748 Assert(fSrc);
1749
1750 /*
1751 * Grab the spinlock first and do the switching.
1752 */
1753 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
1754 PINTNETMACTAB pTab = &pNetwork->MacTab;
1755 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1756
1757 /* Iterate the internal network interfaces and look for matching source and
1758 destination addresses. */
1759 uint32_t iIfMac = pTab->cEntries;
1760 while (iIfMac-- > 0)
1761 {
1762 if (pTab->paEntries[iIfMac].fActive)
1763 {
1764 /* Unknown interface address? */
1765 if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
1766 break;
1767
1768 /* Paranoia - this shouldn't happen, right? */
1769 if ( pSrcAddr
1770 && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
1771 break;
1772
1773 /* Exact match? */
1774 if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
1775 {
1776 enmSwDecision = pTab->fHostPromiscuousEff && fSrc == INTNETTRUNKDIR_WIRE
1777 ? INTNETSWDECISION_BROADCAST
1778 : INTNETSWDECISION_INTNET;
1779 break;
1780 }
1781 }
1782 }
1783
1784 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1785 return enmSwDecision;
1786}
1787
1788
1789/**
1790 * Switch a unicast MAC address and return a destination table.
1791 *
1792 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1793 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1794 * @param pNetwork The network to switch on.
1795 * @param fSrc The frame source.
1796 * @param pIfSender The sender interface, NULL if trunk. Used to
1797 * prevent sending an echo to the sender.
1798 * @param pDstAddr The destination address of the frame.
1799 * @param pDstTab The destination output table.
1800 */
1801static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1802 PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
1803{
1804 AssertPtr(pDstTab);
1805 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1806
1807 /*
1808 * Grab the spinlock first and do the switching.
1809 */
1810 PINTNETMACTAB pTab = &pNetwork->MacTab;
1811 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1812
1813 pDstTab->fTrunkDst = 0;
1814 pDstTab->pTrunk = 0;
1815 pDstTab->cIfs = 0;
1816
1817 /* Find exactly matching or promiscuous interfaces. */
1818 uint32_t cExactHits = 0;
1819 uint32_t iIfMac = pTab->cEntries;
1820 while (iIfMac-- > 0)
1821 {
1822 if (pTab->paEntries[iIfMac].fActive)
1823 {
1824 bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
1825 if ( fExact
1826 || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
1827 || ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1828 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1829 )
1830 {
1831 cExactHits += fExact;
1832
1833 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1834 if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
1835 {
1836 uint32_t iIfDst = pDstTab->cIfs++;
1837 pDstTab->aIfs[iIfDst].pIf = pIf;
1838 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1839 intnetR0BusyIncIf(pIf);
1840 }
1841 }
1842 }
1843 }
1844
1845 /* Network only promicuous mode ifs should see related trunk traffic. */
1846 if ( cExactHits
1847 && fSrc
1848 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1849 {
1850 iIfMac = pTab->cEntries;
1851 while (iIfMac-- > 0)
1852 {
1853 if ( pTab->paEntries[iIfMac].fPromiscuousEff
1854 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1855 && pTab->paEntries[iIfMac].fActive
1856 && !intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr)
1857 && !intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr) )
1858 {
1859 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1860 uint32_t iIfDst = pDstTab->cIfs++;
1861 pDstTab->aIfs[iIfDst].pIf = pIf;
1862 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1863 intnetR0BusyIncIf(pIf);
1864 }
1865 }
1866 }
1867
1868 /* Does it match the host, or is the host promiscuous? */
1869 if ( fSrc != INTNETTRUNKDIR_HOST
1870 && pTab->fHostActive)
1871 {
1872 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
1873 if ( fExact
1874 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1875 || pTab->fHostPromiscuousEff)
1876 {
1877 cExactHits += fExact;
1878 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1879 }
1880 }
1881
1882 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1883 if ( fSrc != INTNETTRUNKDIR_WIRE
1884 && pTab->fWireActive
1885 && (!cExactHits || pTab->fWirePromiscuousEff)
1886 )
1887 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1888
1889 /* Grab the trunk if we're sending to it. */
1890 if (pDstTab->fTrunkDst)
1891 {
1892 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1893 pDstTab->pTrunk = pTrunk;
1894 intnetR0BusyIncTrunk(pTrunk);
1895 }
1896
1897 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1898 return pDstTab->cIfs
1899 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1900 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1901}
1902
1903
1904/**
1905 * Create a destination table for a broadcast frame.
1906 *
1907 * @returns INTNETSWDECISION_BROADCAST.
1908 * @param pNetwork The network to switch on.
1909 * @param fSrc The frame source.
1910 * @param pIfSender The sender interface, NULL if trunk. Used to
1911 * prevent sending an echo to the sender.
1912 * @param pDstTab The destination output table.
1913 */
1914static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1915 PINTNETDSTTAB pDstTab)
1916{
1917 AssertPtr(pDstTab);
1918
1919 /*
1920 * Grab the spinlock first and record all active interfaces.
1921 */
1922 PINTNETMACTAB pTab = &pNetwork->MacTab;
1923 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1924
1925 pDstTab->fTrunkDst = 0;
1926 pDstTab->pTrunk = 0;
1927 pDstTab->cIfs = 0;
1928
1929 /* Regular interfaces. */
1930 uint32_t iIfMac = pTab->cEntries;
1931 while (iIfMac-- > 0)
1932 {
1933 if (pTab->paEntries[iIfMac].fActive)
1934 {
1935 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1936 if (pIf != pIfSender)
1937 {
1938 uint32_t iIfDst = pDstTab->cIfs++;
1939 pDstTab->aIfs[iIfDst].pIf = pIf;
1940 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1941 intnetR0BusyIncIf(pIf);
1942 }
1943 }
1944 }
1945
1946 /* The trunk interface. */
1947 if (pTab->fHostActive)
1948 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1949 if (pTab->fWireActive)
1950 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1951 pDstTab->fTrunkDst &= ~fSrc;
1952 if (pDstTab->fTrunkDst)
1953 {
1954 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1955 pDstTab->pTrunk = pTrunk;
1956 intnetR0BusyIncTrunk(pTrunk);
1957 }
1958
1959 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1960 return INTNETSWDECISION_BROADCAST;
1961}
1962
1963
1964/**
1965 * Create a destination table with the trunk and any promiscuous interfaces.
1966 *
1967 * This is only used in a fallback case of the level-3 switching, so we can
1968 * assume the wire as source and skip the sender interface filtering.
1969 *
1970 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1971 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1972 * @param pNetwork The network to switch on.
1973 * @param fSrc The frame source.
1974 * @param pDstTab The destination output table.
1975 */
1976static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
1977{
1978 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1979
1980 /*
1981 * Grab the spinlock first and do the switching.
1982 */
1983 PINTNETMACTAB pTab = &pNetwork->MacTab;
1984 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1985
1986 pDstTab->fTrunkDst = 0;
1987 pDstTab->pTrunk = 0;
1988 pDstTab->cIfs = 0;
1989
1990 /* Find promiscuous interfaces. */
1991 uint32_t iIfMac = pTab->cEntries;
1992 while (iIfMac-- > 0)
1993 {
1994 if ( pTab->paEntries[iIfMac].fActive
1995 && ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1996 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1997 )
1998 {
1999 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
2000 uint32_t iIfDst = pDstTab->cIfs++;
2001 pDstTab->aIfs[iIfDst].pIf = pIf;
2002 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
2003 intnetR0BusyIncIf(pIf);
2004 }
2005 }
2006
2007 /* The trunk interface. */
2008 if (pTab->fHostActive)
2009 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2010 if (pTab->fWireActive)
2011 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2012 pDstTab->fTrunkDst &= ~fSrc;
2013 if (pDstTab->fTrunkDst)
2014 {
2015 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2016 pDstTab->pTrunk = pTrunk;
2017 intnetR0BusyIncTrunk(pTrunk);
2018 }
2019
2020 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2021 return !pDstTab->cIfs
2022 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
2023 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
2024}
2025
2026
2027/**
2028 * Create a destination table for a trunk frame.
2029 *
2030 * @returns INTNETSWDECISION_BROADCAST.
2031 * @param pNetwork The network to switch on.
2032 * @param fSrc The frame source.
2033 * @param pDstTab The destination output table.
2034 */
2035static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
2036{
2037 AssertPtr(pDstTab);
2038
2039 /*
2040 * Grab the spinlock first and record all active interfaces.
2041 */
2042 PINTNETMACTAB pTab= &pNetwork->MacTab;
2043 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2044
2045 pDstTab->fTrunkDst = 0;
2046 pDstTab->pTrunk = 0;
2047 pDstTab->cIfs = 0;
2048
2049 /* The trunk interface. */
2050 if (pTab->fHostActive)
2051 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2052 if (pTab->fWireActive)
2053 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2054 pDstTab->fTrunkDst &= ~fSrc;
2055 if (pDstTab->fTrunkDst)
2056 {
2057 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2058 pDstTab->pTrunk = pTrunk;
2059 intnetR0BusyIncTrunk(pTrunk);
2060 }
2061
2062 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2063 return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
2064}
2065
2066
2067/**
2068 * Wrapper around RTMemAlloc for allocating a destination table.
2069 *
2070 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
2071 * @param cEntries The size given as an entry count.
2072 * @param ppDstTab Where to store the pointer (always).
2073 */
2074DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
2075{
2076 PINTNETDSTTAB pDstTab;
2077 *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_OFFSETOF(INTNETDSTTAB, aIfs[cEntries]));
2078 if (RT_UNLIKELY(!pDstTab))
2079 return VERR_NO_MEMORY;
2080 return VINF_SUCCESS;
2081}
2082
2083
/**
 * Ensures that there is space for another interface in the MAC address lookup
 * table as well as all the destination tables.
 *
 * Nothing is done if the MAC table already has a free slot.  Otherwise the
 * capacity is grown by INTNET_GROW_DSTTAB_SIZE entries (bounded by
 * INTNET_MAX_IFS) in three steps:
 *      -# The destination table of every interface in the MAC table is
 *         replaced by a larger one.
 *      -# All the destination tables of the trunk (if any) are replaced.
 *      -# The MAC address table itself is replaced.
 *
 * If an allocation fails part way through, the destination tables that were
 * already replaced are left in place (they are merely larger than needed) and
 * the MAC table is left untouched.
 *
 * The caller must own the create/open/destroy mutex.
 *
 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
 *          VERR_INTERNAL_ERROR_2 if cEntries exceeds cEntriesAllocated.
 * @param   pNetwork            The network to operate on.
 */
static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
{
    /*
     * The cEntries and cEntriesAllocated members are only updated while
     * owning the big mutex, so we only need the spinlock when doing the
     * actual table replacing.
     */
    PINTNETMACTAB pTab = &pNetwork->MacTab;
    int rc = VINF_SUCCESS;
    AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
    if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
    {
        /* The new capacity; refuse to grow beyond the interface limit. */
        uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
        if (cAllocated <= INTNET_MAX_IFS)
        {
            /*
             * Resize the destination tables first, this can be kind of tedious.
             */
            for (uint32_t i = 0; i < pTab->cEntries; i++)
            {
                PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
                PINTNETDSTTAB pNew;
                rc = intnetR0AllocDstTab(cAllocated, &pNew);
                if (RT_FAILURE(rc))
                    break;

                /*
                 * Swap in the new table atomically.  This only succeeds when
                 * the interface's table pointer is non-NULL and unchanged
                 * since it was sampled; otherwise wait via intnetR0BusyWait
                 * and try again.
                 */
                for (;;)
                {
                    PINTNETDSTTAB pOld = pIf->pDstTab;
                    if (   pOld
                        && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
                    {
                        RTMemFree(pOld);
                        break;
                    }
                    intnetR0BusyWait(pNetwork, &pIf->cBusy);
                }
            }

            /*
             * The trunk.
             */
            if (    RT_SUCCESS(rc)
                &&  pNetwork->MacTab.pTrunk)
            {
                /*
                 * The two table arrays are adjacent members (compile time
                 * assertion), so a single pointer walk from the first task
                 * table to the end of the used interrupt tables covers both.
                 */
                AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
                PINTNETTRUNKIF          pTrunk      = pNetwork->MacTab.pTrunk;
                PINTNETDSTTAB * const   ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
                for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
                     ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
                     ppDstTab++)
                {
                    PINTNETDSTTAB pNew;
                    rc = intnetR0AllocDstTab(cAllocated, &pNew);
                    if (RT_FAILURE(rc))
                        break;

                    /*
                     * Replace the slot under the trunk's destination table
                     * spinlock, but only if it currently holds a table
                     * (non-NULL).  Otherwise wait and retry.
                     */
                    for (;;)
                    {
                        RTSpinlockAcquire(pTrunk->hDstTabSpinlock);
                        void *pvOld = *ppDstTab;
                        if (pvOld)
                            *ppDstTab = pNew;
                        RTSpinlockRelease(pTrunk->hDstTabSpinlock);
                        if (pvOld)
                        {
                            /* The old table is freed after the lock is dropped. */
                            RTMemFree(pvOld);
                            break;
                        }
                        intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
                    }
                }
            }

            /*
             * The MAC Address table itself.
             */
            if (RT_SUCCESS(rc))
            {
                PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
                if (paNew)
                {
                    /* Copy and switch tables while holding the address spinlock. */
                    RTSpinlockAcquire(pNetwork->hAddrSpinlock);

                    PINTNETMACTABENTRY  paOld = pTab->paEntries;
                    uint32_t            i     = pTab->cEntries;
                    while (i-- > 0)
                    {
                        paNew[i] = paOld[i];

                        /* Deactivate the old entry and clear its interface pointer. */
                        paOld[i].fActive = false;
                        paOld[i].pIf     = NULL;
                    }

                    pTab->paEntries         = paNew;
                    pTab->cEntriesAllocated = cAllocated;

                    RTSpinlockRelease(pNetwork->hAddrSpinlock);

                    /* The old table is freed after the lock is dropped. */
                    RTMemFree(paOld);
                }
                else
                    rc = VERR_NO_MEMORY;
            }
        }
        else
            rc = VERR_OUT_OF_RANGE;
    }
    return rc;
}
2203
2204
2205
2206
2207#ifdef INTNET_WITH_DHCP_SNOOPING
2208
/**
 * Snoops IP assignments and releases from the DHCPv4 traffic.
 *
 * The caller is responsible for making sure this is traffic between the
 * BOOTPS and BOOTPC ports and for validating the IP header.  The UDP packet
 * need not be validated beyond the ports; it is validated here (including
 * the checksum), and so is the DHCP message.
 *
 * Only two message types are acted upon:
 *      - ACK: the client address (ciaddr) is removed from the IPv4 address
 *        cache of every interface whose MAC matches the client hardware
 *        address, and the assigned address (yiaddr) is added to the cache
 *        of the first matching interface found.
 *      - RELEASE: both ciaddr and yiaddr are removed from the IPv4 address
 *        cache of every interface whose MAC matches.
 *
 * @param   pNetwork            The network this frame was seen on.
 * @param   pIpHdr              Pointer to a valid IP header.  This is for pseudo
 *                              header validation, so only the minimum header size
 *                              needs to be available and valid here.
 * @param   pUdpHdr             Pointer to the UDP header in the frame.
 * @param   cbUdpPkt            What's left of the frame when starting at the UDP header.
 */
static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
{
    /*
     * Check if the DHCP message is valid and get the type.
     */
    if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
    {
        Log6(("Bad UDP packet\n"));
        return;
    }
    /* The BOOTP/DHCP message follows immediately after the UDP header. */
    PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
    uint8_t MsgType;
    if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
    {
        Log6(("Bad DHCP packet\n"));
        return;
    }

#ifdef LOG_ENABLED
    /*
     * Log it.
     */
    const char *pszType = "unknown";
    switch (MsgType)
    {
        case RTNET_DHCP_MT_DISCOVER: pszType = "discover";  break;
        case RTNET_DHCP_MT_OFFER:    pszType = "offer"; break;
        case RTNET_DHCP_MT_REQUEST:  pszType = "request"; break;
        case RTNET_DHCP_MT_DECLINE:  pszType = "decline"; break;
        case RTNET_DHCP_MT_ACK:      pszType = "ack"; break;
        case RTNET_DHCP_MT_NAC:      pszType = "nac"; break;
        case RTNET_DHCP_MT_RELEASE:  pszType = "release"; break;
        case RTNET_DHCP_MT_INFORM:   pszType = "inform"; break;
    }
    Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
          pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
          pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
#endif /* LOG_ENABLED */

    /*
     * Act upon the message.  Message types other than ACK and RELEASE are
     * ignored.
     */
    switch (MsgType)
    {
#if 0
        case RTNET_DHCP_MT_REQUEST:
            /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
             *        know, and add the IP to the cache. */
            break;
#endif


        /*
         * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
         * Delete the old client address first, just in case it changed in a renewal.
         */
        case RTNET_DHCP_MT_ACK:
            if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
            {
                PINTNETIF pMatchingIf = NULL;
                RTSpinlockAcquire(pNetwork->hAddrSpinlock);

                /* Walk the whole MAC table (from the end) looking for the client's MAC. */
                uint32_t iIf = pNetwork->MacTab.cEntries;
                while (iIf-- > 0)
                {
                    PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
                    if (    intnetR0IfHasMacAddr(pCur)
                        &&  !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
                    {
                        intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
                                                  (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
                        /*
                         * Remember only the first match and reference it
                         * (busy counter) while still holding the spinlock, so
                         * it can be used after the lock has been released.
                         */
                        if (!pMatchingIf)
                        {
                            pMatchingIf = pCur;
                            intnetR0BusyIncIf(pMatchingIf);
                        }
                    }
                }

                RTSpinlockRelease(pNetwork->hAddrSpinlock);

                /* Add the assigned address outside the spinlock, then drop the reference. */
                if (pMatchingIf)
                {
                    intnetR0IfAddrCacheAdd(pMatchingIf, kIntNetAddrType_IPv4,
                                           (PCRTNETADDRU)&pDhcp->bp_yiaddr, "DHCP_MT_ACK");
                    intnetR0BusyDecIf(pMatchingIf);
                }
            }
            return;


        /*
         * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
         */
        case RTNET_DHCP_MT_RELEASE:
        {
            RTSpinlockAcquire(pNetwork->hAddrSpinlock);

            /* Both ciaddr and yiaddr are removed from every matching interface. */
            uint32_t iIf = pNetwork->MacTab.cEntries;
            while (iIf-- > 0)
            {
                PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
                if (    intnetR0IfHasMacAddr(pCur)
                    &&  !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
                {
                    intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
                                              (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
                    intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
                                              (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
                }
            }

            RTSpinlockRelease(pNetwork->hAddrSpinlock);
            break;
        }
    }

}
2342
2343
2344/**
2345 * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
2346 * is likely to be a DHCP message.
2347 *
2348 * The caller has already check that the UDP source and destination ports
2349 * are BOOTPS or BOOTPC.
2350 *
2351 * @param pNetwork The network this frame was seen on.
2352 * @param pSG The gather list for the frame.
2353 */
2354static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2355{
2356 /*
2357 * Get a pointer to a linear copy of the full packet, using the
2358 * temporary buffer if necessary.
2359 */
2360 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2361 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2362 if (pSG->cSegsUsed > 1)
2363 {
2364 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2365 Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2366 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2367 return;
2368 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2369 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2370 }
2371
2372 /*
2373 * Validate the IP header and find the UDP packet.
2374 */
2375 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
2376 {
2377 Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
2378 return;
2379 }
2380 uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
2381
2382 /*
2383 * Hand it over to the common DHCP snooper.
2384 */
2385 intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
2386}
2387
2388#endif /* INTNET_WITH_DHCP_SNOOPING */
2389
2390
2391/**
2392 * Snoops up source addresses from ARP requests and purge these from the address
2393 * caches.
2394 *
2395 * The purpose of this purging is to get rid of stale addresses.
2396 *
2397 * @param pNetwork The network this frame was seen on.
2398 * @param pSG The gather list for the frame.
2399 */
2400static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2401{
2402 /*
2403 * Check the minimum size first.
2404 */
2405 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2406 return;
2407
2408 /*
2409 * Copy to temporary buffer if necessary.
2410 */
2411 uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
2412 PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2413 if ( pSG->cSegsUsed != 1
2414 && pSG->aSegs[0].cb < cbPacket)
2415 {
2416 if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
2417 != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
2418 && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2419 return;
2420 pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
2421 }
2422
2423 /*
2424 * Ignore packets which doesn't interest us or we perceive as malformed.
2425 */
2426 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2427 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2428 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2429 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2430 return;
2431 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2432 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2433 && ar_oper != RTNET_ARPOP_REPLY))
2434 {
2435 Log6(("ts-ar: op=%#x\n", ar_oper));
2436 return;
2437 }
2438
2439 /*
2440 * Delete the source address if it's OK.
2441 */
2442 if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
2443 && ( pArpIPv4->ar_sha.au16[0]
2444 || pArpIPv4->ar_sha.au16[1]
2445 || pArpIPv4->ar_sha.au16[2])
2446 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2447 {
2448 Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
2449 pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
2450 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
2451 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
2452 }
2453}
2454
2455
2456#ifdef INTNET_WITH_DHCP_SNOOPING
2457/**
2458 * Snoop up addresses from ARP and DHCP traffic from frames coming
2459 * over the trunk connection.
2460 *
2461 * The caller is responsible for do some basic filtering before calling
2462 * this function.
2463 * For IPv4 this means checking against the minimum DHCPv4 frame size.
2464 *
2465 * @param pNetwork The network.
2466 * @param pSG The SG list for the frame.
2467 * @param EtherType The Ethertype of the frame.
2468 */
2469static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
2470{
2471 switch (EtherType)
2472 {
2473 case RTNET_ETHERTYPE_IPV4:
2474 {
2475 uint32_t cbIpHdr;
2476 uint8_t b;
2477
2478 Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
2479 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
2480 {
2481 /* check if the protocol is UDP */
2482 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2483 if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
2484 return;
2485
2486 /* get the TCP header length */
2487 cbIpHdr = pIpHdr->ip_hl * 4;
2488 }
2489 else
2490 {
2491 /* check if the protocol is UDP */
2492 if ( intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_p))
2493 != RTNETIPV4_PROT_UDP)
2494 return;
2495
2496 /* get the TCP header length */
2497 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
2498 cbIpHdr = (b & 0x0f) * 4;
2499 }
2500 if (cbIpHdr < RTNETIPV4_MIN_LEN)
2501 return;
2502
2503 /* compare the ports. */
2504 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
2505 {
2506 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
2507 if ( ( RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
2508 && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
2509 || ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
2510 && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
2511 return;
2512 }
2513 else
2514 {
2515 /* get the lower byte of the UDP source port number. */
2516 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport) + 1);
2517 if ( b != RTNETIPV4_PORT_BOOTPS
2518 && b != RTNETIPV4_PORT_BOOTPC)
2519 return;
2520 uint8_t SrcPort = b;
2521 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport));
2522 if (b)
2523 return;
2524
2525 /* get the lower byte of the UDP destination port number. */
2526 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport) + 1);
2527 if ( b != RTNETIPV4_PORT_BOOTPS
2528 && b != RTNETIPV4_PORT_BOOTPC)
2529 return;
2530 if (b == SrcPort)
2531 return;
2532 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport));
2533 if (b)
2534 return;
2535 }
2536 intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
2537 break;
2538 }
2539
2540 case RTNET_ETHERTYPE_ARP:
2541 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
2542 break;
2543 }
2544}
2545#endif /* INTNET_WITH_DHCP_SNOOPING */
2546
2547/**
2548 * Deals with an IPv6 packet.
2549 *
2550 * This will fish out the source IP address and add it to the cache.
2551 * Then it will look for DHCPRELEASE requests (?) and anything else
2552 * that we might find useful later.
2553 *
2554 * @param pIf The interface that's sending the frame.
2555 * @param pIpHdr Pointer to the IPv4 header in the frame.
2556 * @param cbPacket The size of the packet, or more correctly the
2557 * size of the frame without the ethernet header.
2558 * @param fGso Set if this is a GSO frame, clear if regular.
2559 */
2560static void intnetR0IfSnoopIPv6SourceAddr(PINTNETIF pIf, PCRTNETIPV6 pIpHdr, uint32_t cbPacket, bool fGso)
2561{
2562 NOREF(fGso);
2563
2564 /*
2565 * Check the header size first to prevent access invalid data.
2566 */
2567 if (cbPacket < RTNETIPV6_MIN_LEN)
2568 return;
2569
2570 /*
2571 * If the source address is good (not multicast) and
2572 * not already in the address cache of the sender, add it.
2573 */
2574 RTNETADDRU Addr;
2575 Addr.IPv6 = pIpHdr->ip6_src;
2576
2577 if ( intnetR0IPv6AddrIsGood(Addr.IPv6) && (pIpHdr->ip6_hlim == 0xff)
2578 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, sizeof(Addr.IPv6)) < 0)
2579 {
2580 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv6, &Addr, "if/ipv6");
2581 }
2582}
2583
2584
2585/**
2586 * Deals with an IPv4 packet.
2587 *
2588 * This will fish out the source IP address and add it to the cache.
2589 * Then it will look for DHCPRELEASE requests (?) and anything else
2590 * that we might find useful later.
2591 *
2592 * @param pIf The interface that's sending the frame.
2593 * @param pIpHdr Pointer to the IPv4 header in the frame.
2594 * @param cbPacket The size of the packet, or more correctly the
2595 * size of the frame without the ethernet header.
2596 * @param fGso Set if this is a GSO frame, clear if regular.
2597 */
2598static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
2599{
2600 /*
2601 * Check the header size first to prevent access invalid data.
2602 */
2603 if (cbPacket < RTNETIPV4_MIN_LEN)
2604 return;
2605 uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
2606 if ( cbHdr < RTNETIPV4_MIN_LEN
2607 || cbPacket < cbHdr)
2608 return;
2609
2610 /*
2611 * If the source address is good (not broadcast or my network) and
2612 * not already in the address cache of the sender, add it. Validate
2613 * the IP header before adding it.
2614 */
2615 bool fValidatedIpHdr = false;
2616 RTNETADDRU Addr;
2617 Addr.IPv4 = pIpHdr->ip_src;
2618 if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
2619 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
2620 {
2621 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2622 {
2623 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
2624 return;
2625 }
2626
2627 intnetR0IfAddrCacheAddIt(pIf, kIntNetAddrType_IPv4, &Addr, "if/ipv4");
2628 fValidatedIpHdr = true;
2629 }
2630
2631#ifdef INTNET_WITH_DHCP_SNOOPING
2632 /*
2633 * Check for potential DHCP packets.
2634 */
2635 if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2636 && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
2637 && !fGso) /* GSO is not applicable to DHCP traffic. */
2638 {
2639 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
2640 if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
2641 || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
2642 && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
2643 || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
2644 {
2645 if ( fValidatedIpHdr
2646 || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2647 intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
2648 else
2649 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
2650 }
2651 }
2652#endif /* INTNET_WITH_DHCP_SNOOPING */
2653}
2654
2655
2656/**
2657 * Snoop up source addresses from an ARP request or reply.
2658 *
2659 * @param pIf The interface that's sending the frame.
2660 * @param pHdr The ARP header.
2661 * @param cbPacket The size of the packet (might be larger than the ARP
2662 * request 'cause of min ethernet frame size).
2663 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2664 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2665 */
2666static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
2667{
2668 /*
2669 * Ignore packets which doesn't interest us or we perceive as malformed.
2670 */
2671 if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
2672 return;
2673 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2674 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2675 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2676 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2677 return;
2678 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2679 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2680 && ar_oper != RTNET_ARPOP_REPLY))
2681 {
2682 Log6(("ar_oper=%#x\n", ar_oper));
2683 return;
2684 }
2685
2686 /*
2687 * Tag the SG as ARP IPv4 for later editing, then check for addresses
2688 * which can be removed or added to the address cache of the sender.
2689 */
2690 *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
2691
2692 if ( ar_oper == RTNET_ARPOP_REPLY
2693 && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
2694 && ( pArpIPv4->ar_tha.au16[0]
2695 || pArpIPv4->ar_tha.au16[1]
2696 || pArpIPv4->ar_tha.au16[2])
2697 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
2698 intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2699 (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
2700
2701 if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
2702 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2703 {
2704 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv4, (PCRTNETADDRU)&pArpIPv4->ar_spa, "if/arp");
2705 }
2706}
2707
2708
2709
2710/**
2711 * Checks packets send by a normal interface for new network
2712 * layer addresses.
2713 *
2714 * @param pIf The interface that's sending the frame.
2715 * @param pbFrame The frame.
2716 * @param cbFrame The size of the frame.
2717 * @param fGso Set if this is a GSO frame, clear if regular.
2718 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2719 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2720 */
2721static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
2722{
2723 /*
2724 * Fish out the ethertype and look for stuff we can handle.
2725 */
2726 if (cbFrame <= sizeof(RTNETETHERHDR))
2727 return;
2728 cbFrame -= sizeof(RTNETETHERHDR);
2729
2730 uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
2731 switch (EtherType)
2732 {
2733 case RTNET_ETHERTYPE_IPV4:
2734 intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2735 break;
2736
2737 case RTNET_ETHERTYPE_IPV6:
2738 intnetR0IfSnoopIPv6SourceAddr(pIf, (PCRTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2739 break;
2740
2741#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
2742 case RTNET_ETHERTYPE_IPX_1:
2743 case RTNET_ETHERTYPE_IPX_2:
2744 case RTNET_ETHERTYPE_IPX_3:
2745 intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2746 break;
2747#endif
2748 case RTNET_ETHERTYPE_ARP:
2749 intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2750 break;
2751 }
2752}
2753
2754
2755/**
2756 * Writes a frame packet to the ring buffer.
2757 *
2758 * @returns VBox status code.
2759 * @param pBuf The buffer.
2760 * @param pRingBuf The ring buffer to read from.
2761 * @param pSG The gather list.
2762 * @param pNewDstMac Set the destination MAC address to the address if specified.
2763 */
2764static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
2765{
2766 PINTNETHDR pHdr = NULL; /* shut up gcc*/
2767 void *pvDst = NULL; /* ditto */
2768 int rc;
2769 if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2770 rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
2771 else
2772 rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
2773 if (RT_SUCCESS(rc))
2774 {
2775 IntNetSgRead(pSG, pvDst);
2776 if (pNewDstMac)
2777 ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
2778
2779 IntNetRingCommitFrame(pRingBuf, pHdr);
2780 return VINF_SUCCESS;
2781 }
2782 return rc;
2783}
2784
2785
/**
 * Sends a frame to a specific interface.
 *
 * The frame is written to the receive ring of the interface and the receive
 * event semaphore is signalled.  If the write fails (logged as an overflow),
 * the sending thread may yield once to let the receiver make room and then
 * retry the write.  If that doesn't help either, the frame is counted as
 * lost.  The receive event is signalled in all cases.
 *
 * @param   pIf             The interface.
 * @param   pIfSender       The interface sending the frame. This is NULL if it's the trunk.
 * @param   pSG             The gather buffer which data is being sent to the interface.
 * @param   pNewDstMac      Set the destination MAC address to the address if specified.
 */
static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
{
    /*
     * Grab the receive/producer lock and copy over the frame.
     */
    RTSpinlockAcquire(pIf->hRecvInSpinlock);
    int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
    RTSpinlockRelease(pIf->hRecvInSpinlock);
    if (RT_SUCCESS(rc))
    {
        /* A successful first attempt resets the count of failed yield retries. */
        pIf->cYields = 0;
        RTSemEventSignal(pIf->hRecvEvent);
        return;
    }

    Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));

    /*
     * Scheduling hack, for unicore machines primarily.
     *
     * Only attempted when the destination is active, the sender is a regular
     * interface, preemption is enabled, and fewer than four yield retries
     * have failed since the last successful first attempt.
     */
    if (    pIf->fActive
        &&  pIf->cYields < 4 /* give up yielding after four failed retries */
        &&  pIfSender /* but not if it's from the trunk */
        &&  RTThreadPreemptIsEnabled(NIL_RTTHREAD)
       )
    {
        /* Note: the pre-decrement means the loop body executes exactly once. */
        unsigned cYields = 2;
        while (--cYields > 0)
        {
            /* Wake up the receiver and give it a chance to run before retrying. */
            RTSemEventSignal(pIf->hRecvEvent);
            RTThreadYield();

            RTSpinlockAcquire(pIf->hRecvInSpinlock);
            rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
            RTSpinlockRelease(pIf->hRecvInSpinlock);
            if (RT_SUCCESS(rc))
            {
                /* The retry worked (pIf->cYields is not reset on this path). */
                STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
                RTSemEventSignal(pIf->hRecvEvent);
                return;
            }
            pIf->cYields++;
        }
        STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
    }

    /* ok, the frame is lost. */
    STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
    RTSemEventSignal(pIf->hRecvEvent);
}
2844
2845
2846/**
2847 * Fallback path that does the GSO segmenting before passing the frame on to the
2848 * trunk interface.
2849 *
2850 * The caller holds the trunk lock.
2851 *
2852 * @param pThis The trunk.
2853 * @param pIfSender The IF sending the frame.
2854 * @param pSG Pointer to the gather list.
2855 * @param fDst The destination flags.
2856 */
2857static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
2858{
2859 /*
2860 * Since we're only using this for GSO frame coming from the internal
2861 * network interfaces and never the trunk, we can assume there is only
2862 * one segment. This simplifies the code quite a bit.
2863 */
2864 Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
2865 AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
2866
2867 union
2868 {
2869 uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
2870 INTNETSG SG;
2871 } u;
2872
2873 /** @todo We have to adjust MSS so it does not exceed the value configured for
2874 * the host's interface.
2875 */
2876
2877 /*
2878 * Carve out the frame segments with the header and frame in different
2879 * scatter / gather segments.
2880 */
2881 uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
2882 for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
2883 {
2884 uint32_t cbSegPayload, cbSegHdrs;
2885 uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
2886 pIfSender->abGsoHdrs, &cbSegHdrs, &cbSegPayload);
2887
2888 IntNetSgInitTempSegs(&u.SG, cbSegHdrs + cbSegPayload, 2, 2);
2889 u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
2890 u.SG.aSegs[0].pv = pIfSender->abGsoHdrs;
2891 u.SG.aSegs[0].cb = cbSegHdrs;
2892 u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
2893 u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
2894 u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
2895
2896 int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
2897 if (RT_FAILURE(rc))
2898 return rc;
2899 }
2900 return VINF_SUCCESS;
2901}
2902
2903
2904/**
2905 * Checks if any of the given trunk destinations can handle this kind of GSO SG.
2906 *
2907 * @returns true if it can, false if it cannot.
2908 * @param pThis The trunk.
2909 * @param pSG The scatter / gather buffer.
2910 * @param fDst The destination mask.
2911 */
2912DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
2913{
2914 uint8_t u8Type = pSG->GsoCtx.u8Type;
2915 AssertReturn(u8Type < 32, false); /* paranoia */
2916 uint32_t fMask = RT_BIT_32(u8Type);
2917
2918 if (fDst == INTNETTRUNKDIR_HOST)
2919 return !!(pThis->fHostGsoCapabilites & fMask);
2920 if (fDst == INTNETTRUNKDIR_WIRE)
2921 return !!(pThis->fWireGsoCapabilites & fMask);
2922 Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
2923 return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
2924}
2925
2926
2927/**
2928 * Calculates the checksum of a full ipv6 frame.
2929 *
2930 * @returns 16-bit hecksum value.
2931 * @param pIpHdr The IPv6 header (network endian (big)).
2932 * @param bProtocol The protocol number. This can be the same as the
2933 * ip6_nxt field, but doesn't need to be.
2934 * @param cbPkt The packet size (host endian of course). This can
2935 * be the same as the ip6_plen field, but as with @a
2936 * bProtocol it won't be when extension headers are
2937 * present. For UDP this will be uh_ulen converted to
2938 * host endian.
2939 */
2940static uint16_t computeIPv6FullChecksum(PCRTNETIPV6 pIpHdr)
2941{
2942 uint16_t const *data;
2943 int len = RT_BE2H_U16(pIpHdr->ip6_plen);
2944 uint32_t sum = RTNetIPv6PseudoChecksum(pIpHdr);
2945
2946 /* add the payload */
2947 data = (uint16_t *) (pIpHdr + 1);
2948 while(len > 1)
2949 {
2950 sum += *(data);
2951 data++;
2952 len -= 2;
2953 }
2954
2955 if(len > 0)
2956 sum += *((uint8_t *) data);
2957
2958 while(sum >> 16)
2959 sum = (sum & 0xffff) + (sum >> 16);
2960
2961 return (uint16_t) ~sum;
2962}
2963
2964
2965/**
2966 * Rewrite VM MAC address with shared host MAC address inside IPv6
2967 * Neighbor Discovery datagrams.
2968 */
2969static void intnetR0TrunkSharedMacEditIPv6FromIntNet(PINTNETTRUNKIF pThis, PINTNETIF pIfSender,
2970 PRTNETETHERHDR pEthHdr, uint32_t cb)
2971{
2972 if (RT_UNLIKELY(cb < sizeof(*pEthHdr)))
2973 return;
2974
2975 /* have IPv6 header */
2976 PRTNETIPV6 pIPv6 = (PRTNETIPV6)(pEthHdr + 1);
2977 cb -= sizeof(*pEthHdr);
2978 if (RT_UNLIKELY(cb < sizeof(*pIPv6)))
2979 return;
2980
2981 if ( pIPv6->ip6_nxt != RTNETIPV6_PROT_ICMPV6
2982 || pIPv6->ip6_hlim != 0xff)
2983 return;
2984
2985 PRTNETICMPV6HDR pICMPv6 = (PRTNETICMPV6HDR)(pIPv6 + 1);
2986 cb -= sizeof(*pIPv6);
2987 if (RT_UNLIKELY(cb < sizeof(*pICMPv6)))
2988 return;
2989
2990 uint32_t hdrlen = 0;
2991 uint8_t llaopt = RTNETIPV6_ICMP_ND_SLLA_OPT;
2992
2993 uint8_t type = pICMPv6->icmp6_type;
2994 switch (type)
2995 {
2996 case RTNETIPV6_ICMP_TYPE_RS:
2997 hdrlen = 8;
2998 break;
2999
3000 case RTNETIPV6_ICMP_TYPE_RA:
3001 hdrlen = 16;
3002 break;
3003
3004 case RTNETIPV6_ICMP_TYPE_NS:
3005 hdrlen = 24;
3006 break;
3007
3008 case RTNETIPV6_ICMP_TYPE_NA:
3009 hdrlen = 24;
3010 llaopt = RTNETIPV6_ICMP_ND_TLLA_OPT;
3011 break;
3012
3013 default:
3014 return;
3015 }
3016
3017 AssertReturnVoid(hdrlen > 0);
3018 if (RT_UNLIKELY(cb < hdrlen))
3019 return;
3020
3021 if (RT_UNLIKELY(pICMPv6->icmp6_code != 0))
3022 return;
3023
3024 PRTNETNDP_LLA_OPT pLLAOpt = NULL;
3025 char *pOpt = (char *)pICMPv6 + hdrlen;
3026 cb -= hdrlen;
3027
3028 while (cb >= 8)
3029 {
3030 uint8_t opt = ((uint8_t *)pOpt)[0];
3031 uint32_t optlen = (uint32_t)((uint8_t *)pOpt)[1] * 8;
3032 if (RT_UNLIKELY(cb < optlen))
3033 return;
3034
3035 if (opt == llaopt)
3036 {
3037 if (RT_UNLIKELY(optlen != 8))
3038 return;
3039 pLLAOpt = (PRTNETNDP_LLA_OPT)pOpt;
3040 break;
3041 }
3042
3043 pOpt += optlen;
3044 cb -= optlen;
3045 }
3046
3047 if (pLLAOpt == NULL)
3048 return;
3049
3050 if (memcmp(&pLLAOpt->lla, &pIfSender->MacAddr, sizeof(RTMAC)) != 0)
3051 return;
3052
3053 /* overwrite VM's MAC with host's MAC */
3054 pLLAOpt->lla = pThis->MacAddr;
3055
3056 /* recompute the checksum */
3057 pICMPv6->icmp6_cksum = 0;
3058 pICMPv6->icmp6_cksum = computeIPv6FullChecksum(pIPv6);
3059}
3060
3061
3062/**
3063 * Sends a frame down the trunk.
3064 *
3065 * @param pThis The trunk.
3066 * @param pNetwork The network the frame is being sent to.
3067 * @param pIfSender The IF sending the frame. Used for MAC address
3068 * checks in shared MAC mode.
3069 * @param fDst The destination flags.
3070 * @param pSG Pointer to the gather list.
3071 */
3072static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
3073 uint32_t fDst, PINTNETSG pSG)
3074{
3075 /*
3076 * Quick sanity check.
3077 */
3078 AssertPtr(pThis);
3079 AssertPtr(pNetwork);
3080 AssertPtr(pIfSender);
3081 AssertPtr(pSG);
3082 Assert(fDst);
3083 AssertReturnVoid(pThis->pIfPort);
3084
3085 /*
3086 * Edit the frame if we're sharing the MAC address with the host on the wire.
3087 *
3088 * If the frame is headed for both the host and the wire, we'll have to send
3089 * it to the host before making any modifications, and force the OS specific
3090 * backend to copy it. We do this by marking it as TEMP (which is always the
3091 * case right now).
3092 */
3093 if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3094 && (fDst & INTNETTRUNKDIR_WIRE))
3095 {
3096 /*
3097 * Dispatch it to the host before making changes.
3098 */
3099 if (fDst & INTNETTRUNKDIR_HOST)
3100 {
3101 Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
3102 intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
3103 fDst &= ~INTNETTRUNKDIR_HOST;
3104 }
3105
3106 /*
3107 * Edit the source address so that it it's the same as the host.
3108 */
3109 /* ASSUME frame from IntNetR0IfSend! */
3110 AssertReturnVoid(pSG->cSegsUsed == 1);
3111 AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
3112 AssertReturnVoid(pIfSender);
3113 PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
3114
3115 pEthHdr->SrcMac = pThis->MacAddr;
3116
3117 /*
3118 * Deal with tags from the snooping phase.
3119 */
3120 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3121 {
3122 /*
3123 * APR IPv4: replace hardware (MAC) addresses because these end up
3124 * in ARP caches. So, if we don't the other machines will
3125 * send the packets to the MAC address of the guest
3126 * instead of the one of the host, which won't work on
3127 * wireless of course...
3128 */
3129 PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
3130 if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
3131 {
3132 Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
3133 pArp->ar_sha = pThis->MacAddr;
3134 }
3135 if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
3136 {
3137 Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
3138 pArp->ar_tha = pThis->MacAddr;
3139 }
3140 }
3141 else if (pEthHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6))
3142 {
3143 intnetR0TrunkSharedMacEditIPv6FromIntNet(pThis, pIfSender, pEthHdr, pSG->cbTotal);
3144 }
3145 }
3146
3147 /*
3148 * Send the frame, handling the GSO fallback.
3149 *
3150 * Note! The trunk implementation will re-check that the trunk is active
3151 * before sending, so we don't have to duplicate that effort here.
3152 */
3153 STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
3154 int rc;
3155 if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
3156 || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
3157 rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
3158 else
3159 rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
3160 STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
3161
3162 /** @todo failure statistics? */
3163 Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
3164}
3165
3166
3167/**
3168 * Detect broadcasts packaged as unicast and convert them back to broadcast.
3169 *
3170 * WiFi routers try to use ethernet unicast instead of broadcast or
3171 * multicast when possible. Look inside the packet and fix up
3172 * ethernet destination to be proper broadcast or multicast if
3173 * necessary.
3174 *
3175 * @returns true broadcast (pEthHdr & pSG are modified), false if not.
3176 * @param pNetwork The network the frame is being sent to.
3177 * @param pSG Pointer to the gather list for the frame. The
3178 * ethernet destination address is modified when
3179 * returning true.
3180 * @param pEthHdr Pointer to the ethernet header. The ethernet
3181 * destination address is modified when returning true.
3182 */
3183static bool intnetR0NetworkSharedMacDetectAndFixBroadcast(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3184{
3185 NOREF(pNetwork);
3186
3187 switch (pEthHdr->EtherType)
3188 {
3189 case RT_H2N_U16_C(RTNET_ETHERTYPE_ARP):
3190 {
3191 uint16_t ar_oper;
3192 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETARPHDR, ar_oper),
3193 sizeof(ar_oper), &ar_oper))
3194 return false;
3195
3196 if (ar_oper == RT_H2N_U16_C(RTNET_ARPOP_REQUEST))
3197 {
3198 /* change to broadcast */
3199 pEthHdr->DstMac.au16[0] = 0xffff;
3200 pEthHdr->DstMac.au16[1] = 0xffff;
3201 pEthHdr->DstMac.au16[2] = 0xffff;
3202 }
3203 else
3204 return false;
3205 break;
3206 }
3207
3208 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
3209 {
3210 RTNETADDRIPV4 ip_dst;
3211 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst),
3212 sizeof(ip_dst), &ip_dst))
3213 return false;
3214
3215 if (ip_dst.u == 0xffffffff) /* 255.255.255.255? */
3216 {
3217 /* change to broadcast */
3218 pEthHdr->DstMac.au16[0] = 0xffff;
3219 pEthHdr->DstMac.au16[1] = 0xffff;
3220 pEthHdr->DstMac.au16[2] = 0xffff;
3221 }
3222 else if ((ip_dst.au8[0] & 0xf0) == 0xe0) /* IPv4 multicast? */
3223 {
3224 /* change to 01:00:5e:xx:xx:xx multicast ... */
3225 pEthHdr->DstMac.au8[0] = 0x01;
3226 pEthHdr->DstMac.au8[1] = 0x00;
3227 pEthHdr->DstMac.au8[2] = 0x5e;
3228 /* ... with lower 23 bits from the multicast IP address */
3229 pEthHdr->DstMac.au8[3] = ip_dst.au8[1] & 0x7f;
3230 pEthHdr->DstMac.au8[4] = ip_dst.au8[2];
3231 pEthHdr->DstMac.au8[5] = ip_dst.au8[3];
3232 }
3233 else
3234 return false;
3235 break;
3236 }
3237
3238 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
3239 {
3240 RTNETADDRIPV6 ip6_dst;
3241 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst),
3242 sizeof(ip6_dst), &ip6_dst))
3243 return false;
3244
3245 if (ip6_dst.au8[0] == 0xff) /* IPv6 multicast? */
3246 {
3247 pEthHdr->DstMac.au16[0] = 0x3333;
3248 pEthHdr->DstMac.au16[1] = ip6_dst.au16[6];
3249 pEthHdr->DstMac.au16[2] = ip6_dst.au16[7];
3250 }
3251 else
3252 return false;
3253 break;
3254 }
3255
3256 default:
3257 return false;
3258 }
3259
3260
3261 /*
3262 * Update ethernet destination in the segment.
3263 */
3264 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(pEthHdr->DstMac), &pEthHdr->DstMac);
3265
3266 return true;
3267}
3268
3269
3270/**
3271 * Snoops a multicast ICMPv6 ND DAD from the wire via the trunk connection.
3272 *
3273 * @param pNetwork The network the frame is being sent to.
3274 * @param pSG Pointer to the gather list for the frame.
3275 * @param pEthHdr Pointer to the ethernet header.
3276 */
3277static void intnetR0NetworkSnoopNAFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3278{
3279 NOREF(pEthHdr);
3280
3281 /*
3282 * Check the minimum size and get a linear copy of the thing to work on,
3283 * using the temporary buffer if necessary.
3284 */
3285 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3286 sizeof(RTNETNDP)))
3287 return;
3288 PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3289 if ( pSG->cSegsUsed != 1
3290 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3291 sizeof(RTNETNDP))
3292 {
3293 Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
3294 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
3295 + sizeof(RTNETNDP), pNetwork->pbTmp))
3296 return;
3297 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3298 pIPv6 = (PRTNETIPV6)pNetwork->pbTmp;
3299 }
3300
3301 PCRTNETNDP pNd = (PCRTNETNDP) (pIPv6 + 1);
3302
3303 /*
3304 * a multicast NS with :: as source address means a DAD packet.
3305 * if it comes from the wire and we have the DAD'd address in our cache,
3306 * flush the entry as the address is being acquired by someone else on
3307 * the network.
3308 */
3309 if ( pIPv6->ip6_hlim == 0xff
3310 && pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6
3311 && pNd->Hdr.icmp6_type == RTNETIPV6_ICMP_TYPE_NS
3312 && pNd->Hdr.icmp6_code == 0
3313 && pIPv6->ip6_src.QWords.qw0 == 0
3314 && pIPv6->ip6_src.QWords.qw1 == 0)
3315 {
3316
3317 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU) &pNd->target_address,
3318 kIntNetAddrType_IPv6, sizeof(RTNETADDRIPV6), "tif/ip6");
3319 }
3320}
3321/**
3322 * Edits an ARP packet arriving from the wire via the trunk connection.
3323 *
3324 * @param pNetwork The network the frame is being sent to.
3325 * @param pSG Pointer to the gather list for the frame.
3326 * The flags and data content may be updated.
3327 * @param pEthHdr Pointer to the ethernet header. This may also be
3328 * updated if it's a unicast...
3329 */
3330static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3331{
3332 /*
3333 * Check the minimum size and get a linear copy of the thing to work on,
3334 * using the temporary buffer if necessary.
3335 */
3336 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
3337 return;
3338 PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3339 if ( pSG->cSegsUsed != 1
3340 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
3341 {
3342 Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
3343 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
3344 return;
3345 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3346 pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
3347 }
3348
3349 /*
3350 * Ignore packets which doesn't interest us or we perceive as malformed.
3351 */
3352 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
3353 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
3354 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
3355 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
3356 return;
3357 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
3358 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
3359 && ar_oper != RTNET_ARPOP_REPLY))
3360 {
3361 Log6(("ar_oper=%#x\n", ar_oper));
3362 return;
3363 }
3364
3365 /* Tag it as ARP IPv4. */
3366 pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
3367
3368 /*
3369 * The thing we're interested in here is a reply to a query made by a guest
3370 * since we modified the MAC in the initial request the guest made.
3371 */
3372 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3373 RTMAC MacAddrTrunk;
3374 if (pNetwork->MacTab.pTrunk)
3375 MacAddrTrunk = pNetwork->MacTab.pTrunk->MacAddr;
3376 else
3377 memset(&MacAddrTrunk, 0, sizeof(MacAddrTrunk));
3378 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3379 if ( ar_oper == RTNET_ARPOP_REPLY
3380 && !memcmp(&pArpIPv4->ar_tha, &MacAddrTrunk, sizeof(RTMAC)))
3381 {
3382 PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
3383 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
3384 if (pIf)
3385 {
3386 Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
3387 pArpIPv4->ar_tha = pIf->MacAddr;
3388 if (!memcmp(&pEthHdr->DstMac, &MacAddrTrunk, sizeof(RTMAC)))
3389 {
3390 Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
3391 pEthHdr->DstMac = pIf->MacAddr;
3392 if ((void *)pEthHdr != pSG->aSegs[0].pv)
3393 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
3394 }
3395 intnetR0BusyDecIf(pIf);
3396
3397 /* Write back the packet if we've been making changes to a buffered copy. */
3398 if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
3399 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
3400 }
3401 }
3402}
3403
3404
3405/**
3406 * Detects and edits an DHCP packet arriving from the internal net.
3407 *
3408 * @param pNetwork The network the frame is being sent to.
3409 * @param pSG Pointer to the gather list for the frame.
3410 * The flags and data content may be updated.
3411 * @param pEthHdr Pointer to the ethernet header. This may also be
3412 * updated if it's a unicast...
3413 */
3414static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3415{
3416 NOREF(pEthHdr);
3417
3418 /*
3419 * Check the minimum size and get a linear copy of the thing to work on,
3420 * using the temporary buffer if necessary.
3421 */
3422 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
3423 return;
3424 /*
3425 * Get a pointer to a linear copy of the full packet, using the
3426 * temporary buffer if necessary.
3427 */
3428 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
3429 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
3430 if (pSG->cSegsUsed > 1)
3431 {
3432 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
3433 Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
3434 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
3435 return;
3436 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3437 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
3438 }
3439
3440 /*
3441 * Validate the IP header and find the UDP packet.
3442 */
3443 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
3444 {
3445 Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
3446 return;
3447 }
3448 size_t cbIpHdr = pIpHdr->ip_hl * 4;
3449 if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
3450 || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
3451 return;
3452
3453 size_t cbUdpPkt = cbPacket - cbIpHdr;
3454 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
3455 /* We are only interested in DHCP packets coming from client to server. */
3456 if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
3457 || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
3458 return;
3459
3460 /*
3461 * Check if the DHCP message is valid and get the type.
3462 */
3463 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
3464 {
3465 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
3466 return;
3467 }
3468 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
3469 uint8_t bMsgType;
3470 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType))
3471 {
3472 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
3473 return;
3474 }
3475
3476 switch (bMsgType)
3477 {
3478 case RTNET_DHCP_MT_DISCOVER:
3479 case RTNET_DHCP_MT_REQUEST:
3480 /*
3481 * Must set the broadcast flag or we won't catch the respons.
3482 */
3483 if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
3484 {
3485 Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n",
3486 bMsgType, pDhcp->bp_flags));
3487
3488 /* Patch flags */
3489 uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3490 intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
3491
3492 /* Patch UDP checksum */
3493 uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3494 while (uChecksum >> 16)
3495 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3496 uChecksum = ~uChecksum;
3497 intnetR0SgWritePart(pSG, (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(pUdpHdr->uh_sum), &uChecksum);
3498 }
3499
3500#ifdef RT_OS_DARWIN
3501 /*
3502 * Work around little endian checksum issue in mac os x 10.7.0 GM.
3503 */
3504 if ( pIpHdr->ip_tos
3505 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_WORKAROUND_1))
3506 {
3507 /* Patch it. */
3508 uint8_t uTos = pIpHdr->ip_tos;
3509 uint8_t uZero = 0;
3510 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + 1, sizeof(uZero), &uZero);
3511
3512 /* Patch the IP header checksum. */
3513 uint32_t uChecksum = (uint32_t)~pIpHdr->ip_sum - (uTos << 8);
3514 while (uChecksum >> 16)
3515 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3516 uChecksum = ~uChecksum;
3517
3518 Log(("intnetR0NetworkEditDhcpFromIntNet: cleared ip_tos (was %#04x); ip_sum=%#06x -> %#06x\n",
3519 uTos, RT_BE2H_U16(pIpHdr->ip_sum), RT_BE2H_U16(uChecksum) ));
3520 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_sum),
3521 sizeof(pIpHdr->ip_sum), &uChecksum);
3522 }
3523#endif
3524 break;
3525 }
3526}
3527
3528
3529/**
3530 * Checks if the callers context is okay for sending to the specified
3531 * destinations.
3532 *
3533 * @returns true if it's okay, false if it isn't.
3534 * @param pNetwork The network.
3535 * @param pIfSender The interface sending or NULL if it's the trunk.
3536 * @param pDstTab The destination table.
3537 */
3538DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
3539{
3540 NOREF(pNetwork);
3541
3542 /* Sending to the trunk is the problematic path. If the trunk is the
3543 sender we won't be sending to it, so no problem..
3544 Note! fTrunkDst may be set event if if the trunk is the sender. */
3545 if (!pIfSender)
3546 return true;
3547
3548 uint32_t const fTrunkDst = pDstTab->fTrunkDst;
3549 if (!fTrunkDst)
3550 return true;
3551
3552 /* ASSUMES: that the trunk won't change its report while we're checking. */
3553 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3554 if (pTrunk && (fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
3555 return true;
3556
3557 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3558 non-preemptive systems as well.) */
3559 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3560 return true;
3561 return false;
3562}
3563
3564
3565/**
3566 * Checks if the callers context is okay for doing a broadcast given the
3567 * specified source.
3568 *
3569 * @returns true if it's okay, false if it isn't.
3570 * @param pNetwork The network.
3571 * @param fSrc The source of the packet. (0 (intnet),
3572 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3573 */
3574DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
3575{
3576 /* Sending to the trunk is the problematic path. If the trunk is the
3577 sender we won't be sending to it, so no problem. */
3578 if (fSrc)
3579 return true;
3580
3581 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3582 non-preemptive systems as well.) */
3583 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3584 return true;
3585
3586 /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
3587 freed while we're touching it. */
3588 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3589 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
3590
3591 bool fRc = !pTrunk
3592 || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
3593 || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
3594 && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
3595
3596 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3597
3598 return fRc;
3599}
3600
3601
3602/**
3603 * Check context, edit, snoop and switch a broadcast frame when sharing MAC
3604 * address on the wire.
3605 *
3606 * The caller must hold at least one interface on the network busy to prevent it
3607 * from destructing beath us.
3608 *
3609 * @param pNetwork The network the frame is being sent to.
3610 * @param fSrc The source of the packet. (0 (intnet),
3611 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3612 * @param pIfSender The sender interface, NULL if trunk. Used to
3613 * prevent sending an echo to the sender.
3614 * @param pSG Pointer to the gather list.
3615 * @param pEthHdr Pointer to the ethernet header.
3616 * @param pDstTab The destination output table.
3617 */
3618static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
3619 uint32_t fSrc, PINTNETIF pIfSender,
3620 PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
3621 PINTNETDSTTAB pDstTab)
3622{
3623 /*
3624 * Before doing any work here, we need to figure out if we can handle it
3625 * in the current context. The restrictions are solely on the trunk.
3626 *
3627 * Note! Since at least one interface is busy, there won't be any changes
3628 * to the parameters here (unless the trunk changes its capability
3629 * report, which it shouldn't).
3630 */
3631 if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
3632 return INTNETSWDECISION_BAD_CONTEXT;
3633
3634 /*
3635 * Check for ICMPv6 Neighbor Advertisements coming from the trunk.
3636 * If we see an advertisement for an IP in our cache, we can safely remove
3637 * it as the IP has probably moved.
3638 */
3639 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3640 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV6
3641 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3642 intnetR0NetworkSnoopNAFromWire(pNetwork, pSG, pEthHdr);
3643
3644
3645 /*
3646 * Check for ARP packets from the wire since we'll have to make
3647 * modification to them if we're sharing the MAC address with the host.
3648 */
3649 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3650 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
3651 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3652 intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
3653
3654 /*
3655 * Check for DHCP packets from the internal net since we'll have to set
3656 * broadcast flag in DHCP requests if we're sharing the MAC address with
3657 * the host. GSO is not applicable to DHCP traffic.
3658 */
3659 if ( !fSrc
3660 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
3661 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3662 intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
3663
3664 /*
3665 * Snoop address info from packet originating from the trunk connection.
3666 */
3667 if (fSrc)
3668 {
3669#ifdef INTNET_WITH_DHCP_SNOOPING
3670 uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
3671 if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
3672 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3673 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
3674 || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
3675 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
3676#else
3677 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3678 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
3679#endif
3680 }
3681
3682 /*
3683 * Create the broadcast destination table.
3684 */
3685 return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3686}
3687
3688
3689/**
3690 * Check context, snoop and switch a unicast frame using the network layer
3691 * address of the link layer one (when sharing MAC address on the wire).
3692 *
3693 * This function is only used for frames coming from the wire (trunk).
3694 *
3695 * @returns true if it's addressed to someone on the network, otherwise false.
3696 * @param pNetwork The network the frame is being sent to.
3697 * @param pSG Pointer to the gather list.
3698 * @param pEthHdr Pointer to the ethernet header.
3699 * @param pDstTab The destination output table.
3700 */
3701static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
3702 PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
3703{
3704 /*
3705 * Extract the network address from the packet.
3706 */
3707 RTNETADDRU Addr;
3708 INTNETADDRTYPE enmAddrType;
3709 uint8_t cbAddr;
3710 switch (RT_BE2H_U16(pEthHdr->EtherType))
3711 {
3712 case RTNET_ETHERTYPE_IPV4:
3713 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
3714 {
3715 Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
3716 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3717 }
3718 enmAddrType = kIntNetAddrType_IPv4;
3719 cbAddr = sizeof(Addr.IPv4);
3720 Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
3721 break;
3722
3723 case RTNET_ETHERTYPE_IPV6:
3724 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
3725 {
3726 Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
3727 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3728 }
3729 enmAddrType = kIntNetAddrType_IPv6;
3730 cbAddr = sizeof(Addr.IPv6);
3731 break;
3732#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
3733 case RTNET_ETHERTYPE_IPX_1:
3734 case RTNET_ETHERTYPE_IPX_2:
3735 case RTNET_ETHERTYPE_IPX_3:
3736 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
3737 {
3738 Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
3739 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3740 }
3741 enmAddrType = kIntNetAddrType_IPX;
3742 cbAddr = sizeof(Addr.IPX);
3743 break;
3744#endif
3745
3746 /*
3747 * Treat ARP as broadcast (it shouldn't end up here normally,
3748 * so it goes last in the switch).
3749 */
3750 case RTNET_ETHERTYPE_ARP:
3751 Log6(("intnetshareduni: ARP\n"));
3752 /** @todo revisit this broadcasting of unicast ARP frames! */
3753 return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);
3754
3755 /*
3756 * Unknown packets are sent to the trunk and any promiscuous interfaces.
3757 */
3758 default:
3759 {
3760 Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
3761 return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3762 }
3763 }
3764
3765 /*
3766 * Do level-3 switching.
3767 */
3768 INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
3769 enmAddrType, &Addr, cbAddr,
3770 INTNETTRUNKDIR_WIRE, pDstTab);
3771
3772#ifdef INTNET_WITH_DHCP_SNOOPING
3773 /*
3774 * Perform DHCP snooping. GSO is not applicable to DHCP traffic
3775 */
3776 if ( enmAddrType == kIntNetAddrType_IPv4
3777 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3778 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3779 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
3780#endif /* INTNET_WITH_DHCP_SNOOPING */
3781
3782 return enmSwDecision;
3783}
3784
3785
3786/**
3787 * Release all the interfaces in the destination table when we realize that
3788 * we're in a context where we cannot get the job done.
3789 *
3790 * @param pNetwork The network.
3791 * @param pDstTab The destination table.
3792 */
3793static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
3794{
3795 /* The trunk interface. */
3796 if (pDstTab->fTrunkDst)
3797 {
3798 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3799 if (pTrunk)
3800 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3801 pDstTab->pTrunk = NULL;
3802 pDstTab->fTrunkDst = 0;
3803 }
3804
3805 /* Regular interfaces. */
3806 uint32_t iIf = pDstTab->cIfs;
3807 while (iIf-- > 0)
3808 {
3809 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3810 intnetR0BusyDecIf(pIf);
3811 pDstTab->aIfs[iIf].pIf = NULL;
3812 }
3813 pDstTab->cIfs = 0;
3814}
3815
3816
3817/**
3818 * Deliver the frame to the interfaces specified in the destination table.
3819 *
3820 * @param pNetwork The network.
3821 * @param pDstTab The destination table.
3822 * @param pSG The frame to send.
3823 * @param pIfSender The sender interface. NULL if it originated via
3824 * the trunk.
3825 */
3826static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
3827{
3828 /*
3829 * Do the interfaces first before sending it to the wire and risk having to
3830 * modify it.
3831 */
3832 uint32_t iIf = pDstTab->cIfs;
3833 while (iIf-- > 0)
3834 {
3835 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3836 intnetR0IfSend(pIf, pIfSender, pSG,
3837 pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
3838 intnetR0BusyDecIf(pIf);
3839 pDstTab->aIfs[iIf].pIf = NULL;
3840 }
3841 pDstTab->cIfs = 0;
3842
3843 /*
3844 * Send to the trunk.
3845 *
3846 * Note! The switching functions will include the trunk even when the frame
3847 * source is the trunk. This is because we need it to figure out
3848 * whether the other half of the trunk should see the frame or not
3849 * and let the caller know.
3850 *
3851 * So, we'll ignore trunk sends here if the frame origin is
3852 * INTNETTRUNKSWPORT::pfnRecv.
3853 */
3854 if (pDstTab->fTrunkDst)
3855 {
3856 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3857 if (pTrunk)
3858 {
3859 if (pIfSender)
3860 intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
3861 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3862 }
3863 pDstTab->pTrunk = NULL;
3864 pDstTab->fTrunkDst = 0;
3865 }
3866}
3867
3868
3869/**
3870 * Sends a frame.
3871 *
3872 * This function will distribute the frame to the interfaces it is addressed to.
3873 * It will also update the MAC address of the sender.
3874 *
3875 * The caller must own the network mutex.
3876 *
3877 * @returns The switching decision.
3878 * @param pNetwork The network the frame is being sent to.
3879 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
3880 * @param fSrc The source flags. This 0 if it's not from the trunk.
3881 * @param pSG Pointer to the gather list.
3882 * @param pDstTab The destination table to use.
3883 */
3884static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
3885 PINTNETSG pSG, PINTNETDSTTAB pDstTab)
3886{
3887 /*
3888 * Assert reality.
3889 */
3890 AssertPtr(pNetwork);
3891 AssertPtrNull(pIfSender);
3892 Assert(pIfSender ? fSrc == 0 : fSrc != 0);
3893 Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
3894 AssertPtr(pSG);
3895 Assert(pSG->cSegsUsed >= 1);
3896 Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
3897 if (pSG->cbTotal < sizeof(RTNETETHERHDR))
3898 return INTNETSWDECISION_INVALID;
3899
3900 /*
3901 * Get the ethernet header (might theoretically involve multiple segments).
3902 */
3903 RTNETETHERHDR EthHdr;
3904 if (pSG->aSegs[0].cb >= sizeof(EthHdr))
3905 EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
3906 else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
3907 return INTNETSWDECISION_INVALID;
3908 if ( (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
3909 || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
3910 || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
3911 || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
3912 || EthHdr.DstMac.au8[0] == 0xff
3913 || EthHdr.SrcMac.au8[0] == 0xff)
3914 Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
3915 &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));
3916
3917 /*
3918 * Learn the MAC address of the sender. No re-learning as the interface
3919 * user will normally tell us the right MAC address.
3920 *
3921 * Note! We don't notify the trunk about these mainly because of the
3922 * problematic contexts we might be called in.
3923 */
3924 if (RT_UNLIKELY( pIfSender
3925 && !pIfSender->fMacSet
3926 && memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
3927 && !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
3928 ))
3929 {
3930 Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
3931 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3932
3933 PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
3934 if (pIfEntry)
3935 pIfEntry->MacAddr = EthHdr.SrcMac;
3936 pIfSender->MacAddr = EthHdr.SrcMac;
3937
3938 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3939 }
3940
3941 /*
3942 * Deal with MAC address sharing as that may required editing of the
3943 * packets before we dispatch them anywhere.
3944 */
3945 INTNETSWDECISION enmSwDecision;
3946 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3947 {
3948 if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3949 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3950 else if (fSrc & INTNETTRUNKDIR_WIRE)
3951 {
3952 if (intnetR0NetworkSharedMacDetectAndFixBroadcast(pNetwork, pSG, &EthHdr))
3953 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3954 else
3955 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
3956 }
3957 else
3958 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3959 }
3960 else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3961 enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3962 else
3963 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3964
3965 /*
3966 * Deliver to the destinations if we can.
3967 */
3968 if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
3969 {
3970 if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
3971 intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
3972 else
3973 {
3974 intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
3975 enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
3976 }
3977 }
3978
3979 return enmSwDecision;
3980}
3981
3982
3983/**
3984 * Sends one or more frames.
3985 *
3986 * The function will first the frame which is passed as the optional arguments
3987 * pvFrame and cbFrame. These are optional since it also possible to chain
3988 * together one or more frames in the send buffer which the function will
3989 * process after considering it's arguments.
3990 *
3991 * The caller is responsible for making sure that there are no concurrent calls
3992 * to this method (with the same handle).
3993 *
3994 * @returns VBox status code.
3995 * @param hIf The interface handle.
3996 * @param pSession The caller's session.
3997 */
3998INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
3999{
4000 Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));
4001
4002 /*
4003 * Validate input and translate the handle.
4004 */
4005 PINTNET pIntNet = g_pIntNet;
4006 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4007 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4008
4009 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4010 if (!pIf)
4011 return VERR_INVALID_HANDLE;
4012 STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);
4013
4014 /*
4015 * Make sure we've got a network.
4016 */
4017 int rc = VINF_SUCCESS;
4018 intnetR0BusyIncIf(pIf);
4019 PINTNETNETWORK pNetwork = pIf->pNetwork;
4020 if (RT_LIKELY(pNetwork))
4021 {
4022 /*
4023 * Grab the destination table.
4024 */
4025 PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
4026 if (RT_LIKELY(pDstTab))
4027 {
4028 /*
4029 * Process the send buffer.
4030 */
4031 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
4032 INTNETSG Sg; /** @todo this will have to be changed if we're going to use async sending
4033 * with buffer sharing for some OS or service. Darwin copies everything so
4034 * I won't bother allocating and managing SGs right now. Sorry. */
4035 PINTNETHDR pHdr;
4036 while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
4037 {
4038 uint8_t const u8Type = pHdr->u8Type;
4039 if (u8Type == INTNETHDR_TYPE_FRAME)
4040 {
4041 /* Send regular frame. */
4042 void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
4043 IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
4044 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
4045 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
4046 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
4047 }
4048 else if (u8Type == INTNETHDR_TYPE_GSO)
4049 {
4050 /* Send GSO frame if sane. */
4051 PPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
4052 uint32_t cbFrame = pHdr->cbFrame - sizeof(*pGso);
4053 if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
4054 {
4055 void *pvCurFrame = pGso + 1;
4056 IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
4057 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
4058 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
4059 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
4060 }
4061 else
4062 {
4063 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
4064 enmSwDecision = INTNETSWDECISION_DROP;
4065 }
4066 }
4067 /* Unless it's a padding frame, we're getting babble from the producer. */
4068 else
4069 {
4070 if (u8Type != INTNETHDR_TYPE_PADDING)
4071 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
4072 enmSwDecision = INTNETSWDECISION_DROP;
4073 }
4074 if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
4075 {
4076 rc = VERR_TRY_AGAIN;
4077 break;
4078 }
4079
4080 /* Skip to the next frame. */
4081 IntNetRingSkipFrame(&pIf->pIntBuf->Send);
4082 }
4083
4084 /*
4085 * Put back the destination table.
4086 */
4087 Assert(!pIf->pDstTab);
4088 ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
4089 }
4090 else
4091 rc = VERR_INTERNAL_ERROR_4;
4092 }
4093 else
4094 rc = VERR_INTERNAL_ERROR_3;
4095
4096 /*
4097 * Release the interface.
4098 */
4099 intnetR0BusyDecIf(pIf);
4100 STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
4101 intnetR0IfRelease(pIf, pSession);
4102 return rc;
4103}
4104
4105
4106/**
4107 * VMMR0 request wrapper for IntNetR0IfSend.
4108 *
4109 * @returns see IntNetR0IfSend.
4110 * @param pSession The caller's session.
4111 * @param pReq The request packet.
4112 */
4113INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
4114{
4115 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4116 return VERR_INVALID_PARAMETER;
4117 return IntNetR0IfSend(pReq->hIf, pSession);
4118}
4119
4120
4121/**
4122 * Maps the default buffer into ring 3.
4123 *
4124 * @returns VBox status code.
4125 * @param hIf The interface handle.
4126 * @param pSession The caller's session.
4127 * @param ppRing3Buf Where to store the address of the ring-3 mapping
4128 * (optional).
4129 * @param ppRing0Buf Where to store the address of the ring-0 mapping
4130 * (optional).
4131 */
4132INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
4133 R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
4134{
4135 LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));
4136
4137 /*
4138 * Validate input.
4139 */
4140 PINTNET pIntNet = g_pIntNet;
4141 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4142 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4143
4144 AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
4145 AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
4146 if (ppRing3Buf)
4147 *ppRing3Buf = 0;
4148 if (ppRing0Buf)
4149 *ppRing0Buf = 0;
4150
4151 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4152 if (!pIf)
4153 return VERR_INVALID_HANDLE;
4154
4155 /*
4156 * ASSUMES that only the process that created an interface can use it.
4157 * ASSUMES that we created the ring-3 mapping when selecting or
4158 * allocating the buffer.
4159 */
4160 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4161 if (RT_SUCCESS(rc))
4162 {
4163 if (ppRing3Buf)
4164 *ppRing3Buf = pIf->pIntBufR3;
4165 if (ppRing0Buf)
4166 *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */
4167
4168 rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4169 }
4170
4171 intnetR0IfRelease(pIf, pSession);
4172 LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
4173 rc, ppRing3Buf ? *ppRing3Buf : NIL_RTR3PTR, ppRing0Buf ? *ppRing0Buf : NIL_RTR0PTR));
4174 return rc;
4175}
4176
4177
4178/**
4179 * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
4180 *
4181 * @returns see IntNetR0IfGetRing3Buffer.
4182 * @param pSession The caller's session.
4183 * @param pReq The request packet.
4184 */
4185INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
4186{
4187 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4188 return VERR_INVALID_PARAMETER;
4189 return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
4190}
4191
4192
#if 0
/**
 * Gets the physical addresses of the default interface buffer.
 *
 * This code is compiled out and unfinished: it always returns
 * VERR_NOT_IMPLEMENTED, and it refers to a pSession variable that is not among
 * its parameters, so it would not build if simply enabled.
 *
 * @returns VBox status code.
 * @param   hIf         The interface handle.
 * @param   paPages     Where to store the addresses. (The reserved fields will be set to zero.)
 * @param   cPages      Presumably the number of entries in paPages, judging by
 *                      the validation below - confirm before enabling.
 */
INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
{
    /*
     * Check the instance, the page array and the handle.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);

    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;

    /*
     * The data would be fetched under the network lock.
     * ASSUMES that the handle isn't closed while we're here.
     */
    int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
    if (RT_SUCCESS(rc))
    {
        /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
         * is no need for any extra bookkeeping here.. */

        rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
    }

    intnetR0IfRelease(pIf, pSession);
    return VERR_NOT_IMPLEMENTED;
}
#endif
4233
4234
4235/**
4236 * Sets the promiscuous mode property of an interface.
4237 *
4238 * @returns VBox status code.
4239 * @param hIf The interface handle.
4240 * @param pSession The caller's session.
4241 * @param fPromiscuous Set if the interface should be in promiscuous mode, clear if not.
4242 */
4243INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
4244{
4245 LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));
4246
4247 /*
4248 * Validate & translate input.
4249 */
4250 PINTNET pIntNet = g_pIntNet;
4251 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4252 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4253
4254 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4255 if (!pIf)
4256 {
4257 Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
4258 return VERR_INVALID_HANDLE;
4259 }
4260
4261 /*
4262 * Get the network, take the address spinlock, and make the change.
4263 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4264 */
4265 int rc = VINF_SUCCESS;
4266 intnetR0BusyIncIf(pIf);
4267 PINTNETNETWORK pNetwork = pIf->pNetwork;
4268 if (pNetwork)
4269 {
4270 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4271
4272 if (pIf->fPromiscuousReal != fPromiscuous)
4273 {
4274 const bool fPromiscuousEff = fPromiscuous
4275 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW)
4276 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS);
4277 Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d (%d)\n",
4278 hIf, !fPromiscuous, !!fPromiscuous, fPromiscuousEff));
4279
4280 pIf->fPromiscuousReal = fPromiscuous;
4281
4282 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4283 if (RT_LIKELY(pEntry))
4284 {
4285 if (pEntry->fPromiscuousEff)
4286 {
4287 pNetwork->MacTab.cPromiscuousEntries--;
4288 if (!pEntry->fPromiscuousSeeTrunk)
4289 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4290 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4291 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4292 }
4293
4294 pEntry->fPromiscuousEff = fPromiscuousEff;
4295 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
4296 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
4297
4298 if (pEntry->fPromiscuousEff)
4299 {
4300 pNetwork->MacTab.cPromiscuousEntries++;
4301 if (!pEntry->fPromiscuousSeeTrunk)
4302 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
4303 }
4304 Assert(pNetwork->MacTab.cPromiscuousEntries <= pNetwork->MacTab.cEntries);
4305 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries <= pNetwork->MacTab.cEntries);
4306 }
4307 }
4308
4309 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4310 }
4311 else
4312 rc = VERR_WRONG_ORDER;
4313
4314 intnetR0BusyDecIf(pIf);
4315 intnetR0IfRelease(pIf, pSession);
4316 return rc;
4317}
4318
4319
4320/**
4321 * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
4322 *
4323 * @returns see IntNetR0IfSetPromiscuousMode.
4324 * @param pSession The caller's session.
4325 * @param pReq The request packet.
4326 */
4327INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
4328{
4329 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4330 return VERR_INVALID_PARAMETER;
4331 return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
4332}
4333
4334
4335/**
4336 * Sets the MAC address of an interface.
4337 *
4338 * @returns VBox status code.
4339 * @param hIf The interface handle.
4340 * @param pSession The caller's session.
4341 * @param pMAC The new MAC address.
4342 */
4343INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
4344{
4345 LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));
4346
4347 /*
4348 * Validate & translate input.
4349 */
4350 PINTNET pIntNet = g_pIntNet;
4351 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4352 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4353
4354 AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
4355 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4356 if (!pIf)
4357 {
4358 Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
4359 return VERR_INVALID_HANDLE;
4360 }
4361
4362 /*
4363 * Get the network, take the address spinlock, and make the change.
4364 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4365 */
4366 int rc = VINF_SUCCESS;
4367 intnetR0BusyIncIf(pIf);
4368 PINTNETNETWORK pNetwork = pIf->pNetwork;
4369 if (pNetwork)
4370 {
4371 PINTNETTRUNKIF pTrunk = NULL;
4372
4373 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4374
4375 if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
4376 {
4377 Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
4378 hIf, &pIf->MacAddr, pMac));
4379
4380 /* Update the two copies. */
4381 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4382 if (RT_LIKELY(pEntry))
4383 pEntry->MacAddr = *pMac;
4384 pIf->MacAddr = *pMac;
4385 pIf->fMacSet = true;
4386
4387 /* Grab a busy reference to the trunk so we release the lock before notifying it. */
4388 pTrunk = pNetwork->MacTab.pTrunk;
4389 if (pTrunk)
4390 intnetR0BusyIncTrunk(pTrunk);
4391 }
4392
4393 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4394
4395 if (pTrunk)
4396 {
4397 Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
4398 PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
4399 if (pIfPort)
4400 pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
4401 intnetR0BusyDecTrunk(pTrunk);
4402 }
4403 }
4404 else
4405 rc = VERR_WRONG_ORDER;
4406
4407 intnetR0BusyDecIf(pIf);
4408 intnetR0IfRelease(pIf, pSession);
4409 return rc;
4410}
4411
4412
4413/**
4414 * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
4415 *
4416 * @returns see IntNetR0IfSetMacAddress.
4417 * @param pSession The caller's session.
4418 * @param pReq The request packet.
4419 */
4420INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
4421{
4422 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4423 return VERR_INVALID_PARAMETER;
4424 return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
4425}
4426
4427
4428/**
4429 * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
4430 *
4431 * This function will update the active interface count on the network and
4432 * activate or deactivate the trunk connection if necessary.
4433 *
4434 * The call must own the giant lock (we cannot take it here).
4435 *
4436 * @returns VBox status code.
4437 * @param pNetwork The network.
4438 * @param fIf The interface.
4439 * @param fActive What to do.
4440 */
4441static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
4442{
4443 /* quick sanity check */
4444 AssertPtr(pNetwork);
4445 AssertPtr(pIf);
4446
4447 /*
4448 * The address spinlock of the network protects the variables, while the
4449 * big lock protects the calling of pfnSetState. Grab both lock at once
4450 * to save us the extra hassle.
4451 */
4452 PINTNETTRUNKIF pTrunk = NULL;
4453 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4454
4455 /*
4456 * Do the update.
4457 */
4458 if (pIf->fActive != fActive)
4459 {
4460 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4461 if (RT_LIKELY(pEntry))
4462 {
4463 pEntry->fActive = fActive;
4464 pIf->fActive = fActive;
4465
4466 if (fActive)
4467 {
4468 pNetwork->cActiveIFs++;
4469 if (pNetwork->cActiveIFs == 1)
4470 {
4471 pTrunk = pNetwork->MacTab.pTrunk;
4472 if (pTrunk)
4473 {
4474 pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
4475 pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
4476 }
4477 }
4478 }
4479 else
4480 {
4481 pNetwork->cActiveIFs--;
4482 if (pNetwork->cActiveIFs == 0)
4483 {
4484 pTrunk = pNetwork->MacTab.pTrunk;
4485 pNetwork->MacTab.fHostActive = false;
4486 pNetwork->MacTab.fWireActive = false;
4487 }
4488 }
4489 }
4490 }
4491
4492 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4493
4494 /*
4495 * Tell the trunk if necessary.
4496 * The wait for !busy is for the Solaris streams trunk driver (mostly).
4497 */
4498 if (pTrunk && pTrunk->pIfPort)
4499 {
4500 if (!fActive)
4501 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
4502
4503 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
4504 }
4505
4506 return VINF_SUCCESS;
4507}
4508
4509
4510/**
4511 * Sets the active property of an interface.
4512 *
4513 * @returns VBox status code.
4514 * @param hIf The interface handle.
4515 * @param pSession The caller's session.
4516 * @param fActive The new state.
4517 */
4518INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
4519{
4520 LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));
4521
4522 /*
4523 * Validate & translate input.
4524 */
4525 PINTNET pIntNet = g_pIntNet;
4526 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4527 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4528
4529 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4530 if (!pIf)
4531 {
4532 Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
4533 return VERR_INVALID_HANDLE;
4534 }
4535
4536 /*
4537 * Hand it to the network since it might involve the trunk and things are
4538 * tricky there wrt to locking order.
4539 *
4540 * 1. We take the giant lock here. This makes sure nobody is re-enabling
4541 * the network while we're pausing it and vice versa. This also enables
4542 * us to wait for the network to become idle before telling the trunk.
4543 * (Important on Solaris.)
4544 *
4545 * 2. For paranoid reasons, we grab a busy reference to the calling
4546 * interface. This is totally unnecessary but should hurt (when done
4547 * after grabbing the giant lock).
4548 */
4549 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4550 if (RT_SUCCESS(rc))
4551 {
4552 intnetR0BusyIncIf(pIf);
4553
4554 PINTNETNETWORK pNetwork = pIf->pNetwork;
4555 if (pNetwork)
4556 rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
4557 else
4558 rc = VERR_WRONG_ORDER;
4559
4560 intnetR0BusyDecIf(pIf);
4561 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4562 }
4563
4564 intnetR0IfRelease(pIf, pSession);
4565 LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
4566 return rc;
4567}
4568
4569
4570/**
4571 * VMMR0 request wrapper for IntNetR0IfSetActive.
4572 *
4573 * @returns see IntNetR0IfSetActive.
4574 * @param pIntNet The internal networking instance.
4575 * @param pSession The caller's session.
4576 * @param pReq The request packet.
4577 */
4578INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
4579{
4580 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4581 return VERR_INVALID_PARAMETER;
4582 return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
4583}
4584
4585
4586/**
4587 * Wait for the interface to get signaled.
4588 * The interface will be signaled when is put into the receive buffer.
4589 *
4590 * @returns VBox status code.
4591 * @param hIf The interface handle.
4592 * @param pSession The caller's session.
4593 * @param cMillies Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
4594 * used if indefinite wait is desired.
4595 */
4596INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
4597{
4598 Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));
4599
4600 /*
4601 * Get and validate essential handles.
4602 */
4603 PINTNET pIntNet = g_pIntNet;
4604 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4605 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4606
4607 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4608 if (!pIf)
4609 {
4610 Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
4611 return VERR_INVALID_HANDLE;
4612 }
4613
4614 const INTNETIFHANDLE hIfSelf = pIf->hIf;
4615 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4616 const bool fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
4617 if ( hIfSelf != hIf /* paranoia */
4618 || hRecvEvent == NIL_RTSEMEVENT
4619 || fDestroying
4620 )
4621 {
4622 Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
4623 return VERR_SEM_DESTROYED;
4624 }
4625
4626 /*
4627 * It is tempting to check if there is data to be read here,
4628 * but the problem with such an approach is that it will cause
4629 * one unnecessary supervisor->user->supervisor trip. There is
4630 * already a slight risk for such, so no need to increase it.
4631 */
4632
4633 /*
4634 * Increment the number of waiters before starting the wait.
4635 * Upon wakeup we must assert reality, checking that we're not
4636 * already destroyed or in the process of being destroyed. This
4637 * code must be aligned with the waiting code in intnetR0IfDestruct.
4638 */
4639 ASMAtomicIncU32(&pIf->cSleepers);
4640 int rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
4641 if (pIf->hRecvEvent == hRecvEvent)
4642 {
4643 ASMAtomicDecU32(&pIf->cSleepers);
4644 if (!pIf->fDestroying)
4645 {
4646 if (intnetR0IfRelease(pIf, pSession))
4647 rc = VERR_SEM_DESTROYED;
4648 }
4649 else
4650 rc = VERR_SEM_DESTROYED;
4651 }
4652 else
4653 rc = VERR_SEM_DESTROYED;
4654 Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
4655 return rc;
4656}
4657
4658
4659/**
4660 * VMMR0 request wrapper for IntNetR0IfWait.
4661 *
4662 * @returns see IntNetR0IfWait.
4663 * @param pSession The caller's session.
4664 * @param pReq The request packet.
4665 */
4666INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
4667{
4668 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4669 return VERR_INVALID_PARAMETER;
4670 return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
4671}
4672
4673
4674/**
4675 * Wake up any threads waiting on the interface.
4676 *
4677 * @returns VBox status code.
4678 * @param hIf The interface handle.
4679 * @param pSession The caller's session.
4680 * @param fNoMoreWaits When set, no more waits are permitted.
4681 */
4682INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
4683{
4684 Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
4685
4686 /*
4687 * Get and validate essential handles.
4688 */
4689 PINTNET pIntNet = g_pIntNet;
4690 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4691 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4692
4693 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4694 if (!pIf)
4695 {
4696 Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
4697 return VERR_INVALID_HANDLE;
4698 }
4699
4700 const INTNETIFHANDLE hIfSelf = pIf->hIf;
4701 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4702 const bool fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
4703 if ( hIfSelf != hIf /* paranoia */
4704 || hRecvEvent == NIL_RTSEMEVENT
4705 || fDestroying
4706 )
4707 {
4708 Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
4709 return VERR_SEM_DESTROYED;
4710 }
4711
4712 /*
4713 * Set fDestroying if requested to do so and then wake up all the sleeping
4714 * threads (usually just one). We leave the semaphore in the signalled
4715 * state so the next caller will return immediately.
4716 */
4717 if (fNoMoreWaits)
4718 ASMAtomicWriteBool(&pIf->fDestroying, true);
4719
4720 uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
4721 while (cSleepers-- > 0)
4722 {
4723 int rc = RTSemEventSignal(pIf->hRecvEvent);
4724 AssertRC(rc);
4725 }
4726
4727 Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
4728 return VINF_SUCCESS;
4729}
4730
4731
4732/**
4733 * VMMR0 request wrapper for IntNetR0IfAbortWait.
4734 *
4735 * @returns see IntNetR0IfWait.
4736 * @param pSession The caller's session.
4737 * @param pReq The request packet.
4738 */
4739INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
4740{
4741 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4742 return VERR_INVALID_PARAMETER;
4743 return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
4744}
4745
4746
4747/**
4748 * Close an interface.
4749 *
4750 * @returns VBox status code.
4751 * @param pIntNet The instance handle.
4752 * @param hIf The interface handle.
4753 * @param pSession The caller's session.
4754 */
4755INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4756{
4757 LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
4758
4759 /*
4760 * Validate and free the handle.
4761 */
4762 PINTNET pIntNet = g_pIntNet;
4763 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4764 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4765
4766 PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
4767 if (!pIf)
4768 return VERR_INVALID_HANDLE;
4769
4770 /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
4771 ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4772
4773 /*
4774 * Signal the event semaphore to wake up any threads in IntNetR0IfWait
4775 * and give them a moment to get out and release the interface.
4776 */
4777 uint32_t i = pIf->cSleepers;
4778 while (i-- > 0)
4779 {
4780 RTSemEventSignal(pIf->hRecvEvent);
4781 RTThreadYield();
4782 }
4783 RTSemEventSignal(pIf->hRecvEvent);
4784
4785 /*
4786 * Release the references to the interface object (handle + free lookup).
4787 */
4788 void *pvObj = pIf->pvObj;
4789 intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
4790
4791 int rc = SUPR0ObjRelease(pvObj, pSession);
4792 LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
4793 return rc;
4794}
4795
4796
4797/**
4798 * VMMR0 request wrapper for IntNetR0IfCloseReq.
4799 *
4800 * @returns see IntNetR0IfClose.
4801 * @param pSession The caller's session.
4802 * @param pReq The request packet.
4803 */
4804INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
4805{
4806 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4807 return VERR_INVALID_PARAMETER;
4808 return IntNetR0IfClose(pReq->hIf, pSession);
4809}
4810
4811
4812/**
4813 * Interface destructor callback.
4814 * This is called for reference counted objectes when the count reaches 0.
4815 *
4816 * @param pvObj The object pointer.
4817 * @param pvUser1 Pointer to the interface.
4818 * @param pvUser2 Pointer to the INTNET instance data.
4819 */
4820static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
4821{
4822 PINTNETIF pIf = (PINTNETIF)pvUser1;
4823 PINTNET pIntNet = (PINTNET)pvUser2;
4824 Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));
4825
4826 /*
4827 * We grab the INTNET create/open/destroy semaphore to make sure nobody is
4828 * adding or removing interface while we're in here. For paranoid reasons
4829 * we also mark the interface as destroyed here so any waiting threads can
4830 * take evasive action (theoretical case).
4831 */
4832 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4833 ASMAtomicWriteBool(&pIf->fDestroying, true);
4834
4835 /*
4836 * Delete the interface handle so the object no longer can be used.
4837 * (Can happen if the client didn't close its session.)
4838 */
4839 INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4840 if (hIf != INTNET_HANDLE_INVALID)
4841 {
4842 void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
4843 AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
4844 }
4845
4846 /*
4847 * If we've got a network deactivate and detach ourselves from it. Because
4848 * of cleanup order we might have been orphaned by the network destructor.
4849 */
4850 PINTNETNETWORK pNetwork = pIf->pNetwork;
4851 if (pNetwork)
4852 {
4853 /* set inactive. */
4854 intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);
4855
4856 /* remove ourselves from the switch table. */
4857 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4858
4859 uint32_t iIf = pNetwork->MacTab.cEntries;
4860 while (iIf-- > 0)
4861 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
4862 {
4863 if (pNetwork->MacTab.paEntries[iIf].fPromiscuousEff)
4864 {
4865 pNetwork->MacTab.cPromiscuousEntries--;
4866 if (!pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk)
4867 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4868 }
4869 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4870 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4871
4872 if (iIf + 1 < pNetwork->MacTab.cEntries)
4873 memmove(&pNetwork->MacTab.paEntries[iIf],
4874 &pNetwork->MacTab.paEntries[iIf + 1],
4875 (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
4876 pNetwork->MacTab.cEntries--;
4877 break;
4878 }
4879
4880 /* recalc the min flags. */
4881 if (pIf->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
4882 {
4883 uint32_t fMinFlags = 0;
4884 iIf = pNetwork->MacTab.cEntries;
4885 while (iIf-- > 0)
4886 {
4887 PINTNETIF pIf2 = pNetwork->MacTab.paEntries[iIf].pIf;
4888 if ( pIf2 /* paranoia */
4889 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
4890 fMinFlags |= pIf2->fOpenFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
4891 }
4892 pNetwork->fMinFlags = fMinFlags;
4893 }
4894
4895 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
4896
4897 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4898
4899 /* Notify the trunk about the interface being destroyed. */
4900 if (pTrunk && pTrunk->pIfPort)
4901 pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);
4902
4903 /* Wait for the interface to quiesce while we still can. */
4904 intnetR0BusyWait(pNetwork, &pIf->cBusy);
4905
4906 /* Release our reference to the network. */
4907 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4908 pIf->pNetwork = NULL;
4909 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4910
4911 SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
4912 }
4913
4914 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4915
4916 /*
4917 * Wakeup anyone waiting on this interface.
4918 *
4919 * We *must* make sure they have woken up properly and realized
4920 * that the interface is no longer valid.
4921 */
4922 if (pIf->hRecvEvent != NIL_RTSEMEVENT)
4923 {
4924 RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4925 unsigned cMaxWait = 0x1000;
4926 while (pIf->cSleepers && cMaxWait-- > 0)
4927 {
4928 RTSemEventSignal(hRecvEvent);
4929 RTThreadYield();
4930 }
4931 if (pIf->cSleepers)
4932 {
4933 RTThreadSleep(1);
4934
4935 cMaxWait = pIf->cSleepers;
4936 while (pIf->cSleepers && cMaxWait-- > 0)
4937 {
4938 RTSemEventSignal(hRecvEvent);
4939 RTThreadSleep(10);
4940 }
4941 }
4942
4943 RTSemEventDestroy(hRecvEvent);
4944 pIf->hRecvEvent = NIL_RTSEMEVENT;
4945 }
4946
4947 /*
4948 * Unmap user buffer.
4949 */
4950 if (pIf->pIntBuf != pIf->pIntBufDefault)
4951 {
4952 /** @todo user buffer */
4953 }
4954
4955 /*
4956 * Unmap and Free the default buffer.
4957 */
4958 if (pIf->pIntBufDefault)
4959 {
4960 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
4961 pIf->pIntBufDefault = NULL;
4962 pIf->pIntBufDefaultR3 = 0;
4963 pIf->pIntBuf = NULL;
4964 pIf->pIntBufR3 = 0;
4965 }
4966
4967 /*
4968 * Free remaining resources
4969 */
4970 RTSpinlockDestroy(pIf->hRecvInSpinlock);
4971 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
4972
4973 RTMemFree(pIf->pDstTab);
4974 pIf->pDstTab = NULL;
4975
4976 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
4977 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
4978
4979 pIf->pvObj = NULL;
4980 RTMemFree(pIf);
4981}
4982
4983
4984/**
4985 * Creates a new network interface.
4986 *
4987 * The call must have opened the network for the new interface and is
4988 * responsible for closing it on failure. On success it must leave the network
4989 * opened so the interface destructor can close it.
4990 *
4991 * @returns VBox status code.
4992 * @param pNetwork The network, referenced. The reference is consumed on
4993 * success.
4994 * @param pSession The session handle.
4995 * @param cbSend The size of the send buffer.
4996 * @param cbRecv The size of the receive buffer.
4997 * @param fFlags The open network flags.
4998 * @param phIf Where to store the interface handle.
4999 */
5000static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession,
5001 unsigned cbSend, unsigned cbRecv, uint32_t fFlags,
5002 PINTNETIFHANDLE phIf)
5003{
5004 LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u fFlags=%#x phIf=%p\n",
5005 pNetwork, pSession, cbSend, cbRecv, fFlags, phIf));
5006
5007 /*
5008 * Assert input.
5009 */
5010 AssertPtr(pNetwork);
5011 AssertPtr(phIf);
5012
5013 /*
5014 * Adjust the flags with defaults for the interface policies.
5015 * Note: Main restricts promiscuous mode per interface.
5016 */
5017 uint32_t const fDefFlags = INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
5018 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK;
5019 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
5020 if (!(fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair))
5021 fFlags |= g_afIntNetOpenNetworkIfFlags[i].fPair & fDefFlags;
5022
5023 /*
5024 * Make sure that all destination tables as well as the have space of
5025 */
5026 int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
5027 if (RT_FAILURE(rc))
5028 return rc;
5029
5030 /*
5031 * Allocate the interface and initialize it.
5032 */
5033 PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
5034 if (!pIf)
5035 return VERR_NO_MEMORY;
5036
5037 memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
5038 //pIf->fMacSet = false;
5039 //pIf->fPromiscuousReal = false;
5040 //pIf->fActive = false;
5041 //pIf->fDestroying = false;
5042 pIf->fOpenFlags = fFlags;
5043 //pIf->cYields = 0;
5044 //pIf->pIntBuf = 0;
5045 //pIf->pIntBufR3 = NIL_RTR3PTR;
5046 //pIf->pIntBufDefault = 0;
5047 //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
5048 pIf->hRecvEvent = NIL_RTSEMEVENT;
5049 //pIf->cSleepers = 0;
5050 pIf->hIf = INTNET_HANDLE_INVALID;
5051 pIf->pNetwork = pNetwork;
5052 pIf->pSession = pSession;
5053 //pIf->pvObj = NULL;
5054 //pIf->aAddrCache = {0};
5055 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5056 pIf->cBusy = 0;
5057 //pIf->pDstTab = NULL;
5058 //pIf->pvIfData = NULL;
5059
5060 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
5061 rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
5062 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
5063 if (RT_SUCCESS(rc))
5064 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
5065 if (RT_SUCCESS(rc))
5066 rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
5067 if (RT_SUCCESS(rc))
5068 rc = RTSpinlockCreate(&pIf->hRecvInSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hRecvInSpinlock");
5069 if (RT_SUCCESS(rc))
5070 {
5071 /*
5072 * Create the default buffer.
5073 */
5074 /** @todo adjust with minimums and apply defaults here. */
5075 cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5076 cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5077 const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
5078 rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
5079 if (RT_SUCCESS(rc))
5080 {
5081 ASMMemZero32(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
5082
5083 pIf->pIntBuf = pIf->pIntBufDefault;
5084 pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
5085 IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
5086
5087 /*
5088 * Register the interface with the session and create a handle for it.
5089 */
5090 pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
5091 intnetR0IfDestruct, pIf, pNetwork->pIntNet);
5092 if (pIf->pvObj)
5093 {
5094 rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
5095 if (RT_SUCCESS(rc))
5096 {
5097 /*
5098 * Finally add the interface to the network, consuming the
5099 * network reference of the caller.
5100 */
5101 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5102
5103 uint32_t iIf = pNetwork->MacTab.cEntries;
5104 Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
5105
5106 pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
5107 pNetwork->MacTab.paEntries[iIf].fActive = false;
5108 pNetwork->MacTab.paEntries[iIf].fPromiscuousEff = false;
5109 pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk = false;
5110 pNetwork->MacTab.paEntries[iIf].pIf = pIf;
5111
5112 pNetwork->MacTab.cEntries = iIf + 1;
5113 pIf->pNetwork = pNetwork;
5114
5115 /*
5116 * Grab a busy reference (paranoia) to the trunk before releasing
5117 * the spinlock and then notify it about the new interface.
5118 */
5119 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5120 if (pTrunk)
5121 intnetR0BusyIncTrunk(pTrunk);
5122
5123 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5124
5125 if (pTrunk)
5126 {
5127 Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
5128 if (pTrunk->pIfPort)
5129 rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
5130 intnetR0BusyDecTrunk(pTrunk);
5131 }
5132 if (RT_SUCCESS(rc))
5133 {
5134 /*
5135 * We're good!
5136 */
5137 *phIf = pIf->hIf;
5138 Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
5139 *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
5140 return VINF_SUCCESS;
5141 }
5142 }
5143
5144 SUPR0ObjAddRef(pNetwork->pvObj, pSession);
5145 SUPR0ObjRelease(pIf->pvObj, pSession);
5146 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5147 return rc;
5148 }
5149
5150 /* clean up */
5151 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5152 pIf->pIntBufDefault = NULL;
5153 pIf->pIntBuf = NULL;
5154 }
5155 }
5156
5157 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5158 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5159 RTSemEventDestroy(pIf->hRecvEvent);
5160 pIf->hRecvEvent = NIL_RTSEMEVENT;
5161 RTMemFree(pIf->pDstTab);
5162 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5163 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5164 RTMemFree(pIf);
5165 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5166 return rc;
5167}
5168
5169
5170/** @copydoc INTNETTRUNKSWPORT::pfnSetSGPhys */
5171static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
5172{
5173 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5174 AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
5175 return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
5176}
5177
5178
5179/** @copydoc INTNETTRUNKSWPORT::pfnReportMacAddress */
5180static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
5181{
5182 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5183
5184 /*
5185 * Get the network instance and grab the address spinlock before making
5186 * any changes.
5187 */
5188 intnetR0BusyIncTrunk(pThis);
5189 PINTNETNETWORK pNetwork = pThis->pNetwork;
5190 if (pNetwork)
5191 {
5192 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5193
5194 pNetwork->MacTab.HostMac = *pMacAddr;
5195 pThis->MacAddr = *pMacAddr;
5196
5197 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5198 }
5199 else
5200 pThis->MacAddr = *pMacAddr;
5201 intnetR0BusyDecTrunk(pThis);
5202}
5203
5204
5205/** @copydoc INTNETTRUNKSWPORT::pfnReportPromiscuousMode */
5206static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
5207{
5208 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5209
5210 /*
5211 * Get the network instance and grab the address spinlock before making
5212 * any changes.
5213 */
5214 intnetR0BusyIncTrunk(pThis);
5215 PINTNETNETWORK pNetwork = pThis->pNetwork;
5216 if (pNetwork)
5217 {
5218 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5219
5220 pNetwork->MacTab.fHostPromiscuousReal = fPromiscuous
5221 || (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE);
5222 pNetwork->MacTab.fHostPromiscuousEff = pNetwork->MacTab.fHostPromiscuousReal
5223 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5224
5225 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5226 }
5227 intnetR0BusyDecTrunk(pThis);
5228}
5229
5230
5231/** @copydoc INTNETTRUNKSWPORT::pfnReportGsoCapabilities */
5232static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
5233 uint32_t fGsoCapabilities, uint32_t fDst)
5234{
5235 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5236
5237 for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
5238 Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
5239 Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
5240 Assert(fDst);
5241
5242 if (fDst & INTNETTRUNKDIR_HOST)
5243 pThis->fHostGsoCapabilites = fGsoCapabilities;
5244
5245 if (fDst & INTNETTRUNKDIR_WIRE)
5246 pThis->fWireGsoCapabilites = fGsoCapabilities;
5247}
5248
5249
5250/** @copydoc INTNETTRUNKSWPORT::pfnReportNoPreemptDsts */
5251static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
5252{
5253 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5254 Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
5255
5256 pThis->fNoPreemptDsts = fNoPreemptDsts;
5257}
5258
5259
5260/** @copydoc INTNETTRUNKSWPORT::pfnDisconnect */
5261static DECLCALLBACK(void) intnetR0TrunkIfPortDisconnect(PINTNETTRUNKSWPORT pSwitchPort, PINTNETTRUNKIFPORT pIfPort,
5262 PFNINTNETTRUNKIFPORTRELEASEBUSY pfnReleaseBusy)
5263{
5264 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5265
5266 /*
5267 * The caller has marked the trunk instance busy on his side before making
5268 * the call (see method docs) to let us safely grab the network and internal
5269 * network instance pointers without racing the network destruction code
5270 * (intnetR0TrunkIfDestroy (called by intnetR0TrunkIfDestroy) will wait for
5271 * the interface to stop being busy before setting pNetwork to NULL and
5272 * freeing up the resources).
5273 */
5274 PINTNETNETWORK pNetwork = pThis->pNetwork;
5275 if (pNetwork)
5276 {
5277 PINTNET pIntNet = pNetwork->pIntNet;
5278 Assert(pNetwork->pIntNet);
5279
5280 /*
5281 * We must decrease the callers busy count here to prevent deadlocking
5282 * when requesting the big mutex ownership. This will of course
5283 * unblock anyone stuck in intnetR0TrunkIfDestroy doing pfnWaitForIdle
5284 * (the other deadlock party), so we have to revalidate the network
5285 * pointer after taking ownership of the big mutex.
5286 */
5287 pfnReleaseBusy(pIfPort);
5288
5289 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5290
5291 if (intnetR0NetworkIsValid(pIntNet, pNetwork))
5292 {
5293 Assert(pNetwork->MacTab.pTrunk == pThis); /* Must be valid as long as tehre are no concurrent calls to this method. */
5294 Assert(pThis->pIfPort == pIfPort); /* Ditto */
5295
5296 /*
5297 * Disconnect the trunk and destroy it, similar to what is done int
5298 * intnetR0NetworkDestruct.
5299 */
5300 pIfPort->pfnSetState(pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5301
5302 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5303 pNetwork->MacTab.pTrunk = NULL;
5304 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5305
5306 intnetR0TrunkIfDestroy(pThis, pNetwork);
5307 }
5308
5309 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5310 }
5311 /*
5312 * We must always release the busy reference.
5313 */
5314 else
5315 pfnReleaseBusy(pIfPort);
5316}
5317
5318
5319/** @copydoc INTNETTRUNKSWPORT::pfnPreRecv */
5320static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
5321 void const *pvSrc, size_t cbSrc, uint32_t fSrc)
5322{
5323 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5324
5325 /* assert some sanity */
5326 AssertPtr(pvSrc);
5327 AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
5328 Assert(fSrc);
5329
5330 /*
5331 * Mark the trunk as busy, make sure we've got a network and that there are
5332 * some active interfaces around.
5333 */
5334 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
5335 intnetR0BusyIncTrunk(pThis);
5336 PINTNETNETWORK pNetwork = pThis->pNetwork;
5337 if (RT_LIKELY( pNetwork
5338 && pNetwork->cActiveIFs > 0 ))
5339 {
5340 /*
5341 * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
5342 */
5343 PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
5344 if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
5345 enmSwDecision = INTNETSWDECISION_BROADCAST;
5346 else if ( fSrc == INTNETTRUNKDIR_WIRE
5347 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5348 enmSwDecision = INTNETSWDECISION_BROADCAST;
5349 else
5350 enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
5351 fSrc,
5352 cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
5353 &pEthHdr->DstMac);
5354 }
5355
5356 intnetR0BusyDecTrunk(pThis);
5357 return enmSwDecision;
5358}
5359
5360
5361/** @copydoc INTNETTRUNKSWPORT::pfnRecv */
5362static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
5363{
5364 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5365
5366 /* assert some sanity */
5367 AssertPtr(pSG);
5368 Assert(fSrc);
5369 NOREF(pvIf); /* later */
5370
5371 /*
5372 * Mark the trunk as busy, make sure we've got a network and that there are
5373 * some active interfaces around.
5374 */
5375 bool fRc = false /* don't drop it */;
5376 intnetR0BusyIncTrunk(pThis);
5377 PINTNETNETWORK pNetwork = pThis->pNetwork;
5378 if (RT_LIKELY( pNetwork
5379 && pNetwork->cActiveIFs > 0 ))
5380 {
5381 /*
5382 * Grab or allocate a destination table.
5383 */
5384 bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
5385 unsigned iDstTab = 0;
5386 PINTNETDSTTAB pDstTab = NULL;
5387 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5388 if (fIntCtx)
5389 {
5390 /* Interrupt or restricted context. */
5391 iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
5392 iDstTab %= pThis->cIntDstTabs;
5393 pDstTab = pThis->apIntDstTabs[iDstTab];
5394 if (RT_LIKELY(pDstTab))
5395 pThis->apIntDstTabs[iDstTab] = NULL;
5396 else
5397 {
5398 iDstTab = pThis->cIntDstTabs;
5399 while (iDstTab-- > 0)
5400 {
5401 pDstTab = pThis->apIntDstTabs[iDstTab];
5402 if (pDstTab)
5403 {
5404 pThis->apIntDstTabs[iDstTab] = NULL;
5405 break;
5406 }
5407 }
5408 }
5409 RTSpinlockRelease(pThis->hDstTabSpinlock);
5410 Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
5411 }
5412 else
5413 {
5414 /* Task context, fallback is to allocate a table. */
5415 AssertCompile(RT_ELEMENTS(pThis->apTaskDstTabs) == 2); /* for loop rollout */
5416 pDstTab = pThis->apIntDstTabs[iDstTab = 0];
5417 if (!pDstTab)
5418 pDstTab = pThis->apIntDstTabs[iDstTab = 1];
5419 if (pDstTab)
5420 {
5421 pThis->apIntDstTabs[iDstTab] = NULL;
5422 RTSpinlockRelease(pThis->hDstTabSpinlock);
5423 Assert(iDstTab < RT_ELEMENTS(pThis->apTaskDstTabs));
5424 }
5425 else
5426 {
5427 RTSpinlockRelease(pThis->hDstTabSpinlock);
5428 intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pDstTab);
5429 iDstTab = 65535;
5430 }
5431 }
5432 if (RT_LIKELY(pDstTab))
5433 {
5434 /*
5435 * Finally, get down to business of sending the frame.
5436 */
5437 INTNETSWDECISION enmSwDecision = intnetR0NetworkSend(pNetwork, NULL, fSrc, pSG, pDstTab);
5438 AssertMsg(enmSwDecision != INTNETSWDECISION_BAD_CONTEXT, ("fSrc=%#x fTrunkDst=%#x hdr=%.14Rhxs\n", fSrc, pDstTab->fTrunkDst, pSG->aSegs[0].pv));
5439 if (enmSwDecision == INTNETSWDECISION_INTNET)
5440 fRc = true; /* drop it */
5441
5442 /*
5443 * Free the destination table.
5444 */
5445 if (iDstTab == 65535)
5446 RTMemFree(pDstTab);
5447 else
5448 {
5449 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5450 if (fIntCtx && !pThis->apIntDstTabs[iDstTab])
5451 pThis->apIntDstTabs[iDstTab] = pDstTab;
5452 else if (!fIntCtx && !pThis->apTaskDstTabs[iDstTab])
5453 pThis->apTaskDstTabs[iDstTab] = pDstTab;
5454 else
5455 {
5456 /* this shouldn't happen! */
5457 PINTNETDSTTAB *papDstTabs = fIntCtx ? &pThis->apIntDstTabs[0] : &pThis->apTaskDstTabs[0];
5458 iDstTab = fIntCtx ? pThis->cIntDstTabs : RT_ELEMENTS(pThis->apTaskDstTabs);
5459 while (iDstTab-- > 0)
5460 if (!papDstTabs[iDstTab])
5461 {
5462 papDstTabs[iDstTab] = pDstTab;
5463 break;
5464 }
5465 }
5466 RTSpinlockRelease(pThis->hDstTabSpinlock);
5467 Assert(iDstTab < RT_MAX(RT_ELEMENTS(pThis->apTaskDstTabs), pThis->cIntDstTabs));
5468 }
5469 }
5470 }
5471
5472 intnetR0BusyDecTrunk(pThis);
5473 return fRc;
5474}
5475
5476
5477/** @copydoc INTNETTRUNKSWPORT::pfnSGRetain */
5478static DECLCALLBACK(void) intnetR0TrunkIfPortSGRetain(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5479{
5480 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5481 PINTNETNETWORK pNetwork = pThis->pNetwork;
5482
5483 /* assert some sanity */
5484 AssertPtrReturnVoid(pNetwork);
5485 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5486 AssertPtr(pSG);
5487 Assert(pSG->cUsers > 0 && pSG->cUsers < 256);
5488
5489 /* do it. */
5490 ++pSG->cUsers;
5491}
5492
5493
5494/** @copydoc INTNETTRUNKSWPORT::pfnSGRelease */
5495static DECLCALLBACK(void) intnetR0TrunkIfPortSGRelease(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5496{
5497 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5498 PINTNETNETWORK pNetwork = pThis->pNetwork;
5499
5500 /* assert some sanity */
5501 AssertPtrReturnVoid(pNetwork);
5502 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5503 AssertPtr(pSG);
5504 Assert(pSG->cUsers > 0);
5505
5506 /*
5507 * Free it?
5508 */
5509 if (!--pSG->cUsers)
5510 {
5511 /** @todo later */
5512 }
5513}
5514
5515
5516/** @copydoc INTNETTRUNKSWPORT::pfnNotifyHostAddress */
5517static DECLCALLBACK(void) intnetR0NetworkNotifyHostAddress(PINTNETTRUNKSWPORT pSwitchPort,
5518 bool fAdded,
5519 INTNETADDRTYPE enmType, const void *pvAddr)
5520{
5521 PINTNETTRUNKIF pTrunkIf = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5522 PINTNETNETWORK pNetwork = pTrunkIf->pNetwork;
5523 PCRTNETADDRU pAddr = (PCRTNETADDRU)pvAddr;
5524 uint8_t cbAddr;
5525
5526 if (enmType == kIntNetAddrType_IPv4)
5527 {
5528 Log(("%s: %s %RTnaipv4\n",
5529 __FUNCTION__, (fAdded ? "add" : "del"),
5530 pAddr->IPv4));
5531 cbAddr = 4;
5532 }
5533 else if (enmType == kIntNetAddrType_IPv6)
5534 {
5535 Log(("%s: %s %RTnaipv6\n",
5536 __FUNCTION__, (fAdded ? "add" : "del"),
5537 pAddr));
5538 cbAddr = 16;
5539 }
5540 else
5541 {
5542 Log(("%s: unexpected address type %d\n", __FUNCTION__, enmType));
5543 return;
5544 }
5545
5546 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5547 if (fAdded) /* one of host interfaces got a new address */
5548 {
5549 /* blacklist it to prevent spoofing by guests */
5550 intnetR0NetworkBlacklistAdd(pNetwork, pAddr, enmType);
5551
5552 /* kick out any guest that uses it */
5553 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, "tif/host");
5554 }
5555 else /* address deleted from one of host interfaces */
5556 {
5557 /* stop blacklisting it, guests may use it now */
5558 intnetR0NetworkBlacklistDelete(pNetwork, pAddr, enmType);
5559 }
5560 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5561}
5562
5563
5564/**
5565 * Shutdown the trunk interface.
5566 *
5567 * @param pThis The trunk.
5568 * @param pNetworks The network.
5569 *
5570 * @remarks The caller must hold the global lock.
5571 */
5572static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork)
5573{
5574 /* assert sanity */
5575 if (!pThis)
5576 return;
5577 AssertPtr(pThis);
5578 Assert(pThis->pNetwork == pNetwork);
5579 AssertPtrNull(pThis->pIfPort);
5580
5581 /*
5582 * The interface has already been deactivated, we just to wait for
5583 * it to become idle before we can disconnect and release it.
5584 */
5585 PINTNETTRUNKIFPORT pIfPort = pThis->pIfPort;
5586 if (pIfPort)
5587 {
5588 /* unset it */
5589 pThis->pIfPort = NULL;
5590
5591 /* wait in portions so we can complain every now an then. */
5592 uint64_t StartTS = RTTimeSystemNanoTS();
5593 int rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
5594 if (RT_FAILURE(rc))
5595 {
5596 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
5597 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5598 Assert(rc == VERR_TIMEOUT);
5599 while ( RT_FAILURE(rc)
5600 && RTTimeSystemNanoTS() - StartTS < UINT64_C(30000000000)) /* 30 sec */
5601 rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
5602 if (rc == VERR_TIMEOUT)
5603 {
5604 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
5605 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5606 while ( rc == VERR_TIMEOUT
5607 && RTTimeSystemNanoTS() - StartTS < UINT64_C(360000000000)) /* 360 sec */
5608 rc = pIfPort->pfnWaitForIdle(pIfPort, 30*1000);
5609 if (RT_FAILURE(rc))
5610 {
5611 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc), giving up.\n",
5612 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5613 AssertRC(rc);
5614 }
5615 }
5616 }
5617
5618 /* disconnect & release it. */
5619 pIfPort->pfnDisconnectAndRelease(pIfPort);
5620 }
5621
5622 /*
5623 * Free up the resources.
5624 */
5625 pThis->pNetwork = NULL; /* Must not be cleared while busy, see intnetR0TrunkIfPortDisconnect. */
5626 RTSpinlockDestroy(pThis->hDstTabSpinlock);
5627 for (unsigned i = 0; i < RT_ELEMENTS(pThis->apTaskDstTabs); i++)
5628 {
5629 Assert(pThis->apTaskDstTabs[i]);
5630 RTMemFree(pThis->apTaskDstTabs[i]);
5631 pThis->apTaskDstTabs[i] = NULL;
5632 }
5633 for (unsigned i = 0; i < pThis->cIntDstTabs; i++)
5634 {
5635 Assert(pThis->apIntDstTabs[i]);
5636 RTMemFree(pThis->apIntDstTabs[i]);
5637 pThis->apIntDstTabs[i] = NULL;
5638 }
5639 RTMemFree(pThis);
5640}
5641
5642
5643/**
5644 * Creates the trunk connection (if any).
5645 *
5646 * @returns VBox status code.
5647 *
5648 * @param pNetwork The newly created network.
5649 * @param pSession The session handle.
5650 */
5651static int intnetR0NetworkCreateTrunkIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession)
5652{
5653 const char *pszName;
5654 switch (pNetwork->enmTrunkType)
5655 {
5656 /*
5657 * The 'None' case, simple.
5658 */
5659 case kIntNetTrunkType_None:
5660 case kIntNetTrunkType_WhateverNone:
5661#ifdef VBOX_WITH_NAT_SERVICE
5662 /*
5663 * Well, here we don't want load anything special,
5664 * just communicate between processes via internal network.
5665 */
5666 case kIntNetTrunkType_SrvNat:
5667#endif
5668 return VINF_SUCCESS;
5669
5670 /* Can't happen, but makes GCC happy. */
5671 default:
5672 return VERR_NOT_IMPLEMENTED;
5673
5674 /*
5675 * Translate enum to component factory name.
5676 */
5677 case kIntNetTrunkType_NetFlt:
5678 pszName = "VBoxNetFlt";
5679 break;
5680 case kIntNetTrunkType_NetAdp:
5681#if defined(RT_OS_DARWIN) && !defined(VBOXNETADP_DO_NOT_USE_NETFLT)
5682 pszName = "VBoxNetFlt";
5683#else /* VBOXNETADP_DO_NOT_USE_NETFLT */
5684 pszName = "VBoxNetAdp";
5685#endif /* VBOXNETADP_DO_NOT_USE_NETFLT */
5686 break;
5687#ifndef VBOX_WITH_NAT_SERVICE
5688 case kIntNetTrunkType_SrvNat:
5689 pszName = "VBoxSrvNat";
5690 break;
5691#endif
5692 }
5693
5694 /*
5695 * Allocate the trunk interface and associated destination tables.
5696 *
5697 * We take a very optimistic view on the parallelism of the host
5698 * network stack and NIC driver. So, we allocate one table for each
5699 * possible CPU to deal with interrupt time requests and one for task
5700 * time calls.
5701 */
5702 RTCPUID cCpus = RTMpGetCount(); Assert(cCpus > 0);
5703 PINTNETTRUNKIF pTrunk = (PINTNETTRUNKIF)RTMemAllocZ(RT_OFFSETOF(INTNETTRUNKIF, apIntDstTabs[cCpus]));
5704 if (!pTrunk)
5705 return VERR_NO_MEMORY;
5706
5707 Assert(pNetwork->MacTab.cEntriesAllocated > 0);
5708 int rc = VINF_SUCCESS;
5709 pTrunk->cIntDstTabs = cCpus;
5710 for (unsigned i = 0; i < cCpus && RT_SUCCESS(rc); i++)
5711 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apIntDstTabs[i]);
5712 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs) && RT_SUCCESS(rc); i++)
5713 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apTaskDstTabs[i]);
5714
5715 if (RT_SUCCESS(rc))
5716 {
5717 pTrunk->SwitchPort.u32Version = INTNETTRUNKSWPORT_VERSION;
5718 pTrunk->SwitchPort.pfnPreRecv = intnetR0TrunkIfPortPreRecv;
5719 pTrunk->SwitchPort.pfnRecv = intnetR0TrunkIfPortRecv;
5720 pTrunk->SwitchPort.pfnSGRetain = intnetR0TrunkIfPortSGRetain;
5721 pTrunk->SwitchPort.pfnSGRelease = intnetR0TrunkIfPortSGRelease;
5722 pTrunk->SwitchPort.pfnSetSGPhys = intnetR0TrunkIfPortSetSGPhys;
5723 pTrunk->SwitchPort.pfnReportMacAddress = intnetR0TrunkIfPortReportMacAddress;
5724 pTrunk->SwitchPort.pfnReportPromiscuousMode = intnetR0TrunkIfPortReportPromiscuousMode;
5725 pTrunk->SwitchPort.pfnReportGsoCapabilities = intnetR0TrunkIfPortReportGsoCapabilities;
5726 pTrunk->SwitchPort.pfnReportNoPreemptDsts = intnetR0TrunkIfPortReportNoPreemptDsts;
5727 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5728 pTrunk->SwitchPort.pfnNotifyHostAddress = intnetR0NetworkNotifyHostAddress;
5729 pTrunk->SwitchPort.pfnDisconnect = intnetR0TrunkIfPortDisconnect;
5730 pTrunk->SwitchPort.u32VersionEnd = INTNETTRUNKSWPORT_VERSION;
5731 //pTrunk->pIfPort = NULL;
5732 pTrunk->pNetwork = pNetwork;
5733 pTrunk->MacAddr.au8[0] = 0xff;
5734 pTrunk->MacAddr.au8[1] = 0xff;
5735 pTrunk->MacAddr.au8[2] = 0xff;
5736 pTrunk->MacAddr.au8[3] = 0xff;
5737 pTrunk->MacAddr.au8[4] = 0xff;
5738 pTrunk->MacAddr.au8[5] = 0xff;
5739 //pTrunk->fPhysSG = false;
5740 //pTrunk->fUnused = false;
5741 //pTrunk->cBusy = 0;
5742 //pTrunk->fNoPreemptDsts = 0;
5743 //pTrunk->fWireGsoCapabilites = 0;
5744 //pTrunk->fHostGsoCapabilites = 0;
5745 //pTrunk->abGsoHdrs = {0};
5746 pTrunk->hDstTabSpinlock = NIL_RTSPINLOCK;
5747 //pTrunk->apTaskDstTabs = above;
5748 //pTrunk->cIntDstTabs = above;
5749 //pTrunk->apIntDstTabs = above;
5750
5751 /*
5752 * Create the lock (we've NIL'ed the members above to simplify cleanup).
5753 */
5754 rc = RTSpinlockCreate(&pTrunk->hDstTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hDstTabSpinlock");
5755 if (RT_SUCCESS(rc))
5756 {
5757 /*
5758 * There are a couple of bits in MacTab as well pertaining to the
5759 * trunk. We have to set this before it's reported.
5760 *
5761 * Note! We don't need to lock the MacTab here - creation time.
5762 */
5763 pNetwork->MacTab.pTrunk = pTrunk;
5764 pNetwork->MacTab.HostMac = pTrunk->MacAddr;
5765 pNetwork->MacTab.fHostPromiscuousReal = false;
5766 pNetwork->MacTab.fHostPromiscuousEff = (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE)
5767 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5768 pNetwork->MacTab.fHostActive = false;
5769 pNetwork->MacTab.fWirePromiscuousReal = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
5770 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
5771 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
5772 pNetwork->MacTab.fWireActive = false;
5773
5774#ifdef IN_RING0 /* (testcase is ring-3) */
5775 /*
5776 * Query the factory we want, then use it create and connect the trunk.
5777 */
5778 PINTNETTRUNKFACTORY pTrunkFactory = NULL;
5779 rc = SUPR0ComponentQueryFactory(pSession, pszName, INTNETTRUNKFACTORY_UUID_STR, (void **)&pTrunkFactory);
5780 if (RT_SUCCESS(rc))
5781 {
5782 rc = pTrunkFactory->pfnCreateAndConnect(pTrunkFactory,
5783 pNetwork->szTrunk,
5784 &pTrunk->SwitchPort,
5785 pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE
5786 ? INTNETTRUNKFACTORY_FLAG_NO_PROMISC
5787 : 0,
5788 &pTrunk->pIfPort);
5789 pTrunkFactory->pfnRelease(pTrunkFactory);
5790 if (RT_SUCCESS(rc))
5791 {
5792 Assert(pTrunk->pIfPort);
5793
5794 Log(("intnetR0NetworkCreateTrunkIf: VINF_SUCCESS - pszName=%s szTrunk=%s%s Network=%s\n",
5795 pszName, pNetwork->szTrunk, pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE ? " shared-mac" : "", pNetwork->szName));
5796 return VINF_SUCCESS;
5797 }
5798 }
5799#else /* IN_RING3 */
5800 NOREF(pSession);
5801 rc = VERR_NOT_SUPPORTED;
5802#endif /* IN_RING3 */
5803
5804 pNetwork->MacTab.pTrunk = NULL;
5805 }
5806
5807 /* bail out and clean up. */
5808 RTSpinlockDestroy(pTrunk->hDstTabSpinlock);
5809 }
5810
5811 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs); i++)
5812 RTMemFree(pTrunk->apTaskDstTabs[i]);
5813 for (unsigned i = 0; i < pTrunk->cIntDstTabs; i++)
5814 RTMemFree(pTrunk->apIntDstTabs[i]);
5815 RTMemFree(pTrunk);
5816
5817 LogFlow(("intnetR0NetworkCreateTrunkIf: %Rrc - pszName=%s szTrunk=%s Network=%s\n",
5818 rc, pszName, pNetwork->szTrunk, pNetwork->szName));
5819 return rc;
5820}
5821
5822
5823
/**
 * Object destructor callback.
 * This is called for reference counted objects when the count reaches 0.
 *
 * Runs entirely under INTNET::hMtxCreateOpenDestroy: it tells the trunk (if
 * any) that it is being disconnected, deactivates and orphans whatever
 * interfaces are still in the MAC table, destroys the trunk, unlinks the
 * network from the instance list and finally frees the network's resources.
 *
 * @param   pvObj       The object pointer.
 * @param   pvUser1     Pointer to the network.
 * @param   pvUser2     Pointer to the INTNET instance data.
 */
static DECLCALLBACK(void) intnetR0NetworkDestruct(void *pvObj, void *pvUser1, void *pvUser2)
{
    PINTNETNETWORK pNetwork = (PINTNETNETWORK)pvUser1;
    PINTNET pIntNet = (PINTNET)pvUser2;
    Log(("intnetR0NetworkDestruct: pvObj=%p pNetwork=%p pIntNet=%p %s\n", pvObj, pNetwork, pIntNet, pNetwork->szName));
    Assert(pNetwork->pIntNet == pIntNet);

    /* Take the big create/open/destroy sem.  (The status code is not checked.) */
    RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);

    /*
     * Tell the trunk, if present, that we're about to disconnect it and wish
     * no further calls from it.
     */
    PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
    if (pTrunk)
        pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);

    /*
     * Deactivate and orphan any remaining interfaces and wait for them to idle.
     *
     * Note! Normally there are no more interfaces at this point, however, when
     *       supdrvCloseSession / supdrvCleanupSession release the objects the
     *       order is undefined.  So, it's quite possible that the network will
     *       be dereferenced and destroyed before the interfaces.
     */
    RTSpinlockAcquire(pNetwork->hAddrSpinlock);

    /* Clear the active flag both in the MAC table entry and in the interface. */
    uint32_t iIf = pNetwork->MacTab.cEntries;
    while (iIf-- > 0)
    {
        pNetwork->MacTab.paEntries[iIf].fActive = false;
        pNetwork->MacTab.paEntries[iIf].pIf->fActive = false;
    }

    /* The host and wire sides of the trunk are deactivated as well. */
    pNetwork->MacTab.fHostActive = false;
    pNetwork->MacTab.fWireActive = false;

    RTSpinlockRelease(pNetwork->hAddrSpinlock);

    /* Wait for all the interfaces to quiesce.  (Interfaces cannot be
       removed / added since we're holding the big lock.) */
    if (pTrunk)
        intnetR0BusyWait(pNetwork, &pTrunk->cBusy);

    iIf = pNetwork->MacTab.cEntries;
    while (iIf-- > 0)
        intnetR0BusyWait(pNetwork, &pNetwork->MacTab.paEntries[iIf].pIf->cBusy);

    /* Orphan the interfaces (not trunk).  Don't bother with calling
       pfnDisconnectInterface here since the networking is going away.
       The spinlock is dropped around each busy wait and re-taken before the
       table is touched again; entries are removed from the end of the table. */
    RTSpinlockAcquire(pNetwork->hAddrSpinlock);
    while ((iIf = pNetwork->MacTab.cEntries) > 0)
    {
        PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf - 1].pIf;
        RTSpinlockRelease(pNetwork->hAddrSpinlock);

        intnetR0BusyWait(pNetwork, &pIf->cBusy);

        RTSpinlockAcquire(pNetwork->hAddrSpinlock);
        /* NOTE(review): the entry is only orphaned (and cEntries decremented)
           when pIf->cBusy is non-zero after the wait.  If intnetR0BusyWait
           returns with the counter at zero, this loop cannot make progress.
           The test looks inverted (expected !pIf->cBusy) - confirm against
           intnetR0BusyWait before changing anything. */
        if (   iIf == pNetwork->MacTab.cEntries /* paranoia */
            && pIf->cBusy)
        {
            pIf->pNetwork = NULL;
            pNetwork->MacTab.cEntries--;
        }
    }

    /*
     * Zap the trunk pointer while we still own the spinlock, destroy the
     * trunk after we've left it.  Note that this might take a while...
     */
    pNetwork->MacTab.pTrunk = NULL;

    RTSpinlockRelease(pNetwork->hAddrSpinlock);

    if (pTrunk)
        intnetR0TrunkIfDestroy(pTrunk, pNetwork);

    /*
     * Unlink the network.
     * Note that it needn't be in the list if we failed during creation.
     */
    PINTNETNETWORK pPrev = pIntNet->pNetworks;
    if (pPrev == pNetwork)
        pIntNet->pNetworks = pNetwork->pNext;
    else
    {
        /* Singly linked list: find the predecessor and bypass this node. */
        for (; pPrev; pPrev = pPrev->pNext)
            if (pPrev->pNext == pNetwork)
            {
                pPrev->pNext = pNetwork->pNext;
                break;
            }
    }
    pNetwork->pNext = NULL;
    pNetwork->pvObj = NULL;

    /*
     * Free resources.  Handles and pointers are reset right after being
     * released, the network structure itself goes last.
     */
    RTSemEventDestroy(pNetwork->hEvtBusyIf);
    pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
    RTSpinlockDestroy(pNetwork->hAddrSpinlock);
    pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
    RTMemFree(pNetwork->MacTab.paEntries);
    pNetwork->MacTab.paEntries = NULL;
    for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
        intnetR0IfAddrCacheDestroy(&pNetwork->aAddrBlacklist[i]);
    RTMemFree(pNetwork);

    /* Release the create/destroy sem.  (pIntNet is a local copy, so this does
       not touch the network memory freed above.) */
    RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
}
5946
5947
5948/**
5949 * Checks if the open network flags are compatible.
5950 *
5951 * @returns VBox status code.
5952 * @param pNetwork The network.
5953 * @param fFlags The open network flags.
5954 */
5955static int intnetR0CheckOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
5956{
5957 uint32_t const fNetFlags = pNetwork->fFlags;
5958
5959 if ( (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5960 ^ (fNetFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5961 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5962
5963 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_EXACT)
5964 {
5965 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5966 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
5967 && (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
5968 != (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) )
5969 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5970 }
5971
5972 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
5973 {
5974 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5975 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
5976 && !(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
5977 && (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed) )
5978 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5979 }
5980
5981 return VINF_SUCCESS;
5982}
5983
5984
5985/**
5986 * Adapts flag changes on network opening.
5987 *
5988 * @returns VBox status code.
5989 * @param pNetwork The network.
5990 * @param fFlags The open network flags.
5991 */
5992static int intnetR0AdaptOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
5993{
5994 /*
5995 * Upgrade the minimum policy flags.
5996 */
5997 uint32_t fNetMinFlags = pNetwork->fMinFlags;
5998 Assert(!(fNetMinFlags & INTNET_OPEN_FLAGS_RELAXED_MASK));
5999 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
6000 {
6001 fNetMinFlags |= fFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
6002 if (fNetMinFlags != pNetwork->fMinFlags)
6003 {
6004 LogRel(("INTNET: %s - min flags changed %#x -> %#x\n", pNetwork->szName, pNetwork->fMinFlags, fNetMinFlags));
6005 pNetwork->fMinFlags = fNetMinFlags;
6006 }
6007 }
6008
6009 /*
6010 * Calculate the new network flags.
6011 * (Depends on fNetMinFlags being recalculated first.)
6012 */
6013 uint32_t fNetFlags = pNetwork->fFlags;
6014
6015 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6016 {
6017 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6018 Assert(!(fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRelaxed));
6019
6020 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6021 continue;
6022 if (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed)
6023 continue;
6024
6025 if ( (fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6026 || (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive) )
6027 {
6028 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6029 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRestrictive;
6030 }
6031 else if (!(fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
6032 {
6033 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6034 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRelaxed;
6035 }
6036 }
6037
6038 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6039 {
6040 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6041 fNetFlags |= fFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed;
6042 }
6043
6044 /*
6045 * Apply the flags if they changed.
6046 */
6047 uint32_t const fOldNetFlags = pNetwork->fFlags;
6048 if (fOldNetFlags != fNetFlags)
6049 {
6050 LogRel(("INTNET: %s - flags changed %#x -> %#x\n", pNetwork->szName, fOldNetFlags, fNetFlags));
6051
6052 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
6053
6054 pNetwork->fFlags = fNetFlags;
6055
6056 /* Recalculate some derived switcher variables. */
6057 bool fActiveTrunk = pNetwork->MacTab.pTrunk
6058 && pNetwork->cActiveIFs > 0;
6059 pNetwork->MacTab.fHostActive = fActiveTrunk
6060 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6061 pNetwork->MacTab.fHostPromiscuousEff = ( pNetwork->MacTab.fHostPromiscuousReal
6062 || (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE))
6063 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
6064
6065 pNetwork->MacTab.fWireActive = fActiveTrunk
6066 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6067 pNetwork->MacTab.fWirePromiscuousReal= RT_BOOL(fNetFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
6068 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
6069 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
6070
6071 if ((fOldNetFlags ^ fNetFlags) & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6072 {
6073 pNetwork->MacTab.cPromiscuousEntries = 0;
6074 pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6075
6076 uint32_t iIf = pNetwork->MacTab.cEntries;
6077 while (iIf-- > 0)
6078 {
6079 PINTNETMACTABENTRY pEntry = &pNetwork->MacTab.paEntries[iIf];
6080 PINTNETIF pIf2 = pEntry->pIf;
6081 if ( pIf2 /* paranoia */
6082 && pIf2->fPromiscuousReal)
6083 {
6084 bool fPromiscuousEff = (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6085 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW);
6086 pEntry->fPromiscuousEff = fPromiscuousEff;
6087 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
6088 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
6089
6090 if (pEntry->fPromiscuousEff)
6091 {
6092 pNetwork->MacTab.cPromiscuousEntries++;
6093 if (!pEntry->fPromiscuousSeeTrunk)
6094 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
6095 }
6096 }
6097 }
6098 }
6099
6100 RTSpinlockRelease(pNetwork->hAddrSpinlock);
6101 }
6102
6103 return VINF_SUCCESS;
6104}
6105
6106
6107/**
6108 * Opens an existing network.
6109 *
6110 * The call must own the INTNET::hMtxCreateOpenDestroy.
6111 *
6112 * @returns VBox status code.
6113 * @param pIntNet The instance data.
6114 * @param pSession The current session.
6115 * @param pszNetwork The network name. This has a valid length.
6116 * @param enmTrunkType The trunk type.
6117 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6118 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6119 * @param ppNetwork Where to store the pointer to the network on success.
6120 */
6121static int intnetR0OpenNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6122 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6123{
6124 LogFlow(("intnetR0OpenNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6125 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6126
6127 /* just pro forma validation, the caller is internal. */
6128 AssertPtr(pIntNet);
6129 AssertPtr(pSession);
6130 AssertPtr(pszNetwork);
6131 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6132 AssertPtr(pszTrunk);
6133 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6134 AssertPtr(ppNetwork);
6135 *ppNetwork = NULL;
6136
6137 /*
6138 * Search networks by name.
6139 */
6140 PINTNETNETWORK pCur;
6141 uint8_t cchName = (uint8_t)strlen(pszNetwork);
6142 Assert(cchName && cchName < sizeof(pCur->szName)); /* caller ensures this */
6143
6144 pCur = pIntNet->pNetworks;
6145 while (pCur)
6146 {
6147 if ( pCur->cchName == cchName
6148 && !memcmp(pCur->szName, pszNetwork, cchName))
6149 {
6150 /*
6151 * Found the network, now check that we have the same ideas
6152 * about the trunk setup and security.
6153 */
6154 int rc;
6155 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6156#ifdef VBOX_WITH_NAT_SERVICE
6157 || enmTrunkType == kIntNetTrunkType_SrvNat /* @todo: what does it mean */
6158#endif
6159 || ( pCur->enmTrunkType == enmTrunkType
6160 && !strcmp(pCur->szTrunk, pszTrunk)))
6161 {
6162 rc = intnetR0CheckOpenNetworkFlags(pCur, fFlags);
6163 if (RT_SUCCESS(rc))
6164 {
6165 /*
6166 * Increment the reference and check that the session
6167 * can access this network.
6168 */
6169 rc = SUPR0ObjAddRef(pCur->pvObj, pSession);
6170 if (RT_SUCCESS(rc))
6171 {
6172 if (pCur->fFlags & INTNET_OPEN_FLAGS_ACCESS_RESTRICTED)
6173 rc = SUPR0ObjVerifyAccess(pCur->pvObj, pSession, pCur->szName);
6174 if (RT_SUCCESS(rc))
6175 *ppNetwork = pCur;
6176 else
6177 SUPR0ObjRelease(pCur->pvObj, pSession);
6178 }
6179 else if (rc == VERR_WRONG_ORDER)
6180 rc = VERR_NOT_FOUND; /* destruction race, pretend the other isn't there. */
6181 }
6182 }
6183 else
6184 {
6185 rc = VERR_INTNET_INCOMPATIBLE_TRUNK;
6186 LogRel(("intnetR0OpenNetwork failed. rc=%Rrc pCur->szTrunk=%s pszTrunk=%s pCur->enmTrunkType=%d enmTrunkType=%d\n",
6187 rc, pCur->szTrunk, pszTrunk, pCur->enmTrunkType, enmTrunkType));
6188 }
6189
6190 LogFlow(("intnetR0OpenNetwork: returns %Rrc *ppNetwork=%p\n", rc, *ppNetwork));
6191 return rc;
6192 }
6193
6194 pCur = pCur->pNext;
6195 }
6196
6197 LogFlow(("intnetR0OpenNetwork: returns VERR_NOT_FOUND\n"));
6198 return VERR_NOT_FOUND;
6199}
6200
6201
6202/**
6203 * Creates a new network.
6204 *
6205 * The call must own the INTNET::hMtxCreateOpenDestroy and has already attempted
6206 * opening the network and found it to be non-existing.
6207 *
6208 * @returns VBox status code.
6209 * @param pIntNet The instance data.
6210 * @param pSession The session handle.
6211 * @param pszNetwork The name of the network. This must be at least one character long and no longer
6212 * than the INTNETNETWORK::szName.
6213 * @param enmTrunkType The trunk type.
6214 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6215 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6216 * @param ppNetwork Where to store the network. In the case of failure
6217 * whatever is returned here should be dereferenced
6218 * outside the INTNET::hMtxCreateOpenDestroy.
6219 */
6220static int intnetR0CreateNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6221 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6222{
6223 LogFlow(("intnetR0CreateNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6224 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6225
6226 /* just pro forma validation, the caller is internal. */
6227 AssertPtr(pIntNet);
6228 AssertPtr(pSession);
6229 AssertPtr(pszNetwork);
6230 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6231 AssertPtr(pszTrunk);
6232 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6233 AssertPtr(ppNetwork);
6234
6235 *ppNetwork = NULL;
6236
6237 /*
6238 * Adjust the flags with defaults for the network policies.
6239 * Note: Main restricts promiscuous mode on the per interface level.
6240 */
6241 fFlags &= ~( INTNET_OPEN_FLAGS_IF_FIXED
6242 | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
6243 | INTNET_OPEN_FLAGS_IF_PROMISC_DENY
6244 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK
6245 | INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK
6246 | INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES
6247 | INTNET_OPEN_FLAGS_REQUIRE_EXACT);
6248 uint32_t fDefFlags = INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS
6249 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST
6250 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE
6251 | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED
6252 | INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE
6253 | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED
6254 | INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE;
6255 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6256#ifdef VBOX_WITH_NAT_SERVICE
6257 || enmTrunkType == kIntNetTrunkType_SrvNat /* simialar security */
6258#endif
6259 || enmTrunkType == kIntNetTrunkType_None)
6260 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_RESTRICTED;
6261 else
6262 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_PUBLIC;
6263 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6264 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6265 fFlags |= g_afIntNetOpenNetworkNetFlags[i].fPair & fDefFlags;
6266
6267 /*
6268 * Allocate and initialize.
6269 */
6270 size_t cb = sizeof(INTNETNETWORK);
6271 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6272 cb += INTNETNETWORK_TMP_SIZE + 64;
6273 PINTNETNETWORK pNetwork = (PINTNETNETWORK)RTMemAllocZ(cb);
6274 if (!pNetwork)
6275 return VERR_NO_MEMORY;
6276 //pNetwork->pNext = NULL;
6277 //pNetwork->pIfs = NULL;
6278 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6279 pNetwork->MacTab.cEntries = 0;
6280 pNetwork->MacTab.cEntriesAllocated = INTNET_GROW_DSTTAB_SIZE;
6281 //pNetwork->MacTab.cPromiscuousEntries = 0;
6282 //pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6283 pNetwork->MacTab.paEntries = NULL;
6284 pNetwork->MacTab.fHostPromiscuousReal = false;
6285 pNetwork->MacTab.fHostPromiscuousEff = false;
6286 pNetwork->MacTab.fHostActive = false;
6287 pNetwork->MacTab.fWirePromiscuousReal = false;
6288 pNetwork->MacTab.fWirePromiscuousEff = false;
6289 pNetwork->MacTab.fWireActive = false;
6290 pNetwork->MacTab.pTrunk = NULL;
6291 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6292 pNetwork->pIntNet = pIntNet;
6293 //pNetwork->pvObj = NULL;
6294 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6295 pNetwork->pbTmp = RT_ALIGN_PT(pNetwork + 1, 64, uint8_t *);
6296 //else
6297 // pNetwork->pbTmp = NULL;
6298 pNetwork->fFlags = fFlags;
6299 //pNetwork->fMinFlags = 0;
6300 //pNetwork->cActiveIFs = 0;
6301 size_t cchName = strlen(pszNetwork);
6302 pNetwork->cchName = (uint8_t)cchName;
6303 Assert(cchName && cchName < sizeof(pNetwork->szName)); /* caller's responsibility. */
6304 memcpy(pNetwork->szName, pszNetwork, cchName); /* '\0' at courtesy of alloc. */
6305 pNetwork->enmTrunkType = enmTrunkType;
6306 Assert(strlen(pszTrunk) < sizeof(pNetwork->szTrunk)); /* caller's responsibility. */
6307 strcpy(pNetwork->szTrunk, pszTrunk);
6308
6309 /*
6310 * Create the semaphore, spinlock and allocate the interface table.
6311 */
6312 int rc = RTSemEventCreate(&pNetwork->hEvtBusyIf);
6313 if (RT_SUCCESS(rc))
6314 rc = RTSpinlockCreate(&pNetwork->hAddrSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hAddrSpinlock");
6315 if (RT_SUCCESS(rc))
6316 {
6317 pNetwork->MacTab.paEntries = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * pNetwork->MacTab.cEntriesAllocated);
6318 if (!pNetwork->MacTab.paEntries)
6319 rc = VERR_NO_MEMORY;
6320 }
6321 if (RT_SUCCESS(rc))
6322 {
6323 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
6324 rc = intnetR0IfAddrCacheInit(&pNetwork->aAddrBlacklist[i], (INTNETADDRTYPE)i,
6325 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
6326 }
6327 if (RT_SUCCESS(rc))
6328 {
6329 /*
6330 * Register the object in the current session and link it into the network list.
6331 */
6332 pNetwork->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK, intnetR0NetworkDestruct, pNetwork, pIntNet);
6333 if (pNetwork->pvObj)
6334 {
6335 pNetwork->pNext = pIntNet->pNetworks;
6336 pIntNet->pNetworks = pNetwork;
6337
6338 /*
6339 * Check if the current session is actually allowed to create and
6340 * open the network. It is possible to implement network name
6341 * based policies and these must be checked now. SUPR0ObjRegister
6342 * does no such checks.
6343 */
6344 rc = SUPR0ObjVerifyAccess(pNetwork->pvObj, pSession, pNetwork->szName);
6345 if (RT_SUCCESS(rc))
6346 {
6347 /*
6348 * Connect the trunk.
6349 */
6350 rc = intnetR0NetworkCreateTrunkIf(pNetwork, pSession);
6351 if (RT_SUCCESS(rc))
6352 {
6353 *ppNetwork = pNetwork;
6354 LogFlow(("intnetR0CreateNetwork: returns VINF_SUCCESS *ppNetwork=%p\n", pNetwork));
6355 return VINF_SUCCESS;
6356 }
6357 }
6358
6359 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6360 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6361 return rc;
6362 }
6363
6364 /* cleanup */
6365 rc = VERR_NO_MEMORY;
6366 }
6367
6368 RTSemEventDestroy(pNetwork->hEvtBusyIf);
6369 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6370 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
6371 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6372 RTMemFree(pNetwork->MacTab.paEntries);
6373 pNetwork->MacTab.paEntries = NULL;
6374 RTMemFree(pNetwork);
6375
6376 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6377 return rc;
6378}
6379
6380
6381/**
6382 * Opens a network interface and connects it to the specified network.
6383 *
6384 * @returns VBox status code.
6385 * @param pSession The session handle.
6386 * @param pszNetwork The network name.
6387 * @param enmTrunkType The trunk type.
6388 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6389 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6390 * @param fRestrictAccess Whether new participants should be subjected to access check or not.
6391 * @param cbSend The send buffer size.
6392 * @param cbRecv The receive buffer size.
6393 * @param phIf Where to store the handle to the network interface.
6394 */
6395INTNETR0DECL(int) IntNetR0Open(PSUPDRVSESSION pSession, const char *pszNetwork,
6396 INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
6397 uint32_t cbSend, uint32_t cbRecv, PINTNETIFHANDLE phIf)
6398{
6399 LogFlow(("IntNetR0Open: pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x cbSend=%u cbRecv=%u phIf=%p\n",
6400 pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, cbSend, cbRecv, phIf));
6401
6402 /*
6403 * Validate input.
6404 */
6405 PINTNET pIntNet = g_pIntNet;
6406 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
6407 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
6408
6409 AssertPtrReturn(pszNetwork, VERR_INVALID_PARAMETER);
6410 const char *pszNetworkEnd = RTStrEnd(pszNetwork, INTNET_MAX_NETWORK_NAME);
6411 AssertReturn(pszNetworkEnd, VERR_INVALID_PARAMETER);
6412 size_t cchNetwork = pszNetworkEnd - pszNetwork;
6413 AssertReturn(cchNetwork, VERR_INVALID_PARAMETER);
6414
6415 if (pszTrunk)
6416 {
6417 AssertPtrReturn(pszTrunk, VERR_INVALID_PARAMETER);
6418 const char *pszTrunkEnd = RTStrEnd(pszTrunk, INTNET_MAX_TRUNK_NAME);
6419 AssertReturn(pszTrunkEnd, VERR_INVALID_PARAMETER);
6420 }
6421 else
6422 pszTrunk = "";
6423
6424 AssertMsgReturn(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End,
6425 ("%d\n", enmTrunkType), VERR_INVALID_PARAMETER);
6426 switch (enmTrunkType)
6427 {
6428 case kIntNetTrunkType_None:
6429 case kIntNetTrunkType_WhateverNone:
6430#ifdef VBOX_WITH_NAT_SERVICE
6431 case kIntNetTrunkType_SrvNat:
6432#endif
6433 if (*pszTrunk)
6434 return VERR_INVALID_PARAMETER;
6435 break;
6436
6437 case kIntNetTrunkType_NetFlt:
6438 case kIntNetTrunkType_NetAdp:
6439 if (!*pszTrunk)
6440 return VERR_INVALID_PARAMETER;
6441 break;
6442
6443 default:
6444 return VERR_NOT_IMPLEMENTED;
6445 }
6446
6447 AssertMsgReturn(!(fFlags & ~INTNET_OPEN_FLAGS_MASK), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
6448 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6449 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) != g_afIntNetOpenNetworkNetFlags[i].fPair,
6450 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkNetFlags[i].fPair), VERR_INVALID_PARAMETER);
6451 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
6452 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair) != g_afIntNetOpenNetworkIfFlags[i].fPair,
6453 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkIfFlags[i].fPair), VERR_INVALID_PARAMETER);
6454 AssertPtrReturn(phIf, VERR_INVALID_PARAMETER);
6455
6456 /*
6457 * Acquire the mutex to serialize open/create/close.
6458 */
6459 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6460 if (RT_FAILURE(rc))
6461 return rc;
6462
6463 /*
6464 * Try open / create the network and create an interface on it for the
6465 * caller to use.
6466 */
6467 PINTNETNETWORK pNetwork = NULL;
6468 rc = intnetR0OpenNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6469 if (RT_SUCCESS(rc))
6470 {
6471 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6472 if (RT_SUCCESS(rc))
6473 {
6474 intnetR0AdaptOpenNetworkFlags(pNetwork, fFlags);
6475 rc = VINF_ALREADY_INITIALIZED;
6476 }
6477 else
6478 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6479 }
6480 else if (rc == VERR_NOT_FOUND)
6481 {
6482 rc = intnetR0CreateNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6483 if (RT_SUCCESS(rc))
6484 {
6485 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6486 if (RT_FAILURE(rc))
6487 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6488 }
6489 }
6490
6491 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6492 LogFlow(("IntNetR0Open: return %Rrc *phIf=%RX32\n", rc, *phIf));
6493 return rc;
6494}
6495
6496
6497/**
6498 * VMMR0 request wrapper for IntNetR0Open.
6499 *
6500 * @returns see GMMR0MapUnmapChunk.
6501 * @param pSession The caller's session.
6502 * @param pReq The request packet.
6503 */
6504INTNETR0DECL(int) IntNetR0OpenReq(PSUPDRVSESSION pSession, PINTNETOPENREQ pReq)
6505{
6506 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
6507 return VERR_INVALID_PARAMETER;
6508 return IntNetR0Open(pSession, &pReq->szNetwork[0], pReq->enmTrunkType, pReq->szTrunk,
6509 pReq->fFlags, pReq->cbSend, pReq->cbRecv, &pReq->hIf);
6510}
6511
6512
6513/**
6514 * Count the internal networks.
6515 *
6516 * This is mainly for providing the testcase with some introspection to validate
6517 * behavior when closing interfaces.
6518 *
6519 * @returns The number of networks.
6520 */
6521INTNETR0DECL(uint32_t) IntNetR0GetNetworkCount(void)
6522{
6523 /*
6524 * Grab the instance.
6525 */
6526 PINTNET pIntNet = g_pIntNet;
6527 if (!pIntNet)
6528 return 0;
6529 AssertPtrReturn(pIntNet, 0);
6530 AssertReturn(pIntNet->u32Magic == INTNET_MAGIC, 0);
6531
6532 /*
6533 * Grab the mutex and count the networks.
6534 */
6535 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6536 if (RT_FAILURE(rc))
6537 return 0;
6538
6539 uint32_t cNetworks = 0;
6540 for (PINTNETNETWORK pCur = pIntNet->pNetworks; pCur; pCur = pCur->pNext)
6541 cNetworks++;
6542
6543 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6544
6545 return cNetworks;
6546}
6547
6548
6549
6550/**
6551 * Destroys an instance of the Ring-0 internal networking service.
6552 */
6553INTNETR0DECL(void) IntNetR0Term(void)
6554{
6555 LogFlow(("IntNetR0Term:\n"));
6556
6557 /*
6558 * Zap the global pointer and validate it.
6559 */
6560 PINTNET pIntNet = g_pIntNet;
6561 g_pIntNet = NULL;
6562 if (!pIntNet)
6563 return;
6564 AssertPtrReturnVoid(pIntNet);
6565 AssertReturnVoid(pIntNet->u32Magic == INTNET_MAGIC);
6566
6567 /*
6568 * There is not supposed to be any networks hanging around at this time.
6569 */
6570 AssertReturnVoid(ASMAtomicCmpXchgU32(&pIntNet->u32Magic, ~INTNET_MAGIC, INTNET_MAGIC));
6571 Assert(pIntNet->pNetworks == NULL);
6572 if (pIntNet->hMtxCreateOpenDestroy != NIL_RTSEMMUTEX)
6573 {
6574 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6575 pIntNet->hMtxCreateOpenDestroy = NIL_RTSEMMUTEX;
6576 }
6577 if (pIntNet->hHtIfs != NIL_RTHANDLETABLE)
6578 {
6579 /** @todo does it make sense to have a deleter here? */
6580 RTHandleTableDestroy(pIntNet->hHtIfs, NULL, NULL);
6581 pIntNet->hHtIfs = NIL_RTHANDLETABLE;
6582 }
6583
6584 RTMemFree(pIntNet);
6585}
6586
6587
6588/**
6589 * Initializes the internal network ring-0 service.
6590 *
6591 * @returns VBox status code.
6592 */
6593INTNETR0DECL(int) IntNetR0Init(void)
6594{
6595 LogFlow(("IntNetR0Init:\n"));
6596 int rc = VERR_NO_MEMORY;
6597 PINTNET pIntNet = (PINTNET)RTMemAllocZ(sizeof(*pIntNet));
6598 if (pIntNet)
6599 {
6600 //pIntNet->pNetworks = NULL;
6601
6602 rc = RTSemMutexCreate(&pIntNet->hMtxCreateOpenDestroy);
6603 if (RT_SUCCESS(rc))
6604 {
6605 rc = RTHandleTableCreateEx(&pIntNet->hHtIfs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT,
6606 UINT32_C(0x8ffe0000), 4096, intnetR0IfRetainHandle, NULL);
6607 if (RT_SUCCESS(rc))
6608 {
6609 pIntNet->u32Magic = INTNET_MAGIC;
6610 g_pIntNet = pIntNet;
6611 LogFlow(("IntNetR0Init: returns VINF_SUCCESS pIntNet=%p\n", pIntNet));
6612 return VINF_SUCCESS;
6613 }
6614
6615 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6616 }
6617 RTMemFree(pIntNet);
6618 }
6619 LogFlow(("IntNetR0Init: returns %Rrc\n", rc));
6620 return rc;
6621}
6622
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette