VirtualBox

source: vbox/trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp@ 87786

Last change on this file since 87786 was 87786, checked in by vboxsync, 4 years ago

AMD IOMMU: bugref:9654 Implemented an interrupt remap cache.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 285.2 KB
Line 
1/* $Id: DevIommuAmd.cpp 87786 2021-02-18 11:31:38Z vboxsync $ */
2/** @file
3 * IOMMU - Input/Output Memory Management Unit - AMD implementation.
4 */
5
6/*
7 * Copyright (C) 2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_DEV_IOMMU
23#include <VBox/msi.h>
24#include <VBox/iommu-amd.h>
25#include <VBox/vmm/pdmdev.h>
26#include <VBox/AssertGuest.h>
27
28#include <iprt/x86.h>
29#include <iprt/string.h>
30#include <iprt/avl.h>
31#ifdef IN_RING3
32# include <iprt/mem.h>
33#endif
34
35#include "VBoxDD.h"
36#include "DevIommuAmd.h"
37
38
39/*********************************************************************************************************************************
40* Defined Constants And Macros *
41*********************************************************************************************************************************/
42/** Release log prefix string. */
43#define IOMMU_LOG_PFX "AMD-IOMMU"
44/** The current saved state version. */
45#define IOMMU_SAVED_STATE_VERSION 1
46/** The IOMMU device instance magic. */
47#define IOMMU_MAGIC 0x10acce55
48
49/** Enable the IOTLBE cache. */
50#define IOMMU_WITH_IOTLBE_CACHE
51/** Enable the interrupt cache. */
52#define IOMMU_WITH_IRTE_CACHE
53
54/* The DTE cache is mandatory for the IOTLB or interrupt cache to work. */
55#if defined(IOMMU_WITH_IOTLBE_CACHE) || defined(IOMMU_WITH_IRTE_CACHE)
56# define IOMMU_WITH_DTE_CACHE
57#endif
58
59#ifdef IOMMU_WITH_IRTE_CACHE
60/** The maximum number of interrupt cache entries configurable through CFGM. */
61# define IOMMU_IRTE_CACHE_MAX 32
62/** The default number of interrupt cache entries. */
63# define IOMMU_IRTE_CACHE_DEFAULT 16
64/** The minimum number of interrupt cache entries configurable through CFGM. */
65# define IOMMU_IRTE_CACHE_MIN 8
66
67/** A NIL IRTE cache entry key. */
68# define IOMMU_IRTE_CACHE_KEY_NIL (~(uint32_t)0U)
69/** Gets the device ID from an IRTE cache entry key. */
70#define IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(a_Key) RT_HIWORD(a_Key)
71/** Gets the offset into the IRTE table from an IRTE cache entry key. */
72# define IOMMU_IRTE_CACHE_KEY_GET_OFF(a_Key) RT_LOWORD(a_Key)
73/** Makes an IRTE cache entry key.
74 *
75 * Bits 31:16 are the device ID (Bus, Device, Function).
76 * Bits 15:0 are the offset into the IRTE table.
77 */
78# define IOMMU_IRTE_CACHE_KEY_MAKE(a_DevId, a_off) RT_MAKE_U32(a_off, a_DevId)
79#endif /* IOMMU_WITH_IRTE_CACHE */
80
81#ifdef IOMMU_WITH_IOTLBE_CACHE
82/** The maximum number of IOTLB entries. */
83# define IOMMU_IOTLBE_MAX 96
84/** The mask of bits covering the domain ID in the IOTLBE key. */
85# define IOMMU_IOTLB_DOMAIN_ID_MASK UINT64_C(0xffffff0000000000)
86/** The mask of bits covering the IOVA in the IOTLBE key. */
87# define IOMMU_IOTLB_IOVA_MASK (~IOMMU_IOTLB_DOMAIN_ID_MASK)
88/** The number of bits to shift for the domain ID of the IOTLBE key. */
89# define IOMMU_IOTLB_DOMAIN_ID_SHIFT 40
90/** A NIL IOTLB key. */
91# define IOMMU_IOTLB_KEY_NIL UINT64_C(0)
92/** Gets the domain ID from an IOTLB entry key. */
93# define IOMMU_IOTLB_KEY_GET_DOMAIN_ID(a_Key) ((a_Key) >> IOMMU_IOTLB_DOMAIN_ID_SHIFT)
94/** Gets the IOVA from the IOTLB entry key. */
95# define IOMMU_IOTLB_KEY_GET_IOVA(a_Key) (((a_Key) & IOMMU_IOTLB_IOVA_MASK) << X86_PAGE_4K_SHIFT)
96/** Makes an IOTLB entry key.
97 *
98 * Address bits 63:52 of the IOVA are zero extended, so top 12 bits are free.
99 * Address bits 11:0 of the IOVA are offset into the minimum page size of 4K,
100 * so bottom 12 bits are free.
101 *
102 * Thus we use the top 24 bits of key to hold bits 15:0 of the domain ID.
103 * We use the bottom 40 bits of the key to hold bits 51:12 of the IOVA.
104 */
105# define IOMMU_IOTLB_KEY_MAKE(a_DomainId, a_uIova) ( ((uint64_t)(a_DomainId) << IOMMU_IOTLB_DOMAIN_ID_SHIFT) \
106 | (((a_uIova) >> X86_PAGE_4K_SHIFT) & IOMMU_IOTLB_IOVA_MASK))
107#endif /* IOMMU_WITH_IOTLBE_CACHE */
108
109#ifdef IOMMU_WITH_DTE_CACHE
110/** The maximum number of DTE entries. */
111# define IOMMU_DTE_CACHE_MAX UINT16_MAX
112
113/** @name IOMMU_DTECACHE_F_XXX: DTE cache flags.
114 *
115 * Some of these flags are "basic" i.e. they correspond directly to their bits in
116 * the DTE. The rest of the flags are based on checks or operations on several DTE
117 * bits.
118 *
119 * The basic flags are:
120 * - VALID (DTE.V)
121 * - IO_PERM_READ (DTE.IR)
122 * - IO_PERM_WRITE (DTE.IW)
123 * - IO_PERM_RSVD (bit following DTE.IW reserved for future & to keep
124 * masking consistent)
125 * - SUPPRESS_ALL_IOPF (DTE.SA)
126 * - SUPPRESS_IOPF (DTE.SE)
127 * - INTR_MAP_VALID (DTE.IV)
128 * - IGNORE_UNMAPPED_INTR (DTE.IG)
129 *
130 * @see iommuAmdGetBasicDevFlags()
131 * @{ */
132/** The DTE is present. */
133# define IOMMU_DTECACHE_F_PRESENT RT_BIT(0)
134/** The DTE is valid. */
135# define IOMMU_DTECACHE_F_VALID RT_BIT(1)
136/** The DTE permissions apply for address translations. */
137# define IOMMU_DTECACHE_F_IO_PERM RT_BIT(2)
138/** DTE permission - I/O read allowed. */
139# define IOMMU_DTECACHE_F_IO_PERM_READ RT_BIT(3)
140/** DTE permission - I/O write allowed. */
141# define IOMMU_DTECACHE_F_IO_PERM_WRITE RT_BIT(4)
142/** DTE permission - reserved. */
143# define IOMMU_DTECACHE_F_IO_PERM_RSVD RT_BIT(5)
144/** Address translation required. */
145# define IOMMU_DTECACHE_F_ADDR_TRANSLATE RT_BIT(6)
146/** Suppress all I/O page faults. */
147# define IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF RT_BIT(7)
148/** Suppress I/O page faults. */
149# define IOMMU_DTECACHE_F_SUPPRESS_IOPF RT_BIT(8)
150/** Interrupt map valid. */
151# define IOMMU_DTECACHE_F_INTR_MAP_VALID RT_BIT(9)
152/** Ignore unmapped interrupts. */
153# define IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR RT_BIT(10)
154/** An I/O page fault has been raised for this device. */
155# define IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED RT_BIT(11)
156/** Fixed and arbitrary interrupt control: Target Abort. */
157# define IOMMU_DTECACHE_F_INTR_CTRL_TARGET_ABORT RT_BIT(12)
158/** Fixed and arbitrary interrupt control: Forward unmapped. */
159# define IOMMU_DTECACHE_F_INTR_CTRL_FWD_UNMAPPED RT_BIT(13)
160/** Fixed and arbitrary interrupt control: Remapped. */
161# define IOMMU_DTECACHE_F_INTR_CTRL_REMAPPED RT_BIT(14)
162/** Fixed and arbitrary interrupt control: Reserved. */
163# define IOMMU_DTECACHE_F_INTR_CTRL_RSVD RT_BIT(15)
164/** @} */
165
166/** The number of bits to shift I/O device flags for DTE permissions. */
167# define IOMMU_DTECACHE_F_IO_PERM_SHIFT 3
168/** The mask of DTE permissions in I/O device flags. */
169# define IOMMU_DTECACHE_F_IO_PERM_MASK 0x3
170/** The number of bits to shift I/O device flags for interrupt control bits. */
171# define IOMMU_DTECACHE_F_INTR_CTRL_SHIFT 12
172/** The mask of interrupt control bits in I/O device flags. */
173# define IOMMU_DTECACHE_F_INTR_CTRL_MASK 0x3
174
175/** Acquires the cache lock. */
176# define IOMMU_LOCK_CACHE(a_pDevIns, a_pThis) \
177 do { \
178 int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VERR_SEM_BUSY); \
179 if (rcLock == VINF_SUCCESS) \
180 { /* likely */ } \
181 else \
182 { \
183 AssertRC(rcLock); \
184 return rcLock; \
185 } \
186 } while (0)
187
188/** Acquires the cache lock (asserts on failure). */
189# define IOMMU_LOCK_CACHE_NORET(a_pDevIns, a_pThis) \
190 do { \
191 int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VERR_SEM_BUSY); \
192 AssertRC(rcLock); \
193 } while (0)
194
195/** Releases the cache lock. */
196# define IOMMU_UNLOCK_CACHE(a_pDevIns, a_pThis) PDMDevHlpCritSectLeave((a_pDevIns), &(a_pThis)->CritSectCache)
197#endif /* IOMMU_WITH_DTE_CACHE */
198
199/** Gets the page offset mask given the number of bits to shift. */
200#define IOMMU_GET_PAGE_OFF_MASK(a_cShift) (~(UINT64_C(0xffffffffffffffff) << (a_cShift)))
201
202/** Acquires the PDM lock. */
203#define IOMMU_LOCK(a_pDevIns, a_pThisCC) \
204 do { \
205 int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VERR_SEM_BUSY); \
206 if (RT_LIKELY(rcLock == VINF_SUCCESS)) \
207 { /* likely */ } \
208 else \
209 return rcLock; \
210 } while (0)
211
212/** Acquires the PDM lock (asserts on failure). */
213#define IOMMU_LOCK_NORET(a_pDevIns, a_pThisCC) \
214 do { \
215 int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VERR_SEM_BUSY); \
216 AssertRC(rcLock); \
217 } while (0)
218
219/** Releases the PDM lock. */
220# define IOMMU_UNLOCK(a_pDevIns, a_pThisCC) (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnUnlock((a_pDevIns))
221
222
223/*********************************************************************************************************************************
224* Structures and Typedefs *
225*********************************************************************************************************************************/
226/**
227 * IOMMU operation (transaction).
228 */
229typedef enum IOMMUOP
230{
231 /** Address translation request. */
232 IOMMUOP_TRANSLATE_REQ = 0,
233 /** Memory read request. */
234 IOMMUOP_MEM_READ,
235 /** Memory write request. */
236 IOMMUOP_MEM_WRITE,
237 /** Interrupt request. */
238 IOMMUOP_INTR_REQ,
239 /** Command. */
240 IOMMUOP_CMD
241} IOMMUOP;
242/** Pointer to an IOMMU operation. */
243typedef IOMMUOP *PIOMMUOP;
244
245/**
246 * I/O page lookup.
247 */
248typedef struct IOPAGELOOKUP
249{
250 /** The translated system physical address. */
251 RTGCPHYS GCPhysSpa;
252 /** The number of offset bits in the system physical address. */
253 uint8_t cShift;
254 /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
255 uint8_t fPerm;
256} IOPAGELOOKUP;
257/** Pointer to an I/O page lookup. */
258typedef IOPAGELOOKUP *PIOPAGELOOKUP;
259/** Pointer to a const I/O page lookup. */
260typedef IOPAGELOOKUP const *PCIOPAGELOOKUP;
261
262/**
263 * I/O address range.
264 */
265typedef struct IOADDRRANGE
266{
267 /** The address (virtual or physical). */
268 uint64_t uAddr;
269 /** The size of the access in bytes. */
270 size_t cb;
271 /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
272 uint8_t fPerm;
273} IOADDRRANGE;
274/** Pointer to an I/O address range. */
275typedef IOADDRRANGE *PIOADDRRANGE;
276/** Pointer to a const I/O address range. */
277typedef IOADDRRANGE const *PCIOADDRRANGE;
278
279#ifdef IOMMU_WITH_DTE_CACHE
280/**
281 * Device Table Entry Cache.
282 */
283typedef struct DTECACHE
284{
285 /** This device's flags, see IOMMU_DTECACHE_F_XXX. */
286 uint16_t fFlags;
287 /** The domain ID assigned for this device by software. */
288 uint16_t uDomainId;
289} DTECACHE;
290/** Pointer to an I/O device struct. */
291typedef DTECACHE *PDTECACHE;
292/** Pointer to a const I/O device struct. */
293typedef DTECACHE *PCDTECACHE;
294AssertCompileSize(DTECACHE, 4);
295#endif /* IOMMU_WITH_DTE_CACHE */
296
297#ifdef IOMMU_WITH_IOTLBE_CACHE
298/**
299 * I/O TLB Entry.
300 * Keep this as small and aligned as possible.
301 */
302typedef struct IOTLBE
303{
304 /** The AVL tree node. */
305 AVLU64NODECORE Core;
306 /** The least recently used (LRU) list node. */
307 RTLISTNODE NdLru;
308 /** The I/O page lookup results of the translation. */
309 IOPAGELOOKUP PageLookup;
310 /** Whether the entry needs to be evicted from the cache. */
311 bool fEvictPending;
312} IOTLBE;
313/** Pointer to an IOMMU I/O TLB entry struct. */
314typedef IOTLBE *PIOTLBE;
315/** Pointer to a const IOMMU I/O TLB entry struct. */
316typedef IOTLBE const *PCIOTLBE;
317AssertCompileSizeAlignment(IOTLBE, 8);
318AssertCompileMemberOffset(IOTLBE, Core, 0);
319#endif /* IOMMU_WITH_IOTLBE_CACHE */
320
321#ifdef IOMMU_WITH_IRTE_CACHE
322/**
323 * Interrupt Remap Table Entry Cache.
324 */
325typedef struct IRTECACHE
326{
327 /** The IRTE. */
328 IRTE_T Irte;
329 /** The key, see IOMMU_IRTE_CACHE_KEY_MAKE. */
330 uint32_t uKey;
331} IRTECACHE;
332/** Pointer to an IRTE cache struct. */
333typedef IRTECACHE *PIRTECACHE;
334/** Pointer to a const IRTE cache struct. */
335typedef IRTECACHE const *PCIRTECACHE;
336AssertCompileSizeAlignment(IRTECACHE, 4);
337#endif /* IOMMU_WITH_IRTE_CACHE */
338
339/**
340 * The shared IOMMU device state.
341 */
342typedef struct IOMMU
343{
344 /** IOMMU device index (0 is at the top of the PCI tree hierarchy). */
345 uint32_t idxIommu;
346 /** IOMMU magic. */
347 uint32_t u32Magic;
348
349 /** Whether the command thread is sleeping. */
350 bool volatile fCmdThreadSleeping;
351 /** Alignment padding. */
352 uint8_t afPadding0[3];
353 /** Whether the command thread has been signaled for wake up. */
354 bool volatile fCmdThreadSignaled;
355 /** Alignment padding. */
356 uint8_t afPadding1[3];
357
358 /** The event semaphore the command thread waits on. */
359 SUPSEMEVENT hEvtCmdThread;
360 /** The MMIO handle. */
361 IOMMMIOHANDLE hMmio;
362
363#ifdef IOMMU_WITH_DTE_CACHE
364 /** The critsect that protects the cache from concurrent access. */
365 PDMCRITSECT CritSectCache;
366 /** Maps [DeviceId] to [DomainId]. */
367 PDTECACHE paDteCache;
368#endif
369#ifdef IOMMU_WITH_IOTLBE_CACHE
370 /** Pointer to array of pre-allocated IOTLBEs. */
371 PIOTLBE paIotlbes;
372 /** Maps [DomainId,Iova] to [IOTLBE]. */
373 AVLU64TREE TreeIotlbe;
374 /** LRU list anchor for IOTLB entries. */
375 RTLISTANCHOR LstLruIotlbe;
376 /** Index of the next unused IOTLB. */
377 uint32_t idxUnusedIotlbe;
378 /** Number of cached IOTLB entries in the tree. */
379 uint32_t cCachedIotlbes;
380#endif
381#ifdef IOMMU_WITH_IRTE_CACHE
382 /** Maps [DeviceId] to [IRTE]. */
383 PIRTECACHE paIrteCache;
384 /** Maximum number of entries in the IRTE cache. */
385 uint16_t cIrteCache;
386 /** Padding. */
387 uint16_t auPadding[3];
388#endif
389
390 /** @name PCI: Base capability block registers.
391 * @{ */
392 IOMMU_BAR_T IommuBar; /**< IOMMU base address register. */
393 /** @} */
394
395 /** @name MMIO: Control and status registers.
396 * @{ */
397 DEV_TAB_BAR_T aDevTabBaseAddrs[8]; /**< Device table base address registers. */
398 CMD_BUF_BAR_T CmdBufBaseAddr; /**< Command buffer base address register. */
399 EVT_LOG_BAR_T EvtLogBaseAddr; /**< Event log base address register. */
400 IOMMU_CTRL_T Ctrl; /**< IOMMU control register. */
401 IOMMU_EXCL_RANGE_BAR_T ExclRangeBaseAddr; /**< IOMMU exclusion range base register. */
402 IOMMU_EXCL_RANGE_LIMIT_T ExclRangeLimit; /**< IOMMU exclusion range limit. */
403 IOMMU_EXT_FEAT_T ExtFeat; /**< IOMMU extended feature register. */
404 /** @} */
405
406 /** @name MMIO: Peripheral Page Request (PPR) Log registers.
407 * @{ */
408 PPR_LOG_BAR_T PprLogBaseAddr; /**< PPR Log base address register. */
409 IOMMU_HW_EVT_HI_T HwEvtHi; /**< IOMMU hardware event register (Hi). */
410 IOMMU_HW_EVT_LO_T HwEvtLo; /**< IOMMU hardware event register (Lo). */
411 IOMMU_HW_EVT_STATUS_T HwEvtStatus; /**< IOMMU hardware event status. */
412 /** @} */
413
414 /** @todo IOMMU: SMI filter. */
415
416 /** @name MMIO: Guest Virtual-APIC Log registers.
417 * @{ */
418 GALOG_BAR_T GALogBaseAddr; /**< Guest Virtual-APIC Log base address register. */
419 GALOG_TAIL_ADDR_T GALogTailAddr; /**< Guest Virtual-APIC Log Tail address register. */
420 /** @} */
421
422 /** @name MMIO: Alternate PPR and Event Log registers.
423 * @{ */
424 PPR_LOG_B_BAR_T PprLogBBaseAddr; /**< PPR Log B base address register. */
425 EVT_LOG_B_BAR_T EvtLogBBaseAddr; /**< Event Log B base address register. */
426 /** @} */
427
428 /** @name MMIO: Device-specific feature registers.
429 * @{ */
430 DEV_SPECIFIC_FEAT_T DevSpecificFeat; /**< Device-specific feature extension register (DSFX). */
431 DEV_SPECIFIC_CTRL_T DevSpecificCtrl; /**< Device-specific control extension register (DSCX). */
432 DEV_SPECIFIC_STATUS_T DevSpecificStatus; /**< Device-specific status extension register (DSSX). */
433 /** @} */
434
435 /** @name MMIO: MSI Capability Block registers.
436 * @{ */
437 MSI_MISC_INFO_T MiscInfo; /**< MSI Misc. info registers / MSI Vector registers. */
438 /** @} */
439
440 /** @name MMIO: Performance Optimization Control registers.
441 * @{ */
442 IOMMU_PERF_OPT_CTRL_T PerfOptCtrl; /**< IOMMU Performance optimization control register. */
443 /** @} */
444
445 /** @name MMIO: x2APIC Control registers.
446 * @{ */
447 IOMMU_XT_GEN_INTR_CTRL_T XtGenIntrCtrl; /**< IOMMU X2APIC General interrupt control register. */
448 IOMMU_XT_PPR_INTR_CTRL_T XtPprIntrCtrl; /**< IOMMU X2APIC PPR interrupt control register. */
449 IOMMU_XT_GALOG_INTR_CTRL_T XtGALogIntrCtrl; /**< IOMMU X2APIC Guest Log interrupt control register. */
450 /** @} */
451
452 /** @name MMIO: Memory Address Routing & Control (MARC) registers.
453 * @{ */
454 MARC_APER_T aMarcApers[4]; /**< MARC Aperture Registers. */
455 /** @} */
456
457 /** @name MMIO: Reserved register.
458 * @{ */
459 IOMMU_RSVD_REG_T RsvdReg; /**< IOMMU Reserved Register. */
460 /** @} */
461
462 /** @name MMIO: Command and Event Log pointer registers.
463 * @{ */
464 CMD_BUF_HEAD_PTR_T CmdBufHeadPtr; /**< Command buffer head pointer register. */
465 CMD_BUF_TAIL_PTR_T CmdBufTailPtr; /**< Command buffer tail pointer register. */
466 EVT_LOG_HEAD_PTR_T EvtLogHeadPtr; /**< Event log head pointer register. */
467 EVT_LOG_TAIL_PTR_T EvtLogTailPtr; /**< Event log tail pointer register. */
468 /** @} */
469
470 /** @name MMIO: Command and Event Status register.
471 * @{ */
472 IOMMU_STATUS_T Status; /**< IOMMU status register. */
473 /** @} */
474
475 /** @name MMIO: PPR Log Head and Tail pointer registers.
476 * @{ */
477 PPR_LOG_HEAD_PTR_T PprLogHeadPtr; /**< IOMMU PPR log head pointer register. */
478 PPR_LOG_TAIL_PTR_T PprLogTailPtr; /**< IOMMU PPR log tail pointer register. */
479 /** @} */
480
481 /** @name MMIO: Guest Virtual-APIC Log Head and Tail pointer registers.
482 * @{ */
483 GALOG_HEAD_PTR_T GALogHeadPtr; /**< Guest Virtual-APIC log head pointer register. */
484 GALOG_TAIL_PTR_T GALogTailPtr; /**< Guest Virtual-APIC log tail pointer register. */
485 /** @} */
486
487 /** @name MMIO: PPR Log B Head and Tail pointer registers.
488 * @{ */
489 PPR_LOG_B_HEAD_PTR_T PprLogBHeadPtr; /**< PPR log B head pointer register. */
490 PPR_LOG_B_TAIL_PTR_T PprLogBTailPtr; /**< PPR log B tail pointer register. */
491 /** @} */
492
493 /** @name MMIO: Event Log B Head and Tail pointer registers.
494 * @{ */
495 EVT_LOG_B_HEAD_PTR_T EvtLogBHeadPtr; /**< Event log B head pointer register. */
496 EVT_LOG_B_TAIL_PTR_T EvtLogBTailPtr; /**< Event log B tail pointer register. */
497 /** @} */
498
499 /** @name MMIO: PPR Log Overflow protection registers.
500 * @{ */
501 PPR_LOG_AUTO_RESP_T PprLogAutoResp; /**< PPR Log Auto Response register. */
502 PPR_LOG_OVERFLOW_EARLY_T PprLogOverflowEarly; /**< PPR Log Overflow Early Indicator register. */
503 PPR_LOG_B_OVERFLOW_EARLY_T PprLogBOverflowEarly; /**< PPR Log B Overflow Early Indicator register. */
504 /** @} */
505
506 /** @todo IOMMU: IOMMU Event counter registers. */
507
508#ifdef VBOX_WITH_STATISTICS
509 /** @name IOMMU: Stat counters.
510 * @{ */
511 STAMCOUNTER StatMmioReadR3; /**< Number of MMIO reads in R3. */
512 STAMCOUNTER StatMmioReadRZ; /**< Number of MMIO reads in RZ. */
513 STAMCOUNTER StatMmioWriteR3; /**< Number of MMIO writes in R3. */
514 STAMCOUNTER StatMmioWriteRZ; /**< Number of MMIO writes in RZ. */
515
516 STAMCOUNTER StatMsiRemapR3; /**< Number of MSI remap requests in R3. */
517 STAMCOUNTER StatMsiRemapRZ; /**< Number of MSI remap requests in RZ. */
518
519 STAMCOUNTER StatMemReadR3; /**< Number of memory read translation requests in R3. */
520 STAMCOUNTER StatMemReadRZ; /**< Number of memory read translation requests in RZ. */
521 STAMCOUNTER StatMemWriteR3; /**< Number of memory write translation requests in R3. */
522 STAMCOUNTER StatMemWriteRZ; /**< Number of memory write translation requests in RZ. */
523
524 STAMCOUNTER StatMemBulkReadR3; /**< Number of memory read bulk translation requests in R3. */
525 STAMCOUNTER StatMemBulkReadRZ; /**< Number of memory read bulk translation requests in RZ. */
526 STAMCOUNTER StatMemBulkWriteR3; /**< Number of memory write bulk translation requests in R3. */
527 STAMCOUNTER StatMemBulkWriteRZ; /**< Number of memory write bulk translation requests in RZ. */
528
529 STAMCOUNTER StatCmd; /**< Number of commands processed in total. */
530 STAMCOUNTER StatCmdCompWait; /**< Number of Completion Wait commands processed. */
531 STAMCOUNTER StatCmdInvDte; /**< Number of Invalidate DTE commands processed. */
532 STAMCOUNTER StatCmdInvIommuPages; /**< Number of Invalidate IOMMU pages commands processed. */
533 STAMCOUNTER StatCmdInvIotlbPages; /**< Number of Invalidate IOTLB pages commands processed. */
534 STAMCOUNTER StatCmdInvIntrTable; /**< Number of Invalidate Interrupt Table commands processed. */
535 STAMCOUNTER StatCmdPrefIommuPages; /**< Number of Prefetch IOMMU Pages commands processed. */
536 STAMCOUNTER StatCmdCompletePprReq; /**< Number of Complete PPR Requests commands processed. */
537 STAMCOUNTER StatCmdInvIommuAll; /**< Number of Invalidate IOMMU All commands processed. */
538
539 STAMCOUNTER StatIotlbeCached; /**< Number of IOTLB entries in the cache. */
540 STAMCOUNTER StatIotlbeLazyEvictReuse; /**< Number of IOTLB entries re-used after lazy eviction. */
541
542 STAMPROFILEADV StatProfDteLookup; /**< Profiling of I/O page walk (from memory). */
543 STAMPROFILEADV StatProfIotlbeLookup; /**< Profiling of IOTLB entry lookup (from cache). */
544
545 STAMPROFILEADV StatProfIrteLookup; /**< Profiling of IRTE entry lookup (from memory). */
546 STAMPROFILEADV StatProfIrteCacheLookup; /**< Profiling of IRTE entry lookup (from cache). */
547
548 STAMCOUNTER StatAccessCacheHit; /**< Number of IOTLB cache hits. */
549 STAMCOUNTER StatAccessCacheHitFull; /**< Number of accesses that were fully looked up from the cache. */
550 STAMCOUNTER StatAccessCacheMiss; /**< Number of cache misses (resulting in DTE lookups). */
551 STAMCOUNTER StatAccessCacheNonContig; /**< Number of cache accesses resulting in non-contiguous access. */
552 STAMCOUNTER StatAccessCachePermDenied; /**< Number of cache accesses resulting in insufficient permissions. */
553 STAMCOUNTER StatAccessDteNonContig; /**< Number of DTE accesses resulting in non-contiguous access. */
554 STAMCOUNTER StatAccessDtePermDenied; /**< Number of DTE accesses resulting in insufficient permissions. */
555
556 STAMCOUNTER StatIntrCacheHit; /**< Number of interrupt cache hits. */
557 STAMCOUNTER StatIntrCacheMiss; /**< Number of interrupt cache misses. */
558 /** @} */
559#endif
560} IOMMU;
561/** Pointer to the IOMMU device state. */
562typedef struct IOMMU *PIOMMU;
563/** Pointer to the const IOMMU device state. */
564typedef const struct IOMMU *PCIOMMU;
565AssertCompileMemberAlignment(IOMMU, fCmdThreadSleeping, 4);
566AssertCompileMemberAlignment(IOMMU, fCmdThreadSignaled, 4);
567AssertCompileMemberAlignment(IOMMU, hEvtCmdThread, 8);
568AssertCompileMemberAlignment(IOMMU, hMmio, 8);
569#ifdef IOMMU_WITH_DTE_CACHE
570AssertCompileMemberAlignment(IOMMU, paDteCache, 8);
571#endif
572#ifdef IOMMU_WITH_IOTLBE_CACHE
573AssertCompileMemberAlignment(IOMMU, paIotlbes, 8);
574AssertCompileMemberAlignment(IOMMU, TreeIotlbe, 8);
575AssertCompileMemberAlignment(IOMMU, LstLruIotlbe, 8);
576#endif
577#ifdef IOMMU_WITH_IRTE_CACHE
578AssertCompileMemberAlignment(IOMMU, paIrteCache, 8);
579#endif
580AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
581AssertCompileMemberAlignment(IOMMU, aDevTabBaseAddrs, 8);
582AssertCompileMemberAlignment(IOMMU, CmdBufHeadPtr, 8);
583AssertCompileMemberAlignment(IOMMU, Status, 8);
584
585/**
586 * The ring-3 IOMMU device state.
587 */
588typedef struct IOMMUR3
589{
590 /** Device instance. */
591 PPDMDEVINSR3 pDevInsR3;
592 /** The IOMMU helpers. */
593 R3PTRTYPE(PCPDMIOMMUHLPR3) pIommuHlpR3;
594 /** The command thread handle. */
595 R3PTRTYPE(PPDMTHREAD) pCmdThread;
596} IOMMUR3;
597/** Pointer to the ring-3 IOMMU device state. */
598typedef IOMMUR3 *PIOMMUR3;
599
600/**
601 * The ring-0 IOMMU device state.
602 */
603typedef struct IOMMUR0
604{
605 /** Device instance. */
606 PPDMDEVINSR0 pDevInsR0;
607 /** The IOMMU helpers. */
608 R0PTRTYPE(PCPDMIOMMUHLPR0) pIommuHlpR0;
609} IOMMUR0;
610/** Pointer to the ring-0 IOMMU device state. */
611typedef IOMMUR0 *PIOMMUR0;
612
613/**
614 * The raw-mode IOMMU device state.
615 */
616typedef struct IOMMURC
617{
618 /** Device instance. */
619 PPDMDEVINSRC pDevInsRC;
620 /** The IOMMU helpers. */
621 RCPTRTYPE(PCPDMIOMMUHLPRC) pIommuHlpRC;
622} IOMMURC;
623/** Pointer to the raw-mode IOMMU device state. */
624typedef IOMMURC *PIOMMURC;
625
626/** The IOMMU device state for the current context. */
627typedef CTX_SUFF(IOMMU) IOMMUCC;
628/** Pointer to the IOMMU device state for the current context. */
629typedef CTX_SUFF(PIOMMU) PIOMMUCC;
630
631/**
632 * IOMMU register access.
633 */
634typedef struct IOMMUREGACC
635{
636 const char *pszName;
637 VBOXSTRICTRC (*pfnRead)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value);
638 VBOXSTRICTRC (*pfnWrite)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value);
639} IOMMUREGACC;
640/** Pointer to an IOMMU register access. */
641typedef IOMMUREGACC *PIOMMUREGACC;
642/** Pointer to a const IOMMU register access. */
643typedef IOMMUREGACC const *PCIOMMUREGACC;
644
645#ifdef IOMMU_WITH_IOTLBE_CACHE
646/**
647 * IOTLBE flush argument.
648 */
649typedef struct IOTLBEFLUSHARG
650{
651 /** The IOMMU device state. */
652 PIOMMU pIommu;
653 /** The domain ID to flush. */
654 uint16_t uDomainId;
655} IOTLBEFLUSHARG;
656/** Pointer to an IOTLBE flush argument. */
657typedef IOTLBEFLUSHARG *PIOTLBEFLUSHARG;
658/** Pointer to a const IOTLBE flush argument. */
659typedef IOTLBEFLUSHARG const *PCIOTLBEFLUSHARG;
660
661/**
662 * IOTLBE Info. argument.
663 */
664typedef struct IOTLBEINFOARG
665{
666 /** The IOMMU device state. */
667 PIOMMU pIommu;
668 /** The info helper. */
669 PCDBGFINFOHLP pHlp;
670 /** The domain ID to dump IOTLB entry. */
671 uint16_t uDomainId;
672} IOTLBEINFOARG;
673/** Pointer to an IOTLBE info argument. */
674typedef IOTLBEINFOARG *PIOTLBEINFOARG;
675/** Pointer to a const IOTLBE info argument. */
676typedef IOTLBEINFOARG const *PCIOTLBEINFOARG;
677#endif
678
679/**
680 * IOMMU operation auxiliary info.
681 */
682typedef struct IOMMUOPAUX
683{
684 /** The IOMMU operation being performed. */
685 IOMMUOP enmOp;
686 /** The device table entry (can be NULL). */
687 PCDTE_T pDte;
688 /** The device ID. */
689 uint16_t uDeviceId;
690 /** The domain ID (when the DTE isn't provided). */
691 uint16_t uDomainId;
692} IOMMUOPAUX;
693/** Pointer to an IOMMU operation auxiliary info struct. */
694typedef IOMMUOPAUX *PIOMMUOPAUX;
695/** Pointer to a const IOMMU operation auxiliary info struct. */
696typedef IOMMUOPAUX const *PCIOMMUOPAUX;
697
698typedef DECLCALLBACKTYPE(int, FNIOPAGELOOKUP,(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
699 PIOPAGELOOKUP pPageLookup));
700typedef FNIOPAGELOOKUP *PFNIOPAGELOOKUP;
701
702
703/*********************************************************************************************************************************
704* Global Variables *
705*********************************************************************************************************************************/
706/**
707 * An array of the number of device table segments supported.
708 * Indexed by u2DevTabSegSup.
709 */
710static uint8_t const g_acDevTabSegs[] = { 0, 2, 4, 8 };
711
712/**
713 * An array of the masks to select the device table segment index from a device ID.
714 */
715static uint16_t const g_auDevTabSegMasks[] = { 0x0, 0x8000, 0xc000, 0xe000 };
716
717/**
718 * An array of the shift values to select the device table segment index from a
719 * device ID.
720 */
721static uint8_t const g_auDevTabSegShifts[] = { 0, 15, 14, 13 };
722
723/**
724 * The maximum size (inclusive) of each device table segment (0 to 7).
725 * Indexed by the device table segment index.
726 */
727static uint16_t const g_auDevTabSegMaxSizes[] = { 0x1ff, 0xff, 0x7f, 0x7f, 0x3f, 0x3f, 0x3f, 0x3f };
728
729/**
730 * The IOMMU I/O permission names.
731 */
732static const char * const g_aszPerm[] = { "none", "read", "write", "read+write" };
733
734
735#ifndef VBOX_DEVICE_STRUCT_TESTCASE
736/**
737 * Gets the maximum number of buffer entries for the given buffer length.
738 *
739 * @returns Number of buffer entries.
740 * @param uEncodedLen The length (power-of-2 encoded).
741 */
742DECLINLINE(uint32_t) iommuAmdGetBufMaxEntries(uint8_t uEncodedLen)
743{
744 Assert(uEncodedLen > 7);
745 Assert(uEncodedLen < 16);
746 return 2 << (uEncodedLen - 1);
747}
748
749
750/**
751 * Gets the total length of the buffer given a base register's encoded length.
752 *
753 * @returns The length of the buffer in bytes.
754 * @param uEncodedLen The length (power-of-2 encoded).
755 */
756DECLINLINE(uint32_t) iommuAmdGetTotalBufLength(uint8_t uEncodedLen)
757{
758 Assert(uEncodedLen > 7);
759 Assert(uEncodedLen < 16);
760 return (2 << (uEncodedLen - 1)) << 4;
761}
762
763
764/**
765 * Gets the number of (unconsumed) entries in the event log.
766 *
767 * @returns The number of entries in the event log.
768 * @param pThis The IOMMU device state.
769 */
770static uint32_t iommuAmdGetEvtLogEntryCount(PIOMMU pThis)
771{
772 uint32_t const idxTail = pThis->EvtLogTailPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
773 uint32_t const idxHead = pThis->EvtLogHeadPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
774 if (idxTail >= idxHead)
775 return idxTail - idxHead;
776
777 uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
778 return cMaxEvts - idxHead + idxTail;
779}
780
781
782/**
783 * Gets the descriptive I/O permission name for a memory access.
784 *
785 * @returns The I/O permission name.
786 * @param fPerm The I/O permissions for the access, see IOMMU_IO_PERM_XXX.
787 */
788static const char *iommuAmdMemAccessGetPermName(uint8_t fPerm)
789{
790 /* We shouldn't construct an access with "none" or "read+write" (must be read or write) permissions. */
791 Assert(fPerm > 0 && fPerm < RT_ELEMENTS(g_aszPerm));
792 return g_aszPerm[fPerm & IOMMU_IO_PERM_MASK];
793}
794
795
796/**
797 * Checks whether two consecutive I/O page lookup results translates to a physically
798 * contiguous region.
799 *
800 * @returns @c true if they are contiguous, @c false otherwise.
801 * @param pPageLookupPrev The I/O page lookup result of the previous page.
802 * @param pPageLookup The I/O page lookup result of the current page.
803 */
804static bool iommuAmdLookupIsAccessContig(PCIOPAGELOOKUP pPageLookupPrev, PCIOPAGELOOKUP pPageLookup)
805{
806 Assert(pPageLookupPrev->fPerm == pPageLookup->fPerm);
807 size_t const cbPrev = RT_BIT_64(pPageLookupPrev->cShift);
808 RTGCPHYS const GCPhysPrev = pPageLookupPrev->GCPhysSpa;
809 RTGCPHYS const GCPhys = pPageLookup->GCPhysSpa;
810 uint64_t const offMaskPrev = IOMMU_GET_PAGE_OFF_MASK(pPageLookupPrev->cShift);
811 uint64_t const offMask = IOMMU_GET_PAGE_OFF_MASK(pPageLookup->cShift);
812
813 /* Paranoia: Ensure offset bits are 0. */
814 Assert(!(GCPhysPrev & offMaskPrev));
815 Assert(!(GCPhys & offMask));
816
817 if ((GCPhysPrev & ~offMaskPrev) + cbPrev == (GCPhys & ~offMask))
818 return true;
819 return false;
820}
821
822
823/**
824 * Gets the basic I/O device flags for the given device table entry.
825 *
826 * @returns The basic I/O device flags.
827 * @param pDte The device table entry.
828 */
829static uint16_t iommuAmdGetBasicDevFlags(PCDTE_T pDte)
830{
831 /* Extract basic flags from bits 127:0 of the DTE. */
832 uint16_t fFlags = 0;
833 if (pDte->n.u1Valid)
834 {
835 fFlags |= IOMMU_DTECACHE_F_VALID;
836
837 if (pDte->n.u1SuppressAllPfEvents)
838 fFlags |= IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF;
839 if (pDte->n.u1SuppressPfEvents)
840 fFlags |= IOMMU_DTECACHE_F_SUPPRESS_IOPF;
841
842 uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
843 AssertCompile(IOMMU_DTECACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);
844 fFlags |= fDtePerm << IOMMU_DTECACHE_F_IO_PERM_SHIFT;
845 }
846
847 /* Extract basic flags from bits 255:128 of the DTE. */
848 if (pDte->n.u1IntrMapValid)
849 {
850 fFlags |= IOMMU_DTECACHE_F_INTR_MAP_VALID;
851 if (pDte->n.u1IgnoreUnmappedIntrs)
852 fFlags |= IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR;
853
854 uint16_t const fIntrCtrl = IOMMU_GET_INTR_CTRL(pDte);
855 AssertCompile(IOMMU_DTECACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK);
856 fFlags |= fIntrCtrl << IOMMU_DTECACHE_F_INTR_CTRL_SHIFT;
857 }
858 return fFlags;
859}
860
861
862/**
863 * Remaps the source MSI to the destination MSI given the IRTE.
864 *
865 * @param pMsiIn The source MSI.
866 * @param pMsiOut Where to store the remapped MSI.
867 * @param pIrte The IRTE used for the remapping.
868 */
869static void iommuAmdIrteRemapMsi(PCMSIMSG pMsiIn, PMSIMSG pMsiOut, PCIRTE_T pIrte)
870{
871 /* Preserve all bits from the source MSI address and data that don't map 1:1 from the IRTE. */
872 *pMsiOut = *pMsiIn;
873
874 pMsiOut->Addr.n.u1DestMode = pIrte->n.u1DestMode;
875 pMsiOut->Addr.n.u8DestId = pIrte->n.u8Dest;
876
877 pMsiOut->Data.n.u8Vector = pIrte->n.u8Vector;
878 pMsiOut->Data.n.u3DeliveryMode = pIrte->n.u3IntrType;
879}
880
881
882#ifdef IOMMU_WITH_IOTLBE_CACHE
883/**
884 * Moves the IOTLB entry to the least recently used slot.
885 *
886 * @param pThis The IOMMU device state.
887 * @param pIotlbe The IOTLB entry.
888 */
889static void iommuAmdIotlbEntryMoveToLru(PIOMMU pThis, PIOTLBE pIotlbe)
890{
891 if (!RTListNodeIsFirst(&pThis->LstLruIotlbe, &pIotlbe->NdLru))
892 {
893 RTListNodeRemove(&pIotlbe->NdLru);
894 RTListPrepend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
895 }
896}
897
898
899/**
900 * Moves the IOTLB entry to the most recently used slot.
901 *
902 * @param pThis The IOMMU device state.
903 * @param pIotlbe The IOTLB entry.
904 */
905static void iommuAmdIotlbEntryMoveToMru(PIOMMU pThis, PIOTLBE pIotlbe)
906{
907 if (!RTListNodeIsLast(&pThis->LstLruIotlbe, &pIotlbe->NdLru))
908 {
909 RTListNodeRemove(&pIotlbe->NdLru);
910 RTListAppend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
911 }
912}
913
914
915# ifdef IN_RING3
916/**
917 * Dumps the IOTLB entry via the debug info helper.
918 *
919 * @returns VINF_SUCCESS.
920 * @param pNode Pointer to an IOTLBE.
921 * @param pvUser Pointer to an IOTLBEINFOARG.
922 */
923static DECLCALLBACK(int) iommuAmdR3IotlbEntryInfo(PAVLU64NODECORE pNode, void *pvUser)
924{
925 /* Validate. */
926 PCIOTLBEINFOARG pArgs = (PCIOTLBEINFOARG)pvUser;
927 AssertPtr(pArgs);
928 AssertPtr(pArgs->pIommu);
929 AssertPtr(pArgs->pHlp);
930 Assert(pArgs->pIommu->u32Magic == IOMMU_MAGIC);
931
932 uint16_t const uDomainId = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
933 if (uDomainId == pArgs->uDomainId)
934 {
935 PCIOTLBE pIotlbe = (PCIOTLBE)pNode;
936 AVLU64KEY const uKey = pIotlbe->Core.Key;
937 uint64_t const uIova = IOMMU_IOTLB_KEY_GET_IOVA(uKey);
938 RTGCPHYS const GCPhysSpa = pIotlbe->PageLookup.GCPhysSpa;
939 uint8_t const cShift = pIotlbe->PageLookup.cShift;
940 size_t const cbPage = RT_BIT_64(cShift);
941 uint8_t const fPerm = pIotlbe->PageLookup.fPerm;
942 const char *pszPerm = iommuAmdMemAccessGetPermName(fPerm);
943 bool const fEvictPending = pIotlbe->fEvictPending;
944
945 PCDBGFINFOHLP pHlp = pArgs->pHlp;
946 pHlp->pfnPrintf(pHlp, " Key = %#RX64 (%#RX64)\n", uKey, uIova);
947 pHlp->pfnPrintf(pHlp, " GCPhys = %#RGp\n", GCPhysSpa);
948 pHlp->pfnPrintf(pHlp, " cShift = %u (%zu bytes)\n", cShift, cbPage);
949 pHlp->pfnPrintf(pHlp, " fPerm = %#x (%s)\n", fPerm, pszPerm);
950 pHlp->pfnPrintf(pHlp, " fEvictPending = %RTbool\n", fEvictPending);
951 }
952
953 return VINF_SUCCESS;
954}
955# endif /* IN_RING3 */
956
957
958/**
959 * Removes the IOTLB entry if it's associated with the specified domain ID.
960 *
961 * @returns VINF_SUCCESS.
962 * @param pNode Pointer to an IOTLBE.
963 * @param pvUser Pointer to an IOTLBEFLUSHARG containing the domain ID.
964 */
965static DECLCALLBACK(int) iommuAmdIotlbEntryRemoveDomainId(PAVLU64NODECORE pNode, void *pvUser)
966{
967 /* Validate. */
968 PCIOTLBEFLUSHARG pArgs = (PCIOTLBEFLUSHARG)pvUser;
969 AssertPtr(pArgs);
970 AssertPtr(pArgs->pIommu);
971 Assert(pArgs->pIommu->u32Magic == IOMMU_MAGIC);
972
973 uint16_t const uDomainId = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
974 if (uDomainId == pArgs->uDomainId)
975 {
976 /* Mark this entry is as invalidated and needs to be evicted later. */
977 PIOTLBE pIotlbe = (PIOTLBE)pNode;
978 pIotlbe->fEvictPending = true;
979 iommuAmdIotlbEntryMoveToLru(pArgs->pIommu, (PIOTLBE)pNode);
980 }
981 return VINF_SUCCESS;
982}
983
984
985/**
986 * Inserts an IOTLB entry into the cache.
987 *
988 * @param pThis The IOMMU device state.
989 * @param pIotlbe The IOTLB entry to initialize and insert.
990 * @param uDomainId The domain ID.
991 * @param uIova The I/O virtual address.
992 * @param pPageLookup The I/O page lookup result of the access.
993 */
994static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOTLBE pIotlbe, uint16_t uDomainId, uint64_t uIova,
995 PCIOPAGELOOKUP pPageLookup)
996{
997 /* Initialize the IOTLB entry with results of the I/O page walk. */
998 pIotlbe->Core.Key = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
999 pIotlbe->PageLookup = *pPageLookup;
1000
1001 /* Validate. */
1002 Assert(pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL);
1003 Assert(!pIotlbe->fEvictPending);
1004
1005 /* Check if the entry already exists. */
1006 PIOTLBE pFound = (PIOTLBE)RTAvlU64Get(&pThis->TreeIotlbe, pIotlbe->Core.Key);
1007 if (!pFound)
1008 {
1009 /* Insert the entry into the cache. */
1010 bool const fInserted = RTAvlU64Insert(&pThis->TreeIotlbe, &pIotlbe->Core);
1011 Assert(fInserted); NOREF(fInserted);
1012 Assert(pThis->cCachedIotlbes < IOMMU_IOTLBE_MAX);
1013 ++pThis->cCachedIotlbes;
1014 STAM_COUNTER_INC(&pThis->StatIotlbeCached);
1015 }
1016 else
1017 {
1018 /* Update the existing entry. */
1019 if (pFound->fEvictPending)
1020 {
1021 pFound->fEvictPending = false;
1022 STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse);
1023 }
1024 Assert(pFound->PageLookup.cShift == pPageLookup->cShift);
1025 pFound->PageLookup.fPerm = pPageLookup->fPerm;
1026 pFound->PageLookup.GCPhysSpa = pPageLookup->GCPhysSpa;
1027 }
1028}
1029
1030
1031/**
1032 * Removes an IOTLB entry from the cache for the given key.
1033 *
1034 * @returns Pointer to the removed IOTLB entry, NULL if the entry wasn't found in
1035 * the tree.
1036 * @param pThis The IOMMU device state.
1037 * @param uKey The key of the IOTLB entry to remove.
1038 */
1039static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, AVLU64KEY uKey)
1040{
1041 PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Remove(&pThis->TreeIotlbe, uKey);
1042 if (pIotlbe)
1043 {
1044 if (pIotlbe->fEvictPending)
1045 STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse);
1046
1047 RT_ZERO(pIotlbe->Core);
1048 RT_ZERO(pIotlbe->PageLookup);
1049 /* We must not erase the LRU node connections here! */
1050 pIotlbe->fEvictPending = false;
1051 Assert(pIotlbe->Core.Key == IOMMU_IOTLB_KEY_NIL);
1052
1053 Assert(pThis->cCachedIotlbes > 0);
1054 --pThis->cCachedIotlbes;
1055 STAM_COUNTER_DEC(&pThis->StatIotlbeCached);
1056 }
1057 return pIotlbe;
1058}
1059
1060
1061/**
1062 * Looks up an IOTLB from the cache.
1063 *
1064 * @returns Pointer to IOTLB entry if found, NULL otherwise.
1065 * @param pThis The IOMMU device state.
1066 * @param uDomainId The domain ID.
1067 * @param uIova The I/O virtual address.
1068 */
1069static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, uint64_t uDomainId, uint64_t uIova)
1070{
1071 uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
1072 PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThis->TreeIotlbe, uKey);
1073 if ( pIotlbe
1074 && !pIotlbe->fEvictPending)
1075 return pIotlbe;
1076
1077 /*
1078 * Domain Id wildcard invalidations only marks entries for eviction later but doesn't remove
1079 * them from the cache immediately. We found an entry pending eviction, just return that
1080 * nothing was found (rather than evicting now).
1081 */
1082 return NULL;
1083}
1084
1085
1086/**
1087 * Adds an IOTLB entry to the cache.
1088 *
1089 * @param pThis The IOMMU device state.
1090 * @param uDomainId The domain ID.
1091 * @param uIova The I/O virtual address.
1092 * @param pPageLookup The I/O page lookup result of the access.
1093 */
1094static void iommuAmdIotlbAdd(PIOMMU pThis, uint16_t uDomainId, uint64_t uIova, PCIOPAGELOOKUP pPageLookup)
1095{
1096 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
1097 Assert(pPageLookup);
1098 Assert(pPageLookup->cShift <= 31);
1099 Assert(pPageLookup->fPerm != IOMMU_IO_PERM_NONE);
1100
1101 /*
1102 * If there are no unused IOTLB entries, evict the LRU entry.
1103 * Otherwise, get a new IOTLB entry from the pre-allocated list.
1104 */
1105 if (pThis->idxUnusedIotlbe == IOMMU_IOTLBE_MAX)
1106 {
1107 /* Grab the least recently used entry. */
1108 PIOTLBE pIotlbe = RTListGetFirst(&pThis->LstLruIotlbe, IOTLBE, NdLru);
1109 Assert(pIotlbe);
1110
1111 /* If the entry is in the cache, remove it. */
1112 if (pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL)
1113 iommuAmdIotlbEntryRemove(pThis, pIotlbe->Core.Key);
1114
1115 /* Initialize and insert the IOTLB entry into the cache. */
1116 iommuAmdIotlbEntryInsert(pThis, pIotlbe, uDomainId, uIova, pPageLookup);
1117
1118 /* Move the entry to the most recently used slot. */
1119 iommuAmdIotlbEntryMoveToMru(pThis, pIotlbe);
1120 }
1121 else
1122 {
1123 /* Grab an unused IOTLB entry from the pre-allocated list. */
1124 PIOTLBE pIotlbe = &pThis->paIotlbes[pThis->idxUnusedIotlbe];
1125 ++pThis->idxUnusedIotlbe;
1126
1127 /* Initialize and insert the IOTLB entry into the cache. */
1128 iommuAmdIotlbEntryInsert(pThis, pIotlbe, uDomainId, uIova, pPageLookup);
1129
1130 /* Add the entry to the most recently used slot. */
1131 RTListAppend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
1132 }
1133}
1134
1135
1136/**
1137 * Removes all IOTLB entries from the cache.
1138 *
1139 * @param pDevIns The IOMMU instance data.
1140 */
1141static void iommuAmdIotlbRemoveAll(PPDMDEVINS pDevIns)
1142{
1143 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1144 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1145
1146 if (pThis->cCachedIotlbes > 0)
1147 {
1148 pThis->idxUnusedIotlbe = 0;
1149 size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
1150 RT_BZERO(pThis->paIotlbes, cbIotlbes);
1151 pThis->cCachedIotlbes = 0;
1152 STAM_COUNTER_RESET(&pThis->StatIotlbeCached);
1153 RTListInit(&pThis->LstLruIotlbe);
1154 }
1155
1156 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1157}
1158
1159
1160/**
1161 * Removes IOTLB entries for the range of I/O virtual addresses and the specified
1162 * domain ID from the cache.
1163 *
1164 * @param pDevIns The IOMMU instance data.
1165 * @param uDomainId The domain ID.
1166 * @param uIova The I/O virtual address to invalidate.
1167 * @param cbInvalidate The size of the invalidation (must be 4K aligned).
1168 */
1169static void iommuAmdIotlbRemoveRange(PPDMDEVINS pDevIns, uint16_t uDomainId, uint64_t uIova, size_t cbInvalidate)
1170{
1171 /* Validate. */
1172 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
1173 Assert(!(cbInvalidate & X86_PAGE_4K_OFFSET_MASK));
1174 Assert(cbInvalidate >= X86_PAGE_4K_SIZE);
1175
1176 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1177 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1178
1179 do
1180 {
1181 uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
1182 PIOTLBE pIotlbe = iommuAmdIotlbEntryRemove(pThis, uKey);
1183 if (pIotlbe)
1184 iommuAmdIotlbEntryMoveToLru(pThis, pIotlbe);
1185 uIova += X86_PAGE_4K_SIZE;
1186 cbInvalidate -= X86_PAGE_4K_SIZE;
1187 } while (cbInvalidate > 0);
1188
1189 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1190}
1191
1192
1193/**
1194 * Removes all IOTLB entries for the specified domain ID.
1195 *
1196 * @param pDevIns The IOMMU instance data.
1197 * @param uDomainId The domain ID.
1198 */
1199static void iommuAmdIotlbRemoveDomainId(PPDMDEVINS pDevIns, uint16_t uDomainId)
1200{
1201 /*
1202 * We need to iterate the tree and search based on the domain ID.
1203 * But it seems we cannot remove items while iterating the tree.
1204 * Thus, we simply mark entries for eviction later but move them to the LRU
1205 * so they will eventually get evicted and re-cycled as the cache gets re-populated.
1206 */
1207 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1208 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1209
1210 IOTLBEFLUSHARG Args;
1211 Args.pIommu = pThis;
1212 Args.uDomainId = uDomainId;
1213 RTAvlU64DoWithAll(&pThis->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &Args);
1214
1215 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1216}
1217
1218
1219/**
1220 * Adds or updates IOTLB entries for the given range of I/O virtual addresses.
1221 *
1222 * @param pDevIns The IOMMU instance data.
1223 * @param uDomainId The domain ID.
1224 * @param uIova The I/O virtual address.
1225 * @param cbAccess The size of the access (must be 4K aligned).
1226 * @param GCPhysSpa The translated system-physical address.
1227 * @param fPerm The I/O permissions for the access, see IOMMU_IO_PERM_XXX.
1228 */
1229static void iommuAmdIotlbAddRange(PPDMDEVINS pDevIns, uint16_t uDomainId, uint64_t uIova, size_t cbAccess, RTGCPHYS GCPhysSpa,
1230 uint8_t fPerm)
1231{
1232 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
1233 Assert(!(GCPhysSpa & X86_PAGE_4K_OFFSET_MASK));
1234 Assert(!(cbAccess & X86_PAGE_4K_OFFSET_MASK));
1235 Assert(cbAccess >= X86_PAGE_4K_SIZE);
1236
1237 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1238
1239 /* Add IOTLB entries for every page in the access. */
1240 IOPAGELOOKUP PageLookup;
1241 RT_ZERO(PageLookup);
1242 PageLookup.cShift = X86_PAGE_4K_SHIFT;
1243 PageLookup.fPerm = fPerm;
1244 PageLookup.GCPhysSpa = GCPhysSpa;
1245
1246 size_t cPages = cbAccess / X86_PAGE_4K_SIZE;
1247 cPages = RT_MIN(cPages, IOMMU_IOTLBE_MAX);
1248
1249 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1250 /** @todo Re-check DTE cache? */
1251 do
1252 {
1253 iommuAmdIotlbAdd(pThis, uDomainId, uIova, &PageLookup);
1254 uIova += X86_PAGE_4K_SIZE;
1255 PageLookup.GCPhysSpa += X86_PAGE_4K_SIZE;
1256 --cPages;
1257 } while (cPages > 0);
1258 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1259}
1260#endif /* IOMMU_WITH_IOTLBE_CACHE */
1261
1262
1263#ifdef IOMMU_WITH_IRTE_CACHE
1264/**
1265 * Looks up an IRTE cache entry.
1266 *
1267 * @returns Index of the found entry, or cache capacity if not found.
1268 * @param pThis The IOMMU device state.
1269 * @param uDevId The device ID (bus, device, function).
1270 * @param offIrte The offset into the interrupt remap table.
1271 */
1272static uint16_t iommuAmdIrteCacheEntryLookup(PCIOMMU pThis, uint16_t uDevId, uint16_t offIrte)
1273{
1274 /** @todo Consider sorting and binary search when the cache capacity grows.
1275 * For the IRTE cache this should be okay since typically guests do not alter the
1276 * interrupt remapping once programmed, so hopefully sorting shouldn't happen
1277 * often. */
1278 uint32_t const uKey = IOMMU_IRTE_CACHE_KEY_MAKE(uDevId, offIrte);
1279 uint16_t const cIrteCache = pThis->cIrteCache;
1280 for (uint16_t i = 0; i < cIrteCache; i++)
1281 {
1282 PCIRTECACHE pIrteCache = &pThis->paIrteCache[i];
1283 if (pIrteCache->uKey == uKey)
1284 return i;
1285 }
1286 return cIrteCache;
1287}
1288
1289
1290/**
1291 * Gets an free/unused IRTE cache entry.
1292 *
1293 * @returns The index of an unused entry, or cache capacity if the cache is full.
1294 * @param pThis The IOMMU device state.
1295 */
1296static uint16_t iommuAmdIrteCacheEntryGetUnused(PCIOMMU pThis)
1297{
1298 uint16_t const cIrteCache = pThis->cIrteCache;
1299 for (uint16_t i = 0; i < cIrteCache; i++)
1300 {
1301 PCIRTECACHE pIrteCache = &pThis->paIrteCache[i];
1302 if (pIrteCache->uKey == IOMMU_IRTE_CACHE_KEY_NIL)
1303 {
1304 Assert(!pIrteCache->Irte.u32);
1305 return i;
1306 }
1307 }
1308 return cIrteCache;
1309}
1310
1311
1312/**
1313 * Looks up the IRTE cache for the given MSI.
1314 *
1315 * @returns VBox status code.
1316 * @param pDevIns The IOMMU instance data.
1317 * @param uDevId The device ID (bus, device, function).
1318 * @param enmOp The IOMMU operation being performed.
1319 * @param pMsiIn The source MSI.
1320 * @param pMsiOut Where to store the remapped MSI.
1321 */
1322static int iommuAmdIrteCacheLookup(PPDMDEVINS pDevIns, uint16_t uDevId, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
1323{
1324 RT_NOREF(enmOp); /* May need it if we have to report errors (currently we fallback to the slower path to do that). */
1325
1326 int rc = VERR_NOT_FOUND;
1327 /* Deal with such cases in the slower/fallback path. */
1328 if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
1329 { /* likely */ }
1330 else
1331 return rc;
1332
1333 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1334 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1335
1336 PCDTECACHE pDteCache = &pThis->paDteCache[uDevId];
1337 if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_INTR_MAP_VALID))
1338 == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_INTR_MAP_VALID))
1339 {
1340 Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE); /* Paranoia. */
1341
1342 /* Currently, we only cache remapping of fixed and arbitrated interrupts. */
1343 uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
1344 if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
1345 {
1346 uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTECACHE_F_INTR_CTRL_SHIFT)
1347 & IOMMU_DTECACHE_F_INTR_CTRL_MASK;
1348 if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
1349 {
1350 /* Interrupt table length has been verified prior to adding entries to the cache. */
1351 uint16_t const offIrte = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32);
1352 uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
1353 if (idxIrteCache < pThis->cIrteCache)
1354 {
1355 PCIRTE_T pIrte = &pThis->paIrteCache[idxIrteCache].Irte;
1356 iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte);
1357 rc = VINF_SUCCESS;
1358 }
1359 }
1360 else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
1361 {
1362 *pMsiOut = *pMsiIn;
1363 rc = VINF_SUCCESS;
1364 }
1365 }
1366 }
1367 else if (pDteCache->fFlags & IOMMU_DTECACHE_F_PRESENT)
1368 {
1369 *pMsiOut = *pMsiIn;
1370 rc = VINF_SUCCESS;
1371 }
1372
1373 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1374 return rc;
1375}
1376
1377
1378/**
1379 * Adds or updates the IRTE cache for the given IRTE.
1380 *
1381 * @returns VBox status code.
1382 * @retval VERR_OUT_OF_RESOURCES if the cache is full.
1383 *
1384 * @param pDevIns The IOMMU instance data.
1385 * @param uDevId The device ID (bus, device, function).
1386 * @param offIrte The offset into the interrupt remap table.
1387 * @param pIrte The IRTE to cache.
1388 */
1389static int iommuAmdIrteCacheAdd(PPDMDEVINS pDevIns, uint16_t uDevId, uint16_t offIrte, PCIRTE_T pIrte)
1390{
1391 Assert(offIrte != 0xffff); /* Shouldn't be a valid IRTE table offset since sizeof(IRTE) is a multiple of 4. */
1392
1393 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1394 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1395
1396 /* Find an existing entry or get an unused slot. */
1397 uint16_t const cIrteCache = pThis->cIrteCache;
1398 uint16_t idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
1399 if (idxIrteCache == pThis->cIrteCache)
1400 idxIrteCache = iommuAmdIrteCacheEntryGetUnused(pThis);
1401
1402 /* Update the cache entry. */
1403 int rc;
1404 if (idxIrteCache < cIrteCache)
1405 {
1406 PIRTECACHE pIrteCache = &pThis->paIrteCache[idxIrteCache];
1407 pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_MAKE(uDevId, offIrte);
1408 pIrteCache->Irte.u32 = pIrte->u32;
1409 rc = VINF_SUCCESS;
1410 }
1411 else
1412 rc = VERR_OUT_OF_RESOURCES;
1413
1414 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1415 return rc;
1416}
1417
1418
1419/**
1420 * Removes IRTE cache entries for the given device ID.
1421 *
1422 * @param pDevIns The IOMMU instance data.
1423 * @param uDevId The device ID (bus, device, function).
1424 */
1425static void iommuAmdIrteCacheRemove(PPDMDEVINS pDevIns, uint16_t uDevId)
1426{
1427 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1428 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1429 uint16_t const cIrteCache = pThis->cIrteCache;
1430 for (uint16_t i = 0; i < cIrteCache; i++)
1431 {
1432 PIRTECACHE pIrteCache = &pThis->paIrteCache[i];
1433 if (uDevId == IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(pIrteCache->uKey))
1434 {
1435 pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_NIL;
1436 pIrteCache->Irte.u32 = 0;
1437 }
1438 }
1439 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1440}
1441
1442
1443/**
1444 * Removes all IRTE cache entries.
1445 *
1446 * @param pDevIns The IOMMU instance data.
1447 */
1448static void iommuAmdIrteCacheRemoveAll(PPDMDEVINS pDevIns)
1449{
1450 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1451 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1452 uint16_t const cIrteCache = pThis->cIrteCache;
1453 for (uint16_t i = 0; i < cIrteCache; i++)
1454 {
1455 PIRTECACHE pIrteCache = &pThis->paIrteCache[i];
1456 pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_NIL;
1457 pIrteCache->Irte.u32 = 0;
1458 }
1459 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1460}
1461#endif /* IOMMU_WITH_IRTE_CACHE */
1462
1463
1464#ifdef IOMMU_WITH_DTE_CACHE
1465/**
1466 * Updates the I/O device flags for the given device ID.
1467 *
1468 * @param pDevIns The IOMMU instance data.
1469 * @param uDevId The device ID (bus, device, function).
1470 * @param pDte The device table entry. Can be NULL only when @a fFlags is
1471 * 0.
1472 * @param fOrMask The device flags (usually compound flags) to OR in with the
1473 * basic flags, see IOMMU_DTECACHE_F_XXX. Pass 0 to flush the DTE
1474 * from the cache.
1475 */
1476static void iommuAmdDteCacheUpdate(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, uint16_t fOrMask)
1477{
1478 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1479 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1480
1481 if (fOrMask & IOMMU_DTECACHE_F_PRESENT)
1482 {
1483 Assert(pDte);
1484 pThis->paDteCache[uDevId].fFlags = iommuAmdGetBasicDevFlags(pDte) | fOrMask;
1485 pThis->paDteCache[uDevId].uDomainId = pDte->n.u16DomainId;
1486 }
1487 else
1488 {
1489 pThis->paDteCache[uDevId].fFlags = 0;
1490 pThis->paDteCache[uDevId].uDomainId = 0;
1491 }
1492
1493 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1494}
1495
1496
1497/**
1498 * Sets one or more I/O device flags if the device is present in the cache.
1499 *
1500 * @param pDevIns The IOMMU instance data.
1501 * @param uDevId The device ID (bus, device, function).
1502 * @param fDevIoFlags The device flags to set.
1503 */
1504static void iommuAmdDteCacheSetFlags(PPDMDEVINS pDevIns, uint16_t uDevId, uint16_t fDevIoFlags)
1505{
1506 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1507 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1508
1509 if (fDevIoFlags & IOMMU_DTECACHE_F_PRESENT)
1510 pThis->paDteCache[uDevId].fFlags |= fDevIoFlags;
1511
1512 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1513}
1514
1515
1516/**
1517 * Removes all entries in the device table entry cache.
1518 *
1519 * @param pDevIns The IOMMU instance data.
1520 */
1521static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns)
1522{
1523 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1524 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1525
1526 size_t const cbDteCache = sizeof(DTECACHE) * IOMMU_DTE_CACHE_MAX;
1527 RT_BZERO(pThis->paDteCache, cbDteCache);
1528
1529 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1530}
1531#endif /* IOMMU_WITH_DTE_CACHE */
1532
1533
1534/**
1535 * Atomically reads the control register without locking the IOMMU device.
1536 *
1537 * @returns The control register.
1538 * @param pThis The IOMMU device state.
1539 */
1540DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis)
1541{
1542 IOMMU_CTRL_T Ctrl;
1543 Ctrl.u64 = ASMAtomicReadU64((volatile uint64_t *)&pThis->Ctrl.u64);
1544 return Ctrl;
1545}
1546
1547
1548/**
1549 * Returns whether MSI is enabled for the IOMMU.
1550 *
1551 * @returns Whether MSI is enabled.
1552 * @param pDevIns The IOMMU device instance.
1553 *
1554 * @note There should be a PCIDevXxx function for this.
1555 */
1556static bool iommuAmdIsMsiEnabled(PPDMDEVINS pDevIns)
1557{
1558 MSI_CAP_HDR_T MsiCapHdr;
1559 MsiCapHdr.u32 = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_MSI_CAP_HDR);
1560 return MsiCapHdr.n.u1MsiEnable;
1561}
1562
1563
1564/**
1565 * Signals a PCI target abort.
1566 *
1567 * @param pDevIns The IOMMU device instance.
1568 */
1569static void iommuAmdSetPciTargetAbort(PPDMDEVINS pDevIns)
1570{
1571 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1572 uint16_t const u16Status = PDMPciDevGetStatus(pPciDev) | VBOX_PCI_STATUS_SIG_TARGET_ABORT;
1573 PDMPciDevSetStatus(pPciDev, u16Status);
1574}
1575
1576
1577/**
1578 * Wakes up the command thread if there are commands to be processed or if
1579 * processing is requested to be stopped by software.
1580 *
1581 * @param pDevIns The IOMMU device instance.
1582 *
1583 * @remarks The IOMMU lock must be held while calling this!
1584 */
1585static void iommuAmdCmdThreadWakeUpIfNeeded(PPDMDEVINS pDevIns)
1586{
1587 Log4Func(("\n"));
1588
1589 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1590 if (pThis->Status.n.u1CmdBufRunning)
1591 {
1592 Log4Func(("Signaling command thread\n"));
1593 PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
1594 }
1595}
1596
1597
1598/**
1599 * Reads the Device Table Base Address Register.
1600 */
1601static VBOXSTRICTRC iommuAmdDevTabBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1602{
1603 RT_NOREF(pDevIns, offReg);
1604 *pu64Value = pThis->aDevTabBaseAddrs[0].u64;
1605 return VINF_SUCCESS;
1606}
1607
1608
1609/**
1610 * Reads the Command Buffer Base Address Register.
1611 */
1612static VBOXSTRICTRC iommuAmdCmdBufBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1613{
1614 RT_NOREF(pDevIns, offReg);
1615 *pu64Value = pThis->CmdBufBaseAddr.u64;
1616 return VINF_SUCCESS;
1617}
1618
1619
1620/**
1621 * Reads the Event Log Base Address Register.
1622 */
1623static VBOXSTRICTRC iommuAmdEvtLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1624{
1625 RT_NOREF(pDevIns, offReg);
1626 *pu64Value = pThis->EvtLogBaseAddr.u64;
1627 return VINF_SUCCESS;
1628}
1629
1630
1631/**
1632 * Reads the Control Register.
1633 */
1634static VBOXSTRICTRC iommuAmdCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1635{
1636 RT_NOREF(pDevIns, offReg);
1637 *pu64Value = pThis->Ctrl.u64;
1638 return VINF_SUCCESS;
1639}
1640
1641
1642/**
1643 * Reads the Exclusion Range Base Address Register.
1644 */
1645static VBOXSTRICTRC iommuAmdExclRangeBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1646{
1647 RT_NOREF(pDevIns, offReg);
1648 *pu64Value = pThis->ExclRangeBaseAddr.u64;
1649 return VINF_SUCCESS;
1650}
1651
1652
1653/**
1654 * Reads to the Exclusion Range Limit Register.
1655 */
1656static VBOXSTRICTRC iommuAmdExclRangeLimit_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1657{
1658 RT_NOREF(pDevIns, offReg);
1659 *pu64Value = pThis->ExclRangeLimit.u64;
1660 return VINF_SUCCESS;
1661}
1662
1663
1664/**
1665 * Reads to the Extended Feature Register.
1666 */
1667static VBOXSTRICTRC iommuAmdExtFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1668{
1669 RT_NOREF(pDevIns, offReg);
1670 *pu64Value = pThis->ExtFeat.u64;
1671 return VINF_SUCCESS;
1672}
1673
1674
1675/**
1676 * Reads to the PPR Log Base Address Register.
1677 */
1678static VBOXSTRICTRC iommuAmdPprLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1679{
1680 RT_NOREF(pDevIns, offReg);
1681 *pu64Value = pThis->PprLogBaseAddr.u64;
1682 return VINF_SUCCESS;
1683}
1684
1685
1686/**
1687 * Writes the Hardware Event Register (Hi).
1688 */
1689static VBOXSTRICTRC iommuAmdHwEvtHi_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1690{
1691 RT_NOREF(pDevIns, offReg);
1692 *pu64Value = pThis->HwEvtHi.u64;
1693 return VINF_SUCCESS;
1694}
1695
1696
1697/**
1698 * Reads the Hardware Event Register (Lo).
1699 */
1700static VBOXSTRICTRC iommuAmdHwEvtLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1701{
1702 RT_NOREF(pDevIns, offReg);
1703 *pu64Value = pThis->HwEvtLo;
1704 return VINF_SUCCESS;
1705}
1706
1707
1708/**
1709 * Reads the Hardware Event Status Register.
1710 */
1711static VBOXSTRICTRC iommuAmdHwEvtStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1712{
1713 RT_NOREF(pDevIns, offReg);
1714 *pu64Value = pThis->HwEvtStatus.u64;
1715 return VINF_SUCCESS;
1716}
1717
1718
1719/**
1720 * Reads to the GA Log Base Address Register.
1721 */
1722static VBOXSTRICTRC iommuAmdGALogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1723{
1724 RT_NOREF(pDevIns, offReg);
1725 *pu64Value = pThis->GALogBaseAddr.u64;
1726 return VINF_SUCCESS;
1727}
1728
1729
1730/**
1731 * Reads to the PPR Log B Base Address Register.
1732 */
1733static VBOXSTRICTRC iommuAmdPprLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1734{
1735 RT_NOREF(pDevIns, offReg);
1736 *pu64Value = pThis->PprLogBBaseAddr.u64;
1737 return VINF_SUCCESS;
1738}
1739
1740
1741/**
1742 * Reads to the Event Log B Base Address Register.
1743 */
1744static VBOXSTRICTRC iommuAmdEvtLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1745{
1746 RT_NOREF(pDevIns, offReg);
1747 *pu64Value = pThis->EvtLogBBaseAddr.u64;
1748 return VINF_SUCCESS;
1749}
1750
1751
1752/**
1753 * Reads the Device Table Segment Base Address Register.
1754 */
1755static VBOXSTRICTRC iommuAmdDevTabSegBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1756{
1757 RT_NOREF(pDevIns);
1758
1759 /* Figure out which segment is being written. */
1760 uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
1761 uint8_t const idxSegment = offSegment + 1;
1762 Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
1763
1764 *pu64Value = pThis->aDevTabBaseAddrs[idxSegment].u64;
1765 return VINF_SUCCESS;
1766}
1767
1768
1769/**
1770 * Reads the Device Specific Feature Extension (DSFX) Register.
1771 */
1772static VBOXSTRICTRC iommuAmdDevSpecificFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1773{
1774 RT_NOREF(pDevIns, offReg);
1775 *pu64Value = pThis->DevSpecificFeat.u64;
1776 return VINF_SUCCESS;
1777}
1778
1779/**
1780 * Reads the Device Specific Control Extension (DSCX) Register.
1781 */
1782static VBOXSTRICTRC iommuAmdDevSpecificCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1783{
1784 RT_NOREF(pDevIns, offReg);
1785 *pu64Value = pThis->DevSpecificCtrl.u64;
1786 return VINF_SUCCESS;
1787}
1788
1789
1790/**
1791 * Reads the Device Specific Status Extension (DSSX) Register.
1792 */
1793static VBOXSTRICTRC iommuAmdDevSpecificStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1794{
1795 RT_NOREF(pDevIns, offReg);
1796 *pu64Value = pThis->DevSpecificStatus.u64;
1797 return VINF_SUCCESS;
1798}
1799
1800
1801/**
1802 * Reads the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
1803 */
1804static VBOXSTRICTRC iommuAmdDevMsiVector_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1805{
1806 RT_NOREF(pDevIns, offReg);
1807 uint32_t const uLo = pThis->MiscInfo.au32[0];
1808 uint32_t const uHi = pThis->MiscInfo.au32[1];
1809 *pu64Value = RT_MAKE_U64(uLo, uHi);
1810 return VINF_SUCCESS;
1811}
1812
1813
1814/**
1815 * Reads the MSI Capability Header Register (32-bit) and the MSI Address (Lo)
1816 * Register (32-bit).
1817 */
1818static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1819{
1820 RT_NOREF(pThis, offReg);
1821 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1822 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1823 uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
1824 uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
1825 *pu64Value = RT_MAKE_U64(uLo, uHi);
1826 return VINF_SUCCESS;
1827}
1828
1829
1830/**
1831 * Reads the MSI Address (Hi) Register (32-bit) and the MSI data register (32-bit).
1832 */
1833static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1834{
1835 RT_NOREF(pThis, offReg);
1836 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1837 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1838 uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
1839 uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
1840 *pu64Value = RT_MAKE_U64(uLo, uHi);
1841 return VINF_SUCCESS;
1842}
1843
1844
1845/**
1846 * Reads the Command Buffer Head Pointer Register.
1847 */
1848static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1849{
1850 RT_NOREF(pDevIns, offReg);
1851 *pu64Value = pThis->CmdBufHeadPtr.u64;
1852 return VINF_SUCCESS;
1853}
1854
1855
1856/**
1857 * Reads the Command Buffer Tail Pointer Register.
1858 */
1859static VBOXSTRICTRC iommuAmdCmdBufTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1860{
1861 RT_NOREF(pDevIns, offReg);
1862 *pu64Value = pThis->CmdBufTailPtr.u64;
1863 return VINF_SUCCESS;
1864}
1865
1866
1867/**
1868 * Reads the Event Log Head Pointer Register.
1869 */
1870static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1871{
1872 RT_NOREF(pDevIns, offReg);
1873 *pu64Value = pThis->EvtLogHeadPtr.u64;
1874 return VINF_SUCCESS;
1875}
1876
1877
1878/**
1879 * Reads the Event Log Tail Pointer Register.
1880 */
1881static VBOXSTRICTRC iommuAmdEvtLogTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1882{
1883 RT_NOREF(pDevIns, offReg);
1884 *pu64Value = pThis->EvtLogTailPtr.u64;
1885 return VINF_SUCCESS;
1886}
1887
1888
1889/**
1890 * Reads the Status Register.
1891 */
1892static VBOXSTRICTRC iommuAmdStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1893{
1894 RT_NOREF(pDevIns, offReg);
1895 *pu64Value = pThis->Status.u64;
1896 return VINF_SUCCESS;
1897}
1898
1899
1900/**
1901 * Writes the Device Table Base Address Register.
1902 */
1903static VBOXSTRICTRC iommuAmdDevTabBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1904{
1905 RT_NOREF(pDevIns, offReg);
1906
1907 /* Mask out all unrecognized bits. */
1908 u64Value &= IOMMU_DEV_TAB_BAR_VALID_MASK;
1909
1910 /* Update the register. */
1911 pThis->aDevTabBaseAddrs[0].u64 = u64Value;
1912
1913 /* Paranoia. */
1914 Assert(pThis->aDevTabBaseAddrs[0].n.u9Size <= g_auDevTabSegMaxSizes[0]);
1915 return VINF_SUCCESS;
1916}
1917
1918
1919/**
1920 * Writes the Command Buffer Base Address Register.
1921 */
1922static VBOXSTRICTRC iommuAmdCmdBufBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1923{
1924 RT_NOREF(pDevIns, offReg);
1925
1926 /*
1927 * While this is not explicitly specified like the event log base address register,
1928 * the AMD IOMMU spec. does specify "CmdBufRun must be 0b to modify the command buffer registers properly".
1929 * Inconsistent specs :/
1930 */
1931 if (pThis->Status.n.u1CmdBufRunning)
1932 {
1933 LogFunc(("Setting CmdBufBar (%#RX64) when command buffer is running -> Ignored\n", u64Value));
1934 return VINF_SUCCESS;
1935 }
1936
1937 /* Mask out all unrecognized bits. */
1938 CMD_BUF_BAR_T CmdBufBaseAddr;
1939 CmdBufBaseAddr.u64 = u64Value & IOMMU_CMD_BUF_BAR_VALID_MASK;
1940
1941 /* Validate the length. */
1942 if (CmdBufBaseAddr.n.u4Len >= 8)
1943 {
1944 /* Update the register. */
1945 pThis->CmdBufBaseAddr.u64 = CmdBufBaseAddr.u64;
1946
1947 /*
1948 * Writing the command buffer base address, clears the command buffer head and tail pointers.
1949 * See AMD IOMMU spec. 2.4 "Commands".
1950 */
1951 pThis->CmdBufHeadPtr.u64 = 0;
1952 pThis->CmdBufTailPtr.u64 = 0;
1953 }
1954 else
1955 LogFunc(("Command buffer length (%#x) invalid -> Ignored\n", CmdBufBaseAddr.n.u4Len));
1956
1957 return VINF_SUCCESS;
1958}
1959
1960
1961/**
1962 * Writes the Event Log Base Address Register.
1963 */
1964static VBOXSTRICTRC iommuAmdEvtLogBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1965{
1966 RT_NOREF(pDevIns, offReg);
1967
1968 /*
1969 * IOMMU behavior is undefined when software writes this register when event logging is running.
1970 * In our emulation, we ignore the write entirely.
1971 * See AMD IOMMU spec. "Event Log Base Address Register".
1972 */
1973 if (pThis->Status.n.u1EvtLogRunning)
1974 {
1975 LogFunc(("Setting EvtLogBar (%#RX64) when event logging is running -> Ignored\n", u64Value));
1976 return VINF_SUCCESS;
1977 }
1978
1979 /* Mask out all unrecognized bits. */
1980 u64Value &= IOMMU_EVT_LOG_BAR_VALID_MASK;
1981 EVT_LOG_BAR_T EvtLogBaseAddr;
1982 EvtLogBaseAddr.u64 = u64Value;
1983
1984 /* Validate the length. */
1985 if (EvtLogBaseAddr.n.u4Len >= 8)
1986 {
1987 /* Update the register. */
1988 pThis->EvtLogBaseAddr.u64 = EvtLogBaseAddr.u64;
1989
1990 /*
1991 * Writing the event log base address, clears the event log head and tail pointers.
1992 * See AMD IOMMU spec. 2.5 "Event Logging".
1993 */
1994 pThis->EvtLogHeadPtr.u64 = 0;
1995 pThis->EvtLogTailPtr.u64 = 0;
1996 }
1997 else
1998 LogFunc(("Event log length (%#x) invalid -> Ignored\n", EvtLogBaseAddr.n.u4Len));
1999
2000 return VINF_SUCCESS;
2001}
2002
2003
2004/**
2005 * Writes the Control Register.
2006 */
2007static VBOXSTRICTRC iommuAmdCtrl_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2008{
2009 RT_NOREF(pDevIns, offReg);
2010
2011 /* Mask out all unrecognized bits. */
2012 u64Value &= IOMMU_CTRL_VALID_MASK;
2013 IOMMU_CTRL_T NewCtrl;
2014 NewCtrl.u64 = u64Value;
2015
2016 /* Ensure the device table segments are within limits. */
2017 if (NewCtrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup)
2018 {
2019 IOMMU_CTRL_T const OldCtrl = pThis->Ctrl;
2020
2021 /* Update the register. */
2022 ASMAtomicWriteU64(&pThis->Ctrl.u64, NewCtrl.u64);
2023
2024 bool const fNewIommuEn = NewCtrl.n.u1IommuEn;
2025 bool const fOldIommuEn = OldCtrl.n.u1IommuEn;
2026
2027 /* Enable or disable event logging when the bit transitions. */
2028 bool const fOldEvtLogEn = OldCtrl.n.u1EvtLogEn;
2029 bool const fNewEvtLogEn = NewCtrl.n.u1EvtLogEn;
2030 if ( fOldEvtLogEn != fNewEvtLogEn
2031 || fOldIommuEn != fNewIommuEn)
2032 {
2033 if ( fNewIommuEn
2034 && fNewEvtLogEn)
2035 {
2036 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_OVERFLOW);
2037 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_RUNNING);
2038 }
2039 else
2040 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_RUNNING);
2041 }
2042
2043 /* Enable or disable command buffer processing when the bit transitions. */
2044 bool const fOldCmdBufEn = OldCtrl.n.u1CmdBufEn;
2045 bool const fNewCmdBufEn = NewCtrl.n.u1CmdBufEn;
2046 if ( fOldCmdBufEn != fNewCmdBufEn
2047 || fOldIommuEn != fNewIommuEn)
2048 {
2049 if ( fNewCmdBufEn
2050 && fNewIommuEn)
2051 {
2052 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_CMD_BUF_RUNNING);
2053 LogFunc(("Command buffer enabled\n"));
2054
2055 /* Wake up the command thread to start processing commands. */
2056 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
2057 }
2058 else
2059 {
2060 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
2061 LogFunc(("Command buffer disabled\n"));
2062 }
2063 }
2064 }
2065 else
2066 {
2067 LogFunc(("Invalid number of device table segments enabled, exceeds %#x (%#RX64) -> Ignored!\n",
2068 pThis->ExtFeat.n.u2DevTabSegSup, NewCtrl.u64));
2069 }
2070
2071 return VINF_SUCCESS;
2072}
2073
2074
2075/**
2076 * Writes to the Exclusion Range Base Address Register.
2077 */
2078static VBOXSTRICTRC iommuAmdExclRangeBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2079{
2080 RT_NOREF(pDevIns, offReg);
2081 pThis->ExclRangeBaseAddr.u64 = u64Value & IOMMU_EXCL_RANGE_BAR_VALID_MASK;
2082 return VINF_SUCCESS;
2083}
2084
2085
2086/**
2087 * Writes to the Exclusion Range Limit Register.
2088 */
2089static VBOXSTRICTRC iommuAmdExclRangeLimit_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2090{
2091 RT_NOREF(pDevIns, offReg);
2092 u64Value &= IOMMU_EXCL_RANGE_LIMIT_VALID_MASK;
2093 u64Value |= UINT64_C(0xfff);
2094 pThis->ExclRangeLimit.u64 = u64Value;
2095 return VINF_SUCCESS;
2096}
2097
2098
2099/**
2100 * Writes the Hardware Event Register (Hi).
2101 */
2102static VBOXSTRICTRC iommuAmdHwEvtHi_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2103{
2104 /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
2105 RT_NOREF(pDevIns, offReg);
2106 LogFlowFunc(("Writing %#RX64 to hardware event (Hi) register!\n", u64Value));
2107 pThis->HwEvtHi.u64 = u64Value;
2108 return VINF_SUCCESS;
2109}
2110
2111
2112/**
2113 * Writes the Hardware Event Register (Lo).
2114 */
2115static VBOXSTRICTRC iommuAmdHwEvtLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2116{
2117 /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
2118 RT_NOREF(pDevIns, offReg);
2119 LogFlowFunc(("Writing %#RX64 to hardware event (Lo) register!\n", u64Value));
2120 pThis->HwEvtLo = u64Value;
2121 return VINF_SUCCESS;
2122}
2123
2124
2125/**
2126 * Writes the Hardware Event Status Register.
2127 */
2128static VBOXSTRICTRC iommuAmdHwEvtStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2129{
2130 RT_NOREF(pDevIns, offReg);
2131
2132 /* Mask out all unrecognized bits. */
2133 u64Value &= IOMMU_HW_EVT_STATUS_VALID_MASK;
2134
2135 /*
2136 * The two bits (HEO and HEV) are RW1C (Read/Write 1-to-Clear; writing 0 has no effect).
2137 * If the current status bits or the bits being written are both 0, we've nothing to do.
2138 * The Overflow bit (bit 1) is only valid when the Valid bit (bit 0) is 1.
2139 */
2140 uint64_t HwStatus = pThis->HwEvtStatus.u64;
2141 if (!(HwStatus & RT_BIT(0)))
2142 return VINF_SUCCESS;
2143 if (u64Value & HwStatus & RT_BIT_64(0))
2144 HwStatus &= ~RT_BIT_64(0);
2145 if (u64Value & HwStatus & RT_BIT_64(1))
2146 HwStatus &= ~RT_BIT_64(1);
2147
2148 /* Update the register. */
2149 pThis->HwEvtStatus.u64 = HwStatus;
2150 return VINF_SUCCESS;
2151}
2152
2153
2154/**
2155 * Writes the Device Table Segment Base Address Register.
2156 */
2157static VBOXSTRICTRC iommuAmdDevTabSegBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2158{
2159 RT_NOREF(pDevIns);
2160
2161 /* Figure out which segment is being written. */
2162 uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
2163 uint8_t const idxSegment = offSegment + 1;
2164 Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
2165
2166 /* Mask out all unrecognized bits. */
2167 u64Value &= IOMMU_DEV_TAB_SEG_BAR_VALID_MASK;
2168 DEV_TAB_BAR_T DevTabSegBar;
2169 DevTabSegBar.u64 = u64Value;
2170
2171 /* Validate the size. */
2172 uint16_t const uSegSize = DevTabSegBar.n.u9Size;
2173 uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[idxSegment];
2174 if (uSegSize <= uMaxSegSize)
2175 {
2176 /* Update the register. */
2177 pThis->aDevTabBaseAddrs[idxSegment].u64 = u64Value;
2178 }
2179 else
2180 LogFunc(("Device table segment (%u) size invalid (%#RX32) -> Ignored\n", idxSegment, uSegSize));
2181
2182 return VINF_SUCCESS;
2183}
2184
2185
2186/**
2187 * Writes the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
2188 */
2189static VBOXSTRICTRC iommuAmdDevMsiVector_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2190{
2191 RT_NOREF(pDevIns, offReg);
2192
2193 /* MSI Vector Register 0 is read-only. */
2194 /* MSI Vector Register 1. */
2195 uint32_t const uReg = u64Value >> 32;
2196 pThis->MiscInfo.au32[1] = uReg & IOMMU_MSI_VECTOR_1_VALID_MASK;
2197 return VINF_SUCCESS;
2198}
2199
2200
2201/**
2202 * Writes the MSI Capability Header Register (32-bit) or the MSI Address (Lo)
2203 * Register (32-bit).
2204 */
2205static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2206{
2207 RT_NOREF(pThis, offReg);
2208 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
2209 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
2210
2211 /* MSI capability header. */
2212 {
2213 uint32_t const uReg = u64Value;
2214 MSI_CAP_HDR_T MsiCapHdr;
2215 MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
2216 MsiCapHdr.n.u1MsiEnable = RT_BOOL(uReg & IOMMU_MSI_CAP_HDR_MSI_EN_MASK);
2217 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR, MsiCapHdr.u32);
2218 }
2219
2220 /* MSI Address Lo. */
2221 {
2222 uint32_t const uReg = u64Value >> 32;
2223 uint32_t const uMsiAddrLo = uReg & VBOX_MSI_ADDR_VALID_MASK;
2224 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, uMsiAddrLo);
2225 }
2226
2227 return VINF_SUCCESS;
2228}
2229
2230
2231/**
2232 * Writes the MSI Address (Hi) Register (32-bit) or the MSI data register (32-bit).
2233 */
2234static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2235{
2236 RT_NOREF(pThis, offReg);
2237 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
2238 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
2239
2240 /* MSI Address Hi. */
2241 {
2242 uint32_t const uReg = u64Value;
2243 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, uReg);
2244 }
2245
2246 /* MSI Data. */
2247 {
2248 uint32_t const uReg = u64Value >> 32;
2249 uint32_t const uMsiData = uReg & VBOX_MSI_DATA_VALID_MASK;
2250 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, uMsiData);
2251 }
2252
2253 return VINF_SUCCESS;
2254}
2255
2256
2257/**
2258 * Writes the Command Buffer Head Pointer Register.
2259 */
2260static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2261{
2262 RT_NOREF(pDevIns, offReg);
2263
2264 /*
2265 * IOMMU behavior is undefined when software writes this register when the command buffer is running.
2266 * In our emulation, we ignore the write entirely.
2267 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2268 */
2269 if (pThis->Status.n.u1CmdBufRunning)
2270 {
2271 LogFunc(("Setting CmdBufHeadPtr (%#RX64) when command buffer is running -> Ignored\n", u64Value));
2272 return VINF_SUCCESS;
2273 }
2274
2275 /*
2276 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2277 * In our emulation, we ignore the write entirely.
2278 */
2279 uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK;
2280 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
2281 Assert(cbBuf <= _512K);
2282 if (offBuf >= cbBuf)
2283 {
2284 LogFunc(("Setting CmdBufHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX23) -> Ignored\n", offBuf, cbBuf));
2285 return VINF_SUCCESS;
2286 }
2287
2288 /* Update the register. */
2289 pThis->CmdBufHeadPtr.au32[0] = offBuf;
2290
2291 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
2292
2293 Log4Func(("Set CmdBufHeadPtr to %#RX32\n", offBuf));
2294 return VINF_SUCCESS;
2295}
2296
2297
2298/**
2299 * Writes the Command Buffer Tail Pointer Register.
2300 */
2301static VBOXSTRICTRC iommuAmdCmdBufTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2302{
2303 RT_NOREF(pDevIns, offReg);
2304
2305 /*
2306 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2307 * In our emulation, we ignore the write entirely.
2308 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2309 */
2310 uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK;
2311 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
2312 Assert(cbBuf <= _512K);
2313 if (offBuf >= cbBuf)
2314 {
2315 LogFunc(("Setting CmdBufTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2316 return VINF_SUCCESS;
2317 }
2318
2319 /*
2320 * IOMMU behavior is undefined if software advances the tail pointer equal to or beyond the
2321 * head pointer after adding one or more commands to the buffer.
2322 *
2323 * However, we cannot enforce this strictly because it's legal for software to shrink the
2324 * command queue (by reducing the offset) as well as wrap around the pointer (when head isn't
2325 * at 0). Software might even make the queue empty by making head and tail equal which is
2326 * allowed. I don't think we can or should try too hard to prevent software shooting itself
2327 * in the foot here. As long as we make sure the offset value is within the circular buffer
2328 * bounds (which we do by masking bits above) it should be sufficient.
2329 */
2330 pThis->CmdBufTailPtr.au32[0] = offBuf;
2331
2332 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
2333
2334 Log4Func(("Set CmdBufTailPtr to %#RX32\n", offBuf));
2335 return VINF_SUCCESS;
2336}
2337
2338
2339/**
2340 * Writes the Event Log Head Pointer Register.
2341 */
2342static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2343{
2344 RT_NOREF(pDevIns, offReg);
2345
2346 /*
2347 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2348 * In our emulation, we ignore the write entirely.
2349 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2350 */
2351 uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK;
2352 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2353 Assert(cbBuf <= _512K);
2354 if (offBuf >= cbBuf)
2355 {
2356 LogFunc(("Setting EvtLogHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2357 return VINF_SUCCESS;
2358 }
2359
2360 /* Update the register. */
2361 pThis->EvtLogHeadPtr.au32[0] = offBuf;
2362
2363 LogFlowFunc(("Set EvtLogHeadPtr to %#RX32\n", offBuf));
2364 return VINF_SUCCESS;
2365}
2366
2367
2368/**
2369 * Writes the Event Log Tail Pointer Register.
2370 */
2371static VBOXSTRICTRC iommuAmdEvtLogTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2372{
2373 RT_NOREF(pDevIns, offReg);
2374 NOREF(pThis);
2375
2376 /*
2377 * IOMMU behavior is undefined when software writes this register when the event log is running.
2378 * In our emulation, we ignore the write entirely.
2379 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2380 */
2381 if (pThis->Status.n.u1EvtLogRunning)
2382 {
2383 LogFunc(("Setting EvtLogTailPtr (%#RX64) when event log is running -> Ignored\n", u64Value));
2384 return VINF_SUCCESS;
2385 }
2386
2387 /*
2388 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2389 * In our emulation, we ignore the write entirely.
2390 */
2391 uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK;
2392 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2393 Assert(cbBuf <= _512K);
2394 if (offBuf >= cbBuf)
2395 {
2396 LogFunc(("Setting EvtLogTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2397 return VINF_SUCCESS;
2398 }
2399
2400 /* Update the register. */
2401 pThis->EvtLogTailPtr.au32[0] = offBuf;
2402
2403 LogFlowFunc(("Set EvtLogTailPtr to %#RX32\n", offBuf));
2404 return VINF_SUCCESS;
2405}
2406
2407
2408/**
2409 * Writes the Status Register.
2410 */
2411static VBOXSTRICTRC iommuAmdStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2412{
2413 RT_NOREF(pDevIns, offReg);
2414
2415 /* Mask out all unrecognized bits. */
2416 u64Value &= IOMMU_STATUS_VALID_MASK;
2417
2418 /*
2419 * Compute RW1C (read-only, write-1-to-clear) bits and preserve the rest (which are read-only).
2420 * Writing 0 to an RW1C bit has no effect. Writing 1 to an RW1C bit, clears the bit if it's already 1.
2421 */
2422 IOMMU_STATUS_T const OldStatus = pThis->Status;
2423 uint64_t const fOldRw1cBits = (OldStatus.u64 & IOMMU_STATUS_RW1C_MASK);
2424 uint64_t const fOldRoBits = (OldStatus.u64 & ~IOMMU_STATUS_RW1C_MASK);
2425 uint64_t const fNewRw1cBits = (u64Value & IOMMU_STATUS_RW1C_MASK);
2426
2427 uint64_t const uNewStatus = (fOldRw1cBits & ~fNewRw1cBits) | fOldRoBits;
2428
2429 /* Update the register. */
2430 ASMAtomicWriteU64(&pThis->Status.u64, uNewStatus);
2431 return VINF_SUCCESS;
2432}
2433
2434
2435/**
2436 * Register access table 0.
2437 * The MMIO offset of each entry must be a multiple of 8!
2438 */
2439static const IOMMUREGACC g_aRegAccess0[] =
2440{
2441 /* MMIO off. Register name Read function Write function */
2442 { /* 0x00 */ "DEV_TAB_BAR", iommuAmdDevTabBar_r, iommuAmdDevTabBar_w },
2443 { /* 0x08 */ "CMD_BUF_BAR", iommuAmdCmdBufBar_r, iommuAmdCmdBufBar_w },
2444 { /* 0x10 */ "EVT_LOG_BAR", iommuAmdEvtLogBar_r, iommuAmdEvtLogBar_w },
2445 { /* 0x18 */ "CTRL", iommuAmdCtrl_r, iommuAmdCtrl_w },
2446 { /* 0x20 */ "EXCL_BAR", iommuAmdExclRangeBar_r, iommuAmdExclRangeBar_w },
2447 { /* 0x28 */ "EXCL_RANGE_LIMIT", iommuAmdExclRangeLimit_r, iommuAmdExclRangeLimit_w },
2448 { /* 0x30 */ "EXT_FEAT", iommuAmdExtFeat_r, NULL },
2449 { /* 0x38 */ "PPR_LOG_BAR", iommuAmdPprLogBar_r, NULL },
2450 { /* 0x40 */ "HW_EVT_HI", iommuAmdHwEvtHi_r, iommuAmdHwEvtHi_w },
2451 { /* 0x48 */ "HW_EVT_LO", iommuAmdHwEvtLo_r, iommuAmdHwEvtLo_w },
2452 { /* 0x50 */ "HW_EVT_STATUS", iommuAmdHwEvtStatus_r, iommuAmdHwEvtStatus_w },
2453 { /* 0x58 */ NULL, NULL, NULL },
2454
2455 { /* 0x60 */ "SMI_FLT_0", NULL, NULL },
2456 { /* 0x68 */ "SMI_FLT_1", NULL, NULL },
2457 { /* 0x70 */ "SMI_FLT_2", NULL, NULL },
2458 { /* 0x78 */ "SMI_FLT_3", NULL, NULL },
2459 { /* 0x80 */ "SMI_FLT_4", NULL, NULL },
2460 { /* 0x88 */ "SMI_FLT_5", NULL, NULL },
2461 { /* 0x90 */ "SMI_FLT_6", NULL, NULL },
2462 { /* 0x98 */ "SMI_FLT_7", NULL, NULL },
2463 { /* 0xa0 */ "SMI_FLT_8", NULL, NULL },
2464 { /* 0xa8 */ "SMI_FLT_9", NULL, NULL },
2465 { /* 0xb0 */ "SMI_FLT_10", NULL, NULL },
2466 { /* 0xb8 */ "SMI_FLT_11", NULL, NULL },
2467 { /* 0xc0 */ "SMI_FLT_12", NULL, NULL },
2468 { /* 0xc8 */ "SMI_FLT_13", NULL, NULL },
2469 { /* 0xd0 */ "SMI_FLT_14", NULL, NULL },
2470 { /* 0xd8 */ "SMI_FLT_15", NULL, NULL },
2471
2472 { /* 0xe0 */ "GALOG_BAR", iommuAmdGALogBar_r, NULL },
2473 { /* 0xe8 */ "GALOG_TAIL_ADDR", NULL, NULL },
2474 { /* 0xf0 */ "PPR_LOG_B_BAR", iommuAmdPprLogBBaseAddr_r, NULL },
2475 { /* 0xf8 */ "PPR_EVT_B_BAR", iommuAmdEvtLogBBaseAddr_r, NULL },
2476
2477 { /* 0x100 */ "DEV_TAB_SEG_1", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2478 { /* 0x108 */ "DEV_TAB_SEG_2", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2479 { /* 0x110 */ "DEV_TAB_SEG_3", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2480 { /* 0x118 */ "DEV_TAB_SEG_4", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2481 { /* 0x120 */ "DEV_TAB_SEG_5", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2482 { /* 0x128 */ "DEV_TAB_SEG_6", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2483 { /* 0x130 */ "DEV_TAB_SEG_7", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2484
2485 { /* 0x138 */ "DEV_SPECIFIC_FEAT", iommuAmdDevSpecificFeat_r, NULL },
2486 { /* 0x140 */ "DEV_SPECIFIC_CTRL", iommuAmdDevSpecificCtrl_r, NULL },
2487 { /* 0x148 */ "DEV_SPECIFIC_STATUS", iommuAmdDevSpecificStatus_r, NULL },
2488
2489 { /* 0x150 */ "MSI_VECTOR_0 or MSI_VECTOR_1", iommuAmdDevMsiVector_r, iommuAmdDevMsiVector_w },
2490 { /* 0x158 */ "MSI_CAP_HDR or MSI_ADDR_LO", iommuAmdMsiCapHdrAndAddrLo_r, iommuAmdMsiCapHdrAndAddrLo_w },
2491 { /* 0x160 */ "MSI_ADDR_HI or MSI_DATA", iommuAmdMsiAddrHiAndData_r, iommuAmdMsiAddrHiAndData_w },
2492 { /* 0x168 */ "MSI_MAPPING_CAP_HDR or PERF_OPT_CTRL", NULL, NULL },
2493
2494 { /* 0x170 */ "XT_GEN_INTR_CTRL", NULL, NULL },
2495 { /* 0x178 */ "XT_PPR_INTR_CTRL", NULL, NULL },
2496 { /* 0x180 */ "XT_GALOG_INT_CTRL", NULL, NULL },
2497};
2498AssertCompile(RT_ELEMENTS(g_aRegAccess0) == (IOMMU_MMIO_OFF_QWORD_TABLE_0_END - IOMMU_MMIO_OFF_QWORD_TABLE_0_START) / 8);
2499
2500/**
2501 * Register access table 1.
2502 * The MMIO offset of each entry must be a multiple of 8!
2503 */
2504static const IOMMUREGACC g_aRegAccess1[] =
2505{
2506 /* MMIO offset Register name Read function Write function */
2507 { /* 0x200 */ "MARC_APER_BAR_0", NULL, NULL },
2508 { /* 0x208 */ "MARC_APER_RELOC_0", NULL, NULL },
2509 { /* 0x210 */ "MARC_APER_LEN_0", NULL, NULL },
2510 { /* 0x218 */ "MARC_APER_BAR_1", NULL, NULL },
2511 { /* 0x220 */ "MARC_APER_RELOC_1", NULL, NULL },
2512 { /* 0x228 */ "MARC_APER_LEN_1", NULL, NULL },
2513 { /* 0x230 */ "MARC_APER_BAR_2", NULL, NULL },
2514 { /* 0x238 */ "MARC_APER_RELOC_2", NULL, NULL },
2515 { /* 0x240 */ "MARC_APER_LEN_2", NULL, NULL },
2516 { /* 0x248 */ "MARC_APER_BAR_3", NULL, NULL },
2517 { /* 0x250 */ "MARC_APER_RELOC_3", NULL, NULL },
2518 { /* 0x258 */ "MARC_APER_LEN_3", NULL, NULL }
2519};
2520AssertCompile(RT_ELEMENTS(g_aRegAccess1) == (IOMMU_MMIO_OFF_QWORD_TABLE_1_END - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) / 8);
2521
2522/**
2523 * Register access table 2.
2524 * The MMIO offset of each entry must be a multiple of 8!
2525 */
2526static const IOMMUREGACC g_aRegAccess2[] =
2527{
2528 /* MMIO offset Register name Read Function Write function */
2529 { /* 0x1ff8 */ "RSVD_REG", NULL, NULL },
2530
2531 { /* 0x2000 */ "CMD_BUF_HEAD_PTR", iommuAmdCmdBufHeadPtr_r, iommuAmdCmdBufHeadPtr_w },
2532 { /* 0x2008 */ "CMD_BUF_TAIL_PTR", iommuAmdCmdBufTailPtr_r , iommuAmdCmdBufTailPtr_w },
2533 { /* 0x2010 */ "EVT_LOG_HEAD_PTR", iommuAmdEvtLogHeadPtr_r, iommuAmdEvtLogHeadPtr_w },
2534 { /* 0x2018 */ "EVT_LOG_TAIL_PTR", iommuAmdEvtLogTailPtr_r, iommuAmdEvtLogTailPtr_w },
2535
2536 { /* 0x2020 */ "STATUS", iommuAmdStatus_r, iommuAmdStatus_w },
2537 { /* 0x2028 */ NULL, NULL, NULL },
2538
2539 { /* 0x2030 */ "PPR_LOG_HEAD_PTR", NULL, NULL },
2540 { /* 0x2038 */ "PPR_LOG_TAIL_PTR", NULL, NULL },
2541
2542 { /* 0x2040 */ "GALOG_HEAD_PTR", NULL, NULL },
2543 { /* 0x2048 */ "GALOG_TAIL_PTR", NULL, NULL },
2544
2545 { /* 0x2050 */ "PPR_LOG_B_HEAD_PTR", NULL, NULL },
2546 { /* 0x2058 */ "PPR_LOG_B_TAIL_PTR", NULL, NULL },
2547
2548 { /* 0x2060 */ NULL, NULL, NULL },
2549 { /* 0x2068 */ NULL, NULL, NULL },
2550
2551 { /* 0x2070 */ "EVT_LOG_B_HEAD_PTR", NULL, NULL },
2552 { /* 0x2078 */ "EVT_LOG_B_TAIL_PTR", NULL, NULL },
2553
2554 { /* 0x2080 */ "PPR_LOG_AUTO_RESP", NULL, NULL },
2555 { /* 0x2088 */ "PPR_LOG_OVERFLOW_EARLY", NULL, NULL },
2556 { /* 0x2090 */ "PPR_LOG_B_OVERFLOW_EARLY", NULL, NULL }
2557};
2558AssertCompile(RT_ELEMENTS(g_aRegAccess2) == (IOMMU_MMIO_OFF_QWORD_TABLE_2_END - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) / 8);
2559
2560
2561/**
2562 * Gets the register access structure given its MMIO offset.
2563 *
2564 * @returns The register access structure, or NULL if the offset is invalid.
2565 * @param off The MMIO offset of the register being accessed.
2566 */
2567static PCIOMMUREGACC iommuAmdGetRegAccess(uint32_t off)
2568{
2569 /* Figure out which table the register belongs to and validate its index. */
2570 PCIOMMUREGACC pReg;
2571 if (off < IOMMU_MMIO_OFF_QWORD_TABLE_0_END)
2572 {
2573 uint32_t const idxReg = off >> 3;
2574 Assert(idxReg < RT_ELEMENTS(g_aRegAccess0));
2575 pReg = &g_aRegAccess0[idxReg];
2576 }
2577 else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_1_END
2578 && off >= IOMMU_MMIO_OFF_QWORD_TABLE_1_START)
2579 {
2580 uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) >> 3;
2581 Assert(idxReg < RT_ELEMENTS(g_aRegAccess1));
2582 pReg = &g_aRegAccess1[idxReg];
2583 }
2584 else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_2_END
2585 && off >= IOMMU_MMIO_OFF_QWORD_TABLE_2_START)
2586 {
2587 uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) >> 3;
2588 Assert(idxReg < RT_ELEMENTS(g_aRegAccess2));
2589 pReg = &g_aRegAccess2[idxReg];
2590 }
2591 else
2592 return NULL;
2593 return pReg;
2594}
2595
2596
2597/**
2598 * Writes an IOMMU register (32-bit and 64-bit).
2599 *
2600 * @returns Strict VBox status code.
2601 * @param pDevIns The IOMMU device instance.
2602 * @param off MMIO byte offset to the register.
2603 * @param cb The size of the write access.
2604 * @param uValue The value being written.
2605 *
2606 * @thread EMT.
2607 */
2608static VBOXSTRICTRC iommuAmdRegisterWrite(PPDMDEVINS pDevIns, uint32_t off, uint8_t cb, uint64_t uValue)
2609{
2610 /*
2611 * Validate the access in case of IOM bug or incorrect assumption.
2612 */
2613 Assert(off < IOMMU_MMIO_REGION_SIZE);
2614 AssertMsgReturn(cb == 4 || cb == 8, ("Invalid access size %u\n", cb), VINF_SUCCESS);
2615 AssertMsgReturn(!(off & 3), ("Invalid offset %#x\n", off), VINF_SUCCESS);
2616
2617 Log4Func(("off=%#x cb=%u uValue=%#RX64\n", off, cb, uValue));
2618
2619 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2620 PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
2621 if (pReg)
2622 { /* likely */ }
2623 else
2624 {
2625 LogFunc(("Writing unknown register %#x with %#RX64 -> Ignored\n", off, uValue));
2626 return VINF_SUCCESS;
2627 }
2628
2629 /* If a write handler doesn't exist, it's either a reserved or read-only register. */
2630 if (pReg->pfnWrite)
2631 { /* likely */ }
2632 else
2633 {
2634 LogFunc(("Writing reserved or read-only register off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
2635 return VINF_SUCCESS;
2636 }
2637
2638 /*
2639 * If the write access is 64-bits and aligned on a 64-bit boundary, dispatch right away.
2640 * This handles writes to 64-bit registers as well as aligned, 64-bit writes to two
2641 * consecutive 32-bit registers.
2642 */
2643 if (cb == 8)
2644 {
2645 if (!(off & 7))
2646 return pReg->pfnWrite(pDevIns, pThis, off, uValue);
2647
2648 LogFunc(("Misaligned access while writing register at off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
2649 return VINF_SUCCESS;
2650 }
2651
2652 /* We shouldn't get sizes other than 32 bits here as we've specified so with IOM. */
2653 Assert(cb == 4);
2654 if (!(off & 7))
2655 {
2656 /*
2657 * Lower 32 bits of a 64-bit register or a 32-bit register is being written.
2658 * Merge with higher 32 bits (after reading the full 64-bits) and perform a 64-bit write.
2659 */
2660 uint64_t u64Read;
2661 if (pReg->pfnRead)
2662 {
2663 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off, &u64Read);
2664 if (RT_FAILURE(rcStrict))
2665 {
2666 LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2667 return rcStrict;
2668 }
2669 }
2670 else
2671 u64Read = 0;
2672
2673 uValue = (u64Read & UINT64_C(0xffffffff00000000)) | uValue;
2674 return pReg->pfnWrite(pDevIns, pThis, off, uValue);
2675 }
2676
2677 /*
2678 * Higher 32 bits of a 64-bit register or a 32-bit register at a 32-bit boundary is being written.
2679 * Merge with lower 32 bits (after reading the full 64-bits) and perform a 64-bit write.
2680 */
2681 Assert(!(off & 3));
2682 Assert(off & 7);
2683 Assert(off >= 4);
2684 uint64_t u64Read;
2685 if (pReg->pfnRead)
2686 {
2687 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, &u64Read);
2688 if (RT_FAILURE(rcStrict))
2689 {
2690 LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2691 return rcStrict;
2692 }
2693 }
2694 else
2695 u64Read = 0;
2696
2697 uValue = (uValue << 32) | (u64Read & UINT64_C(0xffffffff));
2698 return pReg->pfnWrite(pDevIns, pThis, off - 4, uValue);
2699}
2700
2701
2702/**
2703 * Reads an IOMMU register (64-bit) given its MMIO offset.
2704 *
2705 * All reads are 64-bit but reads to 32-bit registers that are aligned on an 8-byte
2706 * boundary include the lower half of the subsequent register.
2707 *
2708 * This is because most registers are 64-bit and aligned on 8-byte boundaries but
2709 * some are really 32-bit registers aligned on an 8-byte boundary. We cannot assume
2710 * software will only perform 32-bit reads on those 32-bit registers that are
2711 * aligned on 8-byte boundaries.
2712 *
2713 * @returns Strict VBox status code.
2714 * @param pDevIns The IOMMU device instance.
2715 * @param off The MMIO offset of the register in bytes.
2716 * @param puResult Where to store the value being read.
2717 *
2718 * @thread EMT.
2719 */
2720static VBOXSTRICTRC iommuAmdRegisterRead(PPDMDEVINS pDevIns, uint32_t off, uint64_t *puResult)
2721{
2722 Assert(off < IOMMU_MMIO_REGION_SIZE);
2723 Assert(!(off & 7) || !(off & 3));
2724
2725 Log4Func(("off=%#x\n", off));
2726
2727 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2728 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
2729 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); NOREF(pPciDev);
2730
2731 PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
2732 if (pReg)
2733 { /* likely */ }
2734 else
2735 {
2736 LogFunc(("Reading unknown register %#x -> Ignored\n", off));
2737 return VINF_IOM_MMIO_UNUSED_FF;
2738 }
2739
2740 /* If a read handler doesn't exist, it's a reserved or unknown register. */
2741 if (pReg->pfnRead)
2742 { /* likely */ }
2743 else
2744 {
2745 LogFunc(("Reading reserved or unknown register off=%#x -> returning 0s\n", off));
2746 return VINF_IOM_MMIO_UNUSED_00;
2747 }
2748
2749 /*
2750 * If the read access is aligned on a 64-bit boundary, read the full 64-bits and return.
2751 * The caller takes care of truncating upper 32 bits for 32-bit reads.
2752 */
2753 if (!(off & 7))
2754 return pReg->pfnRead(pDevIns, pThis, off, puResult);
2755
2756 /*
2757 * High 32 bits of a 64-bit register or a 32-bit register at a non 64-bit boundary is being read.
2758 * Read full 64 bits at the previous 64-bit boundary but return only the high 32 bits.
2759 */
2760 Assert(!(off & 3));
2761 Assert(off & 7);
2762 Assert(off >= 4);
2763 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, puResult);
2764 if (RT_SUCCESS(rcStrict))
2765 *puResult >>= 32;
2766 else
2767 {
2768 *puResult = 0;
2769 LogFunc(("Reading off %#x during split read failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2770 }
2771
2772 return rcStrict;
2773}
2774
2775
2776/**
2777 * Raises the MSI interrupt for the IOMMU device.
2778 *
2779 * @param pDevIns The IOMMU device instance.
2780 *
2781 * @thread Any.
2782 * @remarks The IOMMU lock may or may not be held.
2783 */
2784static void iommuAmdMsiInterruptRaise(PPDMDEVINS pDevIns)
2785{
2786 LogFlowFunc(("\n"));
2787 if (iommuAmdIsMsiEnabled(pDevIns))
2788 {
2789 LogFunc(("Raising MSI\n"));
2790 PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_HIGH);
2791 }
2792}
2793
#if 0
/**
 * Lowers the IOMMU device's MSI interrupt, provided MSI is enabled.
 *
 * Currently compiled out.
 *
 * @param   pDevIns     The IOMMU device instance.
 *
 * @thread  Any.
 * @remarks The IOMMU lock may or may not be held.
 */
static void iommuAmdMsiInterruptClear(PPDMDEVINS pDevIns)
{
    if (!iommuAmdIsMsiEnabled(pDevIns))
        return;

    PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_LOW);
}
#endif
2809
2810/**
2811 * Writes an entry to the event log in memory.
2812 *
2813 * @returns VBox status code.
2814 * @param pDevIns The IOMMU device instance.
2815 * @param pEvent The event to log.
2816 *
2817 * @thread Any.
2818 * @remarks The IOMMU lock must be held while calling this function.
2819 */
2820static int iommuAmdEvtLogEntryWrite(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
2821{
2822 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2823 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2824
2825 IOMMU_LOCK_NORET(pDevIns, pThisCC);
2826
2827 /* Check if event logging is active and the log has not overflowed. */
2828 IOMMU_STATUS_T const Status = pThis->Status;
2829 if ( Status.n.u1EvtLogRunning
2830 && !Status.n.u1EvtOverflow)
2831 {
2832 uint32_t const cbEvt = sizeof(*pEvent);
2833
2834 /* Get the offset we need to write the event to in memory (circular buffer offset). */
2835 uint32_t const offEvt = pThis->EvtLogTailPtr.n.off;
2836 Assert(!(offEvt & ~IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK));
2837
2838 /* Ensure we have space in the event log. */
2839 uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
2840 uint32_t const cEvts = iommuAmdGetEvtLogEntryCount(pThis);
2841 if (cEvts + 1 < cMaxEvts)
2842 {
2843 /* Write the event log entry to memory. */
2844 RTGCPHYS const GCPhysEvtLog = pThis->EvtLogBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
2845 RTGCPHYS const GCPhysEvtLogEntry = GCPhysEvtLog + offEvt;
2846 int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysEvtLogEntry, pEvent, cbEvt);
2847 if (RT_FAILURE(rc))
2848 LogFunc(("Failed to write event log entry at %#RGp. rc=%Rrc\n", GCPhysEvtLogEntry, rc));
2849
2850 /* Increment the event log tail pointer. */
2851 uint32_t const cbEvtLog = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2852 pThis->EvtLogTailPtr.n.off = (offEvt + cbEvt) % cbEvtLog;
2853
2854 /* Indicate that an event log entry was written. */
2855 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_INTR);
2856
2857 /* Check and signal an interrupt if software wants to receive one when an event log entry is written. */
2858 if (pThis->Ctrl.n.u1EvtIntrEn)
2859 iommuAmdMsiInterruptRaise(pDevIns);
2860 }
2861 else
2862 {
2863 /* Indicate that the event log has overflowed. */
2864 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_OVERFLOW);
2865
2866 /* Check and signal an interrupt if software wants to receive one when the event log has overflowed. */
2867 if (pThis->Ctrl.n.u1EvtIntrEn)
2868 iommuAmdMsiInterruptRaise(pDevIns);
2869 }
2870 }
2871
2872 IOMMU_UNLOCK(pDevIns, pThisCC);
2873
2874 return VINF_SUCCESS;
2875}
2876
2877
2878/**
2879 * Sets an event in the hardware error registers.
2880 *
2881 * @param pDevIns The IOMMU device instance.
2882 * @param pEvent The event.
2883 *
2884 * @thread Any.
2885 */
2886static void iommuAmdHwErrorSet(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
2887{
2888 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2889 if (pThis->ExtFeat.n.u1HwErrorSup)
2890 {
2891 if (pThis->HwEvtStatus.n.u1Valid)
2892 pThis->HwEvtStatus.n.u1Overflow = 1;
2893 pThis->HwEvtStatus.n.u1Valid = 1;
2894 pThis->HwEvtHi.u64 = RT_MAKE_U64(pEvent->au32[0], pEvent->au32[1]);
2895 pThis->HwEvtLo = RT_MAKE_U64(pEvent->au32[2], pEvent->au32[3]);
2896 Assert( pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_DEV_TAB_HW_ERROR
2897 || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_PAGE_TAB_HW_ERROR
2898 || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
2899 }
2900}
2901
2902
2903/**
2904 * Initializes a PAGE_TAB_HARDWARE_ERROR event.
2905 *
2906 * @param uDevId The device ID (bus, device, function).
2907 * @param uDomainId The domain ID.
2908 * @param GCPhysPtEntity The system physical address of the page table
2909 * entity.
2910 * @param enmOp The IOMMU operation being performed.
2911 * @param pEvtPageTabHwErr Where to store the initialized event.
2912 */
2913static void iommuAmdPageTabHwErrorEventInit(uint16_t uDevId, uint16_t uDomainId, RTGCPHYS GCPhysPtEntity, IOMMUOP enmOp,
2914 PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
2915{
2916 memset(pEvtPageTabHwErr, 0, sizeof(*pEvtPageTabHwErr));
2917 pEvtPageTabHwErr->n.u16DevId = uDevId;
2918 pEvtPageTabHwErr->n.u16DomainOrPasidLo = uDomainId;
2919 pEvtPageTabHwErr->n.u1GuestOrNested = 0;
2920 pEvtPageTabHwErr->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
2921 pEvtPageTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
2922 pEvtPageTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
2923 pEvtPageTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT;
2924 pEvtPageTabHwErr->n.u4EvtCode = IOMMU_EVT_PAGE_TAB_HW_ERROR;
2925 pEvtPageTabHwErr->n.u64Addr = GCPhysPtEntity;
2926}
2927
2928
2929/**
2930 * Raises a PAGE_TAB_HARDWARE_ERROR event.
2931 *
2932 * @param pDevIns The IOMMU device instance.
2933 * @param enmOp The IOMMU operation being performed.
2934 * @param pEvtPageTabHwErr The page table hardware error event.
2935 *
2936 * @thread Any.
2937 */
2938static void iommuAmdPageTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
2939{
2940 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_PAGE_TAB_HW_ERR_T));
2941 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtPageTabHwErr;
2942
2943 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2944 IOMMU_LOCK_NORET(pDevIns, pThisCC);
2945
2946 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
2947 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
2948 if (enmOp != IOMMUOP_CMD)
2949 iommuAmdSetPciTargetAbort(pDevIns);
2950
2951 IOMMU_UNLOCK(pDevIns, pThisCC);
2952
2953 LogFunc(("Raised PAGE_TAB_HARDWARE_ERROR. uDevId=%#x uDomainId=%#x GCPhysPtEntity=%#RGp enmOp=%u u2Type=%u\n",
2954 pEvtPageTabHwErr->n.u16DevId, pEvtPageTabHwErr->n.u16DomainOrPasidLo, pEvtPageTabHwErr->n.u64Addr, enmOp,
2955 pEvtPageTabHwErr->n.u2Type));
2956}
2957
2958
2959#ifdef IN_RING3
2960/**
2961 * Initializes a COMMAND_HARDWARE_ERROR event.
2962 *
2963 * @param GCPhysAddr The system physical address the IOMMU attempted to access.
2964 * @param pEvtCmdHwErr Where to store the initialized event.
2965 */
2966static void iommuAmdCmdHwErrorEventInit(RTGCPHYS GCPhysAddr, PEVT_CMD_HW_ERR_T pEvtCmdHwErr)
2967{
2968 memset(pEvtCmdHwErr, 0, sizeof(*pEvtCmdHwErr));
2969 pEvtCmdHwErr->n.u2Type = HWEVTTYPE_DATA_ERROR;
2970 pEvtCmdHwErr->n.u4EvtCode = IOMMU_EVT_COMMAND_HW_ERROR;
2971 pEvtCmdHwErr->n.u64Addr = GCPhysAddr;
2972}
2973
2974
2975/**
2976 * Raises a COMMAND_HARDWARE_ERROR event.
2977 *
2978 * @param pDevIns The IOMMU device instance.
2979 * @param pEvtCmdHwErr The command hardware error event.
2980 *
2981 * @thread Any.
2982 */
2983static void iommuAmdCmdHwErrorEventRaise(PPDMDEVINS pDevIns, PCEVT_CMD_HW_ERR_T pEvtCmdHwErr)
2984{
2985 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_CMD_HW_ERR_T));
2986 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtCmdHwErr;
2987 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2988
2989 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2990 IOMMU_LOCK_NORET(pDevIns, pThisCC);
2991
2992 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
2993 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
2994 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
2995
2996 IOMMU_UNLOCK(pDevIns, pThisCC);
2997
2998 LogFunc(("Raised COMMAND_HARDWARE_ERROR. GCPhysCmd=%#RGp u2Type=%u\n", pEvtCmdHwErr->n.u64Addr, pEvtCmdHwErr->n.u2Type));
2999}
3000#endif /* IN_RING3 */
3001
3002
3003/**
3004 * Initializes a DEV_TAB_HARDWARE_ERROR event.
3005 *
3006 * @param uDevId The device ID (bus, device, function).
3007 * @param GCPhysDte The system physical address of the failed device table
3008 * access.
3009 * @param enmOp The IOMMU operation being performed.
3010 * @param pEvtDevTabHwErr Where to store the initialized event.
3011 */
3012static void iommuAmdDevTabHwErrorEventInit(uint16_t uDevId, RTGCPHYS GCPhysDte, IOMMUOP enmOp,
3013 PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
3014{
3015 memset(pEvtDevTabHwErr, 0, sizeof(*pEvtDevTabHwErr));
3016 pEvtDevTabHwErr->n.u16DevId = uDevId;
3017 pEvtDevTabHwErr->n.u1Intr = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
3018 /** @todo IOMMU: Any other transaction type that can set read/write bit? */
3019 pEvtDevTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
3020 pEvtDevTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
3021 pEvtDevTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT;
3022 pEvtDevTabHwErr->n.u4EvtCode = IOMMU_EVT_DEV_TAB_HW_ERROR;
3023 pEvtDevTabHwErr->n.u64Addr = GCPhysDte;
3024}
3025
3026
3027/**
3028 * Raises a DEV_TAB_HARDWARE_ERROR event.
3029 *
3030 * @param pDevIns The IOMMU device instance.
3031 * @param enmOp The IOMMU operation being performed.
3032 * @param pEvtDevTabHwErr The device table hardware error event.
3033 *
3034 * @thread Any.
3035 */
3036static void iommuAmdDevTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
3037{
3038 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_DEV_TAB_HW_ERROR_T));
3039 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtDevTabHwErr;
3040
3041 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3042 IOMMU_LOCK_NORET(pDevIns, pThisCC);
3043
3044 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
3045 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
3046 if (enmOp != IOMMUOP_CMD)
3047 iommuAmdSetPciTargetAbort(pDevIns);
3048
3049 IOMMU_UNLOCK(pDevIns, pThisCC);
3050
3051 LogFunc(("Raised DEV_TAB_HARDWARE_ERROR. uDevId=%#x GCPhysDte=%#RGp enmOp=%u u2Type=%u\n", pEvtDevTabHwErr->n.u16DevId,
3052 pEvtDevTabHwErr->n.u64Addr, enmOp, pEvtDevTabHwErr->n.u2Type));
3053}
3054
3055
3056#ifdef IN_RING3
3057/**
3058 * Initializes an ILLEGAL_COMMAND_ERROR event.
3059 *
3060 * @param GCPhysCmd The system physical address of the failed command
3061 * access.
3062 * @param pEvtIllegalCmd Where to store the initialized event.
3063 */
3064static void iommuAmdIllegalCmdEventInit(RTGCPHYS GCPhysCmd, PEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
3065{
3066 Assert(!(GCPhysCmd & UINT64_C(0xf)));
3067 memset(pEvtIllegalCmd, 0, sizeof(*pEvtIllegalCmd));
3068 pEvtIllegalCmd->n.u4EvtCode = IOMMU_EVT_ILLEGAL_CMD_ERROR;
3069 pEvtIllegalCmd->n.u64Addr = GCPhysCmd;
3070}
3071
3072
3073/**
3074 * Raises an ILLEGAL_COMMAND_ERROR event.
3075 *
3076 * @param pDevIns The IOMMU device instance.
3077 * @param pEvtIllegalCmd The illegal command error event.
3078 */
3079static void iommuAmdIllegalCmdEventRaise(PPDMDEVINS pDevIns, PCEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
3080{
3081 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));
3082 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalCmd;
3083 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3084
3085 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3086 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
3087
3088 LogFunc(("Raised ILLEGAL_COMMAND_ERROR. Addr=%#RGp\n", pEvtIllegalCmd->n.u64Addr));
3089}
3090#endif /* IN_RING3 */
3091
3092
3093/**
3094 * Initializes an ILLEGAL_DEV_TABLE_ENTRY event.
3095 *
3096 * @param uDevId The device ID (bus, device, function).
3097 * @param uIova The I/O virtual address.
3098 * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if the
3099 * event was caused by an invalid level encoding in the
3100 * DTE.
3101 * @param enmOp The IOMMU operation being performed.
3102 * @param pEvtIllegalDte Where to store the initialized event.
3103 */
3104static void iommuAmdIllegalDteEventInit(uint16_t uDevId, uint64_t uIova, bool fRsvdNotZero, IOMMUOP enmOp,
3105 PEVT_ILLEGAL_DTE_T pEvtIllegalDte)
3106{
3107 memset(pEvtIllegalDte, 0, sizeof(*pEvtIllegalDte));
3108 pEvtIllegalDte->n.u16DevId = uDevId;
3109 pEvtIllegalDte->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
3110 pEvtIllegalDte->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
3111 pEvtIllegalDte->n.u1RsvdNotZero = fRsvdNotZero;
3112 pEvtIllegalDte->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
3113 pEvtIllegalDte->n.u4EvtCode = IOMMU_EVT_ILLEGAL_DEV_TAB_ENTRY;
3114 pEvtIllegalDte->n.u64Addr = uIova & ~UINT64_C(0x3);
3115 /** @todo r=ramshankar: Not sure why the last 2 bits are marked as reserved by the
3116 * IOMMU spec here but not for this field for I/O page fault event. */
3117 Assert(!(uIova & UINT64_C(0x3)));
3118}
3119
3120
3121/**
3122 * Raises an ILLEGAL_DEV_TABLE_ENTRY event.
3123 *
3124 * @param pDevIns The IOMMU instance data.
3125 * @param enmOp The IOMMU operation being performed.
3126 * @param pEvtIllegalDte The illegal device table entry event.
3127 * @param enmEvtType The illegal device table entry event type.
3128 *
3129 * @thread Any.
3130 */
3131static void iommuAmdIllegalDteEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PCEVT_ILLEGAL_DTE_T pEvtIllegalDte,
3132 EVT_ILLEGAL_DTE_TYPE_T enmEvtType)
3133{
3134 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));
3135 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalDte;
3136
3137 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3138 if (enmOp != IOMMUOP_CMD)
3139 iommuAmdSetPciTargetAbort(pDevIns);
3140
3141 LogFunc(("Raised ILLEGAL_DTE_EVENT. uDevId=%#x uIova=%#RX64 enmOp=%u enmEvtType=%u\n", pEvtIllegalDte->n.u16DevId,
3142 pEvtIllegalDte->n.u64Addr, enmOp, enmEvtType));
3143 NOREF(enmEvtType);
3144}
3145
3146
3147/**
3148 * Initializes an IO_PAGE_FAULT event.
3149 *
3150 * @param uDevId The device ID (bus, device, function).
3151 * @param uDomainId The domain ID.
3152 * @param uIova The I/O virtual address being accessed.
3153 * @param fPresent Transaction to a page marked as present (including
3154 * DTE.V=1) or interrupt marked as remapped
3155 * (IRTE.RemapEn=1).
3156 * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if
3157 * the I/O page fault was caused by invalid level
3158 * encoding.
3159 * @param fPermDenied Permission denied for the address being accessed.
3160 * @param enmOp The IOMMU operation being performed.
3161 * @param pEvtIoPageFault Where to store the initialized event.
3162 */
3163static void iommuAmdIoPageFaultEventInit(uint16_t uDevId, uint16_t uDomainId, uint64_t uIova, bool fPresent, bool fRsvdNotZero,
3164 bool fPermDenied, IOMMUOP enmOp, PEVT_IO_PAGE_FAULT_T pEvtIoPageFault)
3165{
3166 Assert(!fPermDenied || fPresent);
3167 memset(pEvtIoPageFault, 0, sizeof(*pEvtIoPageFault));
3168 pEvtIoPageFault->n.u16DevId = uDevId;
3169 //pEvtIoPageFault->n.u4PasidHi = 0;
3170 pEvtIoPageFault->n.u16DomainOrPasidLo = uDomainId;
3171 //pEvtIoPageFault->n.u1GuestOrNested = 0;
3172 //pEvtIoPageFault->n.u1NoExecute = 0;
3173 //pEvtIoPageFault->n.u1User = 0;
3174 pEvtIoPageFault->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
3175 pEvtIoPageFault->n.u1Present = fPresent;
3176 pEvtIoPageFault->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
3177 pEvtIoPageFault->n.u1PermDenied = fPermDenied;
3178 pEvtIoPageFault->n.u1RsvdNotZero = fRsvdNotZero;
3179 pEvtIoPageFault->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
3180 pEvtIoPageFault->n.u4EvtCode = IOMMU_EVT_IO_PAGE_FAULT;
3181 pEvtIoPageFault->n.u64Addr = uIova;
3182}
3183
3184
3185/**
3186 * Raises an IO_PAGE_FAULT event.
3187 *
3188 * @param pDevIns The IOMMU instance data.
3189 * @param fIoDevFlags The I/O device flags, see IOMMU_DTECACHE_F_XXX.
3190 * @param pIrte The interrupt remapping table entry, can be NULL.
3191 * @param enmOp The IOMMU operation being performed.
3192 * @param pEvtIoPageFault The I/O page fault event.
3193 * @param enmEvtType The I/O page fault event type.
3194 *
3195 * @thread Any.
3196 */
3197static void iommuAmdIoPageFaultEventRaise(PPDMDEVINS pDevIns, uint16_t fIoDevFlags, PCIRTE_T pIrte, IOMMUOP enmOp,
3198 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
3199{
3200 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_IO_PAGE_FAULT_T));
3201 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIoPageFault;
3202
3203#ifdef IOMMU_WITH_IOTLBE_CACHE
3204# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) iommuAmdDteCacheSetFlags((a_pDevIns), (a_DevId), \
3205 IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED)
3206#else
3207# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) do { } while (0)
3208#endif
3209
3210 bool fSuppressEvtLogging = false;
3211 if ( enmOp == IOMMUOP_MEM_READ
3212 || enmOp == IOMMUOP_MEM_WRITE)
3213 {
3214 uint16_t const fSuppressIopf = IOMMU_DTECACHE_F_VALID
3215 | IOMMU_DTECACHE_F_SUPPRESS_IOPF
3216 | IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED;
3217 uint16_t const fSuppressAllIopf = IOMMU_DTECACHE_F_VALID
3218 | IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF;
3219 if ( (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf
3220 || (fIoDevFlags & fSuppressIopf) == fSuppressIopf)
3221 {
3222 fSuppressEvtLogging = true;
3223 }
3224 }
3225 else if (enmOp == IOMMUOP_INTR_REQ)
3226 {
3227 uint16_t const fSuppressIopf = IOMMU_DTECACHE_F_VALID
3228 | IOMMU_DTECACHE_F_INTR_MAP_VALID
3229 | IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR;
3230 if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf)
3231 fSuppressEvtLogging = true;
3232 else if (pIrte)
3233 fSuppressEvtLogging = pIrte->n.u1SuppressIoPf;
3234 }
3235 /* else: Events are never suppressed for commands. */
3236
3237 switch (enmEvtType)
3238 {
3239 case kIoPageFaultType_PermDenied:
3240 {
3241 /* Cannot be triggered by a command. */
3242 Assert(enmOp != IOMMUOP_CMD);
3243 RT_FALL_THRU();
3244 }
3245 case kIoPageFaultType_DteRsvdPagingMode:
3246 case kIoPageFaultType_PteInvalidPageSize:
3247 case kIoPageFaultType_PteInvalidLvlEncoding:
3248 case kIoPageFaultType_SkippedLevelIovaNotZero:
3249 case kIoPageFaultType_PteRsvdNotZero:
3250 case kIoPageFaultType_PteValidNotSet:
3251 case kIoPageFaultType_DteTranslationDisabled:
3252 case kIoPageFaultType_PasidInvalidRange:
3253 {
3254 /*
3255 * For a translation request, the IOMMU doesn't signal an I/O page fault nor does it
3256 * create an event log entry. See AMD IOMMU spec. 2.1.3.2 "I/O Page Faults".
3257 */
3258 if (enmOp != IOMMUOP_TRANSLATE_REQ)
3259 {
3260 if (!fSuppressEvtLogging)
3261 {
3262 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3263 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3264 }
3265 if (enmOp != IOMMUOP_CMD)
3266 iommuAmdSetPciTargetAbort(pDevIns);
3267 }
3268 break;
3269 }
3270
3271 case kIoPageFaultType_UserSupervisor:
3272 {
3273 /* Access is blocked and only creates an event log entry. */
3274 if (!fSuppressEvtLogging)
3275 {
3276 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3277 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3278 }
3279 break;
3280 }
3281
3282 case kIoPageFaultType_IrteAddrInvalid:
3283 case kIoPageFaultType_IrteRsvdNotZero:
3284 case kIoPageFaultType_IrteRemapEn:
3285 case kIoPageFaultType_IrteRsvdIntType:
3286 case kIoPageFaultType_IntrReqAborted:
3287 case kIoPageFaultType_IntrWithPasid:
3288 {
3289 /* Only trigerred by interrupt requests. */
3290 Assert(enmOp == IOMMUOP_INTR_REQ);
3291 if (!fSuppressEvtLogging)
3292 {
3293 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3294 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3295 }
3296 iommuAmdSetPciTargetAbort(pDevIns);
3297 break;
3298 }
3299
3300 case kIoPageFaultType_SmiFilterMismatch:
3301 {
3302 /* Not supported and probably will never be, assert. */
3303 AssertMsgFailed(("kIoPageFaultType_SmiFilterMismatch - Upstream SMI requests not supported/implemented."));
3304 break;
3305 }
3306
3307 case kIoPageFaultType_DevId_Invalid:
3308 {
3309 /* Cannot be triggered by a command. */
3310 Assert(enmOp != IOMMUOP_CMD);
3311 Assert(enmOp != IOMMUOP_TRANSLATE_REQ); /** @todo IOMMU: We don't support translation requests yet. */
3312 if (!fSuppressEvtLogging)
3313 {
3314 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3315 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3316 }
3317 if ( enmOp == IOMMUOP_MEM_READ
3318 || enmOp == IOMMUOP_MEM_WRITE)
3319 iommuAmdSetPciTargetAbort(pDevIns);
3320 break;
3321 }
3322 }
3323
3324#undef IOMMU_DTE_CACHE_SET_PF_RAISED
3325}
3326
3327
3328/**
3329 * Raises an IO_PAGE_FAULT event given the DTE.
3330 *
3331 * @param pDevIns The IOMMU instance data.
3332 * @param pDte The device table entry.
3333 * @param pIrte The interrupt remapping table entry, can be NULL.
3334 * @param enmOp The IOMMU operation being performed.
3335 * @param pEvtIoPageFault The I/O page fault event.
3336 * @param enmEvtType The I/O page fault event type.
3337 *
3338 * @thread Any.
3339 */
3340static void iommuAmdIoPageFaultEventRaiseWithDte(PPDMDEVINS pDevIns, PCDTE_T pDte, PCIRTE_T pIrte, IOMMUOP enmOp,
3341 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
3342{
3343 Assert(pDte);
3344 uint16_t const fIoDevFlags = iommuAmdGetBasicDevFlags(pDte);
3345 return iommuAmdIoPageFaultEventRaise(pDevIns, fIoDevFlags, pIrte, enmOp, pEvtIoPageFault, enmEvtType);
3346}
3347
3348
3349/**
3350 * Reads a device table entry for the given the device ID.
3351 *
3352 * @returns VBox status code.
3353 * @param pDevIns The IOMMU device instance.
3354 * @param uDevId The device ID (bus, device, function).
3355 * @param enmOp The IOMMU operation being performed.
3356 * @param pDte Where to store the device table entry.
3357 *
3358 * @thread Any.
3359 */
3360static int iommuAmdDteRead(PPDMDEVINS pDevIns, uint16_t uDevId, IOMMUOP enmOp, PDTE_T pDte)
3361{
3362 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3363 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3364
3365 IOMMU_LOCK(pDevIns, pThisCC);
3366
3367 /* Figure out which device table segment is being accessed. */
3368 uint8_t const idxSegsEn = pThis->Ctrl.n.u3DevTabSegEn;
3369 Assert(idxSegsEn < RT_ELEMENTS(g_auDevTabSegShifts));
3370
3371 uint8_t const idxSeg = (uDevId & g_auDevTabSegMasks[idxSegsEn]) >> g_auDevTabSegShifts[idxSegsEn];
3372 Assert(idxSeg < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
3373 AssertCompile(RT_ELEMENTS(g_auDevTabSegShifts) == RT_ELEMENTS(g_auDevTabSegMasks));
3374
3375 RTGCPHYS const GCPhysDevTab = pThis->aDevTabBaseAddrs[idxSeg].n.u40Base << X86_PAGE_4K_SHIFT;
3376 uint32_t const offDte = (uDevId & ~g_auDevTabSegMasks[idxSegsEn]) * sizeof(DTE_T);
3377 RTGCPHYS const GCPhysDte = GCPhysDevTab + offDte;
3378
3379 /* Ensure the DTE falls completely within the device table segment. */
3380 uint32_t const cbDevTabSeg = (pThis->aDevTabBaseAddrs[idxSeg].n.u9Size + 1) << X86_PAGE_4K_SHIFT;
3381
3382 IOMMU_UNLOCK(pDevIns, pThisCC);
3383
3384 if (offDte + sizeof(DTE_T) <= cbDevTabSeg)
3385 {
3386 /* Read the device table entry from guest memory. */
3387 Assert(!(GCPhysDevTab & X86_PAGE_4K_OFFSET_MASK));
3388 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDte, pDte, sizeof(*pDte));
3389 if (RT_SUCCESS(rc))
3390 return rc;
3391
3392 /* Raise a device table hardware error. */
3393 LogFunc(("Failed to read device table entry at %#RGp. rc=%Rrc -> DevTabHwError\n", GCPhysDte, rc));
3394
3395 EVT_DEV_TAB_HW_ERROR_T EvtDevTabHwErr;
3396 iommuAmdDevTabHwErrorEventInit(uDevId, GCPhysDte, enmOp, &EvtDevTabHwErr);
3397 iommuAmdDevTabHwErrorEventRaise(pDevIns, enmOp, &EvtDevTabHwErr);
3398 return VERR_IOMMU_DTE_READ_FAILED;
3399 }
3400
3401 /* Raise an I/O page fault for out-of-bounds acccess. */
3402 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3403 iommuAmdIoPageFaultEventInit(uDevId, 0 /* uDomainId */, 0 /* uIova */, false /* fPresent */, false /* fRsvdNotZero */,
3404 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3405 iommuAmdIoPageFaultEventRaise(pDevIns, 0 /* fIoDevFlags */, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3406 kIoPageFaultType_DevId_Invalid);
3407 return VERR_IOMMU_DTE_BAD_OFFSET;
3408}
3409
3410
3411/**
3412 * Performs pre-translation checks for the given device table entry.
3413 *
3414 * @returns VBox status code.
3415 * @retval VINF_SUCCESS if the DTE is valid and supports address translation.
3416 * @retval VINF_IOMMU_ADDR_TRANSLATION_DISABLED if the DTE is valid but address
3417 * translation is disabled.
3418 * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED if an error occurred and any
3419 * corresponding event was raised.
3420 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the DTE denies the requested
3421 * permissions.
3422 *
3423 * @param pDevIns The IOMMU device instance.
3424 * @param uIova The I/O virtual address to translate.
3425 * @param uDevId The device ID (bus, device, function).
3426 * @param fPerm The I/O permissions for this access, see
3427 * IOMMU_IO_PERM_XXX.
3428 * @param pDte The device table entry.
3429 * @param enmOp The IOMMU operation being performed.
3430 *
3431 * @thread Any.
3432 */
3433static int iommuAmdPreTranslateChecks(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, uint8_t fPerm, PCDTE_T pDte,
3434 IOMMUOP enmOp)
3435{
3436 /*
3437 * Check if the translation is valid, otherwise raise an I/O page fault.
3438 */
3439 if (pDte->n.u1TranslationValid)
3440 { /* likely */ }
3441 else
3442 {
3443 /** @todo r=ramshankar: The AMD IOMMU spec. says page walk is terminated but
3444 * doesn't explicitly say whether an I/O page fault is raised. From other
3445 * places in the spec. it seems early page walk terminations (starting with
3446 * the DTE) return the state computed so far and raises an I/O page fault. So
3447 * returning an invalid translation rather than skipping translation. */
3448 LogFunc(("Translation valid bit not set -> IOPF\n"));
3449 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3450 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
3451 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3452 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3453 kIoPageFaultType_DteTranslationDisabled);
3454 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3455 }
3456
3457 /*
3458 * Check permissions bits in the DTE.
3459 * Note: This MUST be checked prior to checking the root page table level below!
3460 */
3461 uint8_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
3462 if ((fPerm & fDtePerm) == fPerm)
3463 { /* likely */ }
3464 else
3465 {
3466 LogFunc(("Permission denied by DTE (fPerm=%#x fDtePerm=%#x) -> IOPF\n", fPerm, fDtePerm));
3467 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3468 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3469 true /* fPermDenied */, enmOp, &EvtIoPageFault);
3470 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3471 kIoPageFaultType_PermDenied);
3472 return VERR_IOMMU_ADDR_ACCESS_DENIED;
3473 }
3474
3475 /*
3476 * If the root page table level is 0, translation is disabled and GPA=SPA and
3477 * the DTE.IR and DTE.IW bits control permissions (verified above).
3478 */
3479 uint8_t const uMaxLevel = pDte->n.u3Mode;
3480 if (uMaxLevel != 0)
3481 { /* likely */ }
3482 else
3483 {
3484 Assert((fPerm & fDtePerm) == fPerm); /* Verify we've checked permissions. */
3485 return VINF_IOMMU_ADDR_TRANSLATION_DISABLED;
3486 }
3487
3488 /*
3489 * If the root page table level exceeds the allowed host-address translation level,
3490 * page walk is terminated and translation fails.
3491 */
3492 if (uMaxLevel <= IOMMU_MAX_HOST_PT_LEVEL)
3493 { /* likely */ }
3494 else
3495 {
3496 /** @todo r=ramshankar: I cannot make out from the AMD IOMMU spec. if I should be
3497 * raising an ILLEGAL_DEV_TABLE_ENTRY event or an IO_PAGE_FAULT event here.
3498 * I'm just going with I/O page fault. */
3499 LogFunc(("Invalid root page table level %#x (uDevId=%#x) -> IOPF\n", uMaxLevel, uDevId));
3500 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3501 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3502 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3503 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3504 kIoPageFaultType_PteInvalidLvlEncoding);
3505 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3506 }
3507
3508 /* The DTE allows translations for this device. */
3509 return VINF_SUCCESS;
3510}
3511
3512
3513/**
3514 * Walks the I/O page table to translate the I/O virtual address to a system
3515 * physical address.
3516 *
3517 * @returns VBox status code.
3518 * @param pDevIns The IOMMU device instance.
3519 * @param uIova The I/O virtual address to translate. Must be 4K aligned.
3520 * @param fPerm The I/O permissions for this access, see
3521 * IOMMU_IO_PERM_XXX.
3522 * @param uDevId The device ID (bus, device, function).
3523 * @param pDte The device table entry.
3524 * @param enmOp The IOMMU operation being performed.
3525 * @param pPageLookup Where to store the results of the I/O page lookup. This
3526 * is only updated when VINF_SUCCESS is returned.
3527 *
3528 * @thread Any.
3529 */
3530static int iommuAmdIoPageTableWalk(PPDMDEVINS pDevIns, uint64_t uIova, uint8_t fPerm, uint16_t uDevId, PCDTE_T pDte,
3531 IOMMUOP enmOp, PIOPAGELOOKUP pPageLookup)
3532{
3533 Assert(pDte->n.u1Valid);
3534 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
3535
3536 /* The virtual address bits indexing table. */
3537 static uint8_t const s_acIovaLevelShifts[] = { 0, 12, 21, 30, 39, 48, 57, 0 };
3538 static uint64_t const s_auIovaLevelMasks[] = { UINT64_C(0x0000000000000000),
3539 UINT64_C(0x00000000001ff000),
3540 UINT64_C(0x000000003fe00000),
3541 UINT64_C(0x0000007fc0000000),
3542 UINT64_C(0x0000ff8000000000),
3543 UINT64_C(0x01ff000000000000),
3544 UINT64_C(0xfe00000000000000),
3545 UINT64_C(0x0000000000000000) };
3546 AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) == RT_ELEMENTS(s_auIovaLevelMasks));
3547 AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) > IOMMU_MAX_HOST_PT_LEVEL);
3548
3549 /* Traverse the I/O page table starting with the page directory in the DTE. */
3550 IOPTENTITY_T PtEntity;
3551 PtEntity.u64 = pDte->au64[0];
3552 for (;;)
3553 {
3554 /* Figure out the system physical address of the page table at the current level. */
3555 uint8_t const uLevel = PtEntity.n.u3NextLevel;
3556
3557 /* Read the page table entity at the current level. */
3558 {
3559 Assert(uLevel > 0 && uLevel < RT_ELEMENTS(s_acIovaLevelShifts));
3560 Assert(uLevel <= IOMMU_MAX_HOST_PT_LEVEL);
3561 uint16_t const idxPte = (uIova >> s_acIovaLevelShifts[uLevel]) & UINT64_C(0x1ff);
3562 uint64_t const offPte = idxPte << 3;
3563 RTGCPHYS const GCPhysPtEntity = (PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK) + offPte;
3564 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysPtEntity, &PtEntity.u64, sizeof(PtEntity));
3565 if (RT_FAILURE(rc))
3566 {
3567 LogFunc(("Failed to read page table entry at %#RGp. rc=%Rrc -> PageTabHwError\n", GCPhysPtEntity, rc));
3568 EVT_PAGE_TAB_HW_ERR_T EvtPageTabHwErr;
3569 iommuAmdPageTabHwErrorEventInit(uDevId, pDte->n.u16DomainId, GCPhysPtEntity, enmOp, &EvtPageTabHwErr);
3570 iommuAmdPageTabHwErrorEventRaise(pDevIns, enmOp, &EvtPageTabHwErr);
3571 return VERR_IOMMU_IPE_2;
3572 }
3573 }
3574
3575 /* Check present bit. */
3576 if (PtEntity.n.u1Present)
3577 { /* likely */ }
3578 else
3579 {
3580 LogFunc(("Page table entry not present (uDevId=%#x) -> IOPF\n", uDevId));
3581 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3582 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
3583 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3584 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3585 kIoPageFaultType_PermDenied);
3586 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3587 }
3588
3589 /* Check permission bits. */
3590 uint8_t const fPtePerm = (PtEntity.u64 >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
3591 if ((fPerm & fPtePerm) == fPerm)
3592 { /* likely */ }
3593 else
3594 {
3595 LogFunc(("Page table entry access denied (uDevId=%#x fPerm=%#x fPtePerm=%#x) -> IOPF\n", uDevId, fPerm, fPtePerm));
3596 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3597 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3598 true /* fPermDenied */, enmOp, &EvtIoPageFault);
3599 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3600 kIoPageFaultType_PermDenied);
3601 return VERR_IOMMU_ADDR_ACCESS_DENIED;
3602 }
3603
3604 /* If this is a PTE, we're at the final level and we're done. */
3605 uint8_t const uNextLevel = PtEntity.n.u3NextLevel;
3606 if (uNextLevel == 0)
3607 {
3608 /* The page size of the translation is the default (4K). */
3609 pPageLookup->GCPhysSpa = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
3610 pPageLookup->cShift = X86_PAGE_4K_SHIFT;
3611 pPageLookup->fPerm = fPtePerm;
3612 return VINF_SUCCESS;
3613 }
3614 if (uNextLevel == 7)
3615 {
3616 /* The default page size of the translation is overridden. */
3617 RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
3618 uint8_t cShift = X86_PAGE_4K_SHIFT;
3619 while (GCPhysPte & RT_BIT_64(cShift++))
3620 ;
3621
3622 /* The page size must be larger than the default size and lower than the default size of the higher level. */
3623 Assert(uLevel < IOMMU_MAX_HOST_PT_LEVEL); /* PTE at level 6 handled outside the loop, uLevel should be <= 5. */
3624 if ( cShift > s_acIovaLevelShifts[uLevel]
3625 && cShift < s_acIovaLevelShifts[uLevel + 1])
3626 {
3627 pPageLookup->GCPhysSpa = GCPhysPte;
3628 pPageLookup->cShift = cShift;
3629 pPageLookup->fPerm = fPtePerm;
3630 return VINF_SUCCESS;
3631 }
3632
3633 LogFunc(("Page size invalid cShift=%#x -> IOPF\n", cShift));
3634 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3635 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3636 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3637 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3638 kIoPageFaultType_PteInvalidPageSize);
3639 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3640 }
3641
3642 /* Validate the next level encoding of the PDE. */
3643#if IOMMU_MAX_HOST_PT_LEVEL < 6
3644 if (uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL)
3645 { /* likely */ }
3646 else
3647 {
3648 LogFunc(("Next level of PDE invalid uNextLevel=%#x -> IOPF\n", uNextLevel));
3649 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3650 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3651 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3652 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3653 kIoPageFaultType_PteInvalidLvlEncoding);
3654 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3655 }
3656#else
3657 Assert(uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL);
3658#endif
3659
3660 /* Validate level transition. */
3661 if (uNextLevel < uLevel)
3662 { /* likely */ }
3663 else
3664 {
3665 LogFunc(("Next level (%#x) must be less than the current level (%#x) -> IOPF\n", uNextLevel, uLevel));
3666 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3667 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3668 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3669 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3670 kIoPageFaultType_PteInvalidLvlEncoding);
3671 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3672 }
3673
3674 /* Ensure IOVA bits of skipped levels are zero. */
3675 Assert(uLevel > 0);
3676 uint64_t uIovaSkipMask = 0;
3677 for (unsigned idxLevel = uLevel - 1; idxLevel > uNextLevel; idxLevel--)
3678 uIovaSkipMask |= s_auIovaLevelMasks[idxLevel];
3679 if (!(uIova & uIovaSkipMask))
3680 { /* likely */ }
3681 else
3682 {
3683 LogFunc(("IOVA of skipped levels are not zero %#RX64 (SkipMask=%#RX64) -> IOPF\n", uIova, uIovaSkipMask));
3684 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3685 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3686 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3687 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3688 kIoPageFaultType_SkippedLevelIovaNotZero);
3689 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3690 }
3691
3692 /* Continue with traversing the page directory at this level. */
3693 }
3694}
3695
3696
3697/**
3698 * Page lookup callback for finding an I/O page from guest memory.
3699 *
3700 * @returns VBox status code.
3701 * @retval VINF_SUCCESS when the page is found and has the right permissions.
3702 * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED when address translation fails.
3703 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
3704 * insufficient to what is requested.
3705 *
3706 * @param pDevIns The IOMMU instance data.
3707 * @param uIovaPage The I/O virtual address to lookup in the cache (must be
3708 * 4K aligned).
3709 * @param fPerm The I/O permissions for this access, see
3710 * IOMMU_IO_PERM_XXX.
3711 * @param pAux The auxiliary information required during lookup.
3712 * @param pPageLookup Where to store the looked up I/O page.
3713 */
3714static DECLCALLBACK(int) iommuAmdDteLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
3715 PIOPAGELOOKUP pPageLookup)
3716{
3717 AssertPtr(pAux);
3718 AssertPtr(pPageLookup);
3719 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
3720
3721 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3722 STAM_PROFILE_ADV_START(&pThis->StatProfDteLookup, a);
3723 int rc = iommuAmdIoPageTableWalk(pDevIns, uIovaPage, fPerm, pAux->uDeviceId, pAux->pDte, pAux->enmOp, pPageLookup);
3724 STAM_PROFILE_ADV_STOP(&pThis->StatProfDteLookup, a);
3725 return rc;
3726}
3727
3728
3729/**
3730 * Looks up a range of I/O virtual addresses.
3731 *
3732 * @returns VBox status code.
3733 * @param pDevIns The IOMMU instance data.
3734 * @param pfnIoPageLookup The lookup function to use.
3735 * @param pAddrIn The I/O address range to lookup.
3736 * @param pAux The auxiliary information required by the lookup
3737 * function.
3738 * @param pAddrOut Where to store the translated I/O address range.
3739 * @param pcbPages Where to store the size of the access (round up to
3740 * the page size). Optional, can be NULL.
3741 */
3742static int iommuAmdLookupIoAddrRange(PPDMDEVINS pDevIns, PFNIOPAGELOOKUP pfnIoPageLookup, PCIOADDRRANGE pAddrIn,
3743 PCIOMMUOPAUX pAux, PIOADDRRANGE pAddrOut, size_t *pcbPages)
3744{
3745 AssertPtr(pfnIoPageLookup);
3746 AssertPtr(pAddrIn);
3747 AssertPtr(pAddrOut);
3748
3749 int rc;
3750 size_t const cbAccess = pAddrIn->cb;
3751 uint8_t const fPerm = pAddrIn->fPerm;
3752 uint64_t const uIova = pAddrIn->uAddr;
3753 RTGCPHYS GCPhysSpa = NIL_RTGCPHYS;
3754 size_t cbRemaining = cbAccess;
3755 uint64_t uIovaPage = pAddrIn->uAddr & X86_PAGE_4K_BASE_MASK;
3756 uint64_t offIova = pAddrIn->uAddr & X86_PAGE_4K_OFFSET_MASK;
3757 uint64_t cbPages = 0;
3758
3759 IOPAGELOOKUP PageLookupPrev;
3760 RT_ZERO(PageLookupPrev);
3761 for (;;)
3762 {
3763 IOPAGELOOKUP PageLookup;
3764 rc = pfnIoPageLookup(pDevIns, uIovaPage, fPerm, pAux, &PageLookup);
3765 if (RT_SUCCESS(rc))
3766 {
3767 Assert(PageLookup.cShift >= X86_PAGE_4K_SHIFT);
3768
3769 /* Store the translated address before continuing to access more pages. */
3770 if (cbRemaining == cbAccess)
3771 {
3772 uint64_t const offMask = IOMMU_GET_PAGE_OFF_MASK(PageLookup.cShift);
3773 uint64_t const offSpa = uIova & offMask;
3774 Assert(!(PageLookup.GCPhysSpa & offMask));
3775 GCPhysSpa = PageLookup.GCPhysSpa | offSpa;
3776 }
3777 /* Check if addresses translated so far result in a physically contiguous region. */
3778 else if (!iommuAmdLookupIsAccessContig(&PageLookupPrev, &PageLookup))
3779 {
3780 rc = VERR_OUT_OF_RANGE;
3781 break;
3782 }
3783
3784 /* Store the page lookup result from the first/previous page. */
3785 PageLookupPrev = PageLookup;
3786
3787 /* Update size of all pages read thus far. */
3788 uint64_t const cbPage = RT_BIT_64(PageLookup.cShift);
3789 cbPages += cbPage;
3790
3791 /* Check if we need to access more pages. */
3792 if (cbRemaining > cbPage - offIova)
3793 {
3794 cbRemaining -= (cbPage - offIova); /* Calculate how much more we need to access. */
3795 uIovaPage += cbPage; /* Update address of the next access. */
3796 offIova = 0; /* After first page, all pages are accessed from off 0. */
3797 }
3798 else
3799 {
3800 cbRemaining = 0;
3801 break;
3802 }
3803 }
3804 else
3805 break;
3806 }
3807
3808 pAddrOut->uAddr = GCPhysSpa; /* Update the translated address. */
3809 pAddrOut->cb = cbAccess - cbRemaining; /* Update the size of the contiguous memory region. */
3810 pAddrOut->fPerm = PageLookupPrev.fPerm; /* Update the allowed permissions for this access. */
3811 if (pcbPages)
3812 *pcbPages = cbPages; /* Update the size of the pages accessed. */
3813 return rc;
3814}
3815
3816
3817/**
3818 * Looks up an I/O virtual address from the device table.
3819 *
3820 * @returns VBox status code.
3821 * @param pDevIns The IOMMU instance data.
3822 * @param uDevId The device ID (bus, device, function).
3823 * @param uIova The I/O virtual address to lookup.
3824 * @param cbAccess The size of the access.
3825 * @param fPerm The I/O permissions for this access, see
3826 * IOMMU_IO_PERM_XXX.
3827 * @param enmOp The IOMMU operation being performed.
3828 * @param pGCPhysSpa Where to store the translated system physical address.
3829 * @param pcbContiguous Where to store the number of contiguous bytes translated
3830 * and permission-checked.
3831 *
3832 * @thread Any.
3833 */
3834static int iommuAmdDteLookup(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, size_t cbAccess, uint8_t fPerm, IOMMUOP enmOp,
3835 PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
3836{
3837 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3838 RTGCPHYS GCPhysSpa = NIL_RTGCPHYS;
3839 size_t cbContiguous = 0;
3840
3841 /* Read the device table entry from memory. */
3842 DTE_T Dte;
3843 int rc = iommuAmdDteRead(pDevIns, uDevId, enmOp, &Dte);
3844 if (RT_SUCCESS(rc))
3845 {
3846 if (Dte.n.u1Valid)
3847 {
3848 /* Validate bits 127:0 of the device table entry when DTE.V is 1. */
3849 uint64_t const fRsvd0 = Dte.au64[0] & ~(IOMMU_DTE_QWORD_0_VALID_MASK & ~IOMMU_DTE_QWORD_0_FEAT_MASK);
3850 uint64_t const fRsvd1 = Dte.au64[1] & ~(IOMMU_DTE_QWORD_1_VALID_MASK & ~IOMMU_DTE_QWORD_1_FEAT_MASK);
3851 if (RT_LIKELY(!fRsvd0 && !fRsvd1))
3852 {
3853 /*
3854 * Check if the DTE is configured for translating addresses.
3855 * Note: Addresses cannot be subject to exclusion as we do -not- support remote IOTLBs,
3856 * so there's no need to check the address exclusion base/limit here.
3857 */
3858 rc = iommuAmdPreTranslateChecks(pDevIns, uDevId, uIova, fPerm, &Dte, enmOp);
3859 if (rc == VINF_SUCCESS)
3860 {
3861 IOADDRRANGE AddrIn;
3862 AddrIn.uAddr = uIova;
3863 AddrIn.cb = cbAccess;
3864 AddrIn.fPerm = fPerm;
3865
3866 IOMMUOPAUX Aux;
3867 Aux.enmOp = enmOp;
3868 Aux.pDte = &Dte;
3869 Aux.uDeviceId = uDevId;
3870 Aux.uDomainId = Dte.n.u16DomainId;
3871
3872 IOADDRRANGE AddrOut;
3873
3874 /* Lookup the address from the DTE and I/O page tables.*/
3875 size_t cbPages = 0;
3876 rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdDteLookupPage, &AddrIn, &Aux, &AddrOut, &cbPages);
3877 GCPhysSpa = AddrOut.uAddr;
3878 cbContiguous = AddrOut.cb;
3879
3880 /* If we stopped since translation resulted in non-contiguous physical addresses,
3881 what we translated so far is still valid. */
3882 if (rc == VERR_OUT_OF_RANGE)
3883 {
3884 Assert(cbContiguous > 0 && cbContiguous < cbAccess);
3885 rc = VINF_SUCCESS;
3886 STAM_COUNTER_INC(&pThis->StatAccessDteNonContig);
3887 }
3888
3889 if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
3890 STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
3891
3892#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3893 if (RT_SUCCESS(rc))
3894 {
3895 /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */
3896 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_ADDR_TRANSLATE);
3897 /* Update IOTLB for the contiguous range of I/O virtual addresses. */
3898 iommuAmdIotlbAddRange(pDevIns, Dte.n.u16DomainId, uIova & X86_PAGE_4K_BASE_MASK, cbPages,
3899 GCPhysSpa & X86_PAGE_4K_BASE_MASK, AddrOut.fPerm);
3900 }
3901#endif
3902 }
3903 else if (rc == VINF_IOMMU_ADDR_TRANSLATION_DISABLED)
3904 {
3905 /*
3906 * Translation is disabled for this device (root paging mode is 0).
3907 * GPA=SPA, but the permission bits are important and controls accesses.
3908 */
3909 GCPhysSpa = uIova;
3910 cbContiguous = cbAccess;
3911 rc = VINF_SUCCESS;
3912
3913#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3914 /* Update that addresses permissions of DTE apply (but omit address translation). */
3915 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_IO_PERM);
3916#endif
3917 }
3918 else
3919 {
3920 /* Address translation failed or access is denied. */
3921 Assert(rc == VERR_IOMMU_ADDR_ACCESS_DENIED || rc == VERR_IOMMU_ADDR_TRANSLATION_FAILED);
3922 GCPhysSpa = NIL_RTGCPHYS;
3923 cbContiguous = 0;
3924 STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
3925 }
3926 }
3927 else
3928 {
3929 /* Invalid reserved bits in the DTE, raise an error event. */
3930 LogFunc(("Invalid DTE reserved bits (u64[0]=%#RX64 u64[1]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1));
3931 EVT_ILLEGAL_DTE_T Event;
3932 iommuAmdIllegalDteEventInit(uDevId, uIova, true /* fRsvdNotZero */, enmOp, &Event);
3933 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
3934 rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3935 }
3936 }
3937 else
3938 {
3939 /*
3940 * The DTE is not valid, forward addresses untranslated.
3941 * See AMD IOMMU spec. "Table 5: Feature Enablement for Address Translation".
3942 */
3943 GCPhysSpa = uIova;
3944 cbContiguous = cbAccess;
3945
3946#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3947 /* Update that addresses don't require translation (nor permission checks) but a DTE is present. */
3948 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
3949#endif
3950 }
3951 }
3952 else
3953 {
3954 LogFunc(("Failed to read device table entry. uDevId=%#x rc=%Rrc\n", uDevId, rc));
3955 rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3956 }
3957
3958 *pGCPhysSpa = GCPhysSpa;
3959 *pcbContiguous = cbContiguous;
3960 AssertMsg(rc != VINF_SUCCESS || cbContiguous > 0, ("cbContiguous=%zu\n", cbContiguous));
3961 return rc;
3962}
3963
3964
3965#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3966/**
3967 * I/O page lookup callback for finding an I/O page from the IOTLB.
3968 *
3969 * @returns VBox status code.
3970 * @retval VINF_SUCCESS when the page is found and has the right permissions.
3971 * @retval VERR_NOT_FOUND when the page is not found.
3972 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
3973 * insufficient to what is requested.
3974 *
3975 * @param pDevIns The IOMMU instance data.
3976 * @param uIovaPage The I/O virtual address to lookup in the cache (must be
3977 * 4K aligned).
3978 * @param fPerm The I/O permissions for this access, see
3979 * IOMMU_IO_PERM_XXX.
3980 * @param pAux The auxiliary information required during lookup.
3981 * @param pPageLookup Where to store the looked up I/O page.
3982 */
3983static DECLCALLBACK(int) iommuAmdCacheLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
3984 PIOPAGELOOKUP pPageLookup)
3985{
3986 Assert(pAux);
3987 Assert(pPageLookup);
3988 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
3989
3990 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3991
3992 STAM_PROFILE_ADV_START(&pThis->StatProfIotlbeLookup, a);
3993 PCIOTLBE pIotlbe = iommuAmdIotlbLookup(pThis, pAux->uDomainId, uIovaPage);
3994 STAM_PROFILE_ADV_STOP(&pThis->StatProfIotlbeLookup, a);
3995 if (pIotlbe)
3996 {
3997 *pPageLookup = pIotlbe->PageLookup;
3998 if ((pPageLookup->fPerm & fPerm) == fPerm)
3999 {
4000 STAM_COUNTER_INC(&pThis->StatAccessCacheHit);
4001 return VINF_SUCCESS;
4002 }
4003 return VERR_IOMMU_ADDR_ACCESS_DENIED;
4004 }
4005 return VERR_NOT_FOUND;
4006}
4007
4008
4009/**
4010 * Lookups a memory access from the IOMMU cache.
4011 *
4012 * @returns VBox status code.
4013 * @retval VINF_SUCCESS if the access was cached and permissions are verified.
4014 * @retval VERR_OUT_OF_RANGE if the access resulted in a non-contiguous physical
4015 * address region.
4016 * @retval VERR_NOT_FOUND if the access was not cached.
4017 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the access was cached but permissions
4018 * are insufficient.
4019 *
4020 * @param pDevIns The IOMMU instance data.
4021 * @param uDevId The device ID (bus, device, function).
4022 * @param uIova The I/O virtual address to lookup.
4023 * @param cbAccess The size of the access.
4024 * @param fPerm The I/O permissions for this access, see
4025 * IOMMU_IO_PERM_XXX.
4026 * @param enmOp The IOMMU operation being performed.
4027 * @param pGCPhysSpa Where to store the translated system physical address.
4028 * @param pcbContiguous Where to store the number of contiguous bytes translated
4029 * and permission-checked.
4030 */
4031static int iommuAmdCacheLookup(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, size_t cbAccess, uint8_t fPerm, IOMMUOP enmOp,
4032 PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
4033{
4034 int rc;
4035 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4036
4037 /*
4038 * We hold the cache lock across both the DTE and the IOTLB lookups (if any) because
4039 * we don't want the DTE cache to be invalidate while we perform IOTBL lookups.
4040 */
4041 IOMMU_LOCK_CACHE(pDevIns, pThis);
4042
4043 /* Lookup the DTE cache entry. */
4044 PCDTECACHE pDteCache = &pThis->paDteCache[uDevId];
4045 if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_ADDR_TRANSLATE))
4046 == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_ADDR_TRANSLATE))
4047 {
4048 /* Lookup IOTLB entries. */
4049 IOADDRRANGE AddrIn;
4050 AddrIn.uAddr = uIova;
4051 AddrIn.cb = cbAccess;
4052 AddrIn.fPerm = fPerm;
4053
4054 IOMMUOPAUX Aux;
4055 Aux.enmOp = enmOp;
4056 Aux.pDte = NULL;
4057 Aux.uDeviceId = uDevId;
4058 Aux.uDomainId = pDteCache->uDomainId;
4059
4060 IOADDRRANGE AddrOut;
4061 rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, NULL /* pcbPages */);
4062 Assert(AddrOut.cb <= cbAccess);
4063 *pGCPhysSpa = AddrOut.uAddr;
4064 *pcbContiguous = AddrOut.cb;
4065 }
4066 else if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_IO_PERM))
4067 == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_IO_PERM))
4068 {
4069 /* Address translation is disabled, but DTE permissions apply. */
4070 Assert(!(pDteCache->fFlags & IOMMU_DTECACHE_F_ADDR_TRANSLATE));
4071 uint8_t const fDtePerm = (pDteCache->fFlags >> IOMMU_DTECACHE_F_IO_PERM_SHIFT) & IOMMU_DTECACHE_F_IO_PERM_MASK;
4072 if ((fDtePerm & fPerm) == fPerm)
4073 {
4074 *pGCPhysSpa = uIova;
4075 *pcbContiguous = cbAccess;
4076 rc = VINF_SUCCESS;
4077 }
4078 else
4079 {
4080 *pGCPhysSpa = NIL_RTGCPHYS;
4081 *pcbContiguous = 0;
4082 rc = VERR_IOMMU_ADDR_ACCESS_DENIED;
4083 }
4084 }
4085 else if (pDteCache->fFlags & IOMMU_DTECACHE_F_PRESENT)
4086 {
4087 /* Forward addresses untranslated, without checking permissions. */
4088 *pGCPhysSpa = uIova;
4089 *pcbContiguous = cbAccess;
4090 rc = VINF_SUCCESS;
4091 }
4092 else
4093 {
4094 rc = VERR_NOT_FOUND;
4095 *pGCPhysSpa = NIL_RTGCPHYS;
4096 *pcbContiguous = 0;
4097 }
4098
4099 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
4100
4101 /* Raise event if address translation resulted in a permission failure. */
4102 if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
4103 {
4104 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4105 iommuAmdIoPageFaultEventInit(uDevId, pDteCache->uDomainId, uIova, true /* fPresent */,
4106 false /* fRsvdNotZero */, true /* fPermDenied */, enmOp, &EvtIoPageFault);
4107 iommuAmdIoPageFaultEventRaise(pDevIns, pDteCache->fFlags, NULL /* pIrte */, enmOp, &EvtIoPageFault,
4108 kIoPageFaultType_PermDenied);
4109 }
4110
4111 return rc;
4112}
4113#endif /* IN_RING3 && IOMMU_WITH_IOTLBE_CACHE */
4114
4115
4116/**
4117 * Gets the I/O permission and IOMMU operation type for the given access flags.
4118 *
4119 * @param fFlags The PDM IOMMU flags, PDMIOMMU_MEM_F_XXX.
4120 * @param penmOp Where to store the IOMMU operation.
4121 * @param pfPerm Where to store the IOMMU I/O permission.
4122 * @param pStatRead The stat counter to increment for a read operation.
4123 * @param pStatWrite The stat counter to increment for a write operation.
4124 */
4125DECLINLINE(void) iommuAmdMemAccessGetPermAndOp(uint32_t fFlags, PIOMMUOP penmOp, uint8_t *pfPerm, PSTAMCOUNTER pStatRead,
4126 PSTAMCOUNTER pStatWrite)
4127{
4128 if (fFlags & PDMIOMMU_MEM_F_WRITE)
4129 {
4130 *penmOp = IOMMUOP_MEM_WRITE;
4131 *pfPerm = IOMMU_IO_PERM_WRITE;
4132 STAM_COUNTER_INC(pStatWrite);
4133 }
4134 else
4135 {
4136 Assert(fFlags & PDMIOMMU_MEM_F_READ);
4137 *penmOp = IOMMUOP_MEM_READ;
4138 *pfPerm = IOMMU_IO_PERM_READ;
4139 STAM_COUNTER_INC(pStatRead);
4140 }
4141}
4142
4143
4144/**
4145 * Memory access transaction from a device.
4146 *
4147 * @returns VBox status code.
4148 * @param pDevIns The IOMMU device instance.
4149 * @param uDevId The device ID (bus, device, function).
4150 * @param uIova The I/O virtual address being accessed.
4151 * @param cbAccess The number of bytes being accessed.
4152 * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX.
4153 * @param pGCPhysSpa Where to store the translated system physical address.
4154 * @param pcbContiguous Where to store the number of contiguous bytes translated
4155 * and permission-checked.
4156 *
4157 * @thread Any.
4158 */
4159static DECLCALLBACK(int) iommuAmdMemAccess(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, size_t cbAccess, uint32_t fFlags,
4160 PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
4161{
4162 /* Validate. */
4163 AssertPtr(pDevIns);
4164 AssertPtr(pGCPhysSpa);
4165 Assert(cbAccess > 0);
4166 Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
4167
4168 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4169 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
4170 if (Ctrl.n.u1IommuEn)
4171 {
4172 IOMMUOP enmOp;
4173 uint8_t fPerm;
4174 iommuAmdMemAccessGetPermAndOp(fFlags, &enmOp, &fPerm, &pThis->CTX_SUFF_Z(StatMemRead), &pThis->CTX_SUFF_Z(StatMemWrite));
4175 LogFlowFunc(("%s: uDevId=%#x uIova=%#RX64 cb=%zu\n", iommuAmdMemAccessGetPermName(fPerm), uDevId, uIova, cbAccess));
4176
4177 int rc;
4178#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
4179 /* Lookup the IOVA from the cache. */
4180 rc = iommuAmdCacheLookup(pDevIns, uDevId, uIova, cbAccess, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
4181 if (rc == VINF_SUCCESS)
4182 {
4183 /* Entire access was cached and permissions were valid. */
4184 Assert(*pcbContiguous == cbAccess);
4185 Assert(*pGCPhysSpa != NIL_RTGCPHYS);
4186 STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull);
4187 return rc;
4188 }
4189 if (rc == VERR_OUT_OF_RANGE)
4190 {
4191 /* Access stopped when translations resulted in non-contiguous memory, let caller resume access. */
4192 Assert(*pcbContiguous > 0 && *pcbContiguous < cbAccess);
4193 STAM_COUNTER_INC(&pThis->StatAccessCacheNonContig);
4194 return VINF_SUCCESS;
4195 }
4196 if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
4197 {
4198 /* Access denied due to insufficient permissions. */
4199 STAM_COUNTER_INC(&pThis->StatAccessCachePermDenied);
4200 return rc;
4201 }
4202
4203 /* Access incomplete as not all pages were in the cache. Lookup the rest from the device table. */
4204 AssertMsg(rc == VERR_NOT_FOUND, ("Invalid cache lookup result: %Rrc\n", rc));
4205 AssertMsg(*pcbContiguous < cbAccess, ("Invalid size: cbContiguous=%zu cbAccess=%zu\n", *pcbContiguous, cbAccess));
4206 uIova += *pcbContiguous;
4207 cbAccess -= *pcbContiguous;
4208 STAM_COUNTER_INC(&pThis->StatAccessCacheMiss);
4209#endif
4210
4211 /* Lookup the IOVA from the device table. */
4212 rc = iommuAmdDteLookup(pDevIns, uDevId, uIova, cbAccess, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
4213 if (RT_SUCCESS(rc))
4214 { /* likely */ }
4215 else
4216 {
4217 Assert(rc != VERR_OUT_OF_RANGE);
4218 LogFunc(("DTE lookup failed! uDevId=%#x uIova=%#RX64 fPerm=%u cbAccess=%zu rc=%#Rrc\n", uDevId, uIova, fPerm,
4219 cbAccess, rc));
4220 }
4221
4222 return rc;
4223 }
4224
4225 /* Addresses are forwarded without translation when the IOMMU is disabled. */
4226 *pGCPhysSpa = uIova;
4227 *pcbContiguous = cbAccess;
4228 return VINF_SUCCESS;
4229}
4230
4231
4232/**
4233 * Memory access bulk (one or more 4K pages) request from a device.
4234 *
4235 * @returns VBox status code.
4236 * @param pDevIns The IOMMU device instance.
4237 * @param uDevId The device ID (bus, device, function).
4238 * @param cIovas The number of addresses being accessed.
4239 * @param pauIovas The I/O virtual addresses for each page being accessed.
4240 * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX.
4241 * @param paGCPhysSpa Where to store the translated physical addresses.
4242 *
4243 * @thread Any.
4244 */
4245static DECLCALLBACK(int) iommuAmdMemBulkAccess(PPDMDEVINS pDevIns, uint16_t uDevId, size_t cIovas, uint64_t const *pauIovas,
4246 uint32_t fFlags, PRTGCPHYS paGCPhysSpa)
4247{
4248 /* Validate. */
4249 AssertPtr(pDevIns);
4250 Assert(cIovas > 0);
4251 AssertPtr(pauIovas);
4252 AssertPtr(paGCPhysSpa);
4253 Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
4254
4255 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4256 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
4257 if (Ctrl.n.u1IommuEn)
4258 {
4259 IOMMUOP enmOp;
4260 uint8_t fPerm;
4261 iommuAmdMemAccessGetPermAndOp(fFlags, &enmOp, &fPerm, &pThis->CTX_SUFF_Z(StatMemBulkRead),
4262 &pThis->CTX_SUFF_Z(StatMemBulkWrite));
4263 LogFlowFunc(("%s: uDevId=%#x cIovas=%zu\n", iommuAmdMemAccessGetPermName(fPerm), uDevId, cIovas));
4264
4265 /** @todo IOMMU: IOTLB cache lookup. */
4266
4267 /* Lookup each IOVA from the device table. */
4268 for (size_t i = 0; i < cIovas; i++)
4269 {
4270 size_t cbContig;
4271 int rc = iommuAmdDteLookup(pDevIns, uDevId, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i], &cbContig);
4272 if (RT_SUCCESS(rc))
4273 { /* likely */ }
4274 else
4275 {
4276 LogFunc(("Failed! uDevId=%#x uIova=%#RX64 fPerm=%u rc=%Rrc\n", uDevId, pauIovas[i], fPerm, rc));
4277 return rc;
4278 }
4279 Assert(cbContig == X86_PAGE_SIZE);
4280 }
4281 }
4282 else
4283 {
4284 /* Addresses are forwarded without translation when the IOMMU is disabled. */
4285 for (size_t i = 0; i < cIovas; i++)
4286 paGCPhysSpa[i] = pauIovas[i];
4287 }
4288
4289 return VINF_SUCCESS;
4290}
4291
4292
4293/**
4294 * Reads an interrupt remapping table entry from guest memory given its DTE.
4295 *
4296 * @returns VBox status code.
4297 * @param pDevIns The IOMMU device instance.
4298 * @param uDevId The device ID (bus, device, function).
4299 * @param pDte The device table entry.
4300 * @param GCPhysIn The source MSI address (used for reporting errors).
4301 * @param uDataIn The source MSI data.
4302 * @param enmOp The IOMMU operation being performed.
4303 * @param pIrte Where to store the interrupt remapping table entry.
4304 *
4305 * @thread Any.
4306 */
4307static int iommuAmdIrteRead(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, RTGCPHYS GCPhysIn, uint32_t uDataIn,
4308 IOMMUOP enmOp, PIRTE_T pIrte)
4309{
4310 /* Ensure the IRTE length is valid. */
4311 Assert(pDte->n.u4IntrTableLength < IOMMU_DTE_INTR_TAB_LEN_MAX);
4312
4313 RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK;
4314 uint16_t const cbIntrTable = IOMMU_GET_INTR_TAB_LEN(pDte);
4315 uint16_t const offIrte = IOMMU_GET_IRTE_OFF(uDataIn);
4316 RTGCPHYS const GCPhysIrte = GCPhysIntrTable + offIrte;
4317
4318 /* Ensure the IRTE falls completely within the interrupt table. */
4319 if (offIrte + sizeof(IRTE_T) <= cbIntrTable)
4320 { /* likely */ }
4321 else
4322 {
4323 LogFunc(("IRTE exceeds table length (GCPhysIntrTable=%#RGp cbIntrTable=%u offIrte=%#x uDataIn=%#x) -> IOPF\n",
4324 GCPhysIntrTable, cbIntrTable, offIrte, uDataIn));
4325
4326 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4327 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, GCPhysIn, false /* fPresent */, false /* fRsvdNotZero */,
4328 false /* fPermDenied */, enmOp, &EvtIoPageFault);
4329 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
4330 kIoPageFaultType_IrteAddrInvalid);
4331 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4332 }
4333
4334 /* Read the IRTE from memory. */
4335 Assert(!(GCPhysIrte & 3));
4336 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysIrte, pIrte, sizeof(*pIrte));
4337 if (RT_SUCCESS(rc))
4338 return VINF_SUCCESS;
4339
4340 /** @todo The IOMMU spec. does not tell what kind of error is reported in this
4341 * situation. Is it an I/O page fault or a device table hardware error?
4342 * There's no interrupt table hardware error event, but it's unclear what
4343 * we should do here. */
4344 LogFunc(("Failed to read interrupt table entry at %#RGp. rc=%Rrc -> ???\n", GCPhysIrte, rc));
4345 return VERR_IOMMU_IPE_4;
4346}
4347
4348
4349/**
4350 * Remaps the interrupt using the interrupt remapping table.
4351 *
4352 * @returns VBox status code.
4353 * @param pDevIns The IOMMU instance data.
4354 * @param uDevId The device ID (bus, device, function).
4355 * @param pDte The device table entry.
4356 * @param enmOp The IOMMU operation being performed.
4357 * @param pMsiIn The source MSI.
4358 * @param pMsiOut Where to store the remapped MSI.
4359 *
4360 * @thread Any.
4361 */
4362static int iommuAmdIntrRemap(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, IOMMUOP enmOp, PCMSIMSG pMsiIn,
4363 PMSIMSG pMsiOut)
4364{
4365 Assert(pDte->n.u2IntrCtrl == IOMMU_INTR_CTRL_REMAP);
4366
4367 IRTE_T Irte;
4368 uint32_t const uMsiInData = pMsiIn->Data.u32;
4369 int rc = iommuAmdIrteRead(pDevIns, uDevId, pDte, pMsiIn->Addr.u64, uMsiInData, enmOp, &Irte);
4370 if (RT_SUCCESS(rc))
4371 {
4372 if (Irte.n.u1RemapEnable)
4373 {
4374 if (!Irte.n.u1GuestMode)
4375 {
4376 if (Irte.n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
4377 {
4378 iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, &Irte);
4379#ifdef IOMMU_WITH_IRTE_CACHE
4380 /* Add/Update the interrupt cache with the remapped results. */
4381 uint16_t const offIrte = IOMMU_GET_IRTE_OFF(uMsiInData);
4382 int const rcUpdate = iommuAmdIrteCacheAdd(pDevIns, uDevId, offIrte, &Irte);
4383 if (RT_FAILURE(rcUpdate))
4384 LogRelMax(1, ("%s: Warning! Interrupt cache full. Consider increasing cache capacity.\n", IOMMU_LOG_PFX));
4385#endif
4386 return VINF_SUCCESS;
4387 }
4388
4389 LogFunc(("Interrupt type (%#x) invalid -> IOPF\n", Irte.n.u3IntrType));
4390 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4391 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4392 true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4393 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault,
4394 kIoPageFaultType_IrteRsvdIntType);
4395 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4396 }
4397
4398 LogFunc(("Guest mode not supported -> IOPF\n"));
4399 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4400 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4401 true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4402 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRsvdNotZero);
4403 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4404 }
4405
4406 LogFunc(("Remapping disabled -> IOPF\n"));
4407 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4408 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4409 false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4410 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRemapEn);
4411 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4412 }
4413
4414 return rc;
4415}
4416
4417
4418/**
4419 * Looks up an MSI interrupt from the interrupt remapping table.
4420 *
4421 * @returns VBox status code.
4422 * @param pDevIns The IOMMU instance data.
4423 * @param uDevId The device ID (bus, device, function).
4424 * @param enmOp The IOMMU operation being performed.
4425 * @param pMsiIn The source MSI.
4426 * @param pMsiOut Where to store the remapped MSI.
4427 *
4428 * @thread Any.
4429 */
4430static int iommuAmdIntrTableLookup(PPDMDEVINS pDevIns, uint16_t uDevId, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
4431{
4432 /* Read the device table entry from memory. */
4433 LogFlowFunc(("uDevId=%#x (%#x:%#x:%#x) enmOp=%u\n", uDevId,
4434 ((uDevId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK),
4435 ((uDevId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK), (uDevId & VBOX_PCI_DEVFN_FUN_MASK), enmOp));
4436
4437 DTE_T Dte;
4438 int rc = iommuAmdDteRead(pDevIns, uDevId, enmOp, &Dte);
4439 if (RT_SUCCESS(rc))
4440 {
4441 /* If the DTE is not valid, all interrupts are forwarded without remapping. */
4442 if (Dte.n.u1IntrMapValid)
4443 {
4444 /* Validate bits 255:128 of the device table entry when DTE.IV is 1. */
4445 uint64_t const fRsvd0 = Dte.au64[2] & ~IOMMU_DTE_QWORD_2_VALID_MASK;
4446 uint64_t const fRsvd1 = Dte.au64[3] & ~IOMMU_DTE_QWORD_3_VALID_MASK;
4447 if (RT_LIKELY( !fRsvd0
4448 && !fRsvd1))
4449 { /* likely */ }
4450 else
4451 {
4452 LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0,
4453 fRsvd1));
4454 EVT_ILLEGAL_DTE_T Event;
4455 iommuAmdIllegalDteEventInit(uDevId, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
4456 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
4457 return VERR_IOMMU_INTR_REMAP_FAILED;
4458 }
4459
4460#ifdef IOMMU_WITH_IRTE_CACHE
4461 /* Update the DTE cache -after- we've checked reserved bits (above) when the interrupt map is valid. */
4462 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
4463#endif
4464
4465 /*
4466 * LINT0/LINT1 pins cannot be driven by PCI(e) devices. Perhaps for a Southbridge
4467 * that's connected through HyperTransport it might be possible; but for us, it
4468 * doesn't seem we need to specially handle these pins.
4469 */
4470
4471 /*
4472 * Validate the MSI source address.
4473 *
4474 * 64-bit MSIs are supported by the PCI and AMD IOMMU spec. However as far as the
4475 * CPU is concerned, the MSI region is fixed and we must ensure no other device
4476 * claims the region as I/O space.
4477 *
4478 * See PCI spec. 6.1.4. "Message Signaled Interrupt (MSI) Support".
4479 * See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support".
4480 * See Intel spec. 10.11.1 "Message Address Register Format".
4481 */
4482 if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
4483 {
4484 /*
4485 * The IOMMU remaps fixed and arbitrated interrupts using the IRTE.
4486 * See AMD IOMMU spec. "2.2.5.1 Interrupt Remapping Tables, Guest Virtual APIC Not Enabled".
4487 */
4488 uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
4489 bool fPassThru = false;
4490 switch (u8DeliveryMode)
4491 {
4492 case VBOX_MSI_DELIVERY_MODE_FIXED:
4493 case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO:
4494 {
4495 uint8_t const uIntrCtrl = Dte.n.u2IntrCtrl;
4496 if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
4497 {
4498 /* Validate the encoded interrupt table length when IntCtl specifies remapping. */
4499 uint8_t const uIntrTabLen = Dte.n.u4IntrTableLength;
4500 if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
4501 {
4502 /*
4503 * We don't support guest interrupt remapping yet. When we do, we'll need to
4504 * check Ctrl.u1GstVirtApicEn and use the guest Virtual APIC Table Root Pointer
4505 * in the DTE rather than the Interrupt Root Table Pointer. Since the caller
4506 * already reads the control register, add that as a parameter when we eventually
4507 * support guest interrupt remapping. For now, just assert.
4508 */
4509 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4510 Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
4511 NOREF(pThis);
4512
4513 return iommuAmdIntrRemap(pDevIns, uDevId, &Dte, enmOp, pMsiIn, pMsiOut);
4514 }
4515
4516 LogFunc(("Invalid interrupt table length %#x -> Illegal DTE\n", uIntrTabLen));
4517 EVT_ILLEGAL_DTE_T Event;
4518 iommuAmdIllegalDteEventInit(uDevId, pMsiIn->Addr.u64, false /* fRsvdNotZero */, enmOp, &Event);
4519 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntTabLen);
4520 return VERR_IOMMU_INTR_REMAP_FAILED;
4521 }
4522
4523 if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
4524 {
4525 fPassThru = true;
4526 break;
4527 }
4528
4529 if (uIntrCtrl == IOMMU_INTR_CTRL_TARGET_ABORT)
4530 {
4531 LogFunc(("IntCtl=0: Remapping disallowed for fixed/arbitrated interrupt (%#x) -> Target abort\n",
4532 pMsiIn->Data.n.u8Vector));
4533 iommuAmdSetPciTargetAbort(pDevIns);
4534 return VERR_IOMMU_INTR_REMAP_DENIED;
4535 }
4536
4537 Assert(uIntrCtrl == IOMMU_INTR_CTRL_RSVD); /* Paranoia. */
4538 LogFunc(("IntCtl mode invalid %#x -> Illegal DTE\n", uIntrCtrl));
4539 EVT_ILLEGAL_DTE_T Event;
4540 iommuAmdIllegalDteEventInit(uDevId, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
4541 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntCtl);
4542 return VERR_IOMMU_INTR_REMAP_FAILED;
4543 }
4544
4545 /* SMIs are passed through unmapped. We don't implement SMI filters. */
4546 case VBOX_MSI_DELIVERY_MODE_SMI: fPassThru = true; break;
4547 case VBOX_MSI_DELIVERY_MODE_NMI: fPassThru = Dte.n.u1NmiPassthru; break;
4548 case VBOX_MSI_DELIVERY_MODE_INIT: fPassThru = Dte.n.u1InitPassthru; break;
4549 case VBOX_MSI_DELIVERY_MODE_EXT_INT: fPassThru = Dte.n.u1ExtIntPassthru; break;
4550 default:
4551 {
4552 LogFunc(("MSI data delivery mode invalid %#x -> Target abort\n", u8DeliveryMode));
4553 iommuAmdSetPciTargetAbort(pDevIns);
4554 return VERR_IOMMU_INTR_REMAP_FAILED;
4555 }
4556 }
4557
4558 /*
4559 * For those other than fixed and arbitrated interrupts, destination mode must be 0 (physical).
4560 * See AMD IOMMU spec. The note below Table 19: "IOMMU Controls and Actions for Upstream Interrupts".
4561 */
4562 if ( u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO
4563 || !pMsiIn->Addr.n.u1DestMode)
4564 {
4565 if (fPassThru)
4566 {
4567 *pMsiOut = *pMsiIn;
4568 return VINF_SUCCESS;
4569 }
4570 LogFunc(("Remapping/passthru disallowed for interrupt %#x -> Target abort\n", pMsiIn->Data.n.u8Vector));
4571 }
4572 else
4573 LogFunc(("Logical destination mode invalid for delivery mode %#x\n -> Target abort\n", u8DeliveryMode));
4574
4575 iommuAmdSetPciTargetAbort(pDevIns);
4576 return VERR_IOMMU_INTR_REMAP_DENIED;
4577 }
4578 else
4579 {
4580 /** @todo should be cause a PCI target abort here? */
4581 LogFunc(("MSI address region invalid %#RX64\n", pMsiIn->Addr.u64));
4582 return VERR_IOMMU_INTR_REMAP_FAILED;
4583 }
4584 }
4585 else
4586 {
4587#ifdef IOMMU_WITH_IRTE_CACHE
4588 /* Update the DTE cache that the interrupt map isn't valid. */
4589 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
4590#endif
4591 LogFlowFunc(("DTE interrupt map not valid\n"));
4592 *pMsiOut = *pMsiIn;
4593 return VINF_SUCCESS;
4594 }
4595 }
4596
4597 LogFunc(("Failed to read device table entry. uDevId=%#x rc=%Rrc\n", uDevId, rc));
4598 return VERR_IOMMU_INTR_REMAP_FAILED;
4599}
4600
4601
4602/**
4603 * Interrupt remap request from a device.
4604 *
4605 * @returns VBox status code.
4606 * @param pDevIns The IOMMU device instance.
4607 * @param uDevId The device ID (bus, device, function).
4608 * @param pMsiIn The source MSI.
4609 * @param pMsiOut Where to store the remapped MSI.
4610 */
4611static DECLCALLBACK(int) iommuAmdMsiRemap(PPDMDEVINS pDevIns, uint16_t uDevId, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
4612{
4613 /* Validate. */
4614 Assert(pDevIns);
4615 Assert(pMsiIn);
4616 Assert(pMsiOut);
4617
4618 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4619
4620 /* Interrupts are forwarded with remapping when the IOMMU is disabled. */
4621 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
4622 if (Ctrl.n.u1IommuEn)
4623 {
4624 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemap));
4625
4626 int rc;
4627#ifdef IOMMU_WITH_IRTE_CACHE
4628 STAM_PROFILE_ADV_START(&pThis->StatProfIrteCacheLookup, a);
4629 rc = iommuAmdIrteCacheLookup(pDevIns, uDevId, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
4630 STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteCacheLookup, a);
4631 if (RT_SUCCESS(rc))
4632 {
4633 STAM_COUNTER_INC(&pThis->StatIntrCacheHit);
4634 return VINF_SUCCESS;
4635 }
4636 STAM_COUNTER_INC(&pThis->StatIntrCacheMiss);
4637#endif
4638
4639 STAM_PROFILE_ADV_START(&pThis->StatProfIrteLookup, a);
4640 rc = iommuAmdIntrTableLookup(pDevIns, uDevId, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
4641 STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteLookup, a);
4642 return rc;
4643 }
4644
4645 *pMsiOut = *pMsiIn;
4646 return VINF_SUCCESS;
4647}
4648
4649
4650/**
4651 * @callback_method_impl{FNIOMMMIONEWWRITE}
4652 */
4653static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb)
4654{
4655 NOREF(pvUser);
4656 Assert(cb == 4 || cb == 8);
4657 Assert(!(off & (cb - 1)));
4658
4659 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4660 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioWrite)); NOREF(pThis);
4661
4662 uint64_t const uValue = cb == 8 ? *(uint64_t const *)pv : *(uint32_t const *)pv;
4663 return iommuAmdRegisterWrite(pDevIns, off, cb, uValue);
4664}
4665
4666
4667/**
4668 * @callback_method_impl{FNIOMMMIONEWREAD}
4669 */
4670static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioRead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb)
4671{
4672 NOREF(pvUser);
4673 Assert(cb == 4 || cb == 8);
4674 Assert(!(off & (cb - 1)));
4675
4676 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4677 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioRead)); NOREF(pThis);
4678
4679 uint64_t uResult;
4680 VBOXSTRICTRC rcStrict = iommuAmdRegisterRead(pDevIns, off, &uResult);
4681 if (cb == 8)
4682 *(uint64_t *)pv = uResult;
4683 else
4684 *(uint32_t *)pv = (uint32_t)uResult;
4685
4686 return rcStrict;
4687}
4688
4689
4690#ifdef IN_RING3
4691/**
4692 * Processes an IOMMU command.
4693 *
4694 * @returns VBox status code.
4695 * @param pDevIns The IOMMU device instance.
4696 * @param pCmd The command to process.
4697 * @param GCPhysCmd The system physical address of the command.
4698 * @param pEvtError Where to store the error event in case of failures.
4699 *
4700 * @thread Command thread.
4701 */
4702static int iommuAmdR3CmdProcess(PPDMDEVINS pDevIns, PCCMD_GENERIC_T pCmd, RTGCPHYS GCPhysCmd, PEVT_GENERIC_T pEvtError)
4703{
4704 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4705 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
4706
4707 STAM_COUNTER_INC(&pThis->StatCmd);
4708
4709 uint8_t const bCmd = pCmd->n.u4Opcode;
4710 switch (bCmd)
4711 {
4712 case IOMMU_CMD_COMPLETION_WAIT:
4713 {
4714 STAM_COUNTER_INC(&pThis->StatCmdCompWait);
4715
4716 PCCMD_COMWAIT_T pCmdComWait = (PCCMD_COMWAIT_T)pCmd;
4717 AssertCompile(sizeof(*pCmdComWait) == sizeof(*pCmd));
4718
4719 /* Validate reserved bits in the command. */
4720 if (!(pCmdComWait->au64[0] & ~IOMMU_CMD_COM_WAIT_QWORD_0_VALID_MASK))
4721 {
4722 /* If Completion Store is requested, write the StoreData to the specified address. */
4723 if (pCmdComWait->n.u1Store)
4724 {
4725 RTGCPHYS const GCPhysStore = RT_MAKE_U64(pCmdComWait->n.u29StoreAddrLo << 3, pCmdComWait->n.u20StoreAddrHi);
4726 uint64_t const u64Data = pCmdComWait->n.u64StoreData;
4727 int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysStore, &u64Data, sizeof(u64Data));
4728 if (RT_FAILURE(rc))
4729 {
4730 LogFunc(("Cmd(%#x): Failed to write StoreData (%#RX64) to %#RGp, rc=%Rrc\n", bCmd, u64Data,
4731 GCPhysStore, rc));
4732 iommuAmdCmdHwErrorEventInit(GCPhysStore, (PEVT_CMD_HW_ERR_T)pEvtError);
4733 return VERR_IOMMU_CMD_HW_ERROR;
4734 }
4735 }
4736
4737 /* If the command requests an interrupt and completion wait interrupts are enabled, raise it. */
4738 if (pCmdComWait->n.u1Interrupt)
4739 {
4740 IOMMU_LOCK(pDevIns, pThisCC);
4741 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_COMPLETION_WAIT_INTR);
4742 bool const fRaiseInt = pThis->Ctrl.n.u1CompWaitIntrEn;
4743 IOMMU_UNLOCK(pDevIns, pThisCC);
4744
4745 if (fRaiseInt)
4746 iommuAmdMsiInterruptRaise(pDevIns);
4747 }
4748 return VINF_SUCCESS;
4749 }
4750 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4751 return VERR_IOMMU_CMD_INVALID_FORMAT;
4752 }
4753
4754 case IOMMU_CMD_INV_DEV_TAB_ENTRY:
4755 {
4756 STAM_COUNTER_INC(&pThis->StatCmdInvDte);
4757#ifdef IOMMU_WITH_IOTLBE_CACHE
4758 PCCMD_INV_DTE_T pCmdInvDte = (PCCMD_INV_DTE_T)pCmd;
4759 AssertCompile(sizeof(*pCmdInvDte) == sizeof(*pCmd));
4760
4761 /* Validate reserved bits in the command. */
4762 if ( !(pCmdInvDte->au64[0] & ~IOMMU_CMD_INV_DTE_QWORD_0_VALID_MASK)
4763 && !(pCmdInvDte->au64[1] & ~IOMMU_CMD_INV_DTE_QWORD_1_VALID_MASK))
4764 {
4765 iommuAmdDteCacheUpdate(pDevIns, pCmdInvDte->n.u16DevId, NULL /* pDte */, 0 /* fFlags */);
4766 return VINF_SUCCESS;
4767 }
4768 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4769 return VERR_IOMMU_CMD_INVALID_FORMAT;
4770#else
4771 return VINF_SUCCESS;
4772#endif
4773 }
4774
4775 case IOMMU_CMD_INV_IOMMU_PAGES:
4776 {
4777 STAM_COUNTER_INC(&pThis->StatCmdInvIommuPages);
4778#ifdef IOMMU_WITH_IOTLBE_CACHE
4779 PCCMD_INV_IOMMU_PAGES_T pCmdInvPages = (PCCMD_INV_IOMMU_PAGES_T)pCmd;
4780 AssertCompile(sizeof(*pCmdInvPages) == sizeof(*pCmd));
4781
4782 /* Validate reserved bits in the command. */
4783 if ( !(pCmdInvPages->au64[0] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_0_VALID_MASK)
4784 && !(pCmdInvPages->au64[1] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_1_VALID_MASK))
4785 {
4786 uint64_t const uIova = RT_MAKE_U64(pCmdInvPages->n.u20AddrLo << X86_PAGE_4K_SHIFT, pCmdInvPages->n.u32AddrHi);
4787 uint16_t const uDomainId = pCmdInvPages->n.u16DomainId;
4788 bool const fFlushPde = pCmdInvPages->n.u1PageDirEntries;
4789 uint8_t cShift;
4790 if (!pCmdInvPages->n.u1Size)
4791 cShift = X86_PAGE_4K_SHIFT;
4792 else
4793 {
4794 /* Find the first clear bit starting from bit 12 to 64 of the I/O virtual address. */
4795 unsigned const uFirstZeroBit = ASMBitLastSetU64(~(uIova >> X86_PAGE_4K_SHIFT));
4796 cShift = X86_PAGE_4K_SHIFT + uFirstZeroBit;
4797
4798 /*
4799 * For the address 0x7ffffffffffff000, cShift would be 76 (12+64) and the code below
4800 * would do the right thing by clearing the entire cache for the specified domain ID.
4801 *
4802 * However, for the address 0xfffffffffffff000, cShift would be computed as 12.
4803 * IOMMU behavior is undefined in this case, so it's safe to invalidate just one page.
4804 * A debug-time assert is in place here to let us know if any software tries this.
4805 *
4806 * See AMD IOMMU spec. 2.4.3 "INVALIDATE_IOMMU_PAGES".
4807 * See AMD IOMMU spec. Table 14: "Example Page Size Encodings".
4808 */
4809 Assert(uIova != UINT64_C(0xfffffffffffff000));
4810 }
4811
4812 /*
4813 * Validate invalidation size.
4814 * See AMD IOMMU spec. 2.2.3 "I/O Page Tables for Host Translations".
4815 */
4816 if ( cShift == 12 /* 4K */ || cShift == 13 /* 8K */
4817 || cShift == 14 /* 16K */ || cShift == 20 /* 1M */
4818 || cShift == 22 /* 4M */ || cShift == 32 /* 4G */)
4819 {
4820 /* Remove the range of I/O virtual addresses requesting to be invalidated. */
4821 size_t const cbAccess = RT_BIT_64(cShift);
4822 iommuAmdIotlbRemoveRange(pDevIns, uDomainId, uIova, cbAccess);
4823 }
4824 else
4825 {
4826 /*
4827 * The guest provided size is invalid or exceeds the largest, meaningful page size.
4828 * In such situations we must remove all ranges for the specified domain ID.
4829 */
4830 iommuAmdIotlbRemoveDomainId(pDevIns, uDomainId);
4831 }
4832
4833 return VINF_SUCCESS;
4834 }
4835 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4836 return VERR_IOMMU_CMD_INVALID_FORMAT;
4837#else
4838 return VINF_SUCCESS;
4839#endif
4840 }
4841
4842 case IOMMU_CMD_INV_IOTLB_PAGES:
4843 {
4844 STAM_COUNTER_INC(&pThis->StatCmdInvIotlbPages);
4845
4846 uint32_t const uCapHdr = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_CAP_HDR);
4847 if (RT_BF_GET(uCapHdr, IOMMU_BF_CAPHDR_IOTLB_SUP))
4848 {
4849 /** @todo IOMMU: Implement remote IOTLB invalidation. */
4850 return VERR_NOT_IMPLEMENTED;
4851 }
4852 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4853 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4854 }
4855
4856 case IOMMU_CMD_INV_INTR_TABLE:
4857 {
4858 STAM_COUNTER_INC(&pThis->StatCmdInvIntrTable);
4859
4860 PCCMD_INV_INTR_TABLE_T pCmdInvIntrTable = (PCCMD_INV_INTR_TABLE_T)pCmd;
4861 AssertCompile(sizeof(*pCmdInvIntrTable) == sizeof(*pCmd));
4862
4863 /* Validate reserved bits in the command. */
4864 if ( !(pCmdInvIntrTable->au64[0] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_0_VALID_MASK)
4865 && !(pCmdInvIntrTable->au64[1] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_1_VALID_MASK))
4866 {
4867#ifdef IOMMU_WITH_IRTE_CACHE
4868 iommuAmdIrteCacheRemove(pDevIns, pCmdInvIntrTable->u.u16DevId);
4869#endif
4870 return VINF_SUCCESS;
4871 }
4872 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4873 return VERR_IOMMU_CMD_INVALID_FORMAT;
4874 }
4875
4876 case IOMMU_CMD_PREFETCH_IOMMU_PAGES:
4877 {
4878 /* Linux doesn't use prefetching of IOMMU pages, so we don't bother for now. */
4879 STAM_COUNTER_INC(&pThis->StatCmdPrefIommuPages);
4880 Assert(!pThis->ExtFeat.n.u1PrefetchSup);
4881 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4882 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4883 }
4884
4885 case IOMMU_CMD_COMPLETE_PPR_REQ:
4886 {
4887 STAM_COUNTER_INC(&pThis->StatCmdCompletePprReq);
4888
4889 /* We don't support PPR requests yet. */
4890 Assert(!pThis->ExtFeat.n.u1PprSup);
4891 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4892 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4893 }
4894
4895 case IOMMU_CMD_INV_IOMMU_ALL:
4896 {
4897 STAM_COUNTER_INC(&pThis->StatCmdInvIommuAll);
4898 if (pThis->ExtFeat.n.u1InvAllSup)
4899 {
4900#ifdef IOMMU_WITH_IOTLBE_CACHE
4901 PCCMD_INV_IOMMU_ALL_T pCmdInvAll = (PCCMD_INV_IOMMU_ALL_T)pCmd;
4902 AssertCompile(sizeof(*pCmdInvAll) == sizeof(*pCmd));
4903
4904 /* Validate reserved bits in the command. */
4905 if ( !(pCmdInvAll->au64[0] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_0_VALID_MASK)
4906 && !(pCmdInvAll->au64[1] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_1_VALID_MASK))
4907 {
4908 iommuAmdDteCacheRemoveAll(pDevIns);
4909 iommuAmdIotlbRemoveAll(pDevIns);
4910 return VINF_SUCCESS;
4911 }
4912 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4913 return VERR_IOMMU_CMD_INVALID_FORMAT;
4914#else
4915 return VINF_SUCCESS;
4916#endif
4917 }
4918 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4919 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4920 }
4921 }
4922
4923 STAM_COUNTER_DEC(&pThis->StatCmd);
4924 LogFunc(("Cmd(%#x): Unrecognized\n", bCmd));
4925 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4926 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4927}
4928
4929
4930/**
4931 * The IOMMU command thread.
4932 *
4933 * @returns VBox status code.
4934 * @param pDevIns The IOMMU device instance.
4935 * @param pThread The command thread.
4936 */
4937static DECLCALLBACK(int) iommuAmdR3CmdThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
4938{
4939 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4940 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
4941
4942 if (pThread->enmState == PDMTHREADSTATE_INITIALIZING)
4943 return VINF_SUCCESS;
4944
4945 /*
4946 * Pre-allocate the maximum command buffer size supported by the IOMMU.
4947 * This avoid trashing the heap as well as not wasting time allocating
4948 * and freeing buffers while processing commands.
4949 */
4950 size_t const cbMaxCmdBuf = sizeof(CMD_GENERIC_T) * iommuAmdGetBufMaxEntries(15);
4951 void *pvCmds = RTMemAllocZ(cbMaxCmdBuf);
4952 AssertPtrReturn(pvCmds, VERR_NO_MEMORY);
4953
4954 while (pThread->enmState == PDMTHREADSTATE_RUNNING)
4955 {
4956 /*
4957 * Sleep perpetually until we are woken up to process commands.
4958 */
4959 {
4960 ASMAtomicWriteBool(&pThis->fCmdThreadSleeping, true);
4961 bool fSignaled = ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, false);
4962 if (!fSignaled)
4963 {
4964 Assert(ASMAtomicReadBool(&pThis->fCmdThreadSleeping));
4965 int rc = PDMDevHlpSUPSemEventWaitNoResume(pDevIns, pThis->hEvtCmdThread, RT_INDEFINITE_WAIT);
4966 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_INTERRUPTED, ("%Rrc\n", rc), rc);
4967 if (RT_UNLIKELY(pThread->enmState != PDMTHREADSTATE_RUNNING))
4968 break;
4969 Log4Func(("Woken up with rc=%Rrc\n", rc));
4970 ASMAtomicWriteBool(&pThis->fCmdThreadSignaled, false);
4971 }
4972 ASMAtomicWriteBool(&pThis->fCmdThreadSleeping, false);
4973 }
4974
4975 /*
4976 * Fetch and process IOMMU commands.
4977 */
4978 /** @todo r=ramshankar: We currently copy all commands from guest memory into a
4979 * temporary host buffer before processing them as a batch. If we want to
4980 * save on host memory a bit, we could (once PGM has the necessary APIs)
4981 * lock the page mappings page mappings and access them directly. */
4982 IOMMU_LOCK(pDevIns, pThisCC);
4983
4984 if (pThis->Status.n.u1CmdBufRunning)
4985 {
4986 /* Get the offsets we need to read commands from memory (circular buffer offset). */
4987 uint32_t const cbCmdBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
4988 uint32_t const offTail = pThis->CmdBufTailPtr.n.off;
4989 uint32_t offHead = pThis->CmdBufHeadPtr.n.off;
4990
4991 /* Validate. */
4992 Assert(!(offHead & ~IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK));
4993 Assert(offHead < cbCmdBuf);
4994 Assert(cbCmdBuf <= cbMaxCmdBuf);
4995
4996 if (offHead != offTail)
4997 {
4998 /* Read the entire command buffer from memory (avoids multiple PGM calls). */
4999 RTGCPHYS const GCPhysCmdBufBase = pThis->CmdBufBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
5000
5001 IOMMU_UNLOCK(pDevIns, pThisCC);
5002 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysCmdBufBase, pvCmds, cbCmdBuf);
5003 IOMMU_LOCK(pDevIns, pThisCC);
5004
5005 if (RT_SUCCESS(rc))
5006 {
5007 /* Indicate to software we've fetched all commands from the buffer. */
5008 pThis->CmdBufHeadPtr.n.off = offTail;
5009
5010 /* Allow IOMMU to do other work while we process commands. */
5011 IOMMU_UNLOCK(pDevIns, pThisCC);
5012
5013 /* Process the fetched commands. */
5014 EVT_GENERIC_T EvtError;
5015 do
5016 {
5017 PCCMD_GENERIC_T pCmd = (PCCMD_GENERIC_T)((uintptr_t)pvCmds + offHead);
5018 rc = iommuAmdR3CmdProcess(pDevIns, pCmd, GCPhysCmdBufBase + offHead, &EvtError);
5019 if (RT_FAILURE(rc))
5020 {
5021 if ( rc == VERR_IOMMU_CMD_NOT_SUPPORTED
5022 || rc == VERR_IOMMU_CMD_INVALID_FORMAT)
5023 {
5024 Assert(EvtError.n.u4EvtCode == IOMMU_EVT_ILLEGAL_CMD_ERROR);
5025 iommuAmdIllegalCmdEventRaise(pDevIns, (PCEVT_ILLEGAL_CMD_ERR_T)&EvtError);
5026 }
5027 else if (rc == VERR_IOMMU_CMD_HW_ERROR)
5028 {
5029 Assert(EvtError.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
5030 LogFunc(("Raising command hardware error. Cmd=%#x -> COMMAND_HW_ERROR\n", pCmd->n.u4Opcode));
5031 iommuAmdCmdHwErrorEventRaise(pDevIns, (PCEVT_CMD_HW_ERR_T)&EvtError);
5032 }
5033 break;
5034 }
5035
5036 /* Move to the next command in the circular buffer. */
5037 offHead = (offHead + sizeof(CMD_GENERIC_T)) % cbCmdBuf;
5038 } while (offHead != offTail);
5039 }
5040 else
5041 {
5042 LogFunc(("Failed to read command at %#RGp. rc=%Rrc -> COMMAND_HW_ERROR\n", GCPhysCmdBufBase, rc));
5043 EVT_CMD_HW_ERR_T EvtCmdHwErr;
5044 iommuAmdCmdHwErrorEventInit(GCPhysCmdBufBase, &EvtCmdHwErr);
5045 iommuAmdCmdHwErrorEventRaise(pDevIns, &EvtCmdHwErr);
5046
5047 IOMMU_UNLOCK(pDevIns, pThisCC);
5048 }
5049 }
5050 else
5051 IOMMU_UNLOCK(pDevIns, pThisCC);
5052 }
5053 else
5054 IOMMU_UNLOCK(pDevIns, pThisCC);
5055 }
5056
5057 RTMemFree(pvCmds);
5058 LogFlowFunc(("Command thread terminating\n"));
5059 return VINF_SUCCESS;
5060}
5061
5062
5063/**
5064 * Wakes up the command thread so it can respond to a state change.
5065 *
5066 * @returns VBox status code.
5067 * @param pDevIns The IOMMU device instance.
5068 * @param pThread The command thread.
5069 */
5070static DECLCALLBACK(int) iommuAmdR3CmdThreadWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
5071{
5072 RT_NOREF(pThread);
5073 LogFlowFunc(("\n"));
5074 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5075 return PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
5076}
5077
5078
5079/**
5080 * @callback_method_impl{FNPCICONFIGREAD}
5081 */
5082static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
5083 unsigned cb, uint32_t *pu32Value)
5084{
5085 /** @todo IOMMU: PCI config read stat counter. */
5086 VBOXSTRICTRC rcStrict = PDMDevHlpPCIConfigRead(pDevIns, pPciDev, uAddress, cb, pu32Value);
5087 Log3Func(("uAddress=%#x (cb=%u) -> %#x. rc=%Rrc\n", uAddress, cb, *pu32Value, VBOXSTRICTRC_VAL(rcStrict)));
5088 return rcStrict;
5089}
5090
5091
5092/**
5093 * @callback_method_impl{FNPCICONFIGWRITE}
5094 */
5095static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
5096 unsigned cb, uint32_t u32Value)
5097{
5098 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5099
5100 /*
5101 * Discard writes to read-only registers that are specific to the IOMMU.
5102 * Other common PCI registers are handled by the generic code, see devpciR3IsConfigByteWritable().
5103 * See PCI spec. 6.1. "Configuration Space Organization".
5104 */
5105 switch (uAddress)
5106 {
5107 case IOMMU_PCI_OFF_CAP_HDR: /* All bits are read-only. */
5108 case IOMMU_PCI_OFF_RANGE_REG: /* We don't have any devices integrated with the IOMMU. */
5109 case IOMMU_PCI_OFF_MISCINFO_REG_0: /* We don't support MSI-X. */
5110 case IOMMU_PCI_OFF_MISCINFO_REG_1: /* We don't support guest-address translation. */
5111 {
5112 LogFunc(("PCI config write (%#RX32) to read-only register %#x -> Ignored\n", u32Value, uAddress));
5113 return VINF_SUCCESS;
5114 }
5115 }
5116
5117 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
5118 IOMMU_LOCK(pDevIns, pThisCC);
5119
5120 VBOXSTRICTRC rcStrict = VERR_IOMMU_IPE_3;
5121 switch (uAddress)
5122 {
5123 case IOMMU_PCI_OFF_BASE_ADDR_REG_LO:
5124 {
5125 if (pThis->IommuBar.n.u1Enable)
5126 {
5127 rcStrict = VINF_SUCCESS;
5128 LogFunc(("Writing Base Address (Lo) when it's already enabled -> Ignored\n"));
5129 break;
5130 }
5131
5132 pThis->IommuBar.au32[0] = u32Value & IOMMU_BAR_VALID_MASK;
5133 if (pThis->IommuBar.n.u1Enable)
5134 {
5135 Assert(pThis->hMmio != NIL_IOMMMIOHANDLE); /* Paranoia. Ensure we have a valid IOM MMIO handle. */
5136 Assert(!pThis->ExtFeat.n.u1PerfCounterSup); /* Base is 16K aligned when performance counters aren't supported. */
5137 RTGCPHYS const GCPhysMmioBase = RT_MAKE_U64(pThis->IommuBar.au32[0] & 0xffffc000, pThis->IommuBar.au32[1]);
5138 RTGCPHYS const GCPhysMmioBasePrev = PDMDevHlpMmioGetMappingAddress(pDevIns, pThis->hMmio);
5139
5140 /* If the MMIO region is already mapped at the specified address, we're done. */
5141 Assert(GCPhysMmioBase != NIL_RTGCPHYS);
5142 if (GCPhysMmioBasePrev == GCPhysMmioBase)
5143 {
5144 rcStrict = VINF_SUCCESS;
5145 break;
5146 }
5147
5148 /* Unmap the previous MMIO region (which is at a different address). */
5149 if (GCPhysMmioBasePrev != NIL_RTGCPHYS)
5150 {
5151 LogFlowFunc(("Unmapping previous MMIO region at %#RGp\n", GCPhysMmioBasePrev));
5152 rcStrict = PDMDevHlpMmioUnmap(pDevIns, pThis->hMmio);
5153 if (RT_FAILURE(rcStrict))
5154 {
5155 LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
5156 break;
5157 }
5158 }
5159
5160 /* Map the newly specified MMIO region. */
5161 LogFlowFunc(("Mapping MMIO region at %#RGp\n", GCPhysMmioBase));
5162 rcStrict = PDMDevHlpMmioMap(pDevIns, pThis->hMmio, GCPhysMmioBase);
5163 if (RT_FAILURE(rcStrict))
5164 {
5165 LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
5166 break;
5167 }
5168 }
5169 else
5170 rcStrict = VINF_SUCCESS;
5171 break;
5172 }
5173
5174 case IOMMU_PCI_OFF_BASE_ADDR_REG_HI:
5175 {
5176 if (!pThis->IommuBar.n.u1Enable)
5177 pThis->IommuBar.au32[1] = u32Value;
5178 else
5179 {
5180 rcStrict = VINF_SUCCESS;
5181 LogFunc(("Writing Base Address (Hi) when it's already enabled -> Ignored\n"));
5182 }
5183 break;
5184 }
5185
5186 case IOMMU_PCI_OFF_MSI_CAP_HDR:
5187 {
5188 u32Value |= RT_BIT(23); /* 64-bit MSI addressess must always be enabled for IOMMU. */
5189 RT_FALL_THRU();
5190 }
5191 default:
5192 {
5193 rcStrict = PDMDevHlpPCIConfigWrite(pDevIns, pPciDev, uAddress, cb, u32Value);
5194 break;
5195 }
5196 }
5197
5198 IOMMU_UNLOCK(pDevIns, pThisCC);
5199
5200 Log3Func(("uAddress=%#x (cb=%u) with %#x. rc=%Rrc\n", uAddress, cb, u32Value, VBOXSTRICTRC_VAL(rcStrict)));
5201 return rcStrict;
5202}
5203
5204
5205/**
5206 * @callback_method_impl{FNDBGFHANDLERDEV}
5207 */
5208static DECLCALLBACK(void) iommuAmdR3DbgInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5209{
5210 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5211 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
5212 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
5213
5214 bool fVerbose;
5215 if ( pszArgs
5216 && !strncmp(pszArgs, RT_STR_TUPLE("verbose")))
5217 fVerbose = true;
5218 else
5219 fVerbose = false;
5220
5221 pHlp->pfnPrintf(pHlp, "AMD-IOMMU:\n");
5222 /* Device Table Base Addresses (all segments). */
5223 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
5224 {
5225 DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
5226 pHlp->pfnPrintf(pHlp, " Device Table BAR %u = %#RX64\n", i, DevTabBar.u64);
5227 if (fVerbose)
5228 {
5229 pHlp->pfnPrintf(pHlp, " Size = %#x (%u bytes)\n", DevTabBar.n.u9Size,
5230 IOMMU_GET_DEV_TAB_LEN(&DevTabBar));
5231 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT);
5232 }
5233 }
5234 /* Command Buffer Base Address Register. */
5235 {
5236 CMD_BUF_BAR_T const CmdBufBar = pThis->CmdBufBaseAddr;
5237 uint8_t const uEncodedLen = CmdBufBar.n.u4Len;
5238 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5239 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5240 pHlp->pfnPrintf(pHlp, " Command Buffer BAR = %#RX64\n", CmdBufBar.u64);
5241 if (fVerbose)
5242 {
5243 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", CmdBufBar.n.u40Base << X86_PAGE_4K_SHIFT);
5244 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5245 cEntries, cbBuffer);
5246 }
5247 }
5248 /* Event Log Base Address Register. */
5249 {
5250 EVT_LOG_BAR_T const EvtLogBar = pThis->EvtLogBaseAddr;
5251 uint8_t const uEncodedLen = EvtLogBar.n.u4Len;
5252 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5253 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5254 pHlp->pfnPrintf(pHlp, " Event Log BAR = %#RX64\n", EvtLogBar.u64);
5255 if (fVerbose)
5256 {
5257 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", EvtLogBar.n.u40Base << X86_PAGE_4K_SHIFT);
5258 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5259 cEntries, cbBuffer);
5260 }
5261 }
5262 /* IOMMU Control Register. */
5263 {
5264 IOMMU_CTRL_T const Ctrl = pThis->Ctrl;
5265 pHlp->pfnPrintf(pHlp, " Control = %#RX64\n", Ctrl.u64);
5266 if (fVerbose)
5267 {
5268 pHlp->pfnPrintf(pHlp, " IOMMU enable = %RTbool\n", Ctrl.n.u1IommuEn);
5269 pHlp->pfnPrintf(pHlp, " HT Tunnel translation enable = %RTbool\n", Ctrl.n.u1HtTunEn);
5270 pHlp->pfnPrintf(pHlp, " Event log enable = %RTbool\n", Ctrl.n.u1EvtLogEn);
5271 pHlp->pfnPrintf(pHlp, " Event log interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
5272 pHlp->pfnPrintf(pHlp, " Completion wait interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
5273 pHlp->pfnPrintf(pHlp, " Invalidation timeout = %u\n", Ctrl.n.u3InvTimeOut);
5274 pHlp->pfnPrintf(pHlp, " Pass posted write = %RTbool\n", Ctrl.n.u1PassPW);
5275 pHlp->pfnPrintf(pHlp, " Respose Pass posted write = %RTbool\n", Ctrl.n.u1ResPassPW);
5276 pHlp->pfnPrintf(pHlp, " Coherent = %RTbool\n", Ctrl.n.u1Coherent);
5277 pHlp->pfnPrintf(pHlp, " Isochronous = %RTbool\n", Ctrl.n.u1Isoc);
5278 pHlp->pfnPrintf(pHlp, " Command buffer enable = %RTbool\n", Ctrl.n.u1CmdBufEn);
5279 pHlp->pfnPrintf(pHlp, " PPR log enable = %RTbool\n", Ctrl.n.u1PprLogEn);
5280 pHlp->pfnPrintf(pHlp, " PPR interrupt enable = %RTbool\n", Ctrl.n.u1PprIntrEn);
5281 pHlp->pfnPrintf(pHlp, " PPR enable = %RTbool\n", Ctrl.n.u1PprEn);
5282 pHlp->pfnPrintf(pHlp, " Guest translation eanble = %RTbool\n", Ctrl.n.u1GstTranslateEn);
5283 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC enable = %RTbool\n", Ctrl.n.u1GstVirtApicEn);
5284 pHlp->pfnPrintf(pHlp, " CRW = %#x\n", Ctrl.n.u4Crw);
5285 pHlp->pfnPrintf(pHlp, " SMI filter enable = %RTbool\n", Ctrl.n.u1SmiFilterEn);
5286 pHlp->pfnPrintf(pHlp, " Self-writeback disable = %RTbool\n", Ctrl.n.u1SelfWriteBackDis);
5287 pHlp->pfnPrintf(pHlp, " SMI filter log enable = %RTbool\n", Ctrl.n.u1SmiFilterLogEn);
5288 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC mode enable = %#x\n", Ctrl.n.u3GstVirtApicModeEn);
5289 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC GA log enable = %RTbool\n", Ctrl.n.u1GstLogEn);
5290 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC interrupt enable = %RTbool\n", Ctrl.n.u1GstIntrEn);
5291 pHlp->pfnPrintf(pHlp, " Dual PPR log enable = %#x\n", Ctrl.n.u2DualPprLogEn);
5292 pHlp->pfnPrintf(pHlp, " Dual event log enable = %#x\n", Ctrl.n.u2DualEvtLogEn);
5293 pHlp->pfnPrintf(pHlp, " Device table segmentation enable = %#x\n", Ctrl.n.u3DevTabSegEn);
5294 pHlp->pfnPrintf(pHlp, " Privilege abort enable = %#x\n", Ctrl.n.u2PrivAbortEn);
5295 pHlp->pfnPrintf(pHlp, " PPR auto response enable = %RTbool\n", Ctrl.n.u1PprAutoRespEn);
5296 pHlp->pfnPrintf(pHlp, " MARC enable = %RTbool\n", Ctrl.n.u1MarcEn);
5297 pHlp->pfnPrintf(pHlp, " Block StopMark enable = %RTbool\n", Ctrl.n.u1BlockStopMarkEn);
5298 pHlp->pfnPrintf(pHlp, " PPR auto response always-on enable = %RTbool\n", Ctrl.n.u1PprAutoRespAlwaysOnEn);
5299 pHlp->pfnPrintf(pHlp, " Domain IDPNE = %RTbool\n", Ctrl.n.u1DomainIDPNE);
5300 pHlp->pfnPrintf(pHlp, " Enhanced PPR handling = %RTbool\n", Ctrl.n.u1EnhancedPpr);
5301 pHlp->pfnPrintf(pHlp, " Host page table access/dirty bit update = %#x\n", Ctrl.n.u2HstAccDirtyBitUpdate);
5302 pHlp->pfnPrintf(pHlp, " Guest page table dirty bit disable = %RTbool\n", Ctrl.n.u1GstDirtyUpdateDis);
5303 pHlp->pfnPrintf(pHlp, " x2APIC enable = %RTbool\n", Ctrl.n.u1X2ApicEn);
5304 pHlp->pfnPrintf(pHlp, " x2APIC interrupt enable = %RTbool\n", Ctrl.n.u1X2ApicIntrGenEn);
5305 pHlp->pfnPrintf(pHlp, " Guest page table access bit update = %RTbool\n", Ctrl.n.u1GstAccessUpdateDis);
5306 }
5307 }
5308 /* Exclusion Base Address Register. */
5309 {
5310 IOMMU_EXCL_RANGE_BAR_T const ExclRangeBar = pThis->ExclRangeBaseAddr;
5311 pHlp->pfnPrintf(pHlp, " Exclusion BAR = %#RX64\n", ExclRangeBar.u64);
5312 if (fVerbose)
5313 {
5314 pHlp->pfnPrintf(pHlp, " Exclusion enable = %RTbool\n", ExclRangeBar.n.u1ExclEnable);
5315 pHlp->pfnPrintf(pHlp, " Allow all devices = %RTbool\n", ExclRangeBar.n.u1AllowAll);
5316 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
5317 ExclRangeBar.n.u40ExclRangeBase << X86_PAGE_4K_SHIFT);
5318 }
5319 }
5320 /* Exclusion Range Limit Register. */
5321 {
5322 IOMMU_EXCL_RANGE_LIMIT_T const ExclRangeLimit = pThis->ExclRangeLimit;
5323 pHlp->pfnPrintf(pHlp, " Exclusion Range Limit = %#RX64\n", ExclRangeLimit.u64);
5324 if (fVerbose)
5325 {
5326 pHlp->pfnPrintf(pHlp, " Range limit = %#RX64\n",
5327 (ExclRangeLimit.n.u40ExclRangeLimit << X86_PAGE_4K_SHIFT) | X86_PAGE_4K_OFFSET_MASK);
5328 }
5329 }
5330 /* Extended Feature Register. */
5331 {
5332 IOMMU_EXT_FEAT_T ExtFeat = pThis->ExtFeat;
5333 pHlp->pfnPrintf(pHlp, " Extended Feature Register = %#RX64\n", ExtFeat.u64);
5334 if (fVerbose)
5335 {
5336 pHlp->pfnPrintf(pHlp, " Prefetch support = %RTbool\n", ExtFeat.n.u1PrefetchSup);
5337 pHlp->pfnPrintf(pHlp, " PPR support = %RTbool\n", ExtFeat.n.u1PprSup);
5338 pHlp->pfnPrintf(pHlp, " x2APIC support = %RTbool\n", ExtFeat.n.u1X2ApicSup);
5339 pHlp->pfnPrintf(pHlp, " NX and privilege level support = %RTbool\n", ExtFeat.n.u1NoExecuteSup);
5340 pHlp->pfnPrintf(pHlp, " Guest translation support = %RTbool\n", ExtFeat.n.u1GstTranslateSup);
5341 pHlp->pfnPrintf(pHlp, " Invalidate-All command support = %RTbool\n", ExtFeat.n.u1InvAllSup);
5342 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC support = %RTbool\n", ExtFeat.n.u1GstVirtApicSup);
5343 pHlp->pfnPrintf(pHlp, " Hardware error register support = %RTbool\n", ExtFeat.n.u1HwErrorSup);
5344 pHlp->pfnPrintf(pHlp, " Performance counters support = %RTbool\n", ExtFeat.n.u1PerfCounterSup);
5345 pHlp->pfnPrintf(pHlp, " Host address translation size = %#x\n", ExtFeat.n.u2HostAddrTranslateSize);
5346 pHlp->pfnPrintf(pHlp, " Guest address translation size = %#x\n", ExtFeat.n.u2GstAddrTranslateSize);
5347 pHlp->pfnPrintf(pHlp, " Guest CR3 root table level support = %#x\n", ExtFeat.n.u2GstCr3RootTblLevel);
5348 pHlp->pfnPrintf(pHlp, " SMI filter register support = %#x\n", ExtFeat.n.u2SmiFilterSup);
5349 pHlp->pfnPrintf(pHlp, " SMI filter register count = %#x\n", ExtFeat.n.u3SmiFilterCount);
5350 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC modes support = %#x\n", ExtFeat.n.u3GstVirtApicModeSup);
5351 pHlp->pfnPrintf(pHlp, " Dual PPR log support = %#x\n", ExtFeat.n.u2DualPprLogSup);
5352 pHlp->pfnPrintf(pHlp, " Dual event log support = %#x\n", ExtFeat.n.u2DualEvtLogSup);
5353 pHlp->pfnPrintf(pHlp, " Maximum PASID = %#x\n", ExtFeat.n.u5MaxPasidSup);
5354 pHlp->pfnPrintf(pHlp, " User/supervisor page protection support = %RTbool\n", ExtFeat.n.u1UserSupervisorSup);
5355 pHlp->pfnPrintf(pHlp, " Device table segments supported = %#x (%u)\n", ExtFeat.n.u2DevTabSegSup,
5356 g_acDevTabSegs[ExtFeat.n.u2DevTabSegSup]);
5357 pHlp->pfnPrintf(pHlp, " PPR log overflow early warning support = %RTbool\n", ExtFeat.n.u1PprLogOverflowWarn);
5358 pHlp->pfnPrintf(pHlp, " PPR auto response support = %RTbool\n", ExtFeat.n.u1PprAutoRespSup);
5359 pHlp->pfnPrintf(pHlp, " MARC support = %#x\n", ExtFeat.n.u2MarcSup);
5360 pHlp->pfnPrintf(pHlp, " Block StopMark message support = %RTbool\n", ExtFeat.n.u1BlockStopMarkSup);
5361 pHlp->pfnPrintf(pHlp, " Performance optimization support = %RTbool\n", ExtFeat.n.u1PerfOptSup);
5362 pHlp->pfnPrintf(pHlp, " MSI capability MMIO access support = %RTbool\n", ExtFeat.n.u1MsiCapMmioSup);
5363 pHlp->pfnPrintf(pHlp, " Guest I/O protection support = %RTbool\n", ExtFeat.n.u1GstIoSup);
5364 pHlp->pfnPrintf(pHlp, " Host access support = %RTbool\n", ExtFeat.n.u1HostAccessSup);
5365 pHlp->pfnPrintf(pHlp, " Enhanced PPR handling support = %RTbool\n", ExtFeat.n.u1EnhancedPprSup);
5366 pHlp->pfnPrintf(pHlp, " Attribute forward supported = %RTbool\n", ExtFeat.n.u1AttrForwardSup);
5367 pHlp->pfnPrintf(pHlp, " Host dirty support = %RTbool\n", ExtFeat.n.u1HostDirtySup);
5368 pHlp->pfnPrintf(pHlp, " Invalidate IOTLB type support = %RTbool\n", ExtFeat.n.u1InvIoTlbTypeSup);
5369 pHlp->pfnPrintf(pHlp, " Guest page table access bit hw disable = %RTbool\n", ExtFeat.n.u1GstUpdateDisSup);
5370 pHlp->pfnPrintf(pHlp, " Force physical dest for remapped intr. = %RTbool\n", ExtFeat.n.u1ForcePhysDstSup);
5371 }
5372 }
5373 /* PPR Log Base Address Register. */
5374 {
5375 PPR_LOG_BAR_T PprLogBar = pThis->PprLogBaseAddr;
5376 uint8_t const uEncodedLen = PprLogBar.n.u4Len;
5377 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5378 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5379 pHlp->pfnPrintf(pHlp, " PPR Log BAR = %#RX64\n", PprLogBar.u64);
5380 if (fVerbose)
5381 {
5382 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", PprLogBar.n.u40Base << X86_PAGE_4K_SHIFT);
5383 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5384 cEntries, cbBuffer);
5385 }
5386 }
5387 /* Hardware Event (Hi) Register. */
5388 {
5389 IOMMU_HW_EVT_HI_T HwEvtHi = pThis->HwEvtHi;
5390 pHlp->pfnPrintf(pHlp, " Hardware Event (Hi) = %#RX64\n", HwEvtHi.u64);
5391 if (fVerbose)
5392 {
5393 pHlp->pfnPrintf(pHlp, " First operand = %#RX64\n", HwEvtHi.n.u60FirstOperand);
5394 pHlp->pfnPrintf(pHlp, " Event code = %#RX8\n", HwEvtHi.n.u4EvtCode);
5395 }
5396 }
5397 /* Hardware Event (Lo) Register. */
5398 pHlp->pfnPrintf(pHlp, " Hardware Event (Lo) = %#RX64\n", pThis->HwEvtLo);
5399 /* Hardware Event Status. */
5400 {
5401 IOMMU_HW_EVT_STATUS_T HwEvtStatus = pThis->HwEvtStatus;
5402 pHlp->pfnPrintf(pHlp, " Hardware Event Status = %#RX64\n", HwEvtStatus.u64);
5403 if (fVerbose)
5404 {
5405 pHlp->pfnPrintf(pHlp, " Valid = %RTbool\n", HwEvtStatus.n.u1Valid);
5406 pHlp->pfnPrintf(pHlp, " Overflow = %RTbool\n", HwEvtStatus.n.u1Overflow);
5407 }
5408 }
5409 /* Guest Virtual-APIC Log Base Address Register. */
5410 {
5411 GALOG_BAR_T const GALogBar = pThis->GALogBaseAddr;
5412 uint8_t const uEncodedLen = GALogBar.n.u4Len;
5413 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5414 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5415 pHlp->pfnPrintf(pHlp, " Guest Log BAR = %#RX64\n", GALogBar.u64);
5416 if (fVerbose)
5417 {
5418 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", GALogBar.n.u40Base << X86_PAGE_4K_SHIFT);
5419 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5420 cEntries, cbBuffer);
5421 }
5422 }
5423 /* Guest Virtual-APIC Log Tail Address Register. */
5424 {
5425 GALOG_TAIL_ADDR_T GALogTail = pThis->GALogTailAddr;
5426 pHlp->pfnPrintf(pHlp, " Guest Log Tail Address = %#RX64\n", GALogTail.u64);
5427 if (fVerbose)
5428 pHlp->pfnPrintf(pHlp, " Tail address = %#RX64\n", GALogTail.n.u40GALogTailAddr);
5429 }
5430 /* PPR Log B Base Address Register. */
5431 {
5432 PPR_LOG_B_BAR_T PprLogBBar = pThis->PprLogBBaseAddr;
5433 uint8_t const uEncodedLen = PprLogBBar.n.u4Len;
5434 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5435 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5436 pHlp->pfnPrintf(pHlp, " PPR Log B BAR = %#RX64\n", PprLogBBar.u64);
5437 if (fVerbose)
5438 {
5439 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", PprLogBBar.n.u40Base << X86_PAGE_4K_SHIFT);
5440 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5441 cEntries, cbBuffer);
5442 }
5443 }
5444 /* Event Log B Base Address Register. */
5445 {
5446 EVT_LOG_B_BAR_T EvtLogBBar = pThis->EvtLogBBaseAddr;
5447 uint8_t const uEncodedLen = EvtLogBBar.n.u4Len;
5448 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5449 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5450 pHlp->pfnPrintf(pHlp, " Event Log B BAR = %#RX64\n", EvtLogBBar.u64);
5451 if (fVerbose)
5452 {
5453 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", EvtLogBBar.n.u40Base << X86_PAGE_4K_SHIFT);
5454 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5455 cEntries, cbBuffer);
5456 }
5457 }
5458 /* Device-Specific Feature Extension Register. */
5459 {
5460 DEV_SPECIFIC_FEAT_T const DevSpecificFeat = pThis->DevSpecificFeat;
5461 pHlp->pfnPrintf(pHlp, " Device-specific Feature = %#RX64\n", DevSpecificFeat.u64);
5462 if (fVerbose)
5463 {
5464 pHlp->pfnPrintf(pHlp, " Feature = %#RX32\n", DevSpecificFeat.n.u24DevSpecFeat);
5465 pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificFeat.n.u4RevMinor);
5466 pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificFeat.n.u4RevMajor);
5467 }
5468 }
5469 /* Device-Specific Control Extension Register. */
5470 {
5471 DEV_SPECIFIC_CTRL_T const DevSpecificCtrl = pThis->DevSpecificCtrl;
5472 pHlp->pfnPrintf(pHlp, " Device-specific Control = %#RX64\n", DevSpecificCtrl.u64);
5473 if (fVerbose)
5474 {
5475 pHlp->pfnPrintf(pHlp, " Control = %#RX32\n", DevSpecificCtrl.n.u24DevSpecCtrl);
5476 pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificCtrl.n.u4RevMinor);
5477 pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificCtrl.n.u4RevMajor);
5478 }
5479 }
5480 /* Device-Specific Status Extension Register. */
5481 {
5482 DEV_SPECIFIC_STATUS_T const DevSpecificStatus = pThis->DevSpecificStatus;
5483 pHlp->pfnPrintf(pHlp, " Device-specific Status = %#RX64\n", DevSpecificStatus.u64);
5484 if (fVerbose)
5485 {
5486 pHlp->pfnPrintf(pHlp, " Status = %#RX32\n", DevSpecificStatus.n.u24DevSpecStatus);
5487 pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificStatus.n.u4RevMinor);
5488 pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificStatus.n.u4RevMajor);
5489 }
5490 }
5491 /* Miscellaneous Information Register (Lo and Hi). */
5492 {
5493 MSI_MISC_INFO_T const MiscInfo = pThis->MiscInfo;
5494 pHlp->pfnPrintf(pHlp, " Misc. Info. Register = %#RX64\n", MiscInfo.u64);
5495 if (fVerbose)
5496 {
5497 pHlp->pfnPrintf(pHlp, " Event Log MSI number = %#x\n", MiscInfo.n.u5MsiNumEvtLog);
5498 pHlp->pfnPrintf(pHlp, " Guest Virtual-Address Size = %#x\n", MiscInfo.n.u3GstVirtAddrSize);
5499 pHlp->pfnPrintf(pHlp, " Physical Address Size = %#x\n", MiscInfo.n.u7PhysAddrSize);
5500 pHlp->pfnPrintf(pHlp, " Virtual-Address Size = %#x\n", MiscInfo.n.u7VirtAddrSize);
5501 pHlp->pfnPrintf(pHlp, " HT Transport ATS Range Reserved = %RTbool\n", MiscInfo.n.u1HtAtsResv);
5502 pHlp->pfnPrintf(pHlp, " PPR MSI number = %#x\n", MiscInfo.n.u5MsiNumPpr);
5503 pHlp->pfnPrintf(pHlp, " GA Log MSI number = %#x\n", MiscInfo.n.u5MsiNumGa);
5504 }
5505 }
5506 /* MSI Capability Header. */
5507 {
5508 MSI_CAP_HDR_T MsiCapHdr;
5509 MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
5510 pHlp->pfnPrintf(pHlp, " MSI Capability Header = %#RX32\n", MsiCapHdr.u32);
5511 if (fVerbose)
5512 {
5513 pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiCapHdr.n.u8MsiCapId);
5514 pHlp->pfnPrintf(pHlp, " Capability Ptr (PCI config offset) = %#x\n", MsiCapHdr.n.u8MsiCapPtr);
5515 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", MsiCapHdr.n.u1MsiEnable);
5516 pHlp->pfnPrintf(pHlp, " Multi-message capability = %#x\n", MsiCapHdr.n.u3MsiMultiMessCap);
5517 pHlp->pfnPrintf(pHlp, " Multi-message enable = %#x\n", MsiCapHdr.n.u3MsiMultiMessEn);
5518 }
5519 }
5520 /* MSI Address Register (Lo and Hi). */
5521 {
5522 uint32_t const uMsiAddrLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
5523 uint32_t const uMsiAddrHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
5524 MSIADDR MsiAddr;
5525 MsiAddr.u64 = RT_MAKE_U64(uMsiAddrLo, uMsiAddrHi);
5526 pHlp->pfnPrintf(pHlp, " MSI Address = %#RX64\n", MsiAddr.u64);
5527 if (fVerbose)
5528 {
5529 pHlp->pfnPrintf(pHlp, " Destination mode = %#x\n", MsiAddr.n.u1DestMode);
5530 pHlp->pfnPrintf(pHlp, " Redirection hint = %#x\n", MsiAddr.n.u1RedirHint);
5531 pHlp->pfnPrintf(pHlp, " Destination Id = %#x\n", MsiAddr.n.u8DestId);
5532 pHlp->pfnPrintf(pHlp, " Address = %#RX32\n", MsiAddr.n.u12Addr);
5533 pHlp->pfnPrintf(pHlp, " Address (Hi) / Rsvd? = %#RX32\n", MsiAddr.n.u32Rsvd0);
5534 }
5535 }
5536 /* MSI Data. */
5537 {
5538 MSIDATA MsiData;
5539 MsiData.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
5540 pHlp->pfnPrintf(pHlp, " MSI Data = %#RX32\n", MsiData.u32);
5541 if (fVerbose)
5542 {
5543 pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", MsiData.n.u8Vector,
5544 MsiData.n.u8Vector);
5545 pHlp->pfnPrintf(pHlp, " Delivery mode = %#x\n", MsiData.n.u3DeliveryMode);
5546 pHlp->pfnPrintf(pHlp, " Level = %#x\n", MsiData.n.u1Level);
5547 pHlp->pfnPrintf(pHlp, " Trigger mode = %s\n", MsiData.n.u1TriggerMode ?
5548 "level" : "edge");
5549 }
5550 }
5551 /* MSI Mapping Capability Header (HyperTransport, reporting all 0s currently). */
5552 {
5553 MSI_MAP_CAP_HDR_T MsiMapCapHdr;
5554 MsiMapCapHdr.u32 = 0;
5555 pHlp->pfnPrintf(pHlp, " MSI Mapping Capability Header = %#RX32\n", MsiMapCapHdr.u32);
5556 if (fVerbose)
5557 {
5558 pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiMapCapHdr.n.u8MsiMapCapId);
5559 pHlp->pfnPrintf(pHlp, " Map enable = %RTbool\n", MsiMapCapHdr.n.u1MsiMapEn);
5560 pHlp->pfnPrintf(pHlp, " Map fixed = %RTbool\n", MsiMapCapHdr.n.u1MsiMapFixed);
5561 pHlp->pfnPrintf(pHlp, " Map capability type = %#x\n", MsiMapCapHdr.n.u5MapCapType);
5562 }
5563 }
5564 /* Performance Optimization Control Register. */
5565 {
5566 IOMMU_PERF_OPT_CTRL_T const PerfOptCtrl = pThis->PerfOptCtrl;
5567 pHlp->pfnPrintf(pHlp, " Performance Optimization Control = %#RX32\n", PerfOptCtrl.u32);
5568 if (fVerbose)
5569 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PerfOptCtrl.n.u1PerfOptEn);
5570 }
5571 /* XT (x2APIC) General Interrupt Control Register. */
5572 {
5573 IOMMU_XT_GEN_INTR_CTRL_T const XtGenIntrCtrl = pThis->XtGenIntrCtrl;
5574 pHlp->pfnPrintf(pHlp, " XT General Interrupt Control = %#RX64\n", XtGenIntrCtrl.u64);
5575 if (fVerbose)
5576 {
5577 pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
5578 !XtGenIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
5579 pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
5580 RT_MAKE_U64(XtGenIntrCtrl.n.u24X2ApicIntrDstLo, XtGenIntrCtrl.n.u7X2ApicIntrDstHi));
5581 pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGenIntrCtrl.n.u8X2ApicIntrVector);
5582 pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
5583 !XtGenIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
5584 }
5585 }
5586 /* XT (x2APIC) PPR Interrupt Control Register. */
5587 {
5588 IOMMU_XT_PPR_INTR_CTRL_T const XtPprIntrCtrl = pThis->XtPprIntrCtrl;
5589 pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtPprIntrCtrl.u64);
5590 if (fVerbose)
5591 {
5592 pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
5593 !XtPprIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
5594 pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
5595 RT_MAKE_U64(XtPprIntrCtrl.n.u24X2ApicIntrDstLo, XtPprIntrCtrl.n.u7X2ApicIntrDstHi));
5596 pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtPprIntrCtrl.n.u8X2ApicIntrVector);
5597 pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
5598 !XtPprIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
5599 }
5600 }
5601 /* XT (X2APIC) GA Log Interrupt Control Register. */
5602 {
5603 IOMMU_XT_GALOG_INTR_CTRL_T const XtGALogIntrCtrl = pThis->XtGALogIntrCtrl;
5604 pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtGALogIntrCtrl.u64);
5605 if (fVerbose)
5606 {
5607 pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
5608 !XtGALogIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
5609 pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
5610 RT_MAKE_U64(XtGALogIntrCtrl.n.u24X2ApicIntrDstLo, XtGALogIntrCtrl.n.u7X2ApicIntrDstHi));
5611 pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGALogIntrCtrl.n.u8X2ApicIntrVector);
5612 pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
5613 !XtGALogIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
5614 }
5615 }
5616 /* MARC Registers. */
5617 {
5618 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aMarcApers); i++)
5619 {
5620 pHlp->pfnPrintf(pHlp, " MARC Aperature %u:\n", i);
5621 MARC_APER_BAR_T const MarcAperBar = pThis->aMarcApers[i].Base;
5622 pHlp->pfnPrintf(pHlp, " Base = %#RX64\n", MarcAperBar.n.u40MarcBaseAddr << X86_PAGE_4K_SHIFT);
5623
5624 MARC_APER_RELOC_T const MarcAperReloc = pThis->aMarcApers[i].Reloc;
5625 pHlp->pfnPrintf(pHlp, " Reloc = %#RX64 (addr: %#RX64, read-only: %RTbool, enable: %RTbool)\n",
5626 MarcAperReloc.u64, MarcAperReloc.n.u40MarcRelocAddr << X86_PAGE_4K_SHIFT,
5627 MarcAperReloc.n.u1ReadOnly, MarcAperReloc.n.u1RelocEn);
5628
5629 MARC_APER_LEN_T const MarcAperLen = pThis->aMarcApers[i].Length;
5630 pHlp->pfnPrintf(pHlp, " Length = %u pages\n", MarcAperLen.n.u40MarcLength);
5631 }
5632 }
5633 /* Reserved Register. */
5634 pHlp->pfnPrintf(pHlp, " Reserved Register = %#RX64\n", pThis->RsvdReg);
5635 /* Command Buffer Head Pointer Register. */
5636 {
5637 CMD_BUF_HEAD_PTR_T const CmdBufHeadPtr = pThis->CmdBufHeadPtr;
5638 pHlp->pfnPrintf(pHlp, " Command Buffer Head Pointer = %#RX64 (off: %#x)\n", CmdBufHeadPtr.u64,
5639 CmdBufHeadPtr.n.off);
5640 }
5641 /* Command Buffer Tail Pointer Register. */
5642 {
5643 CMD_BUF_HEAD_PTR_T const CmdBufTailPtr = pThis->CmdBufTailPtr;
5644 pHlp->pfnPrintf(pHlp, " Command Buffer Tail Pointer = %#RX64 (off: %#x)\n", CmdBufTailPtr.u64,
5645 CmdBufTailPtr.n.off);
5646 }
5647 /* Event Log Head Pointer Register. */
5648 {
5649 EVT_LOG_HEAD_PTR_T const EvtLogHeadPtr = pThis->EvtLogHeadPtr;
5650 pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogHeadPtr.u64,
5651 EvtLogHeadPtr.n.off);
5652 }
5653 /* Event Log Tail Pointer Register. */
5654 {
5655 EVT_LOG_TAIL_PTR_T const EvtLogTailPtr = pThis->EvtLogTailPtr;
5656 pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogTailPtr.u64,
5657 EvtLogTailPtr.n.off);
5658 }
5659 /* Status Register. */
5660 {
5661 IOMMU_STATUS_T const Status = pThis->Status;
5662 pHlp->pfnPrintf(pHlp, " Status Register = %#RX64\n", Status.u64);
5663 if (fVerbose)
5664 {
5665 pHlp->pfnPrintf(pHlp, " Event log overflow = %RTbool\n", Status.n.u1EvtOverflow);
5666 pHlp->pfnPrintf(pHlp, " Event log interrupt = %RTbool\n", Status.n.u1EvtLogIntr);
5667 pHlp->pfnPrintf(pHlp, " Completion wait interrupt = %RTbool\n", Status.n.u1CompWaitIntr);
5668 pHlp->pfnPrintf(pHlp, " Event log running = %RTbool\n", Status.n.u1EvtLogRunning);
5669 pHlp->pfnPrintf(pHlp, " Command buffer running = %RTbool\n", Status.n.u1CmdBufRunning);
5670 pHlp->pfnPrintf(pHlp, " PPR overflow = %RTbool\n", Status.n.u1PprOverflow);
5671 pHlp->pfnPrintf(pHlp, " PPR interrupt = %RTbool\n", Status.n.u1PprIntr);
5672 pHlp->pfnPrintf(pHlp, " PPR log running = %RTbool\n", Status.n.u1PprLogRunning);
5673 pHlp->pfnPrintf(pHlp, " Guest log running = %RTbool\n", Status.n.u1GstLogRunning);
5674 pHlp->pfnPrintf(pHlp, " Guest log interrupt = %RTbool\n", Status.n.u1GstLogIntr);
5675 pHlp->pfnPrintf(pHlp, " PPR log B overflow = %RTbool\n", Status.n.u1PprOverflowB);
5676 pHlp->pfnPrintf(pHlp, " PPR log active = %RTbool\n", Status.n.u1PprLogActive);
5677 pHlp->pfnPrintf(pHlp, " Event log B overflow = %RTbool\n", Status.n.u1EvtOverflowB);
5678 pHlp->pfnPrintf(pHlp, " Event log active = %RTbool\n", Status.n.u1EvtLogActive);
5679 pHlp->pfnPrintf(pHlp, " PPR log B overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarlyB);
5680 pHlp->pfnPrintf(pHlp, " PPR log overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarly);
5681 }
5682 }
5683 /* PPR Log Head Pointer. */
5684 {
5685 PPR_LOG_HEAD_PTR_T const PprLogHeadPtr = pThis->PprLogHeadPtr;
5686 pHlp->pfnPrintf(pHlp, " PPR Log Head Pointer = %#RX64 (off: %#x)\n", PprLogHeadPtr.u64,
5687 PprLogHeadPtr.n.off);
5688 }
5689 /* PPR Log Tail Pointer. */
5690 {
5691 PPR_LOG_TAIL_PTR_T const PprLogTailPtr = pThis->PprLogTailPtr;
5692 pHlp->pfnPrintf(pHlp, " PPR Log Tail Pointer = %#RX64 (off: %#x)\n", PprLogTailPtr.u64,
5693 PprLogTailPtr.n.off);
5694 }
5695 /* Guest Virtual-APIC Log Head Pointer. */
5696 {
5697 GALOG_HEAD_PTR_T const GALogHeadPtr = pThis->GALogHeadPtr;
5698 pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Head Pointer = %#RX64 (off: %#x)\n", GALogHeadPtr.u64,
5699 GALogHeadPtr.n.u12GALogPtr);
5700 }
5701 /* Guest Virtual-APIC Log Tail Pointer. */
5702 {
5703 GALOG_HEAD_PTR_T const GALogTailPtr = pThis->GALogTailPtr;
5704 pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Tail Pointer = %#RX64 (off: %#x)\n", GALogTailPtr.u64,
5705 GALogTailPtr.n.u12GALogPtr);
5706 }
5707 /* PPR Log B Head Pointer. */
5708 {
5709 PPR_LOG_B_HEAD_PTR_T const PprLogBHeadPtr = pThis->PprLogBHeadPtr;
5710 pHlp->pfnPrintf(pHlp, " PPR Log B Head Pointer = %#RX64 (off: %#x)\n", PprLogBHeadPtr.u64,
5711 PprLogBHeadPtr.n.off);
5712 }
5713 /* PPR Log B Tail Pointer. */
5714 {
5715 PPR_LOG_B_TAIL_PTR_T const PprLogBTailPtr = pThis->PprLogBTailPtr;
5716 pHlp->pfnPrintf(pHlp, " PPR Log B Tail Pointer = %#RX64 (off: %#x)\n", PprLogBTailPtr.u64,
5717 PprLogBTailPtr.n.off);
5718 }
5719 /* Event Log B Head Pointer. */
5720 {
5721 EVT_LOG_B_HEAD_PTR_T const EvtLogBHeadPtr = pThis->EvtLogBHeadPtr;
5722 pHlp->pfnPrintf(pHlp, " Event Log B Head Pointer = %#RX64 (off: %#x)\n", EvtLogBHeadPtr.u64,
5723 EvtLogBHeadPtr.n.off);
5724 }
5725 /* Event Log B Tail Pointer. */
5726 {
5727 EVT_LOG_B_TAIL_PTR_T const EvtLogBTailPtr = pThis->EvtLogBTailPtr;
5728 pHlp->pfnPrintf(pHlp, " Event Log B Tail Pointer = %#RX64 (off: %#x)\n", EvtLogBTailPtr.u64,
5729 EvtLogBTailPtr.n.off);
5730 }
5731 /* PPR Log Auto Response Register. */
5732 {
5733 PPR_LOG_AUTO_RESP_T const PprLogAutoResp = pThis->PprLogAutoResp;
5734 pHlp->pfnPrintf(pHlp, " PPR Log Auto Response Register = %#RX64\n", PprLogAutoResp.u64);
5735 if (fVerbose)
5736 {
5737 pHlp->pfnPrintf(pHlp, " Code = %#x\n", PprLogAutoResp.n.u4AutoRespCode);
5738 pHlp->pfnPrintf(pHlp, " Mask Gen. = %RTbool\n", PprLogAutoResp.n.u1AutoRespMaskGen);
5739 }
5740 }
5741 /* PPR Log Overflow Early Warning Indicator Register. */
5742 {
5743 PPR_LOG_OVERFLOW_EARLY_T const PprLogOverflowEarly = pThis->PprLogOverflowEarly;
5744 pHlp->pfnPrintf(pHlp, " PPR Log overflow early warning = %#RX64\n", PprLogOverflowEarly.u64);
5745 if (fVerbose)
5746 {
5747 pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogOverflowEarly.n.u15Threshold);
5748 pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogOverflowEarly.n.u1IntrEn);
5749 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogOverflowEarly.n.u1Enable);
5750 }
5751 }
5752 /* PPR Log Overflow Early Warning Indicator Register. */
5753 {
5754 PPR_LOG_OVERFLOW_EARLY_T const PprLogBOverflowEarly = pThis->PprLogBOverflowEarly;
5755 pHlp->pfnPrintf(pHlp, " PPR Log B overflow early warning = %#RX64\n", PprLogBOverflowEarly.u64);
5756 if (fVerbose)
5757 {
5758 pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogBOverflowEarly.n.u15Threshold);
5759 pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogBOverflowEarly.n.u1IntrEn);
5760 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogBOverflowEarly.n.u1Enable);
5761 }
5762 }
5763}
5764
5765
5766/**
5767 * Dumps the DTE via the info callback helper.
5768 *
5769 * @param pHlp The info helper.
5770 * @param pDte The device table entry.
5771 * @param pszPrefix The string prefix.
5772 */
5773static void iommuAmdR3DbgInfoDteWorker(PCDBGFINFOHLP pHlp, PCDTE_T pDte, const char *pszPrefix)
5774{
5775 AssertReturnVoid(pHlp);
5776 AssertReturnVoid(pDte);
5777 AssertReturnVoid(pszPrefix);
5778
5779 pHlp->pfnPrintf(pHlp, "%sValid = %RTbool\n", pszPrefix, pDte->n.u1Valid);
5780 pHlp->pfnPrintf(pHlp, "%sTranslation Valid = %RTbool\n", pszPrefix, pDte->n.u1TranslationValid);
5781 pHlp->pfnPrintf(pHlp, "%sHost Access Dirty = %#x\n", pszPrefix, pDte->n.u2Had);
5782 pHlp->pfnPrintf(pHlp, "%sPaging Mode = %u\n", pszPrefix, pDte->n.u3Mode);
5783 pHlp->pfnPrintf(pHlp, "%sPage Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix, pDte->n.u40PageTableRootPtrLo,
5784 pDte->n.u40PageTableRootPtrLo << 12);
5785 pHlp->pfnPrintf(pHlp, "%sPPR enable = %RTbool\n", pszPrefix, pDte->n.u1Ppr);
5786 pHlp->pfnPrintf(pHlp, "%sGuest PPR Resp w/ PASID = %RTbool\n", pszPrefix, pDte->n.u1GstPprRespPasid);
5787 pHlp->pfnPrintf(pHlp, "%sGuest I/O Prot Valid = %RTbool\n", pszPrefix, pDte->n.u1GstIoValid);
5788 pHlp->pfnPrintf(pHlp, "%sGuest Translation Valid = %RTbool\n", pszPrefix, pDte->n.u1GstTranslateValid);
5789 pHlp->pfnPrintf(pHlp, "%sGuest Levels Translated = %#x\n", pszPrefix, pDte->n.u2GstMode);
5790 pHlp->pfnPrintf(pHlp, "%sGuest Root Page Table Ptr = %#x %#x %#x (addr=%#RGp)\n", pszPrefix,
5791 pDte->n.u3GstCr3TableRootPtrLo, pDte->n.u16GstCr3TableRootPtrMid, pDte->n.u21GstCr3TableRootPtrHi,
5792 (pDte->n.u21GstCr3TableRootPtrHi << 31)
5793 | (pDte->n.u16GstCr3TableRootPtrMid << 15)
5794 | (pDte->n.u3GstCr3TableRootPtrLo << 12));
5795 pHlp->pfnPrintf(pHlp, "%sI/O Read = %s\n", pszPrefix, pDte->n.u1IoRead ? "allowed" : "denied");
5796 pHlp->pfnPrintf(pHlp, "%sI/O Write = %s\n", pszPrefix, pDte->n.u1IoWrite ? "allowed" : "denied");
5797 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd0);
5798 pHlp->pfnPrintf(pHlp, "%sDomain ID = %u (%#x)\n", pszPrefix, pDte->n.u16DomainId, pDte->n.u16DomainId);
5799 pHlp->pfnPrintf(pHlp, "%sIOTLB Enable = %RTbool\n", pszPrefix, pDte->n.u1IoTlbEnable);
5800 pHlp->pfnPrintf(pHlp, "%sSuppress I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressPfEvents);
5801 pHlp->pfnPrintf(pHlp, "%sSuppress all I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressAllPfEvents);
5802 pHlp->pfnPrintf(pHlp, "%sPort I/O Control = %#x\n", pszPrefix, pDte->n.u2IoCtl);
5803 pHlp->pfnPrintf(pHlp, "%sIOTLB Cache Hint = %s\n", pszPrefix, pDte->n.u1Cache ? "no caching" : "cache");
5804 pHlp->pfnPrintf(pHlp, "%sSnoop Disable = %RTbool\n", pszPrefix, pDte->n.u1SnoopDisable);
5805 pHlp->pfnPrintf(pHlp, "%sAllow Exclusion = %RTbool\n", pszPrefix, pDte->n.u1AllowExclusion);
5806 pHlp->pfnPrintf(pHlp, "%sSysMgt Message Enable = %RTbool\n", pszPrefix, pDte->n.u2SysMgt);
5807 pHlp->pfnPrintf(pHlp, "%sInterrupt Map Valid = %RTbool\n", pszPrefix, pDte->n.u1IntrMapValid);
5808 uint8_t const uIntrTabLen = pDte->n.u4IntrTableLength;
5809 if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
5810 {
5811 uint16_t const cEntries = IOMMU_GET_INTR_TAB_ENTRIES(pDte);
5812 uint16_t const cbIntrTable = IOMMU_GET_INTR_TAB_LEN(pDte);
5813 pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (%u entries, %u bytes)\n", pszPrefix, uIntrTabLen, cEntries,
5814 cbIntrTable);
5815 }
5816 else
5817 pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (invalid!)\n", pszPrefix, uIntrTabLen);
5818 pHlp->pfnPrintf(pHlp, "%sIgnore Unmapped Interrupts = %RTbool\n", pszPrefix, pDte->n.u1IgnoreUnmappedIntrs);
5819 pHlp->pfnPrintf(pHlp, "%sInterrupt Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix,
5820 pDte->n.u46IntrTableRootPtr, pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK);
5821 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u4Rsvd0);
5822 pHlp->pfnPrintf(pHlp, "%sINIT passthru = %RTbool\n", pszPrefix, pDte->n.u1InitPassthru);
5823 pHlp->pfnPrintf(pHlp, "%sExtInt passthru = %RTbool\n", pszPrefix, pDte->n.u1ExtIntPassthru);
5824 pHlp->pfnPrintf(pHlp, "%sNMI passthru = %RTbool\n", pszPrefix, pDte->n.u1NmiPassthru);
5825 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd2);
5826 pHlp->pfnPrintf(pHlp, "%sInterrupt Control = %#x\n", pszPrefix, pDte->n.u2IntrCtrl);
5827 pHlp->pfnPrintf(pHlp, "%sLINT0 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint0Passthru);
5828 pHlp->pfnPrintf(pHlp, "%sLINT1 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint1Passthru);
5829 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u32Rsvd0);
5830 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u22Rsvd0);
5831 pHlp->pfnPrintf(pHlp, "%sAttribute Override Valid = %RTbool\n", pszPrefix, pDte->n.u1AttrOverride);
5832 pHlp->pfnPrintf(pHlp, "%sMode0FC = %#x\n", pszPrefix, pDte->n.u1Mode0FC);
5833 pHlp->pfnPrintf(pHlp, "%sSnoop Attribute = %#x\n", pszPrefix, pDte->n.u8SnoopAttr);
5834 pHlp->pfnPrintf(pHlp, "\n");
5835}
5836
5837
5838/**
5839 * @callback_method_impl{FNDBGFHANDLERDEV}
5840 */
5841static DECLCALLBACK(void) iommuAmdR3DbgInfoDte(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5842{
5843 if (pszArgs)
5844 {
5845 uint16_t uDevId = 0;
5846 int rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &uDevId);
5847 if (RT_SUCCESS(rc))
5848 {
5849 DTE_T Dte;
5850 rc = iommuAmdDteRead(pDevIns, uDevId, IOMMUOP_TRANSLATE_REQ, &Dte);
5851 if (RT_SUCCESS(rc))
5852 {
5853 pHlp->pfnPrintf(pHlp, "DTE for device %#x\n", uDevId);
5854 iommuAmdR3DbgInfoDteWorker(pHlp, &Dte, " ");
5855 return;
5856 }
5857 pHlp->pfnPrintf(pHlp, "Failed to read DTE for device ID %u (%#x). rc=%Rrc\n", uDevId, uDevId, rc);
5858 }
5859 else
5860 pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit device ID. rc=%Rrc\n", rc);
5861 }
5862 else
5863 pHlp->pfnPrintf(pHlp, "Missing device ID.\n");
5864}
5865
5866
5867# ifdef IOMMU_WITH_IOTLBE_CACHE
5868/**
5869 * @callback_method_impl{FNDBGFHANDLERDEV}
5870 */
5871static DECLCALLBACK(void) iommuAmdR3DbgInfoIotlb(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5872{
5873 if (pszArgs)
5874 {
5875 uint16_t uDomainId = 0;
5876 int rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &uDomainId);
5877 if (RT_SUCCESS(rc))
5878 {
5879 pHlp->pfnPrintf(pHlp, "IOTLBEs for domain %u (%#x):\n", uDomainId, uDomainId);
5880 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5881 IOTLBEINFOARG Args;
5882 Args.pIommu = pThis;
5883 Args.pHlp = pHlp;
5884 Args.uDomainId = uDomainId;
5885
5886 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
5887 RTAvlU64DoWithAll(&pThis->TreeIotlbe, true /* fFromLeft */, iommuAmdR3IotlbEntryInfo, &Args);
5888 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
5889 }
5890 else
5891 pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit domain ID. rc=%Rrc\n", rc);
5892 }
5893 else
5894 pHlp->pfnPrintf(pHlp, "Missing domain ID.\n");
5895}
5896# endif
5897
5898
5899#ifdef IOMMU_WITH_IRTE_CACHE
5900/**
5901 * Gets the interrupt type name for an interrupt type in the IRTE.
5902 *
5903 * @returns The interrupt type name.
5904 * @param uIntrType The interrupt type (as specified in the IRTE).
5905 */
5906static const char *iommuAmdIrteGetIntrTypeName(uint8_t uIntrType)
5907{
5908 switch (uIntrType)
5909 {
5910 case VBOX_MSI_DELIVERY_MODE_FIXED: return "Fixed";
5911 case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO: return "Arbitrated";
5912 default: return "<Reserved>";
5913 }
5914}
5915
5916/**
5917 * @callback_method_impl{FNDBGFHANDLERDEV}
5918 */
5919static DECLCALLBACK(void) iommuAmdR3DbgInfoIrtes(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5920{
5921 RT_NOREF(pszArgs);
5922
5923 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5924 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
5925
5926 uint16_t const cIrteCache = pThis->cIrteCache;
5927 pHlp->pfnPrintf(pHlp, "IRTE Cache: Capacity=%u entries\n", cIrteCache);
5928 for (uint16_t idxIrte = 0; idxIrte < cIrteCache; idxIrte++)
5929 {
5930 PCIRTECACHE pIrteCache = &pThis->paIrteCache[idxIrte];
5931 uint32_t const uKey = pIrteCache->uKey;
5932 if (uKey != IOMMU_IRTE_CACHE_KEY_NIL)
5933 {
5934 uint16_t const uDeviceId = IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(uKey);
5935 uint16_t const offIrte = IOMMU_IRTE_CACHE_KEY_GET_OFF(uKey);
5936 pHlp->pfnPrintf(pHlp, " Entry[%u]: Offset=%#x Device=%#x (BDF %02x:%02x.%d)\n",
5937 idxIrte, offIrte, uDeviceId,
5938 (uDeviceId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
5939 (uDeviceId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
5940 uDeviceId & VBOX_PCI_DEVFN_FUN_MASK);
5941
5942 PCIRTE_T pIrte = &pIrteCache->Irte;
5943 pHlp->pfnPrintf(pHlp, " Remap Enable = %RTbool\n", pIrte->n.u1RemapEnable);
5944 pHlp->pfnPrintf(pHlp, " Suppress IOPF = %RTbool\n", pIrte->n.u1SuppressIoPf);
5945 pHlp->pfnPrintf(pHlp, " Interrupt Type = %#x (%s)\n", pIrte->n.u3IntrType,
5946 iommuAmdIrteGetIntrTypeName(pIrte->n.u3IntrType));
5947 pHlp->pfnPrintf(pHlp, " Request EOI = %RTbool\n", pIrte->n.u1ReqEoi);
5948 pHlp->pfnPrintf(pHlp, " Destination mode = %s\n", pIrte->n.u1DestMode ? "Logical" : "Physical");
5949 pHlp->pfnPrintf(pHlp, " Destination Id = %u\n", pIrte->n.u8Dest);
5950 pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", pIrte->n.u8Vector, pIrte->n.u8Vector);
5951 pHlp->pfnPrintf(pHlp, "\n");
5952 }
5953 }
5954 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
5955}
5956#endif
5957
5958
5959/**
5960 * @callback_method_impl{FNDBGFHANDLERDEV}
5961 */
5962static DECLCALLBACK(void) iommuAmdR3DbgInfoDevTabs(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5963{
5964 RT_NOREF(pszArgs);
5965
5966 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5967 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
5968 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
5969 NOREF(pPciDev);
5970
5971 uint8_t cSegments = 0;
5972 for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
5973 {
5974 DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
5975 RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT;
5976 if (GCPhysDevTab)
5977 ++cSegments;
5978 }
5979
5980 pHlp->pfnPrintf(pHlp, "AMD-IOMMU device tables with address translations enabled:\n");
5981 pHlp->pfnPrintf(pHlp, " DTE Segments=%u\n", cSegments);
5982 if (!cSegments)
5983 return;
5984
5985 for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
5986 {
5987 DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
5988 RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT;
5989 if (GCPhysDevTab)
5990 {
5991 uint32_t const cbDevTab = IOMMU_GET_DEV_TAB_LEN(&DevTabBar);
5992 uint32_t const cDtes = cbDevTab / sizeof(DTE_T);
5993
5994 void *pvDevTab = RTMemAllocZ(cbDevTab);
5995 if (RT_LIKELY(pvDevTab))
5996 {
5997 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDevTab, pvDevTab, cbDevTab);
5998 if (RT_SUCCESS(rc))
5999 {
6000 for (uint32_t idxDte = 0; idxDte < cDtes; idxDte++)
6001 {
6002 PCDTE_T pDte = (PCDTE_T)((uintptr_t)pvDevTab + idxDte * sizeof(DTE_T));
6003 if ( pDte->n.u1Valid
6004 && pDte->n.u1TranslationValid
6005 && pDte->n.u3Mode != 0)
6006 {
6007 pHlp->pfnPrintf(pHlp, " DTE %u (BDF %02x:%02x.%d)\n", idxDte,
6008 (idxDte >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
6009 (idxDte >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
6010 idxDte & VBOX_PCI_DEVFN_FUN_MASK);
6011 iommuAmdR3DbgInfoDteWorker(pHlp, pDte, " ");
6012 pHlp->pfnPrintf(pHlp, "\n");
6013 }
6014 }
6015 pHlp->pfnPrintf(pHlp, "\n");
6016 }
6017 else
6018 {
6019 pHlp->pfnPrintf(pHlp, " Failed to read table at %#RGp of size %zu bytes. rc=%Rrc!\n", GCPhysDevTab,
6020 cbDevTab, rc);
6021 }
6022
6023 RTMemFree(pvDevTab);
6024 }
6025 else
6026 {
6027 pHlp->pfnPrintf(pHlp, " Allocating %zu bytes for reading the device table failed!\n", cbDevTab);
6028 return;
6029 }
6030 }
6031 }
6032}
6033
6034
6035/**
6036 * @callback_method_impl{FNSSMDEVSAVEEXEC}
6037 */
6038static DECLCALLBACK(int) iommuAmdR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM)
6039{
6040 /** @todo IOMMU: Save state. */
6041 RT_NOREF2(pDevIns, pSSM);
6042 LogFlowFunc(("\n"));
6043 return VERR_NOT_IMPLEMENTED;
6044}
6045
6046
6047/**
6048 * @callback_method_impl{FNSSMDEVLOADEXEC}
6049 */
6050static DECLCALLBACK(int) iommuAmdR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
6051{
6052 /** @todo IOMMU: Load state. */
6053 RT_NOREF4(pDevIns, pSSM, uVersion, uPass);
6054 LogFlowFunc(("\n"));
6055 return VERR_NOT_IMPLEMENTED;
6056}
6057
6058
6059/**
6060 * @interface_method_impl{PDMDEVREG,pfnReset}
6061 */
6062static DECLCALLBACK(void) iommuAmdR3Reset(PPDMDEVINS pDevIns)
6063{
6064 /*
6065 * Resets read-write portion of the IOMMU state.
6066 *
6067 * NOTE! State not initialized here is expected to be initialized during
6068 * device construction and remain read-only through the lifetime of the VM.
6069 */
6070 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
6071 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
6072 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
6073 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
6074
6075 IOMMU_LOCK_NORET(pDevIns, pThisCC);
6076
6077 LogFlowFunc(("\n"));
6078
6079 memset(&pThis->aDevTabBaseAddrs[0], 0, sizeof(pThis->aDevTabBaseAddrs));
6080
6081 pThis->CmdBufBaseAddr.u64 = 0;
6082 pThis->CmdBufBaseAddr.n.u4Len = 8;
6083
6084 pThis->EvtLogBaseAddr.u64 = 0;
6085 pThis->EvtLogBaseAddr.n.u4Len = 8;
6086
6087 pThis->Ctrl.u64 = 0;
6088 pThis->Ctrl.n.u1Coherent = 1;
6089 Assert(!pThis->ExtFeat.n.u1BlockStopMarkSup);
6090
6091 pThis->ExclRangeBaseAddr.u64 = 0;
6092 pThis->ExclRangeLimit.u64 = 0;
6093
6094 pThis->PprLogBaseAddr.u64 = 0;
6095 pThis->PprLogBaseAddr.n.u4Len = 8;
6096
6097 pThis->HwEvtHi.u64 = 0;
6098 pThis->HwEvtLo = 0;
6099 pThis->HwEvtStatus.u64 = 0;
6100
6101 pThis->GALogBaseAddr.u64 = 0;
6102 pThis->GALogBaseAddr.n.u4Len = 8;
6103 pThis->GALogTailAddr.u64 = 0;
6104
6105 pThis->PprLogBBaseAddr.u64 = 0;
6106 pThis->PprLogBBaseAddr.n.u4Len = 8;
6107
6108 pThis->EvtLogBBaseAddr.u64 = 0;
6109 pThis->EvtLogBBaseAddr.n.u4Len = 8;
6110
6111 pThis->PerfOptCtrl.u32 = 0;
6112
6113 pThis->XtGenIntrCtrl.u64 = 0;
6114 pThis->XtPprIntrCtrl.u64 = 0;
6115 pThis->XtGALogIntrCtrl.u64 = 0;
6116
6117 memset(&pThis->aMarcApers[0], 0, sizeof(pThis->aMarcApers));
6118
6119 pThis->CmdBufHeadPtr.u64 = 0;
6120 pThis->CmdBufTailPtr.u64 = 0;
6121 pThis->EvtLogHeadPtr.u64 = 0;
6122 pThis->EvtLogTailPtr.u64 = 0;
6123
6124 pThis->Status.u64 = 0;
6125
6126 pThis->PprLogHeadPtr.u64 = 0;
6127 pThis->PprLogTailPtr.u64 = 0;
6128
6129 pThis->GALogHeadPtr.u64 = 0;
6130 pThis->GALogTailPtr.u64 = 0;
6131
6132 pThis->PprLogBHeadPtr.u64 = 0;
6133 pThis->PprLogBTailPtr.u64 = 0;
6134
6135 pThis->EvtLogBHeadPtr.u64 = 0;
6136 pThis->EvtLogBTailPtr.u64 = 0;
6137
6138 pThis->PprLogAutoResp.u64 = 0;
6139 pThis->PprLogOverflowEarly.u64 = 0;
6140 pThis->PprLogBOverflowEarly.u64 = 0;
6141
6142 pThis->IommuBar.u64 = 0;
6143 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0);
6144 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0);
6145
6146 PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER);
6147
6148 IOMMU_UNLOCK(pDevIns, pThisCC);
6149
6150#ifdef IOMMU_WITH_DTE_CACHE
6151 iommuAmdDteCacheRemoveAll(pDevIns);
6152#endif
6153#ifdef IOMMU_WITH_IOTLBE_CACHE
6154 iommuAmdIotlbRemoveAll(pDevIns);
6155#endif
6156#ifdef IOMMU_WITH_IRTE_CACHE
6157 iommuAmdIrteCacheRemoveAll(pDevIns);
6158#endif
6159}
6160
6161
6162/**
6163 * @interface_method_impl{PDMDEVREG,pfnDestruct}
6164 */
6165static DECLCALLBACK(int) iommuAmdR3Destruct(PPDMDEVINS pDevIns)
6166{
6167 PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
6168 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
6169 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
6170 LogFlowFunc(("\n"));
6171
6172 IOMMU_LOCK_NORET(pDevIns, pThisCC);
6173
6174 /* Close the command thread semaphore. */
6175 if (pThis->hEvtCmdThread != NIL_SUPSEMEVENT)
6176 {
6177 PDMDevHlpSUPSemEventClose(pDevIns, pThis->hEvtCmdThread);
6178 pThis->hEvtCmdThread = NIL_SUPSEMEVENT;
6179 }
6180
6181#ifdef IOMMU_WITH_DTE_CACHE
6182 /* Destroy the DTE cache. */
6183 if (pThis->paDteCache)
6184 {
6185 PDMDevHlpMMHeapFree(pDevIns, pThis->paDteCache);
6186 pThis->paDteCache = NULL;
6187 }
6188#endif
6189
6190#ifdef IOMMU_WITH_IOTLBE_CACHE
6191 /* Destroy the IOTLB cache. */
6192 if (pThis->paIotlbes)
6193 {
6194 PDMDevHlpMMHeapFree(pDevIns, pThis->paIotlbes);
6195 pThis->paIotlbes = NULL;
6196 pThis->idxUnusedIotlbe = 0;
6197 }
6198#endif
6199
6200#ifdef IOMMU_WITH_IRTE_CACHE
6201 /* Destroy the interrupt cache. */
6202 if (pThis->paIrteCache)
6203 {
6204 PDMDevHlpMMHeapFree(pDevIns, pThis->paIrteCache);
6205 pThis->paIrteCache = NULL;
6206 }
6207#endif
6208
6209 IOMMU_UNLOCK(pDevIns, pThisCC);
6210 return VINF_SUCCESS;
6211}
6212
6213
6214/**
6215 * @interface_method_impl{PDMDEVREG,pfnConstruct}
6216 */
6217static DECLCALLBACK(int) iommuAmdR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg)
6218{
6219 PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
6220 RT_NOREF(pCfg);
6221
6222 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
6223 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
6224 pThis->u32Magic = IOMMU_MAGIC;
6225 pThisCC->pDevInsR3 = pDevIns;
6226
6227 LogFlowFunc(("iInstance=%d\n", iInstance));
6228
6229 /*
6230 * Register the IOMMU with PDM.
6231 */
6232 PDMIOMMUREGR3 IommuReg;
6233 RT_ZERO(IommuReg);
6234 IommuReg.u32Version = PDM_IOMMUREGCC_VERSION;
6235 IommuReg.pfnMemAccess = iommuAmdMemAccess;
6236 IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess;
6237 IommuReg.pfnMsiRemap = iommuAmdMsiRemap;
6238 IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION;
6239 int rc = PDMDevHlpIommuRegister(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp), &pThis->idxIommu);
6240 if (RT_FAILURE(rc))
6241 return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as an IOMMU device"));
6242 if (pThisCC->CTX_SUFF(pIommuHlp)->u32Version != PDM_IOMMUHLPR3_VERSION)
6243 return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS,
6244 N_("IOMMU helper version mismatch; got %#x expected %#x"),
6245 pThisCC->CTX_SUFF(pIommuHlp)->u32Version, PDM_IOMMUHLPR3_VERSION);
6246 if (pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd != PDM_IOMMUHLPR3_VERSION)
6247 return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS,
6248 N_("IOMMU helper end-version mismatch; got %#x expected %#x"),
6249 pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd, PDM_IOMMUHLPR3_VERSION);
6250
6251 /*
6252 * We will use PDM's critical section (via helpers) for the IOMMU device.
6253 */
6254 rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns));
6255 AssertRCReturn(rc, rc);
6256
6257 /*
6258 * Initialize read-only PCI configuration space.
6259 */
6260 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
6261 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
6262
6263 /* Header. */
6264 PDMPciDevSetVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* AMD */
6265 PDMPciDevSetDeviceId(pPciDev, IOMMU_PCI_DEVICE_ID); /* VirtualBox IOMMU device */
6266 PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER); /* Enable bus master (as we directly access main memory) */
6267 PDMPciDevSetStatus(pPciDev, VBOX_PCI_STATUS_CAP_LIST); /* Capability list supported */
6268 PDMPciDevSetRevisionId(pPciDev, IOMMU_PCI_REVISION_ID); /* VirtualBox specific device implementation revision */
6269 PDMPciDevSetClassBase(pPciDev, VBOX_PCI_CLASS_SYSTEM); /* System Base Peripheral */
6270 PDMPciDevSetClassSub(pPciDev, VBOX_PCI_SUB_SYSTEM_IOMMU); /* IOMMU */
6271 PDMPciDevSetClassProg(pPciDev, 0x0); /* IOMMU Programming interface */
6272 PDMPciDevSetHeaderType(pPciDev, 0x0); /* Single function, type 0 */
6273 PDMPciDevSetSubSystemId(pPciDev, IOMMU_PCI_DEVICE_ID); /* AMD */
6274 PDMPciDevSetSubSystemVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* VirtualBox IOMMU device */
6275 PDMPciDevSetCapabilityList(pPciDev, IOMMU_PCI_OFF_CAP_HDR); /* Offset into capability registers */
6276 PDMPciDevSetInterruptPin(pPciDev, 0x1); /* INTA#. */
6277 PDMPciDevSetInterruptLine(pPciDev, 0x0); /* For software compatibility; no effect on hardware */
6278
6279 /* Capability Header. */
6280 /* NOTE! Fields (e.g, EFR) must match what we expose in the ACPI tables. */
6281 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_CAP_HDR,
6282 RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_ID, 0xf) /* RO - Secure Device capability block */
6283 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_PTR, IOMMU_PCI_OFF_MSI_CAP_HDR) /* RO - Next capability offset */
6284 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_TYPE, 0x3) /* RO - IOMMU capability block */
6285 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_REV, 0x1) /* RO - IOMMU interface revision */
6286 | RT_BF_MAKE(IOMMU_BF_CAPHDR_IOTLB_SUP, 0x0) /* RO - Remote IOTLB support */
6287 | RT_BF_MAKE(IOMMU_BF_CAPHDR_HT_TUNNEL, 0x0) /* RO - HyperTransport Tunnel support */
6288 | RT_BF_MAKE(IOMMU_BF_CAPHDR_NP_CACHE, 0x0) /* RO - Cache NP page table entries */
6289 | RT_BF_MAKE(IOMMU_BF_CAPHDR_EFR_SUP, 0x1) /* RO - Extended Feature Register support */
6290 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_EXT, 0x1)); /* RO - Misc. Information Register support */
6291
6292 /* Base Address Register. */
6293 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0x0); /* RW - Base address (Lo) and enable bit */
6294 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0x0); /* RW - Base address (Hi) */
6295
6296 /* IOMMU Range Register. */
6297 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_RANGE_REG, 0x0); /* RW - Range register (implemented as RO by us) */
6298
6299 /* Misc. Information Register. */
6300 /* NOTE! Fields (e.g, GVA size) must match what we expose in the ACPI tables. */
6301 uint32_t const uMiscInfoReg0 = RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM, 0) /* RO - MSI number */
6302 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_GVA_SIZE, 2) /* RO - Guest Virt. Addr size (2=48 bits) */
6303 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_PA_SIZE, 48) /* RO - Physical Addr size (48 bits) */
6304 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_VA_SIZE, 64) /* RO - Virt. Addr size (64 bits) */
6305 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_HT_ATS_RESV, 0) /* RW - HT ATS reserved */
6306 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM_PPR, 0); /* RW - PPR interrupt number */
6307 uint32_t const uMiscInfoReg1 = 0;
6308 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_0, uMiscInfoReg0);
6309 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_1, uMiscInfoReg1);
6310
6311 /* MSI Capability Header register. */
6312 PDMMSIREG MsiReg;
6313 RT_ZERO(MsiReg);
6314 MsiReg.cMsiVectors = 1;
6315 MsiReg.iMsiCapOffset = IOMMU_PCI_OFF_MSI_CAP_HDR;
6316 MsiReg.iMsiNextOffset = 0; /* IOMMU_PCI_OFF_MSI_MAP_CAP_HDR */
6317 MsiReg.fMsi64bit = 1; /* 64-bit addressing support is mandatory; See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support". */
6318
6319 /* MSI Address (Lo, Hi) and MSI data are read-write PCI config registers handled by our generic PCI config space code. */
6320#if 0
6321 /* MSI Address Lo. */
6322 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, 0); /* RW - MSI message address (Lo) */
6323 /* MSI Address Hi. */
6324 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, 0); /* RW - MSI message address (Hi) */
6325 /* MSI Data. */
6326 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, 0); /* RW - MSI data */
6327#endif
6328
6329#if 0
6330 /** @todo IOMMU: I don't know if we need to support this, enable later if
6331 * required. */
6332 /* MSI Mapping Capability Header register. */
6333 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_MAP_CAP_HDR,
6334 RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_ID, 0x8) /* RO - Capability ID */
6335 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_PTR, 0x0) /* RO - Offset to next capability (NULL) */
6336 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_EN, 0x1) /* RO - MSI mapping capability enable */
6337 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_FIXED, 0x1) /* RO - MSI mapping range is fixed */
6338 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_TYPE, 0x15)); /* RO - MSI mapping capability */
6339 /* When implementing don't forget to copy this to its MMIO shadow register (MsiMapCapHdr) in iommuAmdR3Init. */
6340#endif
6341
6342 /*
6343 * Register the PCI function with PDM.
6344 */
6345 rc = PDMDevHlpPCIRegister(pDevIns, pPciDev);
6346 AssertLogRelRCReturn(rc, rc);
6347
6348 /*
6349 * Register MSI support for the PCI device.
6350 * This must be done -after- register it as a PCI device!
6351 */
6352 rc = PDMDevHlpPCIRegisterMsi(pDevIns, &MsiReg);
6353 AssertRCReturn(rc, rc);
6354
6355 /*
6356 * Intercept PCI config. space accesses.
6357 */
6358 rc = PDMDevHlpPCIInterceptConfigAccesses(pDevIns, pPciDev, iommuAmdR3PciConfigRead, iommuAmdR3PciConfigWrite);
6359 AssertLogRelRCReturn(rc, rc);
6360
6361 /*
6362 * Create the MMIO region.
6363 * Mapping of the region is done when software configures it via PCI config space.
6364 */
6365 rc = PDMDevHlpMmioCreate(pDevIns, IOMMU_MMIO_REGION_SIZE, pPciDev, 0 /* iPciRegion */, iommuAmdMmioWrite, iommuAmdMmioRead,
6366 NULL /* pvUser */,
6367 IOMMMIO_FLAGS_READ_DWORD_QWORD
6368 | IOMMMIO_FLAGS_WRITE_DWORD_QWORD_READ_MISSING
6369 | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_READ
6370 | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_WRITE,
6371 "AMD-IOMMU", &pThis->hMmio);
6372 AssertLogRelRCReturn(rc, rc);
6373
6374 /*
6375 * Register saved state.
6376 */
6377 rc = PDMDevHlpSSMRegisterEx(pDevIns, IOMMU_SAVED_STATE_VERSION, sizeof(IOMMU), NULL,
6378 NULL, NULL, NULL,
6379 NULL, iommuAmdR3SaveExec, NULL,
6380 NULL, iommuAmdR3LoadExec, NULL);
6381 AssertLogRelRCReturn(rc, rc);
6382
6383 /*
6384 * Register debugger info items.
6385 */
6386 PDMDevHlpDBGFInfoRegister(pDevIns, "iommu", "Display IOMMU state.", iommuAmdR3DbgInfo);
6387 PDMDevHlpDBGFInfoRegister(pDevIns, "iommudte", "Display the DTE for a device. Arguments: DeviceID.", iommuAmdR3DbgInfoDte);
6388 PDMDevHlpDBGFInfoRegister(pDevIns, "iommudevtabs", "Display active IOMMU device tables.", iommuAmdR3DbgInfoDevTabs);
6389#ifdef IOMMU_WITH_IOTLBE_CACHE
6390 PDMDevHlpDBGFInfoRegister(pDevIns, "iommutlb", "Display IOTLBs for a domain. Arguments: DomainID.", iommuAmdR3DbgInfoIotlb);
6391#endif
6392#ifdef IOMMU_WITH_IRTE_CACHE
6393 PDMDevHlpDBGFInfoRegister(pDevIns, "iommuirtes", "Display the IRTE cache.", iommuAmdR3DbgInfoIrtes);
6394#endif
6395
6396# ifdef VBOX_WITH_STATISTICS
6397 /*
6398 * Statistics.
6399 */
6400 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadR3, STAMTYPE_COUNTER, "R3/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in R3");
6401 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadRZ, STAMTYPE_COUNTER, "RZ/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in RZ.");
6402
6403 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteR3, STAMTYPE_COUNTER, "R3/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in R3.");
6404 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteRZ, STAMTYPE_COUNTER, "RZ/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in RZ.");
6405
6406 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapR3, STAMTYPE_COUNTER, "R3/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in R3.");
6407 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapRZ, STAMTYPE_COUNTER, "RZ/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in RZ.");
6408
6409 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadR3, STAMTYPE_COUNTER, "R3/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in R3.");
6410 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadRZ, STAMTYPE_COUNTER, "RZ/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in RZ.");
6411
6412 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteR3, STAMTYPE_COUNTER, "R3/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in R3.");
6413 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteRZ, STAMTYPE_COUNTER, "RZ/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in RZ.");
6414
6415 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadR3, STAMTYPE_COUNTER, "R3/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in R3.");
6416 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadRZ, STAMTYPE_COUNTER, "RZ/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in RZ.");
6417
6418 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteR3, STAMTYPE_COUNTER, "R3/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in R3.");
6419 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteRZ, STAMTYPE_COUNTER, "RZ/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in RZ.");
6420
6421 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmd, STAMTYPE_COUNTER, "R3/Commands", STAMUNIT_OCCURENCES, "Number of commands processed (total).");
6422 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompWait, STAMTYPE_COUNTER, "R3/Commands/CompWait", STAMUNIT_OCCURENCES, "Number of Completion Wait commands processed.");
6423 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvDte, STAMTYPE_COUNTER, "R3/Commands/InvDte", STAMUNIT_OCCURENCES, "Number of Invalidate DTE commands processed.");
6424 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuPages, STAMTYPE_COUNTER, "R3/Commands/InvIommuPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU Pages commands processed.");
6425 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIotlbPages, STAMTYPE_COUNTER, "R3/Commands/InvIotlbPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOTLB Pages commands processed.");
6426 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIntrTable, STAMTYPE_COUNTER, "R3/Commands/InvIntrTable", STAMUNIT_OCCURENCES, "Number of Invalidate Interrupt Table commands processed.");
6427 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdPrefIommuPages, STAMTYPE_COUNTER, "R3/Commands/PrefIommuPages", STAMUNIT_OCCURENCES, "Number of Prefetch IOMMU Pages commands processed.");
6428 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompletePprReq, STAMTYPE_COUNTER, "R3/Commands/CompletePprReq", STAMUNIT_OCCURENCES, "Number of Complete PPR Requests commands processed.");
6429 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuAll, STAMTYPE_COUNTER, "R3/Commands/InvIommuAll", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU All commands processed.");
6430
6431
6432 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeCached, STAMTYPE_COUNTER, "IOTLB/Cached", STAMUNIT_OCCURENCES, "Number of IOTLB entries in the cache.");
6433 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeLazyEvictReuse, STAMTYPE_COUNTER, "IOTLB/LazyEvictReuse", STAMUNIT_OCCURENCES, "Number of IOTLB entries reused after lazy eviction.");
6434
6435 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfDteLookup, STAMTYPE_PROFILE, "Profile/DteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling DTE lookup.");
6436 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIotlbeLookup, STAMTYPE_PROFILE, "Profile/IotlbeLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IOTLBE lookup.");
6437
6438 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteLookup, STAMTYPE_PROFILE, "Profile/IrteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE lookup.");
6439 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteCacheLookup, STAMTYPE_PROFILE, "Profile/IrteCacheLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE cache lookup.");
6440
6441 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "Access/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
6442 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "Access/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
6443 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHitFull, STAMTYPE_COUNTER, "Access/CacheHitFull", STAMUNIT_OCCURENCES, "Number of accesses that was entirely in the cache.");
6444 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheNonContig, STAMTYPE_COUNTER, "Access/CacheNonContig", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in non-contiguous translated regions.");
6445 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCachePermDenied, STAMTYPE_COUNTER, "Access/CacheAddrDenied", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in denied permissions.");
6446 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "Access/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions.");
6447 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "Access/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions.");
6448
6449 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheHit, STAMTYPE_COUNTER, "Intr/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
6450 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheMiss, STAMTYPE_COUNTER, "Intr/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
6451# endif
6452
6453 /*
6454 * Create the command thread and its event semaphore.
6455 */
6456 char szDevIommu[64];
6457 RT_ZERO(szDevIommu);
6458 RTStrPrintf(szDevIommu, sizeof(szDevIommu), "IOMMU-%u", iInstance);
6459 rc = PDMDevHlpThreadCreate(pDevIns, &pThisCC->pCmdThread, pThis, iommuAmdR3CmdThread, iommuAmdR3CmdThreadWakeUp,
6460 0 /* cbStack */, RTTHREADTYPE_IO, szDevIommu);
6461 AssertLogRelRCReturn(rc, rc);
6462
6463 rc = PDMDevHlpSUPSemEventCreate(pDevIns, &pThis->hEvtCmdThread);
6464 AssertLogRelRCReturn(rc, rc);
6465
6466#ifdef IOMMU_WITH_DTE_CACHE
6467 /*
6468 * Initialize the critsect of the cache.
6469 */
6470 rc = PDMDevHlpCritSectInit(pDevIns, &pThis->CritSectCache, RT_SRC_POS, "IOMMUCache-#%u", pDevIns->iInstance);
6471 AssertLogRelRCReturn(rc, rc);
6472
6473 /*
6474 * Allocate the device table entry cache.
6475 * PCI devices are hotpluggable and we don't have a way of querying the bus for all
6476 * assigned PCI BDF slots. So while this wastes some memory, it should work regardless
6477 * of how code, features and devices around the IOMMU change.
6478 */
6479 size_t cbCache = 0;
6480 size_t const cbDteCache = sizeof(DTECACHE) * IOMMU_DTE_CACHE_MAX;
6481 AssertCompile(IOMMU_DTE_CACHE_MAX >= UINT16_MAX);
6482 pThis->paDteCache = (PDTECACHE)PDMDevHlpMMHeapAllocZ(pDevIns, cbDteCache);
6483 if (!pThis->paDteCache)
6484 return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
6485 N_("Failed to allocate %zu bytes from the hyperheap for the DTE cache."), cbDteCache);
6486 cbCache += cbDteCache;
6487#endif
6488
6489#ifdef IOMMU_WITH_IOTLBE_CACHE
6490 /*
6491 * Allocate IOTLB entries.
6492 * This is allocated upfront since we expect a relatively small number of entries,
6493 * is more cache-line efficient and easier to track least recently used entries for
6494 * eviction when the cache is full. This also avoids unpredictable behavior during
6495 * the lifetime of the VM if the hyperheap gets full.
6496 */
6497 size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
6498 pThis->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes);
6499 if (!pThis->paIotlbes)
6500 return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
6501 N_("Failed to allocate %zu bytes from the hyperheap for the IOTLB cache."), cbIotlbes);
6502 RTListInit(&pThis->LstLruIotlbe);
6503 cbCache += cbIotlbes;
6504#endif
6505
6506#ifdef IOMMU_WITH_IRTE_CACHE
6507 /* Maximum number of elements in the IRTE cache. */
6508 PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3;
6509 rc = pHlp->pfnCFGMQueryU16Def(pCfg, "InterruptCacheCount", &pThis->cIrteCache, IOMMU_IRTE_CACHE_DEFAULT);
6510 if (RT_FAILURE(rc))
6511 return PDMDevHlpVMSetError(pDevIns, rc, RT_SRC_POS, N_("IOMMU: failed to read InterruptCacheCount as integer"));
6512 AssertCompile(IOMMU_IRTE_CACHE_DEFAULT >= IOMMU_IRTE_CACHE_MIN);
6513 AssertCompile(IOMMU_IRTE_CACHE_DEFAULT <= IOMMU_IRTE_CACHE_MAX);
6514 if ( pThis->cIrteCache < IOMMU_IRTE_CACHE_MIN
6515 || pThis->cIrteCache > IOMMU_IRTE_CACHE_MAX)
6516 return PDMDevHlpVMSetError(pDevIns, VERR_INVALID_PARAMETER, RT_SRC_POS,
6517 N_("IOMMU: InterruptCacheCount invalid (must be between %u and %u)."),
6518 IOMMU_IRTE_CACHE_MIN, IOMMU_IRTE_CACHE_MAX);
6519
6520 /*
6521 * Allocate the interrupt remapping cache.
6522 * This is an array of devices and their corresponding interrupt remap table entries.
6523 * Typically only a handful of PCI devices are used in VMs so this is kept rather small.
6524 * If we ever need to support a vast number of interrupt-remapped devices, we can
6525 * implement a more sophisticated cache solution then.
6526 *
6527 * NOTE: IRTE cache entry keys are initialized later in this function by calling
6528 * iommuAmdR3Reset() -> iommuAmdIrteCacheRemoveAll().
6529 */
6530 size_t const cbIrteCache = sizeof(IRTECACHE) * pThis->cIrteCache;
6531 pThis->paIrteCache = (PIRTECACHE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIrteCache);
6532 if (!pThis->paIrteCache)
6533 return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
6534 N_("Failed to allocate %zu bytes from the hyperheap for the interrupt cache."), cbIrteCache);
6535 cbCache += cbIrteCache;
6536#endif
6537
6538#ifdef IOMMU_WITH_DTE_CACHE
6539 LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOMMU cache\n", IOMMU_LOG_PFX, cbCache));
6540#endif
6541
6542 /*
6543 * Initialize read-only registers.
6544 * NOTE! Fields here must match their corresponding field in the ACPI tables.
6545 */
6546 /* Don't remove the commented lines below as it lets us see all features at a glance. */
6547 pThis->ExtFeat.u64 = 0;
6548 //pThis->ExtFeat.n.u1PrefetchSup = 0;
6549 //pThis->ExtFeat.n.u1PprSup = 0;
6550 //pThis->ExtFeat.n.u1X2ApicSup = 0;
6551 //pThis->ExtFeat.n.u1NoExecuteSup = 0;
6552 //pThis->ExtFeat.n.u1GstTranslateSup = 0;
6553 pThis->ExtFeat.n.u1InvAllSup = 1;
6554 //pThis->ExtFeat.n.u1GstVirtApicSup = 0;
6555 pThis->ExtFeat.n.u1HwErrorSup = 1;
6556 //pThis->ExtFeat.n.u1PerfCounterSup = 0;
6557 AssertCompile((IOMMU_MAX_HOST_PT_LEVEL & 0x3) < 3);
6558 pThis->ExtFeat.n.u2HostAddrTranslateSize = (IOMMU_MAX_HOST_PT_LEVEL & 0x3);
6559 //pThis->ExtFeat.n.u2GstAddrTranslateSize = 0; /* Requires GstTranslateSup */
6560 //pThis->ExtFeat.n.u2GstCr3RootTblLevel = 0; /* Requires GstTranslateSup */
6561 //pThis->ExtFeat.n.u2SmiFilterSup = 0;
6562 //pThis->ExtFeat.n.u3SmiFilterCount = 0;
6563 //pThis->ExtFeat.n.u3GstVirtApicModeSup = 0; /* Requires GstVirtApicSup */
6564 //pThis->ExtFeat.n.u2DualPprLogSup = 0;
6565 //pThis->ExtFeat.n.u2DualEvtLogSup = 0;
6566 //pThis->ExtFeat.n.u5MaxPasidSup = 0; /* Requires GstTranslateSup */
6567 //pThis->ExtFeat.n.u1UserSupervisorSup = 0;
6568 AssertCompile(IOMMU_MAX_DEV_TAB_SEGMENTS <= 3);
6569 pThis->ExtFeat.n.u2DevTabSegSup = IOMMU_MAX_DEV_TAB_SEGMENTS;
6570 //pThis->ExtFeat.n.u1PprLogOverflowWarn = 0;
6571 //pThis->ExtFeat.n.u1PprAutoRespSup = 0;
6572 //pThis->ExtFeat.n.u2MarcSup = 0;
6573 //pThis->ExtFeat.n.u1BlockStopMarkSup = 0;
6574 //pThis->ExtFeat.n.u1PerfOptSup = 0;
6575 pThis->ExtFeat.n.u1MsiCapMmioSup = 1;
6576 //pThis->ExtFeat.n.u1GstIoSup = 0;
6577 //pThis->ExtFeat.n.u1HostAccessSup = 0;
6578 //pThis->ExtFeat.n.u1EnhancedPprSup = 0;
6579 //pThis->ExtFeat.n.u1AttrForwardSup = 0;
6580 //pThis->ExtFeat.n.u1HostDirtySup = 0;
6581 //pThis->ExtFeat.n.u1InvIoTlbTypeSup = 0;
6582 //pThis->ExtFeat.n.u1GstUpdateDisSup = 0;
6583 //pThis->ExtFeat.n.u1ForcePhysDstSup = 0;
6584
6585 pThis->RsvdReg = 0;
6586
6587 pThis->DevSpecificFeat.u64 = 0;
6588 pThis->DevSpecificFeat.n.u4RevMajor = IOMMU_DEVSPEC_FEAT_MAJOR_VERSION;
6589 pThis->DevSpecificFeat.n.u4RevMinor = IOMMU_DEVSPEC_FEAT_MINOR_VERSION;
6590
6591 pThis->DevSpecificCtrl.u64 = 0;
6592 pThis->DevSpecificCtrl.n.u4RevMajor = IOMMU_DEVSPEC_CTRL_MAJOR_VERSION;
6593 pThis->DevSpecificCtrl.n.u4RevMinor = IOMMU_DEVSPEC_CTRL_MINOR_VERSION;
6594
6595 pThis->DevSpecificStatus.u64 = 0;
6596 pThis->DevSpecificStatus.n.u4RevMajor = IOMMU_DEVSPEC_STATUS_MAJOR_VERSION;
6597 pThis->DevSpecificStatus.n.u4RevMinor = IOMMU_DEVSPEC_STATUS_MINOR_VERSION;
6598
6599 pThis->MiscInfo.u64 = RT_MAKE_U64(uMiscInfoReg0, uMiscInfoReg1);
6600
6601 /*
6602 * Initialize parts of the IOMMU state as it would during reset.
6603 * Also initializes non-zero initial values like IRTE cache keys.
6604 * Must be called -after- initializing PCI config. space registers.
6605 */
6606 iommuAmdR3Reset(pDevIns);
6607
6608 LogRel(("%s: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX,
6609 pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor,
6610 pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor,
6611 pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor,
6612 pThis->ExtFeat.u64));
6613 return VINF_SUCCESS;
6614}
6615
6616#else
6617
6618/**
6619 * @callback_method_impl{PDMDEVREGR0,pfnConstruct}
6620 */
6621static DECLCALLBACK(int) iommuAmdRZConstruct(PPDMDEVINS pDevIns)
6622{
6623 PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
6624 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
6625 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
6626 pThisCC->CTX_SUFF(pDevIns) = pDevIns;
6627
6628 /* We will use PDM's critical section (via helpers) for the IOMMU device. */
6629 int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns));
6630 AssertRCReturn(rc, rc);
6631
6632 /* Set up the MMIO RZ handlers. */
6633 rc = PDMDevHlpMmioSetUpContext(pDevIns, pThis->hMmio, iommuAmdMmioWrite, iommuAmdMmioRead, NULL /* pvUser */);
6634 AssertRCReturn(rc, rc);
6635
6636 /* Set up the IOMMU RZ callbacks. */
6637 PDMIOMMUREGCC IommuReg;
6638 RT_ZERO(IommuReg);
6639 IommuReg.u32Version = PDM_IOMMUREGCC_VERSION;
6640 IommuReg.idxIommu = pThis->idxIommu;
6641 IommuReg.pfnMemAccess = iommuAmdMemAccess;
6642 IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess;
6643 IommuReg.pfnMsiRemap = iommuAmdMsiRemap;
6644 IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION;
6645 rc = PDMDevHlpIommuSetUpContext(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp));
6646 AssertRCReturn(rc, rc);
6647 AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp), VERR_IOMMU_IPE_1);
6648 AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32Version == CTX_SUFF(PDM_IOMMUHLP)_VERSION, VERR_VERSION_MISMATCH);
6649 AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd == CTX_SUFF(PDM_IOMMUHLP)_VERSION, VERR_VERSION_MISMATCH);
6650 AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnLock, VERR_INVALID_POINTER);
6651 AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnUnlock, VERR_INVALID_POINTER);
6652 return VINF_SUCCESS;
6653}
6654#endif
6655
6656
6657/**
6658 * The device registration structure.
6659 */
6660const PDMDEVREG g_DeviceIommuAmd =
6661{
6662 /* .u32Version = */ PDM_DEVREG_VERSION,
6663 /* .uReserved0 = */ 0,
6664 /* .szName = */ "iommu-amd",
6665 /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE,
6666 /* .fClass = */ PDM_DEVREG_CLASS_PCI_BUILTIN,
6667 /* .cMaxInstances = */ ~0U,
6668 /* .uSharedVersion = */ 42,
6669 /* .cbInstanceShared = */ sizeof(IOMMU),
6670 /* .cbInstanceCC = */ sizeof(IOMMUCC),
6671 /* .cbInstanceRC = */ sizeof(IOMMURC),
6672 /* .cMaxPciDevices = */ 1,
6673 /* .cMaxMsixVectors = */ 0,
6674 /* .pszDescription = */ "IOMMU (AMD)",
6675#if defined(IN_RING3)
6676 /* .pszRCMod = */ "VBoxDDRC.rc",
6677 /* .pszR0Mod = */ "VBoxDDR0.r0",
6678 /* .pfnConstruct = */ iommuAmdR3Construct,
6679 /* .pfnDestruct = */ iommuAmdR3Destruct,
6680 /* .pfnRelocate = */ NULL,
6681 /* .pfnMemSetup = */ NULL,
6682 /* .pfnPowerOn = */ NULL,
6683 /* .pfnReset = */ iommuAmdR3Reset,
6684 /* .pfnSuspend = */ NULL,
6685 /* .pfnResume = */ NULL,
6686 /* .pfnAttach = */ NULL,
6687 /* .pfnDetach = */ NULL,
6688 /* .pfnQueryInterface = */ NULL,
6689 /* .pfnInitComplete = */ NULL,
6690 /* .pfnPowerOff = */ NULL,
6691 /* .pfnSoftReset = */ NULL,
6692 /* .pfnReserved0 = */ NULL,
6693 /* .pfnReserved1 = */ NULL,
6694 /* .pfnReserved2 = */ NULL,
6695 /* .pfnReserved3 = */ NULL,
6696 /* .pfnReserved4 = */ NULL,
6697 /* .pfnReserved5 = */ NULL,
6698 /* .pfnReserved6 = */ NULL,
6699 /* .pfnReserved7 = */ NULL,
6700#elif defined(IN_RING0)
6701 /* .pfnEarlyConstruct = */ NULL,
6702 /* .pfnConstruct = */ iommuAmdRZConstruct,
6703 /* .pfnDestruct = */ NULL,
6704 /* .pfnFinalDestruct = */ NULL,
6705 /* .pfnRequest = */ NULL,
6706 /* .pfnReserved0 = */ NULL,
6707 /* .pfnReserved1 = */ NULL,
6708 /* .pfnReserved2 = */ NULL,
6709 /* .pfnReserved3 = */ NULL,
6710 /* .pfnReserved4 = */ NULL,
6711 /* .pfnReserved5 = */ NULL,
6712 /* .pfnReserved6 = */ NULL,
6713 /* .pfnReserved7 = */ NULL,
6714#elif defined(IN_RC)
6715 /* .pfnConstruct = */ iommuAmdRZConstruct,
6716 /* .pfnReserved0 = */ NULL,
6717 /* .pfnReserved1 = */ NULL,
6718 /* .pfnReserved2 = */ NULL,
6719 /* .pfnReserved3 = */ NULL,
6720 /* .pfnReserved4 = */ NULL,
6721 /* .pfnReserved5 = */ NULL,
6722 /* .pfnReserved6 = */ NULL,
6723 /* .pfnReserved7 = */ NULL,
6724#else
6725# error "Not in IN_RING3, IN_RING0 or IN_RC!"
6726#endif
6727 /* .u32VersionEnd = */ PDM_DEVREG_VERSION
6728};
6729
6730#endif /* !VBOX_DEVICE_STRUCT_TESTCASE */
6731
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette