VirtualBox

source: vbox/trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp@ 87733

Last change on this file since 87733 was 87733, checked in by vboxsync, 4 years ago

AMD IOMMU: bugref:9654 PREFETCH_IOMMU_PAGES comment and nit.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 266.9 KB
Line 
1/* $Id: DevIommuAmd.cpp 87733 2021-02-12 13:40:48Z vboxsync $ */
2/** @file
3 * IOMMU - Input/Output Memory Management Unit - AMD implementation.
4 */
5
6/*
7 * Copyright (C) 2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_DEV_IOMMU
23#include <VBox/msi.h>
24#include <VBox/iommu-amd.h>
25#include <VBox/vmm/pdmdev.h>
26#include <VBox/AssertGuest.h>
27
28#include <iprt/x86.h>
29#include <iprt/string.h>
30#include <iprt/avl.h>
31#ifdef IN_RING3
32# include <iprt/mem.h>
33#endif
34
35#include "VBoxDD.h"
36#include "DevIommuAmd.h"
37
38
39/*********************************************************************************************************************************
40* Defined Constants And Macros *
41*********************************************************************************************************************************/
42/** Release log prefix string. */
43#define IOMMU_LOG_PFX "AMD-IOMMU"
44/** The current saved state version. */
45#define IOMMU_SAVED_STATE_VERSION 1
46/** The IOMMU device instance magic (stored in IOMMU::u32Magic and asserted by the IOTLB cache callbacks). */
47#define IOMMU_MAGIC 0x10acce55
48/** Enable the IOTLBE cache. */
49#define IOMMU_WITH_IOTLBE_CACHE
50
51#ifdef IOMMU_WITH_IOTLBE_CACHE
52/** The maximum number of DTE entries. */
53# define IOMMU_DTE_CACHE_MAX UINT16_MAX
54/** The maximum number of IOTLB entries. */
55# define IOMMU_IOTLBE_MAX 96
56/** The mask of bits covering the domain ID in the IOTLBE key (bits 63:40). */
57# define IOMMU_IOTLB_DOMAIN_ID_MASK UINT64_C(0xffffff0000000000)
58/** The mask of bits covering the IOVA in the IOTLBE key (bits 39:0). */
59# define IOMMU_IOTLB_IOVA_MASK (~IOMMU_IOTLB_DOMAIN_ID_MASK)
60/** The number of bits to shift for the domain ID of the IOTLBE key. */
61# define IOMMU_IOTLB_DOMAIN_ID_SHIFT 40
62/** The nil IOTLBE key value.
 * NOTE(review): 0 is also the key IOMMU_IOTLB_KEY_MAKE produces for domain ID 0
 * and IOVA page 0 - confirm that this cannot collide with a real entry. */
63# define IOMMU_IOTLB_KEY_NIL UINT64_C(0)
64/** Gets the domain ID from an IOTLB entry key. */
65# define IOMMU_IOTLB_KEY_GET_DOMAIN_ID(a_Key) ((a_Key) >> IOMMU_IOTLB_DOMAIN_ID_SHIFT)
66/** Gets the IOVA from the IOTLB entry key.
 * The result is 4K aligned since the page offset bits are not part of the key. */
67# define IOMMU_IOTLB_KEY_GET_IOVA(a_Key) (((a_Key) & IOMMU_IOTLB_IOVA_MASK) << X86_PAGE_4K_SHIFT)
68/** Makes an IOTLB entry key.
69 *
70 * Address bits 63:52 of the IOVA are zero extended, so top 12 bits are free.
71 * Address bits 11:0 of the IOVA are offset into the minimum page size of 4K,
72 * so bottom 12 bits are free.
73 *
74 * Thus we use the top 24 bits of key to hold bits 15:0 of the domain ID.
75 * We use the bottom 40 bits of the key to hold bits 51:12 of the IOVA.
 *
 * Note that a_uIova is shifted without a cast, so it should already be a
 * 64-bit unsigned value.
76 */
77# define IOMMU_IOTLB_KEY_MAKE(a_DomainId, a_uIova) ( ((uint64_t)(a_DomainId) << IOMMU_IOTLB_DOMAIN_ID_SHIFT) \
78 | (((a_uIova) >> X86_PAGE_4K_SHIFT) & IOMMU_IOTLB_IOVA_MASK))
79
80/** Acquires the cache lock.
 * On failure this asserts and executes 'return rcLock' in the calling function,
 * so it can only be used in functions that return a status code. */
81# define IOMMU_LOCK_CACHE(a_pDevIns, a_pThis) \
82 do { \
83 int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VERR_SEM_BUSY); \
84 if (rcLock == VINF_SUCCESS) \
85 { /* likely */ } \
86 else \
87 { \
88 AssertRC(rcLock); \
89 return rcLock; \
90 } \
91 } while (0)
92
93/** Acquires the cache lock (asserts on failure).
 * Unlike IOMMU_LOCK_CACHE, execution continues after a failed acquisition. */
94# define IOMMU_LOCK_CACHE_NORET(a_pDevIns, a_pThis) \
95 do { \
96 int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VERR_SEM_BUSY); \
97 AssertRC(rcLock); \
98 } while (0)
99
100/** Releases the cache lock. */
101# define IOMMU_UNLOCK_CACHE(a_pDevIns, a_pThis) PDMDevHlpCritSectLeave((a_pDevIns), &(a_pThis)->CritSectCache)
102#endif
103
104/** @name IOMMU_DEV_F_XXX: I/O device flags.
105 *
106 * Some of these flags are "basic" i.e. they correspond directly to their bits in
107 * the DTE. The rest of the flags are based on checks or operations on several DTE
108 * bits.
109 *
110 * The basic flags are:
111 * - VALID (DTE.V)
112 * - IO_PERM_READ (DTE.IR)
113 * - IO_PERM_WRITE (DTE.IW)
114 * - IO_PERM_RSVD (bit following DTE.IW reserved for future & to keep
115 * masking consistent)
116 * - SUPPRESS_ALL_IOPF (DTE.SA)
117 * - SUPPRESS_IOPF (DTE.SE)
118 * - INTR_MAP_VALID (DTE.IV)
119 * - IGNORE_UNMAPPED_INTR (DTE.IG)
120 *
121 * @sa iommuAmdGetBasicDevFlags()
122 * @{ */
123/** The DTE is present. */
124#define IOMMU_DEV_F_PRESENT RT_BIT(0)
125/** The DTE is valid. */
126#define IOMMU_DEV_F_VALID RT_BIT(1)
127/** DTE permissions apply for address translations. */
128#define IOMMU_DEV_F_IO_PERM RT_BIT(2)
129/** DTE permission - I/O read allowed. */
130#define IOMMU_DEV_F_IO_PERM_READ RT_BIT(3)
131/** DTE permission - I/O write allowed. */
132#define IOMMU_DEV_F_IO_PERM_WRITE RT_BIT(4)
133/** DTE permission - reserved. */
134#define IOMMU_DEV_F_IO_PERM_RSVD RT_BIT(5)
135/** Address translation required. */
136#define IOMMU_DEV_F_ADDR_TRANSLATE RT_BIT(6)
137/** Suppress all I/O page faults. */
138#define IOMMU_DEV_F_SUPPRESS_ALL_IOPF RT_BIT(7)
139/** Suppress I/O page faults. */
140#define IOMMU_DEV_F_SUPPRESS_IOPF RT_BIT(8)
141/** Interrupt map valid. */
142#define IOMMU_DEV_F_INTR_MAP_VALID RT_BIT(9)
143/** Ignore unmapped interrupts. */
144#define IOMMU_DEV_F_IGNORE_UNMAPPED_INTR RT_BIT(10)
145/** An I/O page fault has been raised for this device. */
146#define IOMMU_DEV_F_IO_PAGE_FAULT_RAISED RT_BIT(11)
147/** @} */
148/** The number of bits to shift I/O device flags for DTE permissions.
 * This is the bit position of IOMMU_DEV_F_IO_PERM_READ. */
149#define IOMMU_DEV_F_IO_PERM_SHIFT 3
150/** The mask of DTE permissions in I/O device flags.
 * Applied after shifting right by IOMMU_DEV_F_IO_PERM_SHIFT; it covers the read
 * and write permission bits. */
151#define IOMMU_DEV_F_IO_PERM_MASK 0x3
152
153/** Gets the page offset mask given the number of bits to shift.
 * The result has the low a_cShift bits set. a_cShift must be less than 64 as
 * the shift operates on a 64-bit value. */
154#define IOMMU_GET_PAGE_OFF_MASK(a_cShift) (~(UINT64_C(0xffffffffffffffff) << (a_cShift)))
155
156/** Acquires the PDM lock.
 * On failure this executes 'return rcLock' in the calling function, so it can
 * only be used in functions that return a status code. */
157#define IOMMU_LOCK(a_pDevIns, a_pThisCC) \
158 do { \
159 int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VERR_SEM_BUSY); \
160 if (RT_LIKELY(rcLock == VINF_SUCCESS)) \
161 { /* likely */ } \
162 else \
163 return rcLock; \
164 } while (0)
165
166/** Acquires the PDM lock (asserts on failure).
 * Unlike IOMMU_LOCK, execution continues after a failed acquisition. */
167#define IOMMU_LOCK_NORET(a_pDevIns, a_pThisCC) \
168 do { \
169 int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VERR_SEM_BUSY); \
170 AssertRC(rcLock); \
171 } while (0)
172
173/** Releases the PDM lock. */
174# define IOMMU_UNLOCK(a_pDevIns, a_pThisCC) (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnUnlock((a_pDevIns))
175
176
177/*********************************************************************************************************************************
178* Structures and Typedefs *
179*********************************************************************************************************************************/
180/**
181 * IOMMU operation (transaction).
 *
 * Identifies the kind of request being processed, see IOMMUOPAUX::enmOp.
182 */
183typedef enum IOMMUOP
184{
185 /** Address translation request. */
186 IOMMUOP_TRANSLATE_REQ = 0,
187 /** Memory read request. */
188 IOMMUOP_MEM_READ,
189 /** Memory write request. */
190 IOMMUOP_MEM_WRITE,
191 /** Interrupt request. */
192 IOMMUOP_INTR_REQ,
193 /** Command. */
194 IOMMUOP_CMD
195} IOMMUOP;
196/** Pointer to an IOMMU operation. */
197typedef IOMMUOP *PIOMMUOP;
198
199/**
200 * I/O page lookup.
 *
 * Holds the result of translating one I/O page. It is embedded in the IOTLBE
 * cache entry and filled in by FNIOPAGELOOKUP callbacks.
201 */
202typedef struct IOPAGELOOKUP
203{
204 /** The translated system physical address. */
205 RTGCPHYS GCPhysSpa;
206 /** The number of offset bits in the system physical address (the page size is 2^cShift bytes). */
207 uint8_t cShift;
208 /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
209 uint8_t fPerm;
210} IOPAGELOOKUP;
211/** Pointer to an I/O page lookup. */
212typedef IOPAGELOOKUP *PIOPAGELOOKUP;
213/** Pointer to a const I/O page lookup. */
214typedef IOPAGELOOKUP const *PCIOPAGELOOKUP;
215
216/**
217 * I/O address range.
 *
 * Describes an access as a start address, a length in bytes and the
 * permissions that go with it.
218 */
219typedef struct IOADDRRANGE
220{
221 /** The address (virtual or physical). */
222 uint64_t uAddr;
223 /** The size of the access in bytes. */
224 size_t cb;
225 /** The I/O permissions for this address range, see IOMMU_IO_PERM_XXX. */
226 uint8_t fPerm;
227} IOADDRRANGE;
228/** Pointer to an I/O address range. */
229typedef IOADDRRANGE *PIOADDRRANGE;
230/** Pointer to a const I/O address range. */
231typedef IOADDRRANGE const *PCIOADDRRANGE;
232
233/**
234 * IOMMU I/O Device.
235 * Used for caching as well as passing flags to events.
 *
 * The structure is asserted to be exactly 4 bytes.
236 */
237typedef struct IODEVICE
238{
239 /** This device's flags, see IOMMU_DEV_F_XXX. */
240 uint16_t fFlags;
241 /** The domain ID assigned for this device by software. */
242 uint16_t uDomainId;
243} IODEVICE;
244/** Pointer to an I/O device struct. */
245typedef IODEVICE *PIODEVICE;
246/** Pointer to a const I/O device struct.
 * NOTE(review): this typedef is not const-qualified (it is identical to
 * PIODEVICE) although the other PCxxx typedefs in this file are. It probably
 * should be 'IODEVICE const *' - confirm that no user writes through it before
 * changing it. */
247typedef IODEVICE *PCIODEVICE;
248AssertCompileSize(IODEVICE, 4);
249
250#ifdef IOMMU_WITH_IOTLBE_CACHE
251/**
252 * IOMMU I/O TLB Entry.
253 * Keep this as small and aligned as possible.
 *
 * The AVL node core must remain the first member (asserted below): the AVL
 * enumeration callbacks in this file cast the PAVLU64NODECORE they receive
 * directly to an IOTLBE pointer.
254 */
255typedef struct IOTLBE
256{
257 /** The AVL tree node (its key is built with IOMMU_IOTLB_KEY_MAKE). */
258 AVLU64NODECORE Core;
259 /** The least recently used (LRU) list node. */
260 RTLISTNODE NdLru;
261 /** The I/O page lookup results of the translation. */
262 IOPAGELOOKUP PageLookup;
263 /** Whether the entry needs to be evicted from the cache. */
264 bool fEvictPending;
265} IOTLBE;
266/** Pointer to an IOMMU I/O TLB entry struct. */
267typedef IOTLBE *PIOTLBE;
268/** Pointer to a const IOMMU I/O TLB entry struct. */
269typedef IOTLBE const *PCIOTLBE;
270AssertCompileSizeAlignment(IOTLBE, 8);
271AssertCompileMemberOffset(IOTLBE, Core, 0);
272#endif /* IOMMU_WITH_IOTLBE_CACHE */
273
274/**
275 * The shared IOMMU device state.
 *
 * Contains the command thread synchronization state, the translation cache
 * (only when IOMMU_WITH_IOTLBE_CACHE is defined), the emulated PCI and MMIO
 * registers and, when VBOX_WITH_STATISTICS is defined, the statistics counters.
 * The alignment of several members is verified by the compile-time assertions
 * following the structure.
276 */
277typedef struct IOMMU
278{
279 /** IOMMU device index (0 is at the top of the PCI tree hierarchy). */
280 uint32_t idxIommu;
281 /** IOMMU magic. */
282 uint32_t u32Magic;
283
284 /** Whether the command thread is sleeping. */
285 bool volatile fCmdThreadSleeping;
286 /** Alignment padding (fCmdThreadSignaled is asserted to be 4-byte aligned). */
287 uint8_t afPadding0[3];
288 /** Whether the command thread has been signaled for wake up. */
289 bool volatile fCmdThreadSignaled;
290 /** Alignment padding (hEvtCmdThread is asserted to be 8-byte aligned). */
291 uint8_t afPadding1[3];
292
293 /** The event semaphore the command thread waits on. */
294 SUPSEMEVENT hEvtCmdThread;
295 /** The MMIO handle. */
296 IOMMMIOHANDLE hMmio;
297
298#ifdef IOMMU_WITH_IOTLBE_CACHE
299 /** The critsect that protects the cache from concurrent access. */
300 PDMCRITSECT CritSectCache;
301 /** L1 Cache - Maps [DeviceId] to [DomainId] (each IODEVICE entry also holds the device flags). */
302 PIODEVICE paDevices;
303 /** Pointer to array of pre-allocated IOTLBEs. */
304 PIOTLBE paIotlbes;
305 /** L2 Cache - Maps [DomainId,Iova] to [IOTLBE]. */
306 AVLU64TREE TreeIotlbe;
307 /** LRU list anchor for IOTLB entries. */
308 RTLISTANCHOR LstLruIotlbe;
309 /** Index of the next unused IOTLB. */
310 uint32_t idxUnusedIotlbe;
311 /** Number of cached IOTLB entries in the tree. */
312 uint32_t cCachedIotlbes;
313#endif
314
315 /** @name PCI: Base capability block registers.
316 * @{ */
317 IOMMU_BAR_T IommuBar; /**< IOMMU base address register. */
318 /** @} */
319
320 /** @name MMIO: Control and status registers.
321 * @{ */
322 DEV_TAB_BAR_T aDevTabBaseAddrs[8]; /**< Device table base address registers. */
323 CMD_BUF_BAR_T CmdBufBaseAddr; /**< Command buffer base address register. */
324 EVT_LOG_BAR_T EvtLogBaseAddr; /**< Event log base address register. */
325 IOMMU_CTRL_T Ctrl; /**< IOMMU control register. */
326 IOMMU_EXCL_RANGE_BAR_T ExclRangeBaseAddr; /**< IOMMU exclusion range base register. */
327 IOMMU_EXCL_RANGE_LIMIT_T ExclRangeLimit; /**< IOMMU exclusion range limit. */
328 IOMMU_EXT_FEAT_T ExtFeat; /**< IOMMU extended feature register. */
329 /** @} */
330
331 /** @name MMIO: Peripheral Page Request (PPR) Log registers.
332 * @{ */
333 PPR_LOG_BAR_T PprLogBaseAddr; /**< PPR Log base address register. */
334 IOMMU_HW_EVT_HI_T HwEvtHi; /**< IOMMU hardware event register (Hi). */
335 IOMMU_HW_EVT_LO_T HwEvtLo; /**< IOMMU hardware event register (Lo). */
336 IOMMU_HW_EVT_STATUS_T HwEvtStatus; /**< IOMMU hardware event status. */
337 /** @} */
338
339 /** @todo IOMMU: SMI filter. */
340
341 /** @name MMIO: Guest Virtual-APIC Log registers.
342 * @{ */
343 GALOG_BAR_T GALogBaseAddr; /**< Guest Virtual-APIC Log base address register. */
344 GALOG_TAIL_ADDR_T GALogTailAddr; /**< Guest Virtual-APIC Log Tail address register. */
345 /** @} */
346
347 /** @name MMIO: Alternate PPR and Event Log registers.
348 * @{ */
349 PPR_LOG_B_BAR_T PprLogBBaseAddr; /**< PPR Log B base address register. */
350 EVT_LOG_B_BAR_T EvtLogBBaseAddr; /**< Event Log B base address register. */
351 /** @} */
352
353 /** @name MMIO: Device-specific feature registers.
354 * @{ */
355 DEV_SPECIFIC_FEAT_T DevSpecificFeat; /**< Device-specific feature extension register (DSFX). */
356 DEV_SPECIFIC_CTRL_T DevSpecificCtrl; /**< Device-specific control extension register (DSCX). */
357 DEV_SPECIFIC_STATUS_T DevSpecificStatus; /**< Device-specific status extension register (DSSX). */
358 /** @} */
359
360 /** @name MMIO: MSI Capability Block registers.
361 * @{ */
362 MSI_MISC_INFO_T MiscInfo; /**< MSI Misc. info registers / MSI Vector registers. */
363 /** @} */
364
365 /** @name MMIO: Performance Optimization Control registers.
366 * @{ */
367 IOMMU_PERF_OPT_CTRL_T PerfOptCtrl; /**< IOMMU Performance optimization control register. */
368 /** @} */
369
370 /** @name MMIO: x2APIC Control registers.
371 * @{ */
372 IOMMU_XT_GEN_INTR_CTRL_T XtGenIntrCtrl; /**< IOMMU X2APIC General interrupt control register. */
373 IOMMU_XT_PPR_INTR_CTRL_T XtPprIntrCtrl; /**< IOMMU X2APIC PPR interrupt control register. */
374 IOMMU_XT_GALOG_INTR_CTRL_T XtGALogIntrCtrl; /**< IOMMU X2APIC Guest Log interrupt control register. */
375 /** @} */
376
377 /** @name MMIO: Memory Address Routing & Control (MARC) registers.
378 * @{ */
379 MARC_APER_T aMarcApers[4]; /**< MARC Aperture Registers. */
380 /** @} */
381
382 /** @name MMIO: Reserved register.
383 * @{ */
384 IOMMU_RSVD_REG_T RsvdReg; /**< IOMMU Reserved Register. */
385 /** @} */
386
387 /** @name MMIO: Command and Event Log pointer registers.
388 * @{ */
389 CMD_BUF_HEAD_PTR_T CmdBufHeadPtr; /**< Command buffer head pointer register. */
390 CMD_BUF_TAIL_PTR_T CmdBufTailPtr; /**< Command buffer tail pointer register. */
391 EVT_LOG_HEAD_PTR_T EvtLogHeadPtr; /**< Event log head pointer register. */
392 EVT_LOG_TAIL_PTR_T EvtLogTailPtr; /**< Event log tail pointer register. */
393 /** @} */
394
395 /** @name MMIO: Command and Event Status register.
396 * @{ */
397 IOMMU_STATUS_T Status; /**< IOMMU status register. */
398 /** @} */
399
400 /** @name MMIO: PPR Log Head and Tail pointer registers.
401 * @{ */
402 PPR_LOG_HEAD_PTR_T PprLogHeadPtr; /**< IOMMU PPR log head pointer register. */
403 PPR_LOG_TAIL_PTR_T PprLogTailPtr; /**< IOMMU PPR log tail pointer register. */
404 /** @} */
405
406 /** @name MMIO: Guest Virtual-APIC Log Head and Tail pointer registers.
407 * @{ */
408 GALOG_HEAD_PTR_T GALogHeadPtr; /**< Guest Virtual-APIC log head pointer register. */
409 GALOG_TAIL_PTR_T GALogTailPtr; /**< Guest Virtual-APIC log tail pointer register. */
410 /** @} */
411
412 /** @name MMIO: PPR Log B Head and Tail pointer registers.
413 * @{ */
414 PPR_LOG_B_HEAD_PTR_T PprLogBHeadPtr; /**< PPR log B head pointer register. */
415 PPR_LOG_B_TAIL_PTR_T PprLogBTailPtr; /**< PPR log B tail pointer register. */
416 /** @} */
417
418 /** @name MMIO: Event Log B Head and Tail pointer registers.
419 * @{ */
420 EVT_LOG_B_HEAD_PTR_T EvtLogBHeadPtr; /**< Event log B head pointer register. */
421 EVT_LOG_B_TAIL_PTR_T EvtLogBTailPtr; /**< Event log B tail pointer register. */
422 /** @} */
423
424 /** @name MMIO: PPR Log Overflow protection registers.
425 * @{ */
426 PPR_LOG_AUTO_RESP_T PprLogAutoResp; /**< PPR Log Auto Response register. */
427 PPR_LOG_OVERFLOW_EARLY_T PprLogOverflowEarly; /**< PPR Log Overflow Early Indicator register. */
428 PPR_LOG_B_OVERFLOW_EARLY_T PprLogBOverflowEarly; /**< PPR Log B Overflow Early Indicator register. */
429 /** @} */
430
431 /** @todo IOMMU: IOMMU Event counter registers. */
432
433#ifdef VBOX_WITH_STATISTICS
434 /** @name IOMMU: Stat counters.
435 * @{ */
436 STAMCOUNTER StatMmioReadR3; /**< Number of MMIO reads in R3. */
437 STAMCOUNTER StatMmioReadRZ; /**< Number of MMIO reads in RZ. */
438 STAMCOUNTER StatMmioWriteR3; /**< Number of MMIO writes in R3. */
439 STAMCOUNTER StatMmioWriteRZ; /**< Number of MMIO writes in RZ. */
440
441 STAMCOUNTER StatMsiRemapR3; /**< Number of MSI remap requests in R3. */
442 STAMCOUNTER StatMsiRemapRZ; /**< Number of MSI remap requests in RZ. */
443
444 STAMCOUNTER StatMemReadR3; /**< Number of memory read translation requests in R3. */
445 STAMCOUNTER StatMemReadRZ; /**< Number of memory read translation requests in RZ. */
446 STAMCOUNTER StatMemWriteR3; /**< Number of memory write translation requests in R3. */
447 STAMCOUNTER StatMemWriteRZ; /**< Number of memory write translation requests in RZ. */
448
449 STAMCOUNTER StatMemBulkReadR3; /**< Number of memory read bulk translation requests in R3. */
450 STAMCOUNTER StatMemBulkReadRZ; /**< Number of memory read bulk translation requests in RZ. */
451 STAMCOUNTER StatMemBulkWriteR3; /**< Number of memory write bulk translation requests in R3. */
452 STAMCOUNTER StatMemBulkWriteRZ; /**< Number of memory write bulk translation requests in RZ. */
453
454 STAMCOUNTER StatCmd; /**< Number of commands processed in total. */
455 STAMCOUNTER StatCmdCompWait; /**< Number of Completion Wait commands processed. */
456 STAMCOUNTER StatCmdInvDte; /**< Number of Invalidate DTE commands processed. */
457 STAMCOUNTER StatCmdInvIommuPages; /**< Number of Invalidate IOMMU pages commands processed. */
458 STAMCOUNTER StatCmdInvIotlbPages; /**< Number of Invalidate IOTLB pages commands processed. */
459 STAMCOUNTER StatCmdInvIntrTable; /**< Number of Invalidate Interrupt Table commands processed. */
460 STAMCOUNTER StatCmdPrefIommuPages; /**< Number of Prefetch IOMMU Pages commands processed. */
461 STAMCOUNTER StatCmdCompletePprReq; /**< Number of Complete PPR Requests commands processed. */
462 STAMCOUNTER StatCmdInvIommuAll; /**< Number of Invalidate IOMMU All commands processed. */
463
464 STAMCOUNTER StatIotlbeCached; /**< Number of IOTLB entries in the cache. */
465 STAMCOUNTER StatIotlbeLazyEvictReuse; /**< Number of IOTLB entries re-used after lazy eviction. */
466
467 STAMPROFILEADV StatProfIotlbeLookup; /**< Profiling of IOTLB entry lookup (from cache). */
468 STAMPROFILEADV StatProfDteLookup; /**< Profiling of I/O page walk (from memory). */
469
470 STAMCOUNTER StatAccessCacheHit; /**< Number of IOTLB cache hits. */
471 STAMCOUNTER StatAccessCacheHitFull; /**< Number of accesses that were fully looked up from the cache. */
472 STAMCOUNTER StatAccessCacheMiss; /**< Number of cache misses (resulting in DTE lookups). */
473 STAMCOUNTER StatAccessCacheNonContig; /**< Number of cache accesses resulting in non-contiguous access. */
474 STAMCOUNTER StatAccessCachePermDenied; /**< Number of cache accesses resulting in insufficient permissions. */
475 STAMCOUNTER StatAccessDteNonContig; /**< Number of DTE accesses resulting in non-contiguous access. */
476 STAMCOUNTER StatAccessDtePermDenied; /**< Number of DTE accesses resulting in insufficient permissions. */
477 /** @} */
478#endif
479} IOMMU;
480/** Pointer to the IOMMU device state. */
481typedef struct IOMMU *PIOMMU;
482/** Pointer to the const IOMMU device state. */
483typedef const struct IOMMU *PCIOMMU;
/* Compile-time layout checks for the members whose alignment matters. */
484AssertCompileMemberAlignment(IOMMU, fCmdThreadSleeping, 4);
485AssertCompileMemberAlignment(IOMMU, fCmdThreadSignaled, 4);
486AssertCompileMemberAlignment(IOMMU, hEvtCmdThread, 8);
487AssertCompileMemberAlignment(IOMMU, hMmio, 8);
488#ifdef IOMMU_WITH_IOTLBE_CACHE
489AssertCompileMemberAlignment(IOMMU, paDevices, 8);
490AssertCompileMemberAlignment(IOMMU, paIotlbes, 8);
491AssertCompileMemberAlignment(IOMMU, TreeIotlbe, 8);
492AssertCompileMemberAlignment(IOMMU, LstLruIotlbe, 8);
493#endif
494AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
495AssertCompileMemberAlignment(IOMMU, aDevTabBaseAddrs, 8);
496AssertCompileMemberAlignment(IOMMU, CmdBufHeadPtr, 8);
497AssertCompileMemberAlignment(IOMMU, Status, 8);
498
499/**
500 * The ring-3 IOMMU device state.
 *
 * Besides the device instance and helper pointers it holds the handle of the
 * command thread.
501 */
502typedef struct IOMMUR3
503{
504 /** Device instance. */
505 PPDMDEVINSR3 pDevInsR3;
506 /** The IOMMU helpers (accessed as CTX_SUFF(pIommuHlp), e.g. by the IOMMU_LOCK macros). */
507 R3PTRTYPE(PCPDMIOMMUHLPR3) pIommuHlpR3;
508 /** The command thread handle. */
509 R3PTRTYPE(PPDMTHREAD) pCmdThread;
510} IOMMUR3;
511/** Pointer to the ring-3 IOMMU device state. */
512typedef IOMMUR3 *PIOMMUR3;
513
514/**
515 * The ring-0 IOMMU device state.
 *
 * Holds only the device instance and the IOMMU helper pointers.
516 */
517typedef struct IOMMUR0
518{
519 /** Device instance. */
520 PPDMDEVINSR0 pDevInsR0;
521 /** The IOMMU helpers (accessed as CTX_SUFF(pIommuHlp), e.g. by the IOMMU_LOCK macros). */
522 R0PTRTYPE(PCPDMIOMMUHLPR0) pIommuHlpR0;
523} IOMMUR0;
524/** Pointer to the ring-0 IOMMU device state. */
525typedef IOMMUR0 *PIOMMUR0;
526
527/**
528 * The raw-mode IOMMU device state.
 *
 * Holds only the device instance and the IOMMU helper pointers.
529 */
530typedef struct IOMMURC
531{
532 /** Device instance. */
533 PPDMDEVINSRC pDevInsRC;
534 /** The IOMMU helpers (accessed as CTX_SUFF(pIommuHlp), e.g. by the IOMMU_LOCK macros). */
535 RCPTRTYPE(PCPDMIOMMUHLPRC) pIommuHlpRC;
536} IOMMURC;
537/** Pointer to the raw-mode IOMMU device state. */
538typedef IOMMURC *PIOMMURC;
539
540/** The IOMMU device state for the current context.
 * CTX_SUFF() presumably appends the R3/R0/RC suffix of the context being
 * compiled, selecting IOMMUR3, IOMMUR0 or IOMMURC - confirm against IPRT. */
541typedef CTX_SUFF(IOMMU) IOMMUCC;
542/** Pointer to the IOMMU device state for the current context. */
543typedef CTX_SUFF(PIOMMU) PIOMMUCC;
544
545/**
546 * IOMMU register access.
 *
 * Associates a register name with the handlers used to read and write it.
547 */
548typedef struct IOMMUREGACC
549{
 /** The register name. */
550 const char *pszName;
 /** Read handler; receives the register offset and stores the value read in
 * *pu64Value. */
551 VBOXSTRICTRC (*pfnRead)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value);
 /** Write handler; receives the register offset and the value to write. */
552 VBOXSTRICTRC (*pfnWrite)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value);
553} IOMMUREGACC;
554/** Pointer to an IOMMU register access. */
555typedef IOMMUREGACC *PIOMMUREGACC;
556/** Pointer to a const IOMMU register access. */
557typedef IOMMUREGACC const *PCIOMMUREGACC;
558
559#ifdef IOMMU_WITH_IOTLBE_CACHE
560/**
561 * IOTLBE flush argument.
 *
 * Passed as the user argument to iommuAmdIotlbEntryRemoveDomainId().
562 */
563typedef struct IOTLBEFLUSHARG
564{
565 /** The IOMMU device state. */
566 PIOMMU pIommu;
567 /** The domain ID to flush. */
568 uint16_t uDomainId;
569} IOTLBEFLUSHARG;
570/** Pointer to an IOTLBE flush argument. */
571typedef IOTLBEFLUSHARG *PIOTLBEFLUSHARG;
572/** Pointer to a const IOTLBE flush argument. */
573typedef IOTLBEFLUSHARG const *PCIOTLBEFLUSHARG;
574
575/**
576 * IOTLBE Info. argument.
 *
 * Passed as the user argument to iommuAmdR3IotlbEntryInfo().
577 */
578typedef struct IOTLBEINFOARG
579{
580 /** The IOMMU device state. */
581 PIOMMU pIommu;
582 /** The info helper. */
583 PCDBGFINFOHLP pHlp;
584 /** The domain ID whose IOTLB entries are to be dumped. */
585 uint16_t uDomainId;
586} IOTLBEINFOARG;
587/** Pointer to an IOTLBE info argument. */
588typedef IOTLBEINFOARG *PIOTLBEINFOARG;
589/** Pointer to a const IOTLBE info argument. */
590typedef IOTLBEINFOARG const *PCIOTLBEINFOARG;
591#endif
592
593/**
594 * IOMMU operation auxiliary info.
 *
 * Passed to FNIOPAGELOOKUP callbacks to describe the operation being performed.
595 */
596typedef struct IOMMUOPAUX
597{
598 /** The IOMMU operation being performed. */
599 IOMMUOP enmOp;
600 /** The device table entry (can be NULL). */
601 PCDTE_T pDte;
602 /** The device ID. */
603 uint16_t uDeviceId;
604 /** The domain ID (when the DTE isn't provided). */
605 uint16_t uDomainId;
606} IOMMUOPAUX;
607/** Pointer to an IOMMU operation auxiliary info struct. */
608typedef IOMMUOPAUX *PIOMMUOPAUX;
609/** Pointer to a const IOMMU operation auxiliary info struct. */
610typedef IOMMUOPAUX const *PCIOMMUOPAUX;
611
/**
 * I/O page lookup callback.
 *
 * NOTE(review): the parameter descriptions below are inferred from the
 * parameter names and types - confirm against the implementations.
 *
 * @returns An int, presumably a VBox status code.
 * @param   pDevIns         The IOMMU device instance.
 * @param   uIovaPage       The I/O virtual address of the page to look up.
 * @param   fPerm           The I/O permissions for the access, see IOMMU_IO_PERM_XXX.
 * @param   pAux            The auxiliary info of the operation.
 * @param   pPageLookup     Where to store the result of the lookup.
 */
612typedef DECLCALLBACKTYPE(int, FNIOPAGELOOKUP,(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
613 PIOPAGELOOKUP pPageLookup));
/** Pointer to an I/O page lookup callback. */
614typedef FNIOPAGELOOKUP *PFNIOPAGELOOKUP;
615
616
617/*********************************************************************************************************************************
618* Global Variables *
619*********************************************************************************************************************************/
620/**
621 * An array of the number of device table segments supported.
622 * Indexed by u2DevTabSegSup.
623 */
624static uint8_t const g_acDevTabSegs[] = { 0, 2, 4, 8 };
625
626/**
627 * An array of the masks to select the device table segment index from a device ID.
 * Each mask covers the top 0, 1, 2 or 3 bits of the 16-bit device ID and pairs
 * with the shift at the same index in g_auDevTabSegShifts.
 * Presumably indexed by u2DevTabSegSup like g_acDevTabSegs - confirm.
628 */
629static uint16_t const g_auDevTabSegMasks[] = { 0x0, 0x8000, 0xc000, 0xe000 };
630
631/**
632 * An array of the shift values to select the device table segment index from a
633 * device ID.
 * Shifting a masked device ID right by the value at the same index moves the
 * selected top bits down to bit 0.
634 */
635static uint8_t const g_auDevTabSegShifts[] = { 0, 15, 14, 13 };
636
637/**
638 * The maximum size (inclusive) of each device table segment (0 to 7).
639 * Indexed by the device table segment index.
640 */
641static uint16_t const g_auDevTabSegMaxSizes[] = { 0x1ff, 0xff, 0x7f, 0x7f, 0x3f, 0x3f, 0x3f, 0x3f };
642
643/**
644 * The IOMMU I/O permission names.
 * Indexed by the I/O permission bits (masked with IOMMU_IO_PERM_MASK), see
 * iommuAmdMemAccessGetPermName().
645 */
646static const char * const g_aszPerm[] = { "none", "read", "write", "read+write" };
647
648
649#ifndef VBOX_DEVICE_STRUCT_TESTCASE
650/**
651 * Gets the maximum number of buffer entries for the given buffer length.
652 *
653 * @returns Number of buffer entries.
654 * @param uEncodedLen The length (power-of-2 encoded).
655 */
656DECLINLINE(uint32_t) iommuAmdGetBufMaxEntries(uint8_t uEncodedLen)
657{
658 Assert(uEncodedLen > 7);
659 Assert(uEncodedLen < 16);
660 return 2 << (uEncodedLen - 1);
661}
662
663
664/**
665 * Gets the total length of the buffer given a base register's encoded length.
666 *
667 * @returns The length of the buffer in bytes.
668 * @param uEncodedLen The length (power-of-2 encoded).
669 */
670DECLINLINE(uint32_t) iommuAmdGetTotalBufLength(uint8_t uEncodedLen)
671{
672 Assert(uEncodedLen > 7);
673 Assert(uEncodedLen < 16);
674 return (2 << (uEncodedLen - 1)) << 4;
675}
676
677
678/**
679 * Gets the number of (unconsumed) entries in the event log.
680 *
681 * @returns The number of entries in the event log.
682 * @param pThis The IOMMU device state.
683 */
684static uint32_t iommuAmdGetEvtLogEntryCount(PIOMMU pThis)
685{
686 uint32_t const idxTail = pThis->EvtLogTailPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
687 uint32_t const idxHead = pThis->EvtLogHeadPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
688 if (idxTail >= idxHead)
689 return idxTail - idxHead;
690
691 uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
692 return cMaxEvts - idxHead + idxTail;
693}
694
695
696/**
697 * Gets the descriptive I/O permission name for a memory access.
698 *
699 * @returns The I/O permission name.
700 * @param fPerm The I/O permissions for the access, see IOMMU_IO_PERM_XXX.
701 */
702static const char *iommuAmdMemAccessGetPermName(uint8_t fPerm)
703{
704 /* We shouldn't construct an access with "none" or "read+write" (must be read or write) permissions. */
705 Assert(fPerm > 0 && fPerm < RT_ELEMENTS(g_aszPerm));
706 return g_aszPerm[fPerm & IOMMU_IO_PERM_MASK];
707}
708
709
710#if 0
711/**
712 * Gets the number of (unconsumed) commands in the command buffer.
 *
 * This function is currently compiled out by the surrounding @c \#if 0 block.
 * It mirrors iommuAmdGetEvtLogEntryCount(), operating on the command buffer
 * head/tail pointers instead of the event log ones.
713 *
714 * @returns The number of commands in the command buffer.
715 * @param pThis The IOMMU device state.
716 */
717static uint32_t iommuAmdGetCmdBufEntryCount(PIOMMU pThis)
718{
719 uint32_t const idxTail = pThis->CmdBufTailPtr.n.off >> IOMMU_CMD_GENERIC_SHIFT;
720 uint32_t const idxHead = pThis->CmdBufHeadPtr.n.off >> IOMMU_CMD_GENERIC_SHIFT;
 /* No wrap-around: the pending commands are those between head and tail. */
721 if (idxTail >= idxHead)
722 return idxTail - idxHead;
723
 /* The tail wrapped around the end of the buffer. */
724 uint32_t const cMaxCmds = iommuAmdGetBufMaxEntries(pThis->CmdBufBaseAddr.n.u4Len);
725 return cMaxCmds - idxHead + idxTail;
726}
727#endif
728
729
730/**
731 * Checks whether two consecutive I/O page lookup results translates to a physically
732 * contiguous region.
733 *
734 * @returns @c true if they are contiguous, @c false otherwise.
735 * @param pPageLookupPrev The I/O page lookup result of the previous page.
736 * @param pPageLookup The I/O page lookup result of the current page.
737 */
738static bool iommuAmdLookupIsAccessContig(PCIOPAGELOOKUP pPageLookupPrev, PCIOPAGELOOKUP pPageLookup)
739{
740 Assert(pPageLookupPrev->fPerm == pPageLookup->fPerm);
741 size_t const cbPrev = RT_BIT_64(pPageLookupPrev->cShift);
742 RTGCPHYS const GCPhysPrev = pPageLookupPrev->GCPhysSpa;
743 RTGCPHYS const GCPhys = pPageLookup->GCPhysSpa;
744 uint64_t const offMaskPrev = IOMMU_GET_PAGE_OFF_MASK(pPageLookupPrev->cShift);
745 uint64_t const offMask = IOMMU_GET_PAGE_OFF_MASK(pPageLookup->cShift);
746
747 /* Paranoia: Ensure offset bits are 0. */
748 Assert(!(GCPhysPrev & offMaskPrev));
749 Assert(!(GCPhys & offMask));
750
751 if ((GCPhysPrev & ~offMaskPrev) + cbPrev == (GCPhys & ~offMask))
752 return true;
753 return false;
754}
755
756
757/**
758 * Gets the basic I/O device flags for the given device table entry.
759 *
760 * @returns The basic I/O device flags.
761 * @param pDte The device table entry.
762 */
763static uint16_t iommuAmdGetBasicDevFlags(PCDTE_T pDte)
764{
765 /* Extract basic flags from bits 127:0 of the DTE. */
766 uint16_t fFlags = 0;
767 if (pDte->n.u1Valid)
768 {
769 fFlags |= IOMMU_DEV_F_VALID;
770
771 if (pDte->n.u1SuppressAllPfEvents)
772 fFlags |= IOMMU_DEV_F_SUPPRESS_ALL_IOPF;
773 if (pDte->n.u1SuppressPfEvents)
774 fFlags |= IOMMU_DEV_F_SUPPRESS_IOPF;
775
776 uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
777 AssertCompile(IOMMU_DEV_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);
778 fFlags |= fDtePerm << IOMMU_DEV_F_IO_PERM_SHIFT;
779 }
780
781 /* Extract basic flags from bits 255:128 of the DTE. */
782 if (pDte->n.u1IntrMapValid)
783 {
784 fFlags |= IOMMU_DEV_F_INTR_MAP_VALID;
785 if (pDte->n.u1IgnoreUnmappedIntrs)
786 fFlags |= IOMMU_DEV_F_IGNORE_UNMAPPED_INTR;
787 }
788 return fFlags;
789}
790
791
792#ifdef IOMMU_WITH_IOTLBE_CACHE
793/**
794 * Moves the IOTLB entry to the least recently used slot.
795 *
796 * @param pThis The IOMMU device state.
797 * @param pIotlbe The IOTLB entry.
798 */
799static void iommuAmdIotlbEntryMoveToLru(PIOMMU pThis, PIOTLBE pIotlbe)
800{
801 if (!RTListNodeIsFirst(&pThis->LstLruIotlbe, &pIotlbe->NdLru))
802 {
803 RTListNodeRemove(&pIotlbe->NdLru);
804 RTListPrepend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
805 }
806}
807
808
809/**
810 * Moves the IOTLB entry to the most recently used slot.
811 *
812 * @param pThis The IOMMU device state.
813 * @param pIotlbe The IOTLB entry.
814 */
815static void iommuAmdIotlbEntryMoveToMru(PIOMMU pThis, PIOTLBE pIotlbe)
816{
817 if (!RTListNodeIsLast(&pThis->LstLruIotlbe, &pIotlbe->NdLru))
818 {
819 RTListNodeRemove(&pIotlbe->NdLru);
820 RTListAppend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
821 }
822}
823
824
825#ifdef IN_RING3
826/**
827 * Dumps the IOTLB entry via the debug info helper.
828 *
829 * @returns VINF_SUCCESS.
830 * @param pNode Pointer to an IOTLBE.
831 * @param pvUser Pointer to an IOTLBEINFOARG.
832 */
833static DECLCALLBACK(int) iommuAmdR3IotlbEntryInfo(PAVLU64NODECORE pNode, void *pvUser)
834{
835 /* Validate. */
836 PCIOTLBEINFOARG pArgs = (PCIOTLBEINFOARG)pvUser;
837 AssertPtr(pArgs);
838 AssertPtr(pArgs->pIommu);
839 AssertPtr(pArgs->pHlp);
840 Assert(pArgs->pIommu->u32Magic == IOMMU_MAGIC);
841
842 uint16_t const uDomainId = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
843 if (uDomainId == pArgs->uDomainId)
844 {
845 PCIOTLBE pIotlbe = (PCIOTLBE)pNode;
846 AVLU64KEY const uKey = pIotlbe->Core.Key;
847 uint64_t const uIova = IOMMU_IOTLB_KEY_GET_IOVA(uKey);
848 RTGCPHYS const GCPhysSpa = pIotlbe->PageLookup.GCPhysSpa;
849 uint8_t const cShift = pIotlbe->PageLookup.cShift;
850 size_t const cbPage = RT_BIT_64(cShift);
851 uint8_t const fPerm = pIotlbe->PageLookup.fPerm;
852 const char *pszPerm = iommuAmdMemAccessGetPermName(fPerm);
853 bool const fEvictPending = pIotlbe->fEvictPending;
854
855 PCDBGFINFOHLP pHlp = pArgs->pHlp;
856 pHlp->pfnPrintf(pHlp, " Key = %#RX64 (%#RX64)\n", uKey, uIova);
857 pHlp->pfnPrintf(pHlp, " GCPhys = %#RGp\n", GCPhysSpa);
858 pHlp->pfnPrintf(pHlp, " cShift = %u (%zu bytes)\n", cShift, cbPage);
859 pHlp->pfnPrintf(pHlp, " fPerm = %#x (%s)\n", fPerm, pszPerm);
860 pHlp->pfnPrintf(pHlp, " fEvictPending = %RTbool\n", fEvictPending);
861 }
862
863 return VINF_SUCCESS;
864}
865#endif /* IN_RING3 */
866
867
868/**
869 * Removes the IOTLB entry if it's associated with the specified domain ID.
870 *
871 * @returns VINF_SUCCESS.
872 * @param pNode Pointer to an IOTLBE.
873 * @param pvUser Pointer to an IOTLBEFLUSHARG containing the domain ID.
874 */
875static DECLCALLBACK(int) iommuAmdIotlbEntryRemoveDomainId(PAVLU64NODECORE pNode, void *pvUser)
876{
877 /* Validate. */
878 PCIOTLBEFLUSHARG pArgs = (PCIOTLBEFLUSHARG)pvUser;
879 AssertPtr(pArgs);
880 AssertPtr(pArgs->pIommu);
881 Assert(pArgs->pIommu->u32Magic == IOMMU_MAGIC);
882
883 uint16_t const uDomainId = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
884 if (uDomainId == pArgs->uDomainId)
885 {
886 /* Mark this entry is as invalidated and needs to be evicted later. */
887 PIOTLBE pIotlbe = (PIOTLBE)pNode;
888 pIotlbe->fEvictPending = true;
889 iommuAmdIotlbEntryMoveToLru(pArgs->pIommu, (PIOTLBE)pNode);
890 }
891 return VINF_SUCCESS;
892}
893
894
895/**
896 * Inserts an IOTLB entry into the cache.
897 *
898 * @param pThis The IOMMU device state.
899 * @param pIotlbe The IOTLB entry to initialize and insert.
900 * @param uDomainId The domain ID.
901 * @param uIova The I/O virtual address.
902 * @param pPageLookup The I/O page lookup result of the access.
903 */
904static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOTLBE pIotlbe, uint16_t uDomainId, uint64_t uIova,
905 PCIOPAGELOOKUP pPageLookup)
906{
907 /* Initialize the IOTLB entry with results of the I/O page walk. */
908 pIotlbe->Core.Key = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
909 pIotlbe->PageLookup = *pPageLookup;
910
911 /* Validate. */
912 Assert(pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL);
913 Assert(!pIotlbe->fEvictPending);
914
915 /* Check if the entry already exists. */
916 PIOTLBE pFound = (PIOTLBE)RTAvlU64Get(&pThis->TreeIotlbe, pIotlbe->Core.Key);
917 if (!pFound)
918 {
919 /* Insert the entry into the cache. */
920 bool const fInserted = RTAvlU64Insert(&pThis->TreeIotlbe, &pIotlbe->Core);
921 Assert(fInserted); NOREF(fInserted);
922 Assert(pThis->cCachedIotlbes < IOMMU_IOTLBE_MAX);
923 ++pThis->cCachedIotlbes;
924 STAM_COUNTER_INC(&pThis->StatIotlbeCached);
925 }
926 else
927 {
928 /* Update the existing entry. */
929 if (pFound->fEvictPending)
930 {
931 pFound->fEvictPending = false;
932 STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse);
933 }
934 Assert(pFound->PageLookup.cShift == pPageLookup->cShift);
935 pFound->PageLookup.fPerm = pPageLookup->fPerm;
936 pFound->PageLookup.GCPhysSpa = pPageLookup->GCPhysSpa;
937 }
938}
939
940
941/**
942 * Removes an IOTLB entry from the cache for the given key.
943 *
944 * @returns Pointer to the removed IOTLB entry, NULL if the entry wasn't found in
945 * the tree.
946 * @param pThis The IOMMU device state.
947 * @param uKey The key of the IOTLB entry to remove.
948 */
949static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, AVLU64KEY uKey)
950{
951 PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Remove(&pThis->TreeIotlbe, uKey);
952 if (pIotlbe)
953 {
954 if (pIotlbe->fEvictPending)
955 STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse);
956
957 RT_ZERO(pIotlbe->Core);
958 RT_ZERO(pIotlbe->PageLookup);
959 /* We must not erase the LRU node connections here! */
960 pIotlbe->fEvictPending = false;
961 Assert(pIotlbe->Core.Key == IOMMU_IOTLB_KEY_NIL);
962
963 Assert(pThis->cCachedIotlbes > 0);
964 --pThis->cCachedIotlbes;
965 STAM_COUNTER_DEC(&pThis->StatIotlbeCached);
966 }
967 return pIotlbe;
968}
969
970
971/**
972 * Looks up an IOTLB from the cache.
973 *
974 * @returns Pointer to IOTLB entry if found, NULL otherwise.
975 * @param pThis The IOMMU device state.
976 * @param uDomainId The domain ID.
977 * @param uIova The I/O virtual address.
978 */
979static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, uint64_t uDomainId, uint64_t uIova)
980{
981 uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
982 PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThis->TreeIotlbe, uKey);
983 if ( pIotlbe
984 && !pIotlbe->fEvictPending)
985 return pIotlbe;
986
987 /*
988 * Domain Id wildcard invalidations only marks entries for eviction later but doesn't remove
989 * them from the cache immediately. We found an entry pending eviction, just return that
990 * nothing was found (rather than evicting now).
991 */
992 return NULL;
993}
994
995
996/**
997 * Adds an IOTLB entry to the cache.
998 *
999 * @param pThis The IOMMU device state.
1000 * @param uDomainId The domain ID.
1001 * @param uIova The I/O virtual address.
1002 * @param pPageLookup The I/O page lookup result of the access.
1003 */
1004static void iommuAmdIotlbAdd(PIOMMU pThis, uint16_t uDomainId, uint64_t uIova, PCIOPAGELOOKUP pPageLookup)
1005{
1006 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
1007 Assert(pPageLookup);
1008 Assert(pPageLookup->cShift <= 31);
1009 Assert(pPageLookup->fPerm != IOMMU_IO_PERM_NONE);
1010
1011 /*
1012 * If there are no unused IOTLB entries, evict the LRU entry.
1013 * Otherwise, get a new IOTLB entry from the pre-allocated list.
1014 */
1015 if (pThis->idxUnusedIotlbe == IOMMU_IOTLBE_MAX)
1016 {
1017 /* Grab the least recently used entry. */
1018 PIOTLBE pIotlbe = RTListGetFirst(&pThis->LstLruIotlbe, IOTLBE, NdLru);
1019 Assert(pIotlbe);
1020
1021 /* If the entry is in the cache, remove it. */
1022 if (pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL)
1023 iommuAmdIotlbEntryRemove(pThis, pIotlbe->Core.Key);
1024
1025 /* Initialize and insert the IOTLB entry into the cache. */
1026 iommuAmdIotlbEntryInsert(pThis, pIotlbe, uDomainId, uIova, pPageLookup);
1027
1028 /* Move the entry to the most recently used slot. */
1029 iommuAmdIotlbEntryMoveToMru(pThis, pIotlbe);
1030 }
1031 else
1032 {
1033 /* Grab an unused IOTLB entry from the pre-allocated list. */
1034 PIOTLBE pIotlbe = &pThis->paIotlbes[pThis->idxUnusedIotlbe];
1035 ++pThis->idxUnusedIotlbe;
1036
1037 /* Initialize and insert the IOTLB entry into the cache. */
1038 iommuAmdIotlbEntryInsert(pThis, pIotlbe, uDomainId, uIova, pPageLookup);
1039
1040 /* Add the entry to the most recently used slot. */
1041 RTListAppend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
1042 }
1043}
1044
1045
1046/**
1047 * Removes all IOTLB entries from the cache.
1048 *
1049 * @param pDevIns The IOMMU instance data.
1050 */
1051static void iommuAmdIotlbRemoveAll(PPDMDEVINS pDevIns)
1052{
1053 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1054 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1055
1056 if (pThis->cCachedIotlbes > 0)
1057 {
1058 pThis->idxUnusedIotlbe = 0;
1059 size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
1060 RT_BZERO(pThis->paIotlbes, cbIotlbes);
1061 pThis->cCachedIotlbes = 0;
1062 STAM_COUNTER_RESET(&pThis->StatIotlbeCached);
1063 RTListInit(&pThis->LstLruIotlbe);
1064 }
1065
1066 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1067}
1068
1069
1070/**
1071 * Removes IOTLB entries for the range of I/O virtual addresses and the specified
1072 * domain ID from the cache.
1073 *
1074 * @param pDevIns The IOMMU instance data.
1075 * @param uDomainId The domain ID.
1076 * @param uIova The I/O virtual address to invalidate.
1077 * @param cbInvalidate The size of the invalidation (must be 4K aligned).
1078 */
1079static void iommuAmdIotlbRemoveRange(PPDMDEVINS pDevIns, uint16_t uDomainId, uint64_t uIova, size_t cbInvalidate)
1080{
1081 /* Validate. */
1082 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
1083 Assert(!(cbInvalidate & X86_PAGE_4K_OFFSET_MASK));
1084 Assert(cbInvalidate >= X86_PAGE_4K_SIZE);
1085
1086 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1087 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1088
1089 do
1090 {
1091 uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
1092 PIOTLBE pIotlbe = iommuAmdIotlbEntryRemove(pThis, uKey);
1093 if (pIotlbe)
1094 iommuAmdIotlbEntryMoveToLru(pThis, pIotlbe);
1095 uIova += X86_PAGE_4K_SIZE;
1096 cbInvalidate -= X86_PAGE_4K_SIZE;
1097 } while (cbInvalidate > 0);
1098
1099 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1100}
1101
1102
1103/**
1104 * Removes all IOTLB entries for the specified domain ID.
1105 *
1106 * @param pDevIns The IOMMU instance data.
1107 * @param uDomainId The domain ID.
1108 */
1109static void iommuAmdIotlbRemoveDomainId(PPDMDEVINS pDevIns, uint16_t uDomainId)
1110{
1111 /*
1112 * We need to iterate the tree and search based on the domain ID.
1113 * But it seems we cannot remove items while iterating the tree.
1114 * Thus, we simply mark entries for eviction later but move them to the LRU
1115 * so they will eventually get evicted and re-cycled as the cache gets re-populated.
1116 */
1117 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1118 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1119
1120 IOTLBEFLUSHARG Args;
1121 Args.pIommu = pThis;
1122 Args.uDomainId = uDomainId;
1123 RTAvlU64DoWithAll(&pThis->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &Args);
1124
1125 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1126}
1127
1128
1129/**
1130 * Adds or updates IOTLB entries for the given range of I/O virtual addresses.
1131 *
1132 * @param pDevIns The IOMMU instance data.
1133 * @param uDomainId The domain ID.
1134 * @param uIova The I/O virtual address.
1135 * @param cbAccess The size of the access (must be 4K aligned).
1136 * @param GCPhysSpa The translated system-physical address.
1137 * @param fPerm The I/O permissions for the access, see IOMMU_IO_PERM_XXX.
1138 */
1139static void iommuAmdIotlbAddRange(PPDMDEVINS pDevIns, uint16_t uDomainId, uint64_t uIova, size_t cbAccess, RTGCPHYS GCPhysSpa,
1140 uint8_t fPerm)
1141{
1142 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
1143 Assert(!(GCPhysSpa & X86_PAGE_4K_OFFSET_MASK));
1144 Assert(!(cbAccess & X86_PAGE_4K_OFFSET_MASK));
1145 Assert(cbAccess >= X86_PAGE_4K_SIZE);
1146
1147 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1148
1149 /* Add IOTLB entries for every page in the access. */
1150 IOPAGELOOKUP PageLookup;
1151 RT_ZERO(PageLookup);
1152 PageLookup.cShift = X86_PAGE_4K_SHIFT;
1153 PageLookup.fPerm = fPerm;
1154 PageLookup.GCPhysSpa = GCPhysSpa;
1155
1156 size_t cPages = cbAccess / X86_PAGE_4K_SIZE;
1157 cPages = RT_MIN(cPages, IOMMU_IOTLBE_MAX);
1158
1159 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1160 /** @todo Check level 1 cache? */
1161 do
1162 {
1163 iommuAmdIotlbAdd(pThis, uDomainId, uIova, &PageLookup);
1164 uIova += X86_PAGE_4K_SIZE;
1165 PageLookup.GCPhysSpa += X86_PAGE_4K_SIZE;
1166 --cPages;
1167 } while (cPages > 0);
1168 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1169}
1170
1171
1172/**
1173 * Updates the I/O device flags for the given device ID.
1174 *
1175 * @param pDevIns The IOMMU instance data.
1176 * @param uDevId The device ID (bus, device, function).
1177 * @param pDte The device table entry. Can be NULL only when @a fFlags is
1178 * 0.
1179 * @param fOrMask The device flags (usually compound flags) to OR in with the
1180 * basic flags, see IOMMU_DEV_F_XXX. Pass 0 to flush the DTE
1181 * from the cache.
1182 */
1183static void iommuAmdDteCacheUpdate(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, uint16_t fOrMask)
1184{
1185 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1186 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1187
1188 if (fOrMask & IOMMU_DEV_F_PRESENT)
1189 {
1190 Assert(pDte);
1191 pThis->paDevices[uDevId].fFlags = iommuAmdGetBasicDevFlags(pDte) | fOrMask;
1192 pThis->paDevices[uDevId].uDomainId = pDte->n.u16DomainId;
1193 }
1194 else
1195 {
1196 pThis->paDevices[uDevId].fFlags = 0;
1197 pThis->paDevices[uDevId].uDomainId = 0;
1198 }
1199
1200 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1201}
1202
1203
1204/**
1205 * Sets one or more I/O device flags if the device is present in the cache.
1206 *
1207 * @param pDevIns The IOMMU instance data.
1208 * @param uDevId The device ID (bus, device, function).
1209 * @param fDevIoFlags The device flags to set.
1210 */
1211static void iommuAmdDteCacheSetFlags(PPDMDEVINS pDevIns, uint16_t uDevId, uint16_t fDevIoFlags)
1212{
1213 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1214 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1215
1216 if (fDevIoFlags & IOMMU_DEV_F_PRESENT)
1217 pThis->paDevices[uDevId].fFlags |= fDevIoFlags;
1218
1219 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1220}
1221
1222
1223/**
1224 * Removes all entries in the device table entry cache.
1225 *
1226 * @param pDevIns The IOMMU instance data.
1227 */
1228static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns)
1229{
1230 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1231 IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
1232
1233 size_t const cbDevices = sizeof(IODEVICE) * IOMMU_DTE_CACHE_MAX;
1234 RT_BZERO(pThis->paDevices, cbDevices);
1235
1236 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
1237}
1238#endif /* IOMMU_WITH_IOTLBE_CACHE */
1239
1240
1241DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis)
1242{
1243 IOMMU_CTRL_T Ctrl;
1244 Ctrl.u64 = ASMAtomicReadU64((volatile uint64_t *)&pThis->Ctrl.u64);
1245 return Ctrl;
1246}
1247
1248
1249/**
1250 * Returns whether MSI is enabled for the IOMMU.
1251 *
1252 * @returns Whether MSI is enabled.
1253 * @param pDevIns The IOMMU device instance.
1254 *
1255 * @note There should be a PCIDevXxx function for this.
1256 */
1257static bool iommuAmdIsMsiEnabled(PPDMDEVINS pDevIns)
1258{
1259 MSI_CAP_HDR_T MsiCapHdr;
1260 MsiCapHdr.u32 = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_MSI_CAP_HDR);
1261 return MsiCapHdr.n.u1MsiEnable;
1262}
1263
1264
1265/**
1266 * Signals a PCI target abort.
1267 *
1268 * @param pDevIns The IOMMU device instance.
1269 */
1270static void iommuAmdSetPciTargetAbort(PPDMDEVINS pDevIns)
1271{
1272 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1273 uint16_t const u16Status = PDMPciDevGetStatus(pPciDev) | VBOX_PCI_STATUS_SIG_TARGET_ABORT;
1274 PDMPciDevSetStatus(pPciDev, u16Status);
1275}
1276
1277
1278/**
1279 * Wakes up the command thread if there are commands to be processed or if
1280 * processing is requested to be stopped by software.
1281 *
1282 * @param pDevIns The IOMMU device instance.
1283 *
1284 * @remarks The IOMMU lock must be held while calling this!
1285 */
1286static void iommuAmdCmdThreadWakeUpIfNeeded(PPDMDEVINS pDevIns)
1287{
1288 Log4Func(("\n"));
1289
1290 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1291 if (pThis->Status.n.u1CmdBufRunning)
1292 {
1293 Log4Func(("Signaling command thread\n"));
1294 PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
1295 }
1296}
1297
1298
1299/**
1300 * Reads the Device Table Base Address Register.
1301 */
1302static VBOXSTRICTRC iommuAmdDevTabBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1303{
1304 RT_NOREF(pDevIns, offReg);
1305 *pu64Value = pThis->aDevTabBaseAddrs[0].u64;
1306 return VINF_SUCCESS;
1307}
1308
1309
1310/**
1311 * Reads the Command Buffer Base Address Register.
1312 */
1313static VBOXSTRICTRC iommuAmdCmdBufBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1314{
1315 RT_NOREF(pDevIns, offReg);
1316 *pu64Value = pThis->CmdBufBaseAddr.u64;
1317 return VINF_SUCCESS;
1318}
1319
1320
1321/**
1322 * Reads the Event Log Base Address Register.
1323 */
1324static VBOXSTRICTRC iommuAmdEvtLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1325{
1326 RT_NOREF(pDevIns, offReg);
1327 *pu64Value = pThis->EvtLogBaseAddr.u64;
1328 return VINF_SUCCESS;
1329}
1330
1331
1332/**
1333 * Reads the Control Register.
1334 */
1335static VBOXSTRICTRC iommuAmdCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1336{
1337 RT_NOREF(pDevIns, offReg);
1338 *pu64Value = pThis->Ctrl.u64;
1339 return VINF_SUCCESS;
1340}
1341
1342
1343/**
1344 * Reads the Exclusion Range Base Address Register.
1345 */
1346static VBOXSTRICTRC iommuAmdExclRangeBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1347{
1348 RT_NOREF(pDevIns, offReg);
1349 *pu64Value = pThis->ExclRangeBaseAddr.u64;
1350 return VINF_SUCCESS;
1351}
1352
1353
1354/**
1355 * Reads to the Exclusion Range Limit Register.
1356 */
1357static VBOXSTRICTRC iommuAmdExclRangeLimit_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1358{
1359 RT_NOREF(pDevIns, offReg);
1360 *pu64Value = pThis->ExclRangeLimit.u64;
1361 return VINF_SUCCESS;
1362}
1363
1364
1365/**
1366 * Reads to the Extended Feature Register.
1367 */
1368static VBOXSTRICTRC iommuAmdExtFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1369{
1370 RT_NOREF(pDevIns, offReg);
1371 *pu64Value = pThis->ExtFeat.u64;
1372 return VINF_SUCCESS;
1373}
1374
1375
1376/**
1377 * Reads to the PPR Log Base Address Register.
1378 */
1379static VBOXSTRICTRC iommuAmdPprLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1380{
1381 RT_NOREF(pDevIns, offReg);
1382 *pu64Value = pThis->PprLogBaseAddr.u64;
1383 return VINF_SUCCESS;
1384}
1385
1386
1387/**
1388 * Writes the Hardware Event Register (Hi).
1389 */
1390static VBOXSTRICTRC iommuAmdHwEvtHi_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1391{
1392 RT_NOREF(pDevIns, offReg);
1393 *pu64Value = pThis->HwEvtHi.u64;
1394 return VINF_SUCCESS;
1395}
1396
1397
1398/**
1399 * Reads the Hardware Event Register (Lo).
1400 */
1401static VBOXSTRICTRC iommuAmdHwEvtLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1402{
1403 RT_NOREF(pDevIns, offReg);
1404 *pu64Value = pThis->HwEvtLo;
1405 return VINF_SUCCESS;
1406}
1407
1408
1409/**
1410 * Reads the Hardware Event Status Register.
1411 */
1412static VBOXSTRICTRC iommuAmdHwEvtStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1413{
1414 RT_NOREF(pDevIns, offReg);
1415 *pu64Value = pThis->HwEvtStatus.u64;
1416 return VINF_SUCCESS;
1417}
1418
1419
1420/**
1421 * Reads to the GA Log Base Address Register.
1422 */
1423static VBOXSTRICTRC iommuAmdGALogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1424{
1425 RT_NOREF(pDevIns, offReg);
1426 *pu64Value = pThis->GALogBaseAddr.u64;
1427 return VINF_SUCCESS;
1428}
1429
1430
1431/**
1432 * Reads to the PPR Log B Base Address Register.
1433 */
1434static VBOXSTRICTRC iommuAmdPprLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1435{
1436 RT_NOREF(pDevIns, offReg);
1437 *pu64Value = pThis->PprLogBBaseAddr.u64;
1438 return VINF_SUCCESS;
1439}
1440
1441
1442/**
1443 * Reads to the Event Log B Base Address Register.
1444 */
1445static VBOXSTRICTRC iommuAmdEvtLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1446{
1447 RT_NOREF(pDevIns, offReg);
1448 *pu64Value = pThis->EvtLogBBaseAddr.u64;
1449 return VINF_SUCCESS;
1450}
1451
1452
1453/**
1454 * Reads the Device Table Segment Base Address Register.
1455 */
1456static VBOXSTRICTRC iommuAmdDevTabSegBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1457{
1458 RT_NOREF(pDevIns);
1459
1460 /* Figure out which segment is being written. */
1461 uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
1462 uint8_t const idxSegment = offSegment + 1;
1463 Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
1464
1465 *pu64Value = pThis->aDevTabBaseAddrs[idxSegment].u64;
1466 return VINF_SUCCESS;
1467}
1468
1469
1470/**
1471 * Reads the Device Specific Feature Extension (DSFX) Register.
1472 */
1473static VBOXSTRICTRC iommuAmdDevSpecificFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1474{
1475 RT_NOREF(pDevIns, offReg);
1476 *pu64Value = pThis->DevSpecificFeat.u64;
1477 return VINF_SUCCESS;
1478}
1479
1480/**
1481 * Reads the Device Specific Control Extension (DSCX) Register.
1482 */
1483static VBOXSTRICTRC iommuAmdDevSpecificCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1484{
1485 RT_NOREF(pDevIns, offReg);
1486 *pu64Value = pThis->DevSpecificCtrl.u64;
1487 return VINF_SUCCESS;
1488}
1489
1490
1491/**
1492 * Reads the Device Specific Status Extension (DSSX) Register.
1493 */
1494static VBOXSTRICTRC iommuAmdDevSpecificStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1495{
1496 RT_NOREF(pDevIns, offReg);
1497 *pu64Value = pThis->DevSpecificStatus.u64;
1498 return VINF_SUCCESS;
1499}
1500
1501
1502/**
1503 * Reads the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
1504 */
1505static VBOXSTRICTRC iommuAmdDevMsiVector_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1506{
1507 RT_NOREF(pDevIns, offReg);
1508 uint32_t const uLo = pThis->MiscInfo.au32[0];
1509 uint32_t const uHi = pThis->MiscInfo.au32[1];
1510 *pu64Value = RT_MAKE_U64(uLo, uHi);
1511 return VINF_SUCCESS;
1512}
1513
1514
1515/**
1516 * Reads the MSI Capability Header Register (32-bit) and the MSI Address (Lo)
1517 * Register (32-bit).
1518 */
1519static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1520{
1521 RT_NOREF(pThis, offReg);
1522 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1523 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1524 uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
1525 uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
1526 *pu64Value = RT_MAKE_U64(uLo, uHi);
1527 return VINF_SUCCESS;
1528}
1529
1530
1531/**
1532 * Reads the MSI Address (Hi) Register (32-bit) and the MSI data register (32-bit).
1533 */
1534static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1535{
1536 RT_NOREF(pThis, offReg);
1537 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1538 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1539 uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
1540 uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
1541 *pu64Value = RT_MAKE_U64(uLo, uHi);
1542 return VINF_SUCCESS;
1543}
1544
1545
1546/**
1547 * Reads the Command Buffer Head Pointer Register.
1548 */
1549static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1550{
1551 RT_NOREF(pDevIns, offReg);
1552 *pu64Value = pThis->CmdBufHeadPtr.u64;
1553 return VINF_SUCCESS;
1554}
1555
1556
1557/**
1558 * Reads the Command Buffer Tail Pointer Register.
1559 */
1560static VBOXSTRICTRC iommuAmdCmdBufTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1561{
1562 RT_NOREF(pDevIns, offReg);
1563 *pu64Value = pThis->CmdBufTailPtr.u64;
1564 return VINF_SUCCESS;
1565}
1566
1567
1568/**
1569 * Reads the Event Log Head Pointer Register.
1570 */
1571static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1572{
1573 RT_NOREF(pDevIns, offReg);
1574 *pu64Value = pThis->EvtLogHeadPtr.u64;
1575 return VINF_SUCCESS;
1576}
1577
1578
1579/**
1580 * Reads the Event Log Tail Pointer Register.
1581 */
1582static VBOXSTRICTRC iommuAmdEvtLogTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1583{
1584 RT_NOREF(pDevIns, offReg);
1585 *pu64Value = pThis->EvtLogTailPtr.u64;
1586 return VINF_SUCCESS;
1587}
1588
1589
1590/**
1591 * Reads the Status Register.
1592 */
1593static VBOXSTRICTRC iommuAmdStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1594{
1595 RT_NOREF(pDevIns, offReg);
1596 *pu64Value = pThis->Status.u64;
1597 return VINF_SUCCESS;
1598}
1599
1600
1601/**
1602 * Writes the Device Table Base Address Register.
1603 */
1604static VBOXSTRICTRC iommuAmdDevTabBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1605{
1606 RT_NOREF(pDevIns, offReg);
1607
1608 /* Mask out all unrecognized bits. */
1609 u64Value &= IOMMU_DEV_TAB_BAR_VALID_MASK;
1610
1611 /* Update the register. */
1612 pThis->aDevTabBaseAddrs[0].u64 = u64Value;
1613
1614 /* Paranoia. */
1615 Assert(pThis->aDevTabBaseAddrs[0].n.u9Size <= g_auDevTabSegMaxSizes[0]);
1616 return VINF_SUCCESS;
1617}
1618
1619
1620/**
1621 * Writes the Command Buffer Base Address Register.
1622 */
1623static VBOXSTRICTRC iommuAmdCmdBufBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1624{
1625 RT_NOREF(pDevIns, offReg);
1626
1627 /*
1628 * While this is not explicitly specified like the event log base address register,
1629 * the AMD IOMMU spec. does specify "CmdBufRun must be 0b to modify the command buffer registers properly".
1630 * Inconsistent specs :/
1631 */
1632 if (pThis->Status.n.u1CmdBufRunning)
1633 {
1634 LogFunc(("Setting CmdBufBar (%#RX64) when command buffer is running -> Ignored\n", u64Value));
1635 return VINF_SUCCESS;
1636 }
1637
1638 /* Mask out all unrecognized bits. */
1639 CMD_BUF_BAR_T CmdBufBaseAddr;
1640 CmdBufBaseAddr.u64 = u64Value & IOMMU_CMD_BUF_BAR_VALID_MASK;
1641
1642 /* Validate the length. */
1643 if (CmdBufBaseAddr.n.u4Len >= 8)
1644 {
1645 /* Update the register. */
1646 pThis->CmdBufBaseAddr.u64 = CmdBufBaseAddr.u64;
1647
1648 /*
1649 * Writing the command buffer base address, clears the command buffer head and tail pointers.
1650 * See AMD IOMMU spec. 2.4 "Commands".
1651 */
1652 pThis->CmdBufHeadPtr.u64 = 0;
1653 pThis->CmdBufTailPtr.u64 = 0;
1654 }
1655 else
1656 LogFunc(("Command buffer length (%#x) invalid -> Ignored\n", CmdBufBaseAddr.n.u4Len));
1657
1658 return VINF_SUCCESS;
1659}
1660
1661
1662/**
1663 * Writes the Event Log Base Address Register.
1664 */
1665static VBOXSTRICTRC iommuAmdEvtLogBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1666{
1667 RT_NOREF(pDevIns, offReg);
1668
1669 /*
1670 * IOMMU behavior is undefined when software writes this register when event logging is running.
1671 * In our emulation, we ignore the write entirely.
1672 * See AMD IOMMU spec. "Event Log Base Address Register".
1673 */
1674 if (pThis->Status.n.u1EvtLogRunning)
1675 {
1676 LogFunc(("Setting EvtLogBar (%#RX64) when event logging is running -> Ignored\n", u64Value));
1677 return VINF_SUCCESS;
1678 }
1679
1680 /* Mask out all unrecognized bits. */
1681 u64Value &= IOMMU_EVT_LOG_BAR_VALID_MASK;
1682 EVT_LOG_BAR_T EvtLogBaseAddr;
1683 EvtLogBaseAddr.u64 = u64Value;
1684
1685 /* Validate the length. */
1686 if (EvtLogBaseAddr.n.u4Len >= 8)
1687 {
1688 /* Update the register. */
1689 pThis->EvtLogBaseAddr.u64 = EvtLogBaseAddr.u64;
1690
1691 /*
1692 * Writing the event log base address, clears the event log head and tail pointers.
1693 * See AMD IOMMU spec. 2.5 "Event Logging".
1694 */
1695 pThis->EvtLogHeadPtr.u64 = 0;
1696 pThis->EvtLogTailPtr.u64 = 0;
1697 }
1698 else
1699 LogFunc(("Event log length (%#x) invalid -> Ignored\n", EvtLogBaseAddr.n.u4Len));
1700
1701 return VINF_SUCCESS;
1702}
1703
1704
1705/**
1706 * Writes the Control Register.
1707 */
1708static VBOXSTRICTRC iommuAmdCtrl_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1709{
1710 RT_NOREF(pDevIns, offReg);
1711
1712 /* Mask out all unrecognized bits. */
1713 u64Value &= IOMMU_CTRL_VALID_MASK;
1714 IOMMU_CTRL_T NewCtrl;
1715 NewCtrl.u64 = u64Value;
1716
1717 /* Ensure the device table segments are within limits. */
1718 if (NewCtrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup)
1719 {
1720 IOMMU_CTRL_T const OldCtrl = pThis->Ctrl;
1721
1722 /* Update the register. */
1723 ASMAtomicWriteU64(&pThis->Ctrl.u64, NewCtrl.u64);
1724
1725 bool const fNewIommuEn = NewCtrl.n.u1IommuEn;
1726 bool const fOldIommuEn = OldCtrl.n.u1IommuEn;
1727
1728 /* Enable or disable event logging when the bit transitions. */
1729 bool const fOldEvtLogEn = OldCtrl.n.u1EvtLogEn;
1730 bool const fNewEvtLogEn = NewCtrl.n.u1EvtLogEn;
1731 if ( fOldEvtLogEn != fNewEvtLogEn
1732 || fOldIommuEn != fNewIommuEn)
1733 {
1734 if ( fNewIommuEn
1735 && fNewEvtLogEn)
1736 {
1737 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_OVERFLOW);
1738 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_RUNNING);
1739 }
1740 else
1741 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_RUNNING);
1742 }
1743
1744 /* Enable or disable command buffer processing when the bit transitions. */
1745 bool const fOldCmdBufEn = OldCtrl.n.u1CmdBufEn;
1746 bool const fNewCmdBufEn = NewCtrl.n.u1CmdBufEn;
1747 if ( fOldCmdBufEn != fNewCmdBufEn
1748 || fOldIommuEn != fNewIommuEn)
1749 {
1750 if ( fNewCmdBufEn
1751 && fNewIommuEn)
1752 {
1753 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_CMD_BUF_RUNNING);
1754 LogFunc(("Command buffer enabled\n"));
1755
1756 /* Wake up the command thread to start processing commands. */
1757 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
1758 }
1759 else
1760 {
1761 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
1762 LogFunc(("Command buffer disabled\n"));
1763 }
1764 }
1765 }
1766 else
1767 {
1768 LogFunc(("Invalid number of device table segments enabled, exceeds %#x (%#RX64) -> Ignored!\n",
1769 pThis->ExtFeat.n.u2DevTabSegSup, NewCtrl.u64));
1770 }
1771
1772 return VINF_SUCCESS;
1773}
1774
1775
1776/**
1777 * Writes to the Exclusion Range Base Address Register.
1778 */
1779static VBOXSTRICTRC iommuAmdExclRangeBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1780{
1781 RT_NOREF(pDevIns, offReg);
1782 pThis->ExclRangeBaseAddr.u64 = u64Value & IOMMU_EXCL_RANGE_BAR_VALID_MASK;
1783 return VINF_SUCCESS;
1784}
1785
1786
1787/**
1788 * Writes to the Exclusion Range Limit Register.
1789 */
1790static VBOXSTRICTRC iommuAmdExclRangeLimit_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1791{
1792 RT_NOREF(pDevIns, offReg);
1793 u64Value &= IOMMU_EXCL_RANGE_LIMIT_VALID_MASK;
1794 u64Value |= UINT64_C(0xfff);
1795 pThis->ExclRangeLimit.u64 = u64Value;
1796 return VINF_SUCCESS;
1797}
1798
1799
1800/**
1801 * Writes the Hardware Event Register (Hi).
1802 */
1803static VBOXSTRICTRC iommuAmdHwEvtHi_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1804{
1805 /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
1806 RT_NOREF(pDevIns, offReg);
1807 LogFlowFunc(("Writing %#RX64 to hardware event (Hi) register!\n", u64Value));
1808 pThis->HwEvtHi.u64 = u64Value;
1809 return VINF_SUCCESS;
1810}
1811
1812
1813/**
1814 * Writes the Hardware Event Register (Lo).
1815 */
1816static VBOXSTRICTRC iommuAmdHwEvtLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1817{
1818 /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
1819 RT_NOREF(pDevIns, offReg);
1820 LogFlowFunc(("Writing %#RX64 to hardware event (Lo) register!\n", u64Value));
1821 pThis->HwEvtLo = u64Value;
1822 return VINF_SUCCESS;
1823}
1824
1825
1826/**
1827 * Writes the Hardware Event Status Register.
1828 */
1829static VBOXSTRICTRC iommuAmdHwEvtStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1830{
1831 RT_NOREF(pDevIns, offReg);
1832
1833 /* Mask out all unrecognized bits. */
1834 u64Value &= IOMMU_HW_EVT_STATUS_VALID_MASK;
1835
1836 /*
1837 * The two bits (HEO and HEV) are RW1C (Read/Write 1-to-Clear; writing 0 has no effect).
1838 * If the current status bits or the bits being written are both 0, we've nothing to do.
1839 * The Overflow bit (bit 1) is only valid when the Valid bit (bit 0) is 1.
1840 */
1841 uint64_t HwStatus = pThis->HwEvtStatus.u64;
1842 if (!(HwStatus & RT_BIT(0)))
1843 return VINF_SUCCESS;
1844 if (u64Value & HwStatus & RT_BIT_64(0))
1845 HwStatus &= ~RT_BIT_64(0);
1846 if (u64Value & HwStatus & RT_BIT_64(1))
1847 HwStatus &= ~RT_BIT_64(1);
1848
1849 /* Update the register. */
1850 pThis->HwEvtStatus.u64 = HwStatus;
1851 return VINF_SUCCESS;
1852}
1853
1854
1855/**
1856 * Writes the Device Table Segment Base Address Register.
1857 */
1858static VBOXSTRICTRC iommuAmdDevTabSegBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1859{
1860 RT_NOREF(pDevIns);
1861
1862 /* Figure out which segment is being written. */
1863 uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
1864 uint8_t const idxSegment = offSegment + 1;
1865 Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
1866
1867 /* Mask out all unrecognized bits. */
1868 u64Value &= IOMMU_DEV_TAB_SEG_BAR_VALID_MASK;
1869 DEV_TAB_BAR_T DevTabSegBar;
1870 DevTabSegBar.u64 = u64Value;
1871
1872 /* Validate the size. */
1873 uint16_t const uSegSize = DevTabSegBar.n.u9Size;
1874 uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[idxSegment];
1875 if (uSegSize <= uMaxSegSize)
1876 {
1877 /* Update the register. */
1878 pThis->aDevTabBaseAddrs[idxSegment].u64 = u64Value;
1879 }
1880 else
1881 LogFunc(("Device table segment (%u) size invalid (%#RX32) -> Ignored\n", idxSegment, uSegSize));
1882
1883 return VINF_SUCCESS;
1884}
1885
1886
1887/**
1888 * Writes the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
1889 */
1890static VBOXSTRICTRC iommuAmdDevMsiVector_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1891{
1892 RT_NOREF(pDevIns, offReg);
1893
1894 /* MSI Vector Register 0 is read-only. */
1895 /* MSI Vector Register 1. */
1896 uint32_t const uReg = u64Value >> 32;
1897 pThis->MiscInfo.au32[1] = uReg & IOMMU_MSI_VECTOR_1_VALID_MASK;
1898 return VINF_SUCCESS;
1899}
1900
1901
1902/**
1903 * Writes the MSI Capability Header Register (32-bit) or the MSI Address (Lo)
1904 * Register (32-bit).
1905 */
1906static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1907{
1908 RT_NOREF(pThis, offReg);
1909 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1910 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1911
1912 /* MSI capability header. */
1913 {
1914 uint32_t const uReg = u64Value;
1915 MSI_CAP_HDR_T MsiCapHdr;
1916 MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
1917 MsiCapHdr.n.u1MsiEnable = RT_BOOL(uReg & IOMMU_MSI_CAP_HDR_MSI_EN_MASK);
1918 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR, MsiCapHdr.u32);
1919 }
1920
1921 /* MSI Address Lo. */
1922 {
1923 uint32_t const uReg = u64Value >> 32;
1924 uint32_t const uMsiAddrLo = uReg & VBOX_MSI_ADDR_VALID_MASK;
1925 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, uMsiAddrLo);
1926 }
1927
1928 return VINF_SUCCESS;
1929}
1930
1931
1932/**
1933 * Writes the MSI Address (Hi) Register (32-bit) or the MSI data register (32-bit).
1934 */
1935static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1936{
1937 RT_NOREF(pThis, offReg);
1938 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1939 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1940
1941 /* MSI Address Hi. */
1942 {
1943 uint32_t const uReg = u64Value;
1944 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, uReg);
1945 }
1946
1947 /* MSI Data. */
1948 {
1949 uint32_t const uReg = u64Value >> 32;
1950 uint32_t const uMsiData = uReg & VBOX_MSI_DATA_VALID_MASK;
1951 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, uMsiData);
1952 }
1953
1954 return VINF_SUCCESS;
1955}
1956
1957
1958/**
1959 * Writes the Command Buffer Head Pointer Register.
1960 */
1961static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
1962{
1963 RT_NOREF(pDevIns, offReg);
1964
1965 /*
1966 * IOMMU behavior is undefined when software writes this register when the command buffer is running.
1967 * In our emulation, we ignore the write entirely.
1968 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
1969 */
1970 if (pThis->Status.n.u1CmdBufRunning)
1971 {
1972 LogFunc(("Setting CmdBufHeadPtr (%#RX64) when command buffer is running -> Ignored\n", u64Value));
1973 return VINF_SUCCESS;
1974 }
1975
1976 /*
1977 * IOMMU behavior is undefined when software writes a value outside the buffer length.
1978 * In our emulation, we ignore the write entirely.
1979 */
1980 uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK;
1981 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
1982 Assert(cbBuf <= _512K);
1983 if (offBuf >= cbBuf)
1984 {
1985 LogFunc(("Setting CmdBufHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX23) -> Ignored\n", offBuf, cbBuf));
1986 return VINF_SUCCESS;
1987 }
1988
1989 /* Update the register. */
1990 pThis->CmdBufHeadPtr.au32[0] = offBuf;
1991
1992 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
1993
1994 Log4Func(("Set CmdBufHeadPtr to %#RX32\n", offBuf));
1995 return VINF_SUCCESS;
1996}
1997
1998
1999/**
2000 * Writes the Command Buffer Tail Pointer Register.
2001 */
2002static VBOXSTRICTRC iommuAmdCmdBufTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2003{
2004 RT_NOREF(pDevIns, offReg);
2005
2006 /*
2007 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2008 * In our emulation, we ignore the write entirely.
2009 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2010 */
2011 uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK;
2012 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
2013 Assert(cbBuf <= _512K);
2014 if (offBuf >= cbBuf)
2015 {
2016 LogFunc(("Setting CmdBufTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2017 return VINF_SUCCESS;
2018 }
2019
2020 /*
2021 * IOMMU behavior is undefined if software advances the tail pointer equal to or beyond the
2022 * head pointer after adding one or more commands to the buffer.
2023 *
2024 * However, we cannot enforce this strictly because it's legal for software to shrink the
2025 * command queue (by reducing the offset) as well as wrap around the pointer (when head isn't
2026 * at 0). Software might even make the queue empty by making head and tail equal which is
2027 * allowed. I don't think we can or should try too hard to prevent software shooting itself
2028 * in the foot here. As long as we make sure the offset value is within the circular buffer
2029 * bounds (which we do by masking bits above) it should be sufficient.
2030 */
2031 pThis->CmdBufTailPtr.au32[0] = offBuf;
2032
2033 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
2034
2035 Log4Func(("Set CmdBufTailPtr to %#RX32\n", offBuf));
2036 return VINF_SUCCESS;
2037}
2038
2039
2040/**
2041 * Writes the Event Log Head Pointer Register.
2042 */
2043static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2044{
2045 RT_NOREF(pDevIns, offReg);
2046
2047 /*
2048 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2049 * In our emulation, we ignore the write entirely.
2050 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2051 */
2052 uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK;
2053 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2054 Assert(cbBuf <= _512K);
2055 if (offBuf >= cbBuf)
2056 {
2057 LogFunc(("Setting EvtLogHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2058 return VINF_SUCCESS;
2059 }
2060
2061 /* Update the register. */
2062 pThis->EvtLogHeadPtr.au32[0] = offBuf;
2063
2064 LogFlowFunc(("Set EvtLogHeadPtr to %#RX32\n", offBuf));
2065 return VINF_SUCCESS;
2066}
2067
2068
2069/**
2070 * Writes the Event Log Tail Pointer Register.
2071 */
2072static VBOXSTRICTRC iommuAmdEvtLogTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2073{
2074 RT_NOREF(pDevIns, offReg);
2075 NOREF(pThis);
2076
2077 /*
2078 * IOMMU behavior is undefined when software writes this register when the event log is running.
2079 * In our emulation, we ignore the write entirely.
2080 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2081 */
2082 if (pThis->Status.n.u1EvtLogRunning)
2083 {
2084 LogFunc(("Setting EvtLogTailPtr (%#RX64) when event log is running -> Ignored\n", u64Value));
2085 return VINF_SUCCESS;
2086 }
2087
2088 /*
2089 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2090 * In our emulation, we ignore the write entirely.
2091 */
2092 uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK;
2093 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2094 Assert(cbBuf <= _512K);
2095 if (offBuf >= cbBuf)
2096 {
2097 LogFunc(("Setting EvtLogTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2098 return VINF_SUCCESS;
2099 }
2100
2101 /* Update the register. */
2102 pThis->EvtLogTailPtr.au32[0] = offBuf;
2103
2104 LogFlowFunc(("Set EvtLogTailPtr to %#RX32\n", offBuf));
2105 return VINF_SUCCESS;
2106}
2107
2108
2109/**
2110 * Writes the Status Register.
2111 */
2112static VBOXSTRICTRC iommuAmdStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2113{
2114 RT_NOREF(pDevIns, offReg);
2115
2116 /* Mask out all unrecognized bits. */
2117 u64Value &= IOMMU_STATUS_VALID_MASK;
2118
2119 /*
2120 * Compute RW1C (read-only, write-1-to-clear) bits and preserve the rest (which are read-only).
2121 * Writing 0 to an RW1C bit has no effect. Writing 1 to an RW1C bit, clears the bit if it's already 1.
2122 */
2123 IOMMU_STATUS_T const OldStatus = pThis->Status;
2124 uint64_t const fOldRw1cBits = (OldStatus.u64 & IOMMU_STATUS_RW1C_MASK);
2125 uint64_t const fOldRoBits = (OldStatus.u64 & ~IOMMU_STATUS_RW1C_MASK);
2126 uint64_t const fNewRw1cBits = (u64Value & IOMMU_STATUS_RW1C_MASK);
2127
2128 uint64_t const uNewStatus = (fOldRw1cBits & ~fNewRw1cBits) | fOldRoBits;
2129
2130 /* Update the register. */
2131 ASMAtomicWriteU64(&pThis->Status.u64, uNewStatus);
2132 return VINF_SUCCESS;
2133}
2134
2135
2136/**
2137 * Register access table 0.
2138 * The MMIO offset of each entry must be a multiple of 8!
2139 */
2140static const IOMMUREGACC g_aRegAccess0[] =
2141{
2142 /* MMIO off. Register name Read function Write function */
2143 { /* 0x00 */ "DEV_TAB_BAR", iommuAmdDevTabBar_r, iommuAmdDevTabBar_w },
2144 { /* 0x08 */ "CMD_BUF_BAR", iommuAmdCmdBufBar_r, iommuAmdCmdBufBar_w },
2145 { /* 0x10 */ "EVT_LOG_BAR", iommuAmdEvtLogBar_r, iommuAmdEvtLogBar_w },
2146 { /* 0x18 */ "CTRL", iommuAmdCtrl_r, iommuAmdCtrl_w },
2147 { /* 0x20 */ "EXCL_BAR", iommuAmdExclRangeBar_r, iommuAmdExclRangeBar_w },
2148 { /* 0x28 */ "EXCL_RANGE_LIMIT", iommuAmdExclRangeLimit_r, iommuAmdExclRangeLimit_w },
2149 { /* 0x30 */ "EXT_FEAT", iommuAmdExtFeat_r, NULL },
2150 { /* 0x38 */ "PPR_LOG_BAR", iommuAmdPprLogBar_r, NULL },
2151 { /* 0x40 */ "HW_EVT_HI", iommuAmdHwEvtHi_r, iommuAmdHwEvtHi_w },
2152 { /* 0x48 */ "HW_EVT_LO", iommuAmdHwEvtLo_r, iommuAmdHwEvtLo_w },
2153 { /* 0x50 */ "HW_EVT_STATUS", iommuAmdHwEvtStatus_r, iommuAmdHwEvtStatus_w },
2154 { /* 0x58 */ NULL, NULL, NULL },
2155
2156 { /* 0x60 */ "SMI_FLT_0", NULL, NULL },
2157 { /* 0x68 */ "SMI_FLT_1", NULL, NULL },
2158 { /* 0x70 */ "SMI_FLT_2", NULL, NULL },
2159 { /* 0x78 */ "SMI_FLT_3", NULL, NULL },
2160 { /* 0x80 */ "SMI_FLT_4", NULL, NULL },
2161 { /* 0x88 */ "SMI_FLT_5", NULL, NULL },
2162 { /* 0x90 */ "SMI_FLT_6", NULL, NULL },
2163 { /* 0x98 */ "SMI_FLT_7", NULL, NULL },
2164 { /* 0xa0 */ "SMI_FLT_8", NULL, NULL },
2165 { /* 0xa8 */ "SMI_FLT_9", NULL, NULL },
2166 { /* 0xb0 */ "SMI_FLT_10", NULL, NULL },
2167 { /* 0xb8 */ "SMI_FLT_11", NULL, NULL },
2168 { /* 0xc0 */ "SMI_FLT_12", NULL, NULL },
2169 { /* 0xc8 */ "SMI_FLT_13", NULL, NULL },
2170 { /* 0xd0 */ "SMI_FLT_14", NULL, NULL },
2171 { /* 0xd8 */ "SMI_FLT_15", NULL, NULL },
2172
2173 { /* 0xe0 */ "GALOG_BAR", iommuAmdGALogBar_r, NULL },
2174 { /* 0xe8 */ "GALOG_TAIL_ADDR", NULL, NULL },
2175 { /* 0xf0 */ "PPR_LOG_B_BAR", iommuAmdPprLogBBaseAddr_r, NULL },
2176 { /* 0xf8 */ "PPR_EVT_B_BAR", iommuAmdEvtLogBBaseAddr_r, NULL },
2177
2178 { /* 0x100 */ "DEV_TAB_SEG_1", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2179 { /* 0x108 */ "DEV_TAB_SEG_2", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2180 { /* 0x110 */ "DEV_TAB_SEG_3", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2181 { /* 0x118 */ "DEV_TAB_SEG_4", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2182 { /* 0x120 */ "DEV_TAB_SEG_5", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2183 { /* 0x128 */ "DEV_TAB_SEG_6", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2184 { /* 0x130 */ "DEV_TAB_SEG_7", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w },
2185
2186 { /* 0x138 */ "DEV_SPECIFIC_FEAT", iommuAmdDevSpecificFeat_r, NULL },
2187 { /* 0x140 */ "DEV_SPECIFIC_CTRL", iommuAmdDevSpecificCtrl_r, NULL },
2188 { /* 0x148 */ "DEV_SPECIFIC_STATUS", iommuAmdDevSpecificStatus_r, NULL },
2189
2190 { /* 0x150 */ "MSI_VECTOR_0 or MSI_VECTOR_1", iommuAmdDevMsiVector_r, iommuAmdDevMsiVector_w },
2191 { /* 0x158 */ "MSI_CAP_HDR or MSI_ADDR_LO", iommuAmdMsiCapHdrAndAddrLo_r, iommuAmdMsiCapHdrAndAddrLo_w },
2192 { /* 0x160 */ "MSI_ADDR_HI or MSI_DATA", iommuAmdMsiAddrHiAndData_r, iommuAmdMsiAddrHiAndData_w },
2193 { /* 0x168 */ "MSI_MAPPING_CAP_HDR or PERF_OPT_CTRL", NULL, NULL },
2194
2195 { /* 0x170 */ "XT_GEN_INTR_CTRL", NULL, NULL },
2196 { /* 0x178 */ "XT_PPR_INTR_CTRL", NULL, NULL },
2197 { /* 0x180 */ "XT_GALOG_INT_CTRL", NULL, NULL },
2198};
2199AssertCompile(RT_ELEMENTS(g_aRegAccess0) == (IOMMU_MMIO_OFF_QWORD_TABLE_0_END - IOMMU_MMIO_OFF_QWORD_TABLE_0_START) / 8);
2200
2201/**
2202 * Register access table 1.
2203 * The MMIO offset of each entry must be a multiple of 8!
2204 */
2205static const IOMMUREGACC g_aRegAccess1[] =
2206{
2207 /* MMIO offset Register name Read function Write function */
2208 { /* 0x200 */ "MARC_APER_BAR_0", NULL, NULL },
2209 { /* 0x208 */ "MARC_APER_RELOC_0", NULL, NULL },
2210 { /* 0x210 */ "MARC_APER_LEN_0", NULL, NULL },
2211 { /* 0x218 */ "MARC_APER_BAR_1", NULL, NULL },
2212 { /* 0x220 */ "MARC_APER_RELOC_1", NULL, NULL },
2213 { /* 0x228 */ "MARC_APER_LEN_1", NULL, NULL },
2214 { /* 0x230 */ "MARC_APER_BAR_2", NULL, NULL },
2215 { /* 0x238 */ "MARC_APER_RELOC_2", NULL, NULL },
2216 { /* 0x240 */ "MARC_APER_LEN_2", NULL, NULL },
2217 { /* 0x248 */ "MARC_APER_BAR_3", NULL, NULL },
2218 { /* 0x250 */ "MARC_APER_RELOC_3", NULL, NULL },
2219 { /* 0x258 */ "MARC_APER_LEN_3", NULL, NULL }
2220};
2221AssertCompile(RT_ELEMENTS(g_aRegAccess1) == (IOMMU_MMIO_OFF_QWORD_TABLE_1_END - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) / 8);
2222
2223/**
2224 * Register access table 2.
2225 * The MMIO offset of each entry must be a multiple of 8!
2226 */
2227static const IOMMUREGACC g_aRegAccess2[] =
2228{
2229 /* MMIO offset Register name Read Function Write function */
2230 { /* 0x1ff8 */ "RSVD_REG", NULL, NULL },
2231
2232 { /* 0x2000 */ "CMD_BUF_HEAD_PTR", iommuAmdCmdBufHeadPtr_r, iommuAmdCmdBufHeadPtr_w },
2233 { /* 0x2008 */ "CMD_BUF_TAIL_PTR", iommuAmdCmdBufTailPtr_r , iommuAmdCmdBufTailPtr_w },
2234 { /* 0x2010 */ "EVT_LOG_HEAD_PTR", iommuAmdEvtLogHeadPtr_r, iommuAmdEvtLogHeadPtr_w },
2235 { /* 0x2018 */ "EVT_LOG_TAIL_PTR", iommuAmdEvtLogTailPtr_r, iommuAmdEvtLogTailPtr_w },
2236
2237 { /* 0x2020 */ "STATUS", iommuAmdStatus_r, iommuAmdStatus_w },
2238 { /* 0x2028 */ NULL, NULL, NULL },
2239
2240 { /* 0x2030 */ "PPR_LOG_HEAD_PTR", NULL, NULL },
2241 { /* 0x2038 */ "PPR_LOG_TAIL_PTR", NULL, NULL },
2242
2243 { /* 0x2040 */ "GALOG_HEAD_PTR", NULL, NULL },
2244 { /* 0x2048 */ "GALOG_TAIL_PTR", NULL, NULL },
2245
2246 { /* 0x2050 */ "PPR_LOG_B_HEAD_PTR", NULL, NULL },
2247 { /* 0x2058 */ "PPR_LOG_B_TAIL_PTR", NULL, NULL },
2248
2249 { /* 0x2060 */ NULL, NULL, NULL },
2250 { /* 0x2068 */ NULL, NULL, NULL },
2251
2252 { /* 0x2070 */ "EVT_LOG_B_HEAD_PTR", NULL, NULL },
2253 { /* 0x2078 */ "EVT_LOG_B_TAIL_PTR", NULL, NULL },
2254
2255 { /* 0x2080 */ "PPR_LOG_AUTO_RESP", NULL, NULL },
2256 { /* 0x2088 */ "PPR_LOG_OVERFLOW_EARLY", NULL, NULL },
2257 { /* 0x2090 */ "PPR_LOG_B_OVERFLOW_EARLY", NULL, NULL }
2258};
2259AssertCompile(RT_ELEMENTS(g_aRegAccess2) == (IOMMU_MMIO_OFF_QWORD_TABLE_2_END - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) / 8);
2260
2261
2262/**
2263 * Gets the register access structure given its MMIO offset.
2264 *
2265 * @returns The register access structure, or NULL if the offset is invalid.
2266 * @param off The MMIO offset of the register being accessed.
2267 */
2268static PCIOMMUREGACC iommuAmdGetRegAccess(uint32_t off)
2269{
2270 /* Figure out which table the register belongs to and validate its index. */
2271 PCIOMMUREGACC pReg;
2272 if (off < IOMMU_MMIO_OFF_QWORD_TABLE_0_END)
2273 {
2274 uint32_t const idxReg = off >> 3;
2275 Assert(idxReg < RT_ELEMENTS(g_aRegAccess0));
2276 pReg = &g_aRegAccess0[idxReg];
2277 }
2278 else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_1_END
2279 && off >= IOMMU_MMIO_OFF_QWORD_TABLE_1_START)
2280 {
2281 uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) >> 3;
2282 Assert(idxReg < RT_ELEMENTS(g_aRegAccess1));
2283 pReg = &g_aRegAccess1[idxReg];
2284 }
2285 else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_2_END
2286 && off >= IOMMU_MMIO_OFF_QWORD_TABLE_2_START)
2287 {
2288 uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) >> 3;
2289 Assert(idxReg < RT_ELEMENTS(g_aRegAccess2));
2290 pReg = &g_aRegAccess2[idxReg];
2291 }
2292 else
2293 return NULL;
2294 return pReg;
2295}
2296
2297
2298/**
2299 * Writes an IOMMU register (32-bit and 64-bit).
2300 *
2301 * @returns Strict VBox status code.
2302 * @param pDevIns The IOMMU device instance.
2303 * @param off MMIO byte offset to the register.
2304 * @param cb The size of the write access.
2305 * @param uValue The value being written.
2306 *
2307 * @thread EMT.
2308 */
2309static VBOXSTRICTRC iommuAmdRegisterWrite(PPDMDEVINS pDevIns, uint32_t off, uint8_t cb, uint64_t uValue)
2310{
2311 /*
2312 * Validate the access in case of IOM bug or incorrect assumption.
2313 */
2314 Assert(off < IOMMU_MMIO_REGION_SIZE);
2315 AssertMsgReturn(cb == 4 || cb == 8, ("Invalid access size %u\n", cb), VINF_SUCCESS);
2316 AssertMsgReturn(!(off & 3), ("Invalid offset %#x\n", off), VINF_SUCCESS);
2317
2318 Log4Func(("off=%#x cb=%u uValue=%#RX64\n", off, cb, uValue));
2319
2320 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2321 PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
2322 if (pReg)
2323 { /* likely */ }
2324 else
2325 {
2326 LogFunc(("Writing unknown register %#x with %#RX64 -> Ignored\n", off, uValue));
2327 return VINF_SUCCESS;
2328 }
2329
2330 /* If a write handler doesn't exist, it's either a reserved or read-only register. */
2331 if (pReg->pfnWrite)
2332 { /* likely */ }
2333 else
2334 {
2335 LogFunc(("Writing reserved or read-only register off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
2336 return VINF_SUCCESS;
2337 }
2338
2339 /*
2340 * If the write access is 64-bits and aligned on a 64-bit boundary, dispatch right away.
2341 * This handles writes to 64-bit registers as well as aligned, 64-bit writes to two
2342 * consecutive 32-bit registers.
2343 */
2344 if (cb == 8)
2345 {
2346 if (!(off & 7))
2347 return pReg->pfnWrite(pDevIns, pThis, off, uValue);
2348
2349 LogFunc(("Misaligned access while writing register at off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
2350 return VINF_SUCCESS;
2351 }
2352
2353 /* We shouldn't get sizes other than 32 bits here as we've specified so with IOM. */
2354 Assert(cb == 4);
2355 if (!(off & 7))
2356 {
2357 /*
2358 * Lower 32 bits of a 64-bit register or a 32-bit register is being written.
2359 * Merge with higher 32 bits (after reading the full 64-bits) and perform a 64-bit write.
2360 */
2361 uint64_t u64Read;
2362 if (pReg->pfnRead)
2363 {
2364 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off, &u64Read);
2365 if (RT_FAILURE(rcStrict))
2366 {
2367 LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2368 return rcStrict;
2369 }
2370 }
2371 else
2372 u64Read = 0;
2373
2374 uValue = (u64Read & UINT64_C(0xffffffff00000000)) | uValue;
2375 return pReg->pfnWrite(pDevIns, pThis, off, uValue);
2376 }
2377
2378 /*
2379 * Higher 32 bits of a 64-bit register or a 32-bit register at a 32-bit boundary is being written.
2380 * Merge with lower 32 bits (after reading the full 64-bits) and perform a 64-bit write.
2381 */
2382 Assert(!(off & 3));
2383 Assert(off & 7);
2384 Assert(off >= 4);
2385 uint64_t u64Read;
2386 if (pReg->pfnRead)
2387 {
2388 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, &u64Read);
2389 if (RT_FAILURE(rcStrict))
2390 {
2391 LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2392 return rcStrict;
2393 }
2394 }
2395 else
2396 u64Read = 0;
2397
2398 uValue = (uValue << 32) | (u64Read & UINT64_C(0xffffffff));
2399 return pReg->pfnWrite(pDevIns, pThis, off - 4, uValue);
2400}
2401
2402
2403/**
2404 * Reads an IOMMU register (64-bit) given its MMIO offset.
2405 *
2406 * All reads are 64-bit but reads to 32-bit registers that are aligned on an 8-byte
2407 * boundary include the lower half of the subsequent register.
2408 *
2409 * This is because most registers are 64-bit and aligned on 8-byte boundaries but
2410 * some are really 32-bit registers aligned on an 8-byte boundary. We cannot assume
2411 * software will only perform 32-bit reads on those 32-bit registers that are
2412 * aligned on 8-byte boundaries.
2413 *
2414 * @returns Strict VBox status code.
2415 * @param pDevIns The IOMMU device instance.
2416 * @param off The MMIO offset of the register in bytes.
2417 * @param puResult Where to store the value being read.
2418 *
2419 * @thread EMT.
2420 */
2421static VBOXSTRICTRC iommuAmdRegisterRead(PPDMDEVINS pDevIns, uint32_t off, uint64_t *puResult)
2422{
2423 Assert(off < IOMMU_MMIO_REGION_SIZE);
2424 Assert(!(off & 7) || !(off & 3));
2425
2426 Log4Func(("off=%#x\n", off));
2427
2428 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2429 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
2430 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); NOREF(pPciDev);
2431
2432 PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
2433 if (pReg)
2434 { /* likely */ }
2435 else
2436 {
2437 LogFunc(("Reading unknown register %#x -> Ignored\n", off));
2438 return VINF_IOM_MMIO_UNUSED_FF;
2439 }
2440
2441 /* If a read handler doesn't exist, it's a reserved or unknown register. */
2442 if (pReg->pfnRead)
2443 { /* likely */ }
2444 else
2445 {
2446 LogFunc(("Reading reserved or unknown register off=%#x -> returning 0s\n", off));
2447 return VINF_IOM_MMIO_UNUSED_00;
2448 }
2449
2450 /*
2451 * If the read access is aligned on a 64-bit boundary, read the full 64-bits and return.
2452 * The caller takes care of truncating upper 32 bits for 32-bit reads.
2453 */
2454 if (!(off & 7))
2455 return pReg->pfnRead(pDevIns, pThis, off, puResult);
2456
2457 /*
2458 * High 32 bits of a 64-bit register or a 32-bit register at a non 64-bit boundary is being read.
2459 * Read full 64 bits at the previous 64-bit boundary but return only the high 32 bits.
2460 */
2461 Assert(!(off & 3));
2462 Assert(off & 7);
2463 Assert(off >= 4);
2464 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, puResult);
2465 if (RT_SUCCESS(rcStrict))
2466 *puResult >>= 32;
2467 else
2468 {
2469 *puResult = 0;
2470 LogFunc(("Reading off %#x during split read failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2471 }
2472
2473 return rcStrict;
2474}
2475
2476
2477/**
2478 * Raises the MSI interrupt for the IOMMU device.
2479 *
2480 * @param pDevIns The IOMMU device instance.
2481 *
2482 * @thread Any.
2483 * @remarks The IOMMU lock may or may not be held.
2484 */
2485static void iommuAmdMsiInterruptRaise(PPDMDEVINS pDevIns)
2486{
2487 LogFlowFunc(("\n"));
2488 if (iommuAmdIsMsiEnabled(pDevIns))
2489 {
2490 LogFunc(("Raising MSI\n"));
2491 PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_HIGH);
2492 }
2493}
2494
2495#if 0
2496/**
2497 * Clears the MSI interrupt for the IOMMU device.
2498 *
2499 * @param pDevIns The IOMMU device instance.
2500 *
2501 * @thread Any.
2502 * @remarks The IOMMU lock may or may not be held.
2503 */
2504static void iommuAmdMsiInterruptClear(PPDMDEVINS pDevIns)
2505{
2506 if (iommuAmdIsMsiEnabled(pDevIns))
2507 PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_LOW);
2508}
2509#endif
2510
2511/**
2512 * Writes an entry to the event log in memory.
2513 *
2514 * @returns VBox status code.
2515 * @param pDevIns The IOMMU device instance.
2516 * @param pEvent The event to log.
2517 *
2518 * @thread Any.
2519 * @remarks The IOMMU lock must be held while calling this function.
2520 */
2521static int iommuAmdEvtLogEntryWrite(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
2522{
2523 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2524 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2525
2526 IOMMU_LOCK_NORET(pDevIns, pThisCC);
2527
2528 /* Check if event logging is active and the log has not overflowed. */
2529 IOMMU_STATUS_T const Status = pThis->Status;
2530 if ( Status.n.u1EvtLogRunning
2531 && !Status.n.u1EvtOverflow)
2532 {
2533 uint32_t const cbEvt = sizeof(*pEvent);
2534
2535 /* Get the offset we need to write the event to in memory (circular buffer offset). */
2536 uint32_t const offEvt = pThis->EvtLogTailPtr.n.off;
2537 Assert(!(offEvt & ~IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK));
2538
2539 /* Ensure we have space in the event log. */
2540 uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
2541 uint32_t const cEvts = iommuAmdGetEvtLogEntryCount(pThis);
2542 if (cEvts + 1 < cMaxEvts)
2543 {
2544 /* Write the event log entry to memory. */
2545 RTGCPHYS const GCPhysEvtLog = pThis->EvtLogBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
2546 RTGCPHYS const GCPhysEvtLogEntry = GCPhysEvtLog + offEvt;
2547 int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysEvtLogEntry, pEvent, cbEvt);
2548 if (RT_FAILURE(rc))
2549 LogFunc(("Failed to write event log entry at %#RGp. rc=%Rrc\n", GCPhysEvtLogEntry, rc));
2550
2551 /* Increment the event log tail pointer. */
2552 uint32_t const cbEvtLog = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2553 pThis->EvtLogTailPtr.n.off = (offEvt + cbEvt) % cbEvtLog;
2554
2555 /* Indicate that an event log entry was written. */
2556 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_INTR);
2557
2558 /* Check and signal an interrupt if software wants to receive one when an event log entry is written. */
2559 if (pThis->Ctrl.n.u1EvtIntrEn)
2560 iommuAmdMsiInterruptRaise(pDevIns);
2561 }
2562 else
2563 {
2564 /* Indicate that the event log has overflowed. */
2565 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_OVERFLOW);
2566
2567 /* Check and signal an interrupt if software wants to receive one when the event log has overflowed. */
2568 if (pThis->Ctrl.n.u1EvtIntrEn)
2569 iommuAmdMsiInterruptRaise(pDevIns);
2570 }
2571 }
2572
2573 IOMMU_UNLOCK(pDevIns, pThisCC);
2574
2575 return VINF_SUCCESS;
2576}
2577
2578
2579/**
2580 * Sets an event in the hardware error registers.
2581 *
2582 * @param pDevIns The IOMMU device instance.
2583 * @param pEvent The event.
2584 *
2585 * @thread Any.
2586 */
2587static void iommuAmdHwErrorSet(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
2588{
2589 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2590 if (pThis->ExtFeat.n.u1HwErrorSup)
2591 {
2592 if (pThis->HwEvtStatus.n.u1Valid)
2593 pThis->HwEvtStatus.n.u1Overflow = 1;
2594 pThis->HwEvtStatus.n.u1Valid = 1;
2595 pThis->HwEvtHi.u64 = RT_MAKE_U64(pEvent->au32[0], pEvent->au32[1]);
2596 pThis->HwEvtLo = RT_MAKE_U64(pEvent->au32[2], pEvent->au32[3]);
2597 Assert( pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_DEV_TAB_HW_ERROR
2598 || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_PAGE_TAB_HW_ERROR
2599 || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
2600 }
2601}
2602
2603
2604/**
2605 * Initializes a PAGE_TAB_HARDWARE_ERROR event.
2606 *
2607 * @param uDevId The device ID (bus, device, function).
2608 * @param uDomainId The domain ID.
2609 * @param GCPhysPtEntity The system physical address of the page table
2610 * entity.
2611 * @param enmOp The IOMMU operation being performed.
2612 * @param pEvtPageTabHwErr Where to store the initialized event.
2613 */
2614static void iommuAmdPageTabHwErrorEventInit(uint16_t uDevId, uint16_t uDomainId, RTGCPHYS GCPhysPtEntity, IOMMUOP enmOp,
2615 PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
2616{
2617 memset(pEvtPageTabHwErr, 0, sizeof(*pEvtPageTabHwErr));
2618 pEvtPageTabHwErr->n.u16DevId = uDevId;
2619 pEvtPageTabHwErr->n.u16DomainOrPasidLo = uDomainId;
2620 pEvtPageTabHwErr->n.u1GuestOrNested = 0;
2621 pEvtPageTabHwErr->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
2622 pEvtPageTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
2623 pEvtPageTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
2624 pEvtPageTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT;
2625 pEvtPageTabHwErr->n.u4EvtCode = IOMMU_EVT_PAGE_TAB_HW_ERROR;
2626 pEvtPageTabHwErr->n.u64Addr = GCPhysPtEntity;
2627}
2628
2629
2630/**
2631 * Raises a PAGE_TAB_HARDWARE_ERROR event.
2632 *
2633 * @param pDevIns The IOMMU device instance.
2634 * @param enmOp The IOMMU operation being performed.
2635 * @param pEvtPageTabHwErr The page table hardware error event.
2636 *
2637 * @thread Any.
2638 */
2639static void iommuAmdPageTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
2640{
2641 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_PAGE_TAB_HW_ERR_T));
2642 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtPageTabHwErr;
2643
2644 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2645 IOMMU_LOCK_NORET(pDevIns, pThisCC);
2646
2647 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
2648 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
2649 if (enmOp != IOMMUOP_CMD)
2650 iommuAmdSetPciTargetAbort(pDevIns);
2651
2652 IOMMU_UNLOCK(pDevIns, pThisCC);
2653
2654 LogFunc(("Raised PAGE_TAB_HARDWARE_ERROR. uDevId=%#x uDomainId=%#x GCPhysPtEntity=%#RGp enmOp=%u u2Type=%u\n",
2655 pEvtPageTabHwErr->n.u16DevId, pEvtPageTabHwErr->n.u16DomainOrPasidLo, pEvtPageTabHwErr->n.u64Addr, enmOp,
2656 pEvtPageTabHwErr->n.u2Type));
2657}
2658
2659
2660#ifdef IN_RING3
2661/**
2662 * Initializes a COMMAND_HARDWARE_ERROR event.
2663 *
2664 * @param GCPhysAddr The system physical address the IOMMU attempted to access.
2665 * @param pEvtCmdHwErr Where to store the initialized event.
2666 */
2667static void iommuAmdCmdHwErrorEventInit(RTGCPHYS GCPhysAddr, PEVT_CMD_HW_ERR_T pEvtCmdHwErr)
2668{
2669 memset(pEvtCmdHwErr, 0, sizeof(*pEvtCmdHwErr));
2670 pEvtCmdHwErr->n.u2Type = HWEVTTYPE_DATA_ERROR;
2671 pEvtCmdHwErr->n.u4EvtCode = IOMMU_EVT_COMMAND_HW_ERROR;
2672 pEvtCmdHwErr->n.u64Addr = GCPhysAddr;
2673}
2674
2675
2676/**
2677 * Raises a COMMAND_HARDWARE_ERROR event.
2678 *
2679 * @param pDevIns The IOMMU device instance.
2680 * @param pEvtCmdHwErr The command hardware error event.
2681 *
2682 * @thread Any.
2683 */
2684static void iommuAmdCmdHwErrorEventRaise(PPDMDEVINS pDevIns, PCEVT_CMD_HW_ERR_T pEvtCmdHwErr)
2685{
2686 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_CMD_HW_ERR_T));
2687 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtCmdHwErr;
2688 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2689
2690 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2691 IOMMU_LOCK_NORET(pDevIns, pThisCC);
2692
2693 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
2694 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
2695 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
2696
2697 IOMMU_UNLOCK(pDevIns, pThisCC);
2698
2699 LogFunc(("Raised COMMAND_HARDWARE_ERROR. GCPhysCmd=%#RGp u2Type=%u\n", pEvtCmdHwErr->n.u64Addr, pEvtCmdHwErr->n.u2Type));
2700}
2701#endif /* IN_RING3 */
2702
2703
2704/**
2705 * Initializes a DEV_TAB_HARDWARE_ERROR event.
2706 *
2707 * @param uDevId The device ID (bus, device, function).
2708 * @param GCPhysDte The system physical address of the failed device table
2709 * access.
2710 * @param enmOp The IOMMU operation being performed.
2711 * @param pEvtDevTabHwErr Where to store the initialized event.
2712 */
2713static void iommuAmdDevTabHwErrorEventInit(uint16_t uDevId, RTGCPHYS GCPhysDte, IOMMUOP enmOp,
2714 PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
2715{
2716 memset(pEvtDevTabHwErr, 0, sizeof(*pEvtDevTabHwErr));
2717 pEvtDevTabHwErr->n.u16DevId = uDevId;
2718 pEvtDevTabHwErr->n.u1Intr = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
2719 /** @todo IOMMU: Any other transaction type that can set read/write bit? */
2720 pEvtDevTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
2721 pEvtDevTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
2722 pEvtDevTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT;
2723 pEvtDevTabHwErr->n.u4EvtCode = IOMMU_EVT_DEV_TAB_HW_ERROR;
2724 pEvtDevTabHwErr->n.u64Addr = GCPhysDte;
2725}
2726
2727
2728/**
2729 * Raises a DEV_TAB_HARDWARE_ERROR event.
2730 *
2731 * @param pDevIns The IOMMU device instance.
2732 * @param enmOp The IOMMU operation being performed.
2733 * @param pEvtDevTabHwErr The device table hardware error event.
2734 *
2735 * @thread Any.
2736 */
2737static void iommuAmdDevTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
2738{
2739 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_DEV_TAB_HW_ERROR_T));
2740 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtDevTabHwErr;
2741
2742 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2743 IOMMU_LOCK_NORET(pDevIns, pThisCC);
2744
2745 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
2746 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
2747 if (enmOp != IOMMUOP_CMD)
2748 iommuAmdSetPciTargetAbort(pDevIns);
2749
2750 IOMMU_UNLOCK(pDevIns, pThisCC);
2751
2752 LogFunc(("Raised DEV_TAB_HARDWARE_ERROR. uDevId=%#x GCPhysDte=%#RGp enmOp=%u u2Type=%u\n", pEvtDevTabHwErr->n.u16DevId,
2753 pEvtDevTabHwErr->n.u64Addr, enmOp, pEvtDevTabHwErr->n.u2Type));
2754}
2755
2756
2757#ifdef IN_RING3
2758/**
2759 * Initializes an ILLEGAL_COMMAND_ERROR event.
2760 *
2761 * @param GCPhysCmd The system physical address of the failed command
2762 * access.
2763 * @param pEvtIllegalCmd Where to store the initialized event.
2764 */
2765static void iommuAmdIllegalCmdEventInit(RTGCPHYS GCPhysCmd, PEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
2766{
2767 Assert(!(GCPhysCmd & UINT64_C(0xf)));
2768 memset(pEvtIllegalCmd, 0, sizeof(*pEvtIllegalCmd));
2769 pEvtIllegalCmd->n.u4EvtCode = IOMMU_EVT_ILLEGAL_CMD_ERROR;
2770 pEvtIllegalCmd->n.u64Addr = GCPhysCmd;
2771}
2772
2773
2774/**
2775 * Raises an ILLEGAL_COMMAND_ERROR event.
2776 *
2777 * @param pDevIns The IOMMU device instance.
2778 * @param pEvtIllegalCmd The illegal command error event.
2779 */
2780static void iommuAmdIllegalCmdEventRaise(PPDMDEVINS pDevIns, PCEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
2781{
2782 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));
2783 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalCmd;
2784 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2785
2786 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
2787 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
2788
2789 LogFunc(("Raised ILLEGAL_COMMAND_ERROR. Addr=%#RGp\n", pEvtIllegalCmd->n.u64Addr));
2790}
2791#endif /* IN_RING3 */
2792
2793
2794/**
2795 * Initializes an ILLEGAL_DEV_TABLE_ENTRY event.
2796 *
2797 * @param uDevId The device ID (bus, device, function).
2798 * @param uIova The I/O virtual address.
2799 * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if the
2800 * event was caused by an invalid level encoding in the
2801 * DTE.
2802 * @param enmOp The IOMMU operation being performed.
2803 * @param pEvtIllegalDte Where to store the initialized event.
2804 */
2805static void iommuAmdIllegalDteEventInit(uint16_t uDevId, uint64_t uIova, bool fRsvdNotZero, IOMMUOP enmOp,
2806 PEVT_ILLEGAL_DTE_T pEvtIllegalDte)
2807{
2808 memset(pEvtIllegalDte, 0, sizeof(*pEvtIllegalDte));
2809 pEvtIllegalDte->n.u16DevId = uDevId;
2810 pEvtIllegalDte->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
2811 pEvtIllegalDte->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
2812 pEvtIllegalDte->n.u1RsvdNotZero = fRsvdNotZero;
2813 pEvtIllegalDte->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
2814 pEvtIllegalDte->n.u4EvtCode = IOMMU_EVT_ILLEGAL_DEV_TAB_ENTRY;
2815 pEvtIllegalDte->n.u64Addr = uIova & ~UINT64_C(0x3);
2816 /** @todo r=ramshankar: Not sure why the last 2 bits are marked as reserved by the
2817 * IOMMU spec here but not for this field for I/O page fault event. */
2818 Assert(!(uIova & UINT64_C(0x3)));
2819}
2820
2821
2822/**
2823 * Raises an ILLEGAL_DEV_TABLE_ENTRY event.
2824 *
2825 * @param pDevIns The IOMMU instance data.
2826 * @param enmOp The IOMMU operation being performed.
2827 * @param pEvtIllegalDte The illegal device table entry event.
2828 * @param enmEvtType The illegal device table entry event type.
2829 *
2830 * @thread Any.
2831 */
2832static void iommuAmdIllegalDteEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PCEVT_ILLEGAL_DTE_T pEvtIllegalDte,
2833 EVT_ILLEGAL_DTE_TYPE_T enmEvtType)
2834{
2835 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));
2836 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalDte;
2837
2838 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
2839 if (enmOp != IOMMUOP_CMD)
2840 iommuAmdSetPciTargetAbort(pDevIns);
2841
2842 LogFunc(("Raised ILLEGAL_DTE_EVENT. uDevId=%#x uIova=%#RX64 enmOp=%u enmEvtType=%u\n", pEvtIllegalDte->n.u16DevId,
2843 pEvtIllegalDte->n.u64Addr, enmOp, enmEvtType));
2844 NOREF(enmEvtType);
2845}
2846
2847
2848/**
2849 * Initializes an IO_PAGE_FAULT event.
2850 *
2851 * @param uDevId The device ID (bus, device, function).
2852 * @param uDomainId The domain ID.
2853 * @param uIova The I/O virtual address being accessed.
2854 * @param fPresent Transaction to a page marked as present (including
2855 * DTE.V=1) or interrupt marked as remapped
2856 * (IRTE.RemapEn=1).
2857 * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if
2858 * the I/O page fault was caused by invalid level
2859 * encoding.
2860 * @param fPermDenied Permission denied for the address being accessed.
2861 * @param enmOp The IOMMU operation being performed.
2862 * @param pEvtIoPageFault Where to store the initialized event.
2863 */
2864static void iommuAmdIoPageFaultEventInit(uint16_t uDevId, uint16_t uDomainId, uint64_t uIova, bool fPresent, bool fRsvdNotZero,
2865 bool fPermDenied, IOMMUOP enmOp, PEVT_IO_PAGE_FAULT_T pEvtIoPageFault)
2866{
2867 Assert(!fPermDenied || fPresent);
2868 memset(pEvtIoPageFault, 0, sizeof(*pEvtIoPageFault));
2869 pEvtIoPageFault->n.u16DevId = uDevId;
2870 //pEvtIoPageFault->n.u4PasidHi = 0;
2871 pEvtIoPageFault->n.u16DomainOrPasidLo = uDomainId;
2872 //pEvtIoPageFault->n.u1GuestOrNested = 0;
2873 //pEvtIoPageFault->n.u1NoExecute = 0;
2874 //pEvtIoPageFault->n.u1User = 0;
2875 pEvtIoPageFault->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
2876 pEvtIoPageFault->n.u1Present = fPresent;
2877 pEvtIoPageFault->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
2878 pEvtIoPageFault->n.u1PermDenied = fPermDenied;
2879 pEvtIoPageFault->n.u1RsvdNotZero = fRsvdNotZero;
2880 pEvtIoPageFault->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
2881 pEvtIoPageFault->n.u4EvtCode = IOMMU_EVT_IO_PAGE_FAULT;
2882 pEvtIoPageFault->n.u64Addr = uIova;
2883}
2884
2885
2886/**
2887 * Raises an IO_PAGE_FAULT event.
2888 *
2889 * @param pDevIns The IOMMU instance data.
2890 * @param fIoDevFlags The I/O device flags, see IOMMU_DEV_F_XXX.
2891 * @param pIrte The interrupt remapping table entry, can be NULL.
2892 * @param enmOp The IOMMU operation being performed.
2893 * @param pEvtIoPageFault The I/O page fault event.
2894 * @param enmEvtType The I/O page fault event type.
2895 *
2896 * @thread Any.
2897 */
2898static void iommuAmdIoPageFaultEventRaise(PPDMDEVINS pDevIns, uint16_t fIoDevFlags, PCIRTE_T pIrte, IOMMUOP enmOp,
2899 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
2900{
2901 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_IO_PAGE_FAULT_T));
2902 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIoPageFault;
2903
2904#ifdef IOMMU_WITH_IOTLBE_CACHE
2905# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) iommuAmdDteCacheSetFlags((a_pDevIns), (a_DevId), \
2906 IOMMU_DEV_F_IO_PAGE_FAULT_RAISED)
2907#else
2908# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) do { } while (0)
2909#endif
2910
2911 bool fSuppressEvtLogging = false;
2912 if ( enmOp == IOMMUOP_MEM_READ
2913 || enmOp == IOMMUOP_MEM_WRITE)
2914 {
2915 uint16_t const fSuppressIopf = IOMMU_DEV_F_VALID | IOMMU_DEV_F_SUPPRESS_IOPF | IOMMU_DEV_F_IO_PAGE_FAULT_RAISED;
2916 uint16_t const fSuppressAllIopf = IOMMU_DEV_F_VALID | IOMMU_DEV_F_SUPPRESS_ALL_IOPF;
2917 if ( (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf
2918 || (fIoDevFlags & fSuppressIopf) == fSuppressIopf)
2919 {
2920 fSuppressEvtLogging = true;
2921 }
2922 }
2923 else if (enmOp == IOMMUOP_INTR_REQ)
2924 {
2925 uint16_t const fSuppressIopf = IOMMU_DEV_F_VALID | IOMMU_DEV_F_INTR_MAP_VALID | IOMMU_DEV_F_IGNORE_UNMAPPED_INTR;
2926 if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf)
2927 fSuppressEvtLogging = true;
2928 else if (pIrte)
2929 fSuppressEvtLogging = pIrte->n.u1SuppressIoPf;
2930 }
2931 /* else: Events are never suppressed for commands. */
2932
2933 switch (enmEvtType)
2934 {
2935 case kIoPageFaultType_PermDenied:
2936 {
2937 /* Cannot be triggered by a command. */
2938 Assert(enmOp != IOMMUOP_CMD);
2939 RT_FALL_THRU();
2940 }
2941 case kIoPageFaultType_DteRsvdPagingMode:
2942 case kIoPageFaultType_PteInvalidPageSize:
2943 case kIoPageFaultType_PteInvalidLvlEncoding:
2944 case kIoPageFaultType_SkippedLevelIovaNotZero:
2945 case kIoPageFaultType_PteRsvdNotZero:
2946 case kIoPageFaultType_PteValidNotSet:
2947 case kIoPageFaultType_DteTranslationDisabled:
2948 case kIoPageFaultType_PasidInvalidRange:
2949 {
2950 /*
2951 * For a translation request, the IOMMU doesn't signal an I/O page fault nor does it
2952 * create an event log entry. See AMD IOMMU spec. 2.1.3.2 "I/O Page Faults".
2953 */
2954 if (enmOp != IOMMUOP_TRANSLATE_REQ)
2955 {
2956 if (!fSuppressEvtLogging)
2957 {
2958 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
2959 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
2960 }
2961 if (enmOp != IOMMUOP_CMD)
2962 iommuAmdSetPciTargetAbort(pDevIns);
2963 }
2964 break;
2965 }
2966
2967 case kIoPageFaultType_UserSupervisor:
2968 {
2969 /* Access is blocked and only creates an event log entry. */
2970 if (!fSuppressEvtLogging)
2971 {
2972 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
2973 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
2974 }
2975 break;
2976 }
2977
2978 case kIoPageFaultType_IrteAddrInvalid:
2979 case kIoPageFaultType_IrteRsvdNotZero:
2980 case kIoPageFaultType_IrteRemapEn:
2981 case kIoPageFaultType_IrteRsvdIntType:
2982 case kIoPageFaultType_IntrReqAborted:
2983 case kIoPageFaultType_IntrWithPasid:
2984 {
2985 /* Only trigerred by interrupt requests. */
2986 Assert(enmOp == IOMMUOP_INTR_REQ);
2987 if (!fSuppressEvtLogging)
2988 {
2989 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
2990 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
2991 }
2992 iommuAmdSetPciTargetAbort(pDevIns);
2993 break;
2994 }
2995
2996 case kIoPageFaultType_SmiFilterMismatch:
2997 {
2998 /* Not supported and probably will never be, assert. */
2999 AssertMsgFailed(("kIoPageFaultType_SmiFilterMismatch - Upstream SMI requests not supported/implemented."));
3000 break;
3001 }
3002
3003 case kIoPageFaultType_DevId_Invalid:
3004 {
3005 /* Cannot be triggered by a command. */
3006 Assert(enmOp != IOMMUOP_CMD);
3007 Assert(enmOp != IOMMUOP_TRANSLATE_REQ); /** @todo IOMMU: We don't support translation requests yet. */
3008 if (!fSuppressEvtLogging)
3009 {
3010 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3011 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3012 }
3013 if ( enmOp == IOMMUOP_MEM_READ
3014 || enmOp == IOMMUOP_MEM_WRITE)
3015 iommuAmdSetPciTargetAbort(pDevIns);
3016 break;
3017 }
3018 }
3019
3020#undef IOMMU_DTE_CACHE_SET_PF_RAISED
3021}
3022
3023
3024/**
3025 * Raises an IO_PAGE_FAULT event given the DTE.
3026 *
3027 * @param pDevIns The IOMMU instance data.
3028 * @param pDte The device table entry.
3029 * @param pIrte The interrupt remapping table entry, can be NULL.
3030 * @param enmOp The IOMMU operation being performed.
3031 * @param pEvtIoPageFault The I/O page fault event.
3032 * @param enmEvtType The I/O page fault event type.
3033 *
3034 * @thread Any.
3035 */
3036static void iommuAmdIoPageFaultEventRaiseWithDte(PPDMDEVINS pDevIns, PCDTE_T pDte, PCIRTE_T pIrte, IOMMUOP enmOp,
3037 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
3038{
3039 Assert(pDte);
3040 uint16_t const fIoDevFlags = iommuAmdGetBasicDevFlags(pDte);
3041 return iommuAmdIoPageFaultEventRaise(pDevIns, fIoDevFlags, pIrte, enmOp, pEvtIoPageFault, enmEvtType);
3042}
3043
3044
3045/**
3046 * Reads a device table entry for the given the device ID.
3047 *
3048 * @returns VBox status code.
3049 * @param pDevIns The IOMMU device instance.
3050 * @param uDevId The device ID (bus, device, function).
3051 * @param enmOp The IOMMU operation being performed.
3052 * @param pDte Where to store the device table entry.
3053 *
3054 * @thread Any.
3055 */
3056static int iommuAmdDteRead(PPDMDEVINS pDevIns, uint16_t uDevId, IOMMUOP enmOp, PDTE_T pDte)
3057{
3058 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3059 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3060
3061 IOMMU_LOCK(pDevIns, pThisCC);
3062
3063 /* Figure out which device table segment is being accessed. */
3064 uint8_t const idxSegsEn = pThis->Ctrl.n.u3DevTabSegEn;
3065 Assert(idxSegsEn < RT_ELEMENTS(g_auDevTabSegShifts));
3066
3067 uint8_t const idxSeg = (uDevId & g_auDevTabSegMasks[idxSegsEn]) >> g_auDevTabSegShifts[idxSegsEn];
3068 Assert(idxSeg < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
3069 AssertCompile(RT_ELEMENTS(g_auDevTabSegShifts) == RT_ELEMENTS(g_auDevTabSegMasks));
3070
3071 RTGCPHYS const GCPhysDevTab = pThis->aDevTabBaseAddrs[idxSeg].n.u40Base << X86_PAGE_4K_SHIFT;
3072 uint32_t const offDte = (uDevId & ~g_auDevTabSegMasks[idxSegsEn]) * sizeof(DTE_T);
3073 RTGCPHYS const GCPhysDte = GCPhysDevTab + offDte;
3074
3075 /* Ensure the DTE falls completely within the device table segment. */
3076 uint32_t const cbDevTabSeg = (pThis->aDevTabBaseAddrs[idxSeg].n.u9Size + 1) << X86_PAGE_4K_SHIFT;
3077
3078 IOMMU_UNLOCK(pDevIns, pThisCC);
3079
3080 if (offDte + sizeof(DTE_T) <= cbDevTabSeg)
3081 {
3082 /* Read the device table entry from guest memory. */
3083 Assert(!(GCPhysDevTab & X86_PAGE_4K_OFFSET_MASK));
3084 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDte, pDte, sizeof(*pDte));
3085 if (RT_SUCCESS(rc))
3086 return rc;
3087
3088 /* Raise a device table hardware error. */
3089 LogFunc(("Failed to read device table entry at %#RGp. rc=%Rrc -> DevTabHwError\n", GCPhysDte, rc));
3090
3091 EVT_DEV_TAB_HW_ERROR_T EvtDevTabHwErr;
3092 iommuAmdDevTabHwErrorEventInit(uDevId, GCPhysDte, enmOp, &EvtDevTabHwErr);
3093 iommuAmdDevTabHwErrorEventRaise(pDevIns, enmOp, &EvtDevTabHwErr);
3094 return VERR_IOMMU_DTE_READ_FAILED;
3095 }
3096
3097 /* Raise an I/O page fault for out-of-bounds acccess. */
3098 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3099 iommuAmdIoPageFaultEventInit(uDevId, 0 /* uDomainId */, 0 /* uIova */, false /* fPresent */, false /* fRsvdNotZero */,
3100 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3101 iommuAmdIoPageFaultEventRaise(pDevIns, 0 /* fIoDevFlags */, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3102 kIoPageFaultType_DevId_Invalid);
3103 return VERR_IOMMU_DTE_BAD_OFFSET;
3104}
3105
3106
3107/**
3108 * Performs pre-translation checks for the given device table entry.
3109 *
3110 * @returns VBox status code.
3111 * @retval VINF_SUCCESS if the DTE is valid and supports address translation.
3112 * @retval VINF_IOMMU_ADDR_TRANSLATION_DISABLED if the DTE is valid but address
3113 * translation is disabled.
3114 * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED if an error occurred and any
3115 * corresponding event was raised.
3116 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the DTE denies the requested
3117 * permissions.
3118 *
3119 * @param pDevIns The IOMMU device instance.
3120 * @param uIova The I/O virtual address to translate.
3121 * @param uDevId The device ID (bus, device, function).
3122 * @param fPerm The I/O permissions for this access, see
3123 * IOMMU_IO_PERM_XXX.
3124 * @param pDte The device table entry.
3125 * @param enmOp The IOMMU operation being performed.
3126 *
3127 * @thread Any.
3128 */
3129static int iommuAmdPreTranslateChecks(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, uint8_t fPerm, PCDTE_T pDte,
3130 IOMMUOP enmOp)
3131{
3132 /*
3133 * Check if the translation is valid, otherwise raise an I/O page fault.
3134 */
3135 if (pDte->n.u1TranslationValid)
3136 { /* likely */ }
3137 else
3138 {
3139 /** @todo r=ramshankar: The AMD IOMMU spec. says page walk is terminated but
3140 * doesn't explicitly say whether an I/O page fault is raised. From other
3141 * places in the spec. it seems early page walk terminations (starting with
3142 * the DTE) return the state computed so far and raises an I/O page fault. So
3143 * returning an invalid translation rather than skipping translation. */
3144 LogFunc(("Translation valid bit not set -> IOPF\n"));
3145 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3146 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
3147 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3148 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3149 kIoPageFaultType_DteTranslationDisabled);
3150 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3151 }
3152
3153 /*
3154 * Check permissions bits in the DTE.
3155 * Note: This MUST be checked prior to checking the root page table level below!
3156 */
3157 uint8_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
3158 if ((fPerm & fDtePerm) == fPerm)
3159 { /* likely */ }
3160 else
3161 {
3162 LogFunc(("Permission denied by DTE (fPerm=%#x fDtePerm=%#x) -> IOPF\n", fPerm, fDtePerm));
3163 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3164 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3165 true /* fPermDenied */, enmOp, &EvtIoPageFault);
3166 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3167 kIoPageFaultType_PermDenied);
3168 return VERR_IOMMU_ADDR_ACCESS_DENIED;
3169 }
3170
3171 /*
3172 * If the root page table level is 0, translation is disabled and GPA=SPA and
3173 * the DTE.IR and DTE.IW bits control permissions (verified above).
3174 */
3175 uint8_t const uMaxLevel = pDte->n.u3Mode;
3176 if (uMaxLevel != 0)
3177 { /* likely */ }
3178 else
3179 {
3180 Assert((fPerm & fDtePerm) == fPerm); /* Verify we've checked permissions. */
3181 return VINF_IOMMU_ADDR_TRANSLATION_DISABLED;
3182 }
3183
3184 /*
3185 * If the root page table level exceeds the allowed host-address translation level,
3186 * page walk is terminated and translation fails.
3187 */
3188 if (uMaxLevel <= IOMMU_MAX_HOST_PT_LEVEL)
3189 { /* likely */ }
3190 else
3191 {
3192 /** @todo r=ramshankar: I cannot make out from the AMD IOMMU spec. if I should be
3193 * raising an ILLEGAL_DEV_TABLE_ENTRY event or an IO_PAGE_FAULT event here.
3194 * I'm just going with I/O page fault. */
3195 LogFunc(("Invalid root page table level %#x (uDevId=%#x) -> IOPF\n", uMaxLevel, uDevId));
3196 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3197 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3198 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3199 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3200 kIoPageFaultType_PteInvalidLvlEncoding);
3201 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3202 }
3203
3204 /* The DTE allows translations for this device. */
3205 return VINF_SUCCESS;
3206}
3207
3208
3209/**
3210 * Walks the I/O page table to translate the I/O virtual address to a system
3211 * physical address.
3212 *
3213 * @returns VBox status code.
3214 * @param pDevIns The IOMMU device instance.
3215 * @param uIova The I/O virtual address to translate. Must be 4K aligned.
3216 * @param fPerm The I/O permissions for this access, see
3217 * IOMMU_IO_PERM_XXX.
3218 * @param uDevId The device ID (bus, device, function).
3219 * @param pDte The device table entry.
3220 * @param enmOp The IOMMU operation being performed.
3221 * @param pPageLookup Where to store the results of the I/O page lookup. This
3222 * is only updated when VINF_SUCCESS is returned.
3223 *
3224 * @thread Any.
3225 */
3226static int iommuAmdIoPageTableWalk(PPDMDEVINS pDevIns, uint64_t uIova, uint8_t fPerm, uint16_t uDevId, PCDTE_T pDte,
3227 IOMMUOP enmOp, PIOPAGELOOKUP pPageLookup)
3228{
3229 Assert(pDte->n.u1Valid);
3230 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
3231
3232 /* The virtual address bits indexing table. */
3233 static uint8_t const s_acIovaLevelShifts[] = { 0, 12, 21, 30, 39, 48, 57, 0 };
3234 static uint64_t const s_auIovaLevelMasks[] = { UINT64_C(0x0000000000000000),
3235 UINT64_C(0x00000000001ff000),
3236 UINT64_C(0x000000003fe00000),
3237 UINT64_C(0x0000007fc0000000),
3238 UINT64_C(0x0000ff8000000000),
3239 UINT64_C(0x01ff000000000000),
3240 UINT64_C(0xfe00000000000000),
3241 UINT64_C(0x0000000000000000) };
3242 AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) == RT_ELEMENTS(s_auIovaLevelMasks));
3243 AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) > IOMMU_MAX_HOST_PT_LEVEL);
3244
3245 /* Traverse the I/O page table starting with the page directory in the DTE. */
3246 IOPTENTITY_T PtEntity;
3247 PtEntity.u64 = pDte->au64[0];
3248 for (;;)
3249 {
3250 /* Figure out the system physical address of the page table at the current level. */
3251 uint8_t const uLevel = PtEntity.n.u3NextLevel;
3252
3253 /* Read the page table entity at the current level. */
3254 {
3255 Assert(uLevel > 0 && uLevel < RT_ELEMENTS(s_acIovaLevelShifts));
3256 Assert(uLevel <= IOMMU_MAX_HOST_PT_LEVEL);
3257 uint16_t const idxPte = (uIova >> s_acIovaLevelShifts[uLevel]) & UINT64_C(0x1ff);
3258 uint64_t const offPte = idxPte << 3;
3259 RTGCPHYS const GCPhysPtEntity = (PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK) + offPte;
3260 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysPtEntity, &PtEntity.u64, sizeof(PtEntity));
3261 if (RT_FAILURE(rc))
3262 {
3263 LogFunc(("Failed to read page table entry at %#RGp. rc=%Rrc -> PageTabHwError\n", GCPhysPtEntity, rc));
3264 EVT_PAGE_TAB_HW_ERR_T EvtPageTabHwErr;
3265 iommuAmdPageTabHwErrorEventInit(uDevId, pDte->n.u16DomainId, GCPhysPtEntity, enmOp, &EvtPageTabHwErr);
3266 iommuAmdPageTabHwErrorEventRaise(pDevIns, enmOp, &EvtPageTabHwErr);
3267 return VERR_IOMMU_IPE_2;
3268 }
3269 }
3270
3271 /* Check present bit. */
3272 if (PtEntity.n.u1Present)
3273 { /* likely */ }
3274 else
3275 {
3276 LogFunc(("Page table entry not present (uDevId=%#x) -> IOPF\n", uDevId));
3277 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3278 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
3279 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3280 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3281 kIoPageFaultType_PermDenied);
3282 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3283 }
3284
3285 /* Check permission bits. */
3286 uint8_t const fPtePerm = (PtEntity.u64 >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
3287 if ((fPerm & fPtePerm) == fPerm)
3288 { /* likely */ }
3289 else
3290 {
3291 LogFunc(("Page table entry access denied (uDevId=%#x fPerm=%#x fPtePerm=%#x) -> IOPF\n", uDevId, fPerm, fPtePerm));
3292 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3293 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3294 true /* fPermDenied */, enmOp, &EvtIoPageFault);
3295 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3296 kIoPageFaultType_PermDenied);
3297 return VERR_IOMMU_ADDR_ACCESS_DENIED;
3298 }
3299
3300 /* If this is a PTE, we're at the final level and we're done. */
3301 uint8_t const uNextLevel = PtEntity.n.u3NextLevel;
3302 if (uNextLevel == 0)
3303 {
3304 /* The page size of the translation is the default (4K). */
3305 pPageLookup->GCPhysSpa = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
3306 pPageLookup->cShift = X86_PAGE_4K_SHIFT;
3307 pPageLookup->fPerm = fPtePerm;
3308 return VINF_SUCCESS;
3309 }
3310 if (uNextLevel == 7)
3311 {
3312 /* The default page size of the translation is overridden. */
3313 RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
3314 uint8_t cShift = X86_PAGE_4K_SHIFT;
3315 while (GCPhysPte & RT_BIT_64(cShift++))
3316 ;
3317
3318 /* The page size must be larger than the default size and lower than the default size of the higher level. */
3319 Assert(uLevel < IOMMU_MAX_HOST_PT_LEVEL); /* PTE at level 6 handled outside the loop, uLevel should be <= 5. */
3320 if ( cShift > s_acIovaLevelShifts[uLevel]
3321 && cShift < s_acIovaLevelShifts[uLevel + 1])
3322 {
3323 pPageLookup->GCPhysSpa = GCPhysPte;
3324 pPageLookup->cShift = cShift;
3325 pPageLookup->fPerm = fPtePerm;
3326 return VINF_SUCCESS;
3327 }
3328
3329 LogFunc(("Page size invalid cShift=%#x -> IOPF\n", cShift));
3330 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3331 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3332 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3333 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3334 kIoPageFaultType_PteInvalidPageSize);
3335 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3336 }
3337
3338 /* Validate the next level encoding of the PDE. */
3339#if IOMMU_MAX_HOST_PT_LEVEL < 6
3340 if (uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL)
3341 { /* likely */ }
3342 else
3343 {
3344 LogFunc(("Next level of PDE invalid uNextLevel=%#x -> IOPF\n", uNextLevel));
3345 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3346 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3347 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3348 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3349 kIoPageFaultType_PteInvalidLvlEncoding);
3350 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3351 }
3352#else
3353 Assert(uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL);
3354#endif
3355
3356 /* Validate level transition. */
3357 if (uNextLevel < uLevel)
3358 { /* likely */ }
3359 else
3360 {
3361 LogFunc(("Next level (%#x) must be less than the current level (%#x) -> IOPF\n", uNextLevel, uLevel));
3362 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3363 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3364 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3365 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3366 kIoPageFaultType_PteInvalidLvlEncoding);
3367 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3368 }
3369
3370 /* Ensure IOVA bits of skipped levels are zero. */
3371 Assert(uLevel > 0);
3372 uint64_t uIovaSkipMask = 0;
3373 for (unsigned idxLevel = uLevel - 1; idxLevel > uNextLevel; idxLevel--)
3374 uIovaSkipMask |= s_auIovaLevelMasks[idxLevel];
3375 if (!(uIova & uIovaSkipMask))
3376 { /* likely */ }
3377 else
3378 {
3379 LogFunc(("IOVA of skipped levels are not zero %#RX64 (SkipMask=%#RX64) -> IOPF\n", uIova, uIovaSkipMask));
3380 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3381 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3382 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3383 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3384 kIoPageFaultType_SkippedLevelIovaNotZero);
3385 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3386 }
3387
3388 /* Continue with traversing the page directory at this level. */
3389 }
3390}
3391
3392
3393/**
3394 * Page lookup callback for finding an I/O page from guest memory.
3395 *
3396 * @returns VBox status code.
3397 * @retval VINF_SUCCESS when the page is found and has the right permissions.
3398 * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED when address translation fails.
3399 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
3400 * insufficient to what is requested.
3401 *
3402 * @param pDevIns The IOMMU instance data.
3403 * @param uIovaPage The I/O virtual address to lookup in the cache (must be
3404 * 4K aligned).
3405 * @param fPerm The I/O permissions for this access, see
3406 * IOMMU_IO_PERM_XXX.
3407 * @param pAux The auxiliary information required during lookup.
3408 * @param pPageLookup Where to store the looked up I/O page.
3409 */
3410static DECLCALLBACK(int) iommuAmdDteLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
3411 PIOPAGELOOKUP pPageLookup)
3412{
3413 AssertPtr(pAux);
3414 AssertPtr(pPageLookup);
3415 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
3416
3417 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3418 STAM_PROFILE_ADV_START(&pThis->StatProfDteLookup, a);
3419 int rc = iommuAmdIoPageTableWalk(pDevIns, uIovaPage, fPerm, pAux->uDeviceId, pAux->pDte, pAux->enmOp, pPageLookup);
3420 STAM_PROFILE_ADV_STOP(&pThis->StatProfDteLookup, a);
3421 return rc;
3422}
3423
3424
3425/**
3426 * Looks up a range of I/O virtual addresses.
3427 *
3428 * @returns VBox status code.
3429 * @param pDevIns The IOMMU instance data.
3430 * @param pfnIoPageLookup The lookup function to use.
3431 * @param pAddrIn The I/O address range to lookup.
3432 * @param pAux The auxiliary information required by the lookup
3433 * function.
3434 * @param pAddrOut Where to store the translated I/O address range.
3435 * @param pcbPages Where to store the size of the access (round up to
3436 * the page size). Optional, can be NULL.
3437 */
3438static int iommuAmdLookupIoAddrRange(PPDMDEVINS pDevIns, PFNIOPAGELOOKUP pfnIoPageLookup, PCIOADDRRANGE pAddrIn,
3439 PCIOMMUOPAUX pAux, PIOADDRRANGE pAddrOut, size_t *pcbPages)
3440{
3441 AssertPtr(pfnIoPageLookup);
3442 AssertPtr(pAddrIn);
3443 AssertPtr(pAddrOut);
3444
3445 int rc;
3446 size_t const cbAccess = pAddrIn->cb;
3447 uint8_t const fPerm = pAddrIn->fPerm;
3448 uint64_t const uIova = pAddrIn->uAddr;
3449 RTGCPHYS GCPhysSpa = NIL_RTGCPHYS;
3450 size_t cbRemaining = cbAccess;
3451 uint64_t uIovaPage = pAddrIn->uAddr & X86_PAGE_4K_BASE_MASK;
3452 uint64_t offIova = pAddrIn->uAddr & X86_PAGE_4K_OFFSET_MASK;
3453 uint64_t cbPages = 0;
3454
3455 IOPAGELOOKUP PageLookupPrev;
3456 RT_ZERO(PageLookupPrev);
3457 for (;;)
3458 {
3459 IOPAGELOOKUP PageLookup;
3460 rc = pfnIoPageLookup(pDevIns, uIovaPage, fPerm, pAux, &PageLookup);
3461 if (RT_SUCCESS(rc))
3462 {
3463 Assert(PageLookup.cShift >= X86_PAGE_4K_SHIFT);
3464
3465 /* Store the translated address before continuing to access more pages. */
3466 if (cbRemaining == cbAccess)
3467 {
3468 uint64_t const offMask = IOMMU_GET_PAGE_OFF_MASK(PageLookup.cShift);
3469 uint64_t const offSpa = uIova & offMask;
3470 Assert(!(PageLookup.GCPhysSpa & offMask));
3471 GCPhysSpa = PageLookup.GCPhysSpa | offSpa;
3472 }
3473 /* Check if addresses translated so far result in a physically contiguous region. */
3474 else if (!iommuAmdLookupIsAccessContig(&PageLookupPrev, &PageLookup))
3475 {
3476 rc = VERR_OUT_OF_RANGE;
3477 break;
3478 }
3479
3480 /* Store the page lookup result from the first/previous page. */
3481 PageLookupPrev = PageLookup;
3482
3483 /* Update size of all pages read thus far. */
3484 uint64_t const cbPage = RT_BIT_64(PageLookup.cShift);
3485 cbPages += cbPage;
3486
3487 /* Check if we need to access more pages. */
3488 if (cbRemaining > cbPage - offIova)
3489 {
3490 cbRemaining -= (cbPage - offIova); /* Calculate how much more we need to access. */
3491 uIovaPage += cbPage; /* Update address of the next access. */
3492 offIova = 0; /* After first page, all pages are accessed from off 0. */
3493 }
3494 else
3495 {
3496 cbRemaining = 0;
3497 break;
3498 }
3499 }
3500 else
3501 break;
3502 }
3503
3504 pAddrOut->uAddr = GCPhysSpa; /* Update the translated address. */
3505 pAddrOut->cb = cbAccess - cbRemaining; /* Update the size of the contiguous memory region. */
3506 pAddrOut->fPerm = PageLookupPrev.fPerm; /* Update the allowed permissions for this access. */
3507 if (pcbPages)
3508 *pcbPages = cbPages; /* Update the size of the pages accessed. */
3509 return rc;
3510}
3511
3512
3513/**
3514 * Looks up an I/O virtual address from the device table.
3515 *
3516 * @returns VBox status code.
3517 * @param pDevIns The IOMMU instance data.
3518 * @param uDevId The device ID (bus, device, function).
3519 * @param uIova The I/O virtual address to lookup.
3520 * @param cbAccess The size of the access.
3521 * @param fPerm The I/O permissions for this access, see
3522 * IOMMU_IO_PERM_XXX.
3523 * @param enmOp The IOMMU operation being performed.
3524 * @param pGCPhysSpa Where to store the translated system physical address.
3525 * @param pcbContiguous Where to store the number of contiguous bytes translated
3526 * and permission-checked.
3527 *
3528 * @thread Any.
3529 */
3530static int iommuAmdDteLookup(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, size_t cbAccess, uint8_t fPerm, IOMMUOP enmOp,
3531 PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
3532{
3533 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3534 RTGCPHYS GCPhysSpa = NIL_RTGCPHYS;
3535 size_t cbContiguous = 0;
3536
3537 /* Read the device table entry from memory. */
3538 DTE_T Dte;
3539 int rc = iommuAmdDteRead(pDevIns, uDevId, enmOp, &Dte);
3540 if (RT_SUCCESS(rc))
3541 {
3542 if (Dte.n.u1Valid)
3543 {
3544 /* Validate bits 127:0 of the device table entry when DTE.V is 1. */
3545 uint64_t const fRsvd0 = Dte.au64[0] & ~(IOMMU_DTE_QWORD_0_VALID_MASK & ~IOMMU_DTE_QWORD_0_FEAT_MASK);
3546 uint64_t const fRsvd1 = Dte.au64[1] & ~(IOMMU_DTE_QWORD_1_VALID_MASK & ~IOMMU_DTE_QWORD_1_FEAT_MASK);
3547 if (RT_LIKELY(!fRsvd0 && !fRsvd1))
3548 {
3549 /*
3550 * Check if the DTE is configured for translating addresses.
3551 * Note: Addresses cannot be subject to exclusion as we do -not- support remote IOTLBs,
3552 * so there's no need to check the address exclusion base/limit here.
3553 */
3554 rc = iommuAmdPreTranslateChecks(pDevIns, uDevId, uIova, fPerm, &Dte, enmOp);
3555 if (rc == VINF_SUCCESS)
3556 {
3557 IOADDRRANGE AddrIn;
3558 AddrIn.uAddr = uIova;
3559 AddrIn.cb = cbAccess;
3560 AddrIn.fPerm = fPerm;
3561
3562 IOMMUOPAUX Aux;
3563 Aux.enmOp = enmOp;
3564 Aux.pDte = &Dte;
3565 Aux.uDeviceId = uDevId;
3566 Aux.uDomainId = Dte.n.u16DomainId;
3567
3568 IOADDRRANGE AddrOut;
3569
3570 /* Lookup the address from the DTE and I/O page tables.*/
3571 size_t cbPages = 0;
3572 rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdDteLookupPage, &AddrIn, &Aux, &AddrOut, &cbPages);
3573 GCPhysSpa = AddrOut.uAddr;
3574 cbContiguous = AddrOut.cb;
3575
3576 /* If we stopped since translation resulted in non-contiguous physical addresses,
3577 what we translated so far is still valid. */
3578 if (rc == VERR_OUT_OF_RANGE)
3579 {
3580 Assert(cbContiguous > 0 && cbContiguous < cbAccess);
3581 rc = VINF_SUCCESS;
3582 STAM_COUNTER_INC(&pThis->StatAccessDteNonContig);
3583 }
3584
3585 if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
3586 STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
3587
3588#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3589 if (RT_SUCCESS(rc))
3590 {
3591 /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */
3592 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_ADDR_TRANSLATE);
3593 /* Update IOTLB for the contiguous range of I/O virtual addresses. */
3594 iommuAmdIotlbAddRange(pDevIns, Dte.n.u16DomainId, uIova & X86_PAGE_4K_BASE_MASK, cbPages,
3595 GCPhysSpa & X86_PAGE_4K_BASE_MASK, AddrOut.fPerm);
3596 }
3597#endif
3598 }
3599 else if (rc == VINF_IOMMU_ADDR_TRANSLATION_DISABLED)
3600 {
3601 /*
3602 * Translation is disabled for this device (root paging mode is 0).
3603 * GPA=SPA, but the permission bits are important and controls accesses.
3604 */
3605 GCPhysSpa = uIova;
3606 cbContiguous = cbAccess;
3607 rc = VINF_SUCCESS;
3608
3609#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3610 /* Update that addresses permissions of DTE apply (but omit address translation). */
3611 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_IO_PERM);
3612#endif
3613 }
3614 else
3615 {
3616 /* Address translation failed or access is denied. */
3617 Assert(rc == VERR_IOMMU_ADDR_ACCESS_DENIED || rc == VERR_IOMMU_ADDR_TRANSLATION_FAILED);
3618 GCPhysSpa = NIL_RTGCPHYS;
3619 cbContiguous = 0;
3620 STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
3621 }
3622 }
3623 else
3624 {
3625 /* Invalid reserved bits in the DTE, raise an error event. */
3626 LogFunc(("Invalid DTE reserved bits (u64[0]=%#RX64 u64[1]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1));
3627 EVT_ILLEGAL_DTE_T Event;
3628 iommuAmdIllegalDteEventInit(uDevId, uIova, true /* fRsvdNotZero */, enmOp, &Event);
3629 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
3630 rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3631 }
3632 }
3633 else
3634 {
3635 /*
3636 * The DTE is not valid, forward addresses untranslated.
3637 * See AMD IOMMU spec. "Table 5: Feature Enablement for Address Translation".
3638 */
3639 GCPhysSpa = uIova;
3640 cbContiguous = cbAccess;
3641
3642#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3643 /* Update that addresses don't require translation (nor permission checks) but a DTE is present. */
3644 iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DEV_F_PRESENT);
3645#endif
3646 }
3647 }
3648 else
3649 {
3650 LogFunc(("Failed to read device table entry. uDevId=%#x rc=%Rrc\n", uDevId, rc));
3651 rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3652 }
3653
3654 *pGCPhysSpa = GCPhysSpa;
3655 *pcbContiguous = cbContiguous;
3656 AssertMsg(rc != VINF_SUCCESS || cbContiguous > 0, ("cbContiguous=%zu\n", cbContiguous));
3657 return rc;
3658}
3659
3660
3661#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3662/**
3663 * I/O page lookup callback for finding an I/O page from the IOTLB.
3664 *
3665 * @returns VBox status code.
3666 * @retval VINF_SUCCESS when the page is found and has the right permissions.
3667 * @retval VERR_NOT_FOUND when the page is not found.
3668 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
3669 * insufficient to what is requested.
3670 *
3671 * @param pDevIns The IOMMU instance data.
3672 * @param uIovaPage The I/O virtual address to lookup in the cache (must be
3673 * 4K aligned).
3674 * @param fPerm The I/O permissions for this access, see
3675 * IOMMU_IO_PERM_XXX.
3676 * @param pAux The auxiliary information required during lookup.
3677 * @param pPageLookup Where to store the looked up I/O page.
3678 */
3679static DECLCALLBACK(int) iommuAmdCacheLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
3680 PIOPAGELOOKUP pPageLookup)
3681{
3682 Assert(pAux);
3683 Assert(pPageLookup);
3684 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
3685
3686 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3687
3688 STAM_PROFILE_ADV_START(&pThis->StatProfIotlbeLookup, a);
3689 PCIOTLBE pIotlbe = iommuAmdIotlbLookup(pThis, pAux->uDomainId, uIovaPage);
3690 STAM_PROFILE_ADV_STOP(&pThis->StatProfIotlbeLookup, a);
3691 if (pIotlbe)
3692 {
3693 *pPageLookup = pIotlbe->PageLookup;
3694 if ((pPageLookup->fPerm & fPerm) == fPerm)
3695 {
3696 STAM_COUNTER_INC(&pThis->StatAccessCacheHit);
3697 return VINF_SUCCESS;
3698 }
3699 return VERR_IOMMU_ADDR_ACCESS_DENIED;
3700 }
3701 return VERR_NOT_FOUND;
3702}
3703
3704
3705/**
3706 * Lookups a memory access from the IOMMU cache.
3707 *
3708 * @returns VBox status code.
3709 * @retval VINF_SUCCESS if the access was cached and permissions are verified.
3710 * @retval VERR_OUT_OF_RANGE if the access resulted in a non-contiguous physical
3711 * address region.
3712 * @retval VERR_NOT_FOUND if the access was not cached.
3713 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the access was cached but permissions
3714 * are insufficient.
3715 *
3716 * @param pDevIns The IOMMU instance data.
3717 * @param uDevId The device ID (bus, device, function).
3718 * @param uIova The I/O virtual address to lookup.
3719 * @param cbAccess The size of the access.
3720 * @param fPerm The I/O permissions for this access, see
3721 * IOMMU_IO_PERM_XXX.
3722 * @param enmOp The IOMMU operation being performed.
3723 * @param pGCPhysSpa Where to store the translated system physical address.
3724 * @param pcbContiguous Where to store the number of contiguous bytes translated
3725 * and permission-checked.
3726 */
3727static int iommuAmdCacheLookup(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, size_t cbAccess, uint8_t fPerm, IOMMUOP enmOp,
3728 PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
3729{
3730 int rc;
3731 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3732
3733 /*
3734 * We hold the cache lock across both the device and the IOTLB lookups (if any) because
3735 * we don't want the device cache to be invalidate while we perform IOTBL lookups.
3736 */
3737 IOMMU_LOCK_CACHE(pDevIns, pThis);
3738
3739 /* Lookup the device from the level 1 cache. */
3740 PCIODEVICE pDevice = &pThis->paDevices[uDevId];
3741 if ((pDevice->fFlags & (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_ADDR_TRANSLATE))
3742 == (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_ADDR_TRANSLATE))
3743 {
3744 /* Lookup the IOTLB entries from the level 2 cache. */
3745 IOADDRRANGE AddrIn;
3746 AddrIn.uAddr = uIova;
3747 AddrIn.cb = cbAccess;
3748 AddrIn.fPerm = fPerm;
3749
3750 IOMMUOPAUX Aux;
3751 Aux.enmOp = enmOp;
3752 Aux.pDte = NULL;
3753 Aux.uDeviceId = uDevId;
3754 Aux.uDomainId = pDevice->uDomainId;
3755
3756 IOADDRRANGE AddrOut;
3757 rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, NULL /* pcbPages */);
3758 Assert(AddrOut.cb <= cbAccess);
3759 *pGCPhysSpa = AddrOut.uAddr;
3760 *pcbContiguous = AddrOut.cb;
3761 }
3762 else if ((pDevice->fFlags & (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_IO_PERM))
3763 == (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_IO_PERM))
3764 {
3765 /* Address translation is disabled, but DTE permissions apply. */
3766 Assert(!(pDevice->fFlags & IOMMU_DEV_F_ADDR_TRANSLATE));
3767 uint8_t const fDtePerm = (pDevice->fFlags >> IOMMU_DEV_F_IO_PERM_SHIFT) & IOMMU_DEV_F_IO_PERM_MASK;
3768 if ((fDtePerm & fPerm) == fPerm)
3769 {
3770 *pGCPhysSpa = uIova;
3771 *pcbContiguous = cbAccess;
3772 rc = VINF_SUCCESS;
3773 }
3774 else
3775 {
3776 *pGCPhysSpa = NIL_RTGCPHYS;
3777 *pcbContiguous = 0;
3778 rc = VERR_IOMMU_ADDR_ACCESS_DENIED;
3779 }
3780 }
3781 else if (pDevice->fFlags & IOMMU_DEV_F_PRESENT)
3782 {
3783 /* Forward addresses untranslated, without checking permissions. */
3784 *pGCPhysSpa = uIova;
3785 *pcbContiguous = cbAccess;
3786 rc = VINF_SUCCESS;
3787 }
3788 else
3789 {
3790 rc = VERR_NOT_FOUND;
3791 *pGCPhysSpa = NIL_RTGCPHYS;
3792 *pcbContiguous = 0;
3793 }
3794
3795 IOMMU_UNLOCK_CACHE(pDevIns, pThis);
3796
3797 /* Raise event if address translation resulted in a permission failure. */
3798 if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
3799 {
3800 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3801 iommuAmdIoPageFaultEventInit(uDevId, pDevice->uDomainId, uIova, true /* fPresent */,
3802 false /* fRsvdNotZero */, true /* fPermDenied */, enmOp, &EvtIoPageFault);
3803 iommuAmdIoPageFaultEventRaise(pDevIns, pDevice->fFlags, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3804 kIoPageFaultType_PermDenied);
3805 }
3806
3807 return rc;
3808}
3809#endif /* IOMMU_WITH_IOTLBE_CACHE */
3810
3811
3812/**
3813 * Gets the I/O permission and IOMMU operation type for the given access flags.
3814 *
3815 * @param fFlags The PDM IOMMU flags, PDMIOMMU_MEM_F_XXX.
3816 * @param penmOp Where to store the IOMMU operation.
3817 * @param pfPerm Where to store the IOMMU I/O permission.
3818 * @param pStatRead The stat counter to increment for a read operation.
3819 * @param pStatWrite The stat counter to increment for a write operation.
3820 */
3821DECLINLINE(void) iommuAmdMemAccessGetPermAndOp(uint32_t fFlags, PIOMMUOP penmOp, uint8_t *pfPerm, PSTAMCOUNTER pStatRead,
3822 PSTAMCOUNTER pStatWrite)
3823{
3824 if (fFlags & PDMIOMMU_MEM_F_WRITE)
3825 {
3826 *penmOp = IOMMUOP_MEM_WRITE;
3827 *pfPerm = IOMMU_IO_PERM_WRITE;
3828 STAM_COUNTER_INC(pStatWrite);
3829 }
3830 else
3831 {
3832 Assert(fFlags & PDMIOMMU_MEM_F_READ);
3833 *penmOp = IOMMUOP_MEM_READ;
3834 *pfPerm = IOMMU_IO_PERM_READ;
3835 STAM_COUNTER_INC(pStatRead);
3836 }
3837}
3838
3839
3840/**
3841 * Memory access transaction from a device.
3842 *
3843 * @returns VBox status code.
3844 * @param pDevIns The IOMMU device instance.
3845 * @param uDevId The device ID (bus, device, function).
3846 * @param uIova The I/O virtual address being accessed.
3847 * @param cbAccess The number of bytes being accessed.
3848 * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX.
3849 * @param pGCPhysSpa Where to store the translated system physical address.
3850 * @param pcbContiguous Where to store the number of contiguous bytes translated
3851 * and permission-checked.
3852 *
3853 * @thread Any.
3854 */
3855static DECLCALLBACK(int) iommuAmdMemAccess(PPDMDEVINS pDevIns, uint16_t uDevId, uint64_t uIova, size_t cbAccess, uint32_t fFlags,
3856 PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
3857{
3858 /* Validate. */
3859 AssertPtr(pDevIns);
3860 AssertPtr(pGCPhysSpa);
3861 Assert(cbAccess > 0);
3862 Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
3863
3864 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3865 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
3866 if (Ctrl.n.u1IommuEn)
3867 {
3868 IOMMUOP enmOp;
3869 uint8_t fPerm;
3870 iommuAmdMemAccessGetPermAndOp(fFlags, &enmOp, &fPerm, &pThis->CTX_SUFF_Z(StatMemRead), &pThis->CTX_SUFF_Z(StatMemWrite));
3871 LogFlowFunc(("%s: uDevId=%#x uIova=%#RX64 cb=%zu\n", iommuAmdMemAccessGetPermName(fPerm), uDevId, uIova, cbAccess));
3872
3873 int rc;
3874#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
3875 /* Lookup the IOVA from the cache. */
3876 rc = iommuAmdCacheLookup(pDevIns, uDevId, uIova, cbAccess, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
3877 if (rc == VINF_SUCCESS)
3878 {
3879 /* Entire access was cached and permissions were valid. */
3880 Assert(*pcbContiguous == cbAccess);
3881 Assert(*pGCPhysSpa != NIL_RTGCPHYS);
3882 STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull);
3883 return rc;
3884 }
3885 if (rc == VERR_OUT_OF_RANGE)
3886 {
3887 /* Access stopped when translations resulted in non-contiguous memory, let caller resume access. */
3888 Assert(*pcbContiguous > 0 && *pcbContiguous < cbAccess);
3889 STAM_COUNTER_INC(&pThis->StatAccessCacheNonContig);
3890 return VINF_SUCCESS;
3891 }
3892 if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
3893 {
3894 /* Access denied due to insufficient permissions. */
3895 STAM_COUNTER_INC(&pThis->StatAccessCachePermDenied);
3896 return rc;
3897 }
3898
3899 /* Access incomplete as not all pages were in the cache. Lookup the rest from the device table. */
3900 AssertMsg(rc == VERR_NOT_FOUND, ("Invalid cache lookup result: %Rrc\n", rc));
3901 AssertMsg(*pcbContiguous < cbAccess, ("Invalid size: cbContiguous=%zu cbAccess=%zu\n", *pcbContiguous, cbAccess));
3902 uIova += *pcbContiguous;
3903 cbAccess -= *pcbContiguous;
3904 STAM_COUNTER_INC(&pThis->StatAccessCacheMiss);
3905#endif
3906
3907 /* Lookup the IOVA from the device table. */
3908 rc = iommuAmdDteLookup(pDevIns, uDevId, uIova, cbAccess, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
3909 if (RT_SUCCESS(rc))
3910 { /* likely */ }
3911 else
3912 {
3913 Assert(rc != VERR_OUT_OF_RANGE);
3914 LogFunc(("DTE lookup failed! uDevId=%#x uIova=%#RX64 fPerm=%u cbAccess=%zu rc=%#Rrc\n", uDevId, uIova, fPerm,
3915 cbAccess, rc));
3916 }
3917
3918 return rc;
3919 }
3920
3921 /* Addresses are forwarded without translation when the IOMMU is disabled. */
3922 *pGCPhysSpa = uIova;
3923 *pcbContiguous = cbAccess;
3924 return VINF_SUCCESS;
3925}
3926
3927
3928/**
3929 * Memory access bulk (one or more 4K pages) request from a device.
3930 *
3931 * @returns VBox status code.
3932 * @param pDevIns The IOMMU device instance.
3933 * @param uDevId The device ID (bus, device, function).
3934 * @param cIovas The number of addresses being accessed.
3935 * @param pauIovas The I/O virtual addresses for each page being accessed.
3936 * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX.
3937 * @param paGCPhysSpa Where to store the translated physical addresses.
3938 *
3939 * @thread Any.
3940 */
3941static DECLCALLBACK(int) iommuAmdMemBulkAccess(PPDMDEVINS pDevIns, uint16_t uDevId, size_t cIovas, uint64_t const *pauIovas,
3942 uint32_t fFlags, PRTGCPHYS paGCPhysSpa)
3943{
3944 /* Validate. */
3945 AssertPtr(pDevIns);
3946 Assert(cIovas > 0);
3947 AssertPtr(pauIovas);
3948 AssertPtr(paGCPhysSpa);
3949 Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
3950
3951 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3952 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
3953 if (Ctrl.n.u1IommuEn)
3954 {
3955 IOMMUOP enmOp;
3956 uint8_t fPerm;
3957 iommuAmdMemAccessGetPermAndOp(fFlags, &enmOp, &fPerm, &pThis->CTX_SUFF_Z(StatMemBulkRead),
3958 &pThis->CTX_SUFF_Z(StatMemBulkWrite));
3959 LogFlowFunc(("%s: uDevId=%#x cIovas=%zu\n", iommuAmdMemAccessGetPermName(fPerm), uDevId, cIovas));
3960
3961 /** @todo IOMMU: IOTLB cache lookup. */
3962
3963 /* Lookup each IOVA from the device table. */
3964 for (size_t i = 0; i < cIovas; i++)
3965 {
3966 size_t cbContig;
3967 int rc = iommuAmdDteLookup(pDevIns, uDevId, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i], &cbContig);
3968 if (RT_SUCCESS(rc))
3969 { /* likely */ }
3970 else
3971 {
3972 LogFunc(("Failed! uDevId=%#x uIova=%#RX64 fPerm=%u rc=%Rrc\n", uDevId, pauIovas[i], fPerm, rc));
3973 return rc;
3974 }
3975 Assert(cbContig == X86_PAGE_SIZE);
3976 }
3977 }
3978 else
3979 {
3980 /* Addresses are forwarded without translation when the IOMMU is disabled. */
3981 for (size_t i = 0; i < cIovas; i++)
3982 paGCPhysSpa[i] = pauIovas[i];
3983 }
3984
3985 return VINF_SUCCESS;
3986}
3987
3988
3989/**
3990 * Reads an interrupt remapping table entry from guest memory given its DTE.
3991 *
3992 * @returns VBox status code.
3993 * @param pDevIns The IOMMU device instance.
3994 * @param uDevId The device ID (bus, device, function).
3995 * @param pDte The device table entry.
3996 * @param GCPhysIn The source MSI address (used for reporting errors).
3997 * @param uDataIn The source MSI data.
3998 * @param enmOp The IOMMU operation being performed.
3999 * @param pIrte Where to store the interrupt remapping table entry.
4000 *
4001 * @thread Any.
4002 */
4003static int iommuAmdIrteRead(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, RTGCPHYS GCPhysIn, uint32_t uDataIn,
4004 IOMMUOP enmOp, PIRTE_T pIrte)
4005{
4006 /* Ensure the IRTE length is valid. */
4007 Assert(pDte->n.u4IntrTableLength < IOMMU_DTE_INTR_TAB_LEN_MAX);
4008
4009 RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK;
4010 uint16_t const cbIntrTable = IOMMU_GET_INTR_TAB_LEN(pDte);
4011 uint16_t const offIrte = (uDataIn & IOMMU_MSI_DATA_IRTE_OFFSET_MASK) * sizeof(IRTE_T);
4012 RTGCPHYS const GCPhysIrte = GCPhysIntrTable + offIrte;
4013
4014 /* Ensure the IRTE falls completely within the interrupt table. */
4015 if (offIrte + sizeof(IRTE_T) <= cbIntrTable)
4016 { /* likely */ }
4017 else
4018 {
4019 LogFunc(("IRTE exceeds table length (GCPhysIntrTable=%#RGp cbIntrTable=%u offIrte=%#x uDataIn=%#x) -> IOPF\n",
4020 GCPhysIntrTable, cbIntrTable, offIrte, uDataIn));
4021
4022 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4023 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, GCPhysIn, false /* fPresent */, false /* fRsvdNotZero */,
4024 false /* fPermDenied */, enmOp, &EvtIoPageFault);
4025 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
4026 kIoPageFaultType_IrteAddrInvalid);
4027 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4028 }
4029
4030 /* Read the IRTE from memory. */
4031 Assert(!(GCPhysIrte & 3));
4032 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysIrte, pIrte, sizeof(*pIrte));
4033 if (RT_SUCCESS(rc))
4034 return VINF_SUCCESS;
4035
4036 /** @todo The IOMMU spec. does not tell what kind of error is reported in this
4037 * situation. Is it an I/O page fault or a device table hardware error?
4038 * There's no interrupt table hardware error event, but it's unclear what
4039 * we should do here. */
4040 LogFunc(("Failed to read interrupt table entry at %#RGp. rc=%Rrc -> ???\n", GCPhysIrte, rc));
4041 return VERR_IOMMU_IPE_4;
4042}
4043
4044
4045/**
4046 * Remaps the interrupt using the interrupt remapping table.
4047 *
4048 * @returns VBox status code.
4049 * @param pDevIns The IOMMU instance data.
4050 * @param uDevId The device ID (bus, device, function).
4051 * @param pDte The device table entry.
4052 * @param enmOp The IOMMU operation being performed.
4053 * @param pMsiIn The source MSI.
4054 * @param pMsiOut Where to store the remapped MSI.
4055 *
4056 * @thread Any.
4057 */
4058static int iommuAmdIntrRemap(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, IOMMUOP enmOp, PCMSIMSG pMsiIn,
4059 PMSIMSG pMsiOut)
4060{
4061 Assert(pDte->n.u2IntrCtrl == IOMMU_INTR_CTRL_REMAP);
4062
4063 IRTE_T Irte;
4064 int rc = iommuAmdIrteRead(pDevIns, uDevId, pDte, pMsiIn->Addr.u64, pMsiIn->Data.u32, enmOp, &Irte);
4065 if (RT_SUCCESS(rc))
4066 {
4067 if (Irte.n.u1RemapEnable)
4068 {
4069 if (!Irte.n.u1GuestMode)
4070 {
4071 if (Irte.n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
4072 {
4073 /* Preserve all bits from the source MSI address and data that don't map 1:1 from the IRTE. */
4074 *pMsiOut = *pMsiIn;
4075
4076 pMsiOut->Addr.n.u1DestMode = Irte.n.u1DestMode;
4077 pMsiOut->Addr.n.u8DestId = Irte.n.u8Dest;
4078
4079 pMsiOut->Data.n.u8Vector = Irte.n.u8Vector;
4080 pMsiOut->Data.n.u3DeliveryMode = Irte.n.u3IntrType;
4081 return VINF_SUCCESS;
4082 }
4083
4084 LogFunc(("Interrupt type (%#x) invalid -> IOPF\n", Irte.n.u3IntrType));
4085 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4086 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4087 true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4088 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault,
4089 kIoPageFaultType_IrteRsvdIntType);
4090 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4091 }
4092
4093 LogFunc(("Guest mode not supported -> IOPF\n"));
4094 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4095 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4096 true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4097 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRsvdNotZero);
4098 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4099 }
4100
4101 LogFunc(("Remapping disabled -> IOPF\n"));
4102 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4103 iommuAmdIoPageFaultEventInit(uDevId, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4104 false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4105 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRemapEn);
4106 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4107 }
4108
4109 return rc;
4110}
4111
4112
4113/**
4114 * Looks up an MSI interrupt from the interrupt remapping table.
4115 *
4116 * @returns VBox status code.
4117 * @param pDevIns The IOMMU instance data.
4118 * @param uDevId The device ID (bus, device, function).
4119 * @param enmOp The IOMMU operation being performed.
4120 * @param pMsiIn The source MSI.
4121 * @param pMsiOut Where to store the remapped MSI.
4122 *
4123 * @thread Any.
4124 */
4125static int iommuAmdIntrTableLookup(PPDMDEVINS pDevIns, uint16_t uDevId, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
4126{
4127 /* Read the device table entry from memory. */
4128 LogFlowFunc(("uDevId=%#x (%#x:%#x:%#x) enmOp=%u\n", uDevId,
4129 ((uDevId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK),
4130 ((uDevId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK), (uDevId & VBOX_PCI_DEVFN_FUN_MASK), enmOp));
4131
4132 DTE_T Dte;
4133 int rc = iommuAmdDteRead(pDevIns, uDevId, enmOp, &Dte);
4134 if (RT_SUCCESS(rc))
4135 {
4136 /* If the DTE is not valid, all interrupts are forwarded without remapping. */
4137 if (Dte.n.u1IntrMapValid)
4138 {
4139 /* Validate bits 255:128 of the device table entry when DTE.IV is 1. */
4140 uint64_t const fRsvd0 = Dte.au64[2] & ~IOMMU_DTE_QWORD_2_VALID_MASK;
4141 uint64_t const fRsvd1 = Dte.au64[3] & ~IOMMU_DTE_QWORD_3_VALID_MASK;
4142 if (RT_LIKELY( !fRsvd0
4143 && !fRsvd1))
4144 { /* likely */ }
4145 else
4146 {
4147 LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0,
4148 fRsvd1));
4149 EVT_ILLEGAL_DTE_T Event;
4150 iommuAmdIllegalDteEventInit(uDevId, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
4151 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
4152 return VERR_IOMMU_INTR_REMAP_FAILED;
4153 }
4154
4155 /*
4156 * LINT0/LINT1 pins cannot be driven by PCI(e) devices. Perhaps for a Southbridge
4157 * that's connected through HyperTransport it might be possible; but for us, it
4158 * doesn't seem we need to specially handle these pins.
4159 */
4160
4161 /*
4162 * Validate the MSI source address.
4163 *
4164 * 64-bit MSIs are supported by the PCI and AMD IOMMU spec. However as far as the
4165 * CPU is concerned, the MSI region is fixed and we must ensure no other device
4166 * claims the region as I/O space.
4167 *
4168 * See PCI spec. 6.1.4. "Message Signaled Interrupt (MSI) Support".
4169 * See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support".
4170 * See Intel spec. 10.11.1 "Message Address Register Format".
4171 */
4172 if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
4173 {
4174 /*
4175 * The IOMMU remaps fixed and arbitrated interrupts using the IRTE.
4176 * See AMD IOMMU spec. "2.2.5.1 Interrupt Remapping Tables, Guest Virtual APIC Not Enabled".
4177 */
4178 uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
4179 bool fPassThru = false;
4180 switch (u8DeliveryMode)
4181 {
4182 case VBOX_MSI_DELIVERY_MODE_FIXED:
4183 case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO:
4184 {
4185 uint8_t const uIntrCtrl = Dte.n.u2IntrCtrl;
4186 if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
4187 {
4188 /* Validate the encoded interrupt table length when IntCtl specifies remapping. */
4189 uint8_t const uIntrTabLen = Dte.n.u4IntrTableLength;
4190 if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
4191 {
4192 /*
4193 * We don't support guest interrupt remapping yet. When we do, we'll need to
4194 * check Ctrl.u1GstVirtApicEn and use the guest Virtual APIC Table Root Pointer
4195 * in the DTE rather than the Interrupt Root Table Pointer. Since the caller
4196 * already reads the control register, add that as a parameter when we eventually
4197 * support guest interrupt remapping. For now, just assert.
4198 */
4199 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4200 Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
4201 NOREF(pThis);
4202
4203 return iommuAmdIntrRemap(pDevIns, uDevId, &Dte, enmOp, pMsiIn, pMsiOut);
4204 }
4205
4206 LogFunc(("Invalid interrupt table length %#x -> Illegal DTE\n", uIntrTabLen));
4207 EVT_ILLEGAL_DTE_T Event;
4208 iommuAmdIllegalDteEventInit(uDevId, pMsiIn->Addr.u64, false /* fRsvdNotZero */, enmOp, &Event);
4209 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntTabLen);
4210 return VERR_IOMMU_INTR_REMAP_FAILED;
4211 }
4212
4213 if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
4214 {
4215 fPassThru = true;
4216 break;
4217 }
4218
4219 if (uIntrCtrl == IOMMU_INTR_CTRL_TARGET_ABORT)
4220 {
4221 LogFunc(("IntCtl=0: Remapping disallowed for fixed/arbitrated interrupt (%#x) -> Target abort\n",
4222 pMsiIn->Data.n.u8Vector));
4223 iommuAmdSetPciTargetAbort(pDevIns);
4224 return VERR_IOMMU_INTR_REMAP_DENIED;
4225 }
4226
4227 Assert(uIntrCtrl == IOMMU_INTR_CTRL_RSVD); /* Paranoia. */
4228 LogFunc(("IntCtl mode invalid %#x -> Illegal DTE\n", uIntrCtrl));
4229 EVT_ILLEGAL_DTE_T Event;
4230 iommuAmdIllegalDteEventInit(uDevId, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
4231 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntCtl);
4232 return VERR_IOMMU_INTR_REMAP_FAILED;
4233 }
4234
4235 /* SMIs are passed through unmapped. We don't implement SMI filters. */
4236 case VBOX_MSI_DELIVERY_MODE_SMI: fPassThru = true; break;
4237 case VBOX_MSI_DELIVERY_MODE_NMI: fPassThru = Dte.n.u1NmiPassthru; break;
4238 case VBOX_MSI_DELIVERY_MODE_INIT: fPassThru = Dte.n.u1InitPassthru; break;
4239 case VBOX_MSI_DELIVERY_MODE_EXT_INT: fPassThru = Dte.n.u1ExtIntPassthru; break;
4240 default:
4241 {
4242 LogFunc(("MSI data delivery mode invalid %#x -> Target abort\n", u8DeliveryMode));
4243 iommuAmdSetPciTargetAbort(pDevIns);
4244 return VERR_IOMMU_INTR_REMAP_FAILED;
4245 }
4246 }
4247
4248 /*
4249 * For those other than fixed and arbitrated interrupts, destination mode must be 0 (physical).
4250 * See AMD IOMMU spec. The note below Table 19: "IOMMU Controls and Actions for Upstream Interrupts".
4251 */
4252 if ( u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO
4253 || !pMsiIn->Addr.n.u1DestMode)
4254 {
4255 if (fPassThru)
4256 {
4257 *pMsiOut = *pMsiIn;
4258 return VINF_SUCCESS;
4259 }
4260 LogFunc(("Remapping/passthru disallowed for interrupt %#x -> Target abort\n", pMsiIn->Data.n.u8Vector));
4261 }
4262 else
4263 LogFunc(("Logical destination mode invalid for delivery mode %#x\n -> Target abort\n", u8DeliveryMode));
4264
4265 iommuAmdSetPciTargetAbort(pDevIns);
4266 return VERR_IOMMU_INTR_REMAP_DENIED;
4267 }
4268 else
4269 {
4270 LogFunc(("MSI address region invalid %#RX64\n", pMsiIn->Addr.u64));
4271 return VERR_IOMMU_INTR_REMAP_FAILED;
4272 }
4273 }
4274 else
4275 {
4276 /** @todo IOMMU: Add to interrupt remapping cache. */
4277 LogFlowFunc(("DTE interrupt map not valid\n"));
4278 *pMsiOut = *pMsiIn;
4279 return VINF_SUCCESS;
4280 }
4281 }
4282
4283 LogFunc(("Failed to read device table entry. uDevId=%#x rc=%Rrc\n", uDevId, rc));
4284 return VERR_IOMMU_INTR_REMAP_FAILED;
4285}
4286
4287
4288/**
4289 * Interrupt remap request from a device.
4290 *
4291 * @returns VBox status code.
4292 * @param pDevIns The IOMMU device instance.
4293 * @param uDevId The device ID (bus, device, function).
4294 * @param pMsiIn The source MSI.
4295 * @param pMsiOut Where to store the remapped MSI.
4296 */
4297static DECLCALLBACK(int) iommuAmdMsiRemap(PPDMDEVINS pDevIns, uint16_t uDevId, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
4298{
4299 /* Validate. */
4300 Assert(pDevIns);
4301 Assert(pMsiIn);
4302 Assert(pMsiOut);
4303
4304 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4305
4306 /* Interrupts are forwarded with remapping when the IOMMU is disabled. */
4307 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
4308 if (Ctrl.n.u1IommuEn)
4309 {
4310 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemap));
4311 /** @todo Cache? */
4312
4313 return iommuAmdIntrTableLookup(pDevIns, uDevId, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
4314 }
4315
4316 *pMsiOut = *pMsiIn;
4317 return VINF_SUCCESS;
4318}
4319
4320
4321/**
4322 * @callback_method_impl{FNIOMMMIONEWWRITE}
4323 */
4324static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb)
4325{
4326 NOREF(pvUser);
4327 Assert(cb == 4 || cb == 8);
4328 Assert(!(off & (cb - 1)));
4329
4330 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4331 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioWrite)); NOREF(pThis);
4332
4333 uint64_t const uValue = cb == 8 ? *(uint64_t const *)pv : *(uint32_t const *)pv;
4334 return iommuAmdRegisterWrite(pDevIns, off, cb, uValue);
4335}
4336
4337
4338/**
4339 * @callback_method_impl{FNIOMMMIONEWREAD}
4340 */
4341static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioRead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb)
4342{
4343 NOREF(pvUser);
4344 Assert(cb == 4 || cb == 8);
4345 Assert(!(off & (cb - 1)));
4346
4347 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4348 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioRead)); NOREF(pThis);
4349
4350 uint64_t uResult;
4351 VBOXSTRICTRC rcStrict = iommuAmdRegisterRead(pDevIns, off, &uResult);
4352 if (cb == 8)
4353 *(uint64_t *)pv = uResult;
4354 else
4355 *(uint32_t *)pv = (uint32_t)uResult;
4356
4357 return rcStrict;
4358}
4359
4360
4361#ifdef IN_RING3
4362/**
4363 * Processes an IOMMU command.
4364 *
4365 * @returns VBox status code.
4366 * @param pDevIns The IOMMU device instance.
4367 * @param pCmd The command to process.
4368 * @param GCPhysCmd The system physical address of the command.
4369 * @param pEvtError Where to store the error event in case of failures.
4370 *
4371 * @thread Command thread.
4372 */
4373static int iommuAmdR3CmdProcess(PPDMDEVINS pDevIns, PCCMD_GENERIC_T pCmd, RTGCPHYS GCPhysCmd, PEVT_GENERIC_T pEvtError)
4374{
4375 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4376 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
4377
4378 STAM_COUNTER_INC(&pThis->StatCmd);
4379
4380 uint8_t const bCmd = pCmd->n.u4Opcode;
4381 switch (bCmd)
4382 {
4383 case IOMMU_CMD_COMPLETION_WAIT:
4384 {
4385 STAM_COUNTER_INC(&pThis->StatCmdCompWait);
4386
4387 PCCMD_COMWAIT_T pCmdComWait = (PCCMD_COMWAIT_T)pCmd;
4388 AssertCompile(sizeof(*pCmdComWait) == sizeof(*pCmd));
4389
4390 /* Validate reserved bits in the command. */
4391 if (!(pCmdComWait->au64[0] & ~IOMMU_CMD_COM_WAIT_QWORD_0_VALID_MASK))
4392 {
4393 /* If Completion Store is requested, write the StoreData to the specified address. */
4394 if (pCmdComWait->n.u1Store)
4395 {
4396 RTGCPHYS const GCPhysStore = RT_MAKE_U64(pCmdComWait->n.u29StoreAddrLo << 3, pCmdComWait->n.u20StoreAddrHi);
4397 uint64_t const u64Data = pCmdComWait->n.u64StoreData;
4398 int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysStore, &u64Data, sizeof(u64Data));
4399 if (RT_FAILURE(rc))
4400 {
4401 LogFunc(("Cmd(%#x): Failed to write StoreData (%#RX64) to %#RGp, rc=%Rrc\n", bCmd, u64Data,
4402 GCPhysStore, rc));
4403 iommuAmdCmdHwErrorEventInit(GCPhysStore, (PEVT_CMD_HW_ERR_T)pEvtError);
4404 return VERR_IOMMU_CMD_HW_ERROR;
4405 }
4406 }
4407
4408 /* If the command requests an interrupt and completion wait interrupts are enabled, raise it. */
4409 if (pCmdComWait->n.u1Interrupt)
4410 {
4411 IOMMU_LOCK(pDevIns, pThisCC);
4412 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_COMPLETION_WAIT_INTR);
4413 bool const fRaiseInt = pThis->Ctrl.n.u1CompWaitIntrEn;
4414 IOMMU_UNLOCK(pDevIns, pThisCC);
4415
4416 if (fRaiseInt)
4417 iommuAmdMsiInterruptRaise(pDevIns);
4418 }
4419 return VINF_SUCCESS;
4420 }
4421 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4422 return VERR_IOMMU_CMD_INVALID_FORMAT;
4423 }
4424
4425 case IOMMU_CMD_INV_DEV_TAB_ENTRY:
4426 {
4427 STAM_COUNTER_INC(&pThis->StatCmdInvDte);
4428#ifdef IOMMU_WITH_IOTLBE_CACHE
4429 PCCMD_INV_DTE_T pCmdInvDte = (PCCMD_INV_DTE_T)pCmd;
4430 AssertCompile(sizeof(*pCmdInvDte) == sizeof(*pCmd));
4431
4432 /* Validate reserved bits in the command. */
4433 if ( !(pCmdInvDte->au64[0] & ~IOMMU_CMD_INV_DTE_QWORD_0_VALID_MASK)
4434 && !(pCmdInvDte->au64[1] & ~IOMMU_CMD_INV_DTE_QWORD_1_VALID_MASK))
4435 {
4436 iommuAmdDteCacheUpdate(pDevIns, pCmdInvDte->n.u16DevId, NULL /* pDte */, 0 /* fFlags */);
4437 return VINF_SUCCESS;
4438 }
4439 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4440 return VERR_IOMMU_CMD_INVALID_FORMAT;
4441#else
4442 return VINF_SUCCESS;
4443#endif
4444 }
4445
4446 case IOMMU_CMD_INV_IOMMU_PAGES:
4447 {
4448 STAM_COUNTER_INC(&pThis->StatCmdInvIommuPages);
4449#ifdef IOMMU_WITH_IOTLBE_CACHE
4450 PCCMD_INV_IOMMU_PAGES_T pCmdInvPages = (PCCMD_INV_IOMMU_PAGES_T)pCmd;
4451 AssertCompile(sizeof(*pCmdInvPages) == sizeof(*pCmd));
4452
4453 /* Validate reserved bits in the command. */
4454 if ( !(pCmdInvPages->au64[0] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_0_VALID_MASK)
4455 && !(pCmdInvPages->au64[1] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_1_VALID_MASK))
4456 {
4457 uint64_t const uIova = RT_MAKE_U64(pCmdInvPages->n.u20AddrLo << X86_PAGE_4K_SHIFT, pCmdInvPages->n.u32AddrHi);
4458 uint16_t const uDomainId = pCmdInvPages->n.u16DomainId;
4459 bool const fFlushPde = pCmdInvPages->n.u1PageDirEntries;
4460 uint8_t cShift;
4461 if (!pCmdInvPages->n.u1Size)
4462 cShift = X86_PAGE_4K_SHIFT;
4463 else
4464 {
4465 /* Find the first clear bit starting from bit 12 to 64 of the I/O virtual address. */
4466 unsigned const uFirstZeroBit = ASMBitLastSetU64(~(uIova >> X86_PAGE_4K_SHIFT));
4467 cShift = X86_PAGE_4K_SHIFT + uFirstZeroBit;
4468
4469 /*
4470 * For the address 0x7ffffffffffff000, cShift would be 76 (12+64) and the code below
4471 * would do the right thing by clearing the entire cache for the specified domain ID.
4472 *
4473 * However, for the address 0xfffffffffffff000, cShift would be computed as 12.
4474 * IOMMU behavior is undefined in this case, so it's safe to invalidate just one page.
4475 * A debug-time assert is in place here to let us know if any software tries this.
4476 *
4477 * See AMD IOMMU spec. 2.4.3 "INVALIDATE_IOMMU_PAGES".
4478 * See AMD IOMMU spec. Table 14: "Example Page Size Encodings".
4479 */
4480 Assert(uIova != UINT64_C(0xfffffffffffff000));
4481 }
4482
4483 /*
4484 * Validate invalidation size.
4485 * See AMD IOMMU spec. 2.2.3 "I/O Page Tables for Host Translations".
4486 */
4487 if ( cShift == 12 /* 4K */ || cShift == 13 /* 8K */
4488 || cShift == 14 /* 16K */ || cShift == 20 /* 1M */
4489 || cShift == 22 /* 4M */ || cShift == 32 /* 4G */)
4490 {
4491 /* Remove the range of I/O virtual addresses requesting to be invalidated. */
4492 size_t const cbAccess = RT_BIT_64(cShift);
4493 iommuAmdIotlbRemoveRange(pDevIns, uDomainId, uIova, cbAccess);
4494 }
4495 else
4496 {
4497 /*
4498 * The guest provided size is invalid or exceeds the largest, meaningful page size.
4499 * In such situations we must remove all ranges for the specified domain ID.
4500 */
4501 iommuAmdIotlbRemoveDomainId(pDevIns, uDomainId);
4502 }
4503
4504 return VINF_SUCCESS;
4505 }
4506 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4507 return VERR_IOMMU_CMD_INVALID_FORMAT;
4508#else
4509 return VINF_SUCCESS;
4510#endif
4511 }
4512
4513 case IOMMU_CMD_INV_IOTLB_PAGES:
4514 {
4515 STAM_COUNTER_INC(&pThis->StatCmdInvIotlbPages);
4516
4517 uint32_t const uCapHdr = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_CAP_HDR);
4518 if (RT_BF_GET(uCapHdr, IOMMU_BF_CAPHDR_IOTLB_SUP))
4519 {
4520 /** @todo IOMMU: Implement remote IOTLB invalidation. */
4521 return VERR_NOT_IMPLEMENTED;
4522 }
4523 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4524 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4525 }
4526
4527 case IOMMU_CMD_INV_INTR_TABLE:
4528 {
4529 /** @todo IOMMU: Implement this once we implement IOTLB. Pretend success until
4530 * then. */
4531 STAM_COUNTER_INC(&pThis->StatCmdInvIntrTable);
4532 return VINF_SUCCESS;
4533 }
4534
4535 case IOMMU_CMD_PREFETCH_IOMMU_PAGES:
4536 {
4537 /* Linux doesn't use prefetching of IOMMU pages, so we don't bother for now. */
4538 STAM_COUNTER_INC(&pThis->StatCmdPrefIommuPages);
4539 Assert(!pThis->ExtFeat.n.u1PrefetchSup);
4540 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4541 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4542 }
4543
4544 case IOMMU_CMD_COMPLETE_PPR_REQ:
4545 {
4546 STAM_COUNTER_INC(&pThis->StatCmdCompletePprReq);
4547
4548 /* We don't support PPR requests yet. */
4549 Assert(!pThis->ExtFeat.n.u1PprSup);
4550 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4551 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4552 }
4553
4554 case IOMMU_CMD_INV_IOMMU_ALL:
4555 {
4556 STAM_COUNTER_INC(&pThis->StatCmdInvIommuAll);
4557 if (pThis->ExtFeat.n.u1InvAllSup)
4558 {
4559#ifdef IOMMU_WITH_IOTLBE_CACHE
4560 PCCMD_INV_IOMMU_ALL_T pCmdInvAll = (PCCMD_INV_IOMMU_ALL_T)pCmd;
4561 AssertCompile(sizeof(*pCmdInvAll) == sizeof(*pCmd));
4562
4563 /* Validate reserved bits in the command. */
4564 if ( !(pCmdInvAll->au64[0] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_0_VALID_MASK)
4565 && !(pCmdInvAll->au64[1] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_1_VALID_MASK))
4566 {
4567 iommuAmdDteCacheRemoveAll(pDevIns);
4568 iommuAmdIotlbRemoveAll(pDevIns);
4569 return VINF_SUCCESS;
4570 }
4571 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4572 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4573#else
4574 return VINF_SUCCESS;
4575#endif
4576 }
4577 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4578 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4579 }
4580 }
4581
4582 STAM_COUNTER_DEC(&pThis->StatCmd);
4583 LogFunc(("Cmd(%#x): Unrecognized\n", bCmd));
4584 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4585 return VERR_IOMMU_CMD_NOT_SUPPORTED;
4586}
4587
4588
4589/**
4590 * The IOMMU command thread.
4591 *
4592 * @returns VBox status code.
4593 * @param pDevIns The IOMMU device instance.
4594 * @param pThread The command thread.
4595 */
4596static DECLCALLBACK(int) iommuAmdR3CmdThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
4597{
4598 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4599 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
4600
4601 if (pThread->enmState == PDMTHREADSTATE_INITIALIZING)
4602 return VINF_SUCCESS;
4603
4604 /*
4605 * Pre-allocate the maximum command buffer size supported by the IOMMU.
4606 * This avoid trashing the heap as well as not wasting time allocating
4607 * and freeing buffers while processing commands.
4608 */
4609 size_t const cbMaxCmdBuf = sizeof(CMD_GENERIC_T) * iommuAmdGetBufMaxEntries(15);
4610 void *pvCmds = RTMemAllocZ(cbMaxCmdBuf);
4611 AssertPtrReturn(pvCmds, VERR_NO_MEMORY);
4612
4613 while (pThread->enmState == PDMTHREADSTATE_RUNNING)
4614 {
4615 /*
4616 * Sleep perpetually until we are woken up to process commands.
4617 */
4618 {
4619 ASMAtomicWriteBool(&pThis->fCmdThreadSleeping, true);
4620 bool fSignaled = ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, false);
4621 if (!fSignaled)
4622 {
4623 Assert(ASMAtomicReadBool(&pThis->fCmdThreadSleeping));
4624 int rc = PDMDevHlpSUPSemEventWaitNoResume(pDevIns, pThis->hEvtCmdThread, RT_INDEFINITE_WAIT);
4625 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_INTERRUPTED, ("%Rrc\n", rc), rc);
4626 if (RT_UNLIKELY(pThread->enmState != PDMTHREADSTATE_RUNNING))
4627 break;
4628 Log4Func(("Woken up with rc=%Rrc\n", rc));
4629 ASMAtomicWriteBool(&pThis->fCmdThreadSignaled, false);
4630 }
4631 ASMAtomicWriteBool(&pThis->fCmdThreadSleeping, false);
4632 }
4633
4634 /*
4635 * Fetch and process IOMMU commands.
4636 */
4637 /** @todo r=ramshankar: We currently copy all commands from guest memory into a
4638 * temporary host buffer before processing them as a batch. If we want to
4639 * save on host memory a bit, we could (once PGM has the necessary APIs)
4640 * lock the page mappings page mappings and access them directly. */
4641 IOMMU_LOCK(pDevIns, pThisCC);
4642
4643 if (pThis->Status.n.u1CmdBufRunning)
4644 {
4645 /* Get the offsets we need to read commands from memory (circular buffer offset). */
4646 uint32_t const cbCmdBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
4647 uint32_t const offTail = pThis->CmdBufTailPtr.n.off;
4648 uint32_t offHead = pThis->CmdBufHeadPtr.n.off;
4649
4650 /* Validate. */
4651 Assert(!(offHead & ~IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK));
4652 Assert(offHead < cbCmdBuf);
4653 Assert(cbCmdBuf <= cbMaxCmdBuf);
4654
4655 if (offHead != offTail)
4656 {
4657 /* Read the entire command buffer from memory (avoids multiple PGM calls). */
4658 RTGCPHYS const GCPhysCmdBufBase = pThis->CmdBufBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
4659
4660 IOMMU_UNLOCK(pDevIns, pThisCC);
4661 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysCmdBufBase, pvCmds, cbCmdBuf);
4662 IOMMU_LOCK(pDevIns, pThisCC);
4663
4664 if (RT_SUCCESS(rc))
4665 {
4666 /* Indicate to software we've fetched all commands from the buffer. */
4667 pThis->CmdBufHeadPtr.n.off = offTail;
4668
4669 /* Allow IOMMU to do other work while we process commands. */
4670 IOMMU_UNLOCK(pDevIns, pThisCC);
4671
4672 /* Process the fetched commands. */
4673 EVT_GENERIC_T EvtError;
4674 do
4675 {
4676 PCCMD_GENERIC_T pCmd = (PCCMD_GENERIC_T)((uintptr_t)pvCmds + offHead);
4677 rc = iommuAmdR3CmdProcess(pDevIns, pCmd, GCPhysCmdBufBase + offHead, &EvtError);
4678 if (RT_FAILURE(rc))
4679 {
4680 if ( rc == VERR_IOMMU_CMD_NOT_SUPPORTED
4681 || rc == VERR_IOMMU_CMD_INVALID_FORMAT)
4682 {
4683 Assert(EvtError.n.u4EvtCode == IOMMU_EVT_ILLEGAL_CMD_ERROR);
4684 iommuAmdIllegalCmdEventRaise(pDevIns, (PCEVT_ILLEGAL_CMD_ERR_T)&EvtError);
4685 }
4686 else if (rc == VERR_IOMMU_CMD_HW_ERROR)
4687 {
4688 Assert(EvtError.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
4689 LogFunc(("Raising command hardware error. Cmd=%#x -> COMMAND_HW_ERROR\n", pCmd->n.u4Opcode));
4690 iommuAmdCmdHwErrorEventRaise(pDevIns, (PCEVT_CMD_HW_ERR_T)&EvtError);
4691 }
4692 break;
4693 }
4694
4695 /* Move to the next command in the circular buffer. */
4696 offHead = (offHead + sizeof(CMD_GENERIC_T)) % cbCmdBuf;
4697 } while (offHead != offTail);
4698 }
4699 else
4700 {
4701 LogFunc(("Failed to read command at %#RGp. rc=%Rrc -> COMMAND_HW_ERROR\n", GCPhysCmdBufBase, rc));
4702 EVT_CMD_HW_ERR_T EvtCmdHwErr;
4703 iommuAmdCmdHwErrorEventInit(GCPhysCmdBufBase, &EvtCmdHwErr);
4704 iommuAmdCmdHwErrorEventRaise(pDevIns, &EvtCmdHwErr);
4705
4706 IOMMU_UNLOCK(pDevIns, pThisCC);
4707 }
4708 }
4709 else
4710 IOMMU_UNLOCK(pDevIns, pThisCC);
4711 }
4712 else
4713 IOMMU_UNLOCK(pDevIns, pThisCC);
4714 }
4715
4716 RTMemFree(pvCmds);
4717 LogFlowFunc(("Command thread terminating\n"));
4718 return VINF_SUCCESS;
4719}
4720
4721
4722/**
4723 * Wakes up the command thread so it can respond to a state change.
4724 *
4725 * @returns VBox status code.
4726 * @param pDevIns The IOMMU device instance.
4727 * @param pThread The command thread.
4728 */
4729static DECLCALLBACK(int) iommuAmdR3CmdThreadWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
4730{
4731 RT_NOREF(pThread);
4732 LogFlowFunc(("\n"));
4733 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4734 return PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
4735}
4736
4737
4738/**
4739 * @callback_method_impl{FNPCICONFIGREAD}
4740 */
4741static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
4742 unsigned cb, uint32_t *pu32Value)
4743{
4744 /** @todo IOMMU: PCI config read stat counter. */
4745 VBOXSTRICTRC rcStrict = PDMDevHlpPCIConfigRead(pDevIns, pPciDev, uAddress, cb, pu32Value);
4746 Log3Func(("uAddress=%#x (cb=%u) -> %#x. rc=%Rrc\n", uAddress, cb, *pu32Value, VBOXSTRICTRC_VAL(rcStrict)));
4747 return rcStrict;
4748}
4749
4750
4751/**
4752 * @callback_method_impl{FNPCICONFIGWRITE}
4753 */
4754static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
4755 unsigned cb, uint32_t u32Value)
4756{
4757 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4758
4759 /*
4760 * Discard writes to read-only registers that are specific to the IOMMU.
4761 * Other common PCI registers are handled by the generic code, see devpciR3IsConfigByteWritable().
4762 * See PCI spec. 6.1. "Configuration Space Organization".
4763 */
4764 switch (uAddress)
4765 {
4766 case IOMMU_PCI_OFF_CAP_HDR: /* All bits are read-only. */
4767 case IOMMU_PCI_OFF_RANGE_REG: /* We don't have any devices integrated with the IOMMU. */
4768 case IOMMU_PCI_OFF_MISCINFO_REG_0: /* We don't support MSI-X. */
4769 case IOMMU_PCI_OFF_MISCINFO_REG_1: /* We don't support guest-address translation. */
4770 {
4771 LogFunc(("PCI config write (%#RX32) to read-only register %#x -> Ignored\n", u32Value, uAddress));
4772 return VINF_SUCCESS;
4773 }
4774 }
4775
4776 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
4777 IOMMU_LOCK(pDevIns, pThisCC);
4778
4779 VBOXSTRICTRC rcStrict = VERR_IOMMU_IPE_3;
4780 switch (uAddress)
4781 {
4782 case IOMMU_PCI_OFF_BASE_ADDR_REG_LO:
4783 {
4784 if (pThis->IommuBar.n.u1Enable)
4785 {
4786 rcStrict = VINF_SUCCESS;
4787 LogFunc(("Writing Base Address (Lo) when it's already enabled -> Ignored\n"));
4788 break;
4789 }
4790
4791 pThis->IommuBar.au32[0] = u32Value & IOMMU_BAR_VALID_MASK;
4792 if (pThis->IommuBar.n.u1Enable)
4793 {
4794 Assert(pThis->hMmio != NIL_IOMMMIOHANDLE); /* Paranoia. Ensure we have a valid IOM MMIO handle. */
4795 Assert(!pThis->ExtFeat.n.u1PerfCounterSup); /* Base is 16K aligned when performance counters aren't supported. */
4796 RTGCPHYS const GCPhysMmioBase = RT_MAKE_U64(pThis->IommuBar.au32[0] & 0xffffc000, pThis->IommuBar.au32[1]);
4797 RTGCPHYS const GCPhysMmioBasePrev = PDMDevHlpMmioGetMappingAddress(pDevIns, pThis->hMmio);
4798
4799 /* If the MMIO region is already mapped at the specified address, we're done. */
4800 Assert(GCPhysMmioBase != NIL_RTGCPHYS);
4801 if (GCPhysMmioBasePrev == GCPhysMmioBase)
4802 {
4803 rcStrict = VINF_SUCCESS;
4804 break;
4805 }
4806
4807 /* Unmap the previous MMIO region (which is at a different address). */
4808 if (GCPhysMmioBasePrev != NIL_RTGCPHYS)
4809 {
4810 LogFlowFunc(("Unmapping previous MMIO region at %#RGp\n", GCPhysMmioBasePrev));
4811 rcStrict = PDMDevHlpMmioUnmap(pDevIns, pThis->hMmio);
4812 if (RT_FAILURE(rcStrict))
4813 {
4814 LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
4815 break;
4816 }
4817 }
4818
4819 /* Map the newly specified MMIO region. */
4820 LogFlowFunc(("Mapping MMIO region at %#RGp\n", GCPhysMmioBase));
4821 rcStrict = PDMDevHlpMmioMap(pDevIns, pThis->hMmio, GCPhysMmioBase);
4822 if (RT_FAILURE(rcStrict))
4823 {
4824 LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
4825 break;
4826 }
4827 }
4828 else
4829 rcStrict = VINF_SUCCESS;
4830 break;
4831 }
4832
4833 case IOMMU_PCI_OFF_BASE_ADDR_REG_HI:
4834 {
4835 if (!pThis->IommuBar.n.u1Enable)
4836 pThis->IommuBar.au32[1] = u32Value;
4837 else
4838 {
4839 rcStrict = VINF_SUCCESS;
4840 LogFunc(("Writing Base Address (Hi) when it's already enabled -> Ignored\n"));
4841 }
4842 break;
4843 }
4844
4845 case IOMMU_PCI_OFF_MSI_CAP_HDR:
4846 {
4847 u32Value |= RT_BIT(23); /* 64-bit MSI addressess must always be enabled for IOMMU. */
4848 RT_FALL_THRU();
4849 }
4850 default:
4851 {
4852 rcStrict = PDMDevHlpPCIConfigWrite(pDevIns, pPciDev, uAddress, cb, u32Value);
4853 break;
4854 }
4855 }
4856
4857 IOMMU_UNLOCK(pDevIns, pThisCC);
4858
4859 Log3Func(("uAddress=%#x (cb=%u) with %#x. rc=%Rrc\n", uAddress, cb, u32Value, VBOXSTRICTRC_VAL(rcStrict)));
4860 return rcStrict;
4861}
4862
4863
4864/**
4865 * @callback_method_impl{FNDBGFHANDLERDEV}
4866 */
4867static DECLCALLBACK(void) iommuAmdR3DbgInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
4868{
4869 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4870 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
4871 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
4872
4873 bool fVerbose;
4874 if ( pszArgs
4875 && !strncmp(pszArgs, RT_STR_TUPLE("verbose")))
4876 fVerbose = true;
4877 else
4878 fVerbose = false;
4879
4880 pHlp->pfnPrintf(pHlp, "AMD-IOMMU:\n");
4881 /* Device Table Base Addresses (all segments). */
4882 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
4883 {
4884 DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
4885 pHlp->pfnPrintf(pHlp, " Device Table BAR %u = %#RX64\n", i, DevTabBar.u64);
4886 if (fVerbose)
4887 {
4888 pHlp->pfnPrintf(pHlp, " Size = %#x (%u bytes)\n", DevTabBar.n.u9Size,
4889 IOMMU_GET_DEV_TAB_LEN(&DevTabBar));
4890 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT);
4891 }
4892 }
4893 /* Command Buffer Base Address Register. */
4894 {
4895 CMD_BUF_BAR_T const CmdBufBar = pThis->CmdBufBaseAddr;
4896 uint8_t const uEncodedLen = CmdBufBar.n.u4Len;
4897 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
4898 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
4899 pHlp->pfnPrintf(pHlp, " Command Buffer BAR = %#RX64\n", CmdBufBar.u64);
4900 if (fVerbose)
4901 {
4902 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", CmdBufBar.n.u40Base << X86_PAGE_4K_SHIFT);
4903 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
4904 cEntries, cbBuffer);
4905 }
4906 }
4907 /* Event Log Base Address Register. */
4908 {
4909 EVT_LOG_BAR_T const EvtLogBar = pThis->EvtLogBaseAddr;
4910 uint8_t const uEncodedLen = EvtLogBar.n.u4Len;
4911 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
4912 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
4913 pHlp->pfnPrintf(pHlp, " Event Log BAR = %#RX64\n", EvtLogBar.u64);
4914 if (fVerbose)
4915 {
4916 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", EvtLogBar.n.u40Base << X86_PAGE_4K_SHIFT);
4917 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
4918 cEntries, cbBuffer);
4919 }
4920 }
4921 /* IOMMU Control Register. */
4922 {
4923 IOMMU_CTRL_T const Ctrl = pThis->Ctrl;
4924 pHlp->pfnPrintf(pHlp, " Control = %#RX64\n", Ctrl.u64);
4925 if (fVerbose)
4926 {
4927 pHlp->pfnPrintf(pHlp, " IOMMU enable = %RTbool\n", Ctrl.n.u1IommuEn);
4928 pHlp->pfnPrintf(pHlp, " HT Tunnel translation enable = %RTbool\n", Ctrl.n.u1HtTunEn);
4929 pHlp->pfnPrintf(pHlp, " Event log enable = %RTbool\n", Ctrl.n.u1EvtLogEn);
4930 pHlp->pfnPrintf(pHlp, " Event log interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
4931 pHlp->pfnPrintf(pHlp, " Completion wait interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
4932 pHlp->pfnPrintf(pHlp, " Invalidation timeout = %u\n", Ctrl.n.u3InvTimeOut);
4933 pHlp->pfnPrintf(pHlp, " Pass posted write = %RTbool\n", Ctrl.n.u1PassPW);
4934 pHlp->pfnPrintf(pHlp, " Respose Pass posted write = %RTbool\n", Ctrl.n.u1ResPassPW);
4935 pHlp->pfnPrintf(pHlp, " Coherent = %RTbool\n", Ctrl.n.u1Coherent);
4936 pHlp->pfnPrintf(pHlp, " Isochronous = %RTbool\n", Ctrl.n.u1Isoc);
4937 pHlp->pfnPrintf(pHlp, " Command buffer enable = %RTbool\n", Ctrl.n.u1CmdBufEn);
4938 pHlp->pfnPrintf(pHlp, " PPR log enable = %RTbool\n", Ctrl.n.u1PprLogEn);
4939 pHlp->pfnPrintf(pHlp, " PPR interrupt enable = %RTbool\n", Ctrl.n.u1PprIntrEn);
4940 pHlp->pfnPrintf(pHlp, " PPR enable = %RTbool\n", Ctrl.n.u1PprEn);
4941 pHlp->pfnPrintf(pHlp, " Guest translation eanble = %RTbool\n", Ctrl.n.u1GstTranslateEn);
4942 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC enable = %RTbool\n", Ctrl.n.u1GstVirtApicEn);
4943 pHlp->pfnPrintf(pHlp, " CRW = %#x\n", Ctrl.n.u4Crw);
4944 pHlp->pfnPrintf(pHlp, " SMI filter enable = %RTbool\n", Ctrl.n.u1SmiFilterEn);
4945 pHlp->pfnPrintf(pHlp, " Self-writeback disable = %RTbool\n", Ctrl.n.u1SelfWriteBackDis);
4946 pHlp->pfnPrintf(pHlp, " SMI filter log enable = %RTbool\n", Ctrl.n.u1SmiFilterLogEn);
4947 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC mode enable = %#x\n", Ctrl.n.u3GstVirtApicModeEn);
4948 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC GA log enable = %RTbool\n", Ctrl.n.u1GstLogEn);
4949 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC interrupt enable = %RTbool\n", Ctrl.n.u1GstIntrEn);
4950 pHlp->pfnPrintf(pHlp, " Dual PPR log enable = %#x\n", Ctrl.n.u2DualPprLogEn);
4951 pHlp->pfnPrintf(pHlp, " Dual event log enable = %#x\n", Ctrl.n.u2DualEvtLogEn);
4952 pHlp->pfnPrintf(pHlp, " Device table segmentation enable = %#x\n", Ctrl.n.u3DevTabSegEn);
4953 pHlp->pfnPrintf(pHlp, " Privilege abort enable = %#x\n", Ctrl.n.u2PrivAbortEn);
4954 pHlp->pfnPrintf(pHlp, " PPR auto response enable = %RTbool\n", Ctrl.n.u1PprAutoRespEn);
4955 pHlp->pfnPrintf(pHlp, " MARC enable = %RTbool\n", Ctrl.n.u1MarcEn);
4956 pHlp->pfnPrintf(pHlp, " Block StopMark enable = %RTbool\n", Ctrl.n.u1BlockStopMarkEn);
4957 pHlp->pfnPrintf(pHlp, " PPR auto response always-on enable = %RTbool\n", Ctrl.n.u1PprAutoRespAlwaysOnEn);
4958 pHlp->pfnPrintf(pHlp, " Domain IDPNE = %RTbool\n", Ctrl.n.u1DomainIDPNE);
4959 pHlp->pfnPrintf(pHlp, " Enhanced PPR handling = %RTbool\n", Ctrl.n.u1EnhancedPpr);
4960 pHlp->pfnPrintf(pHlp, " Host page table access/dirty bit update = %#x\n", Ctrl.n.u2HstAccDirtyBitUpdate);
4961 pHlp->pfnPrintf(pHlp, " Guest page table dirty bit disable = %RTbool\n", Ctrl.n.u1GstDirtyUpdateDis);
4962 pHlp->pfnPrintf(pHlp, " x2APIC enable = %RTbool\n", Ctrl.n.u1X2ApicEn);
4963 pHlp->pfnPrintf(pHlp, " x2APIC interrupt enable = %RTbool\n", Ctrl.n.u1X2ApicIntrGenEn);
4964 pHlp->pfnPrintf(pHlp, " Guest page table access bit update = %RTbool\n", Ctrl.n.u1GstAccessUpdateDis);
4965 }
4966 }
4967 /* Exclusion Base Address Register. */
4968 {
4969 IOMMU_EXCL_RANGE_BAR_T const ExclRangeBar = pThis->ExclRangeBaseAddr;
4970 pHlp->pfnPrintf(pHlp, " Exclusion BAR = %#RX64\n", ExclRangeBar.u64);
4971 if (fVerbose)
4972 {
4973 pHlp->pfnPrintf(pHlp, " Exclusion enable = %RTbool\n", ExclRangeBar.n.u1ExclEnable);
4974 pHlp->pfnPrintf(pHlp, " Allow all devices = %RTbool\n", ExclRangeBar.n.u1AllowAll);
4975 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
4976 ExclRangeBar.n.u40ExclRangeBase << X86_PAGE_4K_SHIFT);
4977 }
4978 }
4979 /* Exclusion Range Limit Register. */
4980 {
4981 IOMMU_EXCL_RANGE_LIMIT_T const ExclRangeLimit = pThis->ExclRangeLimit;
4982 pHlp->pfnPrintf(pHlp, " Exclusion Range Limit = %#RX64\n", ExclRangeLimit.u64);
4983 if (fVerbose)
4984 {
4985 pHlp->pfnPrintf(pHlp, " Range limit = %#RX64\n",
4986 (ExclRangeLimit.n.u40ExclRangeLimit << X86_PAGE_4K_SHIFT) | X86_PAGE_4K_OFFSET_MASK);
4987 }
4988 }
4989 /* Extended Feature Register. */
4990 {
4991 IOMMU_EXT_FEAT_T ExtFeat = pThis->ExtFeat;
4992 pHlp->pfnPrintf(pHlp, " Extended Feature Register = %#RX64\n", ExtFeat.u64);
4993 if (fVerbose)
4994 {
4995 pHlp->pfnPrintf(pHlp, " Prefetch support = %RTbool\n", ExtFeat.n.u1PrefetchSup);
4996 pHlp->pfnPrintf(pHlp, " PPR support = %RTbool\n", ExtFeat.n.u1PprSup);
4997 pHlp->pfnPrintf(pHlp, " x2APIC support = %RTbool\n", ExtFeat.n.u1X2ApicSup);
4998 pHlp->pfnPrintf(pHlp, " NX and privilege level support = %RTbool\n", ExtFeat.n.u1NoExecuteSup);
4999 pHlp->pfnPrintf(pHlp, " Guest translation support = %RTbool\n", ExtFeat.n.u1GstTranslateSup);
5000 pHlp->pfnPrintf(pHlp, " Invalidate-All command support = %RTbool\n", ExtFeat.n.u1InvAllSup);
5001 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC support = %RTbool\n", ExtFeat.n.u1GstVirtApicSup);
5002 pHlp->pfnPrintf(pHlp, " Hardware error register support = %RTbool\n", ExtFeat.n.u1HwErrorSup);
5003 pHlp->pfnPrintf(pHlp, " Performance counters support = %RTbool\n", ExtFeat.n.u1PerfCounterSup);
5004 pHlp->pfnPrintf(pHlp, " Host address translation size = %#x\n", ExtFeat.n.u2HostAddrTranslateSize);
5005 pHlp->pfnPrintf(pHlp, " Guest address translation size = %#x\n", ExtFeat.n.u2GstAddrTranslateSize);
5006 pHlp->pfnPrintf(pHlp, " Guest CR3 root table level support = %#x\n", ExtFeat.n.u2GstCr3RootTblLevel);
5007 pHlp->pfnPrintf(pHlp, " SMI filter register support = %#x\n", ExtFeat.n.u2SmiFilterSup);
5008 pHlp->pfnPrintf(pHlp, " SMI filter register count = %#x\n", ExtFeat.n.u3SmiFilterCount);
5009 pHlp->pfnPrintf(pHlp, " Guest virtual-APIC modes support = %#x\n", ExtFeat.n.u3GstVirtApicModeSup);
5010 pHlp->pfnPrintf(pHlp, " Dual PPR log support = %#x\n", ExtFeat.n.u2DualPprLogSup);
5011 pHlp->pfnPrintf(pHlp, " Dual event log support = %#x\n", ExtFeat.n.u2DualEvtLogSup);
5012 pHlp->pfnPrintf(pHlp, " Maximum PASID = %#x\n", ExtFeat.n.u5MaxPasidSup);
5013 pHlp->pfnPrintf(pHlp, " User/supervisor page protection support = %RTbool\n", ExtFeat.n.u1UserSupervisorSup);
5014 pHlp->pfnPrintf(pHlp, " Device table segments supported = %#x (%u)\n", ExtFeat.n.u2DevTabSegSup,
5015 g_acDevTabSegs[ExtFeat.n.u2DevTabSegSup]);
5016 pHlp->pfnPrintf(pHlp, " PPR log overflow early warning support = %RTbool\n", ExtFeat.n.u1PprLogOverflowWarn);
5017 pHlp->pfnPrintf(pHlp, " PPR auto response support = %RTbool\n", ExtFeat.n.u1PprAutoRespSup);
5018 pHlp->pfnPrintf(pHlp, " MARC support = %#x\n", ExtFeat.n.u2MarcSup);
5019 pHlp->pfnPrintf(pHlp, " Block StopMark message support = %RTbool\n", ExtFeat.n.u1BlockStopMarkSup);
5020 pHlp->pfnPrintf(pHlp, " Performance optimization support = %RTbool\n", ExtFeat.n.u1PerfOptSup);
5021 pHlp->pfnPrintf(pHlp, " MSI capability MMIO access support = %RTbool\n", ExtFeat.n.u1MsiCapMmioSup);
5022 pHlp->pfnPrintf(pHlp, " Guest I/O protection support = %RTbool\n", ExtFeat.n.u1GstIoSup);
5023 pHlp->pfnPrintf(pHlp, " Host access support = %RTbool\n", ExtFeat.n.u1HostAccessSup);
5024 pHlp->pfnPrintf(pHlp, " Enhanced PPR handling support = %RTbool\n", ExtFeat.n.u1EnhancedPprSup);
5025 pHlp->pfnPrintf(pHlp, " Attribute forward supported = %RTbool\n", ExtFeat.n.u1AttrForwardSup);
5026 pHlp->pfnPrintf(pHlp, " Host dirty support = %RTbool\n", ExtFeat.n.u1HostDirtySup);
5027 pHlp->pfnPrintf(pHlp, " Invalidate IOTLB type support = %RTbool\n", ExtFeat.n.u1InvIoTlbTypeSup);
5028 pHlp->pfnPrintf(pHlp, " Guest page table access bit hw disable = %RTbool\n", ExtFeat.n.u1GstUpdateDisSup);
5029 pHlp->pfnPrintf(pHlp, " Force physical dest for remapped intr. = %RTbool\n", ExtFeat.n.u1ForcePhysDstSup);
5030 }
5031 }
5032 /* PPR Log Base Address Register. */
5033 {
5034 PPR_LOG_BAR_T PprLogBar = pThis->PprLogBaseAddr;
5035 uint8_t const uEncodedLen = PprLogBar.n.u4Len;
5036 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5037 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5038 pHlp->pfnPrintf(pHlp, " PPR Log BAR = %#RX64\n", PprLogBar.u64);
5039 if (fVerbose)
5040 {
5041 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", PprLogBar.n.u40Base << X86_PAGE_4K_SHIFT);
5042 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5043 cEntries, cbBuffer);
5044 }
5045 }
5046 /* Hardware Event (Hi) Register. */
5047 {
5048 IOMMU_HW_EVT_HI_T HwEvtHi = pThis->HwEvtHi;
5049 pHlp->pfnPrintf(pHlp, " Hardware Event (Hi) = %#RX64\n", HwEvtHi.u64);
5050 if (fVerbose)
5051 {
5052 pHlp->pfnPrintf(pHlp, " First operand = %#RX64\n", HwEvtHi.n.u60FirstOperand);
5053 pHlp->pfnPrintf(pHlp, " Event code = %#RX8\n", HwEvtHi.n.u4EvtCode);
5054 }
5055 }
5056 /* Hardware Event (Lo) Register. */
5057 pHlp->pfnPrintf(pHlp, " Hardware Event (Lo) = %#RX64\n", pThis->HwEvtLo);
5058 /* Hardware Event Status. */
5059 {
5060 IOMMU_HW_EVT_STATUS_T HwEvtStatus = pThis->HwEvtStatus;
5061 pHlp->pfnPrintf(pHlp, " Hardware Event Status = %#RX64\n", HwEvtStatus.u64);
5062 if (fVerbose)
5063 {
5064 pHlp->pfnPrintf(pHlp, " Valid = %RTbool\n", HwEvtStatus.n.u1Valid);
5065 pHlp->pfnPrintf(pHlp, " Overflow = %RTbool\n", HwEvtStatus.n.u1Overflow);
5066 }
5067 }
5068 /* Guest Virtual-APIC Log Base Address Register. */
5069 {
5070 GALOG_BAR_T const GALogBar = pThis->GALogBaseAddr;
5071 uint8_t const uEncodedLen = GALogBar.n.u4Len;
5072 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5073 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5074 pHlp->pfnPrintf(pHlp, " Guest Log BAR = %#RX64\n", GALogBar.u64);
5075 if (fVerbose)
5076 {
5077 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", GALogBar.n.u40Base << X86_PAGE_4K_SHIFT);
5078 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5079 cEntries, cbBuffer);
5080 }
5081 }
5082 /* Guest Virtual-APIC Log Tail Address Register. */
5083 {
5084 GALOG_TAIL_ADDR_T GALogTail = pThis->GALogTailAddr;
5085 pHlp->pfnPrintf(pHlp, " Guest Log Tail Address = %#RX64\n", GALogTail.u64);
5086 if (fVerbose)
5087 pHlp->pfnPrintf(pHlp, " Tail address = %#RX64\n", GALogTail.n.u40GALogTailAddr);
5088 }
5089 /* PPR Log B Base Address Register. */
5090 {
5091 PPR_LOG_B_BAR_T PprLogBBar = pThis->PprLogBBaseAddr;
5092 uint8_t const uEncodedLen = PprLogBBar.n.u4Len;
5093 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5094 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5095 pHlp->pfnPrintf(pHlp, " PPR Log B BAR = %#RX64\n", PprLogBBar.u64);
5096 if (fVerbose)
5097 {
5098 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", PprLogBBar.n.u40Base << X86_PAGE_4K_SHIFT);
5099 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5100 cEntries, cbBuffer);
5101 }
5102 }
5103 /* Event Log B Base Address Register. */
5104 {
5105 EVT_LOG_B_BAR_T EvtLogBBar = pThis->EvtLogBBaseAddr;
5106 uint8_t const uEncodedLen = EvtLogBBar.n.u4Len;
5107 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5108 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5109 pHlp->pfnPrintf(pHlp, " Event Log B BAR = %#RX64\n", EvtLogBBar.u64);
5110 if (fVerbose)
5111 {
5112 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", EvtLogBBar.n.u40Base << X86_PAGE_4K_SHIFT);
5113 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5114 cEntries, cbBuffer);
5115 }
5116 }
5117 /* Device-Specific Feature Extension Register. */
5118 {
5119 DEV_SPECIFIC_FEAT_T const DevSpecificFeat = pThis->DevSpecificFeat;
5120 pHlp->pfnPrintf(pHlp, " Device-specific Feature = %#RX64\n", DevSpecificFeat.u64);
5121 if (fVerbose)
5122 {
5123 pHlp->pfnPrintf(pHlp, " Feature = %#RX32\n", DevSpecificFeat.n.u24DevSpecFeat);
5124 pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificFeat.n.u4RevMinor);
5125 pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificFeat.n.u4RevMajor);
5126 }
5127 }
5128 /* Device-Specific Control Extension Register. */
5129 {
5130 DEV_SPECIFIC_CTRL_T const DevSpecificCtrl = pThis->DevSpecificCtrl;
5131 pHlp->pfnPrintf(pHlp, " Device-specific Control = %#RX64\n", DevSpecificCtrl.u64);
5132 if (fVerbose)
5133 {
5134 pHlp->pfnPrintf(pHlp, " Control = %#RX32\n", DevSpecificCtrl.n.u24DevSpecCtrl);
5135 pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificCtrl.n.u4RevMinor);
5136 pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificCtrl.n.u4RevMajor);
5137 }
5138 }
5139 /* Device-Specific Status Extension Register. */
5140 {
5141 DEV_SPECIFIC_STATUS_T const DevSpecificStatus = pThis->DevSpecificStatus;
5142 pHlp->pfnPrintf(pHlp, " Device-specific Status = %#RX64\n", DevSpecificStatus.u64);
5143 if (fVerbose)
5144 {
5145 pHlp->pfnPrintf(pHlp, " Status = %#RX32\n", DevSpecificStatus.n.u24DevSpecStatus);
5146 pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificStatus.n.u4RevMinor);
5147 pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificStatus.n.u4RevMajor);
5148 }
5149 }
5150 /* Miscellaneous Information Register (Lo and Hi). */
5151 {
5152 MSI_MISC_INFO_T const MiscInfo = pThis->MiscInfo;
5153 pHlp->pfnPrintf(pHlp, " Misc. Info. Register = %#RX64\n", MiscInfo.u64);
5154 if (fVerbose)
5155 {
5156 pHlp->pfnPrintf(pHlp, " Event Log MSI number = %#x\n", MiscInfo.n.u5MsiNumEvtLog);
5157 pHlp->pfnPrintf(pHlp, " Guest Virtual-Address Size = %#x\n", MiscInfo.n.u3GstVirtAddrSize);
5158 pHlp->pfnPrintf(pHlp, " Physical Address Size = %#x\n", MiscInfo.n.u7PhysAddrSize);
5159 pHlp->pfnPrintf(pHlp, " Virtual-Address Size = %#x\n", MiscInfo.n.u7VirtAddrSize);
5160 pHlp->pfnPrintf(pHlp, " HT Transport ATS Range Reserved = %RTbool\n", MiscInfo.n.u1HtAtsResv);
5161 pHlp->pfnPrintf(pHlp, " PPR MSI number = %#x\n", MiscInfo.n.u5MsiNumPpr);
5162 pHlp->pfnPrintf(pHlp, " GA Log MSI number = %#x\n", MiscInfo.n.u5MsiNumGa);
5163 }
5164 }
5165 /* MSI Capability Header. */
5166 {
5167 MSI_CAP_HDR_T MsiCapHdr;
5168 MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
5169 pHlp->pfnPrintf(pHlp, " MSI Capability Header = %#RX32\n", MsiCapHdr.u32);
5170 if (fVerbose)
5171 {
5172 pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiCapHdr.n.u8MsiCapId);
5173 pHlp->pfnPrintf(pHlp, " Capability Ptr (PCI config offset) = %#x\n", MsiCapHdr.n.u8MsiCapPtr);
5174 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", MsiCapHdr.n.u1MsiEnable);
5175 pHlp->pfnPrintf(pHlp, " Multi-message capability = %#x\n", MsiCapHdr.n.u3MsiMultiMessCap);
5176 pHlp->pfnPrintf(pHlp, " Multi-message enable = %#x\n", MsiCapHdr.n.u3MsiMultiMessEn);
5177 }
5178 }
5179 /* MSI Address Register (Lo and Hi). */
5180 {
5181 uint32_t const uMsiAddrLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
5182 uint32_t const uMsiAddrHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
5183 MSIADDR MsiAddr;
5184 MsiAddr.u64 = RT_MAKE_U64(uMsiAddrLo, uMsiAddrHi);
5185 pHlp->pfnPrintf(pHlp, " MSI Address = %#RX64\n", MsiAddr.u64);
5186 if (fVerbose)
5187 {
5188 pHlp->pfnPrintf(pHlp, " Destination mode = %#x\n", MsiAddr.n.u1DestMode);
5189 pHlp->pfnPrintf(pHlp, " Redirection hint = %#x\n", MsiAddr.n.u1RedirHint);
5190 pHlp->pfnPrintf(pHlp, " Destination Id = %#x\n", MsiAddr.n.u8DestId);
5191 pHlp->pfnPrintf(pHlp, " Address = %#RX32\n", MsiAddr.n.u12Addr);
5192 pHlp->pfnPrintf(pHlp, " Address (Hi) / Rsvd? = %#RX32\n", MsiAddr.n.u32Rsvd0);
5193 }
5194 }
5195 /* MSI Data. */
5196 {
5197 MSIDATA MsiData;
5198 MsiData.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
5199 pHlp->pfnPrintf(pHlp, " MSI Data = %#RX32\n", MsiData.u32);
5200 if (fVerbose)
5201 {
5202 pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", MsiData.n.u8Vector,
5203 MsiData.n.u8Vector);
5204 pHlp->pfnPrintf(pHlp, " Delivery mode = %#x\n", MsiData.n.u3DeliveryMode);
5205 pHlp->pfnPrintf(pHlp, " Level = %#x\n", MsiData.n.u1Level);
5206 pHlp->pfnPrintf(pHlp, " Trigger mode = %s\n", MsiData.n.u1TriggerMode ?
5207 "level" : "edge");
5208 }
5209 }
5210 /* MSI Mapping Capability Header (HyperTransport, reporting all 0s currently). */
5211 {
5212 MSI_MAP_CAP_HDR_T MsiMapCapHdr;
5213 MsiMapCapHdr.u32 = 0;
5214 pHlp->pfnPrintf(pHlp, " MSI Mapping Capability Header = %#RX32\n", MsiMapCapHdr.u32);
5215 if (fVerbose)
5216 {
5217 pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiMapCapHdr.n.u8MsiMapCapId);
5218 pHlp->pfnPrintf(pHlp, " Map enable = %RTbool\n", MsiMapCapHdr.n.u1MsiMapEn);
5219 pHlp->pfnPrintf(pHlp, " Map fixed = %RTbool\n", MsiMapCapHdr.n.u1MsiMapFixed);
5220 pHlp->pfnPrintf(pHlp, " Map capability type = %#x\n", MsiMapCapHdr.n.u5MapCapType);
5221 }
5222 }
5223 /* Performance Optimization Control Register. */
5224 {
5225 IOMMU_PERF_OPT_CTRL_T const PerfOptCtrl = pThis->PerfOptCtrl;
5226 pHlp->pfnPrintf(pHlp, " Performance Optimization Control = %#RX32\n", PerfOptCtrl.u32);
5227 if (fVerbose)
5228 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PerfOptCtrl.n.u1PerfOptEn);
5229 }
5230 /* XT (x2APIC) General Interrupt Control Register. */
5231 {
5232 IOMMU_XT_GEN_INTR_CTRL_T const XtGenIntrCtrl = pThis->XtGenIntrCtrl;
5233 pHlp->pfnPrintf(pHlp, " XT General Interrupt Control = %#RX64\n", XtGenIntrCtrl.u64);
5234 if (fVerbose)
5235 {
5236 pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
5237 !XtGenIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
5238 pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
5239 RT_MAKE_U64(XtGenIntrCtrl.n.u24X2ApicIntrDstLo, XtGenIntrCtrl.n.u7X2ApicIntrDstHi));
5240 pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGenIntrCtrl.n.u8X2ApicIntrVector);
5241 pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
5242 !XtGenIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
5243 }
5244 }
5245 /* XT (x2APIC) PPR Interrupt Control Register. */
5246 {
5247 IOMMU_XT_PPR_INTR_CTRL_T const XtPprIntrCtrl = pThis->XtPprIntrCtrl;
5248 pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtPprIntrCtrl.u64);
5249 if (fVerbose)
5250 {
5251 pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
5252 !XtPprIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
5253 pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
5254 RT_MAKE_U64(XtPprIntrCtrl.n.u24X2ApicIntrDstLo, XtPprIntrCtrl.n.u7X2ApicIntrDstHi));
5255 pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtPprIntrCtrl.n.u8X2ApicIntrVector);
5256 pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
5257 !XtPprIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
5258 }
5259 }
5260 /* XT (X2APIC) GA Log Interrupt Control Register. */
5261 {
5262 IOMMU_XT_GALOG_INTR_CTRL_T const XtGALogIntrCtrl = pThis->XtGALogIntrCtrl;
5263 pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtGALogIntrCtrl.u64);
5264 if (fVerbose)
5265 {
5266 pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
5267 !XtGALogIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
5268 pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
5269 RT_MAKE_U64(XtGALogIntrCtrl.n.u24X2ApicIntrDstLo, XtGALogIntrCtrl.n.u7X2ApicIntrDstHi));
5270 pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGALogIntrCtrl.n.u8X2ApicIntrVector);
5271 pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
5272 !XtGALogIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
5273 }
5274 }
5275 /* MARC Registers. */
5276 {
5277 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aMarcApers); i++)
5278 {
5279 pHlp->pfnPrintf(pHlp, " MARC Aperature %u:\n", i);
5280 MARC_APER_BAR_T const MarcAperBar = pThis->aMarcApers[i].Base;
5281 pHlp->pfnPrintf(pHlp, " Base = %#RX64\n", MarcAperBar.n.u40MarcBaseAddr << X86_PAGE_4K_SHIFT);
5282
5283 MARC_APER_RELOC_T const MarcAperReloc = pThis->aMarcApers[i].Reloc;
5284 pHlp->pfnPrintf(pHlp, " Reloc = %#RX64 (addr: %#RX64, read-only: %RTbool, enable: %RTbool)\n",
5285 MarcAperReloc.u64, MarcAperReloc.n.u40MarcRelocAddr << X86_PAGE_4K_SHIFT,
5286 MarcAperReloc.n.u1ReadOnly, MarcAperReloc.n.u1RelocEn);
5287
5288 MARC_APER_LEN_T const MarcAperLen = pThis->aMarcApers[i].Length;
5289 pHlp->pfnPrintf(pHlp, " Length = %u pages\n", MarcAperLen.n.u40MarcLength);
5290 }
5291 }
5292 /* Reserved Register. */
5293 pHlp->pfnPrintf(pHlp, " Reserved Register = %#RX64\n", pThis->RsvdReg);
5294 /* Command Buffer Head Pointer Register. */
5295 {
5296 CMD_BUF_HEAD_PTR_T const CmdBufHeadPtr = pThis->CmdBufHeadPtr;
5297 pHlp->pfnPrintf(pHlp, " Command Buffer Head Pointer = %#RX64 (off: %#x)\n", CmdBufHeadPtr.u64,
5298 CmdBufHeadPtr.n.off);
5299 }
5300 /* Command Buffer Tail Pointer Register. */
5301 {
5302 CMD_BUF_HEAD_PTR_T const CmdBufTailPtr = pThis->CmdBufTailPtr;
5303 pHlp->pfnPrintf(pHlp, " Command Buffer Tail Pointer = %#RX64 (off: %#x)\n", CmdBufTailPtr.u64,
5304 CmdBufTailPtr.n.off);
5305 }
5306 /* Event Log Head Pointer Register. */
5307 {
5308 EVT_LOG_HEAD_PTR_T const EvtLogHeadPtr = pThis->EvtLogHeadPtr;
5309 pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogHeadPtr.u64,
5310 EvtLogHeadPtr.n.off);
5311 }
5312 /* Event Log Tail Pointer Register. */
5313 {
5314 EVT_LOG_TAIL_PTR_T const EvtLogTailPtr = pThis->EvtLogTailPtr;
5315 pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogTailPtr.u64,
5316 EvtLogTailPtr.n.off);
5317 }
5318 /* Status Register. */
5319 {
5320 IOMMU_STATUS_T const Status = pThis->Status;
5321 pHlp->pfnPrintf(pHlp, " Status Register = %#RX64\n", Status.u64);
5322 if (fVerbose)
5323 {
5324 pHlp->pfnPrintf(pHlp, " Event log overflow = %RTbool\n", Status.n.u1EvtOverflow);
5325 pHlp->pfnPrintf(pHlp, " Event log interrupt = %RTbool\n", Status.n.u1EvtLogIntr);
5326 pHlp->pfnPrintf(pHlp, " Completion wait interrupt = %RTbool\n", Status.n.u1CompWaitIntr);
5327 pHlp->pfnPrintf(pHlp, " Event log running = %RTbool\n", Status.n.u1EvtLogRunning);
5328 pHlp->pfnPrintf(pHlp, " Command buffer running = %RTbool\n", Status.n.u1CmdBufRunning);
5329 pHlp->pfnPrintf(pHlp, " PPR overflow = %RTbool\n", Status.n.u1PprOverflow);
5330 pHlp->pfnPrintf(pHlp, " PPR interrupt = %RTbool\n", Status.n.u1PprIntr);
5331 pHlp->pfnPrintf(pHlp, " PPR log running = %RTbool\n", Status.n.u1PprLogRunning);
5332 pHlp->pfnPrintf(pHlp, " Guest log running = %RTbool\n", Status.n.u1GstLogRunning);
5333 pHlp->pfnPrintf(pHlp, " Guest log interrupt = %RTbool\n", Status.n.u1GstLogIntr);
5334 pHlp->pfnPrintf(pHlp, " PPR log B overflow = %RTbool\n", Status.n.u1PprOverflowB);
5335 pHlp->pfnPrintf(pHlp, " PPR log active = %RTbool\n", Status.n.u1PprLogActive);
5336 pHlp->pfnPrintf(pHlp, " Event log B overflow = %RTbool\n", Status.n.u1EvtOverflowB);
5337 pHlp->pfnPrintf(pHlp, " Event log active = %RTbool\n", Status.n.u1EvtLogActive);
5338 pHlp->pfnPrintf(pHlp, " PPR log B overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarlyB);
5339 pHlp->pfnPrintf(pHlp, " PPR log overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarly);
5340 }
5341 }
5342 /* PPR Log Head Pointer. */
5343 {
5344 PPR_LOG_HEAD_PTR_T const PprLogHeadPtr = pThis->PprLogHeadPtr;
5345 pHlp->pfnPrintf(pHlp, " PPR Log Head Pointer = %#RX64 (off: %#x)\n", PprLogHeadPtr.u64,
5346 PprLogHeadPtr.n.off);
5347 }
5348 /* PPR Log Tail Pointer. */
5349 {
5350 PPR_LOG_TAIL_PTR_T const PprLogTailPtr = pThis->PprLogTailPtr;
5351 pHlp->pfnPrintf(pHlp, " PPR Log Tail Pointer = %#RX64 (off: %#x)\n", PprLogTailPtr.u64,
5352 PprLogTailPtr.n.off);
5353 }
5354 /* Guest Virtual-APIC Log Head Pointer. */
5355 {
5356 GALOG_HEAD_PTR_T const GALogHeadPtr = pThis->GALogHeadPtr;
5357 pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Head Pointer = %#RX64 (off: %#x)\n", GALogHeadPtr.u64,
5358 GALogHeadPtr.n.u12GALogPtr);
5359 }
5360 /* Guest Virtual-APIC Log Tail Pointer. */
5361 {
5362 GALOG_HEAD_PTR_T const GALogTailPtr = pThis->GALogTailPtr;
5363 pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Tail Pointer = %#RX64 (off: %#x)\n", GALogTailPtr.u64,
5364 GALogTailPtr.n.u12GALogPtr);
5365 }
5366 /* PPR Log B Head Pointer. */
5367 {
5368 PPR_LOG_B_HEAD_PTR_T const PprLogBHeadPtr = pThis->PprLogBHeadPtr;
5369 pHlp->pfnPrintf(pHlp, " PPR Log B Head Pointer = %#RX64 (off: %#x)\n", PprLogBHeadPtr.u64,
5370 PprLogBHeadPtr.n.off);
5371 }
5372 /* PPR Log B Tail Pointer. */
5373 {
5374 PPR_LOG_B_TAIL_PTR_T const PprLogBTailPtr = pThis->PprLogBTailPtr;
5375 pHlp->pfnPrintf(pHlp, " PPR Log B Tail Pointer = %#RX64 (off: %#x)\n", PprLogBTailPtr.u64,
5376 PprLogBTailPtr.n.off);
5377 }
5378 /* Event Log B Head Pointer. */
5379 {
5380 EVT_LOG_B_HEAD_PTR_T const EvtLogBHeadPtr = pThis->EvtLogBHeadPtr;
5381 pHlp->pfnPrintf(pHlp, " Event Log B Head Pointer = %#RX64 (off: %#x)\n", EvtLogBHeadPtr.u64,
5382 EvtLogBHeadPtr.n.off);
5383 }
5384 /* Event Log B Tail Pointer. */
5385 {
5386 EVT_LOG_B_TAIL_PTR_T const EvtLogBTailPtr = pThis->EvtLogBTailPtr;
5387 pHlp->pfnPrintf(pHlp, " Event Log B Tail Pointer = %#RX64 (off: %#x)\n", EvtLogBTailPtr.u64,
5388 EvtLogBTailPtr.n.off);
5389 }
5390 /* PPR Log Auto Response Register. */
5391 {
5392 PPR_LOG_AUTO_RESP_T const PprLogAutoResp = pThis->PprLogAutoResp;
5393 pHlp->pfnPrintf(pHlp, " PPR Log Auto Response Register = %#RX64\n", PprLogAutoResp.u64);
5394 if (fVerbose)
5395 {
5396 pHlp->pfnPrintf(pHlp, " Code = %#x\n", PprLogAutoResp.n.u4AutoRespCode);
5397 pHlp->pfnPrintf(pHlp, " Mask Gen. = %RTbool\n", PprLogAutoResp.n.u1AutoRespMaskGen);
5398 }
5399 }
5400 /* PPR Log Overflow Early Warning Indicator Register. */
5401 {
5402 PPR_LOG_OVERFLOW_EARLY_T const PprLogOverflowEarly = pThis->PprLogOverflowEarly;
5403 pHlp->pfnPrintf(pHlp, " PPR Log overflow early warning = %#RX64\n", PprLogOverflowEarly.u64);
5404 if (fVerbose)
5405 {
5406 pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogOverflowEarly.n.u15Threshold);
5407 pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogOverflowEarly.n.u1IntrEn);
5408 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogOverflowEarly.n.u1Enable);
5409 }
5410 }
5411 /* PPR Log Overflow Early Warning Indicator Register. */
5412 {
5413 PPR_LOG_OVERFLOW_EARLY_T const PprLogBOverflowEarly = pThis->PprLogBOverflowEarly;
5414 pHlp->pfnPrintf(pHlp, " PPR Log B overflow early warning = %#RX64\n", PprLogBOverflowEarly.u64);
5415 if (fVerbose)
5416 {
5417 pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogBOverflowEarly.n.u15Threshold);
5418 pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogBOverflowEarly.n.u1IntrEn);
5419 pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogBOverflowEarly.n.u1Enable);
5420 }
5421 }
5422}
5423
5424
5425/**
5426 * Dumps the DTE via the info callback helper.
5427 *
5428 * @param pHlp The info helper.
5429 * @param pDte The device table entry.
5430 * @param pszPrefix The string prefix.
5431 */
5432static void iommuAmdR3DbgInfoDteWorker(PCDBGFINFOHLP pHlp, PCDTE_T pDte, const char *pszPrefix)
5433{
5434 AssertReturnVoid(pHlp);
5435 AssertReturnVoid(pDte);
5436 AssertReturnVoid(pszPrefix);
5437
5438 pHlp->pfnPrintf(pHlp, "%sValid = %RTbool\n", pszPrefix, pDte->n.u1Valid);
5439 pHlp->pfnPrintf(pHlp, "%sTranslation Valid = %RTbool\n", pszPrefix, pDte->n.u1TranslationValid);
5440 pHlp->pfnPrintf(pHlp, "%sHost Access Dirty = %#x\n", pszPrefix, pDte->n.u2Had);
5441 pHlp->pfnPrintf(pHlp, "%sPaging Mode = %u\n", pszPrefix, pDte->n.u3Mode);
5442 pHlp->pfnPrintf(pHlp, "%sPage Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix, pDte->n.u40PageTableRootPtrLo,
5443 pDte->n.u40PageTableRootPtrLo << 12);
5444 pHlp->pfnPrintf(pHlp, "%sPPR enable = %RTbool\n", pszPrefix, pDte->n.u1Ppr);
5445 pHlp->pfnPrintf(pHlp, "%sGuest PPR Resp w/ PASID = %RTbool\n", pszPrefix, pDte->n.u1GstPprRespPasid);
5446 pHlp->pfnPrintf(pHlp, "%sGuest I/O Prot Valid = %RTbool\n", pszPrefix, pDte->n.u1GstIoValid);
5447 pHlp->pfnPrintf(pHlp, "%sGuest Translation Valid = %RTbool\n", pszPrefix, pDte->n.u1GstTranslateValid);
5448 pHlp->pfnPrintf(pHlp, "%sGuest Levels Translated = %#x\n", pszPrefix, pDte->n.u2GstMode);
5449 pHlp->pfnPrintf(pHlp, "%sGuest Root Page Table Ptr = %#x %#x %#x (addr=%#RGp)\n", pszPrefix,
5450 pDte->n.u3GstCr3TableRootPtrLo, pDte->n.u16GstCr3TableRootPtrMid, pDte->n.u21GstCr3TableRootPtrHi,
5451 (pDte->n.u21GstCr3TableRootPtrHi << 31)
5452 | (pDte->n.u16GstCr3TableRootPtrMid << 15)
5453 | (pDte->n.u3GstCr3TableRootPtrLo << 12));
5454 pHlp->pfnPrintf(pHlp, "%sI/O Read = %s\n", pszPrefix, pDte->n.u1IoRead ? "allowed" : "denied");
5455 pHlp->pfnPrintf(pHlp, "%sI/O Write = %s\n", pszPrefix, pDte->n.u1IoWrite ? "allowed" : "denied");
5456 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd0);
5457 pHlp->pfnPrintf(pHlp, "%sDomain ID = %u (%#x)\n", pszPrefix, pDte->n.u16DomainId, pDte->n.u16DomainId);
5458 pHlp->pfnPrintf(pHlp, "%sIOTLB Enable = %RTbool\n", pszPrefix, pDte->n.u1IoTlbEnable);
5459 pHlp->pfnPrintf(pHlp, "%sSuppress I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressPfEvents);
5460 pHlp->pfnPrintf(pHlp, "%sSuppress all I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressAllPfEvents);
5461 pHlp->pfnPrintf(pHlp, "%sPort I/O Control = %#x\n", pszPrefix, pDte->n.u2IoCtl);
5462 pHlp->pfnPrintf(pHlp, "%sIOTLB Cache Hint = %s\n", pszPrefix, pDte->n.u1Cache ? "no caching" : "cache");
5463 pHlp->pfnPrintf(pHlp, "%sSnoop Disable = %RTbool\n", pszPrefix, pDte->n.u1SnoopDisable);
5464 pHlp->pfnPrintf(pHlp, "%sAllow Exclusion = %RTbool\n", pszPrefix, pDte->n.u1AllowExclusion);
5465 pHlp->pfnPrintf(pHlp, "%sSysMgt Message Enable = %RTbool\n", pszPrefix, pDte->n.u2SysMgt);
5466 pHlp->pfnPrintf(pHlp, "%sInterrupt Map Valid = %RTbool\n", pszPrefix, pDte->n.u1IntrMapValid);
5467 uint8_t const uIntrTabLen = pDte->n.u4IntrTableLength;
5468 if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
5469 {
5470 uint16_t const cEntries = IOMMU_GET_INTR_TAB_ENTRIES(pDte);
5471 uint16_t const cbIntrTable = IOMMU_GET_INTR_TAB_LEN(pDte);
5472 pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (%u entries, %u bytes)\n", pszPrefix, uIntrTabLen, cEntries,
5473 cbIntrTable);
5474 }
5475 else
5476 pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (invalid!)\n", pszPrefix, uIntrTabLen);
5477 pHlp->pfnPrintf(pHlp, "%sIgnore Unmapped Interrupts = %RTbool\n", pszPrefix, pDte->n.u1IgnoreUnmappedIntrs);
5478 pHlp->pfnPrintf(pHlp, "%sInterrupt Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix,
5479 pDte->n.u46IntrTableRootPtr, pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK);
5480 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u4Rsvd0);
5481 pHlp->pfnPrintf(pHlp, "%sINIT passthru = %RTbool\n", pszPrefix, pDte->n.u1InitPassthru);
5482 pHlp->pfnPrintf(pHlp, "%sExtInt passthru = %RTbool\n", pszPrefix, pDte->n.u1ExtIntPassthru);
5483 pHlp->pfnPrintf(pHlp, "%sNMI passthru = %RTbool\n", pszPrefix, pDte->n.u1NmiPassthru);
5484 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd2);
5485 pHlp->pfnPrintf(pHlp, "%sInterrupt Control = %#x\n", pszPrefix, pDte->n.u2IntrCtrl);
5486 pHlp->pfnPrintf(pHlp, "%sLINT0 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint0Passthru);
5487 pHlp->pfnPrintf(pHlp, "%sLINT1 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint1Passthru);
5488 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u32Rsvd0);
5489 pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u22Rsvd0);
5490 pHlp->pfnPrintf(pHlp, "%sAttribute Override Valid = %RTbool\n", pszPrefix, pDte->n.u1AttrOverride);
5491 pHlp->pfnPrintf(pHlp, "%sMode0FC = %#x\n", pszPrefix, pDte->n.u1Mode0FC);
5492 pHlp->pfnPrintf(pHlp, "%sSnoop Attribute = %#x\n", pszPrefix, pDte->n.u8SnoopAttr);
5493 pHlp->pfnPrintf(pHlp, "\n");
5494}
5495
5496
5497/**
5498 * @callback_method_impl{FNDBGFHANDLERDEV}
5499 */
5500static DECLCALLBACK(void) iommuAmdR3DbgInfoDte(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5501{
5502 if (pszArgs)
5503 {
5504 uint16_t uDevId = 0;
5505 int rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &uDevId);
5506 if (RT_SUCCESS(rc))
5507 {
5508 DTE_T Dte;
5509 rc = iommuAmdDteRead(pDevIns, uDevId, IOMMUOP_TRANSLATE_REQ, &Dte);
5510 if (RT_SUCCESS(rc))
5511 {
5512 pHlp->pfnPrintf(pHlp, "DTE for device %#x\n", uDevId);
5513 iommuAmdR3DbgInfoDteWorker(pHlp, &Dte, " ");
5514 return;
5515 }
5516 pHlp->pfnPrintf(pHlp, "Failed to read DTE for device ID %u (%#x). rc=%Rrc\n", uDevId, uDevId, rc);
5517 }
5518 else
5519 pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit device ID. rc=%Rrc\n", rc);
5520 }
5521 else
5522 pHlp->pfnPrintf(pHlp, "Missing device ID.\n");
5523}
5524
5525
#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
/**
 * @callback_method_impl{FNDBGFHANDLERDEV}
 *
 * Parses @a pszArgs as a 16-bit domain ID and walks the IOTLB entry tree with
 * iommuAmdR3IotlbEntryInfo as the per-node callback, holding the cache lock
 * for the duration of the walk.
 */
static DECLCALLBACK(void) iommuAmdR3DbgInfoIotlb(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
{
    /* The domain ID argument is mandatory. */
    if (!pszArgs)
    {
        pHlp->pfnPrintf(pHlp, "Missing domain ID.\n");
        return;
    }

    uint16_t idDomain = 0;
    int const rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &idDomain);
    if (RT_FAILURE(rc))
    {
        pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit domain ID. rc=%Rrc\n", rc);
        return;
    }

    pHlp->pfnPrintf(pHlp, "IOTLBEs for domain %u (%#x):\n", idDomain, idDomain);

    /* Bundle everything the per-entry callback needs. */
    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    IOTLBEINFOARG CallbackArgs;
    CallbackArgs.pIommu    = pThis;
    CallbackArgs.pHlp      = pHlp;
    CallbackArgs.uDomainId = idDomain;

    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
    RTAvlU64DoWithAll(&pThis->TreeIotlbe, true /* fFromLeft */, iommuAmdR3IotlbEntryInfo, &CallbackArgs);
    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
}
#endif
5556
5557
5558/**
5559 * @callback_method_impl{FNDBGFHANDLERDEV}
5560 */
5561static DECLCALLBACK(void) iommuAmdR3DbgInfoDevTabs(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5562{
5563 RT_NOREF(pszArgs);
5564
5565 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5566 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
5567 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
5568 NOREF(pPciDev);
5569
5570 uint8_t cSegments = 0;
5571 for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
5572 {
5573 DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
5574 RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT;
5575 if (GCPhysDevTab)
5576 ++cSegments;
5577 }
5578
5579 pHlp->pfnPrintf(pHlp, "AMD-IOMMU device tables with address translations enabled:\n");
5580 pHlp->pfnPrintf(pHlp, " DTE Segments=%u\n", cSegments);
5581 if (!cSegments)
5582 return;
5583
5584 for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
5585 {
5586 DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
5587 RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT;
5588 if (GCPhysDevTab)
5589 {
5590 uint32_t const cbDevTab = IOMMU_GET_DEV_TAB_LEN(&DevTabBar);
5591 uint32_t const cDtes = cbDevTab / sizeof(DTE_T);
5592
5593 void *pvDevTab = RTMemAllocZ(cbDevTab);
5594 if (RT_LIKELY(pvDevTab))
5595 {
5596 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDevTab, pvDevTab, cbDevTab);
5597 if (RT_SUCCESS(rc))
5598 {
5599 for (uint32_t idxDte = 0; idxDte < cDtes; idxDte++)
5600 {
5601 PCDTE_T pDte = (PCDTE_T)((uintptr_t)pvDevTab + idxDte * sizeof(DTE_T));
5602 if ( pDte->n.u1Valid
5603 && pDte->n.u1TranslationValid
5604 && pDte->n.u3Mode != 0)
5605 {
5606 pHlp->pfnPrintf(pHlp, " DTE %u (BDF %02x:%02x.%d)\n", idxDte,
5607 (idxDte >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
5608 (idxDte >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
5609 idxDte & VBOX_PCI_DEVFN_FUN_MASK);
5610 iommuAmdR3DbgInfoDteWorker(pHlp, pDte, " ");
5611 pHlp->pfnPrintf(pHlp, "\n");
5612 }
5613 }
5614 pHlp->pfnPrintf(pHlp, "\n");
5615 }
5616 else
5617 {
5618 pHlp->pfnPrintf(pHlp, " Failed to read table at %#RGp of size %zu bytes. rc=%Rrc!\n", GCPhysDevTab,
5619 cbDevTab, rc);
5620 }
5621
5622 RTMemFree(pvDevTab);
5623 }
5624 else
5625 {
5626 pHlp->pfnPrintf(pHlp, " Allocating %zu bytes for reading the device table failed!\n", cbDevTab);
5627 return;
5628 }
5629 }
5630 }
5631}
5632
5633
5634/**
5635 * @callback_method_impl{FNSSMDEVSAVEEXEC}
5636 */
5637static DECLCALLBACK(int) iommuAmdR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM)
5638{
5639 /** @todo IOMMU: Save state. */
5640 RT_NOREF2(pDevIns, pSSM);
5641 LogFlowFunc(("\n"));
5642 return VERR_NOT_IMPLEMENTED;
5643}
5644
5645
5646/**
5647 * @callback_method_impl{FNSSMDEVLOADEXEC}
5648 */
5649static DECLCALLBACK(int) iommuAmdR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
5650{
5651 /** @todo IOMMU: Load state. */
5652 RT_NOREF4(pDevIns, pSSM, uVersion, uPass);
5653 LogFlowFunc(("\n"));
5654 return VERR_NOT_IMPLEMENTED;
5655}
5656
5657
5658/**
5659 * @interface_method_impl{PDMDEVREG,pfnReset}
5660 */
5661static DECLCALLBACK(void) iommuAmdR3Reset(PPDMDEVINS pDevIns)
5662{
5663 /*
5664 * Resets read-write portion of the IOMMU state.
5665 *
5666 * NOTE! State not initialized here is expected to be initialized during
5667 * device construction and remain read-only through the lifetime of the VM.
5668 */
5669 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5670 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
5671 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
5672 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
5673
5674 IOMMU_LOCK_NORET(pDevIns, pThisCC);
5675
5676 LogFlowFunc(("\n"));
5677
5678 memset(&pThis->aDevTabBaseAddrs[0], 0, sizeof(pThis->aDevTabBaseAddrs));
5679
5680 pThis->CmdBufBaseAddr.u64 = 0;
5681 pThis->CmdBufBaseAddr.n.u4Len = 8;
5682
5683 pThis->EvtLogBaseAddr.u64 = 0;
5684 pThis->EvtLogBaseAddr.n.u4Len = 8;
5685
5686 pThis->Ctrl.u64 = 0;
5687 pThis->Ctrl.n.u1Coherent = 1;
5688 Assert(!pThis->ExtFeat.n.u1BlockStopMarkSup);
5689
5690 pThis->ExclRangeBaseAddr.u64 = 0;
5691 pThis->ExclRangeLimit.u64 = 0;
5692
5693 pThis->PprLogBaseAddr.u64 = 0;
5694 pThis->PprLogBaseAddr.n.u4Len = 8;
5695
5696 pThis->HwEvtHi.u64 = 0;
5697 pThis->HwEvtLo = 0;
5698 pThis->HwEvtStatus.u64 = 0;
5699
5700 pThis->GALogBaseAddr.u64 = 0;
5701 pThis->GALogBaseAddr.n.u4Len = 8;
5702 pThis->GALogTailAddr.u64 = 0;
5703
5704 pThis->PprLogBBaseAddr.u64 = 0;
5705 pThis->PprLogBBaseAddr.n.u4Len = 8;
5706
5707 pThis->EvtLogBBaseAddr.u64 = 0;
5708 pThis->EvtLogBBaseAddr.n.u4Len = 8;
5709
5710 pThis->PerfOptCtrl.u32 = 0;
5711
5712 pThis->XtGenIntrCtrl.u64 = 0;
5713 pThis->XtPprIntrCtrl.u64 = 0;
5714 pThis->XtGALogIntrCtrl.u64 = 0;
5715
5716 memset(&pThis->aMarcApers[0], 0, sizeof(pThis->aMarcApers));
5717
5718 pThis->CmdBufHeadPtr.u64 = 0;
5719 pThis->CmdBufTailPtr.u64 = 0;
5720 pThis->EvtLogHeadPtr.u64 = 0;
5721 pThis->EvtLogTailPtr.u64 = 0;
5722
5723 pThis->Status.u64 = 0;
5724
5725 pThis->PprLogHeadPtr.u64 = 0;
5726 pThis->PprLogTailPtr.u64 = 0;
5727
5728 pThis->GALogHeadPtr.u64 = 0;
5729 pThis->GALogTailPtr.u64 = 0;
5730
5731 pThis->PprLogBHeadPtr.u64 = 0;
5732 pThis->PprLogBTailPtr.u64 = 0;
5733
5734 pThis->EvtLogBHeadPtr.u64 = 0;
5735 pThis->EvtLogBTailPtr.u64 = 0;
5736
5737 pThis->PprLogAutoResp.u64 = 0;
5738 pThis->PprLogOverflowEarly.u64 = 0;
5739 pThis->PprLogBOverflowEarly.u64 = 0;
5740
5741 pThis->IommuBar.u64 = 0;
5742 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0);
5743 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0);
5744
5745 PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER);
5746
5747 IOMMU_UNLOCK(pDevIns, pThisCC);
5748
5749#ifdef IOMMU_WITH_IOTLBE_CACHE
5750 iommuAmdDteCacheRemoveAll(pDevIns);
5751 iommuAmdIotlbRemoveAll(pDevIns);
5752#endif
5753}
5754
5755
5756/**
5757 * @interface_method_impl{PDMDEVREG,pfnDestruct}
5758 */
5759static DECLCALLBACK(int) iommuAmdR3Destruct(PPDMDEVINS pDevIns)
5760{
5761 PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
5762 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5763 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
5764 LogFlowFunc(("\n"));
5765
5766 IOMMU_LOCK_NORET(pDevIns, pThisCC);
5767
5768 /* Close the command thread semaphore. */
5769 if (pThis->hEvtCmdThread != NIL_SUPSEMEVENT)
5770 {
5771 PDMDevHlpSUPSemEventClose(pDevIns, pThis->hEvtCmdThread);
5772 pThis->hEvtCmdThread = NIL_SUPSEMEVENT;
5773 }
5774
5775#ifdef IOMMU_WITH_IOTLBE_CACHE
5776 /* Destroy level 1 cache. */
5777 if (pThis->paDevices)
5778 {
5779 PDMDevHlpMMHeapFree(pDevIns, pThis->paDevices);
5780 pThis->paDevices = NULL;
5781 }
5782
5783 /* Destroy level 2 cache. */
5784 if (pThis->paIotlbes)
5785 {
5786 PDMDevHlpMMHeapFree(pDevIns, pThis->paIotlbes);
5787 pThis->paIotlbes = NULL;
5788 }
5789#endif
5790
5791 IOMMU_UNLOCK(pDevIns, pThisCC);
5792 return VINF_SUCCESS;
5793}
5794
5795
5796/**
5797 * @interface_method_impl{PDMDEVREG,pfnConstruct}
5798 */
5799static DECLCALLBACK(int) iommuAmdR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg)
5800{
5801 PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
5802 RT_NOREF(pCfg);
5803
5804 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5805 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
5806 pThis->u32Magic = IOMMU_MAGIC;
5807 pThisCC->pDevInsR3 = pDevIns;
5808
5809 LogFlowFunc(("iInstance=%d\n", iInstance));
5810
5811 /*
5812 * Register the IOMMU with PDM.
5813 */
5814 PDMIOMMUREGR3 IommuReg;
5815 RT_ZERO(IommuReg);
5816 IommuReg.u32Version = PDM_IOMMUREGCC_VERSION;
5817 IommuReg.pfnMemAccess = iommuAmdMemAccess;
5818 IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess;
5819 IommuReg.pfnMsiRemap = iommuAmdMsiRemap;
5820 IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION;
5821 int rc = PDMDevHlpIommuRegister(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp), &pThis->idxIommu);
5822 if (RT_FAILURE(rc))
5823 return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as an IOMMU device"));
5824 if (pThisCC->CTX_SUFF(pIommuHlp)->u32Version != PDM_IOMMUHLPR3_VERSION)
5825 return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS,
5826 N_("IOMMU helper version mismatch; got %#x expected %#x"),
5827 pThisCC->CTX_SUFF(pIommuHlp)->u32Version, PDM_IOMMUHLPR3_VERSION);
5828 if (pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd != PDM_IOMMUHLPR3_VERSION)
5829 return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS,
5830 N_("IOMMU helper end-version mismatch; got %#x expected %#x"),
5831 pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd, PDM_IOMMUHLPR3_VERSION);
5832
5833 /*
5834 * We will use PDM's critical section (via helpers) for the IOMMU device.
5835 */
5836 rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns));
5837 AssertRCReturn(rc, rc);
5838
5839 /*
5840 * Initialize read-only PCI configuration space.
5841 */
5842 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
5843 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
5844
5845 /* Header. */
5846 PDMPciDevSetVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* AMD */
5847 PDMPciDevSetDeviceId(pPciDev, IOMMU_PCI_DEVICE_ID); /* VirtualBox IOMMU device */
5848 PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER); /* Enable bus master (as we directly access main memory) */
5849 PDMPciDevSetStatus(pPciDev, VBOX_PCI_STATUS_CAP_LIST); /* Capability list supported */
5850 PDMPciDevSetRevisionId(pPciDev, IOMMU_PCI_REVISION_ID); /* VirtualBox specific device implementation revision */
5851 PDMPciDevSetClassBase(pPciDev, VBOX_PCI_CLASS_SYSTEM); /* System Base Peripheral */
5852 PDMPciDevSetClassSub(pPciDev, VBOX_PCI_SUB_SYSTEM_IOMMU); /* IOMMU */
5853 PDMPciDevSetClassProg(pPciDev, 0x0); /* IOMMU Programming interface */
5854 PDMPciDevSetHeaderType(pPciDev, 0x0); /* Single function, type 0 */
5855 PDMPciDevSetSubSystemId(pPciDev, IOMMU_PCI_DEVICE_ID); /* AMD */
5856 PDMPciDevSetSubSystemVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* VirtualBox IOMMU device */
5857 PDMPciDevSetCapabilityList(pPciDev, IOMMU_PCI_OFF_CAP_HDR); /* Offset into capability registers */
5858 PDMPciDevSetInterruptPin(pPciDev, 0x1); /* INTA#. */
5859 PDMPciDevSetInterruptLine(pPciDev, 0x0); /* For software compatibility; no effect on hardware */
5860
5861 /* Capability Header. */
5862 /* NOTE! Fields (e.g, EFR) must match what we expose in the ACPI tables. */
5863 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_CAP_HDR,
5864 RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_ID, 0xf) /* RO - Secure Device capability block */
5865 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_PTR, IOMMU_PCI_OFF_MSI_CAP_HDR) /* RO - Next capability offset */
5866 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_TYPE, 0x3) /* RO - IOMMU capability block */
5867 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_REV, 0x1) /* RO - IOMMU interface revision */
5868 | RT_BF_MAKE(IOMMU_BF_CAPHDR_IOTLB_SUP, 0x0) /* RO - Remote IOTLB support */
5869 | RT_BF_MAKE(IOMMU_BF_CAPHDR_HT_TUNNEL, 0x0) /* RO - HyperTransport Tunnel support */
5870 | RT_BF_MAKE(IOMMU_BF_CAPHDR_NP_CACHE, 0x0) /* RO - Cache NP page table entries */
5871 | RT_BF_MAKE(IOMMU_BF_CAPHDR_EFR_SUP, 0x1) /* RO - Extended Feature Register support */
5872 | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_EXT, 0x1)); /* RO - Misc. Information Register support */
5873
5874 /* Base Address Register. */
5875 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0x0); /* RW - Base address (Lo) and enable bit */
5876 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0x0); /* RW - Base address (Hi) */
5877
5878 /* IOMMU Range Register. */
5879 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_RANGE_REG, 0x0); /* RW - Range register (implemented as RO by us) */
5880
5881 /* Misc. Information Register. */
5882 /* NOTE! Fields (e.g, GVA size) must match what we expose in the ACPI tables. */
5883 uint32_t const uMiscInfoReg0 = RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM, 0) /* RO - MSI number */
5884 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_GVA_SIZE, 2) /* RO - Guest Virt. Addr size (2=48 bits) */
5885 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_PA_SIZE, 48) /* RO - Physical Addr size (48 bits) */
5886 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_VA_SIZE, 64) /* RO - Virt. Addr size (64 bits) */
5887 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_HT_ATS_RESV, 0) /* RW - HT ATS reserved */
5888 | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM_PPR, 0); /* RW - PPR interrupt number */
5889 uint32_t const uMiscInfoReg1 = 0;
5890 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_0, uMiscInfoReg0);
5891 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_1, uMiscInfoReg1);
5892
5893 /* MSI Capability Header register. */
5894 PDMMSIREG MsiReg;
5895 RT_ZERO(MsiReg);
5896 MsiReg.cMsiVectors = 1;
5897 MsiReg.iMsiCapOffset = IOMMU_PCI_OFF_MSI_CAP_HDR;
5898 MsiReg.iMsiNextOffset = 0; /* IOMMU_PCI_OFF_MSI_MAP_CAP_HDR */
5899 MsiReg.fMsi64bit = 1; /* 64-bit addressing support is mandatory; See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support". */
5900
5901 /* MSI Address (Lo, Hi) and MSI data are read-write PCI config registers handled by our generic PCI config space code. */
5902#if 0
5903 /* MSI Address Lo. */
5904 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, 0); /* RW - MSI message address (Lo) */
5905 /* MSI Address Hi. */
5906 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, 0); /* RW - MSI message address (Hi) */
5907 /* MSI Data. */
5908 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, 0); /* RW - MSI data */
5909#endif
5910
5911#if 0
5912 /** @todo IOMMU: I don't know if we need to support this, enable later if
5913 * required. */
5914 /* MSI Mapping Capability Header register. */
5915 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_MAP_CAP_HDR,
5916 RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_ID, 0x8) /* RO - Capability ID */
5917 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_PTR, 0x0) /* RO - Offset to next capability (NULL) */
5918 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_EN, 0x1) /* RO - MSI mapping capability enable */
5919 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_FIXED, 0x1) /* RO - MSI mapping range is fixed */
5920 | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_TYPE, 0x15)); /* RO - MSI mapping capability */
5921 /* When implementing don't forget to copy this to its MMIO shadow register (MsiMapCapHdr) in iommuAmdR3Init. */
5922#endif
5923
5924 /*
5925 * Register the PCI function with PDM.
5926 */
5927 rc = PDMDevHlpPCIRegister(pDevIns, pPciDev);
5928 AssertLogRelRCReturn(rc, rc);
5929
5930 /*
5931 * Register MSI support for the PCI device.
5932 * This must be done -after- register it as a PCI device!
5933 */
5934 rc = PDMDevHlpPCIRegisterMsi(pDevIns, &MsiReg);
5935 AssertRCReturn(rc, rc);
5936
5937 /*
5938 * Intercept PCI config. space accesses.
5939 */
5940 rc = PDMDevHlpPCIInterceptConfigAccesses(pDevIns, pPciDev, iommuAmdR3PciConfigRead, iommuAmdR3PciConfigWrite);
5941 AssertLogRelRCReturn(rc, rc);
5942
5943 /*
5944 * Create the MMIO region.
5945 * Mapping of the region is done when software configures it via PCI config space.
5946 */
5947 rc = PDMDevHlpMmioCreate(pDevIns, IOMMU_MMIO_REGION_SIZE, pPciDev, 0 /* iPciRegion */, iommuAmdMmioWrite, iommuAmdMmioRead,
5948 NULL /* pvUser */,
5949 IOMMMIO_FLAGS_READ_DWORD_QWORD
5950 | IOMMMIO_FLAGS_WRITE_DWORD_QWORD_READ_MISSING
5951 | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_READ
5952 | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_WRITE,
5953 "AMD-IOMMU", &pThis->hMmio);
5954 AssertLogRelRCReturn(rc, rc);
5955
5956 /*
5957 * Register saved state.
5958 */
5959 rc = PDMDevHlpSSMRegisterEx(pDevIns, IOMMU_SAVED_STATE_VERSION, sizeof(IOMMU), NULL,
5960 NULL, NULL, NULL,
5961 NULL, iommuAmdR3SaveExec, NULL,
5962 NULL, iommuAmdR3LoadExec, NULL);
5963 AssertLogRelRCReturn(rc, rc);
5964
5965 /*
5966 * Register debugger info items.
5967 */
5968 PDMDevHlpDBGFInfoRegister(pDevIns, "iommu", "Display IOMMU state.", iommuAmdR3DbgInfo);
5969 PDMDevHlpDBGFInfoRegister(pDevIns, "iommudte", "Display the DTE for a device. Arguments: DeviceID.", iommuAmdR3DbgInfoDte);
5970 PDMDevHlpDBGFInfoRegister(pDevIns, "iommudevtabs", "Display active IOMMU device tables.", iommuAmdR3DbgInfoDevTabs);
5971#ifdef IOMMU_WITH_IOTLBE_CACHE
5972 PDMDevHlpDBGFInfoRegister(pDevIns, "iommutlb", "Display IOTLBs for a domain. Arguments: DomainID.", iommuAmdR3DbgInfoIotlb);
5973#endif
5974
5975# ifdef VBOX_WITH_STATISTICS
5976 /*
5977 * Statistics.
5978 */
5979 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadR3, STAMTYPE_COUNTER, "R3/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in R3");
5980 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadRZ, STAMTYPE_COUNTER, "RZ/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in RZ.");
5981
5982 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteR3, STAMTYPE_COUNTER, "R3/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in R3.");
5983 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteRZ, STAMTYPE_COUNTER, "RZ/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in RZ.");
5984
5985 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapR3, STAMTYPE_COUNTER, "R3/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in R3.");
5986 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapRZ, STAMTYPE_COUNTER, "RZ/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in RZ.");
5987
5988 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadR3, STAMTYPE_COUNTER, "R3/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in R3.");
5989 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadRZ, STAMTYPE_COUNTER, "RZ/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in RZ.");
5990
5991 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteR3, STAMTYPE_COUNTER, "R3/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in R3.");
5992 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteRZ, STAMTYPE_COUNTER, "RZ/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in RZ.");
5993
5994 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadR3, STAMTYPE_COUNTER, "R3/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in R3.");
5995 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadRZ, STAMTYPE_COUNTER, "RZ/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in RZ.");
5996
5997 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteR3, STAMTYPE_COUNTER, "R3/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in R3.");
5998 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteRZ, STAMTYPE_COUNTER, "RZ/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in RZ.");
5999
6000 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmd, STAMTYPE_COUNTER, "R3/Commands", STAMUNIT_OCCURENCES, "Number of commands processed (total).");
6001 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompWait, STAMTYPE_COUNTER, "R3/Commands/CompWait", STAMUNIT_OCCURENCES, "Number of Completion Wait commands processed.");
6002 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvDte, STAMTYPE_COUNTER, "R3/Commands/InvDte", STAMUNIT_OCCURENCES, "Number of Invalidate DTE commands processed.");
6003 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuPages, STAMTYPE_COUNTER, "R3/Commands/InvIommuPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU Pages commands processed.");
6004 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIotlbPages, STAMTYPE_COUNTER, "R3/Commands/InvIotlbPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOTLB Pages commands processed.");
6005 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIntrTable, STAMTYPE_COUNTER, "R3/Commands/InvIntrTable", STAMUNIT_OCCURENCES, "Number of Invalidate Interrupt Table commands processed.");
6006 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdPrefIommuPages, STAMTYPE_COUNTER, "R3/Commands/PrefIommuPages", STAMUNIT_OCCURENCES, "Number of Prefetch IOMMU Pages commands processed.");
6007 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompletePprReq, STAMTYPE_COUNTER, "R3/Commands/CompletePprReq", STAMUNIT_OCCURENCES, "Number of Complete PPR Requests commands processed.");
6008 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuAll, STAMTYPE_COUNTER, "R3/Commands/InvIommuAll", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU All commands processed.");
6009
6010
6011 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeCached, STAMTYPE_COUNTER, "IOTLB/Cached", STAMUNIT_OCCURENCES, "Number of IOTLB entries in the cache.");
6012 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeLazyEvictReuse, STAMTYPE_COUNTER, "IOTLB/LazyEvictReuse", STAMUNIT_OCCURENCES, "Number of IOTLB entries reused after lazy eviction.");
6013
6014 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfDteLookup, STAMTYPE_PROFILE, "Profile/DteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling DTE lookup.");
6015 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIotlbeLookup, STAMTYPE_PROFILE, "Profile/IotlbeLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IOTLBE lookup.");
6016
6017 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "Access/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
6018 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "Access/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
6019 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHitFull, STAMTYPE_COUNTER, "Access/CacheHitFull", STAMUNIT_OCCURENCES, "Number of accesses that was entirely in the cache.");
6020 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheNonContig, STAMTYPE_COUNTER, "Access/CacheNonContig", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in non-contiguous translated regions.");
6021 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCachePermDenied, STAMTYPE_COUNTER, "Access/CacheAddrDenied", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in denied permissions.");
6022 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "Access/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions.");
6023 PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "Access/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions.");
6024# endif
6025
6026 /*
6027 * Create the command thread and its event semaphore.
6028 */
6029 char szDevIommu[64];
6030 RT_ZERO(szDevIommu);
6031 RTStrPrintf(szDevIommu, sizeof(szDevIommu), "IOMMU-%u", iInstance);
6032 rc = PDMDevHlpThreadCreate(pDevIns, &pThisCC->pCmdThread, pThis, iommuAmdR3CmdThread, iommuAmdR3CmdThreadWakeUp,
6033 0 /* cbStack */, RTTHREADTYPE_IO, szDevIommu);
6034 AssertLogRelRCReturn(rc, rc);
6035
6036 rc = PDMDevHlpSUPSemEventCreate(pDevIns, &pThis->hEvtCmdThread);
6037 AssertLogRelRCReturn(rc, rc);
6038
6039#ifdef IOMMU_WITH_IOTLBE_CACHE
6040 /*
6041 * Initialize the critsect of the cache.
6042 */
6043 rc = PDMDevHlpCritSectInit(pDevIns, &pThis->CritSectCache, RT_SRC_POS, "IOMMUCache-#%u", pDevIns->iInstance);
6044 AssertLogRelRCReturn(rc, rc);
6045
6046 /*
6047 * Allocate the level 1 cache (device ID to domain ID mapping).
6048 * PCI devices are hotpluggable, plus we don't have a way of querying the bus for all
6049 * assigned PCI BDF slots. So while this wastes some memory, it should work regardless
6050 * of how code, features and devices around the IOMMU changes.
6051 */
6052 size_t const cbDevices = sizeof(IODEVICE) * IOMMU_DTE_CACHE_MAX;
6053 AssertCompile(IOMMU_DTE_CACHE_MAX >= UINT16_MAX);
6054 pThis->paDevices = (PIODEVICE)PDMDevHlpMMHeapAllocZ(pDevIns, cbDevices);
6055 if (!pThis->paDevices)
6056 {
6057 return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
6058 N_("Failed to allocate %zu bytes from the hyperheap for the IOMMU level 1 cache."), cbDevices);
6059 }
6060
6061 /*
6062 * Allocate the level 2 cache (IOTLB entries).
6063 * This is allocated upfront since we expect a relatively small number of entries,
6064 * is more cache-line efficient and easier to track least recently used entries for
6065 * eviction when the cache is full. This also prevents unpredictable behavior during
6066 * the lifetime of the VM if the hyperheap gets full as allocation would fail upfront
6067 * or not at all.
6068 */
6069 size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
6070 pThis->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes);
6071 if (!pThis->paIotlbes)
6072 {
6073 return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
6074 N_("Failed to allocate %zu bytes from the hyperheap for the IOMMU level 2 cache."),
6075 cbIotlbes);
6076 }
6077 RTListInit(&pThis->LstLruIotlbe);
6078
6079 LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOTLB cache\n", IOMMU_LOG_PFX, cbDevices + cbIotlbes));
6080#endif
6081
6082 /*
6083 * Initialize read-only registers.
6084 * NOTE! Fields here must match their corresponding field in the ACPI tables.
6085 */
6086 /* Don't remove the commented lines below as it lets us see all features at a glance. */
6087 pThis->ExtFeat.u64 = 0;
6088 pThis->ExtFeat.n.u1PrefetchSup = 0;
6089 //pThis->ExtFeat.n.u1PprSup = 0;
6090 //pThis->ExtFeat.n.u1X2ApicSup = 0;
6091 //pThis->ExtFeat.n.u1NoExecuteSup = 0;
6092 //pThis->ExtFeat.n.u1GstTranslateSup = 0;
6093 pThis->ExtFeat.n.u1InvAllSup = 1;
6094 //pThis->ExtFeat.n.u1GstVirtApicSup = 0;
6095 pThis->ExtFeat.n.u1HwErrorSup = 1;
6096 //pThis->ExtFeat.n.u1PerfCounterSup = 0;
6097 AssertCompile((IOMMU_MAX_HOST_PT_LEVEL & 0x3) < 3);
6098 pThis->ExtFeat.n.u2HostAddrTranslateSize = (IOMMU_MAX_HOST_PT_LEVEL & 0x3);
6099 //pThis->ExtFeat.n.u2GstAddrTranslateSize = 0; /* Requires GstTranslateSup */
6100 //pThis->ExtFeat.n.u2GstCr3RootTblLevel = 0; /* Requires GstTranslateSup */
6101 //pThis->ExtFeat.n.u2SmiFilterSup = 0;
6102 //pThis->ExtFeat.n.u3SmiFilterCount = 0;
6103 //pThis->ExtFeat.n.u3GstVirtApicModeSup = 0; /* Requires GstVirtApicSup */
6104 //pThis->ExtFeat.n.u2DualPprLogSup = 0;
6105 //pThis->ExtFeat.n.u2DualEvtLogSup = 0;
6106 //pThis->ExtFeat.n.u5MaxPasidSup = 0; /* Requires GstTranslateSup */
6107 //pThis->ExtFeat.n.u1UserSupervisorSup = 0;
6108 AssertCompile(IOMMU_MAX_DEV_TAB_SEGMENTS <= 3);
6109 pThis->ExtFeat.n.u2DevTabSegSup = IOMMU_MAX_DEV_TAB_SEGMENTS;
6110 //pThis->ExtFeat.n.u1PprLogOverflowWarn = 0;
6111 //pThis->ExtFeat.n.u1PprAutoRespSup = 0;
6112 //pThis->ExtFeat.n.u2MarcSup = 0;
6113 //pThis->ExtFeat.n.u1BlockStopMarkSup = 0;
6114 //pThis->ExtFeat.n.u1PerfOptSup = 0;
6115 pThis->ExtFeat.n.u1MsiCapMmioSup = 1;
6116 //pThis->ExtFeat.n.u1GstIoSup = 0;
6117 //pThis->ExtFeat.n.u1HostAccessSup = 0;
6118 //pThis->ExtFeat.n.u1EnhancedPprSup = 0;
6119 //pThis->ExtFeat.n.u1AttrForwardSup = 0;
6120 //pThis->ExtFeat.n.u1HostDirtySup = 0;
6121 //pThis->ExtFeat.n.u1InvIoTlbTypeSup = 0;
6122 //pThis->ExtFeat.n.u1GstUpdateDisSup = 0;
6123 //pThis->ExtFeat.n.u1ForcePhysDstSup = 0;
6124
6125 pThis->RsvdReg = 0;
6126
6127 pThis->DevSpecificFeat.u64 = 0;
6128 pThis->DevSpecificFeat.n.u4RevMajor = IOMMU_DEVSPEC_FEAT_MAJOR_VERSION;
6129 pThis->DevSpecificFeat.n.u4RevMinor = IOMMU_DEVSPEC_FEAT_MINOR_VERSION;
6130
6131 pThis->DevSpecificCtrl.u64 = 0;
6132 pThis->DevSpecificCtrl.n.u4RevMajor = IOMMU_DEVSPEC_CTRL_MAJOR_VERSION;
6133 pThis->DevSpecificCtrl.n.u4RevMinor = IOMMU_DEVSPEC_CTRL_MINOR_VERSION;
6134
6135 pThis->DevSpecificStatus.u64 = 0;
6136 pThis->DevSpecificStatus.n.u4RevMajor = IOMMU_DEVSPEC_STATUS_MAJOR_VERSION;
6137 pThis->DevSpecificStatus.n.u4RevMinor = IOMMU_DEVSPEC_STATUS_MINOR_VERSION;
6138
6139 pThis->MiscInfo.u64 = RT_MAKE_U64(uMiscInfoReg0, uMiscInfoReg1);
6140
6141 /*
6142 * Initialize parts of the IOMMU state as it would during reset.
6143 * Must be called -after- initializing PCI config. space registers.
6144 */
6145 iommuAmdR3Reset(pDevIns);
6146
6147 LogRel(("%s: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX,
6148 pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor,
6149 pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor,
6150 pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor,
6151 pThis->ExtFeat.u64));
6152 return VINF_SUCCESS;
6153}
6154
6155#else
6156
6157/**
6158 * @callback_method_impl{PDMDEVREGR0,pfnConstruct}
6159 */
6160static DECLCALLBACK(int) iommuAmdRZConstruct(PPDMDEVINS pDevIns)
6161{
6162 PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
6163 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
6164 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
6165 pThisCC->CTX_SUFF(pDevIns) = pDevIns;
6166
6167 /* We will use PDM's critical section (via helpers) for the IOMMU device. */
6168 int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns));
6169 AssertRCReturn(rc, rc);
6170
6171 /* Set up the MMIO RZ handlers. */
6172 rc = PDMDevHlpMmioSetUpContext(pDevIns, pThis->hMmio, iommuAmdMmioWrite, iommuAmdMmioRead, NULL /* pvUser */);
6173 AssertRCReturn(rc, rc);
6174
6175 /* Set up the IOMMU RZ callbacks. */
6176 PDMIOMMUREGCC IommuReg;
6177 RT_ZERO(IommuReg);
6178 IommuReg.u32Version = PDM_IOMMUREGCC_VERSION;
6179 IommuReg.idxIommu = pThis->idxIommu;
6180 IommuReg.pfnMemAccess = iommuAmdMemAccess;
6181 IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess;
6182 IommuReg.pfnMsiRemap = iommuAmdMsiRemap;
6183 IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION;
6184 rc = PDMDevHlpIommuSetUpContext(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp));
6185 AssertRCReturn(rc, rc);
6186 AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp), VERR_IOMMU_IPE_1);
6187 AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32Version == CTX_SUFF(PDM_IOMMUHLP)_VERSION, VERR_VERSION_MISMATCH);
6188 AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd == CTX_SUFF(PDM_IOMMUHLP)_VERSION, VERR_VERSION_MISMATCH);
6189 AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnLock, VERR_INVALID_POINTER);
6190 AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnUnlock, VERR_INVALID_POINTER);
6191 return VINF_SUCCESS;
6192}
6193#endif
6194
6195
6196/**
6197 * The device registration structure.
6198 */
6199const PDMDEVREG g_DeviceIommuAmd =
6200{
6201 /* .u32Version = */ PDM_DEVREG_VERSION,
6202 /* .uReserved0 = */ 0,
6203 /* .szName = */ "iommu-amd",
6204 /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE,
6205 /* .fClass = */ PDM_DEVREG_CLASS_PCI_BUILTIN,
6206 /* .cMaxInstances = */ ~0U,
6207 /* .uSharedVersion = */ 42,
6208 /* .cbInstanceShared = */ sizeof(IOMMU),
6209 /* .cbInstanceCC = */ sizeof(IOMMUCC),
6210 /* .cbInstanceRC = */ sizeof(IOMMURC),
6211 /* .cMaxPciDevices = */ 1,
6212 /* .cMaxMsixVectors = */ 0,
6213 /* .pszDescription = */ "IOMMU (AMD)",
6214#if defined(IN_RING3)
6215 /* .pszRCMod = */ "VBoxDDRC.rc",
6216 /* .pszR0Mod = */ "VBoxDDR0.r0",
6217 /* .pfnConstruct = */ iommuAmdR3Construct,
6218 /* .pfnDestruct = */ iommuAmdR3Destruct,
6219 /* .pfnRelocate = */ NULL,
6220 /* .pfnMemSetup = */ NULL,
6221 /* .pfnPowerOn = */ NULL,
6222 /* .pfnReset = */ iommuAmdR3Reset,
6223 /* .pfnSuspend = */ NULL,
6224 /* .pfnResume = */ NULL,
6225 /* .pfnAttach = */ NULL,
6226 /* .pfnDetach = */ NULL,
6227 /* .pfnQueryInterface = */ NULL,
6228 /* .pfnInitComplete = */ NULL,
6229 /* .pfnPowerOff = */ NULL,
6230 /* .pfnSoftReset = */ NULL,
6231 /* .pfnReserved0 = */ NULL,
6232 /* .pfnReserved1 = */ NULL,
6233 /* .pfnReserved2 = */ NULL,
6234 /* .pfnReserved3 = */ NULL,
6235 /* .pfnReserved4 = */ NULL,
6236 /* .pfnReserved5 = */ NULL,
6237 /* .pfnReserved6 = */ NULL,
6238 /* .pfnReserved7 = */ NULL,
6239#elif defined(IN_RING0)
6240 /* .pfnEarlyConstruct = */ NULL,
6241 /* .pfnConstruct = */ iommuAmdRZConstruct,
6242 /* .pfnDestruct = */ NULL,
6243 /* .pfnFinalDestruct = */ NULL,
6244 /* .pfnRequest = */ NULL,
6245 /* .pfnReserved0 = */ NULL,
6246 /* .pfnReserved1 = */ NULL,
6247 /* .pfnReserved2 = */ NULL,
6248 /* .pfnReserved3 = */ NULL,
6249 /* .pfnReserved4 = */ NULL,
6250 /* .pfnReserved5 = */ NULL,
6251 /* .pfnReserved6 = */ NULL,
6252 /* .pfnReserved7 = */ NULL,
6253#elif defined(IN_RC)
6254 /* .pfnConstruct = */ iommuAmdRZConstruct,
6255 /* .pfnReserved0 = */ NULL,
6256 /* .pfnReserved1 = */ NULL,
6257 /* .pfnReserved2 = */ NULL,
6258 /* .pfnReserved3 = */ NULL,
6259 /* .pfnReserved4 = */ NULL,
6260 /* .pfnReserved5 = */ NULL,
6261 /* .pfnReserved6 = */ NULL,
6262 /* .pfnReserved7 = */ NULL,
6263#else
6264# error "Not in IN_RING3, IN_RING0 or IN_RC!"
6265#endif
6266 /* .u32VersionEnd = */ PDM_DEVREG_VERSION
6267};
6268
6269#endif /* !VBOX_DEVICE_STRUCT_TESTCASE */
6270
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette