VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 101521

Last change on this file since 101521 was 99739, checked in by vboxsync, 19 months ago

*: doxygen corrections (mostly about removing @returns from functions returning void).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.3 KB
Line 
1/* $Id: PDMBlkCache.cpp 99739 2023-05-11 01:01:08Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
29 * This component implements an I/O cache based on the 2Q cache algorithm.
30 */
31
32
33/*********************************************************************************************************************************
34* Header Files *
35*********************************************************************************************************************************/
36#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
37#include "PDMInternal.h"
38#include <iprt/asm.h>
39#include <iprt/mem.h>
40#include <iprt/path.h>
41#include <iprt/string.h>
42#include <iprt/trace.h>
43#include <VBox/log.h>
44#include <VBox/vmm/stam.h>
45#include <VBox/vmm/uvm.h>
46#include <VBox/vmm/vm.h>
47
48#include "PDMBlkCacheInternal.h"
49
50
51/*********************************************************************************************************************************
52* Defined Constants And Macros *
53*********************************************************************************************************************************/
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for reading.
 * NOTE(review): RTSemRWIsReadOwner may expect a second (fWannaHear) argument in
 * current IPRT - confirm the signature before using this macro. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
/* Without VBOX_STRICT the ownership checks expand to empty statements. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while (0)
#endif

/** Saved state unit version; pdmR3BlkCacheLoadExec rejects any other version. */
#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1

/* Uncomment the define below to enable some tracing in the block cache code for investigating issues. */
/*#define VBOX_BLKCACHE_TRACING 1*/
86
87
88/*********************************************************************************************************************************
89* Internal Functions *
90*********************************************************************************************************************************/
91
92static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
93 uint64_t off, size_t cbData, uint8_t *pbBuffer);
94static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
95
96
97/**
98 * Add message to the VM trace buffer.
99 *
100 * @param pBlkCache The block cache.
101 * @param pszFmt The format string.
102 * @param ... Additional parameters for the string formatter.
103 */
104DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...)
105{
106#if defined(VBOX_BLKCACHE_TRACING)
107 va_list va;
108 va_start(va, pszFmt);
109 RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va);
110 va_end(va);
111#else
112 RT_NOREF2(pBlkCache, pszFmt);
113#endif
114}
115
116/**
117 * Decrement the reference counter of the given cache entry.
118 *
119 * @param pEntry The entry to release.
120 */
121DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
122{
123 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
124 ASMAtomicDecU32(&pEntry->cRefs);
125}
126
127/**
128 * Increment the reference counter of the given cache entry.
129 *
130 * @param pEntry The entry to reference.
131 */
132DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
133{
134 ASMAtomicIncU32(&pEntry->cRefs);
135}
136
#ifdef VBOX_STRICT
/**
 * Asserts the byte accounting invariants of the global cache (strict builds only).
 *
 * @param   pCache    The global cache instance.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The total may never grow beyond the configured maximum. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The two lists holding data must add up to the global total. */
    AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own limit. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
152
153DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
154{
155 RTCritSectEnter(&pCache->CritSect);
156#ifdef VBOX_STRICT
157 pdmBlkCacheValidate(pCache);
158#endif
159}
160
161DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
162{
163#ifdef VBOX_STRICT
164 pdmBlkCacheValidate(pCache);
165#endif
166 RTCritSectLeave(&pCache->CritSect);
167}
168
169DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
170{
171 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
172 pCache->cbCached -= cbAmount;
173}
174
175DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
176{
177 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
178 pCache->cbCached += cbAmount;
179}
180
181DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
182{
183 pList->cbCached += cbAmount;
184}
185
186DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
187{
188 pList->cbCached -= cbAmount;
189}
190
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Sanity checks the linkage of a LRU list.
 *
 * For every element it asserts that the element does not show up again further
 * down the list, that it is not the forbidden element, and that the element
 * without a successor is the recorded tail.
 *
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Element which is not allowed to occur in the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pCurr = pList->pHead; pCurr; pCurr = pCurr->pNext)
    {
        /* Compare against every successor to catch duplicates and cycles. */
        for (PPDMBLKCACHEENTRY pNext = pCurr->pNext; pNext; pNext = pNext->pNext)
        {
            AssertMsg(pCurr != pNext,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pCurr, pList));
        }

        AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));

        if (!pCurr->pNext)
            AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
    }
}
#endif
224
225/**
226 * Unlinks a cache entry from the LRU list it is assigned to.
227 *
228 * @param pEntry The entry to unlink.
229 */
230static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
231{
232 PPDMBLKLRULIST pList = pEntry->pList;
233 PPDMBLKCACHEENTRY pPrev, pNext;
234
235 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
236
237 AssertPtr(pList);
238
239#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
240 pdmBlkCacheCheckList(pList, NULL);
241#endif
242
243 pPrev = pEntry->pPrev;
244 pNext = pEntry->pNext;
245
246 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
247 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
248
249 if (pPrev)
250 pPrev->pNext = pNext;
251 else
252 {
253 pList->pHead = pNext;
254
255 if (pNext)
256 pNext->pPrev = NULL;
257 }
258
259 if (pNext)
260 pNext->pPrev = pPrev;
261 else
262 {
263 pList->pTail = pPrev;
264
265 if (pPrev)
266 pPrev->pNext = NULL;
267 }
268
269 pEntry->pList = NULL;
270 pEntry->pPrev = NULL;
271 pEntry->pNext = NULL;
272 pdmBlkCacheListSub(pList, pEntry->cbData);
273#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
274 pdmBlkCacheCheckList(pList, pEntry);
275#endif
276}
277
278/**
279 * Adds a cache entry to the given LRU list unlinking it from the currently
280 * assigned list if needed.
281 *
282 * @param pList List to the add entry to.
283 * @param pEntry Entry to add.
284 */
285static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
286{
287 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
288#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
289 pdmBlkCacheCheckList(pList, NULL);
290#endif
291
292 /* Remove from old list if needed */
293 if (pEntry->pList)
294 pdmBlkCacheEntryRemoveFromList(pEntry);
295
296 pEntry->pNext = pList->pHead;
297 if (pList->pHead)
298 pList->pHead->pPrev = pEntry;
299 else
300 {
301 Assert(!pList->pTail);
302 pList->pTail = pEntry;
303 }
304
305 pEntry->pPrev = NULL;
306 pList->pHead = pEntry;
307 pdmBlkCacheListAdd(pList, pEntry->cbData);
308 pEntry->pList = pList;
309#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
310 pdmBlkCacheCheckList(pList, NULL);
311#endif
312}
313
314/**
315 * Destroys a LRU list freeing all entries.
316 *
317 * @param pList Pointer to the LRU list to destroy.
318 *
319 * @note The caller must own the critical section of the cache.
320 */
321static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
322{
323 while (pList->pHead)
324 {
325 PPDMBLKCACHEENTRY pEntry = pList->pHead;
326
327 pList->pHead = pEntry->pNext;
328
329 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
330 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
331
332 RTMemPageFree(pEntry->pbData, pEntry->cbData);
333 RTMemFree(pEntry);
334 }
335}
336
337/**
338 * Tries to remove the given amount of bytes from a given list in the cache
339 * moving the entries to one of the given ghosts lists
340 *
341 * @returns Amount of data which could be freed.
342 * @param pCache Pointer to the global cache data.
343 * @param cbData The amount of the data to free.
344 * @param pListSrc The source list to evict data from.
345 * @param pGhostListDst Where the ghost list removed entries should be
346 * moved to, NULL if the entry should be freed.
347 * @param fReuseBuffer Flag whether a buffer should be reused if it has
348 * the same size
349 * @param ppbBuffer Where to store the address of the buffer if an
350 * entry with the same size was found and
351 * fReuseBuffer is true.
352 *
353 * @note This function may return fewer bytes than requested because entries
354 * may be marked as non evictable if they are used for I/O at the
355 * moment.
356 */
357static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
358 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
359 bool fReuseBuffer, uint8_t **ppbBuffer)
360{
361 size_t cbEvicted = 0;
362
363 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
364
365 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
366 AssertMsg( !pGhostListDst
367 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
368 ("Destination list must be NULL or the recently used but paged out list\n"));
369
370 if (fReuseBuffer)
371 {
372 AssertPtr(ppbBuffer);
373 *ppbBuffer = NULL;
374 }
375
376 /* Start deleting from the tail. */
377 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
378
379 while ((cbEvicted < cbData) && pEntry)
380 {
381 PPDMBLKCACHEENTRY pCurr = pEntry;
382
383 pEntry = pEntry->pPrev;
384
385 /* We can't evict pages which are currently in progress or dirty but not in progress */
386 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
387 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
388 {
389 /* Ok eviction candidate. Grab the endpoint semaphore and check again
390 * because somebody else might have raced us. */
391 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
392 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
393
394 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
395 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
396 {
397 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
398
399 if (fReuseBuffer && pCurr->cbData == cbData)
400 {
401 STAM_COUNTER_INC(&pCache->StatBuffersReused);
402 *ppbBuffer = pCurr->pbData;
403 }
404 else if (pCurr->pbData)
405 RTMemPageFree(pCurr->pbData, pCurr->cbData);
406
407 pCurr->pbData = NULL;
408 cbEvicted += pCurr->cbData;
409
410 pdmBlkCacheEntryRemoveFromList(pCurr);
411 pdmBlkCacheSub(pCache, pCurr->cbData);
412
413 if (pGhostListDst)
414 {
415 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
416
417 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
418
419 /* We have to remove the last entries from the paged out list. */
420 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
421 && pGhostEntFree)
422 {
423 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
424 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
425
426 pGhostEntFree = pGhostEntFree->pPrev;
427
428 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
429
430 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
431 {
432 pdmBlkCacheEntryRemoveFromList(pFree);
433
434 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
435 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
436 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
437
438 RTMemFree(pFree);
439 }
440
441 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
442 }
443
444 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
445 {
446 /* Couldn't remove enough entries. Delete */
447 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
448 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
449 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
450
451 RTMemFree(pCurr);
452 }
453 else
454 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
455 }
456 else
457 {
458 /* Delete the entry from the AVL tree it is assigned to. */
459 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
460 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
461 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
462
463 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
464 RTMemFree(pCurr);
465 }
466 }
467 else
468 {
469 LogFlow(("Someone raced us, entry %#p (%u bytes) cannot be evicted any more (fFlags=%#x cRefs=%#x)\n",
470 pCurr, pCurr->cbData, pCurr->fFlags, pCurr->cRefs));
471 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
472 }
473
474 }
475 else
476 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
477 }
478
479 return cbEvicted;
480}
481
482static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
483{
484 size_t cbRemoved = 0;
485
486 if ((pCache->cbCached + cbData) < pCache->cbMax)
487 return true;
488 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
489 {
490 /* Try to evict as many bytes as possible from A1in */
491 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
492 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
493
494 /*
495 * If it was not possible to remove enough entries
496 * try the frequently accessed cache.
497 */
498 if (cbRemoved < cbData)
499 {
500 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
501
502 /*
503 * If we removed something we can't pass the reuse buffer flag anymore because
504 * we don't need to evict that much data
505 */
506 if (!cbRemoved)
507 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
508 NULL, fReuseBuffer, ppbBuffer);
509 else
510 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
511 NULL, false, NULL);
512 }
513 }
514 else
515 {
516 /* We have to remove entries from frequently access list. */
517 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
518 NULL, fReuseBuffer, ppbBuffer);
519 }
520
521 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
522 return (cbRemoved >= cbData);
523}
524
525DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
526{
527 int rc = VINF_SUCCESS;
528
529 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
530 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
531
532 ASMAtomicIncU32(&pBlkCache->cIoXfersActive);
533 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)",
534 pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive);
535
536 switch (pBlkCache->enmType)
537 {
538 case PDMBLKCACHETYPE_DEV:
539 {
540 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
541 pIoXfer->enmXferDir,
542 off, cbXfer,
543 &pIoXfer->SgBuf, pIoXfer);
544 break;
545 }
546 case PDMBLKCACHETYPE_DRV:
547 {
548 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
549 pIoXfer->enmXferDir,
550 off, cbXfer,
551 &pIoXfer->SgBuf, pIoXfer);
552 break;
553 }
554 case PDMBLKCACHETYPE_USB:
555 {
556 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
557 pIoXfer->enmXferDir,
558 off, cbXfer,
559 &pIoXfer->SgBuf, pIoXfer);
560 break;
561 }
562 case PDMBLKCACHETYPE_INTERNAL:
563 {
564 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
565 pIoXfer->enmXferDir,
566 off, cbXfer,
567 &pIoXfer->SgBuf, pIoXfer);
568 break;
569 }
570 default:
571 AssertMsgFailed(("Unknown block cache type!\n"));
572 }
573
574 if (RT_FAILURE(rc))
575 {
576 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc);
577 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
578 }
579
580 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
581 return rc;
582}
583
584/**
585 * Initiates a read I/O task for the given entry.
586 *
587 * @returns VBox status code.
588 * @param pEntry The entry to fetch the data to.
589 */
590static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
591{
592 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
593 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
594
595 /* Make sure no one evicts the entry while it is accessed. */
596 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
597
598 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
599 if (RT_UNLIKELY(!pIoXfer))
600 return VERR_NO_MEMORY;
601
602 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
603
604 pIoXfer->fIoCache = true;
605 pIoXfer->pEntry = pEntry;
606 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
607 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
608 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
609 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
610
611 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
612}
613
614/**
615 * Initiates a write I/O task for the given entry.
616 *
617 * @returns VBox status code.
618 * @param pEntry The entry to read the data from.
619 */
620static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
621{
622 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
623 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
624
625 /* Make sure no one evicts the entry while it is accessed. */
626 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
627
628 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
629 if (RT_UNLIKELY(!pIoXfer))
630 return VERR_NO_MEMORY;
631
632 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
633
634 pIoXfer->fIoCache = true;
635 pIoXfer->pEntry = pEntry;
636 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
637 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
638 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
639 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
640
641 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
642}
643
644/**
645 * Passthrough a part of a request directly to the I/O manager handling the
646 * endpoint.
647 *
648 * @returns VBox status code.
649 * @param pBlkCache The endpoint cache.
650 * @param pReq The request.
651 * @param pSgBuf The scatter/gather buffer.
652 * @param offStart Offset to start transfer from.
653 * @param cbData Amount of data to transfer.
654 * @param enmXferDir The transfer type (read/write)
655 */
656static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
657 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
658 PDMBLKCACHEXFERDIR enmXferDir)
659{
660
661 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
662 if (RT_UNLIKELY(!pIoXfer))
663 return VERR_NO_MEMORY;
664
665 ASMAtomicIncU32(&pReq->cXfersPending);
666 pIoXfer->fIoCache = false;
667 pIoXfer->pReq = pReq;
668 pIoXfer->enmXferDir = enmXferDir;
669 if (pSgBuf)
670 {
671 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
672 RTSgBufAdvance(pSgBuf, cbData);
673 }
674
675 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
676}
677
678/**
679 * Commit a single dirty entry to the endpoint
680 *
681 * @param pEntry The entry to commit.
682 */
683static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
684{
685 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
686 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
687 ("Invalid flags set for entry %#p\n", pEntry));
688
689 pdmBlkCacheEntryWriteToMedium(pEntry);
690}
691
692/**
693 * Commit all dirty entries for a single endpoint.
694 *
695 * @param pBlkCache The endpoint cache to commit.
696 */
697static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
698{
699 uint32_t cbCommitted = 0;
700
701 /* Return if the cache was suspended. */
702 if (pBlkCache->fSuspended)
703 return;
704
705 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
706
707 /* The list is moved to a new header to reduce locking overhead. */
708 RTLISTANCHOR ListDirtyNotCommitted;
709
710 RTSpinlockAcquire(pBlkCache->LockList);
711 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
712 RTSpinlockRelease(pBlkCache->LockList);
713
714 if (!RTListIsEmpty(&ListDirtyNotCommitted))
715 {
716 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
717
718 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
719 {
720 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
721 NodeNotCommitted);
722 pdmBlkCacheEntryCommit(pEntry);
723 cbCommitted += pEntry->cbData;
724 RTListNodeRemove(&pEntry->NodeNotCommitted);
725 pEntry = pNext;
726 }
727
728 /* Commit the last endpoint */
729 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
730 pdmBlkCacheEntryCommit(pEntry);
731 cbCommitted += pEntry->cbData;
732 RTListNodeRemove(&pEntry->NodeNotCommitted);
733 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
734 ("Committed all entries but list is not empty\n"));
735 }
736
737 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
738 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
739 ("Number of committed bytes exceeds number of dirty bytes\n"));
740 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
741
742 /* Reset the commit timer if we don't have any dirty bits. */
743 if ( !(cbDirtyOld - cbCommitted)
744 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
745 TMTimerStop(pBlkCache->pCache->pVM, pBlkCache->pCache->hTimerCommit);
746}
747
748/**
749 * Commit all dirty entries in the cache.
750 *
751 * @param pCache The global cache instance.
752 */
753static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
754{
755 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
756
757 if (!fCommitInProgress)
758 {
759 pdmBlkCacheLockEnter(pCache);
760 Assert(!RTListIsEmpty(&pCache->ListUsers));
761
762 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
763 AssertPtr(pBlkCache);
764
765 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
766 {
767 pdmBlkCacheCommit(pBlkCache);
768
769 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
770 NodeCacheUser);
771 }
772
773 /* Commit the last endpoint */
774 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
775 pdmBlkCacheCommit(pBlkCache);
776
777 pdmBlkCacheLockLeave(pCache);
778 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
779 }
780}
781
782/**
783 * Adds the given entry as a dirty to the cache.
784 *
785 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
786 * @param pBlkCache The endpoint cache the entry belongs to.
787 * @param pEntry The entry to add.
788 */
789static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
790{
791 bool fDirtyBytesExceeded = false;
792 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
793
794 /* If the commit timer is disabled we commit right away. */
795 if (pCache->u32CommitTimeoutMs == 0)
796 {
797 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
798 pdmBlkCacheEntryCommit(pEntry);
799 }
800 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
801 {
802 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
803
804 RTSpinlockAcquire(pBlkCache->LockList);
805 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
806 RTSpinlockRelease(pBlkCache->LockList);
807
808 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
809
810 /* Prevent committing if the VM was suspended. */
811 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
812 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
813 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
814 {
815 /* Arm the commit timer. */
816 TMTimerSetMillies(pCache->pVM, pCache->hTimerCommit, pCache->u32CommitTimeoutMs);
817 }
818 }
819
820 return fDirtyBytesExceeded;
821}
822
823static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
824{
825 bool fFound = false;
826
827 PPDMBLKCACHE pBlkCache;
828 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
829 {
830 if (!RTStrCmp(pBlkCache->pszId, pcszId))
831 {
832 fFound = true;
833 break;
834 }
835 }
836
837 return fFound ? pBlkCache : NULL;
838}
839
840/**
841 * @callback_method_impl{FNTMTIMERINT, Commit timer callback.}
842 */
843static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, TMTIMERHANDLE hTimer, void *pvUser)
844{
845 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
846 RT_NOREF(pVM, hTimer);
847
848 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
849
850 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
851 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
852 pdmBlkCacheCommitDirtyEntries(pCache);
853
854 LogFlowFunc(("Entries committed, going to sleep\n"));
855}
856
857static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
858{
859 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
860
861 AssertPtr(pBlkCacheGlobal);
862
863 pdmBlkCacheLockEnter(pBlkCacheGlobal);
864
865 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
866
867 /* Go through the list and save all dirty entries. */
868 PPDMBLKCACHE pBlkCache;
869 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
870 {
871 uint32_t cEntries = 0;
872 PPDMBLKCACHEENTRY pEntry;
873
874 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
875 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
876 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
877
878 /* Count the number of entries to safe. */
879 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
880 {
881 cEntries++;
882 }
883
884 SSMR3PutU32(pSSM, cEntries);
885
886 /* Walk the list of all dirty entries and save them. */
887 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
888 {
889 /* A few sanity checks. */
890 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
891 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
892 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
893 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
894 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
895 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
896 ("Invalid list\n"));
897 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
898 ("Size and range do not match\n"));
899
900 /* Save */
901 SSMR3PutU64(pSSM, pEntry->Core.Key);
902 SSMR3PutU32(pSSM, pEntry->cbData);
903 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
904 }
905
906 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
907 }
908
909 pdmBlkCacheLockLeave(pBlkCacheGlobal);
910
911 /* Terminator */
912 return SSMR3PutU32(pSSM, UINT32_MAX);
913}
914
915static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
916{
917 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
918 uint32_t cRefs;
919
920 NOREF(uPass);
921 AssertPtr(pBlkCacheGlobal);
922
923 pdmBlkCacheLockEnter(pBlkCacheGlobal);
924
925 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
926 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
927
928 SSMR3GetU32(pSSM, &cRefs);
929
930 /*
931 * Fewer users in the saved state than in the current VM are allowed
932 * because that means that there are only new ones which don't have any saved state
933 * which can get lost.
934 * More saved state entries than registered cache users are only allowed if the
935 * missing users don't have any data saved in the cache.
936 */
937 int rc = VINF_SUCCESS;
938 char *pszId = NULL;
939
940 while ( cRefs > 0
941 && RT_SUCCESS(rc))
942 {
943 PPDMBLKCACHE pBlkCache = NULL;
944 uint32_t cbId = 0;
945
946 SSMR3GetU32(pSSM, &cbId);
947 Assert(cbId > 0);
948
949 cbId++; /* Include terminator */
950 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
951 if (!pszId)
952 {
953 rc = VERR_NO_MEMORY;
954 break;
955 }
956
957 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
958 AssertRC(rc);
959
960 /* Search for the block cache with the provided id. */
961 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
962
963 /* Get the entries */
964 uint32_t cEntries;
965 SSMR3GetU32(pSSM, &cEntries);
966
967 if (!pBlkCache && (cEntries > 0))
968 {
969 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
970 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
971 break;
972 }
973
974 RTMemFree(pszId);
975 pszId = NULL;
976
977 while (cEntries > 0)
978 {
979 PPDMBLKCACHEENTRY pEntry;
980 uint64_t off;
981 uint32_t cbEntry;
982
983 SSMR3GetU64(pSSM, &off);
984 SSMR3GetU32(pSSM, &cbEntry);
985
986 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
987 if (!pEntry)
988 {
989 rc = VERR_NO_MEMORY;
990 break;
991 }
992
993 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
994 if (RT_FAILURE(rc))
995 {
996 RTMemFree(pEntry->pbData);
997 RTMemFree(pEntry);
998 break;
999 }
1000
1001 /* Insert into the tree. */
1002 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1003 Assert(fInserted); NOREF(fInserted);
1004
1005 /* Add to the dirty list. */
1006 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
1007 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
1008 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
1009 pdmBlkCacheEntryRelease(pEntry);
1010 cEntries--;
1011 }
1012
1013 cRefs--;
1014 }
1015
1016 if (pszId)
1017 RTMemFree(pszId);
1018
1019 if (cRefs && RT_SUCCESS(rc))
1020 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
1021 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
1022
1023 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1024
1025 if (RT_SUCCESS(rc))
1026 {
1027 uint32_t u32 = 0;
1028 rc = SSMR3GetU32(pSSM, &u32);
1029 if (RT_SUCCESS(rc))
1030 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
1031 }
1032
1033 return rc;
1034}
1035
1036int pdmR3BlkCacheInit(PVM pVM)
1037{
1038 int rc = VINF_SUCCESS;
1039 PUVM pUVM = pVM->pUVM;
1040 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
1041
1042 LogFlowFunc((": pVM=%p\n", pVM));
1043
1044 VM_ASSERT_EMT(pVM);
1045
1046 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1047 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1048
1049 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1050 if (!pBlkCacheGlobal)
1051 return VERR_NO_MEMORY;
1052
1053 RTListInit(&pBlkCacheGlobal->ListUsers);
1054 pBlkCacheGlobal->pVM = pVM;
1055 pBlkCacheGlobal->cRefs = 0;
1056 pBlkCacheGlobal->cbCached = 0;
1057 pBlkCacheGlobal->fCommitInProgress = false;
1058
1059 /* Initialize members */
1060 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1061 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1062 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1063
1064 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1065 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1066 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1067
1068 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1069 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1070 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1071
1072 do
1073 {
1074 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1075 AssertLogRelRCBreak(rc);
1076 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1077
1078 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1079 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1080 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1081 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1082
1083 /** @todo r=aeichner: Experiment to find optimal default values */
1084 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1085 AssertLogRelRCBreak(rc);
1086 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1087 AssertLogRelRCBreak(rc);
1088 } while (0);
1089
1090 if (RT_SUCCESS(rc))
1091 {
1092 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1093 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1094 "/PDM/BlkCache/cbMax",
1095 STAMUNIT_BYTES,
1096 "Maximum cache size");
1097 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1098 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1099 "/PDM/BlkCache/cbCached",
1100 STAMUNIT_BYTES,
1101 "Currently used cache");
1102 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1103 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1104 "/PDM/BlkCache/cbCachedMruIn",
1105 STAMUNIT_BYTES,
1106 "Number of bytes cached in MRU list");
1107 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1108 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1109 "/PDM/BlkCache/cbCachedMruOut",
1110 STAMUNIT_BYTES,
1111 "Number of bytes cached in FRU list");
1112 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1113 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1114 "/PDM/BlkCache/cbCachedFru",
1115 STAMUNIT_BYTES,
1116 "Number of bytes cached in FRU ghost list");
1117
1118#ifdef VBOX_WITH_STATISTICS
1119 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1120 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1121 "/PDM/BlkCache/CacheHits",
1122 STAMUNIT_COUNT, "Number of hits in the cache");
1123 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1124 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1125 "/PDM/BlkCache/CachePartialHits",
1126 STAMUNIT_COUNT, "Number of partial hits in the cache");
1127 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1128 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1129 "/PDM/BlkCache/CacheMisses",
1130 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1131 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1132 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1133 "/PDM/BlkCache/CacheRead",
1134 STAMUNIT_BYTES, "Number of bytes read from the cache");
1135 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1136 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1137 "/PDM/BlkCache/CacheWritten",
1138 STAMUNIT_BYTES, "Number of bytes written to the cache");
1139 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1140 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1141 "/PDM/BlkCache/CacheTreeGet",
1142 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1143 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1144 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1145 "/PDM/BlkCache/CacheTreeInsert",
1146 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1147 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1148 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1149 "/PDM/BlkCache/CacheTreeRemove",
1150 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1151 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1152 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1153 "/PDM/BlkCache/CacheBuffersReused",
1154 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1155#endif
1156
1157 /* Initialize the critical section */
1158 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1159 }
1160
1161 if (RT_SUCCESS(rc))
1162 {
1163 /* Create the commit timer */
1164 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1165 rc = TMR3TimerCreate(pVM, TMCLOCK_REAL, pdmBlkCacheCommitTimerCallback, pBlkCacheGlobal,
1166 TMTIMER_FLAGS_NO_RING0, "BlkCache-Commit", &pBlkCacheGlobal->hTimerCommit);
1167
1168 if (RT_SUCCESS(rc))
1169 {
1170 /* Register saved state handler. */
1171 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1172 NULL, NULL, NULL,
1173 NULL, pdmR3BlkCacheSaveExec, NULL,
1174 NULL, pdmR3BlkCacheLoadExec, NULL);
1175 if (RT_SUCCESS(rc))
1176 {
1177 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1178 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1179 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1180 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1181 return VINF_SUCCESS;
1182 }
1183 }
1184
1185 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1186 }
1187
1188 if (pBlkCacheGlobal)
1189 RTMemFree(pBlkCacheGlobal);
1190
1191 LogFlowFunc((": returns rc=%Rrc\n", rc));
1192 return rc;
1193}
1194
1195void pdmR3BlkCacheTerm(PVM pVM)
1196{
1197 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1198
1199 if (pBlkCacheGlobal)
1200 {
1201 /* Make sure no one else uses the cache now */
1202 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1203
1204 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1205 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1206 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1207 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1208
1209 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1210
1211 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1212 RTMemFree(pBlkCacheGlobal);
1213 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1214 }
1215}
1216
1217int pdmR3BlkCacheResume(PVM pVM)
1218{
1219 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1220
1221 LogFlowFunc(("pVM=%#p\n", pVM));
1222
1223 if ( pBlkCacheGlobal
1224 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1225 {
1226 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1227 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1228 }
1229
1230 return VINF_SUCCESS;
1231}
1232
1233static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1234{
1235 int rc = VINF_SUCCESS;
1236 PPDMBLKCACHE pBlkCache = NULL;
1237 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1238
1239 if (!pBlkCacheGlobal)
1240 return VERR_NOT_SUPPORTED;
1241
1242 /*
1243 * Check that no other user cache has the same id first,
1244 * Unique id's are necessary in case the state is saved.
1245 */
1246 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1247
1248 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1249
1250 if (!pBlkCache)
1251 {
1252 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1253
1254 if (pBlkCache)
1255 pBlkCache->pszId = RTStrDup(pcszId);
1256
1257 if ( pBlkCache
1258 && pBlkCache->pszId)
1259 {
1260 pBlkCache->fSuspended = false;
1261 pBlkCache->cIoXfersActive = 0;
1262 pBlkCache->pCache = pBlkCacheGlobal;
1263 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1264
1265 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1266 if (RT_SUCCESS(rc))
1267 {
1268 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1269 if (RT_SUCCESS(rc))
1270 {
1271 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1272 if (pBlkCache->pTree)
1273 {
1274#ifdef VBOX_WITH_STATISTICS
1275 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1276 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1277 STAMUNIT_COUNT, "Number of deferred writes",
1278 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1279#endif
1280
1281 /* Add to the list of users. */
1282 pBlkCacheGlobal->cRefs++;
1283 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1284 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1285
1286 *ppBlkCache = pBlkCache;
1287 LogFlowFunc(("returns success\n"));
1288 return VINF_SUCCESS;
1289 }
1290
1291 rc = VERR_NO_MEMORY;
1292 RTSemRWDestroy(pBlkCache->SemRWEntries);
1293 }
1294
1295 RTSpinlockDestroy(pBlkCache->LockList);
1296 }
1297
1298 RTStrFree(pBlkCache->pszId);
1299 }
1300 else
1301 rc = VERR_NO_MEMORY;
1302
1303 if (pBlkCache)
1304 RTMemFree(pBlkCache);
1305 }
1306 else
1307 rc = VERR_ALREADY_EXISTS;
1308
1309 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1310
1311 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1312 return rc;
1313}
1314
1315VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1316 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1317 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1318 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1319 const char *pcszId)
1320{
1321 int rc = VINF_SUCCESS;
1322 PPDMBLKCACHE pBlkCache;
1323
1324 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1325 if (RT_SUCCESS(rc))
1326 {
1327 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1328 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1329 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1330 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1331 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1332 *ppBlkCache = pBlkCache;
1333 }
1334
1335 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1336 return rc;
1337}
1338
1339VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1340 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1341 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1342 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1343 const char *pcszId)
1344{
1345 int rc = VINF_SUCCESS;
1346 PPDMBLKCACHE pBlkCache;
1347
1348 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1349 if (RT_SUCCESS(rc))
1350 {
1351 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1352 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1353 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1354 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1355 pBlkCache->u.Dev.pDevIns = pDevIns;
1356 *ppBlkCache = pBlkCache;
1357 }
1358
1359 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1360 return rc;
1361
1362}
1363
1364VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1365 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1366 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1367 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1368 const char *pcszId)
1369{
1370 int rc = VINF_SUCCESS;
1371 PPDMBLKCACHE pBlkCache;
1372
1373 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1374 if (RT_SUCCESS(rc))
1375 {
1376 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1377 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1378 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1379 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1380 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1381 *ppBlkCache = pBlkCache;
1382 }
1383
1384 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1385 return rc;
1386
1387}
1388
1389VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1390 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1391 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1392 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1393 const char *pcszId)
1394{
1395 int rc = VINF_SUCCESS;
1396 PPDMBLKCACHE pBlkCache;
1397
1398 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1399 if (RT_SUCCESS(rc))
1400 {
1401 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1402 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1403 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1404 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1405 pBlkCache->u.Int.pvUser = pvUser;
1406 *ppBlkCache = pBlkCache;
1407 }
1408
1409 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1410 return rc;
1411
1412}
1413
1414/**
1415 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1416 *
1417 * @returns IPRT status code.
1418 * @param pNode The node to destroy.
1419 * @param pvUser Opaque user data.
1420 */
1421static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1422{
1423 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1424 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1425 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1426
1427 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1428 {
1429 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1430 pdmBlkCacheEntryRef(pEntry);
1431 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1432 pdmBlkCacheLockLeave(pCache);
1433
1434 RTThreadSleep(250);
1435
1436 /* Re-enter all locks */
1437 pdmBlkCacheLockEnter(pCache);
1438 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1439 pdmBlkCacheEntryRelease(pEntry);
1440 }
1441
1442 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1443 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1444
1445 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1446 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1447
1448 pdmBlkCacheEntryRemoveFromList(pEntry);
1449
1450 if (fUpdateCache)
1451 pdmBlkCacheSub(pCache, pEntry->cbData);
1452
1453 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1454 RTMemFree(pEntry);
1455
1456 return VINF_SUCCESS;
1457}
1458
1459VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1460{
1461 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1462
1463 /*
1464 * Commit all dirty entries now (they are waited on for completion during the
1465 * destruction of the AVL tree below).
1466 * The exception is if the VM was paused because of an I/O error before.
1467 */
1468 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1469 pdmBlkCacheCommit(pBlkCache);
1470
1471 /* Make sure nobody is accessing the cache while we delete the tree. */
1472 pdmBlkCacheLockEnter(pCache);
1473 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1474 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1475 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1476
1477 RTSpinlockDestroy(pBlkCache->LockList);
1478
1479 pCache->cRefs--;
1480 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1481
1482 pdmBlkCacheLockLeave(pCache);
1483
1484 RTMemFree(pBlkCache->pTree);
1485 pBlkCache->pTree = NULL;
1486 RTSemRWDestroy(pBlkCache->SemRWEntries);
1487
1488#ifdef VBOX_WITH_STATISTICS
1489 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1490#endif
1491
1492 RTStrFree(pBlkCache->pszId);
1493 RTMemFree(pBlkCache);
1494}
1495
1496VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1497{
1498 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1499
1500 /*
1501 * Validate input.
1502 */
1503 if (!pDevIns)
1504 return;
1505 VM_ASSERT_EMT(pVM);
1506
1507 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1508 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1509
1510 /* Return silently if not supported. */
1511 if (!pBlkCacheGlobal)
1512 return;
1513
1514 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1515
1516 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1517 {
1518 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1519 && pBlkCache->u.Dev.pDevIns == pDevIns)
1520 PDMR3BlkCacheRelease(pBlkCache);
1521 }
1522
1523 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1524}
1525
1526VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1527{
1528 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1529
1530 /*
1531 * Validate input.
1532 */
1533 if (!pDrvIns)
1534 return;
1535 VM_ASSERT_EMT(pVM);
1536
1537 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1538 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1539
1540 /* Return silently if not supported. */
1541 if (!pBlkCacheGlobal)
1542 return;
1543
1544 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1545
1546 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1547 {
1548 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1549 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1550 PDMR3BlkCacheRelease(pBlkCache);
1551 }
1552
1553 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1554}
1555
1556VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1557{
1558 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1559
1560 /*
1561 * Validate input.
1562 */
1563 if (!pUsbIns)
1564 return;
1565 VM_ASSERT_EMT(pVM);
1566
1567 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1568 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1569
1570 /* Return silently if not supported. */
1571 if (!pBlkCacheGlobal)
1572 return;
1573
1574 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1575
1576 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1577 {
1578 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1579 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1580 PDMR3BlkCacheRelease(pBlkCache);
1581 }
1582
1583 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1584}
1585
1586static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1587{
1588 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1589
1590 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1591 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1592 if (pEntry)
1593 pdmBlkCacheEntryRef(pEntry);
1594 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1595
1596 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1597
1598 return pEntry;
1599}
1600
1601/**
1602 * Return the best fit cache entries for the given offset.
1603 *
1604 * @param pBlkCache The endpoint cache.
1605 * @param off The offset.
1606 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1607 * the given offset. NULL if not required.
1608 */
1609static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1610{
1611 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1612
1613 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1614 if (ppEntryAbove)
1615 {
1616 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1617 if (*ppEntryAbove)
1618 pdmBlkCacheEntryRef(*ppEntryAbove);
1619 }
1620
1621 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1622
1623 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1624}
1625
1626static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1627{
1628 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1629 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1630 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1631 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1632 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1633 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1634}
1635
1636/**
1637 * Allocates and initializes a new entry for the cache.
1638 * The entry has a reference count of 1.
1639 *
1640 * @returns Pointer to the new cache entry or NULL if out of memory.
1641 * @param pBlkCache The cache the entry belongs to.
1642 * @param off Start offset.
1643 * @param cbData Size of the cache entry.
1644 * @param pbBuffer Pointer to the buffer to use.
1645 * NULL if a new buffer should be allocated.
1646 * The buffer needs to have the same size of the entry.
1647 */
1648static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1649{
1650 AssertReturn(cbData <= UINT32_MAX, NULL);
1651 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1652
1653 if (RT_UNLIKELY(!pEntryNew))
1654 return NULL;
1655
1656 pEntryNew->Core.Key = off;
1657 pEntryNew->Core.KeyLast = off + cbData - 1;
1658 pEntryNew->pBlkCache = pBlkCache;
1659 pEntryNew->fFlags = 0;
1660 pEntryNew->cRefs = 1; /* We are using it now. */
1661 pEntryNew->pList = NULL;
1662 pEntryNew->cbData = (uint32_t)cbData;
1663 pEntryNew->pWaitingHead = NULL;
1664 pEntryNew->pWaitingTail = NULL;
1665 if (pbBuffer)
1666 pEntryNew->pbData = pbBuffer;
1667 else
1668 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1669
1670 if (RT_UNLIKELY(!pEntryNew->pbData))
1671 {
1672 RTMemFree(pEntryNew);
1673 return NULL;
1674 }
1675
1676 return pEntryNew;
1677}
1678
1679/**
1680 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1681 * in exclusive mode.
1682 *
1683 * @returns true if the flag in fSet is set and the one in fClear is clear.
1684 * false otherwise.
1685 * The R/W semaphore is only held if true is returned.
1686 *
1687 * @param pBlkCache The endpoint cache instance data.
1688 * @param pEntry The entry to check the flags for.
1689 * @param fSet The flag which is tested to be set.
1690 * @param fClear The flag which is tested to be clear.
1691 */
1692DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1693 PPDMBLKCACHEENTRY pEntry,
1694 uint32_t fSet, uint32_t fClear)
1695{
1696 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1697 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1698
1699 if (fPassed)
1700 {
1701 /* Acquire the lock and check again because the completion callback might have raced us. */
1702 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1703
1704 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1705 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1706
1707 /* Drop the lock if we didn't passed the test. */
1708 if (!fPassed)
1709 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1710 }
1711
1712 return fPassed;
1713}
1714
1715/**
1716 * Adds a segment to the waiting list for a cache entry
1717 * which is currently in progress.
1718 *
1719 * @param pEntry The cache entry to add the segment to.
1720 * @param pWaiter The waiter entry to add.
1721 */
1722DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEWAITER pWaiter)
1723{
1724 pWaiter->pNext = NULL;
1725
1726 if (pEntry->pWaitingHead)
1727 {
1728 AssertPtr(pEntry->pWaitingTail);
1729
1730 pEntry->pWaitingTail->pNext = pWaiter;
1731 pEntry->pWaitingTail = pWaiter;
1732 }
1733 else
1734 {
1735 Assert(!pEntry->pWaitingTail);
1736
1737 pEntry->pWaitingHead = pWaiter;
1738 pEntry->pWaitingTail = pWaiter;
1739 }
1740}
1741
1742/**
1743 * Add a buffer described by the I/O memory context
1744 * to the entry waiting for completion.
1745 *
1746 * @returns VBox status code.
1747 * @param pEntry The entry to add the buffer to.
1748 * @param pReq The request.
1749 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1750 * @param offDiff Offset from the start of the buffer in the entry.
1751 * @param cbData Amount of data to wait for onthis entry.
1752 * @param fWrite Flag whether the task waits because it wants to write to
1753 * the cache entry.
1754 */
1755static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1756 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1757{
1758 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1759 if (!pWaiter)
1760 return VERR_NO_MEMORY;
1761
1762 ASMAtomicIncU32(&pReq->cXfersPending);
1763 pWaiter->pReq = pReq;
1764 pWaiter->offCacheEntry = offDiff;
1765 pWaiter->cbTransfer = cbData;
1766 pWaiter->fWrite = fWrite;
1767 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1768 RTSgBufAdvance(pSgBuf, cbData);
1769
1770 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1771
1772 return VINF_SUCCESS;
1773}
1774
1775/**
1776 * Calculate aligned offset and size for a new cache entry which do not
1777 * intersect with an already existing entry and the file end.
1778 *
1779 * @returns The number of bytes the entry can hold of the requested amount
1780 * of bytes.
1781 * @param pBlkCache The endpoint cache.
1782 * @param off The start offset.
1783 * @param cb The number of bytes the entry needs to hold at
1784 * least.
1785 * @param pcbEntry Where to store the number of bytes the entry can hold.
1786 * Can be less than given because of other entries.
1787 */
1788static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1789 uint64_t off, uint32_t cb,
1790 uint32_t *pcbEntry)
1791{
1792 /* Get the best fit entries around the offset */
1793 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1794 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1795
1796 /* Log the info */
1797 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1798 pEntryAbove ? "B" : "No b",
1799 off,
1800 pEntryAbove ? pEntryAbove->Core.Key : 0,
1801 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1802 pEntryAbove ? pEntryAbove->cbData : 0));
1803
1804 uint32_t cbNext;
1805 uint32_t cbInEntry;
1806 if ( pEntryAbove
1807 && off + cb > pEntryAbove->Core.Key)
1808 {
1809 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1810 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1811 }
1812 else
1813 {
1814 cbInEntry = cb;
1815 cbNext = cb;
1816 }
1817
1818 /* A few sanity checks */
1819 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1820 ("Aligned size intersects with another cache entry\n"));
1821 Assert(cbInEntry <= cbNext);
1822
1823 if (pEntryAbove)
1824 pdmBlkCacheEntryRelease(pEntryAbove);
1825
1826 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1827
1828 *pcbEntry = cbNext;
1829
1830 return cbInEntry;
1831}
1832
1833/**
1834 * Create a new cache entry evicting data from the cache if required.
1835 *
1836 * @returns Pointer to the new cache entry or NULL
1837 * if not enough bytes could be evicted from the cache.
1838 * @param pBlkCache The endpoint cache.
1839 * @param off The offset.
1840 * @param cb Number of bytes the cache entry should have.
1841 * @param pcbData Where to store the number of bytes the new
1842 * entry can hold. May be lower than actually
1843 * requested due to another entry intersecting the
1844 * access range.
1845 */
1846static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1847{
1848 uint32_t cbEntry = 0;
1849
1850 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1851 AssertReturn(cb <= UINT32_MAX, NULL);
1852
1853 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1854 pdmBlkCacheLockEnter(pCache);
1855
1856 PPDMBLKCACHEENTRY pEntryNew = NULL;
1857 uint8_t *pbBuffer = NULL;
1858 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1859 if (fEnough)
1860 {
1861 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1862
1863 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1864 if (RT_LIKELY(pEntryNew))
1865 {
1866 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1867 pdmBlkCacheAdd(pCache, cbEntry);
1868 pdmBlkCacheLockLeave(pCache);
1869
1870 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1871
1872 AssertMsg( (off >= pEntryNew->Core.Key)
1873 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1874 ("Overflow in calculation off=%llu\n", off));
1875 }
1876 else
1877 pdmBlkCacheLockLeave(pCache);
1878 }
1879 else
1880 pdmBlkCacheLockLeave(pCache);
1881
1882 return pEntryNew;
1883}
1884
1885static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1886{
1887 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1888
1889 if (RT_LIKELY(pReq))
1890 {
1891 pReq->pvUser = pvUser;
1892 pReq->rcReq = VINF_SUCCESS;
1893 pReq->cXfersPending = 0;
1894 }
1895
1896 return pReq;
1897}
1898
1899static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1900{
1901 switch (pBlkCache->enmType)
1902 {
1903 case PDMBLKCACHETYPE_DEV:
1904 {
1905 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1906 pReq->pvUser, pReq->rcReq);
1907 break;
1908 }
1909 case PDMBLKCACHETYPE_DRV:
1910 {
1911 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1912 pReq->pvUser, pReq->rcReq);
1913 break;
1914 }
1915 case PDMBLKCACHETYPE_USB:
1916 {
1917 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1918 pReq->pvUser, pReq->rcReq);
1919 break;
1920 }
1921 case PDMBLKCACHETYPE_INTERNAL:
1922 {
1923 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1924 pReq->pvUser, pReq->rcReq);
1925 break;
1926 }
1927 default:
1928 AssertMsgFailed(("Unknown block cache type!\n"));
1929 }
1930
1931 RTMemFree(pReq);
1932}
1933
1934static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1935 int rcReq, bool fCallHandler)
1936{
1937 if (RT_FAILURE(rcReq))
1938 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1939
1940 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1941 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1942
1943 if (!cXfersPending)
1944 {
1945 if (fCallHandler)
1946 pdmBlkCacheReqComplete(pBlkCache, pReq);
1947 return true;
1948 }
1949
1950 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1951 return false;
1952}
1953
1954VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1955 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1956{
1957 int rc = VINF_SUCCESS;
1958 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1959 PPDMBLKCACHEENTRY pEntry;
1960 PPDMBLKCACHEREQ pReq;
1961
1962 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1963 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1964
1965 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1966 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1967
1968 RTSGBUF SgBuf;
1969 RTSgBufClone(&SgBuf, pSgBuf);
1970
1971 /* Allocate new request structure. */
1972 pReq = pdmBlkCacheReqAlloc(pvUser);
1973 if (RT_UNLIKELY(!pReq))
1974 return VERR_NO_MEMORY;
1975
1976 /* Increment data transfer counter to keep the request valid while we access it. */
1977 ASMAtomicIncU32(&pReq->cXfersPending);
1978
1979 while (cbRead)
1980 {
1981 size_t cbToRead;
1982
1983 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1984
1985 /*
1986 * If there is no entry we try to create a new one eviciting unused pages
1987 * if the cache is full. If this is not possible we will pass the request through
1988 * and skip the caching (all entries may be still in progress so they can't
1989 * be evicted)
1990 * If we have an entry it can be in one of the LRU lists where the entry
1991 * contains data (recently used or frequently used LRU) so we can just read
1992 * the data we need and put the entry at the head of the frequently used LRU list.
1993 * In case the entry is in one of the ghost lists it doesn't contain any data.
1994 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1995 */
1996 if (pEntry)
1997 {
1998 uint64_t offDiff = off - pEntry->Core.Key;
1999
2000 AssertMsg(off >= pEntry->Core.Key,
2001 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2002 off, pEntry->Core.Key));
2003
2004 AssertPtr(pEntry->pList);
2005
2006 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
2007
2008 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
2009 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
2010 off, cbToRead));
2011
2012 cbRead -= cbToRead;
2013
2014 if (!cbRead)
2015 STAM_COUNTER_INC(&pCache->cHits);
2016 else
2017 STAM_COUNTER_INC(&pCache->cPartialHits);
2018
2019 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
2020
2021 /* Ghost lists contain no data. */
2022 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2023 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2024 {
2025 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2026 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2027 PDMBLKCACHE_ENTRY_IS_DIRTY))
2028 {
2029 /* Entry didn't completed yet. Append to the list */
2030 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2031 &SgBuf, offDiff, cbToRead,
2032 false /* fWrite */);
2033 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2034 }
2035 else
2036 {
2037 /* Read as much as we can from the entry. */
2038 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2039 }
2040
2041 /* Move this entry to the top position */
2042 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2043 {
2044 pdmBlkCacheLockEnter(pCache);
2045 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2046 pdmBlkCacheLockLeave(pCache);
2047 }
2048 /* Release the entry */
2049 pdmBlkCacheEntryRelease(pEntry);
2050 }
2051 else
2052 {
2053 uint8_t *pbBuffer = NULL;
2054
2055 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2056
2057 pdmBlkCacheLockEnter(pCache);
2058 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2059 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2060
2061 /* Move the entry to Am and fetch it to the cache. */
2062 if (fEnough)
2063 {
2064 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2065 pdmBlkCacheAdd(pCache, pEntry->cbData);
2066 pdmBlkCacheLockLeave(pCache);
2067
2068 if (pbBuffer)
2069 pEntry->pbData = pbBuffer;
2070 else
2071 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2072 AssertPtr(pEntry->pbData);
2073
2074 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2075 &SgBuf, offDiff, cbToRead,
2076 false /* fWrite */);
2077 pdmBlkCacheEntryReadFromMedium(pEntry);
2078 /* Release the entry */
2079 pdmBlkCacheEntryRelease(pEntry);
2080 }
2081 else
2082 {
2083 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2084 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2085 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2086 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2087 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2088
2089 pdmBlkCacheLockLeave(pCache);
2090
2091 RTMemFree(pEntry);
2092
2093 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2094 &SgBuf, off, cbToRead,
2095 PDMBLKCACHEXFERDIR_READ);
2096 }
2097 }
2098 }
2099 else
2100 {
2101#ifdef VBOX_WITH_IO_READ_CACHE
2102 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2103 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2104 off, cbRead,
2105 &cbToRead);
2106
2107 cbRead -= cbToRead;
2108
2109 if (pEntryNew)
2110 {
2111 if (!cbRead)
2112 STAM_COUNTER_INC(&pCache->cMisses);
2113 else
2114 STAM_COUNTER_INC(&pCache->cPartialHits);
2115
2116 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2117 &SgBuf,
2118 off - pEntryNew->Core.Key,
2119 cbToRead,
2120 false /* fWrite */);
2121 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2122 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2123 }
2124 else
2125 {
2126 /*
2127 * There is not enough free space in the cache.
2128 * Pass the request directly to the I/O manager.
2129 */
2130 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2131
2132 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2133 &SgBuf, off, cbToRead,
2134 PDMBLKCACHEXFERDIR_READ);
2135 }
2136#else
2137 /* Clip read size if necessary. */
2138 PPDMBLKCACHEENTRY pEntryAbove;
2139 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2140
2141 if (pEntryAbove)
2142 {
2143 if (off + cbRead > pEntryAbove->Core.Key)
2144 cbToRead = pEntryAbove->Core.Key - off;
2145 else
2146 cbToRead = cbRead;
2147
2148 pdmBlkCacheEntryRelease(pEntryAbove);
2149 }
2150 else
2151 cbToRead = cbRead;
2152
2153 cbRead -= cbToRead;
2154 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2155 &SgBuf, off, cbToRead,
2156 PDMBLKCACHEXFERDIR_READ);
2157#endif
2158 }
2159 off += cbToRead;
2160 }
2161
2162 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2163 rc = VINF_AIO_TASK_PENDING;
2164 else
2165 {
2166 rc = pReq->rcReq;
2167 RTMemFree(pReq);
2168 }
2169
2170 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2171
2172 return rc;
2173}
2174
2175VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2176{
2177 int rc = VINF_SUCCESS;
2178 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2179 PPDMBLKCACHEENTRY pEntry;
2180 PPDMBLKCACHEREQ pReq;
2181
2182 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2183 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2184
2185 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2186 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2187
2188 RTSGBUF SgBuf;
2189 RTSgBufClone(&SgBuf, pSgBuf);
2190
2191 /* Allocate new request structure. */
2192 pReq = pdmBlkCacheReqAlloc(pvUser);
2193 if (RT_UNLIKELY(!pReq))
2194 return VERR_NO_MEMORY;
2195
2196 /* Increment data transfer counter to keep the request valid while we access it. */
2197 ASMAtomicIncU32(&pReq->cXfersPending);
2198
2199 while (cbWrite)
2200 {
2201 size_t cbToWrite;
2202
2203 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2204 if (pEntry)
2205 {
2206 /* Write the data into the entry and mark it as dirty */
2207 AssertPtr(pEntry->pList);
2208
2209 uint64_t offDiff = off - pEntry->Core.Key;
2210 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2211
2212 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2213 cbWrite -= cbToWrite;
2214
2215 if (!cbWrite)
2216 STAM_COUNTER_INC(&pCache->cHits);
2217 else
2218 STAM_COUNTER_INC(&pCache->cPartialHits);
2219
2220 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2221
2222 /* Ghost lists contain no data. */
2223 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2224 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2225 {
2226 /* Check if the entry is dirty. */
2227 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2228 PDMBLKCACHE_ENTRY_IS_DIRTY,
2229 0))
2230 {
2231 /* If it is already dirty but not in progress just update the data. */
2232 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2233 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2234 else
2235 {
2236 /* The data isn't written to the file yet */
2237 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2238 &SgBuf, offDiff, cbToWrite,
2239 true /* fWrite */);
2240 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2241 }
2242
2243 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2244 }
2245 else /* Dirty bit not set */
2246 {
2247 /*
2248 * Check if a read is in progress for this entry.
2249 * We have to defer processing in that case.
2250 */
2251 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2252 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2253 0))
2254 {
2255 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2256 &SgBuf, offDiff, cbToWrite,
2257 true /* fWrite */);
2258 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2259 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2260 }
2261 else /* I/O in progress flag not set */
2262 {
2263 /* Write as much as we can into the entry and update the file. */
2264 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2265
2266 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2267 if (fCommit)
2268 pdmBlkCacheCommitDirtyEntries(pCache);
2269 }
2270 } /* Dirty bit not set */
2271
2272 /* Move this entry to the top position */
2273 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2274 {
2275 pdmBlkCacheLockEnter(pCache);
2276 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2277 pdmBlkCacheLockLeave(pCache);
2278 }
2279
2280 pdmBlkCacheEntryRelease(pEntry);
2281 }
2282 else /* Entry is on the ghost list */
2283 {
2284 uint8_t *pbBuffer = NULL;
2285
2286 pdmBlkCacheLockEnter(pCache);
2287 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2288 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2289
2290 if (fEnough)
2291 {
2292 /* Move the entry to Am and fetch it to the cache. */
2293 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2294 pdmBlkCacheAdd(pCache, pEntry->cbData);
2295 pdmBlkCacheLockLeave(pCache);
2296
2297 if (pbBuffer)
2298 pEntry->pbData = pbBuffer;
2299 else
2300 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2301 AssertPtr(pEntry->pbData);
2302
2303 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2304 &SgBuf, offDiff, cbToWrite,
2305 true /* fWrite */);
2306 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2307 pdmBlkCacheEntryReadFromMedium(pEntry);
2308
2309 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2310 pdmBlkCacheEntryRelease(pEntry);
2311 }
2312 else
2313 {
2314 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2315 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2316 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2317 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2318 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2319
2320 pdmBlkCacheLockLeave(pCache);
2321
2322 RTMemFree(pEntry);
2323 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2324 &SgBuf, off, cbToWrite,
2325 PDMBLKCACHEXFERDIR_WRITE);
2326 }
2327 }
2328 }
2329 else /* No entry found */
2330 {
2331 /*
2332 * No entry found. Try to create a new cache entry to store the data in and if that fails
2333 * write directly to the file.
2334 */
2335 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2336 off, cbWrite,
2337 &cbToWrite);
2338
2339 cbWrite -= cbToWrite;
2340
2341 if (pEntryNew)
2342 {
2343 uint64_t offDiff = off - pEntryNew->Core.Key;
2344
2345 STAM_COUNTER_INC(&pCache->cHits);
2346
2347 /*
2348 * Check if it is possible to just write the data without waiting
2349 * for it to get fetched first.
2350 */
2351 if (!offDiff && pEntryNew->cbData == cbToWrite)
2352 {
2353 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2354
2355 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2356 if (fCommit)
2357 pdmBlkCacheCommitDirtyEntries(pCache);
2358 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2359 }
2360 else
2361 {
2362 /* Defer the write and fetch the data from the endpoint. */
2363 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2364 &SgBuf, offDiff, cbToWrite,
2365 true /* fWrite */);
2366 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2367 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2368 }
2369
2370 pdmBlkCacheEntryRelease(pEntryNew);
2371 }
2372 else
2373 {
2374 /*
2375 * There is not enough free space in the cache.
2376 * Pass the request directly to the I/O manager.
2377 */
2378 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2379
2380 STAM_COUNTER_INC(&pCache->cMisses);
2381
2382 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2383 &SgBuf, off, cbToWrite,
2384 PDMBLKCACHEXFERDIR_WRITE);
2385 }
2386 }
2387
2388 off += cbToWrite;
2389 }
2390
2391 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2392 rc = VINF_AIO_TASK_PENDING;
2393 else
2394 {
2395 rc = pReq->rcReq;
2396 RTMemFree(pReq);
2397 }
2398
2399 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2400
2401 return rc;
2402}
2403
2404VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2405{
2406 int rc = VINF_SUCCESS;
2407 PPDMBLKCACHEREQ pReq;
2408
2409 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2410
2411 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2412 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2413
2414 /* Commit dirty entries in the cache. */
2415 pdmBlkCacheCommit(pBlkCache);
2416
2417 /* Allocate new request structure. */
2418 pReq = pdmBlkCacheReqAlloc(pvUser);
2419 if (RT_UNLIKELY(!pReq))
2420 return VERR_NO_MEMORY;
2421
2422 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2423 PDMBLKCACHEXFERDIR_FLUSH);
2424 AssertRC(rc);
2425
2426 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2427 return VINF_AIO_TASK_PENDING;
2428}
2429
2430VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2431 unsigned cRanges, void *pvUser)
2432{
2433 int rc = VINF_SUCCESS;
2434 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2435 PPDMBLKCACHEENTRY pEntry;
2436 PPDMBLKCACHEREQ pReq;
2437
2438 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2439 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2440
2441 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2442 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2443
2444 /* Allocate new request structure. */
2445 pReq = pdmBlkCacheReqAlloc(pvUser);
2446 if (RT_UNLIKELY(!pReq))
2447 return VERR_NO_MEMORY;
2448
2449 /* Increment data transfer counter to keep the request valid while we access it. */
2450 ASMAtomicIncU32(&pReq->cXfersPending);
2451
2452 for (unsigned i = 0; i < cRanges; i++)
2453 {
2454 uint64_t offCur = paRanges[i].offStart;
2455 size_t cbLeft = paRanges[i].cbRange;
2456
2457 while (cbLeft)
2458 {
2459 size_t cbThisDiscard = 0;
2460
2461 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2462
2463 if (pEntry)
2464 {
2465 /* Write the data into the entry and mark it as dirty */
2466 AssertPtr(pEntry->pList);
2467
2468 uint64_t offDiff = offCur - pEntry->Core.Key;
2469
2470 AssertMsg(offCur >= pEntry->Core.Key,
2471 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2472 offCur, pEntry->Core.Key));
2473
2474 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2475
2476 /* Ghost lists contain no data. */
2477 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2478 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2479 {
2480 /* Check if the entry is dirty. */
2481 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2482 PDMBLKCACHE_ENTRY_IS_DIRTY,
2483 0))
2484 {
2485 /* If it is dirty but not yet in progress remove it. */
2486 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2487 {
2488 pdmBlkCacheLockEnter(pCache);
2489 pdmBlkCacheEntryRemoveFromList(pEntry);
2490
2491 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2492 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2493 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2494
2495 pdmBlkCacheLockLeave(pCache);
2496
2497 RTMemFree(pEntry);
2498 }
2499 else
2500 {
2501#if 0
2502 /* The data isn't written to the file yet */
2503 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2504 &SgBuf, offDiff, cbToWrite,
2505 true /* fWrite */);
2506 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2507#endif
2508 }
2509
2510 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2511 pdmBlkCacheEntryRelease(pEntry);
2512 }
2513 else /* Dirty bit not set */
2514 {
2515 /*
2516 * Check if a read is in progress for this entry.
2517 * We have to defer processing in that case.
2518 */
2519 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2520 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2521 0))
2522 {
2523#if 0
2524 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2525 &SgBuf, offDiff, cbToWrite,
2526 true /* fWrite */);
2527#endif
2528 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2529 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2530 pdmBlkCacheEntryRelease(pEntry);
2531 }
2532 else /* I/O in progress flag not set */
2533 {
2534 pdmBlkCacheLockEnter(pCache);
2535 pdmBlkCacheEntryRemoveFromList(pEntry);
2536
2537 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2538 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2539 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2540 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2541 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2542
2543 pdmBlkCacheLockLeave(pCache);
2544
2545 RTMemFree(pEntry);
2546 }
2547 } /* Dirty bit not set */
2548 }
2549 else /* Entry is on the ghost list just remove cache entry. */
2550 {
2551 pdmBlkCacheLockEnter(pCache);
2552 pdmBlkCacheEntryRemoveFromList(pEntry);
2553
2554 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2555 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2556 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2557 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2558 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2559
2560 pdmBlkCacheLockLeave(pCache);
2561
2562 RTMemFree(pEntry);
2563 }
2564 }
2565 /* else: no entry found. */
2566
2567 offCur += cbThisDiscard;
2568 cbLeft -= cbThisDiscard;
2569 }
2570 }
2571
2572 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2573 rc = VINF_AIO_TASK_PENDING;
2574 else
2575 {
2576 rc = pReq->rcReq;
2577 RTMemFree(pReq);
2578 }
2579
2580 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2581
2582 return rc;
2583}
2584
2585/**
2586 * Completes a task segment freeing all resources and completes the task handle
2587 * if everything was transferred.
2588 *
2589 * @returns Next task segment handle.
2590 * @param pBlkCache The endpoint block cache.
2591 * @param pWaiter Task segment to complete.
2592 * @param rc Status code to set.
2593 */
2594static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2595{
2596 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2597 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2598
2599 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2600
2601 RTMemFree(pWaiter);
2602
2603 return pNext;
2604}
2605
2606static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2607{
2608 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2609 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2610
2611 /* Reference the entry now as we are clearing the I/O in progress flag
2612 * which protected the entry till now. */
2613 pdmBlkCacheEntryRef(pEntry);
2614
2615 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2616 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2617
2618 /* Process waiting segment list. The data in entry might have changed in-between. */
2619 bool fDirty = false;
2620 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2621 PPDMBLKCACHEWAITER pCurr = pComplete;
2622
2623 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2624 ("The list tail was not updated correctly\n"));
2625 pEntry->pWaitingTail = NULL;
2626 pEntry->pWaitingHead = NULL;
2627
2628 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2629 {
2630 /*
2631 * An error here is difficult to handle as the original request completed already.
2632 * The error is logged for now and the VM is paused.
2633 * If the user continues the entry is written again in the hope
2634 * the user fixed the problem and the next write succeeds.
2635 */
2636 if (RT_FAILURE(rcIoXfer))
2637 {
2638 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2639 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2640
2641 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2642 {
2643 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2644 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2645 "Make sure there is enough free space on the disk and that the disk is working properly. "
2646 "Operation can be resumed afterwards"),
2647 pBlkCache->pszId, rcIoXfer);
2648 AssertRC(rc);
2649 }
2650
2651 /* Mark the entry as dirty again to get it added to the list later on. */
2652 fDirty = true;
2653 }
2654
2655 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2656
2657 while (pCurr)
2658 {
2659 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2660
2661 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2662 fDirty = true;
2663 pCurr = pCurr->pNext;
2664 }
2665 }
2666 else
2667 {
2668 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2669 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2670 ("Invalid flags set\n"));
2671
2672 while (pCurr)
2673 {
2674 if (pCurr->fWrite)
2675 {
2676 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2677 fDirty = true;
2678 }
2679 else
2680 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2681
2682 pCurr = pCurr->pNext;
2683 }
2684 }
2685
2686 bool fCommit = false;
2687 if (fDirty)
2688 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2689
2690 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2691
2692 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2693 pdmBlkCacheEntryRelease(pEntry);
2694
2695 if (fCommit)
2696 pdmBlkCacheCommitDirtyEntries(pCache);
2697
2698 /* Complete waiters now. */
2699 while (pComplete)
2700 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2701}
2702
2703VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2704{
2705 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2706
2707 if (hIoXfer->fIoCache)
2708 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2709 else
2710 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2711
2712 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
2713 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)",
2714 hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive);
2715 RTMemFree(hIoXfer);
2716}
2717
2718/**
2719 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2720 *
2721 * @returns IPRT status code.
2722 * @param pNode The node to destroy.
2723 * @param pvUser Opaque user data.
2724 */
2725static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2726{
2727 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2728 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2729 NOREF(pvUser);
2730
2731 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2732 {
2733 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2734 pdmBlkCacheEntryRef(pEntry);
2735 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2736
2737 RTThreadSleep(1);
2738
2739 /* Re-enter all locks and drop the reference. */
2740 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2741 pdmBlkCacheEntryRelease(pEntry);
2742 }
2743
2744 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2745 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2746
2747 return VINF_SUCCESS;
2748}
2749
2750VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2751{
2752 int rc = VINF_SUCCESS;
2753 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2754
2755 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2756
2757 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2758 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2759 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2760
2761 /* Wait for all I/O to complete. */
2762 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2763 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2764 AssertRC(rc);
2765 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2766
2767 return rc;
2768}
2769
2770VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2771{
2772 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2773
2774 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2775
2776 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2777
2778 return VINF_SUCCESS;
2779}
2780
2781VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2782{
2783 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2784
2785 /*
2786 * Commit all dirty entries now (they are waited on for completion during the
2787 * destruction of the AVL tree below).
2788 * The exception is if the VM was paused because of an I/O error before.
2789 */
2790 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2791 pdmBlkCacheCommit(pBlkCache);
2792
2793 /* Make sure nobody is accessing the cache while we delete the tree. */
2794 pdmBlkCacheLockEnter(pCache);
2795 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2796 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2797 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2798
2799 pdmBlkCacheLockLeave(pCache);
2800 return VINF_SUCCESS;
2801}
2802
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette