VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 94800

Last change on this file since 94800 was 94319, checked in by vboxsync, 3 years ago

VMM/PDMBlockCache: Added missing semaphore release call to an infrequent pdmBlkCacheEvictPagesFrom code path handling a race. ticketref:20875

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.3 KB
Line 
1/* $Id: PDMBlkCache.cpp 94319 2022-03-21 23:40:13Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22
23/*********************************************************************************************************************************
24* Header Files *
25*********************************************************************************************************************************/
26#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
27#include "PDMInternal.h"
28#include <iprt/asm.h>
29#include <iprt/mem.h>
30#include <iprt/path.h>
31#include <iprt/string.h>
32#include <iprt/trace.h>
33#include <VBox/log.h>
34#include <VBox/vmm/stam.h>
35#include <VBox/vmm/uvm.h>
36#include <VBox/vmm/vm.h>
37
38#include "PDMBlkCacheInternal.h"
39
40
41/*********************************************************************************************************************************
42* Defined Constants And Macros *
43*********************************************************************************************************************************/
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given global cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n")); \
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for reading. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
/* Non-strict builds: the ownership checks compile to nothing. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache)           do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache)  do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache)   do { } while (0)
#endif

/** Saved state unit version accepted by pdmR3BlkCacheLoadExec. */
#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1

/* Define VBOX_BLKCACHE_TRACING to trace block cache activity when investigating issues. */
/*#define VBOX_BLKCACHE_TRACING 1*/
76
77
78/*********************************************************************************************************************************
79* Internal Functions *
80*********************************************************************************************************************************/
81
82static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
83 uint64_t off, size_t cbData, uint8_t *pbBuffer);
84static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
85
86
87/**
88 * Add message to the VM trace buffer.
89 *
90 * @returns nothing.
91 * @param pBlkCache The block cache.
92 * @param pszFmt The format string.
93 * @param ... Additional parameters for the string formatter.
94 */
95DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...)
96{
97#if defined(VBOX_BLKCACHE_TRACING)
98 va_list va;
99 va_start(va, pszFmt);
100 RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va);
101 va_end(va);
102#else
103 RT_NOREF2(pBlkCache, pszFmt);
104#endif
105}
106
107/**
108 * Decrement the reference counter of the given cache entry.
109 *
110 * @returns nothing.
111 * @param pEntry The entry to release.
112 */
113DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
114{
115 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
116 ASMAtomicDecU32(&pEntry->cRefs);
117}
118
119/**
120 * Increment the reference counter of the given cache entry.
121 *
122 * @returns nothing.
123 * @param pEntry The entry to reference.
124 */
125DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
126{
127 ASMAtomicIncU32(&pEntry->cRefs);
128}
129
#if defined(VBOX_STRICT)
/**
 * Asserts the byte accounting invariants of the global cache (strict builds only).
 *
 * @returns nothing.
 * @param   pCache      The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* Invariant 1: the cached amount never exceeds the configured maximum. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* Invariant 2: the two lists holding data account for exactly cbCached bytes. */
    AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* Invariant 3: the paged out (ghost) list stays within its own limit. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif /* VBOX_STRICT */
145
146DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
147{
148 RTCritSectEnter(&pCache->CritSect);
149#ifdef VBOX_STRICT
150 pdmBlkCacheValidate(pCache);
151#endif
152}
153
154DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
155{
156#ifdef VBOX_STRICT
157 pdmBlkCacheValidate(pCache);
158#endif
159 RTCritSectLeave(&pCache->CritSect);
160}
161
162DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
163{
164 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
165 pCache->cbCached -= cbAmount;
166}
167
168DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
169{
170 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
171 pCache->cbCached += cbAmount;
172}
173
174DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
175{
176 pList->cbCached += cbAmount;
177}
178
179DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
180{
181 pList->cbCached -= cbAmount;
182}
183
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Walks an LRU list and asserts that it is well formed.
 *
 * For every element it is asserted that the element does not show up again
 * further down the list, that it is not the forbidden element, and that the
 * element without a successor is the recorded list tail.
 *
 * @returns nothing
 * @param   pList       The LRU list to check.
 * @param   pNotInList  Element which is not allowed to occur in the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pCurr = pList->pHead; pCurr; pCurr = pCurr->pNext)
    {
        /* No duplicates and no cycles: pCurr must not reappear behind itself. */
        for (PPDMBLKCACHEENTRY pNext = pCurr->pNext; pNext; pNext = pNext->pNext)
        {
            AssertMsg(pCurr != pNext,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pCurr, pList));
        }

        AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));

        if (!pCurr->pNext)
            AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
    }
}
#endif /* PDMACFILECACHE_WITH_LRULIST_CHECKS */
218
219/**
220 * Unlinks a cache entry from the LRU list it is assigned to.
221 *
222 * @returns nothing.
223 * @param pEntry The entry to unlink.
224 */
225static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
226{
227 PPDMBLKLRULIST pList = pEntry->pList;
228 PPDMBLKCACHEENTRY pPrev, pNext;
229
230 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
231
232 AssertPtr(pList);
233
234#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
235 pdmBlkCacheCheckList(pList, NULL);
236#endif
237
238 pPrev = pEntry->pPrev;
239 pNext = pEntry->pNext;
240
241 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
242 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
243
244 if (pPrev)
245 pPrev->pNext = pNext;
246 else
247 {
248 pList->pHead = pNext;
249
250 if (pNext)
251 pNext->pPrev = NULL;
252 }
253
254 if (pNext)
255 pNext->pPrev = pPrev;
256 else
257 {
258 pList->pTail = pPrev;
259
260 if (pPrev)
261 pPrev->pNext = NULL;
262 }
263
264 pEntry->pList = NULL;
265 pEntry->pPrev = NULL;
266 pEntry->pNext = NULL;
267 pdmBlkCacheListSub(pList, pEntry->cbData);
268#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
269 pdmBlkCacheCheckList(pList, pEntry);
270#endif
271}
272
273/**
274 * Adds a cache entry to the given LRU list unlinking it from the currently
275 * assigned list if needed.
276 *
277 * @returns nothing.
278 * @param pList List to the add entry to.
279 * @param pEntry Entry to add.
280 */
281static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
282{
283 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
284#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
285 pdmBlkCacheCheckList(pList, NULL);
286#endif
287
288 /* Remove from old list if needed */
289 if (pEntry->pList)
290 pdmBlkCacheEntryRemoveFromList(pEntry);
291
292 pEntry->pNext = pList->pHead;
293 if (pList->pHead)
294 pList->pHead->pPrev = pEntry;
295 else
296 {
297 Assert(!pList->pTail);
298 pList->pTail = pEntry;
299 }
300
301 pEntry->pPrev = NULL;
302 pList->pHead = pEntry;
303 pdmBlkCacheListAdd(pList, pEntry->cbData);
304 pEntry->pList = pList;
305#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
306 pdmBlkCacheCheckList(pList, NULL);
307#endif
308}
309
310/**
311 * Destroys a LRU list freeing all entries.
312 *
313 * @returns nothing
314 * @param pList Pointer to the LRU list to destroy.
315 *
316 * @note The caller must own the critical section of the cache.
317 */
318static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
319{
320 while (pList->pHead)
321 {
322 PPDMBLKCACHEENTRY pEntry = pList->pHead;
323
324 pList->pHead = pEntry->pNext;
325
326 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
327 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
328
329 RTMemPageFree(pEntry->pbData, pEntry->cbData);
330 RTMemFree(pEntry);
331 }
332}
333
334/**
335 * Tries to remove the given amount of bytes from a given list in the cache
336 * moving the entries to one of the given ghosts lists
337 *
338 * @returns Amount of data which could be freed.
339 * @param pCache Pointer to the global cache data.
340 * @param cbData The amount of the data to free.
341 * @param pListSrc The source list to evict data from.
342 * @param pGhostListDst Where the ghost list removed entries should be
343 * moved to, NULL if the entry should be freed.
344 * @param fReuseBuffer Flag whether a buffer should be reused if it has
345 * the same size
346 * @param ppbBuffer Where to store the address of the buffer if an
347 * entry with the same size was found and
348 * fReuseBuffer is true.
349 *
350 * @note This function may return fewer bytes than requested because entries
351 * may be marked as non evictable if they are used for I/O at the
352 * moment.
353 */
354static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
355 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
356 bool fReuseBuffer, uint8_t **ppbBuffer)
357{
358 size_t cbEvicted = 0;
359
360 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
361
362 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
363 AssertMsg( !pGhostListDst
364 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
365 ("Destination list must be NULL or the recently used but paged out list\n"));
366
367 if (fReuseBuffer)
368 {
369 AssertPtr(ppbBuffer);
370 *ppbBuffer = NULL;
371 }
372
373 /* Start deleting from the tail. */
374 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
375
376 while ((cbEvicted < cbData) && pEntry)
377 {
378 PPDMBLKCACHEENTRY pCurr = pEntry;
379
380 pEntry = pEntry->pPrev;
381
382 /* We can't evict pages which are currently in progress or dirty but not in progress */
383 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
384 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
385 {
386 /* Ok eviction candidate. Grab the endpoint semaphore and check again
387 * because somebody else might have raced us. */
388 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
389 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
390
391 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
392 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
393 {
394 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
395
396 if (fReuseBuffer && pCurr->cbData == cbData)
397 {
398 STAM_COUNTER_INC(&pCache->StatBuffersReused);
399 *ppbBuffer = pCurr->pbData;
400 }
401 else if (pCurr->pbData)
402 RTMemPageFree(pCurr->pbData, pCurr->cbData);
403
404 pCurr->pbData = NULL;
405 cbEvicted += pCurr->cbData;
406
407 pdmBlkCacheEntryRemoveFromList(pCurr);
408 pdmBlkCacheSub(pCache, pCurr->cbData);
409
410 if (pGhostListDst)
411 {
412 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
413
414 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
415
416 /* We have to remove the last entries from the paged out list. */
417 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
418 && pGhostEntFree)
419 {
420 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
421 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
422
423 pGhostEntFree = pGhostEntFree->pPrev;
424
425 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
426
427 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
428 {
429 pdmBlkCacheEntryRemoveFromList(pFree);
430
431 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
432 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
433 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
434
435 RTMemFree(pFree);
436 }
437
438 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
439 }
440
441 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
442 {
443 /* Couldn't remove enough entries. Delete */
444 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
445 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
446 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
447
448 RTMemFree(pCurr);
449 }
450 else
451 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
452 }
453 else
454 {
455 /* Delete the entry from the AVL tree it is assigned to. */
456 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
457 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
458 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
459
460 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
461 RTMemFree(pCurr);
462 }
463 }
464 else
465 {
466 LogFlow(("Someone raced us, entry %#p (%u bytes) cannot be evicted any more (fFlags=%#x cRefs=%#x)\n",
467 pCurr, pCurr->cbData, pCurr->fFlags, pCurr->cRefs));
468 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
469 }
470
471 }
472 else
473 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
474 }
475
476 return cbEvicted;
477}
478
479static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
480{
481 size_t cbRemoved = 0;
482
483 if ((pCache->cbCached + cbData) < pCache->cbMax)
484 return true;
485 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
486 {
487 /* Try to evict as many bytes as possible from A1in */
488 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
489 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
490
491 /*
492 * If it was not possible to remove enough entries
493 * try the frequently accessed cache.
494 */
495 if (cbRemoved < cbData)
496 {
497 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
498
499 /*
500 * If we removed something we can't pass the reuse buffer flag anymore because
501 * we don't need to evict that much data
502 */
503 if (!cbRemoved)
504 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
505 NULL, fReuseBuffer, ppbBuffer);
506 else
507 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
508 NULL, false, NULL);
509 }
510 }
511 else
512 {
513 /* We have to remove entries from frequently access list. */
514 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
515 NULL, fReuseBuffer, ppbBuffer);
516 }
517
518 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
519 return (cbRemoved >= cbData);
520}
521
522DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
523{
524 int rc = VINF_SUCCESS;
525
526 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
527 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
528
529 ASMAtomicIncU32(&pBlkCache->cIoXfersActive);
530 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)",
531 pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive);
532
533 switch (pBlkCache->enmType)
534 {
535 case PDMBLKCACHETYPE_DEV:
536 {
537 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
538 pIoXfer->enmXferDir,
539 off, cbXfer,
540 &pIoXfer->SgBuf, pIoXfer);
541 break;
542 }
543 case PDMBLKCACHETYPE_DRV:
544 {
545 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
546 pIoXfer->enmXferDir,
547 off, cbXfer,
548 &pIoXfer->SgBuf, pIoXfer);
549 break;
550 }
551 case PDMBLKCACHETYPE_USB:
552 {
553 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
554 pIoXfer->enmXferDir,
555 off, cbXfer,
556 &pIoXfer->SgBuf, pIoXfer);
557 break;
558 }
559 case PDMBLKCACHETYPE_INTERNAL:
560 {
561 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
562 pIoXfer->enmXferDir,
563 off, cbXfer,
564 &pIoXfer->SgBuf, pIoXfer);
565 break;
566 }
567 default:
568 AssertMsgFailed(("Unknown block cache type!\n"));
569 }
570
571 if (RT_FAILURE(rc))
572 {
573 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc);
574 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
575 }
576
577 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
578 return rc;
579}
580
581/**
582 * Initiates a read I/O task for the given entry.
583 *
584 * @returns VBox status code.
585 * @param pEntry The entry to fetch the data to.
586 */
587static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
588{
589 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
590 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
591
592 /* Make sure no one evicts the entry while it is accessed. */
593 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
594
595 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
596 if (RT_UNLIKELY(!pIoXfer))
597 return VERR_NO_MEMORY;
598
599 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
600
601 pIoXfer->fIoCache = true;
602 pIoXfer->pEntry = pEntry;
603 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
604 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
605 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
606 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
607
608 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
609}
610
611/**
612 * Initiates a write I/O task for the given entry.
613 *
614 * @returns nothing.
615 * @param pEntry The entry to read the data from.
616 */
617static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
618{
619 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
620 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
621
622 /* Make sure no one evicts the entry while it is accessed. */
623 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
624
625 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
626 if (RT_UNLIKELY(!pIoXfer))
627 return VERR_NO_MEMORY;
628
629 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
630
631 pIoXfer->fIoCache = true;
632 pIoXfer->pEntry = pEntry;
633 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
634 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
635 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
636 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
637
638 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
639}
640
641/**
642 * Passthrough a part of a request directly to the I/O manager handling the
643 * endpoint.
644 *
645 * @returns VBox status code.
646 * @param pBlkCache The endpoint cache.
647 * @param pReq The request.
648 * @param pSgBuf The scatter/gather buffer.
649 * @param offStart Offset to start transfer from.
650 * @param cbData Amount of data to transfer.
651 * @param enmXferDir The transfer type (read/write)
652 */
653static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
654 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
655 PDMBLKCACHEXFERDIR enmXferDir)
656{
657
658 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
659 if (RT_UNLIKELY(!pIoXfer))
660 return VERR_NO_MEMORY;
661
662 ASMAtomicIncU32(&pReq->cXfersPending);
663 pIoXfer->fIoCache = false;
664 pIoXfer->pReq = pReq;
665 pIoXfer->enmXferDir = enmXferDir;
666 if (pSgBuf)
667 {
668 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
669 RTSgBufAdvance(pSgBuf, cbData);
670 }
671
672 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
673}
674
675/**
676 * Commit a single dirty entry to the endpoint
677 *
678 * @returns nothing
679 * @param pEntry The entry to commit.
680 */
681static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
682{
683 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
684 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
685 ("Invalid flags set for entry %#p\n", pEntry));
686
687 pdmBlkCacheEntryWriteToMedium(pEntry);
688}
689
690/**
691 * Commit all dirty entries for a single endpoint.
692 *
693 * @returns nothing.
694 * @param pBlkCache The endpoint cache to commit.
695 */
696static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
697{
698 uint32_t cbCommitted = 0;
699
700 /* Return if the cache was suspended. */
701 if (pBlkCache->fSuspended)
702 return;
703
704 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
705
706 /* The list is moved to a new header to reduce locking overhead. */
707 RTLISTANCHOR ListDirtyNotCommitted;
708
709 RTSpinlockAcquire(pBlkCache->LockList);
710 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
711 RTSpinlockRelease(pBlkCache->LockList);
712
713 if (!RTListIsEmpty(&ListDirtyNotCommitted))
714 {
715 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
716
717 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
718 {
719 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
720 NodeNotCommitted);
721 pdmBlkCacheEntryCommit(pEntry);
722 cbCommitted += pEntry->cbData;
723 RTListNodeRemove(&pEntry->NodeNotCommitted);
724 pEntry = pNext;
725 }
726
727 /* Commit the last endpoint */
728 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
729 pdmBlkCacheEntryCommit(pEntry);
730 cbCommitted += pEntry->cbData;
731 RTListNodeRemove(&pEntry->NodeNotCommitted);
732 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
733 ("Committed all entries but list is not empty\n"));
734 }
735
736 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
737 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
738 ("Number of committed bytes exceeds number of dirty bytes\n"));
739 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
740
741 /* Reset the commit timer if we don't have any dirty bits. */
742 if ( !(cbDirtyOld - cbCommitted)
743 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
744 TMTimerStop(pBlkCache->pCache->pVM, pBlkCache->pCache->hTimerCommit);
745}
746
747/**
748 * Commit all dirty entries in the cache.
749 *
750 * @returns nothing.
751 * @param pCache The global cache instance.
752 */
753static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
754{
755 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
756
757 if (!fCommitInProgress)
758 {
759 pdmBlkCacheLockEnter(pCache);
760 Assert(!RTListIsEmpty(&pCache->ListUsers));
761
762 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
763 AssertPtr(pBlkCache);
764
765 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
766 {
767 pdmBlkCacheCommit(pBlkCache);
768
769 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
770 NodeCacheUser);
771 }
772
773 /* Commit the last endpoint */
774 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
775 pdmBlkCacheCommit(pBlkCache);
776
777 pdmBlkCacheLockLeave(pCache);
778 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
779 }
780}
781
782/**
783 * Adds the given entry as a dirty to the cache.
784 *
785 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
786 * @param pBlkCache The endpoint cache the entry belongs to.
787 * @param pEntry The entry to add.
788 */
789static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
790{
791 bool fDirtyBytesExceeded = false;
792 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
793
794 /* If the commit timer is disabled we commit right away. */
795 if (pCache->u32CommitTimeoutMs == 0)
796 {
797 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
798 pdmBlkCacheEntryCommit(pEntry);
799 }
800 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
801 {
802 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
803
804 RTSpinlockAcquire(pBlkCache->LockList);
805 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
806 RTSpinlockRelease(pBlkCache->LockList);
807
808 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
809
810 /* Prevent committing if the VM was suspended. */
811 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
812 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
813 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
814 {
815 /* Arm the commit timer. */
816 TMTimerSetMillies(pCache->pVM, pCache->hTimerCommit, pCache->u32CommitTimeoutMs);
817 }
818 }
819
820 return fDirtyBytesExceeded;
821}
822
823static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
824{
825 bool fFound = false;
826
827 PPDMBLKCACHE pBlkCache;
828 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
829 {
830 if (!RTStrCmp(pBlkCache->pszId, pcszId))
831 {
832 fFound = true;
833 break;
834 }
835 }
836
837 return fFound ? pBlkCache : NULL;
838}
839
840/**
841 * @callback_method_impl{FNTMTIMERINT, Commit timer callback.}
842 */
843static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, TMTIMERHANDLE hTimer, void *pvUser)
844{
845 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
846 RT_NOREF(pVM, hTimer);
847
848 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
849
850 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
851 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
852 pdmBlkCacheCommitDirtyEntries(pCache);
853
854 LogFlowFunc(("Entries committed, going to sleep\n"));
855}
856
857static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
858{
859 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
860
861 AssertPtr(pBlkCacheGlobal);
862
863 pdmBlkCacheLockEnter(pBlkCacheGlobal);
864
865 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
866
867 /* Go through the list and save all dirty entries. */
868 PPDMBLKCACHE pBlkCache;
869 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
870 {
871 uint32_t cEntries = 0;
872 PPDMBLKCACHEENTRY pEntry;
873
874 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
875 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
876 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
877
878 /* Count the number of entries to safe. */
879 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
880 {
881 cEntries++;
882 }
883
884 SSMR3PutU32(pSSM, cEntries);
885
886 /* Walk the list of all dirty entries and save them. */
887 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
888 {
889 /* A few sanity checks. */
890 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
891 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
892 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
893 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
894 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
895 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
896 ("Invalid list\n"));
897 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
898 ("Size and range do not match\n"));
899
900 /* Save */
901 SSMR3PutU64(pSSM, pEntry->Core.Key);
902 SSMR3PutU32(pSSM, pEntry->cbData);
903 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
904 }
905
906 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
907 }
908
909 pdmBlkCacheLockLeave(pBlkCacheGlobal);
910
911 /* Terminator */
912 return SSMR3PutU32(pSSM, UINT32_MAX);
913}
914
915static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
916{
917 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
918 uint32_t cRefs;
919
920 NOREF(uPass);
921 AssertPtr(pBlkCacheGlobal);
922
923 pdmBlkCacheLockEnter(pBlkCacheGlobal);
924
925 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
926 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
927
928 SSMR3GetU32(pSSM, &cRefs);
929
930 /*
931 * Fewer users in the saved state than in the current VM are allowed
932 * because that means that there are only new ones which don't have any saved state
933 * which can get lost.
934 * More saved state entries than registered cache users are only allowed if the
935 * missing users don't have any data saved in the cache.
936 */
937 int rc = VINF_SUCCESS;
938 char *pszId = NULL;
939
940 while ( cRefs > 0
941 && RT_SUCCESS(rc))
942 {
943 PPDMBLKCACHE pBlkCache = NULL;
944 uint32_t cbId = 0;
945
946 SSMR3GetU32(pSSM, &cbId);
947 Assert(cbId > 0);
948
949 cbId++; /* Include terminator */
950 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
951 if (!pszId)
952 {
953 rc = VERR_NO_MEMORY;
954 break;
955 }
956
957 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
958 AssertRC(rc);
959
960 /* Search for the block cache with the provided id. */
961 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
962
963 /* Get the entries */
964 uint32_t cEntries;
965 SSMR3GetU32(pSSM, &cEntries);
966
967 if (!pBlkCache && (cEntries > 0))
968 {
969 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
970 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
971 break;
972 }
973
974 RTMemFree(pszId);
975 pszId = NULL;
976
977 while (cEntries > 0)
978 {
979 PPDMBLKCACHEENTRY pEntry;
980 uint64_t off;
981 uint32_t cbEntry;
982
983 SSMR3GetU64(pSSM, &off);
984 SSMR3GetU32(pSSM, &cbEntry);
985
986 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
987 if (!pEntry)
988 {
989 rc = VERR_NO_MEMORY;
990 break;
991 }
992
993 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
994 if (RT_FAILURE(rc))
995 {
996 RTMemFree(pEntry->pbData);
997 RTMemFree(pEntry);
998 break;
999 }
1000
1001 /* Insert into the tree. */
1002 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1003 Assert(fInserted); NOREF(fInserted);
1004
1005 /* Add to the dirty list. */
1006 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
1007 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
1008 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
1009 pdmBlkCacheEntryRelease(pEntry);
1010 cEntries--;
1011 }
1012
1013 cRefs--;
1014 }
1015
1016 if (pszId)
1017 RTMemFree(pszId);
1018
1019 if (cRefs && RT_SUCCESS(rc))
1020 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
1021 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
1022
1023 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1024
1025 if (RT_SUCCESS(rc))
1026 {
1027 uint32_t u32 = 0;
1028 rc = SSMR3GetU32(pSSM, &u32);
1029 if (RT_SUCCESS(rc))
1030 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
1031 }
1032
1033 return rc;
1034}
1035
1036int pdmR3BlkCacheInit(PVM pVM)
1037{
1038 int rc = VINF_SUCCESS;
1039 PUVM pUVM = pVM->pUVM;
1040 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
1041
1042 LogFlowFunc((": pVM=%p\n", pVM));
1043
1044 VM_ASSERT_EMT(pVM);
1045
1046 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1047 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1048
1049 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1050 if (!pBlkCacheGlobal)
1051 return VERR_NO_MEMORY;
1052
1053 RTListInit(&pBlkCacheGlobal->ListUsers);
1054 pBlkCacheGlobal->pVM = pVM;
1055 pBlkCacheGlobal->cRefs = 0;
1056 pBlkCacheGlobal->cbCached = 0;
1057 pBlkCacheGlobal->fCommitInProgress = false;
1058
1059 /* Initialize members */
1060 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1061 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1062 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1063
1064 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1065 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1066 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1067
1068 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1069 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1070 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1071
1072 do
1073 {
1074 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1075 AssertLogRelRCBreak(rc);
1076 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1077
1078 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1079 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1080 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1081 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1082
1083 /** @todo r=aeichner: Experiment to find optimal default values */
1084 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1085 AssertLogRelRCBreak(rc);
1086 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1087 AssertLogRelRCBreak(rc);
1088 } while (0);
1089
1090 if (RT_SUCCESS(rc))
1091 {
1092 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1093 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1094 "/PDM/BlkCache/cbMax",
1095 STAMUNIT_BYTES,
1096 "Maximum cache size");
1097 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1098 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1099 "/PDM/BlkCache/cbCached",
1100 STAMUNIT_BYTES,
1101 "Currently used cache");
1102 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1103 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1104 "/PDM/BlkCache/cbCachedMruIn",
1105 STAMUNIT_BYTES,
1106 "Number of bytes cached in MRU list");
1107 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1108 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1109 "/PDM/BlkCache/cbCachedMruOut",
1110 STAMUNIT_BYTES,
1111 "Number of bytes cached in FRU list");
1112 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1113 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1114 "/PDM/BlkCache/cbCachedFru",
1115 STAMUNIT_BYTES,
1116 "Number of bytes cached in FRU ghost list");
1117
1118#ifdef VBOX_WITH_STATISTICS
1119 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1120 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1121 "/PDM/BlkCache/CacheHits",
1122 STAMUNIT_COUNT, "Number of hits in the cache");
1123 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1124 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1125 "/PDM/BlkCache/CachePartialHits",
1126 STAMUNIT_COUNT, "Number of partial hits in the cache");
1127 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1128 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1129 "/PDM/BlkCache/CacheMisses",
1130 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1131 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1132 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1133 "/PDM/BlkCache/CacheRead",
1134 STAMUNIT_BYTES, "Number of bytes read from the cache");
1135 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1136 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1137 "/PDM/BlkCache/CacheWritten",
1138 STAMUNIT_BYTES, "Number of bytes written to the cache");
1139 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1140 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1141 "/PDM/BlkCache/CacheTreeGet",
1142 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1143 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1144 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1145 "/PDM/BlkCache/CacheTreeInsert",
1146 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1147 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1148 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1149 "/PDM/BlkCache/CacheTreeRemove",
1150 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1151 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1152 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1153 "/PDM/BlkCache/CacheBuffersReused",
1154 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1155#endif
1156
1157 /* Initialize the critical section */
1158 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1159 }
1160
1161 if (RT_SUCCESS(rc))
1162 {
1163 /* Create the commit timer */
1164 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1165 rc = TMR3TimerCreate(pVM, TMCLOCK_REAL, pdmBlkCacheCommitTimerCallback, pBlkCacheGlobal,
1166 TMTIMER_FLAGS_NO_RING0, "BlkCache-Commit", &pBlkCacheGlobal->hTimerCommit);
1167
1168 if (RT_SUCCESS(rc))
1169 {
1170 /* Register saved state handler. */
1171 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1172 NULL, NULL, NULL,
1173 NULL, pdmR3BlkCacheSaveExec, NULL,
1174 NULL, pdmR3BlkCacheLoadExec, NULL);
1175 if (RT_SUCCESS(rc))
1176 {
1177 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1178 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1179 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1180 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1181 return VINF_SUCCESS;
1182 }
1183 }
1184
1185 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1186 }
1187
1188 if (pBlkCacheGlobal)
1189 RTMemFree(pBlkCacheGlobal);
1190
1191 LogFlowFunc((": returns rc=%Rrc\n", rc));
1192 return rc;
1193}
1194
1195void pdmR3BlkCacheTerm(PVM pVM)
1196{
1197 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1198
1199 if (pBlkCacheGlobal)
1200 {
1201 /* Make sure no one else uses the cache now */
1202 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1203
1204 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1205 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1206 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1207 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1208
1209 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1210
1211 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1212 RTMemFree(pBlkCacheGlobal);
1213 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1214 }
1215}
1216
1217int pdmR3BlkCacheResume(PVM pVM)
1218{
1219 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1220
1221 LogFlowFunc(("pVM=%#p\n", pVM));
1222
1223 if ( pBlkCacheGlobal
1224 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1225 {
1226 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1227 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1228 }
1229
1230 return VINF_SUCCESS;
1231}
1232
1233static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1234{
1235 int rc = VINF_SUCCESS;
1236 PPDMBLKCACHE pBlkCache = NULL;
1237 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1238
1239 if (!pBlkCacheGlobal)
1240 return VERR_NOT_SUPPORTED;
1241
1242 /*
1243 * Check that no other user cache has the same id first,
1244 * Unique id's are necessary in case the state is saved.
1245 */
1246 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1247
1248 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1249
1250 if (!pBlkCache)
1251 {
1252 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1253
1254 if (pBlkCache)
1255 pBlkCache->pszId = RTStrDup(pcszId);
1256
1257 if ( pBlkCache
1258 && pBlkCache->pszId)
1259 {
1260 pBlkCache->fSuspended = false;
1261 pBlkCache->cIoXfersActive = 0;
1262 pBlkCache->pCache = pBlkCacheGlobal;
1263 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1264
1265 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1266 if (RT_SUCCESS(rc))
1267 {
1268 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1269 if (RT_SUCCESS(rc))
1270 {
1271 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1272 if (pBlkCache->pTree)
1273 {
1274#ifdef VBOX_WITH_STATISTICS
1275 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1276 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1277 STAMUNIT_COUNT, "Number of deferred writes",
1278 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1279#endif
1280
1281 /* Add to the list of users. */
1282 pBlkCacheGlobal->cRefs++;
1283 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1284 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1285
1286 *ppBlkCache = pBlkCache;
1287 LogFlowFunc(("returns success\n"));
1288 return VINF_SUCCESS;
1289 }
1290
1291 rc = VERR_NO_MEMORY;
1292 RTSemRWDestroy(pBlkCache->SemRWEntries);
1293 }
1294
1295 RTSpinlockDestroy(pBlkCache->LockList);
1296 }
1297
1298 RTStrFree(pBlkCache->pszId);
1299 }
1300 else
1301 rc = VERR_NO_MEMORY;
1302
1303 if (pBlkCache)
1304 RTMemFree(pBlkCache);
1305 }
1306 else
1307 rc = VERR_ALREADY_EXISTS;
1308
1309 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1310
1311 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1312 return rc;
1313}
1314
1315VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1316 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1317 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1318 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1319 const char *pcszId)
1320{
1321 int rc = VINF_SUCCESS;
1322 PPDMBLKCACHE pBlkCache;
1323
1324 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1325 if (RT_SUCCESS(rc))
1326 {
1327 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1328 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1329 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1330 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1331 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1332 *ppBlkCache = pBlkCache;
1333 }
1334
1335 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1336 return rc;
1337}
1338
1339VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1340 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1341 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1342 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1343 const char *pcszId)
1344{
1345 int rc = VINF_SUCCESS;
1346 PPDMBLKCACHE pBlkCache;
1347
1348 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1349 if (RT_SUCCESS(rc))
1350 {
1351 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1352 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1353 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1354 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1355 pBlkCache->u.Dev.pDevIns = pDevIns;
1356 *ppBlkCache = pBlkCache;
1357 }
1358
1359 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1360 return rc;
1361
1362}
1363
1364VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1365 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1366 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1367 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1368 const char *pcszId)
1369{
1370 int rc = VINF_SUCCESS;
1371 PPDMBLKCACHE pBlkCache;
1372
1373 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1374 if (RT_SUCCESS(rc))
1375 {
1376 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1377 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1378 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1379 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1380 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1381 *ppBlkCache = pBlkCache;
1382 }
1383
1384 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1385 return rc;
1386
1387}
1388
1389VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1390 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1391 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1392 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1393 const char *pcszId)
1394{
1395 int rc = VINF_SUCCESS;
1396 PPDMBLKCACHE pBlkCache;
1397
1398 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1399 if (RT_SUCCESS(rc))
1400 {
1401 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1402 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1403 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1404 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1405 pBlkCache->u.Int.pvUser = pvUser;
1406 *ppBlkCache = pBlkCache;
1407 }
1408
1409 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1410 return rc;
1411
1412}
1413
1414/**
1415 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1416 *
1417 * @returns IPRT status code.
1418 * @param pNode The node to destroy.
1419 * @param pvUser Opaque user data.
1420 */
1421static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1422{
1423 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1424 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1425 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1426
1427 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1428 {
1429 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1430 pdmBlkCacheEntryRef(pEntry);
1431 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1432 pdmBlkCacheLockLeave(pCache);
1433
1434 RTThreadSleep(250);
1435
1436 /* Re-enter all locks */
1437 pdmBlkCacheLockEnter(pCache);
1438 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1439 pdmBlkCacheEntryRelease(pEntry);
1440 }
1441
1442 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1443 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1444
1445 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1446 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1447
1448 pdmBlkCacheEntryRemoveFromList(pEntry);
1449
1450 if (fUpdateCache)
1451 pdmBlkCacheSub(pCache, pEntry->cbData);
1452
1453 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1454 RTMemFree(pEntry);
1455
1456 return VINF_SUCCESS;
1457}
1458
1459VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1460{
1461 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1462
1463 /*
1464 * Commit all dirty entries now (they are waited on for completion during the
1465 * destruction of the AVL tree below).
1466 * The exception is if the VM was paused because of an I/O error before.
1467 */
1468 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1469 pdmBlkCacheCommit(pBlkCache);
1470
1471 /* Make sure nobody is accessing the cache while we delete the tree. */
1472 pdmBlkCacheLockEnter(pCache);
1473 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1474 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1475 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1476
1477 RTSpinlockDestroy(pBlkCache->LockList);
1478
1479 pCache->cRefs--;
1480 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1481
1482 pdmBlkCacheLockLeave(pCache);
1483
1484 RTMemFree(pBlkCache->pTree);
1485 pBlkCache->pTree = NULL;
1486 RTSemRWDestroy(pBlkCache->SemRWEntries);
1487
1488#ifdef VBOX_WITH_STATISTICS
1489 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1490#endif
1491
1492 RTStrFree(pBlkCache->pszId);
1493 RTMemFree(pBlkCache);
1494}
1495
1496VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1497{
1498 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1499
1500 /*
1501 * Validate input.
1502 */
1503 if (!pDevIns)
1504 return;
1505 VM_ASSERT_EMT(pVM);
1506
1507 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1508 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1509
1510 /* Return silently if not supported. */
1511 if (!pBlkCacheGlobal)
1512 return;
1513
1514 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1515
1516 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1517 {
1518 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1519 && pBlkCache->u.Dev.pDevIns == pDevIns)
1520 PDMR3BlkCacheRelease(pBlkCache);
1521 }
1522
1523 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1524}
1525
1526VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1527{
1528 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1529
1530 /*
1531 * Validate input.
1532 */
1533 if (!pDrvIns)
1534 return;
1535 VM_ASSERT_EMT(pVM);
1536
1537 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1538 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1539
1540 /* Return silently if not supported. */
1541 if (!pBlkCacheGlobal)
1542 return;
1543
1544 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1545
1546 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1547 {
1548 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1549 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1550 PDMR3BlkCacheRelease(pBlkCache);
1551 }
1552
1553 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1554}
1555
1556VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1557{
1558 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1559
1560 /*
1561 * Validate input.
1562 */
1563 if (!pUsbIns)
1564 return;
1565 VM_ASSERT_EMT(pVM);
1566
1567 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1568 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1569
1570 /* Return silently if not supported. */
1571 if (!pBlkCacheGlobal)
1572 return;
1573
1574 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1575
1576 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1577 {
1578 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1579 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1580 PDMR3BlkCacheRelease(pBlkCache);
1581 }
1582
1583 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1584}
1585
1586static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1587{
1588 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1589
1590 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1591 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1592 if (pEntry)
1593 pdmBlkCacheEntryRef(pEntry);
1594 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1595
1596 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1597
1598 return pEntry;
1599}
1600
1601/**
1602 * Return the best fit cache entries for the given offset.
1603 *
1604 * @returns nothing.
1605 * @param pBlkCache The endpoint cache.
1606 * @param off The offset.
1607 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1608 * the given offset. NULL if not required.
1609 */
1610static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1611{
1612 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1613
1614 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1615 if (ppEntryAbove)
1616 {
1617 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1618 if (*ppEntryAbove)
1619 pdmBlkCacheEntryRef(*ppEntryAbove);
1620 }
1621
1622 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1623
1624 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1625}
1626
1627static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1628{
1629 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1630 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1631 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1632 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1633 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1634 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1635}
1636
1637/**
1638 * Allocates and initializes a new entry for the cache.
1639 * The entry has a reference count of 1.
1640 *
1641 * @returns Pointer to the new cache entry or NULL if out of memory.
1642 * @param pBlkCache The cache the entry belongs to.
1643 * @param off Start offset.
1644 * @param cbData Size of the cache entry.
1645 * @param pbBuffer Pointer to the buffer to use.
1646 * NULL if a new buffer should be allocated.
1647 * The buffer needs to have the same size of the entry.
1648 */
1649static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1650{
1651 AssertReturn(cbData <= UINT32_MAX, NULL);
1652 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1653
1654 if (RT_UNLIKELY(!pEntryNew))
1655 return NULL;
1656
1657 pEntryNew->Core.Key = off;
1658 pEntryNew->Core.KeyLast = off + cbData - 1;
1659 pEntryNew->pBlkCache = pBlkCache;
1660 pEntryNew->fFlags = 0;
1661 pEntryNew->cRefs = 1; /* We are using it now. */
1662 pEntryNew->pList = NULL;
1663 pEntryNew->cbData = (uint32_t)cbData;
1664 pEntryNew->pWaitingHead = NULL;
1665 pEntryNew->pWaitingTail = NULL;
1666 if (pbBuffer)
1667 pEntryNew->pbData = pbBuffer;
1668 else
1669 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1670
1671 if (RT_UNLIKELY(!pEntryNew->pbData))
1672 {
1673 RTMemFree(pEntryNew);
1674 return NULL;
1675 }
1676
1677 return pEntryNew;
1678}
1679
1680/**
1681 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1682 * in exclusive mode.
1683 *
1684 * @returns true if the flag in fSet is set and the one in fClear is clear.
1685 * false otherwise.
1686 * The R/W semaphore is only held if true is returned.
1687 *
1688 * @param pBlkCache The endpoint cache instance data.
1689 * @param pEntry The entry to check the flags for.
1690 * @param fSet The flag which is tested to be set.
1691 * @param fClear The flag which is tested to be clear.
1692 */
1693DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1694 PPDMBLKCACHEENTRY pEntry,
1695 uint32_t fSet, uint32_t fClear)
1696{
1697 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1698 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1699
1700 if (fPassed)
1701 {
1702 /* Acquire the lock and check again because the completion callback might have raced us. */
1703 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1704
1705 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1706 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1707
1708 /* Drop the lock if we didn't passed the test. */
1709 if (!fPassed)
1710 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1711 }
1712
1713 return fPassed;
1714}
1715
1716/**
1717 * Adds a segment to the waiting list for a cache entry
1718 * which is currently in progress.
1719 *
1720 * @returns nothing.
1721 * @param pEntry The cache entry to add the segment to.
1722 * @param pWaiter The waiter entry to add.
1723 */
1724DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1725 PPDMBLKCACHEWAITER pWaiter)
1726{
1727 pWaiter->pNext = NULL;
1728
1729 if (pEntry->pWaitingHead)
1730 {
1731 AssertPtr(pEntry->pWaitingTail);
1732
1733 pEntry->pWaitingTail->pNext = pWaiter;
1734 pEntry->pWaitingTail = pWaiter;
1735 }
1736 else
1737 {
1738 Assert(!pEntry->pWaitingTail);
1739
1740 pEntry->pWaitingHead = pWaiter;
1741 pEntry->pWaitingTail = pWaiter;
1742 }
1743}
1744
1745/**
1746 * Add a buffer described by the I/O memory context
1747 * to the entry waiting for completion.
1748 *
1749 * @returns VBox status code.
1750 * @param pEntry The entry to add the buffer to.
1751 * @param pReq The request.
1752 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1753 * @param offDiff Offset from the start of the buffer in the entry.
1754 * @param cbData Amount of data to wait for onthis entry.
1755 * @param fWrite Flag whether the task waits because it wants to write to
1756 * the cache entry.
1757 */
1758static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1759 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1760{
1761 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1762 if (!pWaiter)
1763 return VERR_NO_MEMORY;
1764
1765 ASMAtomicIncU32(&pReq->cXfersPending);
1766 pWaiter->pReq = pReq;
1767 pWaiter->offCacheEntry = offDiff;
1768 pWaiter->cbTransfer = cbData;
1769 pWaiter->fWrite = fWrite;
1770 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1771 RTSgBufAdvance(pSgBuf, cbData);
1772
1773 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1774
1775 return VINF_SUCCESS;
1776}
1777
1778/**
1779 * Calculate aligned offset and size for a new cache entry which do not
1780 * intersect with an already existing entry and the file end.
1781 *
1782 * @returns The number of bytes the entry can hold of the requested amount
1783 * of bytes.
1784 * @param pBlkCache The endpoint cache.
1785 * @param off The start offset.
1786 * @param cb The number of bytes the entry needs to hold at
1787 * least.
1788 * @param pcbEntry Where to store the number of bytes the entry can hold.
1789 * Can be less than given because of other entries.
1790 */
1791static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1792 uint64_t off, uint32_t cb,
1793 uint32_t *pcbEntry)
1794{
1795 /* Get the best fit entries around the offset */
1796 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1797 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1798
1799 /* Log the info */
1800 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1801 pEntryAbove ? "B" : "No b",
1802 off,
1803 pEntryAbove ? pEntryAbove->Core.Key : 0,
1804 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1805 pEntryAbove ? pEntryAbove->cbData : 0));
1806
1807 uint32_t cbNext;
1808 uint32_t cbInEntry;
1809 if ( pEntryAbove
1810 && off + cb > pEntryAbove->Core.Key)
1811 {
1812 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1813 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1814 }
1815 else
1816 {
1817 cbInEntry = cb;
1818 cbNext = cb;
1819 }
1820
1821 /* A few sanity checks */
1822 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1823 ("Aligned size intersects with another cache entry\n"));
1824 Assert(cbInEntry <= cbNext);
1825
1826 if (pEntryAbove)
1827 pdmBlkCacheEntryRelease(pEntryAbove);
1828
1829 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1830
1831 *pcbEntry = cbNext;
1832
1833 return cbInEntry;
1834}
1835
1836/**
1837 * Create a new cache entry evicting data from the cache if required.
1838 *
1839 * @returns Pointer to the new cache entry or NULL
1840 * if not enough bytes could be evicted from the cache.
1841 * @param pBlkCache The endpoint cache.
1842 * @param off The offset.
1843 * @param cb Number of bytes the cache entry should have.
1844 * @param pcbData Where to store the number of bytes the new
1845 * entry can hold. May be lower than actually
1846 * requested due to another entry intersecting the
1847 * access range.
1848 */
1849static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1850{
1851 uint32_t cbEntry = 0;
1852
1853 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1854 AssertReturn(cb <= UINT32_MAX, NULL);
1855
1856 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1857 pdmBlkCacheLockEnter(pCache);
1858
1859 PPDMBLKCACHEENTRY pEntryNew = NULL;
1860 uint8_t *pbBuffer = NULL;
1861 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1862 if (fEnough)
1863 {
1864 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1865
1866 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1867 if (RT_LIKELY(pEntryNew))
1868 {
1869 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1870 pdmBlkCacheAdd(pCache, cbEntry);
1871 pdmBlkCacheLockLeave(pCache);
1872
1873 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1874
1875 AssertMsg( (off >= pEntryNew->Core.Key)
1876 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1877 ("Overflow in calculation off=%llu\n", off));
1878 }
1879 else
1880 pdmBlkCacheLockLeave(pCache);
1881 }
1882 else
1883 pdmBlkCacheLockLeave(pCache);
1884
1885 return pEntryNew;
1886}
1887
1888static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1889{
1890 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1891
1892 if (RT_LIKELY(pReq))
1893 {
1894 pReq->pvUser = pvUser;
1895 pReq->rcReq = VINF_SUCCESS;
1896 pReq->cXfersPending = 0;
1897 }
1898
1899 return pReq;
1900}
1901
1902static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1903{
1904 switch (pBlkCache->enmType)
1905 {
1906 case PDMBLKCACHETYPE_DEV:
1907 {
1908 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1909 pReq->pvUser, pReq->rcReq);
1910 break;
1911 }
1912 case PDMBLKCACHETYPE_DRV:
1913 {
1914 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1915 pReq->pvUser, pReq->rcReq);
1916 break;
1917 }
1918 case PDMBLKCACHETYPE_USB:
1919 {
1920 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1921 pReq->pvUser, pReq->rcReq);
1922 break;
1923 }
1924 case PDMBLKCACHETYPE_INTERNAL:
1925 {
1926 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1927 pReq->pvUser, pReq->rcReq);
1928 break;
1929 }
1930 default:
1931 AssertMsgFailed(("Unknown block cache type!\n"));
1932 }
1933
1934 RTMemFree(pReq);
1935}
1936
1937static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1938 int rcReq, bool fCallHandler)
1939{
1940 if (RT_FAILURE(rcReq))
1941 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1942
1943 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1944 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1945
1946 if (!cXfersPending)
1947 {
1948 if (fCallHandler)
1949 pdmBlkCacheReqComplete(pBlkCache, pReq);
1950 return true;
1951 }
1952
1953 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1954 return false;
1955}
1956
1957VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1958 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1959{
1960 int rc = VINF_SUCCESS;
1961 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1962 PPDMBLKCACHEENTRY pEntry;
1963 PPDMBLKCACHEREQ pReq;
1964
1965 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1966 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1967
1968 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1969 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1970
1971 RTSGBUF SgBuf;
1972 RTSgBufClone(&SgBuf, pSgBuf);
1973
1974 /* Allocate new request structure. */
1975 pReq = pdmBlkCacheReqAlloc(pvUser);
1976 if (RT_UNLIKELY(!pReq))
1977 return VERR_NO_MEMORY;
1978
1979 /* Increment data transfer counter to keep the request valid while we access it. */
1980 ASMAtomicIncU32(&pReq->cXfersPending);
1981
1982 while (cbRead)
1983 {
1984 size_t cbToRead;
1985
1986 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1987
1988 /*
1989 * If there is no entry we try to create a new one eviciting unused pages
1990 * if the cache is full. If this is not possible we will pass the request through
1991 * and skip the caching (all entries may be still in progress so they can't
1992 * be evicted)
1993 * If we have an entry it can be in one of the LRU lists where the entry
1994 * contains data (recently used or frequently used LRU) so we can just read
1995 * the data we need and put the entry at the head of the frequently used LRU list.
1996 * In case the entry is in one of the ghost lists it doesn't contain any data.
1997 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1998 */
1999 if (pEntry)
2000 {
2001 uint64_t offDiff = off - pEntry->Core.Key;
2002
2003 AssertMsg(off >= pEntry->Core.Key,
2004 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2005 off, pEntry->Core.Key));
2006
2007 AssertPtr(pEntry->pList);
2008
2009 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
2010
2011 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
2012 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
2013 off, cbToRead));
2014
2015 cbRead -= cbToRead;
2016
2017 if (!cbRead)
2018 STAM_COUNTER_INC(&pCache->cHits);
2019 else
2020 STAM_COUNTER_INC(&pCache->cPartialHits);
2021
2022 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
2023
2024 /* Ghost lists contain no data. */
2025 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2026 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2027 {
2028 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2029 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2030 PDMBLKCACHE_ENTRY_IS_DIRTY))
2031 {
2032 /* Entry didn't completed yet. Append to the list */
2033 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2034 &SgBuf, offDiff, cbToRead,
2035 false /* fWrite */);
2036 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2037 }
2038 else
2039 {
2040 /* Read as much as we can from the entry. */
2041 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2042 }
2043
2044 /* Move this entry to the top position */
2045 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2046 {
2047 pdmBlkCacheLockEnter(pCache);
2048 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2049 pdmBlkCacheLockLeave(pCache);
2050 }
2051 /* Release the entry */
2052 pdmBlkCacheEntryRelease(pEntry);
2053 }
2054 else
2055 {
2056 uint8_t *pbBuffer = NULL;
2057
2058 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2059
2060 pdmBlkCacheLockEnter(pCache);
2061 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2062 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2063
2064 /* Move the entry to Am and fetch it to the cache. */
2065 if (fEnough)
2066 {
2067 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2068 pdmBlkCacheAdd(pCache, pEntry->cbData);
2069 pdmBlkCacheLockLeave(pCache);
2070
2071 if (pbBuffer)
2072 pEntry->pbData = pbBuffer;
2073 else
2074 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2075 AssertPtr(pEntry->pbData);
2076
2077 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2078 &SgBuf, offDiff, cbToRead,
2079 false /* fWrite */);
2080 pdmBlkCacheEntryReadFromMedium(pEntry);
2081 /* Release the entry */
2082 pdmBlkCacheEntryRelease(pEntry);
2083 }
2084 else
2085 {
2086 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2087 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2088 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2089 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2090 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2091
2092 pdmBlkCacheLockLeave(pCache);
2093
2094 RTMemFree(pEntry);
2095
2096 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2097 &SgBuf, off, cbToRead,
2098 PDMBLKCACHEXFERDIR_READ);
2099 }
2100 }
2101 }
2102 else
2103 {
2104#ifdef VBOX_WITH_IO_READ_CACHE
2105 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2106 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2107 off, cbRead,
2108 &cbToRead);
2109
2110 cbRead -= cbToRead;
2111
2112 if (pEntryNew)
2113 {
2114 if (!cbRead)
2115 STAM_COUNTER_INC(&pCache->cMisses);
2116 else
2117 STAM_COUNTER_INC(&pCache->cPartialHits);
2118
2119 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2120 &SgBuf,
2121 off - pEntryNew->Core.Key,
2122 cbToRead,
2123 false /* fWrite */);
2124 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2125 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2126 }
2127 else
2128 {
2129 /*
2130 * There is not enough free space in the cache.
2131 * Pass the request directly to the I/O manager.
2132 */
2133 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2134
2135 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2136 &SgBuf, off, cbToRead,
2137 PDMBLKCACHEXFERDIR_READ);
2138 }
2139#else
2140 /* Clip read size if necessary. */
2141 PPDMBLKCACHEENTRY pEntryAbove;
2142 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2143
2144 if (pEntryAbove)
2145 {
2146 if (off + cbRead > pEntryAbove->Core.Key)
2147 cbToRead = pEntryAbove->Core.Key - off;
2148 else
2149 cbToRead = cbRead;
2150
2151 pdmBlkCacheEntryRelease(pEntryAbove);
2152 }
2153 else
2154 cbToRead = cbRead;
2155
2156 cbRead -= cbToRead;
2157 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2158 &SgBuf, off, cbToRead,
2159 PDMBLKCACHEXFERDIR_READ);
2160#endif
2161 }
2162 off += cbToRead;
2163 }
2164
2165 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2166 rc = VINF_AIO_TASK_PENDING;
2167 else
2168 {
2169 rc = pReq->rcReq;
2170 RTMemFree(pReq);
2171 }
2172
2173 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2174
2175 return rc;
2176}
2177
2178VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2179{
2180 int rc = VINF_SUCCESS;
2181 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2182 PPDMBLKCACHEENTRY pEntry;
2183 PPDMBLKCACHEREQ pReq;
2184
2185 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2186 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2187
2188 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2189 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2190
2191 RTSGBUF SgBuf;
2192 RTSgBufClone(&SgBuf, pSgBuf);
2193
2194 /* Allocate new request structure. */
2195 pReq = pdmBlkCacheReqAlloc(pvUser);
2196 if (RT_UNLIKELY(!pReq))
2197 return VERR_NO_MEMORY;
2198
2199 /* Increment data transfer counter to keep the request valid while we access it. */
2200 ASMAtomicIncU32(&pReq->cXfersPending);
2201
2202 while (cbWrite)
2203 {
2204 size_t cbToWrite;
2205
2206 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2207 if (pEntry)
2208 {
2209 /* Write the data into the entry and mark it as dirty */
2210 AssertPtr(pEntry->pList);
2211
2212 uint64_t offDiff = off - pEntry->Core.Key;
2213 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2214
2215 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2216 cbWrite -= cbToWrite;
2217
2218 if (!cbWrite)
2219 STAM_COUNTER_INC(&pCache->cHits);
2220 else
2221 STAM_COUNTER_INC(&pCache->cPartialHits);
2222
2223 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2224
2225 /* Ghost lists contain no data. */
2226 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2227 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2228 {
2229 /* Check if the entry is dirty. */
2230 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2231 PDMBLKCACHE_ENTRY_IS_DIRTY,
2232 0))
2233 {
2234 /* If it is already dirty but not in progress just update the data. */
2235 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2236 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2237 else
2238 {
2239 /* The data isn't written to the file yet */
2240 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2241 &SgBuf, offDiff, cbToWrite,
2242 true /* fWrite */);
2243 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2244 }
2245
2246 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2247 }
2248 else /* Dirty bit not set */
2249 {
2250 /*
2251 * Check if a read is in progress for this entry.
2252 * We have to defer processing in that case.
2253 */
2254 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2255 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2256 0))
2257 {
2258 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2259 &SgBuf, offDiff, cbToWrite,
2260 true /* fWrite */);
2261 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2262 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2263 }
2264 else /* I/O in progress flag not set */
2265 {
2266 /* Write as much as we can into the entry and update the file. */
2267 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2268
2269 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2270 if (fCommit)
2271 pdmBlkCacheCommitDirtyEntries(pCache);
2272 }
2273 } /* Dirty bit not set */
2274
2275 /* Move this entry to the top position */
2276 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2277 {
2278 pdmBlkCacheLockEnter(pCache);
2279 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2280 pdmBlkCacheLockLeave(pCache);
2281 }
2282
2283 pdmBlkCacheEntryRelease(pEntry);
2284 }
2285 else /* Entry is on the ghost list */
2286 {
2287 uint8_t *pbBuffer = NULL;
2288
2289 pdmBlkCacheLockEnter(pCache);
2290 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2291 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2292
2293 if (fEnough)
2294 {
2295 /* Move the entry to Am and fetch it to the cache. */
2296 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2297 pdmBlkCacheAdd(pCache, pEntry->cbData);
2298 pdmBlkCacheLockLeave(pCache);
2299
2300 if (pbBuffer)
2301 pEntry->pbData = pbBuffer;
2302 else
2303 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2304 AssertPtr(pEntry->pbData);
2305
2306 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2307 &SgBuf, offDiff, cbToWrite,
2308 true /* fWrite */);
2309 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2310 pdmBlkCacheEntryReadFromMedium(pEntry);
2311
2312 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2313 pdmBlkCacheEntryRelease(pEntry);
2314 }
2315 else
2316 {
2317 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2318 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2319 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2320 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2321 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2322
2323 pdmBlkCacheLockLeave(pCache);
2324
2325 RTMemFree(pEntry);
2326 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2327 &SgBuf, off, cbToWrite,
2328 PDMBLKCACHEXFERDIR_WRITE);
2329 }
2330 }
2331 }
2332 else /* No entry found */
2333 {
2334 /*
2335 * No entry found. Try to create a new cache entry to store the data in and if that fails
2336 * write directly to the file.
2337 */
2338 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2339 off, cbWrite,
2340 &cbToWrite);
2341
2342 cbWrite -= cbToWrite;
2343
2344 if (pEntryNew)
2345 {
2346 uint64_t offDiff = off - pEntryNew->Core.Key;
2347
2348 STAM_COUNTER_INC(&pCache->cHits);
2349
2350 /*
2351 * Check if it is possible to just write the data without waiting
2352 * for it to get fetched first.
2353 */
2354 if (!offDiff && pEntryNew->cbData == cbToWrite)
2355 {
2356 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2357
2358 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2359 if (fCommit)
2360 pdmBlkCacheCommitDirtyEntries(pCache);
2361 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2362 }
2363 else
2364 {
2365 /* Defer the write and fetch the data from the endpoint. */
2366 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2367 &SgBuf, offDiff, cbToWrite,
2368 true /* fWrite */);
2369 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2370 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2371 }
2372
2373 pdmBlkCacheEntryRelease(pEntryNew);
2374 }
2375 else
2376 {
2377 /*
2378 * There is not enough free space in the cache.
2379 * Pass the request directly to the I/O manager.
2380 */
2381 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2382
2383 STAM_COUNTER_INC(&pCache->cMisses);
2384
2385 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2386 &SgBuf, off, cbToWrite,
2387 PDMBLKCACHEXFERDIR_WRITE);
2388 }
2389 }
2390
2391 off += cbToWrite;
2392 }
2393
2394 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2395 rc = VINF_AIO_TASK_PENDING;
2396 else
2397 {
2398 rc = pReq->rcReq;
2399 RTMemFree(pReq);
2400 }
2401
2402 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2403
2404 return rc;
2405}
2406
2407VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2408{
2409 int rc = VINF_SUCCESS;
2410 PPDMBLKCACHEREQ pReq;
2411
2412 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2413
2414 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2415 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2416
2417 /* Commit dirty entries in the cache. */
2418 pdmBlkCacheCommit(pBlkCache);
2419
2420 /* Allocate new request structure. */
2421 pReq = pdmBlkCacheReqAlloc(pvUser);
2422 if (RT_UNLIKELY(!pReq))
2423 return VERR_NO_MEMORY;
2424
2425 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2426 PDMBLKCACHEXFERDIR_FLUSH);
2427 AssertRC(rc);
2428
2429 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2430 return VINF_AIO_TASK_PENDING;
2431}
2432
2433VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2434 unsigned cRanges, void *pvUser)
2435{
2436 int rc = VINF_SUCCESS;
2437 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2438 PPDMBLKCACHEENTRY pEntry;
2439 PPDMBLKCACHEREQ pReq;
2440
2441 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2442 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2443
2444 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2445 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2446
2447 /* Allocate new request structure. */
2448 pReq = pdmBlkCacheReqAlloc(pvUser);
2449 if (RT_UNLIKELY(!pReq))
2450 return VERR_NO_MEMORY;
2451
2452 /* Increment data transfer counter to keep the request valid while we access it. */
2453 ASMAtomicIncU32(&pReq->cXfersPending);
2454
2455 for (unsigned i = 0; i < cRanges; i++)
2456 {
2457 uint64_t offCur = paRanges[i].offStart;
2458 size_t cbLeft = paRanges[i].cbRange;
2459
2460 while (cbLeft)
2461 {
2462 size_t cbThisDiscard = 0;
2463
2464 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2465
2466 if (pEntry)
2467 {
2468 /* Write the data into the entry and mark it as dirty */
2469 AssertPtr(pEntry->pList);
2470
2471 uint64_t offDiff = offCur - pEntry->Core.Key;
2472
2473 AssertMsg(offCur >= pEntry->Core.Key,
2474 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2475 offCur, pEntry->Core.Key));
2476
2477 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2478
2479 /* Ghost lists contain no data. */
2480 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2481 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2482 {
2483 /* Check if the entry is dirty. */
2484 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2485 PDMBLKCACHE_ENTRY_IS_DIRTY,
2486 0))
2487 {
2488 /* If it is dirty but not yet in progress remove it. */
2489 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2490 {
2491 pdmBlkCacheLockEnter(pCache);
2492 pdmBlkCacheEntryRemoveFromList(pEntry);
2493
2494 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2495 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2496 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2497
2498 pdmBlkCacheLockLeave(pCache);
2499
2500 RTMemFree(pEntry);
2501 }
2502 else
2503 {
2504#if 0
2505 /* The data isn't written to the file yet */
2506 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2507 &SgBuf, offDiff, cbToWrite,
2508 true /* fWrite */);
2509 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2510#endif
2511 }
2512
2513 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2514 pdmBlkCacheEntryRelease(pEntry);
2515 }
2516 else /* Dirty bit not set */
2517 {
2518 /*
2519 * Check if a read is in progress for this entry.
2520 * We have to defer processing in that case.
2521 */
2522 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2523 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2524 0))
2525 {
2526#if 0
2527 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2528 &SgBuf, offDiff, cbToWrite,
2529 true /* fWrite */);
2530#endif
2531 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2532 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2533 pdmBlkCacheEntryRelease(pEntry);
2534 }
2535 else /* I/O in progress flag not set */
2536 {
2537 pdmBlkCacheLockEnter(pCache);
2538 pdmBlkCacheEntryRemoveFromList(pEntry);
2539
2540 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2541 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2542 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2543 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2544 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2545
2546 pdmBlkCacheLockLeave(pCache);
2547
2548 RTMemFree(pEntry);
2549 }
2550 } /* Dirty bit not set */
2551 }
2552 else /* Entry is on the ghost list just remove cache entry. */
2553 {
2554 pdmBlkCacheLockEnter(pCache);
2555 pdmBlkCacheEntryRemoveFromList(pEntry);
2556
2557 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2558 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2559 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2560 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2561 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2562
2563 pdmBlkCacheLockLeave(pCache);
2564
2565 RTMemFree(pEntry);
2566 }
2567 }
2568 /* else: no entry found. */
2569
2570 offCur += cbThisDiscard;
2571 cbLeft -= cbThisDiscard;
2572 }
2573 }
2574
2575 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2576 rc = VINF_AIO_TASK_PENDING;
2577 else
2578 {
2579 rc = pReq->rcReq;
2580 RTMemFree(pReq);
2581 }
2582
2583 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2584
2585 return rc;
2586}
2587
2588/**
2589 * Completes a task segment freeing all resources and completes the task handle
2590 * if everything was transferred.
2591 *
2592 * @returns Next task segment handle.
2593 * @param pBlkCache The endpoint block cache.
2594 * @param pWaiter Task segment to complete.
2595 * @param rc Status code to set.
2596 */
2597static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2598{
2599 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2600 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2601
2602 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2603
2604 RTMemFree(pWaiter);
2605
2606 return pNext;
2607}
2608
2609static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2610{
2611 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2612 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2613
2614 /* Reference the entry now as we are clearing the I/O in progress flag
2615 * which protected the entry till now. */
2616 pdmBlkCacheEntryRef(pEntry);
2617
2618 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2619 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2620
2621 /* Process waiting segment list. The data in entry might have changed in-between. */
2622 bool fDirty = false;
2623 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2624 PPDMBLKCACHEWAITER pCurr = pComplete;
2625
2626 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2627 ("The list tail was not updated correctly\n"));
2628 pEntry->pWaitingTail = NULL;
2629 pEntry->pWaitingHead = NULL;
2630
2631 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2632 {
2633 /*
2634 * An error here is difficult to handle as the original request completed already.
2635 * The error is logged for now and the VM is paused.
2636 * If the user continues the entry is written again in the hope
2637 * the user fixed the problem and the next write succeeds.
2638 */
2639 if (RT_FAILURE(rcIoXfer))
2640 {
2641 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2642 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2643
2644 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2645 {
2646 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2647 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2648 "Make sure there is enough free space on the disk and that the disk is working properly. "
2649 "Operation can be resumed afterwards"),
2650 pBlkCache->pszId, rcIoXfer);
2651 AssertRC(rc);
2652 }
2653
2654 /* Mark the entry as dirty again to get it added to the list later on. */
2655 fDirty = true;
2656 }
2657
2658 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2659
2660 while (pCurr)
2661 {
2662 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2663
2664 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2665 fDirty = true;
2666 pCurr = pCurr->pNext;
2667 }
2668 }
2669 else
2670 {
2671 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2672 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2673 ("Invalid flags set\n"));
2674
2675 while (pCurr)
2676 {
2677 if (pCurr->fWrite)
2678 {
2679 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2680 fDirty = true;
2681 }
2682 else
2683 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2684
2685 pCurr = pCurr->pNext;
2686 }
2687 }
2688
2689 bool fCommit = false;
2690 if (fDirty)
2691 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2692
2693 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2694
2695 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2696 pdmBlkCacheEntryRelease(pEntry);
2697
2698 if (fCommit)
2699 pdmBlkCacheCommitDirtyEntries(pCache);
2700
2701 /* Complete waiters now. */
2702 while (pComplete)
2703 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2704}
2705
2706VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2707{
2708 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2709
2710 if (hIoXfer->fIoCache)
2711 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2712 else
2713 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2714
2715 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
2716 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)",
2717 hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive);
2718 RTMemFree(hIoXfer);
2719}
2720
2721/**
2722 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2723 *
2724 * @returns IPRT status code.
2725 * @param pNode The node to destroy.
2726 * @param pvUser Opaque user data.
2727 */
2728static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2729{
2730 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2731 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2732 NOREF(pvUser);
2733
2734 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2735 {
2736 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2737 pdmBlkCacheEntryRef(pEntry);
2738 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2739
2740 RTThreadSleep(1);
2741
2742 /* Re-enter all locks and drop the reference. */
2743 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2744 pdmBlkCacheEntryRelease(pEntry);
2745 }
2746
2747 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2748 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2749
2750 return VINF_SUCCESS;
2751}
2752
2753VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2754{
2755 int rc = VINF_SUCCESS;
2756 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2757
2758 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2759
2760 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2761 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2762 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2763
2764 /* Wait for all I/O to complete. */
2765 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2766 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2767 AssertRC(rc);
2768 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2769
2770 return rc;
2771}
2772
2773VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2774{
2775 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2776
2777 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2778
2779 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2780
2781 return VINF_SUCCESS;
2782}
2783
2784VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2785{
2786 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2787
2788 /*
2789 * Commit all dirty entries now (they are waited on for completion during the
2790 * destruction of the AVL tree below).
2791 * The exception is if the VM was paused because of an I/O error before.
2792 */
2793 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2794 pdmBlkCacheCommit(pBlkCache);
2795
2796 /* Make sure nobody is accessing the cache while we delete the tree. */
2797 pdmBlkCacheLockEnter(pCache);
2798 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2799 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2800 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2801
2802 pdmBlkCacheLockLeave(pCache);
2803 return VINF_SUCCESS;
2804}
2805
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette