VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 43346

Last change on this file since 43346 was 42892, checked in by vboxsync, 12 years ago

BlockCache: Relax check when restoring a state. It is allowed to have more cache users in the saved state than there are registered currently if the missing users don't have any data saved. Should fix restore errors if a VM with snapshots was moved to a filesystem with known data corruption bugs which disables async I/O when the VM starts.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.4 KB
Line 
1/* $Id: PDMBlkCache.cpp 42892 2012-08-20 20:48:06Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/vmm/stam.h>
33#include <VBox/vmm/uvm.h>
34#include <VBox/vmm/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given global cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for reading. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
/* Without VBOX_STRICT the ownership checks expand to empty statements. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while (0)
#endif
65
66#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
67
68/*******************************************************************************
69* Internal Functions *
70*******************************************************************************/
71
72static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
73 uint64_t off, size_t cbData, uint8_t *pbBuffer);
74static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
75
76/**
77 * Decrement the reference counter of the given cache entry.
78 *
79 * @returns nothing.
80 * @param pEntry The entry to release.
81 */
82DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
83{
84 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
85 ASMAtomicDecU32(&pEntry->cRefs);
86}
87
88/**
89 * Increment the reference counter of the given cache entry.
90 *
91 * @returns nothing.
92 * @param pEntry The entry to reference.
93 */
94DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
95{
96 ASMAtomicIncU32(&pEntry->cRefs);
97}
98
#ifdef VBOX_STRICT
/**
 * Checks the byte accounting of the global cache (strict builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The total may never grow beyond the configured maximum. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently-used-in and frequently-used lists together account for the total. */
    AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own limit. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
114
115DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
116{
117 RTCritSectEnter(&pCache->CritSect);
118#ifdef VBOX_STRICT
119 pdmBlkCacheValidate(pCache);
120#endif
121}
122
123DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
124{
125#ifdef VBOX_STRICT
126 pdmBlkCacheValidate(pCache);
127#endif
128 RTCritSectLeave(&pCache->CritSect);
129}
130
131DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached -= cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
138{
139 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
140 pCache->cbCached += cbAmount;
141}
142
143DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
144{
145 pList->cbCached += cbAmount;
146}
147
148DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
149{
150 pList->cbCached -= cbAmount;
151}
152
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Verifies the integrity of an LRU list by assertion.
 *
 * For every element it is checked that the element does not show up again
 * further down the list, that it is not the forbidden element and that the
 * element without a successor is the recorded tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Element which is not allowed to occur in the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pCurr = pList->pHead; pCurr; pCurr = pCurr->pNext)
    {
        /* Compare against every following element to catch duplicates and cycles. */
        for (PPDMBLKCACHEENTRY pLater = pCurr->pNext; pLater; pLater = pLater->pNext)
        {
            AssertMsg(pCurr != pLater,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pCurr, pList));
        }

        AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));

        if (!pCurr->pNext)
        {
            AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
        }
    }
}
#endif
187
188/**
189 * Unlinks a cache entry from the LRU list it is assigned to.
190 *
191 * @returns nothing.
192 * @param pEntry The entry to unlink.
193 */
194static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
195{
196 PPDMBLKLRULIST pList = pEntry->pList;
197 PPDMBLKCACHEENTRY pPrev, pNext;
198
199 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
200
201 AssertPtr(pList);
202
203#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
204 pdmBlkCacheCheckList(pList, NULL);
205#endif
206
207 pPrev = pEntry->pPrev;
208 pNext = pEntry->pNext;
209
210 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
211 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
212
213 if (pPrev)
214 pPrev->pNext = pNext;
215 else
216 {
217 pList->pHead = pNext;
218
219 if (pNext)
220 pNext->pPrev = NULL;
221 }
222
223 if (pNext)
224 pNext->pPrev = pPrev;
225 else
226 {
227 pList->pTail = pPrev;
228
229 if (pPrev)
230 pPrev->pNext = NULL;
231 }
232
233 pEntry->pList = NULL;
234 pEntry->pPrev = NULL;
235 pEntry->pNext = NULL;
236 pdmBlkCacheListSub(pList, pEntry->cbData);
237#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
238 pdmBlkCacheCheckList(pList, pEntry);
239#endif
240}
241
242/**
243 * Adds a cache entry to the given LRU list unlinking it from the currently
244 * assigned list if needed.
245 *
246 * @returns nothing.
247 * @param pList List to the add entry to.
248 * @param pEntry Entry to add.
249 */
250static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
251{
252 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
253#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
254 pdmBlkCacheCheckList(pList, NULL);
255#endif
256
257 /* Remove from old list if needed */
258 if (pEntry->pList)
259 pdmBlkCacheEntryRemoveFromList(pEntry);
260
261 pEntry->pNext = pList->pHead;
262 if (pList->pHead)
263 pList->pHead->pPrev = pEntry;
264 else
265 {
266 Assert(!pList->pTail);
267 pList->pTail = pEntry;
268 }
269
270 pEntry->pPrev = NULL;
271 pList->pHead = pEntry;
272 pdmBlkCacheListAdd(pList, pEntry->cbData);
273 pEntry->pList = pList;
274#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
275 pdmBlkCacheCheckList(pList, NULL);
276#endif
277}
278
279/**
280 * Destroys a LRU list freeing all entries.
281 *
282 * @returns nothing
283 * @param pList Pointer to the LRU list to destroy.
284 *
285 * @note The caller must own the critical section of the cache.
286 */
287static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
288{
289 while (pList->pHead)
290 {
291 PPDMBLKCACHEENTRY pEntry = pList->pHead;
292
293 pList->pHead = pEntry->pNext;
294
295 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
296 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
297
298 RTMemPageFree(pEntry->pbData, pEntry->cbData);
299 RTMemFree(pEntry);
300 }
301}
302
303/**
304 * Tries to remove the given amount of bytes from a given list in the cache
305 * moving the entries to one of the given ghosts lists
306 *
307 * @returns Amount of data which could be freed.
308 * @param pCache Pointer to the global cache data.
309 * @param cbData The amount of the data to free.
310 * @param pListSrc The source list to evict data from.
311 * @param pGhostListSrc The ghost list removed entries should be moved to
312 * NULL if the entry should be freed.
313 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
314 * @param ppbBuf Where to store the address of the buffer if an entry with the
315 * same size was found and fReuseBuffer is true.
316 *
317 * @note This function may return fewer bytes than requested because entries
318 * may be marked as non evictable if they are used for I/O at the
319 * moment.
320 */
321static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
322 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
323 bool fReuseBuffer, uint8_t **ppbBuffer)
324{
325 size_t cbEvicted = 0;
326
327 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
328
329 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
330 AssertMsg( !pGhostListDst
331 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
332 ("Destination list must be NULL or the recently used but paged out list\n"));
333
334 if (fReuseBuffer)
335 {
336 AssertPtr(ppbBuffer);
337 *ppbBuffer = NULL;
338 }
339
340 /* Start deleting from the tail. */
341 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
342
343 while ((cbEvicted < cbData) && pEntry)
344 {
345 PPDMBLKCACHEENTRY pCurr = pEntry;
346
347 pEntry = pEntry->pPrev;
348
349 /* We can't evict pages which are currently in progress or dirty but not in progress */
350 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
351 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
352 {
353 /* Ok eviction candidate. Grab the endpoint semaphore and check again
354 * because somebody else might have raced us. */
355 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
356 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
357
358 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
359 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
360 {
361 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
362
363 if (fReuseBuffer && pCurr->cbData == cbData)
364 {
365 STAM_COUNTER_INC(&pCache->StatBuffersReused);
366 *ppbBuffer = pCurr->pbData;
367 }
368 else if (pCurr->pbData)
369 RTMemPageFree(pCurr->pbData, pCurr->cbData);
370
371 pCurr->pbData = NULL;
372 cbEvicted += pCurr->cbData;
373
374 pdmBlkCacheEntryRemoveFromList(pCurr);
375 pdmBlkCacheSub(pCache, pCurr->cbData);
376
377 if (pGhostListDst)
378 {
379 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
380
381 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
382
383 /* We have to remove the last entries from the paged out list. */
384 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
385 && pGhostEntFree)
386 {
387 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
388 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
389
390 pGhostEntFree = pGhostEntFree->pPrev;
391
392 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
393
394 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
395 {
396 pdmBlkCacheEntryRemoveFromList(pFree);
397
398 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
399 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
400 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
401
402 RTMemFree(pFree);
403 }
404
405 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
406 }
407
408 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
409 {
410 /* Couldn't remove enough entries. Delete */
411 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
412 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
413 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
414
415 RTMemFree(pCurr);
416 }
417 else
418 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
419 }
420 else
421 {
422 /* Delete the entry from the AVL tree it is assigned to. */
423 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
424 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
425 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
426
427 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
428 RTMemFree(pCurr);
429 }
430 }
431
432 }
433 else
434 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
435 }
436
437 return cbEvicted;
438}
439
440static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
441{
442 size_t cbRemoved = 0;
443
444 if ((pCache->cbCached + cbData) < pCache->cbMax)
445 return true;
446 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
447 {
448 /* Try to evict as many bytes as possible from A1in */
449 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
450 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
451
452 /*
453 * If it was not possible to remove enough entries
454 * try the frequently accessed cache.
455 */
456 if (cbRemoved < cbData)
457 {
458 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
459
460 /*
461 * If we removed something we can't pass the reuse buffer flag anymore because
462 * we don't need to evict that much data
463 */
464 if (!cbRemoved)
465 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
466 NULL, fReuseBuffer, ppbBuffer);
467 else
468 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
469 NULL, false, NULL);
470 }
471 }
472 else
473 {
474 /* We have to remove entries from frequently access list. */
475 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
476 NULL, fReuseBuffer, ppbBuffer);
477 }
478
479 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
480 return (cbRemoved >= cbData);
481}
482
483DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
484{
485 int rc = VINF_SUCCESS;
486
487 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
488 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
489
490 switch (pBlkCache->enmType)
491 {
492 case PDMBLKCACHETYPE_DEV:
493 {
494 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
495 pIoXfer->enmXferDir,
496 off, cbXfer,
497 &pIoXfer->SgBuf, pIoXfer);
498 break;
499 }
500 case PDMBLKCACHETYPE_DRV:
501 {
502 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
503 pIoXfer->enmXferDir,
504 off, cbXfer,
505 &pIoXfer->SgBuf, pIoXfer);
506 break;
507 }
508 case PDMBLKCACHETYPE_USB:
509 {
510 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
511 pIoXfer->enmXferDir,
512 off, cbXfer,
513 &pIoXfer->SgBuf, pIoXfer);
514 break;
515 }
516 case PDMBLKCACHETYPE_INTERNAL:
517 {
518 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
519 pIoXfer->enmXferDir,
520 off, cbXfer,
521 &pIoXfer->SgBuf, pIoXfer);
522 break;
523 }
524 default:
525 AssertMsgFailed(("Unknown block cache type!\n"));
526 }
527
528 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
529 return rc;
530}
531
532/**
533 * Initiates a read I/O task for the given entry.
534 *
535 * @returns VBox status code.
536 * @param pEntry The entry to fetch the data to.
537 */
538static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
539{
540 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
541 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
542
543 /* Make sure no one evicts the entry while it is accessed. */
544 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
545
546 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
547 if (RT_UNLIKELY(!pIoXfer))
548 return VERR_NO_MEMORY;
549
550 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
551
552 pIoXfer->fIoCache = true;
553 pIoXfer->pEntry = pEntry;
554 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
555 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
556 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
557 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
558
559 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
560}
561
562/**
563 * Initiates a write I/O task for the given entry.
564 *
565 * @returns nothing.
566 * @param pEntry The entry to read the data from.
567 */
568static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
569{
570 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
571 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
572
573 /* Make sure no one evicts the entry while it is accessed. */
574 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
575
576 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
577 if (RT_UNLIKELY(!pIoXfer))
578 return VERR_NO_MEMORY;
579
580 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
581
582 pIoXfer->fIoCache = true;
583 pIoXfer->pEntry = pEntry;
584 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
585 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
586 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
587 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
588
589 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
590}
591
592/**
593 * Passthrough a part of a request directly to the I/O manager
594 * handling the endpoint.
595 *
596 * @returns VBox status code.
597 * @param pEndpoint The endpoint.
598 * @param pTask The task.
599 * @param pIoMemCtx The I/O memory context to use.
600 * @param offStart Offset to start transfer from.
601 * @param cbData Amount of data to transfer.
602 * @param enmTransferType The transfer type (read/write)
603 */
604static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
605 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
606 PDMBLKCACHEXFERDIR enmXferDir)
607{
608
609 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
610 if (RT_UNLIKELY(!pIoXfer))
611 return VERR_NO_MEMORY;
612
613 ASMAtomicIncU32(&pReq->cXfersPending);
614 pIoXfer->fIoCache = false;
615 pIoXfer->pReq = pReq;
616 pIoXfer->enmXferDir = enmXferDir;
617 if (pSgBuf)
618 {
619 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
620 RTSgBufAdvance(pSgBuf, cbData);
621 }
622
623 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
624}
625
626/**
627 * Commit a single dirty entry to the endpoint
628 *
629 * @returns nothing
630 * @param pEntry The entry to commit.
631 */
632static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
633{
634 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
635 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
636 ("Invalid flags set for entry %#p\n", pEntry));
637
638 pdmBlkCacheEntryWriteToMedium(pEntry);
639}
640
641/**
642 * Commit all dirty entries for a single endpoint.
643 *
644 * @returns nothing.
645 * @param pBlkCache The endpoint cache to commit.
646 */
647static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
648{
649 uint32_t cbCommitted = 0;
650
651 /* Return if the cache was suspended. */
652 if (pBlkCache->fSuspended)
653 return;
654
655 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
656
657 /* The list is moved to a new header to reduce locking overhead. */
658 RTLISTANCHOR ListDirtyNotCommitted;
659
660 RTListInit(&ListDirtyNotCommitted);
661 RTSpinlockAcquire(pBlkCache->LockList);
662 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
663 RTSpinlockRelease(pBlkCache->LockList);
664
665 if (!RTListIsEmpty(&ListDirtyNotCommitted))
666 {
667 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
668
669 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
670 {
671 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
672 NodeNotCommitted);
673 pdmBlkCacheEntryCommit(pEntry);
674 cbCommitted += pEntry->cbData;
675 RTListNodeRemove(&pEntry->NodeNotCommitted);
676 pEntry = pNext;
677 }
678
679 /* Commit the last endpoint */
680 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
681 pdmBlkCacheEntryCommit(pEntry);
682 cbCommitted += pEntry->cbData;
683 RTListNodeRemove(&pEntry->NodeNotCommitted);
684 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
685 ("Committed all entries but list is not empty\n"));
686 }
687
688 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
689 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
690 ("Number of committed bytes exceeds number of dirty bytes\n"));
691 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
692
693 /* Reset the commit timer if we don't have any dirty bits. */
694 if ( !(cbDirtyOld - cbCommitted)
695 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
696 TMTimerStop(pBlkCache->pCache->pTimerCommit);
697}
698
699/**
700 * Commit all dirty entries in the cache.
701 *
702 * @returns nothing.
703 * @param pCache The global cache instance.
704 */
705static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
706{
707 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
708
709 if (!fCommitInProgress)
710 {
711 pdmBlkCacheLockEnter(pCache);
712 Assert(!RTListIsEmpty(&pCache->ListUsers));
713
714 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
715 AssertPtr(pBlkCache);
716
717 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
718 {
719 pdmBlkCacheCommit(pBlkCache);
720
721 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
722 NodeCacheUser);
723 }
724
725 /* Commit the last endpoint */
726 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
727 pdmBlkCacheCommit(pBlkCache);
728
729 pdmBlkCacheLockLeave(pCache);
730 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
731 }
732}
733
734/**
735 * Adds the given entry as a dirty to the cache.
736 *
737 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
738 * @param pBlkCache The endpoint cache the entry belongs to.
739 * @param pEntry The entry to add.
740 */
741static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
742{
743 bool fDirtyBytesExceeded = false;
744 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
745
746 /* If the commit timer is disabled we commit right away. */
747 if (pCache->u32CommitTimeoutMs == 0)
748 {
749 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
750 pdmBlkCacheEntryCommit(pEntry);
751 }
752 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
753 {
754 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
755
756 RTSpinlockAcquire(pBlkCache->LockList);
757 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
758 RTSpinlockRelease(pBlkCache->LockList);
759
760 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
761
762 /* Prevent committing if the VM was suspended. */
763 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
764 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
765 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
766 {
767 /* Arm the commit timer. */
768 TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs);
769 }
770 }
771
772 return fDirtyBytesExceeded;
773}
774
775static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
776{
777 bool fFound = false;
778 PPDMBLKCACHE pBlkCache = NULL;
779
780 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
781 {
782 if (!RTStrCmp(pBlkCache->pszId, pcszId))
783 {
784 fFound = true;
785 break;
786 }
787 }
788
789 return fFound ? pBlkCache : NULL;
790}
791
792/**
793 * Commit timer callback.
794 */
795static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
796{
797 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
798 NOREF(pVM); NOREF(pTimer);
799
800 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
801
802 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
803 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
804 pdmBlkCacheCommitDirtyEntries(pCache);
805
806 LogFlowFunc(("Entries committed, going to sleep\n"));
807}
808
809static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
810{
811 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
812
813 AssertPtr(pBlkCacheGlobal);
814
815 pdmBlkCacheLockEnter(pBlkCacheGlobal);
816
817 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
818
819 /* Go through the list and save all dirty entries. */
820 PPDMBLKCACHE pBlkCache;
821 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
822 {
823 uint32_t cEntries = 0;
824 PPDMBLKCACHEENTRY pEntry;
825
826 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
827 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
828 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
829
830 /* Count the number of entries to safe. */
831 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
832 {
833 cEntries++;
834 }
835
836 SSMR3PutU32(pSSM, cEntries);
837
838 /* Walk the list of all dirty entries and save them. */
839 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
840 {
841 /* A few sanity checks. */
842 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
843 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
844 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
845 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
846 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
847 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
848 ("Invalid list\n"));
849 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
850 ("Size and range do not match\n"));
851
852 /* Save */
853 SSMR3PutU64(pSSM, pEntry->Core.Key);
854 SSMR3PutU32(pSSM, pEntry->cbData);
855 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
856 }
857
858 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
859 }
860
861 pdmBlkCacheLockLeave(pBlkCacheGlobal);
862
863 /* Terminator */
864 return SSMR3PutU32(pSSM, UINT32_MAX);
865}
866
867static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
868{
869 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
870 uint32_t cRefs;
871
872 NOREF(uPass);
873 AssertPtr(pBlkCacheGlobal);
874
875 pdmBlkCacheLockEnter(pBlkCacheGlobal);
876
877 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
878 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
879
880 SSMR3GetU32(pSSM, &cRefs);
881
882 /*
883 * Fewer users in the saved state than in the current VM are allowed
884 * because that means that there are only new ones which don't have any saved state
885 * which can get lost.
886 * More saved state entries than registered cache users are only allowed if the
887 * missing users don't have any data saved in the cache.
888 */
889 int rc = VINF_SUCCESS;
890 char *pszId = NULL;
891
892 while ( cRefs > 0
893 && RT_SUCCESS(rc))
894 {
895 PPDMBLKCACHE pBlkCache = NULL;
896 uint32_t cbId = 0;
897
898 SSMR3GetU32(pSSM, &cbId);
899 Assert(cbId > 0);
900
901 cbId++; /* Include terminator */
902 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
903 if (!pszId)
904 {
905 rc = VERR_NO_MEMORY;
906 break;
907 }
908
909 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
910 AssertRC(rc);
911
912 /* Search for the block cache with the provided id. */
913 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
914
915 /* Get the entries */
916 uint32_t cEntries;
917 SSMR3GetU32(pSSM, &cEntries);
918
919 if (!pBlkCache && (cEntries > 0))
920 {
921 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
922 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
923 break;
924 }
925
926 RTStrFree(pszId);
927 pszId = NULL;
928
929 while (cEntries > 0)
930 {
931 PPDMBLKCACHEENTRY pEntry;
932 uint64_t off;
933 uint32_t cbEntry;
934
935 SSMR3GetU64(pSSM, &off);
936 SSMR3GetU32(pSSM, &cbEntry);
937
938 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
939 if (!pEntry)
940 {
941 rc = VERR_NO_MEMORY;
942 break;
943 }
944
945 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
946 if (RT_FAILURE(rc))
947 {
948 RTMemFree(pEntry->pbData);
949 RTMemFree(pEntry);
950 break;
951 }
952
953 /* Insert into the tree. */
954 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
955 Assert(fInserted); NOREF(fInserted);
956
957 /* Add to the dirty list. */
958 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
959 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
960 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
961 pdmBlkCacheEntryRelease(pEntry);
962 cEntries--;
963 }
964
965 cRefs--;
966 }
967
968 if (pszId)
969 RTStrFree(pszId);
970
971 if (cRefs && RT_SUCCESS(rc))
972 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
973 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
974
975 pdmBlkCacheLockLeave(pBlkCacheGlobal);
976
977 if (RT_SUCCESS(rc))
978 {
979 uint32_t u32 = 0;
980 rc = SSMR3GetU32(pSSM, &u32);
981 if (RT_SUCCESS(rc))
982 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
983 }
984
985 return rc;
986}
987
988int pdmR3BlkCacheInit(PVM pVM)
989{
990 int rc = VINF_SUCCESS;
991 PUVM pUVM = pVM->pUVM;
992 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
993
994 LogFlowFunc((": pVM=%p\n", pVM));
995
996 VM_ASSERT_EMT(pVM);
997
998 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
999 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1000
1001 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1002 if (!pBlkCacheGlobal)
1003 return VERR_NO_MEMORY;
1004
1005 RTListInit(&pBlkCacheGlobal->ListUsers);
1006 pBlkCacheGlobal->pVM = pVM;
1007 pBlkCacheGlobal->cRefs = 0;
1008 pBlkCacheGlobal->cbCached = 0;
1009 pBlkCacheGlobal->fCommitInProgress = false;
1010
1011 /* Initialize members */
1012 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1013 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1014 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1015
1016 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1017 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1018 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1019
1020 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1021 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1022 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1023
1024 do
1025 {
1026 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1027 AssertLogRelRCBreak(rc);
1028 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1029
1030 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1031 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1032 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1033 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1034
1035 /** @todo r=aeichner: Experiment to find optimal default values */
1036 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1037 AssertLogRelRCBreak(rc);
1038 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1039 AssertLogRelRCBreak(rc);
1040 } while (0);
1041
1042 if (RT_SUCCESS(rc))
1043 {
1044 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1045 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1046 "/PDM/BlkCache/cbMax",
1047 STAMUNIT_BYTES,
1048 "Maximum cache size");
1049 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1050 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1051 "/PDM/BlkCache/cbCached",
1052 STAMUNIT_BYTES,
1053 "Currently used cache");
1054 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1055 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1056 "/PDM/BlkCache/cbCachedMruIn",
1057 STAMUNIT_BYTES,
1058 "Number of bytes cached in MRU list");
1059 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1060 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1061 "/PDM/BlkCache/cbCachedMruOut",
1062 STAMUNIT_BYTES,
1063 "Number of bytes cached in FRU list");
1064 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1065 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1066 "/PDM/BlkCache/cbCachedFru",
1067 STAMUNIT_BYTES,
1068 "Number of bytes cached in FRU ghost list");
1069
1070#ifdef VBOX_WITH_STATISTICS
1071 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1072 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1073 "/PDM/BlkCache/CacheHits",
1074 STAMUNIT_COUNT, "Number of hits in the cache");
1075 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1076 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1077 "/PDM/BlkCache/CachePartialHits",
1078 STAMUNIT_COUNT, "Number of partial hits in the cache");
1079 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1080 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1081 "/PDM/BlkCache/CacheMisses",
1082 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1083 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1084 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1085 "/PDM/BlkCache/CacheRead",
1086 STAMUNIT_BYTES, "Number of bytes read from the cache");
1087 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1088 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1089 "/PDM/BlkCache/CacheWritten",
1090 STAMUNIT_BYTES, "Number of bytes written to the cache");
1091 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1092 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1093 "/PDM/BlkCache/CacheTreeGet",
1094 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1095 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1096 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1097 "/PDM/BlkCache/CacheTreeInsert",
1098 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1099 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1100 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1101 "/PDM/BlkCache/CacheTreeRemove",
1102 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1103 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1104 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1105 "/PDM/BlkCache/CacheBuffersReused",
1106 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1107#endif
1108
1109 /* Initialize the critical section */
1110 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1111 }
1112
1113 if (RT_SUCCESS(rc))
1114 {
1115 /* Create the commit timer */
1116 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1117 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1118 pdmBlkCacheCommitTimerCallback,
1119 pBlkCacheGlobal,
1120 "BlkCache-Commit",
1121 &pBlkCacheGlobal->pTimerCommit);
1122
1123 if (RT_SUCCESS(rc))
1124 {
1125 /* Register saved state handler. */
1126 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1127 NULL, NULL, NULL,
1128 NULL, pdmR3BlkCacheSaveExec, NULL,
1129 NULL, pdmR3BlkCacheLoadExec, NULL);
1130 if (RT_SUCCESS(rc))
1131 {
1132 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1133 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1134 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1135 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1136 return VINF_SUCCESS;
1137 }
1138 }
1139
1140 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1141 }
1142
1143 if (pBlkCacheGlobal)
1144 RTMemFree(pBlkCacheGlobal);
1145
1146 LogFlowFunc((": returns rc=%Rrc\n", pVM, rc));
1147 return rc;
1148}
1149
1150void pdmR3BlkCacheTerm(PVM pVM)
1151{
1152 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1153
1154 if (pBlkCacheGlobal)
1155 {
1156 /* Make sure no one else uses the cache now */
1157 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1158
1159 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1160 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1161 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1162 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1163
1164 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1165
1166 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1167 RTMemFree(pBlkCacheGlobal);
1168 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1169 }
1170}
1171
1172int pdmR3BlkCacheResume(PVM pVM)
1173{
1174 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1175
1176 LogFlowFunc(("pVM=%#p\n", pVM));
1177
1178 if ( pBlkCacheGlobal
1179 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1180 {
1181 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1182 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1183 }
1184
1185 return VINF_SUCCESS;
1186}
1187
1188static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1189{
1190 int rc = VINF_SUCCESS;
1191 PPDMBLKCACHE pBlkCache = NULL;
1192 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1193
1194 if (!pBlkCacheGlobal)
1195 return VERR_NOT_SUPPORTED;
1196
1197 /*
1198 * Check that no other user cache has the same id first,
1199 * Unique id's are necessary in case the state is saved.
1200 */
1201 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1202
1203 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1204
1205 if (!pBlkCache)
1206 {
1207 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1208
1209 if (pBlkCache)
1210 pBlkCache->pszId = RTStrDup(pcszId);
1211
1212 if ( pBlkCache
1213 && pBlkCache->pszId)
1214 {
1215 pBlkCache->fSuspended = false;
1216 pBlkCache->pCache = pBlkCacheGlobal;
1217 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1218
1219 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1220 if (RT_SUCCESS(rc))
1221 {
1222 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1223 if (RT_SUCCESS(rc))
1224 {
1225 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1226 if (pBlkCache->pTree)
1227 {
1228#ifdef VBOX_WITH_STATISTICS
1229 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1230 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1231 STAMUNIT_COUNT, "Number of deferred writes",
1232 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1233#endif
1234
1235 /* Add to the list of users. */
1236 pBlkCacheGlobal->cRefs++;
1237 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1238 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1239
1240 *ppBlkCache = pBlkCache;
1241 LogFlowFunc(("returns success\n"));
1242 return VINF_SUCCESS;
1243 }
1244 else
1245 rc = VERR_NO_MEMORY;
1246
1247 RTSemRWDestroy(pBlkCache->SemRWEntries);
1248 }
1249
1250 RTSpinlockDestroy(pBlkCache->LockList);
1251 }
1252
1253 RTStrFree(pBlkCache->pszId);
1254 }
1255 else
1256 rc = VERR_NO_MEMORY;
1257
1258 if (pBlkCache)
1259 RTMemFree(pBlkCache);
1260 }
1261 else
1262 rc = VERR_ALREADY_EXISTS;
1263
1264 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1265
1266 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1267 return rc;
1268}
1269
1270VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1271 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1272 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1273 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1274 const char *pcszId)
1275{
1276 int rc = VINF_SUCCESS;
1277 PPDMBLKCACHE pBlkCache;
1278
1279 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1280 if (RT_SUCCESS(rc))
1281 {
1282 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1283 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1284 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1285 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1286 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1287 *ppBlkCache = pBlkCache;
1288 }
1289
1290 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1291 return rc;
1292}
1293
1294VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1295 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1296 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1297 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1298 const char *pcszId)
1299{
1300 int rc = VINF_SUCCESS;
1301 PPDMBLKCACHE pBlkCache;
1302
1303 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1304 if (RT_SUCCESS(rc))
1305 {
1306 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1307 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1308 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1309 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1310 pBlkCache->u.Dev.pDevIns = pDevIns;
1311 *ppBlkCache = pBlkCache;
1312 }
1313
1314 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1315 return rc;
1316
1317}
1318
1319VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1320 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1321 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1322 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1323 const char *pcszId)
1324{
1325 int rc = VINF_SUCCESS;
1326 PPDMBLKCACHE pBlkCache;
1327
1328 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1329 if (RT_SUCCESS(rc))
1330 {
1331 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1332 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1333 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1334 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1335 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1336 *ppBlkCache = pBlkCache;
1337 }
1338
1339 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1340 return rc;
1341
1342}
1343
1344VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1345 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1346 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1347 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1348 const char *pcszId)
1349{
1350 int rc = VINF_SUCCESS;
1351 PPDMBLKCACHE pBlkCache;
1352
1353 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1354 if (RT_SUCCESS(rc))
1355 {
1356 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1357 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1358 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1359 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1360 pBlkCache->u.Int.pvUser = pvUser;
1361 *ppBlkCache = pBlkCache;
1362 }
1363
1364 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1365 return rc;
1366
1367}
1368
1369/**
1370 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1371 *
1372 * @returns IPRT status code.
1373 * @param pNode The node to destroy.
1374 * @param pvUser Opaque user data.
1375 */
1376static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1377{
1378 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1379 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1380 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1381
1382 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1383 {
1384 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1385 pdmBlkCacheEntryRef(pEntry);
1386 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1387 pdmBlkCacheLockLeave(pCache);
1388
1389 RTThreadSleep(250);
1390
1391 /* Re-enter all locks */
1392 pdmBlkCacheLockEnter(pCache);
1393 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1394 pdmBlkCacheEntryRelease(pEntry);
1395 }
1396
1397 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1398 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1399
1400 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1401 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1402
1403 pdmBlkCacheEntryRemoveFromList(pEntry);
1404
1405 if (fUpdateCache)
1406 pdmBlkCacheSub(pCache, pEntry->cbData);
1407
1408 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1409 RTMemFree(pEntry);
1410
1411 return VINF_SUCCESS;
1412}
1413
1414/**
1415 * Destroys all cache resources used by the given endpoint.
1416 *
1417 * @returns nothing.
1418 * @param pEndpoint The endpoint to the destroy.
1419 */
1420VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1421{
1422 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1423
1424 /*
1425 * Commit all dirty entries now (they are waited on for completion during the
1426 * destruction of the AVL tree below).
1427 * The exception is if the VM was paused because of an I/O error before.
1428 */
1429 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1430 pdmBlkCacheCommit(pBlkCache);
1431
1432 /* Make sure nobody is accessing the cache while we delete the tree. */
1433 pdmBlkCacheLockEnter(pCache);
1434 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1435 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1436 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1437
1438 RTSpinlockDestroy(pBlkCache->LockList);
1439
1440 pCache->cRefs--;
1441 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1442
1443 pdmBlkCacheLockLeave(pCache);
1444
1445 RTSemRWDestroy(pBlkCache->SemRWEntries);
1446
1447#ifdef VBOX_WITH_STATISTICS
1448 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1449#endif
1450
1451 RTStrFree(pBlkCache->pszId);
1452 RTMemFree(pBlkCache);
1453}
1454
1455VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1456{
1457 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1458
1459 /*
1460 * Validate input.
1461 */
1462 if (!pDevIns)
1463 return;
1464 VM_ASSERT_EMT(pVM);
1465
1466 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1467 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1468
1469 /* Return silently if not supported. */
1470 if (!pBlkCacheGlobal)
1471 return;
1472
1473 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1474
1475 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1476 {
1477 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1478 && pBlkCache->u.Dev.pDevIns == pDevIns)
1479 PDMR3BlkCacheRelease(pBlkCache);
1480 }
1481
1482 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1483}
1484
1485VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1486{
1487 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1488
1489 /*
1490 * Validate input.
1491 */
1492 if (!pDrvIns)
1493 return;
1494 VM_ASSERT_EMT(pVM);
1495
1496 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1497 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1498
1499 /* Return silently if not supported. */
1500 if (!pBlkCacheGlobal)
1501 return;
1502
1503 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1504
1505 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1506 {
1507 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1508 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1509 PDMR3BlkCacheRelease(pBlkCache);
1510 }
1511
1512 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1513}
1514
1515VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1516{
1517 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1518
1519 /*
1520 * Validate input.
1521 */
1522 if (!pUsbIns)
1523 return;
1524 VM_ASSERT_EMT(pVM);
1525
1526 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1527 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1528
1529 /* Return silently if not supported. */
1530 if (!pBlkCacheGlobal)
1531 return;
1532
1533 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1534
1535 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1536 {
1537 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1538 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1539 PDMR3BlkCacheRelease(pBlkCache);
1540 }
1541
1542 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1543}
1544
1545static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1546{
1547 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1548
1549 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1550 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1551 if (pEntry)
1552 pdmBlkCacheEntryRef(pEntry);
1553 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1554
1555 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1556
1557 return pEntry;
1558}
1559
1560/**
1561 * Return the best fit cache entries for the given offset.
1562 *
1563 * @returns nothing.
1564 * @param pBlkCache The endpoint cache.
1565 * @param off The offset.
1566 * @param pEntryAbove Where to store the pointer to the best fit entry above the
1567 * the given offset. NULL if not required.
1568 */
1569static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1570 PPDMBLKCACHEENTRY *ppEntryAbove)
1571{
1572 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1573
1574 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1575 if (ppEntryAbove)
1576 {
1577 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1578 if (*ppEntryAbove)
1579 pdmBlkCacheEntryRef(*ppEntryAbove);
1580 }
1581
1582 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1583
1584 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1585}
1586
1587static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1588{
1589 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1590 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1591 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1592 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1593 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1594 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1595}
1596
1597/**
1598 * Allocates and initializes a new entry for the cache.
1599 * The entry has a reference count of 1.
1600 *
1601 * @returns Pointer to the new cache entry or NULL if out of memory.
1602 * @param pBlkCache The cache the entry belongs to.
1603 * @param off Start offset.
1604 * @param cbData Size of the cache entry.
1605 * @param pbBuffer Pointer to the buffer to use.
1606 * NULL if a new buffer should be allocated.
1607 * The buffer needs to have the same size of the entry.
1608 */
1609static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1610 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1611{
1612 AssertReturn(cbData <= UINT32_MAX, NULL);
1613 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1614
1615 if (RT_UNLIKELY(!pEntryNew))
1616 return NULL;
1617
1618 pEntryNew->Core.Key = off;
1619 pEntryNew->Core.KeyLast = off + cbData - 1;
1620 pEntryNew->pBlkCache = pBlkCache;
1621 pEntryNew->fFlags = 0;
1622 pEntryNew->cRefs = 1; /* We are using it now. */
1623 pEntryNew->pList = NULL;
1624 pEntryNew->cbData = (uint32_t)cbData;
1625 pEntryNew->pWaitingHead = NULL;
1626 pEntryNew->pWaitingTail = NULL;
1627 if (pbBuffer)
1628 pEntryNew->pbData = pbBuffer;
1629 else
1630 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1631
1632 if (RT_UNLIKELY(!pEntryNew->pbData))
1633 {
1634 RTMemFree(pEntryNew);
1635 return NULL;
1636 }
1637
1638 return pEntryNew;
1639}
1640
1641/**
1642 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1643 * in exclusive mode.
1644 *
1645 * @returns true if the flag in fSet is set and the one in fClear is clear.
1646 * false otherwise.
1647 * The R/W semaphore is only held if true is returned.
1648 *
1649 * @param pBlkCache The endpoint cache instance data.
1650 * @param pEntry The entry to check the flags for.
1651 * @param fSet The flag which is tested to be set.
1652 * @param fClear The flag which is tested to be clear.
1653 */
1654DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1655 PPDMBLKCACHEENTRY pEntry,
1656 uint32_t fSet, uint32_t fClear)
1657{
1658 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1659 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1660
1661 if (fPassed)
1662 {
1663 /* Acquire the lock and check again because the completion callback might have raced us. */
1664 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1665
1666 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1667 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1668
1669 /* Drop the lock if we didn't passed the test. */
1670 if (!fPassed)
1671 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1672 }
1673
1674 return fPassed;
1675}
1676
1677/**
1678 * Adds a segment to the waiting list for a cache entry
1679 * which is currently in progress.
1680 *
1681 * @returns nothing.
1682 * @param pEntry The cache entry to add the segment to.
1683 * @param pSeg The segment to add.
1684 */
1685DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1686 PPDMBLKCACHEWAITER pWaiter)
1687{
1688 pWaiter->pNext = NULL;
1689
1690 if (pEntry->pWaitingHead)
1691 {
1692 AssertPtr(pEntry->pWaitingTail);
1693
1694 pEntry->pWaitingTail->pNext = pWaiter;
1695 pEntry->pWaitingTail = pWaiter;
1696 }
1697 else
1698 {
1699 Assert(!pEntry->pWaitingTail);
1700
1701 pEntry->pWaitingHead = pWaiter;
1702 pEntry->pWaitingTail = pWaiter;
1703 }
1704}
1705
1706/**
1707 * Add a buffer described by the I/O memory context
1708 * to the entry waiting for completion.
1709 *
1710 * @returns VBox status code.
1711 * @param pEntry The entry to add the buffer to.
1712 * @param pTask Task associated with the buffer.
1713 * @param pIoMemCtx The memory context to use.
1714 * @param offDiff Offset from the start of the buffer
1715 * in the entry.
1716 * @param cbData Amount of data to wait for onthis entry.
1717 * @param fWrite Flag whether the task waits because it wants to write
1718 * to the cache entry.
1719 */
1720static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1721 PPDMBLKCACHEREQ pReq,
1722 PRTSGBUF pSgBuf, uint64_t offDiff,
1723 size_t cbData, bool fWrite)
1724{
1725 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1726 if (!pWaiter)
1727 return VERR_NO_MEMORY;
1728
1729 ASMAtomicIncU32(&pReq->cXfersPending);
1730 pWaiter->pReq = pReq;
1731 pWaiter->offCacheEntry = offDiff;
1732 pWaiter->cbTransfer = cbData;
1733 pWaiter->fWrite = fWrite;
1734 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1735 RTSgBufAdvance(pSgBuf, cbData);
1736
1737 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1738
1739 return VINF_SUCCESS;
1740}
1741
1742/**
1743 * Calculate aligned offset and size for a new cache entry which do not
1744 * intersect with an already existing entry and the file end.
1745 *
1746 * @returns The number of bytes the entry can hold of the requested amount
1747 * of bytes.
1748 * @param pEndpoint The endpoint.
1749 * @param pBlkCache The endpoint cache.
1750 * @param off The start offset.
1751 * @param cb The number of bytes the entry needs to hold at
1752 * least.
1753 * @param pcbEntry Where to store the number of bytes the entry can hold.
1754 * Can be less than given because of other entries.
1755 */
1756static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1757 uint64_t off, uint32_t cb,
1758 uint32_t *pcbEntry)
1759{
1760 /* Get the best fit entries around the offset */
1761 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1762 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1763
1764 /* Log the info */
1765 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1766 pEntryAbove ? "B" : "No b",
1767 off,
1768 pEntryAbove ? pEntryAbove->Core.Key : 0,
1769 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1770 pEntryAbove ? pEntryAbove->cbData : 0));
1771
1772 uint32_t cbNext;
1773 uint32_t cbInEntry;
1774 if ( pEntryAbove
1775 && off + cb > pEntryAbove->Core.Key)
1776 {
1777 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1778 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1779 }
1780 else
1781 {
1782 cbInEntry = cb;
1783 cbNext = cb;
1784 }
1785
1786 /* A few sanity checks */
1787 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1788 ("Aligned size intersects with another cache entry\n"));
1789 Assert(cbInEntry <= cbNext);
1790
1791 if (pEntryAbove)
1792 pdmBlkCacheEntryRelease(pEntryAbove);
1793
1794 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1795
1796 *pcbEntry = cbNext;
1797
1798 return cbInEntry;
1799}
1800
1801/**
1802 * Create a new cache entry evicting data from the cache if required.
1803 *
1804 * @returns Pointer to the new cache entry or NULL
1805 * if not enough bytes could be evicted from the cache.
1806 * @param pEndpoint The endpoint.
1807 * @param pBlkCache The endpoint cache.
1808 * @param off The offset.
1809 * @param cb Number of bytes the cache entry should have.
1810 * @param pcbData Where to store the number of bytes the new
1811 * entry can hold. May be lower than actually requested
1812 * due to another entry intersecting the access range.
1813 */
1814static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1815 uint64_t off, size_t cb,
1816 size_t *pcbData)
1817{
1818 uint32_t cbEntry = 0;
1819
1820 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1821 AssertReturn(cb <= UINT32_MAX, NULL);
1822
1823 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1824 pdmBlkCacheLockEnter(pCache);
1825
1826 PPDMBLKCACHEENTRY pEntryNew = NULL;
1827 uint8_t *pbBuffer = NULL;
1828 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1829 if (fEnough)
1830 {
1831 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1832
1833 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1834 if (RT_LIKELY(pEntryNew))
1835 {
1836 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1837 pdmBlkCacheAdd(pCache, cbEntry);
1838 pdmBlkCacheLockLeave(pCache);
1839
1840 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1841
1842 AssertMsg( (off >= pEntryNew->Core.Key)
1843 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1844 ("Overflow in calculation off=%llu\n", off));
1845 }
1846 else
1847 pdmBlkCacheLockLeave(pCache);
1848 }
1849 else
1850 pdmBlkCacheLockLeave(pCache);
1851
1852 return pEntryNew;
1853}
1854
1855static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1856{
1857 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1858
1859 if (RT_LIKELY(pReq))
1860 {
1861 pReq->pvUser = pvUser;
1862 pReq->rcReq = VINF_SUCCESS;
1863 pReq->cXfersPending = 0;
1864 }
1865
1866 return pReq;
1867}
1868
1869static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1870{
1871 switch (pBlkCache->enmType)
1872 {
1873 case PDMBLKCACHETYPE_DEV:
1874 {
1875 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1876 pReq->pvUser, pReq->rcReq);
1877 break;
1878 }
1879 case PDMBLKCACHETYPE_DRV:
1880 {
1881 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1882 pReq->pvUser, pReq->rcReq);
1883 break;
1884 }
1885 case PDMBLKCACHETYPE_USB:
1886 {
1887 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1888 pReq->pvUser, pReq->rcReq);
1889 break;
1890 }
1891 case PDMBLKCACHETYPE_INTERNAL:
1892 {
1893 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1894 pReq->pvUser, pReq->rcReq);
1895 break;
1896 }
1897 default:
1898 AssertMsgFailed(("Unknown block cache type!\n"));
1899 }
1900
1901 RTMemFree(pReq);
1902}
1903
1904static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1905 int rcReq, bool fCallHandler)
1906{
1907 if (RT_FAILURE(rcReq))
1908 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1909
1910 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1911 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1912
1913 if (!cXfersPending)
1914 {
1915 if (fCallHandler)
1916 pdmBlkCacheReqComplete(pBlkCache, pReq);
1917 else
1918 RTMemFree(pReq);
1919 return true;
1920 }
1921
1922 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1923 return false;
1924}
1925
1926VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1927 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1928{
1929 int rc = VINF_SUCCESS;
1930 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1931 PPDMBLKCACHEENTRY pEntry;
1932 PPDMBLKCACHEREQ pReq;
1933
1934 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1935 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1936
1937 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1938 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1939
1940 RTSGBUF SgBuf;
1941 RTSgBufClone(&SgBuf, pcSgBuf);
1942
1943 /* Allocate new request structure. */
1944 pReq = pdmBlkCacheReqAlloc(pvUser);
1945 if (RT_UNLIKELY(!pReq))
1946 return VERR_NO_MEMORY;
1947
1948 /* Increment data transfer counter to keep the request valid while we access it. */
1949 ASMAtomicIncU32(&pReq->cXfersPending);
1950
1951 while (cbRead)
1952 {
1953 size_t cbToRead;
1954
1955 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1956
1957 /*
1958 * If there is no entry we try to create a new one eviciting unused pages
1959 * if the cache is full. If this is not possible we will pass the request through
1960 * and skip the caching (all entries may be still in progress so they can't
1961 * be evicted)
1962 * If we have an entry it can be in one of the LRU lists where the entry
1963 * contains data (recently used or frequently used LRU) so we can just read
1964 * the data we need and put the entry at the head of the frequently used LRU list.
1965 * In case the entry is in one of the ghost lists it doesn't contain any data.
1966 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1967 */
1968 if (pEntry)
1969 {
1970 uint64_t offDiff = off - pEntry->Core.Key;
1971
1972 AssertMsg(off >= pEntry->Core.Key,
1973 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1974 off, pEntry->Core.Key));
1975
1976 AssertPtr(pEntry->pList);
1977
1978 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1979
1980 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1981 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1982 off, cbToRead));
1983
1984 cbRead -= cbToRead;
1985
1986 if (!cbRead)
1987 STAM_COUNTER_INC(&pCache->cHits);
1988 else
1989 STAM_COUNTER_INC(&pCache->cPartialHits);
1990
1991 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1992
1993 /* Ghost lists contain no data. */
1994 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1995 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1996 {
1997 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
1998 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
1999 PDMBLKCACHE_ENTRY_IS_DIRTY))
2000 {
2001 /* Entry didn't completed yet. Append to the list */
2002 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2003 &SgBuf, offDiff, cbToRead,
2004 false /* fWrite */);
2005 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2006 }
2007 else
2008 {
2009 /* Read as much as we can from the entry. */
2010 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2011 }
2012
2013 /* Move this entry to the top position */
2014 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2015 {
2016 pdmBlkCacheLockEnter(pCache);
2017 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2018 pdmBlkCacheLockLeave(pCache);
2019 }
2020 /* Release the entry */
2021 pdmBlkCacheEntryRelease(pEntry);
2022 }
2023 else
2024 {
2025 uint8_t *pbBuffer = NULL;
2026
2027 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2028
2029 pdmBlkCacheLockEnter(pCache);
2030 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2031 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2032
2033 /* Move the entry to Am and fetch it to the cache. */
2034 if (fEnough)
2035 {
2036 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2037 pdmBlkCacheAdd(pCache, pEntry->cbData);
2038 pdmBlkCacheLockLeave(pCache);
2039
2040 if (pbBuffer)
2041 pEntry->pbData = pbBuffer;
2042 else
2043 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2044 AssertPtr(pEntry->pbData);
2045
2046 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2047 &SgBuf, offDiff, cbToRead,
2048 false /* fWrite */);
2049 pdmBlkCacheEntryReadFromMedium(pEntry);
2050 /* Release the entry */
2051 pdmBlkCacheEntryRelease(pEntry);
2052 }
2053 else
2054 {
2055 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2056 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2057 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2058 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2059 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2060
2061 pdmBlkCacheLockLeave(pCache);
2062
2063 RTMemFree(pEntry);
2064
2065 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2066 &SgBuf, off, cbToRead,
2067 PDMBLKCACHEXFERDIR_READ);
2068 }
2069 }
2070 }
2071 else
2072 {
2073#ifdef VBOX_WITH_IO_READ_CACHE
2074 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2075 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2076 off, cbRead,
2077 &cbToRead);
2078
2079 cbRead -= cbToRead;
2080
2081 if (pEntryNew)
2082 {
2083 if (!cbRead)
2084 STAM_COUNTER_INC(&pCache->cMisses);
2085 else
2086 STAM_COUNTER_INC(&pCache->cPartialHits);
2087
2088 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2089 &SgBuf,
2090 off - pEntryNew->Core.Key,
2091 cbToRead,
2092 false /* fWrite */);
2093 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2094 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2095 }
2096 else
2097 {
2098 /*
2099 * There is not enough free space in the cache.
2100 * Pass the request directly to the I/O manager.
2101 */
2102 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2103
2104 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2105 &SgBuf, off, cbToRead,
2106 PDMBLKCACHEXFERDIR_READ);
2107 }
2108#else
2109 /* Clip read size if necessary. */
2110 PPDMBLKCACHEENTRY pEntryAbove;
2111 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2112
2113 if (pEntryAbove)
2114 {
2115 if (off + cbRead > pEntryAbove->Core.Key)
2116 cbToRead = pEntryAbove->Core.Key - off;
2117 else
2118 cbToRead = cbRead;
2119
2120 pdmBlkCacheEntryRelease(pEntryAbove);
2121 }
2122 else
2123 cbToRead = cbRead;
2124
2125 cbRead -= cbToRead;
2126 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2127 &SgBuf, off, cbToRead,
2128 PDMBLKCACHEXFERDIR_READ);
2129#endif
2130 }
2131 off += cbToRead;
2132 }
2133
2134 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2135 rc = VINF_AIO_TASK_PENDING;
2136
2137 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2138
2139 return rc;
2140}
2141
2142VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
2143 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
2144{
2145 int rc = VINF_SUCCESS;
2146 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2147 PPDMBLKCACHEENTRY pEntry;
2148 PPDMBLKCACHEREQ pReq;
2149
2150 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2151 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
2152
2153 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2154 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2155
2156 RTSGBUF SgBuf;
2157 RTSgBufClone(&SgBuf, pcSgBuf);
2158
2159 /* Allocate new request structure. */
2160 pReq = pdmBlkCacheReqAlloc(pvUser);
2161 if (RT_UNLIKELY(!pReq))
2162 return VERR_NO_MEMORY;
2163
2164 /* Increment data transfer counter to keep the request valid while we access it. */
2165 ASMAtomicIncU32(&pReq->cXfersPending);
2166
2167 while (cbWrite)
2168 {
2169 size_t cbToWrite;
2170
2171 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2172 if (pEntry)
2173 {
2174 /* Write the data into the entry and mark it as dirty */
2175 AssertPtr(pEntry->pList);
2176
2177 uint64_t offDiff = off - pEntry->Core.Key;
2178
2179 AssertMsg(off >= pEntry->Core.Key,
2180 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2181 off, pEntry->Core.Key));
2182
2183 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2184 cbWrite -= cbToWrite;
2185
2186 if (!cbWrite)
2187 STAM_COUNTER_INC(&pCache->cHits);
2188 else
2189 STAM_COUNTER_INC(&pCache->cPartialHits);
2190
2191 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2192
2193 /* Ghost lists contain no data. */
2194 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2195 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2196 {
2197 /* Check if the entry is dirty. */
2198 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2199 PDMBLKCACHE_ENTRY_IS_DIRTY,
2200 0))
2201 {
2202 /* If it is already dirty but not in progress just update the data. */
2203 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2204 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2205 else
2206 {
2207 /* The data isn't written to the file yet */
2208 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2209 &SgBuf, offDiff, cbToWrite,
2210 true /* fWrite */);
2211 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2212 }
2213
2214 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2215 }
2216 else /* Dirty bit not set */
2217 {
2218 /*
2219 * Check if a read is in progress for this entry.
2220 * We have to defer processing in that case.
2221 */
2222 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2223 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2224 0))
2225 {
2226 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2227 &SgBuf, offDiff, cbToWrite,
2228 true /* fWrite */);
2229 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2230 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2231 }
2232 else /* I/O in progress flag not set */
2233 {
2234 /* Write as much as we can into the entry and update the file. */
2235 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2236
2237 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2238 if (fCommit)
2239 pdmBlkCacheCommitDirtyEntries(pCache);
2240 }
2241 } /* Dirty bit not set */
2242
2243 /* Move this entry to the top position */
2244 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2245 {
2246 pdmBlkCacheLockEnter(pCache);
2247 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2248 pdmBlkCacheLockLeave(pCache);
2249 }
2250
2251 pdmBlkCacheEntryRelease(pEntry);
2252 }
2253 else /* Entry is on the ghost list */
2254 {
2255 uint8_t *pbBuffer = NULL;
2256
2257 pdmBlkCacheLockEnter(pCache);
2258 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2259 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2260
2261 if (fEnough)
2262 {
2263 /* Move the entry to Am and fetch it to the cache. */
2264 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2265 pdmBlkCacheAdd(pCache, pEntry->cbData);
2266 pdmBlkCacheLockLeave(pCache);
2267
2268 if (pbBuffer)
2269 pEntry->pbData = pbBuffer;
2270 else
2271 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2272 AssertPtr(pEntry->pbData);
2273
2274 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2275 &SgBuf, offDiff, cbToWrite,
2276 true /* fWrite */);
2277 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2278 pdmBlkCacheEntryReadFromMedium(pEntry);
2279
2280 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2281 pdmBlkCacheEntryRelease(pEntry);
2282 }
2283 else
2284 {
2285 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2286 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2287 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2288 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2289 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2290
2291 pdmBlkCacheLockLeave(pCache);
2292
2293 RTMemFree(pEntry);
2294 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2295 &SgBuf, off, cbToWrite,
2296 PDMBLKCACHEXFERDIR_WRITE);
2297 }
2298 }
2299 }
2300 else /* No entry found */
2301 {
2302 /*
2303 * No entry found. Try to create a new cache entry to store the data in and if that fails
2304 * write directly to the file.
2305 */
2306 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2307 off, cbWrite,
2308 &cbToWrite);
2309
2310 cbWrite -= cbToWrite;
2311
2312 if (pEntryNew)
2313 {
2314 uint64_t offDiff = off - pEntryNew->Core.Key;
2315
2316 STAM_COUNTER_INC(&pCache->cHits);
2317
2318 /*
2319 * Check if it is possible to just write the data without waiting
2320 * for it to get fetched first.
2321 */
2322 if (!offDiff && pEntryNew->cbData == cbToWrite)
2323 {
2324 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2325
2326 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2327 if (fCommit)
2328 pdmBlkCacheCommitDirtyEntries(pCache);
2329 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2330 }
2331 else
2332 {
2333 /* Defer the write and fetch the data from the endpoint. */
2334 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2335 &SgBuf, offDiff, cbToWrite,
2336 true /* fWrite */);
2337 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2338 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2339 }
2340
2341 pdmBlkCacheEntryRelease(pEntryNew);
2342 }
2343 else
2344 {
2345 /*
2346 * There is not enough free space in the cache.
2347 * Pass the request directly to the I/O manager.
2348 */
2349 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2350
2351 STAM_COUNTER_INC(&pCache->cMisses);
2352
2353 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2354 &SgBuf, off, cbToWrite,
2355 PDMBLKCACHEXFERDIR_WRITE);
2356 }
2357 }
2358
2359 off += cbToWrite;
2360 }
2361
2362 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2363 rc = VINF_AIO_TASK_PENDING;
2364
2365 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2366
2367 return rc;
2368}
2369
2370VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2371{
2372 int rc = VINF_SUCCESS;
2373 PPDMBLKCACHEREQ pReq;
2374
2375 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2376
2377 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2378 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2379
2380 /* Commit dirty entries in the cache. */
2381 pdmBlkCacheCommit(pBlkCache);
2382
2383 /* Allocate new request structure. */
2384 pReq = pdmBlkCacheReqAlloc(pvUser);
2385 if (RT_UNLIKELY(!pReq))
2386 return VERR_NO_MEMORY;
2387
2388 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2389 PDMBLKCACHEXFERDIR_FLUSH);
2390 AssertRC(rc);
2391
2392 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2393 return VINF_AIO_TASK_PENDING;
2394}
2395
2396VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2397 unsigned cRanges, void *pvUser)
2398{
2399 int rc = VINF_SUCCESS;
2400 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2401 PPDMBLKCACHEENTRY pEntry;
2402 PPDMBLKCACHEREQ pReq;
2403
2404 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2405 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2406
2407 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2408 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2409
2410 /* Allocate new request structure. */
2411 pReq = pdmBlkCacheReqAlloc(pvUser);
2412 if (RT_UNLIKELY(!pReq))
2413 return VERR_NO_MEMORY;
2414
2415 /* Increment data transfer counter to keep the request valid while we access it. */
2416 ASMAtomicIncU32(&pReq->cXfersPending);
2417
2418 for (unsigned i = 0; i < cRanges; i++)
2419 {
2420 uint64_t offCur = paRanges[i].offStart;
2421 size_t cbLeft = paRanges[i].cbRange;
2422
2423 while (cbLeft)
2424 {
2425 size_t cbThisDiscard = 0;
2426
2427 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2428
2429 if (pEntry)
2430 {
2431 /* Write the data into the entry and mark it as dirty */
2432 AssertPtr(pEntry->pList);
2433
2434 uint64_t offDiff = offCur - pEntry->Core.Key;
2435
2436 AssertMsg(offCur >= pEntry->Core.Key,
2437 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2438 offCur, pEntry->Core.Key));
2439
2440 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2441
2442 /* Ghost lists contain no data. */
2443 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2444 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2445 {
2446 /* Check if the entry is dirty. */
2447 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2448 PDMBLKCACHE_ENTRY_IS_DIRTY,
2449 0))
2450 {
2451 /* If it is dirty but not yet in progress remove it. */
2452 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2453 {
2454 pdmBlkCacheLockEnter(pCache);
2455 pdmBlkCacheEntryRemoveFromList(pEntry);
2456
2457 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2458 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2459 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2460
2461 pdmBlkCacheLockLeave(pCache);
2462
2463 RTMemFree(pEntry);
2464 }
2465 else
2466 {
2467#if 0
2468 /* The data isn't written to the file yet */
2469 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2470 &SgBuf, offDiff, cbToWrite,
2471 true /* fWrite */);
2472 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2473#endif
2474 }
2475
2476 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2477 pdmBlkCacheEntryRelease(pEntry);
2478 }
2479 else /* Dirty bit not set */
2480 {
2481 /*
2482 * Check if a read is in progress for this entry.
2483 * We have to defer processing in that case.
2484 */
2485 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2486 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2487 0))
2488 {
2489#if 0
2490 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2491 &SgBuf, offDiff, cbToWrite,
2492 true /* fWrite */);
2493#endif
2494 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2495 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2496 pdmBlkCacheEntryRelease(pEntry);
2497 }
2498 else /* I/O in progress flag not set */
2499 {
2500 pdmBlkCacheLockEnter(pCache);
2501 pdmBlkCacheEntryRemoveFromList(pEntry);
2502
2503 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2504 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2505 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2506 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2507 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2508
2509 pdmBlkCacheLockLeave(pCache);
2510
2511 RTMemFree(pEntry);
2512 }
2513 } /* Dirty bit not set */
2514 }
2515 else /* Entry is on the ghost list just remove cache entry. */
2516 {
2517 pdmBlkCacheLockEnter(pCache);
2518 pdmBlkCacheEntryRemoveFromList(pEntry);
2519
2520 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2521 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2522 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2523 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2524 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2525
2526 pdmBlkCacheLockLeave(pCache);
2527
2528 RTMemFree(pEntry);
2529 }
2530 }
2531 /* else: no entry found. */
2532
2533 offCur += cbThisDiscard;
2534 cbLeft -= cbThisDiscard;
2535 }
2536 }
2537
2538 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2539 rc = VINF_AIO_TASK_PENDING;
2540
2541 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2542
2543 return rc;
2544}
2545
2546/**
2547 * Completes a task segment freeing all resources and completes the task handle
2548 * if everything was transferred.
2549 *
2550 * @returns Next task segment handle.
2551 * @param pTaskSeg Task segment to complete.
2552 * @param rc Status code to set.
2553 */
2554static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2555 PPDMBLKCACHEWAITER pWaiter,
2556 int rc)
2557{
2558 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2559 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2560
2561 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2562
2563 RTMemFree(pWaiter);
2564
2565 return pNext;
2566}
2567
2568static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2569{
2570 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2571 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2572
2573 /* Reference the entry now as we are clearing the I/O in progress flag
2574 * which protected the entry till now. */
2575 pdmBlkCacheEntryRef(pEntry);
2576
2577 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2578 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2579
2580 /* Process waiting segment list. The data in entry might have changed in-between. */
2581 bool fDirty = false;
2582 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2583 PPDMBLKCACHEWAITER pCurr = pComplete;
2584
2585 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2586 ("The list tail was not updated correctly\n"));
2587 pEntry->pWaitingTail = NULL;
2588 pEntry->pWaitingHead = NULL;
2589
2590 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2591 {
2592 /*
2593 * An error here is difficult to handle as the original request completed already.
2594 * The error is logged for now and the VM is paused.
2595 * If the user continues the entry is written again in the hope
2596 * the user fixed the problem and the next write succeeds.
2597 */
2598 if (RT_FAILURE(rcIoXfer))
2599 {
2600 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2601 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2602
2603 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2604 {
2605 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2606 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2607 "Make sure there is enough free space on the disk and that the disk is working properly. "
2608 "Operation can be resumed afterwards"),
2609 pBlkCache->pszId, rcIoXfer);
2610 AssertRC(rc);
2611 }
2612
2613 /* Mark the entry as dirty again to get it added to the list later on. */
2614 fDirty = true;
2615 }
2616
2617 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2618
2619 while (pCurr)
2620 {
2621 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2622
2623 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2624 fDirty = true;
2625 pCurr = pCurr->pNext;
2626 }
2627 }
2628 else
2629 {
2630 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2631 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2632 ("Invalid flags set\n"));
2633
2634 while (pCurr)
2635 {
2636 if (pCurr->fWrite)
2637 {
2638 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2639 fDirty = true;
2640 }
2641 else
2642 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2643
2644 pCurr = pCurr->pNext;
2645 }
2646 }
2647
2648 bool fCommit = false;
2649 if (fDirty)
2650 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2651
2652 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2653
2654 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2655 pdmBlkCacheEntryRelease(pEntry);
2656
2657 if (fCommit)
2658 pdmBlkCacheCommitDirtyEntries(pCache);
2659
2660 /* Complete waiters now. */
2661 while (pComplete)
2662 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2663}
2664
2665VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2666{
2667 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2668
2669 if (hIoXfer->fIoCache)
2670 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2671 else
2672 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2673 RTMemFree(hIoXfer);
2674}
2675
2676/**
2677 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2678 *
2679 * @returns IPRT status code.
2680 * @param pNode The node to destroy.
2681 * @param pvUser Opaque user data.
2682 */
2683static int pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2684{
2685 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2686 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2687 NOREF(pvUser);
2688
2689 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2690 {
2691 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2692 pdmBlkCacheEntryRef(pEntry);
2693 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2694
2695 RTThreadSleep(1);
2696
2697 /* Re-enter all locks and drop the reference. */
2698 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2699 pdmBlkCacheEntryRelease(pEntry);
2700 }
2701
2702 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2703 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2704
2705 return VINF_SUCCESS;
2706}
2707
2708VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2709{
2710 int rc = VINF_SUCCESS;
2711 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2712
2713 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2714
2715 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2716 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2717 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2718
2719 /* Wait for all I/O to complete. */
2720 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2721 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2722 AssertRC(rc);
2723 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2724
2725 return rc;
2726}
2727
2728VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2729{
2730 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2731
2732 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2733
2734 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2735
2736 return VINF_SUCCESS;
2737}
2738
2739VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2740{
2741 int rc = VINF_SUCCESS;
2742 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2743
2744 /*
2745 * Commit all dirty entries now (they are waited on for completion during the
2746 * destruction of the AVL tree below).
2747 * The exception is if the VM was paused because of an I/O error before.
2748 */
2749 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2750 pdmBlkCacheCommit(pBlkCache);
2751
2752 /* Make sure nobody is accessing the cache while we delete the tree. */
2753 pdmBlkCacheLockEnter(pCache);
2754 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2755 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2756 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2757
2758 pdmBlkCacheLockLeave(pCache);
2759 return rc;
2760}
2761
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette