VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@36054

Last change on this file since 36054 was 35346, checked in by vboxsync, 14 years ago

VMM reorg: Moving the public include files from include/VBox to include/VBox/vmm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 91.7 KB
1/* $Id: PDMBlkCache.cpp 35346 2010-12-27 16:13:13Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
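/*
 * Rough sketch of the 2Q scheme as used here (added for orientation, not part
 * of the original sources): the cache keeps three LRU lists, LruRecentlyUsedIn
 * (often called A1in: entries seen once, data resident), LruRecentlyUsedOut
 * (A1out: a "ghost" list keeping only the metadata of entries evicted from
 * A1in) and LruFrequentlyUsed (Am: entries accessed again). In the classical
 * algorithm a lookup behaves roughly like this (illustrative sketch using the
 * helpers defined below, not a literal copy of the read/write paths):
 *
 * @code
 *     PPDMBLKCACHEENTRY pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
 *     if (pEntry && pEntry->pbData)
 *     {
 *         // Resident hit on A1in or Am - serve the request from pEntry->pbData.
 *     }
 *     else if (pEntry)
 *     {
 *         // Ghost hit on LruRecentlyUsedOut - the block is "hot", promote it.
 *         pdmBlkCacheReclaim(pCache, pEntry->cbData, false, NULL); // no buffer reuse
 *         pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
 *     }
 *     else
 *     {
 *         // Miss - a new entry ends up on LruRecentlyUsedIn (see pdmBlkCacheEntryCreate).
 *     }
 *     if (pEntry)
 *         pdmBlkCacheEntryRelease(pEntry);
 * @endcode
 */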
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/vmm/stam.h>
33#include <VBox/vmm/uvm.h>
34#include <VBox/vmm/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
38#ifdef VBOX_STRICT
39# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
40 do \
41 { \
42 AssertMsg(RTCritSectIsOwner(&Cache->CritSect), \
43 ("Thread does not own critical section\n"));\
44 } while(0)
45
46# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
47 do \
48 { \
49 AssertMsg(RTSemRWIsWriteOwner(pEpCache->SemRWEntries), \
50 ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
51 } while(0)
52
53# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
54 do \
55 { \
56 AssertMsg(RTSemRWIsReadOwner(pEpCache->SemRWEntries), \
57 ("Thread is not read owner of the per endpoint RW semaphore\n")); \
58 } while(0)
59
60#else
61# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
62# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
63# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
64#endif
65
66#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
67
68/*******************************************************************************
69* Internal Functions *
70*******************************************************************************/
71
72static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
73 uint64_t off, size_t cbData, uint8_t *pbBuffer);
74static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
75
76/**
77 * Decrement the reference counter of the given cache entry.
78 *
79 * @returns nothing.
80 * @param pEntry The entry to release.
81 */
82DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
83{
84 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
85 ASMAtomicDecU32(&pEntry->cRefs);
86}
87
88/**
89 * Increment the reference counter of the given cache entry.
90 *
91 * @returns nothing.
92 * @param pEntry The entry to reference.
93 */
94DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
95{
96 ASMAtomicIncU32(&pEntry->cRefs);
97}
98
99#ifdef DEBUG
100static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
101{
102 /* Amount of cached data should never exceed the maximum amount. */
103 AssertMsg(pCache->cbCached <= pCache->cbMax,
104 ("Current amount of cached data exceeds maximum\n"));
105
106 /* The amount of cached data in the recently used in and frequently used lists should match cbCached */
107 AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
108 ("Amount of cached data doesn't match\n"));
109
110 AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
111 ("Paged out list exceeds maximum\n"));
112}
113#endif
114
115DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
116{
117 RTCritSectEnter(&pCache->CritSect);
118#ifdef DEBUG
119 pdmBlkCacheValidate(pCache);
120#endif
121}
122
123DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
124{
125#ifdef DEBUG
126 pdmBlkCacheValidate(pCache);
127#endif
128 RTCritSectLeave(&pCache->CritSect);
129}
130
131DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached -= cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
138{
139 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
140 pCache->cbCached += cbAmount;
141}
142
143DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
144{
145 pList->cbCached += cbAmount;
146}
147
148DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
149{
150 pList->cbCached -= cbAmount;
151}
152
153#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
154/**
155 * Checks consistency of a LRU list.
156 *
157 * @returns nothing
158 * @param pList The LRU list to check.
159 * @param pNotInList Element which is not allowed to occur in the list.
160 */
161static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
162{
163 PPDMBLKCACHEENTRY pCurr = pList->pHead;
164
165 /* Check that there are no double entries and no cycles in the list. */
166 while (pCurr)
167 {
168 PPDMBLKCACHEENTRY pNext = pCurr->pNext;
169
170 while (pNext)
171 {
172 AssertMsg(pCurr != pNext,
173 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
174 pCurr, pList));
175 pNext = pNext->pNext;
176 }
177
178 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
179
180 if (!pCurr->pNext)
181 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
182
183 pCurr = pCurr->pNext;
184 }
185}
186#endif
187
188/**
189 * Unlinks a cache entry from the LRU list it is assigned to.
190 *
191 * @returns nothing.
192 * @param pEntry The entry to unlink.
193 */
194static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
195{
196 PPDMBLKLRULIST pList = pEntry->pList;
197 PPDMBLKCACHEENTRY pPrev, pNext;
198
199 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
200
201 AssertPtr(pList);
202
203#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
204 pdmBlkCacheCheckList(pList, NULL);
205#endif
206
207 pPrev = pEntry->pPrev;
208 pNext = pEntry->pNext;
209
210 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
211 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
212
213 if (pPrev)
214 pPrev->pNext = pNext;
215 else
216 {
217 pList->pHead = pNext;
218
219 if (pNext)
220 pNext->pPrev = NULL;
221 }
222
223 if (pNext)
224 pNext->pPrev = pPrev;
225 else
226 {
227 pList->pTail = pPrev;
228
229 if (pPrev)
230 pPrev->pNext = NULL;
231 }
232
233 pEntry->pList = NULL;
234 pEntry->pPrev = NULL;
235 pEntry->pNext = NULL;
236 pdmBlkCacheListSub(pList, pEntry->cbData);
237#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
238 pdmBlkCacheCheckList(pList, pEntry);
239#endif
240}
241
242/**
243 * Adds a cache entry to the given LRU list unlinking it from the currently
244 * assigned list if needed.
245 *
246 * @returns nothing.
247 * @param pList The list to add the entry to.
248 * @param pEntry Entry to add.
249 */
250static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
251{
252 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
253#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
254 pdmBlkCacheCheckList(pList, NULL);
255#endif
256
257 /* Remove from old list if needed */
258 if (pEntry->pList)
259 pdmBlkCacheEntryRemoveFromList(pEntry);
260
261 pEntry->pNext = pList->pHead;
262 if (pList->pHead)
263 pList->pHead->pPrev = pEntry;
264 else
265 {
266 Assert(!pList->pTail);
267 pList->pTail = pEntry;
268 }
269
270 pEntry->pPrev = NULL;
271 pList->pHead = pEntry;
272 pdmBlkCacheListAdd(pList, pEntry->cbData);
273 pEntry->pList = pList;
274#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
275 pdmBlkCacheCheckList(pList, NULL);
276#endif
277}
278
279/**
280 * Destroys a LRU list freeing all entries.
281 *
282 * @returns nothing
283 * @param pList Pointer to the LRU list to destroy.
284 *
285 * @note The caller must own the critical section of the cache.
286 */
287static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
288{
289 while (pList->pHead)
290 {
291 PPDMBLKCACHEENTRY pEntry = pList->pHead;
292
293 pList->pHead = pEntry->pNext;
294
295 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
296 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
297
298 RTMemPageFree(pEntry->pbData, pEntry->cbData);
299 RTMemFree(pEntry);
300 }
301}
302
303/**
304 * Tries to remove the given amount of bytes from a given list in the cache,
305 * moving the entries to the given ghost list.
306 *
307 * @returns Amount of data which could be freed.
308 * @param pCache Pointer to the global cache data.
309 * @param cbData The amount of data to free.
310 * @param pListSrc The source list to evict data from.
311 * @param pGhostListDst The ghost list removed entries should be moved to.
312 * NULL if the entries should be freed.
313 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size.
314 * @param ppbBuffer Where to store the address of the buffer if an entry with the
315 * same size was found and fReuseBuffer is true.
316 *
317 * @note This function may return fewer bytes than requested because entries
318 * may be marked as non-evictable if they are used for I/O at the
319 * moment.
320 */
321static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
322 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
323 bool fReuseBuffer, uint8_t **ppbBuffer)
324{
325 size_t cbEvicted = 0;
326
327 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
328
329 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
330 AssertMsg( !pGhostListDst
331 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
332 ("Destination list must be NULL or the recently used but paged out list\n"));
333
334 if (fReuseBuffer)
335 {
336 AssertPtr(ppbBuffer);
337 *ppbBuffer = NULL;
338 }
339
340 /* Start deleting from the tail. */
341 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
342
343 while ((cbEvicted < cbData) && pEntry)
344 {
345 PPDMBLKCACHEENTRY pCurr = pEntry;
346
347 pEntry = pEntry->pPrev;
348
349 /* We can't evict entries which have I/O in progress or are dirty but not yet written back. */
350 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
351 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
352 {
353 /* Ok eviction candidate. Grab the endpoint semaphore and check again
354 * because somebody else might have raced us. */
355 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
356 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
357
358 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
359 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
360 {
361 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
362
363 if (fReuseBuffer && (pCurr->cbData == cbData))
364 {
365 STAM_COUNTER_INC(&pCache->StatBuffersReused);
366 *ppbBuffer = pCurr->pbData;
367 }
368 else if (pCurr->pbData)
369 RTMemPageFree(pCurr->pbData, pCurr->cbData);
370
371 pCurr->pbData = NULL;
372 cbEvicted += pCurr->cbData;
373
374 pdmBlkCacheEntryRemoveFromList(pCurr);
375 pdmBlkCacheSub(pCache, pCurr->cbData);
376
377 if (pGhostListDst)
378 {
379 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
380
381 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
382
383 /* We have to remove the last entries from the paged out list. */
384 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
385 && pGhostEntFree)
386 {
387 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
388 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
389
390 pGhostEntFree = pGhostEntFree->pPrev;
391
392 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
393
394 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
395 {
396 pdmBlkCacheEntryRemoveFromList(pFree);
397
398 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
399 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
400 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
401
402 RTMemFree(pFree);
403 }
404
405 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
406 }
407
408 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
409 {
410 /* Couldn't remove enough entries from the ghost list. Delete the entry instead. */
411 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
412 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
413 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
414
415 RTMemFree(pCurr);
416 }
417 else
418 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
419 }
420 else
421 {
422 /* Delete the entry from the AVL tree it is assigned to. */
423 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
424 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
425 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
426
427 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
428 RTMemFree(pCurr);
429 }
430 }
431
432 }
433 else
434 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
435 }
436
437 return cbEvicted;
438}
439
440static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
441{
442 size_t cbRemoved = 0;
443
444 if ((pCache->cbCached + cbData) < pCache->cbMax)
445 return true;
446 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
447 {
448 /* Try to evict as many bytes as possible from A1in */
449 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
450 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
451
452 /*
453 * If it was not possible to remove enough entries
454 * try the frequently accessed cache.
455 */
456 if (cbRemoved < cbData)
457 {
458 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but didn't free enough data. */
459
460 /*
461 * If we already removed something we can't pass the reuse-buffer flag on because
462 * we no longer need to evict the full amount (a reused buffer must match the originally requested size).
463 */
464 if (!cbRemoved)
465 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
466 NULL, fReuseBuffer, ppbBuffer);
467 else
468 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
469 NULL, false, NULL);
470 }
471 }
472 else
473 {
474 /* We have to remove entries from the frequently accessed list. */
475 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
476 NULL, fReuseBuffer, ppbBuffer);
477 }
478
479 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
480 return (cbRemoved >= cbData);
481}
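/*
 * Worked example (illustration only, using the defaults configured in
 * pdmR3BlkCacheInit() below): with CacheSize = 5 MB the limits are
 * cbRecentlyUsedInMax = 1.25 MB (25%) and cbRecentlyUsedOutMax = 2.5 MB (50%).
 * Reclaiming room for a new 64 KB entry while cbCached is already at 5 MB
 * fails the first check above; if LruRecentlyUsedIn then holds more than
 * 1.25 MB - 64 KB, the bytes are evicted from it into the LruRecentlyUsedOut
 * ghost list, otherwise they are taken from LruFrequentlyUsed and freed.
 */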
482
483DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEIOXFER pIoXfer)
484{
485 int rc = VINF_SUCCESS;
486
487 switch (pBlkCache->enmType)
488 {
489 case PDMBLKCACHETYPE_DEV:
490 {
491 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
492 pIoXfer->enmXferDir,
493 off, pIoXfer->cbXfer,
494 &pIoXfer->SgBuf, pIoXfer);
495 break;
496 }
497 case PDMBLKCACHETYPE_DRV:
498 {
499 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
500 pIoXfer->enmXferDir,
501 off, pIoXfer->cbXfer,
502 &pIoXfer->SgBuf, pIoXfer);
503 break;
504 }
505 case PDMBLKCACHETYPE_USB:
506 {
507 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
508 pIoXfer->enmXferDir,
509 off, pIoXfer->cbXfer,
510 &pIoXfer->SgBuf, pIoXfer);
511 break;
512 }
513 case PDMBLKCACHETYPE_INTERNAL:
514 {
515 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
516 pIoXfer->enmXferDir,
517 off, pIoXfer->cbXfer,
518 &pIoXfer->SgBuf, pIoXfer);
519 break;
520 }
521 default:
522 AssertMsgFailed(("Unknown block cache type!\n"));
523 }
524
525 return rc;
526}
527
528/**
529 * Initiates a read I/O task for the given entry.
530 *
531 * @returns VBox status code.
532 * @param pEntry The entry to fetch the data to.
533 */
534static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
535{
536 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
537 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
538
539 /* Make sure no one evicts the entry while it is accessed. */
540 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
541
542 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
543 if (RT_UNLIKELY(!pIoXfer))
544 return VERR_NO_MEMORY;
545
546 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
547
548 pIoXfer->fIoCache = true;
549 pIoXfer->pEntry = pEntry;
550 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
551 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
552 pIoXfer->cbXfer = pEntry->cbData;
553 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
554 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
555
556 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
557}
558
559/**
560 * Initiates a write I/O task for the given entry.
561 *
562 * @returns VBox status code.
563 * @param pEntry The entry to write the data from.
564 */
565static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
566{
567 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
568 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
569
570 /* Make sure no one evicts the entry while it is accessed. */
571 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
572
573 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
574 if (RT_UNLIKELY(!pIoXfer))
575 return VERR_NO_MEMORY;
576
577 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
578
579 pIoXfer->fIoCache = true;
580 pIoXfer->pEntry = pEntry;
581 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
582 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
583 pIoXfer->cbXfer = pEntry->cbData;
584 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
585 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
586
587 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
588}
589
590/**
591 * Passes a part of a request directly through to the I/O manager
592 * handling the endpoint.
593 *
594 * @returns VBox status code.
595 * @param pBlkCache The endpoint cache.
596 * @param pReq The request to associate the transfer with.
597 * @param pSgBuf The scatter/gather buffer to use.
598 * @param offStart Offset to start the transfer from.
599 * @param cbData Amount of data to transfer.
600 * @param enmXferDir The transfer direction (read/write).
601 */
602static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
603 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
604 PDMBLKCACHEXFERDIR enmXferDir)
605{
606
607 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
608 if (RT_UNLIKELY(!pIoXfer))
609 return VERR_NO_MEMORY;
610
611 ASMAtomicIncU32(&pReq->cXfersPending);
612 pIoXfer->fIoCache = false;
613 pIoXfer->pReq = pReq;
614 pIoXfer->cbXfer = cbData;
615 pIoXfer->enmXferDir = enmXferDir;
616 if (pSgBuf)
617 {
618 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
619 RTSgBufAdvance(pSgBuf, cbData);
620 }
621
622 return pdmBlkCacheEnqueue(pBlkCache, offStart, pIoXfer);
623}
624
625/**
626 * Commit a single dirty entry to the endpoint
627 *
628 * @returns nothing
629 * @param pEntry The entry to commit.
630 */
631static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
632{
633 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
634 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
635 ("Invalid flags set for entry %#p\n", pEntry));
636
637 pdmBlkCacheEntryWriteToMedium(pEntry);
638}
639
640/**
641 * Commit all dirty entries for a single endpoint.
642 *
643 * @returns nothing.
644 * @param pBlkCache The endpoint cache to commit.
645 */
646static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
647{
648 uint32_t cbCommitted = 0;
649
650 /* Return if the cache was suspended. */
651 if (pBlkCache->fSuspended)
652 return;
653
654 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
655
656 /* The list is moved to a local list head to reduce locking overhead. */
657 RTLISTNODE ListDirtyNotCommitted;
658 RTSPINLOCKTMP Tmp;
659
660 RTListInit(&ListDirtyNotCommitted);
661 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
662 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
663 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
664
665 if (!RTListIsEmpty(&ListDirtyNotCommitted))
666 {
667 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
668
669 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
670 {
671 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
672 NodeNotCommitted);
673 pdmBlkCacheEntryCommit(pEntry);
674 cbCommitted += pEntry->cbData;
675 RTListNodeRemove(&pEntry->NodeNotCommitted);
676 pEntry = pNext;
677 }
678
679 /* Commit the last entry */
680 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
681 pdmBlkCacheEntryCommit(pEntry);
682 RTListNodeRemove(&pEntry->NodeNotCommitted);
683 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
684 ("Committed all entries but list is not empty\n"));
685 }
686
687 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
688 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
689 ("Number of committed bytes exceeds number of dirty bytes\n"));
690 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
691
692 /* Stop the commit timer if we don't have any dirty bytes left. */
693 if ( !(cbDirtyOld - cbCommitted)
694 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
695 TMTimerStop(pBlkCache->pCache->pTimerCommit);
696}
697
698/**
699 * Commit all dirty entries in the cache.
700 *
701 * @returns nothing.
702 * @param pCache The global cache instance.
703 */
704static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
705{
706 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
707
708 if (!fCommitInProgress)
709 {
710 pdmBlkCacheLockEnter(pCache);
711 Assert(!RTListIsEmpty(&pCache->ListUsers));
712
713 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
714 AssertPtr(pBlkCache);
715
716 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
717 {
718 pdmBlkCacheCommit(pBlkCache);
719
720 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
721 NodeCacheUser);
722 }
723
724 /* Commit the last endpoint */
725 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
726 pdmBlkCacheCommit(pBlkCache);
727
728 pdmBlkCacheLockLeave(pCache);
729 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
730 }
731}
732
733/**
734 * Adds the given entry as a dirty entry to the cache.
735 *
736 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
737 * @param pBlkCache The endpoint cache the entry belongs to.
738 * @param pEntry The entry to add.
739 */
740static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
741{
742 bool fDirtyBytesExceeded = false;
743 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
744
745 /* If the commit timer is disabled we commit right away. */
746 if (pCache->u32CommitTimeoutMs == 0)
747 {
748 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
749 pdmBlkCacheEntryCommit(pEntry);
750 }
751 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
752 {
753 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
754
755 RTSPINLOCKTMP Tmp;
756 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
757 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
758 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
759
760 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
761
762 /* Prevent committing if the VM was suspended. */
763 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
764 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
765 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
766 {
767 /* Arm the commit timer. */
768 TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs);
769 }
770 }
771
772 return fDirtyBytesExceeded;
773}
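/*
 * Expected caller pattern (sketch; the actual write path is further down in
 * this file and not part of this excerpt): callers presumably force an
 * immediate commit once the returned flag signals that the dirty threshold
 * was crossed, along the lines of
 *
 * @code
 *     if (pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry))
 *         pdmBlkCacheCommitDirtyEntries(pBlkCache->pCache);
 * @endcode
 */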
774
775static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
776{
777 bool fFound = false;
778 PPDMBLKCACHE pBlkCache = NULL;
779
780 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
781 {
782 if (!RTStrCmp(pBlkCache->pszId, pcszId))
783 {
784 fFound = true;
785 break;
786 }
787 }
788
789 return fFound ? pBlkCache : NULL;
790}
791
792/**
793 * Commit timer callback.
794 */
795static void pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
796{
797 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
798
799 LogFlowFunc(("Commit interval expired, committing dirty entries\n"));
800
801 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
802 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
803 pdmBlkCacheCommitDirtyEntries(pCache);
804
805 LogFlowFunc(("Entries committed, going to sleep\n"));
806}
807
808static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
809{
810 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
811
812 AssertPtr(pBlkCacheGlobal);
813
814 pdmBlkCacheLockEnter(pBlkCacheGlobal);
815
816 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
817
818 /* Go through the list and save all dirty entries. */
819 PPDMBLKCACHE pBlkCache;
820 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
821 {
822 uint32_t cEntries = 0;
823 PPDMBLKCACHEENTRY pEntry;
824
825 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
826 SSMR3PutU32(pSSM, strlen(pBlkCache->pszId));
827 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
828
829 /* Count the number of entries to save. */
830 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
831 {
832 cEntries++;
833 }
834
835 SSMR3PutU32(pSSM, cEntries);
836
837 /* Walk the list of all dirty entries and save them. */
838 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
839 {
840 /* A few sanity checks. */
841 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
842 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
843 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
844 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
845 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
846 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
847 ("Invalid list\n"));
848 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
849 ("Size and range do not match\n"));
850
851 /* Save */
852 SSMR3PutU64(pSSM, pEntry->Core.Key);
853 SSMR3PutU32(pSSM, pEntry->cbData);
854 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
855 }
856
857 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
858 }
859
860 pdmBlkCacheLockLeave(pBlkCacheGlobal);
861
862 /* Terminator */
863 return SSMR3PutU32(pSSM, UINT32_MAX);
864}
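/*
 * Saved state layout produced above (derived from the SSMR3Put* calls, listed
 * here for reference):
 *
 *     u32  cRefs                     - number of registered cache users
 *     per user:
 *       u32  cbId                    - length of the user id (without terminator)
 *       strz pszId                   - the user id string
 *       u32  cEntries                - number of dirty entries for this user
 *       per entry:
 *         u64 off                    - start offset (Core.Key)
 *         u32 cbData                 - size of the cached data in bytes
 *         u8  abData[cbData]         - the cached data itself
 *     u32  UINT32_MAX                - end of stream marker
 */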
865
866static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
867{
868 int rc = VINF_SUCCESS;
869 uint32_t cRefs;
870 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
871
872 AssertPtr(pBlkCacheGlobal);
873
874 pdmBlkCacheLockEnter(pBlkCacheGlobal);
875
876 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
877 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
878
879 SSMR3GetU32(pSSM, &cRefs);
880
881 if (cRefs == pBlkCacheGlobal->cRefs)
882 {
883 char *pszId = NULL;
884
885 while ( cRefs > 0
886 && RT_SUCCESS(rc))
887 {
888 PPDMBLKCACHE pBlkCache = NULL;
889 uint32_t cbId = 0;
890
891 SSMR3GetU32(pSSM, &cbId);
892 Assert(cbId > 0);
893
894 cbId++; /* Include terminator */
895 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
896 if (!pszId)
897 {
898 rc = VERR_NO_MEMORY;
899 break;
900 }
901
902 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
903 AssertRC(rc);
904
905 /* Search for the block cache with the provided id. */
906 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
907 if (!pBlkCache)
908 {
909 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
910 N_("The VM is missing a block device. Please make sure the source and target VMs have compatible storage configurations"));
911 break;
912 }
913
914 RTStrFree(pszId);
915 pszId = NULL;
916
917 /* Get the entries */
918 uint32_t cEntries;
919 SSMR3GetU32(pSSM, &cEntries);
920
921 while (cEntries > 0)
922 {
923 PPDMBLKCACHEENTRY pEntry;
924 uint64_t off;
925 uint32_t cbEntry;
926
927 SSMR3GetU64(pSSM, &off);
928 SSMR3GetU32(pSSM, &cbEntry);
929
930 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
931 if (!pEntry)
932 {
933 rc = VERR_NO_MEMORY;
934 break;
935 }
936
937 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
938 if (RT_FAILURE(rc))
939 {
940 RTMemFree(pEntry->pbData);
941 RTMemFree(pEntry);
942 break;
943 }
944
945 /* Insert into the tree. */
946 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
947 Assert(fInserted);
948
949 /* Add to the dirty list. */
950 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
951 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
952 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
953 pdmBlkCacheEntryRelease(pEntry);
954 cEntries--;
955 }
956
957 cRefs--;
958 }
959
960 if (pszId)
961 RTStrFree(pszId);
962 }
963 else
964 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
965 N_("The VM is missing a block device. Please make sure the source and target VMs have compatible storage configurations"));
966
967 pdmBlkCacheLockLeave(pBlkCacheGlobal);
968
969 if (RT_SUCCESS(rc))
970 {
971 uint32_t u32 = 0;
972 rc = SSMR3GetU32(pSSM, &u32);
973 if (RT_SUCCESS(rc))
974 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
975 }
976
977 return rc;
978}
979
980int pdmR3BlkCacheInit(PVM pVM)
981{
982 int rc = VINF_SUCCESS;
983 PUVM pUVM = pVM->pUVM;
984 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
985
986 LogFlowFunc((": pVM=%p\n", pVM));
987
988 VM_ASSERT_EMT(pVM);
989
990 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
991 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
992
993 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
994 if (!pBlkCacheGlobal)
995 return VERR_NO_MEMORY;
996
997 RTListInit(&pBlkCacheGlobal->ListUsers);
998 pBlkCacheGlobal->pVM = pVM;
999 pBlkCacheGlobal->cRefs = 0;
1000 pBlkCacheGlobal->cbCached = 0;
1001 pBlkCacheGlobal->fCommitInProgress = false;
1002
1003 /* Initialize members */
1004 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1005 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1006 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1007
1008 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1009 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1010 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1011
1012 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1013 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1014 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1015
1016 do
1017 {
1018 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1019 AssertLogRelRCBreak(rc);
1020 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1021
1022 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1023 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1024 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1025 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1026
1027 /** @todo r=aeichner: Experiment to find optimal default values */
1028 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1029 AssertLogRelRCBreak(rc);
1030 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1031 AssertLogRelRCBreak(rc);
1032 } while (0);
1033
1034 if (RT_SUCCESS(rc))
1035 {
1036 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1037 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1038 "/PDM/BlkCache/cbMax",
1039 STAMUNIT_BYTES,
1040 "Maximum cache size");
1041 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1042 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1043 "/PDM/BlkCache/cbCached",
1044 STAMUNIT_BYTES,
1045 "Currently used cache");
1046 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1047 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1048 "/PDM/BlkCache/cbCachedMruIn",
1049 STAMUNIT_BYTES,
1050 "Number of bytes cached in the recently used in list");
1051 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1052 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1053 "/PDM/BlkCache/cbCachedMruOut",
1054 STAMUNIT_BYTES,
1055 "Number of bytes on the recently used out ghost list");
1056 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1057 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1058 "/PDM/BlkCache/cbCachedFru",
1059 STAMUNIT_BYTES,
1060 "Number of bytes cached in the frequently used list");
1061
1062#ifdef VBOX_WITH_STATISTICS
1063 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1064 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1065 "/PDM/BlkCache/CacheHits",
1066 STAMUNIT_COUNT, "Number of hits in the cache");
1067 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1068 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1069 "/PDM/BlkCache/CachePartialHits",
1070 STAMUNIT_COUNT, "Number of partial hits in the cache");
1071 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1072 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1073 "/PDM/BlkCache/CacheMisses",
1074 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1075 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1076 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1077 "/PDM/BlkCache/CacheRead",
1078 STAMUNIT_BYTES, "Number of bytes read from the cache");
1079 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1080 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1081 "/PDM/BlkCache/CacheWritten",
1082 STAMUNIT_BYTES, "Number of bytes written to the cache");
1083 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1084 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1085 "/PDM/BlkCache/CacheTreeGet",
1086 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1087 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1088 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1089 "/PDM/BlkCache/CacheTreeInsert",
1090 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1091 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1092 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1093 "/PDM/BlkCache/CacheTreeRemove",
1094 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry from the tree");
1095 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1096 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1097 "/PDM/BlkCache/CacheBuffersReused",
1098 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1099#endif
1100
1101 /* Initialize the critical section */
1102 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1103 }
1104
1105 if (RT_SUCCESS(rc))
1106 {
1107 /* Create the commit timer */
1108 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1109 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1110 pdmBlkCacheCommitTimerCallback,
1111 pBlkCacheGlobal,
1112 "BlkCache-Commit",
1113 &pBlkCacheGlobal->pTimerCommit);
1114
1115 if (RT_SUCCESS(rc))
1116 {
1117 /* Register saved state handler. */
1118 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1119 NULL, NULL, NULL,
1120 NULL, pdmR3BlkCacheSaveExec, NULL,
1121 NULL, pdmR3BlkCacheLoadExec, NULL);
1122 if (RT_SUCCESS(rc))
1123 {
1124 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1125 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1126 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1127 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1128 return VINF_SUCCESS;
1129 }
1130 }
1131
1132 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1133 }
1134
1135 if (pBlkCacheGlobal)
1136 RTMemFree(pBlkCacheGlobal);
1137
1138 LogFlowFunc((": returns rc=%Rrc\n", rc));
1139 return rc;
1140}
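/*
 * Configuration summary (taken from the CFGMR3QueryU32Def() calls above; the
 * keys live under the PDM/BlkCache node of the VM configuration tree):
 *
 *     CacheSize               total cache size in bytes            (default 5 MB)
 *     CacheCommitIntervalMs   commit timer interval in ms          (default 10000, 0 commits immediately)
 *     CacheCommitThreshold    dirty bytes forcing an early commit  (default CacheSize / 2)
 */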
1141
1142void pdmR3BlkCacheTerm(PVM pVM)
1143{
1144 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1145
1146 if (pBlkCacheGlobal)
1147 {
1148 /* Make sure no one else uses the cache now */
1149 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1150
1151 /* Clean up, deleting all cache entries and waiting for in-progress entries to finish. */
1152 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1153 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1154 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1155
1156 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1157
1158 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1159 RTMemFree(pBlkCacheGlobal);
1160 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1161 }
1162}
1163
1164int pdmR3BlkCacheResume(PVM pVM)
1165{
1166 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1167
1168 LogFlowFunc(("pVM=%#p\n", pVM));
1169
1170 if ( pBlkCacheGlobal
1171 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1172 {
1173 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1174 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1175 }
1176
1177 return VINF_SUCCESS;
1178}
1179
1180static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1181{
1182 int rc = VINF_SUCCESS;
1183 PPDMBLKCACHE pBlkCache = NULL;
1184 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1185
1186 if (!pBlkCacheGlobal)
1187 return VERR_NOT_SUPPORTED;
1188
1189 /*
1190 * Check that no other user cache has the same id first;
1191 * unique ids are necessary in case the state is saved.
1192 */
1193 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1194
1195 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1196
1197 if (!pBlkCache)
1198 {
1199 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1200
1201 if (pBlkCache)
1202 pBlkCache->pszId = RTStrDup(pcszId);
1203
1204 if ( pBlkCache
1205 && pBlkCache->pszId)
1206 {
1207 pBlkCache->fSuspended = false;
1208 pBlkCache->pCache = pBlkCacheGlobal;
1209 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1210
1211 rc = RTSpinlockCreate(&pBlkCache->LockList);
1212 if (RT_SUCCESS(rc))
1213 {
1214 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1215 if (RT_SUCCESS(rc))
1216 {
1217 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
1218 if (pBlkCache->pTree)
1219 {
1220#ifdef VBOX_WITH_STATISTICS
1221 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1222 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1223 STAMUNIT_COUNT, "Number of deferred writes",
1224 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1225#endif
1226
1227 /* Add to the list of users. */
1228 pBlkCacheGlobal->cRefs++;
1229 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1230 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1231
1232 *ppBlkCache = pBlkCache;
1233 LogFlowFunc(("returns success\n"));
1234 return VINF_SUCCESS;
1235 }
1236 else
1237 rc = VERR_NO_MEMORY;
1238
1239 RTSemRWDestroy(pBlkCache->SemRWEntries);
1240 }
1241
1242 RTSpinlockDestroy(pBlkCache->LockList);
1243 }
1244
1245 RTStrFree(pBlkCache->pszId);
1246 }
1247 else
1248 rc = VERR_NO_MEMORY;
1249
1250 if (pBlkCache)
1251 RTMemFree(pBlkCache);
1252 }
1253 else
1254 rc = VERR_ALREADY_EXISTS;
1255
1256 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1257
1258 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1259 return rc;
1260}
1261
1262VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1263 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1264 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1265 const char *pcszId)
1266{
1267 int rc = VINF_SUCCESS;
1268 PPDMBLKCACHE pBlkCache;
1269
1270 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1271 if (RT_SUCCESS(rc))
1272 {
1273 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1274 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1275 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1276 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1277 *ppBlkCache = pBlkCache;
1278 }
1279
1280 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1281 return rc;
1282}
1283
1284VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1285 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1286 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1287 const char *pcszId)
1288{
1289 int rc = VINF_SUCCESS;
1290 PPDMBLKCACHE pBlkCache;
1291
1292 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1293 if (RT_SUCCESS(rc))
1294 {
1295 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1296 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1297 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1298 pBlkCache->u.Dev.pDevIns = pDevIns;
1299 *ppBlkCache = pBlkCache;
1300 }
1301
1302 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1303 return rc;
1304
1305}
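/*
 * Typical device-side usage (a sketch only; the callback names are made up and
 * the parameter lists are inferred from how pdmBlkCacheEnqueue() and
 * pdmBlkCacheReqComplete() invoke them - see the PFNPDMBLKCACHEXFER* typedefs
 * in the public header for the authoritative prototypes):
 *
 * @code
 *     // Completion callback: called when a transfer queued by the cache finishes.
 *     static DECLCALLBACK(void) myDevXferComplete(PPDMDEVINS pDevIns, void *pvUser, int rcReq);
 *     // Enqueue callback: the device submits the actual I/O described by pSgBuf.
 *     static DECLCALLBACK(int)  myDevXferEnqueue(PPDMDEVINS pDevIns, PDMBLKCACHEXFERDIR enmXferDir,
 *                                                uint64_t off, size_t cbXfer, PRTSGBUF pSgBuf,
 *                                                PPDMBLKCACHEIOXFER pIoXfer);
 *
 *     rc = PDMR3BlkCacheRetainDevice(pVM, pDevIns, &pThis->pBlkCache,
 *                                    myDevXferComplete, myDevXferEnqueue, "MyController-Port0");
 * @endcode
 */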
1306
1307VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1308 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1309 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1310 const char *pcszId)
1311{
1312 int rc = VINF_SUCCESS;
1313 PPDMBLKCACHE pBlkCache;
1314
1315 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1316 if (RT_SUCCESS(rc))
1317 {
1318 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1319 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1320 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1321 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1322 *ppBlkCache = pBlkCache;
1323 }
1324
1325 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1326 return rc;
1327
1328}
1329
1330VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1331 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1332 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1333 const char *pcszId)
1334{
1335 int rc = VINF_SUCCESS;
1336 PPDMBLKCACHE pBlkCache;
1337
1338 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1339 if (RT_SUCCESS(rc))
1340 {
1341 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1342 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1343 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1344 pBlkCache->u.Int.pvUser = pvUser;
1345 *ppBlkCache = pBlkCache;
1346 }
1347
1348 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1349 return rc;
1350
1351}
1352
1353/**
1354 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1355 *
1356 * @returns IPRT status code.
1357 * @param pNode The node to destroy.
1358 * @param pvUser Opaque user data.
1359 */
1360static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1361{
1362 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1363 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1364 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1365
1366 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1367 {
1368 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1369 pdmBlkCacheEntryRef(pEntry);
1370 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1371 pdmBlkCacheLockLeave(pCache);
1372
1373 RTThreadSleep(250);
1374
1375 /* Re-enter all locks */
1376 pdmBlkCacheLockEnter(pCache);
1377 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1378 pdmBlkCacheEntryRelease(pEntry);
1379 }
1380
1381 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1382 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1383
1384 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1385 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1386
1387 pdmBlkCacheEntryRemoveFromList(pEntry);
1388
1389 if (fUpdateCache)
1390 pdmBlkCacheSub(pCache, pEntry->cbData);
1391
1392 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1393 RTMemFree(pEntry);
1394
1395 return VINF_SUCCESS;
1396}
1397
1398/**
1399 * Destroys all cache resources used by the given endpoint.
1400 *
1401 * @returns nothing.
1402 * @param pBlkCache The block cache to destroy.
1403 */
1404VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1405{
1406 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1407
1408 /*
1409 * Commit all dirty entries now (they are waited on for completion during the
1410 * destruction of the AVL tree below).
1411 * The exception is if the VM was paused because of an I/O error before.
1412 */
1413 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1414 pdmBlkCacheCommit(pBlkCache);
1415
1416 /* Make sure nobody is accessing the cache while we delete the tree. */
1417 pdmBlkCacheLockEnter(pCache);
1418 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1419 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1420 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1421
1422 RTSpinlockDestroy(pBlkCache->LockList);
1423
1424 pCache->cRefs--;
1425 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1426
1427 pdmBlkCacheLockLeave(pCache);
1428
1429 RTSemRWDestroy(pBlkCache->SemRWEntries);
1430
1431#ifdef VBOX_WITH_STATISTICS
1432 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1433#endif
1434
1435 RTStrFree(pBlkCache->pszId);
1436 RTMemFree(pBlkCache);
1437}
1438
1439VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1440{
1441 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1442
1443 /*
1444 * Validate input.
1445 */
1446 if (!pDevIns)
1447 return;
1448 VM_ASSERT_EMT(pVM);
1449
1450 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1451 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1452
1453 /* Return silently if not supported. */
1454 if (!pBlkCacheGlobal)
1455 return;
1456
1457 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1458
1459 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1460 {
1461 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1462 && pBlkCache->u.Dev.pDevIns == pDevIns)
1463 PDMR3BlkCacheRelease(pBlkCache);
1464 }
1465
1466 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1467}
1468
1469VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1470{
1471 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1472
1473 /*
1474 * Validate input.
1475 */
1476 if (!pDrvIns)
1477 return;
1478 VM_ASSERT_EMT(pVM);
1479
1480 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1481 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1482
1483 /* Return silently if not supported. */
1484 if (!pBlkCacheGlobal)
1485 return;
1486
1487 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1488
1489 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1490 {
1491 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1492 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1493 PDMR3BlkCacheRelease(pBlkCache);
1494 }
1495
1496 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1497}
1498
1499VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1500{
1501 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1502
1503 /*
1504 * Validate input.
1505 */
1506 if (!pUsbIns)
1507 return;
1508 VM_ASSERT_EMT(pVM);
1509
1510 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1511 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1512
1513 /* Return silently if not supported. */
1514 if (!pBlkCacheGlobal)
1515 return;
1516
1517 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1518
1519 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1520 {
1521 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1522 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1523 PDMR3BlkCacheRelease(pBlkCache);
1524 }
1525
1526 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1527}
1528
1529static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1530{
1531 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1532 PPDMBLKCACHEENTRY pEntry = NULL;
1533
1534 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1535
1536 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1537 pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1538 if (pEntry)
1539 pdmBlkCacheEntryRef(pEntry);
1540 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1541
1542 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1543
1544 return pEntry;
1545}
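/*
 * Note: the entry returned above carries an extra reference; callers must drop
 * it again when done, e.g. (sketch):
 *
 * @code
 *     PPDMBLKCACHEENTRY pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
 *     if (pEntry)
 *     {
 *         // ... inspect pEntry->pbData / pEntry->cbData ...
 *         pdmBlkCacheEntryRelease(pEntry);
 *     }
 * @endcode
 */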
1546
1547/**
1548 * Returns the best fit cache entry for the given offset.
1549 *
1550 * @returns nothing.
1551 * @param pBlkCache The endpoint cache.
1552 * @param off The offset.
1553 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1554 * the given offset. NULL if not required.
1555 */
1556static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1557 PPDMBLKCACHEENTRY *ppEntryAbove)
1558{
1559 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1560
1561 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1562
1563 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1564 if (ppEntryAbove)
1565 {
1566 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1567 if (*ppEntryAbove)
1568 pdmBlkCacheEntryRef(*ppEntryAbove);
1569 }
1570
1571 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1572
1573 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1574}
1575
1576static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1577{
1578 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1579
1580 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1581 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1582 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1583 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1584 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1585 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1586}
1587
1588/**
1589 * Allocates and initializes a new entry for the cache.
1590 * The entry has a reference count of 1.
1591 *
1592 * @returns Pointer to the new cache entry or NULL if out of memory.
1593 * @param pBlkCache The cache the entry belongs to.
1594 * @param off Start offset.
1595 * @param cbData Size of the cache entry.
1596 * @param pbBuffer Pointer to the buffer to use.
1597 * NULL if a new buffer should be allocated.
1598 * The buffer needs to have the same size as the entry.
1599 */
1600static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1601 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1602{
1603 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1604
1605 if (RT_UNLIKELY(!pEntryNew))
1606 return NULL;
1607
1608 pEntryNew->Core.Key = off;
1609 pEntryNew->Core.KeyLast = off + cbData - 1;
1610 pEntryNew->pBlkCache = pBlkCache;
1611 pEntryNew->fFlags = 0;
1612 pEntryNew->cRefs = 1; /* We are using it now. */
1613 pEntryNew->pList = NULL;
1614 pEntryNew->cbData = cbData;
1615 pEntryNew->pWaitingHead = NULL;
1616 pEntryNew->pWaitingTail = NULL;
1617 if (pbBuffer)
1618 pEntryNew->pbData = pbBuffer;
1619 else
1620 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1621
1622 if (RT_UNLIKELY(!pEntryNew->pbData))
1623 {
1624 RTMemFree(pEntryNew);
1625 return NULL;
1626 }
1627
1628 return pEntryNew;
1629}
1630
1631/**
1632 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1633 * in exclusive mode.
1634 *
1635 * @returns true if the flag in fSet is set and the one in fClear is clear.
1636 * false otherwise.
1637 * The R/W semaphore is only held if true is returned.
1638 *
1639 * @param pBlkCache The endpoint cache instance data.
1640 * @param pEntry The entry to check the flags for.
1641 * @param fSet The flag which is tested to be set.
1642 * @param fClear The flag which is tested to be clear.
1643 */
1644DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1645 PPDMBLKCACHEENTRY pEntry,
1646 uint32_t fSet, uint32_t fClear)
1647{
1648 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1649 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1650
1651 if (fPassed)
1652 {
1653 /* Acquire the lock and check again because the completion callback might have raced us. */
1654 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1655
1656 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1657 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1658
1659 /* Drop the lock if we didn't pass the test. */
1660 if (!fPassed)
1661 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1662 }
1663
1664 return fPassed;
1665}
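/*
 * Usage sketch (illustrative; the real callers live in the read/write paths
 * further down): check that an entry is dirty but has no I/O in flight before
 * touching it, and remember that the write lock is only held on success:
 *
 * @code
 *     if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
 *                                                   PDMBLKCACHE_ENTRY_IS_DIRTY,
 *                                                   PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
 *     {
 *         // ... update the entry ...
 *         RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
 *     }
 * @endcode
 */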
1666
1667/**
1668 * Adds a waiter to the waiting list of a cache entry
1669 * which is currently in progress.
1670 *
1671 * @returns nothing.
1672 * @param pEntry The cache entry to add the waiter to.
1673 * @param pWaiter The waiter to add.
1674 */
1675DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1676 PPDMBLKCACHEWAITER pWaiter)
1677{
1678 pWaiter->pNext = NULL;
1679
1680 if (pEntry->pWaitingHead)
1681 {
1682 AssertPtr(pEntry->pWaitingTail);
1683
1684 pEntry->pWaitingTail->pNext = pWaiter;
1685 pEntry->pWaitingTail = pWaiter;
1686 }
1687 else
1688 {
1689 Assert(!pEntry->pWaitingTail);
1690
1691 pEntry->pWaitingHead = pWaiter;
1692 pEntry->pWaitingTail = pWaiter;
1693 }
1694}
1695
1696/**
1697 * Adds a buffer described by the scatter/gather buffer
1698 * to the entry, waiting for completion.
1699 *
1700 * @returns VBox status code.
1701 * @param pEntry The entry to add the buffer to.
1702 * @param pReq The request associated with the buffer.
1703 * @param pSgBuf The scatter/gather buffer to use.
1704 * @param offDiff Offset from the start of the buffer
1705 * in the entry.
1706 * @param cbData Amount of data to wait for on this entry.
1707 * @param fWrite Flag whether the request waits because it wants to write
1708 * to the cache entry.
1709 */
1710static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1711 PPDMBLKCACHEREQ pReq,
1712 PRTSGBUF pSgBuf, uint64_t offDiff,
1713 size_t cbData, bool fWrite)
1714{
1715 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1716 if (!pWaiter)
1717 return VERR_NO_MEMORY;
1718
1719 ASMAtomicIncU32(&pReq->cXfersPending);
1720 pWaiter->pReq = pReq;
1721 pWaiter->offCacheEntry = offDiff;
1722 pWaiter->cbTransfer = cbData;
1723 pWaiter->fWrite = fWrite;
1724 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1725 RTSgBufAdvance(pSgBuf, cbData);
1726
1727 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1728
1729 return VINF_SUCCESS;
1730}
1731
1732/**
1733 * Calculates an aligned offset and size for a new cache entry
1734 * which do not intersect with an already existing entry or the
1735 * end of the file.
1736 *
1737 * @returns The number of bytes the entry can hold of the requested amount
1738 * of bytes.
1740 * @param pBlkCache The endpoint cache.
1741 * @param off The start offset.
1742 * @param cb The number of bytes the entry needs to hold at least.
1743 * @param uAlignment Alignment of the boundary sizes.
1744 * @param poffAligned Where to store the aligned offset.
1745 * @param pcbAligned Where to store the aligned size of the entry.
1746 */
1747static size_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1748 uint64_t off, size_t cb,
1749 unsigned uAlignment,
1750 uint64_t *poffAligned, size_t *pcbAligned)
1751{
1752 size_t cbAligned;
1753 size_t cbInEntry = 0;
1754 uint64_t offAligned;
1755 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1756
1757 /* Get the best fit entries around the offset */
1758 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1759
1760 /* Log the info */
1761 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1762 pEntryAbove ? "B" : "No b",
1763 off,
1764 pEntryAbove ? pEntryAbove->Core.Key : 0,
1765 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1766 pEntryAbove ? pEntryAbove->cbData : 0));
1767
1768 offAligned = off;
1769
1770 if ( pEntryAbove
1771 && off + cb > pEntryAbove->Core.Key)
1772 {
1773 cbInEntry = pEntryAbove->Core.Key - off;
1774 cbAligned = pEntryAbove->Core.Key - offAligned;
1775 }
1776 else
1777 {
1778 cbAligned = cb;
1779 cbInEntry = cb;
1780 }
1781
1782 /* A few sanity checks */
1783 AssertMsg(!pEntryAbove || (offAligned + cbAligned) <= pEntryAbove->Core.Key,
1784 ("Aligned size intersects with another cache entry\n"));
1785 Assert(cbInEntry <= cbAligned);
1786
1787 if (pEntryAbove)
1788 pdmBlkCacheEntryRelease(pEntryAbove);
1789
1790 LogFlow(("offAligned=%llu cbAligned=%u\n", offAligned, cbAligned));
1791
1792 *poffAligned = offAligned;
1793 *pcbAligned = cbAligned;
1794
1795 return cbInEntry;
1796}
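
/*
 * Worked example (illustrative numbers): for a request at off=4096 with
 * cb=16384 and an existing entry above starting at Core.Key=12288, the new
 * entry is clipped to cbInEntry = cbAligned = 12288 - 4096 = 8192 bytes, i.e.
 * it covers [4096..12287]; the remaining 8192 bytes of the request are handled
 * on the next iteration, starting at the entry above.
 */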
1797
1798/**
1799 * Create a new cache entry, evicting data from the cache if required.
1800 *
1801 * @returns Pointer to the new cache entry or NULL
1802 *          if not enough bytes could be evicted from the cache.
1803 * @param pBlkCache    The endpoint cache.
1804 * @param off          The offset.
1805 * @param cb           Number of bytes the cache entry should have.
1806 * @param uAlignment   Alignment the size of the entry should have.
1807 * @param pcbData      Where to store the number of bytes the new
1808 *                     entry can hold. May be lower than actually
1809 *                     requested due to another entry intersecting
1810 *                     the access range.
1811 */
1812static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1813 uint64_t off, size_t cb,
1814 unsigned uAlignment,
1815 size_t *pcbData)
1816{
1817 uint64_t offStart = 0;
1818 size_t cbEntry = 0;
1819 PPDMBLKCACHEENTRY pEntryNew = NULL;
1820 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1821 uint8_t *pbBuffer = NULL;
1822
1823 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, cb, uAlignment,
1824 &offStart, &cbEntry);
1825
1826 pdmBlkCacheLockEnter(pCache);
1827 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1828
1829 if (fEnough)
1830 {
1831 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1832
1833 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, offStart, cbEntry, pbBuffer);
1834 if (RT_LIKELY(pEntryNew))
1835 {
1836 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1837 pdmBlkCacheAdd(pCache, cbEntry);
1838 pdmBlkCacheLockLeave(pCache);
1839
1840 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1841
1842 AssertMsg( (off >= pEntryNew->Core.Key)
1843 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1844 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1845 off, pEntryNew->Core.Key));
1846 }
1847 else
1848 pdmBlkCacheLockLeave(pCache);
1849 }
1850 else
1851 pdmBlkCacheLockLeave(pCache);
1852
1853 return pEntryNew;
1854}
1855
1856static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(size_t cbXfer, void *pvUser)
1857{
1858 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1859
1860 if (RT_LIKELY(pReq))
1861 {
1862 pReq->pvUser = pvUser;
1863 pReq->cbXfer = cbXfer;
1864 pReq->rcReq = VINF_SUCCESS;
1865 pReq->cXfersPending = 0;
1866 }
1867
1868 return pReq;
1869}
1870
1871static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1872{
1873 switch (pBlkCache->enmType)
1874 {
1875 case PDMBLKCACHETYPE_DEV:
1876 {
1877 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1878 pReq->pvUser, pReq->rcReq);
1879 break;
1880 }
1881 case PDMBLKCACHETYPE_DRV:
1882 {
1883 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1884 pReq->pvUser, pReq->rcReq);
1885 break;
1886 }
1887 case PDMBLKCACHETYPE_USB:
1888 {
1889 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1890 pReq->pvUser, pReq->rcReq);
1891 break;
1892 }
1893 case PDMBLKCACHETYPE_INTERNAL:
1894 {
1895 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1896 pReq->pvUser, pReq->rcReq);
1897 break;
1898 }
1899 default:
1900 AssertMsgFailed(("Unknown block cache type!\n"));
1901 }
1902
1903 RTMemFree(pReq);
1904}
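
/*
 * The completion callback receives the owning instance, the pvUser that was
 * passed to PDMR3BlkCacheRead/Write/Flush and the final status code. A minimal
 * sketch of a driver-side handler matching that calling convention follows;
 * the EXAMPLEREQ type and exampleReqComplete() helper are hypothetical.
 */
#if 0 /* Illustrative sketch, not compiled. */
static DECLCALLBACK(void) drvExampleXferComplete(PPDMDRVINS pDrvIns, void *pvUser, int rcReq)
{
    /* pvUser is whatever the driver handed to PDMR3BlkCacheRead/Write/Flush. */
    PEXAMPLEREQ pReq = (PEXAMPLEREQ)pvUser;
    if (RT_FAILURE(rcReq))
        LogRel(("Example: cached transfer %#p failed with %Rrc\n", pReq, rcReq));
    exampleReqComplete(pDrvIns, pReq, rcReq); /* Hypothetical: finish the upper-level request. */
}
#endif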
1905
1906static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1907 size_t cbComplete, int rcReq, bool fCallHandler)
1908{
1909 if (RT_FAILURE(rcReq))
1910 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1911
1912 AssertMsg(pReq->cbXfer >= cbComplete, ("Completed more than left\n"));
1913 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1914 uint32_t cbOld = ASMAtomicSubU32(&pReq->cbXfer, cbComplete);
1915 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1916
1917 if ( !(cbOld - cbComplete)
1918 && !cXfersPending)
1919 {
1920 if (fCallHandler)
1921 pdmBlkCacheReqComplete(pBlkCache, pReq);
1922 return true;
1923 }
1924
1925 LogFlowFunc(("pReq=%#p cXfersPending=%u cbXfer=%u\n", pReq, cXfersPending, (cbOld - cbComplete)));
1926 return false;
1927}
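
/*
 * Worked example (illustrative): a 64 KiB read that is split across two cache
 * entries starts with cbXfer=65536 and cXfersPending=1 (the caller's guard
 * reference). Queuing the two waiters raises cXfersPending to 3; the caller
 * then drops its guard with pdmBlkCacheReqUpdate(..., 0, ..., false), leaving
 * 2. Each completing transfer subtracts its byte count and drops one
 * reference; the second completion sees both counters reach zero and calls
 * pdmBlkCacheReqComplete().
 */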
1928
1929VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1930 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1931{
1932 int rc = VINF_SUCCESS;
1933 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1934 PPDMBLKCACHEENTRY pEntry;
1935 PPDMBLKCACHEREQ pReq;
1936
1937 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1938 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1939
1940 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1941 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1942
1943 RTSGBUF SgBuf;
1944 RTSgBufClone(&SgBuf, pcSgBuf);
1945
1946 /* Allocate new request structure. */
1947 pReq = pdmBlkCacheReqAlloc(cbRead, pvUser);
1948 if (RT_UNLIKELY(!pReq))
1949 return VERR_NO_MEMORY;
1950
1951 /* Increment data transfer counter to keep the request valid while we access it. */
1952 ASMAtomicIncU32(&pReq->cXfersPending);
1953
1954 while (cbRead)
1955 {
1956 size_t cbToRead;
1957
1958 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1959
1960        /*
1961         * If there is no entry we try to create a new one, evicting unused pages
1962         * if the cache is full. If this is not possible we pass the request through
1963         * and skip the caching (all entries may still be in progress so they can't
1964         * be evicted).
1965         * If we have an entry it can be in one of the LRU lists where the entry
1966         * contains data (recently used or frequently used LRU) so we can just read
1967         * the data we need and put the entry at the head of the frequently used LRU list.
1968         * In case the entry is in one of the ghost lists it doesn't contain any data.
1969         * We have to fetch it again, evicting pages from either T1 or T2 to make room.
1970         */
1971 if (pEntry)
1972 {
1973 uint64_t offDiff = off - pEntry->Core.Key;
1974
1975 AssertMsg(off >= pEntry->Core.Key,
1976 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1977 off, pEntry->Core.Key));
1978
1979 AssertPtr(pEntry->pList);
1980
1981 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1982
1983 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1984 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1985 off, cbToRead));
1986
1987 cbRead -= cbToRead;
1988
1989 if (!cbRead)
1990 STAM_COUNTER_INC(&pCache->cHits);
1991 else
1992 STAM_COUNTER_INC(&pCache->cPartialHits);
1993
1994 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1995
1996 /* Ghost lists contain no data. */
1997 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1998 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1999 {
2000 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2001 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2002 PDMBLKCACHE_ENTRY_IS_DIRTY))
2003 {
2004                    /* Entry hasn't completed yet. Append to the waiting list. */
2005 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2006 &SgBuf, offDiff, cbToRead,
2007 false /* fWrite */);
2008 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2009 }
2010 else
2011 {
2012 /* Read as much as we can from the entry. */
2013 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2014 ASMAtomicSubU32(&pReq->cbXfer, cbToRead);
2015 }
2016
2017 /* Move this entry to the top position */
2018 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2019 {
2020 pdmBlkCacheLockEnter(pCache);
2021 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2022 pdmBlkCacheLockLeave(pCache);
2023 }
2024 /* Release the entry */
2025 pdmBlkCacheEntryRelease(pEntry);
2026 }
2027 else
2028 {
2029 uint8_t *pbBuffer = NULL;
2030
2031 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2032
2033 pdmBlkCacheLockEnter(pCache);
2034 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2035 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2036
2037                /* Move the entry to Am (the frequently used LRU list) and fetch the data into the cache. */
2038 if (fEnough)
2039 {
2040 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2041 pdmBlkCacheAdd(pCache, pEntry->cbData);
2042 pdmBlkCacheLockLeave(pCache);
2043
2044 if (pbBuffer)
2045 pEntry->pbData = pbBuffer;
2046 else
2047 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2048 AssertPtr(pEntry->pbData);
2049
2050 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2051 &SgBuf, offDiff, cbToRead,
2052 false /* fWrite */);
2053 pdmBlkCacheEntryReadFromMedium(pEntry);
2054 /* Release the entry */
2055 pdmBlkCacheEntryRelease(pEntry);
2056 }
2057 else
2058 {
2059 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2060 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2061 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2062 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2063 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2064
2065 pdmBlkCacheLockLeave(pCache);
2066
2067 RTMemFree(pEntry);
2068
2069 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2070 &SgBuf, off, cbToRead,
2071 PDMBLKCACHEXFERDIR_READ);
2072 }
2073 }
2074 }
2075 else
2076 {
2077#ifdef VBOX_WITH_IO_READ_CACHE
2078 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2079 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2080 off, cbRead,
2081 PAGE_SIZE,
2082 &cbToRead);
2083
2084 cbRead -= cbToRead;
2085
2086 if (pEntryNew)
2087 {
2088 if (!cbRead)
2089 STAM_COUNTER_INC(&pCache->cMisses);
2090 else
2091 STAM_COUNTER_INC(&pCache->cPartialHits);
2092
2093 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2094 &SgBuf,
2095 off - pEntryNew->Core.Key,
2096 cbToRead,
2097 false /* fWrite */);
2098 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2099 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2100 }
2101 else
2102 {
2103 /*
2104 * There is not enough free space in the cache.
2105 * Pass the request directly to the I/O manager.
2106 */
2107 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2108
2109 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2110 &SgBuf, off, cbToRead,
2111 PDMBLKCACHEXFERDIR_READ);
2112 }
2113#else
2114 /* Clip read size if necessary. */
2115 PPDMBLKCACHEENTRY pEntryAbove;
2116 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2117
2118 if (pEntryAbove)
2119 {
2120 if (off + cbRead > pEntryAbove->Core.Key)
2121 cbToRead = pEntryAbove->Core.Key - off;
2122 else
2123 cbToRead = cbRead;
2124
2125 pdmBlkCacheEntryRelease(pEntryAbove);
2126 }
2127 else
2128 cbToRead = cbRead;
2129
2130 cbRead -= cbToRead;
2131 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2132 &SgBuf, off, cbToRead,
2133 PDMBLKCACHEXFERDIR_READ);
2134#endif
2135 }
2136 off += cbToRead;
2137 }
2138
2139 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2140 rc = VINF_AIO_TASK_PENDING;
2141
2142 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2143
2144 return rc;
2145}
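
/*
 * Usage sketch (illustrative, not part of the cache): a device or driver
 * issues a cached read through an RTSGBUF. VINF_AIO_TASK_PENDING means the
 * registered pfnXferComplete callback will fire later; plain success means
 * the request was served entirely from the cache and no callback is made.
 * The exampleReqCompleteSync() helper is hypothetical.
 */
#if 0 /* Illustrative sketch, not compiled. */
static int exampleCachedRead(PPDMBLKCACHE pBlkCache, uint64_t off,
                             void *pvBuf, size_t cbRead, void *pvReq)
{
    RTSGSEG Seg = { pvBuf, cbRead };
    RTSGBUF SgBuf;
    RTSgBufInit(&SgBuf, &Seg, 1);

    int rc = PDMR3BlkCacheRead(pBlkCache, off, &SgBuf, cbRead, pvReq);
    if (rc == VINF_AIO_TASK_PENDING)
        return VINF_SUCCESS;               /* Completed asynchronously via pfnXferComplete. */
    if (RT_SUCCESS(rc))
        exampleReqCompleteSync(pvReq, rc); /* Hypothetical: served synchronously from the cache. */
    return rc;
}
#endif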
2146
2147VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
2148 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
2149{
2150 int rc = VINF_SUCCESS;
2151 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2152 PPDMBLKCACHEENTRY pEntry;
2153 PPDMBLKCACHEREQ pReq;
2154
2155 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2156 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
2157
2158 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2159 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2160
2161 RTSGBUF SgBuf;
2162 RTSgBufClone(&SgBuf, pcSgBuf);
2163
2164 /* Allocate new request structure. */
2165 pReq = pdmBlkCacheReqAlloc(cbWrite, pvUser);
2166 if (RT_UNLIKELY(!pReq))
2167 return VERR_NO_MEMORY;
2168
2169 /* Increment data transfer counter to keep the request valid while we access it. */
2170 ASMAtomicIncU32(&pReq->cXfersPending);
2171
2172 while (cbWrite)
2173 {
2174 size_t cbToWrite;
2175
2176 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2177
2178 if (pEntry)
2179 {
2180 /* Write the data into the entry and mark it as dirty */
2181 AssertPtr(pEntry->pList);
2182
2183 uint64_t offDiff = off - pEntry->Core.Key;
2184
2185 AssertMsg(off >= pEntry->Core.Key,
2186 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2187 off, pEntry->Core.Key));
2188
2189 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2190 cbWrite -= cbToWrite;
2191
2192 if (!cbWrite)
2193 STAM_COUNTER_INC(&pCache->cHits);
2194 else
2195 STAM_COUNTER_INC(&pCache->cPartialHits);
2196
2197 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2198
2199 /* Ghost lists contain no data. */
2200 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2201 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2202 {
2203 /* Check if the entry is dirty. */
2204 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2205 PDMBLKCACHE_ENTRY_IS_DIRTY,
2206 0))
2207 {
2208 /* If it is already dirty but not in progress just update the data. */
2209 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2210 {
2211 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff,
2212 cbToWrite);
2213 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2214 }
2215 else
2216 {
2217 /* The data isn't written to the file yet */
2218 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2219 &SgBuf, offDiff, cbToWrite,
2220 true /* fWrite */);
2221 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2222 }
2223
2224 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2225 }
2226 else /* Dirty bit not set */
2227 {
2228 /*
2229 * Check if a read is in progress for this entry.
2230 * We have to defer processing in that case.
2231 */
2232                if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2233 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2234 0))
2235 {
2236 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2237 &SgBuf, offDiff, cbToWrite,
2238 true /* fWrite */);
2239 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2240 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2241 }
2242 else /* I/O in progress flag not set */
2243 {
2244 /* Write as much as we can into the entry and update the file. */
2245 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2246 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2247
2248 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2249 if (fCommit)
2250 pdmBlkCacheCommitDirtyEntries(pCache);
2251 }
2252 } /* Dirty bit not set */
2253
2254 /* Move this entry to the top position */
2255 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2256 {
2257 pdmBlkCacheLockEnter(pCache);
2258 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2259 pdmBlkCacheLockLeave(pCache);
2260 }
2261
2262 pdmBlkCacheEntryRelease(pEntry);
2263 }
2264 else /* Entry is on the ghost list */
2265 {
2266 uint8_t *pbBuffer = NULL;
2267
2268 pdmBlkCacheLockEnter(pCache);
2269 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2270 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2271
2272 if (fEnough)
2273 {
2274                    /* Move the entry to Am (the frequently used LRU list) and fetch the data into the cache. */
2275 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2276 pdmBlkCacheAdd(pCache, pEntry->cbData);
2277 pdmBlkCacheLockLeave(pCache);
2278
2279 if (pbBuffer)
2280 pEntry->pbData = pbBuffer;
2281 else
2282 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2283 AssertPtr(pEntry->pbData);
2284
2285 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2286 &SgBuf, offDiff, cbToWrite,
2287 true /* fWrite */);
2288 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2289 pdmBlkCacheEntryReadFromMedium(pEntry);
2290
2291 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2292 pdmBlkCacheEntryRelease(pEntry);
2293 }
2294 else
2295 {
2296 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2297 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2298 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2299 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2300 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2301
2302 pdmBlkCacheLockLeave(pCache);
2303
2304 RTMemFree(pEntry);
2305 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2306 &SgBuf, off, cbToWrite,
2307 PDMBLKCACHEXFERDIR_WRITE);
2308 }
2309 }
2310 }
2311 else /* No entry found */
2312 {
2313 /*
2314 * No entry found. Try to create a new cache entry to store the data in and if that fails
2315 * write directly to the file.
2316 */
2317 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2318 off, cbWrite,
2319 512, &cbToWrite);
2320
2321 cbWrite -= cbToWrite;
2322
2323 if (pEntryNew)
2324 {
2325 uint64_t offDiff = off - pEntryNew->Core.Key;
2326
2327 STAM_COUNTER_INC(&pCache->cHits);
2328
2329 /*
2330 * Check if it is possible to just write the data without waiting
2331 * for it to get fetched first.
2332 */
2333 if (!offDiff && pEntryNew->cbData == cbToWrite)
2334 {
2335 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2336 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2337
2338 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2339 if (fCommit)
2340 pdmBlkCacheCommitDirtyEntries(pCache);
2341 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2342 }
2343 else
2344 {
2345 /* Defer the write and fetch the data from the endpoint. */
2346 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2347 &SgBuf, offDiff, cbToWrite,
2348 true /* fWrite */);
2349 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2350 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2351 }
2352
2353 pdmBlkCacheEntryRelease(pEntryNew);
2354 }
2355 else
2356 {
2357 /*
2358 * There is not enough free space in the cache.
2359 * Pass the request directly to the I/O manager.
2360 */
2361 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2362
2363 STAM_COUNTER_INC(&pCache->cMisses);
2364
2365 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2366 &SgBuf, off, cbToWrite,
2367 PDMBLKCACHEXFERDIR_WRITE);
2368 }
2369 }
2370
2371 off += cbToWrite;
2372 }
2373
2374 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2375 rc = VINF_AIO_TASK_PENDING;
2376
2377 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2378
2379 return rc;
2380}
2381
2382VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2383{
2384 int rc = VINF_SUCCESS;
2385 PPDMBLKCACHEREQ pReq;
2386
2387 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2388
2389 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2390 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2391
2392 /* Commit dirty entries in the cache. */
2393 pdmBlkCacheCommit(pBlkCache);
2394
2395 /* Allocate new request structure. */
2396 pReq = pdmBlkCacheReqAlloc(0, pvUser);
2397 if (RT_UNLIKELY(!pReq))
2398 return VERR_NO_MEMORY;
2399
2400 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2401 PDMBLKCACHEXFERDIR_FLUSH);
2402 AssertRC(rc);
2403
2404 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2405 return VINF_AIO_TASK_PENDING;
2406}
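
/*
 * Usage sketch (illustrative, not part of the cache): a write normally just
 * lands in a dirty cache entry, so a caller that must have the data on the
 * medium (e.g. when the guest issued a FLUSH) follows up with
 * PDMR3BlkCacheFlush(). The sketch ignores completion ordering; a real caller
 * would wait for the write completion callback before flushing.
 */
#if 0 /* Illustrative sketch, not compiled. */
static int exampleCachedWriteAndFlush(PPDMBLKCACHE pBlkCache, uint64_t off,
                                      void *pvBuf, size_t cbWrite,
                                      void *pvWriteReq, void *pvFlushReq)
{
    RTSGSEG Seg = { pvBuf, cbWrite };
    RTSGBUF SgBuf;
    RTSgBufInit(&SgBuf, &Seg, 1);

    int rc = PDMR3BlkCacheWrite(pBlkCache, off, &SgBuf, cbWrite, pvWriteReq);
    if (RT_FAILURE(rc))
        return rc;

    /* Commit the dirty entries and pass a flush down to the medium. */
    return PDMR3BlkCacheFlush(pBlkCache, pvFlushReq);
}
#endif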
2407
2408/**
2409 * Completes a waiter, freeing all its resources, and completes the request if everything was transferred.
2410 *
2411 * @returns Pointer to the next waiter in the list.
2412 * @param pBlkCache    The endpoint cache.
2413 * @param pWaiter      The waiter to complete.
2414 * @param rc           Status code to set.
2415 */
2416static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2417 PPDMBLKCACHEWAITER pWaiter,
2418 int rc)
2419{
2420 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2421 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2422
2423    pdmBlkCacheReqUpdate(pBlkCache, pReq, pWaiter->cbTransfer, rc, true);
2424
2425 RTMemFree(pWaiter);
2426
2427 return pNext;
2428}
2429
2430static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2431{
2432 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2433 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2434
2435 /* Reference the entry now as we are clearing the I/O in progress flag
2436 * which protected the entry till now. */
2437 pdmBlkCacheEntryRef(pEntry);
2438
2439 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2440 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2441
2442    /* Process the waiting list. The data in the entry might have changed in between. */
2443 bool fDirty = false;
2444 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2445 PPDMBLKCACHEWAITER pCurr = pComplete;
2446
2447 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2448 ("The list tail was not updated correctly\n"));
2449 pEntry->pWaitingTail = NULL;
2450 pEntry->pWaitingHead = NULL;
2451
2452 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2453 {
2454        /*
2455         * An error here is difficult to handle because the original request has completed already.
2456         * The error is logged for now and the VM is suspended.
2457         * If the user resumes the VM the entry is written again in the hope
2458         * that the problem was fixed and the next write succeeds.
2459         */
2460 if (RT_FAILURE(rcIoXfer))
2461 {
2462 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2463 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2464
2465 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2466 {
2467 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2468 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2469 "Make sure there is enough free space on the disk and that the disk is working properly. "
2470 "Operation can be resumed afterwards"),
2471 pBlkCache->pszId, rcIoXfer);
2472 AssertRC(rc);
2473 }
2474
2475 /*
2476 * The entry is still marked as dirty which prevents eviction.
2477 * Add the waiters to the list again.
2478 */
2479 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY; /* Clear so it gets added to the list again. */
2480 fDirty = true;
2481
2482 if (pComplete)
2483 {
2484 pEntry->pWaitingHead = pComplete;
2485 while (pComplete->pNext)
2486 pComplete = pComplete->pNext;
2487 pEntry->pWaitingTail = pComplete;
2488 pComplete = NULL;
2489 }
2490 }
2491 else
2492 {
2493 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2494
2495 while (pCurr)
2496 {
2497 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2498
2499 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2500 fDirty = true;
2501
2502 pCurr = pCurr->pNext;
2503 }
2504 }
2505 }
2506 else
2507 {
2508 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2509 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2510 ("Invalid flags set\n"));
2511
2512 while (pCurr)
2513 {
2514 if (pCurr->fWrite)
2515 {
2516 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2517 fDirty = true;
2518 }
2519 else
2520 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2521
2522 pCurr = pCurr->pNext;
2523 }
2524 }
2525
2526 bool fCommit = false;
2527 if (fDirty)
2528 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2529
2530 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2531
2532    /* Dereference so that it isn't protected anymore, unless another write was issued for it above. */
2533 pdmBlkCacheEntryRelease(pEntry);
2534
2535 if (fCommit)
2536 pdmBlkCacheCommitDirtyEntries(pCache);
2537
2538 /* Complete waiters now. */
2539 while (pComplete)
2540 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2541}
2542
2543VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2544{
2545 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2546
2547 if (hIoXfer->fIoCache)
2548 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2549 else
2550 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, hIoXfer->cbXfer, rcIoXfer, true);
2551}
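
/*
 * The other half of the contract: the owner's pfnXferEnqueue callback receives
 * a PDMBLKCACHEIOXFER handle for each transfer the cache issues and reports the
 * final status back with PDMR3BlkCacheIoXferComplete(). A sketch of such a
 * completion path follows; the async I/O callback shape and the way the handle
 * is stashed in pvUser2 are assumptions.
 */
#if 0 /* Illustrative sketch, not compiled. */
static DECLCALLBACK(void) exampleAsyncIoCompleted(void *pvUser1, void *pvUser2, int rcIo)
{
    PPDMBLKCACHE       pBlkCache = (PPDMBLKCACHE)pvUser1;
    PPDMBLKCACHEIOXFER hIoXfer   = (PPDMBLKCACHEIOXFER)pvUser2;

    /* Hand the status back; the cache updates the entry or the passthrough request. */
    PDMR3BlkCacheIoXferComplete(pBlkCache, hIoXfer, rcIo);
}
#endif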
2552
2553/**
2554 * Callback for the AVL do-with-all routine. Waits for a cache entry to finish any pending I/O.
2555 *
2556 * @returns IPRT status code.
2557 * @param pNode    The cache entry node to wait on.
2558 * @param pvUser   Opaque user data.
2559 */
2560static int pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2561{
2562 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2563 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2564
2565 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2566 {
2567 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2568 pdmBlkCacheEntryRef(pEntry);
2569 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2570
2571 RTThreadSleep(1);
2572
2573 /* Re-enter all locks and drop the reference. */
2574 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2575 pdmBlkCacheEntryRelease(pEntry);
2576 }
2577
2578 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2579 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2580
2581 return VINF_SUCCESS;
2582}
2583
2584VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2585{
2586 int rc = VINF_SUCCESS;
2587 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2588
2589 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2590
2591 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2592
2593 /* Wait for all I/O to complete. */
2594 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2595 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2596 AssertRC(rc);
2597 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2598
2599 return rc;
2600}
2601
2602VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2603{
2604 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2605
2606 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2607
2608 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2609
2610 return VINF_SUCCESS;
2611}
2612