VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@24061

Last change on this file since 24061 was 23975, checked in by vboxsync, 15 years ago

AsyncCompletion: Fix possible data corruption

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 57.1 KB
1/* $Id: PDMAsyncCompletionFileCache.cpp 23975 2009-10-22 12:54:52Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronously in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the ARC algorithm.
25 * http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
26 *
27 * The algorithm uses four LRU (Least Recently Used) lists to store data in the cache.
28 * Two of them contain the actual data: one stores entries which were accessed recently
29 * and the other is used for frequently accessed data.
30 * The other two lists are called ghost lists and store information about the accessed ranges
31 * but do not contain any data. They are used to track data access. If one of these entries is
32 * accessed again, the data is moved to a higher position in the cache, preventing it from
33 * being evicted again quickly.
34 *
35 * The algorithm had to be modified to meet our requirements. Like the implementation
36 * for the ZFS filesystem we need to handle pages with a variable size. It would
37 * be possible to use a fixed size, but this would increase the computational
38 * and memory overhead.
39 * Because we do I/O asynchronously we also need to mark entries which are currently being
40 * accessed as non-evictable to prevent removal of an entry while its data is being accessed.
41 */
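/*
 * Purely illustrative sketch of the ARC state described above. The real
 * structures (PDMACFILECACHEGLOBAL, PDMACFILELRULIST, ...) are defined in
 * PDMAsyncCompletionFileInternal.h, so the names and layout below are
 * hypothetical and only map this cache onto the terms of the ARC paper:
 *
 *     struct ArcSketch
 *     {
 *         List   T1;   // LruRecentlyUsed    - entries with data, seen once recently
 *         List   T2;   // LruFrequentlyUsed  - entries with data, seen at least twice
 *         List   B1;   // LruRecentlyGhost   - metadata only, evicted from T1
 *         List   B2;   // LruFrequentlyGhost - metadata only, evicted from T2
 *         size_t p;    // uAdaptVal          - target size of T1 in bytes
 *     };
 *
 * A hit in B1 grows p (favouring recency), a hit in B2 shrinks p (favouring
 * frequency); pdmacFileCacheUpdate() and pdmacFileCacheReplace() below
 * implement the corresponding ARC steps on a byte rather than page basis.
 */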
42
43/*******************************************************************************
44* Header Files *
45*******************************************************************************/
46#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
47#include <iprt/types.h>
48#include <iprt/mem.h>
49#include <VBox/log.h>
50#include <VBox/stam.h>
51
52#include "PDMAsyncCompletionFileInternal.h"
53
54#ifdef VBOX_STRICT
55# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
56 do \
57 { \
58 AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
59 ("Thread does not own critical section\n"));\
60 } while(0);
61#else
62# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0);
63#endif
64
65/*******************************************************************************
66* Internal Functions *
67*******************************************************************************/
68static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
69
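/**
 * Releases a reference on the given cache entry previously taken with
 * pdmacFileEpCacheEntryRef().
 *
 * @returns nothing.
 * @param pEntry The entry to release.
 */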
70DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
71{
72 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
73 ASMAtomicDecU32(&pEntry->cRefs);
74}
75
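/**
 * Takes a reference on the given cache entry so that it is not evicted
 * while it is being accessed.
 *
 * @returns nothing.
 * @param pEntry The entry to reference.
 */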
76DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
77{
78 ASMAtomicIncU32(&pEntry->cRefs);
79}
80
81/**
82 * Checks consistency of a LRU list.
83 *
84 * @returns nothing
85 * @param pList The LRU list to check.
86 * @param pNotInList Element which is not allowed to occur in the list.
87 */
88static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
89{
90#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
91 PPDMACFILECACHEENTRY pCurr = pList->pHead;
92
93 /* Check that there are no double entries and no cycles in the list. */
94 while (pCurr)
95 {
96 PPDMACFILECACHEENTRY pNext = pCurr->pNext;
97
98 while (pNext)
99 {
100 AssertMsg(pCurr != pNext,
101 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
102 pCurr, pList));
103 pNext = pNext->pNext;
104 }
105
106 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
107
108 if (!pCurr->pNext)
109 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
110
111 pCurr = pCurr->pNext;
112 }
113#endif
114}
115
116/**
117 * Unlinks a cache entry from the LRU list it is assigned to.
118 *
119 * @returns nothing.
120 * @param pEntry The entry to unlink.
121 */
122static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
123{
124 PPDMACFILELRULIST pList = pEntry->pList;
125 PPDMACFILECACHEENTRY pPrev, pNext;
126
127 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
128
129 AssertPtr(pList);
130 pdmacFileCacheCheckList(pList, NULL);
131
132 pPrev = pEntry->pPrev;
133 pNext = pEntry->pNext;
134
135 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
136 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
137
138 if (pPrev)
139 pPrev->pNext = pNext;
140 else
141 {
142 pList->pHead = pNext;
143
144 if (pNext)
145 pNext->pPrev = NULL;
146 }
147
148 if (pNext)
149 pNext->pPrev = pPrev;
150 else
151 {
152 pList->pTail = pPrev;
153
154 if (pPrev)
155 pPrev->pNext = NULL;
156 }
157
158 pEntry->pList = NULL;
159 pEntry->pPrev = NULL;
160 pEntry->pNext = NULL;
161 pList->cbCached -= pEntry->cbData;
162 pdmacFileCacheCheckList(pList, pEntry);
163}
164
165/**
166 * Adds a cache entry to the given LRU list unlinking it from the currently
167 * assigned list if needed.
168 *
169 * @returns nothing.
170 * @param pList The list to add the entry to.
171 * @param pEntry Entry to add.
172 */
173static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
174{
175 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
176 pdmacFileCacheCheckList(pList, NULL);
177
178 /* Remove from old list if needed */
179 if (pEntry->pList)
180 pdmacFileCacheEntryRemoveFromList(pEntry);
181
182 pEntry->pNext = pList->pHead;
183 if (pList->pHead)
184 pList->pHead->pPrev = pEntry;
185 else
186 {
187 Assert(!pList->pTail);
188 pList->pTail = pEntry;
189 }
190
191 pEntry->pPrev = NULL;
192 pList->pHead = pEntry;
193 pList->cbCached += pEntry->cbData;
194 pEntry->pList = pList;
195 pdmacFileCacheCheckList(pList, NULL);
196}
197
198/**
199 * Destroys a LRU list freeing all entries.
200 *
201 * @returns nothing
202 * @param pList Pointer to the LRU list to destroy.
203 *
204 * @note The caller must own the critical section of the cache.
205 */
206static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
207{
208 while (pList->pHead)
209 {
210 PPDMACFILECACHEENTRY pEntry = pList->pHead;
211
212 pList->pHead = pEntry->pNext;
213
214 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
215 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
216
217 RTMemPageFree(pEntry->pbData);
218 RTMemFree(pEntry);
219 }
220}
221
222/**
223 * Tries to remove the given amount of bytes from a given list in the cache,
224 * moving the entries to one of the given ghost lists.
225 *
226 * @returns Amount of data which could be freed.
227 * @param pCache Pointer to the global cache data.
228 * @param cbData The amount of data to free.
229 * @param pListSrc The source list to evict data from.
230 * @param pGhostListDst The ghost list removed entries should be moved to,
231 * NULL if the entries should be freed.
232 *
233 * @note This function may return fewer bytes than requested because entries
234 * may be marked as non-evictable if they are used for I/O at the moment.
235 */
236static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
237 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst)
238{
239 size_t cbEvicted = 0;
240
241 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
242
243 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
244 AssertMsg( !pGhostListDst
245 || (pGhostListDst == &pCache->LruRecentlyGhost)
246 || (pGhostListDst == &pCache->LruFrequentlyGhost),
247 ("Destination list must be NULL or one of the ghost lists\n"));
248
249 /* Start deleting from the tail. */
250 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
251
252 while ((cbEvicted < cbData) && pEntry)
253 {
254 PPDMACFILECACHEENTRY pCurr = pEntry;
255
256 pEntry = pEntry->pPrev;
257
258 /* We can't evict pages which are currently in progress */
259 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
260 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
261 {
262 /* Ok eviction candidate. Grab the endpoint semaphore and check again
263 * because somebody else might have raced us. */
264 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
265 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
266
267 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
268 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
269 {
270 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
271 if (pCurr->pbData)
272 {
273 RTMemPageFree(pCurr->pbData);
274 pCurr->pbData = NULL;
275 }
276
277 cbEvicted += pCurr->cbData;
278
279 if (pGhostListDst)
280 {
281 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
282 }
283 else
284 {
285 /* Delete the entry from the AVL tree it is assigned to. */
286 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
287 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
288 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
289
290 pdmacFileCacheEntryRemoveFromList(pCurr);
291 pCache->cbCached -= pCurr->cbData;
292 RTMemFree(pCurr);
293 }
294 }
295 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
296 }
297 else
298 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
299 }
300
301 return cbEvicted;
302}
303
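/**
 * Implements the ARC replace step: depending on the adaptation value, evicts
 * entries either from the recently used list (T1), moving the victims to the
 * recently used ghost list (B1), or from the frequently used list (T2),
 * moving the victims to the frequently used ghost list (B2).
 *
 * @returns Amount of data which could be freed.
 * @param pCache Pointer to the global cache data.
 * @param cbData The amount of data to make room for.
 * @param pEntryList The list the entry which triggered the replacement is on;
 *                   used to break the tie when T1 exactly matches the adaptation value.
 */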
304static size_t pdmacFileCacheReplace(PPDMACFILECACHEGLOBAL pCache, size_t cbData, PPDMACFILELRULIST pEntryList)
305{
306 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
307
308 if ( (pCache->LruRecentlyUsed.cbCached)
309 && ( (pCache->LruRecentlyUsed.cbCached > pCache->uAdaptVal)
310 || ( (pEntryList == &pCache->LruFrequentlyGhost)
311 && (pCache->LruRecentlyUsed.cbCached == pCache->uAdaptVal))))
312 {
313 /* We need to remove entry size pages from T1 and move the entries to B1 */
314 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
315 &pCache->LruRecentlyUsed,
316 &pCache->LruRecentlyGhost);
317 }
318 else
319 {
320 /* We need to remove entry size pages from T2 and move the entries to B2 */
321 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
322 &pCache->LruFrequentlyUsed,
323 &pCache->LruFrequentlyGhost);
324 }
325}
326
327/**
328 * Tries to evict the given amount of the data from the cache.
329 *
330 * @returns Bytes removed.
331 * @param pCache The global cache data.
332 * @param cbData Number of bytes to evict.
333 */
334static size_t pdmacFileCacheEvict(PPDMACFILECACHEGLOBAL pCache, size_t cbData)
335{
336 size_t cbRemoved = ~0;
337
338 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
339
340 if ((pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached) >= pCache->cbMax)
341 {
342 /* Delete desired pages from the cache. */
343 if (pCache->LruRecentlyUsed.cbCached < pCache->cbMax)
344 {
345 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
346 &pCache->LruRecentlyGhost,
347 NULL);
348 }
349 else
350 {
351 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
352 &pCache->LruRecentlyUsed,
353 NULL);
354 }
355 }
356 else
357 {
358 uint32_t cbUsed = pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached +
359 pCache->LruFrequentlyUsed.cbCached + pCache->LruFrequentlyGhost.cbCached;
360
361 if (cbUsed >= pCache->cbMax)
362 {
363 if (cbUsed == 2*pCache->cbMax)
364 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
365 &pCache->LruFrequentlyGhost,
366 NULL);
367
368 if (cbRemoved >= cbData)
369 cbRemoved = pdmacFileCacheReplace(pCache, cbData, NULL);
370 }
371 }
372
373 return cbRemoved;
374}
375
376/**
377 * Updates the cache parameters (the ARC adaptation value).
378 *
379 * @returns nothing.
380 * @param pCache The global cache data.
381 * @param pEntry The entry used for the update.
382 */
383static void pdmacFileCacheUpdate(PPDMACFILECACHEGLOBAL pCache, PPDMACFILECACHEENTRY pEntry)
384{
385 int32_t uUpdateVal = 0;
386
387 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
388
389 /* Update parameters */
390 if (pEntry->pList == &pCache->LruRecentlyGhost)
391 {
392 if (pCache->LruRecentlyGhost.cbCached >= pCache->LruFrequentlyGhost.cbCached)
393 uUpdateVal = 1;
394 else
395 uUpdateVal = pCache->LruFrequentlyGhost.cbCached / pCache->LruRecentlyGhost.cbCached;
396
397 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal + uUpdateVal, pCache->cbMax);
398 }
399 else if (pEntry->pList == &pCache->LruFrequentlyGhost)
400 {
401 if (pCache->LruFrequentlyGhost.cbCached >= pCache->LruRecentlyGhost.cbCached)
402 uUpdateVal = 1;
403 else
404 uUpdateVal = pCache->LruRecentlyGhost.cbCached / pCache->LruFrequentlyGhost.cbCached;
405
406 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal - uUpdateVal, 0);
407 }
408 else
409 AssertMsgFailed(("Invalid list type\n"));
410}
411
412/**
413 * Initiates a read I/O task for the given entry.
414 *
415 * @returns nothing.
416 * @param pEntry The entry to fetch the data into.
417 */
418static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
419{
420 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
421
422 /* Make sure no one evicts the entry while it is accessed. */
423 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
424
425 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
426 AssertPtr(pIoTask);
427
428 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
429
430 pIoTask->pEndpoint = pEntry->pEndpoint;
431 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
432 pIoTask->Off = pEntry->Core.Key;
433 pIoTask->DataSeg.cbSeg = pEntry->cbData;
434 pIoTask->DataSeg.pvSeg = pEntry->pbData;
435 pIoTask->pvUser = pEntry;
436 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
437
438 /* Send it off to the I/O manager. */
439 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
440}
441
442/**
443 * Initiates a write I/O task for the given entry.
444 *
445 * @returns nothing.
446 * @param pEntry The entry whose data should be written to the endpoint.
447 */
448static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
449{
450 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
451
452 /* Make sure no one evicts the entry while it is accessed. */
453 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
454
455 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
456 AssertPtr(pIoTask);
457
458 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
459
460 pIoTask->pEndpoint = pEntry->pEndpoint;
461 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
462 pIoTask->Off = pEntry->Core.Key;
463 pIoTask->DataSeg.cbSeg = pEntry->cbData;
464 pIoTask->DataSeg.pvSeg = pEntry->pbData;
465 pIoTask->pvUser = pEntry;
466 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
467
468 /* Send it off to the I/O manager. */
469 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
470}
471
472/**
473 * Completion callback for I/O tasks.
474 *
475 * @returns nothing.
476 * @param pTask The completed task.
477 * @param pvUser Opaque user data.
478 */
479static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
480{
481 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
482 PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
483 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
484
485 /* Reference the entry now as we are clearing the I/O in progress flag
486 * which protected the entry till now. */
487 pdmacFileEpCacheEntryRef(pEntry);
488
489 RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
490 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
491
492 /* Process the waiting segment list. The data in the entry might have changed in the meantime. */
493 PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;
494
495 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
496 ("The list tail was not updated correctly\n"));
497 pEntry->pWaitingTail = NULL;
498 pEntry->pWaitingHead = NULL;
499
500 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
501 {
502 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;
503
504 while (pCurr)
505 {
506 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
507
508 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
509 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
510
511 uint32_t uOld = ASMAtomicSubU32(&pCurr->pTask->cbTransferLeft, pCurr->cbTransfer);
512 AssertMsg(uOld >= pCurr->cbTransfer, ("New value would overflow\n"));
513 if (!(uOld - pCurr->cbTransfer)
514 && !ASMAtomicXchgBool(&pCurr->pTask->fCompleted, true))
515 pdmR3AsyncCompletionCompleteTask(&pCurr->pTask->Core);
516
517 PPDMACFILETASKSEG pFree = pCurr;
518 pCurr = pCurr->pNext;
519
520 RTMemFree(pFree);
521 }
522 }
523 else
524 {
525 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
526 AssertMsg(!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY),("Invalid flags set\n"));
527
528 while (pCurr)
529 {
530 if (pCurr->fWrite)
531 {
532 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
533 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
534 }
535 else
536 memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);
537
538 uint32_t uOld = ASMAtomicSubU32(&pCurr->pTask->cbTransferLeft, pCurr->cbTransfer);
539 AssertMsg(uOld >= pCurr->cbTransfer, ("New value would overflow\n"));
540 if (!(uOld - pCurr->cbTransfer)
541 && !ASMAtomicXchgBool(&pCurr->pTask->fCompleted, true))
542 pdmR3AsyncCompletionCompleteTask(&pCurr->pTask->Core);
543
544 PPDMACFILETASKSEG pFree = pCurr;
545 pCurr = pCurr->pNext;
546
547 RTMemFree(pFree);
548 }
549 }
550
551 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
552 pdmacFileCacheWriteToEndpoint(pEntry);
553
554 RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);
555
556 /* Dereference the entry; it is no longer protected unless another write was issued for it. */
557 pdmacFileEpCacheEntryRelease(pEntry);
558}
559
560/**
561 * Initializes the I/O cache.
562 *
563 * @returns VBox status code.
564 * @param pClassFile The global class data for file endpoints.
565 * @param pCfgNode CFGM node to query configuration data from.
566 */
567int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
568{
569 int rc = VINF_SUCCESS;
570 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
571
572 /* Initialize members */
573 pCache->LruRecentlyUsed.pHead = NULL;
574 pCache->LruRecentlyUsed.pTail = NULL;
575 pCache->LruRecentlyUsed.cbCached = 0;
576
577 pCache->LruFrequentlyUsed.pHead = NULL;
578 pCache->LruFrequentlyUsed.pTail = NULL;
579 pCache->LruFrequentlyUsed.cbCached = 0;
580
581 pCache->LruRecentlyGhost.pHead = NULL;
582 pCache->LruRecentlyGhost.pTail = NULL;
583 pCache->LruRecentlyGhost.cbCached = 0;
584
585 pCache->LruFrequentlyGhost.pHead = NULL;
586 pCache->LruFrequentlyGhost.pTail = NULL;
587 pCache->LruFrequentlyGhost.cbCached = 0;
588
589 rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
590 AssertLogRelRCReturn(rc, rc);
591
592 pCache->cbCached = 0;
593 pCache->uAdaptVal = 0;
594 LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));
595
596 STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
597 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
598 "/PDM/AsyncCompletion/File/cbMax",
599 STAMUNIT_BYTES,
600 "Maximum cache size");
601 STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
602 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
603 "/PDM/AsyncCompletion/File/cbCached",
604 STAMUNIT_BYTES,
605 "Currently used cache");
606 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsed.cbCached,
607 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
608 "/PDM/AsyncCompletion/File/cbCachedMru",
609 STAMUNIT_BYTES,
610 "Number of bytes cached in Mru list");
611 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
612 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
613 "/PDM/AsyncCompletion/File/cbCachedFru",
614 STAMUNIT_BYTES,
615 "Number of bytes cached in Fru list");
616 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyGhost.cbCached,
617 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
618 "/PDM/AsyncCompletion/File/cbCachedMruGhost",
619 STAMUNIT_BYTES,
620 "Number of bytes cached in Mru ghost list");
621 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyGhost.cbCached,
622 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
623 "/PDM/AsyncCompletion/File/cbCachedFruGhost",
624 STAMUNIT_BYTES, "Number of bytes cached in Fru ghost list");
625
626#ifdef VBOX_WITH_STATISTICS
627 STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
628 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
629 "/PDM/AsyncCompletion/File/CacheHits",
630 STAMUNIT_COUNT, "Number of hits in the cache");
631 STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
632 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
633 "/PDM/AsyncCompletion/File/CachePartialHits",
634 STAMUNIT_COUNT, "Number of partial hits in the cache");
635 STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
636 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
637 "/PDM/AsyncCompletion/File/CacheMisses",
638 STAMUNIT_COUNT, "Number of misses when accessing the cache");
639 STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
640 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
641 "/PDM/AsyncCompletion/File/CacheRead",
642 STAMUNIT_BYTES, "Number of bytes read from the cache");
643 STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
644 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
645 "/PDM/AsyncCompletion/File/CacheWritten",
646 STAMUNIT_BYTES, "Number of bytes written to the cache");
647 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
648 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
649 "/PDM/AsyncCompletion/File/CacheTreeGet",
650 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
651 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
652 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
653 "/PDM/AsyncCompletion/File/CacheTreeInsert",
654 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
655 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
656 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
657 "/PDM/AsyncCompletion/File/CacheTreeRemove",
658 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry from the tree");
659#endif
660
661 /* Initialize the critical section */
662 rc = RTCritSectInit(&pCache->CritSect);
663 return rc;
664}
665
666/**
667 * Destroys the cache, freeing all data.
668 *
669 * @returns nothing.
670 * @param pClassFile The global class data for file endpoints.
671 */
672void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
673{
674 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
675
676 /* Make sure no one else uses the cache now */
677 RTCritSectEnter(&pCache->CritSect);
678
679 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
680 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsed);
681 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
682 pdmacFileCacheDestroyList(&pCache->LruRecentlyGhost);
683 pdmacFileCacheDestroyList(&pCache->LruFrequentlyGhost);
684
685 RTCritSectLeave(&pCache->CritSect);
686
687 RTCritSectDelete(&pCache->CritSect);
688}
689
690/**
691 * Initializes per endpoint cache data
692 * like the AVL tree used to access cached entries.
693 *
694 * @returns VBox status code.
695 * @param pEndpoint The endpoint to initialize the cache for.
696 * @param pClassFile The global class data for file endpoints.
697 */
698int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
699{
700 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
701
702 pEndpointCache->pCache = &pClassFile->Cache;
703
704 int rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
705 if (RT_SUCCESS(rc))
706 {
707 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
708 if (!pEndpointCache->pTree)
709 {
710 rc = VERR_NO_MEMORY;
711 RTSemRWDestroy(pEndpointCache->SemRWEntries);
712 }
713 }
714
715 return rc;
716}
717
718/**
719 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
720 *
721 * @returns IPRT status code.
722 * @param pNode The node to destroy.
723 * @param pvUser Opaque user data.
724 */
725static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
726{
727 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
728 PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
729 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;
730
731 while (pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
732 {
733 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
734 RTThreadSleep(250);
735 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
736 }
737
738 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
739 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
740
741 pdmacFileCacheEntryRemoveFromList(pEntry);
742 pCache->cbCached -= pEntry->cbData;
743
744 RTMemPageFree(pEntry->pbData);
745 RTMemFree(pEntry);
746
747 return VINF_SUCCESS;
748}
749
750/**
751 * Destroys all cache resources used by the given endpoint.
752 *
753 * @returns nothing.
754 * @param pEndpoint The endpoint to destroy the cache for.
755 */
756void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
757{
758 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
759 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
760
761 /* Make sure nobody is accessing the cache while we delete the tree. */
762 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
763 RTCritSectEnter(&pCache->CritSect);
764 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
765 RTCritSectLeave(&pCache->CritSect);
766 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
767
768 RTSemRWDestroy(pEndpointCache->SemRWEntries);
769}
770
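/**
 * Looks up the cache entry covering the given offset in the endpoint's AVL
 * tree, taking a reference on it if found.
 *
 * @returns Pointer to the referenced cache entry or NULL if no entry covers the offset.
 * @param pEndpointCache The endpoint cache to search in.
 * @param off The offset to look up.
 */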
771static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
772{
773 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
774 PPDMACFILECACHEENTRY pEntry = NULL;
775
776 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
777
778 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
779 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
780 if (pEntry)
781 pdmacFileEpCacheEntryRef(pEntry);
782 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
783
784 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
785
786 return pEntry;
787}
788
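/**
 * Returns the cache entry which fits the given offset best, i.e. the entry
 * covering the offset or, failing that, the closest entry above it, taking a
 * reference on it if found.
 *
 * @returns Pointer to the referenced cache entry or NULL if there is none.
 * @param pEndpointCache The endpoint cache to search in.
 * @param off The offset to look up.
 */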
789static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
790{
791 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
792 PPDMACFILECACHEENTRY pEntry = NULL;
793
794 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
795
796 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
797 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true);
798 if (pEntry)
799 pdmacFileEpCacheEntryRef(pEntry);
800 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
801
802 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
803
804 return pEntry;
805}
806
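/**
 * Inserts the given entry into the endpoint's AVL tree while holding the
 * endpoint's entry semaphore in write mode.
 *
 * @returns nothing.
 * @param pEndpointCache The endpoint cache the entry belongs to.
 * @param pEntry The entry to insert.
 */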
807static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
808{
809 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
810
811 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
812 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
813 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
814 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
815 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
816 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
817}
818
819/**
820 * Allocates and initializes a new entry for the cache.
821 * The entry has a reference count of 1.
822 *
823 * @returns Pointer to the new cache entry or NULL if out of memory.
824 * @param pCache The cache the entry belongs to.
825 * @param pEndpoint The endpoint the entry holds data for.
826 * @param off Start offset.
827 * @param cbData Size of the cache entry.
828 */
829static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
830 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
831 RTFOFF off, size_t cbData)
832{
833 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
834
835 if (RT_UNLIKELY(!pEntryNew))
836 return NULL;
837
838 pEntryNew->Core.Key = off;
839 pEntryNew->Core.KeyLast = off + cbData - 1;
840 pEntryNew->pEndpoint = pEndpoint;
841 pEntryNew->pCache = pCache;
842 pEntryNew->fFlags = 0;
843 pEntryNew->cRefs = 1; /* We are using it now. */
844 pEntryNew->pList = NULL;
845 pEntryNew->cbData = cbData;
846 pEntryNew->pWaitingHead = NULL;
847 pEntryNew->pWaitingTail = NULL;
848 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
849
850 if (RT_UNLIKELY(!pEntryNew->pbData))
851 {
852 RTMemFree(pEntryNew);
853 return NULL;
854 }
855
856 return pEntryNew;
857}
858
859/**
860 * Adds a segment to the waiting list for a cache entry
861 * which is currently in progress.
862 *
863 * @returns nothing.
864 * @param pEntry The cache entry to add the segment to.
865 * @param pSeg The segment to add.
866 */
867static void pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
868{
869 pSeg->pNext = NULL;
870
871 if (pEntry->pWaitingHead)
872 {
873 AssertPtr(pEntry->pWaitingTail);
874
875 pEntry->pWaitingTail->pNext = pSeg;
876 pEntry->pWaitingTail = pSeg;
877 }
878 else
879 {
880 Assert(!pEntry->pWaitingTail);
881
882 pEntry->pWaitingHead = pSeg;
883 pEntry->pWaitingTail = pSeg;
884 }
885}
886
887/**
888 * Advances the current segment buffer by the number of bytes transferred
889 * or gets the next segment.
890 */
891#define ADVANCE_SEGMENT_BUFFER(BytesTransfered) \
892 do \
893 { \
894 cbSegLeft -= BytesTransfered; \
895 if (!cbSegLeft) \
896 { \
897 iSegCurr++; \
898 cbSegLeft = paSegments[iSegCurr].cbSeg; \
899 pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg; \
900 } \
901 else \
902 pbSegBuf += BytesTransfered; \
903 } \
904 while (0);
905
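/*
 * Note: ADVANCE_SEGMENT_BUFFER expects the local variables iSegCurr, cbSegLeft
 * and pbSegBuf as well as the paSegments array to be in scope at the expansion
 * site; pdmacFileEpCacheRead() and pdmacFileEpCacheWrite() below set these up
 * before entering their per-entry loops.
 */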
906/**
907 * Reads the specified data from the endpoint using the cache if possible.
908 *
909 * @returns VBox status code.
910 * @param pEndpoint The endpoint to read from.
911 * @param pTask The task structure used as identifier for this request.
912 * @param off The offset to start reading from.
913 * @param paSegments Pointer to the array holding the destination buffers.
914 * @param cSegments Number of segments in the array.
915 * @param cbRead Number of bytes to read.
916 */
917int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
918 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
919 size_t cbRead)
920{
921 int rc = VINF_SUCCESS;
922 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
923 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
924 PPDMACFILECACHEENTRY pEntry;
925
926 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
927 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));
928
929 pTask->cbTransferLeft = cbRead;
930 /* Set to completed to make sure that the task is valid while we access it. */
931 ASMAtomicWriteBool(&pTask->fCompleted, true);
932
933 int iSegCurr = 0;
934 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
935 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
936
937 while (cbRead)
938 {
939 size_t cbToRead;
940
941 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
942
943 /*
944 * If there is no entry we try to create a new one, evicting unused pages
945 * if the cache is full. If this is not possible we will pass the request through
946 * and skip the caching (all entries may still be in progress so they can't
947 * be evicted).
948 * If we have an entry it can be in one of the LRU lists where the entry
949 * contains data (recently used or frequently used LRU) so we can just read
950 * the data we need and put the entry at the head of the frequently used LRU list.
951 * In case the entry is in one of the ghost lists it doesn't contain any data.
952 * We have to fetch it again evicting pages from either T1 or T2 to make room.
953 */
954 if (pEntry)
955 {
956 RTFOFF OffDiff = off - pEntry->Core.Key;
957
958 AssertMsg(off >= pEntry->Core.Key,
959 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
960 off, pEntry->Core.Key));
961
962 AssertPtr(pEntry->pList);
963
964 cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);
965 cbRead -= cbToRead;
966
967 if (!cbRead)
968 STAM_COUNTER_INC(&pCache->cHits);
969 else
970 STAM_COUNTER_INC(&pCache->cPartialHits);
971
972 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
973
974 /* Ghost lists contain no data. */
975 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
976 || (pEntry->pList == &pCache->LruFrequentlyUsed))
977 {
978 LogFlow(("Entry %#p contains the requested data\n", pEntry));
979
980 if ( (pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
981 && !(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY))
982 {
983 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
984 /* Check again. The completion callback might have raced us. */
985
986 if ( (pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
987 && !(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY))
988 {
989 /* Entry hasn't completed yet. Append to the waiting list. */
990 while (cbToRead)
991 {
992 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
993
994 pSeg->pTask = pTask;
995 pSeg->uBufOffset = OffDiff;
996 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
997 pSeg->pvBuf = pbSegBuf;
998 pSeg->fWrite = false;
999
1000 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1001
1002 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1003
1004 off += pSeg->cbTransfer;
1005 cbToRead -= pSeg->cbTransfer;
1006 OffDiff += pSeg->cbTransfer;
1007 }
1008 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1009 }
1010 else
1011 {
1012 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1013
1014 /* Read as much as we can from the entry. */
1015 while (cbToRead)
1016 {
1017 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1018
1019 memcpy(pbSegBuf, pEntry->pbData + OffDiff, cbCopy);
1020
1021 ADVANCE_SEGMENT_BUFFER(cbCopy);
1022
1023 cbToRead -= cbCopy;
1024 off += cbCopy;
1025 OffDiff += cbCopy;
1026 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1027 }
1028 }
1029 }
1030 else
1031 {
1032 /* Read as much as we can from the entry. */
1033 while (cbToRead)
1034 {
1035 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1036
1037 memcpy(pbSegBuf, pEntry->pbData + OffDiff, cbCopy);
1038
1039 ADVANCE_SEGMENT_BUFFER(cbCopy);
1040
1041 cbToRead -= cbCopy;
1042 off += cbCopy;
1043 OffDiff += cbCopy;
1044 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1045 }
1046 }
1047
1048 /* Move this entry to the top position */
1049 RTCritSectEnter(&pCache->CritSect);
1050 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1051 RTCritSectLeave(&pCache->CritSect);
1052 }
1053 else
1054 {
1055 RTCritSectEnter(&pCache->CritSect);
1056 pdmacFileCacheUpdate(pCache, pEntry);
1057 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList);
1058
1059 /* Move the entry to T2 and fetch it to the cache. */
1060 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1061 RTCritSectLeave(&pCache->CritSect);
1062
1063 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1064 AssertPtr(pEntry->pbData);
1065
1066 while (cbToRead)
1067 {
1068 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1069
1070 AssertMsg(off >= pEntry->Core.Key,
1071 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1072 off, pEntry->Core.Key));
1073
1074 pSeg->pTask = pTask;
1075 pSeg->uBufOffset = OffDiff;
1076 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1077 pSeg->pvBuf = pbSegBuf;
1078
1079 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1080
1081 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1082
1083 off += pSeg->cbTransfer;
1084 OffDiff += pSeg->cbTransfer;
1085 cbToRead -= pSeg->cbTransfer;
1086 }
1087
1088 pdmacFileCacheReadFromEndpoint(pEntry);
1089 }
1090 pdmacFileEpCacheEntryRelease(pEntry);
1091 }
1092 else
1093 {
1094 /* No entry found for this offset. Get best fit entry and fetch the data to the cache. */
1095 size_t cbToReadAligned;
1096 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1097
1098 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1099 pEntryBestFit ? "" : "No ",
1100 off,
1101 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1102 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1103 pEntryBestFit ? pEntryBestFit->cbData : 0));
1104
1105 if (pEntryBestFit && ((off + (RTFOFF)cbRead) > pEntryBestFit->Core.Key))
1106 {
1107 cbToRead = pEntryBestFit->Core.Key - off;
1108 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1109 cbToReadAligned = cbToRead;
1110 }
1111 else
1112 {
1113 /*
1114 * Align the size to a 4KB boundary.
1115 * Memory size is aligned to a page boundary
1116 * and memory is wasted if the size is rather small
1117 * (for example reads with a size of 512 bytes).
1118 */
1119 cbToRead = cbRead;
1120 cbToReadAligned = RT_ALIGN_Z(cbRead, PAGE_SIZE);
1121
1122 /* Clip read to file size */
1123 cbToReadAligned = RT_MIN(pEndpoint->cbFile - off, cbToReadAligned);
1124 if (pEntryBestFit)
1125 cbToReadAligned = RT_MIN(cbToReadAligned, pEntryBestFit->Core.Key - off);
1126 }
1127
1128 cbRead -= cbToRead;
1129
1130 if (!cbRead)
1131 STAM_COUNTER_INC(&pCache->cMisses);
1132 else
1133 STAM_COUNTER_INC(&pCache->cPartialHits);
1134
1135 RTCritSectEnter(&pCache->CritSect);
1136 size_t cbRemoved = pdmacFileCacheEvict(pCache, cbToReadAligned);
1137 RTCritSectLeave(&pCache->CritSect);
1138
1139 if (cbRemoved >= cbToRead)
1140 {
1141 LogFlow(("Evicted %u bytes (%u requested). Creating new cache entry\n", cbRemoved, cbToReadAligned));
1142 PPDMACFILECACHEENTRY pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToReadAligned);
1143 AssertPtr(pEntryNew);
1144
1145 RTCritSectEnter(&pCache->CritSect);
1146 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsed, pEntryNew);
1147 RTCritSectLeave(&pCache->CritSect);
1148
1149 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1150 uint32_t uBufOffset = 0;
1151
1152 pCache->cbCached += cbToReadAligned;
1153
1154 while (cbToRead)
1155 {
1156 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1157
1158 pSeg->pTask = pTask;
1159 pSeg->uBufOffset = uBufOffset;
1160 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1161 pSeg->pvBuf = pbSegBuf;
1162
1163 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1164
1165 pdmacFileEpCacheEntryAddWaitingSegment(pEntryNew, pSeg);
1166
1167 off += pSeg->cbTransfer;
1168 cbToRead -= pSeg->cbTransfer;
1169 uBufOffset += pSeg->cbTransfer;
1170 }
1171
1172 pdmacFileCacheReadFromEndpoint(pEntryNew);
1173 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1174 }
1175 else
1176 {
1177 /*
1178 * There is not enough free space in the cache.
1179 * Pass the request directly to the I/O manager.
1180 */
1181 LogFlow(("Couldn't evict %u bytes from the cache (%u actually removed). Remaining request will be passed through\n", cbToRead, cbRemoved));
1182
1183 while (cbToRead)
1184 {
1185 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1186 AssertPtr(pIoTask);
1187
1188 pIoTask->pEndpoint = pEndpoint;
1189 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
1190 pIoTask->Off = off;
1191 pIoTask->DataSeg.cbSeg = RT_MIN(cbToRead, cbSegLeft);
1192 pIoTask->DataSeg.pvSeg = pbSegBuf;
1193 pIoTask->pvUser = pTask;
1194 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1195
1196 off += pIoTask->DataSeg.cbSeg;
1197 cbToRead -= pIoTask->DataSeg.cbSeg;
1198
1199 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1200
1201 /* Send it off to the I/O manager. */
1202 pdmacFileEpAddTask(pEndpoint, pIoTask);
1203 }
1204 }
1205 }
1206 }
1207
1208 ASMAtomicWriteBool(&pTask->fCompleted, false);
1209
1210 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1211 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1212 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1213
1214 return rc;
1215}
1216
1217/**
1218 * Writes the given data to the endpoint using the cache if possible.
1219 *
1220 * @returns VBox status code.
1221 * @param pEndpoint The endpoint to write to.
1222 * @param pTask The task structure used as identifier for this request.
1223 * @param off The offset to start writing at.
1224 * @param paSegments Pointer to the array holding the source buffers.
1225 * @param cSegments Number of segments in the array.
1226 * @param cbWrite Number of bytes to write.
1227 */
1228int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1229 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1230 size_t cbWrite)
1231{
1232 int rc = VINF_SUCCESS;
1233 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1234 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1235 PPDMACFILECACHEENTRY pEntry;
1236
1237 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1238 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1239
1240 pTask->cbTransferLeft = cbWrite;
1241 /* Set to completed to make sure that the task is valid while we access it. */
1242 ASMAtomicWriteBool(&pTask->fCompleted, true);
1243
1244 int iSegCurr = 0;
1245 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
1246 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
1247
1248 while (cbWrite)
1249 {
1250 size_t cbToWrite;
1251
1252 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1253
1254 if (pEntry)
1255 {
1256 /* Write the data into the entry and mark it as dirty */
1257 AssertPtr(pEntry->pList);
1258
1259 RTFOFF OffDiff = off - pEntry->Core.Key;
1260
1261 AssertMsg(off >= pEntry->Core.Key,
1262 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1263 off, pEntry->Core.Key));
1264
1265 cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1266 cbWrite -= cbToWrite;
1267
1268 if (!cbWrite)
1269 STAM_COUNTER_INC(&pCache->cHits);
1270 else
1271 STAM_COUNTER_INC(&pCache->cPartialHits);
1272
1273 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1274
1275 /* Ghost lists contain no data. */
1276 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
1277 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1278 {
1279 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
1280 {
1281 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1282 /* Check again. The completion callback might have raced us. */
1283
1284 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
1285 {
1286 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1287 ("Entry is dirty but not in progress\n"));
1288
1289 /* The data isn't written to the file yet */
1290 while (cbToWrite)
1291 {
1292 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1293
1294 pSeg->pTask = pTask;
1295 pSeg->uBufOffset = OffDiff;
1296 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1297 pSeg->pvBuf = pbSegBuf;
1298 pSeg->fWrite = true;
1299
1300 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1301
1302 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1303
1304 off += pSeg->cbTransfer;
1305 OffDiff += pSeg->cbTransfer;
1306 cbToWrite -= pSeg->cbTransfer;
1307 }
1308 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1309 }
1310 else
1311 {
1312 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1313
1314 AssertMsg(!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS),
1315 ("Entry is not dirty but in progress\n"));
1316
1317 /* Write as much as we can into the entry and update the file. */
1318 while (cbToWrite)
1319 {
1320 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1321
1322 memcpy(pEntry->pbData + OffDiff, pbSegBuf, cbCopy);
1323
1324 ADVANCE_SEGMENT_BUFFER(cbCopy);
1325
1326 cbToWrite -= cbCopy;
1327 off += cbCopy;
1328 OffDiff += cbCopy;
1329 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1330 }
1331
1332 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1333 pdmacFileCacheWriteToEndpoint(pEntry);
1334 }
1335 }
1336 else
1337 {
1338 /*
1339 * Check if a read is in progress for this entry.
1340 * We have to defer processing in that case.
1341 */
1342 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
1343 {
1344 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1345
1346 /* Check again. The completion callback might have raced us. */
1347 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
1348 {
1349
1350 while (cbToWrite)
1351 {
1352 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1353
1354 pSeg->pTask = pTask;
1355 pSeg->uBufOffset = OffDiff;
1356 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1357 pSeg->pvBuf = pbSegBuf;
1358 pSeg->fWrite = true;
1359
1360 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1361
1362 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1363
1364 off += pSeg->cbTransfer;
1365 OffDiff += pSeg->cbTransfer;
1366 cbToWrite -= pSeg->cbTransfer;
1367 }
1368
1369 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1370 }
1371 else
1372 {
1373 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1374
1375 /* Write as much as we can into the entry and update the file. */
1376 while (cbToWrite)
1377 {
1378 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1379
1380 memcpy(pEntry->pbData + OffDiff, pbSegBuf, cbCopy);
1381
1382 ADVANCE_SEGMENT_BUFFER(cbCopy);
1383
1384 cbToWrite -= cbCopy;
1385 off += cbCopy;
1386 OffDiff += cbCopy;
1387 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1388 }
1389
1390 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1391 pdmacFileCacheWriteToEndpoint(pEntry);
1392 }
1393 }
1394 else
1395 {
1396 /* Write as much as we can into the entry and update the file. */
1397 while (cbToWrite)
1398 {
1399 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1400
1401 memcpy(pEntry->pbData + OffDiff, pbSegBuf, cbCopy);
1402
1403 ADVANCE_SEGMENT_BUFFER(cbCopy);
1404
1405 cbToWrite -= cbCopy;
1406 off += cbCopy;
1407 OffDiff += cbCopy;
1408 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1409 }
1410
1411 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1412 pdmacFileCacheWriteToEndpoint(pEntry);
1413 }
1414 }
1415
1416 /* Move this entry to the top position */
1417 RTCritSectEnter(&pCache->CritSect);
1418 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1419 RTCritSectLeave(&pCache->CritSect);
1420 }
1421 else
1422 {
1423 RTCritSectEnter(&pCache->CritSect);
1424 pdmacFileCacheUpdate(pCache, pEntry);
1425 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList);
1426
1427 /* Move the entry to T2 and fetch it to the cache. */
1428 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1429 RTCritSectLeave(&pCache->CritSect);
1430
1431 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1432 AssertPtr(pEntry->pbData);
1433
1434 while (cbToWrite)
1435 {
1436 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1437
1438 AssertMsg(off >= pEntry->Core.Key,
1439 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1440 off, pEntry->Core.Key));
1441
1442 pSeg->pTask = pTask;
1443 pSeg->uBufOffset = OffDiff;
1444 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1445 pSeg->pvBuf = pbSegBuf;
1446 pSeg->fWrite = true;
1447
1448 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1449
1450 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1451
1452 off += pSeg->cbTransfer;
1453 OffDiff += pSeg->cbTransfer;
1454 cbToWrite -= pSeg->cbTransfer;
1455 }
1456
1457 pdmacFileCacheReadFromEndpoint(pEntry);
1458 }
1459
1460 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
1461 pdmacFileEpCacheEntryRelease(pEntry);
1462 }
1463 else
1464 {
1465 /*
1466 * No entry found. Write directly into file.
1467 */
1468 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1469
1470 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1471 pEntryBestFit ? "" : "No ",
1472 off,
1473 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1474 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1475 pEntryBestFit ? pEntryBestFit->cbData : 0));
1476
1477 if (pEntryBestFit && ((off + (RTFOFF)cbWrite) > pEntryBestFit->Core.Key))
1478 {
1479 cbToWrite = pEntryBestFit->Core.Key - off;
1480 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1481 }
1482 else
1483 cbToWrite = cbWrite;
1484
1485 cbWrite -= cbToWrite;
1486
1487 while (cbToWrite)
1488 {
1489 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1490 AssertPtr(pIoTask);
1491
1492 pIoTask->pEndpoint = pEndpoint;
1493 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
1494 pIoTask->Off = off;
1495 pIoTask->DataSeg.cbSeg = RT_MIN(cbToWrite, cbSegLeft);
1496 pIoTask->DataSeg.pvSeg = pbSegBuf;
1497 pIoTask->pvUser = pTask;
1498 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1499
1500 off += pIoTask->DataSeg.cbSeg;
1501 cbToWrite -= pIoTask->DataSeg.cbSeg;
1502
1503 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1504
1505 /* Send it off to the I/O manager. */
1506 pdmacFileEpAddTask(pEndpoint, pIoTask);
1507 }
1508 }
1509 }
1510
1511 ASMAtomicWriteBool(&pTask->fCompleted, false);
1512
1513 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1514 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1515 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1516
1517 return VINF_SUCCESS;
1518}
1519
1520#undef ADVANCE_SEGMENT_BUFFER
1521