VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@ 25147

Last change on this file since 25147 was 25147, checked in by vboxsync, 15 years ago

AsyncCompletion/Cache: Complete all pending writes before completing a flush

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 76.0 KB
Line 
1/* $Id: PDMAsyncCompletionFileCache.cpp 25147 2009-12-02 13:42:32Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the ARC algorithm.
25 * http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
26 *
27 * The algorithm uses four LRU (Least recently used) lists to store data in the cache.
28 * Two of them contain data where one stores entries which were accessed recently and one
29 * which is used for frequently accessed data.
30 * The other two lists are called ghost lists and store information about the accessed range
31 * but do not contain data. They are used to track data access. If these entries are accessed
32 * they will push the data to a higher position in the cache preventing it from getting removed
33 * quickly again.
34 *
35 * The algorithm needs to be modified to meet our requirements. Like the implementation
36 * for the ZFS filesystem we need to handle pages with a variable size. It would
37 * be possible to use a fixed size but would increase the computational
38 * and memory overhead.
39 * Because we do I/O asynchronously we also need to mark entries which are currently accessed
40 * as non evictable to prevent removal of the entry while the data is being accessed.
41 */
42
43/*******************************************************************************
44* Header Files *
45*******************************************************************************/
46#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
47#define RT_STRICT
48#include <iprt/types.h>
49#include <iprt/mem.h>
50#include <iprt/path.h>
51#include <VBox/log.h>
52#include <VBox/stam.h>
53
54#include "PDMAsyncCompletionFileInternal.h"
55
56#ifdef VBOX_STRICT
57# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
58 do \
59 { \
60 AssertMsg(RTCritSectIsOwner(&pCache->CritSect), \
61 ("Thread does not own critical section\n"));\
62 } while(0);
63#else
64# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0);
65#endif
66
67/*******************************************************************************
68* Internal Functions *
69*******************************************************************************/
70static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
71
72DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
73{
74 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
75 ASMAtomicDecU32(&pEntry->cRefs);
76}
77
78DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
79{
80 ASMAtomicIncU32(&pEntry->cRefs);
81}
82
83/**
84 * Checks consistency of a LRU list.
85 *
86 * @returns nothing
87 * @param pList The LRU list to check.
88 * @param pNotInList Element which is not allowed to occur in the list.
89 */
90static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
91{
92#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
93 PPDMACFILECACHEENTRY pCurr = pList->pHead;
94
95 /* Check that there are no double entries and no cycles in the list. */
96 while (pCurr)
97 {
98 PPDMACFILECACHEENTRY pNext = pCurr->pNext;
99
100 while (pNext)
101 {
102 AssertMsg(pCurr != pNext,
103 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
104 pCurr, pList));
105 pNext = pNext->pNext;
106 }
107
108 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
109
110 if (!pCurr->pNext)
111 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
112
113 pCurr = pCurr->pNext;
114 }
115#endif
116}
117
118/**
119 * Unlinks a cache entry from the LRU list it is assigned to.
120 *
121 * @returns nothing.
122 * @param pEntry The entry to unlink.
123 */
124static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
125{
126 PPDMACFILELRULIST pList = pEntry->pList;
127 PPDMACFILECACHEENTRY pPrev, pNext;
128
129 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
130
131 AssertPtr(pList);
132 pdmacFileCacheCheckList(pList, NULL);
133
134 pPrev = pEntry->pPrev;
135 pNext = pEntry->pNext;
136
137 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
138 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
139
140 if (pPrev)
141 pPrev->pNext = pNext;
142 else
143 {
144 pList->pHead = pNext;
145
146 if (pNext)
147 pNext->pPrev = NULL;
148 }
149
150 if (pNext)
151 pNext->pPrev = pPrev;
152 else
153 {
154 pList->pTail = pPrev;
155
156 if (pPrev)
157 pPrev->pNext = NULL;
158 }
159
160 pEntry->pList = NULL;
161 pEntry->pPrev = NULL;
162 pEntry->pNext = NULL;
163 pList->cbCached -= pEntry->cbData;
164 pdmacFileCacheCheckList(pList, pEntry);
165}
166
167/**
168 * Adds a cache entry to the given LRU list unlinking it from the currently
169 * assigned list if needed.
170 *
171 * @returns nothing.
172 * @param pList List to the add entry to.
173 * @param pEntry Entry to add.
174 */
175static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
176{
177 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
178 pdmacFileCacheCheckList(pList, NULL);
179
180 /* Remove from old list if needed */
181 if (pEntry->pList)
182 pdmacFileCacheEntryRemoveFromList(pEntry);
183
184 pEntry->pNext = pList->pHead;
185 if (pList->pHead)
186 pList->pHead->pPrev = pEntry;
187 else
188 {
189 Assert(!pList->pTail);
190 pList->pTail = pEntry;
191 }
192
193 pEntry->pPrev = NULL;
194 pList->pHead = pEntry;
195 pList->cbCached += pEntry->cbData;
196 pEntry->pList = pList;
197 pdmacFileCacheCheckList(pList, NULL);
198}
199
200/**
201 * Destroys a LRU list freeing all entries.
202 *
203 * @returns nothing
204 * @param pList Pointer to the LRU list to destroy.
205 *
206 * @note The caller must own the critical section of the cache.
207 */
208static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
209{
210 while (pList->pHead)
211 {
212 PPDMACFILECACHEENTRY pEntry = pList->pHead;
213
214 pList->pHead = pEntry->pNext;
215
216 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
217 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
218
219 RTMemPageFree(pEntry->pbData);
220 RTMemFree(pEntry);
221 }
222}
223
224/**
225 * Tries to remove the given amount of bytes from a given list in the cache
226 * moving the entries to one of the given ghosts lists
227 *
228 * @returns Amount of data which could be freed.
229 * @param pCache Pointer to the global cache data.
230 * @param cbData The amount of the data to free.
231 * @param pListSrc The source list to evict data from.
232 * @param pGhostListSrc The ghost list removed entries should be moved to
233 * NULL if the entry should be freed.
234 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
235 * @param ppbBuf Where to store the address of the buffer if an entry with the
236 * same size was found and fReuseBuffer is true.
237 *
238 * @notes This function may return fewer bytes than requested because entries
239 * may be marked as non evictable if they are used for I/O at the moment.
240 */
241static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
242 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst,
243 bool fReuseBuffer, uint8_t **ppbBuffer)
244{
245 size_t cbEvicted = 0;
246
247 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
248
249 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
250#ifdef VBOX_WITH_2Q_CACHE
251 AssertMsg( !pGhostListDst
252 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
253 ("Destination list must be NULL or the recently used but paged out list\n"));
254#else
255 AssertMsg( !pGhostListDst
256 || (pGhostListDst == &pCache->LruRecentlyGhost)
257 || (pGhostListDst == &pCache->LruFrequentlyGhost),
258 ("Destination list must be NULL or one of the ghost lists\n"));
259#endif
260
261 if (fReuseBuffer)
262 {
263 AssertPtr(ppbBuffer);
264 *ppbBuffer = NULL;
265 }
266
267 /* Start deleting from the tail. */
268 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
269
270 while ((cbEvicted < cbData) && pEntry)
271 {
272 PPDMACFILECACHEENTRY pCurr = pEntry;
273
274 pEntry = pEntry->pPrev;
275
276 /* We can't evict pages which are currently in progress */
277 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
278 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
279 {
280 /* Ok eviction candidate. Grab the endpoint semaphore and check again
281 * because somebody else might have raced us. */
282 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
283 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
284
285 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
286 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
287 {
288 AssertMsg(!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED),
289 ("This entry is deprecated so it should have the I/O in progress flag set\n"));
290 Assert(!pCurr->pbDataReplace);
291
292 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
293
294 if (fReuseBuffer && (pCurr->cbData == cbData))
295 {
296 STAM_COUNTER_INC(&pCache->StatBuffersReused);
297 *ppbBuffer = pCurr->pbData;
298 }
299 else if (pCurr->pbData)
300 RTMemPageFree(pCurr->pbData);
301
302 pCurr->pbData = NULL;
303 cbEvicted += pCurr->cbData;
304
305 pCache->cbCached -= pCurr->cbData;
306
307 if (pGhostListDst)
308 {
309#ifdef VBOX_WITH_2Q_CACHE
310 /* We have to remove the last entries from the paged out list. */
311 while (pGhostListDst->cbCached > pCache->cbRecentlyUsedOutMax)
312 {
313 PPDMACFILECACHEENTRY pFree = pGhostListDst->pTail;
314 PPDMACFILEENDPOINTCACHE pEndpointCacheFree = &pFree->pEndpoint->DataCache;
315
316 RTSemRWRequestWrite(pEndpointCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
317
318 pdmacFileCacheEntryRemoveFromList(pFree);
319
320 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
321 RTAvlrFileOffsetRemove(pEndpointCacheFree->pTree, pFree->Core.Key);
322 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
323
324 RTSemRWReleaseWrite(pEndpointCacheFree->SemRWEntries);
325 RTMemFree(pFree);
326 }
327#endif
328
329 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
330 }
331 else
332 {
333 /* Delete the entry from the AVL tree it is assigned to. */
334 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
335 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
336 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
337
338 pdmacFileCacheEntryRemoveFromList(pCurr);
339 RTMemFree(pCurr);
340 }
341 }
342 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
343 }
344 else
345 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
346 }
347
348 return cbEvicted;
349}
350
351#ifdef VBOX_WITH_2Q_CACHE
352static bool pdmacFileCacheReclaim(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
353{
354 size_t cbRemoved = 0;
355
356 if ((pCache->cbCached + cbData) < pCache->cbMax)
357 return true;
358 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
359 {
360 /* Try to evict as many bytes as possible from A1in */
361 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
362 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
363
364 /*
365 * If it was not possible to remove enough entries
366 * try the frequently accessed cache.
367 */
368 if (cbRemoved < cbData)
369 {
370 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
371
372 cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
373 NULL, fReuseBuffer, ppbBuffer);
374 }
375 }
376 else
377 {
378 /* We have to remove entries from frequently access list. */
379 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
380 NULL, fReuseBuffer, ppbBuffer);
381 }
382
383 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
384 return (cbRemoved >= cbData);
385}
386
387#else
388
389static size_t pdmacFileCacheReplace(PPDMACFILECACHEGLOBAL pCache, size_t cbData, PPDMACFILELRULIST pEntryList,
390 bool fReuseBuffer, uint8_t **ppbBuffer)
391{
392 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
393
394 if ( (pCache->LruRecentlyUsed.cbCached)
395 && ( (pCache->LruRecentlyUsed.cbCached > pCache->uAdaptVal)
396 || ( (pEntryList == &pCache->LruFrequentlyGhost)
397 && (pCache->LruRecentlyUsed.cbCached == pCache->uAdaptVal))))
398 {
399 /* We need to remove entry size pages from T1 and move the entries to B1 */
400 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
401 &pCache->LruRecentlyUsed,
402 &pCache->LruRecentlyGhost,
403 fReuseBuffer, ppbBuffer);
404 }
405 else
406 {
407 /* We need to remove entry size pages from T2 and move the entries to B2 */
408 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
409 &pCache->LruFrequentlyUsed,
410 &pCache->LruFrequentlyGhost,
411 fReuseBuffer, ppbBuffer);
412 }
413}
414
415/**
416 * Tries to evict the given amount of the data from the cache.
417 *
418 * @returns Bytes removed.
419 * @param pCache The global cache data.
420 * @param cbData Number of bytes to evict.
421 */
422static size_t pdmacFileCacheEvict(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
423{
424 size_t cbRemoved = ~0;
425
426 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
427
428 if ((pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached) >= pCache->cbMax)
429 {
430 /* Delete desired pages from the cache. */
431 if (pCache->LruRecentlyUsed.cbCached < pCache->cbMax)
432 {
433 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
434 &pCache->LruRecentlyGhost,
435 NULL,
436 fReuseBuffer, ppbBuffer);
437 }
438 else
439 {
440 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
441 &pCache->LruRecentlyUsed,
442 NULL,
443 fReuseBuffer, ppbBuffer);
444 }
445 }
446 else
447 {
448 uint32_t cbUsed = pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached +
449 pCache->LruFrequentlyUsed.cbCached + pCache->LruFrequentlyGhost.cbCached;
450
451 if (cbUsed >= pCache->cbMax)
452 {
453 if (cbUsed == 2*pCache->cbMax)
454 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
455 &pCache->LruFrequentlyGhost,
456 NULL,
457 fReuseBuffer, ppbBuffer);
458
459 if (cbRemoved >= cbData)
460 cbRemoved = pdmacFileCacheReplace(pCache, cbData, NULL, fReuseBuffer, ppbBuffer);
461 }
462 }
463
464 return cbRemoved;
465}
466
467/**
468 * Updates the cache parameters
469 *
470 * @returns nothing.
471 * @param pCache The global cache data.
472 * @param pEntry The entry usign for the update.
473 */
474static void pdmacFileCacheUpdate(PPDMACFILECACHEGLOBAL pCache, PPDMACFILECACHEENTRY pEntry)
475{
476 int32_t uUpdateVal = 0;
477
478 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
479
480 /* Update parameters */
481 if (pEntry->pList == &pCache->LruRecentlyGhost)
482 {
483 if (pCache->LruRecentlyGhost.cbCached >= pCache->LruFrequentlyGhost.cbCached)
484 uUpdateVal = 1;
485 else
486 uUpdateVal = pCache->LruFrequentlyGhost.cbCached / pCache->LruRecentlyGhost.cbCached;
487
488 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal + uUpdateVal, pCache->cbMax);
489 }
490 else if (pEntry->pList == &pCache->LruFrequentlyGhost)
491 {
492 if (pCache->LruFrequentlyGhost.cbCached >= pCache->LruRecentlyGhost.cbCached)
493 uUpdateVal = 1;
494 else
495 uUpdateVal = pCache->LruRecentlyGhost.cbCached / pCache->LruFrequentlyGhost.cbCached;
496
497 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal - uUpdateVal, 0);
498 }
499 else
500 AssertMsgFailed(("Invalid list type\n"));
501}
502#endif
503
504/**
505 * Initiates a read I/O task for the given entry.
506 *
507 * @returns nothing.
508 * @param pEntry The entry to fetch the data to.
509 */
510static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
511{
512 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
513
514 /* Make sure no one evicts the entry while it is accessed. */
515 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
516
517 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
518 AssertPtr(pIoTask);
519
520 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
521
522 pIoTask->pEndpoint = pEntry->pEndpoint;
523 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
524 pIoTask->Off = pEntry->Core.Key;
525 pIoTask->DataSeg.cbSeg = pEntry->cbData;
526 pIoTask->DataSeg.pvSeg = pEntry->pbData;
527 pIoTask->pvUser = pEntry;
528 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
529
530 /* Send it off to the I/O manager. */
531 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
532}
533
534/**
535 * Initiates a write I/O task for the given entry.
536 *
537 * @returns nothing.
538 * @param pEntry The entry to read the data from.
539 */
540static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
541{
542 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
543
544 /* Make sure no one evicts the entry while it is accessed. */
545 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
546
547 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
548 AssertPtr(pIoTask);
549
550 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
551
552 pIoTask->pEndpoint = pEntry->pEndpoint;
553 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
554 pIoTask->Off = pEntry->Core.Key;
555 pIoTask->DataSeg.cbSeg = pEntry->cbData;
556 pIoTask->DataSeg.pvSeg = pEntry->pbData;
557 pIoTask->pvUser = pEntry;
558 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
559
560 /* Send it off to the I/O manager. */
561 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
562}
563
564/**
565 * Completes a task segment freeing all ressources and completes the task handle
566 * if everything was transfered.
567 *
568 * @returns Next task segment handle.
569 * @param pEndpointCache The endpoint cache.
570 * @param pTaskSeg Task segment to complete.
571 */
572static PPDMACFILETASKSEG pdmacFileCacheTaskComplete(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILETASKSEG pTaskSeg)
573{
574 PPDMACFILETASKSEG pNext = pTaskSeg->pNext;
575
576 uint32_t uOld = ASMAtomicSubU32(&pTaskSeg->pTask->cbTransferLeft, pTaskSeg->cbTransfer);
577 AssertMsg(uOld >= pTaskSeg->cbTransfer, ("New value would overflow\n"));
578 if (!(uOld - pTaskSeg->cbTransfer)
579 && !ASMAtomicXchgBool(&pTaskSeg->pTask->fCompleted, true))
580 {
581 pdmR3AsyncCompletionCompleteTask(&pTaskSeg->pTask->Core);
582
583 if (pTaskSeg->fWrite)
584 {
585 /* Complete a pending flush if all writes have completed */
586 uint32_t cWritesOutstanding = ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
587 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
588
589 if (!cWritesOutstanding && pTaskFlush)
590 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core);
591 }
592 }
593
594 RTMemFree(pTaskSeg);
595
596 return pNext;
597}
598
599/**
600 * Completion callback for I/O tasks.
601 *
602 * @returns nothing.
603 * @param pTask The completed task.
604 * @param pvUser Opaque user data.
605 */
606static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
607{
608 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
609 PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
610 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
611 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
612
613 /* Reference the entry now as we are clearing the I/O in progres flag
614 * which protects the entry till now. */
615 pdmacFileEpCacheEntryRef(pEntry);
616
617 RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
618 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
619
620 /* Process waiting segment list. The data in entry might have changed inbetween. */
621 PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;
622
623 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
624 ("The list tail was not updated correctly\n"));
625 pEntry->pWaitingTail = NULL;
626 pEntry->pWaitingHead = NULL;
627
628 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
629 {
630 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED)
631 {
632 AssertMsg(!pCurr, ("The entry is deprecated but has waiting write segments attached\n"));
633
634 RTMemPageFree(pEntry->pbData);
635 pEntry->pbData = pEntry->pbDataReplace;
636 pEntry->pbDataReplace = NULL;
637 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DEPRECATED;
638 }
639 else
640 {
641 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;
642
643 while (pCurr)
644 {
645 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
646
647 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
648 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
649
650 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
651 }
652 }
653 }
654 else
655 {
656 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
657 AssertMsg(!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY),("Invalid flags set\n"));
658
659 while (pCurr)
660 {
661 if (pCurr->fWrite)
662 {
663 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
664 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
665 }
666 else
667 memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);
668
669 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
670 }
671 }
672
673 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
674 pdmacFileCacheWriteToEndpoint(pEntry);
675
676 RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);
677
678 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
679 pdmacFileEpCacheEntryRelease(pEntry);
680}
681
682/**
683 * Initializies the I/O cache.
684 *
685 * returns VBox status code.
686 * @param pClassFile The global class data for file endpoints.
687 * @param pCfgNode CFGM node to query configuration data from.
688 */
689int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
690{
691 int rc = VINF_SUCCESS;
692 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
693
694 rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
695 AssertLogRelRCReturn(rc, rc);
696
697 pCache->cbCached = 0;
698 LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));
699
700 /* Initialize members */
701#ifdef VBOX_WITH_2Q_CACHE
702 pCache->LruRecentlyUsedIn.pHead = NULL;
703 pCache->LruRecentlyUsedIn.pTail = NULL;
704 pCache->LruRecentlyUsedIn.cbCached = 0;
705
706 pCache->LruRecentlyUsedOut.pHead = NULL;
707 pCache->LruRecentlyUsedOut.pTail = NULL;
708 pCache->LruRecentlyUsedOut.cbCached = 0;
709
710 pCache->LruFrequentlyUsed.pHead = NULL;
711 pCache->LruFrequentlyUsed.pTail = NULL;
712 pCache->LruFrequentlyUsed.cbCached = 0;
713
714 pCache->cbRecentlyUsedInMax = (pCache->cbMax / 100) * 25; /* 25% of the buffer size */
715 pCache->cbRecentlyUsedOutMax = (pCache->cbMax / 100) * 50; /* 50% of the buffer size */
716 LogFlowFunc((": cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", pCache->cbRecentlyUsedInMax, pCache->cbRecentlyUsedOutMax));
717#else
718 pCache->LruRecentlyUsed.pHead = NULL;
719 pCache->LruRecentlyUsed.pTail = NULL;
720 pCache->LruRecentlyUsed.cbCached = 0;
721
722 pCache->LruFrequentlyUsed.pHead = NULL;
723 pCache->LruFrequentlyUsed.pTail = NULL;
724 pCache->LruFrequentlyUsed.cbCached = 0;
725
726 pCache->LruRecentlyGhost.pHead = NULL;
727 pCache->LruRecentlyGhost.pTail = NULL;
728 pCache->LruRecentlyGhost.cbCached = 0;
729
730 pCache->LruFrequentlyGhost.pHead = NULL;
731 pCache->LruFrequentlyGhost.pTail = NULL;
732 pCache->LruFrequentlyGhost.cbCached = 0;
733
734 pCache->uAdaptVal = 0;
735#endif
736
737 STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
738 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
739 "/PDM/AsyncCompletion/File/cbMax",
740 STAMUNIT_BYTES,
741 "Maximum cache size");
742 STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
743 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
744 "/PDM/AsyncCompletion/File/cbCached",
745 STAMUNIT_BYTES,
746 "Currently used cache");
747#ifdef VBOX_WITH_2Q_CACHE
748 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedIn.cbCached,
749 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
750 "/PDM/AsyncCompletion/File/cbCachedMruIn",
751 STAMUNIT_BYTES,
752 "Number of bytes cached in MRU list");
753 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedOut.cbCached,
754 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
755 "/PDM/AsyncCompletion/File/cbCachedMruOut",
756 STAMUNIT_BYTES,
757 "Number of bytes cached in FRU list");
758 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
759 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
760 "/PDM/AsyncCompletion/File/cbCachedFru",
761 STAMUNIT_BYTES,
762 "Number of bytes cached in FRU ghost list");
763#else
764 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsed.cbCached,
765 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
766 "/PDM/AsyncCompletion/File/cbCachedMru",
767 STAMUNIT_BYTES,
768 "Number of bytes cached in Mru list");
769 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
770 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
771 "/PDM/AsyncCompletion/File/cbCachedFru",
772 STAMUNIT_BYTES,
773 "Number of bytes cached in Fru list");
774 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyGhost.cbCached,
775 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
776 "/PDM/AsyncCompletion/File/cbCachedMruGhost",
777 STAMUNIT_BYTES,
778 "Number of bytes cached in Mru ghost list");
779 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyGhost.cbCached,
780 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
781 "/PDM/AsyncCompletion/File/cbCachedFruGhost",
782 STAMUNIT_BYTES, "Number of bytes cached in Fru ghost list");
783#endif
784
785#ifdef VBOX_WITH_STATISTICS
786 STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
787 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
788 "/PDM/AsyncCompletion/File/CacheHits",
789 STAMUNIT_COUNT, "Number of hits in the cache");
790 STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
791 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
792 "/PDM/AsyncCompletion/File/CachePartialHits",
793 STAMUNIT_COUNT, "Number of partial hits in the cache");
794 STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
795 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
796 "/PDM/AsyncCompletion/File/CacheMisses",
797 STAMUNIT_COUNT, "Number of misses when accessing the cache");
798 STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
799 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
800 "/PDM/AsyncCompletion/File/CacheRead",
801 STAMUNIT_BYTES, "Number of bytes read from the cache");
802 STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
803 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
804 "/PDM/AsyncCompletion/File/CacheWritten",
805 STAMUNIT_BYTES, "Number of bytes written to the cache");
806 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
807 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
808 "/PDM/AsyncCompletion/File/CacheTreeGet",
809 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
810 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
811 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
812 "/PDM/AsyncCompletion/File/CacheTreeInsert",
813 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
814 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
815 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
816 "/PDM/AsyncCompletion/File/CacheTreeRemove",
817 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
818 STAMR3Register(pClassFile->Core.pVM, &pCache->StatBuffersReused,
819 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
820 "/PDM/AsyncCompletion/File/CacheBuffersReused",
821 STAMUNIT_COUNT, "Number of times a buffer could be reused");
822#ifndef VBOX_WITH_2Q_CACHE
823 STAMR3Register(pClassFile->Core.pVM, &pCache->uAdaptVal,
824 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
825 "/PDM/AsyncCompletion/File/CacheAdaptValue",
826 STAMUNIT_COUNT,
827 "Adaption value of the cache");
828#endif
829#endif
830
831 /* Initialize the critical section */
832 rc = RTCritSectInit(&pCache->CritSect);
833
834 if (RT_SUCCESS(rc))
835 LogRel(("AIOMgr: Cache successfully initialised. Cache size is %u bytes\n", pCache->cbMax));
836
837 return rc;
838}
839
840/**
841 * Destroysthe cache freeing all data.
842 *
843 * returns nothing.
844 * @param pClassFile The global class data for file endpoints.
845 */
846void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
847{
848 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
849
850 /* Make sure no one else uses the cache now */
851 RTCritSectEnter(&pCache->CritSect);
852
853#ifdef VBOX_WITH_2Q_CACHE
854 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
855 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedIn);
856 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedOut);
857 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
858#else
859 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
860 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsed);
861 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
862 pdmacFileCacheDestroyList(&pCache->LruRecentlyGhost);
863 pdmacFileCacheDestroyList(&pCache->LruFrequentlyGhost);
864#endif
865
866 RTCritSectLeave(&pCache->CritSect);
867
868 RTCritSectDelete(&pCache->CritSect);
869}
870
871/**
872 * Initializes per endpoint cache data
873 * like the AVL tree used to access cached entries.
874 *
875 * @returns VBox status code.
876 * @param pEndpoint The endpoint to init the cache for,
877 * @param pClassFile The global class data for file endpoints.
878 */
879int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
880{
881 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
882
883 pEndpointCache->pCache = &pClassFile->Cache;
884
885 int rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
886 if (RT_SUCCESS(rc))
887 {
888 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
889 if (!pEndpointCache->pTree)
890 {
891 rc = VERR_NO_MEMORY;
892 RTSemRWDestroy(pEndpointCache->SemRWEntries);
893 }
894 }
895
896#ifdef VBOX_WITH_STATISTICS
897 if (RT_SUCCESS(rc))
898 {
899 STAMR3RegisterF(pClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred,
900 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
901 STAMUNIT_COUNT, "Number of deferred writes",
902 "/PDM/AsyncCompletion/File/%s/Cache/DeferredWrites", RTPathFilename(pEndpoint->Core.pszUri));
903 }
904#endif
905
906 return rc;
907}
908
909/**
910 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
911 *
912 * @returns IPRT status code.
913 * @param pNode The node to destroy.
914 * @param pvUser Opaque user data.
915 */
916static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
917{
918 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
919 PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
920 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;
921
922 while (pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
923 {
924 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
925 RTThreadSleep(250);
926 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
927 }
928
929 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
930 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
931
932 pdmacFileCacheEntryRemoveFromList(pEntry);
933 pCache->cbCached -= pEntry->cbData;
934
935 RTMemPageFree(pEntry->pbData);
936 RTMemFree(pEntry);
937
938 return VINF_SUCCESS;
939}
940
941/**
942 * Destroys all cache ressources used by the given endpoint.
943 *
944 * @returns nothing.
945 * @param pEndpoint The endpoint to the destroy.
946 */
947void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
948{
949 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
950 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
951
952 /* Make sure nobody is accessing the cache while we delete the tree. */
953 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
954 RTCritSectEnter(&pCache->CritSect);
955 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
956 RTCritSectLeave(&pCache->CritSect);
957 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
958
959 RTSemRWDestroy(pEndpointCache->SemRWEntries);
960
961#ifdef VBOX_WITH_STATISTICS
962 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
963
964 STAMR3Deregister(pEpClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred);
965#endif
966}
967
968static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
969{
970 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
971 PPDMACFILECACHEENTRY pEntry = NULL;
972
973 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
974
975 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
976 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
977 if (pEntry)
978 pdmacFileEpCacheEntryRef(pEntry);
979 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
980
981 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
982
983 return pEntry;
984}
985
986static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
987{
988 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
989 PPDMACFILECACHEENTRY pEntry = NULL;
990
991 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
992
993 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
994 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true);
995 if (pEntry)
996 pdmacFileEpCacheEntryRef(pEntry);
997 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
998
999 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1000
1001 return pEntry;
1002}
1003
1004static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
1005{
1006 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1007
1008 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1009 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1010 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
1011 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1012 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1013 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1014}
1015
1016/**
1017 * Allocates and initializes a new entry for the cache.
1018 * The entry has a reference count of 1.
1019 *
1020 * @returns Pointer to the new cache entry or NULL if out of memory.
1021 * @param pCache The cache the entry belongs to.
1022 * @param pEndoint The endpoint the entry holds data for.
1023 * @param off Start offset.
1024 * @param cbData Size of the cache entry.
1025 * @param pbBuffer Pointer to the buffer to use.
1026 * NULL if a new buffer should be allocated.
1027 * The buffer needs to have the same size of the entry.
1028 */
1029static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
1030 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1031 RTFOFF off, size_t cbData, uint8_t *pbBuffer)
1032{
1033 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
1034
1035 if (RT_UNLIKELY(!pEntryNew))
1036 return NULL;
1037
1038 pEntryNew->Core.Key = off;
1039 pEntryNew->Core.KeyLast = off + cbData - 1;
1040 pEntryNew->pEndpoint = pEndpoint;
1041 pEntryNew->pCache = pCache;
1042 pEntryNew->fFlags = 0;
1043 pEntryNew->cRefs = 1; /* We are using it now. */
1044 pEntryNew->pList = NULL;
1045 pEntryNew->cbData = cbData;
1046 pEntryNew->pWaitingHead = NULL;
1047 pEntryNew->pWaitingTail = NULL;
1048 pEntryNew->pbDataReplace = NULL;
1049 if (pbBuffer)
1050 pEntryNew->pbData = pbBuffer;
1051 else
1052 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1053
1054 if (RT_UNLIKELY(!pEntryNew->pbData))
1055 {
1056 RTMemFree(pEntryNew);
1057 return NULL;
1058 }
1059
1060 return pEntryNew;
1061}
1062
1063/**
1064 * Adds a segment to the waiting list for a cache entry
1065 * which is currently in progress.
1066 *
1067 * @returns nothing.
1068 * @param pEntry The cache entry to add the segment to.
1069 * @param pSeg The segment to add.
1070 */
1071DECLINLINE(void) pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
1072{
1073 pSeg->pNext = NULL;
1074
1075 if (pEntry->pWaitingHead)
1076 {
1077 AssertPtr(pEntry->pWaitingTail);
1078
1079 pEntry->pWaitingTail->pNext = pSeg;
1080 pEntry->pWaitingTail = pSeg;
1081 }
1082 else
1083 {
1084 Assert(!pEntry->pWaitingTail);
1085
1086 pEntry->pWaitingHead = pSeg;
1087 pEntry->pWaitingTail = pSeg;
1088 }
1089}
1090
1091/**
1092 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1093 * in exclusive mode.
1094 *
1095 * @returns true if the flag in fSet is set and the one in fClear is clear.
1096 * false othwerise.
1097 * The R/W semaphore is only held if true is returned.
1098 *
1099 * @param pEndpointCache The endpoint cache instance data.
1100 * @param pEntry The entry to check the flags for.
1101 * @param fSet The flag which is tested to be set.
1102 * @param fClear The flag which is tested to be clear.
1103 */
1104DECLINLINE(bool) pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(PPDMACFILEENDPOINTCACHE pEndpointCache,
1105 PPDMACFILECACHEENTRY pEntry,
1106 uint32_t fSet, uint32_t fClear)
1107{
1108 bool fPassed = ((pEntry->fFlags & fSet) && !(pEntry->fFlags & fClear));
1109
1110 if (fPassed)
1111 {
1112 /* Acquire the lock and check again becuase the completion callback might have raced us. */
1113 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1114
1115 fPassed = ((pEntry->fFlags & fSet) && !(pEntry->fFlags & fClear));
1116
1117 /* Drop the lock if we didn't passed the test. */
1118 if (!fPassed)
1119 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1120 }
1121
1122 return fPassed;
1123}
1124
/**
 * Advances the current segment buffer by the number of bytes transferred
 * or moves on to the next segment once the current one is used up.
 *
 * The macro works on the following variables of the invoking function:
 *      - paSegments / cSegments: the segment array and its element count.
 *      - iSegCurr:               index of the current segment.
 *      - pbSegBuf:               current position inside the current segment.
 *      - cbSegLeft:              bytes left in the current segment.
 *
 * When the last segment has been consumed no further array element is
 * touched: cbSegLeft stays 0 and pbSegBuf is set to NULL. The invoker must
 * not transfer more bytes than the segments provide.
 *
 * @param   BytesTransfered    Number of bytes consumed from the current
 *                             segment (evaluated exactly once).
 */
#define ADVANCE_SEGMENT_BUFFER(BytesTransfered) \
    do \
    { \
        size_t const cbAdvanceBy_ = (BytesTransfered); \
        cbSegLeft -= cbAdvanceBy_; \
        if (cbSegLeft) \
            pbSegBuf += cbAdvanceBy_; \
        else \
        { \
            iSegCurr++; \
            if ((size_t)iSegCurr < (size_t)(cSegments)) \
            { \
                cbSegLeft = paSegments[iSegCurr].cbSeg; \
                pbSegBuf  = (uint8_t *)paSegments[iSegCurr].pvSeg; \
            } \
            else \
                pbSegBuf = NULL; /* All segments consumed, don't read beyond the array. */ \
        } \
    } \
    while (0)
1143
1144/**
1145 * Reads the specified data from the endpoint using the cache if possible.
1146 *
1147 * @returns VBox status code.
1148 * @param pEndpoint The endpoint to read from.
1149 * @param pTask The task structure used as identifier for this request.
1150 * @param off The offset to start reading from.
1151 * @param paSegments Pointer to the array holding the destination buffers.
1152 * @param cSegments Number of segments in the array.
1153 * @param cbRead Number of bytes to read.
1154 */
1155int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1156 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1157 size_t cbRead)
1158{
1159 int rc = VINF_SUCCESS;
1160 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1161 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1162 PPDMACFILECACHEENTRY pEntry;
1163
1164 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
1165 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));
1166
1167 pTask->cbTransferLeft = cbRead;
1168 /* Set to completed to make sure that the task is valid while we access it. */
1169 ASMAtomicWriteBool(&pTask->fCompleted, true);
1170
1171 int iSegCurr = 0;
1172 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
1173 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
1174
1175 while (cbRead)
1176 {
1177 size_t cbToRead;
1178
1179 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1180
1181 /*
1182 * If there is no entry we try to create a new one eviciting unused pages
1183 * if the cache is full. If this is not possible we will pass the request through
1184 * and skip the caching (all entries may be still in progress so they can't
1185 * be evicted)
1186 * If we have an entry it can be in one of the LRU lists where the entry
1187 * contains data (recently used or frequently used LRU) so we can just read
1188 * the data we need and put the entry at the head of the frequently used LRU list.
1189 * In case the entry is in one of the ghost lists it doesn't contain any data.
1190 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1191 */
1192 if (pEntry)
1193 {
1194 RTFOFF OffDiff = off - pEntry->Core.Key;
1195
1196 AssertMsg(off >= pEntry->Core.Key,
1197 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1198 off, pEntry->Core.Key));
1199
1200 AssertPtr(pEntry->pList);
1201
1202 cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);
1203 cbRead -= cbToRead;
1204
1205 if (!cbRead)
1206 STAM_COUNTER_INC(&pCache->cHits);
1207 else
1208 STAM_COUNTER_INC(&pCache->cPartialHits);
1209
1210 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1211
1212 /* Ghost lists contain no data. */
1213#ifdef VBOX_WITH_2Q_CACHE
1214 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1215 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1216 {
1217#else
1218 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
1219 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1220 {
1221#endif
1222 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1223 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1224 0))
1225 {
1226 /* Entry is deprecated. Read data from the new buffer. */
1227 while (cbToRead)
1228 {
1229 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1230
1231 memcpy(pbSegBuf, pEntry->pbDataReplace + OffDiff, cbCopy);
1232
1233 ADVANCE_SEGMENT_BUFFER(cbCopy);
1234
1235 cbToRead -= cbCopy;
1236 off += cbCopy;
1237 OffDiff += cbCopy;
1238 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1239 }
1240 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1241 }
1242 else
1243 {
1244 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1245 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1246 PDMACFILECACHE_ENTRY_IS_DIRTY))
1247 {
1248 /* Entry didn't completed yet. Append to the list */
1249 while (cbToRead)
1250 {
1251 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1252
1253 pSeg->pTask = pTask;
1254 pSeg->uBufOffset = OffDiff;
1255 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1256 pSeg->pvBuf = pbSegBuf;
1257 pSeg->fWrite = false;
1258
1259 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1260
1261 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1262
1263 off += pSeg->cbTransfer;
1264 cbToRead -= pSeg->cbTransfer;
1265 OffDiff += pSeg->cbTransfer;
1266 }
1267 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1268 }
1269 else
1270 {
1271 /* Read as much as we can from the entry. */
1272 while (cbToRead)
1273 {
1274 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1275
1276 memcpy(pbSegBuf, pEntry->pbData + OffDiff, cbCopy);
1277
1278 ADVANCE_SEGMENT_BUFFER(cbCopy);
1279
1280 cbToRead -= cbCopy;
1281 off += cbCopy;
1282 OffDiff += cbCopy;
1283 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1284 }
1285 }
1286 }
1287
1288 /* Move this entry to the top position */
1289#ifdef VBOX_WITH_2Q_CACHE
1290 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1291 {
1292 RTCritSectEnter(&pCache->CritSect);
1293 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1294 RTCritSectLeave(&pCache->CritSect);
1295 }
1296#else
1297 RTCritSectEnter(&pCache->CritSect);
1298 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1299 RTCritSectLeave(&pCache->CritSect);
1300#endif
1301 }
1302 else
1303 {
1304 uint8_t *pbBuffer = NULL;
1305
1306 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1307
1308#ifdef VBOX_WITH_2Q_CACHE
1309 RTCritSectEnter(&pCache->CritSect);
1310 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1311 pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1312
1313 /* Move the entry to Am and fetch it to the cache. */
1314 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1315 RTCritSectLeave(&pCache->CritSect);
1316#else
1317 RTCritSectEnter(&pCache->CritSect);
1318 pdmacFileCacheUpdate(pCache, pEntry);
1319 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList, true, &pbBuffer);
1320
1321 /* Move the entry to T2 and fetch it to the cache. */
1322 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1323 RTCritSectLeave(&pCache->CritSect);
1324#endif
1325
1326 if (pbBuffer)
1327 pEntry->pbData = pbBuffer;
1328 else
1329 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1330 AssertPtr(pEntry->pbData);
1331
1332 while (cbToRead)
1333 {
1334 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1335
1336 AssertMsg(off >= pEntry->Core.Key,
1337 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1338 off, pEntry->Core.Key));
1339
1340 pSeg->pTask = pTask;
1341 pSeg->uBufOffset = OffDiff;
1342 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1343 pSeg->pvBuf = pbSegBuf;
1344
1345 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1346
1347 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1348
1349 off += pSeg->cbTransfer;
1350 OffDiff += pSeg->cbTransfer;
1351 cbToRead -= pSeg->cbTransfer;
1352 }
1353
1354 pdmacFileCacheReadFromEndpoint(pEntry);
1355 }
1356 pdmacFileEpCacheEntryRelease(pEntry);
1357 }
1358 else
1359 {
1360 /* No entry found for this offset. Get best fit entry and fetch the data to the cache. */
1361 size_t cbToReadAligned;
1362 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1363
1364 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1365 pEntryBestFit ? "" : "No ",
1366 off,
1367 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1368 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1369 pEntryBestFit ? pEntryBestFit->cbData : 0));
1370
1371 if (pEntryBestFit && ((off + (RTFOFF)cbRead) > pEntryBestFit->Core.Key))
1372 {
1373 cbToRead = pEntryBestFit->Core.Key - off;
1374 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1375 cbToReadAligned = cbToRead;
1376 }
1377 else
1378 {
1379 if (pEntryBestFit)
1380 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1381
1382 /*
1383 * Align the size to a 4KB boundary.
1384 * Memory size is aligned to a page boundary
1385 * and memory is wasted if the size is rahter small.
1386 * (For example reads with a size of 512 bytes.
1387 */
1388 cbToRead = cbRead;
1389 cbToReadAligned = RT_ALIGN_Z(cbRead, PAGE_SIZE);
1390
1391 /* Clip read to file size */
1392 cbToReadAligned = RT_MIN(pEndpoint->cbFile - off, cbToReadAligned);
1393 if (pEntryBestFit)
1394 cbToReadAligned = RT_MIN(cbToReadAligned, pEntryBestFit->Core.Key - off);
1395 }
1396
1397 cbRead -= cbToRead;
1398
1399 if (!cbRead)
1400 STAM_COUNTER_INC(&pCache->cMisses);
1401 else
1402 STAM_COUNTER_INC(&pCache->cPartialHits);
1403
1404 uint8_t *pbBuffer = NULL;
1405
1406#ifdef VBOX_WITH_2Q_CACHE
1407 RTCritSectEnter(&pCache->CritSect);
1408 bool fEnough = pdmacFileCacheReclaim(pCache, cbToReadAligned, true, &pbBuffer);
1409 RTCritSectLeave(&pCache->CritSect);
1410
1411 if (fEnough)
1412 {
1413 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToReadAligned));
1414#else
1415 RTCritSectEnter(&pCache->CritSect);
1416 size_t cbRemoved = pdmacFileCacheEvict(pCache, cbToReadAligned, true, &pbBuffer);
1417 RTCritSectLeave(&pCache->CritSect);
1418
1419 if (cbRemoved >= cbToReadAligned)
1420 {
1421 LogFlow(("Evicted %u bytes (%u requested). Creating new cache entry\n", cbRemoved, cbToReadAligned));
1422#endif
1423 PPDMACFILECACHEENTRY pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToReadAligned, pbBuffer);
1424 AssertPtr(pEntryNew);
1425
1426 RTCritSectEnter(&pCache->CritSect);
1427#ifdef VBOX_WITH_2Q_CACHE
1428 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1429#else
1430 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsed, pEntryNew);
1431#endif
1432 pCache->cbCached += cbToReadAligned;
1433 RTCritSectLeave(&pCache->CritSect);
1434
1435 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1436 uint32_t uBufOffset = 0;
1437
1438 while (cbToRead)
1439 {
1440 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1441
1442 pSeg->pTask = pTask;
1443 pSeg->uBufOffset = uBufOffset;
1444 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1445 pSeg->pvBuf = pbSegBuf;
1446
1447 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1448
1449 pdmacFileEpCacheEntryAddWaitingSegment(pEntryNew, pSeg);
1450
1451 off += pSeg->cbTransfer;
1452 cbToRead -= pSeg->cbTransfer;
1453 uBufOffset += pSeg->cbTransfer;
1454 }
1455
1456 pdmacFileCacheReadFromEndpoint(pEntryNew);
1457 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1458 }
1459 else
1460 {
1461 /*
1462 * There is not enough free space in the cache.
1463 * Pass the request directly to the I/O manager.
1464 */
1465 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1466
1467 while (cbToRead)
1468 {
1469 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1470 AssertPtr(pIoTask);
1471
1472 pIoTask->pEndpoint = pEndpoint;
1473 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
1474 pIoTask->Off = off;
1475 pIoTask->DataSeg.cbSeg = RT_MIN(cbToRead, cbSegLeft);
1476 pIoTask->DataSeg.pvSeg = pbSegBuf;
1477 pIoTask->pvUser = pTask;
1478 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1479
1480 off += pIoTask->DataSeg.cbSeg;
1481 cbToRead -= pIoTask->DataSeg.cbSeg;
1482
1483 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1484
1485 /* Send it off to the I/O manager. */
1486 pdmacFileEpAddTask(pEndpoint, pIoTask);
1487 }
1488 }
1489 }
1490 }
1491
1492 ASMAtomicWriteBool(&pTask->fCompleted, false);
1493
1494 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1495 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1496 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1497
1498 return rc;
1499}
1500
1501/**
1502 * Writes the given data to the endpoint using the cache if possible.
1503 *
1504 * @returns VBox status code.
1505 * @param pEndpoint The endpoint to write to.
1506 * @param pTask The task structure used as identifier for this request.
1507 * @param off The offset to start writing to
1508 * @param paSegments Pointer to the array holding the source buffers.
1509 * @param cSegments Number of segments in the array.
1510 * @param cbWrite Number of bytes to write.
1511 */
1512int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1513 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1514 size_t cbWrite)
1515{
1516 int rc = VINF_SUCCESS;
1517 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1518 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1519 PPDMACFILECACHEENTRY pEntry;
1520
1521 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1522 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1523
1524 pTask->cbTransferLeft = cbWrite;
1525 /* Set to completed to make sure that the task is valid while we access it. */
1526 ASMAtomicWriteBool(&pTask->fCompleted, true);
1527 ASMAtomicIncU32(&pEndpointCache->cWritesOutstanding);
1528
1529 int iSegCurr = 0;
1530 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
1531 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
1532
1533 while (cbWrite)
1534 {
1535 size_t cbToWrite;
1536
1537 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1538
1539 if (pEntry)
1540 {
1541 /* Write the data into the entry and mark it as dirty */
1542 AssertPtr(pEntry->pList);
1543
1544 RTFOFF OffDiff = off - pEntry->Core.Key;
1545
1546 AssertMsg(off >= pEntry->Core.Key,
1547 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1548 off, pEntry->Core.Key));
1549
1550 cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1551 cbWrite -= cbToWrite;
1552
1553 if (!cbWrite)
1554 STAM_COUNTER_INC(&pCache->cHits);
1555 else
1556 STAM_COUNTER_INC(&pCache->cPartialHits);
1557
1558 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1559
1560 /* Ghost lists contain no data. */
1561#ifdef VBOX_WITH_2Q_CACHE
1562 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1563 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1564#else
1565 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
1566 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1567#endif
1568 {
1569 /* Check if the buffer is deprecated. */
1570 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1571 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1572 0))
1573 {
1574 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1575 ("Entry is deprecated but not in progress\n"));
1576 AssertPtr(pEntry->pbDataReplace);
1577
1578 LogFlow(("Writing to deprecated buffer of entry %#p\n", pEntry));
1579
1580 /* Update the data from the write. */
1581 while (cbToWrite)
1582 {
1583 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1584
1585 memcpy(pEntry->pbDataReplace + OffDiff, pbSegBuf, cbCopy);
1586
1587 ADVANCE_SEGMENT_BUFFER(cbCopy);
1588
1589 cbToWrite-= cbCopy;
1590 off += cbCopy;
1591 OffDiff += cbCopy;
1592 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1593 }
1594 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1595 }
1596 else /* Deprecated flag not set */
1597 {
1598 /* If the entry is dirty it must be also in progress now and we have to defer updating it again. */
1599 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1600 PDMACFILECACHE_ENTRY_IS_DIRTY,
1601 0))
1602 {
1603 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1604 ("Entry is dirty but not in progress\n"));
1605 Assert(!pEntry->pbDataReplace);
1606
1607 /* Deprecate the current buffer. */
1608 if (!pEntry->pWaitingHead)
1609 pEntry->pbDataReplace = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1610
1611 /* If we are out of memory or have waiting segments
1612 * defer the write. */
1613 if (!pEntry->pbDataReplace || pEntry->pWaitingHead)
1614 {
1615 /* The data isn't written to the file yet */
1616 while (cbToWrite)
1617 {
1618 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1619
1620 pSeg->pTask = pTask;
1621 pSeg->uBufOffset = OffDiff;
1622 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1623 pSeg->pvBuf = pbSegBuf;
1624 pSeg->fWrite = true;
1625
1626 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1627
1628 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1629
1630 off += pSeg->cbTransfer;
1631 OffDiff += pSeg->cbTransfer;
1632 cbToWrite -= pSeg->cbTransfer;
1633 }
1634 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1635 }
1636 else /* Deprecate buffer */
1637 {
1638 LogFlow(("Deprecating buffer for entry %#p\n", pEntry));
1639 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DEPRECATED;
1640
1641#if 1
1642 /* Copy the data before the update. */
1643 if (OffDiff)
1644 memcpy(pEntry->pbDataReplace, pEntry->pbData, OffDiff);
1645
1646 /* Copy data behind the update. */
1647 if ((pEntry->cbData - OffDiff - cbToWrite) > 0)
1648 memcpy(pEntry->pbDataReplace + OffDiff + cbToWrite,
1649 pEntry->pbData + OffDiff + cbToWrite,
1650 (pEntry->cbData - OffDiff - cbToWrite));
1651#else
1652 /* A safer method but probably slower. */
1653 memcpy(pEntry->pbDataReplace, pEntry->pbData, pEntry->cbData);
1654#endif
1655
1656 /* Update the data from the write. */
1657 while (cbToWrite)
1658 {
1659 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1660
1661 memcpy(pEntry->pbDataReplace + OffDiff, pbSegBuf, cbCopy);
1662
1663 ADVANCE_SEGMENT_BUFFER(cbCopy);
1664
1665 cbToWrite-= cbCopy;
1666 off += cbCopy;
1667 OffDiff += cbCopy;
1668 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1669 }
1670
1671 /* We are done here. A new write is initiated if the current request completes. */
1672 }
1673
1674 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1675 }
1676 else /* Dirty bit not set */
1677 {
1678 /*
1679 * Check if a read is in progress for this entry.
1680 * We have to defer processing in that case.
1681 */
1682 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1683 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1684 0))
1685 {
1686 while (cbToWrite)
1687 {
1688 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1689
1690 pSeg->pTask = pTask;
1691 pSeg->uBufOffset = OffDiff;
1692 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1693 pSeg->pvBuf = pbSegBuf;
1694 pSeg->fWrite = true;
1695
1696 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1697
1698 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1699
1700 off += pSeg->cbTransfer;
1701 OffDiff += pSeg->cbTransfer;
1702 cbToWrite -= pSeg->cbTransfer;
1703 }
1704 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1705 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1706 }
1707 else /* I/O in progres flag not set */
1708 {
1709 /* Write as much as we can into the entry and update the file. */
1710 while (cbToWrite)
1711 {
1712 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1713
1714 memcpy(pEntry->pbData + OffDiff, pbSegBuf, cbCopy);
1715
1716 ADVANCE_SEGMENT_BUFFER(cbCopy);
1717
1718 cbToWrite-= cbCopy;
1719 off += cbCopy;
1720 OffDiff += cbCopy;
1721 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1722 }
1723
1724 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1725 pdmacFileCacheWriteToEndpoint(pEntry);
1726 }
1727 } /* Dirty bit not set */
1728
1729 /* Move this entry to the top position */
1730#ifdef VBOX_WITH_2Q_CACHE
1731 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1732 {
1733 RTCritSectEnter(&pCache->CritSect);
1734 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1735 RTCritSectLeave(&pCache->CritSect);
1736 } /* Deprecated flag not set. */
1737#else
1738 RTCritSectEnter(&pCache->CritSect);
1739 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1740 RTCritSectLeave(&pCache->CritSect);
1741#endif
1742 }
1743 }
1744 else /* Entry is on the ghost list */
1745 {
1746 uint8_t *pbBuffer = NULL;
1747
1748#ifdef VBOX_WITH_2Q_CACHE
1749 RTCritSectEnter(&pCache->CritSect);
1750 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1751 pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1752
1753 /* Move the entry to Am and fetch it to the cache. */
1754 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1755 RTCritSectLeave(&pCache->CritSect);
1756#else
1757 RTCritSectEnter(&pCache->CritSect);
1758 pdmacFileCacheUpdate(pCache, pEntry);
1759 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList, true, &pbBuffer);
1760
1761 /* Move the entry to T2 and fetch it to the cache. */
1762 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1763 RTCritSectLeave(&pCache->CritSect);
1764#endif
1765
1766 if (pbBuffer)
1767 pEntry->pbData = pbBuffer;
1768 else
1769 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1770 AssertPtr(pEntry->pbData);
1771
1772 while (cbToWrite)
1773 {
1774 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1775
1776 AssertMsg(off >= pEntry->Core.Key,
1777 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1778 off, pEntry->Core.Key));
1779
1780 pSeg->pTask = pTask;
1781 pSeg->uBufOffset = OffDiff;
1782 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1783 pSeg->pvBuf = pbSegBuf;
1784 pSeg->fWrite = true;
1785
1786 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1787
1788 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1789
1790 off += pSeg->cbTransfer;
1791 OffDiff += pSeg->cbTransfer;
1792 cbToWrite -= pSeg->cbTransfer;
1793 }
1794
1795 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1796 pdmacFileCacheReadFromEndpoint(pEntry);
1797 }
1798
1799 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
1800 pdmacFileEpCacheEntryRelease(pEntry);
1801 }
1802 else /* No entry found */
1803 {
1804 /*
1805 * No entry found. Try to create a new cache entry to store the data in and if that fails
1806 * write directly to the file.
1807 */
1808 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1809
1810 LogFlow(("%sest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1811 pEntryBestFit ? "B" : "No b",
1812 off,
1813 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1814 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1815 pEntryBestFit ? pEntryBestFit->cbData : 0));
1816
1817 if (pEntryBestFit && ((off + (RTFOFF)cbWrite) > pEntryBestFit->Core.Key))
1818 {
1819 cbToWrite = pEntryBestFit->Core.Key - off;
1820 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1821 }
1822 else
1823 {
1824 if (pEntryBestFit)
1825 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1826
1827 cbToWrite = cbWrite;
1828 }
1829
1830 cbWrite -= cbToWrite;
1831
1832 STAM_COUNTER_INC(&pCache->cMisses);
1833 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1834
1835 uint8_t *pbBuffer = NULL;
1836
1837#ifdef VBOX_WITH_2Q_CACHE
1838 RTCritSectEnter(&pCache->CritSect);
1839 bool fEnough = pdmacFileCacheReclaim(pCache, cbToWrite, true, &pbBuffer);
1840 RTCritSectLeave(&pCache->CritSect);
1841
1842 if (fEnough)
1843 {
1844 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToWrite));
1845#else
1846 RTCritSectEnter(&pCache->CritSect);
1847 size_t cbRemoved = pdmacFileCacheEvict(pCache, cbToWrite, true, &pbBuffer);
1848 RTCritSectLeave(&pCache->CritSect);
1849
1850 if (cbRemoved >= cbToWrite)
1851 {
1852 LogFlow(("Evicted %u bytes (%u requested). Creating new cache entry\n", cbRemoved, cbToWrite));
1853
1854#endif
1855 uint8_t *pbBuf;
1856 PPDMACFILECACHEENTRY pEntryNew;
1857
1858 pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToWrite, pbBuffer);
1859 AssertPtr(pEntryNew);
1860
1861 RTCritSectEnter(&pCache->CritSect);
1862#ifdef VBOX_WITH_2Q_CACHE
1863 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1864#else
1865 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsed, pEntryNew);
1866#endif
1867 pCache->cbCached += cbToWrite;
1868 RTCritSectLeave(&pCache->CritSect);
1869
1870 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1871
1872 off += cbToWrite;
1873 pbBuf = pEntryNew->pbData;
1874
1875 while (cbToWrite)
1876 {
1877 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1878
1879 memcpy(pbBuf, pbSegBuf, cbCopy);
1880
1881 ADVANCE_SEGMENT_BUFFER(cbCopy);
1882
1883 cbToWrite -= cbCopy;
1884 pbBuf += cbCopy;
1885 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1886 }
1887
1888 pEntryNew->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1889 pdmacFileCacheWriteToEndpoint(pEntryNew);
1890 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1891 }
1892 else
1893 {
1894 /*
1895 * There is not enough free space in the cache.
1896 * Pass the request directly to the I/O manager.
1897 */
1898 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
1899
1900 while (cbToWrite)
1901 {
1902 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1903 AssertPtr(pIoTask);
1904
1905 pIoTask->pEndpoint = pEndpoint;
1906 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
1907 pIoTask->Off = off;
1908 pIoTask->DataSeg.cbSeg = RT_MIN(cbToWrite, cbSegLeft);
1909 pIoTask->DataSeg.pvSeg = pbSegBuf;
1910 pIoTask->pvUser = pTask;
1911 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1912
1913 off += pIoTask->DataSeg.cbSeg;
1914 cbToWrite -= pIoTask->DataSeg.cbSeg;
1915
1916 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1917
1918 /* Send it off to the I/O manager. */
1919 pdmacFileEpAddTask(pEndpoint, pIoTask);
1920 }
1921 }
1922 }
1923 }
1924
1925 ASMAtomicWriteBool(&pTask->fCompleted, false);
1926
1927 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1928 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1929 {
1930 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1931
1932 /* Complete a pending flush if all writes have completed */
1933 uint32_t cWritesOutstanding = ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
1934 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
1935
1936 if (!cWritesOutstanding && pTaskFlush)
1937 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core);
1938 }
1939
1940 return VINF_SUCCESS;
1941}
1942
1943#undef ADVANCE_SEGMENT_BUFFER
1944
1945int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask)
1946{
1947 int rc = VINF_SUCCESS;
1948
1949 if (ASMAtomicReadPtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush))
1950 rc = VERR_RESOURCE_BUSY;
1951 else
1952 {
1953 if (ASMAtomicReadU32(&pEndpoint->DataCache.cWritesOutstanding) > 0)
1954 ASMAtomicWritePtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush, pTask);
1955 else
1956 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1957 }
1958
1959 return rc;
1960}
1961
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette