VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFile.cpp@ 26956

Last change on this file since 26956 was 26812, checked in by vboxsync, 15 years ago

AsyncCompletion: Don't immediately commit dirty buffers to the endpoint. This reduces the I/O load on the host and improves the I/O performance in the guest for frequently updated cache entries.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 35.1 KB
Line 
1/* $Id: PDMAsyncCompletionFile.cpp 26812 2010-02-25 20:55:08Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 */
5
6/*
7 * Copyright (C) 2006-2009 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
27//#define DEBUG
28#include "PDMInternal.h"
29#include <VBox/pdm.h>
30#include <VBox/mm.h>
31#include <VBox/vm.h>
32#include <VBox/err.h>
33#include <VBox/log.h>
34
35#include <iprt/asm.h>
36#include <iprt/assert.h>
37#include <iprt/critsect.h>
38#include <iprt/env.h>
39#include <iprt/file.h>
40#include <iprt/mem.h>
41#include <iprt/semaphore.h>
42#include <iprt/string.h>
43#include <iprt/thread.h>
44#include <iprt/path.h>
45
46#include "PDMAsyncCompletionFileInternal.h"
47
48/**
49 * Frees a task.
50 *
51 * @returns nothing.
52 * @param pEndpoint Pointer to the endpoint the segment was for.
53 * @param pTask The task to free.
54 */
55void pdmacFileTaskFree(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
56 PPDMACTASKFILE pTask)
57{
58 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
59
60 LogFlowFunc((": pEndpoint=%p pTask=%p\n", pEndpoint, pTask));
61
62 /* Try the per endpoint cache first. */
63 if (pEndpoint->cTasksCached < pEpClass->cTasksCacheMax)
64 {
65 /* Add it to the list. */
66 pEndpoint->pTasksFreeTail->pNext = pTask;
67 pEndpoint->pTasksFreeTail = pTask;
68 ASMAtomicIncU32(&pEndpoint->cTasksCached);
69 }
70 else if (false)
71 {
72 /* Bigger class cache */
73 }
74 else
75 {
76 Log(("Freeing task %p because all caches are full\n", pTask));
77 MMR3HeapFree(pTask);
78 }
79}
80
81/**
82 * Allocates a task segment
83 *
84 * @returns Pointer to the new task segment or NULL
85 * @param pEndpoint Pointer to the endpoint
86 */
87PPDMACTASKFILE pdmacFileTaskAlloc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
88{
89 PPDMACTASKFILE pTask = NULL;
90
91 /* Try the small per endpoint cache first. */
92 if (pEndpoint->pTasksFreeHead == pEndpoint->pTasksFreeTail)
93 {
94 /* Try the bigger endpoint class cache. */
95 PPDMASYNCCOMPLETIONEPCLASSFILE pEndpointClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
96
97#if 0
98 /* We start with the assigned slot id to distribute the load when allocating new tasks. */
99 unsigned iSlot = pEndpoint->iSlotStart;
100 do
101 {
102 pTask = (PPDMASYNCCOMPLETIONTASK)ASMAtomicXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], NULL);
103 if (pTask)
104 break;
105
106 iSlot = (iSlot + 1) % RT_ELEMENTS(pEndpointClass->apTaskCache);
107 } while (iSlot != pEndpoint->iSlotStart);
108#endif
109 if (!pTask)
110 {
111 /*
112 * Allocate completely new.
113 * If this fails we return NULL.
114 */
115 int rc = MMR3HeapAllocZEx(pEndpointClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
116 sizeof(PDMACTASKFILE),
117 (void **)&pTask);
118 if (RT_FAILURE(rc))
119 pTask = NULL;
120
121 LogFlow(("Allocated task %p\n", pTask));
122 }
123#if 0
124 else
125 {
126 /* Remove the first element and put the rest into the slot again. */
127 PPDMASYNCCOMPLETIONTASK pTaskHeadNew = pTask->pNext;
128
129 pTaskHeadNew->pPrev = NULL;
130
131 /* Put back into the list adding any new tasks. */
132 while (true)
133 {
134 bool fChanged = ASMAtomicCmpXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], pTaskHeadNew, NULL);
135
136 if (fChanged)
137 break;
138
139 PPDMASYNCCOMPLETIONTASK pTaskHead = (PPDMASYNCCOMPLETIONTASK)ASMAtomicXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], NULL);
140
141 /* The new task could be taken inbetween */
142 if (pTaskHead)
143 {
144 /* Go to the end of the probably much shorter new list. */
145 PPDMASYNCCOMPLETIONTASK pTaskTail = pTaskHead;
146 while (pTaskTail->pNext)
147 pTaskTail = pTaskTail->pNext;
148
149 /* Concatenate */
150 pTaskTail->pNext = pTaskHeadNew;
151
152 pTaskHeadNew = pTaskHead;
153 }
154 /* Another round trying to change the list. */
155 }
156 /* We got a task from the global cache so decrement the counter */
157 ASMAtomicDecU32(&pEndpointClass->cTasksCached);
158 }
159#endif
160 }
161 else
162 {
163 /* Grab a free task from the head. */
164 AssertMsg(pEndpoint->cTasksCached > 0, ("No tasks cached but list contains more than one element\n"));
165
166 pTask = pEndpoint->pTasksFreeHead;
167 pEndpoint->pTasksFreeHead = pTask->pNext;
168 ASMAtomicDecU32(&pEndpoint->cTasksCached);
169 }
170
171 pTask->pNext = NULL;
172
173 return pTask;
174}
175
176PPDMACTASKFILE pdmacFileEpGetNewTasks(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
177{
178 PPDMACTASKFILE pTasks = NULL;
179
180 /*
181 * Get pending tasks.
182 */
183 pTasks = (PPDMACTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpoint->pTasksNewHead, NULL);
184
185 /* Reverse the list to process in FIFO order. */
186 if (pTasks)
187 {
188 PPDMACTASKFILE pTask = pTasks;
189
190 pTasks = NULL;
191
192 while (pTask)
193 {
194 PPDMACTASKFILE pCur = pTask;
195 pTask = pTask->pNext;
196 pCur->pNext = pTasks;
197 pTasks = pCur;
198 }
199 }
200
201 return pTasks;
202}
203
204static void pdmacFileAioMgrWakeup(PPDMACEPFILEMGR pAioMgr)
205{
206 bool fWokenUp = ASMAtomicXchgBool(&pAioMgr->fWokenUp, true);
207
208 if (!fWokenUp)
209 {
210 int rc = VINF_SUCCESS;
211 bool fWaitingEventSem = ASMAtomicReadBool(&pAioMgr->fWaitingEventSem);
212
213 if (fWaitingEventSem)
214 rc = RTSemEventSignal(pAioMgr->EventSem);
215
216 AssertRC(rc);
217 }
218}
219
220static int pdmacFileAioMgrWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent)
221{
222 int rc = VINF_SUCCESS;
223
224 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, enmEvent);
225 Assert(!pAioMgr->fBlockingEventPending);
226 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, true);
227
228 /* Wakeup the async I/O manager */
229 pdmacFileAioMgrWakeup(pAioMgr);
230
231 /* Wait for completion. */
232 rc = RTSemEventWait(pAioMgr->EventSemBlock, RT_INDEFINITE_WAIT);
233 AssertRC(rc);
234
235 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, false);
236 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID);
237
238 return rc;
239}
240
241int pdmacFileAioMgrAddEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
242{
243 int rc;
244
245 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
246 AssertRCReturn(rc, rc);
247
248 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, pEndpoint);
249 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT);
250
251 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
252
253 if (RT_SUCCESS(rc))
254 ASMAtomicWritePtr((void * volatile *)&pEndpoint->pAioMgr, pAioMgr);
255
256 return rc;
257}
258
259static int pdmacFileAioMgrRemoveEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
260{
261 int rc;
262
263 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
264 AssertRCReturn(rc, rc);
265
266 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, pEndpoint);
267 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT);
268
269 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
270
271 return rc;
272}
273
274static int pdmacFileAioMgrCloseEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
275{
276 int rc;
277
278 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
279 AssertRCReturn(rc, rc);
280
281 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, pEndpoint);
282 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT);
283
284 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
285
286 return rc;
287}
288
289static int pdmacFileAioMgrShutdown(PPDMACEPFILEMGR pAioMgr)
290{
291 int rc;
292
293 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
294 AssertRCReturn(rc, rc);
295
296 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN);
297
298 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
299
300 return rc;
301}
302
303int pdmacFileEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
304{
305 PPDMACTASKFILE pNext;
306 do
307 {
308 pNext = pEndpoint->pTasksNewHead;
309 pTask->pNext = pNext;
310 } while (!ASMAtomicCmpXchgPtr((void * volatile *)&pEndpoint->pTasksNewHead, (void *)pTask, (void *)pNext));
311
312 pdmacFileAioMgrWakeup((PPDMACEPFILEMGR)ASMAtomicReadPtr((void * volatile *)&pEndpoint->pAioMgr));
313
314 return VINF_SUCCESS;
315}
316
317void pdmacFileEpTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
318{
319 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pvUser;
320
321 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
322 {
323 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, true);
324 }
325 else
326 {
327 Assert((uint32_t)pTask->DataSeg.cbSeg == pTask->DataSeg.cbSeg && (int32_t)pTask->DataSeg.cbSeg >= 0);
328 uint32_t uOld = ASMAtomicSubS32(&pTaskFile->cbTransferLeft, (int32_t)pTask->DataSeg.cbSeg);
329
330 if (!(uOld - pTask->DataSeg.cbSeg)
331 && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
332 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, true);
333 }
334}
335
336int pdmacFileEpTaskInitiate(PPDMASYNCCOMPLETIONTASK pTask,
337 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
338 PCPDMDATASEG paSegments, size_t cSegments,
339 size_t cbTransfer, PDMACTASKFILETRANSFER enmTransfer)
340{
341 int rc = VINF_SUCCESS;
342 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
343 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
344 PPDMACEPFILEMGR pAioMgr = pEpFile->pAioMgr;
345
346 Assert( (enmTransfer == PDMACTASKFILETRANSFER_READ)
347 || (enmTransfer == PDMACTASKFILETRANSFER_WRITE));
348
349 Assert((uint32_t)cbTransfer == cbTransfer && (int32_t)cbTransfer >= 0);
350 ASMAtomicWriteS32(&pTaskFile->cbTransferLeft, (int32_t)cbTransfer);
351 ASMAtomicWriteBool(&pTaskFile->fCompleted, false);
352
353 for (unsigned i = 0; i < cSegments; i++)
354 {
355 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
356 AssertPtr(pIoTask);
357
358 pIoTask->pEndpoint = pEpFile;
359 pIoTask->enmTransferType = enmTransfer;
360 pIoTask->Off = off;
361 pIoTask->DataSeg.cbSeg = paSegments[i].cbSeg;
362 pIoTask->DataSeg.pvSeg = paSegments[i].pvSeg;
363 pIoTask->pvUser = pTaskFile;
364 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
365
366 /* Send it off to the I/O manager. */
367 pdmacFileEpAddTask(pEpFile, pIoTask);
368 off += paSegments[i].cbSeg;
369 cbTransfer -= paSegments[i].cbSeg;
370 }
371
372 AssertMsg(!cbTransfer, ("Incomplete transfer %u bytes left\n", cbTransfer));
373
374 if (ASMAtomicReadS32(&pTaskFile->cbTransferLeft) == 0
375 && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
376 pdmR3AsyncCompletionCompleteTask(pTask, false);
377 else
378 rc = VINF_AIO_TASK_PENDING;
379
380 return rc;
381}
382
383/**
384 * Creates a new async I/O manager.
385 *
386 * @returns VBox status code.
387 * @param pEpClass Pointer to the endpoint class data.
388 * @param ppAioMgr Where to store the pointer to the new async I/O manager on success.
389 * @param fFailsafe Flag to force a failsafe manager even if the global flag is not set.
390 */
391int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr, bool fFailsafe)
392{
393 int rc = VINF_SUCCESS;
394 PPDMACEPFILEMGR pAioMgrNew;
395
396 LogFlowFunc((": Entered\n"));
397
398 rc = MMR3HeapAllocZEx(pEpClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMACEPFILEMGR), (void **)&pAioMgrNew);
399 if (RT_SUCCESS(rc))
400 {
401 pAioMgrNew->fFailsafe = fFailsafe || pEpClass->fFailsafe;
402
403 rc = RTSemEventCreate(&pAioMgrNew->EventSem);
404 if (RT_SUCCESS(rc))
405 {
406 rc = RTSemEventCreate(&pAioMgrNew->EventSemBlock);
407 if (RT_SUCCESS(rc))
408 {
409 rc = RTCritSectInit(&pAioMgrNew->CritSectBlockingEvent);
410 if (RT_SUCCESS(rc))
411 {
412 /* Init the rest of the manager. */
413 if (!pAioMgrNew->fFailsafe)
414 rc = pdmacFileAioMgrNormalInit(pAioMgrNew);
415
416 if (RT_SUCCESS(rc))
417 {
418 pAioMgrNew->enmState = PDMACEPFILEMGRSTATE_RUNNING;
419
420 rc = RTThreadCreateF(&pAioMgrNew->Thread,
421 pAioMgrNew->fFailsafe
422 ? pdmacFileAioMgrFailsafe
423 : pdmacFileAioMgrNormal,
424 pAioMgrNew,
425 0,
426 RTTHREADTYPE_IO,
427 0,
428 "AioMgr%d-%s", pEpClass->cAioMgrs,
429 pAioMgrNew->fFailsafe
430 ? "F"
431 : "N");
432 if (RT_SUCCESS(rc))
433 {
434 /* Link it into the list. */
435 RTCritSectEnter(&pEpClass->CritSect);
436 pAioMgrNew->pNext = pEpClass->pAioMgrHead;
437 if (pEpClass->pAioMgrHead)
438 pEpClass->pAioMgrHead->pPrev = pAioMgrNew;
439 pEpClass->pAioMgrHead = pAioMgrNew;
440 pEpClass->cAioMgrs++;
441 RTCritSectLeave(&pEpClass->CritSect);
442
443 *ppAioMgr = pAioMgrNew;
444
445 Log(("PDMAC: Successfully created new file AIO Mgr {%s}\n", RTThreadGetName(pAioMgrNew->Thread)));
446 return VINF_SUCCESS;
447 }
448 pdmacFileAioMgrNormalDestroy(pAioMgrNew);
449 }
450 RTCritSectDelete(&pAioMgrNew->CritSectBlockingEvent);
451 }
452 RTSemEventDestroy(pAioMgrNew->EventSem);
453 }
454 RTSemEventDestroy(pAioMgrNew->EventSemBlock);
455 }
456 MMR3HeapFree(pAioMgrNew);
457 }
458
459 LogFlowFunc((": Leave rc=%Rrc\n", rc));
460
461 return rc;
462}
463
464/**
465 * Destroys a async I/O manager.
466 *
467 * @returns nothing.
468 * @param pAioMgr The async I/O manager to destroy.
469 */
470static void pdmacFileAioMgrDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile, PPDMACEPFILEMGR pAioMgr)
471{
472 int rc = pdmacFileAioMgrShutdown(pAioMgr);
473 AssertRC(rc);
474
475 /* Unlink from the list. */
476 rc = RTCritSectEnter(&pEpClassFile->CritSect);
477 AssertRC(rc);
478
479 PPDMACEPFILEMGR pPrev = pAioMgr->pPrev;
480 PPDMACEPFILEMGR pNext = pAioMgr->pNext;
481
482 if (pPrev)
483 pPrev->pNext = pNext;
484 else
485 pEpClassFile->pAioMgrHead = pNext;
486
487 if (pNext)
488 pNext->pPrev = pPrev;
489
490 pEpClassFile->cAioMgrs--;
491 rc = RTCritSectLeave(&pEpClassFile->CritSect);
492 AssertRC(rc);
493
494 /* Free the ressources. */
495 RTCritSectDelete(&pAioMgr->CritSectBlockingEvent);
496 RTSemEventDestroy(pAioMgr->EventSem);
497 if (!pAioMgr->fFailsafe)
498 pdmacFileAioMgrNormalDestroy(pAioMgr);
499
500 MMR3HeapFree(pAioMgr);
501}
502
503static int pdmacFileBwMgrInitialize(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile,
504 PCFGMNODE pCfgNode, PPPDMACFILEBWMGR ppBwMgr)
505{
506 int rc = VINF_SUCCESS;
507 PPDMACFILEBWMGR pBwMgr = NULL;
508
509 rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
510 sizeof(PDMACFILEBWMGR),
511 (void **)&pBwMgr);
512 if (RT_SUCCESS(rc))
513 {
514 /* Init I/O flow control. */
515 rc = CFGMR3QueryU32Def(pCfgNode, "VMTransferPerSecMax", &pBwMgr->cbVMTransferPerSecMax, UINT32_MAX);
516 AssertLogRelRCReturn(rc, rc);
517 rc = CFGMR3QueryU32Def(pCfgNode, "VMTransferPerSecStart", &pBwMgr->cbVMTransferPerSecStart, _1M);
518 AssertLogRelRCReturn(rc, rc);
519 rc = CFGMR3QueryU32Def(pCfgNode, "VMTransferPerSecStep", &pBwMgr->cbVMTransferPerSecStep, _1M);
520 AssertLogRelRCReturn(rc, rc);
521
522 pBwMgr->cbVMTransferAllowed = pBwMgr->cbVMTransferPerSecStart;
523 pBwMgr->tsUpdatedLast = RTTimeSystemNanoTS();
524
525 *ppBwMgr = pBwMgr;
526 }
527
528 return rc;
529}
530
531static void pdmacFileBwMgrDestroy(PPDMACFILEBWMGR pBwMgr)
532{
533 MMR3HeapFree(pBwMgr);
534}
535
536static void pdmacFileBwRef(PPDMACFILEBWMGR pBwMgr)
537{
538 pBwMgr->cRefs++;
539}
540
541static void pdmacFileBwUnref(PPDMACFILEBWMGR pBwMgr)
542{
543 Assert(pBwMgr->cRefs > 0);
544 pBwMgr->cRefs--;
545}
546
547bool pdmacFileBwMgrIsTransferAllowed(PPDMACFILEBWMGR pBwMgr, uint32_t cbTransfer)
548{
549 bool fAllowed = false;
550
551 LogFlowFunc(("pBwMgr=%p cbTransfer=%u\n", pBwMgr, cbTransfer));
552
553 uint32_t cbOld = ASMAtomicSubU32(&pBwMgr->cbVMTransferAllowed, cbTransfer);
554 if (RT_LIKELY(cbOld >= cbTransfer))
555 fAllowed = true;
556 else
557 {
558 /* We are out of ressources Check if we can update again. */
559 uint64_t tsNow = RTTimeSystemNanoTS();
560 uint64_t tsUpdatedLast = ASMAtomicUoReadU64(&pBwMgr->tsUpdatedLast);
561
562 if (tsNow - tsUpdatedLast >= (1000*1000*1000))
563 {
564 if (ASMAtomicCmpXchgU64(&pBwMgr->tsUpdatedLast, tsNow, tsUpdatedLast))
565 {
566 if (pBwMgr->cbVMTransferPerSecStart < pBwMgr->cbVMTransferPerSecMax)
567 {
568 pBwMgr->cbVMTransferPerSecStart = RT_MIN(pBwMgr->cbVMTransferPerSecMax, pBwMgr->cbVMTransferPerSecStart + pBwMgr->cbVMTransferPerSecStep);
569 LogFlow(("AIOMgr: Increasing maximum bandwidth to %u bytes/sec\n", pBwMgr->cbVMTransferPerSecStart));
570 }
571
572 /* Update */
573 ASMAtomicWriteU32(&pBwMgr->cbVMTransferAllowed, pBwMgr->cbVMTransferPerSecStart - cbTransfer);
574 fAllowed = true;
575 LogFlow(("AIOMgr: Refreshed bandwidth\n"));
576 }
577 }
578 else
579 ASMAtomicAddU32(&pBwMgr->cbVMTransferAllowed, cbTransfer);
580 }
581
582 LogFlowFunc(("fAllowed=%RTbool\n", fAllowed));
583
584 return fAllowed;
585}
586
587static int pdmacFileInitialize(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals, PCFGMNODE pCfgNode)
588{
589 int rc = VINF_SUCCESS;
590 RTFILEAIOLIMITS AioLimits; /** < Async I/O limitations. */
591
592 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
593
594 rc = RTFileAioGetLimits(&AioLimits);
595#ifdef DEBUG
596 if (RT_SUCCESS(rc) && RTEnvExist("VBOX_ASYNC_IO_FAILBACK"))
597 rc = VERR_ENV_VAR_NOT_FOUND;
598#endif
599 if (RT_FAILURE(rc))
600 {
601 LogRel(("AIO: Async I/O manager not supported (rc=%Rrc). Falling back to failsafe manager\n",
602 rc));
603 pEpClassFile->fFailsafe = true;
604 }
605 else
606 {
607 pEpClassFile->uBitmaskAlignment = AioLimits.cbBufferAlignment ? ~((RTR3UINTPTR)AioLimits.cbBufferAlignment - 1) : RTR3UINTPTR_MAX;
608 pEpClassFile->cReqsOutstandingMax = AioLimits.cReqsOutstandingMax;
609
610 /* The user can force the failsafe manager. */
611 rc = CFGMR3QueryBoolDef(pCfgNode, "UseFailsafeIo", &pEpClassFile->fFailsafe, false);
612 AssertLogRelRCReturn(rc, rc);
613
614 if (pEpClassFile->fFailsafe)
615 LogRel(("AIOMgr: Failsafe I/O was requested by user\n"));
616 }
617
618 /* Init critical section. */
619 rc = RTCritSectInit(&pEpClassFile->CritSect);
620 if (RT_SUCCESS(rc))
621 {
622 /* Check if the host cache should be used too. */
623#ifndef RT_OS_LINUX
624 rc = CFGMR3QueryBoolDef(pCfgNode, "HostCacheEnabled", &pEpClassFile->fHostCacheEnabled, false);
625 AssertLogRelRCReturn(rc, rc);
626#else
627 /*
628 * Host cache + async I/O is not supported on Linux. Check if the user enabled the cache,
629 * leave a warning and disable it always.
630 */
631 bool fDummy;
632 rc = CFGMR3QueryBool(pCfgNode, "HostCacheEnabled", &fDummy);
633 if (RT_SUCCESS(rc))
634 LogRel(("AIOMgr: The host cache is not supported with async I/O on Linux\n"));
635
636 pEpClassFile->fHostCacheEnabled = false;
637#endif
638
639 /* Check if the cache was disabled by the user. */
640 rc = CFGMR3QueryBoolDef(pCfgNode, "CacheEnabled", &pEpClassFile->fCacheEnabled, true);
641 AssertLogRelRCReturn(rc, rc);
642
643 if (pEpClassFile->fCacheEnabled)
644 {
645 /* Init cache structure */
646 rc = pdmacFileCacheInit(pEpClassFile, pCfgNode);
647 if (RT_FAILURE(rc))
648 {
649 pEpClassFile->fCacheEnabled = false;
650 LogRel(("AIOMgr: Failed to initialise the cache (rc=%Rrc), disabled caching\n"));
651 }
652 }
653 else
654 LogRel(("AIOMgr: Cache was globally disabled\n"));
655
656 rc = pdmacFileBwMgrInitialize(pEpClassFile, pCfgNode, &pEpClassFile->pBwMgr);
657 if (RT_FAILURE(rc))
658 RTCritSectDelete(&pEpClassFile->CritSect);
659 }
660
661 return rc;
662}
663
664static void pdmacFileTerminate(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals)
665{
666 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
667
668 /* All endpoints should be closed at this point. */
669 AssertMsg(!pEpClassFile->Core.pEndpointsHead, ("There are still endpoints left\n"));
670
671 /* Destroy all left async I/O managers. */
672 while (pEpClassFile->pAioMgrHead)
673 pdmacFileAioMgrDestroy(pEpClassFile, pEpClassFile->pAioMgrHead);
674
675 /* Destroy the cache. */
676 if (pEpClassFile->fCacheEnabled)
677 pdmacFileCacheDestroy(pEpClassFile);
678
679 RTCritSectDelete(&pEpClassFile->CritSect);
680 pdmacFileBwMgrDestroy(pEpClassFile->pBwMgr);
681}
682
683static int pdmacFileEpInitialize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint,
684 const char *pszUri, uint32_t fFlags)
685{
686 int rc = VINF_SUCCESS;
687 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
688 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
689 bool fUseFailsafeManager = pEpClassFile->fFailsafe;
690
691 AssertMsgReturn((fFlags & ~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_CACHING)) == 0,
692 ("PDMAsyncCompletion: Invalid flag specified\n"), VERR_INVALID_PARAMETER);
693
694 unsigned fFileFlags = fFlags & PDMACEP_FILE_FLAGS_READ_ONLY
695 ? RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE
696 : RTFILE_O_READWRITE | RTFILE_O_OPEN | RTFILE_O_DENY_WRITE;
697
698 if (!pEpClassFile->fFailsafe)
699 {
700 fFileFlags |= (RTFILE_O_ASYNC_IO | RTFILE_O_WRITE_THROUGH);
701
702 /*
703 * We only disable the cache if the size of the file is a multiple of 512.
704 * Certain hosts like Windows, Linux and Solaris require that transfer sizes
705 * are aligned to the volume sector size.
706 * If not we just make sure that the data is written to disk with RTFILE_O_WRITE_THROUGH
707 * which will trash the host cache but ensures that the host cache will not
708 * contain dirty buffers.
709 */
710 RTFILE File = NIL_RTFILE;
711
712 rc = RTFileOpen(&File, pszUri, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
713 if (RT_SUCCESS(rc))
714 {
715 uint64_t cbSize;
716
717 rc = RTFileGetSize(File, &cbSize);
718 if (RT_SUCCESS(rc) && ((cbSize % 512) == 0))
719 {
720 fFileFlags &= ~RTFILE_O_WRITE_THROUGH;
721
722#if defined(RT_OS_LINUX)
723 AssertMsg(!pEpClassFile->fHostCacheEnabled, ("Host cache + async I/O is not supported on Linux\n"));
724 fFileFlags |= RTFILE_O_NO_CACHE;
725#else
726 if (!pEpClassFile->fHostCacheEnabled)
727 fFileFlags |= RTFILE_O_NO_CACHE;
728#endif
729 }
730
731 pEpFile->cbFile = cbSize;
732
733 RTFileClose(File);
734 }
735 }
736
737 /* Open with final flags. */
738 rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
739 if ((rc == VERR_INVALID_FUNCTION) || (rc == VERR_INVALID_PARAMETER))
740 {
741 LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed with %Rrc\n",
742 pszUri, fFileFlags, rc));
743 /*
744 * Solaris doesn't support directio on ZFS so far. :-\
745 * Trying to enable it returns VERR_INVALID_FUNCTION
746 * (ENOTTY). Remove it and hope for the best.
747 * ZFS supports write throttling in case applications
748 * write more data than can be synced to the disk
749 * without blocking the whole application.
750 *
751 * On Linux we have the same problem with cifs.
752 * Have to disable async I/O here too because it requires O_DIRECT.
753 */
754 fFileFlags &= ~RTFILE_O_NO_CACHE;
755
756#ifdef RT_OS_LINUX
757 fFileFlags &= ~RTFILE_O_ASYNC_IO;
758 fUseFailsafeManager = true;
759#endif
760
761 /* Open again. */
762 rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
763
764 if (RT_FAILURE(rc))
765 {
766 LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed AGAIN(!) with %Rrc\n",
767 pszUri, fFileFlags, rc));
768 }
769 }
770
771 if (RT_SUCCESS(rc))
772 {
773 pEpFile->fFlags = fFileFlags;
774
775 rc = RTFileGetSize(pEpFile->File, (uint64_t *)&pEpFile->cbFile);
776 if (RT_SUCCESS(rc) && (pEpFile->cbFile == 0))
777 {
778 /* Could be a block device */
779 rc = RTFileSeek(pEpFile->File, 0, RTFILE_SEEK_END, (uint64_t *)&pEpFile->cbFile);
780 }
781
782 if (RT_SUCCESS(rc))
783 {
784 /* Initialize the segment cache */
785 rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
786 sizeof(PDMACTASKFILE),
787 (void **)&pEpFile->pTasksFreeHead);
788 if (RT_SUCCESS(rc))
789 {
790 PPDMACEPFILEMGR pAioMgr = NULL;
791
792 pEpFile->pTasksFreeTail = pEpFile->pTasksFreeHead;
793 pEpFile->cTasksCached = 0;
794 pEpFile->pBwMgr = pEpClassFile->pBwMgr;
795 pdmacFileBwRef(pEpFile->pBwMgr);
796
797 if (fUseFailsafeManager)
798 {
799 /* Safe mode. Every file has its own async I/O manager. */
800 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, true);
801 AssertRC(rc);
802 }
803 else
804 {
805 if ( (fFlags & PDMACEP_FILE_FLAGS_CACHING)
806 && (pEpClassFile->fCacheEnabled))
807 {
808 pEpFile->fCaching = true;
809 rc = pdmacFileEpCacheInit(pEpFile, pEpClassFile);
810 if (RT_FAILURE(rc))
811 {
812 LogRel(("AIOMgr: Endpoint for \"%s\" was opened with caching but initializing cache failed. Disabled caching\n", pszUri));
813 pEpFile->fCaching = false;
814 }
815 }
816
817 pAioMgr = pEpClassFile->pAioMgrHead;
818
819 /* Check for an idling not failsafe one or create new if not found */
820 while (pAioMgr && pAioMgr->fFailsafe)
821 pAioMgr = pAioMgr->pNext;
822
823 if (!pAioMgr)
824 {
825 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, false);
826 AssertRC(rc);
827 }
828 }
829
830 pEpFile->AioMgr.pTreeRangesLocked = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
831 if (!pEpFile->AioMgr.pTreeRangesLocked)
832 rc = VERR_NO_MEMORY;
833 else
834 {
835 pEpFile->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
836
837 /* Assign the endpoint to the thread. */
838 rc = pdmacFileAioMgrAddEndpoint(pAioMgr, pEpFile);
839 if (RT_FAILURE(rc))
840 {
841 RTMemFree(pEpFile->AioMgr.pTreeRangesLocked);
842 MMR3HeapFree(pEpFile->pTasksFreeHead);
843 pdmacFileBwUnref(pEpFile->pBwMgr);
844 }
845 }
846 }
847 }
848
849 if (RT_FAILURE(rc))
850 RTFileClose(pEpFile->File);
851 }
852
853#ifdef VBOX_WITH_STATISTICS
854 if (RT_SUCCESS(rc))
855 {
856 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatRead,
857 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
858 STAMUNIT_TICKS_PER_CALL, "Time taken to read from the endpoint",
859 "/PDM/AsyncCompletion/File/%s/Read", RTPathFilename(pEpFile->Core.pszUri));
860
861 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatWrite,
862 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
863 STAMUNIT_TICKS_PER_CALL, "Time taken to write to the endpoint",
864 "/PDM/AsyncCompletion/File/%s/Write", RTPathFilename(pEpFile->Core.pszUri));
865 }
866#endif
867
868 return rc;
869}
870
871static int pdmacFileEpRangesLockedDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
872{
873 AssertMsgFailed(("The locked ranges tree should be empty at that point\n"));
874 return VINF_SUCCESS;
875}
876
877static int pdmacFileEpClose(PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
878{
879 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
880 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
881
882 /* Make sure that all tasks finished for this endpoint. */
883 int rc = pdmacFileAioMgrCloseEndpoint(pEpFile->pAioMgr, pEpFile);
884 AssertRC(rc);
885
886 /*
887 * If the async I/O manager is in failsafe mode this is the only endpoint
888 * he processes and thus can be destroyed now.
889 */
890 if (pEpFile->pAioMgr->fFailsafe)
891 pdmacFileAioMgrDestroy(pEpClassFile, pEpFile->pAioMgr);
892
893 /* Free cached tasks. */
894 PPDMACTASKFILE pTask = pEpFile->pTasksFreeHead;
895
896 while (pTask)
897 {
898 PPDMACTASKFILE pTaskFree = pTask;
899 pTask = pTask->pNext;
900 MMR3HeapFree(pTaskFree);
901 }
902
903 /* Free the cached data. */
904 if (pEpFile->fCaching)
905 pdmacFileEpCacheDestroy(pEpFile);
906
907 /* Remove from the bandwidth manager */
908 pdmacFileBwUnref(pEpFile->pBwMgr);
909
910 /* Destroy the locked ranges tree now. */
911 RTAvlrFileOffsetDestroy(pEpFile->AioMgr.pTreeRangesLocked, pdmacFileEpRangesLockedDestroy, NULL);
912
913 RTFileClose(pEpFile->File);
914
915#ifdef VBOX_WITH_STATISTICS
916 STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatRead);
917 STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatWrite);
918#endif
919
920 return VINF_SUCCESS;
921}
922
923static int pdmacFileEpRead(PPDMASYNCCOMPLETIONTASK pTask,
924 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
925 PCPDMDATASEG paSegments, size_t cSegments,
926 size_t cbRead)
927{
928 int rc = VINF_SUCCESS;
929 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
930
931 STAM_PROFILE_ADV_START(&pEpFile->StatRead, Read);
932
933 if (pEpFile->fCaching)
934 rc = pdmacFileEpCacheRead(pEpFile, (PPDMASYNCCOMPLETIONTASKFILE)pTask,
935 off, paSegments, cSegments, cbRead);
936 else
937 rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbRead,
938 PDMACTASKFILETRANSFER_READ);
939
940 STAM_PROFILE_ADV_STOP(&pEpFile->StatRead, Read);
941
942 return rc;
943}
944
945static int pdmacFileEpWrite(PPDMASYNCCOMPLETIONTASK pTask,
946 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
947 PCPDMDATASEG paSegments, size_t cSegments,
948 size_t cbWrite)
949{
950 int rc = VINF_SUCCESS;
951 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
952
953 if (RT_UNLIKELY(pEpFile->fReadonly))
954 return VERR_NOT_SUPPORTED;
955
956 STAM_PROFILE_ADV_START(&pEpFile->StatWrite, Write);
957
958 if (pEpFile->fCaching)
959 rc = pdmacFileEpCacheWrite(pEpFile, (PPDMASYNCCOMPLETIONTASKFILE)pTask,
960 off, paSegments, cSegments, cbWrite);
961 else
962 rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbWrite,
963 PDMACTASKFILETRANSFER_WRITE);
964
965 STAM_PROFILE_ADV_STOP(&pEpFile->StatWrite, Write);
966
967 return rc;
968}
969
970static int pdmacFileEpFlush(PPDMASYNCCOMPLETIONTASK pTask,
971 PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
972{
973 int rc = VINF_SUCCESS;
974 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
975 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
976
977 if (RT_UNLIKELY(pEpFile->fReadonly))
978 return VERR_NOT_SUPPORTED;
979
980 pTaskFile->cbTransferLeft = 0;
981
982 if (pEpFile->fCaching)
983 rc = pdmacFileEpCacheFlush(pEpFile, pTaskFile);
984 else
985 {
986 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
987 AssertPtr(pIoTask);
988
989 pIoTask->pEndpoint = pEpFile;
990 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_FLUSH;
991 pIoTask->pvUser = pTaskFile;
992 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
993 pdmacFileEpAddTask(pEpFile, pIoTask);
994 rc = VINF_AIO_TASK_PENDING;
995 }
996
997 return rc;
998}
999
1000static int pdmacFileEpGetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t *pcbSize)
1001{
1002 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1003
1004 *pcbSize = ASMAtomicReadU64(&pEpFile->cbFile);
1005
1006 return VINF_SUCCESS;
1007}
1008
1009const PDMASYNCCOMPLETIONEPCLASSOPS g_PDMAsyncCompletionEndpointClassFile =
1010{
1011 /* u32Version */
1012 PDMAC_EPCLASS_OPS_VERSION,
1013 /* pcszName */
1014 "File",
1015 /* enmClassType */
1016 PDMASYNCCOMPLETIONEPCLASSTYPE_FILE,
1017 /* cbEndpointClassGlobal */
1018 sizeof(PDMASYNCCOMPLETIONEPCLASSFILE),
1019 /* cbEndpoint */
1020 sizeof(PDMASYNCCOMPLETIONENDPOINTFILE),
1021 /* cbTask */
1022 sizeof(PDMASYNCCOMPLETIONTASKFILE),
1023 /* pfnInitialize */
1024 pdmacFileInitialize,
1025 /* pfnTerminate */
1026 pdmacFileTerminate,
1027 /* pfnEpInitialize. */
1028 pdmacFileEpInitialize,
1029 /* pfnEpClose */
1030 pdmacFileEpClose,
1031 /* pfnEpRead */
1032 pdmacFileEpRead,
1033 /* pfnEpWrite */
1034 pdmacFileEpWrite,
1035 /* pfnEpFlush */
1036 pdmacFileEpFlush,
1037 /* pfnEpGetSize */
1038 pdmacFileEpGetSize,
1039 /* u32VersionEnd */
1040 PDMAC_EPCLASS_OPS_VERSION
1041};
1042
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette