VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFile.cpp@ 95512

Last change on this file since 95512 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 47.6 KB
Line 
1/* $Id: PDMAsyncCompletionFile.cpp 93115 2022-01-01 11:31:46Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include "PDMInternal.h"
24#include <VBox/vmm/pdm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/vm.h>
27#include <VBox/err.h>
28#include <VBox/log.h>
29#include <VBox/dbg.h>
30#include <VBox/vmm/uvm.h>
31#include <VBox/vmm/tm.h>
32
33#include <iprt/asm.h>
34#include <iprt/assert.h>
35#include <iprt/critsect.h>
36#include <iprt/env.h>
37#include <iprt/file.h>
38#include <iprt/mem.h>
39#include <iprt/semaphore.h>
40#include <iprt/string.h>
41#include <iprt/thread.h>
42#include <iprt/path.h>
43#include <iprt/rand.h>
44
45#include "PDMAsyncCompletionFileInternal.h"
46
47
48/*********************************************************************************************************************************
49* Internal Functions *
50*********************************************************************************************************************************/
#ifdef VBOX_WITH_DEBUGGER
/* Debugger command handlers; the definitions follow further down in this file. */
static FNDBGCCMD pdmacEpFileErrorInject;
#endif
#if defined(VBOX_WITH_DEBUGGER) && defined(PDM_ASYNC_COMPLETION_FILE_WITH_DELAY)
static FNDBGCCMD pdmacEpFileDelayInject;
#endif
57
58
59/*********************************************************************************************************************************
60* Global Variables *
61*********************************************************************************************************************************/
#ifdef VBOX_WITH_DEBUGGER
/** Argument descriptors for the 'injecterror' debugger command. */
static const DBGCVARDESC g_aInjectErrorArgs[] =
{
    /* cTimesMin, cTimesMax, enmCategory,        fFlags, pszName,     pszDescription */
    {  1,         1,         DBGCVAR_CAT_STRING, 0,      "direction", "write/read." },
    {  1,         1,         DBGCVAR_CAT_STRING, 0,      "filename",  "Filename." },
    {  1,         1,         DBGCVAR_CAT_NUMBER, 0,      "errcode",   "VBox status code." },
};

# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY
/**
 * Argument descriptors for the 'injectdelay' debugger command.
 *
 * The command accepts 3 to 5 arguments and the handler treats "jitter" and
 * "reqs" as optional, so those two descriptors use cTimesMin = 0.
 */
static const DBGCVARDESC g_aInjectDelayArgs[] =
{
    /* cTimesMin, cTimesMax, enmCategory,        fFlags, pszName,     pszDescription */
    {  1,         1,         DBGCVAR_CAT_STRING, 0,      "direction", "write|read|flush|any." },
    {  1,         1,         DBGCVAR_CAT_STRING, 0,      "filename",  "Filename." },
    {  1,         1,         DBGCVAR_CAT_NUMBER, 0,      "delay",     "Delay in milliseconds." },
    {  0,         1,         DBGCVAR_CAT_NUMBER, 0,      "jitter",    "Jitter of the delay." },
    {  0,         1,         DBGCVAR_CAT_NUMBER, 0,      "reqs",      "Number of requests to delay." }
};
# endif

/** Command descriptors. */
static const DBGCCMD g_aCmds[] =
{
    /* pszCmd,        cArgsMin, cArgsMax, paArgDesc,               cArgDescs,                        fFlags, pfnHandler,             pszSyntax, pszDescription */
    { "injecterror",  3,        3,        &g_aInjectErrorArgs[0],  RT_ELEMENTS(g_aInjectErrorArgs),  0,      pdmacEpFileErrorInject, "",        "Inject error into I/O subsystem." }
# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY
   ,{ "injectdelay",  3,        5,        &g_aInjectDelayArgs[0],  RT_ELEMENTS(g_aInjectDelayArgs),  0,      pdmacEpFileDelayInject, "",        "Inject a delay of a request." }
# endif
};
#endif
94
95
96/**
97 * Frees a task.
98 *
99 * @returns nothing.
100 * @param pEndpoint Pointer to the endpoint the segment was for.
101 * @param pTask The task to free.
102 */
103void pdmacFileTaskFree(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
104{
105 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
106
107 LogFlowFunc((": pEndpoint=%p pTask=%p\n", pEndpoint, pTask));
108
109 /* Try the per endpoint cache first. */
110 if (pEndpoint->cTasksCached < pEpClass->cTasksCacheMax)
111 {
112 /* Add it to the list. */
113 pEndpoint->pTasksFreeTail->pNext = pTask;
114 pEndpoint->pTasksFreeTail = pTask;
115 ASMAtomicIncU32(&pEndpoint->cTasksCached);
116 }
117 else
118 {
119 Log(("Freeing task %p because all caches are full\n", pTask));
120 MMR3HeapFree(pTask);
121 }
122}
123
124/**
125 * Allocates a task segment
126 *
127 * @returns Pointer to the new task segment or NULL
128 * @param pEndpoint Pointer to the endpoint
129 */
130PPDMACTASKFILE pdmacFileTaskAlloc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
131{
132 PPDMACTASKFILE pTask = NULL;
133
134 /* Try the small per endpoint cache first. */
135 if (pEndpoint->pTasksFreeHead == pEndpoint->pTasksFreeTail)
136 {
137 /* Try the bigger endpoint class cache. */
138 PPDMASYNCCOMPLETIONEPCLASSFILE pEndpointClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
139
140 /*
141 * Allocate completely new.
142 * If this fails we return NULL.
143 */
144 int rc = MMR3HeapAllocZEx(pEndpointClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
145 sizeof(PDMACTASKFILE),
146 (void **)&pTask);
147 if (RT_FAILURE(rc))
148 pTask = NULL;
149
150 LogFlow(("Allocated task %p -> %Rrc\n", pTask, rc));
151 }
152 else
153 {
154 /* Grab a free task from the head. */
155 AssertMsg(pEndpoint->cTasksCached > 0, ("No tasks cached but list contains more than one element\n"));
156
157 pTask = pEndpoint->pTasksFreeHead;
158 pEndpoint->pTasksFreeHead = pTask->pNext;
159 ASMAtomicDecU32(&pEndpoint->cTasksCached);
160 pTask->pNext = NULL;
161 }
162
163 return pTask;
164}
165
166PPDMACTASKFILE pdmacFileEpGetNewTasks(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
167{
168 /*
169 * Get pending tasks.
170 */
171 PPDMACTASKFILE pTasks = ASMAtomicXchgPtrT(&pEndpoint->pTasksNewHead, NULL, PPDMACTASKFILE);
172
173 /* Reverse the list to process in FIFO order. */
174 if (pTasks)
175 {
176 PPDMACTASKFILE pTask = pTasks;
177
178 pTasks = NULL;
179
180 while (pTask)
181 {
182 PPDMACTASKFILE pCur = pTask;
183 pTask = pTask->pNext;
184 pCur->pNext = pTasks;
185 pTasks = pCur;
186 }
187 }
188
189 return pTasks;
190}
191
192static void pdmacFileAioMgrWakeup(PPDMACEPFILEMGR pAioMgr)
193{
194 bool fWokenUp = ASMAtomicXchgBool(&pAioMgr->fWokenUp, true);
195 if (!fWokenUp)
196 {
197 bool fWaitingEventSem = ASMAtomicReadBool(&pAioMgr->fWaitingEventSem);
198 if (fWaitingEventSem)
199 {
200 int rc = RTSemEventSignal(pAioMgr->EventSem);
201 AssertRC(rc);
202 }
203 }
204}
205
206static int pdmacFileAioMgrWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent)
207{
208 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, enmEvent);
209 Assert(!pAioMgr->fBlockingEventPending);
210 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, true);
211
212 /* Wakeup the async I/O manager */
213 pdmacFileAioMgrWakeup(pAioMgr);
214
215 /* Wait for completion. */
216 int rc = RTSemEventWait(pAioMgr->EventSemBlock, RT_INDEFINITE_WAIT);
217 AssertRC(rc);
218
219 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, false);
220 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID);
221
222 return rc;
223}
224
225int pdmacFileAioMgrAddEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
226{
227 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p{%s}\n", pAioMgr, pEndpoint, pEndpoint->Core.pszUri));
228
229 /* Update the assigned I/O manager. */
230 ASMAtomicWritePtr(&pEndpoint->pAioMgr, pAioMgr);
231
232 int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
233 AssertRCReturn(rc, rc);
234
235 ASMAtomicWritePtr(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, pEndpoint);
236 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT);
237 ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
238
239 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
240
241 return rc;
242}
243
#ifdef SOME_UNUSED_FUNCTION
/**
 * Posts the REMOVE_ENDPOINT blocking event for an endpoint.
 *
 * Only compiled when SOME_UNUSED_FUNCTION is defined.
 *
 * @returns VBox status code.
 * @param   pAioMgr     The I/O manager to post the event to.
 * @param   pEndpoint   The endpoint to remove.
 */
static int pdmacFileAioMgrRemoveEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
    /* Blocking events are posted under the blocking event critical section. */
    int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
    AssertRCReturn(rc, rc);

    /* Pass the endpoint as event data, wait for the event, clear the data again. */
    ASMAtomicWritePtr(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, pEndpoint);
    rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT);
    ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);

    RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
    return rc;
}
#endif
259
260static int pdmacFileAioMgrCloseEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
261{
262 int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
263 AssertRCReturn(rc, rc);
264
265 ASMAtomicWritePtr(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, pEndpoint);
266 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT);
267 ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
268
269 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
270
271 return rc;
272}
273
274static int pdmacFileAioMgrShutdown(PPDMACEPFILEMGR pAioMgr)
275{
276 int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
277 AssertRCReturn(rc, rc);
278
279 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN);
280
281 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
282
283 return rc;
284}
285
286int pdmacFileEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
287{
288 PPDMACTASKFILE pNext;
289 do
290 {
291 pNext = pEndpoint->pTasksNewHead;
292 pTask->pNext = pNext;
293 } while (!ASMAtomicCmpXchgPtr(&pEndpoint->pTasksNewHead, pTask, pNext));
294
295 pdmacFileAioMgrWakeup(ASMAtomicReadPtrT(&pEndpoint->pAioMgr, PPDMACEPFILEMGR));
296
297 return VINF_SUCCESS;
298}
299
300static DECLCALLBACK(void) pdmacFileEpTaskCompleted(PPDMACTASKFILE pTask, void *pvUser, int rc)
301{
302 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pvUser;
303
304 LogFlowFunc(("pTask=%#p pvUser=%#p rc=%Rrc\n", pTask, pvUser, rc));
305
306 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
307 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, rc, true);
308 else
309 {
310 Assert((uint32_t)pTask->DataSeg.cbSeg == pTask->DataSeg.cbSeg && (int32_t)pTask->DataSeg.cbSeg >= 0);
311 uint32_t uOld = ASMAtomicSubS32(&pTaskFile->cbTransferLeft, (int32_t)pTask->DataSeg.cbSeg);
312
313 /* The first error will be returned. */
314 if (RT_FAILURE(rc))
315 ASMAtomicCmpXchgS32(&pTaskFile->rc, rc, VINF_SUCCESS);
316#ifdef VBOX_WITH_DEBUGGER
317 else
318 {
319 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pTaskFile->Core.pEndpoint;
320
321 /* Overwrite with injected error code. */
322 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
323 rc = ASMAtomicXchgS32(&pEpFile->rcReqRead, VINF_SUCCESS);
324 else
325 rc = ASMAtomicXchgS32(&pEpFile->rcReqWrite, VINF_SUCCESS);
326
327 if (RT_FAILURE(rc))
328 ASMAtomicCmpXchgS32(&pTaskFile->rc, rc, VINF_SUCCESS);
329 }
330#endif
331
332 if (!(uOld - pTask->DataSeg.cbSeg)
333 && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
334 {
335#ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY
336 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pTaskFile->Core.pEndpoint;
337 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEpFile->Core.pEpClass;
338
339 /* Check if we should delay completion of the request. */
340 if ( ASMAtomicReadU32(&pEpFile->msDelay) > 0
341 && ASMAtomicReadU32(&pEpFile->cReqsDelay) > 0)
342 {
343 uint64_t tsDelay = pEpFile->msDelay;
344
345 if (pEpFile->msJitter)
346 tsDelay = (RTRandU32() % 100) > 50 ? pEpFile->msDelay + (RTRandU32() % pEpFile->msJitter)
347 : pEpFile->msDelay - (RTRandU32() % pEpFile->msJitter);
348 ASMAtomicDecU32(&pEpFile->cReqsDelay);
349
350 /* Arm the delay. */
351 pTaskFile->tsDelayEnd = RTTimeProgramMilliTS() + tsDelay;
352
353 /* Append to the list. */
354 PPDMASYNCCOMPLETIONTASKFILE pHead = NULL;
355 do
356 {
357 pHead = ASMAtomicReadPtrT(&pEpFile->pDelayedHead, PPDMASYNCCOMPLETIONTASKFILE);
358 pTaskFile->pDelayedNext = pHead;
359 } while (!ASMAtomicCmpXchgPtr(&pEpFile->pDelayedHead, pTaskFile, pHead));
360
361 if (tsDelay < pEpClassFile->cMilliesNext)
362 {
363 ASMAtomicWriteU64(&pEpClassFile->cMilliesNext, tsDelay);
364 TMTimerSetMillies(pVM, pEpClassFile->hTimer, tsDelay);
365 }
366
367 LogRel(("AIOMgr: Delaying request %#p for %u ms\n", pTaskFile, tsDelay));
368 }
369 else
370#endif
371 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, pTaskFile->rc, true);
372 }
373 }
374}
375
376DECLINLINE(void) pdmacFileEpTaskInit(PPDMASYNCCOMPLETIONTASK pTask, size_t cbTransfer)
377{
378 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
379
380 Assert((uint32_t)cbTransfer == cbTransfer && (int32_t)cbTransfer >= 0);
381 ASMAtomicWriteS32(&pTaskFile->cbTransferLeft, (int32_t)cbTransfer);
382 ASMAtomicWriteBool(&pTaskFile->fCompleted, false);
383 ASMAtomicWriteS32(&pTaskFile->rc, VINF_SUCCESS);
384}
385
386int pdmacFileEpTaskInitiate(PPDMASYNCCOMPLETIONTASK pTask,
387 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
388 PCRTSGSEG paSegments, size_t cSegments,
389 size_t cbTransfer, PDMACTASKFILETRANSFER enmTransfer)
390{
391 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
392 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
393
394 Assert( (enmTransfer == PDMACTASKFILETRANSFER_READ)
395 || (enmTransfer == PDMACTASKFILETRANSFER_WRITE));
396
397 for (size_t i = 0; i < cSegments; i++)
398 {
399 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
400 AssertPtr(pIoTask);
401
402 pIoTask->pEndpoint = pEpFile;
403 pIoTask->enmTransferType = enmTransfer;
404 pIoTask->Off = off;
405 pIoTask->DataSeg.cbSeg = paSegments[i].cbSeg;
406 pIoTask->DataSeg.pvSeg = paSegments[i].pvSeg;
407 pIoTask->pvUser = pTaskFile;
408 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
409
410 /* Send it off to the I/O manager. */
411 pdmacFileEpAddTask(pEpFile, pIoTask);
412 off += paSegments[i].cbSeg;
413 cbTransfer -= paSegments[i].cbSeg;
414 }
415
416 AssertMsg(!cbTransfer, ("Incomplete transfer %u bytes left\n", cbTransfer));
417
418 return VINF_AIO_TASK_PENDING;
419}
420
421/**
422 * Creates a new async I/O manager.
423 *
424 * @returns VBox status code.
425 * @param pEpClass Pointer to the endpoint class data.
426 * @param ppAioMgr Where to store the pointer to the new async I/O manager on success.
427 * @param enmMgrType Wanted manager type - can be overwritten by the global override.
428 */
429int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr,
430 PDMACEPFILEMGRTYPE enmMgrType)
431{
432 LogFlowFunc((": Entered\n"));
433
434 PPDMACEPFILEMGR pAioMgrNew;
435 int rc = MMR3HeapAllocZEx(pEpClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMACEPFILEMGR), (void **)&pAioMgrNew);
436 if (RT_SUCCESS(rc))
437 {
438 if (enmMgrType < pEpClass->enmMgrTypeOverride)
439 pAioMgrNew->enmMgrType = enmMgrType;
440 else
441 pAioMgrNew->enmMgrType = pEpClass->enmMgrTypeOverride;
442
443 pAioMgrNew->msBwLimitExpired = RT_INDEFINITE_WAIT;
444
445 rc = RTSemEventCreate(&pAioMgrNew->EventSem);
446 if (RT_SUCCESS(rc))
447 {
448 rc = RTSemEventCreate(&pAioMgrNew->EventSemBlock);
449 if (RT_SUCCESS(rc))
450 {
451 rc = RTCritSectInit(&pAioMgrNew->CritSectBlockingEvent);
452 if (RT_SUCCESS(rc))
453 {
454 /* Init the rest of the manager. */
455 if (pAioMgrNew->enmMgrType != PDMACEPFILEMGRTYPE_SIMPLE)
456 rc = pdmacFileAioMgrNormalInit(pAioMgrNew);
457
458 if (RT_SUCCESS(rc))
459 {
460 pAioMgrNew->enmState = PDMACEPFILEMGRSTATE_RUNNING;
461
462 rc = RTThreadCreateF(&pAioMgrNew->Thread,
463 pAioMgrNew->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE
464 ? pdmacFileAioMgrFailsafe
465 : pdmacFileAioMgrNormal,
466 pAioMgrNew,
467 0,
468 RTTHREADTYPE_IO,
469 0,
470 "AioMgr%d-%s", pEpClass->cAioMgrs,
471 pAioMgrNew->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE
472 ? "F"
473 : "N");
474 if (RT_SUCCESS(rc))
475 {
476 /* Link it into the list. */
477 RTCritSectEnter(&pEpClass->CritSect);
478 pAioMgrNew->pNext = pEpClass->pAioMgrHead;
479 if (pEpClass->pAioMgrHead)
480 pEpClass->pAioMgrHead->pPrev = pAioMgrNew;
481 pEpClass->pAioMgrHead = pAioMgrNew;
482 pEpClass->cAioMgrs++;
483 RTCritSectLeave(&pEpClass->CritSect);
484
485 *ppAioMgr = pAioMgrNew;
486
487 Log(("PDMAC: Successfully created new file AIO Mgr {%s}\n", RTThreadGetName(pAioMgrNew->Thread)));
488 return VINF_SUCCESS;
489 }
490 pdmacFileAioMgrNormalDestroy(pAioMgrNew);
491 }
492 RTCritSectDelete(&pAioMgrNew->CritSectBlockingEvent);
493 }
494 RTSemEventDestroy(pAioMgrNew->EventSem);
495 }
496 RTSemEventDestroy(pAioMgrNew->EventSemBlock);
497 }
498 MMR3HeapFree(pAioMgrNew);
499 }
500
501 LogFlowFunc((": Leave rc=%Rrc\n", rc));
502
503 return rc;
504}
505
506/**
507 * Destroys a async I/O manager.
508 *
509 * @returns nothing.
510 * @param pEpClassFile Pointer to globals for the file endpoint class.
511 * @param pAioMgr The async I/O manager to destroy.
512 */
513static void pdmacFileAioMgrDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile, PPDMACEPFILEMGR pAioMgr)
514{
515 int rc = pdmacFileAioMgrShutdown(pAioMgr);
516 AssertRC(rc);
517
518 /* Unlink from the list. */
519 rc = RTCritSectEnter(&pEpClassFile->CritSect);
520 AssertRC(rc);
521
522 PPDMACEPFILEMGR pPrev = pAioMgr->pPrev;
523 PPDMACEPFILEMGR pNext = pAioMgr->pNext;
524
525 if (pPrev)
526 pPrev->pNext = pNext;
527 else
528 pEpClassFile->pAioMgrHead = pNext;
529
530 if (pNext)
531 pNext->pPrev = pPrev;
532
533 pEpClassFile->cAioMgrs--;
534 rc = RTCritSectLeave(&pEpClassFile->CritSect);
535 AssertRC(rc);
536
537 /* Free the resources. */
538 RTCritSectDelete(&pAioMgr->CritSectBlockingEvent);
539 RTSemEventDestroy(pAioMgr->EventSem);
540 RTSemEventDestroy(pAioMgr->EventSemBlock);
541 if (pAioMgr->enmMgrType != PDMACEPFILEMGRTYPE_SIMPLE)
542 pdmacFileAioMgrNormalDestroy(pAioMgr);
543
544 MMR3HeapFree(pAioMgr);
545}
546
547static int pdmacFileMgrTypeFromName(const char *pszVal, PPDMACEPFILEMGRTYPE penmMgrType)
548{
549 int rc = VINF_SUCCESS;
550
551 if (!RTStrCmp(pszVal, "Simple"))
552 *penmMgrType = PDMACEPFILEMGRTYPE_SIMPLE;
553 else if (!RTStrCmp(pszVal, "Async"))
554 *penmMgrType = PDMACEPFILEMGRTYPE_ASYNC;
555 else
556 rc = VERR_CFGM_CONFIG_UNKNOWN_VALUE;
557
558 return rc;
559}
560
561static const char *pdmacFileMgrTypeToName(PDMACEPFILEMGRTYPE enmMgrType)
562{
563 if (enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE)
564 return "Simple";
565 if (enmMgrType == PDMACEPFILEMGRTYPE_ASYNC)
566 return "Async";
567
568 return NULL;
569}
570
571static int pdmacFileBackendTypeFromName(const char *pszVal, PPDMACFILEEPBACKEND penmBackendType)
572{
573 int rc = VINF_SUCCESS;
574
575 if (!RTStrCmp(pszVal, "Buffered"))
576 *penmBackendType = PDMACFILEEPBACKEND_BUFFERED;
577 else if (!RTStrCmp(pszVal, "NonBuffered"))
578 *penmBackendType = PDMACFILEEPBACKEND_NON_BUFFERED;
579 else
580 rc = VERR_CFGM_CONFIG_UNKNOWN_VALUE;
581
582 return rc;
583}
584
585static const char *pdmacFileBackendTypeToName(PDMACFILEEPBACKEND enmBackendType)
586{
587 if (enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
588 return "Buffered";
589 if (enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
590 return "NonBuffered";
591
592 return NULL;
593}
594
595#ifdef VBOX_WITH_DEBUGGER
596
597/**
598 * @callback_method_impl{FNDBGCCMD, The '.injecterror' command.}
599 */
600static DECLCALLBACK(int) pdmacEpFileErrorInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR pArgs, unsigned cArgs)
601{
602 /*
603 * Validate input.
604 */
605 DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM);
606 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, -1, cArgs == 3);
607 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 0, pArgs[0].enmType == DBGCVAR_TYPE_STRING);
608 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 1, pArgs[1].enmType == DBGCVAR_TYPE_STRING);
609 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 2, pArgs[2].enmType == DBGCVAR_TYPE_NUMBER);
610
611 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile;
612 pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE];
613
614 /* Syntax is "read|write <filename> <status code>" */
615 bool fWrite;
616 if (!RTStrCmp(pArgs[0].u.pszString, "read"))
617 fWrite = false;
618 else if (!RTStrCmp(pArgs[0].u.pszString, "write"))
619 fWrite = true;
620 else
621 return DBGCCmdHlpFail(pCmdHlp, pCmd, "invalid transfer direction '%s'", pArgs[0].u.pszString);
622
623 int32_t rcToInject = (int32_t)pArgs[2].u.u64Number;
624 if ((uint64_t)rcToInject != pArgs[2].u.u64Number)
625 return DBGCCmdHlpFail(pCmdHlp, pCmd, "The status code '%lld' is out of range", pArgs[0].u.u64Number);
626
627 /*
628 * Search for the matching endpoint.
629 */
630 RTCritSectEnter(&pEpClassFile->Core.CritSect);
631
632 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead;
633 while (pEpFile)
634 {
635 if (!RTStrCmp(pArgs[1].u.pszString, RTPathFilename(pEpFile->Core.pszUri)))
636 break;
637 pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpFile->Core.pNext;
638 }
639
640 if (pEpFile)
641 {
642 /*
643 * Do the job.
644 */
645 if (fWrite)
646 ASMAtomicXchgS32(&pEpFile->rcReqWrite, rcToInject);
647 else
648 ASMAtomicXchgS32(&pEpFile->rcReqRead, rcToInject);
649
650 DBGCCmdHlpPrintf(pCmdHlp, "Injected %Rrc into '%s' for %s\n",
651 (int)rcToInject, pArgs[1].u.pszString, pArgs[0].u.pszString);
652 }
653
654 RTCritSectLeave(&pEpClassFile->Core.CritSect);
655
656 if (!pEpFile)
657 return DBGCCmdHlpFail(pCmdHlp, pCmd, "No file with name '%s' found", pArgs[1].u.pszString);
658 return VINF_SUCCESS;
659}
660
661# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY
662/**
663 * @callback_method_impl{FNDBGCCMD, The '.injectdelay' command.}
664 */
665static DECLCALLBACK(int) pdmacEpFileDelayInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR pArgs, unsigned cArgs)
666{
667 /*
668 * Validate input.
669 */
670 DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM);
671 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, -1, cArgs >= 3);
672 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 0, pArgs[0].enmType == DBGCVAR_TYPE_STRING);
673 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 1, pArgs[1].enmType == DBGCVAR_TYPE_STRING);
674 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 2, pArgs[2].enmType == DBGCVAR_TYPE_NUMBER);
675
676 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile;
677 pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE];
678
679 /* Syntax is "read|write|flush|any <filename> <delay> [reqs]" */
680 PDMACFILEREQTYPEDELAY enmDelayType = PDMACFILEREQTYPEDELAY_ANY;
681 if (!RTStrCmp(pArgs[0].u.pszString, "read"))
682 enmDelayType = PDMACFILEREQTYPEDELAY_READ;
683 else if (!RTStrCmp(pArgs[0].u.pszString, "write"))
684 enmDelayType = PDMACFILEREQTYPEDELAY_WRITE;
685 else if (!RTStrCmp(pArgs[0].u.pszString, "flush"))
686 enmDelayType = PDMACFILEREQTYPEDELAY_FLUSH;
687 else if (!RTStrCmp(pArgs[0].u.pszString, "any"))
688 enmDelayType = PDMACFILEREQTYPEDELAY_ANY;
689 else
690 return DBGCCmdHlpFail(pCmdHlp, pCmd, "invalid transfer direction '%s'", pArgs[0].u.pszString);
691
692 uint32_t msDelay = (uint32_t)pArgs[2].u.u64Number;
693 if ((uint64_t)msDelay != pArgs[2].u.u64Number)
694 return DBGCCmdHlpFail(pCmdHlp, pCmd, "The delay '%lld' is out of range", pArgs[0].u.u64Number);
695
696 uint32_t cReqsDelay = 1;
697 uint32_t msJitter = 0;
698 if (cArgs >= 4)
699 msJitter = (uint32_t)pArgs[3].u.u64Number;
700 if (cArgs == 5)
701 cReqsDelay = (uint32_t)pArgs[4].u.u64Number;
702
703 /*
704 * Search for the matching endpoint.
705 */
706 RTCritSectEnter(&pEpClassFile->Core.CritSect);
707
708 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead;
709 while (pEpFile)
710 {
711 if (!RTStrCmp(pArgs[1].u.pszString, RTPathFilename(pEpFile->Core.pszUri)))
712 break;
713 pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpFile->Core.pNext;
714 }
715
716 if (pEpFile)
717 {
718 ASMAtomicWriteSize(&pEpFile->enmTypeDelay, enmDelayType);
719 ASMAtomicWriteU32(&pEpFile->msDelay, msDelay);
720 ASMAtomicWriteU32(&pEpFile->msJitter, msJitter);
721 ASMAtomicWriteU32(&pEpFile->cReqsDelay, cReqsDelay);
722
723 DBGCCmdHlpPrintf(pCmdHlp, "Injected delay for the next %u requests of %u ms into '%s' for %s\n",
724 cReqsDelay, msDelay, pArgs[1].u.pszString, pArgs[0].u.pszString);
725 }
726
727 RTCritSectLeave(&pEpClassFile->Core.CritSect);
728
729 if (!pEpFile)
730 return DBGCCmdHlpFail(pCmdHlp, pCmd, "No file with name '%s' found", pArgs[1].u.pszString);
731 return VINF_SUCCESS;
732}
733
734/**
735 * @callback_method_impl{FNTMTIMERINT, }
736 */
737static DECLCALLBACK(void) pdmacR3TimerCallback(PVM pVM, TMTIMERHANDLE hTimer, void *pvUser)
738{
739 Assert(hTimer == pEpClassFile->hTimer);
740 uint64_t tsCur = RTTimeProgramMilliTS();
741 uint64_t cMilliesNext = UINT64_MAX;
742 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pvUser;
743
744 ASMAtomicWriteU64(&pEpClassFile->cMilliesNext, UINT64_MAX);
745
746 /* Go through all endpoints and check for expired requests. */
747 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead;
748
749 while (pEpFile)
750 {
751 /* Check for an expired delay. */
752 if (pEpFile->pDelayedHead != NULL)
753 {
754 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = ASMAtomicXchgPtrT(&pEpFile->pDelayedHead, NULL, PPDMASYNCCOMPLETIONTASKFILE);
755
756 while (pTaskFile)
757 {
758 PPDMASYNCCOMPLETIONTASKFILE pTmp = pTaskFile;
759 pTaskFile = pTaskFile->pDelayedNext;
760
761 if (tsCur >= pTmp->tsDelayEnd)
762 {
763 LogRel(("AIOMgr: Delayed request %#p completed\n", pTmp));
764 pdmR3AsyncCompletionCompleteTask(&pTmp->Core, pTmp->rc, true);
765 }
766 else
767 {
768 /* Prepend to the delayed list again. */
769 PPDMASYNCCOMPLETIONTASKFILE pHead = NULL;
770
771 if (pTmp->tsDelayEnd - tsCur < cMilliesNext)
772 cMilliesNext = pTmp->tsDelayEnd - tsCur;
773
774 do
775 {
776 pHead = ASMAtomicReadPtrT(&pEpFile->pDelayedHead, PPDMASYNCCOMPLETIONTASKFILE);
777 pTmp->pDelayedNext = pHead;
778 } while (!ASMAtomicCmpXchgPtr(&pEpFile->pDelayedHead, pTmp, pHead));
779 }
780 }
781 }
782
783 pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpFile->Core.pNext;
784 }
785
786 if (cMilliesNext < pEpClassFile->cMilliesNext)
787 {
788 ASMAtomicWriteU64(&pEpClassFile->cMilliesNext, cMilliesNext);
789 TMTimerSetMillies(pVM, hTimer, cMilliesNext);
790 }
791}
792
793# endif /* PDM_ASYNC_COMPLETION_FILE_WITH_DELAY */
794
795#endif /* VBOX_WITH_DEBUGGER */
796
797static DECLCALLBACK(int) pdmacFileInitialize(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals, PCFGMNODE pCfgNode)
798{
799 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
800 RTFILEAIOLIMITS AioLimits; /** < Async I/O limitations. */
801
802 int rc = RTFileAioGetLimits(&AioLimits);
803#ifdef DEBUG
804 if (RT_SUCCESS(rc) && RTEnvExist("VBOX_ASYNC_IO_FAILBACK"))
805 rc = VERR_ENV_VAR_NOT_FOUND;
806#endif
807 if (RT_FAILURE(rc))
808 {
809 LogRel(("AIO: Async I/O manager not supported (rc=%Rrc). Falling back to simple manager\n", rc));
810 pEpClassFile->enmMgrTypeOverride = PDMACEPFILEMGRTYPE_SIMPLE;
811 pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_BUFFERED;
812 }
813 else
814 {
815 pEpClassFile->uBitmaskAlignment = AioLimits.cbBufferAlignment ? ~((RTR3UINTPTR)AioLimits.cbBufferAlignment - 1) : RTR3UINTPTR_MAX;
816 pEpClassFile->cReqsOutstandingMax = AioLimits.cReqsOutstandingMax;
817
818 if (pCfgNode)
819 {
820 /* Query the default manager type */
821 char *pszVal = NULL;
822 rc = CFGMR3QueryStringAllocDef(pCfgNode, "IoMgr", &pszVal, "Async");
823 AssertLogRelRCReturn(rc, rc);
824
825 rc = pdmacFileMgrTypeFromName(pszVal, &pEpClassFile->enmMgrTypeOverride);
826 MMR3HeapFree(pszVal);
827 if (RT_FAILURE(rc))
828 return rc;
829
830 LogRel(("AIOMgr: Default manager type is '%s'\n", pdmacFileMgrTypeToName(pEpClassFile->enmMgrTypeOverride)));
831
832 /* Query default backend type */
833 rc = CFGMR3QueryStringAllocDef(pCfgNode, "FileBackend", &pszVal, "NonBuffered");
834 AssertLogRelRCReturn(rc, rc);
835
836 rc = pdmacFileBackendTypeFromName(pszVal, &pEpClassFile->enmEpBackendDefault);
837 MMR3HeapFree(pszVal);
838 if (RT_FAILURE(rc))
839 return rc;
840
841 LogRel(("AIOMgr: Default file backend is '%s'\n", pdmacFileBackendTypeToName(pEpClassFile->enmEpBackendDefault)));
842
843#ifdef RT_OS_LINUX
844 if ( pEpClassFile->enmMgrTypeOverride == PDMACEPFILEMGRTYPE_ASYNC
845 && pEpClassFile->enmEpBackendDefault == PDMACFILEEPBACKEND_BUFFERED)
846 {
847 LogRel(("AIOMgr: Linux does not support buffered async I/O, changing to non buffered\n"));
848 pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_NON_BUFFERED;
849 }
850#endif
851 }
852 else
853 {
854 /* No configuration supplied, set defaults */
855 pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_NON_BUFFERED;
856 pEpClassFile->enmMgrTypeOverride = PDMACEPFILEMGRTYPE_ASYNC;
857 }
858 }
859
860 /* Init critical section. */
861 rc = RTCritSectInit(&pEpClassFile->CritSect);
862
863#ifdef VBOX_WITH_DEBUGGER
864 /* Install the error injection handler. */
865 if (RT_SUCCESS(rc))
866 {
867 rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds));
868 AssertRC(rc);
869 }
870
871# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY
872 rc = TMR3TimerCreate(pEpClassFile->Core.pVM, TMCLOCK_REAL, pdmacR3TimerCallback, pEpClassFile,
873 TMTIMER_FLAGS_NO_RING0, "AC Delay", &pEpClassFile->hTimer);
874 AssertRC(rc);
875 pEpClassFile->cMilliesNext = UINT64_MAX;
876# endif
877#endif
878
879 return rc;
880}
881
882static DECLCALLBACK(void) pdmacFileTerminate(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals)
883{
884 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
885
886 /* All endpoints should be closed at this point. */
887 AssertMsg(!pEpClassFile->Core.pEndpointsHead, ("There are still endpoints left\n"));
888
889 /* Destroy all left async I/O managers. */
890 while (pEpClassFile->pAioMgrHead)
891 pdmacFileAioMgrDestroy(pEpClassFile, pEpClassFile->pAioMgrHead);
892
893 RTCritSectDelete(&pEpClassFile->CritSect);
894}
895
896static DECLCALLBACK(int) pdmacFileEpInitialize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint,
897 const char *pszUri, uint32_t fFlags)
898{
899 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
900 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
901 PDMACEPFILEMGRTYPE enmMgrType = pEpClassFile->enmMgrTypeOverride;
902 PDMACFILEEPBACKEND enmEpBackend = pEpClassFile->enmEpBackendDefault;
903
904 AssertMsgReturn((fFlags & ~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_DONT_LOCK | PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED)) == 0,
905 ("PDMAsyncCompletion: Invalid flag specified\n"), VERR_INVALID_PARAMETER);
906
907 unsigned fFileFlags = RTFILE_O_OPEN;
908
909 /*
910 * Revert to the simple manager and the buffered backend if
911 * the host cache should be enabled.
912 */
913 if (fFlags & PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED)
914 {
915 enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE;
916 enmEpBackend = PDMACFILEEPBACKEND_BUFFERED;
917 }
918
919 if (fFlags & PDMACEP_FILE_FLAGS_READ_ONLY)
920 fFileFlags |= RTFILE_O_READ | RTFILE_O_DENY_NONE;
921 else
922 {
923 fFileFlags |= RTFILE_O_READWRITE;
924
925 /*
926 * Opened in read/write mode. Check whether the caller wants to
927 * avoid the lock. Return an error in case caching is enabled
928 * because this can lead to data corruption.
929 */
930 if (fFlags & PDMACEP_FILE_FLAGS_DONT_LOCK)
931 fFileFlags |= RTFILE_O_DENY_NONE;
932 else
933 fFileFlags |= RTFILE_O_DENY_WRITE;
934 }
935
936 if (enmMgrType == PDMACEPFILEMGRTYPE_ASYNC)
937 fFileFlags |= RTFILE_O_ASYNC_IO;
938
939 int rc;
940 if (enmEpBackend == PDMACFILEEPBACKEND_NON_BUFFERED)
941 {
942 /*
943 * We only disable the cache if the size of the file is a multiple of 512.
944 * Certain hosts like Windows, Linux and Solaris require that transfer sizes
945 * are aligned to the volume sector size.
946 * If not we just make sure that the data is written to disk with RTFILE_O_WRITE_THROUGH
947 * which will trash the host cache but ensures that the host cache will not
948 * contain dirty buffers.
949 */
950 RTFILE hFile;
951 rc = RTFileOpen(&hFile, pszUri, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
952 if (RT_SUCCESS(rc))
953 {
954 uint64_t cbSize;
955
956 rc = RTFileQuerySize(hFile, &cbSize);
957
958 if (RT_SUCCESS(rc) && ((cbSize % 512) == 0))
959 fFileFlags |= RTFILE_O_NO_CACHE;
960 else
961 {
962 /* Downgrade to the buffered backend */
963 enmEpBackend = PDMACFILEEPBACKEND_BUFFERED;
964
965#ifdef RT_OS_LINUX
966 fFileFlags &= ~RTFILE_O_ASYNC_IO;
967 enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE;
968#endif
969 }
970 RTFileClose(hFile);
971 }
972 }
973
974 /* Open with final flags. */
975 rc = RTFileOpen(&pEpFile->hFile, pszUri, fFileFlags);
976 if ( rc == VERR_INVALID_FUNCTION
977 || rc == VERR_INVALID_PARAMETER)
978 {
979 LogRel(("AIOMgr: pdmacFileEpInitialize: RTFileOpen %s / %08x failed with %Rrc\n",
980 pszUri, fFileFlags, rc));
981 /*
982 * Solaris doesn't support directio on ZFS so far. :-\
983 * Trying to enable it returns VERR_INVALID_FUNCTION
984 * (ENOTTY). Remove it and hope for the best.
985 * ZFS supports write throttling in case applications
986 * write more data than can be synced to the disk
987 * without blocking the whole application.
988 *
989 * On Linux we have the same problem with cifs.
990 * Have to disable async I/O here too because it requires O_DIRECT.
991 */
992 fFileFlags &= ~RTFILE_O_NO_CACHE;
993 enmEpBackend = PDMACFILEEPBACKEND_BUFFERED;
994
995#ifdef RT_OS_LINUX
996 fFileFlags &= ~RTFILE_O_ASYNC_IO;
997 enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE;
998#endif
999
1000 /* Open again. */
1001 rc = RTFileOpen(&pEpFile->hFile, pszUri, fFileFlags);
1002
1003 if (RT_FAILURE(rc))
1004 {
1005 LogRel(("AIOMgr: pdmacFileEpInitialize: RTFileOpen %s / %08x failed AGAIN(!) with %Rrc\n",
1006 pszUri, fFileFlags, rc));
1007 }
1008 }
1009
1010 if (RT_SUCCESS(rc))
1011 {
1012 pEpFile->fFlags = fFileFlags;
1013
1014 rc = RTFileQuerySize(pEpFile->hFile, (uint64_t *)&pEpFile->cbFile);
1015 if (RT_SUCCESS(rc))
1016 {
1017 /* Initialize the segment cache */
1018 rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
1019 sizeof(PDMACTASKFILE),
1020 (void **)&pEpFile->pTasksFreeHead);
1021 if (RT_SUCCESS(rc))
1022 {
1023 PPDMACEPFILEMGR pAioMgr = NULL;
1024
1025 pEpFile->pTasksFreeTail = pEpFile->pTasksFreeHead;
1026 pEpFile->cTasksCached = 0;
1027 pEpFile->enmBackendType = enmEpBackend;
1028 /*
1029 * Disable async flushes on Solaris for now.
1030 * They cause weird hangs which needs more investigations.
1031 */
1032#ifndef RT_OS_SOLARIS
1033 pEpFile->fAsyncFlushSupported = true;
1034#else
1035 pEpFile->fAsyncFlushSupported = false;
1036#endif
1037
1038 if (enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE)
1039 {
1040 /* Simple mode. Every file has its own async I/O manager. */
1041 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, PDMACEPFILEMGRTYPE_SIMPLE);
1042 }
1043 else
1044 {
1045 pAioMgr = pEpClassFile->pAioMgrHead;
1046
1047 /* Check for an idling manager of the same type */
1048 while (pAioMgr)
1049 {
1050 if (pAioMgr->enmMgrType == enmMgrType)
1051 break;
1052 pAioMgr = pAioMgr->pNext;
1053 }
1054
1055 if (!pAioMgr)
1056 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, enmMgrType);
1057 }
1058
1059 if (RT_SUCCESS(rc))
1060 {
1061 pEpFile->AioMgr.pTreeRangesLocked = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1062 if (!pEpFile->AioMgr.pTreeRangesLocked)
1063 rc = VERR_NO_MEMORY;
1064 else
1065 {
1066 pEpFile->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1067
1068 /* Assign the endpoint to the thread. */
1069 rc = pdmacFileAioMgrAddEndpoint(pAioMgr, pEpFile);
1070 if (RT_FAILURE(rc))
1071 {
1072 RTMemFree(pEpFile->AioMgr.pTreeRangesLocked);
1073 MMR3HeapFree(pEpFile->pTasksFreeHead);
1074 }
1075 }
1076 }
1077 else if (rc == VERR_FILE_AIO_INSUFFICIENT_EVENTS)
1078 {
1079 PUVM pUVM = VMR3GetUVM(pEpClassFile->Core.pVM);
1080#if defined(RT_OS_LINUX)
1081 rc = VMR3SetError(pUVM, rc, RT_SRC_POS,
1082 N_("Failed to create I/O manager for VM due to insufficient resources on the host. "
1083 "Either increase the amount of allowed events in /proc/sys/fs/aio-max-nr or enable "
1084 "the host I/O cache"));
1085#else
1086 rc = VMR3SetError(pUVM, rc, RT_SRC_POS,
1087 N_("Failed to create I/O manager for VM due to insufficient resources on the host. "
1088 "Enable the host I/O cache"));
1089#endif
1090 }
1091 else
1092 {
1093 PUVM pUVM = VMR3GetUVM(pEpClassFile->Core.pVM);
1094 rc = VMR3SetError(pUVM, rc, RT_SRC_POS,
1095 N_("Failed to create I/O manager for VM due to an unknown error"));
1096 }
1097 }
1098 }
1099
1100 if (RT_FAILURE(rc))
1101 RTFileClose(pEpFile->hFile);
1102 }
1103
1104#ifdef VBOX_WITH_STATISTICS
1105 if (RT_SUCCESS(rc))
1106 {
1107 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatRead,
1108 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1109 STAMUNIT_TICKS_PER_CALL, "Time taken to read from the endpoint",
1110 "/PDM/AsyncCompletion/File/%s/%d/Read", RTPathFilename(pEpFile->Core.pszUri), pEpFile->Core.iStatId);
1111
1112 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatWrite,
1113 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1114 STAMUNIT_TICKS_PER_CALL, "Time taken to write to the endpoint",
1115 "/PDM/AsyncCompletion/File/%s/%d/Write", RTPathFilename(pEpFile->Core.pszUri), pEpFile->Core.iStatId);
1116 }
1117#endif
1118
1119 if (RT_SUCCESS(rc))
1120 LogRel(("AIOMgr: Endpoint for file '%s' (flags %08x) created successfully\n", pszUri, pEpFile->fFlags));
1121
1122 return rc;
1123}
1124
1125static DECLCALLBACK(int) pdmacFileEpRangesLockedDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
1126{
1127 NOREF(pNode); NOREF(pvUser);
1128 AssertMsgFailed(("The locked ranges tree should be empty at that point\n"));
1129 return VINF_SUCCESS;
1130}
1131
1132static DECLCALLBACK(int) pdmacFileEpClose(PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
1133{
1134 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1135 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
1136
1137 /* Make sure that all tasks finished for this endpoint. */
1138 int rc = pdmacFileAioMgrCloseEndpoint(pEpFile->pAioMgr, pEpFile);
1139 AssertRC(rc);
1140
1141 /*
1142 * If the async I/O manager is in failsafe mode this is the only endpoint
1143 * he processes and thus can be destroyed now.
1144 */
1145 if (pEpFile->pAioMgr->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE)
1146 pdmacFileAioMgrDestroy(pEpClassFile, pEpFile->pAioMgr);
1147
1148 /* Free cached tasks. */
1149 PPDMACTASKFILE pTask = pEpFile->pTasksFreeHead;
1150
1151 while (pTask)
1152 {
1153 PPDMACTASKFILE pTaskFree = pTask;
1154 pTask = pTask->pNext;
1155 MMR3HeapFree(pTaskFree);
1156 }
1157
1158 /* Destroy the locked ranges tree now. */
1159 RTAvlrFileOffsetDestroy(pEpFile->AioMgr.pTreeRangesLocked, pdmacFileEpRangesLockedDestroy, NULL);
1160 RTMemFree(pEpFile->AioMgr.pTreeRangesLocked);
1161 pEpFile->AioMgr.pTreeRangesLocked = NULL;
1162
1163 RTFileClose(pEpFile->hFile);
1164
1165#ifdef VBOX_WITH_STATISTICS
1166 /* Not sure if this might be unnecessary because of similar statement in pdmR3AsyncCompletionStatisticsDeregister? */
1167 STAMR3DeregisterF(pEpClassFile->Core.pVM->pUVM, "/PDM/AsyncCompletion/File/%s/*", RTPathFilename(pEpFile->Core.pszUri));
1168#endif
1169
1170 return VINF_SUCCESS;
1171}
1172
1173static DECLCALLBACK(int) pdmacFileEpRead(PPDMASYNCCOMPLETIONTASK pTask,
1174 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
1175 PCRTSGSEG paSegments, size_t cSegments,
1176 size_t cbRead)
1177{
1178 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1179
1180 LogFlowFunc(("pTask=%#p pEndpoint=%#p off=%RTfoff paSegments=%#p cSegments=%zu cbRead=%zu\n",
1181 pTask, pEndpoint, off, paSegments, cSegments, cbRead));
1182
1183 if (RT_UNLIKELY((uint64_t)off + cbRead > pEpFile->cbFile))
1184 return VERR_EOF;
1185
1186 STAM_PROFILE_ADV_START(&pEpFile->StatRead, Read);
1187 pdmacFileEpTaskInit(pTask, cbRead);
1188 int rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbRead,
1189 PDMACTASKFILETRANSFER_READ);
1190 STAM_PROFILE_ADV_STOP(&pEpFile->StatRead, Read);
1191
1192 return rc;
1193}
1194
1195static DECLCALLBACK(int) pdmacFileEpWrite(PPDMASYNCCOMPLETIONTASK pTask,
1196 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
1197 PCRTSGSEG paSegments, size_t cSegments,
1198 size_t cbWrite)
1199{
1200 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1201
1202 if (RT_UNLIKELY(pEpFile->fReadonly))
1203 return VERR_NOT_SUPPORTED;
1204
1205 STAM_PROFILE_ADV_START(&pEpFile->StatWrite, Write);
1206
1207 pdmacFileEpTaskInit(pTask, cbWrite);
1208
1209 int rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbWrite,
1210 PDMACTASKFILETRANSFER_WRITE);
1211
1212 STAM_PROFILE_ADV_STOP(&pEpFile->StatWrite, Write);
1213
1214 return rc;
1215}
1216
1217static DECLCALLBACK(int) pdmacFileEpFlush(PPDMASYNCCOMPLETIONTASK pTask,
1218 PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
1219{
1220 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1221 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
1222
1223 if (RT_UNLIKELY(pEpFile->fReadonly))
1224 return VERR_NOT_SUPPORTED;
1225
1226 pdmacFileEpTaskInit(pTask, 0);
1227
1228 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
1229 if (RT_UNLIKELY(!pIoTask))
1230 return VERR_NO_MEMORY;
1231
1232 pIoTask->pEndpoint = pEpFile;
1233 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_FLUSH;
1234 pIoTask->pvUser = pTaskFile;
1235 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1236 pdmacFileEpAddTask(pEpFile, pIoTask);
1237
1238 return VINF_AIO_TASK_PENDING;
1239}
1240
1241static DECLCALLBACK(int) pdmacFileEpGetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t *pcbSize)
1242{
1243 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1244
1245 *pcbSize = ASMAtomicReadU64(&pEpFile->cbFile);
1246
1247 return VINF_SUCCESS;
1248}
1249
1250static DECLCALLBACK(int) pdmacFileEpSetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t cbSize)
1251{
1252 int rc;
1253 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1254
1255 rc = RTFileSetSize(pEpFile->hFile, cbSize);
1256 if (RT_SUCCESS(rc))
1257 ASMAtomicWriteU64(&pEpFile->cbFile, cbSize);
1258
1259 return rc;
1260}
1261
1262const PDMASYNCCOMPLETIONEPCLASSOPS g_PDMAsyncCompletionEndpointClassFile =
1263{
1264 /* u32Version */
1265 PDMAC_EPCLASS_OPS_VERSION,
1266 /* pcszName */
1267 "File",
1268 /* enmClassType */
1269 PDMASYNCCOMPLETIONEPCLASSTYPE_FILE,
1270 /* cbEndpointClassGlobal */
1271 sizeof(PDMASYNCCOMPLETIONEPCLASSFILE),
1272 /* cbEndpoint */
1273 sizeof(PDMASYNCCOMPLETIONENDPOINTFILE),
1274 /* cbTask */
1275 sizeof(PDMASYNCCOMPLETIONTASKFILE),
1276 /* pfnInitialize */
1277 pdmacFileInitialize,
1278 /* pfnTerminate */
1279 pdmacFileTerminate,
1280 /* pfnEpInitialize. */
1281 pdmacFileEpInitialize,
1282 /* pfnEpClose */
1283 pdmacFileEpClose,
1284 /* pfnEpRead */
1285 pdmacFileEpRead,
1286 /* pfnEpWrite */
1287 pdmacFileEpWrite,
1288 /* pfnEpFlush */
1289 pdmacFileEpFlush,
1290 /* pfnEpGetSize */
1291 pdmacFileEpGetSize,
1292 /* pfnEpSetSize */
1293 pdmacFileEpSetSize,
1294 /* u32VersionEnd */
1295 PDMAC_EPCLASS_OPS_VERSION
1296};
1297
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette