VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp@43858

Last change on this file was r43858, checked in by vboxsync, 12 years ago:

AsyncCompletion: Log if async flushes are disabled and fix endless loop if a request is not aligned to a 512 byte boundary

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 65.0 KB
1/* $Id: PDMAsyncCompletionFileNormal.cpp 43858 2012-11-12 16:14:29Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Async File I/O manager.
4 */
5
6/*
7 * Copyright (C) 2006-2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
22#include <iprt/types.h>
23#include <iprt/asm.h>
24#include <iprt/file.h>
25#include <iprt/mem.h>
26#include <iprt/string.h>
27#include <iprt/assert.h>
28#include <VBox/log.h>
29
30#include "PDMAsyncCompletionFileInternal.h"
31
32/** The update period for the I/O load statistics in ms. */
33#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
34/** Step size for growing the maximum number of requests a manager will handle. */
35#define PDMACEPFILEMGR_REQS_STEP 512
36
37
38/*******************************************************************************
39* Internal functions *
40*******************************************************************************/
41static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
42 PPDMACEPFILEMGR pAioMgr,
43 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
44
45static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
46 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
47 PPDMACFILERANGELOCK pRangeLock);
48
49static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
50 int rc, size_t cbTransfered);
51
52
53int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
54{
55 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
56
57 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
58 if (rc == VERR_OUT_OF_RANGE)
59 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax);
60
61 if (RT_SUCCESS(rc))
62 {
63 /* Initialize request handle array. */
64 pAioMgr->iFreeEntry = 0;
65 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
66 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
67
68 if (pAioMgr->pahReqsFree)
69 {
70 /* Create the range lock memcache. */
71 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
72 0, UINT32_MAX, NULL, NULL, NULL, 0);
73 if (RT_SUCCESS(rc))
74 return VINF_SUCCESS;
75
76 RTMemFree(pAioMgr->pahReqsFree);
77 }
78 else
79 {
80 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
81 rc = VERR_NO_MEMORY;
82 }
83 }
84
85 return rc;
86}
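/*
 * A minimal standalone sketch of the creation fallback above: ask for an
 * unlimited request count first and retry with the concrete bound only when
 * the host rejects that.  toyCtxCreate() is a hypothetical stand-in for
 * RTFileAioCtxCreate(), with a made-up host cap; the status values mimic
 * VERR_OUT_OF_RANGE and VINF_SUCCESS.
 */
#include <stdint.h>

#define TOY_UNLIMITED_REQS  UINT32_MAX
#define TOY_HOST_CAP        1024u
enum { TOY_OK = 0, TOY_ERR_OUT_OF_RANGE = -1 };

static int toyCtxCreate(uint32_t *pcSlots, uint32_t cRequested)
{
    if (cRequested > TOY_HOST_CAP)
        return TOY_ERR_OUT_OF_RANGE;        /* host enforces a hard limit */
    *pcSlots = cRequested;
    return TOY_OK;
}

static int toyCtxCreateWithFallback(uint32_t *pcSlots, uint32_t cBound)
{
    int rc = toyCtxCreate(pcSlots, TOY_UNLIMITED_REQS);
    if (rc == TOY_ERR_OUT_OF_RANGE)         /* mirrors the VERR_OUT_OF_RANGE retry */
        rc = toyCtxCreate(pcSlots, cBound);
    return rc;
}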
87
88void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
89{
90 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
91
92 while (pAioMgr->iFreeEntry > 0)
93 {
94 pAioMgr->iFreeEntry--;
95 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
96 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
97 }
98
99 RTMemFree(pAioMgr->pahReqsFree);
100 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
101}
102
103#if 0 /* currently unused */
104/**
105 * Sorts the endpoint list with insertion sort.
106 */
107static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
108{
109 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
110
111 pEpPrev = pAioMgr->pEndpointsHead;
112 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
113
114 while (pEpCurr)
115 {
116 /* Remember the next element to sort because the list might change. */
117 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
118
119 /* Unlink the current element from the list. */
120 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
121 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
122
123 if (pPrev)
124 pPrev->AioMgr.pEndpointNext = pNext;
125 else
126 pAioMgr->pEndpointsHead = pNext;
127
128 if (pNext)
129 pNext->AioMgr.pEndpointPrev = pPrev;
130
131 /* Go back until we reached the place to insert the current endpoint into. */
132 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
133 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
134
135 /* Link the endpoint into the list. */
136 if (pEpPrev)
137 pNext = pEpPrev->AioMgr.pEndpointNext;
138 else
139 pNext = pAioMgr->pEndpointsHead;
140
141 pEpCurr->AioMgr.pEndpointNext = pNext;
142 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
143
144 if (pNext)
145 pNext->AioMgr.pEndpointPrev = pEpCurr;
146
147 if (pEpPrev)
148 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
149 else
150 pAioMgr->pEndpointsHead = pEpCurr;
151
152 pEpCurr = pEpNextToSort;
153 }
154
155#ifdef DEBUG
156 /* Validate sorting algorithm */
157 unsigned cEndpoints = 0;
158 pEpCurr = pAioMgr->pEndpointsHead;
159
160 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
161 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
162
163 while (pEpCurr)
164 {
165 cEndpoints++;
166
167 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
168 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
169
170 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
171 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
172
173 pEpCurr = pNext;
174 }
175
176 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
177
178#endif
179}
180#endif /* currently unused */
181
182/**
183 * Removes an endpoint from the currently assigned manager.
184 *
185 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
186 * FALSE otherwise.
187 * @param pEndpointRemove The endpoint to remove.
188 */
189static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
190{
191 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
192 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
193 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
194
195 pAioMgr->cEndpoints--;
196
197 if (pPrev)
198 pPrev->AioMgr.pEndpointNext = pNext;
199 else
200 pAioMgr->pEndpointsHead = pNext;
201
202 if (pNext)
203 pNext->AioMgr.pEndpointPrev = pPrev;
204
205 /* Make sure that there is no request pending on this manager for the endpoint. */
206 if (!pEndpointRemove->AioMgr.cRequestsActive)
207 {
208 Assert(!pEndpointRemove->pFlushReq);
209
210 /* Reopen the file so that the new endpoint can re-associate with the file */
211 RTFileClose(pEndpointRemove->hFile);
212 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
213 AssertRC(rc);
214 return false;
215 }
216
217 return true;
218}
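/*
 * The unlink step above in standalone form, assuming nothing beyond a plain
 * doubly linked list whose head pointer lives in the owning manager.
 */
#include <stddef.h>

typedef struct LISTNODE
{
    struct LISTNODE *pPrev;
    struct LISTNODE *pNext;
} LISTNODE;

static void listUnlink(LISTNODE **ppHead, LISTNODE *pNode)
{
    if (pNode->pPrev)
        pNode->pPrev->pNext = pNode->pNext; /* bypass the node from the left */
    else
        *ppHead = pNode->pNext;             /* the node was the list head */

    if (pNode->pNext)
        pNode->pNext->pPrev = pNode->pPrev; /* bypass the node from the right */
}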
219
220#if 0 /* currently unused */
221
222static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
223{
224 /* Balancing doesn't make sense with only one endpoint. */
225 if (pAioMgr->cEndpoints == 1)
226 return false;
227
228 /* Doesn't make sense to move endpoints if only one produces the whole load */
229 unsigned cEndpointsWithLoad = 0;
230
231 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
232
233 while (pCurr)
234 {
235 if (pCurr->AioMgr.cReqsPerSec)
236 cEndpointsWithLoad++;
237
238 pCurr = pCurr->AioMgr.pEndpointNext;
239 }
240
241 return (cEndpointsWithLoad > 1);
242}
243
244/**
245 * Creates a new I/O manager and spreads the I/O load of the endpoints
246 * between the given I/O manager and the new one.
247 *
248 * @returns nothing.
249 * @param pAioMgr The I/O manager with high I/O load.
250 */
251static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
252{
253 /*
254 * Check if balancing would improve the situation.
255 */
256 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
257 {
258 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
259 PPDMACEPFILEMGR pAioMgrNew = NULL;
260
261 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
262 if (RT_SUCCESS(rc))
263 {
264 /* We will sort the list by request count per second. */
265 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
266
267 /* Now move some endpoints to the new manager. */
268 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
269 unsigned cReqsOther = 0;
270 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
271
272 while (pCurr)
273 {
274 if (cReqsHere <= cReqsOther)
275 {
276 /*
277 * The other manager has more requests to handle now.
278 * We will keep the current endpoint.
279 */
280 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
281 cReqsHere += pCurr->AioMgr.cReqsPerSec;
282 pCurr = pCurr->AioMgr.pEndpointNext;
283 }
284 else
285 {
286 /* Move to other endpoint. */
287 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
288 cReqsOther += pCurr->AioMgr.cReqsPerSec;
289
290 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
291
292 pCurr = pCurr->AioMgr.pEndpointNext;
293
294 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
295
296 if (fReqsPending)
297 {
298 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
299 pMove->AioMgr.fMoving = true;
300 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
301 }
302 else
303 {
304 pMove->AioMgr.fMoving = false;
305 pMove->AioMgr.pAioMgrDst = NULL;
306 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
307 }
308 }
309 }
310 }
311 else
312 {
313 /* Don't process further but leave a log entry about reduced performance. */
314 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
315 }
316 }
317 else
318 Log(("AIOMgr: Load balancing would not improve anything\n"));
319}
320
321#endif /* unused */
322
323/**
324 * Increase the maximum number of active requests for the given I/O manager.
325 *
326 * @returns VBox status code.
327 * @param pAioMgr The I/O manager to grow.
328 */
329static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
330{
331 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
332
333 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
334 && !pAioMgr->cRequestsActive,
335 ("Invalid state of the I/O manager\n"));
336
337#ifdef RT_OS_WINDOWS
338 /*
339 * Reopen the files of all assigned endpoints first so we can assign them to the new
340 * I/O context.
341 */
342 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
343
344 while (pCurr)
345 {
346 RTFileClose(pCurr->hFile);
347 int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);
348
349 pCurr = pCurr->AioMgr.pEndpointNext;
350 }
351#endif
352
353 /* Create the new bigger context. */
354 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
355
356 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
357 int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS);
358 if (rc == VERR_OUT_OF_RANGE)
359 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax);
360
361 if (RT_SUCCESS(rc))
362 {
363 /* Close the old context. */
364 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
365 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
366
367 pAioMgr->hAioCtx = hAioCtxNew;
368
369 /* Create a new I/O task handle array */
370 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
371 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
372
373 if (pahReqNew)
374 {
375 /* Copy the cached request handles. */
376 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
377 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
378
379 RTMemFree(pAioMgr->pahReqsFree);
380 pAioMgr->pahReqsFree = pahReqNew;
381 pAioMgr->cReqEntries = cReqEntriesNew;
382 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
383 pAioMgr->cRequestsActiveMax));
384 }
385 else
386 rc = VERR_NO_MEMORY;
387 }
388
389#ifdef RT_OS_WINDOWS
390 /* Assign the file to the new context. */
391 pCurr = pAioMgr->pEndpointsHead;
392 while (pCurr)
393 {
394 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
395 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
396
397 pCurr = pCurr->AioMgr.pEndpointNext;
398 }
399#endif
400
401 if (RT_FAILURE(rc))
402 {
403 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
404 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
405 }
406
407 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
408 LogFlowFunc(("returns rc=%Rrc\n", rc));
409
410 return rc;
411}
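/*
 * The handle array growth step above in standalone form: allocate a bigger
 * zero-initialized array, copy the cached entries and swap the pointers.
 * Plain calloc()/free() stand in for RTMemAllocZ()/RTMemFree().
 */
#include <stdlib.h>
#include <string.h>

static int growHandleArray(int **ppaEntries, unsigned *pcEntries, unsigned cEntriesNew)
{
    int *paNew = (int *)calloc(cEntriesNew, sizeof(int));
    if (!paNew)
        return -1;                                        /* VERR_NO_MEMORY in the code above */

    memcpy(paNew, *ppaEntries, *pcEntries * sizeof(int)); /* keep the cached handles */
    free(*ppaEntries);
    *ppaEntries = paNew;
    *pcEntries  = cEntriesNew;
    return 0;
}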
412
413/**
414 * Checks if a given status code is fatal.
415 * Non fatal errors can be fixed by migrating the endpoint to a
416 * failsafe manager.
417 *
418 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
419 * false If the error can be fixed by a migration. (image on NFS disk for example)
420 * @param rcReq The status code to check.
421 */
422DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
423{
424 return rcReq == VERR_DEV_IO_ERROR
425 || rcReq == VERR_FILE_IO_ERROR
426 || rcReq == VERR_DISK_IO_ERROR
427 || rcReq == VERR_DISK_FULL
428 || rcReq == VERR_FILE_TOO_BIG;
429}
430
431/**
432 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
433 *
434 * @returns VBox status code
435 * @param pAioMgr The I/O manager the error occurred on.
436 * @param rc The error code.
437 */
438static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
439{
440 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
441 pAioMgr, rc));
442 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
443 LogRel(("AIOMgr: Please contact the product vendor\n"));
444
445 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
446
447 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
448 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
449
450 AssertMsgFailed(("Implement\n"));
451 return VINF_SUCCESS;
452}
453
454/**
455 * Put a list of tasks in the pending request list of an endpoint.
456 */
457DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
458{
459 /* Add the rest of the tasks to the pending list */
460 if (!pEndpoint->AioMgr.pReqsPendingHead)
461 {
462 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
463 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
464 }
465 else
466 {
467 Assert(pEndpoint->AioMgr.pReqsPendingTail);
468 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
469 }
470
471 /* Update the tail. */
472 while (pTaskHead->pNext)
473 pTaskHead = pTaskHead->pNext;
474
475 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
476 pTaskHead->pNext = NULL;
477}
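/*
 * Standalone form of the append above: splice a chain of tasks onto a queue
 * which keeps head and tail pointers, then walk the chain to find the new tail.
 */
#include <stddef.h>

typedef struct TOYTASK { struct TOYTASK *pNext; } TOYTASK;
typedef struct TOYQUEUE { TOYTASK *pHead; TOYTASK *pTail; } TOYQUEUE;

static void toyQueueAppendList(TOYQUEUE *pQueue, TOYTASK *pTaskHead)
{
    if (!pQueue->pHead)
        pQueue->pHead = pTaskHead;          /* the queue was empty */
    else
        pQueue->pTail->pNext = pTaskHead;   /* chain onto the old tail */

    while (pTaskHead->pNext)                /* walk to the end of the new chain */
        pTaskHead = pTaskHead->pNext;

    pQueue->pTail = pTaskHead;
    pTaskHead->pNext = NULL;                /* terminate the queue, as above */
}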
478
479/**
480 * Put one task in the pending request list of an endpoint.
481 */
482DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
483{
484 /* Add the rest of the tasks to the pending list */
485 if (!pEndpoint->AioMgr.pReqsPendingHead)
486 {
487 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
488 pEndpoint->AioMgr.pReqsPendingHead = pTask;
489 }
490 else
491 {
492 Assert(pEndpoint->AioMgr.pReqsPendingTail);
493 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
494 }
495
496 pEndpoint->AioMgr.pReqsPendingTail = pTask;
497 pTask->pNext = NULL;
498}
499
500/**
501 * Allocates an async I/O request.
502 *
503 * @returns Handle to the request.
504 * @param pAioMgr The I/O manager.
505 */
506static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
507{
508 /* Get a request handle. */
509 RTFILEAIOREQ hReq;
510 if (pAioMgr->iFreeEntry > 0)
511 {
512 pAioMgr->iFreeEntry--;
513 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
514 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
515 Assert(hReq != NIL_RTFILEAIOREQ);
516 }
517 else
518 {
519 int rc = RTFileAioReqCreate(&hReq);
520 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
521 }
522
523 return hReq;
524}
525
526/**
527 * Frees an async I/O request handle.
528 *
529 * @returns nothing.
530 * @param pAioMgr The I/O manager.
531 * @param hReq The I/O request handle to free.
532 */
533static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
534{
535 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
536 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
537
538 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
539 pAioMgr->iFreeEntry++;
540}
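/*
 * The request handle cache above in miniature: pahReqsFree acts as an
 * array-backed LIFO stack indexed by iFreeEntry, so the most recently freed
 * handle is reused first.  Plain ints replace RTFILEAIOREQ here.
 */
#include <assert.h>

#define TOY_NIL_HANDLE (-1)
#define TOY_CACHE_SIZE 512

typedef struct TOYCACHE
{
    int      aHandles[TOY_CACHE_SIZE];  /* mirrors pahReqsFree */
    unsigned iFree;                     /* mirrors iFreeEntry */
} TOYCACHE;

static int toyCachePop(TOYCACHE *pCache)
{
    if (pCache->iFree == 0)
        return TOY_NIL_HANDLE;          /* empty: the caller creates a fresh handle */
    int h = pCache->aHandles[--pCache->iFree];
    pCache->aHandles[pCache->iFree] = TOY_NIL_HANDLE;
    return h;
}

static void toyCachePush(TOYCACHE *pCache, int h)
{
    assert(pCache->iFree < TOY_CACHE_SIZE); /* sized for the request maximum */
    pCache->aHandles[pCache->iFree++] = h;
}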
541
542/**
543 * Wrapper around RTFileAioCtxSubmit() which also does error handling.
544 */
545static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
546 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
547 PRTFILEAIOREQ pahReqs, unsigned cReqs)
548{
549 pAioMgr->cRequestsActive += cReqs;
550 pEndpoint->AioMgr.cRequestsActive += cReqs;
551
552 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
553 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
554
555 int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
556 if (RT_FAILURE(rc))
557 {
558 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
559 {
560 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
561
562 /* Append any not submitted task to the waiting list. */
563 for (size_t i = 0; i < cReqs; i++)
564 {
565 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
566
567 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
568 {
569 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
570
571 Assert(pTask->hReq == pahReqs[i]);
572 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
573 pAioMgr->cRequestsActive--;
574 pEndpoint->AioMgr.cRequestsActive--;
575
576 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
577 {
578 /* Clear the pending flush */
579 Assert(pEndpoint->pFlushReq == pTask);
580 pEndpoint->pFlushReq = NULL;
581 }
582 }
583 }
584
585 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
586
587 /* Print an entry in the release log */
588 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
589 {
590 pEpClass->fOutOfResourcesWarningPrinted = true;
591 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
592 pAioMgr->cRequestsActive));
593 }
594
595 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
596 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
597 rc = VINF_SUCCESS;
598 }
599 else /* Another kind of error happened (full disk, ...) */
600 {
601 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
602 for (size_t i = 0; i < cReqs; i++)
603 {
604 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
605
606 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
607 {
608 /* We call ourselves again to do any error handling which might come up now. */
609 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
610 AssertRC(rc);
611 }
612 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
613 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
614 }
615
616
617 if ( pEndpoint->pFlushReq
618 && !pAioMgr->cRequestsActive
619 && !pEndpoint->fAsyncFlushSupported)
620 {
621 /*
622 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
623 * the async flush API.
624 * Happens only if we just noticed that this is not supported
625 * and the only active request was a flush.
626 */
627 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
628 pEndpoint->pFlushReq = NULL;
629 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
630 pdmacFileTaskFree(pEndpoint, pFlush);
631 }
632 }
633 }
634
635 return VINF_SUCCESS;
636}
637
638static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
639 RTFOFF offStart, size_t cbRange,
640 PPDMACTASKFILE pTask)
641{
642 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
643 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
644 ("Invalid task type %d\n", pTask->enmTransferType));
645
646 PPDMACFILERANGELOCK pRangeLock;
647 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
648 if (!pRangeLock)
649 {
650 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
651 /* Check if we intersect with the range. */
652 if ( !pRangeLock
653 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
654 && (pRangeLock->Core.KeyLast) >= offStart))
655 {
656 pRangeLock = NULL; /* False alarm */
657 }
658 }
659
660 /* Check whether we have one of the situations explained below */
661 if ( pRangeLock
662#if 0 /** @todo later. For now we will just block all requests if they interfere */
663 && ( (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
664 || (!pRangeLock->fReadLock)
665#endif
666 )
667 {
668 /* Add to the list. */
669 pTask->pNext = NULL;
670
671 if (!pRangeLock->pWaitingTasksHead)
672 {
673 Assert(!pRangeLock->pWaitingTasksTail);
674 pRangeLock->pWaitingTasksHead = pTask;
675 pRangeLock->pWaitingTasksTail = pTask;
676 }
677 else
678 {
679 AssertPtr(pRangeLock->pWaitingTasksTail);
680 pRangeLock->pWaitingTasksTail->pNext = pTask;
681 pRangeLock->pWaitingTasksTail = pTask;
682 }
683 return true;
684 }
685
686 return false;
687}
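/*
 * The intersection test above in standalone form: two inclusive ranges
 * overlap exactly when each one starts at or before the other one ends.
 * Worked example: a lock covering [512, 1023] intersects a request for
 * 100 bytes at offset 1000 ([1000, 1099]) since 512 <= 1099 and 1023 >= 1000.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool toyRangesIntersect(int64_t offLock, int64_t offLockLast,
                               int64_t offReq, size_t cbReq)
{
    int64_t offReqLast = offReq + (int64_t)cbReq - 1;   /* inclusive end, like Core.KeyLast */
    return offLock <= offReqLast
        && offLockLast >= offReq;
}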
688
689static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
690 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
691 RTFOFF offStart, size_t cbRange,
692 PPDMACTASKFILE pTask)
693{
694 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
695 pAioMgr, pEndpoint, offStart, cbRange, pTask));
696
697 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
698 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
699 offStart, cbRange));
700
701 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
702 if (!pRangeLock)
703 return VERR_NO_MEMORY;
704
705 /* Init the lock. */
706 pRangeLock->Core.Key = offStart;
707 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
708 pRangeLock->cRefs = 1;
709 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
710 pRangeLock->pWaitingTasksHead = NULL;
711 pRangeLock->pWaitingTasksTail = NULL;
712
713 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
714 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
715
716 /* Let the task point to its lock. */
717 pTask->pRangeLock = pRangeLock;
718
719 return VINF_SUCCESS;
720}
721
722static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
723 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
724 PPDMACFILERANGELOCK pRangeLock)
725{
726 PPDMACTASKFILE pTasksWaitingHead;
727
728 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
729 pAioMgr, pEndpoint, pRangeLock));
730
731 AssertPtr(pRangeLock);
732 Assert(pRangeLock->cRefs == 1);
733
734 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
735 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
736 pRangeLock->pWaitingTasksHead = NULL;
737 pRangeLock->pWaitingTasksTail = NULL;
738 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
739
740 return pTasksWaitingHead;
741}
742
743static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
744 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
745 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
746{
747 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
748 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
749 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
750 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
751
752 pTask->fPrefetch = false;
753 pTask->cbBounceBuffer = 0;
754
755 /*
756 * Before we start to setup the request we have to check whether there is a task
757 * already active which range intersects with ours. We have to defer execution
758 * of this task in two cases:
759 * - The pending task is a write and the current is either read or write
760 * - The pending task is a read and the current task is a write task.
761 *
762 * To check whether a range is currently "locked" we use the AVL tree where every pending task
763 * is stored by its file offset range. The current task will be added to the waiting
764 * list of the active task and will be executed when the active one completes. (The method
765 * below which checks whether a range is already used will add the task.)
766 *
767 * This is necessary because of the requirement to align all requests to a 512 byte
768 * boundary which is enforced by the host OS (Linux and Windows atm). It is possible that
769 * we have to process unaligned tasks and need to align them using bounce buffers.
770 * While the data is fetched from the file another request might arrive writing to
771 * the same range. This will result in data corruption if both are executed concurrently.
772 */
773 int rc = VINF_SUCCESS;
774 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask);
775 if (!fLocked)
776 {
777 /* Get a request handle. */
778 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
779 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
780
781 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
782 {
783 /* Grow the file if needed. */
784 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
785 {
786 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
787 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
788 }
789
790 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
791 pTask->Off, pTask->DataSeg.pvSeg,
792 pTask->DataSeg.cbSeg, pTask);
793 }
794 else
795 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
796 pTask->Off, pTask->DataSeg.pvSeg,
797 pTask->DataSeg.cbSeg, pTask);
798 AssertRC(rc);
799
800 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
801 pTask->DataSeg.cbSeg,
802 pTask);
803
804 if (RT_SUCCESS(rc))
805 {
806 pTask->hReq = hReq;
807 *phReq = hReq;
808 }
809 }
810 else
811 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
812
813 return rc;
814}
815
816static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
817 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
818 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
819{
820 /*
821 * Check if the alignment requirements are met.
822 * Offset, transfer size and buffer address
823 * need to be on a 512 boundary.
824 */
825 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
826 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
827 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
828
829 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
830 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
831 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
832 offStart, cbToTransfer, pEndpoint->cbFile));
833
834 pTask->fPrefetch = false;
835
836 /*
837 * Before we start to setup the request we have to check whether there is a task
838 * already active which range intersects with ours. We have to defer execution
839 * of this task in two cases:
840 * - The pending task is a write and the current is either read or write
841 * - The pending task is a read and the current task is a write task.
842 *
843 * To check whether a range is currently "locked" we use the AVL tree where every pending task
844 * is stored by its file offset range. The current task will be added to the waiting
845 * list of the active task and will be executed when the active one completes. (The method
846 * below which checks whether a range is already used will add the task.)
847 *
848 * This is necessary because of the requirement to align all requests to a 512 byte
849 * boundary which is enforced by the host OS (Linux and Windows atm). It is possible that
850 * we have to process unaligned tasks and need to align them using bounce buffers.
851 * While the data is fetched from the file another request might arrive writing to
852 * the same range. This will result in data corruption if both are executed concurrently.
853 */
854 int rc = VINF_SUCCESS;
855 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);
856 if (!fLocked)
857 {
858 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
859 void *pvBuf = pTask->DataSeg.pvSeg;
860
861 /* Get a request handle. */
862 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
863 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
864
865 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
866 || RT_UNLIKELY(offStart != pTask->Off)
867 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
868 {
869 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
870 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
871
872 /* Create bounce buffer. */
873 pTask->cbBounceBuffer = cbToTransfer;
874
875 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
876 pTask->Off, offStart));
877 pTask->offBounceBuffer = pTask->Off - offStart;
878
879 /** @todo: I think we need something like an RTMemAllocAligned method here.
880 * Current assumption is that the maximum alignment is 4096 bytes
881 * (GPT disk on Windows)
882 * so we can use RTMemPageAlloc here.
883 */
884 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
885 if (RT_LIKELY(pTask->pvBounceBuffer))
886 {
887 pvBuf = pTask->pvBounceBuffer;
888
889 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
890 {
891 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
892 || RT_UNLIKELY(offStart != pTask->Off))
893 {
894 /* We have to fill the buffer first before we can update the data. */
895 LogFlow(("Prefetching data for task %#p\n", pTask));
896 pTask->fPrefetch = true;
897 enmTransferType = PDMACTASKFILETRANSFER_READ;
898 }
899 else
900 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
901 }
902 }
903 else
904 rc = VERR_NO_MEMORY;
905 }
906 else
907 pTask->cbBounceBuffer = 0;
908
909 if (RT_SUCCESS(rc))
910 {
911 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
912 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
913
914 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
915 {
916 /* Grow the file if needed. */
917 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
918 {
919 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
920 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
921 }
922
923 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
924 offStart, pvBuf, cbToTransfer, pTask);
925 }
926 else
927 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
928 offStart, pvBuf, cbToTransfer, pTask);
929 AssertRC(rc);
930
931 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask);
932
933 if (RT_SUCCESS(rc))
934 {
935 pTask->hReq = hReq;
936 *phReq = hReq;
937 }
938 else
939 {
940 /* Cleanup */
941 if (pTask->cbBounceBuffer)
942 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
943 }
944 }
945 }
946 else
947 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
948
949 return rc;
950}
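/*
 * Standalone sketch of the 512 byte alignment arithmetic used above, with a
 * worked example: a 100 byte request at offset 1000 becomes an aligned
 * transfer of 1024 bytes at offset 512, and the payload starts at byte 488
 * of the bounce buffer.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_SECTOR 512u

int main(void)
{
    uint64_t off   = 1000;  /* unaligned request offset (pTask->Off) */
    size_t   cbSeg = 100;   /* unaligned request size (pTask->DataSeg.cbSeg) */

    uint64_t offStart     = off & ~(uint64_t)(TOY_SECTOR - 1);              /* -> 512  */
    size_t   cbToTransfer = (cbSeg + (size_t)(off - offStart) + TOY_SECTOR - 1)
                          & ~(size_t)(TOY_SECTOR - 1);                      /* -> 1024, like RT_ALIGN_Z */
    size_t   offBounce    = (size_t)(off - offStart);                       /* -> 488  */

    printf("offStart=%llu cbToTransfer=%zu offBounce=%zu\n",
           (unsigned long long)offStart, cbToTransfer, offBounce);
    return 0;
}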
951
952static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
953 PPDMACEPFILEMGR pAioMgr,
954 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
955{
956 RTFILEAIOREQ apReqs[20];
957 unsigned cRequests = 0;
958 int rc = VINF_SUCCESS;
959
960 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
961 ("Trying to process request lists of a non active endpoint!\n"));
962
963 /* Go through the list and queue the requests until we get a flush request */
964 while ( pTaskHead
965 && !pEndpoint->pFlushReq
966 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
967 && RT_SUCCESS(rc))
968 {
969 RTMSINTERVAL msWhenNext;
970 PPDMACTASKFILE pCurr = pTaskHead;
971
972 if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
973 {
974 pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
975 break;
976 }
977
978 pTaskHead = pTaskHead->pNext;
979
980 pCurr->pNext = NULL;
981
982 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
983 ("Endpoints do not match\n"));
984
985 switch (pCurr->enmTransferType)
986 {
987 case PDMACTASKFILETRANSFER_FLUSH:
988 {
989 /* If there is no data transfer request pending, this flush request finishes immediately. */
990 if (pEndpoint->fAsyncFlushSupported)
991 {
992 /* Issue a flush to the host. */
993 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
994 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
995
996 LogFlow(("Flush request %#p\n", hReq));
997
998 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
999 if (RT_FAILURE(rc))
1000 {
1001 LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
1002 pEndpoint->fAsyncFlushSupported = false;
1003 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1004 rc = VINF_SUCCESS; /* Fake success */
1005 }
1006 else
1007 {
1008 pCurr->hReq = hReq;
1009 apReqs[cRequests] = hReq;
1010 pEndpoint->AioMgr.cReqsProcessed++;
1011 cRequests++;
1012 }
1013 }
1014
1015 if ( !pEndpoint->AioMgr.cRequestsActive
1016 && !pEndpoint->fAsyncFlushSupported)
1017 {
1018 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1019 pdmacFileTaskFree(pEndpoint, pCurr);
1020 }
1021 else
1022 {
1023 Assert(!pEndpoint->pFlushReq);
1024 pEndpoint->pFlushReq = pCurr;
1025 }
1026 break;
1027 }
1028 case PDMACTASKFILETRANSFER_READ:
1029 case PDMACTASKFILETRANSFER_WRITE:
1030 {
1031 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1032
1033 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1034 {
1035 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1036 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1037 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1038 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1039 else
1040 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1041
1042 AssertRC(rc);
1043 }
1044 else
1045 {
1046 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1047 hReq = pCurr->hReq;
1048 }
1049
1050 LogFlow(("Read/Write request %#p\n", hReq));
1051
1052 if (hReq != NIL_RTFILEAIOREQ)
1053 {
1054 apReqs[cRequests] = hReq;
1055 cRequests++;
1056 }
1057 break;
1058 }
1059 default:
1060 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1061 } /* switch transfer type */
1062
1063 /* Queue the requests if the array is full. */
1064 if (cRequests == RT_ELEMENTS(apReqs))
1065 {
1066 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1067 cRequests = 0;
1068 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1069 ("Unexpected return code\n"));
1070 }
1071 }
1072
1073 if (cRequests)
1074 {
1075 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1076 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1077 ("Unexpected return code rc=%Rrc\n", rc));
1078 }
1079
1080 if (pTaskHead)
1081 {
1082 /* Add the rest of the tasks to the pending list */
1083 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1084
1085 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1086 && !pEndpoint->pFlushReq))
1087 {
1088#if 0
1089 /*
1090 * The I/O manager has no room left for more requests
1091 * but there are still requests to process.
1092 * Create a new I/O manager and let it handle some endpoints.
1093 */
1094 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1095#else
1096 /* Grow the I/O manager */
1097 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1098#endif
1099 }
1100 }
1101
1102 /* Insufficient resources are not fatal. */
1103 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1104 rc = VINF_SUCCESS;
1105
1106 return rc;
1107}
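/*
 * The submission batching of the task list processing above in miniature:
 * collect prepared requests into a small fixed array, submit whenever the
 * array fills up, and submit the remainder once at the end.  toySubmit() is
 * a stand-in for pdmacFileAioMgrNormalReqsEnqueue().
 */
#include <stddef.h>
#include <stdio.h>

#define TOY_BATCH 20

static void toySubmit(const int *paReqs, unsigned cReqs)
{
    printf("submitting %u requests (first=%d)\n", cReqs, paReqs[0]);
}

static void toyProcessAll(const int *paWork, size_t cWork)
{
    int      aBatch[TOY_BATCH];
    unsigned cBatched = 0;

    for (size_t i = 0; i < cWork; i++)
    {
        aBatch[cBatched++] = paWork[i];
        if (cBatched == TOY_BATCH)      /* array full: submit and start over */
        {
            toySubmit(aBatch, cBatched);
            cBatched = 0;
        }
    }
    if (cBatched)                       /* leftover partial batch */
        toySubmit(aBatch, cBatched);
}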
1108
1109/**
1110 * Adds all pending requests for the given endpoint
1111 * until a flush request is encountered or there is no
1112 * request anymore.
1113 *
1114 * @returns VBox status code.
1115 * @param pAioMgr The async I/O manager for the endpoint
1116 * @param pEndpoint The endpoint to get the requests from.
1117 */
1118static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1119 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1120{
1121 int rc = VINF_SUCCESS;
1122 PPDMACTASKFILE pTasksHead = NULL;
1123
1124 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1125 ("Trying to process request lists of a non active endpoint!\n"));
1126
1127 Assert(!pEndpoint->pFlushReq);
1128
1129 /* Check the pending list first */
1130 if (pEndpoint->AioMgr.pReqsPendingHead)
1131 {
1132 LogFlow(("Queuing pending requests first\n"));
1133
1134 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1135 /*
1136 * Clear the list as the processing routine will insert them into the list
1137 * again if it gets a flush request.
1138 */
1139 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1140 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1141 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1142 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
1143 }
1144
1145 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1146 {
1147 /* Now the request queue. */
1148 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1149 if (pTasksHead)
1150 {
1151 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1152 AssertRC(rc);
1153 }
1154 }
1155
1156 return rc;
1157}
1158
1159static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1160{
1161 int rc = VINF_SUCCESS;
1162 bool fNotifyWaiter = false;
1163
1164 LogFlowFunc((": Enter\n"));
1165
1166 Assert(pAioMgr->fBlockingEventPending);
1167
1168 switch (pAioMgr->enmBlockingEvent)
1169 {
1170 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1171 {
1172 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1173 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without an endpoint to add\n"));
1174
1175 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1176
1177 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1178 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1179 if (pAioMgr->pEndpointsHead)
1180 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1181 pAioMgr->pEndpointsHead = pEndpointNew;
1182
1183 /* Assign the completion point to this file. */
1184 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
1185 fNotifyWaiter = true;
1186 pAioMgr->cEndpoints++;
1187 break;
1188 }
1189 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1190 {
1191 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1192 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without an endpoint to remove\n"));
1193
1194 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1195 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1196 break;
1197 }
1198 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1199 {
1200 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1201 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without an endpoint to close\n"));
1202
1203 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1204 {
1205 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1206
1207 /* Make sure all tasks finished. Process the queues a last time first. */
1208 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1209 AssertRC(rc);
1210
1211 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1212 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1213 }
1214 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1215 && (!pEndpointClose->AioMgr.cRequestsActive))
1216 fNotifyWaiter = true;
1217 break;
1218 }
1219 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1220 {
1221 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1222 if (!pAioMgr->cRequestsActive)
1223 fNotifyWaiter = true;
1224 break;
1225 }
1226 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1227 {
1228 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1229 break;
1230 }
1231 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1232 {
1233 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1234 fNotifyWaiter = true;
1235 break;
1236 }
1237 default:
1238 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1239 }
1240
1241 if (fNotifyWaiter)
1242 {
1243 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1244 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1245
1246 /* Release the waiting thread. */
1247 LogFlow(("Signalling waiter\n"));
1248 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1249 AssertRC(rc);
1250 }
1251
1252 LogFlowFunc((": Leave\n"));
1253 return rc;
1254}
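/*
 * The blocking event handshake above in miniature: the posting thread
 * publishes the event, raises the pending flag and sleeps on a semaphore;
 * the manager thread consumes the event, clears the flag and signals the
 * semaphore.  C11 atomics and a POSIX semaphore stand in for the IPRT
 * primitives (ASMAtomic*, RTSEMEVENT).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <semaphore.h>

typedef struct TOYMGR
{
    atomic_bool fEventPending;  /* mirrors fBlockingEventPending */
    int         enmEvent;       /* mirrors enmBlockingEvent */
    sem_t       SemBlock;       /* mirrors EventSemBlock */
} TOYMGR;

/* Posting side: publish the event, then wait until the manager consumed it. */
static void toyPostBlockingEvent(TOYMGR *pMgr, int enmEvent)
{
    pMgr->enmEvent = enmEvent;
    atomic_store(&pMgr->fEventPending, true);
    /* ...wake the manager thread here (RTSemEventSignal in the real code)... */
    sem_wait(&pMgr->SemBlock);          /* released by the manager below */
}

/* Manager side: called from the I/O loop once the flag is observed. */
static void toyProcessBlockingEvent(TOYMGR *pMgr)
{
    /* ...handle pMgr->enmEvent here... */
    atomic_store(&pMgr->fEventPending, false);
    sem_post(&pMgr->SemBlock);          /* release the waiting poster */
}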
1255
1256/**
1257 * Checks all endpoints for pending events or new requests.
1258 *
1259 * @returns VBox status code.
1260 * @param pAioMgr The I/O manager handle.
1261 */
1262static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1263{
1264 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1265 int rc = VINF_SUCCESS;
1266 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1267
1268 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
1269
1270 while (pEndpoint)
1271 {
1272 if (!pEndpoint->pFlushReq
1273 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1274 && !pEndpoint->AioMgr.fMoving)
1275 {
1276 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1277 if (RT_FAILURE(rc))
1278 return rc;
1279 }
1280 else if ( !pEndpoint->AioMgr.cRequestsActive
1281 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1282 {
1283 /* Reopen the file so that the new endpoint can re-associate with the file */
1284 RTFileClose(pEndpoint->hFile);
1285 rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1286 AssertRC(rc);
1287
1288 if (pEndpoint->AioMgr.fMoving)
1289 {
1290 pEndpoint->AioMgr.fMoving = false;
1291 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1292 }
1293 else
1294 {
1295 Assert(pAioMgr->fBlockingEventPending);
1296 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1297
1298 /* Release the waiting thread. */
1299 LogFlow(("Signalling waiter\n"));
1300 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1301 AssertRC(rc);
1302 }
1303 }
1304
1305 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1306 }
1307
1308 return rc;
1309}
1310
1311/**
1312 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1313 */
1314static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1315{
1316 size_t cbTransfered = 0;
1317 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1318
1319 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1320}
1321
1322static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1323 int rcReq, size_t cbTransfered)
1324{
1325 int rc = VINF_SUCCESS;
1326 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1327 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1328 PPDMACTASKFILE pTasksWaiting;
1329
1330 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1331
1332 pEndpoint = pTask->pEndpoint;
1333
1334 pTask->hReq = NIL_RTFILEAIOREQ;
1335
1336 pAioMgr->cRequestsActive--;
1337 pEndpoint->AioMgr.cRequestsActive--;
1338 pEndpoint->AioMgr.cReqsProcessed++;
1339
1340 /*
1341 * It is possible that the request failed on Linux with kernels < 2.6.23
1342 * if the passed buffer was allocated with remap_pfn_range or if the file
1343 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1344 * The endpoint will be migrated to a failsafe manager in case a request fails.
1345 */
1346 if (RT_FAILURE(rcReq))
1347 {
1348 /* Free bounce buffers and the IPRT request. */
1349 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1350
1351 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1352 {
1353 LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rc));
1354 pEndpoint->fAsyncFlushSupported = false;
1355 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1356 /* The other method will take over now. */
1357
1358 pEndpoint->pFlushReq = NULL;
1359 /* Call completion callback */
1360 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
1361 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1362 pdmacFileTaskFree(pEndpoint, pTask);
1363 }
1364 else
1365 {
1366 /* Free the lock and process pending tasks if necessary */
1367 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1368 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1369 AssertRC(rc);
1370
1371 if (pTask->cbBounceBuffer)
1372 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1373
1374 /*
1375 * Fatal errors are reported to the guest and non-fatal errors
1376 * will cause a migration to the failsafe manager in the hope
1377 * that the error disappears.
1378 */
1379 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1380 {
1381 /* Queue the request on the pending list. */
1382 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1383 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1384
1385 /* Create a new failsafe manager if necessary. */
1386 if (!pEndpoint->AioMgr.fMoving)
1387 {
1388 PPDMACEPFILEMGR pAioMgrFailsafe;
1389
1390 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1391 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1392
1393 pEndpoint->AioMgr.fMoving = true;
1394
1395 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1396 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1397 AssertRC(rc);
1398
1399 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1400
1401 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1402 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1403 }
1404
1405 /* If this was the last request for the endpoint migrate it to the new manager. */
1406 if (!pEndpoint->AioMgr.cRequestsActive)
1407 {
1408 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1409 Assert(!fReqsPending);
1410
1411 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1412 AssertRC(rc);
1413 }
1414 }
1415 else
1416 {
1417 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1418 pdmacFileTaskFree(pEndpoint, pTask);
1419 }
1420 }
1421 }
1422 else
1423 {
1424 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1425 {
1426 /* Clear pending flush */
1427 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1428 pEndpoint->pFlushReq = NULL;
1429 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1430
1431 /* Call completion callback */
1432 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1433 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1434 pdmacFileTaskFree(pEndpoint, pTask);
1435 }
1436 else
1437 {
1438 /*
1439 * Restart an incomplete transfer.
1440 * This usually means that the request will return an error now
1441 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1442 * the transfer needs to be continued.
1443 */
1444 if (RT_UNLIKELY( cbTransfered < pTask->DataSeg.cbSeg
1445 || ( pTask->cbBounceBuffer
1446 && cbTransfered < pTask->cbBounceBuffer)))
1447 {
1448 RTFOFF offStart;
1449 size_t cbToTransfer;
1450 uint8_t *pbBuf = NULL;
1451
1452 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
1453 pTask, cbTransfered));
1454 Assert(cbTransfered % 512 == 0);
1455
1456 if (pTask->cbBounceBuffer)
1457 {
1458 AssertPtr(pTask->pvBounceBuffer);
1459 offStart = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
1460 cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
1461 pbBuf = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
1462 }
1463 else
1464 {
1465 Assert(!pTask->pvBounceBuffer);
1466 offStart = pTask->Off + cbTransfered;
1467 cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
1468 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
1469 }
1470
1471 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1472 {
1473 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
1474 pbBuf, cbToTransfer, pTask);
1475 }
1476 else
1477 {
1478 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1479 ("Invalid transfer type\n"));
1480 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
1481 pbBuf, cbToTransfer, pTask);
1482 }
1483 AssertRC(rc);
1484
1485 pTask->hReq = hReq;
1486 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1487 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1488 ("Unexpected return code rc=%Rrc\n", rc));
1489 }
1490 else if (pTask->fPrefetch)
1491 {
1492 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1493 Assert(pTask->cbBounceBuffer);
1494
1495 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1496 pTask->DataSeg.pvSeg,
1497 pTask->DataSeg.cbSeg);
1498
1499 /* Write it now. */
1500 pTask->fPrefetch = false;
1501 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1502 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
1503
1504 /* Grow the file if needed. */
1505 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1506 {
1507 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1508 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
1509 }
1510
1511 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
1512 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1513 AssertRC(rc);
1514 pTask->hReq = hReq;
1515 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1516 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1517 ("Unexpected return code rc=%Rrc\n", rc));
1518 }
1519 else
1520 {
1521 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1522 {
1523 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1524 memcpy(pTask->DataSeg.pvSeg,
1525 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1526 pTask->DataSeg.cbSeg);
1527
1528 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1529 }
1530
1531 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1532
1533 /* Free the lock and process pending tasks if necessary */
1534 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1535 if (pTasksWaiting)
1536 {
1537 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1538 AssertRC(rc);
1539 }
1540
1541 /* Call completion callback */
1542 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1543 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1544 pdmacFileTaskFree(pEndpoint, pTask);
1545
1546 /*
1547 * If there is no request left on the endpoint but a flush request is set
1548 * it completed now and we notify the owner.
1549 * Furthermore we look for new requests and continue.
1550 */
1551 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1552 {
1553 /* Call completion callback */
1554 pTask = pEndpoint->pFlushReq;
1555 pEndpoint->pFlushReq = NULL;
1556
1557 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1558
1559 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1560 pdmacFileTaskFree(pEndpoint, pTask);
1561 }
1562 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1563 {
1564 /* If the endpoint is about to be migrated do it now. */
1565 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1566 Assert(!fReqsPending);
1567
1568 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1569 AssertRC(rc);
1570 }
1571 }
1572 } /* Not a flush request */
1573 } /* request completed successfully */
1574}
1575
1576/** Helper macro for checking for error codes. */
1577#define CHECK_RC(pAioMgr, rc) \
1578 if (RT_FAILURE(rc)) \
1579 {\
1580 int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS);\
1581 return rc2;\
1582 }
1583
1584/**
1585 * The normal I/O manager using the RTFileAio* API
1586 *
1587 * @returns VBox status code.
1588 * @param hThreadSelf Handle of the thread.
1589 * @param pvUser Opaque user data.
1590 */
1591DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
1592{
1593 int rc = VINF_SUCCESS;
1594 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1595 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1596 NOREF(hThreadSelf);
1597
1598 while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1599 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
1600 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1601 {
1602 if (!pAioMgr->cRequestsActive)
1603 {
1604 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1605 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1606 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
1607 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1608 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1609
1610 LogFlow(("Got woken up\n"));
1611 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1612 }
1613
1614 /* Check for an external blocking event first. */
1615 if (pAioMgr->fBlockingEventPending)
1616 {
1617 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1618 CHECK_RC(pAioMgr, rc);
1619 }
1620
1621 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1622 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1623 {
1624 /* We got woken up because an endpoint issued new requests. Queue them. */
1625 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1626 CHECK_RC(pAioMgr, rc);
1627
1628 while (pAioMgr->cRequestsActive)
1629 {
1630 RTFILEAIOREQ apReqs[20];
1631 uint32_t cReqsCompleted = 0;
1632 size_t cReqsWait;
1633
1634 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1635 cReqsWait = RT_ELEMENTS(apReqs);
1636 else
1637 cReqsWait = pAioMgr->cRequestsActive;
1638
1639 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1640
1641 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1642 1,
1643 RT_INDEFINITE_WAIT, apReqs,
1644 cReqsWait, &cReqsCompleted);
1645 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1646 CHECK_RC(pAioMgr, rc);
1647
1648 LogFlow(("%d tasks completed\n", cReqsCompleted));
1649
1650 for (uint32_t i = 0; i < cReqsCompleted; i++)
1651 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1652
1653 /* Check for an external blocking event before we go to sleep again. */
1654 if (pAioMgr->fBlockingEventPending)
1655 {
1656 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1657 CHECK_RC(pAioMgr, rc);
1658 }
1659
1660 /* Update load statistics. */
1661 uint64_t uMillisCurr = RTTimeMilliTS();
1662 if (uMillisCurr > uMillisEnd)
1663 {
1664 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1665
1666 /* Calculate timespan. */
1667 uMillisCurr -= uMillisEnd;
1668
1669 while (pEndpointCurr)
1670 {
1671 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1672 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1673 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1674 }
1675
1676 /* Set new update interval */
1677 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1678 }
1679
1680 /* Check endpoints for new requests. */
1681 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1682 {
1683 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1684 CHECK_RC(pAioMgr, rc);
1685 }
1686 } /* while requests are active. */
1687
1688 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1689 {
1690 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1691 AssertRC(rc);
1692 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1693
1694 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1695 CHECK_RC(pAioMgr, rc);
1696 }
1697 } /* if still running */
1698 } /* while running */
1699
1700 LogFlowFunc(("rc=%Rrc\n", rc));
1701 return rc;
1702}
1703
1704#undef CHECK_RC
1705