VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp@ 106952

Last change on this file since 106952 was 106061, checked in by vboxsync, 2 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 66.4 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Async File I/O manager.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
33#include <iprt/types.h>
34#include <iprt/asm.h>
35#include <iprt/file.h>
36#include <iprt/mem.h>
37#include <iprt/string.h>
38#include <iprt/assert.h>
39#include <VBox/log.h>
40
41#include "PDMAsyncCompletionFileInternal.h"
42
43
44/*********************************************************************************************************************************
45* Defined Constants And Macros *
46*********************************************************************************************************************************/
47/** The update period for the I/O load statistics in ms. */
48#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
49/** Maximum number of requests a manager will handle. */
50#define PDMACEPFILEMGR_REQS_STEP 64
51
52
53/*********************************************************************************************************************************
54* Internal functions *
55*********************************************************************************************************************************/
56static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
57 PPDMACEPFILEMGR pAioMgr,
58 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
59
60static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
61 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
62 PPDMACFILERANGELOCK pRangeLock);
63
64static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
65 int rc, size_t cbTransfered);
66
67
68int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
69{
70 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
71
72 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
73 if (rc == VERR_OUT_OF_RANGE)
74 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
75
76 if (RT_SUCCESS(rc))
77 {
78 /* Initialize request handle array. */
79 pAioMgr->iFreeEntry = 0;
80 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
81 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
82
83 if (pAioMgr->pahReqsFree)
84 {
85 /* Create the range lock memcache. */
86 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
87 0, UINT32_MAX, NULL, NULL, NULL, 0);
88 if (RT_SUCCESS(rc))
89 return VINF_SUCCESS;
90
91 RTMemFree(pAioMgr->pahReqsFree);
92 }
93 else
94 {
95 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
96 rc = VERR_NO_MEMORY;
97 }
98 }
99
100 return rc;
101}
102
103void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
104{
105 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
106
107 while (pAioMgr->iFreeEntry > 0)
108 {
109 pAioMgr->iFreeEntry--;
110 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
111 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
112 }
113
114 RTMemFree(pAioMgr->pahReqsFree);
115 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
116}
117
118#if 0 /* currently unused */
119/**
120 * Sorts the endpoint list with insertion sort.
121 */
122static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
123{
124 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
125
126 pEpPrev = pAioMgr->pEndpointsHead;
127 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
128
129 while (pEpCurr)
130 {
131 /* Remember the next element to sort because the list might change. */
132 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
133
134 /* Unlink the current element from the list. */
135 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
136 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
137
138 if (pPrev)
139 pPrev->AioMgr.pEndpointNext = pNext;
140 else
141 pAioMgr->pEndpointsHead = pNext;
142
143 if (pNext)
144 pNext->AioMgr.pEndpointPrev = pPrev;
145
146 /* Go back until we reached the place to insert the current endpoint into. */
147 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
148 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
149
150 /* Link the endpoint into the list. */
151 if (pEpPrev)
152 pNext = pEpPrev->AioMgr.pEndpointNext;
153 else
154 pNext = pAioMgr->pEndpointsHead;
155
156 pEpCurr->AioMgr.pEndpointNext = pNext;
157 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
158
159 if (pNext)
160 pNext->AioMgr.pEndpointPrev = pEpCurr;
161
162 if (pEpPrev)
163 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
164 else
165 pAioMgr->pEndpointsHead = pEpCurr;
166
167 pEpCurr = pEpNextToSort;
168 }
169
170#ifdef DEBUG
171 /* Validate sorting algorithm */
172 unsigned cEndpoints = 0;
173 pEpCurr = pAioMgr->pEndpointsHead;
174
175 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
176 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
177
178 while (pEpCurr)
179 {
180 cEndpoints++;
181
182 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
183 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
184
185 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
186 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
187
188 pEpCurr = pNext;
189 }
190
191 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
192
193#endif
194}
195#endif /* currently unused */
196
197/**
198 * Removes an endpoint from the currently assigned manager.
199 *
200 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
201 * FALSE otherwise.
202 * @param pEndpointRemove The endpoint to remove.
203 */
204static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
205{
206 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
207 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
208 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
209
210 pAioMgr->cEndpoints--;
211
212 if (pPrev)
213 pPrev->AioMgr.pEndpointNext = pNext;
214 else
215 pAioMgr->pEndpointsHead = pNext;
216
217 if (pNext)
218 pNext->AioMgr.pEndpointPrev = pPrev;
219
220 /* Make sure that there is no request pending on this manager for the endpoint. */
221 if (!pEndpointRemove->AioMgr.cRequestsActive)
222 {
223 Assert(!pEndpointRemove->pFlushReq);
224
225 /* Reopen the file so that the new endpoint can re-associate with the file */
226 RTFileClose(pEndpointRemove->hFile);
227 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
228 AssertRC(rc);
229 return false;
230 }
231
232 return true;
233}
234
235#if 0 /* currently unused */
236
237static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
238{
239 /* Balancing doesn't make sense with only one endpoint. */
240 if (pAioMgr->cEndpoints == 1)
241 return false;
242
243 /* Doesn't make sens to move endpoints if only one produces the whole load */
244 unsigned cEndpointsWithLoad = 0;
245
246 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
247
248 while (pCurr)
249 {
250 if (pCurr->AioMgr.cReqsPerSec)
251 cEndpointsWithLoad++;
252
253 pCurr = pCurr->AioMgr.pEndpointNext;
254 }
255
256 return (cEndpointsWithLoad > 1);
257}
258
259/**
260 * Creates a new I/O manager and spreads the I/O load of the endpoints
261 * between the given I/O manager and the new one.
262 *
263 * @param pAioMgr The I/O manager with high I/O load.
264 */
265static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
266{
267 /*
268 * Check if balancing would improve the situation.
269 */
270 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
271 {
272 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
273 PPDMACEPFILEMGR pAioMgrNew = NULL;
274
275 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
276 if (RT_SUCCESS(rc))
277 {
278 /* We will sort the list by request count per second. */
279 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
280
281 /* Now move some endpoints to the new manager. */
282 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
283 unsigned cReqsOther = 0;
284 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
285
286 while (pCurr)
287 {
288 if (cReqsHere <= cReqsOther)
289 {
290 /*
291 * The other manager has more requests to handle now.
292 * We will keep the current endpoint.
293 */
294 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
295 cReqsHere += pCurr->AioMgr.cReqsPerSec;
296 pCurr = pCurr->AioMgr.pEndpointNext;
297 }
298 else
299 {
300 /* Move to other endpoint. */
301 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
302 cReqsOther += pCurr->AioMgr.cReqsPerSec;
303
304 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
305
306 pCurr = pCurr->AioMgr.pEndpointNext;
307
308 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
309
310 if (fReqsPending)
311 {
312 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
313 pMove->AioMgr.fMoving = true;
314 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
315 }
316 else
317 {
318 pMove->AioMgr.fMoving = false;
319 pMove->AioMgr.pAioMgrDst = NULL;
320 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
321 }
322 }
323 }
324 }
325 else
326 {
327 /* Don't process further but leave a log entry about reduced performance. */
328 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
329 }
330 }
331 else
332 Log(("AIOMgr: Load balancing would not improve anything\n"));
333}
334
335#endif /* unused */
336
337/**
338 * Increase the maximum number of active requests for the given I/O manager.
339 *
340 * @returns VBox status code.
341 * @param pAioMgr The I/O manager to grow.
342 */
343static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
344{
345 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
346
347 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
348 && !pAioMgr->cRequestsActive,
349 ("Invalid state of the I/O manager\n"));
350
351#ifdef RT_OS_WINDOWS
352 /*
353 * Reopen the files of all assigned endpoints first so we can assign them to the new
354 * I/O context.
355 */
356 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
357
358 while (pCurr)
359 {
360 RTFileClose(pCurr->hFile);
361 int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);
362
363 pCurr = pCurr->AioMgr.pEndpointNext;
364 }
365#endif
366
367 /* Create the new bigger context. */
368 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
369
370 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
371 int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
372 if (rc == VERR_OUT_OF_RANGE)
373 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
374
375 if (RT_SUCCESS(rc))
376 {
377 /* Close the old context. */
378 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
379 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
380
381 pAioMgr->hAioCtx = hAioCtxNew;
382
383 /* Create a new I/O task handle array */
384 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
385 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
386
387 if (pahReqNew)
388 {
389 /* Copy the cached request handles. */
390 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
391 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
392
393 RTMemFree(pAioMgr->pahReqsFree);
394 pAioMgr->pahReqsFree = pahReqNew;
395 pAioMgr->cReqEntries = cReqEntriesNew;
396 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
397 pAioMgr->cRequestsActiveMax));
398 }
399 else
400 rc = VERR_NO_MEMORY;
401 }
402
403#ifdef RT_OS_WINDOWS
404 /* Assign the file to the new context. */
405 pCurr = pAioMgr->pEndpointsHead;
406 while (pCurr)
407 {
408 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
409 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
410
411 pCurr = pCurr->AioMgr.pEndpointNext;
412 }
413#endif
414
415 if (RT_FAILURE(rc))
416 {
417 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
418 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
419 }
420
421 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
422 LogFlowFunc(("returns rc=%Rrc\n", rc));
423
424 return rc;
425}
426
427/**
428 * Checks if a given status code is fatal.
429 * Non fatal errors can be fixed by migrating the endpoint to a
430 * failsafe manager.
431 *
432 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
433 * false If the error can be fixed by a migration. (image on NFS disk for example)
434 * @param rcReq The status code to check.
435 */
436DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
437{
438 return rcReq == VERR_DEV_IO_ERROR
439 || rcReq == VERR_FILE_IO_ERROR
440 || rcReq == VERR_DISK_IO_ERROR
441 || rcReq == VERR_DISK_FULL
442 || rcReq == VERR_FILE_TOO_BIG;
443}
444
445/**
446 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
447 *
448 * @returns VBox status code
449 * @param pAioMgr The I/O manager the error occurred on.
450 * @param rc The error code.
451 * @param SRC_POS The source location of the error (use RT_SRC_POS).
452 */
453static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
454{
455 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
456 pAioMgr, rc));
457 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
458 LogRel(("AIOMgr: Please contact the product vendor\n"));
459
460 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
461
462 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
463 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
464
465 AssertMsgFailed(("Implement\n"));
466 return VINF_SUCCESS;
467}
468
469/**
470 * Put a list of tasks in the pending request list of an endpoint.
471 */
472DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
473{
474 /* Add the rest of the tasks to the pending list */
475 if (!pEndpoint->AioMgr.pReqsPendingHead)
476 {
477 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
478 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
479 }
480 else
481 {
482 Assert(pEndpoint->AioMgr.pReqsPendingTail);
483 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
484 }
485
486 /* Update the tail. */
487 while (pTaskHead->pNext)
488 pTaskHead = pTaskHead->pNext;
489
490 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
491 pTaskHead->pNext = NULL;
492}
493
494/**
495 * Put one task in the pending request list of an endpoint.
496 */
497DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
498{
499 /* Add the rest of the tasks to the pending list */
500 if (!pEndpoint->AioMgr.pReqsPendingHead)
501 {
502 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
503 pEndpoint->AioMgr.pReqsPendingHead = pTask;
504 }
505 else
506 {
507 Assert(pEndpoint->AioMgr.pReqsPendingTail);
508 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
509 }
510
511 pEndpoint->AioMgr.pReqsPendingTail = pTask;
512 pTask->pNext = NULL;
513}
514
515/**
516 * Allocates a async I/O request.
517 *
518 * @returns Handle to the request.
519 * @param pAioMgr The I/O manager.
520 */
521static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
522{
523 /* Get a request handle. */
524 RTFILEAIOREQ hReq;
525 if (pAioMgr->iFreeEntry > 0)
526 {
527 pAioMgr->iFreeEntry--;
528 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
529 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
530 Assert(hReq != NIL_RTFILEAIOREQ);
531 }
532 else
533 {
534 int rc = RTFileAioReqCreate(&hReq);
535 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
536 }
537
538 return hReq;
539}
540
541/**
542 * Frees a async I/O request handle.
543 *
544 * @param pAioMgr The I/O manager.
545 * @param hReq The I/O request handle to free.
546 */
547static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
548{
549 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
550 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
551
552 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
553 pAioMgr->iFreeEntry++;
554}
555
556/**
557 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
558 */
559static void pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
560 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
561 PRTFILEAIOREQ pahReqs, unsigned cReqs)
562{
563 pAioMgr->cRequestsActive += cReqs;
564 pEndpoint->AioMgr.cRequestsActive += cReqs;
565
566 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
567 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
568
569 int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
570 if (RT_FAILURE(rc))
571 {
572 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
573 {
574 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
575
576 /* Append any not submitted task to the waiting list. */
577 for (size_t i = 0; i < cReqs; i++)
578 {
579 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
580
581 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
582 {
583 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
584
585 Assert(pTask->hReq == pahReqs[i]);
586 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
587 pAioMgr->cRequestsActive--;
588 pEndpoint->AioMgr.cRequestsActive--;
589
590 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
591 {
592 /* Clear the pending flush */
593 Assert(pEndpoint->pFlushReq == pTask);
594 pEndpoint->pFlushReq = NULL;
595 }
596 }
597 }
598
599 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
600
601 /* Print an entry in the release log */
602 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
603 {
604 pEpClass->fOutOfResourcesWarningPrinted = true;
605 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
606 pAioMgr->cRequestsActive));
607 }
608
609 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
610 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
611 rc = VINF_SUCCESS;
612 }
613 else /* Another kind of error happened (full disk, ...) */
614 {
615 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
616 for (size_t i = 0; i < cReqs; i++)
617 {
618 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
619
620 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
621 {
622 /* We call ourself again to do any error handling which might come up now. */
623 pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
624 }
625 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
626 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
627 }
628
629
630 if ( pEndpoint->pFlushReq
631 && !pAioMgr->cRequestsActive
632 && !pEndpoint->fAsyncFlushSupported)
633 {
634 /*
635 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
636 * the async flush API.
637 * Happens only if this we just noticed that this is not supported
638 * and the only active request was a flush.
639 */
640 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
641 pEndpoint->pFlushReq = NULL;
642 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
643 pdmacFileTaskFree(pEndpoint, pFlush);
644 }
645 }
646 }
647}
648
649static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
650 RTFOFF offStart, size_t cbRange,
651 PPDMACTASKFILE pTask, bool fAlignedReq)
652{
653 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
654 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
655 ("Invalid task type %d\n", pTask->enmTransferType));
656
657 /*
658 * If there is no unaligned request active and the current one is aligned
659 * just pass it through.
660 */
661 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
662 return false;
663
664 PPDMACFILERANGELOCK pRangeLock;
665 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
666 if (!pRangeLock)
667 {
668 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
669 /* Check if we intersect with the range. */
670 if ( !pRangeLock
671 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
672 && (pRangeLock->Core.KeyLast) >= offStart))
673 {
674 pRangeLock = NULL; /* False alarm */
675 }
676 }
677
678 /* Check whether we have one of the situations explained below */
679 if (pRangeLock)
680 {
681 /* Add to the list. */
682 pTask->pNext = NULL;
683
684 if (!pRangeLock->pWaitingTasksHead)
685 {
686 Assert(!pRangeLock->pWaitingTasksTail);
687 pRangeLock->pWaitingTasksHead = pTask;
688 pRangeLock->pWaitingTasksTail = pTask;
689 }
690 else
691 {
692 AssertPtr(pRangeLock->pWaitingTasksTail);
693 pRangeLock->pWaitingTasksTail->pNext = pTask;
694 pRangeLock->pWaitingTasksTail = pTask;
695 }
696 return true;
697 }
698
699 return false;
700}
701
702static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
703 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
704 RTFOFF offStart, size_t cbRange,
705 PPDMACTASKFILE pTask, bool fAlignedReq)
706{
707 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
708 pAioMgr, pEndpoint, offStart, cbRange, pTask));
709
710 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq),
711 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
712 offStart, cbRange));
713
714 /*
715 * If there is no unaligned request active and the current one is aligned
716 * just don't use the lock.
717 */
718 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
719 {
720 pTask->pRangeLock = NULL;
721 return VINF_SUCCESS;
722 }
723
724 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
725 if (!pRangeLock)
726 return VERR_NO_MEMORY;
727
728 /* Init the lock. */
729 pRangeLock->Core.Key = offStart;
730 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
731 pRangeLock->cRefs = 1;
732 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
733 pRangeLock->pWaitingTasksHead = NULL;
734 pRangeLock->pWaitingTasksTail = NULL;
735
736 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
737 AssertMsg(fInserted, ("Range lock was not inserted!\n")); NOREF(fInserted);
738
739 /* Let the task point to its lock. */
740 pTask->pRangeLock = pRangeLock;
741 pEndpoint->AioMgr.cLockedReqsActive++;
742
743 return VINF_SUCCESS;
744}
745
746static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
747 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
748 PPDMACFILERANGELOCK pRangeLock)
749{
750 PPDMACTASKFILE pTasksWaitingHead;
751
752 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
753 pAioMgr, pEndpoint, pRangeLock));
754
755 /* pRangeLock can be NULL if there was no lock assigned with the task. */
756 if (!pRangeLock)
757 return NULL;
758
759 Assert(pRangeLock->cRefs == 1);
760
761 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
762 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
763 pRangeLock->pWaitingTasksHead = NULL;
764 pRangeLock->pWaitingTasksTail = NULL;
765 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
766 pEndpoint->AioMgr.cLockedReqsActive--;
767
768 return pTasksWaitingHead;
769}
770
771static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
772 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
773 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
774{
775 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
776 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
777 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
778 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
779
780 pTask->fPrefetch = false;
781 pTask->cbBounceBuffer = 0;
782
783 /*
784 * Before we start to setup the request we have to check whether there is a task
785 * already active which range intersects with ours. We have to defer execution
786 * of this task in two cases:
787 * - The pending task is a write and the current is either read or write
788 * - The pending task is a read and the current task is a write task.
789 *
790 * To check whether a range is currently "locked" we use the AVL tree where every pending task
791 * is stored by its file offset range. The current task will be added to the active task
792 * and will be executed when the active one completes. (The method below
793 * which checks whether a range is already used will add the task)
794 *
795 * This is necessary because of the requirement to align all requests to a 512 boundary
796 * which is enforced by the host OS (Linux and Windows atm). It is possible that
797 * we have to process unaligned tasks and need to align them using bounce buffers.
798 * While the data is fetched from the file another request might arrive writing to
799 * the same range. This will result in data corruption if both are executed concurrently.
800 */
801 int rc = VINF_SUCCESS;
802 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask,
803 true /* fAlignedReq */);
804 if (!fLocked)
805 {
806 /* Get a request handle. */
807 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
808 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
809
810 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
811 {
812 /* Grow the file if needed. */
813 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
814 {
815 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
816 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
817 }
818
819 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
820 pTask->Off, pTask->DataSeg.pvSeg,
821 pTask->DataSeg.cbSeg, pTask);
822 }
823 else
824 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
825 pTask->Off, pTask->DataSeg.pvSeg,
826 pTask->DataSeg.cbSeg, pTask);
827 AssertRC(rc);
828
829 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
830 pTask->DataSeg.cbSeg,
831 pTask, true /* fAlignedReq */);
832
833 if (RT_SUCCESS(rc))
834 {
835 pTask->hReq = hReq;
836 *phReq = hReq;
837 }
838 }
839 else
840 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
841
842 return rc;
843}
844
845static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
846 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
847 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
848{
849 /*
850 * Check if the alignment requirements are met.
851 * Offset, transfer size and buffer address
852 * need to be on a 512 boundary.
853 */
854 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
855 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
856 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
857 bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg
858 && offStart == pTask->Off;
859
860 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
861 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
862 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
863 offStart, cbToTransfer, pEndpoint->cbFile));
864
865 pTask->fPrefetch = false;
866
867 /*
868 * Before we start to setup the request we have to check whether there is a task
869 * already active which range intersects with ours. We have to defer execution
870 * of this task in two cases:
871 * - The pending task is a write and the current is either read or write
872 * - The pending task is a read and the current task is a write task.
873 *
874 * To check whether a range is currently "locked" we use the AVL tree where every pending task
875 * is stored by its file offset range. The current task will be added to the active task
876 * and will be executed when the active one completes. (The method below
877 * which checks whether a range is already used will add the task)
878 *
879 * This is necessary because of the requirement to align all requests to a 512 boundary
880 * which is enforced by the host OS (Linux and Windows atm). It is possible that
881 * we have to process unaligned tasks and need to align them using bounce buffers.
882 * While the data is fetched from the file another request might arrive writing to
883 * the same range. This will result in data corruption if both are executed concurrently.
884 */
885 int rc = VINF_SUCCESS;
886 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
887 if (!fLocked)
888 {
889 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
890 void *pvBuf = pTask->DataSeg.pvSeg;
891
892 /* Get a request handle. */
893 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
894 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
895
896 if ( !fAlignedReq
897 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
898 {
899 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
900 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
901
902 /* Create bounce buffer. */
903 pTask->cbBounceBuffer = cbToTransfer;
904
905 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
906 pTask->Off, offStart));
907 pTask->offBounceBuffer = pTask->Off - offStart;
908
909 /** @todo I think we need something like a RTMemAllocAligned method here.
910 * Current assumption is that the maximum alignment is 4096byte
911 * (GPT disk on Windows)
912 * so we can use RTMemPageAlloc here.
913 */
914 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
915 if (RT_LIKELY(pTask->pvBounceBuffer))
916 {
917 pvBuf = pTask->pvBounceBuffer;
918
919 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
920 {
921 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
922 || RT_UNLIKELY(offStart != pTask->Off))
923 {
924 /* We have to fill the buffer first before we can update the data. */
925 LogFlow(("Prefetching data for task %#p\n", pTask));
926 pTask->fPrefetch = true;
927 enmTransferType = PDMACTASKFILETRANSFER_READ;
928 }
929 else
930 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
931 }
932 }
933 else
934 rc = VERR_NO_MEMORY;
935 }
936 else
937 pTask->cbBounceBuffer = 0;
938
939 if (RT_SUCCESS(rc))
940 {
941 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
942 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
943
944 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
945 {
946 /* Grow the file if needed. */
947 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
948 {
949 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
950 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
951 }
952
953 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
954 offStart, pvBuf, cbToTransfer, pTask);
955 }
956 else
957 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
958 offStart, pvBuf, cbToTransfer, pTask);
959 AssertRC(rc);
960
961 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
962 if (RT_SUCCESS(rc))
963 {
964 pTask->hReq = hReq;
965 *phReq = hReq;
966 }
967 else
968 {
969 /* Cleanup */
970 if (pTask->cbBounceBuffer)
971 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
972 }
973 }
974 }
975 else
976 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
977
978 return rc;
979}
980
981static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
982 PPDMACEPFILEMGR pAioMgr,
983 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
984{
985 RTFILEAIOREQ apReqs[20];
986 unsigned cRequests = 0;
987 int rc = VINF_SUCCESS;
988
989 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
990 ("Trying to process request lists of a non active endpoint!\n"));
991
992 /* Go through the list and queue the requests until we get a flush request */
993 while ( pTaskHead
994 && !pEndpoint->pFlushReq
995 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
996 && RT_SUCCESS(rc))
997 {
998 RTMSINTERVAL msWhenNext;
999 PPDMACTASKFILE pCurr = pTaskHead;
1000
1001 if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
1002 {
1003 pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
1004 break;
1005 }
1006
1007 pTaskHead = pTaskHead->pNext;
1008
1009 pCurr->pNext = NULL;
1010
1011 AssertMsg(RT_VALID_PTR(pCurr->pEndpoint) && pCurr->pEndpoint == pEndpoint,
1012 ("Endpoints do not match\n"));
1013
1014 switch (pCurr->enmTransferType)
1015 {
1016 case PDMACTASKFILETRANSFER_FLUSH:
1017 {
1018 /* If there is no data transfer request this flush request finished immediately. */
1019 if (pEndpoint->fAsyncFlushSupported)
1020 {
1021 /* Issue a flush to the host. */
1022 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
1023 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
1024
1025 LogFlow(("Flush request %#p\n", hReq));
1026
1027 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
1028 if (RT_FAILURE(rc))
1029 {
1030 if (rc == VERR_NOT_SUPPORTED)
1031 LogRel(("AIOMgr: Async flushes not supported\n"));
1032 else
1033 LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
1034 pEndpoint->fAsyncFlushSupported = false;
1035 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1036 rc = VINF_SUCCESS; /* Fake success */
1037 }
1038 else
1039 {
1040 pCurr->hReq = hReq;
1041 apReqs[cRequests] = hReq;
1042 pEndpoint->AioMgr.cReqsProcessed++;
1043 cRequests++;
1044 }
1045 }
1046
1047 if ( !pEndpoint->AioMgr.cRequestsActive
1048 && !pEndpoint->fAsyncFlushSupported)
1049 {
1050 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1051 pdmacFileTaskFree(pEndpoint, pCurr);
1052 }
1053 else
1054 {
1055 Assert(!pEndpoint->pFlushReq);
1056 pEndpoint->pFlushReq = pCurr;
1057 }
1058 break;
1059 }
1060 case PDMACTASKFILETRANSFER_READ:
1061 case PDMACTASKFILETRANSFER_WRITE:
1062 {
1063 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1064
1065 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1066 {
1067 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1068 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1069 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1070 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1071 else
1072 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1073
1074 AssertRC(rc);
1075 }
1076 else
1077 {
1078 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1079 hReq = pCurr->hReq;
1080 }
1081
1082 LogFlow(("Read/Write request %#p\n", hReq));
1083
1084 if (hReq != NIL_RTFILEAIOREQ)
1085 {
1086 apReqs[cRequests] = hReq;
1087 cRequests++;
1088 }
1089 break;
1090 }
1091 default:
1092 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1093 } /* switch transfer type */
1094
1095 /* Queue the requests if the array is full. */
1096 if (cRequests == RT_ELEMENTS(apReqs))
1097 {
1098 pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1099 cRequests = 0;
1100 }
1101 }
1102
1103 if (cRequests)
1104 pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1105
1106 if (pTaskHead)
1107 {
1108 /* Add the rest of the tasks to the pending list */
1109 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1110
1111 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1112 && !pEndpoint->pFlushReq))
1113 {
1114#if 0
1115 /*
1116 * The I/O manager has no room left for more requests
1117 * but there are still requests to process.
1118 * Create a new I/O manager and let it handle some endpoints.
1119 */
1120 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1121#else
1122 /* Grow the I/O manager */
1123 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1124#endif
1125 }
1126 }
1127
1128 /* Insufficient resources are not fatal. */
1129 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1130 rc = VINF_SUCCESS;
1131
1132 return rc;
1133}
1134
1135/**
1136 * Adds all pending requests for the given endpoint
1137 * until a flush request is encountered or there is no
1138 * request anymore.
1139 *
1140 * @returns VBox status code.
1141 * @param pAioMgr The async I/O manager for the endpoint
1142 * @param pEndpoint The endpoint to get the requests from.
1143 */
1144static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1145 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1146{
1147 int rc = VINF_SUCCESS;
1148 PPDMACTASKFILE pTasksHead = NULL;
1149
1150 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1151 ("Trying to process request lists of a non active endpoint!\n"));
1152
1153 Assert(!pEndpoint->pFlushReq);
1154
1155 /* Check the pending list first */
1156 if (pEndpoint->AioMgr.pReqsPendingHead)
1157 {
1158 LogFlow(("Queuing pending requests first\n"));
1159
1160 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1161 /*
1162 * Clear the list as the processing routine will insert them into the list
1163 * again if it gets a flush request.
1164 */
1165 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1166 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1167 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1168 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
1169 }
1170
1171 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1172 {
1173 /* Now the request queue. */
1174 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1175 if (pTasksHead)
1176 {
1177 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1178 AssertRC(rc);
1179 }
1180 }
1181
1182 return rc;
1183}
1184
1185static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1186{
1187 int rc = VINF_SUCCESS;
1188 bool fNotifyWaiter = false;
1189
1190 LogFlowFunc((": Enter\n"));
1191
1192 Assert(pAioMgr->fBlockingEventPending);
1193
1194 switch (pAioMgr->enmBlockingEvent)
1195 {
1196 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1197 {
1198 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1199 AssertMsg(RT_VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
1200
1201 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1202
1203 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1204 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1205 if (pAioMgr->pEndpointsHead)
1206 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1207 pAioMgr->pEndpointsHead = pEndpointNew;
1208
1209 /* Assign the completion point to this file. */
1210 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
1211 fNotifyWaiter = true;
1212 pAioMgr->cEndpoints++;
1213 break;
1214 }
1215 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1216 {
1217 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1218 AssertMsg(RT_VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
1219
1220 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1221 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1222 break;
1223 }
1224 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1225 {
1226 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1227 AssertMsg(RT_VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1228
1229 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1230 {
1231 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1232
1233 /* Make sure all tasks finished. Process the queues a last time first. */
1234 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1235 AssertRC(rc);
1236
1237 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1238 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1239 }
1240 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1241 && (!pEndpointClose->AioMgr.cRequestsActive))
1242 fNotifyWaiter = true;
1243 break;
1244 }
1245 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1246 {
1247 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1248 if (!pAioMgr->cRequestsActive)
1249 fNotifyWaiter = true;
1250 break;
1251 }
1252 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1253 {
1254 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1255 break;
1256 }
1257 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1258 {
1259 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1260 fNotifyWaiter = true;
1261 break;
1262 }
1263 default:
1264 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1265 }
1266
1267 if (fNotifyWaiter)
1268 {
1269 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1270 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1271
1272 /* Release the waiting thread. */
1273 LogFlow(("Signalling waiter\n"));
1274 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1275 AssertRC(rc);
1276 }
1277
1278 LogFlowFunc((": Leave\n"));
1279 return rc;
1280}
1281
1282/**
1283 * Checks all endpoints for pending events or new requests.
1284 *
1285 * @returns VBox status code.
1286 * @param pAioMgr The I/O manager handle.
1287 */
1288static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1289{
1290 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1291 int rc = VINF_SUCCESS;
1292 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1293
1294 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
1295
1296 while (pEndpoint)
1297 {
1298 if (!pEndpoint->pFlushReq
1299 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1300 && !pEndpoint->AioMgr.fMoving)
1301 {
1302 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1303 if (RT_FAILURE(rc))
1304 return rc;
1305 }
1306 else if ( !pEndpoint->AioMgr.cRequestsActive
1307 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1308 {
1309 /* Reopen the file so that the new endpoint can re-associate with the file */
1310 RTFileClose(pEndpoint->hFile);
1311 rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1312 AssertRC(rc);
1313
1314 if (pEndpoint->AioMgr.fMoving)
1315 {
1316 pEndpoint->AioMgr.fMoving = false;
1317 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1318 }
1319 else
1320 {
1321 Assert(pAioMgr->fBlockingEventPending);
1322 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1323
1324 /* Release the waiting thread. */
1325 LogFlow(("Signalling waiter\n"));
1326 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1327 AssertRC(rc);
1328 }
1329 }
1330
1331 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1332 }
1333
1334 return rc;
1335}
1336
1337/**
1338 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1339 */
1340static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1341{
1342 size_t cbTransfered = 0;
1343 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1344
1345 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1346}
1347
1348static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1349 int rcReq, size_t cbTransfered)
1350{
1351 int rc = VINF_SUCCESS;
1352 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1353 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1354 PPDMACTASKFILE pTasksWaiting;
1355
1356 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1357
1358 pEndpoint = pTask->pEndpoint;
1359
1360 pTask->hReq = NIL_RTFILEAIOREQ;
1361
1362 pAioMgr->cRequestsActive--;
1363 pEndpoint->AioMgr.cRequestsActive--;
1364 pEndpoint->AioMgr.cReqsProcessed++;
1365
1366 /*
1367 * It is possible that the request failed on Linux with kernels < 2.6.23
1368 * if the passed buffer was allocated with remap_pfn_range or if the file
1369 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1370 * The endpoint will be migrated to a failsafe manager in case a request fails.
1371 */
1372 if (RT_FAILURE(rcReq))
1373 {
1374 /* Free bounce buffers and the IPRT request. */
1375 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1376
1377 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1378 {
1379 LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq));
1380 pEndpoint->fAsyncFlushSupported = false;
1381 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1382 /* The other method will take over now. */
1383
1384 pEndpoint->pFlushReq = NULL;
1385 /* Call completion callback */
1386 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
1387 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1388 pdmacFileTaskFree(pEndpoint, pTask);
1389 }
1390 else
1391 {
1392 /* Free the lock and process pending tasks if necessary */
1393 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1394 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1395 AssertRC(rc);
1396
1397 if (pTask->cbBounceBuffer)
1398 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1399
1400 /*
1401 * Fatal errors are reported to the guest and non-fatal errors
1402 * will cause a migration to the failsafe manager in the hope
1403 * that the error disappears.
1404 */
1405 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1406 {
1407 /* Queue the request on the pending list. */
1408 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1409 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1410
1411 /* Create a new failsafe manager if necessary. */
1412 if (!pEndpoint->AioMgr.fMoving)
1413 {
1414 PPDMACEPFILEMGR pAioMgrFailsafe;
1415
1416 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1417 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1418
1419 pEndpoint->AioMgr.fMoving = true;
1420
1421 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1422 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1423 AssertRC(rc);
1424
1425 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1426
1427 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1428 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1429 }
1430
1431 /* If this was the last request for the endpoint migrate it to the new manager. */
1432 if (!pEndpoint->AioMgr.cRequestsActive)
1433 {
1434 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1435 Assert(!fReqsPending); NOREF(fReqsPending);
1436
1437 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1438 AssertRC(rc);
1439 }
1440 }
1441 else
1442 {
1443 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1444 pdmacFileTaskFree(pEndpoint, pTask);
1445 }
1446 }
1447 }
1448 else
1449 {
1450 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1451 {
1452 /* Clear pending flush */
1453 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1454 pEndpoint->pFlushReq = NULL;
1455 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1456
1457 /* Call completion callback */
1458 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1459 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1460 pdmacFileTaskFree(pEndpoint, pTask);
1461 }
1462 else
1463 {
1464 /*
1465 * Restart an incomplete transfer.
1466 * This usually means that the request will return an error now
1467 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1468 * the transfer needs to be continued.
1469 */
1470 pTask->cbTransfered += cbTransfered;
1471
1472 if (RT_UNLIKELY( pTask->cbTransfered < pTask->DataSeg.cbSeg
1473 || ( pTask->cbBounceBuffer
1474 && pTask->cbTransfered < pTask->cbBounceBuffer)))
1475 {
1476 RTFOFF offStart;
1477 size_t cbToTransfer;
1478 uint8_t *pbBuf = NULL;
1479
1480 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
1481 pTask, cbTransfered));
1482 Assert(cbTransfered % 512 == 0);
1483
1484 if (pTask->cbBounceBuffer)
1485 {
1486 AssertPtr(pTask->pvBounceBuffer);
1487 offStart = (pTask->Off & ~((RTFOFF)512-1)) + pTask->cbTransfered;
1488 cbToTransfer = pTask->cbBounceBuffer - pTask->cbTransfered;
1489 pbBuf = (uint8_t *)pTask->pvBounceBuffer + pTask->cbTransfered;
1490 }
1491 else
1492 {
1493 Assert(!pTask->pvBounceBuffer);
1494 offStart = pTask->Off + pTask->cbTransfered;
1495 cbToTransfer = pTask->DataSeg.cbSeg - pTask->cbTransfered;
1496 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + pTask->cbTransfered;
1497 }
1498
1499 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1500 {
1501 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
1502 pbBuf, cbToTransfer, pTask);
1503 }
1504 else
1505 {
1506 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1507 ("Invalid transfer type\n"));
1508 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
1509 pbBuf, cbToTransfer, pTask);
1510 }
1511 AssertRC(rc);
1512
1513 pTask->hReq = hReq;
1514 pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1515 }
1516 else if (pTask->fPrefetch)
1517 {
1518 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1519 Assert(pTask->cbBounceBuffer);
1520
1521 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1522 pTask->DataSeg.pvSeg,
1523 pTask->DataSeg.cbSeg);
1524
1525 /* Write it now. */
1526 pTask->fPrefetch = false;
1527 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1528 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
1529
1530 pTask->cbTransfered = 0;
1531
1532 /* Grow the file if needed. */
1533 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1534 {
1535 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1536 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
1537 }
1538
1539 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
1540 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1541 AssertRC(rc);
1542 pTask->hReq = hReq;
1543 pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1544 }
1545 else
1546 {
1547 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1548 {
1549 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1550 memcpy(pTask->DataSeg.pvSeg,
1551 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1552 pTask->DataSeg.cbSeg);
1553
1554 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1555 }
1556
1557 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1558
1559 /* Free the lock and process pending tasks if necessary */
1560 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1561 if (pTasksWaiting)
1562 {
1563 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1564 AssertRC(rc);
1565 }
1566
1567 /* Call completion callback */
1568 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1569 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1570 pdmacFileTaskFree(pEndpoint, pTask);
1571
1572 /*
1573 * If there is no request left on the endpoint but a flush request is set
1574 * it completed now and we notify the owner.
1575 * Furthermore we look for new requests and continue.
1576 */
1577 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1578 {
1579 /* Call completion callback */
1580 pTask = pEndpoint->pFlushReq;
1581 pEndpoint->pFlushReq = NULL;
1582
1583 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1584
1585 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1586 pdmacFileTaskFree(pEndpoint, pTask);
1587 }
1588 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1589 {
1590 /* If the endpoint is about to be migrated do it now. */
1591 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1592 Assert(!fReqsPending); NOREF(fReqsPending);
1593
1594 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1595 AssertRC(rc);
1596 }
1597 }
1598 } /* Not a flush request */
1599 } /* request completed successfully */
1600}
1601
/**
 * Helper macro for checking for error codes.
 *
 * On failure the error handler is invoked and its status is returned from the
 * calling function.  Wrapped in do { } while (0) so that the macro behaves
 * like a single statement, also in unbraced if/else bodies.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler((pAioMgr), (rc), RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1609
1610/**
1611 * The normal I/O manager using the RTFileAio* API
1612 *
1613 * @returns VBox status code.
1614 * @param hThreadSelf Handle of the thread.
1615 * @param pvUser Opaque user data.
1616 */
1617DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
1618{
1619 int rc = VINF_SUCCESS;
1620 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1621 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1622 NOREF(hThreadSelf);
1623
1624 while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1625 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
1626 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1627 {
1628 if (!pAioMgr->cRequestsActive)
1629 {
1630 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1631 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1632 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
1633 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1634 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1635
1636 LogFlow(("Got woken up\n"));
1637 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1638 }
1639
1640 /* Check for an external blocking event first. */
1641 if (pAioMgr->fBlockingEventPending)
1642 {
1643 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1644 CHECK_RC(pAioMgr, rc);
1645 }
1646
1647 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1648 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1649 {
1650 /* We got woken up because an endpoint issued new requests. Queue them. */
1651 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1652 CHECK_RC(pAioMgr, rc);
1653
1654 while (pAioMgr->cRequestsActive)
1655 {
1656 RTFILEAIOREQ apReqs[20];
1657 uint32_t cReqsCompleted = 0;
1658 size_t cReqsWait;
1659
1660 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1661 cReqsWait = RT_ELEMENTS(apReqs);
1662 else
1663 cReqsWait = pAioMgr->cRequestsActive;
1664
1665 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1666
1667 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1668 1,
1669 RT_INDEFINITE_WAIT, apReqs,
1670 cReqsWait, &cReqsCompleted);
1671 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1672 CHECK_RC(pAioMgr, rc);
1673
1674 LogFlow(("%d tasks completed\n", cReqsCompleted));
1675
1676 for (uint32_t i = 0; i < cReqsCompleted; i++)
1677 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1678
1679 /* Check for an external blocking event before we go to sleep again. */
1680 if (pAioMgr->fBlockingEventPending)
1681 {
1682 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1683 CHECK_RC(pAioMgr, rc);
1684 }
1685
1686 /* Update load statistics. */
1687 uint64_t uMillisCurr = RTTimeMilliTS();
1688 if (uMillisCurr > uMillisEnd)
1689 {
1690 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1691
1692 /* Calculate timespan. */
1693 uMillisCurr -= uMillisEnd;
1694
1695 while (pEndpointCurr)
1696 {
1697 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1698 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1699 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1700 }
1701
1702 /* Set new update interval */
1703 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1704 }
1705
1706 /* Check endpoints for new requests. */
1707 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1708 {
1709 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1710 CHECK_RC(pAioMgr, rc);
1711 }
1712 } /* while requests are active. */
1713
1714 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1715 {
1716 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1717 AssertRC(rc);
1718 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1719
1720 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1721 CHECK_RC(pAioMgr, rc);
1722 }
1723 } /* if still running */
1724 } /* while running */
1725
1726 LogFlowFunc(("rc=%Rrc\n", rc));
1727 return rc;
1728}
1729
1730#undef CHECK_RC
1731
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette