VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 26671

Last change on this file since 26671 was 26671, checked in by vboxsync, 15 years ago

AsyncCompletion: Make it possible to limit the bandwidth of a VM

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 47.1 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 26671 2010-02-22 07:21:34Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include <iprt/types.h>
24#include <iprt/asm.h>
25#include <iprt/file.h>
26#include <iprt/mem.h>
27#include <iprt/string.h>
28#include <iprt/assert.h>
29#include <VBox/log.h>
30
31#include "PDMAsyncCompletionFileInternal.h"
32
33/** The update period for the I/O load statistics in ms. */
34#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
35/** Maximum number of requests a manager will handle. */
36#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo: Find better solution wrt. the request number*/
37
38/*******************************************************************************
39* Internal functions *
40*******************************************************************************/
41static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
42 PPDMACEPFILEMGR pAioMgr,
43 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
44
45
46int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
47{
48 int rc = VINF_SUCCESS;
49
50 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
51 if (rc == VERR_OUT_OF_RANGE)
52 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
53
54 if (RT_SUCCESS(rc))
55 {
56 /* Initialize request handle array. */
57 pAioMgr->iFreeEntryNext = 0;
58 pAioMgr->iFreeReqNext = 0;
59 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
60 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
61
62 if (pAioMgr->pahReqsFree)
63 {
64 return VINF_SUCCESS;
65 }
66 else
67 {
68 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
69 rc = VERR_NO_MEMORY;
70 }
71 }
72
73 return rc;
74}
75
76void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
77{
78 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
79
80 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
81 {
82 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
83 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
84 }
85
86 RTMemFree(pAioMgr->pahReqsFree);
87}
88
89/**
90 * Sorts the endpoint list with insertion sort.
91 */
92static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
93{
94 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
95
96 pEpPrev = pAioMgr->pEndpointsHead;
97 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
98
99 while (pEpCurr)
100 {
101 /* Remember the next element to sort because the list might change. */
102 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
103
104 /* Unlink the current element from the list. */
105 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
106 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
107
108 if (pPrev)
109 pPrev->AioMgr.pEndpointNext = pNext;
110 else
111 pAioMgr->pEndpointsHead = pNext;
112
113 if (pNext)
114 pNext->AioMgr.pEndpointPrev = pPrev;
115
116 /* Go back until we reached the place to insert the current endpoint into. */
117 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
118 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
119
120 /* Link the endpoint into the list. */
121 if (pEpPrev)
122 pNext = pEpPrev->AioMgr.pEndpointNext;
123 else
124 pNext = pAioMgr->pEndpointsHead;
125
126 pEpCurr->AioMgr.pEndpointNext = pNext;
127 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
128
129 if (pNext)
130 pNext->AioMgr.pEndpointPrev = pEpCurr;
131
132 if (pEpPrev)
133 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
134 else
135 pAioMgr->pEndpointsHead = pEpCurr;
136
137 pEpCurr = pEpNextToSort;
138 }
139
140#ifdef DEBUG
141 /* Validate sorting alogrithm */
142 unsigned cEndpoints = 0;
143 pEpCurr = pAioMgr->pEndpointsHead;
144
145 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
146 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
147
148 while (pEpCurr)
149 {
150 cEndpoints++;
151
152 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
153 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
154
155 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
156 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
157
158 pEpCurr = pNext;
159 }
160
161 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
162
163#endif
164}
165
166/**
167 * Removes an endpoint from the currently assigned manager.
168 *
169 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
170 * FALSE otherwise.
171 * @param pEndpointRemove The endpoint to remove.
172 */
173static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
174{
175 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
176 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
177 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
178
179 pAioMgr->cEndpoints--;
180
181 if (pPrev)
182 pPrev->AioMgr.pEndpointNext = pNext;
183 else
184 pAioMgr->pEndpointsHead = pNext;
185
186 if (pNext)
187 pNext->AioMgr.pEndpointPrev = pPrev;
188
189 /* Make sure that there is no request pending on this manager for the endpoint. */
190 if (!pEndpointRemove->AioMgr.cRequestsActive)
191 {
192 Assert(!pEndpointRemove->pFlushReq);
193
194 /* Reopen the file so that the new endpoint can reassociate with the file */
195 RTFileClose(pEndpointRemove->File);
196 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
197 AssertRC(rc);
198 return false;
199 }
200
201 return true;
202}
203
204static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
205{
206 /* Balancing doesn't make sense with only one endpoint. */
207 if (pAioMgr->cEndpoints == 1)
208 return false;
209
210 /* Doesn't make sens to move endpoints if only one produces the whole load */
211 unsigned cEndpointsWithLoad = 0;
212
213 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
214
215 while (pCurr)
216 {
217 if (pCurr->AioMgr.cReqsPerSec)
218 cEndpointsWithLoad++;
219
220 pCurr = pCurr->AioMgr.pEndpointNext;
221 }
222
223 return (cEndpointsWithLoad > 1);
224}
225
226/**
227 * Creates a new I/O manager and spreads the I/O load of the endpoints
228 * between the given I/O manager and the new one.
229 *
230 * @returns nothing.
231 * @param pAioMgr The I/O manager with high I/O load.
232 */
233static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
234{
235 PPDMACEPFILEMGR pAioMgrNew = NULL;
236 int rc = VINF_SUCCESS;
237
238 /*
239 * Check if balancing would improve the situation.
240 */
241 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
242 {
243 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
244 &pAioMgrNew, false);
245 if (RT_SUCCESS(rc))
246 {
247 /* We will sort the list by request count per second. */
248 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
249
250 /* Now move some endpoints to the new manager. */
251 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
252 unsigned cReqsOther = 0;
253 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
254
255 while (pCurr)
256 {
257 if (cReqsHere <= cReqsOther)
258 {
259 /*
260 * The other manager has more requests to handle now.
261 * We will keep the current endpoint.
262 */
263 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
264 cReqsHere += pCurr->AioMgr.cReqsPerSec;
265 pCurr = pCurr->AioMgr.pEndpointNext;
266 }
267 else
268 {
269 /* Move to other endpoint. */
270 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
271 cReqsOther += pCurr->AioMgr.cReqsPerSec;
272
273 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
274
275 pCurr = pCurr->AioMgr.pEndpointNext;
276
277 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
278
279 if (fReqsPending)
280 {
281 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
282 pMove->AioMgr.fMoving = true;
283 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
284 }
285 else
286 {
287 pMove->AioMgr.fMoving = false;
288 pMove->AioMgr.pAioMgrDst = NULL;
289 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
290 }
291 }
292 }
293 }
294 else
295 {
296 /* Don't process further but leave a log entry about reduced performance. */
297 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
298 }
299 }
300 else
301 Log(("AIOMgr: Load balancing would not improve anything\n"));
302}
303
304/**
305 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
306 *
307 * @returns VBox status code
308 * @param pAioMgr The I/O manager the error ocurred on.
309 * @param rc The error code.
310 */
311static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
312{
313 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
314 pAioMgr, rc));
315 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
316 LogRel(("AIOMgr: Please contact the product vendor\n"));
317
318 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
319
320 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
321 ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);
322
323 AssertMsgFailed(("Implement\n"));
324 return VINF_SUCCESS;
325}
326
327/**
328 * Put a list of tasks in the pending request list of an endpoint.
329 */
330DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
331{
332 /* Add the rest of the tasks to the pending list */
333 if (!pEndpoint->AioMgr.pReqsPendingHead)
334 {
335 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
336 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
337 }
338 else
339 {
340 Assert(pEndpoint->AioMgr.pReqsPendingTail);
341 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
342 }
343
344 /* Update the tail. */
345 while (pTaskHead->pNext)
346 pTaskHead = pTaskHead->pNext;
347
348 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
349}
350
351/**
352 * Put one task in the pending request list of an endpoint.
353 */
354DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
355{
356 /* Add the rest of the tasks to the pending list */
357 if (!pEndpoint->AioMgr.pReqsPendingHead)
358 {
359 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
360 pEndpoint->AioMgr.pReqsPendingHead = pTask;
361 }
362 else
363 {
364 Assert(pEndpoint->AioMgr.pReqsPendingTail);
365 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
366 }
367
368 pEndpoint->AioMgr.pReqsPendingTail = pTask;
369}
370
371/**
372 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
373 */
374static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
375 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
376 PRTFILEAIOREQ pahReqs, unsigned cReqs)
377{
378 int rc;
379
380 pAioMgr->cRequestsActive += cReqs;
381 pEndpoint->AioMgr.cRequestsActive += cReqs;
382
383 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
384 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
385
386 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
387 if (RT_FAILURE(rc))
388 {
389 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
390 {
391 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
392
393 /*
394 * We run out of resources.
395 * Need to check which requests got queued
396 * and put the rest on the pending list again.
397 */
398 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
399 {
400 pEpClass->fOutOfResourcesWarningPrinted = true;
401 LogRel(("AIOMgr: The operating system doesn't have enough resources "
402 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
403 }
404
405 for (size_t i = 0; i < cReqs; i++)
406 {
407 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
408
409 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
410 {
411 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
412 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
413
414 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
415
416 /* Put the entry on the free array */
417 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
418 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
419
420 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
421 pAioMgr->cRequestsActive--;
422 pEndpoint->AioMgr.cRequestsActive--;
423 }
424 }
425 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
426 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
427 }
428 else
429 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
430 }
431
432 return rc;
433}
434
435/**
436 * Allocates a async I/O request.
437 *
438 * @returns Handle to the request.
439 * @param pAioMgr The I/O manager.
440 */
441static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
442{
443 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
444
445 /* Get a request handle. */
446 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
447 {
448 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
449 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
450 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
451 }
452 else
453 {
454 int rc = RTFileAioReqCreate(&hReq);
455 AssertRC(rc);
456 }
457
458 return hReq;
459}
460
461static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
462 RTFOFF offStart, size_t cbRange,
463 PPDMACTASKFILE pTask)
464{
465 PPDMACFILERANGELOCK pRangeLock = NULL; /** < Range lock */
466
467 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
468 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
469 ("Invalid task type %d\n", pTask->enmTransferType));
470
471 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
472 if (!pRangeLock)
473 {
474 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
475 /* Check if we intersect with the range. */
476 if ( !pRangeLock
477 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
478 && (pRangeLock->Core.KeyLast) >= offStart))
479 {
480 pRangeLock = NULL; /* False alarm */
481 }
482 }
483
484 /* Check whether we have one of the situations explained below */
485 if ( pRangeLock
486#if 0 /** @todo: later. For now we will just block all requests if they interfere */
487 && ( (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
488 || (!pRangeLock->fReadLock)
489#endif
490 )
491 {
492 /* Add to the list. */
493 pTask->pNext = NULL;
494
495 if (!pRangeLock->pWaitingTasksHead)
496 {
497 Assert(!pRangeLock->pWaitingTasksTail);
498 pRangeLock->pWaitingTasksHead = pTask;
499 pRangeLock->pWaitingTasksTail = pTask;
500 }
501 else
502 {
503 AssertPtr(pRangeLock->pWaitingTasksTail);
504 pRangeLock->pWaitingTasksTail->pNext = pTask;
505 pRangeLock->pWaitingTasksTail = pTask;
506 }
507 return true;
508 }
509
510 return false;
511}
512
513static int pdmacFileAioMgrNormalRangeLock(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
514 RTFOFF offStart, size_t cbRange,
515 PPDMACTASKFILE pTask)
516{
517 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
518 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
519 offStart, cbRange));
520
521 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemAllocZ(sizeof(PDMACFILERANGELOCK));
522 if (!pRangeLock)
523 return VERR_NO_MEMORY;
524
525 /* Init the lock. */
526 pRangeLock->Core.Key = offStart;
527 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
528 pRangeLock->cRefs = 1;
529 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
530
531 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
532 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
533
534 /* Let the task point to its lock. */
535 pTask->pRangeLock = pRangeLock;
536
537 return VINF_SUCCESS;
538}
539
540static int pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
541 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
542 PPDMACFILERANGELOCK pRangeLock)
543{
544 PPDMACTASKFILE pTasksWaitingHead;
545
546 AssertPtr(pRangeLock);
547 Assert(pRangeLock->cRefs == 1);
548
549 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
550 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
551 RTMemFree(pRangeLock);
552
553 return pdmacFileAioMgrNormalProcessTaskList(pTasksWaitingHead, pAioMgr, pEndpoint);
554}
555
556static int pdmacFileAioMgrNormalTaskPrepare(PPDMACEPFILEMGR pAioMgr,
557 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
558 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
559{
560 int rc = VINF_SUCCESS;
561 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
562 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
563 void *pvBuf = pTask->DataSeg.pvSeg;
564
565 /* Get a request handle. */
566 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
567 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
568
569 /*
570 * Check if the alignment requirements are met.
571 * Offset, transfer size and buffer address
572 * need to be on a 512 boundary.
573 */
574 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
575 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
576 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
577
578 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
579 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
580 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
581 offStart, cbToTransfer, pEndpoint->cbFile));
582
583 pTask->fPrefetch = false;
584
585 /*
586 * Before we start to setup the request we have to check whether there is a task
587 * already active which range intersects with ours. We have to defer execution
588 * of this task in two cases:
589 * - The pending task is a write and the current is either read or write
590 * - The pending task is a read and the current task is a write task.
591 *
592 * To check whether a range is currently "locked" we use the AVL tree where every pending task
593 * is stored by its file offset range. The current task will be added to the active task
594 * and will be executed when the active one completes. (The method below
595 * which checks whether a range is already used will add the task)
596 *
597 * This is neccessary because of the requirementto align all requests to a 512 boundary
598 * which is enforced by the host OS (Linux and Windows atm). It is possible that
599 * we have to process unaligned tasks and need to align them using bounce buffers.
600 * While the data is feteched from the file another request might arrive writing to
601 * the same range. This will result in data corruption if both are executed concurrently.
602 */
603 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);
604
605 if (!fLocked)
606 {
607 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
608 || RT_UNLIKELY(offStart != pTask->Off)
609 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
610 {
611 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
612 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
613
614 /* Create bounce buffer. */
615 pTask->fBounceBuffer = true;
616
617 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
618 pTask->Off, offStart));
619 pTask->uBounceBufOffset = pTask->Off - offStart;
620
621 /** @todo: I think we need something like a RTMemAllocAligned method here.
622 * Current assumption is that the maximum alignment is 4096byte
623 * (GPT disk on Windows)
624 * so we can use RTMemPageAlloc here.
625 */
626 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
627 if (RT_LIKELY(pTask->pvBounceBuffer))
628 {
629 pvBuf = pTask->pvBounceBuffer;
630
631 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
632 {
633 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
634 || RT_UNLIKELY(offStart != pTask->Off))
635 {
636 /* We have to fill the buffer first before we can update the data. */
637 LogFlow(("Prefetching data for task %#p\n", pTask));
638 pTask->fPrefetch = true;
639 enmTransferType = PDMACTASKFILETRANSFER_READ;
640 }
641 else
642 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
643 }
644 }
645 else
646 rc = VERR_NO_MEMORY;
647 }
648 else
649 pTask->fBounceBuffer = false;
650
651 if (RT_SUCCESS(rc))
652 {
653 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
654 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
655
656 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
657 {
658 /* Grow the file if needed. */
659 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
660 {
661 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
662 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
663 }
664
665 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
666 offStart, pvBuf, cbToTransfer, pTask);
667 }
668 else
669 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
670 offStart, pvBuf, cbToTransfer, pTask);
671 AssertRC(rc);
672
673 rc = pdmacFileAioMgrNormalRangeLock(pEndpoint, offStart, cbToTransfer, pTask);
674
675 if (RT_SUCCESS(rc))
676 *phReq = hReq;
677 else
678 {
679 /* Cleanup */
680 if (pTask->fBounceBuffer)
681 RTMemPageFree(pTask->pvBounceBuffer);
682 }
683 }
684 }
685 else
686 {
687 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
688 rc = VINF_SUCCESS;
689 }
690
691 return rc;
692}
693
694static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
695 PPDMACEPFILEMGR pAioMgr,
696 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
697{
698 RTFILEAIOREQ apReqs[20];
699 unsigned cRequests = 0;
700 unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
701 int rc = VINF_SUCCESS;
702
703 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
704 ("Trying to process request lists of a non active endpoint!\n"));
705
706 /* Go through the list and queue the requests until we get a flush request */
707 while ( pTaskHead
708 && !pEndpoint->pFlushReq
709 && (cMaxRequests > 0)
710 && RT_SUCCESS(rc))
711 {
712 PPDMACTASKFILE pCurr = pTaskHead;
713
714 if (!pdmacFileBwMgrIsTransferAllowed(pEndpoint->pBwMgr, pCurr->DataSeg.cbSeg))
715 {
716 pAioMgr->fBwLimitReached = true;
717 break;
718 }
719
720 pTaskHead = pTaskHead->pNext;
721
722 pCurr->pNext = NULL;
723
724 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
725 ("Endpoints do not match\n"));
726
727 switch (pCurr->enmTransferType)
728 {
729 case PDMACTASKFILETRANSFER_FLUSH:
730 {
731 /* If there is no data transfer request this flush request finished immediately. */
732 if (!pEndpoint->AioMgr.cRequestsActive)
733 {
734 pCurr->pfnCompleted(pCurr, pCurr->pvUser);
735 pdmacFileTaskFree(pEndpoint, pCurr);
736 }
737 else
738 {
739 Assert(!pEndpoint->pFlushReq);
740 pEndpoint->pFlushReq = pCurr;
741 }
742 break;
743 }
744 case PDMACTASKFILETRANSFER_READ:
745 case PDMACTASKFILETRANSFER_WRITE:
746 {
747 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
748
749 rc = pdmacFileAioMgrNormalTaskPrepare(pAioMgr, pEndpoint, pCurr, &hReq);
750 AssertRC(rc);
751
752 if (hReq != NIL_RTFILEAIOREQ)
753 {
754 apReqs[cRequests] = hReq;
755 pEndpoint->AioMgr.cReqsProcessed++;
756 cMaxRequests--;
757 cRequests++;
758 if (cRequests == RT_ELEMENTS(apReqs))
759 {
760 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
761 cRequests = 0;
762 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
763 ("Unexpected return code\n"));
764 }
765 }
766 break;
767 }
768 default:
769 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
770 }
771 }
772
773 if (cRequests)
774 {
775 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
776 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
777 ("Unexpected return code rc=%Rrc\n", rc));
778 }
779
780 if (pTaskHead)
781 {
782 /* Add the rest of the tasks to the pending list */
783 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
784
785 if (RT_UNLIKELY( !cMaxRequests
786 && !pEndpoint->pFlushReq
787 && !pAioMgr->fBwLimitReached))
788 {
789 /*
790 * The I/O manager has no room left for more requests
791 * but there are still requests to process.
792 * Create a new I/O manager and let it handle some endpoints.
793 */
794 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
795 }
796 }
797
798 /* Insufficient resources are not fatal. */
799 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
800 rc = VINF_SUCCESS;
801
802 return rc;
803}
804
805/**
806 * Adds all pending requests for the given endpoint
807 * until a flush request is encountered or there is no
808 * request anymore.
809 *
810 * @returns VBox status code.
811 * @param pAioMgr The async I/O manager for the endpoint
812 * @param pEndpoint The endpoint to get the requests from.
813 */
814static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
815 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
816{
817 int rc = VINF_SUCCESS;
818 PPDMACTASKFILE pTasksHead = NULL;
819
820 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
821 ("Trying to process request lists of a non active endpoint!\n"));
822
823 Assert(!pEndpoint->pFlushReq);
824
825 /* Check the pending list first */
826 if (pEndpoint->AioMgr.pReqsPendingHead)
827 {
828 LogFlow(("Queuing pending requests first\n"));
829
830 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
831 /*
832 * Clear the list as the processing routine will insert them into the list
833 * again if it gets a flush request.
834 */
835 pEndpoint->AioMgr.pReqsPendingHead = NULL;
836 pEndpoint->AioMgr.pReqsPendingTail = NULL;
837 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
838 AssertRC(rc);
839 }
840
841 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
842 {
843 /* Now the request queue. */
844 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
845 if (pTasksHead)
846 {
847 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
848 AssertRC(rc);
849 }
850 }
851
852 return rc;
853}
854
855static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
856{
857 int rc = VINF_SUCCESS;
858 bool fNotifyWaiter = false;
859
860 LogFlowFunc((": Enter\n"));
861
862 Assert(pAioMgr->fBlockingEventPending);
863
864 switch (pAioMgr->enmBlockingEvent)
865 {
866 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
867 {
868 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
869 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
870
871 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
872
873 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
874 pEndpointNew->AioMgr.pEndpointPrev = NULL;
875 if (pAioMgr->pEndpointsHead)
876 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
877 pAioMgr->pEndpointsHead = pEndpointNew;
878
879 /* Assign the completion point to this file. */
880 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
881 fNotifyWaiter = true;
882 pAioMgr->cEndpoints++;
883 break;
884 }
885 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
886 {
887 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
888 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
889
890 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
891 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
892 break;
893 }
894 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
895 {
896 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
897 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
898
899 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
900 {
901 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
902
903 /* Make sure all tasks finished. Process the queues a last time first. */
904 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
905 AssertRC(rc);
906
907 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
908 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
909 }
910 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
911 && (!pEndpointClose->AioMgr.cRequestsActive))
912 fNotifyWaiter = true;
913 break;
914 }
915 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
916 {
917 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
918 if (!pAioMgr->cRequestsActive)
919 fNotifyWaiter = true;
920 break;
921 }
922 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
923 {
924 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
925 break;
926 }
927 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
928 {
929 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
930 fNotifyWaiter = true;
931 break;
932 }
933 default:
934 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
935 }
936
937 if (fNotifyWaiter)
938 {
939 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
940 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
941
942 /* Release the waiting thread. */
943 LogFlow(("Signalling waiter\n"));
944 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
945 AssertRC(rc);
946 }
947
948 LogFlowFunc((": Leave\n"));
949 return rc;
950}
951
952/**
953 * Checks all endpoints for pending events or new requests.
954 *
955 * @returns VBox status code.
956 * @param pAioMgr The I/O manager handle.
957 */
958static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
959{
960 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
961 int rc = VINF_SUCCESS;
962 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
963
964 pAioMgr->fBwLimitReached = false;
965
966 while (pEndpoint)
967 {
968 if (!pEndpoint->pFlushReq
969 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
970 && !pEndpoint->AioMgr.fMoving)
971 {
972 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
973 if (RT_FAILURE(rc))
974 return rc;
975 }
976 else if (!pEndpoint->AioMgr.cRequestsActive)
977 {
978 /* Reopen the file so that the new endpoint can reassociate with the file */
979 RTFileClose(pEndpoint->File);
980 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
981 AssertRC(rc);
982
983 if (pEndpoint->AioMgr.fMoving)
984 {
985 pEndpoint->AioMgr.fMoving = false;
986 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
987 }
988 else
989 {
990 Assert(pAioMgr->fBlockingEventPending);
991 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
992
993 /* Release the waiting thread. */
994 LogFlow(("Signalling waiter\n"));
995 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
996 AssertRC(rc);
997 }
998 }
999
1000 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1001 }
1002
1003 return rc;
1004}
1005
1006static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1007{
1008 int rc = VINF_SUCCESS;
1009 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1010 size_t cbTransfered = 0;
1011 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1012 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1013
1014 pEndpoint = pTask->pEndpoint;
1015
1016 /*
1017 * It is possible that the request failed on Linux with kernels < 2.6.23
1018 * if the passed buffer was allocated with remap_pfn_range or if the file
1019 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1020 * The endpoint will be migrated to a failsafe manager in case a request fails.
1021 */
1022 if (RT_FAILURE(rcReq))
1023 {
1024 /* Free bounce buffers and the IPRT request. */
1025 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = hReq;
1026 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
1027
1028 pAioMgr->cRequestsActive--;
1029 pEndpoint->AioMgr.cRequestsActive--;
1030 pEndpoint->AioMgr.cReqsProcessed++;
1031
1032 if (pTask->fBounceBuffer)
1033 RTMemFree(pTask->pvBounceBuffer);
1034
1035 /* Queue the request on the pending list. */
1036 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1037 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1038
1039 /* Create a new failsafe manager if neccessary. */
1040 if (!pEndpoint->AioMgr.fMoving)
1041 {
1042 PPDMACEPFILEMGR pAioMgrFailsafe;
1043
1044 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1045 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1046
1047 pEndpoint->AioMgr.fMoving = true;
1048
1049 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1050 &pAioMgrFailsafe, true);
1051 AssertRC(rc);
1052
1053 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1054
1055 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1056 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1057 }
1058
1059 /* If this was the last request for the endpoint migrate it to the new manager. */
1060 if (!pEndpoint->AioMgr.cRequestsActive)
1061 {
1062 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1063 Assert(!fReqsPending);
1064
1065 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1066 AssertRC(rc);
1067 }
1068 }
1069 else
1070 {
1071 AssertMsg(( (cbTransfered == pTask->DataSeg.cbSeg)
1072 || (pTask->fBounceBuffer && (cbTransfered >= pTask->DataSeg.cbSeg))),
1073 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
1074
1075 if (pTask->fPrefetch)
1076 {
1077 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1078 Assert(pTask->fBounceBuffer);
1079
1080 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
1081 pTask->DataSeg.pvSeg,
1082 pTask->DataSeg.cbSeg);
1083
1084 /* Write it now. */
1085 pTask->fPrefetch = false;
1086 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
1087 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1088
1089 /* Grow the file if needed. */
1090 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1091 {
1092 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1093 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
1094 }
1095
1096 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
1097 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1098 AssertRC(rc);
1099 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &hReq, 1);
1100 AssertRC(rc);
1101 }
1102 else
1103 {
1104 if (pTask->fBounceBuffer)
1105 {
1106 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1107 memcpy(pTask->DataSeg.pvSeg,
1108 ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
1109 pTask->DataSeg.cbSeg);
1110
1111 RTMemPageFree(pTask->pvBounceBuffer);
1112 }
1113
1114 /* Put the entry on the free array */
1115 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = hReq;
1116 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
1117
1118 pAioMgr->cRequestsActive--;
1119 pEndpoint->AioMgr.cRequestsActive--;
1120 pEndpoint->AioMgr.cReqsProcessed++;
1121
1122 /* Free the lock and process pending tasks if neccessary */
1123 pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1124
1125 /* Call completion callback */
1126 pTask->pfnCompleted(pTask, pTask->pvUser);
1127 pdmacFileTaskFree(pEndpoint, pTask);
1128
1129 /*
1130 * If there is no request left on the endpoint but a flush request is set
1131 * it completed now and we notify the owner.
1132 * Furthermore we look for new requests and continue.
1133 */
1134 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1135 {
1136 /* Call completion callback */
1137 pTask = pEndpoint->pFlushReq;
1138 pEndpoint->pFlushReq = NULL;
1139
1140 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1141
1142 pTask->pfnCompleted(pTask, pTask->pvUser);
1143 pdmacFileTaskFree(pEndpoint, pTask);
1144 }
1145 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1146 {
1147 /* If the endpoint is about to be migrated do it now. */
1148 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1149 Assert(!fReqsPending);
1150
1151 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1152 AssertRC(rc);
1153 }
1154 }
1155 } /* request completed successfully */
1156}
1157
/**
 * Helper macro for checking for error codes.
 *
 * If rc is a failure status the error handler is called and the enclosing
 * function returns the status code of the handler.  The body is wrapped in
 * do { } while (0) so that an invocation followed by a semicolon forms
 * exactly one statement and can be used safely in if/else constructs.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1165
1166/**
1167 * The normal I/O manager using the RTFileAio* API
1168 *
1169 * @returns VBox status code.
1170 * @param ThreadSelf Handle of the thread.
1171 * @param pvUser Opaque user data.
1172 */
1173int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
1174{
1175 int rc = VINF_SUCCESS;
1176 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1177 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1178
1179 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
1180 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
1181 {
1182 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1183 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1184 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
1185 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1186 AssertRC(rc);
1187
1188 LogFlow(("Got woken up\n"));
1189 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1190
1191 /* Check for an external blocking event first. */
1192 if (pAioMgr->fBlockingEventPending)
1193 {
1194 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1195 CHECK_RC(pAioMgr, rc);
1196 }
1197
1198 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
1199 {
1200 /* We got woken up because an endpoint issued new requests. Queue them. */
1201 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1202 CHECK_RC(pAioMgr, rc);
1203
1204 while ( pAioMgr->cRequestsActive
1205 || pAioMgr->fBwLimitReached)
1206 {
1207 if (pAioMgr->cRequestsActive)
1208 {
1209 RTFILEAIOREQ apReqs[20];
1210 uint32_t cReqsCompleted = 0;
1211 size_t cReqsWait;
1212
1213 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1214 cReqsWait = RT_ELEMENTS(apReqs);
1215 else
1216 cReqsWait = pAioMgr->cRequestsActive;
1217
1218 LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));
1219
1220 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1221 cReqsWait,
1222 RT_INDEFINITE_WAIT, apReqs,
1223 RT_ELEMENTS(apReqs), &cReqsCompleted);
1224 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1225 CHECK_RC(pAioMgr, rc);
1226
1227 LogFlow(("%d tasks completed\n", cReqsCompleted));
1228
1229 for (uint32_t i = 0; i < cReqsCompleted; i++)
1230 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1231
1232 /* Check for an external blocking event before we go to sleep again. */
1233 if (pAioMgr->fBlockingEventPending)
1234 {
1235 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1236 CHECK_RC(pAioMgr, rc);
1237 }
1238
1239 /* Update load statistics. */
1240 uint64_t uMillisCurr = RTTimeMilliTS();
1241 if (uMillisCurr > uMillisEnd)
1242 {
1243 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1244
1245 /* Calculate timespan. */
1246 uMillisCurr -= uMillisEnd;
1247
1248 while (pEndpointCurr)
1249 {
1250 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1251 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1252 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1253 }
1254
1255 /* Set new update interval */
1256 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1257 }
1258 }
1259 else
1260 {
1261 /*
1262 * Bandwidth limit reached for all endpoints.
1263 * Yield and wait until we have enough resources again.
1264 */
1265 RTThreadYield();
1266 }
1267
1268 /* Check endpoints for new requests. */
1269 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1270 CHECK_RC(pAioMgr, rc);
1271 } /* while requests are active. */
1272 } /* if still running */
1273 } /* while running */
1274
1275 return rc;
1276}
1277
1278#undef CHECK_RC
1279
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette