VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 23911

Last change on this file since 23911 was 23812, checked in by vboxsync, 15 years ago

PDMAsyncCompletion: Bugfixes

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 36.8 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 23812 2009-10-15 17:52:15Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include <iprt/types.h>
24#include <iprt/asm.h>
25#include <iprt/file.h>
26#include <iprt/mem.h>
27#include <iprt/string.h>
28#include <VBox/log.h>
29
30#include "PDMAsyncCompletionFileInternal.h"
31
32/** The update period for the I/O load statistics in ms. */
33#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
34/** Maximum number of requests a manager will handle. */
35#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo: Find better solution wrt. the request number*/
36
37int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
38{
39 int rc = VINF_SUCCESS;
40
41 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
42 if (rc == VERR_OUT_OF_RANGE)
43 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
44
45 if (RT_SUCCESS(rc))
46 {
47 /* Initialize request handle array. */
48 pAioMgr->iFreeEntryNext = 0;
49 pAioMgr->iFreeReqNext = 0;
50 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
51 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
52
53 if (pAioMgr->pahReqsFree)
54 {
55 return VINF_SUCCESS;
56 }
57 else
58 {
59 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
60 rc = VERR_NO_MEMORY;
61 }
62 }
63
64 return rc;
65}
66
67void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
68{
69 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
70
71 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
72 {
73 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
74 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
75 }
76
77 RTMemFree(pAioMgr->pahReqsFree);
78}
79
80/**
81 * Sorts the endpoint list with insertion sort.
82 */
83static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
84{
85 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
86
87 pEpPrev = pAioMgr->pEndpointsHead;
88 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
89
90 while (pEpCurr)
91 {
92 /* Remember the next element to sort because the list might change. */
93 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
94
95 /* Unlink the current element from the list. */
96 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
97 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
98
99 if (pPrev)
100 pPrev->AioMgr.pEndpointNext = pNext;
101 else
102 pAioMgr->pEndpointsHead = pNext;
103
104 if (pNext)
105 pNext->AioMgr.pEndpointPrev = pPrev;
106
107 /* Go back until we reached the place to insert the current endpoint into. */
108 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
109 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
110
111 /* Link the endpoint into the list. */
112 if (pEpPrev)
113 pNext = pEpPrev->AioMgr.pEndpointNext;
114 else
115 pNext = pAioMgr->pEndpointsHead;
116
117 pEpCurr->AioMgr.pEndpointNext = pNext;
118 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
119 pNext->AioMgr.pEndpointPrev = pEpCurr;
120 if (pEpPrev)
121 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
122 else
123 pAioMgr->pEndpointsHead = pEpCurr;
124
125 pEpCurr = pEpNextToSort;
126 }
127
128#ifdef DEBUG
129 /* Validate sorting alogrithm */
130 unsigned cEndpoints = 0;
131 pEpCurr = pAioMgr->pEndpointsHead;
132
133 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
134 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
135
136 while (pEpCurr)
137 {
138 cEndpoints++;
139
140 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
141 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
142
143 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
144 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
145
146 pEpCurr = pNext;
147 }
148
149 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
150
151#endif
152}
153
154/**
155 * Removes an endpoint from the currently assigned manager.
156 *
157 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
158 * FALSE otherwise.
159 * @param pEndpointRemove The endpoint to remove.
160 */
161static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
162{
163 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
164 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
165 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
166
167 pAioMgr->cEndpoints--;
168
169 if (pPrev)
170 pPrev->AioMgr.pEndpointNext = pNext;
171 else
172 pAioMgr->pEndpointsHead = pNext;
173
174 if (pNext)
175 pNext->AioMgr.pEndpointPrev = pPrev;
176
177 /* Make sure that there is no request pending on this manager for the endpoint. */
178 if (!pEndpointRemove->AioMgr.cRequestsActive)
179 {
180 Assert(!pEndpointRemove->pFlushReq);
181
182 /* Reopen the file so that the new endpoint can reassociate with the file */
183 RTFileClose(pEndpointRemove->File);
184 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
185 AssertRC(rc);
186 return false;
187 }
188
189 return true;
190}
191
192/**
193 * Creates a new I/O manager and spreads the I/O load of the endpoints
194 * between the given I/O manager and the new one.
195 *
196 * @returns nothing.
197 * @param pAioMgr The I/O manager with high I/O load.
198 */
199static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
200{
201 PPDMACEPFILEMGR pAioMgrNew = NULL;
202 int rc = VINF_SUCCESS;
203
204 /* Splitting can't be done with only one open endpoint. */
205 if (pAioMgr->cEndpoints > 1)
206 {
207 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
208 &pAioMgrNew);
209 if (RT_SUCCESS(rc))
210 {
211 /* We will sort the list by request count per second. */
212 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
213
214 /* Now move some endpoints to the new manager. */
215 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
216 unsigned cReqsOther = 0;
217 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
218
219 while (pCurr)
220 {
221 if (cReqsHere <= cReqsOther)
222 {
223 /*
224 * The other manager has more requests to handle now.
225 * We will keep the current endpoint.
226 */
227 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
228 cReqsHere += pCurr->AioMgr.cReqsPerSec;
229 pCurr = pCurr->AioMgr.pEndpointNext;
230 }
231 else
232 {
233 /* Move to other endpoint. */
234 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
235 cReqsOther += pCurr->AioMgr.cReqsPerSec;
236
237 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
238
239 pCurr = pCurr->AioMgr.pEndpointNext;
240
241 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
242
243 if (fReqsPending)
244 {
245 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
246 pMove->AioMgr.fMoving = true;
247 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
248 }
249 else
250 {
251 pMove->AioMgr.fMoving = false;
252 pMove->AioMgr.pAioMgrDst = NULL;
253 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
254 }
255 }
256 }
257 }
258 else
259 {
260 /* Don't process further but leave a log entry about reduced performance. */
261 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
262 }
263 }
264}
265
/**
 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
 *
 * Marks this manager as faulted and switches the whole endpoint class into
 * failsafe mode.  NOTE(review): the actual migration of endpoints to a
 * failsafe manager is not implemented yet — see the AssertMsgFailed below.
 *
 * @returns VBox status code
 * @param   pAioMgr    The I/O manager the error occurred on.
 * @param   rc         The error code.
 * @param   RT_SRC_POS_DECL  Source position of the failure (file/line/function).
 */
static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
{
    LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
            pAioMgr, rc));
    LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
    LogRel(("AIOMgr: Please contact the product vendor\n"));

    PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;

    /* Flag the manager as dead and flip the class-wide failsafe switch. */
    pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
    ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);

    /* Endpoint migration to a failsafe manager is still missing. */
    AssertMsgFailed(("Implement\n"));
    return VINF_SUCCESS;
}
288
289/**
290 * Put a list of tasks in the pending request list of an endpoint.
291 */
292DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
293{
294 /* Add the rest of the tasks to the pending list */
295 if (!pEndpoint->AioMgr.pReqsPendingHead)
296 {
297 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
298 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
299 }
300 else
301 {
302 Assert(pEndpoint->AioMgr.pReqsPendingTail);
303 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
304 }
305
306 /* Update the tail. */
307 while (pTaskHead->pNext)
308 pTaskHead = pTaskHead->pNext;
309
310 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
311}
312
313/**
314 * Put one task in the pending request list of an endpoint.
315 */
316DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
317{
318 /* Add the rest of the tasks to the pending list */
319 if (!pEndpoint->AioMgr.pReqsPendingHead)
320 {
321 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
322 pEndpoint->AioMgr.pReqsPendingHead = pTask;
323 }
324 else
325 {
326 Assert(pEndpoint->AioMgr.pReqsPendingTail);
327 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
328 }
329
330 pEndpoint->AioMgr.pReqsPendingTail = pTask;
331}
332
/**
 * Wrapper around RTFileAioCtxSubmit() which also does error handling.
 *
 * The active request counters of both the manager and the endpoint are
 * bumped optimistically before submission.  If the host runs out of AIO
 * resources, the requests that were NOT accepted are rolled back: their
 * handles go back on the free ring, the matching tasks are re-queued on
 * the endpoint's pending list and the counters are corrected.
 *
 * @returns VBox status code from RTFileAioCtxSubmit()
 *          (VERR_FILE_AIO_INSUFFICIENT_RESSOURCES after partial rollback).
 * @param   pAioMgr      The I/O manager submitting the requests.
 * @param   pEndpoint    The endpoint the requests belong to.
 * @param   pahReqs      Array of prepared request handles.
 * @param   cReqs        Number of requests in the array.
 */
static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
                                            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                            PRTFILEAIOREQ pahReqs, size_t cReqs)
{
    int rc;

    /* Account for the requests before submitting; rolled back below on failure. */
    pAioMgr->cRequestsActive += cReqs;
    pEndpoint->AioMgr.cRequestsActive += cReqs;

    LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
    LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));

    rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
    if (RT_FAILURE(rc))
    {
        if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
        {
            PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;

            /*
             * We run out of resources.
             * Need to check which requests got queued
             * and put the rest on the pending list again.
             */
            if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
            {
                /* Warn once per endpoint class, not per submission. */
                pEpClass->fOutOfResourcesWarningPrinted = true;
                LogRel(("AIOMgr: The operating system doesn't have enough resources "
                        "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
            }

            for (size_t i = 0; i < cReqs; i++)
            {
                int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);

                /* VERR_FILE_AIO_IN_PROGRESS means this request DID get submitted;
                 * only the ones the host rejected need to be rolled back. */
                if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
                {
                    AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
                              ("Request returned unexpected return code: rc=%Rrc\n", rcReq));

                    PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);

                    /* Put the entry on the free array */
                    pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
                    pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;

                    /* Re-queue the task and undo the optimistic accounting. */
                    pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
                    pAioMgr->cRequestsActive--;
                    pEndpoint->AioMgr.cRequestsActive--;
                }
            }
            LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
            LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
        }
        else
            AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
    }

    return rc;
}
396
/**
 * Processes a chain of tasks for an endpoint, preparing and submitting AIO
 * requests until a flush request is met, the manager's request budget is
 * exhausted, or the chain ends.
 *
 * Read/write tasks whose offset, size or buffer address are not 512-byte
 * aligned are routed through a page-allocated bounce buffer; unaligned
 * writes first prefetch the surrounding sectors (fPrefetch) before the
 * actual write is issued from the completion path.
 *
 * @returns VBox status code (VERR_FILE_AIO_INSUFFICIENT_RESSOURCES is not
 *          converted here; the caller sees the raw submit status).
 * @param   pTaskHead    Head of the task chain to process.
 * @param   pAioMgr      The I/O manager handling the requests.
 * @param   pEndpoint    The endpoint the tasks belong to.
 */
static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
                                                PPDMACEPFILEMGR pAioMgr,
                                                PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
    RTFILEAIOREQ apReqs[20];
    unsigned cRequests = 0;
    /* Remaining request budget for this manager. */
    unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
    int rc = VINF_SUCCESS;
    PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;

    AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
              ("Trying to process request lists of a non active endpoint!\n"));

    /* Go through the list and queue the requests until we get a flush request */
    while (   pTaskHead
           && !pEndpoint->pFlushReq
           && (cMaxRequests > 0)
           && RT_SUCCESS(rc))
    {
        PPDMACTASKFILE pCurr = pTaskHead;

        pTaskHead = pTaskHead->pNext;

        pCurr->pNext = NULL;

        AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
                  ("Endpoints do not match\n"));

        switch (pCurr->enmTransferType)
        {
            case PDMACTASKFILETRANSFER_FLUSH:
            {
                /* If there is no data transfer request this flush request finished immediately. */
                if (!pEndpoint->AioMgr.cRequestsActive)
                {
                    pCurr->pfnCompleted(pCurr, pCurr->pvUser);
                    pdmacFileTaskFree(pEndpoint, pCurr);
                }
                else
                {
                    /* Outstanding transfers: park the flush; it completes when the
                     * last active request finishes (see the main loop). */
                    pEndpoint->pFlushReq = pCurr;
                }
                break;
            }
            case PDMACTASKFILETRANSFER_READ:
            case PDMACTASKFILETRANSFER_WRITE:
            {
                RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
                void *pvBuf = pCurr->DataSeg.pvSeg;

                /* Get a request handle. */
                if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
                {
                    /* Reuse a cached handle from the free ring. */
                    hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
                    pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
                    pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
                }
                else
                {
                    /* NOTE(review): on RTFileAioReqCreate failure hReq stays NIL and
                     * execution continues past the assert in release builds — verify. */
                    rc = RTFileAioReqCreate(&hReq);
                    AssertRC(rc);
                }

                AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));

                /* Check if the alignment requirements are met.
                 * Offset, transfer size and buffer address
                 * need to be on a 512 boundary. */
                RTFOFF offStart = pCurr->Off & ~(RTFOFF)(512-1);
                size_t cbToTransfer = RT_ALIGN_Z(pCurr->DataSeg.cbSeg + (pCurr->Off - offStart), 512);
                PDMACTASKFILETRANSFER enmTransferType = pCurr->enmTransferType;

                AssertMsg(   pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE
                          || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
                          ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
                          offStart, cbToTransfer, pEndpoint->cbFile));

                pCurr->fPrefetch = false;

                if (   RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
                    || RT_UNLIKELY(offStart != pCurr->Off)
                    || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
                {
                    LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
                             pCurr, cbToTransfer, pCurr->DataSeg.cbSeg, offStart, pCurr->Off));

                    /* Create bounce buffer. */
                    pCurr->fBounceBuffer = true;

                    AssertMsg(pCurr->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
                              pCurr->Off, offStart));
                    /* Offset of the caller's data within the aligned bounce buffer. */
                    pCurr->uBounceBufOffset = pCurr->Off - offStart;

                    /** @todo: I think we need something like a RTMemAllocAligned method here.
                     * Current assumption is that the maximum alignment is 4096byte
                     * (GPT disk on Windows)
                     * so we can use RTMemPageAlloc here.
                     */
                    pCurr->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
                    AssertPtr(pCurr->pvBounceBuffer);
                    pvBuf = pCurr->pvBounceBuffer;

                    if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
                    {
                        if (   RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
                            || RT_UNLIKELY(offStart != pCurr->Off))
                        {
                            /* We have to fill the buffer first before we can update the data. */
                            LogFlow(("Prefetching data for task %#p\n", pCurr));
                            pCurr->fPrefetch = true;
                            enmTransferType = PDMACTASKFILETRANSFER_READ;
                        }
                        else
                            memcpy(pvBuf, pCurr->DataSeg.pvSeg, pCurr->DataSeg.cbSeg);
                    }
                }
                else
                    pCurr->fBounceBuffer = false;

                AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
                          ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));

                if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
                {
                    /* Grow the file if needed. */
                    if (RT_UNLIKELY((uint64_t)(pCurr->Off + pCurr->DataSeg.cbSeg) > pEndpoint->cbFile))
                    {
                        ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg);
                        RTFileSetSize(pEndpoint->File, pCurr->Off + pCurr->DataSeg.cbSeg);
                    }

                    rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
                                                  offStart, pvBuf, cbToTransfer, pCurr);
                }
                else
                    rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
                                                 offStart, pvBuf, cbToTransfer, pCurr);
                AssertRC(rc);

                apReqs[cRequests] = hReq;
                pEndpoint->AioMgr.cReqsProcessed++;
                cMaxRequests--;
                cRequests++;
                /* Batch is full: submit it now and start a new one. */
                if (cRequests == RT_ELEMENTS(apReqs))
                {
                    rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
                    cRequests = 0;
                    AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                              ("Unexpected return code\n"));
                }
                break;
            }
            default:
                AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
        }
    }

    /* Submit the remaining partial batch. */
    if (cRequests)
    {
        rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
        AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                  ("Unexpected return code rc=%Rrc\n", rc));
    }

    if (pTaskHead)
    {
        /* Add the rest of the tasks to the pending list */
        pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);

        if (RT_UNLIKELY(!cMaxRequests && !pEndpoint->pFlushReq))
        {
            /*
             * The I/O manager has no room left for more requests
             * but there are still requests to process.
             * Create a new I/O manager and let it handle some endpoints.
             */
            pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
        }
    }

    /* Insufficient resources are not fatal. */
    if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
        rc = VINF_SUCCESS;

    return rc;
}
583
/**
 * Adds all pending requests for the given endpoint
 * until a flush request is encountered or there is no
 * request anymore.
 *
 * Previously deferred requests (the endpoint's pending list) are processed
 * first; only when that list drained completely and no flush is parked does
 * the endpoint's new-request queue get processed.
 *
 * @returns VBox status code.
 * @param   pAioMgr      The async I/O manager for the endpoint
 * @param   pEndpoint    The endpoint to get the requests from.
 */
static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
                                          PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
    int rc = VINF_SUCCESS;
    PPDMACTASKFILE pTasksHead = NULL;

    AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
              ("Trying to process request lists of a non active endpoint!\n"));

    Assert(!pEndpoint->pFlushReq);

    /* Check the pending list first */
    if (pEndpoint->AioMgr.pReqsPendingHead)
    {
        LogFlow(("Queuing pending requests first\n"));

        pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
        /*
         * Clear the list as the processing routine will insert them into the list
         * again if it gets a flush request.
         */
        pEndpoint->AioMgr.pReqsPendingHead = NULL;
        pEndpoint->AioMgr.pReqsPendingTail = NULL;
        rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
        AssertRC(rc);
    }

    /* Only pull new work when the backlog is gone and no flush is in flight. */
    if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
    {
        /* Now the request queue. */
        pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
        if (pTasksHead)
        {
            rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
            AssertRC(rc);
        }
    }

    return rc;
}
633
/**
 * Processes one blocking event posted to the I/O manager by another thread
 * (add/remove/close endpoint, shutdown, suspend, resume).
 *
 * The poster blocks on EventSemBlock until this function signals it; the
 * signal is deferred (fNotifyWaiter == false) when the event cannot finish
 * yet, e.g. an endpoint removal with requests still in flight.
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager the event was posted to.
 */
static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
{
    int rc = VINF_SUCCESS;
    bool fNotifyWaiter = false;

    LogFlowFunc((": Enter\n"));

    Assert(pAioMgr->fBlockingEventPending);

    switch (pAioMgr->enmBlockingEvent)
    {
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
            AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));

            pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;

            /* Push the endpoint onto the front of the manager's endpoint list. */
            pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
            pEndpointNew->AioMgr.pEndpointPrev = NULL;
            if (pAioMgr->pEndpointsHead)
                pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
            pAioMgr->pEndpointsHead = pEndpointNew;

            /* Assign the completion point to this file. */
            rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
            fNotifyWaiter = true;
            pAioMgr->cEndpoints++;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
            AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));

            pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
            /* Only wake the poster now if no requests are still in flight;
             * otherwise the main loop signals once the last one completes. */
            fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
            AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));

            LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));

            /* Make sure all tasks finished. Process the queues a last time first. */
            rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
            AssertRC(rc);

            pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
            fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
            /* With requests still active the notification happens later. */
            if (!pAioMgr->cRequestsActive)
                fNotifyWaiter = true;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
            fNotifyWaiter = true;
            break;
        }
        default:
            AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
    }

    if (fNotifyWaiter)
    {
        /* Event fully handled: clear the pending state and wake the poster. */
        ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
        pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;

        /* Release the waiting thread. */
        LogFlow(("Signalling waiter\n"));
        rc = RTSemEventSignal(pAioMgr->EventSemBlock);
        AssertRC(rc);
    }

    LogFlowFunc((": Leave\n"));
    return rc;
}
724
/**
 * Checks all endpoints for pending events or new requests.
 *
 * Active endpoints without a parked flush get their queues processed.
 * Endpoints in a non-active state (removing/closing) whose last request has
 * completed are finalized here: the file handle is reopened for handover,
 * and either the endpoint migrates to its destination manager (fMoving) or
 * the thread blocked on the remove/close event is signalled.
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager handle.
 */
static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
{
    /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
    int rc = VINF_SUCCESS;
    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;

    while (pEndpoint)
    {
        if (!pEndpoint->pFlushReq && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE))
        {
            rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
            if (RT_FAILURE(rc))
                return rc;
        }
        else if (!pEndpoint->AioMgr.cRequestsActive)
        {
            /* Reopen the file so that the new endpoint can reassociate with the file */
            RTFileClose(pEndpoint->File);
            rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
            AssertRC(rc);

            if (pEndpoint->AioMgr.fMoving)
            {
                /* Load balancing moved this endpoint: hand it to the new manager now. */
                pEndpoint->AioMgr.fMoving = false;
                pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
            }
            else
            {
                /* A remove/close event was deferred until the last request
                 * finished; complete the handshake now. */
                Assert(pAioMgr->fBlockingEventPending);
                ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);

                /* Release the waiting thread. */
                LogFlow(("Signalling waiter\n"));
                rc = RTSemEventSignal(pAioMgr->EventSemBlock);
                AssertRC(rc);
            }
        }

        pEndpoint = pEndpoint->AioMgr.pEndpointNext;
    }

    return rc;
}
774
/** Helper macro for checking for error codes.
 *
 * On failure runs the error handler and returns its status from the
 * surrounding function.  Wrapped in do/while(0) so the multi-statement
 * expansion behaves as a single statement (safe inside unbraced
 * if/else bodies); the previous bare-if form was vulnerable to the
 * dangling-else problem. */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
782
/**
 * The normal I/O manager using the RTFileAio* API
 *
 * Thread body: sleeps until woken, handles blocking events, queues new
 * endpoint requests and then reaps completions until no request is active,
 * periodically refreshing the per-endpoint load statistics.
 *
 * @returns VBox status code.
 * @param   ThreadSelf    Handle of the thread.
 * @param   pvUser        Opaque user data (PPDMACEPFILEMGR).
 */
int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
{
    int rc = VINF_SUCCESS;
    PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
    /* End of the current statistics interval. */
    uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;

    while (   (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
           || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
    {
        /* Sleep unless someone already flagged a wakeup (fWokenUp). */
        ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
        if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
            rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
        ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
        AssertRC(rc);

        LogFlow(("Got woken up\n"));
        ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);

        /* Check for an external blocking event first. */
        if (pAioMgr->fBlockingEventPending)
        {
            rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
            CHECK_RC(pAioMgr, rc);
        }

        if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
        {
            /* We got woken up because an endpoint issued new requests. Queue them. */
            rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
            CHECK_RC(pAioMgr, rc);

            /* Reap completions until every submitted request has finished. */
            while (pAioMgr->cRequestsActive)
            {
                RTFILEAIOREQ apReqs[20];
                uint32_t cReqsCompleted = 0;
                size_t cReqsWait;

                /* Wait for at most as many completions as fit in apReqs. */
                if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
                    cReqsWait = RT_ELEMENTS(apReqs);
                else
                    cReqsWait = pAioMgr->cRequestsActive;

                LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));

                rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
                                      cReqsWait,
                                      RT_INDEFINITE_WAIT, apReqs,
                                      RT_ELEMENTS(apReqs), &cReqsCompleted);
                /* VERR_INTERRUPTED just restarts the wait; anything else is fatal. */
                if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
                    CHECK_RC(pAioMgr, rc);

                LogFlow(("%d tasks completed\n", cReqsCompleted));

                for (uint32_t i = 0; i < cReqsCompleted; i++)
                {
                    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
                    size_t cbTransfered = 0;
                    int rcReq = RTFileAioReqGetRC(apReqs[i], &cbTransfered);
                    PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(apReqs[i]);

                    pEndpoint = pTask->pEndpoint;

                    /* Short transfers are only acceptable for bounce-buffered tasks
                     * (the aligned size may differ from the caller's segment size). */
                    AssertMsg(   RT_SUCCESS(rcReq)
                              && (   (cbTransfered == pTask->DataSeg.cbSeg)
                                  || (pTask->fBounceBuffer)),
                              ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));

                    if (pTask->fPrefetch)
                    {
                        /* Phase 1 of an unaligned write finished: the surrounding
                         * sectors were read into the bounce buffer. Merge in the
                         * caller's data and resubmit the same handle as a write. */
                        Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
                        Assert(pTask->fBounceBuffer);

                        memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
                                pTask->DataSeg.pvSeg,
                                pTask->DataSeg.cbSeg);

                        /* Write it now. */
                        pTask->fPrefetch = false;
                        /* NOTE(review): this size ignores uBounceBufOffset, unlike the
                         * prefetch read which aligned cbSeg + (Off - offStart); when the
                         * offset is unaligned this may write one sector less than was
                         * read — verify against the submit path. */
                        size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
                        RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);

                        /* Grow the file if needed. */
                        if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
                        {
                            ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
                            RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
                        }

                        rc = RTFileAioReqPrepareWrite(apReqs[i], pEndpoint->File,
                                                      offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
                        AssertRC(rc);
                        rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &apReqs[i], 1);
                        AssertRC(rc);
                    }
                    else
                    {
                        if (pTask->fBounceBuffer)
                        {
                            /* Copy read data out of the bounce buffer back to the caller. */
                            if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
                                memcpy(pTask->DataSeg.pvSeg,
                                       ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
                                       pTask->DataSeg.cbSeg);

                            RTMemPageFree(pTask->pvBounceBuffer);
                        }

                        /* Put the entry on the free array */
                        pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
                        pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;

                        pAioMgr->cRequestsActive--;
                        pEndpoint->AioMgr.cRequestsActive--;
                        pEndpoint->AioMgr.cReqsProcessed++;

                        /* Call completion callback */
                        pTask->pfnCompleted(pTask, pTask->pvUser);
                        pdmacFileTaskFree(pEndpoint, pTask);

                        /*
                         * If there is no request left on the endpoint but a flush request is set
                         * it completed now and we notify the owner.
                         * Furthermore we look for new requests and continue.
                         */
                        if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
                        {
                            /* Call completion callback */
                            pTask = pEndpoint->pFlushReq;
                            pEndpoint->pFlushReq = NULL;

                            AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));

                            pTask->pfnCompleted(pTask, pTask->pvUser);
                            pdmacFileTaskFree(pEndpoint, pTask);
                        }
                    }
                }

                /* Check for an external blocking event before we go to sleep again. */
                if (pAioMgr->fBlockingEventPending)
                {
                    rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
                    CHECK_RC(pAioMgr, rc);
                }

                /* Update load statistics. */
                uint64_t uMillisCurr = RTTimeMilliTS();
                if (uMillisCurr > uMillisEnd)
                {
                    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;

                    /* Calculate timespan. */
                    uMillisCurr -= uMillisEnd;

                    while (pEndpointCurr)
                    {
                        /* NOTE(review): the divisor is in milliseconds (overshoot +
                         * update period), so despite the name this is requests per
                         * millisecond; harmless as long as it is only compared
                         * relatively (sorting/balancing) — verify. */
                        pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
                        pEndpointCurr->AioMgr.cReqsProcessed = 0;
                        pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
                    }

                    /* Set new update interval */
                    uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
                }

                /* Check endpoints for new requests. */
                rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
                CHECK_RC(pAioMgr, rc);
            } /* while requests are active. */
        } /* if still running */
    } /* while running */

    return rc;
}
963
964#undef CHECK_RC
965
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette