VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@26295

Last change on this file since 26295 was 26147, checked in by vboxsync, 15 years ago

AsyncCompletion: Fix incorrect count of outstanding write tasks. Fixes hangs during flush requests. Return VINF_AIO_TASK_PENDING if data needs to be read from the file
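
For context on the flush fix mentioned above, the sketch below (plain C, using hypothetical SKETCHTASK/SKETCHENDPOINT types rather than the real PDM structures) illustrates the flush-parking scheme used further down in this file: a flush is held in pFlushReq and only completed once the endpoint's count of outstanding requests drains to zero, so an over-counted write keeps a flush pending forever.

#include <stdio.h>

/* Hypothetical, simplified stand-ins for the real PDM task/endpoint structures. */
typedef struct SKETCHTASK
{
    const char *pszName;
} SKETCHTASK;

typedef struct SKETCHENDPOINT
{
    unsigned    cRequestsActive;    /* outstanding reads/writes on this endpoint */
    SKETCHTASK *pFlushReq;          /* flush parked until everything has drained */
} SKETCHENDPOINT;

static void sketchCompleteTask(SKETCHTASK *pTask)
{
    printf("completed: %s\n", pTask->pszName);
}

/* Called for every finished read/write. If the outstanding counter is not
 * decremented correctly (the kind of bug r26147 fixes), it never reaches
 * zero and the parked flush below is never completed. */
static void sketchRequestCompleted(SKETCHENDPOINT *pEndpoint, SKETCHTASK *pTask)
{
    pEndpoint->cRequestsActive--;
    sketchCompleteTask(pTask);

    if (!pEndpoint->cRequestsActive && pEndpoint->pFlushReq)
    {
        SKETCHTASK *pFlush = pEndpoint->pFlushReq;
        pEndpoint->pFlushReq = NULL;
        sketchCompleteTask(pFlush);
    }
}

int main(void)
{
    SKETCHTASK Write = { "write" };
    SKETCHTASK Flush = { "flush" };
    SKETCHENDPOINT Ep = { 1 /* one write outstanding */, &Flush };

    sketchRequestCompleted(&Ep, &Write); /* completes the write, then the parked flush */
    return 0;
}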

1/* $Id: PDMAsyncCompletionFileNormal.cpp 26147 2010-02-02 13:55:20Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronously in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include <iprt/types.h>
24#include <iprt/asm.h>
25#include <iprt/file.h>
26#include <iprt/mem.h>
27#include <iprt/string.h>
28#include <iprt/assert.h>
29#include <VBox/log.h>
30
31#include "PDMAsyncCompletionFileInternal.h"
32
33/** The update period for the I/O load statistics in ms. */
34#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
35/** Maximum number of requests a manager will handle. */
36#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo Find a better solution wrt. the request number. */
37
38int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
39{
40 int rc = VINF_SUCCESS;
41
42 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
43 if (rc == VERR_OUT_OF_RANGE)
44 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
45
46 if (RT_SUCCESS(rc))
47 {
48 /* Initialize request handle array. */
49 pAioMgr->iFreeEntryNext = 0;
50 pAioMgr->iFreeReqNext = 0;
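/* Note: one spare slot in the free-request ring so that iFreeReqNext == iFreeEntryNext always means "empty" rather than "full". */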
51 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
52 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
53
54 if (pAioMgr->pahReqsFree)
55 {
56 return VINF_SUCCESS;
57 }
58 else
59 {
60 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
61 rc = VERR_NO_MEMORY;
62 }
63 }
64
65 return rc;
66}
67
68void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
69{
70 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
71
72 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
73 {
74 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
75 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
76 }
77
78 RTMemFree(pAioMgr->pahReqsFree);
79}
80
81/**
82 * Sorts the endpoint list by load (requests per second, descending) using insertion sort.
83 */
84static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
85{
86 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
87
88 pEpPrev = pAioMgr->pEndpointsHead;
89 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
90
91 while (pEpCurr)
92 {
93 /* Remember the next element to sort because the list might change. */
94 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
95
96 /* Unlink the current element from the list. */
97 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
98 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
99
100 if (pPrev)
101 pPrev->AioMgr.pEndpointNext = pNext;
102 else
103 pAioMgr->pEndpointsHead = pNext;
104
105 if (pNext)
106 pNext->AioMgr.pEndpointPrev = pPrev;
107
108 /* Go back until we reach the place to insert the current endpoint. */
109 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
110 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
111
112 /* Link the endpoint into the list. */
113 if (pEpPrev)
114 pNext = pEpPrev->AioMgr.pEndpointNext;
115 else
116 pNext = pAioMgr->pEndpointsHead;
117
118 pEpCurr->AioMgr.pEndpointNext = pNext;
119 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
120
121 if (pNext)
122 pNext->AioMgr.pEndpointPrev = pEpCurr;
123
124 if (pEpPrev)
125 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
126 else
127 pAioMgr->pEndpointsHead = pEpCurr;
128
129 pEpCurr = pEpNextToSort;
130 }
131
132#ifdef DEBUG
133 /* Validate the sorting algorithm. */
134 unsigned cEndpoints = 0;
135 pEpCurr = pAioMgr->pEndpointsHead;
136
137 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
138 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
139
140 while (pEpCurr)
141 {
142 cEndpoints++;
143
144 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
145 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
146
147 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
148 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
149
150 pEpCurr = pNext;
151 }
152
153 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
154
155#endif
156}
157
158/**
159 * Removes an endpoint from the currently assigned manager.
160 *
161 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
162 * FALSE otherwise.
163 * @param pEndpointRemove The endpoint to remove.
164 */
165static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
166{
167 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
168 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
169 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
170
171 pAioMgr->cEndpoints--;
172
173 if (pPrev)
174 pPrev->AioMgr.pEndpointNext = pNext;
175 else
176 pAioMgr->pEndpointsHead = pNext;
177
178 if (pNext)
179 pNext->AioMgr.pEndpointPrev = pPrev;
180
181 /* Make sure that there is no request pending on this manager for the endpoint. */
182 if (!pEndpointRemove->AioMgr.cRequestsActive)
183 {
184 Assert(!pEndpointRemove->pFlushReq);
185
186 /* Reopen the file so that the endpoint can be associated with another I/O manager. */
187 RTFileClose(pEndpointRemove->File);
188 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
189 AssertRC(rc);
190 return false;
191 }
192
193 return true;
194}
195
196/**
197 * Creates a new I/O manager and spreads the I/O load of the endpoints
198 * between the given I/O manager and the new one.
199 *
200 * @returns nothing.
201 * @param pAioMgr The I/O manager with high I/O load.
202 */
203static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
204{
205 PPDMACEPFILEMGR pAioMgrNew = NULL;
206 int rc = VINF_SUCCESS;
207
208 /* Splitting can't be done with only one open endpoint. */
209 if (pAioMgr->cEndpoints > 1)
210 {
211 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
212 &pAioMgrNew, false);
213 if (RT_SUCCESS(rc))
214 {
215 /* We will sort the list by request count per second. */
216 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
217
218 /* Now move some endpoints to the new manager. */
219 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
220 unsigned cReqsOther = 0;
221 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
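/* Greedy split: the busiest endpoint stays with this manager; every following endpoint goes to whichever side currently has the smaller reqs/s sum. */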
222
223 while (pCurr)
224 {
225 if (cReqsHere <= cReqsOther)
226 {
227 /*
228 * The other manager has more requests to handle now.
229 * We will keep the current endpoint.
230 */
231 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
232 cReqsHere += pCurr->AioMgr.cReqsPerSec;
233 pCurr = pCurr->AioMgr.pEndpointNext;
234 }
235 else
236 {
237 /* Move this endpoint to the other manager. */
238 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
239 cReqsOther += pCurr->AioMgr.cReqsPerSec;
240
241 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
242
243 pCurr = pCurr->AioMgr.pEndpointNext;
244
245 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
246
247 if (fReqsPending)
248 {
249 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
250 pMove->AioMgr.fMoving = true;
251 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
252 }
253 else
254 {
255 pMove->AioMgr.fMoving = false;
256 pMove->AioMgr.pAioMgrDst = NULL;
257 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
258 }
259 }
260 }
261 }
262 else
263 {
264 /* Don't process further but leave a log entry about reduced performance. */
265 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
266 }
267 }
268}
269
270/**
271 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
272 *
273 * @returns VBox status code
274 * @param pAioMgr The I/O manager the error occurred on.
275 * @param rc The error code.
276 */
277static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
278{
279 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
280 pAioMgr, rc));
281 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
282 LogRel(("AIOMgr: Please contact the product vendor\n"));
283
284 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
285
286 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
287 ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);
288
289 AssertMsgFailed(("Implement\n"));
290 return VINF_SUCCESS;
291}
292
293/**
294 * Put a list of tasks in the pending request list of an endpoint.
295 */
296DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
297{
298 /* Add the rest of the tasks to the pending list */
299 if (!pEndpoint->AioMgr.pReqsPendingHead)
300 {
301 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
302 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
303 }
304 else
305 {
306 Assert(pEndpoint->AioMgr.pReqsPendingTail);
307 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
308 }
309
310 /* Update the tail. */
311 while (pTaskHead->pNext)
312 pTaskHead = pTaskHead->pNext;
313
314 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
315}
316
317/**
318 * Put one task in the pending request list of an endpoint.
319 */
320DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
321{
322 /* Add the task to the pending list */
323 if (!pEndpoint->AioMgr.pReqsPendingHead)
324 {
325 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
326 pEndpoint->AioMgr.pReqsPendingHead = pTask;
327 }
328 else
329 {
330 Assert(pEndpoint->AioMgr.pReqsPendingTail);
331 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
332 }
333
334 pEndpoint->AioMgr.pReqsPendingTail = pTask;
335}
336
337/**
338 * Wrapper around RTFileAioCtxSubmit() which also does error handling.
339 */
340static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
341 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
342 PRTFILEAIOREQ pahReqs, size_t cReqs)
343{
344 int rc;
345
346 pAioMgr->cRequestsActive += cReqs;
347 pEndpoint->AioMgr.cRequestsActive += cReqs;
348
349 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
350 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
351
352 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
353 if (RT_FAILURE(rc))
354 {
355 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
356 {
357 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
358
359 /*
360 * We ran out of resources.
361 * Need to check which requests got queued
362 * and put the rest on the pending list again.
363 */
364 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
365 {
366 pEpClass->fOutOfResourcesWarningPrinted = true;
367 LogRel(("AIOMgr: The operating system doesn't have enough resources "
368 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
369 }
370
371 for (size_t i = 0; i < cReqs; i++)
372 {
373 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
374
375 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
376 {
377 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
378 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
379
380 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
381
382 /* Put the entry on the free array */
383 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
384 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
385
386 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
387 pAioMgr->cRequestsActive--;
388 pEndpoint->AioMgr.cRequestsActive--;
389 }
390 }
391 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
392 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
393 }
394 else
395 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
396 }
397
398 return rc;
399}
400
401static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
402 PPDMACEPFILEMGR pAioMgr,
403 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
404{
405 RTFILEAIOREQ apReqs[20];
406 unsigned cRequests = 0;
407 unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
408 int rc = VINF_SUCCESS;
409 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
410
411 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
412 ("Trying to process request lists of a non active endpoint!\n"));
413
414 /* Go through the list and queue the requests until we get a flush request */
415 while ( pTaskHead
416 && !pEndpoint->pFlushReq
417 && (cMaxRequests > 0)
418 && RT_SUCCESS(rc))
419 {
420 PPDMACTASKFILE pCurr = pTaskHead;
421
422 pTaskHead = pTaskHead->pNext;
423
424 pCurr->pNext = NULL;
425
426 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
427 ("Endpoints do not match\n"));
428
429 switch (pCurr->enmTransferType)
430 {
431 case PDMACTASKFILETRANSFER_FLUSH:
432 {
433 /* If there are no outstanding data transfer requests, this flush request finishes immediately. */
434 if (!pEndpoint->AioMgr.cRequestsActive)
435 {
436 pCurr->pfnCompleted(pCurr, pCurr->pvUser);
437 pdmacFileTaskFree(pEndpoint, pCurr);
438 }
439 else
440 {
441 pEndpoint->pFlushReq = pCurr;
442 }
443 break;
444 }
445 case PDMACTASKFILETRANSFER_READ:
446 case PDMACTASKFILETRANSFER_WRITE:
447 {
448 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
449 void *pvBuf = pCurr->DataSeg.pvSeg;
450
451 /* Get a request handle. */
452 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
453 {
454 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
455 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
456 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
457 }
458 else
459 {
460 rc = RTFileAioReqCreate(&hReq);
461 AssertRC(rc);
462 }
463
464 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
465
466 /* Check if the alignment requirements are met.
467 * Offset, transfer size and buffer address
468 * need to be on a 512 byte boundary. */
469 RTFOFF offStart = pCurr->Off & ~(RTFOFF)(512-1);
470 size_t cbToTransfer = RT_ALIGN_Z(pCurr->DataSeg.cbSeg + (pCurr->Off - offStart), 512);
471 PDMACTASKFILETRANSFER enmTransferType = pCurr->enmTransferType;
472
473 AssertMsg( pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE
474 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
475 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
476 offStart, cbToTransfer, pEndpoint->cbFile));
477
478 pCurr->fPrefetch = false;
479
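/* A bounce buffer is needed if the size, the offset or the user buffer address breaks these alignment rules (uBitmaskAlignment keeps only the aligned part of an address, so the comparison below spots a misaligned buffer). */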
480 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
481 || RT_UNLIKELY(offStart != pCurr->Off)
482 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
483 {
484 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
485 pCurr, cbToTransfer, pCurr->DataSeg.cbSeg, offStart, pCurr->Off));
486
487 /* Create bounce buffer. */
488 pCurr->fBounceBuffer = true;
489
490 AssertMsg(pCurr->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
491 pCurr->Off, offStart));
492 pCurr->uBounceBufOffset = pCurr->Off - offStart;
493
494 /** @todo: I think we need something like a RTMemAllocAligned method here.
495 * Current assumption is that the maximum alignment is 4096 bytes
496 * (GPT disk on Windows)
497 * so we can use RTMemPageAlloc here.
498 */
499 pCurr->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
500 AssertPtr(pCurr->pvBounceBuffer);
501 pvBuf = pCurr->pvBounceBuffer;
502
503 if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
504 {
505 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
506 || RT_UNLIKELY(offStart != pCurr->Off))
507 {
508 /* We have to fill the buffer first before we can update the data. */
509 LogFlow(("Prefetching data for task %#p\n", pCurr));
510 pCurr->fPrefetch = true;
511 enmTransferType = PDMACTASKFILETRANSFER_READ;
512 }
513 else
514 memcpy(pvBuf, pCurr->DataSeg.pvSeg, pCurr->DataSeg.cbSeg);
515 }
516 }
517 else
518 pCurr->fBounceBuffer = false;
519
520 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
521 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
522
523 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
524 {
525 /* Grow the file if needed. */
526 if (RT_UNLIKELY((uint64_t)(pCurr->Off + pCurr->DataSeg.cbSeg) > pEndpoint->cbFile))
527 {
528 ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg);
529 RTFileSetSize(pEndpoint->File, pCurr->Off + pCurr->DataSeg.cbSeg);
530 }
531
532 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
533 offStart, pvBuf, cbToTransfer, pCurr);
534 }
535 else
536 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
537 offStart, pvBuf, cbToTransfer, pCurr);
538 AssertRC(rc);
539
540 apReqs[cRequests] = hReq;
541 pEndpoint->AioMgr.cReqsProcessed++;
542 cMaxRequests--;
543 cRequests++;
544 if (cRequests == RT_ELEMENTS(apReqs))
545 {
546 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
547 cRequests = 0;
548 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
549 ("Unexpected return code\n"));
550 }
551 break;
552 }
553 default:
554 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
555 }
556 }
557
558 if (cRequests)
559 {
560 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
561 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
562 ("Unexpected return code rc=%Rrc\n", rc));
563 }
564
565 if (pTaskHead)
566 {
567 /* Add the rest of the tasks to the pending list */
568 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
569
570 if (RT_UNLIKELY(!cMaxRequests && !pEndpoint->pFlushReq))
571 {
572 /*
573 * The I/O manager has no room left for more requests
574 * but there are still requests to process.
575 * Create a new I/O manager and let it handle some endpoints.
576 */
577 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
578 }
579 }
580
581 /* Insufficient resources are not fatal. */
582 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
583 rc = VINF_SUCCESS;
584
585 return rc;
586}
587
588/**
589 * Adds all pending requests for the given endpoint
590 * until a flush request is encountered or there are no
591 * requests left.
592 *
593 * @returns VBox status code.
594 * @param pAioMgr The async I/O manager for the endpoint
595 * @param pEndpoint The endpoint to get the requests from.
596 */
597static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
598 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
599{
600 int rc = VINF_SUCCESS;
601 PPDMACTASKFILE pTasksHead = NULL;
602
603 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
604 ("Trying to process request lists of a non active endpoint!\n"));
605
606 Assert(!pEndpoint->pFlushReq);
607
608 /* Check the pending list first */
609 if (pEndpoint->AioMgr.pReqsPendingHead)
610 {
611 LogFlow(("Queuing pending requests first\n"));
612
613 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
614 /*
615 * Clear the list as the processing routine will insert them into the list
616 * again if it gets a flush request.
617 */
618 pEndpoint->AioMgr.pReqsPendingHead = NULL;
619 pEndpoint->AioMgr.pReqsPendingTail = NULL;
620 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
621 AssertRC(rc);
622 }
623
624 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
625 {
626 /* Now the request queue. */
627 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
628 if (pTasksHead)
629 {
630 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
631 AssertRC(rc);
632 }
633 }
634
635 return rc;
636}
637
638static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
639{
640 int rc = VINF_SUCCESS;
641 bool fNotifyWaiter = false;
642
643 LogFlowFunc((": Enter\n"));
644
645 Assert(pAioMgr->fBlockingEventPending);
646
647 switch (pAioMgr->enmBlockingEvent)
648 {
649 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
650 {
651 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
652 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without an endpoint to add\n"));
653
654 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
655
656 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
657 pEndpointNew->AioMgr.pEndpointPrev = NULL;
658 if (pAioMgr->pEndpointsHead)
659 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
660 pAioMgr->pEndpointsHead = pEndpointNew;
661
662 /* Associate the file with this manager's AIO context (completion port). */
663 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
664 fNotifyWaiter = true;
665 pAioMgr->cEndpoints++;
666 break;
667 }
668 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
669 {
670 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
671 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without an endpoint to remove\n"));
672
673 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
674 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
675 break;
676 }
677 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
678 {
679 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
680 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without an endpoint to close\n"));
681
682 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
683 {
684 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
685
686 /* Make sure all tasks are finished. Process the queues one last time first. */
687 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
688 AssertRC(rc);
689
690 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
691 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
692 }
693 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
694 && (!pEndpointClose->AioMgr.cRequestsActive))
695 fNotifyWaiter = true;
696 break;
697 }
698 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
699 {
700 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
701 if (!pAioMgr->cRequestsActive)
702 fNotifyWaiter = true;
703 break;
704 }
705 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
706 {
707 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
708 break;
709 }
710 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
711 {
712 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
713 fNotifyWaiter = true;
714 break;
715 }
716 default:
717 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
718 }
719
720 if (fNotifyWaiter)
721 {
722 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
723 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
724
725 /* Release the waiting thread. */
726 LogFlow(("Signalling waiter\n"));
727 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
728 AssertRC(rc);
729 }
730
731 LogFlowFunc((": Leave\n"));
732 return rc;
733}
734
735/**
736 * Checks all endpoints for pending events or new requests.
737 *
738 * @returns VBox status code.
739 * @param pAioMgr The I/O manager handle.
740 */
741static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
742{
743 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
744 int rc = VINF_SUCCESS;
745 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
746
747 while (pEndpoint)
748 {
749 if (!pEndpoint->pFlushReq
750 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
751 && !pEndpoint->AioMgr.fMoving)
752 {
753 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
754 if (RT_FAILURE(rc))
755 return rc;
756 }
757 else if (!pEndpoint->AioMgr.cRequestsActive)
758 {
759 /* Reopen the file so that the endpoint can be associated with another I/O manager. */
760 RTFileClose(pEndpoint->File);
761 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
762 AssertRC(rc);
763
764 if (pEndpoint->AioMgr.fMoving)
765 {
766 pEndpoint->AioMgr.fMoving = false;
767 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
768 }
769 else
770 {
771 Assert(pAioMgr->fBlockingEventPending);
772 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
773
774 /* Release the waiting thread. */
775 LogFlow(("Signalling waiter\n"));
776 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
777 AssertRC(rc);
778 }
779 }
780
781 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
782 }
783
784 return rc;
785}
786
787/** Helper macro for checking for error codes. */
788#define CHECK_RC(pAioMgr, rc) \
789 if (RT_FAILURE(rc)) \
790 {\
791 int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS);\
792 return rc2;\
793 }
794
795/**
796 * The normal I/O manager using the RTFileAio* API
797 *
798 * @returns VBox status code.
799 * @param ThreadSelf Handle of the thread.
800 * @param pvUser Opaque user data.
801 */
802int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
803{
804 int rc = VINF_SUCCESS;
805 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
806 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
807
808 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
809 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
810 {
811 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
812 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
813 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
814 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
815 AssertRC(rc);
816
817 LogFlow(("Got woken up\n"));
818 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
819
820 /* Check for an external blocking event first. */
821 if (pAioMgr->fBlockingEventPending)
822 {
823 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
824 CHECK_RC(pAioMgr, rc);
825 }
826
827 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
828 {
829 /* We got woken up because an endpoint issued new requests. Queue them. */
830 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
831 CHECK_RC(pAioMgr, rc);
832
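/* Stay in this inner loop while requests are in flight: wait for completions, process them and queue new work, and only go back to sleep on the event semaphore once everything has drained. */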
833 while (pAioMgr->cRequestsActive)
834 {
835 RTFILEAIOREQ apReqs[20];
836 uint32_t cReqsCompleted = 0;
837 size_t cReqsWait;
838
839 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
840 cReqsWait = RT_ELEMENTS(apReqs);
841 else
842 cReqsWait = pAioMgr->cRequestsActive;
843
844 LogFlow(("Waiting for %d of %d tasks to complete\n", cReqsWait, pAioMgr->cRequestsActive));
845
846 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
847 cReqsWait,
848 RT_INDEFINITE_WAIT, apReqs,
849 RT_ELEMENTS(apReqs), &cReqsCompleted);
850 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
851 CHECK_RC(pAioMgr, rc);
852
853 LogFlow(("%d tasks completed\n", cReqsCompleted));
854
855 for (uint32_t i = 0; i < cReqsCompleted; i++)
856 {
857 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
858 size_t cbTransfered = 0;
859 int rcReq = RTFileAioReqGetRC(apReqs[i], &cbTransfered);
860 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(apReqs[i]);
861
862 pEndpoint = pTask->pEndpoint;
863
864 /*
865 * It is possible that the request failed on Linux with kernels < 2.6.23
866 * if the passed buffer was allocated with remap_pfn_range or if the file
867 * is on an NFS endpoint which does not support async and direct I/O at the same time.
868 * The endpoint will be migrated to a failsafe manager in case a request fails.
869 */
870 if (RT_FAILURE(rcReq))
871 {
872 /* Free bounce buffers and the IPRT request. */
873 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
874 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
875
876 pAioMgr->cRequestsActive--;
877 pEndpoint->AioMgr.cRequestsActive--;
878 pEndpoint->AioMgr.cReqsProcessed++;
879
880 if (pTask->fBounceBuffer)
881 RTMemPageFree(pTask->pvBounceBuffer); /* Allocated with RTMemPageAlloc. */
882
883 /* Queue the request on the pending list. */
884 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
885 pEndpoint->AioMgr.pReqsPendingHead = pTask;
886
887 /* Create a new failsafe manager if necessary. */
888 if (!pEndpoint->AioMgr.fMoving)
889 {
890 PPDMACEPFILEMGR pAioMgrFailsafe;
891
892 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
893 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
894
895 pEndpoint->AioMgr.fMoving = true;
896
897 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
898 &pAioMgrFailsafe, true);
899 AssertRC(rc);
900
901 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
902
903 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
904 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
905 }
906
907 /* If this was the last request for the endpoint migrate it to the new manager. */
908 if (!pEndpoint->AioMgr.cRequestsActive)
909 {
910 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
911 Assert(!fReqsPending);
912
913 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
914 AssertRC(rc);
915 }
916 }
917 else
918 {
919 AssertMsg(( (cbTransfered == pTask->DataSeg.cbSeg)
920 || (pTask->fBounceBuffer && (cbTransfered >= pTask->DataSeg.cbSeg))),
921 ("Task didn't complete successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
922
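/* Second half of the read-modify-write done for unaligned writes: the prefetch read has finished, so merge the caller's data into the bounce buffer and submit the real write. */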
923 if (pTask->fPrefetch)
924 {
925 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
926 Assert(pTask->fBounceBuffer);
927
928 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
929 pTask->DataSeg.pvSeg,
930 pTask->DataSeg.cbSeg);
931
932 /* Write it now. */
933 pTask->fPrefetch = false;
934 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
935 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
936
937 /* Grow the file if needed. */
938 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
939 {
940 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
941 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
942 }
943
944 rc = RTFileAioReqPrepareWrite(apReqs[i], pEndpoint->File,
945 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
946 AssertRC(rc);
947 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &apReqs[i], 1);
948 AssertRC(rc);
949 }
950 else
951 {
952 if (pTask->fBounceBuffer)
953 {
954 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
955 memcpy(pTask->DataSeg.pvSeg,
956 ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
957 pTask->DataSeg.cbSeg);
958
959 RTMemPageFree(pTask->pvBounceBuffer);
960 }
961
962 /* Put the entry on the free array */
963 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
964 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
965
966 pAioMgr->cRequestsActive--;
967 pEndpoint->AioMgr.cRequestsActive--;
968 pEndpoint->AioMgr.cReqsProcessed++;
969
970 /* Call completion callback */
971 pTask->pfnCompleted(pTask, pTask->pvUser);
972 pdmacFileTaskFree(pEndpoint, pTask);
973
974 /*
975 * If there are no requests left on the endpoint but a flush request is pending,
976 * it is complete now and we notify the owner.
977 * Furthermore we look for new requests and continue.
978 */
979 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
980 {
981 /* Call completion callback */
982 pTask = pEndpoint->pFlushReq;
983 pEndpoint->pFlushReq = NULL;
984
985 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
986
987 pTask->pfnCompleted(pTask, pTask->pvUser);
988 pdmacFileTaskFree(pEndpoint, pTask);
989 }
990 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
991 {
992 /* If the endpoint is about to be migrated do it now. */
993 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
994 Assert(!fReqsPending);
995
996 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
997 AssertRC(rc);
998 }
999 }
1000 } /* request completed successfully */
1001 } /* for every completed request */
1002
1003 /* Check for an external blocking event before we go to sleep again. */
1004 if (pAioMgr->fBlockingEventPending)
1005 {
1006 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1007 CHECK_RC(pAioMgr, rc);
1008 }
1009
1010 /* Update load statistics. */
1011 uint64_t uMillisCurr = RTTimeMilliTS();
1012 if (uMillisCurr > uMillisEnd)
1013 {
1014 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1015
1016 /* Calculate timespan. */
1017 uMillisCurr -= uMillisEnd;
1018
1019 while (pEndpointCurr)
1020 {
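/* The divisor is in milliseconds, so this is effectively requests per millisecond; within this file it is only used as a relative load metric for sorting and balancing, so the exact unit does not matter. */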
1021 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1022 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1023 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1024 }
1025
1026 /* Set new update interval */
1027 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1028 }
1029
1030 /* Check endpoints for new requests. */
1031 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1032 CHECK_RC(pAioMgr, rc);
1033 } /* while requests are active. */
1034 } /* if still running */
1035 } /* while running */
1036
1037 return rc;
1038}
1039
1040#undef CHECK_RC
1041