VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 24327

Last change on this file since 24327 was 23959, checked in by vboxsync, 15 years ago

PDMAsyncCompletion: Disable async I/O on linux if opening fails because it needs O_DIRECT too. Disable async I/O and enable the host cache if a request fails and the endpoint is migrated to a failsafe manager

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 40.6 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 23959 2009-10-22 08:38:31Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#define RT_STRICT
24#include <iprt/types.h>
25#include <iprt/asm.h>
26#include <iprt/file.h>
27#include <iprt/mem.h>
28#include <iprt/string.h>
29#include <iprt/assert.h>
30#include <VBox/log.h>
31
32#include "PDMAsyncCompletionFileInternal.h"
33
34/** The update period for the I/O load statistics in ms. */
35#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
36/** Maximum number of requests a manager will handle. */
37#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo: Find better solution wrt. the request number*/
38
39int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
40{
41 int rc = VINF_SUCCESS;
42
43 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
44 if (rc == VERR_OUT_OF_RANGE)
45 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
46
47 if (RT_SUCCESS(rc))
48 {
49 /* Initialize request handle array. */
50 pAioMgr->iFreeEntryNext = 0;
51 pAioMgr->iFreeReqNext = 0;
52 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
53 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
54
55 if (pAioMgr->pahReqsFree)
56 {
57 return VINF_SUCCESS;
58 }
59 else
60 {
61 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
62 rc = VERR_NO_MEMORY;
63 }
64 }
65
66 return rc;
67}
68
69void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
70{
71 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
72
73 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
74 {
75 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
76 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
77 }
78
79 RTMemFree(pAioMgr->pahReqsFree);
80}
81
82/**
83 * Sorts the endpoint list with insertion sort.
84 */
85static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
86{
87 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
88
89 pEpPrev = pAioMgr->pEndpointsHead;
90 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
91
92 while (pEpCurr)
93 {
94 /* Remember the next element to sort because the list might change. */
95 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
96
97 /* Unlink the current element from the list. */
98 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
99 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
100
101 if (pPrev)
102 pPrev->AioMgr.pEndpointNext = pNext;
103 else
104 pAioMgr->pEndpointsHead = pNext;
105
106 if (pNext)
107 pNext->AioMgr.pEndpointPrev = pPrev;
108
109 /* Go back until we reached the place to insert the current endpoint into. */
110 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
111 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
112
113 /* Link the endpoint into the list. */
114 if (pEpPrev)
115 pNext = pEpPrev->AioMgr.pEndpointNext;
116 else
117 pNext = pAioMgr->pEndpointsHead;
118
119 pEpCurr->AioMgr.pEndpointNext = pNext;
120 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
121 pNext->AioMgr.pEndpointPrev = pEpCurr;
122 if (pEpPrev)
123 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
124 else
125 pAioMgr->pEndpointsHead = pEpCurr;
126
127 pEpCurr = pEpNextToSort;
128 }
129
130#ifdef DEBUG
131 /* Validate sorting alogrithm */
132 unsigned cEndpoints = 0;
133 pEpCurr = pAioMgr->pEndpointsHead;
134
135 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
136 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
137
138 while (pEpCurr)
139 {
140 cEndpoints++;
141
142 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
143 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
144
145 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
146 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
147
148 pEpCurr = pNext;
149 }
150
151 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
152
153#endif
154}
155
156/**
157 * Removes an endpoint from the currently assigned manager.
158 *
159 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
160 * FALSE otherwise.
161 * @param pEndpointRemove The endpoint to remove.
162 */
163static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
164{
165 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
166 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
167 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
168
169 pAioMgr->cEndpoints--;
170
171 if (pPrev)
172 pPrev->AioMgr.pEndpointNext = pNext;
173 else
174 pAioMgr->pEndpointsHead = pNext;
175
176 if (pNext)
177 pNext->AioMgr.pEndpointPrev = pPrev;
178
179 /* Make sure that there is no request pending on this manager for the endpoint. */
180 if (!pEndpointRemove->AioMgr.cRequestsActive)
181 {
182 Assert(!pEndpointRemove->pFlushReq);
183
184 /* Reopen the file so that the new endpoint can reassociate with the file */
185 RTFileClose(pEndpointRemove->File);
186 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
187 AssertRC(rc);
188 return false;
189 }
190
191 return true;
192}
193
194/**
195 * Creates a new I/O manager and spreads the I/O load of the endpoints
196 * between the given I/O manager and the new one.
197 *
198 * @returns nothing.
199 * @param pAioMgr The I/O manager with high I/O load.
200 */
201static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
202{
203 PPDMACEPFILEMGR pAioMgrNew = NULL;
204 int rc = VINF_SUCCESS;
205
206 /* Splitting can't be done with only one open endpoint. */
207 if (pAioMgr->cEndpoints > 1)
208 {
209 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
210 &pAioMgrNew, false);
211 if (RT_SUCCESS(rc))
212 {
213 /* We will sort the list by request count per second. */
214 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
215
216 /* Now move some endpoints to the new manager. */
217 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
218 unsigned cReqsOther = 0;
219 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
220
221 while (pCurr)
222 {
223 if (cReqsHere <= cReqsOther)
224 {
225 /*
226 * The other manager has more requests to handle now.
227 * We will keep the current endpoint.
228 */
229 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
230 cReqsHere += pCurr->AioMgr.cReqsPerSec;
231 pCurr = pCurr->AioMgr.pEndpointNext;
232 }
233 else
234 {
235 /* Move to other endpoint. */
236 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
237 cReqsOther += pCurr->AioMgr.cReqsPerSec;
238
239 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
240
241 pCurr = pCurr->AioMgr.pEndpointNext;
242
243 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
244
245 if (fReqsPending)
246 {
247 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
248 pMove->AioMgr.fMoving = true;
249 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
250 }
251 else
252 {
253 pMove->AioMgr.fMoving = false;
254 pMove->AioMgr.pAioMgrDst = NULL;
255 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
256 }
257 }
258 }
259 }
260 else
261 {
262 /* Don't process further but leave a log entry about reduced performance. */
263 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
264 }
265 }
266}
267
268/**
269 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
270 *
271 * @returns VBox status code
272 * @param pAioMgr The I/O manager the error ocurred on.
273 * @param rc The error code.
274 */
275static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
276{
277 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
278 pAioMgr, rc));
279 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
280 LogRel(("AIOMgr: Please contact the product vendor\n"));
281
282 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
283
284 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
285 ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);
286
287 AssertMsgFailed(("Implement\n"));
288 return VINF_SUCCESS;
289}
290
291/**
292 * Put a list of tasks in the pending request list of an endpoint.
293 */
294DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
295{
296 /* Add the rest of the tasks to the pending list */
297 if (!pEndpoint->AioMgr.pReqsPendingHead)
298 {
299 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
300 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
301 }
302 else
303 {
304 Assert(pEndpoint->AioMgr.pReqsPendingTail);
305 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
306 }
307
308 /* Update the tail. */
309 while (pTaskHead->pNext)
310 pTaskHead = pTaskHead->pNext;
311
312 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
313}
314
315/**
316 * Put one task in the pending request list of an endpoint.
317 */
318DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
319{
320 /* Add the rest of the tasks to the pending list */
321 if (!pEndpoint->AioMgr.pReqsPendingHead)
322 {
323 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
324 pEndpoint->AioMgr.pReqsPendingHead = pTask;
325 }
326 else
327 {
328 Assert(pEndpoint->AioMgr.pReqsPendingTail);
329 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
330 }
331
332 pEndpoint->AioMgr.pReqsPendingTail = pTask;
333}
334
335/**
336 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
337 */
338static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
339 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
340 PRTFILEAIOREQ pahReqs, size_t cReqs)
341{
342 int rc;
343
344 pAioMgr->cRequestsActive += cReqs;
345 pEndpoint->AioMgr.cRequestsActive += cReqs;
346
347 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
348 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
349
350 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
351 if (RT_FAILURE(rc))
352 {
353 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
354 {
355 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
356
357 /*
358 * We run out of resources.
359 * Need to check which requests got queued
360 * and put the rest on the pending list again.
361 */
362 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
363 {
364 pEpClass->fOutOfResourcesWarningPrinted = true;
365 LogRel(("AIOMgr: The operating system doesn't have enough resources "
366 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
367 }
368
369 for (size_t i = 0; i < cReqs; i++)
370 {
371 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
372
373 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
374 {
375 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
376 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
377
378 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
379
380 /* Put the entry on the free array */
381 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
382 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
383
384 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
385 pAioMgr->cRequestsActive--;
386 pEndpoint->AioMgr.cRequestsActive--;
387 }
388 }
389 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
390 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
391 }
392 else
393 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
394 }
395
396 return rc;
397}
398
399static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
400 PPDMACEPFILEMGR pAioMgr,
401 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
402{
403 RTFILEAIOREQ apReqs[20];
404 unsigned cRequests = 0;
405 unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
406 int rc = VINF_SUCCESS;
407 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
408
409 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
410 ("Trying to process request lists of a non active endpoint!\n"));
411
412 /* Go through the list and queue the requests until we get a flush request */
413 while ( pTaskHead
414 && !pEndpoint->pFlushReq
415 && (cMaxRequests > 0)
416 && RT_SUCCESS(rc))
417 {
418 PPDMACTASKFILE pCurr = pTaskHead;
419
420 pTaskHead = pTaskHead->pNext;
421
422 pCurr->pNext = NULL;
423
424 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
425 ("Endpoints do not match\n"));
426
427 switch (pCurr->enmTransferType)
428 {
429 case PDMACTASKFILETRANSFER_FLUSH:
430 {
431 /* If there is no data transfer request this flush request finished immediately. */
432 if (!pEndpoint->AioMgr.cRequestsActive)
433 {
434 pCurr->pfnCompleted(pCurr, pCurr->pvUser);
435 pdmacFileTaskFree(pEndpoint, pCurr);
436 }
437 else
438 {
439 pEndpoint->pFlushReq = pCurr;
440 }
441 break;
442 }
443 case PDMACTASKFILETRANSFER_READ:
444 case PDMACTASKFILETRANSFER_WRITE:
445 {
446 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
447 void *pvBuf = pCurr->DataSeg.pvSeg;
448
449 /* Get a request handle. */
450 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
451 {
452 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
453 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
454 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
455 }
456 else
457 {
458 rc = RTFileAioReqCreate(&hReq);
459 AssertRC(rc);
460 }
461
462 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
463
464 /* Check if the alignment requirements are met.
465 * Offset, transfer size and buffer address
466 * need to be on a 512 boundary. */
467 RTFOFF offStart = pCurr->Off & ~(RTFOFF)(512-1);
468 size_t cbToTransfer = RT_ALIGN_Z(pCurr->DataSeg.cbSeg + (pCurr->Off - offStart), 512);
469 PDMACTASKFILETRANSFER enmTransferType = pCurr->enmTransferType;
470
471 AssertMsg( pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE
472 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
473 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
474 offStart, cbToTransfer, pEndpoint->cbFile));
475
476 pCurr->fPrefetch = false;
477
478 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
479 || RT_UNLIKELY(offStart != pCurr->Off)
480 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
481 {
482 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
483 pCurr, cbToTransfer, pCurr->DataSeg.cbSeg, offStart, pCurr->Off));
484
485 /* Create bounce buffer. */
486 pCurr->fBounceBuffer = true;
487
488 AssertMsg(pCurr->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
489 pCurr->Off, offStart));
490 pCurr->uBounceBufOffset = pCurr->Off - offStart;
491
492 /** @todo: I think we need something like a RTMemAllocAligned method here.
493 * Current assumption is that the maximum alignment is 4096byte
494 * (GPT disk on Windows)
495 * so we can use RTMemPageAlloc here.
496 */
497 pCurr->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
498 AssertPtr(pCurr->pvBounceBuffer);
499 pvBuf = pCurr->pvBounceBuffer;
500
501 if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
502 {
503 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
504 || RT_UNLIKELY(offStart != pCurr->Off))
505 {
506 /* We have to fill the buffer first before we can update the data. */
507 LogFlow(("Prefetching data for task %#p\n", pCurr));
508 pCurr->fPrefetch = true;
509 enmTransferType = PDMACTASKFILETRANSFER_READ;
510 }
511 else
512 memcpy(pvBuf, pCurr->DataSeg.pvSeg, pCurr->DataSeg.cbSeg);
513 }
514 }
515 else
516 pCurr->fBounceBuffer = false;
517
518 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
519 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
520
521 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
522 {
523 /* Grow the file if needed. */
524 if (RT_UNLIKELY((uint64_t)(pCurr->Off + pCurr->DataSeg.cbSeg) > pEndpoint->cbFile))
525 {
526 ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg);
527 RTFileSetSize(pEndpoint->File, pCurr->Off + pCurr->DataSeg.cbSeg);
528 }
529
530 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
531 offStart, pvBuf, cbToTransfer, pCurr);
532 }
533 else
534 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
535 offStart, pvBuf, cbToTransfer, pCurr);
536 AssertRC(rc);
537
538 apReqs[cRequests] = hReq;
539 pEndpoint->AioMgr.cReqsProcessed++;
540 cMaxRequests--;
541 cRequests++;
542 if (cRequests == RT_ELEMENTS(apReqs))
543 {
544 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
545 cRequests = 0;
546 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
547 ("Unexpected return code\n"));
548 }
549 break;
550 }
551 default:
552 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
553 }
554 }
555
556 if (cRequests)
557 {
558 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
559 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
560 ("Unexpected return code rc=%Rrc\n", rc));
561 }
562
563 if (pTaskHead)
564 {
565 /* Add the rest of the tasks to the pending list */
566 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
567
568 if (RT_UNLIKELY(!cMaxRequests && !pEndpoint->pFlushReq))
569 {
570 /*
571 * The I/O manager has no room left for more requests
572 * but there are still requests to process.
573 * Create a new I/O manager and let it handle some endpoints.
574 */
575 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
576 }
577 }
578
579 /* Insufficient resources are not fatal. */
580 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
581 rc = VINF_SUCCESS;
582
583 return rc;
584}
585
586/**
587 * Adds all pending requests for the given endpoint
588 * until a flush request is encountered or there is no
589 * request anymore.
590 *
591 * @returns VBox status code.
592 * @param pAioMgr The async I/O manager for the endpoint
593 * @param pEndpoint The endpoint to get the requests from.
594 */
595static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
596 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
597{
598 int rc = VINF_SUCCESS;
599 PPDMACTASKFILE pTasksHead = NULL;
600
601 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
602 ("Trying to process request lists of a non active endpoint!\n"));
603
604 Assert(!pEndpoint->pFlushReq);
605
606 /* Check the pending list first */
607 if (pEndpoint->AioMgr.pReqsPendingHead)
608 {
609 LogFlow(("Queuing pending requests first\n"));
610
611 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
612 /*
613 * Clear the list as the processing routine will insert them into the list
614 * again if it gets a flush request.
615 */
616 pEndpoint->AioMgr.pReqsPendingHead = NULL;
617 pEndpoint->AioMgr.pReqsPendingTail = NULL;
618 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
619 AssertRC(rc);
620 }
621
622 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
623 {
624 /* Now the request queue. */
625 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
626 if (pTasksHead)
627 {
628 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
629 AssertRC(rc);
630 }
631 }
632
633 return rc;
634}
635
636static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
637{
638 int rc = VINF_SUCCESS;
639 bool fNotifyWaiter = false;
640
641 LogFlowFunc((": Enter\n"));
642
643 Assert(pAioMgr->fBlockingEventPending);
644
645 switch (pAioMgr->enmBlockingEvent)
646 {
647 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
648 {
649 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
650 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
651
652 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
653
654 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
655 pEndpointNew->AioMgr.pEndpointPrev = NULL;
656 if (pAioMgr->pEndpointsHead)
657 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
658 pAioMgr->pEndpointsHead = pEndpointNew;
659
660 /* Assign the completion point to this file. */
661 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
662 fNotifyWaiter = true;
663 pAioMgr->cEndpoints++;
664 break;
665 }
666 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
667 {
668 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
669 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
670
671 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
672 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
673 break;
674 }
675 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
676 {
677 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
678 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
679
680 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
681
682 /* Make sure all tasks finished. Process the queues a last time first. */
683 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
684 AssertRC(rc);
685
686 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
687 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
688 break;
689 }
690 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
691 {
692 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
693 if (!pAioMgr->cRequestsActive)
694 fNotifyWaiter = true;
695 break;
696 }
697 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
698 {
699 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
700 break;
701 }
702 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
703 {
704 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
705 fNotifyWaiter = true;
706 break;
707 }
708 default:
709 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
710 }
711
712 if (fNotifyWaiter)
713 {
714 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
715 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
716
717 /* Release the waiting thread. */
718 LogFlow(("Signalling waiter\n"));
719 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
720 AssertRC(rc);
721 }
722
723 LogFlowFunc((": Leave\n"));
724 return rc;
725}
726
727/**
728 * Checks all endpoints for pending events or new requests.
729 *
730 * @returns VBox status code.
731 * @param pAioMgr The I/O manager handle.
732 */
733static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
734{
735 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
736 int rc = VINF_SUCCESS;
737 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
738
739 while (pEndpoint)
740 {
741 if (!pEndpoint->pFlushReq && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE))
742 {
743 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
744 if (RT_FAILURE(rc))
745 return rc;
746 }
747 else if (!pEndpoint->AioMgr.cRequestsActive)
748 {
749 /* Reopen the file so that the new endpoint can reassociate with the file */
750 RTFileClose(pEndpoint->File);
751 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
752 AssertRC(rc);
753
754 if (pEndpoint->AioMgr.fMoving)
755 {
756 pEndpoint->AioMgr.fMoving = false;
757 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
758 }
759 else
760 {
761 Assert(pAioMgr->fBlockingEventPending);
762 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
763
764 /* Release the waiting thread. */
765 LogFlow(("Signalling waiter\n"));
766 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
767 AssertRC(rc);
768 }
769 }
770
771 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
772 }
773
774 return rc;
775}
776
/**
 * Helper macro for checking for error codes.
 *
 * If rc is a failure status the error handler is called and the enclosing
 * function returns the status of the handler. Wrapped in do/while(0) so the
 * macro behaves like a single statement, e.g. as the unbraced body of an if.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
784
785/**
786 * The normal I/O manager using the RTFileAio* API
787 *
788 * @returns VBox status code.
789 * @param ThreadSelf Handle of the thread.
790 * @param pvUser Opaque user data.
791 */
792int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
793{
794 int rc = VINF_SUCCESS;
795 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
796 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
797
798 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
799 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
800 {
801 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
802 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
803 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
804 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
805 AssertRC(rc);
806
807 LogFlow(("Got woken up\n"));
808 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
809
810 /* Check for an external blocking event first. */
811 if (pAioMgr->fBlockingEventPending)
812 {
813 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
814 CHECK_RC(pAioMgr, rc);
815 }
816
817 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
818 {
819 /* We got woken up because an endpoint issued new requests. Queue them. */
820 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
821 CHECK_RC(pAioMgr, rc);
822
823 while (pAioMgr->cRequestsActive)
824 {
825 RTFILEAIOREQ apReqs[20];
826 uint32_t cReqsCompleted = 0;
827 size_t cReqsWait;
828
829 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
830 cReqsWait = RT_ELEMENTS(apReqs);
831 else
832 cReqsWait = pAioMgr->cRequestsActive;
833
834 LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));
835
836 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
837 cReqsWait,
838 RT_INDEFINITE_WAIT, apReqs,
839 RT_ELEMENTS(apReqs), &cReqsCompleted);
840 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
841 CHECK_RC(pAioMgr, rc);
842
843 LogFlow(("%d tasks completed\n", cReqsCompleted));
844
845 for (uint32_t i = 0; i < cReqsCompleted; i++)
846 {
847 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
848 size_t cbTransfered = 0;
849 int rcReq = RTFileAioReqGetRC(apReqs[i], &cbTransfered);
850 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(apReqs[i]);
851
852 pEndpoint = pTask->pEndpoint;
853
854 /*
855 * It is possible that the request failed on Linux with kernels < 2.6.23
856 * if the passed buffer was allocated with remap_pfn_range or if the file
857 * is on an NFS endpoint which does not support async and direct I/O at the same time.
858 * The endpoint will be migrated to a failsafe manager in case a request fails.
859 */
860 if (RT_FAILURE(rcReq))
861 {
862 /* Free bounce buffers and the IPRT request. */
863 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
864 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
865
866 pAioMgr->cRequestsActive--;
867 pEndpoint->AioMgr.cRequestsActive--;
868 pEndpoint->AioMgr.cReqsProcessed++;
869
870 if (pTask->fBounceBuffer)
871 RTMemFree(pTask->pvBounceBuffer);
872
873 /* Queue the request on the pending list. */
874 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
875 pEndpoint->AioMgr.pReqsPendingHead = pTask;
876
877 /* Create a new failsafe manager if neccessary. */
878 if (!pEndpoint->AioMgr.fMoving)
879 {
880 PPDMACEPFILEMGR pAioMgrFailsafe;
881
882 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
883 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
884
885 pEndpoint->AioMgr.fMoving = true;
886
887 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
888 &pAioMgrFailsafe, true);
889 AssertRC(rc);
890
891 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
892
893 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
894 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
895 }
896
897 /* If this was the last request for the endpoint migrate it to the new manager. */
898 if (!pEndpoint->AioMgr.cRequestsActive)
899 {
900 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
901 Assert(!fReqsPending);
902
903 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
904 AssertRC(rc);
905 }
906 }
907 else
908 {
909 AssertMsg(( (cbTransfered == pTask->DataSeg.cbSeg)
910 || (pTask->fBounceBuffer && (cbTransfered >= pTask->DataSeg.cbSeg))),
911 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
912
913 if (pTask->fPrefetch)
914 {
915 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
916 Assert(pTask->fBounceBuffer);
917
918 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
919 pTask->DataSeg.pvSeg,
920 pTask->DataSeg.cbSeg);
921
922 /* Write it now. */
923 pTask->fPrefetch = false;
924 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
925 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
926
927 /* Grow the file if needed. */
928 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
929 {
930 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
931 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
932 }
933
934 rc = RTFileAioReqPrepareWrite(apReqs[i], pEndpoint->File,
935 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
936 AssertRC(rc);
937 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &apReqs[i], 1);
938 AssertRC(rc);
939 }
940 else
941 {
942 if (pTask->fBounceBuffer)
943 {
944 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
945 memcpy(pTask->DataSeg.pvSeg,
946 ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
947 pTask->DataSeg.cbSeg);
948
949 RTMemPageFree(pTask->pvBounceBuffer);
950 }
951
952 /* Put the entry on the free array */
953 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
954 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
955
956 pAioMgr->cRequestsActive--;
957 pEndpoint->AioMgr.cRequestsActive--;
958 pEndpoint->AioMgr.cReqsProcessed++;
959
960 /* Call completion callback */
961 pTask->pfnCompleted(pTask, pTask->pvUser);
962 pdmacFileTaskFree(pEndpoint, pTask);
963
964 /*
965 * If there is no request left on the endpoint but a flush request is set
966 * it completed now and we notify the owner.
967 * Furthermore we look for new requests and continue.
968 */
969 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
970 {
971 /* Call completion callback */
972 pTask = pEndpoint->pFlushReq;
973 pEndpoint->pFlushReq = NULL;
974
975 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
976
977 pTask->pfnCompleted(pTask, pTask->pvUser);
978 pdmacFileTaskFree(pEndpoint, pTask);
979 }
980 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
981 {
982 /* If the endpoint is about to be migrated do it now. */
983 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
984 Assert(!fReqsPending);
985
986 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
987 AssertRC(rc);
988 }
989 }
990 } /* request completed successfully */
991 } /* for every completed request */
992
993 /* Check for an external blocking event before we go to sleep again. */
994 if (pAioMgr->fBlockingEventPending)
995 {
996 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
997 CHECK_RC(pAioMgr, rc);
998 }
999
1000 /* Update load statistics. */
1001 uint64_t uMillisCurr = RTTimeMilliTS();
1002 if (uMillisCurr > uMillisEnd)
1003 {
1004 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1005
1006 /* Calculate timespan. */
1007 uMillisCurr -= uMillisEnd;
1008
1009 while (pEndpointCurr)
1010 {
1011 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1012 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1013 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1014 }
1015
1016 /* Set new update interval */
1017 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1018 }
1019
1020 /* Check endpoints for new requests. */
1021 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1022 CHECK_RC(pAioMgr, rc);
1023 } /* while requests are active. */
1024 } /* if still running */
1025 } /* while running */
1026
1027 return rc;
1028}
1029
1030#undef CHECK_RC
1031
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette