VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/linux/fileaio-linux.cpp@ 28863

Last change on this file since 28863 was 28800, checked in by vboxsync, 15 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 25.9 KB
Line 
1/* $Id: fileaio-linux.cpp 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * IPRT - File async I/O, native implementation for the Linux host platform.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/** @page pg_rtfileaio_linux RTFile Async I/O - Linux Implementation Notes
28 * @internal
29 *
30 * Linux implements the kernel async I/O API through the io_* syscalls. They are
31 * not exposed in the glibc (the aio_* API uses userspace threads and blocking
32 * I/O operations to simulate async behavior). There is an external library
33 * called libaio which implements these syscalls but because we don't want to
34 * have another dependency and this library is not installed by default and the
35 * interface is really simple we use the kernel interface directly using wrapper
36 * functions.
37 *
38 * The interface has some limitations. The first one is that the file must be
39 * opened with O_DIRECT. This disables caching done by the kernel which can be
40 * compensated if the user of this API implements caching itself. The next
41 * limitation is that data buffers must be aligned at a 512 byte boundary or the
42 * request will fail.
43 */
44/** @todo r=bird: What's this about "must be opened with O_DIRECT"? An
45 * explanation would be nice, esp. seeing what Linus is quoted saying
46 * about it in the open man page... */
47
48/*******************************************************************************
49* Header Files *
50*******************************************************************************/
51#define LOG_GROUP RTLOGGROUP_FILE
52#include <iprt/asm.h>
53#include <iprt/mem.h>
54#include <iprt/assert.h>
55#include <iprt/string.h>
56#include <iprt/err.h>
57#include <iprt/log.h>
58#include <iprt/thread.h>
59#include "internal/fileaio.h"
60
61#include <unistd.h>
62#include <sys/syscall.h>
63#include <errno.h>
64
65#include <iprt/file.h>
66
67
68/*******************************************************************************
69* Structures and Typedefs *
70*******************************************************************************/
/** Kernel async I/O context handle, as passed to the io_* syscall wrappers. */
typedef unsigned long LNXKAIOCONTEXT;

/**
 * Opcodes for LNXKAIOIOCB::u16IoOpCode.
 */
enum
{
    /** Read request. */
    LNXKAIO_IOCB_CMD_READ  = 0,
    /** Write request. */
    LNXKAIO_IOCB_CMD_WRITE = 1
};
82
83/**
84 * The iocb structure of a request which is passed to the kernel.
85 *
86 * We redefined this here because the version in the header lacks padding
87 * for 32bit.
88 */
89typedef struct LNXKAIOIOCB
90{
91 /** Opaque pointer to data which is returned on an I/O event. */
92 void *pvUser;
93#ifdef RT_ARCH_X86
94 uint32_t u32Padding0;
95#endif
96 /** Contains the request number and is set by the kernel. */
97 uint32_t u32Key;
98 /** Reserved. */
99 uint32_t u32Reserved0;
100 /** The I/O opcode. */
101 uint16_t u16IoOpCode;
102 /** Request priority. */
103 int16_t i16Priority;
104 /** The file descriptor. */
105 uint32_t File;
106 /** The userspace pointer to the buffer containing/receiving the data. */
107 void *pvBuf;
108#ifdef RT_ARCH_X86
109 uint32_t u32Padding1;
110#endif
111 /** How many bytes to transfer. */
112#ifdef RT_ARCH_X86
113 uint32_t cbTransfer;
114 uint32_t u32Padding2;
115#elif defined(RT_ARCH_AMD64)
116 uint64_t cbTransfer;
117#else
118# error "Unknown architecture"
119#endif
120 /** At which offset to start the transfer. */
121 int64_t off;
122 /** Reserved. */
123 uint64_t u64Reserved1;
124 /** Flags */
125 uint32_t fFlags;
126 /** Readyness signal file descriptor. */
127 uint32_t u32ResFd;
128} LNXKAIOIOCB, *PLNXKAIOIOCB;
129
130/**
131 * I/O event structure to notify about completed requests.
132 * Redefined here too because of the padding.
133 */
134typedef struct LNXKAIOIOEVENT
135{
136 /** The pvUser field from the iocb. */
137 void *pvUser;
138#ifdef RT_ARCH_X86
139 uint32_t u32Padding0;
140#endif
141 /** The LNXKAIOIOCB object this event is for. */
142 PLNXKAIOIOCB *pIoCB;
143#ifdef RT_ARCH_X86
144 uint32_t u32Padding1;
145#endif
146 /** The result code of the operation .*/
147#ifdef RT_ARCH_X86
148 int32_t rc;
149 uint32_t u32Padding2;
150#elif defined(RT_ARCH_AMD64)
151 int64_t rc;
152#else
153# error "Unknown architecture"
154#endif
155 /** Secondary result code. */
156#ifdef RT_ARCH_X86
157 int32_t rc2;
158 uint32_t u32Padding3;
159#elif defined(RT_ARCH_AMD64)
160 int64_t rc2;
161#else
162# error "Unknown architecture"
163#endif
164} LNXKAIOIOEVENT, *PLNXKAIOIOEVENT;
165
166
167/**
168 * Async I/O completion context state.
169 */
170typedef struct RTFILEAIOCTXINTERNAL
171{
172 /** Handle to the async I/O context. */
173 LNXKAIOCONTEXT AioContext;
174 /** Maximum number of requests this context can handle. */
175 int cRequestsMax;
176 /** Current number of requests active on this context. */
177 volatile int32_t cRequests;
178 /** The ID of the thread which is currently waiting for requests. */
179 volatile RTTHREAD hThreadWait;
180 /** Flag whether the thread was woken up. */
181 volatile bool fWokenUp;
182 /** Flag whether the thread is currently waiting in the syscall. */
183 volatile bool fWaiting;
184 /** Magic value (RTFILEAIOCTX_MAGIC). */
185 uint32_t u32Magic;
186} RTFILEAIOCTXINTERNAL;
187/** Pointer to an internal context structure. */
188typedef RTFILEAIOCTXINTERNAL *PRTFILEAIOCTXINTERNAL;
189
190/**
191 * Async I/O request state.
192 */
193typedef struct RTFILEAIOREQINTERNAL
194{
195 /** The aio control block. This must be the FIRST elment in
196 * the structure! (see notes below) */
197 LNXKAIOIOCB AioCB;
198 /** Current state the request is in. */
199 RTFILEAIOREQSTATE enmState;
200 /** The I/O context this request is associated with. */
201 LNXKAIOCONTEXT AioContext;
202 /** Return code the request completed with. */
203 int Rc;
204 /** Number of bytes actually trasnfered. */
205 size_t cbTransfered;
206 /** Completion context we are assigned to. */
207 PRTFILEAIOCTXINTERNAL pCtxInt;
208 /** Magic value (RTFILEAIOREQ_MAGIC). */
209 uint32_t u32Magic;
210} RTFILEAIOREQINTERNAL;
211/** Pointer to an internal request structure. */
212typedef RTFILEAIOREQINTERNAL *PRTFILEAIOREQINTERNAL;
213
214
215/*******************************************************************************
216* Defined Constants And Macros *
217*******************************************************************************/
/** Maximum number of events fetched by a single io_getevents call; also the
 * size of the on-stack event array in RTFileAioCtxWait. */
#define AIO_MAXIMUM_REQUESTS_PER_CONTEXT 64
220
221
222/**
223 * Creates a new async I/O context.
224 */
225DECLINLINE(int) rtFileAsyncIoLinuxCreate(unsigned cEvents, LNXKAIOCONTEXT *pAioContext)
226{
227 int rc = syscall(__NR_io_setup, cEvents, pAioContext);
228 if (RT_UNLIKELY(rc == -1))
229 return RTErrConvertFromErrno(errno);
230
231 return VINF_SUCCESS;
232}
233
234/**
235 * Destroys a async I/O context.
236 */
237DECLINLINE(int) rtFileAsyncIoLinuxDestroy(LNXKAIOCONTEXT AioContext)
238{
239 int rc = syscall(__NR_io_destroy, AioContext);
240 if (RT_UNLIKELY(rc == -1))
241 return RTErrConvertFromErrno(errno);
242
243 return VINF_SUCCESS;
244}
245
246/**
247 * Submits an array of I/O requests to the kernel.
248 */
249DECLINLINE(int) rtFileAsyncIoLinuxSubmit(LNXKAIOCONTEXT AioContext, long cReqs, LNXKAIOIOCB **ppIoCB, int *pcSubmitted)
250{
251 int rc = syscall(__NR_io_submit, AioContext, cReqs, ppIoCB);
252 if (RT_UNLIKELY(rc == -1))
253 return RTErrConvertFromErrno(errno);
254
255 *pcSubmitted = rc;
256
257 return VINF_SUCCESS;
258}
259
260/**
261 * Cancels a I/O request.
262 */
263DECLINLINE(int) rtFileAsyncIoLinuxCancel(LNXKAIOCONTEXT AioContext, PLNXKAIOIOCB pIoCB, PLNXKAIOIOEVENT pIoResult)
264{
265 int rc = syscall(__NR_io_cancel, AioContext, pIoCB, pIoResult);
266 if (RT_UNLIKELY(rc == -1))
267 return RTErrConvertFromErrno(errno);
268
269 return VINF_SUCCESS;
270}
271
272/**
273 * Waits for I/O events.
274 * @returns Number of events (natural number w/ 0), IPRT error code (negative).
275 */
276DECLINLINE(int) rtFileAsyncIoLinuxGetEvents(LNXKAIOCONTEXT AioContext, long cReqsMin, long cReqs,
277 PLNXKAIOIOEVENT paIoResults, struct timespec *pTimeout)
278{
279 int rc = syscall(__NR_io_getevents, AioContext, cReqsMin, cReqs, paIoResults, pTimeout);
280 if (RT_UNLIKELY(rc == -1))
281 return RTErrConvertFromErrno(errno);
282
283 return rc;
284}
285
286RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
287{
288 int rc = VINF_SUCCESS;
289 AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
290
291 /*
292 * Check if the API is implemented by creating a
293 * completion port.
294 */
295 LNXKAIOCONTEXT AioContext = 0;
296 rc = rtFileAsyncIoLinuxCreate(1, &AioContext);
297 if (RT_FAILURE(rc))
298 return rc;
299
300 rc = rtFileAsyncIoLinuxDestroy(AioContext);
301 if (RT_FAILURE(rc))
302 return rc;
303
304 /* Supported - fill in the limits. The alignment is the only restriction. */
305 pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
306 pAioLimits->cbBufferAlignment = 512;
307
308 return VINF_SUCCESS;
309}
310
311
312RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
313{
314 AssertPtrReturn(phReq, VERR_INVALID_POINTER);
315
316 /*
317 * Allocate a new request and initialize it.
318 */
319 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(*pReqInt));
320 if (RT_UNLIKELY(!pReqInt))
321 return VERR_NO_MEMORY;
322
323 pReqInt->pCtxInt = NULL;
324 pReqInt->u32Magic = RTFILEAIOREQ_MAGIC;
325 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
326
327 *phReq = (RTFILEAIOREQ)pReqInt;
328 return VINF_SUCCESS;
329}
330
331
332RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
333{
334 /*
335 * Validate the handle and ignore nil.
336 */
337 if (hReq == NIL_RTFILEAIOREQ)
338 return VINF_SUCCESS;
339 PRTFILEAIOREQINTERNAL pReqInt = hReq;
340 RTFILEAIOREQ_VALID_RETURN(pReqInt);
341 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
342
343 /*
344 * Trash the magic and free it.
345 */
346 ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
347 RTMemFree(pReqInt);
348 return VINF_SUCCESS;
349}
350
351
352/**
353 * Worker setting up the request.
354 */
355DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
356 uint16_t uTransferDirection,
357 RTFOFF off, void *pvBuf, size_t cbTransfer,
358 void *pvUser)
359{
360 /*
361 * Validate the input.
362 */
363 PRTFILEAIOREQINTERNAL pReqInt = hReq;
364 RTFILEAIOREQ_VALID_RETURN(pReqInt);
365 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
366 Assert(hFile != NIL_RTFILE);
367 AssertPtr(pvBuf);
368 Assert(off >= 0);
369 Assert(cbTransfer > 0);
370
371 /*
372 * Setup the control block and clear the finished flag.
373 */
374 pReqInt->AioCB.u16IoOpCode = uTransferDirection;
375 pReqInt->AioCB.File = (uint32_t)hFile;
376 pReqInt->AioCB.off = off;
377 pReqInt->AioCB.cbTransfer = cbTransfer;
378 pReqInt->AioCB.pvBuf = pvBuf;
379 pReqInt->AioCB.pvUser = pvUser;
380
381 pReqInt->pCtxInt = NULL;
382 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
383
384 return VINF_SUCCESS;
385}
386
387
388RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
389 void *pvBuf, size_t cbRead, void *pvUser)
390{
391 return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_READ,
392 off, pvBuf, cbRead, pvUser);
393}
394
395
396RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
397 void const *pvBuf, size_t cbWrite, void *pvUser)
398{
399 return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_WRITE,
400 off, (void *)pvBuf, cbWrite, pvUser);
401}
402
403
404RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
405{
406 PRTFILEAIOREQINTERNAL pReqInt = hReq;
407 RTFILEAIOREQ_VALID_RETURN(pReqInt);
408 AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_HANDLE);
409 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
410
411 /** @todo: Flushing is not neccessary on Linux because O_DIRECT is mandatory
412 * which disables caching.
413 * We could setup a fake request which isn't really executed
414 * to avoid platform dependent code in the caller.
415 */
416#if 0
417 return rtFileAsyncPrepareTransfer(pRequest, File, TRANSFERDIRECTION_FLUSH,
418 0, NULL, 0, pvUser);
419#endif
420 return VERR_NOT_IMPLEMENTED;
421}
422
423
424RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
425{
426 PRTFILEAIOREQINTERNAL pReqInt = hReq;
427 RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
428
429 return pReqInt->AioCB.pvUser;
430}
431
432
433RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
434{
435 PRTFILEAIOREQINTERNAL pReqInt = hReq;
436 RTFILEAIOREQ_VALID_RETURN(pReqInt);
437 RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
438
439 LNXKAIOIOEVENT AioEvent;
440 int rc = rtFileAsyncIoLinuxCancel(pReqInt->AioContext, &pReqInt->AioCB, &AioEvent);
441 if (RT_SUCCESS(rc))
442 {
443 /*
444 * Decrement request count because the request will never arrive at the
445 * completion port.
446 */
447 AssertMsg(VALID_PTR(pReqInt->pCtxInt),
448 ("Invalid state. Request was canceled but wasn't submitted\n"));
449
450 ASMAtomicDecS32(&pReqInt->pCtxInt->cRequests);
451 pReqInt->Rc = VERR_FILE_AIO_CANCELED;
452 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
453 return VINF_SUCCESS;
454 }
455 if (rc == VERR_TRY_AGAIN)
456 return VERR_FILE_AIO_IN_PROGRESS;
457 return rc;
458}
459
460
461RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
462{
463 PRTFILEAIOREQINTERNAL pReqInt = hReq;
464 RTFILEAIOREQ_VALID_RETURN(pReqInt);
465 AssertPtrNull(pcbTransfered);
466 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
467 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
468
469 if ( pcbTransfered
470 && RT_SUCCESS(pReqInt->Rc))
471 *pcbTransfered = pReqInt->cbTransfered;
472
473 return pReqInt->Rc;
474}
475
476
477RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax)
478{
479 PRTFILEAIOCTXINTERNAL pCtxInt;
480 AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
481
482 /* The kernel interface needs a maximum. */
483 if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
484 return VERR_OUT_OF_RANGE;
485
486 pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOCTXINTERNAL));
487 if (RT_UNLIKELY(!pCtxInt))
488 return VERR_NO_MEMORY;
489
490 /* Init the event handle. */
491 int rc = rtFileAsyncIoLinuxCreate(cAioReqsMax, &pCtxInt->AioContext);
492 if (RT_SUCCESS(rc))
493 {
494 pCtxInt->fWokenUp = false;
495 pCtxInt->fWaiting = false;
496 pCtxInt->hThreadWait = NIL_RTTHREAD;
497 pCtxInt->cRequestsMax = cAioReqsMax;
498 pCtxInt->u32Magic = RTFILEAIOCTX_MAGIC;
499 *phAioCtx = (RTFILEAIOCTX)pCtxInt;
500 }
501 else
502 RTMemFree(pCtxInt);
503
504 return rc;
505}
506
507
508RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
509{
510 /* Validate the handle and ignore nil. */
511 if (hAioCtx == NIL_RTFILEAIOCTX)
512 return VINF_SUCCESS;
513 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
514 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
515
516 /* Cannot destroy a busy context. */
517 if (RT_UNLIKELY(pCtxInt->cRequests))
518 return VERR_FILE_AIO_BUSY;
519
520 /* The native bit first, then mark it as dead and free it. */
521 int rc = rtFileAsyncIoLinuxDestroy(pCtxInt->AioContext);
522 if (RT_FAILURE(rc))
523 return rc;
524 ASMAtomicUoWriteU32(&pCtxInt->u32Magic, RTFILEAIOCTX_MAGIC_DEAD);
525 RTMemFree(pCtxInt);
526
527 return VINF_SUCCESS;
528}
529
530
531RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
532{
533 /* Nil means global here. */
534 if (hAioCtx == NIL_RTFILEAIOCTX)
535 return RTFILEAIO_UNLIMITED_REQS; /** @todo r=bird: I'm a bit puzzled by this return value since it
536 * is completely useless in RTFileAioCtxCreate. */
537
538 /* Return 0 if the handle is invalid, it's better than garbage I think... */
539 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
540 RTFILEAIOCTX_VALID_RETURN_RC(pCtxInt, 0);
541
542 return pCtxInt->cRequestsMax;
543}
544
545RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
546{
547 /* Nothing to do. */
548 return VINF_SUCCESS;
549}
550
551RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
552{
553 int rc = VINF_SUCCESS;
554
555 /*
556 * Parameter validation.
557 */
558 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
559 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
560 AssertReturn(cReqs > 0, VERR_INVALID_PARAMETER);
561 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
562 uint32_t i = cReqs;
563 PRTFILEAIOREQINTERNAL pReqInt = NULL;
564
565 /*
566 * Vaildate requests and associate with the context.
567 */
568 while (i-- > 0)
569 {
570 pReqInt = pahReqs[i];
571 if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
572 {
573 /* Undo everything and stop submitting. */
574 size_t iUndo = cReqs;
575 while (iUndo-- > i)
576 {
577 pReqInt = pahReqs[iUndo];
578 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
579 pReqInt->pCtxInt = NULL;
580 }
581 return VERR_INVALID_HANDLE;
582 }
583
584 pReqInt->AioContext = pCtxInt->AioContext;
585 pReqInt->pCtxInt = pCtxInt;
586 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
587 }
588
589 do
590 {
591 /*
592 * We cast pahReqs to the Linux iocb structure to avoid copying the requests
593 * into a temporary array. This is possible because the iocb structure is
594 * the first element in the request structure (see PRTFILEAIOCTXINTERNAL).
595 */
596 int cReqsSubmitted = 0;
597 rc = rtFileAsyncIoLinuxSubmit(pCtxInt->AioContext, cReqs,
598 (PLNXKAIOIOCB *)pahReqs,
599 &cReqsSubmitted);
600 if (RT_FAILURE(rc))
601 {
602 /*
603 * We encountered an error.
604 * This means that the first IoCB
605 * is not correctly initialized
606 * (invalid buffer alignment or bad file descriptor).
607 * Revert every request into the prepared state except
608 * the first one which will switch to completed.
609 * Another reason could be insuffidient ressources.
610 */
611 i = cReqs;
612 while (i-- > 0)
613 {
614 /* Already validated. */
615 pReqInt = pahReqs[i];
616 pReqInt->pCtxInt = NULL;
617 pReqInt->AioContext = 0;
618 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
619 }
620
621 if (rc == VERR_TRY_AGAIN)
622 return VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
623 else
624 {
625 /* The first request failed. */
626 pReqInt = pahReqs[0];
627 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
628 pReqInt->Rc = rc;
629 pReqInt->cbTransfered = 0;
630 return rc;
631 }
632 }
633
634 /* Advance. */
635 cReqs -= cReqsSubmitted;
636 pahReqs += cReqsSubmitted;
637 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
638
639 } while (cReqs);
640
641 return rc;
642}
643
644
645RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
646 PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
647{
648 /*
649 * Validate the parameters, making sure to always set pcReqs.
650 */
651 AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
652 *pcReqs = 0; /* always set */
653 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
654 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
655 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
656 AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
657 AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
658
659 /*
660 * Can't wait if there are not requests around.
661 */
662 if (RT_UNLIKELY(ASMAtomicUoReadS32(&pCtxInt->cRequests) == 0))
663 return VERR_FILE_AIO_NO_REQUEST;
664
665 /*
666 * Convert the timeout if specified.
667 */
668 struct timespec *pTimeout = NULL;
669 struct timespec Timeout = {0,0};
670 uint64_t StartNanoTS = 0;
671 if (cMillies != RT_INDEFINITE_WAIT)
672 {
673 Timeout.tv_sec = cMillies / 1000;
674 Timeout.tv_nsec = cMillies % 1000 * 1000000;
675 pTimeout = &Timeout;
676 StartNanoTS = RTTimeNanoTS();
677 }
678
679 /* Wait for at least one. */
680 if (!cMinReqs)
681 cMinReqs = 1;
682
683 /* For the wakeup call. */
684 Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
685 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
686
687 /*
688 * Loop until we're woken up, hit an error (incl timeout), or
689 * have collected the desired number of requests.
690 */
691 int rc = VINF_SUCCESS;
692 int cRequestsCompleted = 0;
693 while (!pCtxInt->fWokenUp)
694 {
695 LNXKAIOIOEVENT aPortEvents[AIO_MAXIMUM_REQUESTS_PER_CONTEXT];
696 int cRequestsToWait = RT_MIN(cReqs, AIO_MAXIMUM_REQUESTS_PER_CONTEXT);
697 ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
698 rc = rtFileAsyncIoLinuxGetEvents(pCtxInt->AioContext, cMinReqs, cRequestsToWait, &aPortEvents[0], pTimeout);
699 ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
700 if (RT_FAILURE(rc))
701 break;
702 uint32_t const cDone = rc;
703 rc = VINF_SUCCESS;
704
705 /*
706 * Process received events / requests.
707 */
708 for (uint32_t i = 0; i < cDone; i++)
709 {
710 /*
711 * The iocb is the first element in our request structure.
712 * So we can safely cast it directly to the handle (see above)
713 */
714 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)aPortEvents[i].pIoCB;
715 AssertPtr(pReqInt);
716 Assert(pReqInt->u32Magic == RTFILEAIOREQ_MAGIC);
717
718 /** @todo aeichner: The rc field contains the result code
719 * like you can find in errno for the normal read/write ops.
720 * But there is a second field called rc2. I don't know the
721 * purpose for it yet.
722 */
723 if (RT_UNLIKELY(aPortEvents[i].rc < 0))
724 pReqInt->Rc = RTErrConvertFromErrno(-aPortEvents[i].rc); /* Convert to positive value. */
725 else
726 {
727 pReqInt->Rc = VINF_SUCCESS;
728 pReqInt->cbTransfered = aPortEvents[i].rc;
729 }
730
731 /* Mark the request as finished. */
732 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
733
734 pahReqs[cRequestsCompleted++] = (RTFILEAIOREQ)pReqInt;
735 }
736
737 /*
738 * Done Yet? If not advance and try again.
739 */
740 if (cDone >= cMinReqs)
741 break;
742 cMinReqs -= cDone;
743 cReqs -= cDone;
744
745 if (cMillies != RT_INDEFINITE_WAIT)
746 {
747 /* The API doesn't return ETIMEDOUT, so we have to fix that ourselves. */
748 uint64_t NanoTS = RTTimeNanoTS();
749 uint64_t cMilliesElapsed = (NanoTS - StartNanoTS) / 1000000;
750 if (cMilliesElapsed >= cMillies)
751 {
752 rc = VERR_TIMEOUT;
753 break;
754 }
755
756 /* The syscall supposedly updates it, but we're paranoid. :-) */
757 Timeout.tv_sec = (cMillies - (RTMSINTERVAL)cMilliesElapsed) / 1000;
758 Timeout.tv_nsec = (cMillies - (RTMSINTERVAL)cMilliesElapsed) % 1000 * 1000000;
759 }
760 }
761
762 /*
763 * Update the context state and set the return value.
764 */
765 *pcReqs = cRequestsCompleted;
766 ASMAtomicSubS32(&pCtxInt->cRequests, cRequestsCompleted);
767 Assert(pCtxInt->hThreadWait == RTThreadSelf());
768 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
769
770 /*
771 * Clear the wakeup flag and set rc.
772 */
773 if ( pCtxInt->fWokenUp
774 && RT_SUCCESS(rc))
775 {
776 ASMAtomicXchgBool(&pCtxInt->fWokenUp, false);
777 rc = VERR_INTERRUPTED;
778 }
779
780 return rc;
781}
782
783
784RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
785{
786 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
787 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
788
789 /** @todo r=bird: Define the protocol for how to resume work after calling
790 * this function. */
791
792 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
793
794 /*
795 * Read the thread handle before the status flag.
796 * If we read the handle after the flag we might
797 * end up with an invalid handle because the thread
798 * waiting in RTFileAioCtxWakeup() might get scheduled
799 * before we read the flag and returns.
800 * We can ensure that the handle is valid if fWaiting is true
801 * when reading the handle before the status flag.
802 */
803 RTTHREAD hThread;
804 ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
805 bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
806 if ( !fWokenUp
807 && fWaiting)
808 {
809 /*
810 * If a thread waits the handle must be valid.
811 * It is possible that the thread returns from
812 * rtFileAsyncIoLinuxGetEvents() before the signal
813 * is send.
814 * This is no problem because we already set fWokenUp
815 * to true which will let the thread return VERR_INTERRUPTED
816 * and the next call to RTFileAioCtxWait() will not
817 * return VERR_INTERRUPTED because signals are not saved
818 * and will simply vanish if the destination thread can't
819 * receive it.
820 */
821 Assert(hThread != NIL_RTTHREAD);
822 RTThreadPoke(hThread);
823 }
824
825 return VINF_SUCCESS;
826}
827
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette