VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/linux/fileaio-linux.cpp@ 21505

Last change on this file since 21505 was 21505, checked in by vboxsync, 15 years ago

Fix Linux AIO by removing the previously required header and defining the needed types in the file. We didn't use much from the header anyway, and Linus once said that interfaces from kernel to user space won't change in an incompatible way, so I think we are safe.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 26.0 KB
Line 
1/* $Id: fileaio-linux.cpp 21505 2009-07-10 21:54:21Z vboxsync $ */
2/** @file
3 * IPRT - File async I/O, native implementation for the Linux host platform.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31/** @page pg_rtfileaio_linux RTFile Async I/O - Linux Implementation Notes
32 * @internal
33 *
34 * Linux implements the kernel async I/O API through the io_* syscalls. They are
35 * not exposed in the glibc (the aio_* API uses userspace threads and blocking
36 * I/O operations to simulate async behavior). There is an external library
37 * called libaio which implements these syscalls but because we don't want to
38 * have another dependency and this library is not installed by default and the
39 * interface is really simple we use the kernel interface directly using wrapper
40 * functions.
41 *
42 * The interface has some limitations. The first one is that the file must be
43 * opened with O_DIRECT. This disables caching done by the kernel which can be
44 * compensated if the user of this API implements caching itself. The next
45 * limitation is that data buffers must be aligned at a 512 byte boundary or the
46 * request will fail.
47 */
48/** @todo r=bird: What's this about "must be opened with O_DIRECT"? An
49 * explanation would be nice, esp. seeing what Linus is quoted saying
50 * about it in the open man page... */
51
52/*******************************************************************************
53* Header Files *
54*******************************************************************************/
55#define LOG_GROUP RTLOGGROUP_FILE
56#include <iprt/asm.h>
57#include <iprt/mem.h>
58#include <iprt/assert.h>
59#include <iprt/string.h>
60#include <iprt/err.h>
61#include <iprt/log.h>
62#include <iprt/thread.h>
63#include "internal/fileaio.h"
64
65#include <unistd.h>
66#include <sys/syscall.h>
67#include <errno.h>
68
69#include <iprt/file.h>
70
71
72/*******************************************************************************
73* Structures and Typedefs *
74*******************************************************************************/
/** Handle type for a kernel async I/O context (used with the io_* syscalls). */
typedef unsigned long LNXKAIOCONTEXT;

/**
 * I/O opcodes used in LNXKAIOIOCB::u16IoOpCode.
 */
enum
{
    /** Read request. */
    LNXKAIO_IOCB_CMD_READ  = 0,
    /** Write request. */
    LNXKAIO_IOCB_CMD_WRITE = 1
};
86
87/**
88 * The iocb structure of a request which is passed to the kernel.
89 *
90 * We redefined this here because the version in the header lacks padding
91 * for 32bit.
92 */
93typedef struct LNXKAIOIOCB
94{
95 /** Opaque pointer to data which is returned on an I/O event. */
96 void *pvUser;
97#ifdef RT_ARCH_X86
98 uint32_t u32Padding0;
99#endif
100 /** Contains the request number and is set by the kernel. */
101 uint32_t u32Key;
102 /** Reserved. */
103 uint32_t u32Reserved0;
104 /** The I/O opcode. */
105 uint16_t u16IoOpCode;
106 /** Request priority. */
107 int16_t i16Priority;
108 /** The file descriptor. */
109 uint32_t File;
110 /** The userspace pointer to the buffer containing/receiving the data. */
111 void *pvBuf;
112#ifdef RT_ARCH_X86
113 uint32_t u32Padding1;
114#endif
115 /** How many bytes to transfer. */
116#ifdef RT_ARCH_X86
117 uint32_t cbTransfer;
118 uint32_t u32Padding2;
119#elif defined(RT_ARCH_AMD64)
120 uint64_t cbTransfer;
121#else
122# error "Unknown architecture"
123#endif
124 /** At which offset to start the transfer. */
125 int64_t off;
126 /** Reserved. */
127 uint64_t u64Reserved1;
128 /** Flags */
129 uint32_t fFlags;
130 /** Readyness signal file descriptor. */
131 uint32_t u32ResFd;
132} LNXKAIOIOCB, *PLNXKAIOIOCB;
133
134/**
135 * I/O event structure to notify about completed requests.
136 * Redefined here too because of the padding.
137 */
138typedef struct LNXKAIOIOEVENT
139{
140 /** The pvUser field from the iocb. */
141 void *pvUser;
142#ifdef RT_ARCH_X86
143 uint32_t u32Padding0;
144#endif
145 /** The LNXKAIOIOCB object this event is for. */
146 PLNXKAIOIOCB *pIoCB;
147#ifdef RT_ARCH_X86
148 uint32_t u32Padding1;
149#endif
150 /** The result code of the operation .*/
151#ifdef RT_ARCH_X86
152 int32_t rc;
153 uint32_t u32Padding2;
154#elif defined(RT_ARCH_AMD64)
155 int64_t rc;
156#else
157# error "Unknown architecture"
158#endif
159 /** Secondary result code. */
160#ifdef RT_ARCH_X86
161 int32_t rc2;
162 uint32_t u32Padding3;
163#elif defined(RT_ARCH_AMD64)
164 int64_t rc2;
165#else
166# error "Unknown architecture"
167#endif
168} LNXKAIOIOEVENT, *PLNXKAIOIOEVENT;
169
170
171/**
172 * Async I/O completion context state.
173 */
174typedef struct RTFILEAIOCTXINTERNAL
175{
176 /** Handle to the async I/O context. */
177 LNXKAIOCONTEXT AioContext;
178 /** Maximum number of requests this context can handle. */
179 int cRequestsMax;
180 /** Current number of requests active on this context. */
181 volatile int32_t cRequests;
182 /** The ID of the thread which is currently waiting for requests. */
183 volatile RTTHREAD hThreadWait;
184 /** Flag whether the thread was woken up. */
185 volatile bool fWokenUp;
186 /** Flag whether the thread is currently waiting in the syscall. */
187 volatile bool fWaiting;
188 /** Magic value (RTFILEAIOCTX_MAGIC). */
189 uint32_t u32Magic;
190} RTFILEAIOCTXINTERNAL;
191/** Pointer to an internal context structure. */
192typedef RTFILEAIOCTXINTERNAL *PRTFILEAIOCTXINTERNAL;
193
194/**
195 * Async I/O request state.
196 */
197typedef struct RTFILEAIOREQINTERNAL
198{
199 /** The aio control block. This must be the FIRST elment in
200 * the structure! (see notes below) */
201 LNXKAIOIOCB AioCB;
202 /** Current state the request is in. */
203 RTFILEAIOREQSTATE enmState;
204 /** The I/O context this request is associated with. */
205 LNXKAIOCONTEXT AioContext;
206 /** Return code the request completed with. */
207 int Rc;
208 /** Number of bytes actually trasnfered. */
209 size_t cbTransfered;
210 /** Completion context we are assigned to. */
211 PRTFILEAIOCTXINTERNAL pCtxInt;
212 /** Magic value (RTFILEAIOREQ_MAGIC). */
213 uint32_t u32Magic;
214} RTFILEAIOREQINTERNAL;
215/** Pointer to an internal request structure. */
216typedef RTFILEAIOREQINTERNAL *PRTFILEAIOREQINTERNAL;
217
218
219/*******************************************************************************
220* Defined Constants And Macros *
221*******************************************************************************/
222/** The max number of events to get in one call. */
223#define AIO_MAXIMUM_REQUESTS_PER_CONTEXT 64
224
225
226/**
227 * Creates a new async I/O context.
228 */
229DECLINLINE(int) rtFileAsyncIoLinuxCreate(unsigned cEvents, LNXKAIOCONTEXT *pAioContext)
230{
231 int rc = syscall(__NR_io_setup, cEvents, pAioContext);
232 if (RT_UNLIKELY(rc == -1))
233 return RTErrConvertFromErrno(errno);
234
235 return VINF_SUCCESS;
236}
237
238/**
239 * Destroys a async I/O context.
240 */
241DECLINLINE(int) rtFileAsyncIoLinuxDestroy(LNXKAIOCONTEXT AioContext)
242{
243 int rc = syscall(__NR_io_destroy, AioContext);
244 if (RT_UNLIKELY(rc == -1))
245 return RTErrConvertFromErrno(errno);
246
247 return VINF_SUCCESS;
248}
249
250/**
251 * Submits an array of I/O requests to the kernel.
252 */
253DECLINLINE(int) rtFileAsyncIoLinuxSubmit(LNXKAIOCONTEXT AioContext, long cReqs, LNXKAIOIOCB **ppIoCB, int *pcSubmitted)
254{
255 int rc = syscall(__NR_io_submit, AioContext, cReqs, ppIoCB);
256 if (RT_UNLIKELY(rc == -1))
257 return RTErrConvertFromErrno(errno);
258
259 *pcSubmitted = rc;
260
261 return VINF_SUCCESS;
262}
263
264/**
265 * Cancels a I/O request.
266 */
267DECLINLINE(int) rtFileAsyncIoLinuxCancel(LNXKAIOCONTEXT AioContext, PLNXKAIOIOCB pIoCB, PLNXKAIOIOEVENT pIoResult)
268{
269 int rc = syscall(__NR_io_cancel, AioContext, pIoCB, pIoResult);
270 if (RT_UNLIKELY(rc == -1))
271 return RTErrConvertFromErrno(errno);
272
273 return VINF_SUCCESS;
274}
275
276/**
277 * Waits for I/O events.
278 * @returns Number of events (natural number w/ 0), IPRT error code (negative).
279 */
280DECLINLINE(int) rtFileAsyncIoLinuxGetEvents(LNXKAIOCONTEXT AioContext, long cReqsMin, long cReqs,
281 PLNXKAIOIOEVENT paIoResults, struct timespec *pTimeout)
282{
283 int rc = syscall(__NR_io_getevents, AioContext, cReqsMin, cReqs, paIoResults, pTimeout);
284 if (RT_UNLIKELY(rc == -1))
285 return RTErrConvertFromErrno(errno);
286
287 return rc;
288}
289
290RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
291{
292 int rc = VINF_SUCCESS;
293 AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
294
295 /*
296 * Check if the API is implemented by creating a
297 * completion port.
298 */
299 LNXKAIOCONTEXT AioContext = 0;
300 rc = rtFileAsyncIoLinuxCreate(1, &AioContext);
301 if (RT_FAILURE(rc))
302 return rc;
303
304 rc = rtFileAsyncIoLinuxDestroy(AioContext);
305 if (RT_FAILURE(rc))
306 return rc;
307
308 /* Supported - fill in the limits. The alignment is the only restriction. */
309 pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
310 pAioLimits->cbBufferAlignment = 512;
311
312 return VINF_SUCCESS;
313}
314
315
316RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
317{
318 AssertPtrReturn(phReq, VERR_INVALID_POINTER);
319
320 /*
321 * Allocate a new request and initialize it.
322 */
323 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(*pReqInt));
324 if (RT_UNLIKELY(!pReqInt))
325 return VERR_NO_MEMORY;
326
327 pReqInt->pCtxInt = NULL;
328 pReqInt->u32Magic = RTFILEAIOREQ_MAGIC;
329 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
330
331 *phReq = (RTFILEAIOREQ)pReqInt;
332 return VINF_SUCCESS;
333}
334
335
336RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
337{
338 /*
339 * Validate the handle and ignore nil.
340 */
341 if (hReq == NIL_RTFILEAIOREQ)
342 return VINF_SUCCESS;
343 PRTFILEAIOREQINTERNAL pReqInt = hReq;
344 RTFILEAIOREQ_VALID_RETURN(pReqInt);
345 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
346
347 /*
348 * Trash the magic and free it.
349 */
350 ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
351 RTMemFree(pReqInt);
352 return VINF_SUCCESS;
353}
354
355
356/**
357 * Worker setting up the request.
358 */
359DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
360 uint16_t uTransferDirection,
361 RTFOFF off, void *pvBuf, size_t cbTransfer,
362 void *pvUser)
363{
364 /*
365 * Validate the input.
366 */
367 PRTFILEAIOREQINTERNAL pReqInt = hReq;
368 RTFILEAIOREQ_VALID_RETURN(pReqInt);
369 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
370 Assert(hFile != NIL_RTFILE);
371 AssertPtr(pvBuf);
372 Assert(off >= 0);
373 Assert(cbTransfer > 0);
374
375 /*
376 * Setup the control block and clear the finished flag.
377 */
378 pReqInt->AioCB.u16IoOpCode = uTransferDirection;
379 pReqInt->AioCB.File = (uint32_t)hFile;
380 pReqInt->AioCB.off = off;
381 pReqInt->AioCB.cbTransfer = cbTransfer;
382 pReqInt->AioCB.pvBuf = pvBuf;
383 pReqInt->AioCB.pvUser = pvUser;
384
385 pReqInt->pCtxInt = NULL;
386 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
387
388 return VINF_SUCCESS;
389}
390
391
392RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
393 void *pvBuf, size_t cbRead, void *pvUser)
394{
395 return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_READ,
396 off, pvBuf, cbRead, pvUser);
397}
398
399
400RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
401 void *pvBuf, size_t cbWrite, void *pvUser)
402{
403 return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_WRITE,
404 off, pvBuf, cbWrite, pvUser);
405}
406
407
408RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
409{
410 PRTFILEAIOREQINTERNAL pReqInt = hReq;
411 RTFILEAIOREQ_VALID_RETURN(pReqInt);
412 AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_HANDLE);
413 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
414
415 /** @todo: Flushing is not neccessary on Linux because O_DIRECT is mandatory
416 * which disables caching.
417 * We could setup a fake request which isn't really executed
418 * to avoid platform dependent code in the caller.
419 */
420#if 0
421 return rtFileAsyncPrepareTransfer(pRequest, File, TRANSFERDIRECTION_FLUSH,
422 0, NULL, 0, pvUser);
423#endif
424 return VERR_NOT_IMPLEMENTED;
425}
426
427
428RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
429{
430 PRTFILEAIOREQINTERNAL pReqInt = hReq;
431 RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
432
433 return pReqInt->AioCB.pvUser;
434}
435
436
437RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
438{
439 PRTFILEAIOREQINTERNAL pReqInt = hReq;
440 RTFILEAIOREQ_VALID_RETURN(pReqInt);
441 RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
442
443 LNXKAIOIOEVENT AioEvent;
444 int rc = rtFileAsyncIoLinuxCancel(pReqInt->AioContext, &pReqInt->AioCB, &AioEvent);
445 if (RT_SUCCESS(rc))
446 {
447 /*
448 * Decrement request count because the request will never arrive at the
449 * completion port.
450 */
451 AssertMsg(VALID_PTR(pReqInt->pCtxInt),
452 ("Invalid state. Request was canceled but wasn't submitted\n"));
453
454 ASMAtomicDecS32(&pReqInt->pCtxInt->cRequests);
455 pReqInt->Rc = VERR_FILE_AIO_CANCELED;
456 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
457 return VINF_SUCCESS;
458 }
459 if (rc == VERR_TRY_AGAIN)
460 return VERR_FILE_AIO_IN_PROGRESS;
461 return rc;
462}
463
464
465RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
466{
467 PRTFILEAIOREQINTERNAL pReqInt = hReq;
468 RTFILEAIOREQ_VALID_RETURN(pReqInt);
469 AssertPtrNull(pcbTransfered);
470 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
471 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
472
473 if ( pcbTransfered
474 && RT_SUCCESS(pReqInt->Rc))
475 *pcbTransfered = pReqInt->cbTransfered;
476
477 return pReqInt->Rc;
478}
479
480
481RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax)
482{
483 PRTFILEAIOCTXINTERNAL pCtxInt;
484 AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
485
486 /* The kernel interface needs a maximum. */
487 if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
488 return VERR_OUT_OF_RANGE;
489
490 pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOCTXINTERNAL));
491 if (RT_UNLIKELY(!pCtxInt))
492 return VERR_NO_MEMORY;
493
494 /* Init the event handle. */
495 int rc = rtFileAsyncIoLinuxCreate(cAioReqsMax, &pCtxInt->AioContext);
496 if (RT_SUCCESS(rc))
497 {
498 pCtxInt->fWokenUp = false;
499 pCtxInt->fWaiting = false;
500 pCtxInt->hThreadWait = NIL_RTTHREAD;
501 pCtxInt->cRequestsMax = cAioReqsMax;
502 pCtxInt->u32Magic = RTFILEAIOCTX_MAGIC;
503 *phAioCtx = (RTFILEAIOCTX)pCtxInt;
504 }
505 else
506 RTMemFree(pCtxInt);
507
508 return rc;
509}
510
511
512RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
513{
514 /* Validate the handle and ignore nil. */
515 if (hAioCtx == NIL_RTFILEAIOCTX)
516 return VINF_SUCCESS;
517 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
518 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
519
520 /* Cannot destroy a busy context. */
521 if (RT_UNLIKELY(pCtxInt->cRequests))
522 return VERR_FILE_AIO_BUSY;
523
524 /* The native bit first, then mark it as dead and free it. */
525 int rc = rtFileAsyncIoLinuxDestroy(pCtxInt->AioContext);
526 if (RT_FAILURE(rc))
527 return rc;
528 ASMAtomicUoWriteU32(&pCtxInt->u32Magic, RTFILEAIOCTX_MAGIC_DEAD);
529 RTMemFree(pCtxInt);
530
531 return VINF_SUCCESS;
532}
533
534
535RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
536{
537 /* Nil means global here. */
538 if (hAioCtx == NIL_RTFILEAIOCTX)
539 return RTFILEAIO_UNLIMITED_REQS; /** @todo r=bird: I'm a bit puzzled by this return value since it
540 * is completely useless in RTFileAioCtxCreate. */
541
542 /* Return 0 if the handle is invalid, it's better than garbage I think... */
543 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
544 RTFILEAIOCTX_VALID_RETURN_RC(pCtxInt, 0);
545
546 return pCtxInt->cRequestsMax;
547}
548
549RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
550{
551 /* Nothing to do. */
552 return VINF_SUCCESS;
553}
554
555RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
556{
557 int rc = VINF_SUCCESS;
558
559 /*
560 * Parameter validation.
561 */
562 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
563 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
564 AssertReturn(cReqs > 0, VERR_INVALID_PARAMETER);
565 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
566 uint32_t i = cReqs;
567 PRTFILEAIOREQINTERNAL pReqInt = NULL;
568
569 /*
570 * Vaildate requests and associate with the context.
571 */
572 while (i-- > 0)
573 {
574 pReqInt = pahReqs[i];
575 if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
576 {
577 /* Undo everything and stop submitting. */
578 size_t iUndo = cReqs;
579 while (iUndo-- > i)
580 {
581 pReqInt = pahReqs[iUndo];
582 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
583 pReqInt->pCtxInt = NULL;
584 }
585 return VERR_INVALID_HANDLE;
586 }
587
588 pReqInt->AioContext = pCtxInt->AioContext;
589 pReqInt->pCtxInt = pCtxInt;
590 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
591 }
592
593 do
594 {
595 /*
596 * We cast pahReqs to the Linux iocb structure to avoid copying the requests
597 * into a temporary array. This is possible because the iocb structure is
598 * the first element in the request structure (see PRTFILEAIOCTXINTERNAL).
599 */
600 int cReqsSubmitted = 0;
601 rc = rtFileAsyncIoLinuxSubmit(pCtxInt->AioContext, cReqs,
602 (PLNXKAIOIOCB *)pahReqs,
603 &cReqsSubmitted);
604 if (RT_FAILURE(rc))
605 {
606 /*
607 * We encountered an error.
608 * This means that the first IoCB
609 * is not correctly initialized
610 * (invalid buffer alignment or bad file descriptor).
611 * Revert every request into the prepared state except
612 * the first one which will switch to completed.
613 * Another reason could be insuffidient ressources.
614 */
615 i = cReqs;
616 while (i-- > 0)
617 {
618 /* Already validated. */
619 pReqInt = pahReqs[i];
620 pReqInt->pCtxInt = NULL;
621 pReqInt->AioContext = 0;
622 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
623 }
624
625 if (rc == VERR_TRY_AGAIN)
626 return VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
627 else
628 {
629 /* The first request failed. */
630 pReqInt = pahReqs[0];
631 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
632 pReqInt->Rc = rc;
633 pReqInt->cbTransfered = 0;
634 return rc;
635 }
636 }
637
638 /* Advance. */
639 cReqs -= cReqsSubmitted;
640 pahReqs += cReqsSubmitted;
641 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
642
643 } while (cReqs);
644
645 return rc;
646}
647
648
649RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, unsigned cMillisTimeout,
650 PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
651{
652 /*
653 * Validate the parameters, making sure to always set pcReqs.
654 */
655 AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
656 *pcReqs = 0; /* always set */
657 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
658 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
659 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
660 AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
661 AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
662
663 /*
664 * Can't wait if there are not requests around.
665 */
666 if (RT_UNLIKELY(ASMAtomicUoReadS32(&pCtxInt->cRequests) == 0))
667 return VERR_FILE_AIO_NO_REQUEST;
668
669 /*
670 * Convert the timeout if specified.
671 */
672 struct timespec *pTimeout = NULL;
673 struct timespec Timeout = {0,0};
674 uint64_t StartNanoTS = 0;
675 if (cMillisTimeout != RT_INDEFINITE_WAIT)
676 {
677 Timeout.tv_sec = cMillisTimeout / 1000;
678 Timeout.tv_nsec = cMillisTimeout % 1000 * 1000000;
679 pTimeout = &Timeout;
680 StartNanoTS = RTTimeNanoTS();
681 }
682
683 /* Wait for at least one. */
684 if (!cMinReqs)
685 cMinReqs = 1;
686
687 /* For the wakeup call. */
688 Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
689 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
690
691 /*
692 * Loop until we're woken up, hit an error (incl timeout), or
693 * have collected the desired number of requests.
694 */
695 int rc = VINF_SUCCESS;
696 int cRequestsCompleted = 0;
697 while (!pCtxInt->fWokenUp)
698 {
699 LNXKAIOIOEVENT aPortEvents[AIO_MAXIMUM_REQUESTS_PER_CONTEXT];
700 int cRequestsToWait = RT_MIN(cReqs, AIO_MAXIMUM_REQUESTS_PER_CONTEXT);
701 ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
702 rc = rtFileAsyncIoLinuxGetEvents(pCtxInt->AioContext, cMinReqs, cRequestsToWait, &aPortEvents[0], pTimeout);
703 ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
704 if (RT_FAILURE(rc))
705 break;
706 uint32_t const cDone = rc;
707 rc = VINF_SUCCESS;
708
709 /*
710 * Process received events / requests.
711 */
712 for (uint32_t i = 0; i < cDone; i++)
713 {
714 /*
715 * The iocb is the first element in our request structure.
716 * So we can safely cast it directly to the handle (see above)
717 */
718 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)aPortEvents[i].pIoCB;
719 AssertPtr(pReqInt);
720 Assert(pReqInt->u32Magic == RTFILEAIOREQ_MAGIC);
721
722 /** @todo aeichner: The rc field contains the result code
723 * like you can find in errno for the normal read/write ops.
724 * But there is a second field called rc2. I don't know the
725 * purpose for it yet.
726 */
727 if (RT_UNLIKELY(aPortEvents[i].rc < 0))
728 pReqInt->Rc = RTErrConvertFromErrno(aPortEvents[i].rc);
729 else
730 {
731 pReqInt->Rc = VINF_SUCCESS;
732 pReqInt->cbTransfered = aPortEvents[i].rc;
733 }
734
735 /* Mark the request as finished. */
736 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
737
738 pahReqs[cRequestsCompleted++] = (RTFILEAIOREQ)pReqInt;
739 }
740
741 /*
742 * Done Yet? If not advance and try again.
743 */
744 if (cDone >= cMinReqs)
745 break;
746 cMinReqs -= cDone;
747 cReqs -= cDone;
748
749 if (cMillisTimeout != RT_INDEFINITE_WAIT)
750 {
751 /* The API doesn't return ETIMEDOUT, so we have to fix that ourselves. */
752 uint64_t NanoTS = RTTimeNanoTS();
753 uint64_t cMilliesElapsed = (NanoTS - StartNanoTS) / 1000000;
754 if (cMilliesElapsed >= cMillisTimeout)
755 {
756 rc = VERR_TIMEOUT;
757 break;
758 }
759
760 /* The syscall supposedly updates it, but we're paranoid. :-) */
761 Timeout.tv_sec = (cMillisTimeout - (unsigned)cMilliesElapsed) / 1000;
762 Timeout.tv_nsec = (cMillisTimeout - (unsigned)cMilliesElapsed) % 1000 * 1000000;
763 }
764 }
765
766 /*
767 * Update the context state and set the return value.
768 */
769 *pcReqs = cRequestsCompleted;
770 ASMAtomicSubS32(&pCtxInt->cRequests, cRequestsCompleted);
771 Assert(pCtxInt->hThreadWait == RTThreadSelf());
772 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
773
774 /*
775 * Clear the wakeup flag and set rc.
776 */
777 if ( pCtxInt->fWokenUp
778 && RT_SUCCESS(rc))
779 {
780 ASMAtomicXchgBool(&pCtxInt->fWokenUp, false);
781 rc = VERR_INTERRUPTED;
782 }
783
784 return rc;
785}
786
787
788RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
789{
790 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
791 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
792
793 /** @todo r=bird: Define the protocol for how to resume work after calling
794 * this function. */
795
796 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
797
798 /*
799 * Read the thread handle before the status flag.
800 * If we read the handle after the flag we might
801 * end up with an invalid handle because the thread
802 * waiting in RTFileAioCtxWakeup() might get scheduled
803 * before we read the flag and returns.
804 * We can ensure that the handle is valid if fWaiting is true
805 * when reading the handle before the status flag.
806 */
807 RTTHREAD hThread;
808 ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
809 bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
810 if ( !fWokenUp
811 && fWaiting)
812 {
813 /*
814 * If a thread waits the handle must be valid.
815 * It is possible that the thread returns from
816 * rtFileAsyncIoLinuxGetEvents() before the signal
817 * is send.
818 * This is no problem because we already set fWokenUp
819 * to true which will let the thread return VERR_INTERRUPTED
820 * and the next call to RTFileAioCtxWait() will not
821 * return VERR_INTERRUPTED because signals are not saved
822 * and will simply vanish if the destination thread can't
823 * receive it.
824 */
825 Assert(hThread != NIL_RTTHREAD);
826 RTThreadPoke(hThread);
827 }
828
829 return VINF_SUCCESS;
830}
831
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette