VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c@ 58598

Last change on this file since 58598 was 57358, checked in by vboxsync, 9 years ago

*: scm cleanup run.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 15.3 KB
Line 
1/* $Id: alloc-r0drv-linux.c 57358 2015-08-14 15:16:38Z vboxsync $ */
2/** @file
3 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include "the-linux-kernel.h"
32#include "internal/iprt.h"
33#include <iprt/mem.h>
34
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include "r0drv/alloc-r0drv.h"
38
39
40#if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
41# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
42/**
43 * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
44 * memory in the module range. This is preferable to the exec heap below.
45 */
46# define RTMEMALLOC_EXEC_VM_AREA
47# else
48/**
49 * We need memory in the module range (~2GB to ~0); this can only be obtained
50 * through APIs that are not exported (see module_alloc()).
51 *
52 * So, we'll have to create a quick and dirty heap here using BSS memory.
53 * Very annoying and it's going to restrict us!
54 */
55# define RTMEMALLOC_EXEC_HEAP
56# endif
57#endif
58
59#ifdef RTMEMALLOC_EXEC_HEAP
60# include <iprt/heap.h>
61# include <iprt/spinlock.h>
62# include <iprt/err.h>
63#endif
64
65
66/*********************************************************************************************************************************
67* Structures and Typedefs *
68*********************************************************************************************************************************/
69#ifdef RTMEMALLOC_EXEC_VM_AREA
70/**
71 * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
72 *
73 * This is used with allocating executable memory, for things like generated
74 * code and loaded modules.
75 */
76typedef struct RTMEMLNXHDREX
77{
78 /** The VM area for this allocation. */
79 struct vm_struct *pVmArea;
80 void *pvDummy;
81 /** The header we present to the generic API. */
82 RTMEMHDR Hdr;
83} RTMEMLNXHDREX;
84AssertCompileSize(RTMEMLNXHDREX, 32);
85/** Pointer to an extended memory header. */
86typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
87#endif
88
89
90/*********************************************************************************************************************************
91* Global Variables *
92*********************************************************************************************************************************/
93#ifdef RTMEMALLOC_EXEC_HEAP
94/** The heap. */
95static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
96/** Spinlock protecting the heap. */
97static RTSPINLOCK g_HeapExecSpinlock = NIL_RTSPINLOCK;
98#endif
99
100
101/**
102 * API for cleaning up the heap spinlock on IPRT termination.
103 * This is as RTMemExecDonate specific to AMD64 Linux/GNU.
104 */
105DECLHIDDEN(void) rtR0MemExecCleanup(void)
106{
107#ifdef RTMEMALLOC_EXEC_HEAP
108 RTSpinlockDestroy(g_HeapExecSpinlock);
109 g_HeapExecSpinlock = NIL_RTSPINLOCK;
110#endif
111}
112
113
114/**
115 * Donate read+write+execute memory to the exec heap.
116 *
117 * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
118 * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
119 * allocated memory in the module if it wishes for GCC generated code to work.
120 * GCC can only generate modules that work in the address range ~2GB to ~0
121 * currently.
122 *
123 * The API only accept one single donation.
124 *
125 * @returns IPRT status code.
126 * @retval VERR_NOT_SUPPORTED if the code isn't enabled.
127 * @param pvMemory Pointer to the memory block.
128 * @param cb The size of the memory block.
129 */
130RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
131{
132#ifdef RTMEMALLOC_EXEC_HEAP
133 int rc;
134 AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);
135
136 rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
137 if (RT_SUCCESS(rc))
138 {
139 rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
140 if (RT_FAILURE(rc))
141 rtR0MemExecCleanup();
142 }
143 return rc;
144#else
145 return VERR_NOT_SUPPORTED;
146#endif
147}
148RT_EXPORT_SYMBOL(RTR0MemExecDonate);
149
150
151
152#ifdef RTMEMALLOC_EXEC_VM_AREA
153/**
154 * Allocate executable kernel memory in the module range.
155 *
156 * @returns Pointer to a allocation header success. NULL on failure.
157 *
158 * @param cb The size the user requested.
159 */
160static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
161{
162 size_t const cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
163 size_t const cPages = cbAlloc >> PAGE_SHIFT;
164 struct page **papPages;
165 struct vm_struct *pVmArea;
166 size_t iPage;
167
168 pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
169 if (!pVmArea)
170 return NULL;
171 pVmArea->nr_pages = 0; /* paranoia? */
172 pVmArea->pages = NULL; /* paranoia? */
173
174 papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL | __GFP_NOWARN);
175 if (!papPages)
176 {
177 vunmap(pVmArea->addr);
178 return NULL;
179 }
180
181 for (iPage = 0; iPage < cPages; iPage++)
182 {
183 papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN);
184 if (!papPages[iPage])
185 break;
186 }
187 if (iPage == cPages)
188 {
189 /*
190 * Map the pages.
191 *
192 * Not entirely sure we really need to set nr_pages and pages here, but
193 * they provide a very convenient place for storing something we need
194 * in the free function, if nothing else...
195 */
196# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
197 struct page **papPagesIterator = papPages;
198# endif
199 pVmArea->nr_pages = cPages;
200 pVmArea->pages = papPages;
201 if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
202# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
203 &papPagesIterator
204# else
205 papPages
206# endif
207 ))
208 {
209 PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
210 pHdrEx->pVmArea = pVmArea;
211 pHdrEx->pvDummy = NULL;
212 return &pHdrEx->Hdr;
213 }
214 /* bail out */
215# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
216 pVmArea->nr_pages = papPagesIterator - papPages;
217# endif
218 }
219
220 vunmap(pVmArea->addr);
221
222 while (iPage-- > 0)
223 __free_page(papPages[iPage]);
224 kfree(papPages);
225
226 return NULL;
227}
228#endif /* RTMEMALLOC_EXEC_VM_AREA */
229
230
231/**
232 * OS specific allocation function.
233 */
234DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
235{
236 PRTMEMHDR pHdr;
237 IPRT_LINUX_SAVE_EFL_AC();
238
239 /*
240 * Allocate.
241 */
242 if (fFlags & RTMEMHDR_FLAG_EXEC)
243 {
244 if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
245 return VERR_NOT_SUPPORTED;
246
247#if defined(RT_ARCH_AMD64)
248# ifdef RTMEMALLOC_EXEC_HEAP
249 if (g_HeapExec != NIL_RTHEAPSIMPLE)
250 {
251 RTSpinlockAcquire(g_HeapExecSpinlock);
252 pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
253 RTSpinlockRelease(g_HeapExecSpinlock);
254 fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
255 }
256 else
257 pHdr = NULL;
258
259# elif defined(RTMEMALLOC_EXEC_VM_AREA)
260 pHdr = rtR0MemAllocExecVmArea(cb);
261 fFlags |= RTMEMHDR_FLAG_EXEC_VM_AREA;
262
263# else /* !RTMEMALLOC_EXEC_HEAP */
264# error "you don not want to go here..."
265 pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
266# endif /* !RTMEMALLOC_EXEC_HEAP */
267
268#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
269 pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
270#else
271 pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
272#endif
273 }
274 else
275 {
276 if (
277#if 1 /* vmalloc has serious performance issues, avoid it. */
278 cb <= PAGE_SIZE*16 - sizeof(*pHdr)
279#else
280 cb <= PAGE_SIZE
281#endif
282 || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
283 )
284 {
285 fFlags |= RTMEMHDR_FLAG_KMALLOC;
286 pHdr = kmalloc(cb + sizeof(*pHdr),
287 (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? (GFP_ATOMIC | __GFP_NOWARN)
288 : (GFP_KERNEL | __GFP_NOWARN));
289 if (RT_UNLIKELY( !pHdr
290 && cb > PAGE_SIZE
291 && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
292 {
293 fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
294 pHdr = vmalloc(cb + sizeof(*pHdr));
295 }
296 }
297 else
298 pHdr = vmalloc(cb + sizeof(*pHdr));
299 }
300 if (RT_UNLIKELY(!pHdr))
301 {
302 IPRT_LINUX_RESTORE_EFL_AC();
303 return VERR_NO_MEMORY;
304 }
305
306 /*
307 * Initialize.
308 */
309 pHdr->u32Magic = RTMEMHDR_MAGIC;
310 pHdr->fFlags = fFlags;
311 pHdr->cb = cb;
312 pHdr->cbReq = cb;
313
314 *ppHdr = pHdr;
315 IPRT_LINUX_RESTORE_EFL_AC();
316 return VINF_SUCCESS;
317}
318
319
320/**
321 * OS specific free function.
322 */
323DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
324{
325 IPRT_LINUX_SAVE_EFL_AC();
326
327 pHdr->u32Magic += 1;
328 if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
329 kfree(pHdr);
330#ifdef RTMEMALLOC_EXEC_HEAP
331 else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
332 {
333 RTSpinlockAcquire(g_HeapExecSpinlock);
334 RTHeapSimpleFree(g_HeapExec, pHdr);
335 RTSpinlockRelease(g_HeapExecSpinlock);
336 }
337#endif
338#ifdef RTMEMALLOC_EXEC_VM_AREA
339 else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
340 {
341 PRTMEMLNXHDREX pHdrEx = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
342 size_t iPage = pHdrEx->pVmArea->nr_pages;
343 struct page **papPages = pHdrEx->pVmArea->pages;
344 void *pvMapping = pHdrEx->pVmArea->addr;
345
346 vunmap(pvMapping);
347
348 while (iPage-- > 0)
349 __free_page(papPages[iPage]);
350 kfree(papPages);
351 }
352#endif
353 else
354 vfree(pHdr);
355
356 IPRT_LINUX_RESTORE_EFL_AC();
357}
358
359
360
/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * The result is the smallest order for which 2^order >= cPages, i.e. the
 * base-2 logarithm of cPages rounded up.  Zero pages yield order 0.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int             iOrder = 0;
    unsigned long   cTmp   = cPages;

    /* Position of the most significant set bit, i.e. floor(log2(cPages)). */
    while (cTmp >>= 1)
        ++iOrder;

    /*
     * Round up when any bit besides the top one is set.  The mask must be
     * built from 1UL: shifting a plain int is undefined once iOrder reaches
     * the width of int and then yields the wrong mask for large counts.
     */
    if (cPages & ~(1UL << iOrder))
        ++iOrder;

    return iOrder;
}
379
380
381/**
382 * Allocates physical contiguous memory (below 4GB).
383 * The allocation is page aligned and the content is undefined.
384 *
385 * @returns Pointer to the memory block. This is page aligned.
386 * @param pPhys Where to store the physical address.
387 * @param cb The allocation size in bytes. This is always
388 * rounded up to PAGE_SIZE.
389 */
390RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
391{
392 int cOrder;
393 unsigned cPages;
394 struct page *paPages;
395 void *pvRet;
396 IPRT_LINUX_SAVE_EFL_AC();
397
398 /*
399 * validate input.
400 */
401 Assert(VALID_PTR(pPhys));
402 Assert(cb > 0);
403
404 /*
405 * Allocate page pointer array.
406 */
407 cb = RT_ALIGN_Z(cb, PAGE_SIZE);
408 cPages = cb >> PAGE_SHIFT;
409 cOrder = CalcPowerOf2Order(cPages);
410#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
411 /* ZONE_DMA32: 0-4GB */
412 paPages = alloc_pages(GFP_DMA32 | __GFP_NOWARN, cOrder);
413 if (!paPages)
414#endif
415#ifdef RT_ARCH_AMD64
416 /* ZONE_DMA; 0-16MB */
417 paPages = alloc_pages(GFP_DMA | __GFP_NOWARN, cOrder);
418#else
419 /* ZONE_NORMAL: 0-896MB */
420 paPages = alloc_pages(GFP_USER | __GFP_NOWARN, cOrder);
421#endif
422 if (paPages)
423 {
424 /*
425 * Reserve the pages and mark them executable.
426 */
427 unsigned iPage;
428 for (iPage = 0; iPage < cPages; iPage++)
429 {
430 Assert(!PageHighMem(&paPages[iPage]));
431 if (iPage + 1 < cPages)
432 {
433 AssertMsg( (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
434 == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
435 && page_to_phys(&paPages[iPage]) + PAGE_SIZE
436 == page_to_phys(&paPages[iPage + 1]),
437 ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
438 (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
439 (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
440 }
441
442 SetPageReserved(&paPages[iPage]);
443#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
444 MY_SET_PAGES_EXEC(&paPages[iPage], 1);
445#endif
446 }
447 *pPhys = page_to_phys(paPages);
448 pvRet = phys_to_virt(page_to_phys(paPages));
449 }
450 else
451 pvRet = NULL;
452
453 IPRT_LINUX_RESTORE_EFL_AC();
454 return pvRet;
455}
456RT_EXPORT_SYMBOL(RTMemContAlloc);
457
458
459/**
460 * Frees memory allocated using RTMemContAlloc().
461 *
462 * @param pv Pointer to return from RTMemContAlloc().
463 * @param cb The cb parameter passed to RTMemContAlloc().
464 */
465RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
466{
467 if (pv)
468 {
469 int cOrder;
470 unsigned cPages;
471 unsigned iPage;
472 struct page *paPages;
473 IPRT_LINUX_SAVE_EFL_AC();
474
475 /* validate */
476 AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
477 Assert(cb > 0);
478
479 /* calc order and get pages */
480 cb = RT_ALIGN_Z(cb, PAGE_SIZE);
481 cPages = cb >> PAGE_SHIFT;
482 cOrder = CalcPowerOf2Order(cPages);
483 paPages = virt_to_page(pv);
484
485 /*
486 * Restore page attributes freeing the pages.
487 */
488 for (iPage = 0; iPage < cPages; iPage++)
489 {
490 ClearPageReserved(&paPages[iPage]);
491#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
492 MY_SET_PAGES_NOEXEC(&paPages[iPage], 1);
493#endif
494 }
495 __free_pages(paPages, cOrder);
496 IPRT_LINUX_RESTORE_EFL_AC();
497 }
498}
499RT_EXPORT_SYMBOL(RTMemContFree);
500
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette