VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c@ 48935

Last change on this file since 48935 was 48935, checked in by vboxsync, 11 years ago

Runtime: Whitespace and svn:keyword cleanups by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 14.0 KB
/* $Id: alloc-r0drv-linux.c 48935 2013-10-07 21:19:37Z vboxsync $ */
/** @file
 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
 */

/*
 * Copyright (C) 2006-2012 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#include "the-linux-kernel.h"
#include "internal/iprt.h"
#include <iprt/mem.h>

#include <iprt/assert.h>
#include <iprt/err.h>
#include "r0drv/alloc-r0drv.h"


#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
/**
 * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
 * memory in the module range.  This is preferable to the exec heap below.
 */
#  define RTMEMALLOC_EXEC_VM_AREA
# else
/**
 * We need memory in the module range (~2GB to ~0); this can only be obtained
 * through APIs that are not exported (see module_alloc()).
 *
 * So, we'll have to create a quick and dirty heap here using BSS memory.
 * Very annoying and it's going to restrict us!
 */
#  define RTMEMALLOC_EXEC_HEAP
# endif
#endif

#ifdef RTMEMALLOC_EXEC_HEAP
# include <iprt/heap.h>
# include <iprt/spinlock.h>
# include <iprt/err.h>
#endif


/*******************************************************************************
*   Structures and Typedefs                                                    *
*******************************************************************************/
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
 *
 * This is used with allocating executable memory, for things like generated
 * code and loaded modules.
 */
typedef struct RTMEMLNXHDREX
{
    /** The VM area for this allocation. */
    struct vm_struct   *pVmArea;
    void               *pvDummy;
    /** The header we present to the generic API. */
    RTMEMHDR            Hdr;
} RTMEMLNXHDREX;
AssertCompileSize(RTMEMLNXHDREX, 32);
/** Pointer to an extended memory header. */
typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
#endif
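
/*
 * Layout sketch (derived from rtR0MemAllocExecVmArea and rtR0MemFree below):
 * the extended header sits at the very start of the vmap'ed area, the generic
 * RTMEMHDR is its last member, and the user data follows it.  The free path
 * gets back to the vm_struct via RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr).
 *
 *   pVmArea->addr -> [ pVmArea | pvDummy | Hdr ][ user data ... ]
 *                    |<-- RTMEMLNXHDREX, 32 -->|
 */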


/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
#ifdef RTMEMALLOC_EXEC_HEAP
/** The heap. */
static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
/** Spinlock protecting the heap. */
static RTSPINLOCK   g_HeapExecSpinlock = NIL_RTSPINLOCK;


/**
 * API for cleaning up the heap spinlock on IPRT termination.
 * This is, like RTR0MemExecDonate, specific to AMD64 Linux/GNU.
 */
DECLHIDDEN(void) rtR0MemExecCleanup(void)
{
    RTSpinlockDestroy(g_HeapExecSpinlock);
    g_HeapExecSpinlock = NIL_RTSPINLOCK;
}


/**
 * Donate read+write+execute memory to the exec heap.
 *
 * This API is specific to AMD64 and Linux/GNU.  A kernel module that desires
 * to use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
 * allocated memory in the module if it wishes for GCC generated code to work.
 * GCC can currently only generate modules that work in the address range
 * ~2GB to ~0.
 *
 * The API accepts only a single donation.
 *
 * @returns IPRT status code.
 * @param   pvMemory    Pointer to the memory block.
 * @param   cb          The size of the memory block.
 */
RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
{
    int rc;
    AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);

    rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
    if (RT_SUCCESS(rc))
    {
        rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
        if (RT_FAILURE(rc))
            rtR0MemExecCleanup();
    }
    return rc;
}
RT_EXPORT_SYMBOL(RTR0MemExecDonate);
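
/*
 * Usage sketch (illustrative only; the buffer and function names below are
 * made up, not part of IPRT): a module donates a statically allocated buffer
 * from its own image, once, during init, so that it lies in the kernel module
 * range as required above.
 */
#if 0
static uint8_t g_abExecMemory[_1M]; /* hypothetical static buffer in the module image */

static int exampleModuleInit(void)
{
    int rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
    if (RT_FAILURE(rc))
        return rc;
    /* RTMemExecAlloc() requests can now be served from this heap. */
    return VINF_SUCCESS;
}
#endif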

#endif /* RTMEMALLOC_EXEC_HEAP */


#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Allocate executable kernel memory in the module range.
 *
 * @returns Pointer to the allocation header on success, NULL on failure.
 *
 * @param   cb      The size the user requested.
 */
static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
{
    size_t const        cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
    size_t const        cPages  = cbAlloc >> PAGE_SHIFT;
    struct page       **papPages;
    struct vm_struct   *pVmArea;
    size_t              iPage;

    pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
    if (!pVmArea)
        return NULL;
    pVmArea->nr_pages = 0;    /* paranoia? */
    pVmArea->pages    = NULL; /* paranoia? */

    papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL);
    if (!papPages)
    {
        vunmap(pVmArea->addr);
        return NULL;
    }

    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
        if (!papPages[iPage])
            break;
    }
    if (iPage == cPages)
    {
        /*
         * Map the pages.  The API requires an iterator argument, which can be
         * used, in case of failure, to figure out how much was actually
         * mapped.  Not sure how useful this really is, but whatever.
         *
         * Not entirely sure we really need to set nr_pages and pages here, but
         * they provide a very convenient place for storing something we need
         * in the free function, if nothing else...
         */
        struct page **papPagesIterator = papPages;
        pVmArea->nr_pages = cPages;
        pVmArea->pages    = papPages;
        if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC, &papPagesIterator))
        {
            PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
            pHdrEx->pVmArea = pVmArea;
            pHdrEx->pvDummy = NULL;
            return &pHdrEx->Hdr;
        }

        /* bail out */
        pVmArea->nr_pages = papPagesIterator - papPages;
    }

    vunmap(pVmArea->addr);

    while (iPage-- > 0)
        __free_page(papPages[iPage]);
    kfree(papPages);

    return NULL;
}
#endif /* RTMEMALLOC_EXEC_VM_AREA */


/**
 * OS specific allocation function.
 */
DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
{
    PRTMEMHDR pHdr;

    /*
     * Allocate.
     */
    if (fFlags & RTMEMHDR_FLAG_EXEC)
    {
        if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
            return VERR_NOT_SUPPORTED;

#if defined(RT_ARCH_AMD64)
# ifdef RTMEMALLOC_EXEC_HEAP
        if (g_HeapExec != NIL_RTHEAPSIMPLE)
        {
            RTSpinlockAcquire(g_HeapExecSpinlock);
            pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
            RTSpinlockRelease(g_HeapExecSpinlock);
            fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
        }
        else
            pHdr = NULL;

# elif defined(RTMEMALLOC_EXEC_VM_AREA)
        pHdr = rtR0MemAllocExecVmArea(cb);
        fFlags |= RTMEMHDR_FLAG_EXEC_VM_AREA;

# else  /* !RTMEMALLOC_EXEC_HEAP */
#  error "you do not want to go here..."
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
# endif /* !RTMEMALLOC_EXEC_HEAP */

#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
#else
        pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
#endif
    }
    else
    {
        if (
#if 1 /* vmalloc has serious performance issues, avoid it. */
               cb <= PAGE_SIZE*16 - sizeof(*pHdr)
#else
               cb <= PAGE_SIZE
#endif
            || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
           )
        {
            fFlags |= RTMEMHDR_FLAG_KMALLOC;
            pHdr = kmalloc(cb + sizeof(*pHdr),
                           (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? GFP_ATOMIC : GFP_KERNEL);
            if (RT_UNLIKELY(   !pHdr
                            && cb > PAGE_SIZE
                            && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
            {
                fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
                pHdr = vmalloc(cb + sizeof(*pHdr));
            }
        }
        else
            pHdr = vmalloc(cb + sizeof(*pHdr));
    }
    if (RT_UNLIKELY(!pHdr))
        return VERR_NO_MEMORY;

    /*
     * Initialize.
     */
    pHdr->u32Magic  = RTMEMHDR_MAGIC;
    pHdr->fFlags    = fFlags;
    pHdr->cb        = cb;
    pHdr->cbReq     = cb;

    *ppHdr = pHdr;
    return VINF_SUCCESS;
}
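
/*
 * Note on the resulting layout (sketch; the caller-facing return convention
 * lives in the generic alloc-r0drv code, not in this file): every branch
 * above allocates sizeof(RTMEMHDR) + cb bytes and hands back the header, so
 * the block the caller eventually sees presumably starts right after it.
 *
 *   pHdr -> [ RTMEMHDR ][ cb bytes for the caller ]
 */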


/**
 * OS specific free function.
 */
DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
{
    pHdr->u32Magic += 1;
    if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
        kfree(pHdr);
#ifdef RTMEMALLOC_EXEC_HEAP
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
    {
        RTSpinlockAcquire(g_HeapExecSpinlock);
        RTHeapSimpleFree(g_HeapExec, pHdr);
        RTSpinlockRelease(g_HeapExecSpinlock);
    }
#endif
#ifdef RTMEMALLOC_EXEC_VM_AREA
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
    {
        PRTMEMLNXHDREX pHdrEx    = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
        size_t         iPage     = pHdrEx->pVmArea->nr_pages;
        struct page  **papPages  = pHdrEx->pVmArea->pages;
        void          *pvMapping = pHdrEx->pVmArea->addr;

        vunmap(pvMapping);

        while (iPage-- > 0)
            __free_page(papPages[iPage]);
        kfree(papPages);
    }
#endif
    else
        vfree(pHdr);
}



/**
 * Compute order.  Some functions allocate 2^order pages.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int           iOrder;
    unsigned long cTmp;

    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    if (cPages & ~(1 << iOrder))
        ++iOrder;

    return iOrder;
}
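
/*
 * Example: CalcPowerOf2Order(5) yields 3, since alloc_pages() can only hand
 * out 2^order pages and 2^2 = 4 < 5 <= 2^3 = 8.  Exact powers of two map to
 * their exact order, e.g. CalcPowerOf2Order(4) yields 2.
 */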


/**
 * Allocates physically contiguous memory (below 4GB).
 * The allocation is page aligned and the content is undefined.
 *
 * @returns Pointer to the memory block.  This is page aligned.
 * @param   pPhys   Where to store the physical address.
 * @param   cb      The allocation size in bytes.  This is always
 *                  rounded up to PAGE_SIZE.
 */
RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
{
    int             cOrder;
    unsigned        cPages;
    struct page    *paPages;

    /*
     * validate input.
     */
    Assert(VALID_PTR(pPhys));
    Assert(cb > 0);

    /*
     * Allocate page pointer array.
     */
    cb = RT_ALIGN_Z(cb, PAGE_SIZE);
    cPages = cb >> PAGE_SHIFT;
    cOrder = CalcPowerOf2Order(cPages);
#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
    /* ZONE_DMA32: 0-4GB */
    paPages = alloc_pages(GFP_DMA32, cOrder);
    if (!paPages)
#endif
#ifdef RT_ARCH_AMD64
        /* ZONE_DMA: 0-16MB */
        paPages = alloc_pages(GFP_DMA, cOrder);
#else
        /* ZONE_NORMAL: 0-896MB */
        paPages = alloc_pages(GFP_USER, cOrder);
#endif
    if (paPages)
    {
        /*
         * Reserve the pages and mark them executable.
         */
        unsigned iPage;
        for (iPage = 0; iPage < cPages; iPage++)
        {
            Assert(!PageHighMem(&paPages[iPage]));
            if (iPage + 1 < cPages)
            {
                AssertMsg(   (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
                          == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
                          && page_to_phys(&paPages[iPage]) + PAGE_SIZE
                          == page_to_phys(&paPages[iPage + 1]),
                          ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
                           (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
                           (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
            }

            SetPageReserved(&paPages[iPage]);
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
            MY_SET_PAGES_EXEC(&paPages[iPage], 1);
#endif
        }
        *pPhys = page_to_phys(paPages);
        return phys_to_virt(page_to_phys(paPages));
    }

    return NULL;
}
RT_EXPORT_SYMBOL(RTMemContAlloc);


/**
 * Frees memory allocated using RTMemContAlloc().
 *
 * @param   pv      Pointer returned by RTMemContAlloc().
 * @param   cb      The cb parameter passed to RTMemContAlloc().
 */
RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
{
    if (pv)
    {
        int           cOrder;
        unsigned      cPages;
        unsigned      iPage;
        struct page  *paPages;

        /* validate */
        AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
        Assert(cb > 0);

        /* calc order and get pages */
        cb = RT_ALIGN_Z(cb, PAGE_SIZE);
        cPages = cb >> PAGE_SHIFT;
        cOrder = CalcPowerOf2Order(cPages);
        paPages = virt_to_page(pv);

        /*
         * Restore page attributes and free the pages.
         */
        for (iPage = 0; iPage < cPages; iPage++)
        {
            ClearPageReserved(&paPages[iPage]);
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
            MY_SET_PAGES_NOEXEC(&paPages[iPage], 1);
#endif
        }
        __free_pages(paPages, cOrder);
    }
}
RT_EXPORT_SYMBOL(RTMemContFree);

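/*
 * Usage sketch (illustrative only; the function name is made up): allocate a
 * physically contiguous buffer below 4GB for a device and free it again with
 * the same byte count.
 */
#if 0
static void exampleContigBuffer(void)
{
    RTCCPHYS HCPhys;
    void    *pv = RTMemContAlloc(&HCPhys, 2 * PAGE_SIZE);
    if (pv)
    {
        /* ... program the device with HCPhys, access the buffer through pv ... */
        RTMemContFree(pv, 2 * PAGE_SIZE); /* pass the same cb as at allocation */
    }
}
#endif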