VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c@ 36962

Last change on this file since 36962 was 36962, checked in by vboxsync, 14 years ago

rtR0MemAllocEx/linux: Avoid vmalloc, it has serious performance issues. (I forget why we're switching between vmalloc and kmalloc here.)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 12.8 KB
Line 
1/* $Id: alloc-r0drv-linux.c 36962 2011-05-04 17:43:50Z vboxsync $ */
2/** @file
3 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "the-linux-kernel.h"
32#include "internal/iprt.h"
33#include <iprt/mem.h>
34
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include "r0drv/alloc-r0drv.h"
38
39#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
40/**
41 * We need memory in the module range (~2GB to ~0) this can only be obtained
42 * thru APIs that are not exported (see module_alloc()).
43 *
44 * So, we'll have to create a quick and dirty heap here using BSS memory.
45 * Very annoying and it's going to restrict us!
46 */
47# define RTMEMALLOC_EXEC_HEAP
48#endif
49#ifdef RTMEMALLOC_EXEC_HEAP
50# include <iprt/heap.h>
51# include <iprt/spinlock.h>
52# include <iprt/err.h>
53#endif
54
55
56/*******************************************************************************
57* Global Variables *
58*******************************************************************************/
59#ifdef RTMEMALLOC_EXEC_HEAP
60
61# ifdef CONFIG_DEBUG_SET_MODULE_RONX
62# define RTMEMALLOC_EXEC_HEAP_VM_AREA 1
63# endif
64/** The exec heap handle; stays NIL_RTHEAPSIMPLE until it is handed to RTHeapSimpleInit(). */
65static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
66/** Spinlock protecting the heap; taken around every RTHeapSimpleAlloc/RTHeapSimpleFree call in this file. */
67static RTSPINLOCK g_HeapExecSpinlock = NIL_RTSPINLOCK;
68# ifdef RTMEMALLOC_EXEC_HEAP_VM_AREA
69static struct page **g_apPages; /**< kmalloc'ed array of the pages backing the heap VM area (RTR0MemExecInit). */
70static void *g_pvHeap; /**< Start address of the VM area obtained from __get_vm_area(); what vunmap() gets on cleanup. */
71static size_t g_cPages; /**< Number of entries in g_apPages that hold an allocated page. */
72# endif
73
74
75/**
76 * API for cleaning up the heap spinlock on IPRT termination.
77 * This is as RTMemExecDonate specific to AMD64 Linux/GNU.
78 */
79DECLHIDDEN(void) rtR0MemExecCleanup(void)
80{
81# ifdef RTMEMALLOC_EXEC_HEAP_VM_AREA
82 unsigned i;
83
84 /* according to linux/drivers/lguest/core.c this function undoes
85 * map_vm_area() as well as __get_vm_area(). */
86 if (g_pvHeap)
87 vunmap(g_pvHeap);
88 for (i = 0; i < g_cPages; i++)
89 __free_page(g_apPages[i]);
90 kfree(g_apPages);
91# endif
92
93 RTSpinlockDestroy(g_HeapExecSpinlock);
94 g_HeapExecSpinlock = NIL_RTSPINLOCK;
95}
96
97
98# ifndef RTMEMALLOC_EXEC_HEAP_VM_AREA
99/**
100 * Donate read+write+execute memory to the exec heap.
101 *
102 * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
103 * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
104 * allocated memory in the module if it wishes for GCC generated code to work.
105 * GCC can only generate modules that work in the address range ~2GB to ~0
106 * currently.
107 *
108 * The API only accept one single donation.
109 *
110 * @returns IPRT status code.
111 * @param pvMemory Pointer to the memory block.
112 * @param cb The size of the memory block.
113 */
114RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
115{
116 int rc;
117 AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);
118
119 rc = RTSpinlockCreate(&g_HeapExecSpinlock);
120 if (RT_SUCCESS(rc))
121 {
122 rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
123 if (RT_FAILURE(rc))
124 rtR0MemExecCleanup();
125 }
126 return rc;
127}
128RT_EXPORT_SYMBOL(RTR0MemExecDonate);
129
130# else /* RTMEMALLOC_EXEC_HEAP_VM_AREA */
131
132/**
133 * RTR0MemExecDonate() does not work if CONFIG_DEBUG_SET_MODULE_RONX is enabled.
134 * In that case, allocate a VM area in the modules range and back it with kernel
135 * memory. Unfortunately __vmalloc_area() is not exported so we have to emulate
136 * it.
137 */
138RTR0DECL(int) RTR0MemExecInit(size_t cb)
139{
140 int rc;
141 struct vm_struct *area;
142 size_t cPages;
143 size_t cbPages;
144 unsigned i;
145 struct page **ppPages;
146
147 AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);
148
149 rc = RTSpinlockCreate(&g_HeapExecSpinlock);
150 if (RT_SUCCESS(rc))
151 {
152 cb = RT_ALIGN(cb, PAGE_SIZE);
153 area = __get_vm_area(cb, VM_ALLOC, MODULES_VADDR, MODULES_END);
154 if (!area)
155 {
156 rtR0MemExecCleanup();
157 return VERR_NO_MEMORY;
158 }
159 g_pvHeap = area->addr;
160 cPages = cb >> PAGE_SHIFT;
161 area->nr_pages = 0;
162 cbPages = cPages * sizeof(struct page *);
163 g_apPages = kmalloc(cbPages, GFP_KERNEL);
164 area->pages = g_apPages;
165 if (!g_apPages)
166 {
167 rtR0MemExecCleanup();
168 return VERR_NO_MEMORY;
169 }
170 memset(area->pages, 0, cbPages);
171 for (i = 0; i < cPages; i++)
172 {
173 g_apPages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
174 if (!g_apPages[i])
175 {
176 area->nr_pages = i;
177 g_cPages = i;
178 rtR0MemExecCleanup();
179 return VERR_NO_MEMORY;
180 }
181 }
182 area->nr_pages = cPages;
183 g_cPages = i;
184 ppPages = g_apPages;
185 if (map_vm_area(area, PAGE_KERNEL_EXEC, &ppPages))
186 {
187 rtR0MemExecCleanup();
188 return VERR_NO_MEMORY;
189 }
190
191 rc = RTHeapSimpleInit(&g_HeapExec, g_pvHeap, cb);
192 if (RT_FAILURE(rc))
193 rtR0MemExecCleanup();
194 }
195 return rc;
196}
197RT_EXPORT_SYMBOL(RTR0MemExecInit);
198# endif /* RTMEMALLOC_EXEC_HEAP_VM_AREA */
199#endif /* RTMEMALLOC_EXEC_HEAP */
200
201
202
203/**
204 * OS specific allocation function.
205 */
206DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
207{
208 PRTMEMHDR pHdr;
209
210 /*
211 * Allocate.
212 */
213 if (fFlags & RTMEMHDR_FLAG_EXEC)
214 {
215 if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
216 return VERR_NOT_SUPPORTED;
217
218#if defined(RT_ARCH_AMD64)
219# ifdef RTMEMALLOC_EXEC_HEAP
220 if (g_HeapExec != NIL_RTHEAPSIMPLE)
221 {
222 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
223 RTSpinlockAcquireNoInts(g_HeapExecSpinlock, &SpinlockTmp);
224 pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
225 RTSpinlockReleaseNoInts(g_HeapExecSpinlock, &SpinlockTmp);
226 fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
227 }
228 else
229 pHdr = NULL;
230# else /* !RTMEMALLOC_EXEC_HEAP */
231 pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
232# endif /* !RTMEMALLOC_EXEC_HEAP */
233
234#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
235 pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
236#else
237 pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
238#endif
239 }
240 else
241 {
242 if (
243#if 1 /* vmalloc has serious performance issues, avoid it. */
244 cb <= PAGE_SIZE*16 - sizeof(*pHdr)
245#else
246 cb <= PAGE_SIZE
247#endif
248 || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
249 )
250 {
251 fFlags |= RTMEMHDR_FLAG_KMALLOC;
252 pHdr = kmalloc(cb + sizeof(*pHdr),
253 (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? GFP_ATOMIC : GFP_KERNEL);
254 if (RT_UNLIKELY( !pHdr
255 && cb > PAGE_SIZE
256 && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
257 {
258 fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
259 pHdr = vmalloc(cb + sizeof(*pHdr));
260 }
261 }
262 else
263 pHdr = vmalloc(cb + sizeof(*pHdr));
264 }
265 if (RT_UNLIKELY(!pHdr))
266 return VERR_NO_MEMORY;
267
268 /*
269 * Initialize.
270 */
271 pHdr->u32Magic = RTMEMHDR_MAGIC;
272 pHdr->fFlags = fFlags;
273 pHdr->cb = cb;
274 pHdr->cbReq = cb;
275
276 *ppHdr = pHdr;
277 return VINF_SUCCESS;
278}
279
280
281/**
282 * OS specific free function.
283 */
284DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
285{
286 pHdr->u32Magic += 1;
287 if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
288 kfree(pHdr);
289#ifdef RTMEMALLOC_EXEC_HEAP
290 else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
291 {
292 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
293 RTSpinlockAcquireNoInts(g_HeapExecSpinlock, &SpinlockTmp);
294 RTHeapSimpleFree(g_HeapExec, pHdr);
295 RTSpinlockReleaseNoInts(g_HeapExecSpinlock, &SpinlockTmp);
296 }
297#endif
298 else
299 vfree(pHdr);
300}
301
302
/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * The result is the smallest order for which 2^order >= cPages, i.e.
 * ceil(log2(cPages)).  Zero pages yield order 0.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int             iOrder = 0;
    unsigned long   cTmp;

    /* Locate the most significant set bit: iOrder = floor(log2(cPages)). */
    for (cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;

    /* Round up when cPages isn't an exact power of two.  The mask has to be
       built as unsigned long: shifting a plain int by 31 or more bits is
       undefined and gave wrong orders for counts of 2^32 pages and above. */
    if (cPages & ~(1UL << iOrder))
        ++iOrder;

    return iOrder;
}
321
322
323/**
324 * Allocates physical contiguous memory (below 4GB).
325 * The allocation is page aligned and the content is undefined.
326 *
327 * @returns Pointer to the memory block. This is page aligned.
328 * @param pPhys Where to store the physical address.
329 * @param cb The allocation size in bytes. This is always
330 * rounded up to PAGE_SIZE.
331 */
332RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
333{
334 int cOrder;
335 unsigned cPages;
336 struct page *paPages;
337
338 /*
339 * validate input.
340 */
341 Assert(VALID_PTR(pPhys));
342 Assert(cb > 0);
343
344 /*
345 * Allocate page pointer array.
346 */
347 cb = RT_ALIGN_Z(cb, PAGE_SIZE);
348 cPages = cb >> PAGE_SHIFT;
349 cOrder = CalcPowerOf2Order(cPages);
350#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
351 /* ZONE_DMA32: 0-4GB */
352 paPages = alloc_pages(GFP_DMA32, cOrder);
353 if (!paPages)
354#endif
355#ifdef RT_ARCH_AMD64
356 /* ZONE_DMA; 0-16MB */
357 paPages = alloc_pages(GFP_DMA, cOrder);
358#else
359 /* ZONE_NORMAL: 0-896MB */
360 paPages = alloc_pages(GFP_USER, cOrder);
361#endif
362 if (paPages)
363 {
364 /*
365 * Reserve the pages and mark them executable.
366 */
367 unsigned iPage;
368 for (iPage = 0; iPage < cPages; iPage++)
369 {
370 Assert(!PageHighMem(&paPages[iPage]));
371 if (iPage + 1 < cPages)
372 {
373 AssertMsg( (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
374 == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
375 && page_to_phys(&paPages[iPage]) + PAGE_SIZE
376 == page_to_phys(&paPages[iPage + 1]),
377 ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
378 (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
379 (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
380 }
381
382 SetPageReserved(&paPages[iPage]);
383#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
384 MY_SET_PAGES_EXEC(&paPages[iPage], 1);
385#endif
386 }
387 *pPhys = page_to_phys(paPages);
388 return phys_to_virt(page_to_phys(paPages));
389 }
390
391 return NULL;
392}
393RT_EXPORT_SYMBOL(RTMemContAlloc);
394
395
396/**
397 * Frees memory allocated ysing RTMemContAlloc().
398 *
399 * @param pv Pointer to return from RTMemContAlloc().
400 * @param cb The cb parameter passed to RTMemContAlloc().
401 */
402RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
403{
404 if (pv)
405 {
406 int cOrder;
407 unsigned cPages;
408 unsigned iPage;
409 struct page *paPages;
410
411 /* validate */
412 AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
413 Assert(cb > 0);
414
415 /* calc order and get pages */
416 cb = RT_ALIGN_Z(cb, PAGE_SIZE);
417 cPages = cb >> PAGE_SHIFT;
418 cOrder = CalcPowerOf2Order(cPages);
419 paPages = virt_to_page(pv);
420
421 /*
422 * Restore page attributes freeing the pages.
423 */
424 for (iPage = 0; iPage < cPages; iPage++)
425 {
426 ClearPageReserved(&paPages[iPage]);
427#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
428 MY_SET_PAGES_NOEXEC(&paPages[iPage], 1);
429#endif
430 }
431 __free_pages(paPages, cOrder);
432 }
433}
434RT_EXPORT_SYMBOL(RTMemContFree);
435
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette