VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c@ 40806

Last change on this file since 40806 was 40806, checked in by vboxsync, 13 years ago

RTSpinlock: Redid the interface, eliminating NoInts and Tmp. Whether a spinlock is interrupt safe or not is now defined at creation time, preventing stupid bugs arising from calling the wrong acquire and/or release methods somewhere. The saved flags are stored in the spinlock structure, eliminating the annoying Tmp variable. Needs testing on each platform before fixing the build burn.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 12.7 KB
Line 
1/* $Id: alloc-r0drv-linux.c 40806 2012-04-06 21:05:19Z vboxsync $ */
2/** @file
3 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "the-linux-kernel.h"
32#include "internal/iprt.h"
33#include <iprt/mem.h>
34
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include "r0drv/alloc-r0drv.h"
38
39#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
40/**
41 * We need memory in the module range (~2GB to ~0) this can only be obtained
42 * thru APIs that are not exported (see module_alloc()).
43 *
44 * So, we'll have to create a quick and dirty heap here using BSS memory.
45 * Very annoying and it's going to restrict us!
46 */
47# define RTMEMALLOC_EXEC_HEAP
48#endif
49#ifdef RTMEMALLOC_EXEC_HEAP
50# include <iprt/heap.h>
51# include <iprt/spinlock.h>
52# include <iprt/err.h>
53#endif
54
55
56/*******************************************************************************
57* Global Variables *
58*******************************************************************************/
59#ifdef RTMEMALLOC_EXEC_HEAP
60
61# ifdef CONFIG_DEBUG_SET_MODULE_RONX
62# define RTMEMALLOC_EXEC_HEAP_VM_AREA 1
63# endif
64/** The heap. */
65static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
66/** Spinlock protecting the heap. */
67static RTSPINLOCK g_HeapExecSpinlock = NIL_RTSPINLOCK;
68# ifdef RTMEMALLOC_EXEC_HEAP_VM_AREA
69static struct page **g_apPages; /**< Page pointer array kmalloc'ed by RTR0MemExecInit(); the pages and the array are freed by rtR0MemExecCleanup(). */
70static void *g_pvHeap; /**< Start address (area->addr) of the VM area obtained by RTR0MemExecInit(); vunmap'ed by rtR0MemExecCleanup(). */
71static size_t g_cPages; /**< Number of leading g_apPages entries that hold an allocated page. */
72# endif
73
74
75/**
76 * API for cleaning up the heap spinlock on IPRT termination.
77 * This is as RTMemExecDonate specific to AMD64 Linux/GNU.
78 */
79DECLHIDDEN(void) rtR0MemExecCleanup(void)
80{
81# ifdef RTMEMALLOC_EXEC_HEAP_VM_AREA
82 unsigned i;
83
84 /* according to linux/drivers/lguest/core.c this function undoes
85 * map_vm_area() as well as __get_vm_area(). */
86 if (g_pvHeap)
87 vunmap(g_pvHeap);
88 for (i = 0; i < g_cPages; i++)
89 __free_page(g_apPages[i]);
90 kfree(g_apPages);
91# endif
92
93 RTSpinlockDestroy(g_HeapExecSpinlock);
94 g_HeapExecSpinlock = NIL_RTSPINLOCK;
95}
96
97
98# ifndef RTMEMALLOC_EXEC_HEAP_VM_AREA
99/**
100 * Donate read+write+execute memory to the exec heap.
101 *
102 * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
103 * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
104 * allocated memory in the module if it wishes for GCC generated code to work.
105 * GCC can only generate modules that work in the address range ~2GB to ~0
106 * currently.
107 *
108 * The API only accept one single donation.
109 *
110 * @returns IPRT status code.
111 * @param pvMemory Pointer to the memory block.
112 * @param cb The size of the memory block.
113 */
114RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
115{
116 int rc;
117 AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);
118
119 rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
120 if (RT_SUCCESS(rc))
121 {
122 rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
123 if (RT_FAILURE(rc))
124 rtR0MemExecCleanup();
125 }
126 return rc;
127}
128RT_EXPORT_SYMBOL(RTR0MemExecDonate);
129
130# else /* !RTMEMALLOC_EXEC_HEAP_VM_AREA */
131
132/**
133 * RTR0MemExecDonate() does not work if CONFIG_DEBUG_SET_MODULE_RONX is enabled.
134 * In that case, allocate a VM area in the modules range and back it with kernel
135 * memory. Unfortunately __vmalloc_area() is not exported so we have to emulate
136 * it.
137 */
138RTR0DECL(int) RTR0MemExecInit(size_t cb)
139{
140 int rc;
141 struct vm_struct *area;
142 size_t cPages;
143 size_t cbPages;
144 unsigned i;
145 struct page **ppPages;
146
147 AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);
148
149 rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecInit");
150 if (RT_SUCCESS(rc))
151 {
152 cb = RT_ALIGN(cb, PAGE_SIZE);
153 area = __get_vm_area(cb, VM_ALLOC, MODULES_VADDR, MODULES_END);
154 if (!area)
155 {
156 rtR0MemExecCleanup();
157 return VERR_NO_MEMORY;
158 }
159 g_pvHeap = area->addr;
160 cPages = cb >> PAGE_SHIFT;
161 area->nr_pages = 0;
162 cbPages = cPages * sizeof(struct page *);
163 g_apPages = kmalloc(cbPages, GFP_KERNEL);
164 area->pages = g_apPages;
165 if (!g_apPages)
166 {
167 rtR0MemExecCleanup();
168 return VERR_NO_MEMORY;
169 }
170 memset(area->pages, 0, cbPages);
171 for (i = 0; i < cPages; i++)
172 {
173 g_apPages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
174 if (!g_apPages[i])
175 {
176 area->nr_pages = i;
177 g_cPages = i;
178 rtR0MemExecCleanup();
179 return VERR_NO_MEMORY;
180 }
181 }
182 area->nr_pages = cPages;
183 g_cPages = i;
184 ppPages = g_apPages;
185 if (map_vm_area(area, PAGE_KERNEL_EXEC, &ppPages))
186 {
187 rtR0MemExecCleanup();
188 return VERR_NO_MEMORY;
189 }
190
191 rc = RTHeapSimpleInit(&g_HeapExec, g_pvHeap, cb);
192 if (RT_FAILURE(rc))
193 rtR0MemExecCleanup();
194 }
195 return rc;
196}
197RT_EXPORT_SYMBOL(RTR0MemExecInit);
198# endif /* RTMEMALLOC_EXEC_HEAP_VM_AREA */
199#endif /* RTMEMALLOC_EXEC_HEAP */
200
201
202
203/**
204 * OS specific allocation function.
205 */
206DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
207{
208 PRTMEMHDR pHdr;
209
210 /*
211 * Allocate.
212 */
213 if (fFlags & RTMEMHDR_FLAG_EXEC)
214 {
215 if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
216 return VERR_NOT_SUPPORTED;
217
218#if defined(RT_ARCH_AMD64)
219# ifdef RTMEMALLOC_EXEC_HEAP
220 if (g_HeapExec != NIL_RTHEAPSIMPLE)
221 {
222 RTSpinlockAcquire(g_HeapExecSpinlock);
223 pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
224 RTSpinlockRelease(g_HeapExecSpinlock);
225 fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
226 }
227 else
228 pHdr = NULL;
229# else /* !RTMEMALLOC_EXEC_HEAP */
230 pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
231# endif /* !RTMEMALLOC_EXEC_HEAP */
232
233#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
234 pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
235#else
236 pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
237#endif
238 }
239 else
240 {
241 if (
242#if 1 /* vmalloc has serious performance issues, avoid it. */
243 cb <= PAGE_SIZE*16 - sizeof(*pHdr)
244#else
245 cb <= PAGE_SIZE
246#endif
247 || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
248 )
249 {
250 fFlags |= RTMEMHDR_FLAG_KMALLOC;
251 pHdr = kmalloc(cb + sizeof(*pHdr),
252 (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? GFP_ATOMIC : GFP_KERNEL);
253 if (RT_UNLIKELY( !pHdr
254 && cb > PAGE_SIZE
255 && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
256 {
257 fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
258 pHdr = vmalloc(cb + sizeof(*pHdr));
259 }
260 }
261 else
262 pHdr = vmalloc(cb + sizeof(*pHdr));
263 }
264 if (RT_UNLIKELY(!pHdr))
265 return VERR_NO_MEMORY;
266
267 /*
268 * Initialize.
269 */
270 pHdr->u32Magic = RTMEMHDR_MAGIC;
271 pHdr->fFlags = fFlags;
272 pHdr->cb = cb;
273 pHdr->cbReq = cb;
274
275 *ppHdr = pHdr;
276 return VINF_SUCCESS;
277}
278
279
280/**
281 * OS specific free function.
282 */
283DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
284{
285 pHdr->u32Magic += 1;
286 if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
287 kfree(pHdr);
288#ifdef RTMEMALLOC_EXEC_HEAP
289 else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
290 {
291 RTSpinlockAcquire(g_HeapExecSpinlock);
292 RTHeapSimpleFree(g_HeapExec, pHdr);
293 RTSpinlockRelease(g_HeapExecSpinlock);
294 }
295#endif
296 else
297 vfree(pHdr);
298}
299
300
/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * The result is the smallest order for which 2^order >= cPages, i.e. the
 * base-2 logarithm of cPages rounded up.  An input of 0 yields order 0.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int           iOrder = 0;
    unsigned long cTmp;

    /* iOrder = index of the most significant set bit (floor of log2). */
    for (cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;

    /* Round up if any bit below the top one is set.  The mask has to be
       built with an unsigned long constant: shifting a plain int 1 by 31 or
       more bits is undefined and gives wrong orders for huge page counts. */
    if (cPages & ~(1UL << iOrder))
        ++iOrder;

    return iOrder;
}
319
320
321/**
322 * Allocates physical contiguous memory (below 4GB).
323 * The allocation is page aligned and the content is undefined.
324 *
325 * @returns Pointer to the memory block. This is page aligned.
326 * @param pPhys Where to store the physical address.
327 * @param cb The allocation size in bytes. This is always
328 * rounded up to PAGE_SIZE.
329 */
330RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
331{
332 int cOrder;
333 unsigned cPages;
334 struct page *paPages;
335
336 /*
337 * validate input.
338 */
339 Assert(VALID_PTR(pPhys));
340 Assert(cb > 0);
341
342 /*
343 * Allocate page pointer array.
344 */
345 cb = RT_ALIGN_Z(cb, PAGE_SIZE);
346 cPages = cb >> PAGE_SHIFT;
347 cOrder = CalcPowerOf2Order(cPages);
348#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
349 /* ZONE_DMA32: 0-4GB */
350 paPages = alloc_pages(GFP_DMA32, cOrder);
351 if (!paPages)
352#endif
353#ifdef RT_ARCH_AMD64
354 /* ZONE_DMA; 0-16MB */
355 paPages = alloc_pages(GFP_DMA, cOrder);
356#else
357 /* ZONE_NORMAL: 0-896MB */
358 paPages = alloc_pages(GFP_USER, cOrder);
359#endif
360 if (paPages)
361 {
362 /*
363 * Reserve the pages and mark them executable.
364 */
365 unsigned iPage;
366 for (iPage = 0; iPage < cPages; iPage++)
367 {
368 Assert(!PageHighMem(&paPages[iPage]));
369 if (iPage + 1 < cPages)
370 {
371 AssertMsg( (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
372 == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
373 && page_to_phys(&paPages[iPage]) + PAGE_SIZE
374 == page_to_phys(&paPages[iPage + 1]),
375 ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
376 (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
377 (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
378 }
379
380 SetPageReserved(&paPages[iPage]);
381#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
382 MY_SET_PAGES_EXEC(&paPages[iPage], 1);
383#endif
384 }
385 *pPhys = page_to_phys(paPages);
386 return phys_to_virt(page_to_phys(paPages));
387 }
388
389 return NULL;
390}
391RT_EXPORT_SYMBOL(RTMemContAlloc);
392
393
394/**
395 * Frees memory allocated ysing RTMemContAlloc().
396 *
397 * @param pv Pointer to return from RTMemContAlloc().
398 * @param cb The cb parameter passed to RTMemContAlloc().
399 */
400RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
401{
402 if (pv)
403 {
404 int cOrder;
405 unsigned cPages;
406 unsigned iPage;
407 struct page *paPages;
408
409 /* validate */
410 AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
411 Assert(cb > 0);
412
413 /* calc order and get pages */
414 cb = RT_ALIGN_Z(cb, PAGE_SIZE);
415 cPages = cb >> PAGE_SHIFT;
416 cOrder = CalcPowerOf2Order(cPages);
417 paPages = virt_to_page(pv);
418
419 /*
420 * Restore page attributes freeing the pages.
421 */
422 for (iPage = 0; iPage < cPages; iPage++)
423 {
424 ClearPageReserved(&paPages[iPage]);
425#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
426 MY_SET_PAGES_NOEXEC(&paPages[iPage], 1);
427#endif
428 }
429 __free_pages(paPages, cOrder);
430 }
431}
432RT_EXPORT_SYMBOL(RTMemContFree);
433
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette