VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c@ 40806

Last change on this file since 40806 was 39808, checked in by vboxsync, 13 years ago

Runtime/r0drv: get the physical address of Linux kernel kmap mappings too. Make it work on RHEL3 and other Linux 2.4 systems.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Rev
File size: 51.0 KB
Line 
1/* $Revision: 39808 $ */
2/** @file
3 * IPRT - Ring-0 Memory Objects, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "the-linux-kernel.h"
32
33#include <iprt/memobj.h>
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/log.h>
37#include <iprt/process.h>
38#include <iprt/string.h>
39#include "internal/memobj.h"
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/* early 2.6 kernels */
46#ifndef PAGE_SHARED_EXEC
47# define PAGE_SHARED_EXEC PAGE_SHARED
48#endif
49#ifndef PAGE_READONLY_EXEC
50# define PAGE_READONLY_EXEC PAGE_READONLY
51#endif
52
53/*
54 * 2.6.29+ kernels don't work with remap_pfn_range() anymore because
55 * track_pfn_vma_new() is apparently not defined for non-RAM pages.
56 * It should be safe to use vm_insert_page() older kernels as well.
57 */
58#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
59# define VBOX_USE_INSERT_PAGE
60#endif
61#if defined(CONFIG_X86_PAE) \
62 && ( defined(HAVE_26_STYLE_REMAP_PAGE_RANGE) \
63 || ( LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) \
64 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)))
65# define VBOX_USE_PAE_HACK
66#endif
67
68
69/*******************************************************************************
70* Structures and Typedefs *
71*******************************************************************************/
72/**
73 * The Darwin version of the memory object structure.
74 */
75typedef struct RTR0MEMOBJLNX
76{
77 /** The core structure. */
78 RTR0MEMOBJINTERNAL Core;
79 /** Set if the allocation is contiguous.
80 * This means it has to be given back as one chunk. */
81 bool fContiguous;
82 /** Set if we've vmap'ed the memory into ring-0. */
83 bool fMappedToRing0;
84 /** The pages in the apPages array. */
85 size_t cPages;
86 /** Array of struct page pointers. (variable size) */
87 struct page *apPages[1];
88} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
89
90
91static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx);
92
93
94/**
95 * Helper that converts from a RTR0PROCESS handle to a linux task.
96 *
97 * @returns The corresponding Linux task.
98 * @param R0Process IPRT ring-0 process handle.
99 */
100static struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
101{
102 /** @todo fix rtR0ProcessToLinuxTask!! */
103 return R0Process == RTR0ProcHandleSelf() ? current : NULL;
104}
105
106
/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * @returns The smallest order for which 2^order >= cPages (0 when cPages is 0).
 * @param   cPages      Number of pages.
 */
static int rtR0MemObjLinuxOrder(size_t cPages)
{
    int    iOrder   = 0;
    size_t cShifted = cPages >> 1;

    /* Find the position of the most significant set bit. */
    while (cShifted != 0)
    {
        iOrder++;
        cShifted >>= 1;
    }

    /* Round up unless cPages is an exact power of two (or zero). */
    if (cPages & (cPages - 1))
        iOrder++;

    return iOrder;
}
125
126
127/**
128 * Converts from RTMEM_PROT_* to Linux PAGE_*.
129 *
130 * @returns Linux page protection constant.
131 * @param fProt The IPRT protection mask.
132 * @param fKernel Whether it applies to kernel or user space.
133 */
134static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
135{
136 switch (fProt)
137 {
138 default:
139 AssertMsgFailed(("%#x %d\n", fProt, fKernel));
140 case RTMEM_PROT_NONE:
141 return PAGE_NONE;
142
143 case RTMEM_PROT_READ:
144 return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
145
146 case RTMEM_PROT_WRITE:
147 case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
148 return fKernel ? PAGE_KERNEL : PAGE_SHARED;
149
150 case RTMEM_PROT_EXEC:
151 case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
152#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
153 if (fKernel)
154 {
155 pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
156 pgprot_val(fPg) &= ~_PAGE_RW;
157 return fPg;
158 }
159 return PAGE_READONLY_EXEC;
160#else
161 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
162#endif
163
164 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
165 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
166 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
167 }
168}
169
170
171/**
172 * Internal worker that allocates physical pages and creates the memory object for them.
173 *
174 * @returns IPRT status code.
175 * @param ppMemLnx Where to store the memory object pointer.
176 * @param enmType The object type.
177 * @param cb The number of bytes to allocate.
178 * @param uAlignment The alignment of the physical memory.
179 * Only valid if fContiguous == true, ignored otherwise.
180 * @param fFlagsLnx The page allocation flags (GPFs).
181 * @param fContiguous Whether the allocation must be contiguous.
182 * @param rcNoMem What to return when we're out of pages.
183 */
184static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb,
185 size_t uAlignment, unsigned fFlagsLnx, bool fContiguous, int rcNoMem)
186{
187 size_t iPage;
188 size_t const cPages = cb >> PAGE_SHIFT;
189 struct page *paPages;
190
191 /*
192 * Allocate a memory object structure that's large enough to contain
193 * the page pointer array.
194 */
195 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
196 if (!pMemLnx)
197 return VERR_NO_MEMORY;
198 pMemLnx->cPages = cPages;
199
200 if (cPages > 255)
201 {
202# ifdef __GFP_REPEAT
203 /* Try hard to allocate the memory, but the allocation attempt might fail. */
204 fFlagsLnx |= __GFP_REPEAT;
205# endif
206# ifdef __GFP_NOMEMALLOC
207 /* Introduced with Linux 2.6.12: Don't use emergency reserves */
208 fFlagsLnx |= __GFP_NOMEMALLOC;
209# endif
210 }
211
212 /*
213 * Allocate the pages.
214 * For small allocations we'll try contiguous first and then fall back on page by page.
215 */
216#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
217 if ( fContiguous
218 || cb <= PAGE_SIZE * 2)
219 {
220# ifdef VBOX_USE_INSERT_PAGE
221 paPages = alloc_pages(fFlagsLnx | __GFP_COMP, rtR0MemObjLinuxOrder(cPages));
222# else
223 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
224# endif
225 if (paPages)
226 {
227 fContiguous = true;
228 for (iPage = 0; iPage < cPages; iPage++)
229 pMemLnx->apPages[iPage] = &paPages[iPage];
230 }
231 else if (fContiguous)
232 {
233 rtR0MemObjDelete(&pMemLnx->Core);
234 return rcNoMem;
235 }
236 }
237
238 if (!fContiguous)
239 {
240 for (iPage = 0; iPage < cPages; iPage++)
241 {
242 pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx);
243 if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
244 {
245 while (iPage-- > 0)
246 __free_page(pMemLnx->apPages[iPage]);
247 rtR0MemObjDelete(&pMemLnx->Core);
248 return rcNoMem;
249 }
250 }
251 }
252
253#else /* < 2.4.22 */
254 /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
255 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
256 if (!paPages)
257 {
258 rtR0MemObjDelete(&pMemLnx->Core);
259 return rcNoMem;
260 }
261 for (iPage = 0; iPage < cPages; iPage++)
262 {
263 pMemLnx->apPages[iPage] = &paPages[iPage];
264 MY_SET_PAGES_EXEC(pMemLnx->apPages[iPage], 1);
265 if (PageHighMem(pMemLnx->apPages[iPage]))
266 BUG();
267 }
268
269 fContiguous = true;
270#endif /* < 2.4.22 */
271 pMemLnx->fContiguous = fContiguous;
272
273 /*
274 * Reserve the pages.
275 */
276 for (iPage = 0; iPage < cPages; iPage++)
277 SetPageReserved(pMemLnx->apPages[iPage]);
278
279 /*
280 * Note that the physical address of memory allocated with alloc_pages(flags, order)
281 * is always 2^(PAGE_SHIFT+order)-aligned.
282 */
283 if ( fContiguous
284 && uAlignment > PAGE_SIZE)
285 {
286 /*
287 * Check for alignment constraints. The physical address of memory allocated with
288 * alloc_pages(flags, order) is always 2^(PAGE_SHIFT+order)-aligned.
289 */
290 if (RT_UNLIKELY(page_to_phys(pMemLnx->apPages[0]) & (uAlignment - 1)))
291 {
292 /*
293 * This should never happen!
294 */
295 printk("rtR0MemObjLinuxAllocPages(cb=0x%lx, uAlignment=0x%lx): alloc_pages(..., %d) returned physical memory at 0x%lx!\n",
296 (unsigned long)cb, (unsigned long)uAlignment, rtR0MemObjLinuxOrder(cPages), (unsigned long)page_to_phys(pMemLnx->apPages[0]));
297 rtR0MemObjLinuxFreePages(pMemLnx);
298 return rcNoMem;
299 }
300 }
301
302 *ppMemLnx = pMemLnx;
303 return VINF_SUCCESS;
304}
305
306
307/**
308 * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
309 *
310 * This method does NOT free the object.
311 *
312 * @param pMemLnx The object which physical pages should be freed.
313 */
314static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
315{
316 size_t iPage = pMemLnx->cPages;
317 if (iPage > 0)
318 {
319 /*
320 * Restore the page flags.
321 */
322 while (iPage-- > 0)
323 {
324 ClearPageReserved(pMemLnx->apPages[iPage]);
325#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
326#else
327 MY_SET_PAGES_NOEXEC(pMemLnx->apPages[iPage], 1);
328#endif
329 }
330
331 /*
332 * Free the pages.
333 */
334#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
335 if (!pMemLnx->fContiguous)
336 {
337 iPage = pMemLnx->cPages;
338 while (iPage-- > 0)
339 __free_page(pMemLnx->apPages[iPage]);
340 }
341 else
342#endif
343 __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
344
345 pMemLnx->cPages = 0;
346 }
347}
348
349
350/**
351 * Maps the allocation into ring-0.
352 *
353 * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJ::fMappedToRing0 members.
354 *
355 * Contiguous mappings that isn't in 'high' memory will already be mapped into kernel
356 * space, so we'll use that mapping if possible. If execute access is required, we'll
357 * play safe and do our own mapping.
358 *
359 * @returns IPRT status code.
360 * @param pMemLnx The linux memory object to map.
361 * @param fExecutable Whether execute access is required.
362 */
363static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
364{
365 int rc = VINF_SUCCESS;
366
367 /*
368 * Choose mapping strategy.
369 */
370 bool fMustMap = fExecutable
371 || !pMemLnx->fContiguous;
372 if (!fMustMap)
373 {
374 size_t iPage = pMemLnx->cPages;
375 while (iPage-- > 0)
376 if (PageHighMem(pMemLnx->apPages[iPage]))
377 {
378 fMustMap = true;
379 break;
380 }
381 }
382
383 Assert(!pMemLnx->Core.pv);
384 Assert(!pMemLnx->fMappedToRing0);
385
386 if (fMustMap)
387 {
388 /*
389 * Use vmap - 2.4.22 and later.
390 */
391#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
392 pgprot_t fPg;
393 pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
394# ifdef _PAGE_NX
395 if (!fExecutable)
396 pgprot_val(fPg) |= _PAGE_NX;
397# endif
398
399# ifdef VM_MAP
400 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
401# else
402 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
403# endif
404 if (pMemLnx->Core.pv)
405 pMemLnx->fMappedToRing0 = true;
406 else
407 rc = VERR_MAP_FAILED;
408#else /* < 2.4.22 */
409 rc = VERR_NOT_SUPPORTED;
410#endif
411 }
412 else
413 {
414 /*
415 * Use the kernel RAM mapping.
416 */
417 pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
418 Assert(pMemLnx->Core.pv);
419 }
420
421 return rc;
422}
423
424
425/**
426 * Undos what rtR0MemObjLinuxVMap() did.
427 *
428 * @param pMemLnx The linux memory object.
429 */
430static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
431{
432#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
433 if (pMemLnx->fMappedToRing0)
434 {
435 Assert(pMemLnx->Core.pv);
436 vunmap(pMemLnx->Core.pv);
437 pMemLnx->fMappedToRing0 = false;
438 }
439#else /* < 2.4.22 */
440 Assert(!pMemLnx->fMappedToRing0);
441#endif
442 pMemLnx->Core.pv = NULL;
443}
444
445
446DECLHIDDEN(int) rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
447{
448 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
449
450 /*
451 * Release any memory that we've allocated or locked.
452 */
453 switch (pMemLnx->Core.enmType)
454 {
455 case RTR0MEMOBJTYPE_LOW:
456 case RTR0MEMOBJTYPE_PAGE:
457 case RTR0MEMOBJTYPE_CONT:
458 case RTR0MEMOBJTYPE_PHYS:
459 case RTR0MEMOBJTYPE_PHYS_NC:
460 rtR0MemObjLinuxVUnmap(pMemLnx);
461 rtR0MemObjLinuxFreePages(pMemLnx);
462 break;
463
464 case RTR0MEMOBJTYPE_LOCK:
465 if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
466 {
467 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
468 size_t iPage;
469 Assert(pTask);
470 if (pTask && pTask->mm)
471 down_read(&pTask->mm->mmap_sem);
472
473 iPage = pMemLnx->cPages;
474 while (iPage-- > 0)
475 {
476 if (!PageReserved(pMemLnx->apPages[iPage]))
477 SetPageDirty(pMemLnx->apPages[iPage]);
478 page_cache_release(pMemLnx->apPages[iPage]);
479 }
480
481 if (pTask && pTask->mm)
482 up_read(&pTask->mm->mmap_sem);
483 }
484 /* else: kernel memory - nothing to do here. */
485 break;
486
487 case RTR0MEMOBJTYPE_RES_VIRT:
488 Assert(pMemLnx->Core.pv);
489 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
490 {
491 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
492 Assert(pTask);
493 if (pTask && pTask->mm)
494 {
495 down_write(&pTask->mm->mmap_sem);
496 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
497 up_write(&pTask->mm->mmap_sem);
498 }
499 }
500 else
501 {
502 vunmap(pMemLnx->Core.pv);
503
504 Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
505 __free_page(pMemLnx->apPages[0]);
506 pMemLnx->apPages[0] = NULL;
507 pMemLnx->cPages = 0;
508 }
509 pMemLnx->Core.pv = NULL;
510 break;
511
512 case RTR0MEMOBJTYPE_MAPPING:
513 Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
514 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
515 {
516 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
517 Assert(pTask);
518 if (pTask && pTask->mm)
519 {
520 down_write(&pTask->mm->mmap_sem);
521 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
522 up_write(&pTask->mm->mmap_sem);
523 }
524 }
525 else
526 vunmap(pMemLnx->Core.pv);
527 pMemLnx->Core.pv = NULL;
528 break;
529
530 default:
531 AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
532 return VERR_INTERNAL_ERROR;
533 }
534 return VINF_SUCCESS;
535}
536
537
538DECLHIDDEN(int) rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
539{
540 PRTR0MEMOBJLNX pMemLnx;
541 int rc;
542
543#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
544 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_HIGHUSER,
545 false /* non-contiguous */, VERR_NO_MEMORY);
546#else
547 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_USER,
548 false /* non-contiguous */, VERR_NO_MEMORY);
549#endif
550 if (RT_SUCCESS(rc))
551 {
552 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
553 if (RT_SUCCESS(rc))
554 {
555 *ppMem = &pMemLnx->Core;
556 return rc;
557 }
558
559 rtR0MemObjLinuxFreePages(pMemLnx);
560 rtR0MemObjDelete(&pMemLnx->Core);
561 }
562
563 return rc;
564}
565
566
567DECLHIDDEN(int) rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
568{
569 PRTR0MEMOBJLNX pMemLnx;
570 int rc;
571
572 /* Try to avoid GFP_DMA. GFM_DMA32 was introduced with Linux 2.6.15. */
573#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
574 /* ZONE_DMA32: 0-4GB */
575 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA32,
576 false /* non-contiguous */, VERR_NO_LOW_MEMORY);
577 if (RT_FAILURE(rc))
578#endif
579#ifdef RT_ARCH_AMD64
580 /* ZONE_DMA: 0-16MB */
581 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA,
582 false /* non-contiguous */, VERR_NO_LOW_MEMORY);
583#else
584# ifdef CONFIG_X86_PAE
585# endif
586 /* ZONE_NORMAL: 0-896MB */
587 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_USER,
588 false /* non-contiguous */, VERR_NO_LOW_MEMORY);
589#endif
590 if (RT_SUCCESS(rc))
591 {
592 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
593 if (RT_SUCCESS(rc))
594 {
595 *ppMem = &pMemLnx->Core;
596 return rc;
597 }
598
599 rtR0MemObjLinuxFreePages(pMemLnx);
600 rtR0MemObjDelete(&pMemLnx->Core);
601 }
602
603 return rc;
604}
605
606
607DECLHIDDEN(int) rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
608{
609 PRTR0MEMOBJLNX pMemLnx;
610 int rc;
611
612#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
613 /* ZONE_DMA32: 0-4GB */
614 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA32,
615 true /* contiguous */, VERR_NO_CONT_MEMORY);
616 if (RT_FAILURE(rc))
617#endif
618#ifdef RT_ARCH_AMD64
619 /* ZONE_DMA: 0-16MB */
620 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA,
621 true /* contiguous */, VERR_NO_CONT_MEMORY);
622#else
623 /* ZONE_NORMAL (32-bit hosts): 0-896MB */
624 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_USER,
625 true /* contiguous */, VERR_NO_CONT_MEMORY);
626#endif
627 if (RT_SUCCESS(rc))
628 {
629 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
630 if (RT_SUCCESS(rc))
631 {
632#if defined(RT_STRICT) && (defined(RT_ARCH_AMD64) || defined(CONFIG_HIGHMEM64G))
633 size_t iPage = pMemLnx->cPages;
634 while (iPage-- > 0)
635 Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
636#endif
637 pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
638 *ppMem = &pMemLnx->Core;
639 return rc;
640 }
641
642 rtR0MemObjLinuxFreePages(pMemLnx);
643 rtR0MemObjDelete(&pMemLnx->Core);
644 }
645
646 return rc;
647}
648
649
650/**
651 * Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
652 *
653 * @returns IPRT status.
654 * @param ppMemLnx Where to
655 * @param enmType The object type.
656 * @param cb The size of the allocation.
657 * @param uAlignment The alignment of the physical memory.
658 * Only valid for fContiguous == true, ignored otherwise.
659 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
660 * @param fGfp The Linux GFP flags to use for the allocation.
661 */
662static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
663 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest, unsigned fGfp)
664{
665 PRTR0MEMOBJLNX pMemLnx;
666 int rc;
667
668 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, uAlignment, fGfp,
669 enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */,
670 VERR_NO_PHYS_MEMORY);
671 if (RT_FAILURE(rc))
672 return rc;
673
674 /*
675 * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
676 */
677 if (PhysHighest != NIL_RTHCPHYS)
678 {
679 size_t iPage = pMemLnx->cPages;
680 while (iPage-- > 0)
681 if (page_to_phys(pMemLnx->apPages[iPage]) >= PhysHighest)
682 {
683 rtR0MemObjLinuxFreePages(pMemLnx);
684 rtR0MemObjDelete(&pMemLnx->Core);
685 return VERR_NO_MEMORY;
686 }
687 }
688
689 /*
690 * Complete the object.
691 */
692 if (enmType == RTR0MEMOBJTYPE_PHYS)
693 {
694 pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
695 pMemLnx->Core.u.Phys.fAllocated = true;
696 }
697 *ppMem = &pMemLnx->Core;
698 return rc;
699}
700
701
702/**
703 * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
704 *
705 * @returns IPRT status.
706 * @param ppMem Where to store the memory object pointer on success.
707 * @param enmType The object type.
708 * @param cb The size of the allocation.
709 * @param uAlignment The alignment of the physical memory.
710 * Only valid for enmType == RTR0MEMOBJTYPE_PHYS, ignored otherwise.
711 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
712 */
713static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
714 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest)
715{
716 int rc;
717
718 /*
719 * There are two clear cases and that's the <=16MB and anything-goes ones.
720 * When the physical address limit is somewhere in-between those two we'll
721 * just have to try, starting with HIGHUSER and working our way thru the
722 * different types, hoping we'll get lucky.
723 *
724 * We should probably move this physical address restriction logic up to
725 * the page alloc function as it would be more efficient there. But since
726 * we don't expect this to be a performance issue just yet it can wait.
727 */
728 if (PhysHighest == NIL_RTHCPHYS)
729 /* ZONE_HIGHMEM: the whole physical memory */
730 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
731 else if (PhysHighest <= _1M * 16)
732 /* ZONE_DMA: 0-16MB */
733 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
734 else
735 {
736 rc = VERR_NO_MEMORY;
737 if (RT_FAILURE(rc))
738 /* ZONE_HIGHMEM: the whole physical memory */
739 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
740 if (RT_FAILURE(rc))
741 /* ZONE_NORMAL: 0-896MB */
742 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_USER);
743#ifdef GFP_DMA32
744 if (RT_FAILURE(rc))
745 /* ZONE_DMA32: 0-4GB */
746 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA32);
747#endif
748 if (RT_FAILURE(rc))
749 /* ZONE_DMA: 0-16MB */
750 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
751 }
752 return rc;
753}
754
755
756/**
757 * Translates a kernel virtual address to a linux page structure by walking the
758 * page tables.
759 *
760 * @note We do assume that the page tables will not change as we are walking
761 * them. This assumption is rather forced by the fact that I could not
762 * immediately see any way of preventing this from happening. So, we
763 * take some extra care when accessing them.
764 *
765 * Because of this, we don't want to use this function on memory where
766 * attribute changes to nearby pages is likely to cause large pages to
767 * be used or split up. So, don't use this for the linear mapping of
768 * physical memory.
769 *
770 * @returns Pointer to the page structur or NULL if it could not be found.
771 * @param pv The kernel virtual address.
772 */
773static struct page *rtR0MemObjLinuxVirtToPage(void *pv)
774{
775 unsigned long ulAddr = (unsigned long)pv;
776 unsigned long pfn;
777 struct page *pPage;
778 pte_t *pEntry;
779 union
780 {
781 pgd_t Global;
782#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
783 pud_t Upper;
784#endif
785 pmd_t Middle;
786 pte_t Entry;
787 } u;
788
789 /* Should this happen in a situation this code will be called in? And if
790 * so, can it change under our feet? See also
791 * "Documentation/vm/active_mm.txt" in the kernel sources. */
792 if (RT_UNLIKELY(!current->active_mm))
793 return NULL;
794 u.Global = *pgd_offset(current->active_mm, ulAddr);
795 if (RT_UNLIKELY(pgd_none(u.Global)))
796 return NULL;
797
798#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
799 u.Upper = *pud_offset(&u.Global, ulAddr);
800 if (RT_UNLIKELY(pud_none(u.Upper)))
801 return NULL;
802# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
803 if (pud_large(u.Upper))
804 {
805 pPage = pud_page(u.Upper);
806 AssertReturn(pPage, NULL);
807 pfn = page_to_pfn(pPage); /* doing the safe way... */
808 pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PUD_SHIFT - PAGE_SHIFT)) - 1);
809 return pfn_to_page(pfn);
810 }
811# endif
812
813 u.Middle = *pmd_offset(&u.Upper, ulAddr);
814#else /* < 2.6.11 */
815 u.Middle = *pmd_offset(&u.Global, ulAddr);
816#endif /* < 2.6.11 */
817 if (RT_UNLIKELY(pmd_none(u.Middle)))
818 return NULL;
819#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
820 if (pmd_large(u.Middle))
821 {
822 pPage = pmd_page(u.Middle);
823 AssertReturn(pPage, NULL);
824 pfn = page_to_pfn(pPage); /* doing the safe way... */
825 pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PMD_SHIFT - PAGE_SHIFT)) - 1);
826 return pfn_to_page(pfn);
827 }
828#endif
829
830#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) || defined(pte_offset_map) /* As usual, RHEL 3 had pte_offset_map earlier. */
831 pEntry = pte_offset_map(&u.Middle, ulAddr);
832#else
833 pEntry = pte_offset(&u.Middle, ulAddr);
834#endif
835 if (RT_UNLIKELY(!pEntry))
836 return NULL;
837 u.Entry = *pEntry;
838#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) || defined(pte_offset_map)
839 pte_unmap(pEntry);
840#endif
841
842 if (RT_UNLIKELY(!pte_present(u.Entry)))
843 return NULL;
844 return pte_page(u.Entry);
845}
846
847
848DECLHIDDEN(int) rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment)
849{
850 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, uAlignment, PhysHighest);
851}
852
853
854DECLHIDDEN(int) rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
855{
856 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PAGE_SIZE, PhysHighest);
857}
858
859
860DECLHIDDEN(int) rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy)
861{
862 /*
863 * All we need to do here is to validate that we can use
864 * ioremap on the specified address (32/64-bit dma_addr_t).
865 */
866 PRTR0MEMOBJLNX pMemLnx;
867 dma_addr_t PhysAddr = Phys;
868 AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
869
870 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb);
871 if (!pMemLnx)
872 return VERR_NO_MEMORY;
873
874 pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
875 pMemLnx->Core.u.Phys.fAllocated = false;
876 pMemLnx->Core.u.Phys.uCachePolicy = uCachePolicy;
877 Assert(!pMemLnx->cPages);
878 *ppMem = &pMemLnx->Core;
879 return VINF_SUCCESS;
880}
881
882
883DECLHIDDEN(int) rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess, RTR0PROCESS R0Process)
884{
885 const int cPages = cb >> PAGE_SHIFT;
886 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
887 struct vm_area_struct **papVMAs;
888 PRTR0MEMOBJLNX pMemLnx;
889 int rc = VERR_NO_MEMORY;
890 NOREF(fAccess);
891
892 /*
893 * Check for valid task and size overflows.
894 */
895 if (!pTask)
896 return VERR_NOT_SUPPORTED;
897 if (((size_t)cPages << PAGE_SHIFT) != cb)
898 return VERR_OUT_OF_RANGE;
899
900 /*
901 * Allocate the memory object and a temporary buffer for the VMAs.
902 */
903 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
904 if (!pMemLnx)
905 return VERR_NO_MEMORY;
906
907 papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
908 if (papVMAs)
909 {
910 down_read(&pTask->mm->mmap_sem);
911
912 /*
913 * Get user pages.
914 */
915 rc = get_user_pages(pTask, /* Task for fault accounting. */
916 pTask->mm, /* Whose pages. */
917 R3Ptr, /* Where from. */
918 cPages, /* How many pages. */
919 1, /* Write to memory. */
920 0, /* force. */
921 &pMemLnx->apPages[0], /* Page array. */
922 papVMAs); /* vmas */
923 if (rc == cPages)
924 {
925 /*
926 * Flush dcache (required?), protect against fork and _really_ pin the page
927 * table entries. get_user_pages() will protect against swapping out the
928 * pages but it will NOT protect against removing page table entries. This
929 * can be achieved with
930 * - using mlock / mmap(..., MAP_LOCKED, ...) from userland. This requires
931 * an appropriate limit set up with setrlimit(..., RLIMIT_MEMLOCK, ...).
932 * Usual Linux distributions support only a limited size of locked pages
933 * (e.g. 32KB).
934 * - setting the PageReserved bit (as we do in rtR0MemObjLinuxAllocPages()
935 * or by
936 * - setting the VM_LOCKED flag. This is the same as doing mlock() without
937 * a range check.
938 */
939 /** @todo The Linux fork() protection will require more work if this API
940 * is to be used for anything but locking VM pages. */
941 while (rc-- > 0)
942 {
943 flush_dcache_page(pMemLnx->apPages[rc]);
944 papVMAs[rc]->vm_flags |= (VM_DONTCOPY | VM_LOCKED);
945 }
946
947 up_read(&pTask->mm->mmap_sem);
948
949 RTMemFree(papVMAs);
950
951 pMemLnx->Core.u.Lock.R0Process = R0Process;
952 pMemLnx->cPages = cPages;
953 Assert(!pMemLnx->fMappedToRing0);
954 *ppMem = &pMemLnx->Core;
955
956 return VINF_SUCCESS;
957 }
958
959 /*
960 * Failed - we need to unlock any pages that we succeeded to lock.
961 */
962 while (rc-- > 0)
963 {
964 if (!PageReserved(pMemLnx->apPages[rc]))
965 SetPageDirty(pMemLnx->apPages[rc]);
966 page_cache_release(pMemLnx->apPages[rc]);
967 }
968
969 up_read(&pTask->mm->mmap_sem);
970
971 RTMemFree(papVMAs);
972 rc = VERR_LOCK_FAILED;
973 }
974
975 rtR0MemObjDelete(&pMemLnx->Core);
976 return rc;
977}
978
979
980DECLHIDDEN(int) rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
981{
982 void *pvLast = (uint8_t *)pv + cb - 1;
983 size_t const cPages = cb >> PAGE_SHIFT;
984 PRTR0MEMOBJLNX pMemLnx;
985 bool fLinearMapping;
986 int rc;
987 uint8_t *pbPage;
988 size_t iPage;
989 NOREF(fAccess);
990
991 if ( !RTR0MemKernelIsValidAddr(pv)
992 || !RTR0MemKernelIsValidAddr(pv + cb))
993 return VERR_INVALID_PARAMETER;
994
995 /*
996 * The lower part of the kernel memory has a linear mapping between
997 * physical and virtual addresses. So we take a short cut here. This is
998 * assumed to be the cleanest way to handle those addresses (and the code
999 * is well tested, though the test for determining it is not very nice).
1000 * If we ever decide it isn't we can still remove it.
1001 */
1002#if 0
1003 fLinearMapping = (unsigned long)pvLast < VMALLOC_START;
1004#else
1005 fLinearMapping = (unsigned long)pv >= (unsigned long)__va(0)
1006 && (unsigned long)pvLast < (unsigned long)high_memory;
1007#endif
1008
1009 /*
1010 * Allocate the memory object.
1011 */
1012 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, pv, cb);
1013 if (!pMemLnx)
1014 return VERR_NO_MEMORY;
1015
1016 /*
1017 * Gather the pages.
1018 * We ASSUME all kernel pages are non-swappable and non-movable.
1019 */
1020 rc = VINF_SUCCESS;
1021 pbPage = (uint8_t *)pvLast;
1022 iPage = cPages;
1023 if (!fLinearMapping)
1024 {
1025 while (iPage-- > 0)
1026 {
1027 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
1028 if (RT_UNLIKELY(!pPage))
1029 {
1030 rc = VERR_LOCK_FAILED;
1031 break;
1032 }
1033 pMemLnx->apPages[iPage] = pPage;
1034 pbPage -= PAGE_SIZE;
1035 }
1036 }
1037 else
1038 {
1039 while (iPage-- > 0)
1040 {
1041 pMemLnx->apPages[iPage] = virt_to_page(pbPage);
1042 pbPage -= PAGE_SIZE;
1043 }
1044 }
1045 if (RT_SUCCESS(rc))
1046 {
1047 /*
1048 * Complete the memory object and return.
1049 */
1050 pMemLnx->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
1051 pMemLnx->cPages = cPages;
1052 Assert(!pMemLnx->fMappedToRing0);
1053 *ppMem = &pMemLnx->Core;
1054
1055 return VINF_SUCCESS;
1056 }
1057
1058 rtR0MemObjDelete(&pMemLnx->Core);
1059 return rc;
1060}
1061
1062
1063DECLHIDDEN(int) rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
1064{
1065#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1066 const size_t cPages = cb >> PAGE_SHIFT;
1067 struct page *pDummyPage;
1068 struct page **papPages;
1069
1070 /* check for unsupported stuff. */
1071 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
1072 if (uAlignment > PAGE_SIZE)
1073 return VERR_NOT_SUPPORTED;
1074
1075 /*
1076 * Allocate a dummy page and create a page pointer array for vmap such that
1077 * the dummy page is mapped all over the reserved area.
1078 */
1079 pDummyPage = alloc_page(GFP_HIGHUSER);
1080 if (!pDummyPage)
1081 return VERR_NO_MEMORY;
1082 papPages = RTMemAlloc(sizeof(*papPages) * cPages);
1083 if (papPages)
1084 {
1085 void *pv;
1086 size_t iPage = cPages;
1087 while (iPage-- > 0)
1088 papPages[iPage] = pDummyPage;
1089# ifdef VM_MAP
1090 pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
1091# else
1092 pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
1093# endif
1094 RTMemFree(papPages);
1095 if (pv)
1096 {
1097 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
1098 if (pMemLnx)
1099 {
1100 pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
1101 pMemLnx->cPages = 1;
1102 pMemLnx->apPages[0] = pDummyPage;
1103 *ppMem = &pMemLnx->Core;
1104 return VINF_SUCCESS;
1105 }
1106 vunmap(pv);
1107 }
1108 }
1109 __free_page(pDummyPage);
1110 return VERR_NO_MEMORY;
1111
1112#else /* < 2.4.22 */
1113 /*
1114 * Could probably use ioremap here, but the caller is in a better position than us
1115 * to select some safe physical memory.
1116 */
1117 return VERR_NOT_SUPPORTED;
1118#endif
1119}
1120
1121
1122/**
1123 * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativerMapUser that creates
1124 * an empty user space mapping.
1125 *
1126 * The caller takes care of acquiring the mmap_sem of the task.
1127 *
1128 * @returns Pointer to the mapping.
1129 * (void *)-1 on failure.
1130 * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
1131 * @param cb The size of the mapping.
1132 * @param uAlignment The alignment of the mapping.
1133 * @param pTask The Linux task to create this mapping in.
1134 * @param fProt The RTMEM_PROT_* mask.
1135 */
1136static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
1137{
1138 unsigned fLnxProt;
1139 unsigned long ulAddr;
1140
1141 /*
1142 * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
1143 */
1144 fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
1145 if (fProt == RTMEM_PROT_NONE)
1146 fLnxProt = PROT_NONE;
1147 else
1148 {
1149 fLnxProt = 0;
1150 if (fProt & RTMEM_PROT_READ)
1151 fLnxProt |= PROT_READ;
1152 if (fProt & RTMEM_PROT_WRITE)
1153 fLnxProt |= PROT_WRITE;
1154 if (fProt & RTMEM_PROT_EXEC)
1155 fLnxProt |= PROT_EXEC;
1156 }
1157
1158 if (R3PtrFixed != (RTR3PTR)-1)
1159 ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
1160 else
1161 {
1162 ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1163 if ( !(ulAddr & ~PAGE_MASK)
1164 && (ulAddr & (uAlignment - 1)))
1165 {
1166 /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
1167 * up alignment gaps. This is of course complicated by fragmentation (which we might have cause
1168 * ourselves) and further by there begin two mmap strategies (top / bottom). */
1169 /* For now, just ignore uAlignment requirements... */
1170 }
1171 }
1172 if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
1173 return (void *)-1;
1174 return (void *)ulAddr;
1175}
1176
1177
1178DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
1179{
1180 PRTR0MEMOBJLNX pMemLnx;
1181 void *pv;
1182 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1183 if (!pTask)
1184 return VERR_NOT_SUPPORTED;
1185
1186 /*
1187 * Check that the specified alignment is supported.
1188 */
1189 if (uAlignment > PAGE_SIZE)
1190 return VERR_NOT_SUPPORTED;
1191
1192 /*
1193 * Let rtR0MemObjLinuxDoMmap do the difficult bits.
1194 */
1195 down_write(&pTask->mm->mmap_sem);
1196 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
1197 up_write(&pTask->mm->mmap_sem);
1198 if (pv == (void *)-1)
1199 return VERR_NO_MEMORY;
1200
1201 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
1202 if (!pMemLnx)
1203 {
1204 down_write(&pTask->mm->mmap_sem);
1205 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, cb);
1206 up_write(&pTask->mm->mmap_sem);
1207 return VERR_NO_MEMORY;
1208 }
1209
1210 pMemLnx->Core.u.ResVirt.R0Process = R0Process;
1211 *ppMem = &pMemLnx->Core;
1212 return VINF_SUCCESS;
1213}
1214
1215
1216DECLHIDDEN(int) rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap,
1217 void *pvFixed, size_t uAlignment,
1218 unsigned fProt, size_t offSub, size_t cbSub)
1219{
1220 int rc = VERR_NO_MEMORY;
1221 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1222 PRTR0MEMOBJLNX pMemLnx;
1223
1224 /* Fail if requested to do something we can't. */
1225 AssertMsgReturn(!offSub && !cbSub, ("%#x %#x\n", offSub, cbSub), VERR_NOT_SUPPORTED);
1226 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
1227 if (uAlignment > PAGE_SIZE)
1228 return VERR_NOT_SUPPORTED;
1229
1230 /*
1231 * Create the IPRT memory object.
1232 */
1233 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1234 if (pMemLnx)
1235 {
1236 if (pMemLnxToMap->cPages)
1237 {
1238#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1239 /*
1240 * Use vmap - 2.4.22 and later.
1241 */
1242 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
1243# ifdef VM_MAP
1244 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_MAP, fPg);
1245# else
1246 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_ALLOC, fPg);
1247# endif
1248 if (pMemLnx->Core.pv)
1249 {
1250 pMemLnx->fMappedToRing0 = true;
1251 rc = VINF_SUCCESS;
1252 }
1253 else
1254 rc = VERR_MAP_FAILED;
1255
1256#else /* < 2.4.22 */
1257 /*
1258 * Only option here is to share mappings if possible and forget about fProt.
1259 */
1260 if (rtR0MemObjIsRing3(pMemToMap))
1261 rc = VERR_NOT_SUPPORTED;
1262 else
1263 {
1264 rc = VINF_SUCCESS;
1265 if (!pMemLnxToMap->Core.pv)
1266 rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
1267 if (RT_SUCCESS(rc))
1268 {
1269 Assert(pMemLnxToMap->Core.pv);
1270 pMemLnx->Core.pv = pMemLnxToMap->Core.pv;
1271 }
1272 }
1273#endif
1274 }
1275 else
1276 {
1277 /*
1278 * MMIO / physical memory.
1279 */
1280 Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
1281 pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
1282 ? ioremap_nocache(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb)
1283 : ioremap(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb);
1284 if (pMemLnx->Core.pv)
1285 {
1286 /** @todo fix protection. */
1287 rc = VINF_SUCCESS;
1288 }
1289 }
1290 if (RT_SUCCESS(rc))
1291 {
1292 pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
1293 *ppMem = &pMemLnx->Core;
1294 return VINF_SUCCESS;
1295 }
1296 rtR0MemObjDelete(&pMemLnx->Core);
1297 }
1298
1299 return rc;
1300}
1301
1302
#ifdef VBOX_USE_PAE_HACK
/**
 * Replace the PFN of a PTE with the address of the actual page.
 *
 * The caller maps a reserved dummy page at the address with the desired access
 * and flags.  This function then rewrites the physical address bits of that
 * page table entry while leaving the remaining bits untouched.
 *
 * This hack is required for older Linux kernels which don't provide
 * remap_pfn_range().
 *
 * @returns 0 on success, -ENOMEM on failure.
 * @param   mm          The memory context.
 * @param   ulAddr      The mapping address.
 * @param   Phys        The physical address of the page to map.
 */
static int rtR0MemObjLinuxFixPte(struct mm_struct *mm, unsigned long ulAddr, RTHCPHYS Phys)
{
    int     rcLnx = -ENOMEM;
    pgd_t  *pPgd;
    pmd_t  *pPmd;
    pte_t  *pPte;
    pte_t   Entry;

    spin_lock(&mm->page_table_lock);

    /* Walk the page tables down to the PTE, bailing out if a level is missing. */
    pPgd = pgd_offset(mm, ulAddr);
    if (pgd_none(*pPgd) || pgd_bad(*pPgd))
        goto l_unlock;

    pPmd = pmd_offset(pPgd, ulAddr);
    if (pmd_none(*pPmd))
        goto l_unlock;

    pPte = pte_offset_map(pPmd, ulAddr);
    if (!pPte)
        goto l_unlock;

    /* Keep the non-address bits of both halves and insert the new physical address. */
    Entry = *pPte;
    Entry.pte_high = (Entry.pte_high & 0xfff00000) | ((Phys >> 32) & 0x000fffff);
    Entry.pte_low  = (Entry.pte_low  & 0x00000fff) | (Phys & 0xfffff000);
    set_pte(pPte, Entry);
    pte_unmap(pPte);
    rcLnx = 0;

l_unlock:
    spin_unlock(&mm->page_table_lock);
    return rcLnx;
}
#endif /* VBOX_USE_PAE_HACK */
1350
1351
1352DECLHIDDEN(int) rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed,
1353 size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
1354{
1355 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1356 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1357 int rc = VERR_NO_MEMORY;
1358 PRTR0MEMOBJLNX pMemLnx;
1359#ifdef VBOX_USE_PAE_HACK
1360 struct page *pDummyPage;
1361 RTHCPHYS DummyPhys;
1362#endif
1363
1364 /*
1365 * Check for restrictions.
1366 */
1367 if (!pTask)
1368 return VERR_NOT_SUPPORTED;
1369 if (uAlignment > PAGE_SIZE)
1370 return VERR_NOT_SUPPORTED;
1371
1372#ifdef VBOX_USE_PAE_HACK
1373 /*
1374 * Allocate a dummy page for use when mapping the memory.
1375 */
1376 pDummyPage = alloc_page(GFP_USER);
1377 if (!pDummyPage)
1378 return VERR_NO_MEMORY;
1379 SetPageReserved(pDummyPage);
1380 DummyPhys = page_to_phys(pDummyPage);
1381#endif
1382
1383 /*
1384 * Create the IPRT memory object.
1385 */
1386 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1387 if (pMemLnx)
1388 {
1389 /*
1390 * Allocate user space mapping.
1391 */
1392 void *pv;
1393 down_write(&pTask->mm->mmap_sem);
1394 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, pMemLnxToMap->Core.cb, uAlignment, pTask, fProt);
1395 if (pv != (void *)-1)
1396 {
1397 /*
1398 * Map page by page into the mmap area.
1399 * This is generic, paranoid and not very efficient.
1400 */
1401 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
1402 unsigned long ulAddrCur = (unsigned long)pv;
1403 const size_t cPages = pMemLnxToMap->Core.cb >> PAGE_SHIFT;
1404 size_t iPage;
1405
1406 rc = 0;
1407 if (pMemLnxToMap->cPages)
1408 {
1409 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1410 {
1411#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1412 RTHCPHYS Phys = page_to_phys(pMemLnxToMap->apPages[iPage]);
1413#endif
1414#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1415 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1416 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1417#endif
1418#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1419 /* remap_page_range() limitation on x86 */
1420 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1421#endif
1422
1423#if defined(VBOX_USE_INSERT_PAGE) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
1424 rc = vm_insert_page(vma, ulAddrCur, pMemLnxToMap->apPages[iPage]);
1425 vma->vm_flags |= VM_RESERVED; /* This flag helps making 100% sure some bad stuff wont happen (swap, core, ++). */
1426#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1427 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1428#elif defined(VBOX_USE_PAE_HACK)
1429 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1430 if (!rc)
1431 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1432#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1433 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1434#else /* 2.4 */
1435 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1436#endif
1437 if (rc)
1438 {
1439 rc = VERR_NO_MEMORY;
1440 break;
1441 }
1442 }
1443 }
1444 else
1445 {
1446 RTHCPHYS Phys;
1447 if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
1448 Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
1449 else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
1450 Phys = pMemLnxToMap->Core.u.Cont.Phys;
1451 else
1452 {
1453 AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
1454 Phys = NIL_RTHCPHYS;
1455 }
1456 if (Phys != NIL_RTHCPHYS)
1457 {
1458 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
1459 {
1460#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1461 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1462 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1463#endif
1464#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1465 /* remap_page_range() limitation on x86 */
1466 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1467#endif
1468
1469#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1470 rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1471#elif defined(VBOX_USE_PAE_HACK)
1472 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1473 if (!rc)
1474 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1475#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1476 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1477#else /* 2.4 */
1478 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1479#endif
1480 if (rc)
1481 {
1482 rc = VERR_NO_MEMORY;
1483 break;
1484 }
1485 }
1486 }
1487 }
1488 if (!rc)
1489 {
1490 up_write(&pTask->mm->mmap_sem);
1491#ifdef VBOX_USE_PAE_HACK
1492 __free_page(pDummyPage);
1493#endif
1494
1495 pMemLnx->Core.pv = pv;
1496 pMemLnx->Core.u.Mapping.R0Process = R0Process;
1497 *ppMem = &pMemLnx->Core;
1498 return VINF_SUCCESS;
1499 }
1500
1501 /*
1502 * Bail out.
1503 */
1504 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, pMemLnxToMap->Core.cb);
1505 }
1506 up_write(&pTask->mm->mmap_sem);
1507 rtR0MemObjDelete(&pMemLnx->Core);
1508 }
1509#ifdef VBOX_USE_PAE_HACK
1510 __free_page(pDummyPage);
1511#endif
1512
1513 return rc;
1514}
1515
1516
1517DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
1518{
1519 NOREF(pMem);
1520 NOREF(offSub);
1521 NOREF(cbSub);
1522 NOREF(fProt);
1523 return VERR_NOT_SUPPORTED;
1524}
1525
1526
1527DECLHIDDEN(RTHCPHYS) rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
1528{
1529 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
1530
1531 if (pMemLnx->cPages)
1532 return page_to_phys(pMemLnx->apPages[iPage]);
1533
1534 switch (pMemLnx->Core.enmType)
1535 {
1536 case RTR0MEMOBJTYPE_CONT:
1537 return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
1538
1539 case RTR0MEMOBJTYPE_PHYS:
1540 return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
1541
1542 /* the parent knows */
1543 case RTR0MEMOBJTYPE_MAPPING:
1544 return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
1545
1546 /* cPages > 0 */
1547 case RTR0MEMOBJTYPE_LOW:
1548 case RTR0MEMOBJTYPE_LOCK:
1549 case RTR0MEMOBJTYPE_PHYS_NC:
1550 case RTR0MEMOBJTYPE_PAGE:
1551 default:
1552 AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
1553 /* fall thru */
1554
1555 case RTR0MEMOBJTYPE_RES_VIRT:
1556 return NIL_RTHCPHYS;
1557 }
1558}
1559
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette