VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/GIMKvm.cpp@ 78126

Last change on this file since 78126 was 76553, checked in by vboxsync, 6 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.6 KB
Line 
1/* $Id: GIMKvm.cpp 76553 2019-01-01 01:45:53Z vboxsync $ */
2/** @file
3 * GIM - Guest Interface Manager, KVM implementation.
4 */
5
6/*
7 * Copyright (C) 2015-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_GIM
23#include <VBox/vmm/gim.h>
24#include <VBox/vmm/cpum.h>
25#include <VBox/vmm/hm.h>
26#include <VBox/vmm/pdmapi.h>
27#include <VBox/vmm/ssm.h>
28#include <VBox/vmm/em.h>
29#include "GIMInternal.h"
30#include <VBox/vmm/vm.h>
31
32#include <VBox/disopcode.h>
33#include <VBox/err.h>
34#include <VBox/version.h>
35
36#include <iprt/asm-math.h>
37#include <iprt/assert.h>
38#include <iprt/string.h>
39#include <iprt/mem.h>
40
41
42
43/*********************************************************************************************************************************
44* Defined Constants And Macros *
45*********************************************************************************************************************************/
46
47/**
48 * GIM KVM saved-state version.
49 */
50#define GIM_KVM_SAVED_STATE_VERSION UINT32_C(1)
51
52/**
53 * VBox internal struct. to passback to EMT rendezvous callback while enabling
54 * the KVM wall-clock.
55 */
56typedef struct KVMWALLCLOCKINFO
57{
58 /** Guest physical address of the wall-clock struct. */
59 RTGCPHYS GCPhysWallClock;
60} KVMWALLCLOCKINFO;
61/** Pointer to the wall-clock info. struct. */
62typedef KVMWALLCLOCKINFO *PKVMWALLCLOCKINFO;
63
64
65/*********************************************************************************************************************************
66* Global Variables *
67*********************************************************************************************************************************/
68#ifdef VBOX_WITH_STATISTICS
69# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
70 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } }
71#else
72# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
73 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName }
74#endif
75
76/**
77 * Array of MSR ranges supported by KVM.
78 */
79static CPUMMSRRANGE const g_aMsrRanges_Kvm[] =
80{
81 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE0_FIRST, MSR_GIM_KVM_RANGE0_LAST, "KVM range 0"),
82 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE1_FIRST, MSR_GIM_KVM_RANGE1_LAST, "KVM range 1")
83};
84#undef GIMKVM_MSRRANGE
85
86
87/**
88 * Initializes the KVM GIM provider.
89 *
90 * @returns VBox status code.
91 * @param pVM The cross context VM structure.
92 */
93VMMR3_INT_DECL(int) gimR3KvmInit(PVM pVM)
94{
95 AssertReturn(pVM, VERR_INVALID_PARAMETER);
96 AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_KVM, VERR_INTERNAL_ERROR_5);
97
98 int rc;
99 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
100
101 /*
102 * Determine interface capabilities based on the version.
103 */
104 if (!pVM->gim.s.u32Version)
105 {
106 /* Basic features. */
107 pKvm->uBaseFeat = 0
108 | GIM_KVM_BASE_FEAT_CLOCK_OLD
109 //| GIM_KVM_BASE_FEAT_NOP_IO_DELAY
110 //| GIM_KVM_BASE_FEAT_MMU_OP
111 | GIM_KVM_BASE_FEAT_CLOCK
112 //| GIM_KVM_BASE_FEAT_ASYNC_PF
113 //| GIM_KVM_BASE_FEAT_STEAL_TIME
114 //| GIM_KVM_BASE_FEAT_PV_EOI
115 | GIM_KVM_BASE_FEAT_PV_UNHALT
116 ;
117 /* Rest of the features are determined in gimR3KvmInitCompleted(). */
118 }
119
120 /*
121 * Expose HVP (Hypervisor Present) bit to the guest.
122 */
123 CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP);
124
125 /*
126 * Modify the standard hypervisor leaves for KVM.
127 */
128 CPUMCPUIDLEAF HyperLeaf;
129 RT_ZERO(HyperLeaf);
130 HyperLeaf.uLeaf = UINT32_C(0x40000000);
131 HyperLeaf.uEax = UINT32_C(0x40000001); /* Minimum value for KVM is 0x40000001. */
132 HyperLeaf.uEbx = 0x4B4D564B; /* 'KVMK' */
133 HyperLeaf.uEcx = 0x564B4D56; /* 'VMKV' */
134 HyperLeaf.uEdx = 0x0000004D; /* 'M000' */
135 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
136 AssertLogRelRCReturn(rc, rc);
137
138 /*
139 * Add KVM specific leaves.
140 */
141 HyperLeaf.uLeaf = UINT32_C(0x40000001);
142 HyperLeaf.uEax = pKvm->uBaseFeat;
143 HyperLeaf.uEbx = 0; /* Reserved */
144 HyperLeaf.uEcx = 0; /* Reserved */
145 HyperLeaf.uEdx = 0; /* Reserved */
146 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
147 AssertLogRelRCReturn(rc, rc);
148
149 /*
150 * Insert all MSR ranges of KVM.
151 */
152 for (unsigned i = 0; i < RT_ELEMENTS(g_aMsrRanges_Kvm); i++)
153 {
154 rc = CPUMR3MsrRangesInsert(pVM, &g_aMsrRanges_Kvm[i]);
155 AssertLogRelRCReturn(rc, rc);
156 }
157
158 /*
159 * Setup hypercall and #UD handling.
160 * Note! We always need to trap VMCALL/VMMCALL hypercall using #UDs for raw-mode VMs.
161 */
162 for (VMCPUID i = 0; i < pVM->cCpus; i++)
163 EMSetHypercallInstructionsEnabled(&pVM->aCpus[i], true);
164
165 size_t cbHypercall = 0;
166 rc = GIMQueryHypercallOpcodeBytes(pVM, pKvm->abOpcodeNative, sizeof(pKvm->abOpcodeNative), &cbHypercall, &pKvm->uOpcodeNative);
167 AssertLogRelRCReturn(rc, rc);
168 AssertLogRelReturn(cbHypercall == sizeof(pKvm->abOpcodeNative), VERR_GIM_IPE_1);
169 pKvm->fTrapXcptUD = pKvm->uOpcodeNative != OP_VMCALL || VM_IS_RAW_MODE_ENABLED(pVM);
170
171 return VINF_SUCCESS;
172}
173
174
175/**
176 * Initializes remaining bits of the KVM provider.
177 *
178 * This is called after initializing HM and almost all other VMM components.
179 *
180 * @returns VBox status code.
181 * @param pVM The cross context VM structure.
182 */
183VMMR3_INT_DECL(int) gimR3KvmInitCompleted(PVM pVM)
184{
185 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
186 pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
187
188 if (TMR3CpuTickIsFixedRateMonotonic(pVM, true /* fWithParavirtEnabled */))
189 {
190 /** @todo We might want to consider just enabling this bit *always*. As far
191 * as I can see in the Linux guest, the "TSC_STABLE" bit is only
192 * translated as a "monotonic" bit which even in Async systems we
193 * -should- be reporting a strictly monotonic TSC to the guest. */
194 pKvm->uBaseFeat |= GIM_KVM_BASE_FEAT_TSC_STABLE;
195
196 CPUMCPUIDLEAF HyperLeaf;
197 RT_ZERO(HyperLeaf);
198 HyperLeaf.uLeaf = UINT32_C(0x40000001);
199 HyperLeaf.uEax = pKvm->uBaseFeat;
200 HyperLeaf.uEbx = 0;
201 HyperLeaf.uEcx = 0;
202 HyperLeaf.uEdx = 0;
203 int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
204 AssertLogRelRCReturn(rc, rc);
205 }
206 return VINF_SUCCESS;
207}
208
209
210/**
211 * Terminates the KVM GIM provider.
212 *
213 * @returns VBox status code.
214 * @param pVM The cross context VM structure.
215 */
216VMMR3_INT_DECL(int) gimR3KvmTerm(PVM pVM)
217{
218 gimR3KvmReset(pVM);
219 return VINF_SUCCESS;
220}
221
222
223/**
224 * This resets KVM provider MSRs and unmaps whatever KVM regions that
225 * the guest may have mapped.
226 *
227 * This is called when the VM is being reset.
228 *
229 * @param pVM The cross context VM structure.
230 * @thread EMT(0)
231 */
232VMMR3_INT_DECL(void) gimR3KvmReset(PVM pVM)
233{
234 VM_ASSERT_EMT0(pVM);
235 LogRel(("GIM: KVM: Resetting MSRs\n"));
236
237 /*
238 * Reset MSRs.
239 */
240 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
241 pKvm->u64WallClockMsr = 0;
242 for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
243 {
244 PGIMKVMCPU pKvmCpu = &pVM->aCpus[iCpu].gim.s.u.KvmCpu;
245 pKvmCpu->u64SystemTimeMsr = 0;
246 pKvmCpu->u32SystemTimeVersion = 0;
247 pKvmCpu->fSystemTimeFlags = 0;
248 pKvmCpu->GCPhysSystemTime = 0;
249 pKvmCpu->uTsc = 0;
250 pKvmCpu->uVirtNanoTS = 0;
251 }
252}
253
254
255/**
256 * KVM state-save operation.
257 *
258 * @returns VBox status code.
259 * @param pVM The cross context VM structure.
260 * @param pSSM The saved state handle.
261 */
262VMMR3_INT_DECL(int) gimR3KvmSave(PVM pVM, PSSMHANDLE pSSM)
263{
264 PCGIMKVM pKvm = &pVM->gim.s.u.Kvm;
265
266 /*
267 * Save the KVM SSM version.
268 */
269 SSMR3PutU32(pSSM, GIM_KVM_SAVED_STATE_VERSION);
270
271 /*
272 * Save per-VCPU data.
273 */
274 for (uint32_t i = 0; i < pVM->cCpus; i++)
275 {
276 PCGIMKVMCPU pKvmCpu = &pVM->aCpus[i].gim.s.u.KvmCpu;
277 SSMR3PutU64(pSSM, pKvmCpu->u64SystemTimeMsr);
278 SSMR3PutU64(pSSM, pKvmCpu->uTsc);
279 SSMR3PutU64(pSSM, pKvmCpu->uVirtNanoTS);
280 SSMR3PutGCPhys(pSSM, pKvmCpu->GCPhysSystemTime);
281 SSMR3PutU32(pSSM, pKvmCpu->u32SystemTimeVersion);
282 SSMR3PutU8(pSSM, pKvmCpu->fSystemTimeFlags);
283 }
284
285 /*
286 * Save per-VM data.
287 */
288 SSMR3PutU64(pSSM, pKvm->u64WallClockMsr);
289 return SSMR3PutU32(pSSM, pKvm->uBaseFeat);
290}
291
292
293/**
294 * KVM state-load operation, final pass.
295 *
296 * @returns VBox status code.
297 * @param pVM The cross context VM structure.
298 * @param pSSM The saved state handle.
299 */
300VMMR3_INT_DECL(int) gimR3KvmLoad(PVM pVM, PSSMHANDLE pSSM)
301{
302 /*
303 * Load the KVM SSM version first.
304 */
305 uint32_t uKvmSavedStatVersion;
306 int rc = SSMR3GetU32(pSSM, &uKvmSavedStatVersion);
307 AssertRCReturn(rc, rc);
308 if (uKvmSavedStatVersion != GIM_KVM_SAVED_STATE_VERSION)
309 return SSMR3SetLoadError(pSSM, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION, RT_SRC_POS,
310 N_("Unsupported KVM saved-state version %u (expected %u)."),
311 uKvmSavedStatVersion, GIM_KVM_SAVED_STATE_VERSION);
312
313 /*
314 * Update the TSC frequency from TM.
315 */
316 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
317 pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
318
319 /*
320 * Load per-VCPU data.
321 */
322 for (uint32_t i = 0; i < pVM->cCpus; i++)
323 {
324 PVMCPU pVCpu = &pVM->aCpus[i];
325 PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
326
327 SSMR3GetU64(pSSM, &pKvmCpu->u64SystemTimeMsr);
328 SSMR3GetU64(pSSM, &pKvmCpu->uTsc);
329 SSMR3GetU64(pSSM, &pKvmCpu->uVirtNanoTS);
330 SSMR3GetGCPhys(pSSM, &pKvmCpu->GCPhysSystemTime);
331 SSMR3GetU32(pSSM, &pKvmCpu->u32SystemTimeVersion);
332 rc = SSMR3GetU8(pSSM, &pKvmCpu->fSystemTimeFlags);
333 AssertRCReturn(rc, rc);
334
335 /* Enable the system-time struct. if necessary. */
336 /** @todo update guest struct only if cTscTicksPerSecond doesn't match host
337 * anymore. */
338 if (MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pKvmCpu->u64SystemTimeMsr))
339 {
340 Assert(!TMVirtualIsTicking(pVM)); /* paranoia. */
341 Assert(!TMCpuTickIsTicking(pVCpu));
342 gimR3KvmEnableSystemTime(pVM, pVCpu);
343 }
344 }
345
346 /*
347 * Load per-VM data.
348 */
349 SSMR3GetU64(pSSM, &pKvm->u64WallClockMsr);
350 rc = SSMR3GetU32(pSSM, &pKvm->uBaseFeat);
351 AssertRCReturn(rc, rc);
352
353 return VINF_SUCCESS;
354}
355
356
357/**
358 * Enables the KVM VCPU system-time structure.
359 *
360 * @returns VBox status code.
361 * @param pVM The cross context VM structure.
362 * @param pVCpu The cross context virtual CPU structure.
363 *
364 * @remarks Don't do any release assertions here, these can be triggered by
365 * guest R0 code.
366 */
367VMMR3_INT_DECL(int) gimR3KvmEnableSystemTime(PVM pVM, PVMCPU pVCpu)
368{
369 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
370 PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
371
372 /*
373 * Validate the mapping address first.
374 */
375 if (!PGMPhysIsGCPhysNormal(pVM, pKvmCpu->GCPhysSystemTime))
376 {
377 LogRel(("GIM: KVM: VCPU%3d: Invalid physical addr requested for mapping system-time struct. GCPhysSystemTime=%#RGp\n",
378 pVCpu->idCpu, pKvmCpu->GCPhysSystemTime));
379 return VERR_GIM_OPERATION_FAILED;
380 }
381
382 /*
383 * Construct the system-time struct.
384 */
385 GIMKVMSYSTEMTIME SystemTime;
386 RT_ZERO(SystemTime);
387 SystemTime.u32Version = pKvmCpu->u32SystemTimeVersion;
388 SystemTime.u64NanoTS = pKvmCpu->uVirtNanoTS;
389 SystemTime.u64Tsc = pKvmCpu->uTsc;
390 SystemTime.fFlags = pKvmCpu->fSystemTimeFlags | GIM_KVM_SYSTEM_TIME_FLAGS_TSC_STABLE;
391
392 /*
393 * How the guest calculates the system time (nanoseconds):
394 *
395 * tsc = rdtsc - SysTime.u64Tsc
396 * if (SysTime.i8TscShift >= 0)
397 * tsc <<= i8TscShift;
398 * else
399 * tsc >>= -i8TscShift;
400 * time = ((tsc * SysTime.u32TscScale) >> 32) + SysTime.u64NanoTS
401 */
402 uint64_t u64TscFreq = pKvm->cTscTicksPerSecond;
403 SystemTime.i8TscShift = 0;
404 while (u64TscFreq > 2 * RT_NS_1SEC_64)
405 {
406 u64TscFreq >>= 1;
407 SystemTime.i8TscShift--;
408 }
409 uint32_t uTscFreqLo = (uint32_t)u64TscFreq;
410 while (uTscFreqLo <= RT_NS_1SEC)
411 {
412 uTscFreqLo <<= 1;
413 SystemTime.i8TscShift++;
414 }
415 SystemTime.u32TscScale = ASMDivU64ByU32RetU32(RT_NS_1SEC_64 << 32, uTscFreqLo);
416
417 /*
418 * Update guest memory with the system-time struct.
419 */
420 Assert(!(SystemTime.u32Version & UINT32_C(1)));
421 int rc = PGMPhysSimpleWriteGCPhys(pVM, pKvmCpu->GCPhysSystemTime, &SystemTime, sizeof(GIMKVMSYSTEMTIME));
422 if (RT_SUCCESS(rc))
423 {
424 LogRel(("GIM: KVM: VCPU%3d: Enabled system-time struct. at %#RGp - u32TscScale=%#RX32 i8TscShift=%d uVersion=%#RU32 "
425 "fFlags=%#x uTsc=%#RX64 uVirtNanoTS=%#RX64\n", pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, SystemTime.u32TscScale,
426 SystemTime.i8TscShift, SystemTime.u32Version, SystemTime.fFlags, pKvmCpu->uTsc, pKvmCpu->uVirtNanoTS));
427 TMR3CpuTickParavirtEnable(pVM);
428 }
429 else
430 LogRel(("GIM: KVM: VCPU%3d: Failed to write system-time struct. at %#RGp. rc=%Rrc\n",
431 pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, rc));
432
433 return rc;
434}
435
436
437/**
438 * Disables the KVM system-time struct.
439 *
440 * @returns VBox status code.
441 * @param pVM The cross context VM structure.
442 */
443VMMR3_INT_DECL(int) gimR3KvmDisableSystemTime(PVM pVM)
444{
445 TMR3CpuTickParavirtDisable(pVM);
446 return VINF_SUCCESS;
447}
448
449
450/**
451 * @callback_method_impl{PFNVMMEMTRENDEZVOUS,
452 * Worker for gimR3KvmEnableWallClock}
453 */
454static DECLCALLBACK(VBOXSTRICTRC) gimR3KvmEnableWallClockCallback(PVM pVM, PVMCPU pVCpu, void *pvUser)
455{
456 PKVMWALLCLOCKINFO pWallClockInfo = (PKVMWALLCLOCKINFO)pvUser; AssertPtr(pWallClockInfo);
457 RTGCPHYS GCPhysWallClock = pWallClockInfo->GCPhysWallClock;
458 RT_NOREF1(pVCpu);
459
460 /*
461 * Read the wall-clock version (sequence) from the guest.
462 */
463 uint32_t uVersion;
464 Assert(PGMPhysIsGCPhysNormal(pVM, GCPhysWallClock));
465 int rc = PGMPhysSimpleReadGCPhys(pVM, &uVersion, GCPhysWallClock, sizeof(uVersion));
466 if (RT_FAILURE(rc))
467 {
468 LogRel(("GIM: KVM: Failed to read wall-clock struct. version at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
469 return rc;
470 }
471
472 /*
473 * Ensure the version is incrementally even.
474 */
475 /* faster: uVersion = (uVersion | 1) + 1; */
476 if (!(uVersion & 1))
477 ++uVersion;
478 ++uVersion;
479
480 /*
481 * Update wall-clock guest struct. with UTC information.
482 */
483 RTTIMESPEC TimeSpec;
484 int32_t iSec;
485 int32_t iNano;
486 TMR3UtcNow(pVM, &TimeSpec);
487 RTTimeSpecGetSecondsAndNano(&TimeSpec, &iSec, &iNano);
488
489 GIMKVMWALLCLOCK WallClock;
490 RT_ZERO(WallClock);
491 AssertCompile(sizeof(uVersion) == sizeof(WallClock.u32Version));
492 WallClock.u32Version = uVersion;
493 WallClock.u32Sec = iSec;
494 WallClock.u32Nano = iNano;
495
496 /*
497 * Write out the wall-clock struct. to guest memory.
498 */
499 Assert(!(WallClock.u32Version & 1));
500 rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysWallClock, &WallClock, sizeof(GIMKVMWALLCLOCK));
501 if (RT_SUCCESS(rc))
502 LogRel(("GIM: KVM: Enabled wall-clock struct. at %#RGp - u32Sec=%u u32Nano=%u uVersion=%#RU32\n", GCPhysWallClock,
503 WallClock.u32Sec, WallClock.u32Nano, WallClock.u32Version));
504 else
505 LogRel(("GIM: KVM: Failed to write wall-clock struct. at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
506 return rc;
507}
508
509
510/**
511 * Enables the KVM wall-clock structure.
512 *
513 * Since the wall-clock can be read by any VCPU but it is a global struct. in
514 * guest-memory, we do an EMT rendezvous here to be on the safe side. The
515 * alternative is to use an MMIO2 region and use the WallClock.u32Version field
516 * for transactional update. However, this MSR is rarely written to (typically
517 * once during bootup) it's currently not a performance issue especially since
518 * we're already in ring-3. If we really wanted better performance in this code
519 * path, we should be doing it in ring-0 with transactional update while make
520 * sure there is only 1 writer as well.
521 *
522 * @returns VBox status code.
523 * @param pVM The cross context VM structure.
524 * @param GCPhysWallClock Where the guest wall-clock structure is located.
525 *
526 * @remarks Don't do any release assertions here, these can be triggered by
527 * guest R0 code.
528 */
529VMMR3_INT_DECL(int) gimR3KvmEnableWallClock(PVM pVM, RTGCPHYS GCPhysWallClock)
530{
531 KVMWALLCLOCKINFO WallClockInfo;
532 WallClockInfo.GCPhysWallClock = GCPhysWallClock;
533 return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, gimR3KvmEnableWallClockCallback, &WallClockInfo);
534}
535
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette