/* $Id: GIMKvm.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
/** @file
 * GIM - Guest Interface Manager, KVM implementation.
 */

/*
 * Copyright (C) 2015-2022 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */
|
---|
27 |
|
---|
28 |
|
---|
29 | /*********************************************************************************************************************************
|
---|
30 | * Header Files *
|
---|
31 | *********************************************************************************************************************************/
|
---|
32 | #define LOG_GROUP LOG_GROUP_GIM
|
---|
33 | #include <VBox/vmm/gim.h>
|
---|
34 | #include <VBox/vmm/cpum.h>
|
---|
35 | #include <VBox/vmm/hm.h>
|
---|
36 | #include <VBox/vmm/pdmapi.h>
|
---|
37 | #include <VBox/vmm/ssm.h>
|
---|
38 | #include <VBox/vmm/em.h>
|
---|
39 | #include "GIMInternal.h"
|
---|
40 | #include <VBox/vmm/vm.h>
|
---|
41 |
|
---|
42 | #include <VBox/disopcode.h>
|
---|
43 | #include <VBox/err.h>
|
---|
44 | #include <VBox/version.h>
|
---|
45 |
|
---|
46 | #include <iprt/asm-math.h>
|
---|
47 | #include <iprt/assert.h>
|
---|
48 | #include <iprt/string.h>
|
---|
49 | #include <iprt/mem.h>
|
---|
50 |
|
---|
51 |
|
---|
52 |
|
---|
53 | /*********************************************************************************************************************************
|
---|
54 | * Defined Constants And Macros *
|
---|
55 | *********************************************************************************************************************************/
|
---|
56 |
|
---|
57 | /**
|
---|
58 | * GIM KVM saved-state version.
|
---|
59 | */
|
---|
60 | #define GIM_KVM_SAVED_STATE_VERSION UINT32_C(1)
|
---|
61 |
|
---|
62 | /**
|
---|
63 | * VBox internal struct. to passback to EMT rendezvous callback while enabling
|
---|
64 | * the KVM wall-clock.
|
---|
65 | */
|
---|
66 | typedef struct KVMWALLCLOCKINFO
|
---|
67 | {
|
---|
68 | /** Guest physical address of the wall-clock struct. */
|
---|
69 | RTGCPHYS GCPhysWallClock;
|
---|
70 | } KVMWALLCLOCKINFO;
|
---|
71 | /** Pointer to the wall-clock info. struct. */
|
---|
72 | typedef KVMWALLCLOCKINFO *PKVMWALLCLOCKINFO;
|
---|
73 |
|
---|
74 |
|
---|
75 | /*********************************************************************************************************************************
|
---|
76 | * Global Variables *
|
---|
77 | *********************************************************************************************************************************/
|
---|
78 | #ifdef VBOX_WITH_STATISTICS
|
---|
79 | # define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
|
---|
80 | { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } }
|
---|
81 | #else
|
---|
82 | # define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
|
---|
83 | { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName }
|
---|
84 | #endif
|
---|
85 |
|
---|
86 | /**
|
---|
87 | * Array of MSR ranges supported by KVM.
|
---|
88 | */
|
---|
89 | static CPUMMSRRANGE const g_aMsrRanges_Kvm[] =
|
---|
90 | {
|
---|
91 | GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE0_FIRST, MSR_GIM_KVM_RANGE0_LAST, "KVM range 0"),
|
---|
92 | GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE1_FIRST, MSR_GIM_KVM_RANGE1_LAST, "KVM range 1")
|
---|
93 | };
|
---|
94 | #undef GIMKVM_MSRRANGE
|
---|
95 |
|
---|
96 |
|
---|
97 | /**
|
---|
98 | * Updates the KVM VCPU system-time structure in guest memory.
|
---|
99 | *
|
---|
100 | * @returns VBox status code.
|
---|
101 | * @param pVM The cross context VM structure.
|
---|
102 | * @param pVCpu The cross context virtual CPU structure.
|
---|
103 | *
|
---|
104 | * @remarks This must be called after the system time MSR value has been updated.
|
---|
105 | */
|
---|
106 | static int gimR3KvmUpdateSystemTime(PVM pVM, PVMCPU pVCpu)
|
---|
107 | {
|
---|
108 | PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
|
---|
109 | PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
|
---|
110 |
|
---|
111 | /*
|
---|
112 | * Validate the MSR has the enable bit and the guest's system time struct. address.
|
---|
113 | */
|
---|
114 | MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pKvmCpu->u64SystemTimeMsr);
|
---|
115 | if (!PGMPhysIsGCPhysNormal(pVM, pKvmCpu->GCPhysSystemTime))
|
---|
116 | {
|
---|
117 | LogRel(("GIM: KVM: VCPU%3d: Invalid physical addr requested for mapping system-time struct. GCPhysSystemTime=%#RGp\n",
|
---|
118 | pVCpu->idCpu, pKvmCpu->GCPhysSystemTime));
|
---|
119 | return VERR_GIM_OPERATION_FAILED;
|
---|
120 | }
|
---|
121 |
|
---|
122 | VMSTATE const enmVMState = pVM->enmVMState;
|
---|
123 | bool const fRunning = VMSTATE_IS_RUNNING(enmVMState);
|
---|
124 | Assert(!(pKvmCpu->u32SystemTimeVersion & UINT32_C(1)));
|
---|
125 |
|
---|
126 | /*
|
---|
127 | * Construct a system-time struct.
|
---|
128 | */
|
---|
129 | GIMKVMSYSTEMTIME SystemTime;
|
---|
130 | RT_ZERO(SystemTime);
|
---|
131 | SystemTime.u32Version = pKvmCpu->u32SystemTimeVersion + !!fRunning;
|
---|
132 | SystemTime.u64NanoTS = pKvmCpu->uVirtNanoTS;
|
---|
133 | SystemTime.u64Tsc = pKvmCpu->uTsc;
|
---|
134 | SystemTime.fFlags = pKvmCpu->fSystemTimeFlags | GIM_KVM_SYSTEM_TIME_FLAGS_TSC_STABLE;
|
---|
135 |
|
---|
136 | /*
|
---|
137 | * How the guest calculates the system time (nanoseconds):
|
---|
138 | *
|
---|
139 | * tsc = rdtsc - SysTime.u64Tsc
|
---|
140 | * if (SysTime.i8TscShift >= 0)
|
---|
141 | * tsc <<= i8TscShift;
|
---|
142 | * else
|
---|
143 | * tsc >>= -i8TscShift;
|
---|
144 | * time = ((tsc * SysTime.u32TscScale) >> 32) + SysTime.u64NanoTS
|
---|
145 | */
|
---|
146 | uint64_t u64TscFreq = pKvm->cTscTicksPerSecond;
|
---|
147 | SystemTime.i8TscShift = 0;
|
---|
148 | while (u64TscFreq > 2 * RT_NS_1SEC_64)
|
---|
149 | {
|
---|
150 | u64TscFreq >>= 1;
|
---|
151 | SystemTime.i8TscShift--;
|
---|
152 | }
|
---|
153 | uint32_t uTscFreqLo = (uint32_t)u64TscFreq;
|
---|
154 | while (uTscFreqLo <= RT_NS_1SEC)
|
---|
155 | {
|
---|
156 | uTscFreqLo <<= 1;
|
---|
157 | SystemTime.i8TscShift++;
|
---|
158 | }
|
---|
159 | SystemTime.u32TscScale = ASMDivU64ByU32RetU32(RT_NS_1SEC_64 << 32, uTscFreqLo);
|
---|
160 |
|
---|
161 | /*
|
---|
162 | * For informational purposes, back-calculate the exact TSC frequency the guest will see.
|
---|
163 | * Note that the frequency is in kHz, not Hz, since that's what Linux uses.
|
---|
164 | */
|
---|
165 | uint64_t uTscKHz = (RT_NS_1MS_64 << 32) / SystemTime.u32TscScale;
|
---|
166 | if (SystemTime.i8TscShift < 0)
|
---|
167 | uTscKHz <<= -SystemTime.i8TscShift;
|
---|
168 | else
|
---|
169 | uTscKHz >>= SystemTime.i8TscShift;
|
---|
170 |
|
---|
171 | /*
|
---|
172 | * Update guest memory with the system-time struct.
|
---|
173 | *
|
---|
174 | * We update the struct with an incremented, odd version field to indicate to the guest
|
---|
175 | * that the memory is being updated concurrently by the host and it should discard any
|
---|
176 | * data from this struct when it reads an odd version.
|
---|
177 | *
|
---|
178 | * When the VM is not running, we don't need to do this two step update for obvious
|
---|
179 | * reasons and so we skip it.
|
---|
180 | */
|
---|
181 | if (fRunning)
|
---|
182 | Assert(SystemTime.u32Version & UINT32_C(1));
|
---|
183 | else
|
---|
184 | Assert(!(SystemTime.u32Version & UINT32_C(1)));
|
---|
185 |
|
---|
186 | int rc = PGMPhysSimpleWriteGCPhys(pVM, pKvmCpu->GCPhysSystemTime, &SystemTime, sizeof(GIMKVMSYSTEMTIME));
|
---|
187 | if (RT_SUCCESS(rc))
|
---|
188 | {
|
---|
189 | LogRel(("GIM: KVM: VCPU%3d: Enabled system-time struct. at %#RGp - u32TscScale=%#RX32 i8TscShift=%d uVersion=%#RU32 "
|
---|
190 | "fFlags=%#x uTsc=%#RX64 uVirtNanoTS=%#RX64 TscKHz=%RU64\n", pVCpu->idCpu, pKvmCpu->GCPhysSystemTime,
|
---|
191 | SystemTime.u32TscScale, SystemTime.i8TscShift, SystemTime.u32Version + !!fRunning, SystemTime.fFlags,
|
---|
192 | pKvmCpu->uTsc, pKvmCpu->uVirtNanoTS, uTscKHz));
|
---|
193 | TMR3CpuTickParavirtEnable(pVM);
|
---|
194 | }
|
---|
195 | else
|
---|
196 | {
|
---|
197 | LogRel(("GIM: KVM: VCPU%3d: Failed to write system-time struct. at %#RGp. rc=%Rrc\n", pVCpu->idCpu,
|
---|
198 | pKvmCpu->GCPhysSystemTime, rc));
|
---|
199 | }
|
---|
200 |
|
---|
201 | if (fRunning)
|
---|
202 | {
|
---|
203 | ++SystemTime.u32Version;
|
---|
204 | Assert(!(SystemTime.u32Version & UINT32_C(1)));
|
---|
205 | rc = PGMPhysSimpleWriteGCPhys(pVM, pKvmCpu->GCPhysSystemTime + RT_UOFFSETOF(GIMKVMSYSTEMTIME, u32Version),
|
---|
206 | &SystemTime.u32Version, sizeof(SystemTime.u32Version));
|
---|
207 | if (RT_FAILURE(rc))
|
---|
208 | {
|
---|
209 | LogRel(("GIM: KVM: VCPU%3d: Failed to write system-time struct. while updating version field at %#RGp. rc=%Rrc\n",
|
---|
210 | pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, rc));
|
---|
211 | return rc;
|
---|
212 | }
|
---|
213 |
|
---|
214 | /* Update the version so our next write will start with an even value. */
|
---|
215 | pKvmCpu->u32SystemTimeVersion += 2;
|
---|
216 | }
|
---|
217 |
|
---|
218 | return rc;
|
---|
219 | }
|
---|
220 |
|
---|
221 |
|
---|
222 | /**
|
---|
223 | * Initializes the KVM GIM provider.
|
---|
224 | *
|
---|
225 | * @returns VBox status code.
|
---|
226 | * @param pVM The cross context VM structure.
|
---|
227 | */
|
---|
228 | VMMR3_INT_DECL(int) gimR3KvmInit(PVM pVM)
|
---|
229 | {
|
---|
230 | AssertReturn(pVM, VERR_INVALID_PARAMETER);
|
---|
231 | AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_KVM, VERR_INTERNAL_ERROR_5);
|
---|
232 |
|
---|
233 | int rc;
|
---|
234 | PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
|
---|
235 |
|
---|
236 | /*
|
---|
237 | * Determine interface capabilities based on the version.
|
---|
238 | */
|
---|
239 | if (!pVM->gim.s.u32Version)
|
---|
240 | {
|
---|
241 | /* Basic features. */
|
---|
242 | pKvm->uBaseFeat = 0
|
---|
243 | | GIM_KVM_BASE_FEAT_CLOCK_OLD
|
---|
244 | //| GIM_KVM_BASE_FEAT_NOP_IO_DELAY
|
---|
245 | //| GIM_KVM_BASE_FEAT_MMU_OP
|
---|
246 | | GIM_KVM_BASE_FEAT_CLOCK
|
---|
247 | //| GIM_KVM_BASE_FEAT_ASYNC_PF
|
---|
248 | //| GIM_KVM_BASE_FEAT_STEAL_TIME
|
---|
249 | //| GIM_KVM_BASE_FEAT_PV_EOI
|
---|
250 | | GIM_KVM_BASE_FEAT_PV_UNHALT
|
---|
251 | ;
|
---|
252 | /* Rest of the features are determined in gimR3KvmInitCompleted(). */
|
---|
253 | }
|
---|
254 |
|
---|
255 | /*
|
---|
256 | * Expose HVP (Hypervisor Present) bit to the guest.
|
---|
257 | */
|
---|
258 | CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP);
|
---|
259 |
|
---|
260 | /*
|
---|
261 | * Modify the standard hypervisor leaves for KVM.
|
---|
262 | */
|
---|
263 | CPUMCPUIDLEAF HyperLeaf;
|
---|
264 | RT_ZERO(HyperLeaf);
|
---|
265 | HyperLeaf.uLeaf = UINT32_C(0x40000000);
|
---|
266 | HyperLeaf.uEax = UINT32_C(0x40000001); /* Minimum value for KVM is 0x40000001. */
|
---|
267 | HyperLeaf.uEbx = 0x4B4D564B; /* 'KVMK' */
|
---|
268 | HyperLeaf.uEcx = 0x564B4D56; /* 'VMKV' */
|
---|
269 | HyperLeaf.uEdx = 0x0000004D; /* 'M000' */
|
---|
270 | rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
|
---|
271 | AssertLogRelRCReturn(rc, rc);
|
---|
272 |
|
---|
273 | /*
|
---|
274 | * Add KVM specific leaves.
|
---|
275 | */
|
---|
276 | HyperLeaf.uLeaf = UINT32_C(0x40000001);
|
---|
277 | HyperLeaf.uEax = pKvm->uBaseFeat;
|
---|
278 | HyperLeaf.uEbx = 0; /* Reserved */
|
---|
279 | HyperLeaf.uEcx = 0; /* Reserved */
|
---|
280 | HyperLeaf.uEdx = 0; /* Reserved */
|
---|
281 | rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
|
---|
282 | AssertLogRelRCReturn(rc, rc);
|
---|
283 |
|
---|
284 | /*
|
---|
285 | * Insert all MSR ranges of KVM.
|
---|
286 | */
|
---|
287 | for (unsigned i = 0; i < RT_ELEMENTS(g_aMsrRanges_Kvm); i++)
|
---|
288 | {
|
---|
289 | rc = CPUMR3MsrRangesInsert(pVM, &g_aMsrRanges_Kvm[i]);
|
---|
290 | AssertLogRelRCReturn(rc, rc);
|
---|
291 | }
|
---|
292 |
|
---|
293 | /*
|
---|
294 | * Setup hypercall and #UD handling.
|
---|
295 | * Note! We always need to trap VMCALL/VMMCALL hypercall using #UDs for raw-mode VMs.
|
---|
296 | */
|
---|
297 | for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
|
---|
298 | EMSetHypercallInstructionsEnabled(pVM->apCpusR3[idCpu], true);
|
---|
299 |
|
---|
300 | size_t cbHypercall = 0;
|
---|
301 | rc = GIMQueryHypercallOpcodeBytes(pVM, pKvm->abOpcodeNative, sizeof(pKvm->abOpcodeNative), &cbHypercall, &pKvm->uOpcodeNative);
|
---|
302 | AssertLogRelRCReturn(rc, rc);
|
---|
303 | AssertLogRelReturn(cbHypercall == sizeof(pKvm->abOpcodeNative), VERR_GIM_IPE_1);
|
---|
304 | pKvm->fTrapXcptUD = pKvm->uOpcodeNative != OP_VMCALL;
|
---|
305 |
|
---|
306 | return VINF_SUCCESS;
|
---|
307 | }
|
---|
308 |
|
---|
309 |
|
---|
310 | /**
|
---|
311 | * Initializes remaining bits of the KVM provider.
|
---|
312 | *
|
---|
313 | * This is called after initializing HM and almost all other VMM components.
|
---|
314 | *
|
---|
315 | * @returns VBox status code.
|
---|
316 | * @param pVM The cross context VM structure.
|
---|
317 | */
|
---|
318 | VMMR3_INT_DECL(int) gimR3KvmInitCompleted(PVM pVM)
|
---|
319 | {
|
---|
320 | PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
|
---|
321 | pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
|
---|
322 |
|
---|
323 | if (TMR3CpuTickIsFixedRateMonotonic(pVM, true /* fWithParavirtEnabled */))
|
---|
324 | {
|
---|
325 | /** @todo We might want to consider just enabling this bit *always*. As far
|
---|
326 | * as I can see in the Linux guest, the "TSC_STABLE" bit is only
|
---|
327 | * translated as a "monotonic" bit which even in Async systems we
|
---|
328 | * -should- be reporting a strictly monotonic TSC to the guest. */
|
---|
329 | pKvm->uBaseFeat |= GIM_KVM_BASE_FEAT_TSC_STABLE;
|
---|
330 |
|
---|
331 | CPUMCPUIDLEAF HyperLeaf;
|
---|
332 | RT_ZERO(HyperLeaf);
|
---|
333 | HyperLeaf.uLeaf = UINT32_C(0x40000001);
|
---|
334 | HyperLeaf.uEax = pKvm->uBaseFeat;
|
---|
335 | HyperLeaf.uEbx = 0;
|
---|
336 | HyperLeaf.uEcx = 0;
|
---|
337 | HyperLeaf.uEdx = 0;
|
---|
338 | int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
|
---|
339 | AssertLogRelRCReturn(rc, rc);
|
---|
340 | }
|
---|
341 | return VINF_SUCCESS;
|
---|
342 | }
|
---|
343 |
|
---|
344 |
|
---|
345 | /**
|
---|
346 | * Terminates the KVM GIM provider.
|
---|
347 | *
|
---|
348 | * @returns VBox status code.
|
---|
349 | * @param pVM The cross context VM structure.
|
---|
350 | */
|
---|
351 | VMMR3_INT_DECL(int) gimR3KvmTerm(PVM pVM)
|
---|
352 | {
|
---|
353 | gimR3KvmReset(pVM);
|
---|
354 | return VINF_SUCCESS;
|
---|
355 | }
|
---|
356 |
|
---|
357 |
|
---|
358 | /**
|
---|
359 | * This resets KVM provider MSRs and unmaps whatever KVM regions that
|
---|
360 | * the guest may have mapped.
|
---|
361 | *
|
---|
362 | * This is called when the VM is being reset.
|
---|
363 | *
|
---|
364 | * @param pVM The cross context VM structure.
|
---|
365 | * @thread EMT(0)
|
---|
366 | */
|
---|
367 | VMMR3_INT_DECL(void) gimR3KvmReset(PVM pVM)
|
---|
368 | {
|
---|
369 | VM_ASSERT_EMT0(pVM);
|
---|
370 | LogRel(("GIM: KVM: Resetting MSRs\n"));
|
---|
371 |
|
---|
372 | /*
|
---|
373 | * Reset MSRs.
|
---|
374 | */
|
---|
375 | PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
|
---|
376 | pKvm->u64WallClockMsr = 0;
|
---|
377 | for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
|
---|
378 | {
|
---|
379 | PGIMKVMCPU pKvmCpu = &pVM->apCpusR3[idCpu]->gim.s.u.KvmCpu;
|
---|
380 | pKvmCpu->u64SystemTimeMsr = 0;
|
---|
381 | pKvmCpu->u32SystemTimeVersion = 0;
|
---|
382 | pKvmCpu->fSystemTimeFlags = 0;
|
---|
383 | pKvmCpu->GCPhysSystemTime = 0;
|
---|
384 | pKvmCpu->uTsc = 0;
|
---|
385 | pKvmCpu->uVirtNanoTS = 0;
|
---|
386 | }
|
---|
387 | }
|
---|
388 |
|
---|
389 |
|
---|
390 | /**
|
---|
391 | * KVM state-save operation.
|
---|
392 | *
|
---|
393 | * @returns VBox status code.
|
---|
394 | * @param pVM The cross context VM structure.
|
---|
395 | * @param pSSM The saved state handle.
|
---|
396 | */
|
---|
397 | VMMR3_INT_DECL(int) gimR3KvmSave(PVM pVM, PSSMHANDLE pSSM)
|
---|
398 | {
|
---|
399 | PCGIMKVM pKvm = &pVM->gim.s.u.Kvm;
|
---|
400 |
|
---|
401 | /*
|
---|
402 | * Save the KVM SSM version.
|
---|
403 | */
|
---|
404 | SSMR3PutU32(pSSM, GIM_KVM_SAVED_STATE_VERSION);
|
---|
405 |
|
---|
406 | /*
|
---|
407 | * Save per-VCPU data.
|
---|
408 | */
|
---|
409 | for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
|
---|
410 | {
|
---|
411 | PCGIMKVMCPU pKvmCpu = &pVM->apCpusR3[idCpu]->gim.s.u.KvmCpu;
|
---|
412 | SSMR3PutU64(pSSM, pKvmCpu->u64SystemTimeMsr);
|
---|
413 | SSMR3PutU64(pSSM, pKvmCpu->uTsc);
|
---|
414 | SSMR3PutU64(pSSM, pKvmCpu->uVirtNanoTS);
|
---|
415 | SSMR3PutGCPhys(pSSM, pKvmCpu->GCPhysSystemTime);
|
---|
416 | SSMR3PutU32(pSSM, pKvmCpu->u32SystemTimeVersion);
|
---|
417 | SSMR3PutU8(pSSM, pKvmCpu->fSystemTimeFlags);
|
---|
418 | }
|
---|
419 |
|
---|
420 | /*
|
---|
421 | * Save per-VM data.
|
---|
422 | */
|
---|
423 | SSMR3PutU64(pSSM, pKvm->u64WallClockMsr);
|
---|
424 | return SSMR3PutU32(pSSM, pKvm->uBaseFeat);
|
---|
425 | }
|
---|
426 |
|
---|
427 |
|
---|
428 | /**
|
---|
429 | * KVM state-load operation, final pass.
|
---|
430 | *
|
---|
431 | * @returns VBox status code.
|
---|
432 | * @param pVM The cross context VM structure.
|
---|
433 | * @param pSSM The saved state handle.
|
---|
434 | */
|
---|
435 | VMMR3_INT_DECL(int) gimR3KvmLoad(PVM pVM, PSSMHANDLE pSSM)
|
---|
436 | {
|
---|
437 | /*
|
---|
438 | * Load the KVM SSM version first.
|
---|
439 | */
|
---|
440 | uint32_t uKvmSavedStatVersion;
|
---|
441 | int rc = SSMR3GetU32(pSSM, &uKvmSavedStatVersion);
|
---|
442 | AssertRCReturn(rc, rc);
|
---|
443 | if (uKvmSavedStatVersion != GIM_KVM_SAVED_STATE_VERSION)
|
---|
444 | return SSMR3SetLoadError(pSSM, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION, RT_SRC_POS,
|
---|
445 | N_("Unsupported KVM saved-state version %u (expected %u)."),
|
---|
446 | uKvmSavedStatVersion, GIM_KVM_SAVED_STATE_VERSION);
|
---|
447 |
|
---|
448 | /*
|
---|
449 | * Update the TSC frequency from TM.
|
---|
450 | */
|
---|
451 | PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
|
---|
452 | pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
|
---|
453 |
|
---|
454 | /*
|
---|
455 | * Load per-VCPU data.
|
---|
456 | */
|
---|
457 | for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
|
---|
458 | {
|
---|
459 | PVMCPU pVCpu = pVM->apCpusR3[idCpu];
|
---|
460 | PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
|
---|
461 |
|
---|
462 | SSMR3GetU64(pSSM, &pKvmCpu->u64SystemTimeMsr);
|
---|
463 | SSMR3GetU64(pSSM, &pKvmCpu->uTsc);
|
---|
464 | SSMR3GetU64(pSSM, &pKvmCpu->uVirtNanoTS);
|
---|
465 | SSMR3GetGCPhys(pSSM, &pKvmCpu->GCPhysSystemTime);
|
---|
466 | SSMR3GetU32(pSSM, &pKvmCpu->u32SystemTimeVersion);
|
---|
467 | rc = SSMR3GetU8(pSSM, &pKvmCpu->fSystemTimeFlags);
|
---|
468 | AssertRCReturn(rc, rc);
|
---|
469 |
|
---|
470 | /* Enable the system-time struct. if necessary. */
|
---|
471 | /** @todo update guest struct only if cTscTicksPerSecond doesn't match host
|
---|
472 | * anymore. */
|
---|
473 | if (MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pKvmCpu->u64SystemTimeMsr))
|
---|
474 | {
|
---|
475 | Assert(!TMVirtualIsTicking(pVM)); /* paranoia. */
|
---|
476 | Assert(!TMCpuTickIsTicking(pVCpu));
|
---|
477 | gimR3KvmUpdateSystemTime(pVM, pVCpu);
|
---|
478 | }
|
---|
479 | }
|
---|
480 |
|
---|
481 | /*
|
---|
482 | * Load per-VM data.
|
---|
483 | */
|
---|
484 | SSMR3GetU64(pSSM, &pKvm->u64WallClockMsr);
|
---|
485 | rc = SSMR3GetU32(pSSM, &pKvm->uBaseFeat);
|
---|
486 | AssertRCReturn(rc, rc);
|
---|
487 |
|
---|
488 | return VINF_SUCCESS;
|
---|
489 | }
|
---|
490 |
|
---|
491 |
|
---|
492 | /**
|
---|
493 | * Disables the KVM system-time struct.
|
---|
494 | *
|
---|
495 | * @returns VBox status code.
|
---|
496 | * @param pVM The cross context VM structure.
|
---|
497 | */
|
---|
498 | VMMR3_INT_DECL(int) gimR3KvmDisableSystemTime(PVM pVM)
|
---|
499 | {
|
---|
500 | TMR3CpuTickParavirtDisable(pVM);
|
---|
501 | return VINF_SUCCESS;
|
---|
502 | }
|
---|
503 |
|
---|
504 |
|
---|
505 | /**
|
---|
506 | * @callback_method_impl{PFNVMMEMTRENDEZVOUS,
|
---|
507 | * Worker for gimR3KvmEnableWallClock}
|
---|
508 | */
|
---|
509 | static DECLCALLBACK(VBOXSTRICTRC) gimR3KvmEnableWallClockCallback(PVM pVM, PVMCPU pVCpu, void *pvUser)
|
---|
510 | {
|
---|
511 | PKVMWALLCLOCKINFO pWallClockInfo = (PKVMWALLCLOCKINFO)pvUser; AssertPtr(pWallClockInfo);
|
---|
512 | RTGCPHYS GCPhysWallClock = pWallClockInfo->GCPhysWallClock;
|
---|
513 | RT_NOREF1(pVCpu);
|
---|
514 |
|
---|
515 | /*
|
---|
516 | * Read the wall-clock version (sequence) from the guest.
|
---|
517 | */
|
---|
518 | uint32_t uVersion;
|
---|
519 | Assert(PGMPhysIsGCPhysNormal(pVM, GCPhysWallClock));
|
---|
520 | int rc = PGMPhysSimpleReadGCPhys(pVM, &uVersion, GCPhysWallClock, sizeof(uVersion));
|
---|
521 | if (RT_FAILURE(rc))
|
---|
522 | {
|
---|
523 | LogRel(("GIM: KVM: Failed to read wall-clock struct. version at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
|
---|
524 | return rc;
|
---|
525 | }
|
---|
526 |
|
---|
527 | /*
|
---|
528 | * Ensure the version is incrementally even.
|
---|
529 | */
|
---|
530 | /* faster: uVersion = (uVersion | 1) + 1; */
|
---|
531 | if (!(uVersion & 1))
|
---|
532 | ++uVersion;
|
---|
533 | ++uVersion;
|
---|
534 |
|
---|
535 | /*
|
---|
536 | * Update wall-clock guest struct. with UTC information.
|
---|
537 | */
|
---|
538 | RTTIMESPEC TimeSpec;
|
---|
539 | int32_t iSec;
|
---|
540 | int32_t iNano;
|
---|
541 | TMR3UtcNow(pVM, &TimeSpec);
|
---|
542 | RTTimeSpecGetSecondsAndNano(&TimeSpec, &iSec, &iNano);
|
---|
543 |
|
---|
544 | GIMKVMWALLCLOCK WallClock;
|
---|
545 | RT_ZERO(WallClock);
|
---|
546 | AssertCompile(sizeof(uVersion) == sizeof(WallClock.u32Version));
|
---|
547 | WallClock.u32Version = uVersion;
|
---|
548 | WallClock.u32Sec = iSec;
|
---|
549 | WallClock.u32Nano = iNano;
|
---|
550 |
|
---|
551 | /*
|
---|
552 | * Write out the wall-clock struct. to guest memory.
|
---|
553 | */
|
---|
554 | Assert(!(WallClock.u32Version & 1));
|
---|
555 | rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysWallClock, &WallClock, sizeof(GIMKVMWALLCLOCK));
|
---|
556 | if (RT_SUCCESS(rc))
|
---|
557 | LogRel(("GIM: KVM: Enabled wall-clock struct. at %#RGp - u32Sec=%u u32Nano=%u uVersion=%#RU32\n", GCPhysWallClock,
|
---|
558 | WallClock.u32Sec, WallClock.u32Nano, WallClock.u32Version));
|
---|
559 | else
|
---|
560 | LogRel(("GIM: KVM: Failed to write wall-clock struct. at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
|
---|
561 | return rc;
|
---|
562 | }
|
---|
563 |
|
---|
564 |
|
---|
565 | /**
|
---|
566 | * Enables the KVM wall-clock structure.
|
---|
567 | *
|
---|
568 | * Since the wall-clock can be read by any VCPU but it is a global struct. in
|
---|
569 | * guest-memory, we do an EMT rendezvous here to be on the safe side. The
|
---|
570 | * alternative is to use an MMIO2 region and use the WallClock.u32Version field
|
---|
571 | * for transactional update. However, this MSR is rarely written to (typically
|
---|
572 | * once during bootup) it's currently not a performance issue especially since
|
---|
573 | * we're already in ring-3. If we really wanted better performance in this code
|
---|
574 | * path, we should be doing it in ring-0 with transactional update while make
|
---|
575 | * sure there is only 1 writer as well.
|
---|
576 | *
|
---|
577 | * @returns VBox status code.
|
---|
578 | * @param pVM The cross context VM structure.
|
---|
579 | * @param GCPhysWallClock Where the guest wall-clock structure is located.
|
---|
580 | *
|
---|
581 | * @remarks Don't do any release assertions here, these can be triggered by
|
---|
582 | * guest R0 code.
|
---|
583 | */
|
---|
584 | VMMR3_INT_DECL(int) gimR3KvmEnableWallClock(PVM pVM, RTGCPHYS GCPhysWallClock)
|
---|
585 | {
|
---|
586 | KVMWALLCLOCKINFO WallClockInfo;
|
---|
587 | WallClockInfo.GCPhysWallClock = GCPhysWallClock;
|
---|
588 | return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, gimR3KvmEnableWallClockCallback, &WallClockInfo);
|
---|
589 | }
|
---|
590 |
|
---|
591 |
|
---|
592 | /**
|
---|
593 | * Enables the KVM system time structure.
|
---|
594 | *
|
---|
595 | * This can be done concurrently because the guest memory being updated is per-VCPU
|
---|
596 | * and the struct even has a "version" field which needs to be incremented
|
---|
597 | * before/after altering guest memory to allow concurrent updates from the host.
|
---|
598 | * Hence this is not being done in an EMT rendezvous. It -is- done in ring-3 since
|
---|
599 | * we call into ring-3 only TM code in the end.
|
---|
600 | *
|
---|
601 | * @returns VBox status code.
|
---|
602 | * @param pVM The cross context VM structure.
|
---|
603 | * @param pVCpu The cross context virtual CPU structure.
|
---|
604 | * @param uMsrSystemTime The system time MSR value being written.
|
---|
605 | */
|
---|
606 | VMMR3_INT_DECL(int) gimR3KvmEnableSystemTime(PVMCC pVM, PVMCPUCC pVCpu, uint64_t uMsrSystemTime)
|
---|
607 | {
|
---|
608 | Assert(uMsrSystemTime & MSR_GIM_KVM_SYSTEM_TIME_ENABLE_BIT);
|
---|
609 | PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
|
---|
610 | PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
|
---|
611 |
|
---|
612 | /*
|
---|
613 | * Update the system-time struct.
|
---|
614 | * The system-time structs are usually placed at a different guest address for each VCPU.
|
---|
615 | */
|
---|
616 | pKvmCpu->uTsc = TMCpuTickGetNoCheck(pVCpu);
|
---|
617 | pKvmCpu->uVirtNanoTS = ASMMultU64ByU32DivByU32(pKvmCpu->uTsc, RT_NS_1SEC, pKvm->cTscTicksPerSecond);
|
---|
618 | pKvmCpu->u64SystemTimeMsr = uMsrSystemTime;
|
---|
619 | pKvmCpu->GCPhysSystemTime = MSR_GIM_KVM_SYSTEM_TIME_GUEST_GPA(uMsrSystemTime);
|
---|
620 |
|
---|
621 | int rc = gimR3KvmUpdateSystemTime(pVM, pVCpu);
|
---|
622 | if (RT_FAILURE(rc))
|
---|
623 | {
|
---|
624 | pKvmCpu->u64SystemTimeMsr = 0;
|
---|
625 | /* We shouldn't throw a #GP(0) here for buggy guests (neither does KVM apparently), see @bugref{8627}. */
|
---|
626 | }
|
---|
627 |
|
---|
628 | return rc;
|
---|
629 | }
|
---|
630 |
|
---|