VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/FTM.cpp@ 58122

Last change on this file since 58122 was 58122, checked in by vboxsync, 9 years ago

VMM: Made @param pVM more uniform and to the point.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 42.7 KB
Line 
1/* $Id: FTM.cpp 58122 2015-10-08 17:11:58Z vboxsync $ */
2/** @file
3 * FTM - Fault Tolerance Manager
4 */
5
6/*
7 * Copyright (C) 2010-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_FTM
23#include <VBox/vmm/ftm.h>
24#include <VBox/vmm/em.h>
25#include <VBox/vmm/pdm.h>
26#include <VBox/vmm/pgm.h>
27#include <VBox/vmm/ssm.h>
28#include <VBox/vmm/vmm.h>
29#include "FTMInternal.h"
30#include <VBox/vmm/vm.h>
31#include <VBox/vmm/uvm.h>
32#include <VBox/err.h>
33#include <VBox/param.h>
34#include <VBox/log.h>
35
36#include <iprt/assert.h>
37#include <iprt/thread.h>
38#include <iprt/string.h>
39#include <iprt/mem.h>
40#include <iprt/tcp.h>
41#include <iprt/socket.h>
42#include <iprt/semaphore.h>
43#include <iprt/asm.h>
44
45#include "internal/pgm.h"
46
47
48/*******************************************************************************
49 * Structures and Typedefs *
50 *******************************************************************************/
51
/**
 * Framing header preceding every block of SSM stream data on the TCP link.
 *
 * The extra framing layer lets the receiver find out where the SSM stream
 * ends (or that it was cancelled) without parsing the stream contents.
 */
typedef struct FTMTCPHDR
{
    /** Magic value (FTMTCPHDR_MAGIC). */
    uint32_t    u32Magic;
    /** Number of payload bytes following this header.
     * 0 marks the end of the stream and UINT32_MAX marks cancellation;
     * regular blocks carry at most FTMTCPHDR_MAX_SIZE bytes. */
    uint32_t    cb;
} FTMTCPHDR;
/** Magic value for FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */
#define FTMTCPHDR_MAGIC         UINT32_C(0x19471205)
/** The max block size. */
#define FTMTCPHDR_MAX_SIZE      UINT32_C(0x00fffff8)
71
72/**
73 * TCP stream header.
74 *
75 * This is an extra layer for fixing the problem with figuring out when the SSM
76 * stream ends.
77 */
78typedef struct FTMTCPHDRMEM
79{
80 /** Magic value. */
81 uint32_t u32Magic;
82 /** Size (Uncompressed) of the pages following the header. */
83 uint32_t cbPageRange;
84 /** GC Physical address of the page(s) to sync. */
85 RTGCPHYS GCPhys;
86 /** The size of the data block following this header.
87 * 0 indicates the end of the stream, while UINT32_MAX indicates
88 * cancelation. */
89 uint32_t cb;
90} FTMTCPHDRMEM;
91
92
93/*********************************************************************************************************************************
94* Global Variables *
95*********************************************************************************************************************************/
96static const char g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n";
97
98static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser);
99
100/**
101 * Initializes the FTM.
102 *
103 * @returns VBox status code.
104 * @param pVM The cross context VM structure.
105 */
106VMMR3_INT_DECL(int) FTMR3Init(PVM pVM)
107{
108 /*
109 * Assert alignment and sizes.
110 */
111 AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding));
112 AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t));
113
114 /** @todo saved state for master nodes! */
115 pVM->ftm.s.pszAddress = NULL;
116 pVM->ftm.s.pszPassword = NULL;
117 pVM->fFaultTolerantMaster = false;
118 pVM->ftm.s.fIsStandbyNode = false;
119 pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER;
120 pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT;
121 pVM->ftm.s.hSocket = NIL_RTSOCKET;
122
123 /*
124 * Initialize the PGM critical section.
125 */
126 int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM");
127 AssertRCReturn(rc, rc);
128
129 /*
130 * Register statistics.
131 */
132 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received.");
133 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received.");
134 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
135 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent.");
136 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaVM, STAMTYPE_COUNTER, "/FT/Sync/DeltaVM", STAMUNIT_OCCURENCES, "Number of delta vm syncs.");
137 STAM_REL_REG(pVM, &pVM->ftm.s.StatFullSync, STAMTYPE_COUNTER, "/FT/Sync/Full", STAMUNIT_OCCURENCES, "Number of full vm syncs.");
138 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaMem, STAMTYPE_COUNTER, "/FT/Sync/DeltaMem", STAMUNIT_OCCURENCES, "Number of delta mem syncs.");
139 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointStorage, STAMTYPE_COUNTER, "/FT/Checkpoint/Storage", STAMUNIT_OCCURENCES, "Number of storage checkpoints.");
140 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointNetwork, STAMTYPE_COUNTER, "/FT/Checkpoint/Network", STAMUNIT_OCCURENCES, "Number of network checkpoints.");
141#ifdef VBOX_WITH_STATISTICS
142 STAM_REG(pVM, &pVM->ftm.s.StatCheckpoint, STAMTYPE_PROFILE, "/FT/Checkpoint", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
143 STAM_REG(pVM, &pVM->ftm.s.StatCheckpointPause, STAMTYPE_PROFILE, "/FT/Checkpoint/Pause", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
144 STAM_REG(pVM, &pVM->ftm.s.StatCheckpointResume, STAMTYPE_PROFILE, "/FT/Checkpoint/Resume", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
145 STAM_REG(pVM, &pVM->ftm.s.StatSentMemRAM, STAMTYPE_COUNTER, "/FT/Sent/Mem/RAM", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
146 STAM_REG(pVM, &pVM->ftm.s.StatSentMemMMIO2, STAMTYPE_COUNTER, "/FT/Sent/Mem/MMIO2", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
147 STAM_REG(pVM, &pVM->ftm.s.StatSentMemShwROM, STAMTYPE_COUNTER, "/FT/Sent/Mem/ShwROM", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
148 STAM_REG(pVM, &pVM->ftm.s.StatSentStateWrite, STAMTYPE_COUNTER, "/FT/Sent/State/Writes", STAMUNIT_BYTES, "The nr of write calls.");
149#endif
150 return VINF_SUCCESS;
151}
152
153/**
154 * Terminates the FTM.
155 *
156 * Termination means cleaning up and freeing all resources,
157 * the VM itself is at this point powered off or suspended.
158 *
159 * @returns VBox status code.
160 * @param pVM The cross context VM structure.
161 */
162VMMR3_INT_DECL(int) FTMR3Term(PVM pVM)
163{
164 if (pVM->ftm.s.hShutdownEvent != NIL_RTSEMEVENT)
165 {
166 RTSemEventDestroy(pVM->ftm.s.hShutdownEvent);
167 pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT;
168 }
169 if (pVM->ftm.s.hSocket != NIL_RTSOCKET)
170 {
171 RTTcpClientClose(pVM->ftm.s.hSocket);
172 pVM->ftm.s.hSocket = NIL_RTSOCKET;
173 }
174 if (pVM->ftm.s.standby.hServer)
175 {
176 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
177 pVM->ftm.s.standby.hServer = NULL;
178 }
179 if (pVM->ftm.s.pszAddress)
180 RTMemFree(pVM->ftm.s.pszAddress);
181 if (pVM->ftm.s.pszPassword)
182 RTMemFree(pVM->ftm.s.pszPassword);
183
184 /* Remove all pending memory updates. */
185 if (pVM->ftm.s.standby.pPhysPageTree)
186 {
187 RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, NULL);
188 pVM->ftm.s.standby.pPhysPageTree = NULL;
189 }
190
191 pVM->ftm.s.pszAddress = NULL;
192 pVM->ftm.s.pszPassword = NULL;
193
194 PDMR3CritSectDelete(&pVM->ftm.s.CritSect);
195 return VINF_SUCCESS;
196}
197
198
199static int ftmR3TcpWriteACK(PVM pVM)
200{
201 int rc = RTTcpWrite(pVM->ftm.s.hSocket, RT_STR_TUPLE("ACK\n"));
202 if (RT_FAILURE(rc))
203 {
204 LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc));
205 }
206 return rc;
207}
208
209
210static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL)
211{
212 char szMsg[256];
213 size_t cch;
214 if (pszMsgText && *pszMsgText)
215 {
216 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText);
217 for (size_t off = 6; off + 1 < cch; off++)
218 if (szMsg[off] == '\n')
219 szMsg[off] = '\r';
220 }
221 else
222 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2);
223 int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch);
224 if (RT_FAILURE(rc))
225 LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc));
226 return rc;
227}
228
229/**
230 * Reads a string from the socket.
231 *
232 * @returns VBox status code.
233 *
234 * @param pState The teleporter state structure.
235 * @param pszBuf The output buffer.
236 * @param cchBuf The size of the output buffer.
237 *
238 */
239static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf)
240{
241 char *pszStart = pszBuf;
242 RTSOCKET Sock = pVM->ftm.s.hSocket;
243
244 AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR);
245 *pszBuf = '\0';
246
247 /* dead simple approach. */
248 for (;;)
249 {
250 char ch;
251 int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
252 if (RT_FAILURE(rc))
253 {
254 LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart));
255 return rc;
256 }
257 if ( ch == '\n'
258 || ch == '\0')
259 return VINF_SUCCESS;
260 if (cchBuf <= 1)
261 {
262 LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart));
263 return VERR_BUFFER_OVERFLOW;
264 }
265 *pszBuf++ = ch;
266 *pszBuf = '\0';
267 cchBuf--;
268 }
269}
270
271/**
272 * Reads an ACK or NACK.
273 *
274 * @returns VBox status code.
275 * @param pVM The cross context VM structure.
276 * @param pszWhich Which ACK is this this?
277 * @param pszNAckMsg Optional NACK message.
278 */
279static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
280{
281 char szMsg[256];
282 int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
283 if (RT_FAILURE(rc))
284 return rc;
285
286 if (!strcmp(szMsg, "ACK"))
287 return VINF_SUCCESS;
288
289 if (!strncmp(szMsg, RT_STR_TUPLE("NACK=")))
290 {
291 char *pszMsgText = strchr(szMsg, ';');
292 if (pszMsgText)
293 *pszMsgText++ = '\0';
294
295 int32_t vrc2;
296 rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
297 if (rc == VINF_SUCCESS)
298 {
299 /*
300 * Well formed NACK, transform it into an error.
301 */
302 if (pszNAckMsg)
303 {
304 LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
305 return VERR_INTERNAL_ERROR;
306 }
307
308 if (pszMsgText)
309 {
310 pszMsgText = RTStrStrip(pszMsgText);
311 for (size_t off = 0; pszMsgText[off]; off++)
312 if (pszMsgText[off] == '\r')
313 pszMsgText[off] = '\n';
314
315 LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
316 }
317 return VERR_INTERNAL_ERROR_2;
318 }
319
320 if (pszMsgText)
321 pszMsgText[-1] = ';';
322 }
323 return VERR_INTERNAL_ERROR_3;
324}
325
326/**
327 * Submitts a command to the destination and waits for the ACK.
328 *
329 * @returns VBox status code.
330 *
331 * @param pVM The cross context VM structure.
332 * @param pszCommand The command.
333 * @param fWaitForAck Whether to wait for the ACK.
334 */
335static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
336{
337 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), RT_STR_TUPLE("\n"));
338 if (RT_FAILURE(rc))
339 return rc;
340 if (!fWaitForAck)
341 return VINF_SUCCESS;
342 return ftmR3TcpReadACK(pVM, pszCommand);
343}
344
345/**
346 * @copydoc SSMSTRMOPS::pfnWrite
347 */
348static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
349{
350 PVM pVM = (PVM)pvUser;
351 NOREF(offStream);
352
353 AssertReturn(cbToWrite > 0, VINF_SUCCESS);
354 AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
355 AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
356
357 STAM_COUNTER_INC(&pVM->ftm.s.StatSentStateWrite);
358 for (;;)
359 {
360 FTMTCPHDR Hdr;
361 Hdr.u32Magic = FTMTCPHDR_MAGIC;
362 Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
363 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
364 if (RT_FAILURE(rc))
365 {
366 LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
367 return rc;
368 }
369 pVM->ftm.s.StatSentState.c += Hdr.cb + sizeof(Hdr);
370 pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
371 if (Hdr.cb == cbToWrite)
372 return VINF_SUCCESS;
373
374 /* advance */
375 cbToWrite -= Hdr.cb;
376 pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
377 }
378}
379
380
381/**
382 * Selects and poll for close condition.
383 *
384 * We can use a relatively high poll timeout here since it's only used to get
385 * us out of error paths. In the normal cause of events, we'll get a
386 * end-of-stream header.
387 *
388 * @returns VBox status code.
389 *
390 * @param pState The teleporter state data.
391 */
392static int ftmR3TcpReadSelect(PVM pVM)
393{
394 int rc;
395 do
396 {
397 rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000);
398 if (RT_FAILURE(rc) && rc != VERR_TIMEOUT)
399 {
400 pVM->ftm.s.syncstate.fIOError = true;
401 LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc));
402 break;
403 }
404 if (pVM->ftm.s.syncstate.fStopReading)
405 {
406 rc = VERR_EOF;
407 break;
408 }
409 } while (rc == VERR_TIMEOUT);
410 return rc;
411}
412
413
414/**
415 * @copydoc SSMSTRMOPS::pfnRead
416 */
417static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead)
418{
419 PVM pVM = (PVM)pvUser;
420 AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
421 NOREF(offStream);
422
423 for (;;)
424 {
425 int rc;
426
427 /*
428 * Check for various conditions and may have been signalled.
429 */
430 if (pVM->ftm.s.syncstate.fEndOfStream)
431 return VERR_EOF;
432 if (pVM->ftm.s.syncstate.fStopReading)
433 return VERR_EOF;
434 if (pVM->ftm.s.syncstate.fIOError)
435 return VERR_IO_GEN_FAILURE;
436
437 /*
438 * If there is no more data in the current block, read the next
439 * block header.
440 */
441 if (!pVM->ftm.s.syncstate.cbReadBlock)
442 {
443 rc = ftmR3TcpReadSelect(pVM);
444 if (RT_FAILURE(rc))
445 return rc;
446 FTMTCPHDR Hdr;
447 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
448 if (RT_FAILURE(rc))
449 {
450 pVM->ftm.s.syncstate.fIOError = true;
451 LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc));
452 return rc;
453 }
454 pVM->ftm.s.StatReceivedState.c += sizeof(Hdr);
455
456 if (RT_UNLIKELY( Hdr.u32Magic != FTMTCPHDR_MAGIC
457 || Hdr.cb > FTMTCPHDR_MAX_SIZE
458 || Hdr.cb == 0))
459 {
460 if ( Hdr.u32Magic == FTMTCPHDR_MAGIC
461 && ( Hdr.cb == 0
462 || Hdr.cb == UINT32_MAX)
463 )
464 {
465 pVM->ftm.s.syncstate.fEndOfStream = true;
466 pVM->ftm.s.syncstate.cbReadBlock = 0;
467 return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF;
468 }
469 pVM->ftm.s.syncstate.fIOError = true;
470 LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb));
471 return VERR_IO_GEN_FAILURE;
472 }
473
474 pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb;
475 if (pVM->ftm.s.syncstate.fStopReading)
476 return VERR_EOF;
477 }
478
479 /*
480 * Read more data.
481 */
482 rc = ftmR3TcpReadSelect(pVM);
483 if (RT_FAILURE(rc))
484 return rc;
485
486 uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead);
487 rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead);
488 if (RT_FAILURE(rc))
489 {
490 pVM->ftm.s.syncstate.fIOError = true;
491 LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb));
492 return rc;
493 }
494 if (pcbRead)
495 {
496 cb = (uint32_t)*pcbRead;
497 pVM->ftm.s.StatReceivedState.c += cb;
498 pVM->ftm.s.syncstate.uOffStream += cb;
499 pVM->ftm.s.syncstate.cbReadBlock -= cb;
500 return VINF_SUCCESS;
501 }
502 pVM->ftm.s.StatReceivedState.c += cb;
503 pVM->ftm.s.syncstate.uOffStream += cb;
504 pVM->ftm.s.syncstate.cbReadBlock -= cb;
505 if (cbToRead == cb)
506 return VINF_SUCCESS;
507
508 /* Advance to the next block. */
509 cbToRead -= cb;
510 pvBuf = (uint8_t *)pvBuf + cb;
511 }
512}
513
514
515/**
516 * @copydoc SSMSTRMOPS::pfnSeek
517 */
518static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
519{
520 NOREF(pvUser); NOREF(offSeek); NOREF(uMethod); NOREF(poffActual);
521 return VERR_NOT_SUPPORTED;
522}
523
524
525/**
526 * @copydoc SSMSTRMOPS::pfnTell
527 */
528static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser)
529{
530 PVM pVM = (PVM)pvUser;
531 return pVM->ftm.s.syncstate.uOffStream;
532}
533
534
535/**
536 * @copydoc SSMSTRMOPS::pfnSize
537 */
538static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb)
539{
540 NOREF(pvUser); NOREF(pcb);
541 return VERR_NOT_SUPPORTED;
542}
543
544
545/**
546 * @copydoc SSMSTRMOPS::pfnIsOk
547 */
548static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser)
549{
550 PVM pVM = (PVM)pvUser;
551
552 if (pVM->fFaultTolerantMaster)
553 {
554 /* Poll for incoming NACKs and errors from the other side */
555 int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0);
556 if (rc != VERR_TIMEOUT)
557 {
558 if (RT_SUCCESS(rc))
559 {
560 LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n"));
561 rc = VERR_SSM_CANCELLED;
562 }
563 else
564 LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc));
565 return rc;
566 }
567 }
568
569 return VINF_SUCCESS;
570}
571
572
573/**
574 * @copydoc SSMSTRMOPS::pfnClose
575 */
576static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCanceled)
577{
578 PVM pVM = (PVM)pvUser;
579
580 if (pVM->fFaultTolerantMaster)
581 {
582 FTMTCPHDR EofHdr;
583 EofHdr.u32Magic = FTMTCPHDR_MAGIC;
584 EofHdr.cb = fCanceled ? UINT32_MAX : 0;
585 int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr));
586 if (RT_FAILURE(rc))
587 {
588 LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc));
589 return rc;
590 }
591 }
592 else
593 {
594 ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true);
595 }
596
597 return VINF_SUCCESS;
598}
599
600
601/**
602 * Method table for a TCP based stream.
603 */
604static SSMSTRMOPS const g_ftmR3TcpOps =
605{
606 SSMSTRMOPS_VERSION,
607 ftmR3TcpOpWrite,
608 ftmR3TcpOpRead,
609 ftmR3TcpOpSeek,
610 ftmR3TcpOpTell,
611 ftmR3TcpOpSize,
612 ftmR3TcpOpIsOk,
613 ftmR3TcpOpClose,
614 SSMSTRMOPS_VERSION
615};
616
617
618/**
619 * VMR3ReqCallWait callback
620 *
621 * @param pVM The cross context VM structure.
622 *
623 */
624static DECLCALLBACK(void) ftmR3WriteProtectMemory(PVM pVM)
625{
626 int rc = PGMR3PhysWriteProtectRAM(pVM);
627 AssertRC(rc);
628}
629
630
631/**
632 * Sync the VM state
633 *
634 * @returns VBox status code.
635 * @param pVM The cross context VM structure.
636 */
637static int ftmR3PerformFullSync(PVM pVM)
638{
639 bool fSuspended = false;
640
641 int rc = VMR3Suspend(pVM->pUVM, VMSUSPENDREASON_FTM_SYNC);
642 AssertRCReturn(rc, rc);
643
644 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatFullSync);
645
646 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
647
648 /* Reset the sync state. */
649 pVM->ftm.s.syncstate.uOffStream = 0;
650 pVM->ftm.s.syncstate.cbReadBlock = 0;
651 pVM->ftm.s.syncstate.fStopReading = false;
652 pVM->ftm.s.syncstate.fIOError = false;
653 pVM->ftm.s.syncstate.fEndOfStream = false;
654
655 rc = ftmR3TcpSubmitCommand(pVM, "full-sync");
656 AssertRC(rc);
657
658 pVM->ftm.s.fDeltaLoadSaveActive = false;
659 rc = VMR3SaveFT(pVM->pUVM, &g_ftmR3TcpOps, pVM, &fSuspended, false /* fSkipStateChanges */);
660 AssertRC(rc);
661
662 rc = ftmR3TcpReadACK(pVM, "full-sync-complete");
663 AssertRC(rc);
664
665 RTSocketRelease(pVM->ftm.s.hSocket);
666
667 /* Write protect all memory. */
668 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
669 AssertRCReturn(rc, rc);
670
671 rc = VMR3Resume(pVM->pUVM, VMRESUMEREASON_FTM_SYNC);
672 AssertRC(rc);
673
674 return rc;
675}
676
677
678/**
679 * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages
680 *
681 * @param pVM The cross context VM structure.
682 * @param GCPhys GC physical address
683 * @param pRange HC virtual address of the page(s)
684 * @param cbRange Size of the dirty range in bytes.
685 * @param pvUser User argument
686 */
687static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser)
688{
689 NOREF(pvUser);
690 FTMTCPHDRMEM Hdr;
691 Hdr.u32Magic = FTMTCPHDR_MAGIC;
692 Hdr.GCPhys = GCPhys;
693 Hdr.cbPageRange = cbRange;
694 Hdr.cb = cbRange;
695 /** @todo compress page(s). */
696 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb);
697 if (RT_FAILURE(rc))
698 {
699 LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb));
700 return rc;
701 }
702 pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr);
703
704#ifdef VBOX_WITH_STATISTICS
705 switch (PGMPhysGetPageType(pVM, GCPhys))
706 {
707 case PGMPAGETYPE_RAM:
708 pVM->ftm.s.StatSentMemRAM.c += Hdr.cb + sizeof(Hdr);
709 break;
710
711 case PGMPAGETYPE_MMIO2:
712 pVM->ftm.s.StatSentMemMMIO2.c += Hdr.cb + sizeof(Hdr);
713 break;
714
715 case PGMPAGETYPE_ROM_SHADOW:
716 pVM->ftm.s.StatSentMemShwROM.c += Hdr.cb + sizeof(Hdr);
717 break;
718
719 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
720 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO:
721 AssertFailed();
722 break;
723
724 default:
725 AssertFailed();
726 break;
727 }
728#endif
729
730 return (pVM->ftm.s.fCheckpointingActive) ? VERR_INTERRUPTED : VINF_SUCCESS;
731}
732
733/**
734 * Thread function which starts syncing process for this master VM
735 *
736 * @param hThread The thread handle.
737 * @param pvUser Pointer to the VM.
738 * @return VINF_SUCCESS (ignored).
739 *
740 */
741static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD hThread, void *pvUser)
742{
743 int rc = VINF_SUCCESS;
744 PVM pVM = (PVM)pvUser;
745 NOREF(hThread);
746
747 for (;;)
748 {
749 /*
750 * Try connect to the standby machine.
751 */
752 Log(("ftmR3MasterThread: client connect to %s %d\n", pVM->ftm.s.pszAddress, pVM->ftm.s.uPort));
753 rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket);
754 if (RT_SUCCESS(rc))
755 {
756 Log(("ftmR3MasterThread: CONNECTED\n"));
757
758 /* Disable Nagle. */
759 rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/);
760 AssertRC(rc);
761
762 /* Read and check the welcome message. */
763 char szLine[RT_MAX(128, sizeof(g_szWelcome))];
764 RT_ZERO(szLine);
765 rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL);
766 if ( RT_SUCCESS(rc)
767 && !strcmp(szLine, g_szWelcome))
768 {
769 /* password */
770 if (pVM->ftm.s.pszPassword)
771 rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword));
772
773 if (RT_SUCCESS(rc))
774 {
775 /* ACK */
776 rc = ftmR3TcpReadACK(pVM, "password", "Invalid password");
777 if (RT_SUCCESS(rc))
778 {
779 /** todo: verify VM config. */
780 break;
781 }
782 }
783 }
784 /* Failed, so don't bother anymore. */
785 return VINF_SUCCESS;
786 }
787 rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, 1000 /* 1 second */);
788 if (rc != VERR_TIMEOUT)
789 return VINF_SUCCESS; /* told to quit */
790 }
791
792 /* Successfully initialized the connection to the standby node.
793 * Start the sync process.
794 */
795
796 /* First sync all memory and write protect everything so
797 * we can send changed pages later on.
798 */
799
800 rc = ftmR3PerformFullSync(pVM);
801
802 for (;;)
803 {
804 rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval);
805 if (rc != VERR_TIMEOUT)
806 break; /* told to quit */
807
808 if (!pVM->ftm.s.fCheckpointingActive)
809 {
810 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
811 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
812
813 rc = ftmR3TcpSubmitCommand(pVM, "mem-sync");
814 AssertRC(rc);
815
816 /* sync the changed memory with the standby node. */
817 /* Write protect all memory. */
818 if (!pVM->ftm.s.fCheckpointingActive)
819 {
820 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
821 AssertRC(rc);
822 }
823
824 /* Enumerate all dirty pages and send them to the standby VM. */
825 if (!pVM->ftm.s.fCheckpointingActive)
826 {
827 rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */);
828 Assert(rc == VINF_SUCCESS || rc == VERR_INTERRUPTED);
829 }
830
831 /* Send last memory header to signal the end. */
832 FTMTCPHDRMEM Hdr;
833 Hdr.u32Magic = FTMTCPHDR_MAGIC;
834 Hdr.GCPhys = 0;
835 Hdr.cbPageRange = 0;
836 Hdr.cb = 0;
837 rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr));
838 if (RT_FAILURE(rc))
839 LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb));
840
841 rc = ftmR3TcpReadACK(pVM, "mem-sync-complete");
842 AssertRC(rc);
843
844 PDMCritSectLeave(&pVM->ftm.s.CritSect);
845 }
846 }
847 return rc;
848}
849
850/**
851 * Syncs memory from the master VM
852 *
853 * @returns VBox status code.
854 * @param pVM The cross context VM structure.
855 */
856static int ftmR3SyncMem(PVM pVM)
857{
858 while (true)
859 {
860 FTMTCPHDRMEM Hdr;
861 RTGCPHYS GCPhys;
862
863 /* Read memory header. */
864 int rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
865 if (RT_FAILURE(rc))
866 {
867 Log(("RTTcpRead failed with %Rrc\n", rc));
868 break;
869 }
870 pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr);
871
872 if (Hdr.cb == 0)
873 break; /* end of sync. */
874
875 Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */
876 GCPhys = Hdr.GCPhys;
877
878 /* Must be a multiple of PAGE_SIZE. */
879 Assert((Hdr.cbPageRange & 0xfff) == 0);
880
881 while (Hdr.cbPageRange)
882 {
883 PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)RTAvlGCPhysGet(&pVM->ftm.s.standby.pPhysPageTree, GCPhys);
884 if (!pNode)
885 {
886 /* Allocate memory for the node and page. */
887 pNode = (PFTMPHYSPAGETREENODE)RTMemAllocZ(sizeof(*pNode) + PAGE_SIZE);
888 AssertBreak(pNode);
889
890 /* Insert the node into the tree. */
891 pNode->Core.Key = GCPhys;
892 pNode->pPage = (void *)(pNode + 1);
893 bool fRet = RTAvlGCPhysInsert(&pVM->ftm.s.standby.pPhysPageTree, &pNode->Core);
894 Assert(fRet); NOREF(fRet);
895 }
896
897 /* Fetch the page. */
898 rc = RTTcpRead(pVM->ftm.s.hSocket, pNode->pPage, PAGE_SIZE, NULL);
899 if (RT_FAILURE(rc))
900 {
901 Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc));
902 break;
903 }
904 pVM->ftm.s.StatReceivedMem.c += PAGE_SIZE;
905 Hdr.cbPageRange -= PAGE_SIZE;
906 GCPhys += PAGE_SIZE;
907 }
908 }
909 return VINF_SUCCESS;
910}
911
912
913/**
914 * Callback handler for RTAvlGCPhysDestroy
915 *
916 * @returns 0 to continue, otherwise stop
917 * @param pBaseNode Node to destroy
918 * @param pvUser Pointer to the VM.
919 */
920static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser)
921{
922 PVM pVM = (PVM)pvUser;
923 PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)pBaseNode;
924
925 if (pVM) /* NULL when the VM is destroyed. */
926 {
927 /* Update the guest memory of the standby VM. */
928 int rc = PGMR3PhysWriteExternal(pVM, pNode->Core.Key, pNode->pPage, PAGE_SIZE, PGMACCESSORIGIN_FTM);
929 AssertRC(rc);
930 }
931 RTMemFree(pNode);
932 return 0;
933}
934
935/**
936 * Thread function which monitors the health of the master VM
937 *
938 * @param hThread The thread handle.
939 * @param pvUser Pointer to the VM.
940 * @return VINF_SUCCESS (ignored).
941 *
942 */
943static DECLCALLBACK(int) ftmR3StandbyThread(RTTHREAD hThread, void *pvUser)
944{
945 PVM pVM = (PVM)pvUser;
946 NOREF(hThread);
947
948 for (;;)
949 {
950 uint64_t u64TimeNow;
951
952 int rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval);
953 if (rc != VERR_TIMEOUT)
954 break; /* told to quit */
955
956 if (pVM->ftm.s.standby.u64LastHeartbeat)
957 {
958 u64TimeNow = RTTimeMilliTS();
959
960 if (u64TimeNow > pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 4)
961 {
962 /* Timeout; prepare to fallover. */
963 LogRel(("FTSync: TIMEOUT (%RX64 vs %RX64 ms): activate standby VM!\n", u64TimeNow, pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 2));
964
965 pVM->ftm.s.fActivateStandby = true;
966 /** todo: prevent split-brain. */
967 break;
968 }
969 }
970 }
971
972 return VINF_SUCCESS;
973}
974
975
976/**
977 * Listen for incoming traffic destined for the standby VM.
978 *
979 * @copydoc FNRTTCPSERVE
980 *
981 * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP.
982 */
983static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET Sock, void *pvUser)
984{
985 PVM pVM = (PVM)pvUser;
986
987 pVM->ftm.s.hSocket = Sock;
988
989 /*
990 * Disable Nagle.
991 */
992 int rc = RTTcpSetSendCoalescing(Sock, false /*fEnable*/);
993 AssertRC(rc);
994
995 /* Send the welcome message to the master node. */
996 rc = RTTcpWrite(Sock, g_szWelcome, sizeof(g_szWelcome) - 1);
997 if (RT_FAILURE(rc))
998 {
999 LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc));
1000 return VINF_SUCCESS;
1001 }
1002
1003 /*
1004 * Password.
1005 */
1006 const char *pszPassword = pVM->ftm.s.pszPassword;
1007 if (pszPassword)
1008 {
1009 unsigned off = 0;
1010 while (pszPassword[off])
1011 {
1012 char ch;
1013 rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
1014 if ( RT_FAILURE(rc)
1015 || pszPassword[off] != ch)
1016 {
1017 if (RT_FAILURE(rc))
1018 LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc));
1019 else
1020 LogRel(("FTSync: Invalid password (off=%u)\n", off));
1021 ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE);
1022 return VINF_SUCCESS;
1023 }
1024 off++;
1025 }
1026 }
1027 rc = ftmR3TcpWriteACK(pVM);
1028 if (RT_FAILURE(rc))
1029 return VINF_SUCCESS;
1030
1031 /** @todo verify VM config. */
1032
1033 /*
1034 * Stop the server.
1035 *
1036 * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior
1037 * to it we must not return that value!
1038 */
1039 RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1040
1041 /*
1042 * Command processing loop.
1043 */
1044 //bool fDone = false;
1045 for (;;)
1046 {
1047 bool fFullSync = false;
1048 char szCmd[128];
1049
1050 rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd));
1051 if (RT_FAILURE(rc))
1052 break;
1053
1054 pVM->ftm.s.standby.u64LastHeartbeat = RTTimeMilliTS();
1055 if (!strcmp(szCmd, "mem-sync"))
1056 {
1057 rc = ftmR3TcpWriteACK(pVM);
1058 AssertRC(rc);
1059 if (RT_FAILURE(rc))
1060 continue;
1061
1062 rc = ftmR3SyncMem(pVM);
1063 AssertRC(rc);
1064
1065 rc = ftmR3TcpWriteACK(pVM);
1066 AssertRC(rc);
1067 }
1068 else
1069 if ( !strcmp(szCmd, "checkpoint")
1070 || !strcmp(szCmd, "full-sync")
1071 || (fFullSync = true)) /* intended assignment */
1072 {
1073 rc = ftmR3TcpWriteACK(pVM);
1074 AssertRC(rc);
1075 if (RT_FAILURE(rc))
1076 continue;
1077
1078 /* Flush all pending memory updates. */
1079 if (pVM->ftm.s.standby.pPhysPageTree)
1080 {
1081 RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, pVM);
1082 pVM->ftm.s.standby.pPhysPageTree = NULL;
1083 }
1084
1085 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
1086
1087 /* Reset the sync state. */
1088 pVM->ftm.s.syncstate.uOffStream = 0;
1089 pVM->ftm.s.syncstate.cbReadBlock = 0;
1090 pVM->ftm.s.syncstate.fStopReading = false;
1091 pVM->ftm.s.syncstate.fIOError = false;
1092 pVM->ftm.s.syncstate.fEndOfStream = false;
1093
1094 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
1095 rc = VMR3LoadFromStreamFT(pVM->pUVM, &g_ftmR3TcpOps, pVM);
1096 pVM->ftm.s.fDeltaLoadSaveActive = false;
1097 RTSocketRelease(pVM->ftm.s.hSocket);
1098 AssertRC(rc);
1099 if (RT_FAILURE(rc))
1100 {
1101 LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc));
1102 ftmR3TcpWriteNACK(pVM, rc);
1103 continue;
1104 }
1105
1106 /* The EOS might not have been read, make sure it is. */
1107 pVM->ftm.s.syncstate.fStopReading = false;
1108 size_t cbRead;
1109 rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead);
1110 if (rc != VERR_EOF)
1111 {
1112 LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc));
1113 ftmR3TcpWriteNACK(pVM, rc);
1114 continue;
1115 }
1116
1117 rc = ftmR3TcpWriteACK(pVM);
1118 AssertRC(rc);
1119 }
1120 }
1121 LogFlowFunc(("returns mRc=%Rrc\n", rc));
1122 return VERR_TCP_SERVER_STOP;
1123}
1124
1125/**
1126 * Powers on the fault tolerant virtual machine.
1127 *
1128 * @returns VBox status code.
1129 *
1130 * @param pUVM The user mode VM handle.
1131 * @param fMaster FT master or standby
1132 * @param uInterval FT sync interval
1133 * @param pszAddress Standby VM address
1134 * @param uPort Standby VM port
1135 * @param pszPassword FT password (NULL for none)
1136 *
1137 * @thread Any thread.
1138 * @vmstate Created
1139 * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby)
1140 */
1141VMMR3DECL(int) FTMR3PowerOn(PUVM pUVM, bool fMaster, unsigned uInterval,
1142 const char *pszAddress, unsigned uPort, const char *pszPassword)
1143{
1144 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
1145 PVM pVM = pUVM->pVM;
1146 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1147
1148 VMSTATE enmVMState = VMR3GetState(pVM);
1149 AssertMsgReturn(enmVMState == VMSTATE_CREATED,
1150 ("%s\n", VMR3GetStateName(enmVMState)),
1151 VERR_INTERNAL_ERROR_4);
1152 AssertReturn(pszAddress, VERR_INVALID_PARAMETER);
1153
1154 if (pVM->ftm.s.uInterval)
1155 pVM->ftm.s.uInterval = uInterval;
1156 else
1157 pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */
1158
1159 pVM->ftm.s.uPort = uPort;
1160 pVM->ftm.s.pszAddress = RTStrDup(pszAddress);
1161 if (pszPassword)
1162 pVM->ftm.s.pszPassword = RTStrDup(pszPassword);
1163
1164 int rc = RTSemEventCreate(&pVM->ftm.s.hShutdownEvent);
1165 if (RT_FAILURE(rc))
1166 return rc;
1167
1168 if (fMaster)
1169 {
1170 rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM,
1171 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmMaster");
1172 if (RT_FAILURE(rc))
1173 return rc;
1174
1175 pVM->fFaultTolerantMaster = true;
1176 if (PGMIsUsingLargePages(pVM))
1177 {
1178 /* Must disable large page usage as 2 MB pages are too big to write monitor. */
1179 LogRel(("FTSync: disabling large page usage.\n"));
1180 PGMSetLargePageUsage(pVM, false);
1181 }
1182 /** @todo might need to disable page fusion as well */
1183
1184 return VMR3PowerOn(pVM->pUVM);
1185 }
1186
1187
1188 /* standby */
1189 rc = RTThreadCreate(NULL, ftmR3StandbyThread, pVM,
1190 0, RTTHREADTYPE_DEFAULT, 0, "ftmStandby");
1191 if (RT_FAILURE(rc))
1192 return rc;
1193
1194 rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer);
1195 if (RT_FAILURE(rc))
1196 return rc;
1197 pVM->ftm.s.fIsStandbyNode = true;
1198
1199 rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM);
1200 /** @todo deal with the exit code to check if we should activate this standby VM. */
1201 if (pVM->ftm.s.fActivateStandby)
1202 {
1203 /** @todo fallover. */
1204 }
1205
1206 if (pVM->ftm.s.standby.hServer)
1207 {
1208 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
1209 pVM->ftm.s.standby.hServer = NULL;
1210 }
1211 if (rc == VERR_TCP_SERVER_SHUTDOWN)
1212 rc = VINF_SUCCESS; /* ignore this error; the standby process was cancelled. */
1213 return rc;
1214}
1215
1216/**
1217 * Powers off the fault tolerant virtual machine (standby).
1218 *
1219 * @returns VBox status code.
1220 *
1221 * @param pUVM The user mode VM handle.
1222 */
1223VMMR3DECL(int) FTMR3CancelStandby(PUVM pUVM)
1224{
1225 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
1226 PVM pVM = pUVM->pVM;
1227 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1228 AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED);
1229 Assert(pVM->ftm.s.standby.hServer);
1230
1231 return RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1232}
1233
1234/**
1235 * Rendezvous callback used by FTMR3SetCheckpoint
1236 * Sync state + changed memory with the standby node.
1237 *
1238 * This is only called on one of the EMTs while the other ones are waiting for
1239 * it to complete this function.
1240 *
1241 * @returns VINF_SUCCESS (VBox strict status code).
1242 * @param pVM The cross context VM structure.
1243 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
1244 * @param pvUser Not used.
1245 */
1246static DECLCALLBACK(VBOXSTRICTRC) ftmR3SetCheckpointRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
1247{
1248 int rc = VINF_SUCCESS;
1249 bool fSuspended = false;
1250 NOREF(pVCpu);
1251 NOREF(pvUser);
1252
1253 /* We don't call VMR3Suspend here to avoid the overhead of state changes and notifications. This
1254 * is only a short suspend.
1255 */
1256 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointPause, a);
1257 PDMR3Suspend(pVM);
1258
1259 /* Hack alert: as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
1260 EMR3NotifySuspend(pVM);
1261 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointPause, a);
1262
1263 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatDeltaVM);
1264
1265 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
1266
1267 /* Reset the sync state. */
1268 pVM->ftm.s.syncstate.uOffStream = 0;
1269 pVM->ftm.s.syncstate.cbReadBlock = 0;
1270 pVM->ftm.s.syncstate.fStopReading = false;
1271 pVM->ftm.s.syncstate.fIOError = false;
1272 pVM->ftm.s.syncstate.fEndOfStream = false;
1273
1274 rc = ftmR3TcpSubmitCommand(pVM, "checkpoint");
1275 AssertRC(rc);
1276
1277 pVM->ftm.s.fDeltaLoadSaveActive = true;
1278 rc = VMR3SaveFT(pVM->pUVM, &g_ftmR3TcpOps, pVM, &fSuspended, true /* fSkipStateChanges */);
1279 pVM->ftm.s.fDeltaLoadSaveActive = false;
1280 AssertRC(rc);
1281
1282 rc = ftmR3TcpReadACK(pVM, "checkpoint-complete");
1283 AssertRC(rc);
1284
1285 RTSocketRelease(pVM->ftm.s.hSocket);
1286
1287 /* Write protect all memory. */
1288 rc = PGMR3PhysWriteProtectRAM(pVM);
1289 AssertRC(rc);
1290
1291 /* We don't call VMR3Resume here to avoid the overhead of state changes and notifications. This
1292 * is only a short suspend.
1293 */
1294 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointResume, b);
1295 PGMR3ResetNoMorePhysWritesFlag(pVM);
1296 PDMR3Resume(pVM);
1297
1298 /* Hack alert as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
1299 EMR3NotifyResume(pVM);
1300 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointResume, b);
1301
1302 return rc;
1303}
1304
1305/**
1306 * Performs a full sync to the standby node
1307 *
1308 * @returns VBox status code.
1309 *
1310 * @param pVM The cross context VM structure.
1311 * @param enmCheckpoint Checkpoint type
1312 */
1313VMMR3_INT_DECL(int) FTMR3SetCheckpoint(PVM pVM, FTMCHECKPOINTTYPE enmCheckpoint)
1314{
1315 int rc;
1316
1317 if (!pVM->fFaultTolerantMaster)
1318 return VINF_SUCCESS;
1319
1320 switch (enmCheckpoint)
1321 {
1322 case FTMCHECKPOINTTYPE_NETWORK:
1323 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointNetwork);
1324 break;
1325
1326 case FTMCHECKPOINTTYPE_STORAGE:
1327 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointStorage);
1328 break;
1329
1330 default:
1331 AssertMsgFailedReturn(("%d\n", enmCheckpoint), VERR_INVALID_PARAMETER);
1332 }
1333
1334 pVM->ftm.s.fCheckpointingActive = true;
1335 if (VM_IS_EMT(pVM))
1336 {
1337 PVMCPU pVCpu = VMMGetCpu(pVM);
1338
1339 /* We must take special care here as the memory sync is competing with us and requires a responsive EMT. */
1340 while ((rc = PDMCritSectTryEnter(&pVM->ftm.s.CritSect)) == VERR_SEM_BUSY)
1341 {
1342 if (VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS))
1343 {
1344 rc = VMMR3EmtRendezvousFF(pVM, pVCpu);
1345 AssertRC(rc);
1346 }
1347
1348 if (VM_FF_IS_PENDING(pVM, VM_FF_REQUEST))
1349 {
1350 rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, true /*fPriorityOnly*/);
1351 AssertRC(rc);
1352 }
1353 }
1354 }
1355 else
1356 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
1357
1358 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
1359
1360 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpoint, a);
1361
1362 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, ftmR3SetCheckpointRendezvous, NULL);
1363
1364 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpoint, a);
1365
1366 PDMCritSectLeave(&pVM->ftm.s.CritSect);
1367 pVM->ftm.s.fCheckpointingActive = false;
1368
1369 return rc;
1370}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette