VirtualBox

source: vbox/trunk/src/VBox/VMM/FTM.cpp@ 32131

Last change on this file since 32131 was 32121, checked in by vboxsync, 14 years ago

Wrong api

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 32.7 KB
Line 
1/* $Id: FTM.cpp 32121 2010-08-31 10:00:09Z vboxsync $ */
2/** @file
3 * FTM - Fault Tolerance Manager
4 */
5
6/*
7 * Copyright (C) 2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_FTM
23#include "FTMInternal.h"
24#include <VBox/vm.h>
25#include <VBox/vmm.h>
26#include <VBox/err.h>
27#include <VBox/param.h>
28#include <VBox/ssm.h>
29#include <VBox/log.h>
30#include <VBox/pgm.h>
31
32#include <iprt/assert.h>
33#include <iprt/thread.h>
34#include <iprt/string.h>
35#include <iprt/mem.h>
36#include <iprt/tcp.h>
37#include <iprt/socket.h>
38#include <iprt/semaphore.h>
39#include <iprt/asm.h>
40
41/*******************************************************************************
42 * Structures and Typedefs *
43 *******************************************************************************/
44
/**
 * Block header framing the saved-state (SSM) stream on the TCP connection.
 *
 * Every chunk of state data is preceded by one of these headers so that the
 * receiving side can tell where the stream ends.
 */
typedef struct FTMTCPHDR
{
    /** Magic value, expected to be FTMTCPHDR_MAGIC. */
    uint32_t    u32Magic;
    /** Number of payload bytes that follow this header.
     * 0 marks the end of the stream and UINT32_MAX signals cancellation. */
    uint32_t    cb;
} FTMTCPHDR;
/** The value stored in FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */
#define FTMTCPHDR_MAGIC       UINT32_C(0x19471205)
/** Upper limit for the payload size of a single block. */
#define FTMTCPHDR_MAX_SIZE    UINT32_C(0x00fffff8)
64
65/**
66 * TCP stream header.
67 *
68 * This is an extra layer for fixing the problem with figuring out when the SSM
69 * stream ends.
70 */
71typedef struct FTMTCPHDRMEM
72{
73 /** Magic value. */
74 uint32_t u32Magic;
75 /** Size (Uncompressed) of the pages following the header. */
76 uint32_t cbPageRange;
77 /** GC Physical address of the page(s) to sync. */
78 RTGCPHYS GCPhys;
79 /** The size of the data block following this header.
80 * 0 indicates the end of the stream, while UINT32_MAX indicates
81 * cancelation. */
82 uint32_t cb;
83} FTMTCPHDRMEM;
84
85/*******************************************************************************
86* Global Variables *
87*******************************************************************************/
/** Greeting written by the standby node right after a connection is accepted;
 * the master compares the bytes it receives against this string. */
static const char g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n";
89
90/**
91 * Initializes the FTM.
92 *
93 * @returns VBox status code.
94 * @param pVM The VM to operate on.
95 */
96VMMR3DECL(int) FTMR3Init(PVM pVM)
97{
98 /*
99 * Assert alignment and sizes.
100 */
101 AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding));
102 AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t));
103
104 /** @todo saved state for master nodes! */
105 pVM->ftm.s.pszAddress = NULL;
106 pVM->ftm.s.pszPassword = NULL;
107 pVM->fFaultTolerantMaster = false;
108 pVM->ftm.s.fIsStandbyNode = false;
109 pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER;
110 pVM->ftm.s.master.hShutdownEvent = NIL_RTSEMEVENT;
111 pVM->ftm.s.hSocket = NIL_RTSOCKET;
112
113 /*
114 * Initialize the PGM critical section.
115 */
116 int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM");
117 AssertRCReturn(rc, rc);
118
119 /*
120 * Register statistics.
121 */
122 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received.");
123 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received.");
124 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
125 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent.");
126 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaVM, STAMTYPE_COUNTER, "/FT/Sync/DeltaVM", STAMUNIT_OCCURENCES, "Number of delta vm syncs.");
127 STAM_REL_REG(pVM, &pVM->ftm.s.StatFullSync, STAMTYPE_COUNTER, "/FT/Sync/Full", STAMUNIT_OCCURENCES, "Number of full vm syncs.");
128 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaMem, STAMTYPE_COUNTER, "/FT/Sync/DeltaMem", STAMUNIT_OCCURENCES, "Number of delta mem syncs.");
129
130 return VINF_SUCCESS;
131}
132
133/**
134 * Terminates the FTM.
135 *
136 * Termination means cleaning up and freeing all resources,
137 * the VM itself is at this point powered off or suspended.
138 *
139 * @returns VBox status code.
140 * @param pVM The VM to operate on.
141 */
142VMMR3DECL(int) FTMR3Term(PVM pVM)
143{
144 if (pVM->ftm.s.master.hShutdownEvent != NIL_RTSEMEVENT)
145 {
146 RTSemEventDestroy(pVM->ftm.s.master.hShutdownEvent);
147 pVM->ftm.s.master.hShutdownEvent = NIL_RTSEMEVENT;
148 }
149 if (pVM->ftm.s.hSocket != NIL_RTSOCKET)
150 {
151 RTTcpClientClose(pVM->ftm.s.hSocket);
152 pVM->ftm.s.hSocket = NIL_RTSOCKET;
153 }
154 if (pVM->ftm.s.standby.hServer)
155 {
156 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
157 pVM->ftm.s.standby.hServer = NULL;
158 }
159 if (pVM->ftm.s.pszAddress)
160 RTMemFree(pVM->ftm.s.pszAddress);
161 if (pVM->ftm.s.pszPassword)
162 RTMemFree(pVM->ftm.s.pszPassword);
163
164 pVM->ftm.s.pszAddress = NULL;
165 pVM->ftm.s.pszPassword = NULL;
166
167 PDMR3CritSectDelete(&pVM->ftm.s.CritSect);
168 return VINF_SUCCESS;
169}
170
171
172static int ftmR3TcpWriteACK(PVM pVM)
173{
174 int rc = RTTcpWrite(pVM->ftm.s.hSocket, "ACK\n", sizeof("ACK\n") - 1);
175 if (RT_FAILURE(rc))
176 {
177 LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc));
178 }
179 return rc;
180}
181
182
183static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL)
184{
185 char szMsg[256];
186 size_t cch;
187 if (pszMsgText && *pszMsgText)
188 {
189 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText);
190 for (size_t off = 6; off + 1 < cch; off++)
191 if (szMsg[off] == '\n')
192 szMsg[off] = '\r';
193 }
194 else
195 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2);
196 int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch);
197 if (RT_FAILURE(rc))
198 LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc));
199 return rc;
200}
201
202/**
203 * Reads a string from the socket.
204 *
205 * @returns VBox status code.
206 *
207 * @param pState The teleporter state structure.
208 * @param pszBuf The output buffer.
209 * @param cchBuf The size of the output buffer.
210 *
211 */
212static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf)
213{
214 char *pszStart = pszBuf;
215 RTSOCKET Sock = pVM->ftm.s.hSocket;
216
217 AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR);
218 *pszBuf = '\0';
219
220 /* dead simple approach. */
221 for (;;)
222 {
223 char ch;
224 int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
225 if (RT_FAILURE(rc))
226 {
227 LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart));
228 return rc;
229 }
230 if ( ch == '\n'
231 || ch == '\0')
232 return VINF_SUCCESS;
233 if (cchBuf <= 1)
234 {
235 LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart));
236 return VERR_BUFFER_OVERFLOW;
237 }
238 *pszBuf++ = ch;
239 *pszBuf = '\0';
240 cchBuf--;
241 }
242}
243
244/**
245 * Reads an ACK or NACK.
246 *
247 * @returns VBox status code.
248 * @param pVM The VM to operate on.
249 * @param pszWhich Which ACK is this this?
250 * @param pszNAckMsg Optional NACK message.
251 */
252static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
253{
254 char szMsg[256];
255 int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
256 if (RT_FAILURE(rc))
257 return rc;
258
259 if (!strcmp(szMsg, "ACK"))
260 return VINF_SUCCESS;
261
262 if (!strncmp(szMsg, "NACK=", sizeof("NACK=") - 1))
263 {
264 char *pszMsgText = strchr(szMsg, ';');
265 if (pszMsgText)
266 *pszMsgText++ = '\0';
267
268 int32_t vrc2;
269 rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
270 if (rc == VINF_SUCCESS)
271 {
272 /*
273 * Well formed NACK, transform it into an error.
274 */
275 if (pszNAckMsg)
276 {
277 LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
278 return VERR_INTERNAL_ERROR;
279 }
280
281 if (pszMsgText)
282 {
283 pszMsgText = RTStrStrip(pszMsgText);
284 for (size_t off = 0; pszMsgText[off]; off++)
285 if (pszMsgText[off] == '\r')
286 pszMsgText[off] = '\n';
287
288 LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
289 }
290 return VERR_INTERNAL_ERROR_2;
291 }
292
293 if (pszMsgText)
294 pszMsgText[-1] = ';';
295 }
296 return VERR_INTERNAL_ERROR_3;
297}
298
299/**
300 * Submitts a command to the destination and waits for the ACK.
301 *
302 * @returns VBox status code.
303 *
304 * @param pVM The VM to operate on.
305 * @param pszCommand The command.
306 * @param fWaitForAck Whether to wait for the ACK.
307 */
308static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
309{
310 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), "\n", sizeof("\n") - 1);
311 if (RT_FAILURE(rc))
312 return rc;
313 if (!fWaitForAck)
314 return VINF_SUCCESS;
315 return ftmR3TcpReadACK(pVM, pszCommand);
316}
317
318/**
319 * @copydoc SSMSTRMOPS::pfnWrite
320 */
321static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
322{
323 PVM pVM = (PVM)pvUser;
324
325 AssertReturn(cbToWrite > 0, VINF_SUCCESS);
326 AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
327 AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
328
329 for (;;)
330 {
331 FTMTCPHDR Hdr;
332 Hdr.u32Magic = FTMTCPHDR_MAGIC;
333 Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
334 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
335 if (RT_FAILURE(rc))
336 {
337 LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
338 return rc;
339 }
340 pVM->ftm.s.StatSentState.c += Hdr.cb + sizeof(Hdr);
341 pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
342 if (Hdr.cb == cbToWrite)
343 return VINF_SUCCESS;
344
345 /* advance */
346 cbToWrite -= Hdr.cb;
347 pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
348 }
349}
350
351
352/**
353 * Selects and poll for close condition.
354 *
355 * We can use a relatively high poll timeout here since it's only used to get
356 * us out of error paths. In the normal cause of events, we'll get a
357 * end-of-stream header.
358 *
359 * @returns VBox status code.
360 *
361 * @param pState The teleporter state data.
362 */
363static int ftmR3TcpReadSelect(PVM pVM)
364{
365 int rc;
366 do
367 {
368 rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000);
369 if (RT_FAILURE(rc) && rc != VERR_TIMEOUT)
370 {
371 pVM->ftm.s.syncstate.fIOError = true;
372 LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc));
373 break;
374 }
375 if (pVM->ftm.s.syncstate.fStopReading)
376 {
377 rc = VERR_EOF;
378 break;
379 }
380 } while (rc == VERR_TIMEOUT);
381 return rc;
382}
383
384
385/**
386 * @copydoc SSMSTRMOPS::pfnRead
387 */
388static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead)
389{
390 PVM pVM = (PVM)pvUser;
391 AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
392
393 for (;;)
394 {
395 int rc;
396
397 /*
398 * Check for various conditions and may have been signalled.
399 */
400 if (pVM->ftm.s.syncstate.fEndOfStream)
401 return VERR_EOF;
402 if (pVM->ftm.s.syncstate.fStopReading)
403 return VERR_EOF;
404 if (pVM->ftm.s.syncstate.fIOError)
405 return VERR_IO_GEN_FAILURE;
406
407 /*
408 * If there is no more data in the current block, read the next
409 * block header.
410 */
411 if (!pVM->ftm.s.syncstate.cbReadBlock)
412 {
413 rc = ftmR3TcpReadSelect(pVM);
414 if (RT_FAILURE(rc))
415 return rc;
416 FTMTCPHDR Hdr;
417 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
418 if (RT_FAILURE(rc))
419 {
420 pVM->ftm.s.syncstate.fIOError = true;
421 LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc));
422 return rc;
423 }
424 pVM->ftm.s.StatReceivedState.c += sizeof(Hdr);
425
426 if (RT_UNLIKELY( Hdr.u32Magic != FTMTCPHDR_MAGIC
427 || Hdr.cb > FTMTCPHDR_MAX_SIZE
428 || Hdr.cb == 0))
429 {
430 if ( Hdr.u32Magic == FTMTCPHDR_MAGIC
431 && ( Hdr.cb == 0
432 || Hdr.cb == UINT32_MAX)
433 )
434 {
435 pVM->ftm.s.syncstate.fEndOfStream = true;
436 pVM->ftm.s.syncstate.cbReadBlock = 0;
437 return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF;
438 }
439 pVM->ftm.s.syncstate.fIOError = true;
440 LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb));
441 return VERR_IO_GEN_FAILURE;
442 }
443
444 pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb;
445 if (pVM->ftm.s.syncstate.fStopReading)
446 return VERR_EOF;
447 }
448
449 /*
450 * Read more data.
451 */
452 rc = ftmR3TcpReadSelect(pVM);
453 if (RT_FAILURE(rc))
454 return rc;
455
456 uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead);
457 rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead);
458 if (RT_FAILURE(rc))
459 {
460 pVM->ftm.s.syncstate.fIOError = true;
461 LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb));
462 return rc;
463 }
464 if (pcbRead)
465 {
466 cb = (uint32_t)*pcbRead;
467 pVM->ftm.s.StatReceivedState.c += cb;
468 pVM->ftm.s.syncstate.uOffStream += cb;
469 pVM->ftm.s.syncstate.cbReadBlock -= cb;
470 return VINF_SUCCESS;
471 }
472 pVM->ftm.s.StatReceivedState.c += cb;
473 pVM->ftm.s.syncstate.uOffStream += cb;
474 pVM->ftm.s.syncstate.cbReadBlock -= cb;
475 if (cbToRead == cb)
476 return VINF_SUCCESS;
477
478 /* Advance to the next block. */
479 cbToRead -= cb;
480 pvBuf = (uint8_t *)pvBuf + cb;
481 }
482}
483
484
485/**
486 * @copydoc SSMSTRMOPS::pfnSeek
487 */
488static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
489{
490 return VERR_NOT_SUPPORTED;
491}
492
493
494/**
495 * @copydoc SSMSTRMOPS::pfnTell
496 */
497static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser)
498{
499 PVM pVM = (PVM)pvUser;
500 return pVM->ftm.s.syncstate.uOffStream;
501}
502
503
504/**
505 * @copydoc SSMSTRMOPS::pfnSize
506 */
507static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb)
508{
509 return VERR_NOT_SUPPORTED;
510}
511
512
513/**
514 * @copydoc SSMSTRMOPS::pfnIsOk
515 */
516static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser)
517{
518 PVM pVM = (PVM)pvUser;
519
520 if (pVM->fFaultTolerantMaster)
521 {
522 /* Poll for incoming NACKs and errors from the other side */
523 int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0);
524 if (rc != VERR_TIMEOUT)
525 {
526 if (RT_SUCCESS(rc))
527 {
528 LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n"));
529 rc = VERR_SSM_CANCELLED;
530 }
531 else
532 LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc));
533 return rc;
534 }
535 }
536
537 return VINF_SUCCESS;
538}
539
540
541/**
542 * @copydoc SSMSTRMOPS::pfnClose
543 */
544static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCanceled)
545{
546 PVM pVM = (PVM)pvUser;
547
548 if (pVM->fFaultTolerantMaster)
549 {
550 FTMTCPHDR EofHdr;
551 EofHdr.u32Magic = FTMTCPHDR_MAGIC;
552 EofHdr.cb = fCanceled ? UINT32_MAX : 0;
553 int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr));
554 if (RT_FAILURE(rc))
555 {
556 LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc));
557 return rc;
558 }
559 }
560 else
561 {
562 ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true);
563 }
564
565 return VINF_SUCCESS;
566}
567
568
569/**
570 * Method table for a TCP based stream.
571 */
572static SSMSTRMOPS const g_ftmR3TcpOps =
573{
574 SSMSTRMOPS_VERSION,
575 ftmR3TcpOpWrite,
576 ftmR3TcpOpRead,
577 ftmR3TcpOpSeek,
578 ftmR3TcpOpTell,
579 ftmR3TcpOpSize,
580 ftmR3TcpOpIsOk,
581 ftmR3TcpOpClose,
582 SSMSTRMOPS_VERSION
583};
584
585/**
586 * VMR3ReqCallWait callback
587 *
588 * @param pVM The VM handle.
589 *
590 */
591static DECLCALLBACK(void) ftmR3WriteProtectMemory(PVM pVM)
592{
593 int rc = PGMR3PhysWriteProtectRAM(pVM);
594 AssertRC(rc);
595}
596
597/**
598 * Sync the VM state partially or fully
599 *
600 * @returns VBox status code.
601 * @param pVM The VM handle.
602 * @param enmState Which state to sync
603 */
604static int ftmR3PerformSync(PVM pVM, FTMSYNCSTATE enmState)
605{
606 int rc;
607 bool fFullSync = false;
608
609 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
610 {
611 rc = VMR3Suspend(pVM);
612 AssertRCReturn(rc, rc);
613 }
614
615 switch (enmState)
616 {
617 case FTMSYNCSTATE_FULL:
618 fFullSync = true;
619 /* no break */
620 case FTMSYNCSTATE_DELTA_VM:
621 {
622 bool fSuspended = false;
623
624 STAM_REL_COUNTER_INC((fFullSync) ? &pVM->ftm.s.StatFullSync : &pVM->ftm.s.StatDeltaVM);
625
626 rc = ftmR3TcpSubmitCommand(pVM, (fFullSync) ? "full-sync" : "checkpoint");
627 AssertRC(rc);
628
629 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
630 rc = VMR3Save(pVM, NULL /* pszFilename */, &g_ftmR3TcpOps, pVM, true /* fContinueAfterwards */, NULL, NULL, &fSuspended);
631 pVM->ftm.s.fDeltaLoadSaveActive = false;
632 AssertRC(rc);
633
634 rc = ftmR3TcpReadACK(pVM, (fFullSync) ? "full-sync-complete" : "checkpoint-complete");
635 AssertRC(rc);
636 break;
637 }
638
639 case FTMSYNCSTATE_DELTA_MEMORY:
640 /* Nothing to do as we sync the memory in an async thread; no need to block EMT. */
641 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatDeltaMem);
642 break;
643 }
644
645 /* Write protect all memory. */
646 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
647 AssertRCReturn(rc, rc);
648
649 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
650 {
651 rc = VMR3Resume(pVM);
652 AssertRCReturn(rc, rc);
653 }
654 return VINF_SUCCESS;
655}
656
657/**
658 * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages
659 *
660 * @param pVM VM Handle.
661 * @param GCPhys GC physical address
662 * @param pRange HC virtual address of the page(s)
663 * @param cbRange Size of the dirty range in bytes.
664 * @param pvUser User argument
665 */
666static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser)
667{
668 FTMTCPHDRMEM Hdr;
669 Hdr.u32Magic = FTMTCPHDR_MAGIC;
670 Hdr.GCPhys = GCPhys;
671 Hdr.cbPageRange = cbRange;
672 Hdr.cb = cbRange;
673 /** @todo compress page(s). */
674 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb);
675 if (RT_FAILURE(rc))
676 {
677 LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb));
678 return rc;
679 }
680 pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr);
681 return VINF_SUCCESS;
682}
683
684/**
685 * Thread function which starts syncing process for this master VM
686 *
687 * @param Thread The thread id.
688 * @param pvUser Not used
689 * @return VINF_SUCCESS (ignored).
690 *
691 */
692static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD Thread, void *pvUser)
693{
694 int rc = VINF_SUCCESS;
695 PVM pVM = (PVM)pvUser;
696
697 for (;;)
698 {
699 /*
700 * Try connect to the standby machine.
701 */
702 Log(("ftmR3MasterThread: client connect to %s %d\n", pVM->ftm.s.pszAddress, pVM->ftm.s.uPort));
703 rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket);
704 if (RT_SUCCESS(rc))
705 {
706 Log(("ftmR3MasterThread: CONNECTED\n"));
707
708 /* Disable Nagle. */
709 rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/);
710 AssertRC(rc);
711
712 /* Read and check the welcome message. */
713 char szLine[RT_MAX(128, sizeof(g_szWelcome))];
714 RT_ZERO(szLine);
715 rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL);
716 if ( RT_SUCCESS(rc)
717 && !strcmp(szLine, g_szWelcome))
718 {
719 /* password */
720 if (pVM->ftm.s.pszPassword)
721 rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword));
722
723 if (RT_SUCCESS(rc))
724 {
725 /* ACK */
726 rc = ftmR3TcpReadACK(pVM, "password", "Invalid password");
727 if (RT_SUCCESS(rc))
728 {
729 /** todo: verify VM config. */
730 break;
731 }
732 }
733 }
734 /* Failed, so don't bother anymore. */
735 return VINF_SUCCESS;
736 }
737 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, 1000 /* 1 second */);
738 if (rc != VERR_TIMEOUT)
739 return VINF_SUCCESS; /* told to quit */
740 }
741
742 /* Successfully initialized the connection to the standby node.
743 * Start the sync process.
744 */
745
746 /* First sync all memory and write protect everything so
747 * we can send changed pages later on.
748 */
749
750 rc = ftmR3PerformSync(pVM, FTMSYNCSTATE_FULL);
751
752 for (;;)
753 {
754 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, pVM->ftm.s.uInterval);
755 if (rc != VERR_TIMEOUT)
756 break; /* told to quit */
757
758 if (!pVM->ftm.s.fCheckpointingActive)
759 {
760 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
761 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
762
763 rc = ftmR3TcpSubmitCommand(pVM, "mem-sync");
764 AssertRC(rc);
765
766 /* sync the changed memory with the standby node. */
767 rc = ftmR3PerformSync(pVM, FTMSYNCSTATE_DELTA_MEMORY);
768
769 /* Enumerate all dirty pages and send them to the standby VM. */
770 rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */);
771 AssertRC(rc);
772
773 /* Send last memory header to signal the end. */
774 FTMTCPHDRMEM Hdr;
775 Hdr.u32Magic = FTMTCPHDR_MAGIC;
776 Hdr.GCPhys = 0;
777 Hdr.cbPageRange = 0;
778 Hdr.cb = 0;
779 rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr));
780 if (RT_FAILURE(rc))
781 LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb));
782
783 rc = ftmR3TcpReadACK(pVM, "mem-sync-complete");
784 AssertRC(rc);
785
786 PDMCritSectLeave(&pVM->ftm.s.CritSect);
787 }
788 }
789 return rc;
790}
791
792/**
793 * Listen for incoming traffic destined for the standby VM.
794 *
795 * @copydoc FNRTTCPSERVE
796 *
797 * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP.
798 */
799static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET Sock, void *pvUser)
800{
801 PVM pVM = (PVM)pvUser;
802
803 pVM->ftm.s.hSocket = Sock;
804
805 /*
806 * Disable Nagle.
807 */
808 int rc = RTTcpSetSendCoalescing(Sock, false /*fEnable*/);
809 AssertRC(rc);
810
811 /* Send the welcome message to the master node. */
812 rc = RTTcpWrite(Sock, g_szWelcome, sizeof(g_szWelcome) - 1);
813 if (RT_FAILURE(rc))
814 {
815 LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc));
816 return VINF_SUCCESS;
817 }
818
819 /*
820 * Password.
821 */
822 const char *pszPassword = pVM->ftm.s.pszPassword;
823 if (pszPassword)
824 {
825 unsigned off = 0;
826 while (pszPassword[off])
827 {
828 char ch;
829 rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
830 if ( RT_FAILURE(rc)
831 || pszPassword[off] != ch)
832 {
833 if (RT_FAILURE(rc))
834 LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc));
835 else
836 LogRel(("FTSync: Invalid password (off=%u)\n", off));
837 ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE);
838 return VINF_SUCCESS;
839 }
840 off++;
841 }
842 }
843 rc = ftmR3TcpWriteACK(pVM);
844 if (RT_FAILURE(rc))
845 return VINF_SUCCESS;
846
847 /** todo: verify VM config. */
848
849 /*
850 * Stop the server.
851 *
852 * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior
853 * to it we must not return that value!
854 */
855 RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
856
857 /*
858 * Command processing loop.
859 */
860 bool fDone = false;
861 for (;;)
862 {
863 bool fFullSync = false;
864 char szCmd[128];
865
866 rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd));
867 if (RT_FAILURE(rc))
868 break;
869
870 if (!strcmp(szCmd, "mem-sync"))
871 {
872 rc = ftmR3TcpWriteACK(pVM);
873 AssertRC(rc);
874 if (RT_FAILURE(rc))
875 continue;
876
877 while (true)
878 {
879 FTMTCPHDRMEM Hdr;
880 void *pPage;
881
882 /* Read memory header. */
883 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
884 if (RT_FAILURE(rc))
885 {
886 Log(("RTTcpRead failed with %Rrc\n", rc));
887 break;
888 }
889 pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr);
890
891 if (Hdr.cb == 0)
892 break; /* end of sync. */
893
894 Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */
895
896 /* Allocate memory to hold the page(s). */
897 pPage = RTMemAlloc(Hdr.cbPageRange);
898 AssertBreak(pPage);
899
900 /* Fetch the page(s). */
901 rc = RTTcpRead(pVM->ftm.s.hSocket, pPage, Hdr.cb, NULL);
902 if (RT_FAILURE(rc))
903 {
904 Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc));
905 break;
906 }
907 pVM->ftm.s.StatReceivedMem.c += Hdr.cb;
908
909 /* Update the guest memory of the standby VM. */
910#if 1
911 rc = PGMR3PhysWriteExternal(pVM, Hdr.GCPhys, pPage, Hdr.cbPageRange, "FTMemSync");
912#else
913 rc = PGMPhysWrite(pVM, Hdr.GCPhys, pPage, Hdr.cbPageRange);
914#endif
915 AssertRC(rc);
916
917 RTMemFree(pPage);
918 }
919
920 rc = ftmR3TcpWriteACK(pVM);
921 AssertRC(rc);
922 }
923 else
924 if ( !strcmp(szCmd, "checkpoint")
925 || (fFullSync = true) /* intended assignment */
926 || !strcmp(szCmd, "full-sync"))
927 {
928 rc = ftmR3TcpWriteACK(pVM);
929 AssertRC(rc);
930 if (RT_FAILURE(rc))
931 continue;
932
933 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
934 pVM->ftm.s.syncstate.uOffStream = 0;
935
936 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
937 rc = VMR3LoadFromStream(pVM, &g_ftmR3TcpOps, pVM, NULL, NULL);
938 pVM->ftm.s.fDeltaLoadSaveActive = false;
939 RTSocketRelease(pVM->ftm.s.hSocket);
940 AssertRC(rc);
941 if (RT_FAILURE(rc))
942 {
943 LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc));
944 ftmR3TcpWriteNACK(pVM, rc);
945 continue;
946 }
947
948 /* The EOS might not have been read, make sure it is. */
949 pVM->ftm.s.syncstate.fStopReading = false;
950 size_t cbRead;
951 rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead);
952 if (rc != VERR_EOF)
953 {
954 LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc));
955 ftmR3TcpWriteNACK(pVM, rc);
956 continue;
957 }
958
959 rc = ftmR3TcpWriteACK(pVM);
960 AssertRC(rc);
961 }
962 }
963 LogFlowFunc(("returns mRc=%Rrc\n", rc));
964 return VERR_TCP_SERVER_STOP;
965}
966
967/**
968 * Powers on the fault tolerant virtual machine.
969 *
970 * @returns VBox status code.
971 *
972 * @param pVM The VM to operate on.
973 * @param fMaster FT master or standby
974 * @param uInterval FT sync interval
975 * @param pszAddress Standby VM address
976 * @param uPort Standby VM port
977 * @param pszPassword FT password (NULL for none)
978 *
979 * @thread Any thread.
980 * @vmstate Created
981 * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby)
982 */
983VMMR3DECL(int) FTMR3PowerOn(PVM pVM, bool fMaster, unsigned uInterval, const char *pszAddress, unsigned uPort, const char *pszPassword)
984{
985 int rc = VINF_SUCCESS;
986
987 VMSTATE enmVMState = VMR3GetState(pVM);
988 AssertMsgReturn(enmVMState == VMSTATE_CREATED,
989 ("%s\n", VMR3GetStateName(enmVMState)),
990 VERR_INTERNAL_ERROR_4);
991 AssertReturn(pszAddress, VERR_INVALID_PARAMETER);
992
993 if (pVM->ftm.s.uInterval)
994 pVM->ftm.s.uInterval = uInterval;
995 else
996 pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */
997
998 pVM->ftm.s.uPort = uPort;
999 pVM->ftm.s.pszAddress = RTStrDup(pszAddress);
1000 if (pszPassword)
1001 pVM->ftm.s.pszPassword = RTStrDup(pszPassword);
1002 if (fMaster)
1003 {
1004 rc = RTSemEventCreate(&pVM->ftm.s.master.hShutdownEvent);
1005 if (RT_FAILURE(rc))
1006 return rc;
1007
1008 rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM,
1009 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmMaster");
1010 if (RT_FAILURE(rc))
1011 return rc;
1012
1013 pVM->fFaultTolerantMaster = true;
1014 if (PGMIsUsingLargePages(pVM))
1015 {
1016 /* Must disable large page usage as 2 MB pages are too big to write monitor. */
1017 LogRel(("FTSync: disabling large page usage.\n"));
1018 PGMSetLargePageUsage(pVM, false);
1019 }
1020 /** @todo might need to disable page fusion as well */
1021
1022 return VMR3PowerOn(pVM);
1023 }
1024 else
1025 {
1026 /* standby */
1027 rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer);
1028 if (RT_FAILURE(rc))
1029 return rc;
1030 pVM->ftm.s.fIsStandbyNode = true;
1031
1032 rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM);
1033 /** @todo deal with the exit code to check if we should activate this standby VM. */
1034
1035 if (pVM->ftm.s.standby.hServer)
1036 {
1037 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
1038 pVM->ftm.s.standby.hServer = NULL;
1039 }
1040 if (rc == VERR_TCP_SERVER_SHUTDOWN)
1041 rc = VINF_SUCCESS; /* ignore this error; the standby process was cancelled. */
1042 }
1043 return rc;
1044}
1045
1046/**
1047 * Powers off the fault tolerant virtual machine (standby).
1048 *
1049 * @returns VBox status code.
1050 *
1051 * @param pVM The VM to operate on.
1052 */
1053VMMR3DECL(int) FTMR3CancelStandby(PVM pVM)
1054{
1055 AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED);
1056 Assert(pVM->ftm.s.standby.hServer);
1057
1058 return RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1059}
1060
1061
1062/**
1063 * Performs a full sync to the standby node
1064 *
1065 * @returns VBox status code.
1066 *
1067 * @param pVM The VM to operate on.
1068 */
1069VMMR3DECL(int) FTMR3SyncState(PVM pVM)
1070{
1071 VM_ASSERT_OTHER_THREAD(pVM);
1072
1073 if (!pVM->fFaultTolerantMaster)
1074 return VINF_SUCCESS;
1075
1076 pVM->ftm.s.fCheckpointingActive = true;
1077 int rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
1078 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
1079
1080 /* Reset the sync state. */
1081 pVM->ftm.s.syncstate.uOffStream = 0;
1082 pVM->ftm.s.syncstate.cbReadBlock = 0;
1083 pVM->ftm.s.syncstate.fStopReading = false;
1084 pVM->ftm.s.syncstate.fIOError = false;
1085 pVM->ftm.s.syncstate.fEndOfStream = false;
1086
1087 /* Sync state + changed memory with the standby node. */
1088 rc = ftmR3PerformSync(pVM, FTMSYNCSTATE_DELTA_VM);
1089
1090 PDMCritSectLeave(&pVM->ftm.s.CritSect);
1091 pVM->ftm.s.fCheckpointingActive = false;
1092
1093 return VERR_NOT_IMPLEMENTED;
1094}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette