VirtualBox

source: vbox/trunk/src/VBox/VMM/FTM.cpp@ 32059

Last change on this file since 32059 was 32057, checked in by vboxsync, 14 years ago

Stats

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 30.9 KB
Line 
1/* $Id: FTM.cpp 32057 2010-08-27 16:09:36Z vboxsync $ */
2/** @file
3 * FTM - Fault Tolerance Manager
4 */
5
6/*
7 * Copyright (C) 2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_FTM
23#include "FTMInternal.h"
24#include <VBox/vm.h>
25#include <VBox/vmm.h>
26#include <VBox/err.h>
27#include <VBox/param.h>
28#include <VBox/ssm.h>
29#include <VBox/log.h>
30#include <VBox/pgm.h>
31
32#include <iprt/assert.h>
33#include <iprt/thread.h>
34#include <iprt/string.h>
35#include <iprt/mem.h>
36#include <iprt/tcp.h>
37#include <iprt/socket.h>
38#include <iprt/semaphore.h>
39#include <iprt/asm.h>
40
41/*******************************************************************************
42 * Structures and Typedefs *
43 *******************************************************************************/
44
/**
 * Framing header for the saved state (SSM) data sent over the TCP connection.
 *
 * Every chunk of state data is preceded by one of these so the receiver can
 * tell where the SSM stream ends.
 */
typedef struct FTMTCPHDR
{
    /** Magic value (FTMTCPHDR_MAGIC). */
    uint32_t    u32Magic;
    /** Number of payload bytes following this header.
     * 0 marks the end of the stream, UINT32_MAX marks cancellation. */
    uint32_t    cb;
} FTMTCPHDR;
/** Magic value for FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */
#define FTMTCPHDR_MAGIC         UINT32_C(0x19471205)
/** The largest payload size accepted in a single block. */
#define FTMTCPHDR_MAX_SIZE      UINT32_C(0x00fffff8)
64
65/**
66 * TCP stream header.
67 *
68 * This is an extra layer for fixing the problem with figuring out when the SSM
69 * stream ends.
70 */
71typedef struct FTMTCPHDRMEM
72{
73 /** Magic value. */
74 uint32_t u32Magic;
75 /** Size (Uncompressed) of the pages following the header. */
76 uint32_t cbPageRange;
77 /** GC Physical address of the page(s) to sync. */
78 RTGCPHYS GCPhys;
79 /** The size of the data block following this header.
80 * 0 indicates the end of the stream, while UINT32_MAX indicates
81 * cancelation. */
82 uint32_t cb;
83} FTMTCPHDRMEM;
84
85/*******************************************************************************
86* Global Variables *
87*******************************************************************************/
/** Greeting the standby node sends right after accepting a connection; the
 *  master compares what it receives against this string. */
static char const g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n";
89
90/**
91 * Initializes the FTM.
92 *
93 * @returns VBox status code.
94 * @param pVM The VM to operate on.
95 */
96VMMR3DECL(int) FTMR3Init(PVM pVM)
97{
98 /*
99 * Assert alignment and sizes.
100 */
101 AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding));
102 AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t));
103
104 /** @todo saved state for master nodes! */
105 pVM->ftm.s.pszAddress = NULL;
106 pVM->ftm.s.pszPassword = NULL;
107 pVM->fFaultTolerantMaster = false;
108 pVM->ftm.s.fIsStandbyNode = false;
109 pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER;
110 pVM->ftm.s.master.hShutdownEvent = NIL_RTSEMEVENT;
111 pVM->ftm.s.hSocket = NIL_RTSOCKET;
112
113 /*
114 * Initialize the PGM critical section.
115 */
116 int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM");
117 AssertRCReturn(rc, rc);
118
119 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received.");
120 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received.");
121 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
122 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent.");
123
124 return VINF_SUCCESS;
125}
126
127/**
128 * Terminates the FTM.
129 *
130 * Termination means cleaning up and freeing all resources,
131 * the VM itself is at this point powered off or suspended.
132 *
133 * @returns VBox status code.
134 * @param pVM The VM to operate on.
135 */
136VMMR3DECL(int) FTMR3Term(PVM pVM)
137{
138 if (pVM->ftm.s.pszAddress)
139 RTMemFree(pVM->ftm.s.pszAddress);
140 if (pVM->ftm.s.pszPassword)
141 RTMemFree(pVM->ftm.s.pszPassword);
142 if (pVM->ftm.s.hSocket != NIL_RTSOCKET)
143 RTTcpClientClose(pVM->ftm.s.hSocket);
144 if (pVM->ftm.s.standby.hServer)
145 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
146 if (pVM->ftm.s.master.hShutdownEvent != NIL_RTSEMEVENT)
147 RTSemEventDestroy(pVM->ftm.s.master.hShutdownEvent);
148
149 PDMR3CritSectDelete(&pVM->ftm.s.CritSect);
150 return VINF_SUCCESS;
151}
152
153
154static int ftmR3TcpWriteACK(PVM pVM)
155{
156 int rc = RTTcpWrite(pVM->ftm.s.hSocket, "ACK\n", sizeof("ACK\n") - 1);
157 if (RT_FAILURE(rc))
158 {
159 LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc));
160 }
161 return rc;
162}
163
164
165static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL)
166{
167 char szMsg[256];
168 size_t cch;
169 if (pszMsgText && *pszMsgText)
170 {
171 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText);
172 for (size_t off = 6; off + 1 < cch; off++)
173 if (szMsg[off] == '\n')
174 szMsg[off] = '\r';
175 }
176 else
177 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2);
178 int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch);
179 if (RT_FAILURE(rc))
180 LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc));
181 return rc;
182}
183
184/**
185 * Reads a string from the socket.
186 *
187 * @returns VBox status code.
188 *
189 * @param pState The teleporter state structure.
190 * @param pszBuf The output buffer.
191 * @param cchBuf The size of the output buffer.
192 *
193 */
194static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf)
195{
196 char *pszStart = pszBuf;
197 RTSOCKET Sock = pVM->ftm.s.hSocket;
198
199 AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR);
200 *pszBuf = '\0';
201
202 /* dead simple approach. */
203 for (;;)
204 {
205 char ch;
206 int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
207 if (RT_FAILURE(rc))
208 {
209 LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart));
210 return rc;
211 }
212 if ( ch == '\n'
213 || ch == '\0')
214 return VINF_SUCCESS;
215 if (cchBuf <= 1)
216 {
217 LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart));
218 return VERR_BUFFER_OVERFLOW;
219 }
220 *pszBuf++ = ch;
221 *pszBuf = '\0';
222 cchBuf--;
223 }
224}
225
226/**
227 * Reads an ACK or NACK.
228 *
229 * @returns VBox status code.
230 * @param pVM The VM to operate on.
231 * @param pszWhich Which ACK is this this?
232 * @param pszNAckMsg Optional NACK message.
233 */
234static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
235{
236 char szMsg[256];
237 int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
238 if (RT_FAILURE(rc))
239 return rc;
240
241 if (!strcmp(szMsg, "ACK"))
242 return VINF_SUCCESS;
243
244 if (!strncmp(szMsg, "NACK=", sizeof("NACK=") - 1))
245 {
246 char *pszMsgText = strchr(szMsg, ';');
247 if (pszMsgText)
248 *pszMsgText++ = '\0';
249
250 int32_t vrc2;
251 rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
252 if (rc == VINF_SUCCESS)
253 {
254 /*
255 * Well formed NACK, transform it into an error.
256 */
257 if (pszNAckMsg)
258 {
259 LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
260 return VERR_INTERNAL_ERROR;
261 }
262
263 if (pszMsgText)
264 {
265 pszMsgText = RTStrStrip(pszMsgText);
266 for (size_t off = 0; pszMsgText[off]; off++)
267 if (pszMsgText[off] == '\r')
268 pszMsgText[off] = '\n';
269
270 LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
271 }
272 return VERR_INTERNAL_ERROR_2;
273 }
274
275 if (pszMsgText)
276 pszMsgText[-1] = ';';
277 }
278 return VERR_INTERNAL_ERROR_3;
279}
280
281/**
282 * Submitts a command to the destination and waits for the ACK.
283 *
284 * @returns VBox status code.
285 *
286 * @param pVM The VM to operate on.
287 * @param pszCommand The command.
288 * @param fWaitForAck Whether to wait for the ACK.
289 */
290static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
291{
292 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), "\n", sizeof("\n") - 1);
293 if (RT_FAILURE(rc))
294 return rc;
295 if (!fWaitForAck)
296 return VINF_SUCCESS;
297 return ftmR3TcpReadACK(pVM, pszCommand);
298}
299
300/**
301 * @copydoc SSMSTRMOPS::pfnWrite
302 */
303static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
304{
305 PVM pVM = (PVM)pvUser;
306
307 AssertReturn(cbToWrite > 0, VINF_SUCCESS);
308 AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
309 AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
310
311 for (;;)
312 {
313 FTMTCPHDR Hdr;
314 Hdr.u32Magic = FTMTCPHDR_MAGIC;
315 Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
316 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
317 if (RT_FAILURE(rc))
318 {
319 LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
320 return rc;
321 }
322 pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
323 if (Hdr.cb == cbToWrite)
324 return VINF_SUCCESS;
325
326 /* advance */
327 cbToWrite -= Hdr.cb;
328 pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
329 }
330}
331
332
333/**
334 * Selects and poll for close condition.
335 *
336 * We can use a relatively high poll timeout here since it's only used to get
337 * us out of error paths. In the normal cause of events, we'll get a
338 * end-of-stream header.
339 *
340 * @returns VBox status code.
341 *
342 * @param pState The teleporter state data.
343 */
344static int ftmR3TcpReadSelect(PVM pVM)
345{
346 int rc;
347 do
348 {
349 rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000);
350 if (RT_FAILURE(rc) && rc != VERR_TIMEOUT)
351 {
352 pVM->ftm.s.syncstate.fIOError = true;
353 LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc));
354 break;
355 }
356 if (pVM->ftm.s.syncstate.fStopReading)
357 {
358 rc = VERR_EOF;
359 break;
360 }
361 } while (rc == VERR_TIMEOUT);
362 return rc;
363}
364
365
366/**
367 * @copydoc SSMSTRMOPS::pfnRead
368 */
369static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead)
370{
371 PVM pVM = (PVM)pvUser;
372 AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
373
374 for (;;)
375 {
376 int rc;
377
378 /*
379 * Check for various conditions and may have been signalled.
380 */
381 if (pVM->ftm.s.syncstate.fEndOfStream)
382 return VERR_EOF;
383 if (pVM->ftm.s.syncstate.fStopReading)
384 return VERR_EOF;
385 if (pVM->ftm.s.syncstate.fIOError)
386 return VERR_IO_GEN_FAILURE;
387
388 /*
389 * If there is no more data in the current block, read the next
390 * block header.
391 */
392 if (!pVM->ftm.s.syncstate.cbReadBlock)
393 {
394 rc = ftmR3TcpReadSelect(pVM);
395 if (RT_FAILURE(rc))
396 return rc;
397 FTMTCPHDR Hdr;
398 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
399 if (RT_FAILURE(rc))
400 {
401 pVM->ftm.s.syncstate.fIOError = true;
402 LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc));
403 return rc;
404 }
405
406 if (RT_UNLIKELY( Hdr.u32Magic != FTMTCPHDR_MAGIC
407 || Hdr.cb > FTMTCPHDR_MAX_SIZE
408 || Hdr.cb == 0))
409 {
410 if ( Hdr.u32Magic == FTMTCPHDR_MAGIC
411 && ( Hdr.cb == 0
412 || Hdr.cb == UINT32_MAX)
413 )
414 {
415 pVM->ftm.s.syncstate.fEndOfStream = true;
416 pVM->ftm.s.syncstate.cbReadBlock = 0;
417 return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF;
418 }
419 pVM->ftm.s.syncstate.fIOError = true;
420 LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb));
421 return VERR_IO_GEN_FAILURE;
422 }
423
424 pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb;
425 if (pVM->ftm.s.syncstate.fStopReading)
426 return VERR_EOF;
427 }
428
429 /*
430 * Read more data.
431 */
432 rc = ftmR3TcpReadSelect(pVM);
433 if (RT_FAILURE(rc))
434 return rc;
435 uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead);
436 rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead);
437 if (RT_FAILURE(rc))
438 {
439 pVM->ftm.s.syncstate.fIOError = true;
440 LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb));
441 return rc;
442 }
443 if (pcbRead)
444 {
445 cb = (uint32_t)*pcbRead;
446 pVM->ftm.s.syncstate.uOffStream += cb;
447 pVM->ftm.s.syncstate.cbReadBlock -= cb;
448 return VINF_SUCCESS;
449 }
450 pVM->ftm.s.syncstate.uOffStream += cb;
451 pVM->ftm.s.syncstate.cbReadBlock -= cb;
452 if (cbToRead == cb)
453 return VINF_SUCCESS;
454
455 /* Advance to the next block. */
456 cbToRead -= cb;
457 pvBuf = (uint8_t *)pvBuf + cb;
458 }
459}
460
461
462/**
463 * @copydoc SSMSTRMOPS::pfnSeek
464 */
465static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
466{
467 return VERR_NOT_SUPPORTED;
468}
469
470
471/**
472 * @copydoc SSMSTRMOPS::pfnTell
473 */
474static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser)
475{
476 PVM pVM = (PVM)pvUser;
477 return pVM->ftm.s.syncstate.uOffStream;
478}
479
480
481/**
482 * @copydoc SSMSTRMOPS::pfnSize
483 */
484static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb)
485{
486 return VERR_NOT_SUPPORTED;
487}
488
489
490/**
491 * @copydoc SSMSTRMOPS::pfnIsOk
492 */
493static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser)
494{
495 PVM pVM = (PVM)pvUser;
496
497 if (pVM->fFaultTolerantMaster)
498 {
499 /* Poll for incoming NACKs and errors from the other side */
500 int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0);
501 if (rc != VERR_TIMEOUT)
502 {
503 if (RT_SUCCESS(rc))
504 {
505 LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n"));
506 rc = VERR_SSM_CANCELLED;
507 }
508 else
509 LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc));
510 return rc;
511 }
512 }
513
514 return VINF_SUCCESS;
515}
516
517
518/**
519 * @copydoc SSMSTRMOPS::pfnClose
520 */
521static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCanceled)
522{
523 PVM pVM = (PVM)pvUser;
524
525 if (pVM->fFaultTolerantMaster)
526 {
527 FTMTCPHDR EofHdr;
528 EofHdr.u32Magic = FTMTCPHDR_MAGIC;
529 EofHdr.cb = fCanceled ? UINT32_MAX : 0;
530 int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr));
531 if (RT_FAILURE(rc))
532 {
533 LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc));
534 return rc;
535 }
536 }
537 else
538 {
539 ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true);
540 }
541
542 return VINF_SUCCESS;
543}
544
545
546/**
547 * Method table for a TCP based stream.
548 */
549static SSMSTRMOPS const g_ftmR3TcpOps =
550{
551 SSMSTRMOPS_VERSION,
552 ftmR3TcpOpWrite,
553 ftmR3TcpOpRead,
554 ftmR3TcpOpSeek,
555 ftmR3TcpOpTell,
556 ftmR3TcpOpSize,
557 ftmR3TcpOpIsOk,
558 ftmR3TcpOpClose,
559 SSMSTRMOPS_VERSION
560};
561
562/**
563 * Sync the VM state partially or fully
564 *
565 * @returns VBox status code.
566 * @param pVM The VM handle.
567 * @param enmState Which state to sync
568 */
569static DECLCALLBACK(void) ftmR3PerformSync(PVM pVM, FTMSYNCSTATE enmState)
570{
571 int rc;
572 bool fFullSync = false;
573
574 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
575 {
576 rc = VMR3Suspend(pVM);
577 AssertReturnVoid(RT_SUCCESS(rc));
578 }
579
580 switch (enmState)
581 {
582 case FTMSYNCSTATE_FULL:
583 fFullSync = true;
584 /* no break */
585 case FTMSYNCSTATE_DELTA_VM:
586 {
587 bool fSuspended = false;
588
589 rc = ftmR3TcpSubmitCommand(pVM, (fFullSync) ? "full-sync" : "checkpoint");
590 AssertRC(rc);
591
592 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
593 rc = VMR3Save(pVM, NULL /* pszFilename */, &g_ftmR3TcpOps, pVM, true /* fContinueAfterwards */, NULL, NULL, &fSuspended);
594 pVM->ftm.s.fDeltaLoadSaveActive = false;
595 AssertRC(rc);
596
597 rc = ftmR3TcpReadACK(pVM, (fFullSync) ? "full-sync-complete" : "checkpoint-complete");
598 AssertRC(rc);
599 break;
600 }
601
602 case FTMSYNCSTATE_DELTA_MEMORY:
603 /* Nothing to do as we sync the memory in an async thread; no need to block EMT. */
604 break;
605 }
606 /* Write protect all memory. */
607 rc = PGMR3PhysWriteProtectRAM(pVM);
608 AssertRC(rc);
609
610 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
611 {
612 rc = VMR3Resume(pVM);
613 AssertRC(rc);
614 }
615}
616
617/**
618 * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages
619 *
620 * @param pVM VM Handle.
621 * @param GCPhys GC physical address
622 * @param pRange HC virtual address of the page(s)
623 * @param cbRange Size of the dirty range in bytes.
624 * @param pvUser User argument
625 */
626static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser)
627{
628 FTMTCPHDRMEM Hdr;
629 Hdr.u32Magic = FTMTCPHDR_MAGIC;
630 Hdr.GCPhys = GCPhys;
631 Hdr.cbPageRange = cbRange;
632 Hdr.cb = cbRange;
633 /** @todo compress page(s). */
634 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb);
635 if (RT_FAILURE(rc))
636 {
637 LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb));
638 return rc;
639 }
640 pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr);
641 return VINF_SUCCESS;
642}
643
644/**
645 * Thread function which starts syncing process for this master VM
646 *
647 * @param Thread The thread id.
648 * @param pvUser Not used
649 * @return VINF_SUCCESS (ignored).
650 *
651 */
652static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD Thread, void *pvUser)
653{
654 int rc = VINF_SUCCESS;
655 PVM pVM = (PVM)pvUser;
656
657 for (;;)
658 {
659 /*
660 * Try connect to the standby machine.
661 */
662 rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket);
663 if (RT_SUCCESS(rc))
664 {
665 /* Disable Nagle. */
666 rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/);
667 AssertRC(rc);
668
669 /* Read and check the welcome message. */
670 char szLine[RT_MAX(128, sizeof(g_szWelcome))];
671 RT_ZERO(szLine);
672 rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL);
673 if ( RT_SUCCESS(rc)
674 && !strcmp(szLine, g_szWelcome))
675 {
676 /* password */
677 rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword));
678 if (RT_SUCCESS(rc))
679 {
680 /* ACK */
681 rc = ftmR3TcpReadACK(pVM, "password", "Invalid password");
682 if (RT_SUCCESS(rc))
683 {
684 /** todo: verify VM config. */
685 break;
686 }
687 }
688 }
689 rc = RTTcpClientClose(pVM->ftm.s.hSocket);
690 AssertRC(rc);
691 pVM->ftm.s.hSocket = NIL_RTSOCKET;
692 }
693 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, 1000 /* 1 second */);
694 if (rc != VERR_TIMEOUT)
695 return VINF_SUCCESS; /* told to quit */
696 }
697
698 /* Successfully initialized the connection to the standby node.
699 * Start the sync process.
700 */
701
702 /* First sync all memory and write protect everything so
703 * we can send changed pages later on.
704 */
705
706 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3PerformSync, 2, pVM, FTMSYNCSTATE_FULL);
707 AssertRC(rc);
708
709 for (;;)
710 {
711 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, pVM->ftm.s.uInterval);
712 if (rc != VERR_TIMEOUT)
713 break; /* told to quit */
714
715 if (!pVM->ftm.s.fCheckpointingActive)
716 {
717 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
718 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
719
720 rc = ftmR3TcpSubmitCommand(pVM, "mem-sync");
721 AssertRC(rc);
722
723 /* sync the changed memory with the standby node. */
724 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3PerformSync, 2, pVM, FTMSYNCSTATE_DELTA_MEMORY);
725 AssertRC(rc);
726
727 /* Enumerate all dirty pages and send them to the standby VM. */
728 rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */);
729 AssertRC(rc);
730
731 /* Send last memory header to signal the end. */
732 FTMTCPHDRMEM Hdr;
733 Hdr.u32Magic = FTMTCPHDR_MAGIC;
734 Hdr.GCPhys = 0;
735 Hdr.cbPageRange = 0;
736 Hdr.cb = 0;
737 rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr));
738 if (RT_FAILURE(rc))
739 LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb));
740
741 rc = ftmR3TcpReadACK(pVM, "mem-sync-complete");
742 AssertRC(rc);
743
744 PDMCritSectLeave(&pVM->ftm.s.CritSect);
745 }
746 }
747 return rc;
748}
749
750/**
751 * Listen for incoming traffic destined for the standby VM.
752 *
753 * @copydoc FNRTTCPSERVE
754 *
755 * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP.
756 */
757static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET Sock, void *pvUser)
758{
759 PVM pVM = (PVM)pvUser;
760
761 pVM->ftm.s.hSocket = Sock;
762
763 /*
764 * Disable Nagle.
765 */
766 int rc = RTTcpSetSendCoalescing(Sock, false /*fEnable*/);
767 AssertRC(rc);
768
769 /* Send the welcome message to the master node. */
770 rc = RTTcpWrite(Sock, g_szWelcome, sizeof(g_szWelcome) - 1);
771 if (RT_FAILURE(rc))
772 {
773 LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc));
774 return VINF_SUCCESS;
775 }
776
777 /*
778 * Password.
779 */
780 const char *pszPassword = pVM->ftm.s.pszPassword;
781 unsigned off = 0;
782 while (pszPassword[off])
783 {
784 char ch;
785 rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
786 if ( RT_FAILURE(rc)
787 || pszPassword[off] != ch)
788 {
789 if (RT_FAILURE(rc))
790 LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc));
791 else
792 LogRel(("FTSync: Invalid password (off=%u)\n", off));
793 ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE);
794 return VINF_SUCCESS;
795 }
796 off++;
797 }
798 rc = ftmR3TcpWriteACK(pVM);
799 if (RT_FAILURE(rc))
800 return VINF_SUCCESS;
801
802 /** todo: verify VM config. */
803
804 /*
805 * Stop the server.
806 *
807 * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior
808 * to it we must not return that value!
809 */
810 RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
811
812 /*
813 * Command processing loop.
814 */
815 bool fDone = false;
816 for (;;)
817 {
818 bool fFullSync = false;
819 char szCmd[128];
820
821 rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd));
822 AssertRC(rc);
823 if (RT_FAILURE(rc))
824 break;
825
826 if (!strcmp(szCmd, "mem-sync"))
827 {
828 rc = ftmR3TcpWriteACK(pVM);
829 AssertRC(rc);
830 if (RT_FAILURE(rc))
831 continue;
832
833 while (true)
834 {
835 FTMTCPHDRMEM Hdr;
836 void *pPage;
837
838 /* Read memory header. */
839 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
840 if (RT_FAILURE(rc))
841 {
842 Log(("RTTcpRead failed with %Rrc\n", rc));
843 break;
844 }
845 pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr);
846
847 if (Hdr.cb == 0)
848 break; /* end of sync. */
849
850 Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */
851
852 /* Allocate memory to hold the page(s). */
853 pPage = RTMemAlloc(Hdr.cbPageRange);
854 AssertBreak(pPage);
855
856 /* Fetch the page(s). */
857 rc = RTTcpRead(pVM->ftm.s.hSocket, pPage, Hdr.cb, NULL);
858 if (RT_FAILURE(rc))
859 {
860 Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc));
861 break;
862 }
863 pVM->ftm.s.StatReceivedMem.c += Hdr.cb;
864
865 /* Update the guest memory of the standby VM. */
866 rc = PGMPhysWrite(pVM, Hdr.GCPhys, pPage, Hdr.cbPageRange);
867 AssertRC(rc);
868
869 RTMemFree(pPage);
870 }
871
872 rc = ftmR3TcpWriteACK(pVM);
873 AssertRC(rc);
874 }
875 else
876 if ( !strcmp(szCmd, "checkpoint")
877 || (fFullSync = true) /* intended assignment */
878 || !strcmp(szCmd, "full-sync"))
879 {
880 rc = ftmR3TcpWriteACK(pVM);
881 AssertRC(rc);
882 if (RT_FAILURE(rc))
883 continue;
884
885 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
886 pVM->ftm.s.syncstate.uOffStream = 0;
887
888 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
889 rc = VMR3LoadFromStream(pVM, &g_ftmR3TcpOps, pVM, NULL, NULL);
890 pVM->ftm.s.fDeltaLoadSaveActive = false;
891 RTSocketRelease(pVM->ftm.s.hSocket);
892 AssertRC(rc);
893 if (RT_FAILURE(rc))
894 {
895 LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc));
896 ftmR3TcpWriteNACK(pVM, rc);
897 continue;
898 }
899
900 /* The EOS might not have been read, make sure it is. */
901 pVM->ftm.s.syncstate.fStopReading = false;
902 size_t cbRead;
903 rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead);
904 if (rc != VERR_EOF)
905 {
906 LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc));
907 ftmR3TcpWriteNACK(pVM, rc);
908 continue;
909 }
910
911 rc = ftmR3TcpWriteACK(pVM);
912 AssertRC(rc);
913 }
914 }
915 LogFlowFunc(("returns mRc=%Rrc\n", rc));
916 return VERR_TCP_SERVER_STOP;
917}
918
919/**
920 * Powers on the fault tolerant virtual machine.
921 *
922 * @returns VBox status code.
923 *
924 * @param pVM The VM to operate on.
925 * @param fMaster FT master or standby
926 * @param uInterval FT sync interval
927 * @param pszAddress Standby VM address
928 * @param uPort Standby VM port
929 * @param pszPassword FT password (NULL for none)
930 *
931 * @thread Any thread.
932 * @vmstate Created
933 * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby)
934 */
935VMMR3DECL(int) FTMR3PowerOn(PVM pVM, bool fMaster, unsigned uInterval, const char *pszAddress, unsigned uPort, const char *pszPassword)
936{
937 int rc = VINF_SUCCESS;
938
939 VMSTATE enmVMState = VMR3GetState(pVM);
940 AssertMsgReturn(enmVMState == VMSTATE_POWERING_ON,
941 ("%s\n", VMR3GetStateName(enmVMState)),
942 VERR_INTERNAL_ERROR_4);
943 AssertReturn(pszAddress, VERR_INVALID_PARAMETER);
944
945 if (pVM->ftm.s.uInterval)
946 pVM->ftm.s.uInterval = uInterval;
947 else
948 pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */
949
950 pVM->ftm.s.uPort = uPort;
951 pVM->ftm.s.pszAddress = RTStrDup(pszAddress);
952 if (pszPassword)
953 pVM->ftm.s.pszPassword = RTStrDup(pszPassword);
954 if (fMaster)
955 {
956 rc = RTSemEventCreate(&pVM->ftm.s.master.hShutdownEvent);
957 if (RT_FAILURE(rc))
958 return rc;
959
960 rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM,
961 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmR3MasterThread");
962 if (RT_FAILURE(rc))
963 return rc;
964
965 pVM->fFaultTolerantMaster = true;
966 if (PGMIsUsingLargePages(pVM))
967 {
968 /* Must disable large page usage as 2 MB pages are too big to write monitor. */
969 LogRel(("FTSync: disabling large page usage.\n"));
970 PGMSetLargePageUsage(pVM, false);
971 }
972 /** @todo might need to disable page fusion as well */
973
974 return VMR3PowerOn(pVM);
975 }
976 else
977 {
978 /* standby */
979 rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer);
980 if (RT_FAILURE(rc))
981 return rc;
982 pVM->ftm.s.fIsStandbyNode = true;
983
984 rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM);
985 /** @todo deal with the exit code to check if we should activate this standby VM. */
986
987 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
988 pVM->ftm.s.standby.hServer = NULL;
989 }
990 return rc;
991}
992
993/**
994 * Powers off the fault tolerant virtual machine (standby).
995 *
996 * @returns VBox status code.
997 *
998 * @param pVM The VM to operate on.
999 */
1000VMMR3DECL(int) FTMR3CancelStandby(PVM pVM)
1001{
1002 AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED);
1003 Assert(pVM->ftm.s.standby.hServer);
1004
1005 return RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1006}
1007
1008
1009/**
1010 * Performs a full sync to the standby node
1011 *
1012 * @returns VBox status code.
1013 *
1014 * @param pVM The VM to operate on.
1015 */
1016VMMR3DECL(int) FTMR3SyncState(PVM pVM)
1017{
1018 if (!pVM->fFaultTolerantMaster)
1019 return VINF_SUCCESS;
1020
1021 pVM->ftm.s.fCheckpointingActive = true;
1022 int rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
1023 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
1024
1025 /* Reset the sync state. */
1026 pVM->ftm.s.syncstate.uOffStream = 0;
1027 pVM->ftm.s.syncstate.cbReadBlock = 0;
1028 pVM->ftm.s.syncstate.fStopReading = false;
1029 pVM->ftm.s.syncstate.fIOError = false;
1030 pVM->ftm.s.syncstate.fEndOfStream = false;
1031
1032 /* Sync state + changed memory with the standby node. */
1033 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3PerformSync, 2, pVM, FTMSYNCSTATE_DELTA_VM);
1034 AssertRC(rc);
1035
1036 PDMCritSectLeave(&pVM->ftm.s.CritSect);
1037 pVM->ftm.s.fCheckpointingActive = false;
1038
1039 return VERR_NOT_IMPLEMENTED;
1040}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette