VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 90160

Last change on this file since 90160 was 86891, checked in by vboxsync, 4 years ago

vsheriff: python 3 adjustments. bugref:9788

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 84.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 86891 2020-11-16 14:39:15Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2020 Oracle Corporation
20
21This file is part of VirtualBox Open Source Edition (OSE), as
22available from http://www.virtualbox.org. This file is free software;
23you can redistribute it and/or modify it under the terms of the GNU
24General Public License (GPL) as published by the Free Software
25Foundation, in version 2 as it comes in the "COPYING" file of the
26VirtualBox OSE distribution. VirtualBox OSE is distributed in the
27hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
28
29The contents of this file may alternatively be used under the terms
30of the Common Development and Distribution License Version 1.0
31(CDDL) only, as it comes in the "COPYING.CDDL" file of the
32VirtualBox OSE distribution, in which case the provisions of the
33CDDL are applicable instead of those of the GPL.
34
35You may elect to license modified versions of this file under the
36terms and conditions of either the GPL or the CDDL or both.
37"""
38__version__ = "$Revision: 86891 $"
39
40
41# Standard python imports
42import hashlib;
43import os;
44import re;
45import smtplib;
46#import subprocess;
47import sys;
48from email.mime.multipart import MIMEMultipart;
49from email.mime.text import MIMEText;
50from email.utils import COMMASPACE;
51
52if sys.version_info[0] >= 3:
53 from io import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
54else:
55 from StringIO import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
56from optparse import OptionParser; # pylint: disable=deprecated-module
57from PIL import Image; # pylint: disable=import-error
58
59# Add Test Manager's modules path
60g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
61sys.path.append(g_ksTestManagerDir);
62
63# Test Manager imports
64from common import utils;
65from testmanager.core.db import TMDatabaseConnection;
66from testmanager.core.build import BuildDataEx;
67from testmanager.core.failurereason import FailureReasonLogic;
68from testmanager.core.testbox import TestBoxLogic, TestBoxData;
69from testmanager.core.testcase import TestCaseDataEx;
70from testmanager.core.testgroup import TestGroupData;
71from testmanager.core.testset import TestSetLogic, TestSetData;
72from testmanager.core.testresults import TestResultLogic, TestResultFileData;
73from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
74from testmanager.core.useraccount import UserAccountLogic;
75from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
76 g_ksAlertSubject, g_asAlertList #, g_ksLomPassword;
77
78# Python 3 hacks:
79if sys.version_info[0] >= 3:
80 xrange = range; # pylint: disable=redefined-builtin,invalid-name
81
82
83class VirtualTestSheriffCaseFile(object):
84 """
85 A failure investigation case file.
86
87 """
88
89
90 ## Max log file we'll read into memory. (256 MB)
91 kcbMaxLogRead = 0x10000000;
92
93 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
94 self.oSheriff = oSheriff;
95 self.oTestSet = oTestSet; # TestSetData
96 self.oTree = oTree; # TestResultDataEx
97 self.oBuild = oBuild; # BuildDataEx
98 self.oTestBox = oTestBox; # TestBoxData
99 self.oTestGroup = oTestGroup; # TestGroupData
100 self.oTestCase = oTestCase; # TestCaseDataEx
101 self.sMainLog = ''; # The main log file. Empty string if not accessible.
102 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
103
104 # Generate a case file name.
105 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
106 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
107 % ( self.oTestSet.idTestSet,
108 self.oTestCase.sName,
109 self.oTestBox.sName,
110 self.oTestBox.sOs,
111 self.oTestBox.sOsVersion,
112 self.oTestBox.sCpuArch,
113 self.oTestBox.sCpuName,
114 self.oTestBox.sCpuVendor,
115 self.oBuild.oCat.sProduct,
116 self.oBuild.oCat.sBranch,
117 self.oBuild.oCat.sType,
118 self.oBuild.iRevision, );
119
120 # Investigation notes.
121 self.tReason = None; # None or one of the ktReason_XXX constants.
122 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
123 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
124
125 #
126 # Reason.
127 #
128
129 def noteReason(self, tReason):
130 """ Notes down a possible reason. """
131 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
132 self.tReason = tReason;
133 return True;
134
135 def noteReasonForId(self, tReason, idTestResult, sComment = None):
136 """ Notes down a possible reason for a specific test result. """
137 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
138 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
139 (u' (%s)' % (sComment,)) if sComment is not None else ''));
140 self.dReasonForResultId[idTestResult] = tReason;
141 if sComment is not None:
142 self.dCommentForResultId[idTestResult] = sComment;
143 return True;
144
145
146 #
147 # Test classification.
148 #
149
150 def isVBoxTest(self):
151 """ Test classification: VirtualBox (using the build) """
152 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
153
154 def isVBoxUnitTest(self):
155 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
156 return self.isVBoxTest() \
157 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower() == 'misc: unit tests');
158
159 def isVBoxInstallTest(self):
160 """ Test case classification: VirtualBox Guest installation test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('install:');
163
164 def isVBoxUnattendedInstallTest(self):
165 """ Test case classification: VirtualBox Guest installation test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('uinstall:');
168
169 def isVBoxUSBTest(self):
170 """ Test case classification: VirtualBox USB test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('usb:');
173
174 def isVBoxStorageTest(self):
175 """ Test case classification: VirtualBox Storage test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('storage:');
178
179 def isVBoxGAsTest(self):
180 """ Test case classification: VirtualBox Guest Additions test. """
181 return self.isVBoxTest() \
182 and ( self.oTestCase.sName.lower().startswith('guest additions')
183 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
184
185 def isVBoxAPITest(self):
186 """ Test case classification: VirtualBox API test. """
187 return self.isVBoxTest() \
188 and self.oTestCase.sName.lower().startswith('api:');
189
190 def isVBoxBenchmarkTest(self):
191 """ Test case classification: VirtualBox Benchmark test. """
192 return self.isVBoxTest() \
193 and self.oTestCase.sName.lower().startswith('benchmark:');
194
195 def isVBoxSmokeTest(self):
196 """ Test case classification: Smoke test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('smoketest');
199
200 def isVBoxSerialTest(self):
201 """ Test case classification: Smoke test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('serial:');
204
205
206 #
207 # Utility methods.
208 #
209
210 def getMainLog(self):
211 """
212 Tries to read the main log file since this will be the first source of information.
213 """
214 if self.sMainLog:
215 return self.sMainLog;
216 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
217 if oFile is not None:
218 try:
219 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
220 except Exception as oXcpt:
221 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
222 self.sMainLog = '';
223 else:
224 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
225 return self.sMainLog;
226
227 def getLogFile(self, oFile):
228 """
229 Tries to read the given file as a utf-8 log file.
230 oFile is a TestFileDataEx instance.
231 Returns empty string if problems opening or reading the file.
232 """
233 sContent = '';
234 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
235 if oFile is not None:
236 try:
237 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
238 except Exception as oXcpt:
239 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
240 else:
241 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
242 return sContent;
243
244 def getSvcLog(self):
245 """
246 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
247 Note! Returns the first VBoxSVC log file we find.
248 """
249 if not self.sSvcLog:
250 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
251 if aoSvcLogFiles:
252 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
253 return self.sSvcLog;
254
255 def getScreenshotSha256(self, oFile):
256 """
257 Tries to read the given screenshot file, uncompress it, and do SHA-2
258 on the raw pixels.
259 Returns SHA-2 digest string on success, None on failure.
260 """
261 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
262 try:
263 abImageFile = oImgFile.read();
264 except Exception as oXcpt:
265 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
266 else:
267 try:
268 oImage = Image.open(StringIO(abImageFile));
269 except Exception as oXcpt:
270 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
271 else:
272 try:
273 oHash = hashlib.sha256();
274 oHash.update(oImage.tostring());
275 except Exception as oXcpt:
276 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
277 else:
278 return oHash.hexdigest();
279 return None;
280
281
282
283 def isSingleTestFailure(self):
284 """
285 Figure out if this is a single test failing or if it's one of the
286 more complicated ones.
287 """
288 if self.oTree.cErrors == 1:
289 return True;
290 if self.oTree.deepCountErrorContributers() <= 1:
291 return True;
292 return False;
293
294
295
296class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
297 """
298 Add build info into Test Manager database.
299 """
300
301 ## The user account for the virtual sheriff.
302 ksLoginName = 'vsheriff';
303
304 def __init__(self):
305 """
306 Parse command line.
307 """
308 self.oDb = None;
309 self.tsNow = None;
310 self.oTestResultLogic = None;
311 self.oTestSetLogic = None;
312 self.oFailureReasonLogic = None; # FailureReasonLogic;
313 self.oTestResultFailureLogic = None; # TestResultFailureLogic
314 self.oLogin = None;
315 self.uidSelf = -1;
316 self.oLogFile = None;
317 self.asBsodReasons = [];
318 self.asUnitTestReasons = [];
319
320 oParser = OptionParser();
321 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
322 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
323 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
324 help = 'Work period specified in hours. Defauls is 2 hours.');
325 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
326 help = 'Whether to commit the findings to the database. Default is a dry run.');
327 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
328 help = 'Only investigate this one. Accumulates IDs when repeated.');
329 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
330 help = 'Quiet execution');
331 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
332 help = 'Where to log messages.');
333 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
334 help = 'Enables debug mode.');
335
336 (self.oConfig, _) = oParser.parse_args();
337
338 if self.oConfig.sLogFile:
339 self.oLogFile = open(self.oConfig.sLogFile, "a");
340 self.oLogFile.write('VirtualTestSheriff: $Revision: 86891 $ \n');
341
342
343 def eprint(self, sText):
344 """
345 Prints error messages.
346 Returns 1 (for exit code usage.)
347 """
348 print('error: %s' % (sText,));
349 if self.oLogFile is not None:
350 if sys.version_info[0] >= 3:
351 self.oLogFile.write(u'error: %s\n' % (sText,));
352 else:
353 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
354 return 1;
355
356 def dprint(self, sText):
357 """
358 Prints debug info.
359 """
360 if self.oConfig.fDebug:
361 if not self.oConfig.fQuiet:
362 print('debug: %s' % (sText, ));
363 if self.oLogFile is not None:
364 if sys.version_info[0] >= 3:
365 self.oLogFile.write(u'debug: %s\n' % (sText,));
366 else:
367 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
368 return 0;
369
370 def vprint(self, sText):
371 """
372 Prints verbose info.
373 """
374 if not self.oConfig.fQuiet:
375 print('info: %s' % (sText,));
376 if self.oLogFile is not None:
377 if sys.version_info[0] >= 3:
378 self.oLogFile.write(u'info: %s\n' % (sText,));
379 else:
380 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
381 return 0;
382
383 def getFailureReason(self, tReason):
384 """ Gets the failure reason object for tReason. """
385 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
386
387 def selfCheck(self):
388 """ Does some self checks, looking up things we expect to be in the database and such. """
389 rcExit = 0;
390 for sAttr in dir(self.__class__):
391 if sAttr.startswith('ktReason_'):
392 tReason = getattr(self.__class__, sAttr);
393 oFailureReason = self.getFailureReason(tReason);
394 if oFailureReason is None:
395 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
396 % (tReason[1], tReason[0],));
397
398 # Check the user account as well.
399 if self.oLogin is None:
400 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
401 if oLogin is None:
402 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
403 return rcExit;
404
405 def sendEmailAlert(self, uidAuthor, sBodyText):
406 """
407 Sends email alert.
408 """
409
410 # Get author email
411 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
412 sFrom = self.oDb.fetchOne();
413 if sFrom is not None:
414 sFrom = sFrom[0];
415 else:
416 sFrom = g_ksAlertFrom;
417
418 # Gather recipient list.
419 asEmailList = [];
420 for sUser in g_asAlertList:
421 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
422 sEmail = self.oDb.fetchOne();
423 if sEmail:
424 asEmailList.append(sEmail[0]);
425 if not asEmailList:
426 return self.eprint('No email addresses to send alter to!');
427
428 # Compose the message.
429 oMsg = MIMEMultipart();
430 oMsg['From'] = sFrom;
431 oMsg['To'] = COMMASPACE.join(asEmailList);
432 oMsg['Subject'] = g_ksAlertSubject;
433 oMsg.attach(MIMEText(sBodyText, 'plain'))
434
435 # Try send it.
436 try:
437 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
438 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
439 oSMTP.quit()
440 except smtplib.SMTPException as oXcpt:
441 return self.eprint('Failed to send mail: %s' % (oXcpt,));
442
443 return 0;
444
445 def badTestBoxManagement(self):
446 """
447 Looks for bad test boxes and first tries once to reboot them then disables them.
448 """
449 rcExit = 0;
450
451 #
452 # We skip this entirely if we're running in the past and not in harmless debug mode.
453 #
454 if self.oConfig.cStartHoursAgo != 0 \
455 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
456 return rcExit;
457 tsNow = self.tsNow if self.oConfig.fDebug else None;
458 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
459 oTestBoxLogic = TestBoxLogic(self.oDb);
460
461 #
462 # Generate a list of failures reasons we consider bad-testbox behavior.
463 #
464 aidFailureReasons = [
465 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
466 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
467 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
468 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
469 ];
470
471 #
472 # Get list of bad test boxes for given period and check them out individually.
473 #
474 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
475 aidFailureReasons = aidFailureReasons);
476 for idTestBox in aidBadTestBoxes:
477 # Skip if the testbox is already disabled or has a pending reboot command.
478 try:
479 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
480 except Exception as oXcpt:
481 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
482 continue;
483 if not oTestBox.fEnabled:
484 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
485 % ( idTestBox, oTestBox.sName, ));
486 continue;
487 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
488 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
489 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
490 continue;
491
492 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
493 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
494 cOkay = 0;
495 cBad = 0;
496 iFirstOkay = len(aoSets);
497 for iSet, oSet in enumerate(aoSets):
498 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
499 cBad += 1;
500 else:
501 # Check for bad failure reasons.
502 oFailure = None;
503 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
504 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
505 aoFailedResults = oTree.getListOfFailures();
506 for oFailedResult in aoFailedResults:
507 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
508 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
509 break;
510 oFailure = None;
511 if oFailure is not None:
512 cBad += 1;
513 else:
514 # This is an okay test result then.
515 ## @todo maybe check the elapsed time here, it could still be a bad run?
516 cOkay += 1;
517 if iFirstOkay > iSet:
518 iFirstOkay = iSet;
519 if iSet > 10:
520 break;
521
522 # We react if there are two or more bad-testbox statuses at the head of the
523 # history and at least three in the last 10 results.
524 if iFirstOkay >= 2 and cBad > 2:
525 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
526 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
527 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
528 self.vprint(sComment);
529 self.sendEmailAlert(self.uidSelf, sComment);
530 if self.oConfig.fRealRun is True:
531 try:
532 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
533 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
534 % (iFirstOkay, cBad, cOkay),);
535 except Exception as oXcpt:
536 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
537 else:
538 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
539 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
540 self.vprint(sComment);
541 self.sendEmailAlert(self.uidSelf, sComment);
542 if self.oConfig.fRealRun is True:
543 try:
544 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
545 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
546 % (iFirstOkay, cBad, cOkay),);
547 except Exception as oXcpt:
548 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
549 else:
550 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
551 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
552
553 ## @todo r=bird: review + rewrite;
554 ## - no selecting here, that belongs in the core/*.py files.
555 ## - preserve existing comments.
556 ## - doing way too much in the try/except block.
557 ## - No password quoting in the sshpass command that always fails (127).
558 ## - Timeout is way to low. testboxmem1 need more than 10 min to take a dump, ages to
559 ## get thru POST and another 5 just to time out in grub. Should be an hour or so.
560 ## Besides, it need to be constant elsewhere in the file, not a variable here.
561 ##
562 ##
563 ## Reset hanged testboxes
564 ##
565 #cStatusTimeoutMins = 10;
566 #
567 #self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
568 # ' FROM TestBoxStatuses, TestBoxes\n'
569 # ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
570 # ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
571 # ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
572 # ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
573 #for idTestBox in self.oDb.fetchAll():
574 # idTestBox = idTestBox[0];
575 # try:
576 # oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
577 # except Exception as oXcpt:
578 # rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
579 # continue;
580 # # Skip if the testbox is already disabled, already reset or there's no iLOM
581 # if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
582 # self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
583 # % ( idTestBox, oTestBox.sName, ));
584 # continue;
585 # ## @todo get iLOM credentials from a table?
586 # sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
587 # try:
588 # oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
589 # sStdout = oPs.communicate()[0];
590 # iRC = oPs.wait();
591 #
592 # oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
593 # oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
594 #
595 # sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
596 # self.vprint(sComment);
597 # self.sendEmailAlert(self.uidSelf, sComment);
598 #
599 # except Exception as oXcpt:
600 # rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
601 #
602 return rcExit;
603
604
605 ## @name Failure reasons we know.
606 ## @{
607 ktReason_Add_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest Timeout' );
608 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
609 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
610 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
611 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
612 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
613 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
614 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
615 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
616 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
617 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
618 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
619 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
620 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
621 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
622 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
623 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
624 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
625 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
626 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
627 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
628 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
629 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
630 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
631 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
632 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
633 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
634 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
635 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
636 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
637 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
638 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
639 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
640 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
641 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
642 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
643 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
644 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
645 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
646 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
647 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
648 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
649 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
650 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
651 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
652 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
653 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
654 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
655 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
656 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
657 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
658 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
659 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
660 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
661 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
662 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
663 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
664 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
665 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
666 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
667 ## @}
668
669 ## BSOD category.
670 ksBsodCategory = 'BSOD';
671 ## Special reason indicating that the flesh and blood sheriff has work to do.
672 ksBsodAddNew = 'Add new BSOD';
673
674 ## Unit test category.
675 ksUnitTestCategory = 'Unit';
676 ## Special reason indicating that the flesh and blood sheriff has work to do.
677 ksUnitTestAddNew = 'Add new';
678
679 ## Used for indica that we shouldn't report anything for this test result ID and
680 ## consider promoting the previous error to test set level if it's the only one.
681 ktHarmless = ( 'Probably', 'Caused by previous error' );
682
683
684 def caseClosed(self, oCaseFile):
685 """
686 Reports the findings in the case and closes it.
687 """
688 #
689 # Log it and create a dReasonForReasultId we can use below.
690 #
691 dCommentForResultId = oCaseFile.dCommentForResultId;
692 if oCaseFile.dReasonForResultId:
693 # Must weed out ktHarmless.
694 dReasonForResultId = {};
695 for idKey, tReason in oCaseFile.dReasonForResultId.items():
696 if tReason is not self.ktHarmless:
697 dReasonForResultId[idKey] = tReason;
698 if not dReasonForResultId:
699 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
700 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
701 return False;
702
703 # Try promote to single reason.
704 atValues = dReasonForResultId.values();
705 fSingleReason = True;
706 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
707 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
708 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
709 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
710 else:
711 fSingleReason = False;
712 if fSingleReason:
713 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
714 if dCommentForResultId:
715 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
716 elif oCaseFile.tReason is not None:
717 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
718 else:
719 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
720 return False;
721
722 self.vprint(u'Closing %s with following reason%s: %s'
723 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
724
725 #
726 # Add the test failure reason record(s).
727 #
728 for idTestResult, tReason in dReasonForResultId.items():
729 oFailureReason = self.getFailureReason(tReason);
730 if oFailureReason is not None:
731 sComment = 'Set by $Revision: 86891 $' # Handy for reverting later.
732 if idTestResult in dCommentForResultId:
733 sComment += ': ' + dCommentForResultId[idTestResult];
734
735 oAdd = TestResultFailureData();
736 oAdd.initFromValues(idTestResult = idTestResult,
737 idFailureReason = oFailureReason.idFailureReason,
738 uidAuthor = self.uidSelf,
739 idTestSet = oCaseFile.oTestSet.idTestSet,
740 sComment = sComment,);
741 if self.oConfig.fRealRun:
742 try:
743 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
744 except Exception as oXcpt:
745 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
746 % (oXcpt, oAdd, oCaseFile.sLongName,));
747 else:
748 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
749 return True;
750
751 #
752 # Tools for assiting log parsing.
753 #
754
755 @staticmethod
756 def matchFollowedByLines(sStr, off, asFollowingLines):
757 """ Worker for isThisFollowedByTheseLines. """
758
759 # Advance off to the end of the line.
760 off = sStr.find('\n', off);
761 if off < 0:
762 return False;
763 off += 1;
764
765 # Match each string with the subsequent lines.
766 for iLine, sLine in enumerate(asFollowingLines):
767 offEnd = sStr.find('\n', off);
768 if offEnd < 0:
769 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
770 if sLine and sStr.find(sLine, off, offEnd) < 0:
771 return False;
772
773 # next line.
774 off = offEnd + 1;
775
776 return True;
777
778 @staticmethod
779 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
780 """
781 Looks for a line contining sFirst which is then followed by lines
782 with the strings in asFollowingLines. (No newline chars anywhere!)
783 Returns True / False.
784 """
785 off = sStr.find(sFirst, 0);
786 while off >= 0:
787 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
788 return True;
789 off = sStr.find(sFirst, off + 1);
790 return False;
791
792 @staticmethod
793 def findAndReturnRestOfLine(sHaystack, sNeedle):
794 """
795 Looks for sNeedle in sHaystack.
796 Returns The text following the needle up to the end of the line.
797 Returns None if not found.
798 """
799 if sHaystack is None:
800 return None;
801 off = sHaystack.find(sNeedle);
802 if off < 0:
803 return None;
804 off += len(sNeedle)
805 offEol = sHaystack.find('\n', off);
806 if offEol < 0:
807 offEol = len(sHaystack);
808 return sHaystack[off:offEol]
809
810 @staticmethod
811 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
812 """
813 Looks for sNeedle in zeroe or more haystacks (asHaystack).
814 Returns The text following the first needed found up to the end of the line.
815 Returns None if not found.
816 """
817 for sHaystack in asHaystacks:
818 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
819 if sRet is not None:
820 return sRet;
821 return None;
822
823
824 #
825 # The investigative units.
826 #
827
828 katSimpleInstallUninstallMainLogReasons = [
829 # ( Whether to stop on hit, reason tuple, needle text. )
830 ( False, ktReason_Host_LeftoverService,
831 'SERVICE_NAME: vbox' ),
832 ( False, ktReason_Host_LeftoverService,
833 'Seems installation was skipped. Old version lurking behind? Not the fault of this build/test run!'),
834 ];
835
836 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
837 'darwin': [
838 # ( Whether to stop on hit, reason tuple, needle text. )
839 ( True, ktReason_Host_DriverNotUnloading,
840 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
841 ],
842 'linux': [
843 # ( Whether to stop on hit, reason tuple, needle text. )
844 ( True, ktReason_Host_DriverNotCompilable,
845 'This system is not currently set up to build kernel modules' ),
846 ( True, ktReason_Host_DriverNotCompilable,
847 'This system is currently not set up to build kernel modules' ),
848 ( True, ktReason_Host_InstallationFailed,
849 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
850 ( True, ktReason_Host_DriverNotUnloading,
851 'Cannot unload module vboxdrv'),
852 ],
853 'solaris': [
854 # ( Whether to stop on hit, reason tuple, needle text. )
855 ( True, ktReason_Host_DriverNotUnloading, 'can\'t unload the module: Device busy' ),
856 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: Host module ...FAILED!' ),
857 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: NetFilter (Crossbow) module ...FAILED!' ),
858 ( True, ktReason_Host_InstallationFailed, 'svcadm: Couldn\'t bind to svc.configd.' ),
859 ( True, ktReason_Host_InstallationFailed, 'pkgadd: ERROR: postinstall script did not complete successfully' ),
860 ],
861 };
862
863
864 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
865 """
866 Investigates an install or uninstall failure.
867
868 We lump the two together since the installation typically also performs
869 an uninstall first and will be seeing similar issues to the uninstall.
870 """
871 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
872
873 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
874 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
875 return True;
876
877 atSimple = self.katSimpleInstallUninstallMainLogReasons;
878 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
879 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
880
881 fFoundSomething = False;
882 for fStopOnHit, tReason, sNeedle in atSimple:
883 if sResultLog.find(sNeedle) > 0:
884 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
885 if fStopOnHit:
886 return True;
887 fFoundSomething = True;
888
889 return fFoundSomething if fFoundSomething else None;
890
891
892 def investigateBadTestBox(self, oCaseFile):
893 """
894 Checks out bad-testbox statuses.
895 """
896 _ = oCaseFile;
897 return False;
898
899
900 def investigateVBoxUnitTest(self, oCaseFile):
901 """
902 Checks out a VBox unittest problem.
903 """
904
905 #
906 # Process simple test case failures first, using their name as reason.
907 # We do the reason management just like for BSODs.
908 #
909 cRelevantOnes = 0;
910 sMainLog = oCaseFile.getMainLog();
911 aoFailedResults = oCaseFile.oTree.getListOfFailures();
912 for oFailedResult in aoFailedResults:
913 if oFailedResult is oCaseFile.oTree:
914 self.vprint('TODO: toplevel failure');
915 cRelevantOnes += 1
916
917 elif oFailedResult.sName == 'Installing VirtualBox':
918 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
919 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
920 cRelevantOnes += 1
921
922 elif oFailedResult.sName == 'Uninstalling VirtualBox':
923 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
924 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
925 cRelevantOnes += 1
926
927 elif oFailedResult.oParent is not None:
928 # Get the 2nd level node because that's where we'll find the unit test name.
929 while oFailedResult.oParent.oParent is not None:
930 oFailedResult = oFailedResult.oParent;
931
932 # Only report a failure once.
933 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
934 sKey = oFailedResult.sName;
935 if sKey.startswith('testcase/'):
936 sKey = sKey[9:];
937 if sKey in self.asUnitTestReasons:
938 tReason = ( self.ksUnitTestCategory, sKey );
939 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
940 else:
941 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
942 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
943 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
944 cRelevantOnes += 1
945 else:
946 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
947
948 #
949 # If we've caught all the relevant ones by now, report the result.
950 #
951 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
952 return self.caseClosed(oCaseFile);
953 return False;
954
955 def extractGuestCpuStack(self, sInfoText):
956 """
957 Extracts the guest CPU stacks from the input file.
958
959 Returns a dictionary keyed by the CPU number, value being a list of
960 raw stack lines (no header).
961 Returns empty dictionary if no stacks where found.
962 """
963 dRet = {};
964 off = 0;
965 while True:
966 # Find the stack.
967 offStart = sInfoText.find('=== start guest stack VCPU ', off);
968 if offStart < 0:
969 break;
970 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
971 if offEnd >= 0:
972 offEnd += 3;
973 else:
974 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
975 if offEnd < 0:
976 offEnd = len(sInfoText);
977
978 sStack = sInfoText[offStart : offEnd];
979 sStack = sStack.replace('\r',''); # paranoia
980 asLines = sStack.split('\n');
981
982 # Figure the CPU.
983 asWords = asLines[0].split();
984 if len(asWords) < 6 or not asWords[5].isdigit():
985 break;
986 iCpu = int(asWords[5]);
987
988 # Add it and advance.
989 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
990 off = offEnd;
991 return dRet;
992
993 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
994 """ Investigates kvm_lock_spinning deadlocks """
995 #
996 # Extract the stacks. We need more than one CPU to create a deadlock.
997 #
998 dStacks = self.extractGuestCpuStack(sInfoText);
999 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
1000 if len(dStacks) >= 2:
1001 #
1002 # Examin each of the stacks. Each must have kvm_lock_spinning in
1003 # one of the first three entries.
1004 #
1005 cHits = 0;
1006 for iCpu in dStacks:
1007 asBacktrace = dStacks[iCpu];
1008 for iFrame in xrange(min(3, len(asBacktrace))):
1009 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
1010 cHits += 1;
1011 break;
1012 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
1013 if cHits == len(dStacks):
1014 return (True, self.ktReason_VMM_kvm_lock_spinning);
1015
1016 _ = dLogs; _ = oCaseFile;
1017 return (False, None);
1018
1019 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1020 """ Investigates HalReturnToFirmware hangs """
1021 del oCaseFile
1022 del sInfoText
1023 del dLogs
1024 # hope that's sufficient
1025 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1026
1027 ## Things we search a main or VM log for to figure out why something went bust.
1028 ## @note DO NOT ADD MORE STUFF HERE!
1029 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1030 katSimpleMainAndVmLogReasonsDeprecated = [
1031 # ( Whether to stop on hit, reason tuple, needle text. )
1032 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1033 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1034 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1035 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1036 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1037 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1038 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1039 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1040 ( True, ktReason_Networking_Nonexistent_host_nic,
1041 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1042 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1043 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1044 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1045 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1046 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1047 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1048 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1049 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1050 ( True, ktReason_Unknown_File_Not_Found,
1051 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1052 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1053 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1054 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1055 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1056 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1057 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1058 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1059 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1060 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1061 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1062 ];
1063
1064 ## This we search a main log for to figure out why something went bust.
1065 katSimpleMainLogReasons = [
1066 # ( Whether to stop on hit, reason tuple, needle text. )
1067 ];
1068
1069 ## This we search a VM log for to figure out why something went bust.
1070 katSimpleVmLogReasons = [
1071 # ( Whether to stop on hit, reason tuple, needle text. )
1072 ];
1073
1074 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1075 katSimpleVBoxHardeningLogReasons = [
1076 # ( Whether to stop on hit, reason tuple, needle text. )
1077 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1078 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1079 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1080 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1081 ];
1082
1083 ## Things we search a kernel.log file for to figure out why something went bust.
1084 katSimpleKernelLogReasons = [
1085 # ( Whether to stop on hit, reason tuple, needle text. )
1086 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1087 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1088 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1089 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1090 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1091 ];
1092
1093 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1094 katSimpleVgaTextReasons = [
1095 # ( Whether to stop on hit, reason tuple, needle text. )
1096 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1097 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1098 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1099 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1100 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1101 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1102 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1103 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1104 "and send a report. Then try booting with the 'noapic' option\n"
1105 "\n" ),
1106 ( True, ktReason_OSInstall_GRUB_hang,
1107 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1108 ( True, ktReason_OSInstall_GRUB_hang,
1109 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1110 ( True, ktReason_OSInstall_GRUB_hang,
1111 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1112 ( True, ktReason_OSInstall_GRUB_hang,
1113 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1114 ( True, ktReason_OSInstall_Udev_hang,
1115 "\nStarting udev:\n\n\n\n" ),
1116 ( True, ktReason_OSInstall_Udev_hang,
1117 "\nStarting udev:\n------" ),
1118 ( True, ktReason_Panic_BootManagerC000000F,
1119 "Windows failed to start. A recent hardware or software change might be the" ),
1120 ( True, ktReason_BootManager_Image_corrupt,
1121 "BOOTMGR image is corrupt. The system cannot boot." ),
1122 ];
1123
1124 ## Things we search for in the info.txt file. Require handlers for now.
1125 katInfoTextHandlers = [
1126 # ( Trigger text, handler method )
1127 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1128 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1129 ];
1130
1131 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1132 katSimpleScreenshotHashReasons = [
1133 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1134 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1135 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1136 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1137 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1138 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1139 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1140 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1141 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1142 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1143 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1144 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1145 ];
1146
1147
1148 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1149 """
1150 Scans for atNeedles in sLog.
1151
1152 Returns True if a stop-on-hit neelde was found.
1153 Returns None if a no-stop reason was found.
1154 Returns False if no hit.
1155 """
1156 fRet = False;
1157 for fStopOnHit, tReason, oNeedle in atNeedles:
1158 fMatch = False;
1159 if utils.isString(oNeedle):
1160 for sLog in asLogs:
1161 if sLog:
1162 fMatch |= sLog.find(oNeedle) > 0;
1163 else:
1164 for sLog in asLogs:
1165 if sLog:
1166 fMatch |= oNeedle.search(sLog) is not None;
1167 if fMatch:
1168 oCaseFile.noteReasonForId(tReason, idTestResult);
1169 if fStopOnHit:
1170 return True;
1171 fRet = None;
1172 return fRet;
1173
1174
1175 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1176 """
1177 Investigates a failed VM run.
1178 """
1179 enmReason = None;
1180 if oFailedResult.sName == 'mmap':
1181 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1182 enmReason = self.ktReason_Add_Mmap_Coherency;
1183 elif sResultLog.find('FlushViewOfFile') >= 0:
1184 enmReason = self.ktReason_Add_FlushViewOfFile;
1185 elif oFailedResult.sName == 'Copy to guest':
1186 if sResultLog.find('*** abort action ***') >= 0:
1187 enmReason = self.ktReason_Add_CopyToGuest_Timeout;
1188
1189 if enmReason is not None:
1190 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1191
1192 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1193 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1194 return False;
1195
1196 def isResultFromGATest(self, oFailedResult):
1197 """
1198 Checks if this result and corresponding log snippet looks like a GA test run.
1199 """
1200 while oFailedResult is not None:
1201 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', ]:
1202 return True;
1203 oFailedResult = oFailedResult.oParent;
1204 return False;
1205
1206
1207 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1208 """
1209 Investigates a failed VM run.
1210 """
1211
1212 def investigateLogSet():
1213 """
1214 Investigates the current set of VM related logs.
1215 """
1216 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1217 % ( len(sResultLog if sResultLog else ''),
1218 len(sVMLog if sVMLog else ''),
1219 len(sKrnlLog if sKrnlLog else ''),
1220 len(sVgaText if sVgaText else ''),
1221 len(sInfoText if sInfoText else ''),
1222 len(sNtHardLog if sNtHardLog else ''),));
1223
1224 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1225 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1226 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1227 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1228 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1229 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1230
1231 # TODO: more
1232
1233 #
1234 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1235 #
1236 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1237 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1238 if sDetails is not None:
1239 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1240 sKey = sDetails.split(' ', 1)[0];
1241 try: sKey = '0x%08X' % (int(sKey, 16),);
1242 except: pass;
1243 if sKey in self.asBsodReasons:
1244 tReason = ( self.ksBsodCategory, sKey );
1245 elif sKey.lower() in self.asBsodReasons: # just in case.
1246 tReason = ( self.ksBsodCategory, sKey.lower() );
1247 else:
1248 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1249 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1250 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1251
1252 fFoundSomething = False;
1253
1254 #
1255 # Look for linux panic.
1256 #
1257 if sKrnlLog is not None:
1258 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1259 if fRet is True:
1260 return fRet;
1261 fFoundSomething |= fRet is None;
1262
1263 #
1264 # Loop thru the simple stuff.
1265 #
1266
1267 # Main log.
1268 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1269 if fRet is True:
1270 return fRet;
1271 fFoundSomething |= fRet is None;
1272
1273 # VM log.
1274 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1275 if fRet is True:
1276 return fRet;
1277 fFoundSomething |= fRet is None;
1278
1279 # Old main + vm log.
1280 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1281 oCaseFile, oFailedResult.idTestResult);
1282 if fRet is True:
1283 return fRet;
1284 fFoundSomething |= fRet is None;
1285
1286 # Continue with vga text.
1287 if sVgaText:
1288 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1289 if fRet is True:
1290 return fRet;
1291 fFoundSomething |= fRet is None;
1292
1293 # Continue with screen hashes.
1294 if sScreenHash is not None:
1295 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1296 if sScreenHash == sHash:
1297 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1298 if fStopOnHit:
1299 return True;
1300 fFoundSomething = True;
1301
1302 # Check VBoxHardening.log.
1303 if sNtHardLog is not None:
1304 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1305 if fRet is True:
1306 return fRet;
1307 fFoundSomething |= fRet is None;
1308
1309 #
1310 # Complicated stuff.
1311 #
1312 dLogs = {
1313 'sVMLog': sVMLog,
1314 'sNtHardLog': sNtHardLog,
1315 'sScreenHash': sScreenHash,
1316 'sKrnlLog': sKrnlLog,
1317 'sVgaText': sVgaText,
1318 'sInfoText': sInfoText,
1319 };
1320
1321 # info.txt.
1322 if sInfoText:
1323 for sNeedle, fnHandler in self.katInfoTextHandlers:
1324 if sInfoText.find(sNeedle) > 0:
1325 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1326 if tReason is not None:
1327 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1328 if fStop:
1329 return True;
1330 fFoundSomething = True;
1331
1332 #
1333 # Check for repeated reboots...
1334 #
1335 if sVMLog is not None:
1336 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1337 if cResets > 10:
1338 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1339 sComment = 'Counted %s reboots' % (cResets,));
1340
1341 return fFoundSomething;
1342
1343 #
1344 # Check if we got any VM or/and kernel logs. Treat them as sets in
1345 # case we run multiple VMs here (this is of course ASSUMING they
1346 # appear in the order that terminateVmBySession uploads them).
1347 #
1348 cTimes = 0;
1349 sVMLog = None;
1350 sNtHardLog = None;
1351 sScreenHash = None;
1352 sKrnlLog = None;
1353 sVgaText = None;
1354 sInfoText = None;
1355 for oFile in oFailedResult.aoFiles:
1356 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1357 if 'VBoxHardening.log' not in oFile.sFile:
1358 if sVMLog is not None:
1359 if investigateLogSet() is True:
1360 return True;
1361 cTimes += 1;
1362 sInfoText = None;
1363 sVgaText = None;
1364 sKrnlLog = None;
1365 sScreenHash = None;
1366 sNtHardLog = None;
1367 sVMLog = oCaseFile.getLogFile(oFile);
1368 else:
1369 sNtHardLog = oCaseFile.getLogFile(oFile);
1370 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1371 sKrnlLog = oCaseFile.getLogFile(oFile);
1372 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1373 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1374 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1375 sInfoText = oCaseFile.getLogFile(oFile);
1376 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1377 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1378 if sScreenHash is not None:
1379 sScreenHash = sScreenHash.lower();
1380 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1381
1382 if ( sVMLog is not None \
1383 or sNtHardLog is not None \
1384 or cTimes == 0) \
1385 and investigateLogSet() is True:
1386 return True;
1387
1388 return None;
1389
1390 def isResultFromVMRun(self, oFailedResult, sResultLog):
1391 """
1392 Checks if this result and corresponding log snippet looks like a VM run.
1393 """
1394
1395 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1396 if sResultLog.find(' startVm') > 0:
1397 return True;
1398
1399 # Any other indicators? No?
1400 _ = oFailedResult;
1401 return False;
1402
1403
1404 ## Things we search a VBoxSVC log for to figure out why something went bust.
1405 katSimpleSvcLogReasons = [
1406 # ( Whether to stop on hit, reason tuple, needle text. )
1407 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1408 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1409 ];
1410
1411 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1412 """
1413 Check the VBoxSVC log for a single VM run.
1414 """
1415 if sSvcLog:
1416 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1417 if fRet is True or fRet is None:
1418 return True;
1419 return False;
1420
1421 def investigateNtHardLogForVMRun(self, oCaseFile):
1422 """
1423 Check if the hardening log for a single VM run contains VM crash indications.
1424 """
1425 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1426 for oLogFile in aoLogFiles:
1427 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1428 sLog = oCaseFile.getLogFile(oLogFile);
1429 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1430 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1431 return False;
1432
1433
1434 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1435 """
1436 Checks out a VBox VM test.
1437
1438 This is generic investigation of a test running one or more VMs, like
1439 for example a smoke test or a guest installation test.
1440
1441 The fSingleVM parameter is a hint, which probably won't come in useful.
1442 """
1443 _ = fSingleVM;
1444
1445 #
1446 # Get a list of test result failures we should be looking into and the main log.
1447 #
1448 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1449 sMainLog = oCaseFile.getMainLog();
1450
1451 #
1452 # There are a set of errors ending up on the top level result record.
1453 # Should deal with these first.
1454 #
1455 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1456 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1457 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1458 # ignore other failures in the test if we're not a little bit careful here.
1459 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1460 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1461 return self.caseClosed(oCaseFile);
1462
1463 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1464 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1465 (' Timeout: ', ' Attempting to abort child...',) ):
1466 if sMainLog.find('*** glibc detected *** /') > 0:
1467 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1468 else:
1469 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1470 return self.caseClosed(oCaseFile);
1471
1472 # Look for heap corruption without visible hang.
1473 if sMainLog.find('*** glibc detected *** /') > 0 \
1474 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1475 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1476 return self.caseClosed(oCaseFile);
1477
1478 # Out of memory w/ timeout.
1479 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1480 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1481 return self.caseClosed(oCaseFile);
1482
1483 # Stale files like vts_rm.exe (windows).
1484 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1485 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1486 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1487 return self.caseClosed(oCaseFile);
1488
1489 #
1490 # XPCOM screwup
1491 #
1492 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1493 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1494 return self.caseClosed(oCaseFile);
1495
1496 #
1497 # Go thru each failed result.
1498 #
1499 for oFailedResult in aoFailedResults:
1500 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1501 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1502 if oFailedResult.sName == 'Installing VirtualBox':
1503 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1504
1505 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1506 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1507
1508 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1509 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1510
1511 elif self.isResultFromGATest(oFailedResult):
1512 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1513
1514 elif sResultLog.find('most likely not unique') > 0:
1515 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1516 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1517 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1518
1519 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1520 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1521 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1522
1523 elif sResultLog.find('** error: no action was specified') > 0 \
1524 or sResultLog.find('(len(self._asXml, asText))') > 0:
1525 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1526
1527 else:
1528 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1529 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1530
1531 #
1532 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1533 #
1534 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1535 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1536 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1537 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1538 return self.caseClosed(oCaseFile);
1539 if self.investigateNtHardLogForVMRun(oCaseFile):
1540 return self.caseClosed(oCaseFile);
1541
1542 #
1543 # Report home and close the case if we got them all, otherwise log it.
1544 #
1545 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1546 return self.caseClosed(oCaseFile);
1547
1548 if oCaseFile.dReasonForResultId:
1549 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1550 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1551 else:
1552 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1553 return False;
1554
1555
1556 ## Things we search a main log for to figure out why something in the API test went bust.
1557 katSimpleApiMainLogReasons = [
1558 # ( Whether to stop on hit, reason tuple, needle text. )
1559 ( True, ktReason_Networking_Nonexistent_host_nic,
1560 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1561 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1562 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1563 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1564 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1565 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1566 ( True, ktReason_API_MoveVM_InvalidParameter,
1567 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1568 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1569 ];
1570
1571 def investigateVBoxApiTest(self, oCaseFile):
1572 """
1573 Checks out a VBox API test.
1574 """
1575
1576 #
1577 # Get a list of test result failures we should be looking into and the main log.
1578 #
1579 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1580 sMainLog = oCaseFile.getMainLog();
1581
1582 #
1583 # Go thru each failed result.
1584 #
1585 for oFailedResult in aoFailedResults:
1586 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1587 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1588 if oFailedResult.sName == 'Installing VirtualBox':
1589 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1590
1591 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1592 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1593
1594 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1595 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1596
1597 else:
1598 fFoundSomething = False;
1599 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1600 if sResultLog.find(sNeedle) > 0:
1601 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1602 fFoundSomething = True;
1603 if fStopOnHit:
1604 break;
1605 if fFoundSomething:
1606 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1607 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1608
1609 #
1610 # Report home and close the case if we got them all, otherwise log it.
1611 #
1612 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1613 return self.caseClosed(oCaseFile);
1614
1615 if oCaseFile.dReasonForResultId:
1616 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1617 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1618 else:
1619 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1620 return False;
1621
1622
1623 def reasoningFailures(self):
1624 """
1625 Guess the reason for failures.
1626 """
1627 #
1628 # Get a list of failed test sets without any assigned failure reason.
1629 #
1630 cGot = 0;
1631 if not self.oConfig.aidTestSets:
1632 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1633 tsNow = self.tsNow);
1634 else:
1635 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1636 for oTestSet in aoTestSets:
1637 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1638 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1639
1640 #
1641 # Open a case file and assign it to the right investigator.
1642 #
1643 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1644 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1645 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1646 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1647 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1648
1649 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1650
1651 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1652 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1653 fRc = self.investigateBadTestBox(oCaseFile);
1654
1655 elif oCaseFile.isVBoxUnitTest():
1656 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1657 fRc = self.investigateVBoxUnitTest(oCaseFile);
1658
1659 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1660 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1661 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1662
1663 elif oCaseFile.isVBoxUSBTest():
1664 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1665 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1666
1667 elif oCaseFile.isVBoxStorageTest():
1668 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1669 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1670
1671 elif oCaseFile.isVBoxGAsTest():
1672 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1673 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1674
1675 elif oCaseFile.isVBoxAPITest():
1676 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1677 fRc = self.investigateVBoxApiTest(oCaseFile);
1678
1679 elif oCaseFile.isVBoxBenchmarkTest():
1680 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1681 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1682
1683 elif oCaseFile.isVBoxSmokeTest():
1684 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1685 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1686
1687 elif oCaseFile.isVBoxSerialTest():
1688 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1689 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1690
1691 else:
1692 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1693 fRc = False;
1694 cGot += fRc is True;
1695
1696 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1697 return 0;
1698
1699
1700 def main(self):
1701 """
1702 The 'main' function.
1703 Return exit code (0, 1, etc).
1704 """
1705 # Database stuff.
1706 self.oDb = TMDatabaseConnection()
1707 self.oTestResultLogic = TestResultLogic(self.oDb);
1708 self.oTestSetLogic = TestSetLogic(self.oDb);
1709 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1710 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1711 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1712 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1713
1714 # Get a fix on our 'now' before we do anything..
1715 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1716 self.tsNow = self.oDb.fetchOne();
1717
1718 # If we're suppost to commit anything we need to get our user ID.
1719 rcExit = 0;
1720 if self.oConfig.fRealRun:
1721 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1722 if self.oLogin is None:
1723 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1724 else:
1725 self.uidSelf = self.oLogin.uid;
1726
1727 #
1728 # Do the stuff.
1729 #
1730 if rcExit == 0:
1731 rcExit = self.selfCheck();
1732 if rcExit == 0:
1733 rcExit = self.badTestBoxManagement();
1734 rcExit2 = self.reasoningFailures();
1735 if rcExit == 0:
1736 rcExit = rcExit2;
1737 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1738 if rcExit == 0:
1739 rcExit = self.badTestBoxManagement();
1740
1741 # Cleanup.
1742 self.oFailureReasonLogic = None;
1743 self.oTestResultFailureLogic = None;
1744 self.oTestSetLogic = None;
1745 self.oTestResultLogic = None;
1746 self.oDb.close();
1747 self.oDb = None;
1748 if self.oLogFile is not None:
1749 self.oLogFile.close();
1750 self.oLogFile = None;
1751 return rcExit;
1752
1753if __name__ == '__main__':
1754 sys.exit(VirtualTestSheriff().main());
1755
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette