1 | # -*- coding: utf-8 -*-
|
---|
2 | # $Id: testboxtasks.py 94125 2022-03-08 14:15:09Z vboxsync $
|
---|
3 |
|
---|
4 | """
|
---|
5 | TestBox Script - Async Tasks.
|
---|
6 | """
|
---|
7 |
|
---|
8 | __copyright__ = \
|
---|
9 | """
|
---|
10 | Copyright (C) 2012-2022 Oracle Corporation
|
---|
11 |
|
---|
12 | This file is part of VirtualBox Open Source Edition (OSE), as
|
---|
13 | available from http://www.virtualbox.org. This file is free software;
|
---|
14 | you can redistribute it and/or modify it under the terms of the GNU
|
---|
15 | General Public License (GPL) as published by the Free Software
|
---|
16 | Foundation, in version 2 as it comes in the "COPYING" file of the
|
---|
17 | VirtualBox OSE distribution. VirtualBox OSE is distributed in the
|
---|
18 | hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
|
---|
19 |
|
---|
20 | The contents of this file may alternatively be used under the terms
|
---|
21 | of the Common Development and Distribution License Version 1.0
|
---|
22 | (CDDL) only, as it comes in the "COPYING.CDDL" file of the
|
---|
23 | VirtualBox OSE distribution, in which case the provisions of the
|
---|
24 | CDDL are applicable instead of those of the GPL.
|
---|
25 |
|
---|
26 | You may elect to license modified versions of this file under the
|
---|
27 | terms and conditions of either the GPL or the CDDL or both.
|
---|
28 | """
|
---|
29 | __version__ = "$Revision: 94125 $"
|
---|
30 |
|
---|
31 |
|
---|
32 | # Standard python imports.
|
---|
33 | from datetime import datetime
|
---|
34 | import os
|
---|
35 | import re
|
---|
36 | import signal;
|
---|
37 | import sys
|
---|
38 | import subprocess
|
---|
39 | import threading
|
---|
40 | import time
|
---|
41 |
|
---|
42 | # Validation Kit imports.
|
---|
43 | from common import constants
|
---|
44 | from common import utils;
|
---|
45 | from common import webutils;
|
---|
46 | import testboxcommons
|
---|
47 |
|
---|
48 | # Figure where we are.
|
---|
49 | try: __file__
|
---|
50 | except: __file__ = sys.argv[0];
|
---|
51 | g_ksTestScriptDir = os.path.dirname(os.path.abspath(__file__));
|
---|
52 |
|
---|
53 |
|
---|
54 |
|
---|
55 | class TestBoxBaseTask(object):
|
---|
56 | """
|
---|
57 | Asynchronous task employing a thread to do the actual work.
|
---|
58 | """
|
---|
59 |
|
---|
60 | ## Time to wait for a task to terminate.
|
---|
61 | kcSecTerminateTimeout = 60
|
---|
62 |
|
---|
63 | def __init__(self, oTestBoxScript, cSecTimeout, fnThreadProc):
|
---|
64 | self._oTestBoxScript = oTestBoxScript;
|
---|
65 | self._cSecTimeout = cSecTimeout;
|
---|
66 | self._tsSecStarted = utils.timestampSecond();
|
---|
67 | self.__oRLock = threading.RLock();
|
---|
68 | self._oCv = threading.Condition(self.__oRLock);
|
---|
69 | self._fRunning = True; # Protected by lock.
|
---|
70 | self._fShouldTerminate = False; # Protected by lock.
|
---|
71 |
|
---|
72 | # Spawn the worker thread.
|
---|
73 | self._oThread = threading.Thread(target=fnThreadProc);
|
---|
74 | self._oThread.daemon = True;
|
---|
75 | self._oThread.start();
|
---|
76 |
|
---|
77 | def _lock(self):
|
---|
78 | """ Take the CV lock. """
|
---|
79 | self._oCv.acquire();
|
---|
80 |
|
---|
81 | def _unlock(self):
|
---|
82 | """ Release the CV lock. """
|
---|
83 | self._oCv.release();
|
---|
84 |
|
---|
85 | def _complete(self):
|
---|
86 | """
|
---|
87 | Indicate that the task is complete, waking up the main thread.
|
---|
88 | Usually called at the end of the thread procedure.
|
---|
89 | """
|
---|
90 | self._lock();
|
---|
91 | self._fRunning = False;
|
---|
92 | self._oCv.notifyAll();
|
---|
93 | self._unlock();
|
---|
94 |
|
---|
95 | def isRunning(self):
|
---|
96 | """ Check if the task is still running. """
|
---|
97 | self._lock();
|
---|
98 | fRunning = self._fRunning;
|
---|
99 | self._unlock();
|
---|
100 | return fRunning;
|
---|
101 |
|
---|
102 | def wait(self, cSecTimeout):
|
---|
103 | """ Wait for the task to complete. """
|
---|
104 | self._lock();
|
---|
105 | fRunning = self._fRunning;
|
---|
106 | if fRunning is True and cSecTimeout > 0:
|
---|
107 | self._oCv.wait(cSecTimeout)
|
---|
108 | self._unlock();
|
---|
109 | return fRunning;
|
---|
110 |
|
---|
111 | def terminate(self, cSecTimeout = kcSecTerminateTimeout):
|
---|
112 | """ Terminate the task. """
|
---|
113 | self._lock();
|
---|
114 | self._fShouldTerminate = True;
|
---|
115 | self._unlock();
|
---|
116 |
|
---|
117 | return self.wait(cSecTimeout);
|
---|
118 |
|
---|
119 | def _shouldTerminate(self):
|
---|
120 | """
|
---|
121 | Returns True if we should terminate, False if not.
|
---|
122 | """
|
---|
123 | self._lock();
|
---|
124 | fShouldTerminate = self._fShouldTerminate is True;
|
---|
125 | self._unlock();
|
---|
126 | return fShouldTerminate;
|
---|
127 |
|
---|
128 |
|
---|
129 | class TestBoxTestDriverTask(TestBoxBaseTask):
|
---|
130 | """
|
---|
131 | Base class for tasks involving test drivers.
|
---|
132 | """
|
---|
133 |
|
---|
134 | ## When to flush the backlog of log messages.
|
---|
135 | kcchMaxBackLog = 32768;
|
---|
136 |
|
---|
137 | ## The backlog sync time (seconds).
|
---|
138 | kcSecBackLogFlush = 30;
|
---|
139 |
|
---|
140 | ## The timeout for the cleanup job (5 mins).
|
---|
141 | kcSecCleanupTimeout = 300;
|
---|
142 | ## The timeout to wait for the abort command before killing it.
|
---|
143 | kcSecAbortTimeout = 300;
|
---|
144 |
|
---|
145 | ## The timeout to wait for the final output to be processed.
|
---|
146 | kcSecFinalOutputTimeout = 180;
|
---|
147 | ## The timeout to wait for the abort command output to be processed.
|
---|
148 | kcSecAbortCmdOutputTimeout = 30;
|
---|
149 | ## The timeout to wait for the terminate output to be processed.
|
---|
150 | kcSecTerminateOutputTimeout = 30;
|
---|
151 | ## The timeout to wait for the kill output to be processed.
|
---|
152 | kcSecKillOutputTimeout = 30;
|
---|
153 |
|
---|
154 | ## The timeout for talking to the test manager.
|
---|
155 | ksecTestManagerTimeout = 60;
|
---|
156 |
|
---|
157 |
|
---|
158 | def __init__(self, oTestBoxScript, fnThreadProc, cSecTimeout, idResult, sScriptCmdLine):
|
---|
159 | """
|
---|
160 | Class instance init
|
---|
161 | """
|
---|
162 | # Init our instance data.
|
---|
163 | self._idResult = idResult;
|
---|
164 | self._sScriptCmdLine = sScriptCmdLine;
|
---|
165 | self._oChild = None;
|
---|
166 | self._oBackLogLock = threading.RLock();
|
---|
167 | self._oBackLogFlushLock = threading.RLock();
|
---|
168 | self._asBackLog = [];
|
---|
169 | self._cchBackLog = 0;
|
---|
170 | self._secTsBackLogFlush = utils.timestampSecond();
|
---|
171 |
|
---|
172 | # Init super.
|
---|
173 | TestBoxBaseTask.__init__(self, oTestBoxScript, cSecTimeout, fnThreadProc);
|
---|
174 |
|
---|
175 | def terminate(self, cSecTimeout = kcSecCleanupTimeout):
|
---|
176 | """ Reimplement with higher default timeout. """
|
---|
177 | return TestBoxBaseTask.terminate(self, cSecTimeout);
|
---|
178 |
|
---|
179 | def _logFlush(self, oGivenConnection = None):
|
---|
180 | """
|
---|
181 | Flushes the log to the test manager.
|
---|
182 |
|
---|
183 | No exceptions.
|
---|
184 | """
|
---|
185 | fRc = True;
|
---|
186 |
|
---|
187 | with self._oBackLogFlushLock:
|
---|
188 | # Grab the current back log.
|
---|
189 | with self._oBackLogLock:
|
---|
190 | asBackLog = self._asBackLog;
|
---|
191 | self._asBackLog = [];
|
---|
192 | self._cchBackLog = 0;
|
---|
193 | self._secTsBackLogFlush = utils.timestampSecond();
|
---|
194 |
|
---|
195 | # If there is anything to flush, flush it.
|
---|
196 | if asBackLog:
|
---|
197 | sBody = '';
|
---|
198 | for sLine in asBackLog:
|
---|
199 | sBody += sLine + '\n';
|
---|
200 |
|
---|
201 | oConnection = None;
|
---|
202 | try:
|
---|
203 | if oGivenConnection is None:
|
---|
204 | oConnection = self._oTestBoxScript.openTestManagerConnection();
|
---|
205 | oConnection.postRequest(constants.tbreq.LOG_MAIN, {constants.tbreq.LOG_PARAM_BODY: sBody});
|
---|
206 | oConnection.close();
|
---|
207 | else:
|
---|
208 | oGivenConnection.postRequest(constants.tbreq.LOG_MAIN, {constants.tbreq.LOG_PARAM_BODY: sBody});
|
---|
209 | except Exception as oXcpt:
|
---|
210 | testboxcommons.log('_logFlush error: %s' % (oXcpt,));
|
---|
211 | if len(sBody) < self.kcchMaxBackLog * 4:
|
---|
212 | with self._oBackLogLock:
|
---|
213 | asBackLog.extend(self._asBackLog);
|
---|
214 | self._asBackLog = asBackLog;
|
---|
215 | # Don't restore _cchBackLog as there is no point in retrying immediately.
|
---|
216 | if oConnection is not None: # Be kind to apache.
|
---|
217 | try: oConnection.close();
|
---|
218 | except: pass;
|
---|
219 | fRc = False;
|
---|
220 |
|
---|
221 | return fRc;
|
---|
222 |
|
---|
223 | def flushLogOnConnection(self, oConnection):
|
---|
224 | """
|
---|
225 | Attempts to flush the logon the given connection.
|
---|
226 |
|
---|
227 | No exceptions.
|
---|
228 | """
|
---|
229 | return self._logFlush(oConnection);
|
---|
230 |
|
---|
231 | def _logInternal(self, sMessage, fPrefix = True, fFlushCheck = False):
|
---|
232 | """
|
---|
233 | Internal logging.
|
---|
234 | Won't flush the backlog, returns a flush indicator so the caller can
|
---|
235 | do it instead.
|
---|
236 | """
|
---|
237 | if fPrefix:
|
---|
238 | try:
|
---|
239 | oNow = datetime.utcnow();
|
---|
240 | sTs = '%02u:%02u:%02u.%06u ' % (oNow.hour, oNow.minute, oNow.second, oNow.microsecond);
|
---|
241 | except Exception as oXcpt:
|
---|
242 | sTs = 'oXcpt=%s ' % (oXcpt);
|
---|
243 | sFullMsg = sTs + sMessage;
|
---|
244 | else:
|
---|
245 | sFullMsg = sMessage;
|
---|
246 |
|
---|
247 | with self._oBackLogLock:
|
---|
248 | self._asBackLog.append(sFullMsg);
|
---|
249 | cchBackLog = self._cchBackLog + len(sFullMsg) + 1;
|
---|
250 | self._cchBackLog = cchBackLog;
|
---|
251 | secTsBackLogFlush = self._secTsBackLogFlush;
|
---|
252 |
|
---|
253 | testboxcommons.log(sFullMsg);
|
---|
254 | return fFlushCheck \
|
---|
255 | and ( cchBackLog >= self.kcchMaxBackLog \
|
---|
256 | or utils.timestampSecond() - secTsBackLogFlush >= self.kcSecBackLogFlush);
|
---|
257 |
|
---|
258 | def _log(self, sMessage):
|
---|
259 | """
|
---|
260 | General logging function, will flush.
|
---|
261 | """
|
---|
262 | if self._logInternal(sMessage, fFlushCheck = True):
|
---|
263 | self._logFlush();
|
---|
264 | return True;
|
---|
265 |
|
---|
266 | def _reportDone(self, sResult):
|
---|
267 | """
|
---|
268 | Report EXEC job done to the test manager.
|
---|
269 |
|
---|
270 | sResult is a value from constants.result.
|
---|
271 | """
|
---|
272 | ## @todo optimize this to use one server connection.
|
---|
273 |
|
---|
274 | #
|
---|
275 | # Log it.
|
---|
276 | #
|
---|
277 | assert sResult in constants.result.g_kasValidResults;
|
---|
278 | self._log('Done %s' % (sResult,));
|
---|
279 |
|
---|
280 | #
|
---|
281 | # Report it.
|
---|
282 | #
|
---|
283 | fRc = True;
|
---|
284 | secStart = utils.timestampSecond();
|
---|
285 | while True:
|
---|
286 | self._logFlush(); ## @todo Combine this with EXEC_COMPLETED.
|
---|
287 | oConnection = None;
|
---|
288 | try:
|
---|
289 | oConnection = self._oTestBoxScript.openTestManagerConnection();
|
---|
290 | oConnection.postRequest(constants.tbreq.EXEC_COMPLETED, {constants.tbreq.EXEC_COMPLETED_PARAM_RESULT: sResult});
|
---|
291 | oConnection.close();
|
---|
292 | except Exception as oXcpt:
|
---|
293 | if utils.timestampSecond() - secStart < self.ksecTestManagerTimeout:
|
---|
294 | self._log('_reportDone exception (%s) - retrying...' % (oXcpt,));
|
---|
295 | time.sleep(2);
|
---|
296 | continue;
|
---|
297 | self._log('_reportDone error: %s' % (oXcpt,));
|
---|
298 | if oConnection is not None: # Be kind to apache.
|
---|
299 | try: oConnection.close();
|
---|
300 | except: pass;
|
---|
301 | fRc = False;
|
---|
302 | break;
|
---|
303 |
|
---|
304 | #
|
---|
305 | # Mark the task as completed.
|
---|
306 | #
|
---|
307 | self._complete();
|
---|
308 | return fRc;
|
---|
309 |
|
---|
310 | def _assembleArguments(self, sAction, fWithInterpreter = True):
|
---|
311 | """
|
---|
312 | Creates an argument array for subprocess.Popen, splitting the
|
---|
313 | sScriptCmdLine like bourne shell would.
|
---|
314 | fWithInterpreter is used (False) when checking that the script exists.
|
---|
315 |
|
---|
316 | Returns None on bad input.
|
---|
317 | """
|
---|
318 |
|
---|
319 | #
|
---|
320 | # This is a good place to export the test set id to the environment.
|
---|
321 | #
|
---|
322 | os.environ['TESTBOX_TEST_SET_ID'] = str(self._idResult);
|
---|
323 | cTimeoutLeft = utils.timestampSecond() - self._tsSecStarted;
|
---|
324 | cTimeoutLeft = 0 if cTimeoutLeft >= self._cSecTimeout else self._cSecTimeout - cTimeoutLeft;
|
---|
325 | os.environ['TESTBOX_TIMEOUT'] = str(cTimeoutLeft);
|
---|
326 | os.environ['TESTBOX_TIMEOUT_ABS'] = str(self._tsSecStarted + self._cSecTimeout);
|
---|
327 |
|
---|
328 | #
|
---|
329 | # Do replacements and split the command line into arguments.
|
---|
330 | #
|
---|
331 | if self._sScriptCmdLine.find('@ACTION@') >= 0:
|
---|
332 | sCmdLine = self._sScriptCmdLine.replace('@ACTION@', sAction);
|
---|
333 | else:
|
---|
334 | sCmdLine = self._sScriptCmdLine + ' ' + sAction;
|
---|
335 | for sVar in [ 'TESTBOX_PATH_BUILDS', 'TESTBOX_PATH_RESOURCES', 'TESTBOX_PATH_SCRATCH', 'TESTBOX_PATH_SCRIPTS',
|
---|
336 | 'TESTBOX_PATH_UPLOAD', 'TESTBOX_UUID', 'TESTBOX_REPORTER', 'TESTBOX_ID', 'TESTBOX_TEST_SET_ID',
|
---|
337 | 'TESTBOX_TIMEOUT', 'TESTBOX_TIMEOUT_ABS' ]:
|
---|
338 | if sCmdLine.find('${' + sVar + '}') >= 0:
|
---|
339 | sCmdLine = sCmdLine.replace('${' + sVar + '}', os.environ[sVar]);
|
---|
340 |
|
---|
341 | asArgs = utils.argsSplit(sCmdLine);
|
---|
342 |
|
---|
343 | #
|
---|
344 | # Massage argv[0]:
|
---|
345 | # - Convert portable slashes ('/') to the flavor preferred by the
|
---|
346 | # OS we're currently running on.
|
---|
347 | # - Run python script thru the current python interpreter (important
|
---|
348 | # on systems that doesn't sport native hash-bang script execution).
|
---|
349 | #
|
---|
350 | asArgs[0] = asArgs[0].replace('/', os.path.sep);
|
---|
351 | if not os.path.isabs(asArgs[0]):
|
---|
352 | asArgs[0] = os.path.join(self._oTestBoxScript.getPathScripts(), asArgs[0]);
|
---|
353 |
|
---|
354 | if asArgs[0].endswith('.py') and fWithInterpreter:
|
---|
355 | if sys.executable:
|
---|
356 | asArgs.insert(0, sys.executable);
|
---|
357 | else:
|
---|
358 | asArgs.insert(0, 'python');
|
---|
359 |
|
---|
360 | return asArgs;
|
---|
361 |
|
---|
362 | def _outputThreadProc(self, oChild, oStdOut, sAction):
|
---|
363 | """
|
---|
364 | Thread procedure for the thread that reads the output of the child
|
---|
365 | process. We use a dedicated thread for this purpose since non-blocking
|
---|
366 | I/O may be hard to keep portable according to hints around the web...
|
---|
367 | """
|
---|
368 | oThread = oChild.oOutputThread;
|
---|
369 | while not oThread.fPleaseQuit:
|
---|
370 | # Get a line.
|
---|
371 | try:
|
---|
372 | sLine = oStdOut.readline();
|
---|
373 | except Exception as oXcpt:
|
---|
374 | self._log('child (%s) pipe I/O error: %s' % (sAction, oXcpt,));
|
---|
375 | break;
|
---|
376 |
|
---|
377 | # EOF?
|
---|
378 | if not sLine:
|
---|
379 | break;
|
---|
380 |
|
---|
381 | # Strip trailing new line (DOS and UNIX).
|
---|
382 | if sLine.endswith("\r\n"):
|
---|
383 | sLine = sLine[0:-2];
|
---|
384 | elif sLine.endswith("\n"):
|
---|
385 | sLine = sLine[0:-1];
|
---|
386 |
|
---|
387 | # Log it.
|
---|
388 | if self._logInternal(sLine, fPrefix = False, fFlushCheck = True):
|
---|
389 | self._logFlush();
|
---|
390 |
|
---|
391 | # Close the stdout pipe in case we were told to get lost.
|
---|
392 | try:
|
---|
393 | oStdOut.close();
|
---|
394 | except Exception as oXcpt:
|
---|
395 | self._log('warning: Exception closing stdout pipe of "%s" child: %s' % (sAction, oXcpt,));
|
---|
396 |
|
---|
397 | # This is a bit hacky, but try reap the child so it won't hang as
|
---|
398 | # defunkt during abort/timeout.
|
---|
399 | if oChild.poll() is None:
|
---|
400 | for _ in range(15):
|
---|
401 | time.sleep(0.2);
|
---|
402 | if oChild.poll() is not None:
|
---|
403 | break;
|
---|
404 |
|
---|
405 | oChild = None;
|
---|
406 | return None;
|
---|
407 |
|
---|
408 | def _spawnChild(self, sAction):
|
---|
409 | """
|
---|
410 | Spawns the child process, returning success indicator + child object.
|
---|
411 | """
|
---|
412 |
|
---|
413 | # Argument list.
|
---|
414 | asArgs = self._assembleArguments(sAction)
|
---|
415 | if asArgs is None:
|
---|
416 | self._log('Malformed command line: "%s"' % (self._sScriptCmdLine,));
|
---|
417 | return (False, None);
|
---|
418 |
|
---|
419 | # Spawn child.
|
---|
420 | try:
|
---|
421 | oChild = utils.processPopenSafe(asArgs,
|
---|
422 | shell = False,
|
---|
423 | bufsize = -1,
|
---|
424 | stdout = subprocess.PIPE,
|
---|
425 | stderr = subprocess.STDOUT,
|
---|
426 | cwd = self._oTestBoxScript.getPathSpill(),
|
---|
427 | universal_newlines = True,
|
---|
428 | close_fds = utils.getHostOs() != 'win',
|
---|
429 | preexec_fn = (None if utils.getHostOs() in ['win', 'os2']
|
---|
430 | else os.setsid)); # pylint: disable=no-member
|
---|
431 | except Exception as oXcpt:
|
---|
432 | self._log('Error creating child process %s: %s' % (asArgs, oXcpt));
|
---|
433 | return (False, None);
|
---|
434 |
|
---|
435 | oChild.sTestBoxScriptAction = sAction;
|
---|
436 |
|
---|
437 | # Start output thread, extending the child object to keep track of it.
|
---|
438 | oChild.oOutputThread = threading.Thread(target=self._outputThreadProc, args=(oChild, oChild.stdout, sAction))
|
---|
439 | oChild.oOutputThread.daemon = True;
|
---|
440 | oChild.oOutputThread.fPleaseQuit = False; # Our extension.
|
---|
441 | oChild.oOutputThread.start();
|
---|
442 |
|
---|
443 | return (True, oChild);
|
---|
444 |
|
---|
445 | def _monitorChild(self, cSecTimeout, fTryKillCommand = True, oChild = None):
|
---|
446 | """
|
---|
447 | Monitors the child process. If the child executes longer that
|
---|
448 | cSecTimeout allows, we'll terminate it.
|
---|
449 | Returns Success indicator and constants.result value.
|
---|
450 | """
|
---|
451 |
|
---|
452 | if oChild is None:
|
---|
453 | oChild = self._oChild;
|
---|
454 |
|
---|
455 | iProcGroup = oChild.pid;
|
---|
456 | if utils.getHostOs() in ['win', 'os2'] or iProcGroup <= 0:
|
---|
457 | iProcGroup = -2;
|
---|
458 |
|
---|
459 | #
|
---|
460 | # Do timeout processing and check the health of the child.
|
---|
461 | #
|
---|
462 | sResult = constants.result.PASSED;
|
---|
463 | seStarted = utils.timestampSecond();
|
---|
464 | while True:
|
---|
465 | # Check status.
|
---|
466 | iRc = oChild.poll();
|
---|
467 | if iRc is not None:
|
---|
468 | self._log('Child doing "%s" completed with exit code %d' % (oChild.sTestBoxScriptAction, iRc));
|
---|
469 | oChild.oOutputThread.join(self.kcSecFinalOutputTimeout);
|
---|
470 |
|
---|
471 | if oChild is self._oChild:
|
---|
472 | self._oChild = None;
|
---|
473 |
|
---|
474 | if iRc == constants.rtexitcode.SUCCESS:
|
---|
475 | return (True, constants.result.PASSED);
|
---|
476 | if iRc == constants.rtexitcode.SKIPPED:
|
---|
477 | return (True, constants.result.SKIPPED);
|
---|
478 | if iRc == constants.rtexitcode.BAD_TESTBOX:
|
---|
479 | return (False, constants.result.BAD_TESTBOX);
|
---|
480 | return (False, constants.result.FAILED);
|
---|
481 |
|
---|
482 | # Check for abort first, since that has less of a stigma.
|
---|
483 | if self._shouldTerminate() is True:
|
---|
484 | sResult = constants.result.ABORTED;
|
---|
485 | break;
|
---|
486 |
|
---|
487 | # Check timeout.
|
---|
488 | cSecElapsed = utils.timestampSecond() - seStarted;
|
---|
489 | if cSecElapsed > cSecTimeout:
|
---|
490 | self._log('Timeout: %u secs (limit %u secs)' % (cSecElapsed, cSecTimeout));
|
---|
491 | sResult = constants.result.TIMED_OUT;
|
---|
492 | break;
|
---|
493 |
|
---|
494 | # Wait.
|
---|
495 | cSecLeft = cSecTimeout - cSecElapsed;
|
---|
496 | oChild.oOutputThread.join(15 if cSecLeft > 15 else (cSecLeft + 1));
|
---|
497 |
|
---|
498 | #
|
---|
499 | # If the child is still alive, try use the abort command to stop it
|
---|
500 | # very gently. This let's the testdriver clean up daemon processes
|
---|
501 | # and such that our code below won't catch.
|
---|
502 | #
|
---|
503 | if fTryKillCommand and oChild.poll() is None:
|
---|
504 | self._log('Attempting to abort child...');
|
---|
505 | (fRc2, oAbortChild) = self._spawnChild('abort');
|
---|
506 | if oAbortChild is not None and fRc2 is True:
|
---|
507 | self._monitorChild(self.kcSecAbortTimeout, False, oAbortChild);
|
---|
508 | oAbortChild = None;
|
---|
509 |
|
---|
510 | #
|
---|
511 | # If the child is still alive, try the polite way.
|
---|
512 | #
|
---|
513 | if oChild.poll() is None:
|
---|
514 | self._log('Attempting to terminate child doing "%s"...' % (oChild.sTestBoxScriptAction,));
|
---|
515 |
|
---|
516 | if iProcGroup > 0:
|
---|
517 | try:
|
---|
518 | os.killpg(iProcGroup, signal.SIGTERM); # pylint: disable=no-member
|
---|
519 | except Exception as oXcpt:
|
---|
520 | self._log('killpg() failed: %s' % (oXcpt,));
|
---|
521 |
|
---|
522 | try:
|
---|
523 | self._oChild.terminate();
|
---|
524 | oChild.oOutputThread.join(self.kcSecTerminateOutputTimeout);
|
---|
525 | except Exception as oXcpt:
|
---|
526 | self._log('terminate() failed: %s' % (oXcpt,));
|
---|
527 |
|
---|
528 | #
|
---|
529 | # If the child doesn't respond to polite, kill it. Always do a killpg
|
---|
530 | # should there be any processes left in the group.
|
---|
531 | #
|
---|
532 | if iProcGroup > 0:
|
---|
533 | try:
|
---|
534 | os.killpg(iProcGroup, signal.SIGKILL); # pylint: disable=no-member
|
---|
535 | except Exception as oXcpt:
|
---|
536 | self._log('killpg() failed: %s' % (oXcpt,));
|
---|
537 |
|
---|
538 | if oChild.poll() is None:
|
---|
539 | self._log('Attemting to kill child doing "%s"...' % (oChild.sTestBoxScriptAction,));
|
---|
540 | try:
|
---|
541 | self._oChild.kill();
|
---|
542 | oChild.oOutputThread.join(self.kcSecKillOutputTimeout);
|
---|
543 | except Exception as oXcpt:
|
---|
544 | self._log('kill() failed: %s' % (oXcpt,));
|
---|
545 |
|
---|
546 | #
|
---|
547 | # Give the whole mess a couple of more seconds to respond in case the
|
---|
548 | # output thread exitted prematurely for some weird reason.
|
---|
549 | #
|
---|
550 | if oChild.poll() is None:
|
---|
551 | time.sleep(2);
|
---|
552 | time.sleep(2);
|
---|
553 | time.sleep(2);
|
---|
554 |
|
---|
555 | iRc = oChild.poll();
|
---|
556 | if iRc is not None:
|
---|
557 | self._log('Child doing "%s" aborted with exit code %d' % (oChild.sTestBoxScriptAction, iRc));
|
---|
558 | else:
|
---|
559 | self._log('Child doing "%s" is still running, giving up...' % (oChild.sTestBoxScriptAction,));
|
---|
560 | ## @todo in this case we should probably try reboot the testbox...
|
---|
561 | oChild.oOutputThread.fPleaseQuit = True;
|
---|
562 |
|
---|
563 | if oChild is self._oChild:
|
---|
564 | self._oChild = None;
|
---|
565 | return (False, sResult);
|
---|
566 |
|
---|
567 | def _terminateChild(self):
|
---|
568 | """
|
---|
569 | Terminates the child forcefully.
|
---|
570 | """
|
---|
571 | if self._oChild is not None:
|
---|
572 | pass;
|
---|
573 |
|
---|
574 | def _cleanupAfter(self):
|
---|
575 | """
|
---|
576 | Cleans up after a test failure. (On success, cleanup is implicit.)
|
---|
577 | """
|
---|
578 | assert self._oChild is None;
|
---|
579 |
|
---|
580 | #
|
---|
581 | # Tell the script to clean up.
|
---|
582 | #
|
---|
583 | if self._sScriptCmdLine: # can be empty if cleanup crashed.
|
---|
584 | (fRc, self._oChild) = self._spawnChild('cleanup-after');
|
---|
585 | if fRc is True:
|
---|
586 | (fRc, _) = self._monitorChild(self.kcSecCleanupTimeout, False);
|
---|
587 | self._terminateChild();
|
---|
588 | else:
|
---|
589 | fRc = False;
|
---|
590 |
|
---|
591 | #
|
---|
592 | # Wipe the stuff clean.
|
---|
593 | #
|
---|
594 | fRc2 = self._oTestBoxScript.reinitScratch(fnLog = self._log, cRetries = 6);
|
---|
595 |
|
---|
596 | return fRc and fRc2;
|
---|
597 |
|
---|
598 |
|
---|
599 |
|
---|
600 | class TestBoxCleanupTask(TestBoxTestDriverTask):
|
---|
601 | """
|
---|
602 | Special asynchronous task for cleaning up a stale test when starting the
|
---|
603 | testbox script. It's assumed that the reason for the stale test lies in
|
---|
604 | it causing a panic, reboot, or similar, so we'll also try collect some
|
---|
605 | info about recent system crashes and reboots.
|
---|
606 | """
|
---|
607 |
|
---|
608 | def __init__(self, oTestBoxScript):
|
---|
609 | # Read the old state, throwing a fit if it's invalid.
|
---|
610 | sScriptState = oTestBoxScript.getPathState();
|
---|
611 | sScriptCmdLine = self._readStateFile(os.path.join(sScriptState, 'script-cmdline.txt'));
|
---|
612 | sResultId = self._readStateFile(os.path.join(sScriptState, 'result-id.txt'));
|
---|
613 | try:
|
---|
614 | idResult = int(sResultId);
|
---|
615 | if idResult <= 0 or idResult >= 0x7fffffff:
|
---|
616 | raise Exception('');
|
---|
617 | except:
|
---|
618 | raise Exception('Invalid id value "%s" found in %s' % (sResultId, os.path.join(sScriptState, 'result-id.txt')));
|
---|
619 |
|
---|
620 | sTestBoxId = self._readStateFile(os.path.join(sScriptState, 'testbox-id.txt'));
|
---|
621 | try:
|
---|
622 | self.idTestBox = int(sTestBoxId);
|
---|
623 | if self.idTestBox <= 0 or self.idTestBox >= 0x7fffffff:
|
---|
624 | raise Exception('');
|
---|
625 | except:
|
---|
626 | raise Exception('Invalid id value "%s" found in %s' % (sTestBoxId, os.path.join(sScriptState, 'testbox-id.txt')));
|
---|
627 | self.sTestBoxName = self._readStateFile(os.path.join(sScriptState, 'testbox-name.txt'));
|
---|
628 |
|
---|
629 | # Init super.
|
---|
630 | TestBoxTestDriverTask.__init__(self, oTestBoxScript, self._threadProc, self.kcSecCleanupTimeout,
|
---|
631 | idResult, sScriptCmdLine);
|
---|
632 |
|
---|
633 | @staticmethod
|
---|
634 | def _readStateFile(sPath):
|
---|
635 | """
|
---|
636 | Reads a state file, returning a string on success and otherwise raising
|
---|
637 | an exception.
|
---|
638 | """
|
---|
639 | try:
|
---|
640 | with open(sPath, "rb") as oFile:
|
---|
641 | sStr = oFile.read();
|
---|
642 | sStr = sStr.decode('utf-8');
|
---|
643 | return sStr.strip();
|
---|
644 | except Exception as oXcpt:
|
---|
645 | raise Exception('Failed to read "%s": %s' % (sPath, oXcpt));
|
---|
646 |
|
---|
647 | def _threadProc(self):
|
---|
648 | """
|
---|
649 | Perform the actual clean up on script startup.
|
---|
650 | """
|
---|
651 |
|
---|
652 | #
|
---|
653 | # First make sure we won't repeat this exercise should it turn out to
|
---|
654 | # trigger another reboot/panic/whatever.
|
---|
655 | #
|
---|
656 | sScriptCmdLine = os.path.join(self._oTestBoxScript.getPathState(), 'script-cmdline.txt');
|
---|
657 | try:
|
---|
658 | os.remove(sScriptCmdLine);
|
---|
659 | open(sScriptCmdLine, 'wb').close(); # pylint: disable=consider-using-with
|
---|
660 | except Exception as oXcpt:
|
---|
661 | self._log('Error truncating "%s": %s' % (sScriptCmdLine, oXcpt));
|
---|
662 |
|
---|
663 | #
|
---|
664 | # Report the incident.
|
---|
665 | #
|
---|
666 | self._log('Seems we rebooted!');
|
---|
667 | self._log('script-cmdline="%s"' % (self._sScriptCmdLine));
|
---|
668 | self._log('result-id=%d' % (self._idResult));
|
---|
669 | self._log('testbox-id=%d' % (self.idTestBox));
|
---|
670 | self._log('testbox-name=%s' % (self.sTestBoxName));
|
---|
671 | self._logFlush();
|
---|
672 |
|
---|
673 | # System specific info.
|
---|
674 | sOs = utils.getHostOs();
|
---|
675 | if sOs == 'darwin':
|
---|
676 | self._log('NVRAM Panic Info:\n%s\n' % (self.darwinGetPanicInfo(),));
|
---|
677 |
|
---|
678 | self._logFlush();
|
---|
679 | ## @todo Add some special command for reporting this situation so we get something
|
---|
680 | # useful in the event log.
|
---|
681 |
|
---|
682 | #
|
---|
683 | # Do the cleaning up.
|
---|
684 | #
|
---|
685 | self._cleanupAfter();
|
---|
686 |
|
---|
687 | self._reportDone(constants.result.REBOOTED);
|
---|
688 | return False;
|
---|
689 |
|
---|
690 | def darwinGetPanicInfo(self):
|
---|
691 | """
|
---|
692 | Returns a string with the aapl,panic-info content.
|
---|
693 | """
|
---|
694 | # Retriev the info.
|
---|
695 | try:
|
---|
696 | sRawInfo = utils.processOutputChecked(['nvram', 'aapl,panic-info']);
|
---|
697 | except Exception as oXcpt:
|
---|
698 | return 'exception running nvram: %s' % (oXcpt,);
|
---|
699 |
|
---|
700 | # Decode (%xx) and decompact it (7-bit -> 8-bit).
|
---|
701 | ahDigits = \
|
---|
702 | {
|
---|
703 | '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
|
---|
704 | '8': 8, '9': 9, 'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15,
|
---|
705 | };
|
---|
706 | sInfo = '';
|
---|
707 | off = len('aapl,panic-info') + 1;
|
---|
708 | iBit = 0;
|
---|
709 | bLow = 0;
|
---|
710 |
|
---|
711 | while off < len(sRawInfo):
|
---|
712 | # isprint is used to determine whether to %xx or %c it, so we have to
|
---|
713 | # be a little careful before assuming % sequences are hex bytes.
|
---|
714 | if sRawInfo[off] == '%' \
|
---|
715 | and off + 3 <= len(sRawInfo) \
|
---|
716 | and sRawInfo[off + 1] in ahDigits \
|
---|
717 | and sRawInfo[off + 2] in ahDigits:
|
---|
718 | bCur = ahDigits[sRawInfo[off + 1]] * 0x10 + ahDigits[sRawInfo[off + 2]];
|
---|
719 | off += 3;
|
---|
720 | else:
|
---|
721 | bCur = ord(sRawInfo[off]);
|
---|
722 | off += 1;
|
---|
723 |
|
---|
724 | sInfo += chr(((bCur & (0x7f >> iBit)) << iBit) | bLow);
|
---|
725 | bLow = bCur >> (7 - iBit);
|
---|
726 |
|
---|
727 | if iBit < 6:
|
---|
728 | iBit += 1;
|
---|
729 | else:
|
---|
730 | # Final bit in sequence.
|
---|
731 | sInfo += chr(bLow);
|
---|
732 | bLow = 0;
|
---|
733 | iBit = 0;
|
---|
734 |
|
---|
735 | # Expand shorthand.
|
---|
736 | sInfo = sInfo.replace('@', 'com.apple.');
|
---|
737 | sInfo = sInfo.replace('>', 'com.apple.driver.');
|
---|
738 | sInfo = sInfo.replace('|', 'com.apple.iokit.');
|
---|
739 | sInfo = sInfo.replace('$', 'com.apple.security.');
|
---|
740 | sInfo = sInfo.replace('!A', 'Apple');
|
---|
741 | sInfo = sInfo.replace('!a', 'Action');
|
---|
742 | sInfo = sInfo.replace('!B', 'Bluetooth');
|
---|
743 | sInfo = sInfo.replace('!C', 'Controller');
|
---|
744 | sInfo = sInfo.replace('!F', 'Family');
|
---|
745 | sInfo = sInfo.replace('!I', 'Intel');
|
---|
746 | sInfo = sInfo.replace('!U', 'AppleUSB');
|
---|
747 | sInfo = sInfo.replace('!P', 'Profile');
|
---|
748 |
|
---|
749 | # Done.
|
---|
750 | return sInfo
|
---|
751 |
|
---|
752 |
|
---|
753 | class TestBoxExecTask(TestBoxTestDriverTask):
|
---|
754 | """
|
---|
755 | Implementation of a asynchronous EXEC task.
|
---|
756 |
|
---|
757 | This uses a thread for doing the actual work, i.e. starting and monitoring
|
---|
758 | the child process, processing its output, and more.
|
---|
759 | """
|
---|
760 |
|
---|
761 | def __init__(self, oTestBoxScript, idResult, sScriptZips, sScriptCmdLine, cSecTimeout):
|
---|
762 | """
|
---|
763 | Class instance init
|
---|
764 | """
|
---|
765 | # Init our instance data.
|
---|
766 | self._sScriptZips = sScriptZips;
|
---|
767 |
|
---|
768 | # Init super.
|
---|
769 | TestBoxTestDriverTask.__init__(self, oTestBoxScript, self._threadProc, cSecTimeout, idResult, sScriptCmdLine);
|
---|
770 |
|
---|
771 | @staticmethod
|
---|
772 | def _writeStateFile(sPath, sContent):
|
---|
773 | """
|
---|
774 | Writes a state file, raising an exception on failure.
|
---|
775 | """
|
---|
776 | try:
|
---|
777 | with open(sPath, "wb") as oFile:
|
---|
778 | oFile.write(sContent.encode('utf-8'));
|
---|
779 | oFile.flush();
|
---|
780 | try: os.fsync(oFile.fileno());
|
---|
781 | except: pass;
|
---|
782 | except Exception as oXcpt:
|
---|
783 | raise Exception('Failed to write "%s": %s' % (sPath, oXcpt));
|
---|
784 | return True;
|
---|
785 |
|
---|
786 | @staticmethod
|
---|
787 | def _environTxtContent():
|
---|
788 | """
|
---|
789 | Collects environment variables and values for the environ.txt stat file
|
---|
790 | (for external monitoring tool).
|
---|
791 | """
|
---|
792 | sText = '';
|
---|
793 | for sVar in [ 'TESTBOX_PATH_BUILDS', 'TESTBOX_PATH_RESOURCES', 'TESTBOX_PATH_SCRATCH', 'TESTBOX_PATH_SCRIPTS',
|
---|
794 | 'TESTBOX_PATH_UPLOAD', 'TESTBOX_HAS_HW_VIRT', 'TESTBOX_HAS_NESTED_PAGING', 'TESTBOX_HAS_IOMMU',
|
---|
795 | 'TESTBOX_SCRIPT_REV', 'TESTBOX_CPU_COUNT', 'TESTBOX_MEM_SIZE', 'TESTBOX_SCRATCH_SIZE',
|
---|
796 | 'TESTBOX_WITH_RAW_MODE', 'TESTBOX_WITH_RAW_MODE', 'TESTBOX_MANAGER_URL', 'TESTBOX_UUID',
|
---|
797 | 'TESTBOX_REPORTER', 'TESTBOX_NAME', 'TESTBOX_ID', 'TESTBOX_TEST_SET_ID',
|
---|
798 | 'TESTBOX_TIMEOUT', 'TESTBOX_TIMEOUT_ABS', ]:
|
---|
799 | sValue = os.environ.get(sVar);
|
---|
800 | if sValue:
|
---|
801 | sText += sVar + '=' + sValue + '\n';
|
---|
802 | return sText;
|
---|
803 |
|
---|
804 | def _saveState(self):
|
---|
805 | """
|
---|
806 | Saves the task state on disk so we can launch a TestBoxCleanupTask job
|
---|
807 | if the test should cause system panic or similar.
|
---|
808 |
|
---|
809 | Note! May later be extended to support tests that reboots the host.
|
---|
810 | """
|
---|
811 | sScriptState = self._oTestBoxScript.getPathState();
|
---|
812 | try:
|
---|
813 | self._writeStateFile(os.path.join(sScriptState, 'script-cmdline.txt'), self._sScriptCmdLine);
|
---|
814 | self._writeStateFile(os.path.join(sScriptState, 'result-id.txt'), str(self._idResult));
|
---|
815 | self._writeStateFile(os.path.join(sScriptState, 'testbox-id.txt'), str(self._oTestBoxScript.getTestBoxId()));
|
---|
816 | self._writeStateFile(os.path.join(sScriptState, 'testbox-name.txt'), self._oTestBoxScript.getTestBoxName());
|
---|
817 | self._writeStateFile(os.path.join(sScriptState, 'environ.txt'), self._environTxtContent());
|
---|
818 | except Exception as oXcpt:
|
---|
819 | self._log('Failed to write state: %s' % (oXcpt,));
|
---|
820 | return False;
|
---|
821 | return True;
|
---|
822 |
|
---|
823 | def _downloadAndUnpackScriptZips(self):
|
---|
824 | """
|
---|
825 | Downloads/copies the script ZIPs into TESTBOX_SCRIPT and unzips them to
|
---|
826 | the same directory.
|
---|
827 |
|
---|
828 | Raises no exceptions, returns log + success indicator instead.
|
---|
829 | """
|
---|
830 | sPathScript = self._oTestBoxScript.getPathScripts();
|
---|
831 | asArchives = self._sScriptZips.split(',');
|
---|
832 | for sArchive in asArchives:
|
---|
833 | sArchive = sArchive.strip();
|
---|
834 | if not sArchive:
|
---|
835 | continue;
|
---|
836 |
|
---|
837 | # Figure the destination name (in scripts).
|
---|
838 | sDstFile = webutils.getFilename(sArchive);
|
---|
839 | if not sDstFile \
|
---|
840 | or re.search('[^a-zA-Z0-9 !#$%&\'()@^_`{}~.-]', sDstFile) is not None: # FAT charset sans 128-255 + '.'.
|
---|
841 | self._log('Malformed script zip filename: %s' % (sArchive,));
|
---|
842 | return False;
|
---|
843 | sDstFile = os.path.join(sPathScript, sDstFile);
|
---|
844 |
|
---|
845 | # Do the work.
|
---|
846 | if webutils.downloadFile(sArchive, sDstFile, self._oTestBoxScript.getPathBuilds(), self._log, self._log) is not True:
|
---|
847 | return False;
|
---|
848 | asFiles = utils.unpackFile(sDstFile, sPathScript, self._log, self._log);
|
---|
849 | if asFiles is None:
|
---|
850 | return False;
|
---|
851 |
|
---|
852 | # Since zip files doesn't always include mode masks, set the X bit
|
---|
853 | # of all of them so we can execute binaries and hash-bang scripts.
|
---|
854 | for sFile in asFiles:
|
---|
855 | utils.chmodPlusX(sFile);
|
---|
856 |
|
---|
857 | return True;
|
---|
858 |
|
---|
859 | def _threadProc(self):
|
---|
860 | """
|
---|
861 | Do the work of an EXEC command.
|
---|
862 | """
|
---|
863 |
|
---|
864 | sResult = constants.result.PASSED;
|
---|
865 |
|
---|
866 | #
|
---|
867 | # Start by preparing the scratch directories.
|
---|
868 | #
|
---|
869 | # Note! Failures at this stage are not treated as real errors since
|
---|
870 | # they may be caused by the previous test and other circumstances
|
---|
871 | # so we don't want to go fail a build because of this.
|
---|
872 | #
|
---|
873 | fRc = self._oTestBoxScript.reinitScratch(self._logInternal);
|
---|
874 | fNeedCleanUp = fRc;
|
---|
875 | if fRc is True:
|
---|
876 | fRc = self._downloadAndUnpackScriptZips();
|
---|
877 | testboxcommons.log2('_threadProc: _downloadAndUnpackScriptZips -> %s' % (fRc,));
|
---|
878 | if fRc is not True:
|
---|
879 | sResult = constants.result.BAD_TESTBOX;
|
---|
880 |
|
---|
881 | #
|
---|
882 | # Make sure the script exists.
|
---|
883 | #
|
---|
884 | if fRc is True:
|
---|
885 | sScript = self._assembleArguments('none', fWithInterpreter = False)[0];
|
---|
886 | if not os.path.exists(sScript):
|
---|
887 | self._log('The test driver script "%s" cannot be found.' % (sScript,));
|
---|
888 | sDir = sScript;
|
---|
889 | while len(sDir) > 3:
|
---|
890 | sDir = os.path.dirname(sDir);
|
---|
891 | if os.path.exists(sDir):
|
---|
892 | self._log('First existing parent directory is "%s".' % (sDir,));
|
---|
893 | break;
|
---|
894 | fRc = False;
|
---|
895 |
|
---|
896 | if fRc is True:
|
---|
897 | #
|
---|
898 | # Start testdriver script.
|
---|
899 | #
|
---|
900 | fRc = self._saveState();
|
---|
901 | if fRc:
|
---|
902 | (fRc, self._oChild) = self._spawnChild('all');
|
---|
903 | testboxcommons.log2('_threadProc: _spawnChild -> %s, %s' % (fRc, self._oChild));
|
---|
904 | if fRc:
|
---|
905 | (fRc, sResult) = self._monitorChild(self._cSecTimeout);
|
---|
906 | testboxcommons.log2('_threadProc: _monitorChild -> %s' % (fRc,));
|
---|
907 |
|
---|
908 | # If the run failed, do explicit cleanup unless its a BAD_TESTBOX, since BAD_TESTBOX is
|
---|
909 | # intended for pre-cleanup problems caused by previous test failures. Do a cleanup on
|
---|
910 | # a BAD_TESTBOX could easily trigger an uninstallation error and change status to FAILED.
|
---|
911 | if fRc is not True:
|
---|
912 | if sResult != constants.result.BAD_TESTBOX:
|
---|
913 | testboxcommons.log2('_threadProc: explicit cleanups...');
|
---|
914 | self._terminateChild();
|
---|
915 | self._cleanupAfter();
|
---|
916 | fNeedCleanUp = False;
|
---|
917 | assert self._oChild is None;
|
---|
918 |
|
---|
919 | #
|
---|
920 | # Clean up scratch.
|
---|
921 | #
|
---|
922 | if fNeedCleanUp:
|
---|
923 | if self._oTestBoxScript.reinitScratch(self._logInternal, cRetries = 6) is not True:
|
---|
924 | self._log('post run reinitScratch failed.');
|
---|
925 | fRc = False;
|
---|
926 |
|
---|
927 | #
|
---|
928 | # Report status and everything back to the test manager.
|
---|
929 | #
|
---|
930 | if fRc is False and sResult == constants.result.PASSED:
|
---|
931 | sResult = constants.result.FAILED;
|
---|
932 | self._reportDone(sResult);
|
---|
933 | return fRc;
|
---|
934 |
|
---|