VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 93874

最後變更 在這個檔案從93874是 93874,由 vboxsync 提交於 3 年 前

vsheriff: better detection of "misc: unit tests"

  • 屬性 svn:eol-style 設為 LF
  • 屬性 svn:executable 設為 *
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 89.2 KB
 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 93874 2022-02-21 19:18:26Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2022 Oracle Corporation
20
21This file is part of VirtualBox Open Source Edition (OSE), as
22available from http://www.alldomusa.eu.org. This file is free software;
23you can redistribute it and/or modify it under the terms of the GNU
24General Public License (GPL) as published by the Free Software
25Foundation, in version 2 as it comes in the "COPYING" file of the
26VirtualBox OSE distribution. VirtualBox OSE is distributed in the
27hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
28
29The contents of this file may alternatively be used under the terms
30of the Common Development and Distribution License Version 1.0
31(CDDL) only, as it comes in the "COPYING.CDDL" file of the
32VirtualBox OSE distribution, in which case the provisions of the
33CDDL are applicable instead of those of the GPL.
34
35You may elect to license modified versions of this file under the
36terms and conditions of either the GPL or the CDDL or both.
37"""
38__version__ = "$Revision: 93874 $"
39
40
41# Standard python imports
42import hashlib;
43import os;
44import re;
45import smtplib;
46#import subprocess;
47import sys;
48from email.mime.multipart import MIMEMultipart;
49from email.mime.text import MIMEText;
50from email.utils import COMMASPACE;
51
52if sys.version_info[0] >= 3:
53 from io import BytesIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
54else:
55 from StringIO import StringIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
56from optparse import OptionParser; # pylint: disable=deprecated-module
57from PIL import Image; # pylint: disable=import-error
58
59# Add Test Manager's modules path
60g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
61sys.path.append(g_ksTestManagerDir);
62
63# Test Manager imports
64from common import utils;
65from testmanager.core.db import TMDatabaseConnection;
66from testmanager.core.build import BuildDataEx;
67from testmanager.core.failurereason import FailureReasonLogic;
68from testmanager.core.testbox import TestBoxLogic, TestBoxData;
69from testmanager.core.testcase import TestCaseDataEx;
70from testmanager.core.testgroup import TestGroupData;
71from testmanager.core.testset import TestSetLogic, TestSetData;
72from testmanager.core.testresults import TestResultLogic, TestResultFileData;
73from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
74from testmanager.core.useraccount import UserAccountLogic;
75from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
76 g_ksAlertSubject, g_asAlertList #, g_ksLomPassword;
77
78# Python 3 hacks:
79if sys.version_info[0] >= 3:
80 xrange = range; # pylint: disable=redefined-builtin,invalid-name
81
82
83class VirtualTestSheriffCaseFile(object):
84 """
85 A failure investigation case file.
86
87 """
88
89
90 ## Max log file we'll read into memory. (256 MB)
91 kcbMaxLogRead = 0x10000000;
92
93 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
94 self.oSheriff = oSheriff;
95 self.oTestSet = oTestSet; # TestSetData
96 self.oTree = oTree; # TestResultDataEx
97 self.oBuild = oBuild; # BuildDataEx
98 self.oTestBox = oTestBox; # TestBoxData
99 self.oTestGroup = oTestGroup; # TestGroupData
100 self.oTestCase = oTestCase; # TestCaseDataEx
101 self.sMainLog = ''; # The main log file. Empty string if not accessible.
102 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
103
104 # Generate a case file name.
105 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
106 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
107 % ( self.oTestSet.idTestSet,
108 self.oTestCase.sName,
109 self.oTestBox.sName,
110 self.oTestBox.sOs,
111 self.oTestBox.sOsVersion,
112 self.oTestBox.sCpuArch,
113 self.oTestBox.sCpuName,
114 self.oTestBox.sCpuVendor,
115 self.oBuild.oCat.sProduct,
116 self.oBuild.oCat.sBranch,
117 self.oBuild.oCat.sType,
118 self.oBuild.iRevision, );
119
120 # Investigation notes.
121 self.tReason = None; # None or one of the ktReason_XXX constants.
122 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
123 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
124
125 #
126 # Reason.
127 #
128
129 def noteReason(self, tReason):
130 """ Notes down a possible reason. """
131 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
132 self.tReason = tReason;
133 return True;
134
135 def noteReasonForId(self, tReason, idTestResult, sComment = None):
136 """ Notes down a possible reason for a specific test result. """
137 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
138 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
139 (u' (%s)' % (sComment,)) if sComment is not None else ''));
140 self.dReasonForResultId[idTestResult] = tReason;
141 if sComment is not None:
142 self.dCommentForResultId[idTestResult] = sComment;
143 return True;
144
145
146 #
147 # Test classification.
148 #
149
150 def isVBoxTest(self):
151 """ Test classification: VirtualBox (using the build) """
152 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
153
154 def isVBoxUnitTest(self):
155 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
156 return self.isVBoxTest() \
157 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower().startswith('misc: unit tests'));
158
159 def isVBoxInstallTest(self):
160 """ Test case classification: VirtualBox Guest installation test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('install:');
163
164 def isVBoxUnattendedInstallTest(self):
165 """ Test case classification: VirtualBox Guest installation test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('uinstall:');
168
169 def isVBoxUSBTest(self):
170 """ Test case classification: VirtualBox USB test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('usb:');
173
174 def isVBoxStorageTest(self):
175 """ Test case classification: VirtualBox Storage test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('storage:');
178
179 def isVBoxGAsTest(self):
180 """ Test case classification: VirtualBox Guest Additions test. """
181 return self.isVBoxTest() \
182 and ( self.oTestCase.sName.lower().startswith('guest additions')
183 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
184
185 def isVBoxAPITest(self):
186 """ Test case classification: VirtualBox API test. """
187 return self.isVBoxTest() \
188 and self.oTestCase.sName.lower().startswith('api:');
189
190 def isVBoxBenchmarkTest(self):
191 """ Test case classification: VirtualBox Benchmark test. """
192 return self.isVBoxTest() \
193 and self.oTestCase.sName.lower().startswith('benchmark:');
194
195 def isVBoxSmokeTest(self):
196 """ Test case classification: Smoke test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('smoketest');
199
200 def isVBoxSerialTest(self):
201 """ Test case classification: Smoke test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('serial:');
204
205
206 #
207 # Utility methods.
208 #
209
210 def getMainLog(self):
211 """
212 Tries to read the main log file since this will be the first source of information.
213 """
214 if self.sMainLog:
215 return self.sMainLog;
216 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
217 if oFile is not None:
218 try:
219 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
220 except Exception as oXcpt:
221 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
222 self.sMainLog = '';
223 else:
224 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
225 return self.sMainLog;
226
227 def getLogFile(self, oFile):
228 """
229 Tries to read the given file as a utf-8 log file.
230 oFile is a TestFileDataEx instance.
231 Returns empty string if problems opening or reading the file.
232 """
233 sContent = '';
234 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
235 if oFile is not None:
236 try:
237 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
238 except Exception as oXcpt:
239 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
240 else:
241 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
242 return sContent;
243
244 def getSvcLog(self):
245 """
246 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
247 Note! Returns the first VBoxSVC log file we find.
248 """
249 if not self.sSvcLog:
250 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
251 if aoSvcLogFiles:
252 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
253 return self.sSvcLog;
254
255 def getScreenshotSha256(self, oFile):
256 """
257 Tries to read the given screenshot file, uncompress it, and do SHA-2
258 on the raw pixels.
259 Returns SHA-2 digest string on success, None on failure.
260 """
261 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
262 try:
263 abImageFile = oImgFile.read();
264 except Exception as oXcpt:
265 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
266 else:
267 try:
268 oImage = Image.open(BytesIO(abImageFile));
269 except Exception as oXcpt:
270 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
271 else:
272 try:
273 oHash = hashlib.sha256();
274 if hasattr(oImage, 'tobytes'):
275 oHash.update(oImage.tobytes());
276 else:
277 oHash.update(oImage.tostring()); # pylint: disable=no-member
278 except Exception as oXcpt:
279 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
280 else:
281 return oHash.hexdigest();
282 return None;
283
284
285
286 def isSingleTestFailure(self):
287 """
288 Figure out if this is a single test failing or if it's one of the
289 more complicated ones.
290 """
291 if self.oTree.cErrors == 1:
292 return True;
293 if self.oTree.deepCountErrorContributers() <= 1:
294 return True;
295 return False;
296
297
298
299class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
300 """
301 Add build info into Test Manager database.
302 """
303
304 ## The user account for the virtual sheriff.
305 ksLoginName = 'vsheriff';
306
307 def __init__(self):
308 """
309 Parse command line.
310 """
311 self.oDb = None;
312 self.tsNow = None;
313 self.oTestResultLogic = None;
314 self.oTestSetLogic = None;
315 self.oFailureReasonLogic = None; # FailureReasonLogic;
316 self.oTestResultFailureLogic = None; # TestResultFailureLogic
317 self.oLogin = None;
318 self.uidSelf = -1;
319 self.oLogFile = None;
320 self.asBsodReasons = [];
321 self.asUnitTestReasons = [];
322
323 oParser = OptionParser();
324 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
325 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
326 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
327 help = 'Work period specified in hours. Defauls is 2 hours.');
328 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
329 help = 'Whether to commit the findings to the database. Default is a dry run.');
330 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
331 help = 'Only investigate this one. Accumulates IDs when repeated.');
332 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
333 help = 'Quiet execution');
334 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
335 help = 'Where to log messages.');
336 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
337 help = 'Enables debug mode.');
338
339 (self.oConfig, _) = oParser.parse_args();
340
341 if self.oConfig.sLogFile:
342 self.oLogFile = open(self.oConfig.sLogFile, "a");
343 self.oLogFile.write('VirtualTestSheriff: $Revision: 93874 $ \n');
344
345
346 def eprint(self, sText):
347 """
348 Prints error messages.
349 Returns 1 (for exit code usage.)
350 """
351 print('error: %s' % (sText,));
352 if self.oLogFile is not None:
353 if sys.version_info[0] >= 3:
354 self.oLogFile.write(u'error: %s\n' % (sText,));
355 else:
356 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
357 return 1;
358
359 def dprint(self, sText):
360 """
361 Prints debug info.
362 """
363 if self.oConfig.fDebug:
364 if not self.oConfig.fQuiet:
365 print('debug: %s' % (sText, ));
366 if self.oLogFile is not None:
367 if sys.version_info[0] >= 3:
368 self.oLogFile.write(u'debug: %s\n' % (sText,));
369 else:
370 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
371 return 0;
372
373 def vprint(self, sText):
374 """
375 Prints verbose info.
376 """
377 if not self.oConfig.fQuiet:
378 print('info: %s' % (sText,));
379 if self.oLogFile is not None:
380 if sys.version_info[0] >= 3:
381 self.oLogFile.write(u'info: %s\n' % (sText,));
382 else:
383 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
384 return 0;
385
386 def getFailureReason(self, tReason):
387 """ Gets the failure reason object for tReason. """
388 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
389
390 def selfCheck(self):
391 """ Does some self checks, looking up things we expect to be in the database and such. """
392 rcExit = 0;
393 for sAttr in dir(self.__class__):
394 if sAttr.startswith('ktReason_'):
395 tReason = getattr(self.__class__, sAttr);
396 oFailureReason = self.getFailureReason(tReason);
397 if oFailureReason is None:
398 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
399 % (tReason[1], tReason[0],));
400
401 # Check the user account as well.
402 if self.oLogin is None:
403 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
404 if oLogin is None:
405 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
406 return rcExit;
407
408 def sendEmailAlert(self, uidAuthor, sBodyText):
409 """
410 Sends email alert.
411 """
412
413 # Get author email
414 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
415 sFrom = self.oDb.fetchOne();
416 if sFrom is not None:
417 sFrom = sFrom[0];
418 else:
419 sFrom = g_ksAlertFrom;
420
421 # Gather recipient list.
422 asEmailList = [];
423 for sUser in g_asAlertList:
424 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
425 sEmail = self.oDb.fetchOne();
426 if sEmail:
427 asEmailList.append(sEmail[0]);
428 if not asEmailList:
429 return self.eprint('No email addresses to send alter to!');
430
431 # Compose the message.
432 oMsg = MIMEMultipart();
433 oMsg['From'] = sFrom;
434 oMsg['To'] = COMMASPACE.join(asEmailList);
435 oMsg['Subject'] = g_ksAlertSubject;
436 oMsg.attach(MIMEText(sBodyText, 'plain'))
437
438 # Try send it.
439 try:
440 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
441 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
442 oSMTP.quit()
443 except smtplib.SMTPException as oXcpt:
444 return self.eprint('Failed to send mail: %s' % (oXcpt,));
445
446 return 0;
447
448 def badTestBoxManagement(self):
449 """
450 Looks for bad test boxes and first tries once to reboot them then disables them.
451 """
452 rcExit = 0;
453
454 #
455 # We skip this entirely if we're running in the past and not in harmless debug mode.
456 #
457 if self.oConfig.cStartHoursAgo != 0 \
458 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
459 return rcExit;
460 tsNow = self.tsNow if self.oConfig.fDebug else None;
461 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
462 oTestBoxLogic = TestBoxLogic(self.oDb);
463
464 #
465 # Generate a list of failures reasons we consider bad-testbox behavior.
466 #
467 aidFailureReasons = [
468 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
469 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
470 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
471 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
472 ];
473
474 #
475 # Get list of bad test boxes for given period and check them out individually.
476 #
477 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
478 aidFailureReasons = aidFailureReasons);
479 for idTestBox in aidBadTestBoxes:
480 # Skip if the testbox is already disabled or has a pending reboot command.
481 try:
482 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
483 except Exception as oXcpt:
484 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
485 continue;
486 if not oTestBox.fEnabled:
487 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
488 % ( idTestBox, oTestBox.sName, ));
489 continue;
490 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
491 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
492 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
493 continue;
494
495 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
496 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
497 cOkay = 0;
498 cBad = 0;
499 iFirstOkay = len(aoSets);
500 for iSet, oSet in enumerate(aoSets):
501 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
502 cBad += 1;
503 else:
504 # Check for bad failure reasons.
505 oFailure = None;
506 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
507 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
508 aoFailedResults = oTree.getListOfFailures();
509 for oFailedResult in aoFailedResults:
510 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
511 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
512 break;
513 oFailure = None;
514 if oFailure is not None:
515 cBad += 1;
516 else:
517 # This is an okay test result then.
518 ## @todo maybe check the elapsed time here, it could still be a bad run?
519 cOkay += 1;
520 if iFirstOkay > iSet:
521 iFirstOkay = iSet;
522 if iSet > 10:
523 break;
524
525 # We react if there are two or more bad-testbox statuses at the head of the
526 # history and at least three in the last 10 results.
527 if iFirstOkay >= 2 and cBad > 2:
528 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
529 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
530 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
531 self.vprint(sComment);
532 self.sendEmailAlert(self.uidSelf, sComment);
533 if self.oConfig.fRealRun is True:
534 try:
535 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
536 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
537 % (iFirstOkay, cBad, cOkay),);
538 except Exception as oXcpt:
539 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
540 else:
541 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
542 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
543 self.vprint(sComment);
544 self.sendEmailAlert(self.uidSelf, sComment);
545 if self.oConfig.fRealRun is True:
546 try:
547 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
548 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
549 % (iFirstOkay, cBad, cOkay),);
550 except Exception as oXcpt:
551 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
552 else:
553 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
554 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
555
556 ## @todo r=bird: review + rewrite;
557 ## - no selecting here, that belongs in the core/*.py files.
558 ## - preserve existing comments.
559 ## - doing way too much in the try/except block.
560 ## - No password quoting in the sshpass command that always fails (127).
561 ## - Timeout is way to low. testboxmem1 need more than 10 min to take a dump, ages to
562 ## get thru POST and another 5 just to time out in grub. Should be an hour or so.
563 ## Besides, it need to be constant elsewhere in the file, not a variable here.
564 ##
565 ##
566 ## Reset hanged testboxes
567 ##
568 #cStatusTimeoutMins = 10;
569 #
570 #self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
571 # ' FROM TestBoxStatuses, TestBoxes\n'
572 # ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
573 # ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
574 # ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
575 # ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
576 #for idTestBox in self.oDb.fetchAll():
577 # idTestBox = idTestBox[0];
578 # try:
579 # oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
580 # except Exception as oXcpt:
581 # rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
582 # continue;
583 # # Skip if the testbox is already disabled, already reset or there's no iLOM
584 # if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
585 # self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
586 # % ( idTestBox, oTestBox.sName, ));
587 # continue;
588 # ## @todo get iLOM credentials from a table?
589 # sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
590 # try:
591 # oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
592 # sStdout = oPs.communicate()[0];
593 # iRC = oPs.wait();
594 #
595 # oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
596 # oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
597 #
598 # sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
599 # self.vprint(sComment);
600 # self.sendEmailAlert(self.uidSelf, sComment);
601 #
602 # except Exception as oXcpt:
603 # rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
604 #
605 return rcExit;
606
607
608 ## @name Failure reasons we know.
609 ## @{
610
611 ktReason_Add_Installer_Win_Failed = ( 'Additions', 'Win GA install' );
612 ktReason_Add_ShFl_Automount = ( 'Additions', 'Automounting' );
613 ktReason_Add_ShFl_FsPerf = ( 'Additions', 'FsPerf' );
614 ktReason_Add_ShFl_FsPerf_Abend = ( 'Additions', 'FsPerf abend' );
615 ktReason_Add_GstCtl_Preparations = ( 'Additions', 'GstCtl preparations' );
616 ktReason_Add_GstCtl_SessionBasics = ( 'Additions', 'Session basics' );
617 ktReason_Add_GstCtl_SessionProcRefs = ( 'Additions', 'Session process' );
618 ktReason_Add_GstCtl_Session_Reboot = ( 'Additions', 'Session reboot' );
619 ktReason_Add_GstCtl_CopyFromGuest_Timeout = ( 'Additions', 'CopyFromGuest timeout' );
620 ktReason_Add_GstCtl_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest timeout' );
621 ktReason_Add_GstCtl_CopyToGuest_DstEmpty = ( 'Additions', 'CopyToGuest dst empty' );
622 ktReason_Add_GstCtl_CopyToGuest_DstExists = ( 'Additions', 'CopyToGuest dst exists' );
623 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
624 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
625 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
626 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
627 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
628 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
629 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
630 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
631 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
632 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
633 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
634 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
635 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
636 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
637 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
638 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
639 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
640 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
641 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
642 ktReason_Host_InstallationWantReboot = ( 'Host', 'Installation want reboot' );
643 ktReason_Host_InvalidPackage = ( 'Host', 'ERROR_INSTALL_PACKAGE_INVALID' );
644 ktReason_Host_InstallSourceAbsent = ( 'Host', 'ERROR_INSTALL_SOURCE_ABSENT' );
645 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
646 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
647 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
648 ktReason_Host_win32com_gen_py = ( 'Host', 'win32com.gen_py' );
649 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
650 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
651 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
652 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
653 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
654 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
655 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
656 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
657 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
658 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
659 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
660 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
661 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
662 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
663 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
664 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
665 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
666 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
667 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
668 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
669 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
670 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
671 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
672 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
673 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
674 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
675 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
676 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
677 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
678 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
679 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
680 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
681 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
682 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
683 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
684 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
685 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
686 ## @}
687
688 ## BSOD category.
689 ksBsodCategory = 'BSOD';
690 ## Special reason indicating that the flesh and blood sheriff has work to do.
691 ksBsodAddNew = 'Add new BSOD';
692
693 ## Unit test category.
694 ksUnitTestCategory = 'Unit';
695 ## Special reason indicating that the flesh and blood sheriff has work to do.
696 ksUnitTestAddNew = 'Add new';
697
698 ## Used for indica that we shouldn't report anything for this test result ID and
699 ## consider promoting the previous error to test set level if it's the only one.
700 ktHarmless = ( 'Probably', 'Caused by previous error' );
701
702
703 def caseClosed(self, oCaseFile):
704 """
705 Reports the findings in the case and closes it.
706 """
707 #
708 # Log it and create a dReasonForReasultId we can use below.
709 #
710 dCommentForResultId = oCaseFile.dCommentForResultId;
711 if oCaseFile.dReasonForResultId:
712 # Must weed out ktHarmless.
713 dReasonForResultId = {};
714 for idKey, tReason in oCaseFile.dReasonForResultId.items():
715 if tReason is not self.ktHarmless:
716 dReasonForResultId[idKey] = tReason;
717 if not dReasonForResultId:
718 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
719 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
720 return False;
721
722 # Try promote to single reason.
723 atValues = dReasonForResultId.values();
724 fSingleReason = True;
725 if len(dReasonForResultId) == 1 and next(iter(dReasonForResultId.keys())) != oCaseFile.oTestSet.idTestResult:
726 self.dprint(u'Promoting single reason to whole set: %s' % (next(iter(atValues)),));
727 elif len(dReasonForResultId) > 1 and len(atValues) == list(atValues).count(next(iter(atValues))):
728 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), next(iter(atValues))));
729 else:
730 fSingleReason = False;
731 if fSingleReason:
732 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(atValues)), };
733 if dCommentForResultId:
734 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(dCommentForResultId.values())), };
735 elif oCaseFile.tReason is not None:
736 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
737 else:
738 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
739 return False;
740
741 self.vprint(u'Closing %s with following reason%s: %s'
742 % ( oCaseFile.sName, 's' if len(dReasonForResultId) > 1 else '', dReasonForResultId, ));
743
744 #
745 # Add the test failure reason record(s).
746 #
747 for idTestResult, tReason in dReasonForResultId.items():
748 oFailureReason = self.getFailureReason(tReason);
749 if oFailureReason is not None:
750 sComment = 'Set by $Revision: 93874 $' # Handy for reverting later.
751 if idTestResult in dCommentForResultId:
752 sComment += ': ' + dCommentForResultId[idTestResult];
753
754 oAdd = TestResultFailureData();
755 oAdd.initFromValues(idTestResult = idTestResult,
756 idFailureReason = oFailureReason.idFailureReason,
757 uidAuthor = self.uidSelf,
758 idTestSet = oCaseFile.oTestSet.idTestSet,
759 sComment = sComment,);
760 if self.oConfig.fRealRun:
761 try:
762 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
763 except Exception as oXcpt:
764 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
765 % (oXcpt, oAdd, oCaseFile.sLongName,));
766 else:
767 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
768 return True;
769
770 #
771 # Tools for assiting log parsing.
772 #
773
774 @staticmethod
775 def matchFollowedByLines(sStr, off, asFollowingLines):
776 """ Worker for isThisFollowedByTheseLines. """
777
778 # Advance off to the end of the line.
779 off = sStr.find('\n', off);
780 if off < 0:
781 return False;
782 off += 1;
783
784 # Match each string with the subsequent lines.
785 for iLine, sLine in enumerate(asFollowingLines):
786 offEnd = sStr.find('\n', off);
787 if offEnd < 0:
788 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
789 if sLine and sStr.find(sLine, off, offEnd) < 0:
790 return False;
791
792 # next line.
793 off = offEnd + 1;
794
795 return True;
796
797 @staticmethod
798 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
799 """
800 Looks for a line contining sFirst which is then followed by lines
801 with the strings in asFollowingLines. (No newline chars anywhere!)
802 Returns True / False.
803 """
804 off = sStr.find(sFirst, 0);
805 while off >= 0:
806 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
807 return True;
808 off = sStr.find(sFirst, off + 1);
809 return False;
810
811 @staticmethod
812 def findAndReturnRestOfLine(sHaystack, sNeedle):
813 """
814 Looks for sNeedle in sHaystack.
815 Returns The text following the needle up to the end of the line.
816 Returns None if not found.
817 """
818 if sHaystack is None:
819 return None;
820 off = sHaystack.find(sNeedle);
821 if off < 0:
822 return None;
823 off += len(sNeedle)
824 offEol = sHaystack.find('\n', off);
825 if offEol < 0:
826 offEol = len(sHaystack);
827 return sHaystack[off:offEol]
828
829 @staticmethod
830 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
831 """
832 Looks for sNeedle in zeroe or more haystacks (asHaystack).
833 Returns The text following the first needed found up to the end of the line.
834 Returns None if not found.
835 """
836 for sHaystack in asHaystacks:
837 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
838 if sRet is not None:
839 return sRet;
840 return None;
841
842
843 #
844 # The investigative units.
845 #
846
847 katSimpleInstallUninstallMainLogReasons = [
848 # ( Whether to stop on hit, reason tuple, needle text. )
849 ( False, ktReason_Host_LeftoverService,
850 'SERVICE_NAME: vbox' ),
851 ( False, ktReason_Host_LeftoverService,
852 'Seems installation was skipped. Old version lurking behind? Not the fault of this build/test run!'),
853 ];
854
855 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
856 'darwin': [
857 # ( Whether to stop on hit, reason tuple, needle text. )
858 ( True, ktReason_Host_DriverNotUnloading,
859 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
860 ],
861 'linux': [
862 # ( Whether to stop on hit, reason tuple, needle text. )
863 ( True, ktReason_Host_DriverNotCompilable,
864 'This system is not currently set up to build kernel modules' ),
865 ( True, ktReason_Host_DriverNotCompilable,
866 'This system is currently not set up to build kernel modules' ),
867 ( True, ktReason_Host_InstallationFailed,
868 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
869 ( True, ktReason_Host_DriverNotUnloading,
870 'Cannot unload module vboxdrv'),
871 ],
872 'solaris': [
873 # ( Whether to stop on hit, reason tuple, needle text. )
874 ( True, ktReason_Host_DriverNotUnloading, 'can\'t unload the module: Device busy' ),
875 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: Host module ...FAILED!' ),
876 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: NetFilter (Crossbow) module ...FAILED!' ),
877 ( True, ktReason_Host_InstallationFailed, 'svcadm: Couldn\'t bind to svc.configd.' ),
878 ( True, ktReason_Host_InstallationFailed, 'pkgadd: ERROR: postinstall script did not complete successfully' ),
879 ],
880 'win': [
881 # ( Whether to stop on hit, reason tuple, needle text. )
882 ( True, ktReason_Host_InstallationWantReboot, 'ERROR_SUCCESS_REBOOT_REQUIRED' ),
883 ( False, ktReason_Host_InstallationFailed, 'Installation error.' ),
884 ( True, ktReason_Host_InvalidPackage, 'Uninstaller failed, exit code: 1620' ),
885 ( True, ktReason_Host_InstallSourceAbsent, 'Uninstaller failed, exit code: 1612' ),
886 ],
887 };
888
889
890 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
891 """
892 Investigates an install or uninstall failure.
893
894 We lump the two together since the installation typically also performs
895 an uninstall first and will be seeing similar issues to the uninstall.
896 """
897 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
898
899 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
900 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
901 return True;
902
903 atSimple = self.katSimpleInstallUninstallMainLogReasons;
904 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
905 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
906
907 fFoundSomething = False;
908 for fStopOnHit, tReason, sNeedle in atSimple:
909 if sResultLog.find(sNeedle) > 0:
910 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
911 if fStopOnHit:
912 return True;
913 fFoundSomething = True;
914
915 return fFoundSomething if fFoundSomething else None;
916
917
918 def investigateBadTestBox(self, oCaseFile):
919 """
920 Checks out bad-testbox statuses.
921 """
922 _ = oCaseFile;
923 return False;
924
925
926 def investigateVBoxUnitTest(self, oCaseFile):
927 """
928 Checks out a VBox unittest problem.
929 """
930
931 #
932 # Process simple test case failures first, using their name as reason.
933 # We do the reason management just like for BSODs.
934 #
935 cRelevantOnes = 0;
936 sMainLog = oCaseFile.getMainLog();
937 aoFailedResults = oCaseFile.oTree.getListOfFailures();
938 for oFailedResult in aoFailedResults:
939 if oFailedResult is oCaseFile.oTree:
940 self.vprint('TODO: toplevel failure');
941 cRelevantOnes += 1
942
943 elif oFailedResult.sName == 'Installing VirtualBox':
944 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
945 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
946 cRelevantOnes += 1
947
948 elif oFailedResult.sName == 'Uninstalling VirtualBox':
949 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
950 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
951 cRelevantOnes += 1
952
953 elif oFailedResult.oParent is not None:
954 # Get the 2nd level node because that's where we'll find the unit test name.
955 while oFailedResult.oParent.oParent is not None:
956 oFailedResult = oFailedResult.oParent;
957
958 # Only report a failure once.
959 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
960 sKey = oFailedResult.sName;
961 if sKey.startswith('testcase/'):
962 sKey = sKey[9:];
963 if sKey in self.asUnitTestReasons:
964 tReason = ( self.ksUnitTestCategory, sKey );
965 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
966 else:
967 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
968 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
969 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
970 cRelevantOnes += 1
971 else:
972 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
973
974 #
975 # If we've caught all the relevant ones by now, report the result.
976 #
977 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
978 return self.caseClosed(oCaseFile);
979 return False;
980
981 def extractGuestCpuStack(self, sInfoText):
982 """
983 Extracts the guest CPU stacks from the input file.
984
985 Returns a dictionary keyed by the CPU number, value being a list of
986 raw stack lines (no header).
987 Returns empty dictionary if no stacks where found.
988 """
989 dRet = {};
990 off = 0;
991 while True:
992 # Find the stack.
993 offStart = sInfoText.find('=== start guest stack VCPU ', off);
994 if offStart < 0:
995 break;
996 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
997 if offEnd >= 0:
998 offEnd += 3;
999 else:
1000 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
1001 if offEnd < 0:
1002 offEnd = len(sInfoText);
1003
1004 sStack = sInfoText[offStart : offEnd];
1005 sStack = sStack.replace('\r',''); # paranoia
1006 asLines = sStack.split('\n');
1007
1008 # Figure the CPU.
1009 asWords = asLines[0].split();
1010 if len(asWords) < 6 or not asWords[5].isdigit():
1011 break;
1012 iCpu = int(asWords[5]);
1013
1014 # Add it and advance.
1015 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
1016 off = offEnd;
1017 return dRet;
1018
1019 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
1020 """ Investigates kvm_lock_spinning deadlocks """
1021 #
1022 # Extract the stacks. We need more than one CPU to create a deadlock.
1023 #
1024 dStacks = self.extractGuestCpuStack(sInfoText);
1025 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
1026 if len(dStacks) >= 2:
1027 #
1028 # Examin each of the stacks. Each must have kvm_lock_spinning in
1029 # one of the first three entries.
1030 #
1031 cHits = 0;
1032 for iCpu in dStacks:
1033 asBacktrace = dStacks[iCpu];
1034 for iFrame in xrange(min(3, len(asBacktrace))):
1035 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
1036 cHits += 1;
1037 break;
1038 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
1039 if cHits == len(dStacks):
1040 return (True, self.ktReason_VMM_kvm_lock_spinning);
1041
1042 _ = dLogs; _ = oCaseFile;
1043 return (False, None);
1044
1045 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1046 """ Investigates HalReturnToFirmware hangs """
1047 del oCaseFile
1048 del sInfoText
1049 del dLogs
1050 # hope that's sufficient
1051 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1052
1053 ## Things we search a main or VM log for to figure out why something went bust.
1054 ## @note DO NOT ADD MORE STUFF HERE!
1055 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1056 katSimpleMainAndVmLogReasonsDeprecated = [
1057 # ( Whether to stop on hit, reason tuple, needle text. )
1058 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1059 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1060 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1061 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1062 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1063 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1064 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1065 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1066 ( True, ktReason_Networking_Nonexistent_host_nic,
1067 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1068 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1069 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1070 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1071 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1072 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1073 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1074 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1075 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1076 ( True, ktReason_Unknown_File_Not_Found,
1077 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1078 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1079 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1080 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1081 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1082 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1083 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1084 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1085 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1086 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1087 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1088 ];
1089
1090 ## This we search a main log for to figure out why something went bust.
1091 katSimpleMainLogReasons = [
1092 # ( Whether to stop on hit, reason tuple, needle text. )
1093 ( False, ktReason_Host_win32com_gen_py, 'ModuleNotFoundError: No module named \'win32com.gen_py' ),
1094
1095 ];
1096
1097 ## This we search a VM log for to figure out why something went bust.
1098 katSimpleVmLogReasons = [
1099 # ( Whether to stop on hit, reason tuple, needle text. )
1100 ];
1101
1102 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1103 katSimpleVBoxHardeningLogReasons = [
1104 # ( Whether to stop on hit, reason tuple, needle text. )
1105 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1106 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1107 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1108 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1109 ];
1110
1111 ## Things we search a kernel.log file for to figure out why something went bust.
1112 katSimpleKernelLogReasons = [
1113 # ( Whether to stop on hit, reason tuple, needle text. )
1114 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1115 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1116 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1117 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1118 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1119 ];
1120
1121 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1122 katSimpleVgaTextReasons = [
1123 # ( Whether to stop on hit, reason tuple, needle text. )
1124 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1125 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1126 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1127 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1128 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1129 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1130 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1131 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1132 "and send a report. Then try booting with the 'noapic' option\n"
1133 "\n" ),
1134 ( True, ktReason_OSInstall_GRUB_hang,
1135 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1136 ( True, ktReason_OSInstall_GRUB_hang,
1137 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1138 ( True, ktReason_OSInstall_GRUB_hang,
1139 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1140 ( True, ktReason_OSInstall_GRUB_hang,
1141 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1142 ( True, ktReason_OSInstall_Udev_hang,
1143 "\nStarting udev:\n\n\n\n" ),
1144 ( True, ktReason_OSInstall_Udev_hang,
1145 "\nStarting udev:\n------" ),
1146 ( True, ktReason_Panic_BootManagerC000000F,
1147 "Windows failed to start. A recent hardware or software change might be the" ),
1148 ( True, ktReason_BootManager_Image_corrupt,
1149 "BOOTMGR image is corrupt. The system cannot boot." ),
1150 ];
1151
1152 ## Things we search for in the info.txt file. Require handlers for now.
1153 katInfoTextHandlers = [
1154 # ( Trigger text, handler method )
1155 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1156 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1157 ];
1158
1159 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1160 katSimpleScreenshotHashReasons = [
1161 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1162 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1163 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1164 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1165 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1166 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1167 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1168 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1169 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1170 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1171 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1172 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1173 ];
1174
1175
1176 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1177 """
1178 Scans for atNeedles in sLog.
1179
1180 Returns True if a stop-on-hit neelde was found.
1181 Returns None if a no-stop reason was found.
1182 Returns False if no hit.
1183 """
1184 fRet = False;
1185 for fStopOnHit, tReason, oNeedle in atNeedles:
1186 fMatch = False;
1187 if utils.isString(oNeedle):
1188 for sLog in asLogs:
1189 if sLog:
1190 fMatch |= sLog.find(oNeedle) > 0;
1191 else:
1192 for sLog in asLogs:
1193 if sLog:
1194 fMatch |= oNeedle.search(sLog) is not None;
1195 if fMatch:
1196 oCaseFile.noteReasonForId(tReason, idTestResult);
1197 if fStopOnHit:
1198 return True;
1199 fRet = None;
1200 return fRet;
1201
1202
1203 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1204 """
1205 Investigates a failed VM run.
1206 """
1207 enmReason = None;
1208 sParentName = oFailedResult.oParent.sName if oFailedResult.oParent else '';
1209 if oFailedResult.sName == 'VBoxWindowsAdditions.exe' or sResultLog.find('VBoxWindowsAdditions.exe" failed with') > 0:
1210 enmReason = self.ktReason_Add_Installer_Win_Failed;
1211 # guest control:
1212 elif sParentName == 'Guest Control' and oFailedResult.sName == 'Preparations':
1213 enmReason = self.ktReason_Add_GstCtl_Preparations;
1214 elif oFailedResult.sName == 'Session Basics':
1215 enmReason = self.ktReason_Add_GstCtl_SessionBasics;
1216 elif oFailedResult.sName == 'Session Process References':
1217 enmReason = self.ktReason_Add_GstCtl_SessionProcRefs;
1218 elif oFailedResult.sName == 'Copy from guest':
1219 if sResultLog.find('*** abort action ***') >= 0:
1220 enmReason = self.ktReason_Add_GstCtl_CopyFromGuest_Timeout;
1221 elif oFailedResult.sName == 'Copy to guest':
1222 off = sResultLog.find('"Guest directory "');
1223 if off > 0 and sResultLog.find('" already exists"', off, off + 80):
1224 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstExists;
1225 elif sResultLog.find('Guest destination must not be empty') >= 0:
1226 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstEmpty;
1227 elif sResultLog.find('*** abort action ***') >= 0:
1228 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_Timeout;
1229 elif oFailedResult.sName.find('Session w/ Guest Reboot') >= 0:
1230 enmReason = self.ktReason_Add_GstCtl_Session_Reboot;
1231 # shared folders:
1232 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Automounting':
1233 enmReason = self.ktReason_Add_ShFl_Automount;
1234 elif oFailedResult.sName == 'mmap':
1235 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1236 enmReason = self.ktReason_Add_Mmap_Coherency;
1237 elif sResultLog.find('FlushViewOfFile') >= 0:
1238 enmReason = self.ktReason_Add_FlushViewOfFile;
1239 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Running FsPerf':
1240 enmReason = self.ktReason_Add_ShFl_FsPerf; ## Maybe it would be better to be more specific...
1241
1242 if enmReason is not None:
1243 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1244
1245 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1246 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1247 return False;
1248
1249 def isResultFromGATest(self, oCaseFile, oFailedResult):
1250 """
1251 Checks if this result and corresponding log snippet looks like a GA test run.
1252 """
1253 while oFailedResult is not None:
1254 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', 'VBoxWindowsAdditions.exe' ]:
1255 return True;
1256 if oCaseFile.oTestCase.sName == 'Guest Additions' and oFailedResult.sName in [ 'Install', ]:
1257 return True;
1258 oFailedResult = oFailedResult.oParent;
1259 return False;
1260
1261
1262 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1263 """
1264 Investigates a failed VM run.
1265 """
1266
1267 def investigateLogSet():
1268 """
1269 Investigates the current set of VM related logs.
1270 """
1271 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1272 % ( len(sResultLog if sResultLog else ''),
1273 len(sVMLog if sVMLog else ''),
1274 len(sKrnlLog if sKrnlLog else ''),
1275 len(sVgaText if sVgaText else ''),
1276 len(sInfoText if sInfoText else ''),
1277 len(sNtHardLog if sNtHardLog else ''),));
1278
1279 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1280 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1281 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1282 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1283 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1284 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1285
1286 # TODO: more
1287
1288 #
1289 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1290 #
1291 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1292 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1293 if sDetails is not None:
1294 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1295 sKey = sDetails.split(' ', 1)[0];
1296 try: sKey = '0x%08X' % (int(sKey, 16),);
1297 except: pass;
1298 if sKey in self.asBsodReasons:
1299 tReason = ( self.ksBsodCategory, sKey );
1300 elif sKey.lower() in self.asBsodReasons: # just in case.
1301 tReason = ( self.ksBsodCategory, sKey.lower() );
1302 else:
1303 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1304 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1305 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1306
1307 fFoundSomething = False;
1308
1309 #
1310 # Look for linux panic.
1311 #
1312 if sKrnlLog is not None:
1313 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1314 if fRet is True:
1315 return fRet;
1316 fFoundSomething |= fRet is None;
1317
1318 #
1319 # Loop thru the simple stuff.
1320 #
1321
1322 # Main log.
1323 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1324 if fRet is True:
1325 return fRet;
1326 fFoundSomething |= fRet is None;
1327
1328 # VM log.
1329 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1330 if fRet is True:
1331 return fRet;
1332 fFoundSomething |= fRet is None;
1333
1334 # Old main + vm log.
1335 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1336 oCaseFile, oFailedResult.idTestResult);
1337 if fRet is True:
1338 return fRet;
1339 fFoundSomething |= fRet is None;
1340
1341 # Continue with vga text.
1342 if sVgaText:
1343 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1344 if fRet is True:
1345 return fRet;
1346 fFoundSomething |= fRet is None;
1347
1348 # Continue with screen hashes.
1349 if sScreenHash is not None:
1350 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1351 if sScreenHash == sHash:
1352 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1353 if fStopOnHit:
1354 return True;
1355 fFoundSomething = True;
1356
1357 # Check VBoxHardening.log.
1358 if sNtHardLog is not None:
1359 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1360 if fRet is True:
1361 return fRet;
1362 fFoundSomething |= fRet is None;
1363
1364 #
1365 # Complicated stuff.
1366 #
1367 dLogs = {
1368 'sVMLog': sVMLog,
1369 'sNtHardLog': sNtHardLog,
1370 'sScreenHash': sScreenHash,
1371 'sKrnlLog': sKrnlLog,
1372 'sVgaText': sVgaText,
1373 'sInfoText': sInfoText,
1374 };
1375
1376 # info.txt.
1377 if sInfoText:
1378 for sNeedle, fnHandler in self.katInfoTextHandlers:
1379 if sInfoText.find(sNeedle) > 0:
1380 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1381 if tReason is not None:
1382 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1383 if fStop:
1384 return True;
1385 fFoundSomething = True;
1386
1387 #
1388 # Check for repeated reboots...
1389 #
1390 if sVMLog is not None:
1391 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1392 if cResets > 10:
1393 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1394 sComment = 'Counted %s reboots' % (cResets,));
1395
1396 return fFoundSomething;
1397
1398 #
1399 # Check if we got any VM or/and kernel logs. Treat them as sets in
1400 # case we run multiple VMs here (this is of course ASSUMING they
1401 # appear in the order that terminateVmBySession uploads them).
1402 #
1403 cTimes = 0;
1404 sVMLog = None;
1405 sNtHardLog = None;
1406 sScreenHash = None;
1407 sKrnlLog = None;
1408 sVgaText = None;
1409 sInfoText = None;
1410 for oFile in oFailedResult.aoFiles:
1411 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1412 if 'VBoxHardening.log' not in oFile.sFile:
1413 if sVMLog is not None:
1414 if investigateLogSet() is True:
1415 return True;
1416 cTimes += 1;
1417 sInfoText = None;
1418 sVgaText = None;
1419 sKrnlLog = None;
1420 sScreenHash = None;
1421 sNtHardLog = None;
1422 sVMLog = oCaseFile.getLogFile(oFile);
1423 else:
1424 sNtHardLog = oCaseFile.getLogFile(oFile);
1425 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1426 sKrnlLog = oCaseFile.getLogFile(oFile);
1427 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1428 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1429 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1430 sInfoText = oCaseFile.getLogFile(oFile);
1431 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1432 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1433 if sScreenHash is not None:
1434 sScreenHash = sScreenHash.lower();
1435 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1436
1437 if ( sVMLog is not None \
1438 or sNtHardLog is not None \
1439 or cTimes == 0) \
1440 and investigateLogSet() is True:
1441 return True;
1442
1443 return None;
1444
1445 def isResultFromVMRun(self, oFailedResult, sResultLog):
1446 """
1447 Checks if this result and corresponding log snippet looks like a VM run.
1448 """
1449
1450 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1451 if sResultLog.find(' startVm') > 0:
1452 return True;
1453
1454 # Any other indicators? No?
1455 _ = oFailedResult;
1456 return False;
1457
1458
1459 ## Things we search a VBoxSVC log for to figure out why something went bust.
1460 katSimpleSvcLogReasons = [
1461 # ( Whether to stop on hit, reason tuple, needle text. )
1462 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1463 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1464 ];
1465
1466 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1467 """
1468 Check the VBoxSVC log for a single VM run.
1469 """
1470 if sSvcLog:
1471 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1472 if fRet is True or fRet is None:
1473 return True;
1474 return False;
1475
1476 def investigateNtHardLogForVMRun(self, oCaseFile):
1477 """
1478 Check if the hardening log for a single VM run contains VM crash indications.
1479 """
1480 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1481 for oLogFile in aoLogFiles:
1482 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1483 sLog = oCaseFile.getLogFile(oLogFile);
1484 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1485 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1486 return False;
1487
1488
1489 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1490 """
1491 Checks out a VBox VM test.
1492
1493 This is generic investigation of a test running one or more VMs, like
1494 for example a smoke test or a guest installation test.
1495
1496 The fSingleVM parameter is a hint, which probably won't come in useful.
1497 """
1498 _ = fSingleVM;
1499
1500 #
1501 # Get a list of test result failures we should be looking into and the main log.
1502 #
1503 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1504 sMainLog = oCaseFile.getMainLog();
1505
1506 #
1507 # There are a set of errors ending up on the top level result record.
1508 # Should deal with these first.
1509 #
1510 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1511 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1512 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1513 # ignore other failures in the test if we're not a little bit careful here.
1514 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1515 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1516 return self.caseClosed(oCaseFile);
1517
1518 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1519 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1520 (' Timeout: ', ' Attempting to abort child...',) ):
1521 if sMainLog.find('*** glibc detected *** /') > 0:
1522 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1523 else:
1524 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1525 return self.caseClosed(oCaseFile);
1526
1527 # Look for heap corruption without visible hang.
1528 if sMainLog.find('*** glibc detected *** /') > 0 \
1529 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1530 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1531 return self.caseClosed(oCaseFile);
1532
1533 # Out of memory w/ timeout.
1534 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1535 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1536 return self.caseClosed(oCaseFile);
1537
1538 # Stale files like vts_rm.exe (windows).
1539 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1540 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1541 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1542 return self.caseClosed(oCaseFile);
1543
1544 #
1545 # XPCOM screwup
1546 #
1547 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1548 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1549 return self.caseClosed(oCaseFile);
1550
1551 #
1552 # Go thru each failed result.
1553 #
1554 for oFailedResult in aoFailedResults:
1555 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1556 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1557 if oFailedResult.sName == 'Installing VirtualBox':
1558 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1559
1560 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1561 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1562
1563 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1564 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1565
1566 elif self.isResultFromGATest(oCaseFile, oFailedResult):
1567 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1568
1569 elif sResultLog.find('most likely not unique') > 0:
1570 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1571 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1572 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1573
1574 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1575 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1576 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1577
1578 elif sResultLog.find('** error: no action was specified') > 0 \
1579 or sResultLog.find('(len(self._asXml, asText))') > 0:
1580 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1581
1582 else:
1583 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1584 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1585
1586 #
1587 # Windows python/com screwup.
1588 #
1589 if sMainLog.find('ModuleNotFoundError: No module named \'win32com.gen_py') > 0:
1590 oCaseFile.noteReason(self.ktReason_Host_win32com_gen_py);
1591 return self.caseClosed(oCaseFile);
1592
1593 #
1594 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1595 #
1596 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1597 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1598 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1599 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1600 return self.caseClosed(oCaseFile);
1601 if self.investigateNtHardLogForVMRun(oCaseFile):
1602 return self.caseClosed(oCaseFile);
1603
1604 #
1605 # Report home and close the case if we got them all, otherwise log it.
1606 #
1607 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1608 return self.caseClosed(oCaseFile);
1609
1610 if oCaseFile.dReasonForResultId:
1611 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1612 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1613 else:
1614 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1615 return False;
1616
1617
1618 ## Things we search a main log for to figure out why something in the API test went bust.
1619 katSimpleApiMainLogReasons = [
1620 # ( Whether to stop on hit, reason tuple, needle text. )
1621 ( True, ktReason_Networking_Nonexistent_host_nic,
1622 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1623 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1624 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1625 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1626 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1627 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1628 ( True, ktReason_API_MoveVM_InvalidParameter,
1629 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1630 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1631 ];
1632
1633 def investigateVBoxApiTest(self, oCaseFile):
1634 """
1635 Checks out a VBox API test.
1636 """
1637
1638 #
1639 # Get a list of test result failures we should be looking into and the main log.
1640 #
1641 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1642 sMainLog = oCaseFile.getMainLog();
1643
1644 #
1645 # Go thru each failed result.
1646 #
1647 for oFailedResult in aoFailedResults:
1648 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1649 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1650 if oFailedResult.sName == 'Installing VirtualBox':
1651 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1652
1653 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1654 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1655
1656 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1657 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1658
1659 else:
1660 fFoundSomething = False;
1661 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1662 if sResultLog.find(sNeedle) > 0:
1663 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1664 fFoundSomething = True;
1665 if fStopOnHit:
1666 break;
1667 if fFoundSomething:
1668 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1669 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1670
1671 #
1672 # Report home and close the case if we got them all, otherwise log it.
1673 #
1674 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1675 return self.caseClosed(oCaseFile);
1676
1677 if oCaseFile.dReasonForResultId:
1678 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1679 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1680 else:
1681 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1682 return False;
1683
1684
1685 def reasoningFailures(self):
1686 """
1687 Guess the reason for failures.
1688 """
1689 #
1690 # Get a list of failed test sets without any assigned failure reason.
1691 #
1692 cGot = 0;
1693 if not self.oConfig.aidTestSets:
1694 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1695 tsNow = self.tsNow);
1696 else:
1697 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1698 for oTestSet in aoTestSets:
1699 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1700 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1701
1702 #
1703 # Open a case file and assign it to the right investigator.
1704 #
1705 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1706 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1707 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1708 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1709 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1710
1711 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1712
1713 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1714 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1715 fRc = self.investigateBadTestBox(oCaseFile);
1716
1717 elif oCaseFile.isVBoxUnitTest():
1718 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1719 fRc = self.investigateVBoxUnitTest(oCaseFile);
1720
1721 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1722 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1723 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1724
1725 elif oCaseFile.isVBoxUSBTest():
1726 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1727 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1728
1729 elif oCaseFile.isVBoxStorageTest():
1730 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1731 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1732
1733 elif oCaseFile.isVBoxGAsTest():
1734 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1735 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1736
1737 elif oCaseFile.isVBoxAPITest():
1738 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1739 fRc = self.investigateVBoxApiTest(oCaseFile);
1740
1741 elif oCaseFile.isVBoxBenchmarkTest():
1742 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1743 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1744
1745 elif oCaseFile.isVBoxSmokeTest():
1746 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1747 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1748
1749 elif oCaseFile.isVBoxSerialTest():
1750 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1751 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1752
1753 else:
1754 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1755 fRc = False;
1756 cGot += fRc is True;
1757
1758 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1759 return 0;
1760
1761
1762 def main(self):
1763 """
1764 The 'main' function.
1765 Return exit code (0, 1, etc).
1766 """
1767 # Database stuff.
1768 self.oDb = TMDatabaseConnection()
1769 self.oTestResultLogic = TestResultLogic(self.oDb);
1770 self.oTestSetLogic = TestSetLogic(self.oDb);
1771 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1772 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1773 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1774 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1775
1776 # Get a fix on our 'now' before we do anything..
1777 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1778 self.tsNow = self.oDb.fetchOne();
1779
1780 # If we're suppost to commit anything we need to get our user ID.
1781 rcExit = 0;
1782 if self.oConfig.fRealRun:
1783 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1784 if self.oLogin is None:
1785 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1786 else:
1787 self.uidSelf = self.oLogin.uid;
1788
1789 #
1790 # Do the stuff.
1791 #
1792 if rcExit == 0:
1793 rcExit = self.selfCheck();
1794 if rcExit == 0:
1795 rcExit = self.badTestBoxManagement();
1796 rcExit2 = self.reasoningFailures();
1797 if rcExit == 0:
1798 rcExit = rcExit2;
1799 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1800 if rcExit == 0:
1801 rcExit = self.badTestBoxManagement();
1802
1803 # Cleanup.
1804 self.oFailureReasonLogic = None;
1805 self.oTestResultFailureLogic = None;
1806 self.oTestSetLogic = None;
1807 self.oTestResultLogic = None;
1808 self.oDb.close();
1809 self.oDb = None;
1810 if self.oLogFile is not None:
1811 self.oLogFile.close();
1812 self.oLogFile = None;
1813 return rcExit;
1814
1815if __name__ == '__main__':
1816 sys.exit(VirtualTestSheriff().main());
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette