parallel-vm.py

#!/usr/bin/env python2
#
# Parallel VM test case executor
# Copyright (c) 2014-2015, Jouni Malinen <j@w1.fi>
#
# This software may be distributed under the terms of the BSD license.
# See README for more details.
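
# This script runs hwsim test cases in parallel: it starts one vm-run.sh
# child process per requested VM, feeds test case names to each VM over
# stdin, parses per-test status lines from the VM's stdout, and shows overall
# progress on a curses screen. Failed test cases are retried once by default
# (disable with -1).
#
# Example invocation (assuming this file sits next to vm-run.sh, as in the
# hwsim vm/ scripts directory):
#   ./parallel-vm.py 8 --long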

import curses
import fcntl
import logging
import os
import subprocess
import sys
import time

logger = logging.getLogger()

# Test cases that take significantly longer time to execute than average.
long_tests = [ "ap_roam_open",
               "wpas_mesh_password_mismatch_retry",
               "wpas_mesh_password_mismatch",
               "hostapd_oom_wpa2_psk_connect",
               "ap_hs20_fetch_osu_stop",
               "ap_roam_wpa2_psk",
               "ibss_wpa_none_ccmp",
               "nfc_wps_er_handover_pk_hash_mismatch_sta",
               "go_neg_peers_force_diff_freq",
               "p2p_cli_invite",
               "sta_ap_scan_2b",
               "ap_pmf_sta_unprot_deauth_burst",
               "ap_bss_add_remove_during_ht_scan",
               "wext_scan_hidden",
               "autoscan_exponential",
               "nfc_p2p_client",
               "wnm_bss_keep_alive",
               "ap_inactivity_disconnect",
               "scan_bss_expiration_age",
               "autoscan_periodic",
               "discovery_group_client",
               "concurrent_p2pcli",
               "ap_bss_add_remove",
               "wpas_ap_wps",
               "wext_pmksa_cache",
               "ibss_wpa_none",
               "ap_ht_40mhz_intolerant_ap",
               "ibss_rsn",
               "discovery_pd_retries",
               "ap_wps_setup_locked_timeout",
               "ap_vht160",
               "dfs_radar",
               "dfs",
               "dfs_ht40_minus",
               "grpform_cred_ready_timeout",
               "hostapd_oom_wpa2_eap_connect",
               "wpas_ap_dfs",
               "autogo_many",
               "hostapd_oom_wpa2_eap",
               "ibss_open",
               "proxyarp_open_ebtables",
               "proxyarp_open_ebtables_ipv6",
               "radius_failover",
               "obss_scan_40_intolerant",
               "dbus_connect_oom",
               "proxyarp_open",
               "proxyarp_open_ipv6",
               "ap_wps_iteration",
               "ap_wps_iteration_error",
               "ap_wps_pbc_timeout",
               "ap_wps_http_timeout",
               "p2p_go_move_reg_change",
               "p2p_go_move_active",
               "p2p_go_move_scm",
               "p2p_go_move_scm_peer_supports",
               "p2p_go_move_scm_peer_does_not_support",
               "p2p_go_move_scm_multi" ]
def get_failed(vm):
    failed = []
    for i in range(num_servers):
        failed += vm[i]['failed']
    return failed
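
# Read whatever is available on a VM's (non-blocking) stdout pipe and parse
# complete lines out of it. Each test case run inside the VM is reported with
# a status line starting with READY, START, PASS, FAIL, NOT-FOUND, or SKIP;
# partial lines are kept in vm['pending'] until the rest arrives. Returns
# True when the VM is ready to be given the next test case name.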
def vm_read_stdout(vm, i):
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures
    ready = False
    try:
        out = vm['proc'].stdout.read()
    except:
        # Non-blocking read had nothing available (or the pipe was closed).
        return False
    logger.debug("VM[%d] stdout.read[%s]" % (i, out))
    pending = vm['pending'] + out
    lines = []
    while True:
        pos = pending.find('\n')
        if pos < 0:
            break
        line = pending[0:pos].rstrip()
        pending = pending[(pos + 1):]
        logger.debug("VM[%d] stdout full line[%s]" % (i, line))
        if line.startswith("READY"):
            ready = True
        elif line.startswith("PASS"):
            ready = True
            total_passed += 1
        elif line.startswith("FAIL"):
            ready = True
            total_failed += 1
            vals = line.split(' ')
            if len(vals) < 2:
                logger.info("VM[%d] incomplete FAIL line: %s" % (i, line))
                name = line
            else:
                name = vals[1]
            logger.debug("VM[%d] test case failed: %s" % (i, name))
            vm['failed'].append(name)
        elif line.startswith("NOT-FOUND"):
            ready = True
            total_failed += 1
            logger.info("VM[%d] test case not found" % i)
        elif line.startswith("SKIP"):
            ready = True
            total_skipped += 1
        elif line.startswith("START"):
            total_started += 1
            # Until the first failure on this VM, remember the sequence of
            # started test cases so that a failing sequence can be re-run.
            if len(vm['failed']) == 0:
                vals = line.split(' ')
                if len(vals) >= 2:
                    vm['fail_seq'].append(vals[1])
        vm['out'] += line + '\n'
        lines.append(line)
    vm['pending'] = pending
    return ready
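
# Main scheduling loop, run under curses.wrapper(). The first pass hands out
# test cases from the global 'tests' list to whichever VM reports ready; once
# the list is drained, the failed cases collected so far are optionally
# re-run once, and finally each VM is told to shut down by sending an empty
# line on its stdin. The screen shows one status line per VM plus totals.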
def show_progress(scr):
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped

    total_tests = len(tests)
    logger.info("Total tests: %d" % total_tests)

    scr.leaveok(1)
    scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
    for i in range(0, num_servers):
        scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
        scr.addstr(i + 1, 10, "starting VM")
    scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
    scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
    scr.refresh()

    completed_first_pass = False
    rerun_tests = []

    while True:
        running = False
        first_running = False
        updated = False

        # First pass: hand out test cases from the main list.
        for i in range(0, num_servers):
            if completed_first_pass:
                continue
            if vm[i]['first_run_done']:
                continue
            if not vm[i]['proc']:
                continue
            if vm[i]['proc'].poll() is not None:
                # The VM process exited unexpectedly; check its console log
                # for a kernel panic.
                vm[i]['proc'] = None
                scr.move(i + 1, 10)
                scr.clrtoeol()
                log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
                with open(log, 'r') as f:
                    if "Kernel panic" in f.read():
                        scr.addstr("kernel panic")
                        logger.info("VM[%d] kernel panic" % i)
                    else:
                        scr.addstr("unexpected exit")
                        logger.info("VM[%d] unexpected exit" % i)
                updated = True
                continue

            running = True
            first_running = True
            try:
                err = vm[i]['proc'].stderr.read()
                vm[i]['err'] += err
                logger.debug("VM[%d] stderr.read[%s]" % (i, err))
            except:
                pass

            if vm_read_stdout(vm[i], i):
                scr.move(i + 1, 10)
                scr.clrtoeol()
                updated = True
                if not tests:
                    vm[i]['first_run_done'] = True
                    scr.addstr("completed first round")
                    logger.info("VM[%d] completed first round" % i)
                    continue
                else:
                    name = tests.pop(0)
                    vm[i]['proc'].stdin.write(name + '\n')
                    scr.addstr(name)
                    logger.debug("VM[%d] start test %s" % (i, name))

        if not first_running and not completed_first_pass:
            logger.info("First round of testing completed")
            if tests:
                logger.info("Unexpected test cases remaining from first round: " + str(tests))
                raise Exception("Unexpected test cases remaining from first round")
            completed_first_pass = True
            for name in get_failed(vm):
                if rerun_failures:
                    rerun_tests.append(name)
                first_run_failures.append(name)

        # Second pass: re-run failed test cases (if enabled) and then shut
        # down each VM by sending an empty line on its stdin.
        for i in range(num_servers):
            if not completed_first_pass:
                continue
            if not vm[i]['proc']:
                continue
            if vm[i]['proc'].poll() is not None:
                vm[i]['proc'] = None
                scr.move(i + 1, 10)
                scr.clrtoeol()
                log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
                with open(log, 'r') as f:
                    if "Kernel panic" in f.read():
                        scr.addstr("kernel panic")
                        logger.info("VM[%d] kernel panic" % i)
                    else:
                        scr.addstr("completed run")
                        logger.info("VM[%d] completed run" % i)
                updated = True
                continue

            running = True
            try:
                err = vm[i]['proc'].stderr.read()
                vm[i]['err'] += err
                logger.debug("VM[%d] stderr.read[%s]" % (i, err))
            except:
                pass

            ready = False
            if vm[i]['first_run_done']:
                vm[i]['first_run_done'] = False
                ready = True
            else:
                ready = vm_read_stdout(vm[i], i)
            if ready:
                scr.move(i + 1, 10)
                scr.clrtoeol()
                updated = True
                if not rerun_tests:
                    vm[i]['proc'].stdin.write('\n')
                    scr.addstr("shutting down")
                    logger.info("VM[%d] shutting down" % i)
                else:
                    name = rerun_tests.pop(0)
                    vm[i]['proc'].stdin.write(name + '\n')
                    scr.addstr(name + "(*)")
                    logger.debug("VM[%d] start test %s (*)" % (i, name))

        if not running:
            break

        if updated:
            scr.move(num_servers + 1, 10)
            scr.clrtoeol()
            scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
            scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
            failed = get_failed(vm)
            if len(failed) > 0:
                scr.move(num_servers + 2, 0)
                scr.clrtoeol()
                scr.addstr("Failed test cases: ")
                count = 0
                for f in failed:
                    count += 1
                    if count > 30:
                        scr.addstr('...')
                        scr.clrtoeol()
                        break
                    scr.addstr(f)
                    scr.addstr(' ')
            scr.move(0, 35)
            scr.clrtoeol()
            if rerun_tests:
                scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
            elif rerun_failures:
                pass
            elif first_run_failures:
                scr.addstr("(RETRY FAILED)")
            scr.refresh()

        time.sleep(0.25)

    scr.refresh()
    time.sleep(0.3)
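
# Parse command line arguments, build the list of test cases, start the VMs,
# run the curses progress screen, and report the combined results.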
def main():
    import argparse
    import os
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    total_started = 0
    total_passed = 0
    total_failed = 0
    total_skipped = 0

    debug_level = logging.INFO
    rerun_failures = True
    timestamp = int(time.time())

    scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))

    p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
    p.add_argument('num_servers', metavar='number of VMs', type=int,
                   choices=range(1, 100),
                   help="number of VMs to start")
    p.add_argument('-f', dest='testmodules', metavar='<test module>',
                   help='execute only tests from these test modules',
                   type=str, nargs='+')
    p.add_argument('-1', dest='no_retry', action='store_const', const=True,
                   default=False,
                   help="don't retry failed tests automatically")
    p.add_argument('--debug', dest='debug', action='store_const', const=True,
                   default=False,
                   help="enable debug logging")
    p.add_argument('--codecov', dest='codecov', action='store_const', const=True,
                   default=False,
                   help="enable code coverage collection")
    p.add_argument('--shuffle-tests', dest='shuffle', action='store_const',
                   const=True, default=False,
                   help="shuffle test cases to randomize order")
    p.add_argument('--short', dest='short', action='store_const', const=True,
                   default=False,
                   help="only run short-duration test cases")
    p.add_argument('--long', dest='long', action='store_const', const=True,
                   default=False,
                   help="include long-duration test cases")
    p.add_argument('--valgrind', dest='valgrind', action='store_const',
                   const=True, default=False,
                   help="run tests under valgrind")
    p.add_argument('params', nargs='*')
    args = p.parse_args()

    dir = os.environ.get('HWSIM_TEST_LOG_DIR', '/tmp/hwsim-test-logs')
    try:
        os.makedirs(dir)
    except:
        pass

    num_servers = args.num_servers
    rerun_failures = not args.no_retry
    if args.debug:
        debug_level = logging.DEBUG

    extra_args = []
    if args.valgrind:
        extra_args += [ '--valgrind' ]
    if args.long:
        extra_args += [ '--long' ]
    if args.codecov:
        print "Code coverage - build separate binaries"
        logdir = os.path.join(dir, str(timestamp))
        os.makedirs(logdir)
        subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
                               logdir])
        codecov_args = ['--codecov_dir', logdir]
        codecov = True
    else:
        codecov_args = []
        codecov = False

    first_run_failures = []
    if args.params:
        tests = args.params
    else:
        # No explicit test cases on the command line: ask run-tests.py for
        # the full list of available test cases.
        tests = []
        cmd = [ os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'),
                '-L' ]
        if args.testmodules:
            cmd += [ "-f" ]
            cmd += args.testmodules
        lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        for l in lst.stdout.readlines():
            name = l.split(' ')[0]
            tests.append(name)
    if len(tests) == 0:
        sys.exit("No test cases selected")

    if args.shuffle:
        from random import shuffle
        shuffle(tests)
    elif num_servers > 2 and len(tests) > 100:
        # Move test cases with long duration to the beginning as an
        # optimization to avoid last part of the test execution running a long
        # duration test case on a single VM while all other VMs have already
        # completed their work.
        for l in long_tests:
            if l in tests:
                tests.remove(l)
                tests.insert(0, l)
    if args.short:
        tests = [t for t in tests if t not in long_tests]

    logger.setLevel(debug_level)
    log_handler = logging.FileHandler('parallel-vm.log')
    log_handler.setLevel(debug_level)
    fmt = "%(asctime)s %(levelname)s %(message)s"
    log_formatter = logging.Formatter(fmt)
    log_handler.setFormatter(log_formatter)
    logger.addHandler(log_handler)
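
    # Start one vm-run.sh child process per VM and set up its state
    # dictionary. Test case names are written to the child's stdin and
    # results are read back from its stdout/stderr pipes.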
    vm = {}
    for i in range(0, num_servers):
        print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
        logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
        cmd = [os.path.join(scriptsdir, 'vm-run.sh'), '--delay', str(i),
               '--timestamp', str(timestamp),
               '--ext', 'srv.%d' % (i + 1),
               '-i'] + codecov_args + extra_args
        vm[i] = {}
        vm[i]['first_run_done'] = False
        vm[i]['proc'] = subprocess.Popen(cmd,
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
        vm[i]['out'] = ""
        vm[i]['pending'] = ""
        vm[i]['err'] = ""
        vm[i]['failed'] = []
        vm[i]['fail_seq'] = []
        # Make stdout/stderr non-blocking so the polling loop in
        # show_progress() never stalls on an idle VM.
        for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
            fd = stream.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    print

    curses.wrapper(show_progress)

    with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
        for i in range(0, num_servers):
            f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))

    failed = get_failed(vm)

    if first_run_failures:
        # Print a vm-run.sh command line (covering up to the last 30 started
        # test cases) that reproduces each VM's failing sequence.
        print "To re-run same failure sequence(s):"
        for i in range(0, num_servers):
            if len(vm[i]['failed']) == 0:
                continue
            print "./vm-run.sh",
            if args.long:
                print "--long",
            skip = len(vm[i]['fail_seq'])
            skip -= min(skip, 30)
            for t in vm[i]['fail_seq']:
                if skip > 0:
                    skip -= 1
                    continue
                print t,
            print
        print "Failed test cases:"
        for f in first_run_failures:
            print f,
            logger.info("Failed: " + f)
        print

    # Test cases that still failed after the retry round.
    double_failed = []
    for name in failed:
        double_failed.append(name)
    for test in first_run_failures:
        double_failed.remove(test)
    if not rerun_failures:
        pass
    elif failed and not double_failed:
        print "All failed cases passed on retry"
        logger.info("All failed cases passed on retry")
    elif double_failed:
        print "Failed even on retry:"
        for f in double_failed:
            print f,
            logger.info("Failed on retry: " + f)
        print

    res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
                                                    total_passed,
                                                    total_failed,
                                                    total_skipped)
    print(res)
    logger.info(res)

    print "Logs: " + dir + '/' + str(timestamp)
    logger.info("Logs: " + dir + '/' + str(timestamp))

    for i in range(0, num_servers):
        if len(vm[i]['pending']) > 0:
            logger.info("Unprocessed stdout from VM[%d]: '%s'" %
                        (i, vm[i]['pending']))
        log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
        with open(log, 'r') as f:
            if "Kernel panic" in f.read():
                print "Kernel panic in " + log
                logger.info("Kernel panic in " + log)

    if codecov:
        print "Code coverage - preparing report"
        for i in range(num_servers):
            subprocess.check_call([os.path.join(scriptsdir,
                                                'process-codecov.sh'),
                                   logdir + ".srv.%d" % (i + 1),
                                   str(i)])
        subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
                               logdir])
        print "file://%s/index.html" % logdir
        logger.info("Code coverage report: file://%s/index.html" % logdir)

    # Exit status: 2 if there were unrecovered failures, 1 if failures were
    # seen but all passed on retry, 0 on a clean run.
    if double_failed or (failed and not rerun_failures):
        logger.info("Test run complete - failures found")
        sys.exit(2)
    if failed:
        logger.info("Test run complete - failures found on first run; passed on retry")
        sys.exit(1)
    logger.info("Test run complete - no failures")
    sys.exit(0)

if __name__ == "__main__":
    main()