Browse Source

tests: Retry failed cases automatically in parallel VM run

parallel-vm.py now retries failed cases once at the end of the run.
If all the failed test cases pass on the second attempt, that is noted
in the summary output. Results are also indicated by the exit value of
the run: 0 = all cases passed on the first run, 1 = some cases failed
once but everything passed after one retry, 2 = some cases failed and
did not pass even on the retry.

Signed-off-by: Jouni Malinen <j@w1.fi>
Jouni Malinen 10 years ago
parent
commit
3eb1db0377
1 changed files with 110 additions and 16 deletions
  1. 110 16
      tests/hwsim/vm/parallel-vm.py

+ 110 - 16
tests/hwsim/vm/parallel-vm.py

@@ -33,6 +33,7 @@ def show_progress(scr):
     global dir
     global dir
     global timestamp
     global timestamp
     global tests
     global tests
+    global first_run_failures
 
 
     total_tests = len(tests)
     total_tests = len(tests)
 
 
@@ -45,10 +46,19 @@ def show_progress(scr):
     scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
     scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
     scr.refresh()
     scr.refresh()
 
 
+    completed_first_pass = False
+    rerun_tests = []
+
     while True:
     while True:
         running = False
         running = False
+        first_running = False
         updated = False
         updated = False
+
         for i in range(0, num_servers):
         for i in range(0, num_servers):
+            if completed_first_pass:
+                continue
+            if vm[i]['first_run_done']:
+                continue
             if not vm[i]['proc']:
             if not vm[i]['proc']:
                 continue
                 continue
             if vm[i]['proc'].poll() is not None:
             if vm[i]['proc'].poll() is not None:
@@ -60,11 +70,12 @@ def show_progress(scr):
                     if "Kernel panic" in f.read():
                     if "Kernel panic" in f.read():
                         scr.addstr("kernel panic")
                         scr.addstr("kernel panic")
                     else:
                     else:
-                        scr.addstr("completed run")
+                        scr.addstr("unexpected exit")
                 updated = True
                 updated = True
                 continue
                 continue
 
 
             running = True
             running = True
+            first_running = True
             try:
             try:
                 err = vm[i]['proc'].stderr.read()
                 err = vm[i]['proc'].stderr.read()
                 vm[i]['err'] += err
                 vm[i]['err'] += err
@@ -73,27 +84,80 @@ def show_progress(scr):
 
 
             try:
             try:
                 out = vm[i]['proc'].stdout.read()
                 out = vm[i]['proc'].stdout.read()
+                vm[i]['out'] += out
                 if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out:
                 if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out:
+                    scr.move(i + 1, 10)
+                    scr.clrtoeol()
+                    updated = True
                     if not tests:
                     if not tests:
-                        vm[i]['proc'].stdin.write('\n')
+                        vm[i]['first_run_done'] = True
+                        scr.addstr("completed first round")
+                        continue
                     else:
                     else:
                         name = tests.pop(0)
                         name = tests.pop(0)
                         vm[i]['proc'].stdin.write(name + '\n')
                         vm[i]['proc'].stdin.write(name + '\n')
+                        scr.addstr(name)
             except:
             except:
+                pass
+
+        if not first_running and not completed_first_pass:
+            if tests:
+                raise Exception("Unexpected test cases remaining from first round")
+            completed_first_pass = True
+            (started, passed, failed, skipped) = get_results()
+            for f in failed:
+                name = f.split(' ')[1]
+                rerun_tests.append(name)
+                first_run_failures.append(name)
+
+        for i in range(num_servers):
+            if not completed_first_pass:
+                continue
+            if not vm[i]['proc']:
                 continue
                 continue
-            #print("VM {}: '{}'".format(i, out))
+            if vm[i]['proc'].poll() is not None:
-            vm[i]['out'] += out
+                vm[i]['proc'] = None
-            lines = vm[i]['out'].splitlines()
+                scr.move(i + 1, 10)
-            last = [ l for l in lines if l.startswith('START ') ]
+                scr.clrtoeol()
-            if len(last) > 0:
+                log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
-                try:
+                with open(log, 'r') as f:
-                    info = last[-1].split(' ')
+                    if "Kernel panic" in f.read():
+                        scr.addstr("kernel panic")
+                    else:
+                        scr.addstr("completed run")
+                updated = True
+                continue
+
+            running = True
+            try:
+                err = vm[i]['proc'].stderr.read()
+                vm[i]['err'] += err
+            except:
+                pass
+
+            try:
+                ready = False
+                if vm[i]['first_run_done']:
+                    vm[i]['first_run_done'] = False
+                    ready = True
+                else:
+                    out = vm[i]['proc'].stdout.read()
+                    vm[i]['out'] += out
+                    if "READY" in out or "PASS" in out or "FAIL" in out or "SKIP" in out:
+                        ready = True
+                if ready:
                     scr.move(i + 1, 10)
                     scr.move(i + 1, 10)
                     scr.clrtoeol()
                     scr.clrtoeol()
-                    scr.addstr(info[1])
                     updated = True
                     updated = True
-                except:
+                    if not rerun_tests:
-                    pass
+                        vm[i]['proc'].stdin.write('\n')
+                        scr.addstr("shutting down")
+                    else:
+                        name = rerun_tests.pop(0)
+                        vm[i]['proc'].stdin.write(name + '\n')
+                        scr.addstr(name + "(*)")
+            except:
+                pass
 
 
         if not running:
         if not running:
             break
             break
@@ -111,9 +175,17 @@ def show_progress(scr):
                 for f in failed:
                 for f in failed:
                     scr.addstr(f.split(' ')[1])
                     scr.addstr(f.split(' ')[1])
                     scr.addstr(' ')
                     scr.addstr(' ')
+
+            scr.move(0, 35)
+            scr.clrtoeol()
+            if rerun_tests:
+                scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
+            elif first_run_failures:
+                scr.addstr("(RETRY FAILED)")
+
             scr.refresh()
             scr.refresh()
 
 
-        time.sleep(0.5)
+        time.sleep(0.25)
 
 
     scr.refresh()
     scr.refresh()
     time.sleep(0.3)
     time.sleep(0.3)
@@ -124,6 +196,7 @@ def main():
     global dir
     global dir
     global timestamp
     global timestamp
     global tests
     global tests
+    global first_run_failures
 
 
     if len(sys.argv) < 2:
     if len(sys.argv) < 2:
         sys.exit("Usage: %s <number of VMs> [--codecov] [params..]" % sys.argv[0])
         sys.exit("Usage: %s <number of VMs> [--codecov] [params..]" % sys.argv[0])
@@ -146,6 +219,7 @@ def main():
         codecov_args = []
         codecov_args = []
         codecov = False
         codecov = False
 
 
+    first_run_failures = []
     tests = []
     tests = []
     cmd = [ '../run-tests.py', '-L' ] + sys.argv[idx:]
     cmd = [ '../run-tests.py', '-L' ] + sys.argv[idx:]
     lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
     lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
@@ -214,6 +288,7 @@ def main():
                '--ext', 'srv.%d' % (i + 1),
                '--ext', 'srv.%d' % (i + 1),
                '-i'] + codecov_args + extra_args
                '-i'] + codecov_args + extra_args
         vm[i] = {}
         vm[i] = {}
+        vm[i]['first_run_done'] = False
         vm[i]['proc'] = subprocess.Popen(cmd,
         vm[i]['proc'] = subprocess.Popen(cmd,
                                          stdin=subprocess.PIPE,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
@@ -234,10 +309,23 @@ def main():
 
 
     (started, passed, failed, skipped) = get_results()
     (started, passed, failed, skipped) = get_results()
 
 
-    if len(failed) > 0:
+    if first_run_failures:
         print "Failed test cases:"
         print "Failed test cases:"
-        for f in failed:
+        for f in first_run_failures:
-            print f.split(' ')[1],
+            print f,
+        print
+    double_failed = []
+    for f in failed:
+        name = f.split(' ')[1]
+        double_failed.append(name)
+    for test in first_run_failures:
+        double_failed.remove(test)
+    if failed and not double_failed:
+        print "All failed cases passed on retry"
+    elif double_failed:
+        print "Failed even on retry:"
+        for f in double_failed:
+            print f,
         print
         print
     print("TOTAL={} PASS={} FAIL={} SKIP={}".format(len(started), len(passed), len(failed), len(skipped)))
     print("TOTAL={} PASS={} FAIL={} SKIP={}".format(len(started), len(passed), len(failed), len(skipped)))
     print "Logs: " + dir + '/' + str(timestamp)
     print "Logs: " + dir + '/' + str(timestamp)
@@ -257,5 +345,11 @@ def main():
         subprocess.check_call(['./combine-codecov.sh', logdir])
         subprocess.check_call(['./combine-codecov.sh', logdir])
         print "file://%s/index.html" % logdir
         print "file://%s/index.html" % logdir
 
 
+    if double_failed:
+        sys.exit(2)
+    if failed:
+        sys.exit(1)
+    sys.exit(0)
+
 if __name__ == "__main__":
 if __name__ == "__main__":
     main()
     main()