Commit fad00808 authored by Reto Da Forno's avatar Reto Da Forno
Browse files

node_id removed from timesync and error logs, and also log an error if an...

Removed node_id from the timesync and error logs; also log an error if an observer is unreachable or fails to start a test.
parent ddb36886
...@@ -89,7 +89,7 @@ class StopTestThread(threading.Thread): ...@@ -89,7 +89,7 @@ class StopTestThread(threading.Thread):
if (rs != 0): if (rs != 0):
if (rs == 1): if (rs == 1):
if ("No such file or directory" in err): if ("No such file or directory" in err):
msg = "SD card on observer ID %s is not mounted, observer will thus be omitted for this test." % (self._obsdict_key[self._obskey][1]) msg = "SD card on observer ID %s is not mounted." % (self._obsdict_key[self._obskey][1])
else: else:
msg = "Observer ID %s is not reachable (returned %d: %s, %s)." % (self._obsdict_key[self._obskey][1], rs, out, err) msg = "Observer ID %s is not reachable (returned %d: %s, %s)." % (self._obsdict_key[self._obskey][1], rs, out, err)
else: else:
...@@ -181,11 +181,11 @@ class StartTestThread(threading.Thread): ...@@ -181,11 +181,11 @@ class StartTestThread(threading.Thread):
if (rs != 0): if (rs != 0):
if (rs == 1): if (rs == 1):
if ("No such file or directory" in err): if ("No such file or directory" in err):
msg = "SD card on observer ID %s is not mounted, observer will thus be omitted for this test." % (self._obsdict_key[self._obskey][1]) msg = "SD card on observer ID %s is not mounted (observer will thus be omitted for this test)." % (self._obsdict_key[self._obskey][1])
else: else:
msg = "Observer ID %s is not reachable, it will thus be omitted for this test (returned: %d: %s, %s)." % (self._obsdict_key[self._obskey][1], rs, out, err) msg = "Observer ID %s is not reachable and will thus be omitted for this test (returned: %d: %s, %s)." % (self._obsdict_key[self._obskey][1], rs, out, err)
else: else:
msg = "Observer ID %s is not responsive, it will thus be omitted for this test (SSH returned %d). Command: %s" % (self._obsdict_key[self._obskey][1], rs, " ".join(cmd)) msg = "Observer ID %s is not responsive and will thus be omitted for this test (SSH returned %d)." % (self._obsdict_key[self._obskey][1], rs)
errors.append((msg, errno.EHOSTUNREACH, self._obsdict_key[self._obskey][1])) errors.append((msg, errno.EHOSTUNREACH, self._obsdict_key[self._obskey][1]))
logger.error(msg) logger.error(msg)
else: else:
...@@ -260,6 +260,28 @@ class StartTestThread(threading.Thread): ...@@ -260,6 +260,28 @@ class StartTestThread(threading.Thread):
### END StartTestThread ### END StartTestThread
##############################################################################
#
# write_to_error_log:  write a message into the error log file that is
#                      passed to the user together with the test results
#
##############################################################################
def write_to_error_log(testid, obsid, message):
    """Append an error message to the per-test errorlog.csv file.

    Creates the test results directory and the CSV header if they do not
    exist yet. Silently does nothing if testid is not an int.

    Args:
        testid:  numeric test ID (int); used to locate the results directory
        obsid:   observer ID, written to the observer_id column
        message: error message text (NOTE(review): not CSV-escaped — a comma
                 in the message will shift columns; confirm callers never
                 pass commas or switch to the csv module)
    """
    if not isinstance(testid, int):
        return
    testresultsdir = "%s/%d" % (flocklab.config.get('fetcher', 'testresults_dir'), testid)
    if not os.path.exists(testresultsdir):
        os.makedirs(testresultsdir)
        logger.debug("Created %s" % testresultsdir)
    errorlogfilename = "%s/errorlog.csv" % (testresultsdir)
    # write the CSV header only if the file is being created by this call
    writeheader = not os.path.isfile(errorlogfilename)
    # 'with' closes the file automatically; the explicit close() in the
    # original was redundant
    with open(errorlogfilename, "a") as errorlog:
        if writeheader:
            errorlog.write('timestamp,observer_id,message\n')
        errorlog.write("%.3f,%s,%s\n" % (time.time(), str(obsid), message))
############################################################################## ##############################################################################
# #
...@@ -707,6 +729,7 @@ def start_test(testid, cur, cn, obsdict_key, obsdict_id): ...@@ -707,6 +729,7 @@ def start_test(testid, cur, cn, obsdict_key, obsdict_id):
#logger.error("Error from test start thread for observer %s: %s" %(str(err[2]), str(err[0]))) #logger.error("Error from test start thread for observer %s: %s" %(str(err[2]), str(err[0])))
obs_error.append(err[2]) obs_error.append(err[2])
warnings.append(err[0]) warnings.append(err[0])
write_to_error_log(testid, err[2], err[0])
if len(obs_error) > 0: if len(obs_error) > 0:
# Abort or continue? # Abort or continue?
if abortOnError or not flocklab.config.get("dispatcher", "continue_on_error"): if abortOnError or not flocklab.config.get("dispatcher", "continue_on_error"):
...@@ -731,7 +754,11 @@ def start_test(testid, cur, cn, obsdict_key, obsdict_id): ...@@ -731,7 +754,11 @@ def start_test(testid, cur, cn, obsdict_key, obsdict_id):
rs = p.wait() rs = p.wait()
if (rs != 0): if (rs != 0):
msg = "Serial proxy for test ID %d could not be started (error code %d)." % (testid, rs) msg = "Serial proxy for test ID %d could not be started (error code %d)." % (testid, rs)
errors.append(msg) if abortOnError:
errors.append(msg)
else:
warnings.append(msg)
write_to_error_log(testid, 0, msg)
logger.error(msg) logger.error(msg)
logger.debug("Executed command was: %s" % (str(cmd))) logger.debug("Executed command was: %s" % (str(cmd)))
else: else:
...@@ -1291,7 +1318,7 @@ def main(argv): ...@@ -1291,7 +1318,7 @@ def main(argv):
cn.commit() cn.commit()
if len(errors) != 0: if len(errors) != 0:
# Test start failed. Make it abort: # Test start failed. Make it abort:
logger.warning("Going to abort test because of errors when trying to start it.") logger.error("Going to abort test because of errors when trying to start it.")
abort = True abort = True
# Inform user: # Inform user:
ret = inform_user(testid, cur, action, errors, warnings) ret = inform_user(testid, cur, action, errors, warnings)
......
...@@ -194,11 +194,11 @@ def read_from_db_file(dbfile): ...@@ -194,11 +194,11 @@ def read_from_db_file(dbfile):
# write_to_error_log: Write an error message to the error log file that will be passed to the user # write_to_error_log: Write an error message to the error log file that will be passed to the user
# #
############################################################################## ##############################################################################
def write_to_error_log(timestamp, obsid, nodeid, message): def write_to_error_log(timestamp, obsid, message):
try: try:
testresultsfile_dict['errorlog'][1].acquire() testresultsfile_dict['errorlog'][1].acquire()
errorlog = open(testresultsfile_dict['errorlog'][0], "a") errorlog = open(testresultsfile_dict['errorlog'][0], "a")
errorlog.write("%s,%d,%d,%s\n" % (str(timestamp), obsid, nodeid, message)) errorlog.write("%s,%d,%s\n" % (str(timestamp), obsid, message))
errorlog.close() errorlog.close()
testresultsfile_dict['errorlog'][1].release() testresultsfile_dict['errorlog'][1].release()
except Exception: except Exception:
...@@ -411,7 +411,7 @@ def worker_logs(queueitem=None, nodeid=None, resultfile_path=None, resultfile_lo ...@@ -411,7 +411,7 @@ def worker_logs(queueitem=None, nodeid=None, resultfile_path=None, resultfile_lo
infile = open(inputfilename, "r") infile = open(inputfilename, "r")
for line in infile: for line in infile:
(timestamp, msg) = line.strip().split(',', 1) (timestamp, msg) = line.strip().split(',', 1)
result.append("%s,%d,%s,%s\n" % (timestamp, obsid, nodeid, msg)) result.append("%s,%d,%s\n" % (timestamp, obsid, msg))
infile.close() infile.close()
append_lines_to_file(resultfile_path, resultfile_lock, result) append_lines_to_file(resultfile_path, resultfile_lock, result)
os.remove(inputfilename) os.remove(inputfilename)
...@@ -483,7 +483,7 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, resultfi ...@@ -483,7 +483,7 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, resultfi
# process raw datatrace log (parse & apply time correction) # process raw datatrace log (parse & apply time correction)
dfData, dfLocalTs, dfOverflow = dwt.processDatatraceOutput(input_filename) dfData, dfLocalTs, dfOverflow = dwt.processDatatraceOutput(input_filename)
except Exception as e: except Exception as e:
write_to_error_log('{}'.format(time.time()), obsid, nodeid, 'Datatrace: Error occurred when processing datatrace raw output! Potential problems: SWO/CPU speed mismatch (see cpuSpeed tag in xml config) or target did not start properly. Error: {}'.format(e)) write_to_error_log('{}'.format(time.time()), obsid, 'Datatrace: Error occurred when processing datatrace raw output! Potential problems: SWO/CPU speed mismatch (see cpuSpeed tag in xml config) or target did not start properly. Error: {}'.format(e))
else: else:
if len(dfData): if len(dfData):
# add observer and node ID # add observer and node ID
...@@ -503,11 +503,11 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, resultfi ...@@ -503,11 +503,11 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, resultfi
resultfile_lock.release() resultfile_lock.release()
# append overflow events to errorlog # append overflow events to errorlog
for idx, row in dfOverflow.iterrows(): for idx, row in dfOverflow.iterrows():
write_to_error_log(row['global_ts_uncorrected'], obsid, nodeid, 'Datatrace: event rate too high (overflow occurred)!') write_to_error_log(row['global_ts_uncorrected'], obsid, 'Datatrace: event rate too high (overflow occurred)!')
# append info about delayed timestamps to errorlog # append info about delayed timestamps to errorlog
for idx, row in dfLocalTs.iterrows(): for idx, row in dfLocalTs.iterrows():
if row['tc'] != 0: if row['tc'] != 0:
write_to_error_log(row['global_ts'], obsid, nodeid, 'Datatrace: timestamp has been delayed (tc={})!'.format(row['tc'])) write_to_error_log(row['global_ts'], obsid, 'Datatrace: timestamp has been delayed (tc={})!'.format(row['tc']))
except: except:
msg = "Error in datatrace worker process: %s: %s\n%s" % (str(sys.exc_info()[0]), str(sys.exc_info()[1]), traceback.format_exc()) msg = "Error in datatrace worker process: %s: %s\n%s" % (str(sys.exc_info()[0]), str(sys.exc_info()[1]), traceback.format_exc())
...@@ -1114,7 +1114,7 @@ def main(argv): ...@@ -1114,7 +1114,7 @@ def main(argv):
testresultsfile_dict[service] = (path, lock) testresultsfile_dict[service] = (path, lock)
# Create file and write header: # Create file and write header:
if service in ('errorlog', 'timesynclog'): if service in ('errorlog', 'timesynclog'):
header = 'timestamp,observer_id,node_id,message\n' header = 'timestamp,observer_id,message\n'
elif service == 'gpiotracing': elif service == 'gpiotracing':
header = 'timestamp,observer_id,node_id,pin_name,value\n' header = 'timestamp,observer_id,node_id,pin_name,value\n'
elif service == 'powerprofiling': elif service == 'powerprofiling':
...@@ -1126,9 +1126,11 @@ def main(argv): ...@@ -1126,9 +1126,11 @@ def main(argv):
elif service == 'datatrace': elif service == 'datatrace':
header = 'timestamp,observer_id,node_id,variable,value,access,pc,delay_marker\n' header = 'timestamp,observer_id,node_id,variable,value,access,pc,delay_marker\n'
lock.acquire() lock.acquire()
f = open(path, 'w') # only create file and write header if it does not yet exist (e.g. the errorlog file may already exist)
f.write(header) if not os.path.isfile(path):
f.close() f = open(path, 'w')
f.write(header)
f.close()
lock.release() lock.release()
# Start logging thread: # Start logging thread:
logqueue = manager.Queue(maxsize=10000) logqueue = manager.Queue(maxsize=10000)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment