Commit fad00808 authored by Reto Da Forno's avatar Reto Da Forno
Browse files

node_id removed from timesync and error logs, and also log an error if an...

Removed node_id from the timesync and error logs; additionally, log an error if an observer is unreachable or fails to start a test.
parent ddb36886
......@@ -89,7 +89,7 @@ class StopTestThread(threading.Thread):
if (rs != 0):
if (rs == 1):
if ("No such file or directory" in err):
msg = "SD card on observer ID %s is not mounted, observer will thus be omitted for this test." % (self._obsdict_key[self._obskey][1])
msg = "SD card on observer ID %s is not mounted." % (self._obsdict_key[self._obskey][1])
else:
msg = "Observer ID %s is not reachable (returned %d: %s, %s)." % (self._obsdict_key[self._obskey][1], rs, out, err)
else:
......@@ -181,11 +181,11 @@ class StartTestThread(threading.Thread):
if (rs != 0):
if (rs == 1):
if ("No such file or directory" in err):
msg = "SD card on observer ID %s is not mounted, observer will thus be omitted for this test." % (self._obsdict_key[self._obskey][1])
msg = "SD card on observer ID %s is not mounted (observer will thus be omitted for this test)." % (self._obsdict_key[self._obskey][1])
else:
msg = "Observer ID %s is not reachable, it will thus be omitted for this test (returned: %d: %s, %s)." % (self._obsdict_key[self._obskey][1], rs, out, err)
msg = "Observer ID %s is not reachable and will thus be omitted for this test (returned: %d: %s, %s)." % (self._obsdict_key[self._obskey][1], rs, out, err)
else:
msg = "Observer ID %s is not responsive, it will thus be omitted for this test (SSH returned %d). Command: %s" % (self._obsdict_key[self._obskey][1], rs, " ".join(cmd))
msg = "Observer ID %s is not responsive and will thus be omitted for this test (SSH returned %d)." % (self._obsdict_key[self._obskey][1], rs)
errors.append((msg, errno.EHOSTUNREACH, self._obsdict_key[self._obskey][1]))
logger.error(msg)
else:
......@@ -260,6 +260,28 @@ class StartTestThread(threading.Thread):
### END StartTestThread
##############################################################################
#
# write_to_error_log write into the error log file that is passed to the user together with the test results
#
##############################################################################
def write_to_error_log(testid, obsid, message):
    """Append an error message to the per-test error log CSV.

    The error log file (<testresults_dir>/<testid>/errorlog.csv) is later
    delivered to the user together with the test results. Creates the test
    results directory and the CSV header on first use.

    Args:
        testid:  integer test ID; silently ignored if not an int (guard
                 against being called before a test ID is known)
        obsid:   observer ID to record in the CSV row (0 if not
                 observer-specific)
        message: human-readable error text (NOTE(review): commas in the
                 message are not escaped and will add extra CSV columns)
    """
    if not isinstance(testid, int):
        return
    testresultsdir = "%s/%d" % (flocklab.config.get('fetcher', 'testresults_dir'), testid)
    if not os.path.exists(testresultsdir):
        os.makedirs(testresultsdir)
        logger.debug("Created %s" % testresultsdir)
    errorlogfilename = "%s/errorlog.csv" % (testresultsdir)
    # write the CSV header only when creating the file; it may already exist
    writeheader = not os.path.isfile(errorlogfilename)
    # reuse errorlogfilename here (the original rebuilt the same path string);
    # the 'with' statement closes the file, no explicit close() needed
    with open(errorlogfilename, "a") as errorlog:
        if writeheader:
            errorlog.write('timestamp,observer_id,message\n')
        errorlog.write("%.3f,%s,%s\n" % (time.time(), str(obsid), message))
##############################################################################
#
......@@ -707,6 +729,7 @@ def start_test(testid, cur, cn, obsdict_key, obsdict_id):
#logger.error("Error from test start thread for observer %s: %s" %(str(err[2]), str(err[0])))
obs_error.append(err[2])
warnings.append(err[0])
write_to_error_log(testid, err[2], err[0])
if len(obs_error) > 0:
# Abort or continue?
if abortOnError or not flocklab.config.get("dispatcher", "continue_on_error"):
......@@ -731,7 +754,11 @@ def start_test(testid, cur, cn, obsdict_key, obsdict_id):
rs = p.wait()
if (rs != 0):
msg = "Serial proxy for test ID %d could not be started (error code %d)." % (testid, rs)
errors.append(msg)
if abortOnError:
errors.append(msg)
else:
warnings.append(msg)
write_to_error_log(testid, 0, msg)
logger.error(msg)
logger.debug("Executed command was: %s" % (str(cmd)))
else:
......@@ -1291,7 +1318,7 @@ def main(argv):
cn.commit()
if len(errors) != 0:
# Test start failed. Make it abort:
logger.warning("Going to abort test because of errors when trying to start it.")
logger.error("Going to abort test because of errors when trying to start it.")
abort = True
# Inform user:
ret = inform_user(testid, cur, action, errors, warnings)
......
......@@ -194,11 +194,11 @@ def read_from_db_file(dbfile):
# write_to_error_log: Write an error message to the error log file that will be passed to the user
#
##############################################################################
def write_to_error_log(timestamp, obsid, nodeid, message):
def write_to_error_log(timestamp, obsid, message):
try:
testresultsfile_dict['errorlog'][1].acquire()
errorlog = open(testresultsfile_dict['errorlog'][0], "a")
errorlog.write("%s,%d,%d,%s\n" % (str(timestamp), obsid, nodeid, message))
errorlog.write("%s,%d,%s\n" % (str(timestamp), obsid, message))
errorlog.close()
testresultsfile_dict['errorlog'][1].release()
except Exception:
......@@ -411,7 +411,7 @@ def worker_logs(queueitem=None, nodeid=None, resultfile_path=None, resultfile_lo
infile = open(inputfilename, "r")
for line in infile:
(timestamp, msg) = line.strip().split(',', 1)
result.append("%s,%d,%s,%s\n" % (timestamp, obsid, nodeid, msg))
result.append("%s,%d,%s\n" % (timestamp, obsid, msg))
infile.close()
append_lines_to_file(resultfile_path, resultfile_lock, result)
os.remove(inputfilename)
......@@ -483,7 +483,7 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, resultfi
# process raw datatrace log (parse & apply time correction)
dfData, dfLocalTs, dfOverflow = dwt.processDatatraceOutput(input_filename)
except Exception as e:
write_to_error_log('{}'.format(time.time()), obsid, nodeid, 'Datatrace: Error occurred when processing datatrace raw output! Potential problems: SWO/CPU speed mismatch (see cpuSpeed tag in xml config) or target did not start properly. Error: {}'.format(e))
write_to_error_log('{}'.format(time.time()), obsid, 'Datatrace: Error occurred when processing datatrace raw output! Potential problems: SWO/CPU speed mismatch (see cpuSpeed tag in xml config) or target did not start properly. Error: {}'.format(e))
else:
if len(dfData):
# add observer and node ID
......@@ -503,11 +503,11 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, resultfi
resultfile_lock.release()
# append overflow events to errorlog
for idx, row in dfOverflow.iterrows():
write_to_error_log(row['global_ts_uncorrected'], obsid, nodeid, 'Datatrace: event rate too high (overflow occurred)!')
write_to_error_log(row['global_ts_uncorrected'], obsid, 'Datatrace: event rate too high (overflow occurred)!')
# append info about delayed timestamps to errorlog
for idx, row in dfLocalTs.iterrows():
if row['tc'] != 0:
write_to_error_log(row['global_ts'], obsid, nodeid, 'Datatrace: timestamp has been delayed (tc={})!'.format(row['tc']))
write_to_error_log(row['global_ts'], obsid, 'Datatrace: timestamp has been delayed (tc={})!'.format(row['tc']))
except:
msg = "Error in datatrace worker process: %s: %s\n%s" % (str(sys.exc_info()[0]), str(sys.exc_info()[1]), traceback.format_exc())
......@@ -1114,7 +1114,7 @@ def main(argv):
testresultsfile_dict[service] = (path, lock)
# Create file and write header:
if service in ('errorlog', 'timesynclog'):
header = 'timestamp,observer_id,node_id,message\n'
header = 'timestamp,observer_id,message\n'
elif service == 'gpiotracing':
header = 'timestamp,observer_id,node_id,pin_name,value\n'
elif service == 'powerprofiling':
......@@ -1126,9 +1126,11 @@ def main(argv):
elif service == 'datatrace':
header = 'timestamp,observer_id,node_id,variable,value,access,pc,delay_marker\n'
lock.acquire()
f = open(path, 'w')
f.write(header)
f.close()
# only create file and write header if it does not yet exist (e.g. the errorlog file may already exist)
if not os.path.isfile(path):
f = open(path, 'w')
f.write(header)
f.close()
lock.release()
# Start logging thread:
logqueue = manager.Queue(maxsize=10000)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment