def get_woven_logs(log_files, pod, filters, objref_dict):
    """Fetch, merge, and digest the selected log files into one woven view.

    Reads every file in *log_files* from GCS, splits them into lines,
    folds any line lacking a timestamp into the preceding timestamped
    line (lines appearing before the first timestamp are prepended to
    the first timestamped line), sorts the result by timestamp, and
    hands the merged text to log_parser.digest.

    Args:
        log_files: iterable of GCS paths to read.
        pod: pod name; highlighted via a word-boundary regex.
        filters: filter options forwarded to log_parser.digest.
        objref_dict: object-reference mapping forwarded to log_parser.digest.

    Returns:
        The digested (woven) log produced by log_parser.digest.
    """
    pod_re = regex.wordRE(pod)

    # Gather every line from all selected logs, in file order.
    raw_lines = []
    for path in log_files:
        contents = gcs_async.read(path).get_result()
        raw_lines.extend(contents.decode('utf8', 'replace').split('\n'))

    # Fold timestamp-less lines into their predecessor. Anything seen
    # before the first timestamped line is buffered in `prefix` and
    # attached to that first timestamped line when it arrives.
    merged = []
    prefix = ""
    for raw in raw_lines:
        match = regex.timestamp(raw)
        has_timestamp = bool(match and match.group(0))
        if has_timestamp:
            if not merged:
                # Attach any pre-timestamp preamble to the first real line.
                raw = prefix + raw
            merged.append(raw)
        elif merged:
            merged[-1] = merged[-1] + raw
        else:
            prefix = prefix + raw

    # Interleave all logs chronologically, then digest.
    ordered = sorted(merged, key=regex.sub_timestamp)
    return log_parser.digest('\n'.join(ordered),
                             error_re=pod_re,
                             filters=filters,
                             objref_dict=objref_dict)
def test_timestamp(self):
    """Verify regex.timestamp matches only strings that begin with a timestamp."""
    cases = [
        ('I0629 17:33:09.813041', True),
        ('2016-07-22T19:01:11.150204523Z', True),
        ('629 17:33:09.813041:', False),
        ('629 17:33:09', False),
    ]
    for text, expected in cases:
        got = bool(regex.timestamp(text))
        self.assertEqual(got, expected,
                         'test_timestamp(%r) should be %r' % (text, expected))