def mk_full_id_dict(_path, _file, _type):
    """Build a dict of subscriber, peer, or talkgroup records keyed by integer ID."""
    _dict = {}
    if _type == 'subscriber':
        fields = SUB_FIELDS
    elif _type == 'peer':
        fields = PEER_FIELDS
    elif _type == 'tgid':
        fields = TGID_FIELDS
    try:
        with open(_path + _file, 'r', encoding='latin1') as _handle:
            ids = csv_dict_reader(_handle, fieldnames=fields, restkey='OTHER',
                                  dialect='excel', delimiter=',')
            for row in ids:
                # Key each record by its integer ID; skip rows whose ID column
                # is missing or not an integer.
                try:
                    _dict[int(row['ID'])] = dict(row)
                except (KeyError, TypeError, ValueError):
                    pass
        return _dict
    except IOError:
        return _dict
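# For reference, the loop above boils down to csv.DictReader with explicit
# fieldnames and a restkey catch-all, keyed by the integer ID column. A
# minimal self-contained sketch of that pattern, assuming csv_dict_reader is
# an alias for the standard library's csv.DictReader (field names and data
# below are made up for illustration):
import csv
import io

FIELDS = ['ID', 'CALLSIGN', 'NAME']  # hypothetical field list
SAMPLE = "3101234,N0CALL,Example User,extra1,extra2\nnot-a-number,X,Y\n"

reader = csv.DictReader(io.StringIO(SAMPLE), fieldnames=FIELDS,
                        restkey='OTHER', dialect='excel', delimiter=',')
id_dict = {}
for row in reader:
    try:
        # Key each record by its integer ID; overflow columns land in 'OTHER'.
        id_dict[int(row['ID'])] = dict(row)
    except (KeyError, TypeError, ValueError):
        pass  # Skip rows without a usable integer ID.

print(id_dict[3101234]['OTHER'])  # ['extra1', 'extra2']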
def _read_file(self, key):
    resp = self.boto_client.get_object(Bucket=self.bucket, Key=key)

    if key.endswith('.parquet'):
        # Parquet objects are read fully into memory and handed to the
        # parquet reader as a seekable buffer.
        body = resp['Body'].read()
        reader = parquet_dict_reader(io.BytesIO(body))
        yield from reader

        with THREAD_LOCK:
            self.bytes_processed += len(body)
            self.compressed_bytes_processed += resp['ContentLength']
    else:
        # Everything else is treated as a gzipped, space-delimited CSV and
        # decompressed directly from the response body.
        with gz_open(resp['Body'], mode='rt') as gz_f:
            reader = csv_dict_reader(gz_f, delimiter=' ')
            # Normalize hyphenated column names to underscores.
            reader.fieldnames = [
                f.replace('-', '_') for f in reader.fieldnames
            ]
            yield from reader

            with THREAD_LOCK:
                self.bytes_processed += gz_f.tell()
                self.compressed_bytes_processed += resp['ContentLength']
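# The gzip branch above relies on gzip.open accepting an existing file object
# (here assumed to be boto3's get_object StreamingBody, with gz_open assumed
# to be an alias for gzip.open). A minimal self-contained sketch of the same
# decode-and-rename pattern, with an in-memory blob standing in for the S3
# stream and made-up log data:
import csv
import gzip
import io

raw = "status-code request-time\n200 0.12\n404 0.03\n"
blob = io.BytesIO(gzip.compress(raw.encode()))

# gzip.open() accepts a file object; mode='rt' decodes the stream to text.
with gzip.open(blob, mode='rt') as gz_f:
    log_reader = csv.DictReader(gz_f, delimiter=' ')
    # Rewrite hyphenated column names so rows key on underscore names.
    log_reader.fieldnames = [f.replace('-', '_') for f in log_reader.fieldnames]
    for row in log_reader:
        print(row['status_code'], row['request_time'])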
    p.communicate()
except OSError as ex:
    print_file(COMBINED_OUTPUT_FILE_NAME)
    print '#### ERROR Caught OSError `{0}`.'.format(ex)
    print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
    exit(-1)

print_file(COMBINED_OUTPUT_FILE_NAME)

if p.returncode != 0:
    print '#### ERROR Process exited with code {0}.'.format(p.returncode)
    print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
    exit(p.returncode)

with open(COMBINED_OUTPUT_FILE_NAME) as input_file:
    reader = csv_dict_reader(input_file)

    variable_units = reader.next()  # Get units header row.

    distinguishing_variables = reader.fieldnames

    measured_variables = [
        ("STL Average Throughput", "+"),
        ("Thrust Average Throughput", "+")
    ]

    for record in reader:
        for variable, directionality in measured_variables:
            # Don't monitor regressions for STL implementations, nvbug 28980890:
            if "STL" in variable:
                continue

            print "&&&& PERF {0}_{1}_{2}bit_{3}mib_{4} {5} {6}{7}".format(
                record["Algorithm"],
                record["Element Type"],
def __init__(self, input_files, output_file, preserve_whitespace=True):
    """Read the input files, open the output file, and construct a new
    `io_manager` object.

    If `preserve_whitespace` is `False`, leading and trailing whitespace is
    stripped from each CSV cell.

    Raises:
      AssertionError: If `len(input_files) <= 0` or
        `type(preserve_whitespace) != bool`.
    """
    assert len(input_files) > 0, "No input files provided."
    assert type(preserve_whitespace) == bool

    self.preserve_whitespace = preserve_whitespace

    self.readers = deque()

    self.variable_names = None
    self.variable_units = None

    self.input_files = deque()

    for input_file in input_files:
        input_file_object = open(input_file)

        reader = csv_dict_reader(filter_comments(input_file_object))

        if not self.preserve_whitespace:
            strip_list(reader.fieldnames)

        if self.variable_names is None:
            self.variable_names = reader.fieldnames
        else:
            # Make sure all inputs have the same schema.
            assert self.variable_names == reader.fieldnames, \
                "Input file (`" + input_file + "`) variable schema `" + \
                str(reader.fieldnames) + "` does not match the variable schema `" + \
                str(self.variable_names) + "`."

        # Consume the next row, which should be the second line of the header.
        variable_units = reader.next()

        if not self.preserve_whitespace:
            strip_dict(variable_units)

        if self.variable_units is None:
            self.variable_units = variable_units
        else:
            # Make sure all inputs have the same units schema.
            assert self.variable_units == variable_units, \
                "Input file (`" + input_file + "`) units schema `" + \
                str(variable_units) + "` does not match the units schema `" + \
                str(self.variable_units) + "`."

        self.readers.append(reader)
        self.input_files.append(input_file_object)

    if output_file == "-":  # Output to stdout.
        self.output_file = stdout
    else:                   # Output to user-specified file.
        self.output_file = open(output_file, "w")

    self.writer = csv_dict_writer(self.output_file, fieldnames=self.variable_names)
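# The constructor above assumes a two-row header convention: the first CSV
# row names the variables, the second gives their units, and the remaining
# rows are measurements. A minimal sketch of that convention using the
# standard library's csv.DictReader (which csv_dict_reader is assumed to
# wrap); next(reader) is the Python 3 spelling of reader.next() above, and
# the column names and values are made up for illustration:
import csv
import io

SAMPLE = (
    "Algorithm,Element Type,Average Throughput\n"
    "-,-,GB/s\n"
    "reduce,int32,142.7\n"
)

hdr_reader = csv.DictReader(io.StringIO(SAMPLE))
variable_names = hdr_reader.fieldnames   # First header row: variable names.
variable_units = next(hdr_reader)        # Second header row: units per variable.

for record in hdr_reader:                # Remaining rows are measurements.
    print(record["Algorithm"], record["Average Throughput"],
          variable_units["Average Throughput"])
# -> reduce 142.7 GB/s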
def __init__(self, baseline_input_file, observed_input_file,
             output_file, preserve_whitespace = False):
    """Read the input files, open the output file, and construct a new
    `io_manager` object.

    If `preserve_whitespace` is `False`, leading and trailing whitespace is
    stripped from each CSV cell.

    Raises:
      AssertionError: If `type(preserve_whitespace) != bool`.
    """
    assert type(preserve_whitespace) == bool

    self.preserve_whitespace = preserve_whitespace

    # Open baseline results.
    self.baseline_input_file = open(baseline_input_file)
    self.baseline_reader = csv_dict_reader(
        filter_comments(self.baseline_input_file)
    )

    if not self.preserve_whitespace:
        strip_list(self.baseline_reader.fieldnames)

    self.variable_names = list(self.baseline_reader.fieldnames)  # Copy.

    self.variable_units = self.baseline_reader.next()

    if not self.preserve_whitespace:
        strip_dict(self.variable_units)

    # Open observed results.
    self.observed_input_file = open(observed_input_file)
    self.observed_reader = csv_dict_reader(
        filter_comments(self.observed_input_file)
    )

    if not self.preserve_whitespace:
        strip_list(self.observed_reader.fieldnames)

    # Make sure all inputs have the same variables schema.
    assert self.variable_names == self.observed_reader.fieldnames, \
        "Observed results input file (`" + observed_input_file + "`) " + \
        "variable schema `" + str(self.observed_reader.fieldnames) + "` does " + \
        "not match the baseline results input file (`" + baseline_input_file + \
        "`) variable schema `" + str(self.variable_names) + "`."

    # Consume the next row, which should be the second line of the header.
    observed_variable_units = self.observed_reader.next()

    if not self.preserve_whitespace:
        strip_dict(observed_variable_units)

    # Make sure all inputs have the same units schema.
    assert self.variable_units == observed_variable_units, \
        "Observed results input file (`" + observed_input_file + "`) " + \
        "units schema `" + str(observed_variable_units) + "` does not " + \
        "match the baseline results input file (`" + baseline_input_file + \
        "`) units schema `" + str(self.variable_units) + "`."

    if output_file == "-":  # Output to stdout.
        self.output_file = stdout
    else:                   # Output to user-specified file.
        self.output_file = open(output_file, "w")

    self.writer = csv_dict_writer(
        self.output_file, fieldnames = self.variable_names
    )
def __init__(self, input_files, output_file, preserve_whitespace = True):
    """Read the input files, open the output file, and construct a new
    `io_manager` object.

    If `preserve_whitespace` is `False`, leading and trailing whitespace is
    stripped from each CSV cell.

    Raises:
      AssertionError: If `len(input_files) <= 0` or
        `type(preserve_whitespace) != bool`.
    """
    assert len(input_files) > 0, "No input files provided."
    assert type(preserve_whitespace) == bool

    self.preserve_whitespace = preserve_whitespace

    self.readers = deque()

    self.variable_names = None
    self.variable_units = None

    self.input_files = deque()

    for input_file in input_files:
        input_file_object = open(input_file)

        reader = csv_dict_reader(filter_comments(input_file_object))

        if not self.preserve_whitespace:
            strip_list(reader.fieldnames)

        if self.variable_names is None:
            self.variable_names = reader.fieldnames
        else:
            # Make sure all inputs have the same schema.
            assert self.variable_names == reader.fieldnames, \
                "Input file (`" + input_file + "`) variable schema `" + \
                str(reader.fieldnames) + "` does not match the variable schema `" + \
                str(self.variable_names) + "`."

        # Consume the next row, which should be the second line of the header.
        variable_units = reader.next()

        if not self.preserve_whitespace:
            strip_dict(variable_units)

        if self.variable_units is None:
            self.variable_units = variable_units
        else:
            # Make sure all inputs have the same units schema.
            assert self.variable_units == variable_units, \
                "Input file (`" + input_file + "`) units schema `" + \
                str(variable_units) + "` does not match the units schema `" + \
                str(self.variable_units) + "`."

        self.readers.append(reader)
        self.input_files.append(input_file_object)

    if output_file == "-":  # Output to stdout.
        self.output_file = stdout
    else:                   # Output to user-specified file.
        self.output_file = open(output_file, "w")

    self.writer = csv_dict_writer(
        self.output_file, fieldnames = self.variable_names
    )
    p.communicate()
except OSError as ex:
    print_file(COMBINED_OUTPUT_FILE_NAME)
    print '#### ERROR Caught OSError `{0}`.'.format(ex)
    print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
    exit(-1)

print_file(COMBINED_OUTPUT_FILE_NAME)

if p.returncode != 0:
    print '#### ERROR Process exited with code {0}.'.format(p.returncode)
    print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
    exit(p.returncode)

with open(COMBINED_OUTPUT_FILE_NAME) as input_file:
    reader = csv_dict_reader(input_file)

    variable_units = reader.next()  # Get units header row.

    distinguishing_variables = reader.fieldnames

    measured_variables = [
        ("STL Average Throughput", "+"),
        ("Thrust Average Throughput", "+")
    ]

    for record in reader:
        for variable, directionality in measured_variables:
            print "&&&& PERF {0}_{1}_{2}bit_{3}mib_{4} {5} {6}{7}".format(
                record["Algorithm"],
                record["Element Type"],