def main(input_dir, output_dir, entity):
    """Read the stored JSON for *entity*, split it into labels and lines,
    and write both to *output_dir*.

    The input file is expected at ``<input_dir>/<entity lowercased>.json``.
    """
    source_path = f"{input_dir}/{entity.lower()}.json"
    source_data = read_data(source_path)
    # process_data splits the current data into two parallel results.
    labels, lines = process_data(cur_data=source_data)
    write_data(labels, lines, output_dir)
def main(input_dir, output_dir, entity):
    """Rewrite the stored JSON for *entity*: read it from *input_dir*,
    process it, and save the result under the same filename in *output_dir*.
    """
    in_path = f"{input_dir}/{entity.lower()}.json"
    # Read and transform in one pass; process_data returns the new payload.
    processed = process_data(cur_data=read_data(in_path))
    out_path = f"{output_dir}/{entity.lower()}.json"
    write_data(processed, out_path)
def main(new_file, cur_dir, out_dir):
    """Compare freshly delivered data against the stored snapshot of the
    same entity and write the resulting mutations to ``mutations.json``
    in *out_dir*.

    The entity name is taken from the incoming file's ``entity`` field and
    used to locate the current snapshot in *cur_dir*.
    """
    incoming = read_data(new_file)
    entity = incoming["entity"]
    snapshot = read_data(f"{cur_dir}/{entity.lower()}.json")
    # process_data derives the set of changes between snapshot and incoming.
    mutations = process_data(cur_data=snapshot, new_data=incoming)
    write_data(mutations, f"{out_dir}/mutations.json")
def main(mutations_file, data_dir):
    """Apply a mutations file to the stored snapshot of its entity,
    overwriting that snapshot in place.

    The entity name comes from the mutations payload's ``entity`` field
    and selects which file in *data_dir* is updated.
    """
    mutations = read_data(mutations_file)
    entity = mutations["entity"]
    snapshot_path = f"{data_dir}/{entity.lower()}.json"
    snapshot = read_data(snapshot_path)
    updated = process_data(cur_data=snapshot, mutations=mutations)
    # The updated data replaces the original snapshot file.
    write_data(updated, snapshot_path)
def main():
    """Command-line entry point for the survey-processing pipeline.

    Reads parameters from ``sys.argv``, loads and validates the input data,
    processes every row, optionally writes a plain-text summary log (when a
    ``logfile`` parameter is present), and writes the processed surveys.

    Side effects: reads the config/data files named in the parameters,
    may create/overwrite the log file, and writes the output via
    ``writer.write_data``.
    """
    # Get the arguments from the command-line except the filename.
    argv = sys.argv[1:]
    params = get_params(argv)
    params['data_params'], type_dict = reader.get_data_settings(params['config_file'])
    header, raw_data = reader.get_data(params)
    type_selector, converters = reader.validate_data_format(params['data_params'], header)
    key_fails, failed_rows, surveys, out_keys = data_processor.process_rows(
        raw_data, header, params, converters)
    if 'logfile' in params:
        now = datetime.datetime.now()
        # Build all log lines first, then write them in a single batched call.
        log_lines = [
            'Completed: ' + now.strftime("%B %d, %Y") + '\n',
            'Total of %s rows were not completed' % len(failed_rows) + '\n',
        ]
        for key, fail_count in key_fails.items():
            log_lines.append(str(fail_count) + ' ' + str(key) + ' key(s) could not be read' + '\n')
        log_lines.append('%s surveys read, %s surveys failed'
                         % (len(raw_data), len(failed_rows)) + '\n')
        # Guard against ZeroDivisionError when the input contained no rows.
        if raw_data:
            success_rate = round((len(raw_data) - len(failed_rows)) / len(raw_data) * 100, 2)
        else:
            success_rate = 0.0
        log_lines.append('%s percent success rate' % str(success_rate) + '\n')
        with open(params['logfile'], 'w') as logfile:
            logfile.writelines(log_lines)
    out_header = header + out_keys
    writer.write_data(surveys, out_header, params)
def main():
    """Command-line entry point for the extended survey-processing pipeline.

    Reads parameters from ``sys.argv``, configures the logging level from an
    optional ``debug`` parameter, loads and validates the input data,
    processes every row, writes a txt or csv log when ``logfile`` is given,
    and writes the processed surveys plus optional quality-assessment
    outputs (output file must end in ``.csv``).

    Side effects: reads config/data files, adjusts the root logger level,
    prints progress to stdout, and writes output/log files via ``writer``.
    """
    # Get the arguments from the command-line except the filename.
    argv = sys.argv[1:]
    params = get_params(argv)
    print('Program started, input file: %s' % params['filepath'])
    # Logging level: debug=1 -> DEBUG, any other debug value -> WARNING,
    # no debug parameter at all -> INFO.
    if 'debug' in params.keys():
        if params['debug'] == '1':
            print('Setting debug level: Debug')
            logging.getLogger().setLevel(logging.DEBUG)
        else:
            print('Setting debug level: Warnings')
            logging.getLogger().setLevel(logging.WARNING)
    else:
        logging.getLogger().setLevel(logging.INFO)
    params['data_params'], type_dict = reader.get_data_settings(params['config_file'])
    header, raw_data = reader.get_data(params)
    type_selector, converters = reader.validate_data_format(params['data_params'], header)
    key_fails, failed_rows, surveys, quality_assessment, out_keys, meta, raw_surveys = \
        data_processor.process_rows(raw_data, header, params, converters)
    if 'logfile' in params:
        # The log format is chosen by the logfile extension: txt or csv.
        if params['logfile'].endswith('txt'):
            writer.write_log(
                logfiler.write_txt_log(params, key_fails, raw_data, failed_rows, meta),
                params['logfile'], 'Log:')
        elif params['logfile'].endswith('csv'):
            writer.write_log(
                logfiler.create_pbi_log(surveys, quality_assessment,
                                        meta['attribute_quality'], meta,
                                        failed_rows, params),
                params['logfile'], ['Key', 'Value'])
        else:
            # BUG FIX: was logging.CRITICAL(...) — calling the integer level
            # constant raises TypeError; logging.critical is the log function
            # (matching the lowercase call in the final else branch below).
            logging.critical('Logfile not written, wrong file extension provided')
    out_header = surveys[0].keys()
    if params['outfile'].endswith('.csv'):
        writer.write_data(surveys, out_header, params, rounding=9, raw_surveys=raw_surveys)
        if len(quality_assessment) > 0:
            writer.write_data(quality_assessment, quality_assessment[0].keys(),
                              params, sub_file='group_qual')
        if len(meta['attribute_quality']) > 0 and 'logfile' in params:
            writer.write_data(meta['attribute_quality'],
                              meta['attribute_quality'][0].keys(),
                              params, sub_file='attr_qual')
    else:
        logging.critical("Your output filename: %s must end with .csv" % params['outfile'])
def main(input_dir, output_file):
    """Read the raw stadsdelen data from *input_dir*, process it, and
    write the result to *output_file*."""
    # Read, transform, and persist in a single pipeline.
    processed = process_data(read_data(input_dir=input_dir))
    write_data(processed, output_file=output_file)