def utm_to_latlong(input_data_file=None, output_data_file=None, log_file=None):
    """Convert UTM coordinates in a CSV file into latitude/longitude.

    Each input row is expected to contain: easting, northing, zone number,
    and a hemisphere letter ('N' for northern; anything else is treated as
    southern).

    :param input_data_file: CSV file of UTM coordinate rows (required)
    :param output_data_file: CSV file to write (latitude, longitude) rows
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    # Check required input and output data file names were given.
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    # Context managers replace the nested open/try/finally pairs and still
    # guarantee both files are closed even if a row fails to convert.
    with open(input_data_file, 'r') as _in, open(output_data_file, 'w') as _out:
        data = csv.reader(_in)
        output = csv.writer(_out)
        for row_ind, row in enumerate(data):
            east = float(row[0])
            north = float(row[1])
            zone = int(row[2])
            latlong = utm.to_latlon(east, north, zone, northern=('N' == row[3]))
            logger.info('Changed row {} from: {} to: {}'.format(
                row_ind, (row[0], row[1]), latlong))
            output.writerow(latlong)
def calc_dec_deg_to_deg_min_sec(missing_value, input_data_file=None,
                                output_data_file=None, log_file=None):
    """Convert a column of decimal degrees into degrees, minutes, seconds.

    Rows equal to the missing value are passed through as three missing
    values.

    :param missing_value: sentinel marking missing data
    :param input_data_file: CSV file with one decimal-degree column
    :param output_data_file: CSV file receiving degree, minute, second columns
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        # Bug fix: the previous message described the opposite conversion.
        logger.info('Convert decimal degrees to degrees minutes seconds')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if float(line[0]) != float(missing_value):
                decimal_degree = math.fabs(float(line[0]))
                # Bug fix: value / fabs(value) raised ZeroDivisionError for a
                # 0.0 input; copysign yields +/-1.0 for any value, including 0.
                sign = math.copysign(1.0, float(line[0]))
                degree = math.trunc(decimal_degree)
                minute = (decimal_degree - degree) * 60.
                second = (minute - math.trunc(minute)) * 60.
                minute = math.trunc(minute)
                degree = sign * degree
            else:
                degree = float(missing_value)
                minute = float(missing_value)
                second = float(missing_value)
            new_line.append(degree)
            new_line.append(minute)
            new_line.append(second)
            # NOTE(review): all three components are rounded to whole numbers;
            # an earlier draft used '{:.4f}' for seconds — confirm intent.
            output.writerow(['{:.0f}'.format(x) for x in new_line])
def read_config(self, config_file):
    """Load a YAML configuration file and initialise this object's logger.

    :param config_file: path to the YAML configuration file
    :return: the parsed configuration (also stored on self.config)
    """
    self.config_file = config_file
    with open(self.config_file) as stream:
        self.config = yaml.safe_load(stream)
    self.logger = setup_logger(__name__,
                               log_file=self.get_log_file(),
                               log_level=self.get_log_level())
    self.logger.debug('Loaded configuration file: {}'.format(config_file))
    return self.config
def calc_deg_min_sec_to_dec_deg(missing_value, input_data_file=None,
                                output_data_file=None, log_file=None):
    """Convert columns of (degrees, minutes, seconds) to decimal degrees.

    Rows where any of the three components equals the missing value are
    written out as the missing value.

    :param missing_value: sentinel marking missing data
    :param input_data_file: CSV file with degree, minute, second columns
    :param output_data_file: CSV file receiving one decimal-degree column
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Convert degrees minutes seconds to decimal degrees')
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if (float(line[0]) != float(missing_value)) and \
                    (float(line[1]) != float(missing_value)) and \
                    (float(line[2]) != float(missing_value)):
                # Bug fix: value / fabs(value) raised ZeroDivisionError when
                # the degree component was zero; copysign handles 0.0 too.
                sign = math.copysign(1.0, float(line[0]))
                value = math.fabs(float(line[0])) \
                    + float(line[1]) / 60. + float(line[2]) / 3600.
                value = value * sign
            else:
                value = float(missing_value)
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
def file_manager(self, filenames, output_dir, temp_dir, log_file=None,
                 log_level=DEFAULT_LOG_LEVEL):
    """
    Pair each data file with an output path, an identifier, and a log file.

    filenames: a sequence of paths to data files
    output_dir: directory for the per-file '<name>_out.csv' outputs
    temp_dir: directory for the per-file '<name>.log' files
    Returns a list of (input name, output path, identifier, log path) tuples;
    files whose log cannot be created are logged and skipped.
    """
    self.logger = setup_logger(__name__, log_file, log_level)
    step_files = []
    # Loop-invariant: the directories do not change per file.
    output_dir_path = Path(output_dir)
    temp_dir_path = Path(temp_dir)
    for identifier, name in enumerate(filenames, start=1):
        name_path = Path(name)
        # Bug fix: `suffix is ''` compared string identity, not equality;
        # an empty suffix is now tested with truthiness.
        if not name_path.suffix:
            output_filename = name_path.name + '_out.csv'
            log_filename = name_path.name + '.log'
        else:
            output_filename = name_path.name.replace(name_path.suffix, '_out.csv')
            log_filename = name_path.name.replace(name_path.suffix, '.log')
        output_path = output_dir_path.joinpath(output_filename)
        log_path = temp_dir_path.joinpath(log_filename)
        try:
            # Truncate an existing log, or create a fresh one; a missing
            # parent directory raises FileNotFoundError and skips the file.
            if log_path.exists():
                log_path.write_bytes(b'')
            else:
                log_path.touch(mode=0o666)
            step_files.append((name, str(output_path), identifier, str(log_path)))
        except FileNotFoundError:
            self.logger.error('The log file {f} does not exist and cannot be created.'.format(f=str(log_path)))
    return step_files
def read_csv_file(file_name, log_file=None):
    """Read a single-quoted CSV file into a list of rows of converted values.

    Cells are converted with cast_to_decimal; per-row column counts are
    collected and verified with column_check.

    :param file_name: path of the CSV file to read
    :param log_file: optional log file passed to setup_logger
    :return: list of rows, each a list of converted cell values
    """
    logger = setup_logger(__name__, log_file)
    logger.info('Reading file: {}'.format(file_name))
    data = []
    with open(file_name, newline='') as _from:
        count = []
        data = []
        data_reader = csv.reader(_from, quoting=csv.QUOTE_ALL,
                                 skipinitialspace=True, quotechar="'")
        for line in data_reader:
            new_line = [cast_to_decimal(elem) for elem in line]
            data.append(new_line)
            count.append(len(line))
        try:
            if len(data) == 0:
                # Bug fix: Logger.warn is a deprecated alias; use warning().
                logger.warning('Data file is empty.')
            else:
                column_check(count, logger)
        except IOError as e:
            logger.error(e)
    return data
def math_add_constant(constant, missing_value, input_data_file=None,
                      output_data_file=None, log_file=None):
    """Add a constant to every cell of the input CSV, preserving missing values.

    :param constant: value added to each non-missing cell
    :param missing_value: sentinel marking missing data; passed through as-is
    :param input_data_file: CSV file with columns of values (required)
    :param output_data_file: CSV file to write the shifted values (required)
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values to be added to is required.'
    assert output_data_file is not None, 'An output CSV file to write new values to is required.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Adding {} to the column'.format(constant))
        writer = csv.writer(_out)
        reader = csv.reader(_in, quotechar="'", quoting=csv.QUOTE_ALL)
        # Convert the constants once; the rows below reuse them.
        decimal_constant = cast_float_to_decimal(constant)
        decimal_missing = cast_float_to_decimal(missing_value)
        for row in reader:
            shifted = []
            for cell in row:
                decimal_cell = cast_float_to_decimal(cell)
                total = (decimal_cell + decimal_constant
                         if decimal_cell != decimal_missing else decimal_missing)
                shifted.append(str(total))
            writer.writerow(shifted)
def __init__(self, *args, **kwargs):
    """Initialise the translator; an optional 'log_file' keyword selects the log."""
    super(ConfigTranslator, self).__init__(*args, **kwargs)
    self.config_file = None
    self.config = None
    # kwargs.get() gives the same None default as the old membership check,
    # without mutating the kwargs dict.
    self.logger = setup_logger(__name__, kwargs.get('log_file'))
def reformat_dates_to_gtnp(date_time_format, input_data_file=None,
                           output_data_file=None, log_file=None):
    """
    Rewrite every date/time cell into the single-quoted GTN-P format.

    Cells that do not parse with the given format are logged and left
    unchanged.

    :param date_time_format: strptime format of date/times in the input file
    :param input_data_file: CSV file containing date/time cells
    :param output_data_file: CSV file for the reformatted date/times
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        reader = csv.reader(_in)
        writer = csv.writer(_out)
        for row in reader:
            for position, cell in enumerate(row):
                try:
                    parsed = dt.datetime.strptime(cell.strip(), date_time_format)
                    row[position] = "'{0}'".format(
                        parsed.strftime(gtnp_date_time_format))
                except ValueError as error:
                    logger.error(error)
            writer.writerow(row)
def calc_add_quad_col(missing_value, input_data_file=None,
                      output_data_file=None, log_file=None):
    """Combine two columns in quadrature: out = sqrt(a**2 + b**2).

    Rows where either input equals the missing value are written out as the
    missing value.

    :param missing_value: sentinel marking missing data
    :param input_data_file: CSV file with two columns of values
    :param output_data_file: CSV file receiving one combined column
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info(
            'Add two columns in quadrature (out = sqrt(column_a**2 +column_b**2))'
        )
        output = csv.writer(_out)
        reader = csv.reader(_in)
        for line in reader:
            new_line = array('f')
            if (float(line[0]) != float(missing_value)) and \
                    (float(line[1]) != float(missing_value)):
                # math.hypot computes sqrt(a*a + b*b) without intermediate
                # overflow/underflow for extreme magnitudes.
                value = math.hypot(float(line[0]), float(line[1]))
            else:
                value = float(missing_value)
            new_line.append(value)
            output.writerow(['{:.10f}'.format(x) for x in new_line])
def calc_copy_col(input_data_file=None, output_data_file=None, log_file=None):
    """Copy every row of the input CSV unchanged to the output CSV.

    :param input_data_file: CSV file to copy (required)
    :param output_data_file: CSV file to write (required)
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Copy input column to output column')
        # writerows streams the reader straight through, row by row.
        csv.writer(_out).writerows(csv.reader(_in))
def write_csv_file(output_file, output_data, log_file=None,
                   log_level=DEFAULT_LOG_LEVEL):
    """Write rows to a CSV file; Decimal cells bare, all others single-quoted.

    :param output_file: destination file path
    :param output_data: iterable of rows (each an iterable of cells)
    :param log_file: optional log file passed to setup_logger
    :param log_level: logging level for the logger
    """
    logger = setup_logger(__name__, log_file=log_file, log_level=log_level)
    logger.info('Writing data to file: {}'.format(output_file))
    output_path = Path(output_file)
    with output_path.open('w', newline='\n') as _to:
        for line in output_data:
            cells = [str(elem) if isinstance(elem, Decimal)
                     else "'{}'".format(elem)
                     for elem in line]
            # Empty rows emit nothing, matching the cell-by-cell original.
            if cells:
                _to.write(','.join(cells) + '\n')
    output_path.touch(mode=0o666, exist_ok=True)
def write_csv_file(output_file, output_data, log_file=None):
    """Write rows to a CSV file; Decimal cells bare, all others single-quoted.

    :param output_file: destination file path
    :param output_data: iterable of rows (each an iterable of cells)
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    logger.info('Writing data to file: {}'.format(output_file))
    target = Path(output_file)
    with target.open('w', newline='\n') as sink:
        for row in output_data:
            last = len(row) - 1
            for position, value in enumerate(row):
                if isinstance(value, Decimal):
                    sink.write(str(value))
                else:
                    sink.write("'{}'".format(value))
                # Comma between cells, newline after the final cell.
                sink.write(',' if position < last else '\n')
    target.touch(mode=0o666, exist_ok=True)
def widget_template(method_arg_1, method_arg_2, input_data_file=None,
                    output_data_file=None, log_file=None,
                    log_level=DEFAULT_LOG_LEVEL):
    """Demonstration widget: report its arguments via the logger or stdout."""
    report = ['I am a widget. Here are my arguments:',
              '\tinput_data_file = {}'.format(input_data_file),
              '\toutput_data_file = {}'.format(output_data_file),
              '\tlog_file = {}'.format(log_file),
              '\tlog_level = {}'.format(log_level),
              '\tmethod_arg_1 = {}'.format(method_arg_1),
              '\tmethod_arg_2 = {}'.format(method_arg_2)]
    # With a log file the messages go to the logger; otherwise to stdout.
    if log_file:
        logger = setup_logger(__name__, log_file=log_file, log_level=log_level)
        for message in report:
            logger.info(message)
    else:
        for message in report:
            print(message)
def sort_by_columns(column_list, input_data_file=None, output_data_file=None,
                    log_file=None):
    """
    Sort a CSV file by one or more columns in ascending order.

    :param column_list: list of (1-based index, type) tuples describing sort
        columns, or a string parsed with tuple_list
    :param input_data_file: CSV file to sort
    :param output_data_file: sorted CSV file
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    logger.info('Sorting input file by columns:')
    if isinstance(column_list, str):
        column_list = tuple_list(column_list)
    # Shift the 1-based configuration indices to 0-based row indices.
    shifted_list = []
    for index, ind_type in column_list:
        new_tuple = (index - 1, ind_type)
        logger.info('\t' + str(new_tuple))
        shifted_list.append(new_tuple)
    header_row = None
    csv_data = []
    with open(input_data_file, 'r') as csvfile:
        unsorted_reader = csv.reader(csvfile, delimiter=',', quotechar="'")
        for ind, row in enumerate(unsorted_reader):
            row = [cast_data_value(col_val.strip()) for col_val in row]
            if ind > 0:
                csv_data.append(create_typed_row(row, shifted_list, logger))
            else:
                header_row = row
    # Stable sorts applied from least- to most-significant column produce a
    # multi-column ascending sort.
    sorted_data = csv_data
    for index, col_type in reversed(shifted_list):
        sorted_data = sorted(sorted_data, key=lambda sort_by: sort_by[index])
    # Bug fix: the writer's file handle was opened inline and never closed;
    # it is now managed by a with-statement.
    with open(output_data_file, 'w') as out_file:
        sorted_writer = csv.writer(out_file, quotechar="'",
                                   quoting=csv.QUOTE_NONNUMERIC,
                                   lineterminator='\n')
        sorted_writer.writerow(header_row)
        for sorted_row in sorted_data:
            # NOTE(review): date_time_index is not defined in this function;
            # it appears to rely on a module-level global — confirm it exists
            # and is kept in sync with the sorted columns.
            if date_time_index is not None:
                row_list = list(sorted_row)
                row_list[date_time_index] = row_list[date_time_index].strftime(gtnp_date_time_format)
                sorted_row = tuple(row_list)
            sorted_writer.writerow(sorted_row)
def widget_template(method_arg_1, method_arg_2, input_data_file=None,
                    output_data_file=None, log_file=None):
    """Demonstration widget: echo its arguments to a logger or to stdout."""
    report = ['I am a widget. Here are my arguments:']
    for label, value in (('input_data_file', input_data_file),
                         ('output_data_file', output_data_file),
                         ('log_file', log_file),
                         ('method_arg_1', method_arg_1),
                         ('method_arg_2', method_arg_2)):
        report.append('\t{} = {}'.format(label, value))
    # With a log file the messages go to the logger; otherwise to stdout.
    if log_file:
        logger = setup_logger(__name__, log_file)
        emit = logger.info
    else:
        emit = print
    for message in report:
        emit(message)
def math_absolute_value(missing_value, input_data_file=None,
                        output_data_file=None, log_file=None):
    """Write the absolute value of every cell, passing missing values through.

    :param missing_value: sentinel marking missing data
    :param input_data_file: CSV file with columns of values (required)
    :param output_data_file: CSV file to write the results (required)
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Absolute value of column')
        writer = csv.writer(_out)
        sentinel = float(missing_value)
        for row in csv.reader(_in):
            values = array('f')
            for cell in row:
                number = float(cell)
                values.append(math.fabs(number) if number != sentinel else sentinel)
            writer.writerow(['{:.2f}'.format(x) for x in values])
def math_multiply_constant(constant, missing_value, input_data_file=None,
                           output_data_file=None, log_file=None):
    """Multiply every cell by a constant, passing missing values through.

    :param constant: multiplier applied to each non-missing cell
    :param missing_value: sentinel marking missing data
    :param input_data_file: CSV file with columns of values (required)
    :param output_data_file: CSV file to write the results (required)
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Multiplying column by {}'.format(constant))
        writer = csv.writer(_out)
        factor = float(constant)
        sentinel = float(missing_value)
        for row in csv.reader(_in):
            products = array('f')
            for cell in row:
                number = float(cell)
                products.append(number * factor if number != sentinel else sentinel)
            writer.writerow(['{:.2f}'.format(x) for x in products])
def reformat_dates_to_gtnp(date_time_format, input_data_file=None,
                           output_data_file=None, log_file=None,
                           log_level=DEFAULT_LOG_LEVEL):
    """
    Rewrite every date/time cell into the single-quoted GTN-P format.

    Cells that do not parse with the given format are logged and written
    through unchanged.

    :param date_time_format: strptime format of date/times in the input file
    :param input_data_file: CSV file containing date/time cells
    :param output_data_file: CSV file for the reformatted date/times
    :param log_file: optional log file passed to setup_logger
    :param log_level: logging level for the logger
    """
    logger = setup_logger(__name__, log_file, log_level)
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        reader = csv.reader(_in)
        writer = csv.writer(_out)
        for row in reader:
            converted = []
            for cell in row:
                try:
                    stamp = dt.datetime.strptime(cell.strip(), date_time_format)
                    converted.append(
                        "'{0}'".format(stamp.strftime(gtnp_date_time_format)))
                except ValueError as error:
                    logger.error(error)
                    converted.append(cell)
            writer.writerow(converted)
def calc_divide_col(missing_value, input_data_file=None,
                    output_data_file=None, log_file=None):
    """Divide column A by column B: out = column_a / column_b.

    Rows where either input equals the missing value, or the divisor is zero,
    are written out as the missing value.

    :param missing_value: sentinel marking missing data
    :param input_data_file: CSV file with two columns of values
    :param output_data_file: CSV file receiving one quotient column
    :param log_file: optional log file passed to setup_logger
    """
    logger = setup_logger(__name__, log_file)
    assert input_data_file is not None, 'An input CSV file with columns of values.'
    assert output_data_file is not None, 'An output CSV file to write new values.'
    with open(input_data_file, newline='') as _in, \
            open(output_data_file, 'w', newline='') as _out:
        logger.info('Divide two columns (out = column_a / column_b)')
        writer = csv.writer(_out)
        sentinel = float(missing_value)
        for row in csv.reader(_in):
            result = array('f')
            numerator = float(row[0])
            denominator = float(row[1])
            if numerator != sentinel and denominator != sentinel \
                    and denominator != 0.0:
                result.append(numerator / denominator)
            else:
                result.append(sentinel)
            writer.writerow(['{:.10f}'.format(x) for x in result])
def __init__(self, flow_name, log_file=None):
    """Set up flow-level state: logging, configuration, and step holders.

    :param flow_name: path to the flow configuration file
    :param log_file: optional log path; defaults to '<flow_name stem>.log'
        in the current working directory
    """
    self.flow_name = flow_name
    if log_file is not None:
        self.log_file = log_file
    else:
        default_name = self.flow_name.split('.')[0] + '.log'
        self.log_file = os.path.join(os.getcwd(), default_name)
    self.logger = setup_logger('', self.log_file)
    self.logger.info('Setup logging into: {}'.format(self.log_file))
    self.config_translator = ConfigTranslator()
    self.config_translator.read_config(self.flow_name)
    self.widget_factory = WidgetFactory()
    # Pipeline collaborators, populated while the flow is assembled.
    self.file_manager = None
    self.variable_mapper = None
    self.file_reader = None
    self.input_files = None
    self.input_manipulations = []
    self.file_writer = None
    self.output_directory = None
    self.output_manipulations = []
def read_csv_file(file_name, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    """Read a single-quoted CSV file into a list of rows of converted values.

    Cells are converted with cast_to_decimal; per-row column counts are
    collected and verified with column_check.

    :param file_name: path of the CSV file to read
    :param log_file: optional log file passed to setup_logger
    :param log_level: logging level for the logger
    :return: list of rows, each a list of converted cell values
    """
    logger = setup_logger(__name__, log_file=log_file, log_level=log_level)
    logger.info('Reading file: {}'.format(file_name))
    data = []
    with open(file_name, newline='') as _from:
        count = []
        data = []
        data_reader = csv.reader(_from, quoting=csv.QUOTE_ALL,
                                 skipinitialspace=True, quotechar="'")
        for line in data_reader:
            new_line = [cast_to_decimal(elem) for elem in line]
            data.append(new_line)
            count.append(len(line))
        try:
            if len(data) == 0:
                # Bug fix: Logger.warn is a deprecated alias; use warning().
                logger.warning('Data file is empty.')
            else:
                column_check(count, logger)
        except IOError as e:
            logger.error(e)
    return data
def file_manager(self, filenames, output_dir, temp_dir, log_file=None):
    """
    Pair each data file with an output path, an identifier, and a log file.

    filenames: a sequence of paths to data files
    output_dir: directory for the per-file '<name>_out.csv' outputs
    temp_dir: directory for the per-file '<name>.log' files
    Returns a list of (input name, output path, identifier, log path) tuples;
    files whose log cannot be created are logged and skipped.
    """
    self.logger = setup_logger(__name__, log_file)
    step_files = []
    # Loop-invariant: the directories do not change per file.
    output_dir_path = Path(output_dir)
    temp_dir_path = Path(temp_dir)
    for identifier, name in enumerate(filenames, start=1):
        name_path = Path(name)
        # Bug fix: `suffix is ''` compared string identity, not equality;
        # an empty suffix is now tested with truthiness.
        if not name_path.suffix:
            output_filename = name_path.name + '_out.csv'
            log_filename = name_path.name + '.log'
        else:
            output_filename = name_path.name.replace(
                name_path.suffix, '_out.csv')
            log_filename = name_path.name.replace(name_path.suffix, '.log')
        output_path = output_dir_path.joinpath(output_filename)
        log_path = temp_dir_path.joinpath(log_filename)
        try:
            # Truncate an existing log, or create a fresh one; a missing
            # parent directory raises FileNotFoundError and skips the file.
            if log_path.exists():
                log_path.write_bytes(b'')
            else:
                log_path.touch(mode=0o666)
            step_files.append(
                (name, str(output_path), identifier, str(log_path)))
        except FileNotFoundError:
            self.logger.error(
                'The log file {f} does not exist and cannot be created.'.
                format(f=str(log_path)))
    return step_files
def variable_map(self, input_data, map_file, log_file=None):
    """Map input data columns to output columns using a whitespace map file.

    The map file has one header line followed by rows of seven
    whitespace-separated entries, decoded by self.entries_breakout.

    :param input_data: iterable of input rows; the first row is a header
    :param map_file: whitespace-delimited column mapping file
    :param log_file: optional log file passed to setup_logger
    :return: [output_data, in_map, out_map, {output name: input name}]
    :raises IndexError: when a map line has the wrong number of entries
    """
    # Columns are separated by whitespace
    sep = ' '
    n_entries = 7
    logger = setup_logger(__name__, log_file)
    logger.info('Running variable mapper.')
    # in_map = {column name: column index} of the original data file
    # in_details: {column name: [units, description]} of the original data file
    # out_map = {column name: column index} of the processed file
    # out_details: {column name: [units, description]} of the processed file
    # name_converter: {input column name: output column name}
    in_map = {}
    in_details = {}
    out_map = {}
    out_details = {}
    name_converter = {}
    # Loop-invariant: the split pattern only depends on the separator.
    pattern = '{0}+'.format(sep)
    with open(map_file) as f:
        # Possible improvement: skip over n "headlines" instead of just 1
        firstline = True
        for line in f:
            if firstline:
                # skips first line
                firstline = False
                continue
            # Divide each line into entries
            entries = re.split(pattern, line)
            if len(entries) != n_entries and len(entries) != 0:
                # Check that the number of entries is correct
                logger.info('Map file: {m}'.format(m=map_file))
                logger.info('Expected number of columns: {e}'.format(e=n_entries))
                logger.info('Read number of columns: {r}'.format(r=len(entries)))
                # Bug fix: Logger.info does not accept print-style extra
                # positional args or 'sep'; format the list into the message.
                logger.info('Read entries: {}'.format(entries))
                raise IndexError('File has the wrong number of columns.')
            else:
                in_header, operation, out_header, in_index, out_index, \
                    units, description = self.entries_breakout(entries)
                # TODO: description and units should be passed around as metadata
                # Build the name converter
                name_converter[in_header] = out_header
                if in_header and in_index > 0:
                    # If the input exists, store data about it
                    in_map.update({in_header: in_index - 1})
                    in_details.update({in_header: [operation, description]})
                if out_header and out_index > 0:
                    # If the output exists, store data about it
                    out_map.update({out_header: out_index - 1})
                    out_details.update({out_header: [units, description]})
    output_data = []
    # Construct the first line of the output file from the given information
    headline = [''] * len(out_map)
    for name, index, details in zip(out_map.keys(), out_map.values(),
                                    out_details.values()):
        if details[0]:
            # units exist
            formatstr = '{name} ({unit})'
        else:
            formatstr = '{name}'
        headline[index] = formatstr.format(name=name, unit=details[0])
    output_data.append(headline)
    copies = {}
    for in_name in in_map.keys():
        # Figure out which columns need to be copied.
        # copies is a dictionary of input column index -> output column index
        if name_converter[in_name] in out_map:
            copies[in_map[in_name]] = out_map[name_converter[in_name]]
    firstline = True
    for line in input_data:
        # Copy selected columns, skipping the input header row.
        if firstline:
            firstline = False
            continue
        outputline = [''] * len(out_map)
        for _from, _to in copies.items():
            outputline[_to] = line[_from]
        output_data.append(outputline)
    # Returns:
    # - the output data.
    # - a dictionary of column name -> index for the input csv
    # - a dictionary of column name -> index for the output csv
    # - a dictionary of data column name -> destination column name
    result = [
        output_data, in_map, out_map,
        {v: k for k, v in name_converter.items()}
    ]
    return result
def setup_logger(self, name, log_file):
    """Attach a logger to this object, defaulting the name to self.channel.

    :param name: logger name, or None to use the object's channel
    :param log_file: log file passed to the module-level setup_logger
    """
    logger_name = name if name is not None else self.channel
    self.logger = setup_logger(logger_name, log_file)
def variable_map(self, input_data, map_file, log_file=None, log_level=DEFAULT_LOG_LEVEL):
    # Maps input data columns to output columns using a CSV map file.
    #
    # The map file has one header line followed by rows of eight cells:
    # col_number, in_name, operation, out_name, in_index, out_index,
    # units, description (indices are 1-based; '0' means "not present").
    #
    # :param input_data: iterable of input rows; the first row is a header
    # :param map_file: CSV column-mapping file
    # :param log_file: optional log file passed to setup_logger
    # :param log_level: logging level for the logger
    # :return: [output_data, in_map, out_map, {output name: input name}]
    # reads in a file mapping input columns to output columns
    # then copies the input data to the output data
    logger = setup_logger(__name__, log_file, log_level=log_level)
    logger.info('Running variable mapper.')
    # in_map = {column name: column index} of the original data file
    # in_details: {column name: [units, description]} of the original data file
    # out_map = {column name: column index} of the processed file
    # out_details: {column name: [units, description]} of the processed file
    # name_converter: {input column name: output column name}
    in_map = {}
    in_details = {}
    out_map = {}
    out_details = {}
    name_converter = {}
    # read in all the variable mapping information, one parallel list per
    # map-file column
    col_number = []
    in_name = []
    operation = []
    out_name = []
    in_index = []
    out_index = []
    units = []
    description = []
    num_rec = 0
    in_column = 0
    out_column = 0
    with open(map_file, newline='') as _in:
        # logger.info('\tRead Variable Mapping File')
        logger.info('\tMap file: {m}'.format(m=map_file))
        reader = csv.reader(_in)
        # Possible improvement: skip over n "headlines" instead of just 1
        firstline = True
        for line in reader:
            # logger.info('{}'.format(line))
            if (firstline):
                # skips first line
                firstline = False
                continue
            # Divide each line into entries
            num_rec = num_rec + 1
            col_number.append(line[0])
            in_name.append(line[1])
            operation.append(line[2])
            out_name.append(line[3])
            in_index.append(line[4])
            out_index.append(line[5])
            units.append(line[6])
            description.append(line[7])
    # Create mapping files: translate the parallel lists into the lookup
    # dictionaries and the output header line.
    headline = []
    for i in range(num_rec):
        # logger.info('record: {}'.format(i))
        if (in_index[i] != '0'):
            in_column = in_column + 1
            # If the input exists, store data about it
            in_map.update({in_name[i]: int(in_index[i]) - 1})
            in_details.update({in_name[i]: [operation[i], description[i]]})
        if (out_index[i] != '0'):
            out_column = out_column + 1
            # If the output exists, store data about it
            out_map.update({out_name[i]: int(out_index[i]) - 1})
            out_details.update({out_name[i]: [units[i], description[i]]})
            # NOTE(review): name_converter and the headline are only updated
            # for records with an output column — confirm that is intended.
            name_converter[in_name[i]] = out_name[i]
            text_string = ('{} ({})'.format(out_name[i], units[i]))
            headline.append(text_string)
    # logger.info('in col: {} out col: {}'.format(in_column, out_column))
    # define output data
    output_data = []
    # append header of output data
    output_data.append(headline)
    # Figure out which columns need to be copied ('copy' operation rows);
    # copy_from/copy_to hold 0-based source and destination indices.
    num_copies = 0
    copy_from = []
    copy_to = []
    for i in range(num_rec):
        if (operation[i] == 'copy'):
            num_copies = num_copies + 1
            copy_from.append(int(in_index[i]) - 1)
            copy_to.append(int(out_index[i]) - 1)
            # logger.info('from: {} to: {}'.format(copy_from[i],copy_to[i]))
    logger.info('\tnum_copies: {} '.format(num_copies))
    # copy input data to output data, skipping the input header row
    firstline = True
    for line in input_data:
        # Copy selected columns
        if (firstline):
            firstline = False
            continue
        outputline = [''] * out_column
        for i in range(num_copies):
            outputline[copy_to[i]] = line[copy_from[i]]
        output_data.append(outputline)
    # return to DIT
    # Returns:
    # - the output data.
    # - a dictionary of column name -> index for the input csv
    # - a dictionary of column name -> index for the output csv
    # - a dictionary of data column name -> destination column name
    result = [output_data, in_map, out_map,
              {v: k for k, v in name_converter.items()}]
    return result