def _results_to_row_function(function, data, results, verbose=0, limit=None):
    """Run *function* over every row of *results* and bulk-insert the
    transformed rows into a freshly created temporary table.

    The destination table is named ``config.TEMP_TABLE_STR`` +
    ``data['name']`` and its schema is built from ``data['fields']``.
    Rows are flushed in batches of ``config.BATCH_SIZE``; indexes are
    built only when at least one row was written.

    Parameters:
        function: callable taking ``(row_dict, verbose=...)`` and
            returning the transformed row dict.
        data: mapping with ``'name'``, ``'fields'`` and an optional
            ``'primary_key'`` key describing the destination table.
        results: iterable of result rows; also passed to
            ``get_result_fields`` to discover the incoming column names.
        verbose: when truthy, print progress counts.
        limit: stop after this many rows (``None`` = no limit).

    Returns:
        int: number of rows processed.
    """
    table_name = data['name']
    schema_fields = data['fields']
    primary_key = data.get('primary_key')
    temp_table = config.TEMP_TABLE_STR + table_name

    row_count = 0
    batch = []
    needs_setup = True
    for source_row in results:
        if needs_setup:
            # Incoming column names come from the result set itself.
            incoming_fields = get_result_fields(results)
            column_names = [col['name'] for col in incoming_fields]
        transformed = function(dict(zip(column_names, source_row)), verbose=verbose)
        if needs_setup:
            # The destination table is created only after the first row
            # has been transformed successfully.
            table_fields = process_header(schema_fields)
            create_table(temp_table, table_fields, primary_key=primary_key, verbose=verbose)
            insert_sql = insert_rows(temp_table, table_fields)
            needs_setup = False
        batch.append(transformed)
        row_count += 1
        if row_count % config.BATCH_SIZE == 0:
            # Flush a full batch and report progress.
            run_sql(insert_sql, batch)
            batch = []
            if verbose:
                print('{table}: {count:,}'.format(
                    table=table_name, count=row_count
                ))
        if limit and row_count == limit:
            break
    # Flush any final partial batch.
    if batch:
        run_sql(insert_sql, batch)
    if verbose:
        print('{table}: {count:,} rows imported'.format(
            table=table_name, count=row_count
        ))
    if row_count:
        # Add indexes
        build_indexes(temp_table, table_fields, verbose=verbose)
    return row_count
def import_csv(
    reader,
    table_name,
    fields=None,
    skip_first=False,
    description=None,
    verbose=0,
    limit=None,
    keep_table=False,
    importer=None,
):
    """Bulk-import rows from *reader* into *table_name*.

    Rows are inserted in batches of ``config.BATCH_SIZE``.  Unless an
    existing table is reused (``keep_table``), data is loaded into a
    table named ``config.TEMP_TABLE_STR + table_name`` and indexes are
    built on it afterwards.  A summary-table entry is recorded at the
    end via ``update_summary_table``.

    Parameters:
        reader: iterable yielding one sequence of values per row
            (e.g. a ``csv.reader``).
        table_name: destination table name.
        fields: optional explicit header; when ``None`` the first
            non-comment row is used as the header.
        skip_first: treat the first non-comment row as a header even
            when ``fields`` is given.
        description: table description; when not supplied it may be
            taken from a leading single-column ``#`` comment row.
        verbose: when truthy, print progress counts.
        limit: stop once ``count`` reaches this value (``None`` = all).
        keep_table: reuse (truncate) the existing table when its
            columns match the incoming header; otherwise fall back to
            creating the temp table.
        importer: passed through to ``update_summary_table``.
    """
    # Reusing an existing table only makes sense when it actually exists.
    if keep_table and table_name not in table_list():
        keep_table = False
    temp_table = config.TEMP_TABLE_STR + table_name
    count = 0
    t_fields = []
    data = []
    # A header row is expected when no explicit fields were supplied,
    # or when the caller asked to skip the first row.
    has_header_row = (fields is None) or skip_first
    first = True
    set_first = False
    for row in reader:
        skip = False
        if first:
            if len(row) == 1 and row[0][:1] == "#":
                # Leading "#..." comment row: use it as the description
                # (unless one was supplied) and do not import it.
                if not description:
                    description = row[0][1:].strip()
                skip = True
            else:
                # First real row: finalise the header and set up the
                # destination table plus the insert statement.
                if fields is None:
                    fields = row
                t_fields = process_header(fields)
                t_fns = get_fns(t_fields)
                if keep_table:
                    old_fields = table_columns(table_name)
                    if fields_match(old_fields, t_fields):
                        # Same schema: truncate and load in place.
                        truncate_table(table_name, verbose=verbose)
                        temp_table = table_name
                    else:
                        keep_table = False
                if not keep_table:
                    create_table(temp_table, t_fields, verbose=verbose)
                f = [field["name"] for field in t_fields if not field.get("missing")]
                insert_sql = insert_rows(temp_table, t_fields)
                set_first = True
        # Skip the first row when it is a header.  NOTE(review): a
        # caller-supplied description also makes this condition skip the
        # first data row even when no header is expected -- confirm that
        # is intended.
        if not ((description or has_header_row) and first):
            row_data = dict(zip(f, row))
            # Apply per-column conversion functions; each entry of t_fns
            # maps a field name to (callable, "src1|src2|..." or falsy).
            for fn in t_fns:
                fn_info = t_fns[fn]
                if fn_info[1]:
                    fn_fields = fn_info[1].split("|")
                else:
                    # No explicit sources: the function takes its own field.
                    fn_fields = [fn]
                try:
                    row_data[fn] = fn_info[0](*[row_data[x] for x in fn_fields])
                except Exception as e:
                    # FIXME log error
                    print(str(e))
                    print(fn)
                    print(row_data)
                    # Drop the whole row on any conversion error.
                    skip = True
            if not skip:
                data.append(row_data)
                # Flush a full batch (the `and count` guard avoids a
                # flush before anything has been counted).
                if count % config.BATCH_SIZE == 0 and count:
                    run_sql(insert_sql, data)
                    data = []
                    if verbose:
                        print("{table}: {count:,}".format(table=table_name, count=count))
        if not skip:
            count += 1
            if limit and count == limit:
                break
        if set_first:
            # Header handled; subsequent rows are data rows.
            first = False
    # Flush the final partial batch.
    if data:
        run_sql(insert_sql, data)
    if verbose:
        print("{table}: {count:,} rows imported".format(table=table_name, count=count))
    # Add indexes
    if not keep_table:
        build_indexes(temp_table, t_fields, verbose=verbose)
    update_summary_table(table_name, description, importer=importer, created=not keep_table)