def copy_file(model, file_path=None, **kwargs):
    """Bulk-load a CSV file into the model's database table.

    Tries the fast COPY path first; if it raises, logs a warning and
    falls back to seeding the rows in batches.
    """
    table_name = model._meta.db_table
    # Read the CSV header line to derive the column list for the COPY query.
    with open(file_path, 'r') as source:
        header_line = source.readline()
        columns = header_line.replace('"', '').replace('\n', '')
    # NOTE(review): `sql` is assigned but never used below — confirm
    # copy_query() has no required side effects before deleting this line.
    sql = copy_query(table_name, columns)
    try:
        copy_insert_from_csv(table_name, file_path, **kwargs)
    except Exception as e:
        # Best-effort fallback: deliberately broad catch so any COPY
        # failure degrades to the slower batch-upsert path.
        logger.warning("Database - Bulk Import Error - beginning Batch seeding. Error: {}".format(e))
        row_gen = from_csv_file_to_gen(file_path, kwargs['update'])
        batch_upsert_from_gen(model, row_gen, settings.BATCH_SIZE, **kwargs)
def seed_from_csv_diff(original_file_path, new_file_path, model, **kwargs):
    """
    takes new file, filters it down in size, adds to Set()
    takes old file, adds to Set()
    saves to temporary file for read to avoid high memory usage
    Diff Set() = New file Set() - Old file Set()
     - preserves new records
     - preserves altered/updated records
     - removes duplicate, non updated records
    seeds Diff Set() in batches
    """
    original_diff_set = set()
    new_diff_set = set()
    # Context managers guarantee both handles are closed even on error
    # (the original code leaked both file objects).
    with open(new_file_path, 'r') as new_file, \
            open(original_file_path, 'r') as original_file:
        headers = new_file.readline().replace('\n', '').split(',')
        new_reader = model.update_set_filter(csv.reader(new_file), headers)
        original_reader = csv.reader(original_file)
        next(original_reader, None)  # skip the old file's header row
        logger.debug(" * Beginning CSV diff process.")
        # json.dumps gives a hashable, order-preserving row key for set diffing.
        for row in new_reader:
            new_diff_set.add(json.dumps(row))
        for row in original_reader:
            original_diff_set.add(json.dumps(row))
    diff = new_diff_set - original_diff_set
    # BUG FIX: the original expression parsed as
    #   (prefix + '.mock') if settings.TESTING else '.csv'
    # because the conditional expression binds looser than `+`, so outside
    # tests the temp filename collapsed to a bare '.csv'. Only the
    # extension is meant to be conditional.
    extension = '.mock' if settings.TESTING else '.csv'
    temp_file_path = os.path.join(
        settings.MEDIA_TEMP_ROOT,
        'set_diff' + str(random.randint(1, 10000000)) + extension)
    with open(temp_file_path, 'w') as temp_file:
        writer = csv.writer(temp_file, delimiter=',')
        writer.writerow(headers)
        for row in diff:
            writer.writerow(json.loads(row))
    diff_gen = from_csv_file_to_gen(temp_file_path, kwargs['update'])
    logger.debug(" * Csv diff completed, beginning batch upsert.")
    batch_upsert_from_gen(model, diff_gen, settings.BATCH_SIZE, **kwargs)
    # Clean up the temporary diff file once seeding has finished.
    if os.path.isfile(temp_file_path):
        os.remove(temp_file_path)
    if 'callback' in kwargs and kwargs['callback']:
        kwargs['callback']()
def transform_self(self, file_path, update=None):
    """Yield pre-validation-filtered rows generated from the CSV file."""
    row_gen = from_csv_file_to_gen(file_path, update)
    return self.pre_validation_filters(row_gen)
def transform_self(self, file_path, update=None):
    """Yield pre-validation-filtered rows with BBLs derived from the
    'boro' borough column."""
    row_gen = from_csv_file_to_gen(file_path, update)
    bbl_rows = with_bbl(row_gen, borough='boro')
    return self.pre_validation_filters(bbl_rows)
def transform_self(self, file_path, update=None):
    """Yield pre-validation-filtered rows with BBLs attached, tolerating
    blank BBL components."""
    row_gen = from_csv_file_to_gen(file_path, update)
    bbl_rows = with_bbl(row_gen, allow_blank=True)
    return self.pre_validation_filters(bbl_rows)
def transform_self(self, file_path, update=None):
    """Yield pre-validation-filtered rows, cleaning null bytes from the
    headers while reading the CSV."""
    row_gen = from_csv_file_to_gen(
        file_path, update, self.clean_null_bytes_headers)
    return self.pre_validation_filters(row_gen)
def transform_self(self, file_path, update=None):
    """Yield CSV rows with BBLs attached; no pre-validation filtering here."""
    row_gen = from_csv_file_to_gen(file_path, update)
    return with_bbl(row_gen)