def create_local_file(self, award_type, source, agency_code, generate_since):
    """ Generate complete file from SQL query and S3 bucket deletion files, then zip it locally """
    logger.info('Generating CSV file with creations and modifications')

    # Create file paths and working directory
    timestamp = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S%f')
    working_dir = '{}_{}_delta_gen_{}/'.format(settings.CSV_LOCAL_PATH, agency_code, timestamp)
    if not os.path.exists(working_dir):
        os.mkdir(working_dir)
    source_name = '{}_{}_delta'.format(award_type, VALUE_MAPPINGS['transactions']['download_name'])
    source_path = os.path.join(working_dir, '{}.csv'.format(source_name))

    # Create a unique temporary file with the raw query
    raw_quoted_query = generate_raw_quoted_query(source.row_emitter(None))  # None requests all headers
    csv_query_annotated = self.apply_annotations_to_sql(raw_quoted_query, source.human_names)
    (temp_sql_file, temp_sql_file_path) = tempfile.mkstemp(prefix='bd_sql_', dir='/tmp')
    with open(temp_sql_file_path, 'w') as file:
        file.write('\\copy ({}) To STDOUT with CSV HEADER'.format(csv_query_annotated))

    # Generate the csv with \copy
    cat_command = subprocess.Popen(['cat', temp_sql_file_path], stdout=subprocess.PIPE)
    subprocess.check_output(
        ['psql', '-o', source_path, os.environ['DOWNLOAD_DATABASE_URL'], '-v', 'ON_ERROR_STOP=1'],
        stdin=cat_command.stdout,
        stderr=subprocess.STDOUT
    )

    # Append deleted rows to the end of the file
    self.add_deletion_records(source_path, working_dir, award_type, agency_code, source, generate_since)

    if csv_row_count(source_path, has_header=True) > 0:
        # Split the CSV into multiple files and zip it up
        zipfile_path = '{}{}_{}_Delta_{}.zip'.format(
            settings.CSV_LOCAL_PATH, agency_code, award_type, datetime.strftime(date.today(), '%Y%m%d'))
        logger.info('Creating compressed file: {}'.format(os.path.basename(zipfile_path)))
        split_and_zip_csvs(zipfile_path, source_path, source_name)
    else:
        zipfile_path = None

    os.close(temp_sql_file)
    os.remove(temp_sql_file_path)
    shutil.rmtree(working_dir)

    return zipfile_path
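
# The helper below is a minimal, self-contained sketch (not part of the original module) that isolates
# the export pattern used in create_local_file above: the annotated query is wrapped in psql's
# client-side \copy meta-command, written to a temporary file, and piped into psql, which streams the
# result set to a CSV file. The function name export_query_to_csv and the database_url parameter are
# illustrative assumptions, not existing project APIs.
def export_query_to_csv(raw_sql, output_path, database_url):
    # mkstemp returns an open file descriptor plus a path; both must be cleaned up afterwards
    temp_fd, temp_path = tempfile.mkstemp(prefix='bd_sql_', dir='/tmp')
    try:
        with open(temp_path, 'w') as sql_file:
            sql_file.write('\\copy ({}) To STDOUT with CSV HEADER'.format(raw_sql))

        # \copy is a psql meta-command, so it is fed to the psql client rather than run through a
        # database driver; -o redirects the CSV output to output_path and ON_ERROR_STOP=1 makes psql
        # exit non-zero if the query fails
        cat_command = subprocess.Popen(['cat', temp_path], stdout=subprocess.PIPE)
        subprocess.check_output(
            ['psql', '-o', output_path, database_url, '-v', 'ON_ERROR_STOP=1'],
            stdin=cat_command.stdout,
            stderr=subprocess.STDOUT
        )
    finally:
        os.close(temp_fd)
        os.remove(temp_path)
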
def generate_temp_query_file(source_query, limit, source, download_job, columns):
    if limit:
        source_query = source_query[:limit]

    csv_query_annotated = apply_annotations_to_sql(generate_raw_quoted_query(source_query), source.columns(columns))

    write_to_log(message='Creating PSQL Query: {}'.format(csv_query_annotated), download_job=download_job,
                 is_debug=True)

    # Create a unique temporary file to hold the raw query, using \copy
    (temp_sql_file, temp_sql_file_path) = tempfile.mkstemp(prefix='bd_sql_', dir='/tmp')
    with open(temp_sql_file_path, 'w') as file:
        file.write('\\copy ({}) To STDOUT with CSV HEADER'.format(csv_query_annotated))

    return temp_sql_file, temp_sql_file_path
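
# A hedged usage sketch (not from the original module): generate_temp_query_file returns the open file
# descriptor and path from mkstemp without cleaning them up, so the caller is responsible for piping the
# temp file into psql and then closing and removing it, mirroring the pattern in create_local_file above.
# The name run_temp_query_to_csv and the csv_path parameter are illustrative; source_query, source,
# download_job, and columns are assumed to come from the caller's scope.
def run_temp_query_to_csv(source_query, source, download_job, columns, csv_path):
    temp_sql_file, temp_sql_file_path = generate_temp_query_file(source_query, None, source, download_job, columns)
    try:
        cat_command = subprocess.Popen(['cat', temp_sql_file_path], stdout=subprocess.PIPE)
        subprocess.check_output(
            ['psql', '-o', csv_path, os.environ['DOWNLOAD_DATABASE_URL'], '-v', 'ON_ERROR_STOP=1'],
            stdin=cat_command.stdout,
            stderr=subprocess.STDOUT
        )
    finally:
        # mkstemp leaves the descriptor open, so close it explicitly before removing the file
        os.close(temp_sql_file)
        os.remove(temp_sql_file_path)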