def test_get_staged_file_throws_error_for_multiple_staged_files(tmpdir):
    """Check that ``paths.get_staged_file`` raises with two staged files.

    Two empty TSV files are written under
    ``<tmpdir>/<paths.STAGING_SUBDIRECTORY>/<TEST_ID>/``; looking up the
    staged file for ``TEST_ID`` is then expected to raise
    ``AssertionError``.
    """
    base_dir = str(tmpdir)
    # Create the staging directory once and reuse the handle for both files.
    staged_dir = tmpdir.mkdir(paths.STAGING_SUBDIRECTORY).mkdir(TEST_ID)
    for tsv_name in ('test1.tsv', 'test2.tsv'):
        staged_dir.join(tsv_name).write('')

    with pytest.raises(AssertionError):
        paths.get_staged_file(base_dir, TEST_ID)
def load_local_data(output_dir, postgres_conn_id, identifier):
    """Load the staged TSV for ``identifier`` and upsert it.

    The staged file is located with ``paths.get_staged_file``, handed to
    ``sql.load_local_data_to_intermediate_table``, and then
    ``sql.upsert_records_to_image_table`` is invoked for the same
    connection and identifier.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    sql.load_local_data_to_intermediate_table(
        postgres_conn_id,
        staged_tsv,
        identifier,
    )
    sql.upsert_records_to_image_table(postgres_conn_id, identifier)
def test_get_staged_file_finds_staged_file(tmpdir):
    """Check that ``paths.get_staged_file`` returns the single staged file.

    One empty TSV is written under
    ``<tmpdir>/<paths.STAGING_SUBDIRECTORY>/<TEST_ID>/`` and the lookup
    result is compared against that file's path as a string.
    """
    staged_dir = tmpdir.mkdir(paths.STAGING_SUBDIRECTORY).mkdir(TEST_ID)
    expected_file = staged_dir.join('test.tsv')
    expected_file.write('')

    found = paths.get_staged_file(str(tmpdir), TEST_ID)

    assert found == str(expected_file)
def load_local_data(output_dir, postgres_conn_id, identifier, overwrite=False):
    """Load the staged TSV for ``identifier`` into the image table.

    The staged file is located with ``paths.get_staged_file`` and passed
    through ``ingestion_column.check_and_fix_tsv_file`` before being
    loaded via ``sql.load_local_data_to_intermediate_table``.

    When ``overwrite`` is exactly ``True`` the final step calls
    ``sql.overwrite_records_in_image_table``; for any other value it
    calls ``sql.upsert_records_to_image_table``.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    ingestion_column.check_and_fix_tsv_file(staged_tsv)
    sql.load_local_data_to_intermediate_table(
        postgres_conn_id,
        staged_tsv,
        identifier,
    )

    # Identity check on purpose: only the literal ``True`` selects overwrite.
    finalize = (
        sql.overwrite_records_in_image_table
        if overwrite is True
        else sql.upsert_records_to_image_table
    )
    finalize(postgres_conn_id, identifier)
def copy_to_s3(output_dir, bucket, identifier, aws_conn_id):
    """Hand the staged TSV for ``identifier`` to the S3 staging helper.

    The file is located with ``paths.get_staged_file`` and forwarded to
    ``s3.copy_file_to_s3_staging`` together with the identifier, bucket
    and AWS connection id.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    s3.copy_file_to_s3_staging(
        identifier,
        staged_tsv,
        bucket,
        aws_conn_id,
    )
def copy_to_s3(output_dir, bucket, identifier, aws_conn_id):
    """Check the staged TSV for ``identifier`` and pass it to S3 staging.

    The file is located with ``paths.get_staged_file``, run through
    ``ingestion_column.check_and_fix_tsv_file``, and then forwarded to
    ``s3.copy_file_to_s3_staging`` together with the identifier, bucket
    and AWS connection id.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    # The check runs before the copy so the helper receives the checked file.
    ingestion_column.check_and_fix_tsv_file(staged_tsv)
    s3.copy_file_to_s3_staging(
        identifier,
        staged_tsv,
        bucket,
        aws_conn_id,
    )
def load_data_to_s3(output_dir, identifier, aws_conn_id):
    """Hand the staged TSV for ``identifier`` to the S3 staging helper.

    The file is located with ``paths.get_staged_file`` and forwarded to
    ``s3.copy_file_to_s3_staging`` together with the identifier and the
    AWS connection id.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    s3.copy_file_to_s3_staging(
        identifier,
        staged_tsv,
        aws_conn_id,
    )