Exemplo n.º 1
0
def test_get_staged_file_throws_error_for_multiple_staged_files(tmpdir):
    """Expect an AssertionError when two files are staged for one identifier.

    Two empty TSV files are created under
    ``<tmpdir>/<STAGING_SUBDIRECTORY>/<TEST_ID>/`` before calling
    ``paths.get_staged_file``.
    """
    output_dir = str(tmpdir)
    # Create the staging directory tree once and reuse it for both files.
    identifier_dir = tmpdir.mkdir(paths.STAGING_SUBDIRECTORY).mkdir(TEST_ID)
    for tsv_name in ('test1.tsv', 'test2.tsv'):
        identifier_dir.join(tsv_name).write('')

    with pytest.raises(AssertionError):
        paths.get_staged_file(output_dir, TEST_ID)
Exemplo n.º 2
0
def load_local_data(output_dir, postgres_conn_id, identifier):
    """Load the staged TSV for ``identifier`` and upsert it to the image table.

    The staged file is looked up with ``paths.get_staged_file``, passed to
    ``sql.load_local_data_to_intermediate_table``, and then
    ``sql.upsert_records_to_image_table`` is called for the same identifier.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    sql.load_local_data_to_intermediate_table(
        postgres_conn_id, staged_tsv, identifier
    )
    sql.upsert_records_to_image_table(postgres_conn_id, identifier)
Exemplo n.º 3
0
def test_get_staged_file_finds_staged_file(tmpdir):
    """Expect ``paths.get_staged_file`` to return the single staged file path.

    One empty TSV file is created under
    ``<tmpdir>/<STAGING_SUBDIRECTORY>/<TEST_ID>/`` and the returned value is
    compared with that file's string path.
    """
    output_dir = str(tmpdir)
    identifier_dir = tmpdir.mkdir(paths.STAGING_SUBDIRECTORY).mkdir(TEST_ID)
    expected_file = identifier_dir.join('test.tsv')
    expected_file.write('')

    actual_file = paths.get_staged_file(output_dir, TEST_ID)

    assert actual_file == str(expected_file)
Exemplo n.º 4
0
def load_local_data(output_dir, postgres_conn_id, identifier, overwrite=False):
    """Check the staged TSV, load it, then overwrite or upsert image records.

    The staged file is looked up with ``paths.get_staged_file``, passed
    through ``ingestion_column.check_and_fix_tsv_file``, and loaded with
    ``sql.load_local_data_to_intermediate_table``.

    ``sql.overwrite_records_in_image_table`` is used only when ``overwrite``
    is exactly ``True``; any other value falls back to
    ``sql.upsert_records_to_image_table``.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    ingestion_column.check_and_fix_tsv_file(staged_tsv)
    sql.load_local_data_to_intermediate_table(
        postgres_conn_id, staged_tsv, identifier
    )
    # Identity check on purpose: truthy non-bool values still take the
    # upsert path.
    finalize = (
        sql.overwrite_records_in_image_table
        if overwrite is True
        else sql.upsert_records_to_image_table
    )
    finalize(postgres_conn_id, identifier)
Exemplo n.º 5
0
def copy_to_s3(output_dir, bucket, identifier, aws_conn_id):
    """Pass the staged file for ``identifier`` to ``s3.copy_file_to_s3_staging``.

    The file path comes from ``paths.get_staged_file(output_dir, identifier)``.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    s3.copy_file_to_s3_staging(
        identifier,
        staged_tsv,
        bucket,
        aws_conn_id,
    )
Exemplo n.º 6
0
def copy_to_s3(output_dir, bucket, identifier, aws_conn_id):
    """Check the staged file for ``identifier`` and hand it to the S3 copy.

    The file path comes from ``paths.get_staged_file(output_dir, identifier)``
    and is passed through ``ingestion_column.check_and_fix_tsv_file`` before
    ``s3.copy_file_to_s3_staging`` is called.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    ingestion_column.check_and_fix_tsv_file(staged_tsv)
    s3.copy_file_to_s3_staging(
        identifier,
        staged_tsv,
        bucket,
        aws_conn_id,
    )
Exemplo n.º 7
0
def load_data_to_s3(output_dir, identifier, aws_conn_id):
    """Pass the staged file for ``identifier`` to ``s3.copy_file_to_s3_staging``.

    The file path comes from ``paths.get_staged_file(output_dir, identifier)``.
    """
    staged_tsv = paths.get_staged_file(output_dir, identifier)
    s3.copy_file_to_s3_staging(
        identifier,
        staged_tsv,
        aws_conn_id,
    )