Пример #1
0
    def test_table(self):
        """Round-trip a uniquely named table: upload rows, read them back, delete it.

        Only generated rows with at least one truthy attribute value are
        uploaded and expected back. Each phase runs in its own ``subTest`` so a
        failure in one phase does not prevent the later phases from running.
        """
        table = f"test-table-{uuid4()}"
        expected_rows = dict()
        for candidate in self._gen_rows(997):
            # Skip rows whose attribute values are all falsy.
            if any(candidate.attributes.values()):
                expected_rows[candidate.name] = candidate

        with self.subTest("build table"):
            began = time.time()
            tnu_table.put_rows(table, expected_rows.values())
            print("BUILD DURATION", time.time() - began)

        with self.subTest("get table"):
            fetched_rows = dict()
            for fetched in tnu_table.list_rows(table):
                fetched_rows[fetched.name] = fetched
            # Print each mismatching pair before asserting, so a failure shows
            # the offending rows rather than only one large dict diff.
            for row_name, wanted in expected_rows.items():
                got = fetched_rows.get(row_name)
                if wanted != got:
                    print(wanted)
                    print(got)
                    print()
            self.assertEqual(expected_rows, fetched_rows)

        with self.subTest("destroy_table"):
            began = time.time()
            tnu_table.delete(table)
            print("DESTROY DURATION", time.time() - began)
            # After deletion, listing the table must yield no rows.
            remaining = list(tnu_table.list_rows(table))
            self.assertEqual(0, len(remaining))
Пример #2
0
def delete_table(args: argparse.Namespace):
    """
    Delete the table named by ``args.table``.

    ``args.workspace`` and ``args.workspace_namespace`` are first passed through
    ``Config.resolve``; the resolved pair is written back onto ``args`` and then
    handed to ``tnu_table.delete``.
    """
    resolved = Config.resolve(args.workspace, args.workspace_namespace)
    args.workspace, args.workspace_namespace = resolved
    tnu_table.delete(args.table, args.workspace, args.workspace_namespace)
Пример #3
0
 def tearDownClass(cls):
     """Delete every listed table whose name starts with "test-table"."""
     for name in tnu_table.list_tables():
         if not name.startswith("test-table"):
             continue
         tnu_table.delete(name)
Пример #4
0
# Python cell: import the table helpers and read the workspace name, billing
# project and bucket from the environment (raises KeyError if any is unset).
with herzog.Cell("python"):
    import os
    from terra_notebook_utils import table

    workspace = os.environ['WORKSPACE_NAME']
    workspace_namespace = os.environ['GOOGLE_PROJECT']
    workspace_bucket = os.environ['WORKSPACE_BUCKET']

with herzog.Cell("markdown"):
    """
    ## Option A: VCF merge workflow input for DRS URIs
    This is a typical workflow preparation for merging TOPMed VCFs _without_ downloading them to your workspace.
    Results will be placed in your workspace bucket.
    """

# Runs outside any herzog cell: delete the "vcf-merge-input-drs" table before the
# cell below adds rows to it.
# NOTE(review): presumably this keeps repeated runs from accumulating rows and is
# not meant to appear in the generated notebook -- confirm against herzog's docs.
table.delete("vcf-merge-input-drs")
# Python cell: add two rows to "vcf-merge-input-drs". Each row carries the
# workspace, the billing project, a pair of DRS input URIs, and an output path
# under the workspace bucket.
with herzog.Cell("python"):
    table_name = "vcf-merge-input-drs"
    table.put_row(table_name, dict(workspace=workspace,
                                   billing_project=workspace_namespace,
                                   inputs=["drs://dg.4503/697f611b-aa8a-4bd7-a80b-946276273833",
                                           "drs://dg.4503/ce212b62-e796-4b32-becb-361f272cead0"],
                                   output=f"{workspace_bucket}/merged/drs_combined_a.vcf.gz"))
    table.put_row(table_name, dict(workspace=workspace,
                                   billing_project=workspace_namespace,
                                   inputs=["drs://dg.4503/93286e47-3d09-47e6-ac87-4c2975ef0c3f",
                                           "drs://dg.4503/aba6b011-2ab4-4739-beb4-c1eeaee60c74"],
                                   output=f"{workspace_bucket}/merged/drs_combined_b.vcf.gz"))

with herzog.Cell("markdown"):
    """
    assert table_name not in {table_name for table_name in table.list_tables()}
    try:
        table.put_rows(table_name, data)
        assert table_name in {table_name for table_name in table.list_tables()}
        for row in table.list_rows(table_name):
            assert row.attributes in data
        trow = table.get_row(table_name, row.name)
        assert trow.attributes == row.attributes
        drs_uri = table.fetch_drs_url(table_name, trow.attributes['file_name'])
        assert trow.attributes['object_id'] == drs_uri
        rows = [row for row in table.list_rows(table_name)]
        table.del_rows(table_name, [rows[0].name, rows[1].name])
        assert len(data) - 2 == len(
            [row for row in table.list_rows(table_name)])
    finally:
        table.delete(table_name)
    assert table_name not in {table_name for table_name in table.list_tables()}

# Python cell: print every submission yielded by workflows.list_submissions()
# as JSON indented by two spaces.
with herzog.Cell("python"):
    """Test workflows list"""
    for s in workflows.list_submissions():
        print(json.dumps(s, indent=2))

# Python cell: collect all submissions, then fetch the first one by its
# 'submissionId'. Indexing [0] raises IndexError when there are no submissions.
with herzog.Cell("python"):
    """Test workflows get submission"""
    submissions = [s for s in workflows.list_submissions()]
    workflows.get_submission(submissions[0]['submissionId'])

with herzog.Cell("python"):
    """Test workflows get workflow"""
    submissions = [s for s in workflows.list_submissions()]
 def tearDownClass(cls):
     """Delete the table whose name is stored in ``cls.table``."""
     table_name = cls.table
     tnu_table.delete(table_name)
Пример #7
0
# Python cell: import the table helpers and read the workspace name, billing
# project and bucket from the environment (raises KeyError if any is unset).
with herzog.Cell("python"):
    import os
    from terra_notebook_utils import table

    workspace = os.environ['WORKSPACE_NAME']
    workspace_namespace = os.environ['GOOGLE_PROJECT']
    workspace_bucket = os.environ['WORKSPACE_BUCKET']

with herzog.Cell("markdown"):
    """
    ## Option A: VCF subsample workflow input for DRS URIs
    This is a typical workflow preparation for subsampling TOPMed VCFs _without_ downloading them to your workspace.
    Results will be placed in your workspace bucket.
    """

# Runs outside any herzog cell: delete the "vcf-subsample-input-drs" table before
# the cell below adds rows to it.
# NOTE(review): presumably this keeps repeated runs from accumulating rows and is
# not meant to appear in the generated notebook -- confirm against herzog's docs.
table.delete("vcf-subsample-input-drs")
# Python cell: add rows to "vcf-subsample-input-drs". Each row carries the
# workspace, the billing project, one DRS input URI, an output path under the
# workspace bucket, and the list of sample IDs to keep.
with herzog.Cell("python"):
    table_name = "vcf-subsample-input-drs"
    table.put_row(
        table_name,
        dict(workspace=workspace,
             billing_project=workspace_namespace,
             input="drs://dg.4503/b2871873-8dcb-4a3e-a926-a17ab4a19f0a",
             output=f"{workspace_bucket}/subsampled/drs_subsampled_a.vcf.gz",
             samples=["NWD999037", "NWD996859"]))
    table.put_row(
        table_name,
        dict(workspace=workspace,
             billing_project=workspace_namespace,
             input="drs://dg.4503/06dc6204-a426-11ea-b7de-179adfdbfdb4",
             output=f"{workspace_bucket}/subsampled/drs_subsampled_b.vcf.gz",
Пример #8
0
      - Convert the wide-formatted dataframe into a Terra data table.
    """

###################################################################################### noqa
# Provide test fixtures for the following cells
import pandas
from uuid import uuid4
from random import choice, randint
from terra_notebook_utils import table

# Number of samples to generate; each sample contributes one long-format record
# per data format below.
NUMBER_OF_WIDE_ROWS = 20

long_table = "test-long"
wide_table = "test-wide"

# Delete both tables before the long-format fixture is rebuilt.
table.delete(long_table)
table.delete(wide_table)

# Build the long-format records: for every sample, one record each for the
# "cram", "crai" and "vcf" formats, all three sharing the same sample object.
lines = []
for _ in range(NUMBER_OF_WIDE_ROWS):
    sample_id = str(uuid4())
    sample_obj = {'foo': str(uuid4()), 'entityName': sample_id}
    for fmt in ("cram", "crai", "vcf"):
        record = {
            'pfb:sample': sample_obj,
            'pfb:data_format': fmt,
            'pfb:object_id': str(uuid4()),
            'pfb:file_size': randint(1024, 1024**3),
        }
        lines.append(record)
dataframe_to_table(long_table, pandas.DataFrame(lines))
###################################################################################### noqa