Exemplo n.º 1
0
def test_build_table_download_file_handle_list__repeated_file_handles():
    """Repeated file handle ids in a FILEHANDLEID column are only listed once.

    The table holds four rows that reference just two distinct file handle
    ids, so exactly two file handle associations are expected and, with the
    cache lookup patched to return None, no cached paths.
    """
    syn = synapseclient.client.Synapse(debug=True, skip_checks=True)

    cols = [
        Column(name='Name', columnType='STRING', maximumSize=50),
        Column(name='filehandle', columnType='FILEHANDLEID')
    ]

    schema = Schema(name='FileHandleTest', columns=cols, parent='syn420')

    # two distinct, large file handle ids, each of them used by two rows
    data = [["ayy lmao", 5318008], ["large numberino", 0x5f3759df],
            ["repeated file handle", 5318008],
            ["repeated file handle also", 0x5f3759df]]

    # need columns to do cast_values w/o storing
    table = Table(schema,
                  data,
                  headers=[SelectColumn.from_column(col) for col in cols])

    # Patch the cache so we don't look there in case FileHandle ids actually
    # exist there.  patch.object() only takes effect once it is started, so
    # it has to wrap the call as a context manager.
    with patch.object(syn.cache, "get", return_value=None):
        file_handle_associations, file_handle_to_path_map = \
            syn._build_table_download_file_handle_list(table, ['filehandle'])

    # verify only 2 file_handles are added (repeats were ignored)
    assert_equals(2, len(file_handle_associations))
    # might as well check anyways
    assert_equals(0, len(file_handle_to_path_map))
def test_list_of_rows_table():
    """A Table built from a list of rows matches its input in every view.

    Checks direct iteration, the RowSet view and the 'Name' column of the
    DataFrame view against the input rows.
    """
    columns = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]
    jazz_schema = Schema(name='Jazz Guys', columns=columns, id="syn1000002", parent="syn1000001")

    expected_rows = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]

    # headers are required to do cast_values without storing the table
    select_columns = [SelectColumn.from_column(column) for column in columns]
    table = Table(jazz_schema, expected_rows, headers=select_columns)

    # iterating the table yields the rows as given
    for actual, expected in zip(table, expected_rows):
        assert actual == expected

    # the RowSet view carries the same values
    for row, expected in zip(table.asRowSet().rows, expected_rows):
        assert row['values'] == expected

    table.columns = columns

    # the DataFrame view exposes the names in input order
    names = list(table.asDataFrame()['Name'])
    assert names == [expected[0] for expected in expected_rows]
Exemplo n.º 3
0
def test_pandas_to_table():
    """Check how Table() lays out rows built from differently shaped DataFrames.

    Four cases are covered: a plain frame, the same frame with
    includeRowIdAndRowVersion=True, a frame whose index holds
    "<row id>_<version>" labels, and a frame with explicit ROW_ID and
    ROW_VERSION columns.
    """
    pd = _try_import_pandas('test_pandas_to_table')

    letters = ["c", "d", "e"]

    df = pd.DataFrame(dict(a=[1, 2, 3], b=["c", "d", "e"]))
    schema = Schema(name="Baz",
                    parent="syn12345",
                    columns=as_table_columns(df))
    print("\n", df, "\n\n")

    # Case 1: a dataframe with no row id and version
    table = Table(schema, df)
    for index, row in enumerate(table):
        print(row)
        assert row[0] == (index + 1)
        assert row[1] == letters[index]

    assert len(table) == 3

    # Case 2: with includeRowIdAndRowVersion=True, empty row id and version
    # values are included:
    #   ROW_ID,ROW_VERSION,a,b
    #   ,,1,c
    #   ,,2,d
    #   ,,3,e
    table = Table(schema, df, includeRowIdAndRowVersion=True)
    for index, row in enumerate(table):
        print(row)
        assert row[0] is None
        assert row[1] is None
        assert row[2] == (index + 1)

    # Case 3: row id and version given through the index as "<id>_<version>"
    index_labels = ["1_7", "2_7", "3_8"]
    df = pd.DataFrame(index=index_labels,
                      data=dict(a=[100, 200, 300], b=["c", "d", "e"]))
    print("\n", df, "\n\n")

    expected_ids = ["1", "2", "3"]
    expected_versions = ["7", "7", "8"]
    table = Table(schema, df)
    for index, row in enumerate(table):
        print(row)
        assert row[0] == expected_ids[index]
        assert row[1] == expected_versions[index]
        assert row[2] == (index + 1) * 100
        assert row[3] == letters[index]

    # Case 4: row id and version given as regular columns
    df = pd.DataFrame(
        dict(ROW_ID=["0", "1", "2"],
             ROW_VERSION=["8", "9", "9"],
             a=[100, 200, 300],
             b=["c", "d", "e"]))
    print("\n", df, "\n\n")

    expected_ids = ["0", "1", "2"]
    expected_versions = ["8", "9", "9"]
    table = Table(schema, df)
    for index, row in enumerate(table):
        print(row)
        assert row[0] == expected_ids[index]
        assert row[1] == expected_versions[index]
        assert row[2] == (index + 1) * 100
        assert row[3] == letters[index]
Exemplo n.º 4
0
def test_RowSetTable_len():
    """len() of a RowSetTable built from a two-row RowSet is 2."""
    columns = [Column(name='column_name', id='123')]
    schema = Schema(parentId="syn123", id='syn456', columns=columns)

    rows = [Row(['first row']), Row(['second row'])]
    row_set_table = RowSetTable(schema, RowSet(schema=schema, rows=rows))

    assert_equals(2, len(row_set_table))
def test_dict_to_table():
    """Table() given a dict passes an equivalent DataFrame to from_data_frame.

    CsvFileTable.from_data_frame is mocked, and the second positional
    argument it received is compared with a DataFrame built from the dict.
    """
    source = dict(a=[1, 2, 3], b=["c", "d", "e"])
    expected_df = pd.DataFrame(source)
    schema = Schema(name="Baz", parent="syn12345", columns=as_table_columns(expected_df))

    with patch.object(CsvFileTable, "from_data_frame") as from_data_frame_mock:
        Table(schema, source)

    # call_args[0] holds the positional arguments of the recorded call;
    # the data frame is the second one
    positional_args = from_data_frame_mock.call_args[0]
    passed_df = positional_args[1]
    assert passed_df.equals(expected_df)
Exemplo n.º 6
0
def test_pandas_to_table():
    """Check how Table() lays out rows built from differently shaped DataFrames.

    Four cases are covered: a plain frame, the same frame with
    includeRowIdAndRowVersion=True, a frame whose index holds
    "<row id>_<version>" labels, and a frame with explicit ROW_ID and
    ROW_VERSION columns.
    """
    df = pd.DataFrame(dict(a=[1, 2, 3], b=["c", "d", "e"]))
    schema = Schema(name="Baz",
                    parent="syn12345",
                    columns=as_table_columns(df))

    # A dataframe with no row id and version
    table = Table(schema, df)

    for i, row in enumerate(table):
        assert_equals(row[0], (i + 1))
        assert_equals(row[1], ["c", "d", "e"][i])

    # Checked once, outside the loop, so that an empty table fails the test
    # instead of silently skipping the assertion.
    assert_equals(len(table), 3)

    # If includeRowIdAndRowVersion=True, include empty row id and versions
    # ROW_ID,ROW_VERSION,a,b
    # ,,1,c
    # ,,2,d
    # ,,3,e
    table = Table(schema, df, includeRowIdAndRowVersion=True)
    for i, row in enumerate(table):
        assert_is_none(row[0])
        assert_is_none(row[1])
        assert_equals(row[2], (i + 1))

    # A dataframe with row id and version in the index ("<id>_<version>")
    df = pd.DataFrame(index=["1_7", "2_7", "3_8"],
                      data=dict(a=[100, 200, 300], b=["c", "d", "e"]))

    table = Table(schema, df)
    for i, row in enumerate(table):
        assert_equals(row[0], ["1", "2", "3"][i])
        assert_equals(row[1], ["7", "7", "8"][i])
        assert_equals(row[2], (i + 1) * 100)
        assert_equals(row[3], ["c", "d", "e"][i])

    # A dataframe with row id and version in columns
    df = pd.DataFrame(
        dict(ROW_ID=["0", "1", "2"],
             ROW_VERSION=["8", "9", "9"],
             a=[100, 200, 300],
             b=["c", "d", "e"]))

    table = Table(schema, df)
    for i, row in enumerate(table):
        assert_equals(row[0], ["0", "1", "2"][i])
        assert_equals(row[1], ["8", "9", "9"][i])
        assert_equals(row[2], (i + 1) * 100)
        assert_equals(row[3], ["c", "d", "e"][i])
Exemplo n.º 7
0
def test_list_of_rows_table():
    """A Table built from a list of rows matches its input in every view.

    The DataFrame part of the test is skipped, with a note on stderr, when
    an ImportError is raised.
    """
    data = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]

    column_specs = [
        ('1', 'Name', 'STRING'),
        ('2', 'Born', 'INTEGER'),
        ('3', 'Hipness', 'DOUBLE'),
        ('4', 'Living', 'BOOLEAN'),
    ]
    cols = [Column(id=column_id, name=name, columnType=column_type)
            for column_id, name, column_type in column_specs]

    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    select_columns = [SelectColumn.from_column(col) for col in cols]
    table = Table(schema1, data, headers=select_columns)

    # iteration view
    for actual, expected in zip(table, data):
        assert actual == expected

    # RowSet view
    for row, expected in zip(table.asRowSet().rows, data):
        assert row['values'] == expected

    table.columns = cols

    # DataFrame view
    expected_names = [r[0] for r in data]
    try:
        import pandas as pd

        frame = table.asDataFrame()
        assert all(frame['Name'] == expected_names)
    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of test_list_of_rows_table.\n\n'
        )
def test_RowSetTable():
    """Build a RowSet from JSON, wrap it in a Table and check every view.

    Verifies etag, tableId, header and row counts on both the RowSet and
    the Table, then the shape and 'name' column of the DataFrame view.
    """
    etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    table_id = 'syn2976298'

    # (columnType, id, name) for every header
    header_specs = [
        ('STRING', '353', 'name'),
        ('DOUBLE', '355', 'x'),
        ('DOUBLE', '3020', 'y'),
        ('INTEGER', '891', 'n'),
    ]
    # (rowId, values, versionNumber) for every row
    row_specs = [
        (5, ['foo', '1.23', '2.2', '101'], 3),
        (6, ['bar', '1.34', '2.4', '101'], 3),
        (7, ['foo', '1.23', '2.2', '101'], 4),
        (8, ['qux', '1.23', '2.2', '102'], 3),
    ]

    row_set_json = {
        'etag': etag,
        'headers': [
            {'columnType': column_type, 'id': column_id, 'name': name}
            for column_type, column_id, name in header_specs],
        'rows': [
            {'rowId': row_id, 'values': values, 'versionNumber': version}
            for row_id, values, version in row_specs],
        'tableId': table_id,
    }

    row_set = RowSet.from_json(row_set_json)

    assert row_set.etag == etag
    assert row_set.tableId == table_id
    assert len(row_set.headers) == 4
    assert len(row_set.rows) == 4

    schema = Schema(id="syn2976298", name="Bogus Schema", columns=[353, 355, 3020, 891], parent="syn1000001")
    table = Table(schema, row_set)

    assert table.etag == etag
    assert table.tableId == table_id
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    frame = table.asDataFrame()
    assert frame.shape == (4, 4)
    assert list(frame['name']) == ['foo', 'bar', 'foo', 'qux']
def test_schema():
    """Exercise adding and removing Schema columns, by id and by Column object."""
    # -- a column that carries an id shows up in properties.columnIds
    by_id = Schema(name='My Table', parent="syn1000001")
    assert not by_id.has_columns()

    by_id.addColumn(Column(id='1', name='Name', columnType='STRING'))
    assert by_id.has_columns()
    assert by_id.properties.columnIds == ['1']

    by_id.removeColumn('1')
    assert not by_id.has_columns()
    assert by_id.properties.columnIds == []

    # -- columns without an id are expected in columns_to_store
    column_specs = [
        ('Name', 'STRING'),
        ('Born', 'INTEGER'),
        ('Hipness', 'DOUBLE'),
        ('Living', 'BOOLEAN'),
    ]
    by_object = Schema(name='Another Table', parent="syn1000001")
    by_object.addColumns([Column(name=name, columnType=column_type)
                          for name, column_type in column_specs])

    assert by_object.has_columns()
    assert len(by_object.columns_to_store) == 4
    # fresh Column objects are used on purpose: membership must work by equality
    for name, column_type in column_specs:
        assert Column(name=name, columnType=column_type) in by_object.columns_to_store

    by_object.removeColumn(Column(name='Living', columnType='BOOLEAN'))
    assert by_object.has_columns()
    assert len(by_object.columns_to_store) == 3
    assert Column(name='Living', columnType='BOOLEAN') not in by_object.columns_to_store
    assert Column(name='Hipness', columnType='DOUBLE') in by_object.columns_to_store
Exemplo n.º 10
0
def test_schema():
    """Exercise adding and removing Schema columns, by id and by Column object."""

    def living():
        """Return a fresh, id-less 'Living' column."""
        return Column(name='Living', columnType='BOOLEAN')

    def hipness():
        """Return a fresh, id-less 'Hipness' column."""
        return Column(name='Hipness', columnType='DOUBLE')

    # Part 1: a single column identified by its id
    schema = Schema(name='My Table', parent="syn1000001")
    assert not schema.has_columns()

    schema.addColumn(Column(id='1', name='Name', columnType='STRING'))
    assert schema.has_columns()
    assert schema.properties.columnIds == ['1']

    schema.removeColumn('1')
    assert not schema.has_columns()
    assert schema.properties.columnIds == []

    # Part 2: several id-less columns, matched by equality
    schema = Schema(name='Another Table', parent="syn1000001")
    new_columns = [
        Column(name='Name', columnType='STRING'),
        Column(name='Born', columnType='INTEGER'),
        hipness(),
        living(),
    ]
    schema.addColumns(new_columns)

    assert schema.has_columns()
    assert len(schema.columns_to_store) == 4
    assert Column(name='Name', columnType='STRING') in schema.columns_to_store
    assert Column(name='Born', columnType='INTEGER') in schema.columns_to_store
    assert hipness() in schema.columns_to_store
    assert living() in schema.columns_to_store

    schema.removeColumn(living())
    assert schema.has_columns()
    assert len(schema.columns_to_store) == 3
    assert living() not in schema.columns_to_store
    assert hipness() in schema.columns_to_store
Exemplo n.º 11
0
def test_Schema__max_column_check():
    """Storing a Schema with MAX_NUM_TABLE_COLUMNS + 1 columns raises ValueError."""
    one_too_many = synapseclient.table.MAX_NUM_TABLE_COLUMNS + 1
    oversized_columns = (Column(name="colNum%s" % index, columnType="STRING")
                         for index in range(one_too_many))

    schema = Schema(name="someName", parent="idk")
    schema.addColumns(oversized_columns)

    assert_raises(ValueError, syn.store, schema)
Exemplo n.º 12
0
def test_csv_table():
    """Read a CSV file with ROW_ID/ROW_VERSION columns through Table().

    Maybe not truly a unit test, but it lives here because it doesn't do
    network IO to synapse.  The iterator, RowSet and DataFrame views of the
    resulting CsvFileTable are compared with the rows written to the file.
    The temporary CSV file is removed whether the test passes or fails.
    """
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]

    schema1 = Schema(id='syn1234',
                     name='Jazz Guys',
                     columns=cols,
                     parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # reserve a temp file name; the file is reopened in text mode below
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        # create CSV file
        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp,
                                quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            writer.writerows(data)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders([
            SelectColumn(name="ROW_ID", columnType="STRING"),
            SelectColumn(name="ROW_VERSION", columnType="STRING")
        ] + [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        # test asDataFrame
        try:
            import pandas as pd

            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s' % tuple(row[0:2]) for row in data])
            assert df.shape == (8, 4)

        except ImportError:
            sys.stderr.write(
                'Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n'
            )

    finally:
        # Clean up in `finally` rather than `except` so the temp file is not
        # left behind when the test succeeds.  Removal stays best-effort: a
        # failure to delete is reported but never masks the test outcome.
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except OSError as ex:
                print(ex)
Exemplo n.º 13
0
def test_RowSetTable():
    """Build a RowSet from JSON, wrap it in a Table and check every view.

    The DataFrame part of the test is skipped, with a note on stderr, when
    an ImportError is raised.
    """
    expected_etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    expected_table_id = 'syn2976298'

    headers = [
        {'columnType': 'STRING', 'id': '353', 'name': 'name'},
        {'columnType': 'DOUBLE', 'id': '355', 'name': 'x'},
        {'columnType': 'DOUBLE', 'id': '3020', 'name': 'y'},
        {'columnType': 'INTEGER', 'id': '891', 'name': 'n'},
    ]
    rows = [
        {'rowId': 5, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 3},
        {'rowId': 6, 'values': ['bar', '1.34', '2.4', '101'], 'versionNumber': 3},
        {'rowId': 7, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 4},
        {'rowId': 8, 'values': ['qux', '1.23', '2.2', '102'], 'versionNumber': 3},
    ]
    row_set_json = {
        'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
        'headers': headers,
        'rows': rows,
        'tableId': 'syn2976298',
    }

    row_set = RowSet.from_json(row_set_json)

    # the RowSet mirrors the JSON it was built from
    assert row_set.etag == expected_etag
    assert row_set.tableId == expected_table_id
    assert len(row_set.headers) == 4
    assert len(row_set.rows) == 4

    schema = Schema(id="syn2976298",
                    name="Bogus Schema",
                    columns=[353, 355, 3020, 891],
                    parent="syn1000001")
    table = Table(schema, row_set)

    # the Table exposes the same metadata and row count
    assert table.etag == expected_etag
    assert table.tableId == expected_table_id
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    expected_names = ['foo', 'bar', 'foo', 'qux']
    try:
        import pandas as pd

        frame = table.asDataFrame()
        assert frame.shape == (4, 4)
        assert all(frame['name'] == expected_names)
    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping part of test_RowSetTable.\n\n'
        )
def test_schema():
    """Exercise adding and removing Schema columns, by id and by Column object."""
    # -- a column that carries an id shows up in properties.columnIds
    by_id = Schema(name='My Table', parent="syn1000001")
    assert_false(by_id.has_columns())

    by_id.addColumn(Column(id='1', name='Name', columnType='STRING'))
    assert_true(by_id.has_columns())
    assert_equals(by_id.properties.columnIds, ['1'])

    by_id.removeColumn('1')
    assert_false(by_id.has_columns())
    assert_equals(by_id.properties.columnIds, [])

    # -- columns without an id are expected in columns_to_store
    column_specs = [
        ('Name', 'STRING'),
        ('Born', 'INTEGER'),
        ('Hipness', 'DOUBLE'),
        ('Living', 'BOOLEAN'),
    ]
    by_object = Schema(name='Another Table', parent="syn1000001")
    by_object.addColumns([Column(name=name, columnType=column_type)
                          for name, column_type in column_specs])

    assert_true(by_object.has_columns())
    assert_equals(len(by_object.columns_to_store), 4)
    # fresh Column objects are used on purpose: membership must work by equality
    for name, column_type in column_specs:
        assert_in(Column(name=name, columnType=column_type), by_object.columns_to_store)

    by_object.removeColumn(Column(name='Living', columnType='BOOLEAN'))
    assert_true(by_object.has_columns())
    assert_equals(len(by_object.columns_to_store), 3)
    assert_not_in(Column(name='Living', columnType='BOOLEAN'), by_object.columns_to_store)
    assert_in(Column(name='Hipness', columnType='DOUBLE'), by_object.columns_to_store)
def test_Schema__max_column_check():
    """Storing a Schema with MAX_NUM_TABLE_COLUMNS + 1 columns raises ValueError."""
    schema = Schema(name="someName", parent="idk")

    # one column more than the allowed maximum
    column_count = synapseclient.table.MAX_NUM_TABLE_COLUMNS + 1
    schema.addColumns(
        Column(name="colNum%s" % number, columnType="STRING")
        for number in range(column_count))

    assert_raises(ValueError, syn.store, schema)
Exemplo n.º 16
0
def test_schema():
    """Exercise adding and removing Schema columns, by id and by Column object."""

    def living():
        """Return a fresh, id-less 'Living' column."""
        return Column(name='Living', columnType='BOOLEAN')

    def hipness():
        """Return a fresh, id-less 'Hipness' column."""
        return Column(name='Hipness', columnType='DOUBLE')

    # Part 1: a single column identified by its id
    schema = Schema(name='My Table', parent="syn1000001")
    assert_false(schema.has_columns())

    schema.addColumn(Column(id='1', name='Name', columnType='STRING'))
    assert_true(schema.has_columns())
    assert_equals(schema.properties.columnIds, ['1'])

    schema.removeColumn('1')
    assert_false(schema.has_columns())
    assert_equals(schema.properties.columnIds, [])

    # Part 2: several id-less columns, matched by equality
    schema = Schema(name='Another Table', parent="syn1000001")
    new_columns = [
        Column(name='Name', columnType='STRING'),
        Column(name='Born', columnType='INTEGER'),
        hipness(),
        living(),
    ]
    schema.addColumns(new_columns)

    assert_true(schema.has_columns())
    assert_equals(len(schema.columns_to_store), 4)
    assert_in(Column(name='Name', columnType='STRING'), schema.columns_to_store)
    assert_in(Column(name='Born', columnType='INTEGER'), schema.columns_to_store)
    assert_in(hipness(), schema.columns_to_store)
    assert_in(living(), schema.columns_to_store)

    schema.removeColumn(living())
    assert_true(schema.has_columns())
    assert_equals(len(schema.columns_to_store), 3)
    assert_not_in(living(), schema.columns_to_store)
    assert_in(hipness(), schema.columns_to_store)