Пример #1
0
def test_csv_table():
    ## Maybe not truly a unit test, but here because it doesn't do
    ## network IO to synapse
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = []
    cols.append(Column(id='1', name='Name', columnType='STRING'))
    cols.append(Column(id='2', name='Born', columnType='INTEGER'))
    cols.append(Column(id='3', name='Hipness', columnType='DOUBLE'))
    cols.append(Column(id='4', name='Living', columnType='BOOLEAN'))

    schema1 = Schema(id='syn1234',
                     name='Jazz Guys',
                     columns=cols,
                     parent="syn1000001")

    #TODO: use StringIO.StringIO(data) rather than writing files
    try:
        ## create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp,
                                quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        ## need to set column headers to read a CSV file
        table.setColumnHeaders([
            SelectColumn(name="ROW_ID", columnType="STRING"),
            SelectColumn(name="ROW_VERSION", columnType="STRING")
        ] + [SelectColumn.from_column(col) for col in cols])

        ## test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        ## test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        ## test asDataFrame
        try:
            import pandas as pd

            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s' % tuple(row[0:2]) for row in data])
            assert df.shape == (8, 4)

        except ImportError as e1:
            sys.stderr.write(
                'Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n'
            )

    except Exception as ex1:
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
        raise
def test_csv_table():
    ## Maybe not truly a unit test, but here because it doesn't do
    ## network IO to synapse
    data = [["1", "1", "John Coltrane",  1926, 8.65, False],
            ["2", "1", "Miles Davis",    1926, 9.87, False],
            ["3", "1", "Bill Evans",     1929, 7.65, False],
            ["4", "1", "Paul Chambers",  1935, 5.14, False],
            ["5", "1", "Jimmy Cobb",     1929, 5.78, True],
            ["6", "1", "Scott LaFaro",   1936, 4.21, False],
            ["7", "1", "Sonny Rollins",  1930, 8.99, True],
            ["8", "1", "Kenny Burrel",   1931, 4.37, True]]

    filename = None

    cols = []
    cols.append(Column(id='1', name='Name', columnType='STRING'))
    cols.append(Column(id='2', name='Born', columnType='INTEGER'))
    cols.append(Column(id='3', name='Hipness', columnType='DOUBLE'))
    cols.append(Column(id='4', name='Living', columnType='BOOLEAN'))

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    #TODO: use StringIO.StringIO(data) rather than writing files

    try:
        ## create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=os.linesep)
            writer.writerow(['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols])
            filename = temp.name
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        ## need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        ## test iterator
        # print "\n\nJazz Guys"
        for table_row, expected_row in izip(table, data):
            # print table_row, expected_row
            assert table_row==expected_row

        ## test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in izip(rowset.rows, data):
            #print rowset_row, expected_row
            assert rowset_row['values']==expected_row[2:]
            assert rowset_row['rowId']==expected_row[0]
            assert rowset_row['versionNumber']==expected_row[1]

        ## test asDataFrame
        try:
            import pandas as pd

            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s'%tuple(row[0:2]) for row in data])
            assert df.shape == (8,4)

        except ImportError as e1:
            sys.stderr.write('Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n')

    except Exception as ex1:
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print ex
        raise
def test_csv_table():
    # Maybe not truly a unit test, but here because it doesn't do
    # network IO to synapse
    data = [["1", "1", "John Coltrane",  1926, 8.65, False],
            ["2", "1", "Miles Davis",    1926, 9.87, False],
            ["3", "1", "Bill Evans",     1929, 7.65, False],
            ["4", "1", "Paul Chambers",  1935, 5.14, False],
            ["5", "1", "Jimmy Cobb",     1929, 5.78, True],
            ["6", "1", "Scott LaFaro",   1936, 4.21, False],
            ["7", "1", "Sonny Rollins",  1930, 8.99, True],
            ["8", "1", "Kenny Burrel",   1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert_is_instance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert_equals(table_row, expected_row)

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert_equals(rowset_row['values'], expected_row[2:])
            assert_equals(rowset_row['rowId'], expected_row[0])
            assert_equals(rowset_row['versionNumber'], expected_row[1])

        df = table.asDataFrame()
        assert_equals(list(df['Name']), [row[2] for row in data])
        assert_equals(list(df['Born']), [row[3] for row in data])
        assert_equals(list(df['Living']), [row[5] for row in data])
        assert_equals(list(df.index), ['%s_%s' % tuple(row[0:2]) for row in data])
        assert_equals(df.shape, (8, 4))

    except Exception:
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
        raise