示例#1
0
 def test_never_timeout(self):
     """Verify the session token keeps rotating instead of timing out.

     Reads the token, waits 5 minutes, and checks that a different token
     is now in effect; repeated twice to confirm the refresh keeps going.
     """
     # Context manager guarantees the session is closed even if an
     # assertion fails mid-loop (the original leaked it on failure).
     with Session() as session:
         for _ in range(2):
             token_before = session.session_token
             # presumably longer than the server's token refresh interval
             sleep(300)
             token_after = session.session_token
             self.assertNotEqual(token_before, token_after)
def run_script(dh_session: Session) -> Table:
    """Execute a script on the Deephaven server and fetch its result table.

    The script assumes a table named ``t`` already exists in the server's
    global scope; it derives ``t2`` from it, which is opened and returned.
    """
    script_source = '''
t2 = t.where("VendorID > 0")\
        .sort("VendorID", "fare_amount")\
        .headBy(5, "VendorID")
'''
    dh_session.run_script(script_source)
    return dh_session.open_table("t2")
示例#3
0
def demo_asof_join(dh_session: Session):
    """Demonstrate an as-of join between two ticking time tables.

    Both tables tick a counter column ``Col1``; after letting them
    accumulate rows for two seconds, the left table is as-of joined to
    the right on ``Timestamp``, pulling in the right side's timestamp
    and counter under new names.
    """
    left_side = dh_session.time_table(period=100000).update(
        formulas=["Col1=i"])
    right_side = dh_session.time_table(period=200000).update(
        formulas=["Col1=i"])
    # Give both ticking tables a moment to produce some rows.
    time.sleep(2)
    return left_side.aj(right_side,
                        on=["Timestamp"],
                        joins=["Timestamp2 = Timestamp", "Col2 = Col1"])
def demo_asof_join(dh_session: Session):
    """As-of join two ticking time tables on their timestamps.

    Same demo as above, written against the client API spelling that
    uses ``column_specs`` / ``keys`` / ``columns_to_add``.
    """
    left_side = dh_session.time_table(period=100000).update(
        column_specs=["Col1=i"])
    right_side = dh_session.time_table(period=200000).update(
        column_specs=["Col1=i"])
    # Let the ticking tables accumulate a couple of seconds of rows first.
    time.sleep(2)
    return left_side.aj(
        right_side,
        keys=["Timestamp"],
        columns_to_add=["Timestamp2 = Timestamp", "Col2 = Col1"])
示例#5
0
    def test_merge_tables(self):
        """Merging a table with two derived tables grows the row count.

        Builds an ungrouped copy and a filtered copy of an imported CSV
        table, merges all three ordered by column ``a``, and checks the
        merged result is strictly larger than each input.
        """
        # Context manager closes the session even on assertion failure
        # (the original never closed it at all).
        with Session() as session:
            pa_table = csv.read_csv(self.csv_file)
            table1 = session.import_table(pa_table)
            table2 = table1.group_by(by=["a", "c"]).ungroup(cols=["b", "d", "e"])
            table3 = table1.where(["a % 2 > 0 && b % 3 == 1"])
            result_table = session.merge_tables(tables=[table1, table2, table3],
                                                order_by="a")

            self.assertTrue(result_table.size > table1.size)
            self.assertTrue(result_table.size > table2.size)
            self.assertTrue(result_table.size > table3.size)
    def test_persistent_tables(self):
        """A table bound by one session remains visible to a later session."""
        # BUG FIX: the original re-assigned ``session1 = Session()`` inside
        # the ``with`` block, so the context-managed session was abandoned
        # and the replacement session was never closed.  Use the managed
        # session directly.
        with Session() as session1:
            session1.run_script('t = None')
            t = session1.empty_table(10)
            session1.bind_table('t', t)

        with Session(sync_fields=SYNC_ONCE) as session2:
            self.assertIn('t', session2.tables)
示例#7
0
def demo_query(dh_session: Session, taxi_data_table: Table) -> Table:
    """Build a server-side query over the taxi table and execute it.

    Keeps rows with a positive vendor id, sorts by vendor and fare, and
    retains the last five rows per vendor.
    """
    # Build the query step by step, then run it on the DH server.
    query = dh_session.query(taxi_data_table)
    query = query.where(filters=["VendorID > 0"])
    query = query.sort(order_by=["VendorID", "fare_amount"])
    query = query.tail_by(num_rows=5, by=["VendorID"])
    return query.exec()
def import_taxi_records(dh_session: Session) -> Table:
    """Download NYC taxi trip CSV data, clean it, and upload it to Deephaven.

    Drops a fixed set of unwanted columns plus any column whose pyarrow
    type is not supported by Deephaven, then imports the remainder.
    Returns the server-side Table backed by the uploaded data.
    """
    # download the CSV data and read it into a pyarrow table and prepare it
    # for uploading into DH
    csv_file_name = download_csv(
        url=
        "https://nyc-tlc.s3.amazonaws.com/trip+data/yellow_tripdata_2020-12.csv"
    )
    pa_table = csv.read_csv(csv_file_name)

    # drop unwanted columns
    unwanted_columns = [
        "tpep_pickup_datetime", "tpep_dropoff_datetime", "RatecodeID",
        "store_and_fwd_flag", "PULocationID", "DOLocationID"
    ]
    pa_table = pa_table.drop(unwanted_columns)

    # Collect every column with an unsupported data type, then drop them in
    # a single pass (the original rebuilt the table once per dropped column).
    incompatible_columns = []
    for column, column_name in zip(pa_table.columns, pa_table.column_names):
        if not is_deephaven_compatible(column.type):
            print(
                f"drop column: {column_name} because of unsupported data type {column.type}"
            )
            incompatible_columns.append(column_name)
    if incompatible_columns:
        pa_table = pa_table.drop(incompatible_columns)

    # upload the pyarrow table to the Deephaven server
    return dh_session.import_table(pa_table)
示例#9
0
def main():
    """Fetch taxi data, compute bottom-5 fares per vendor, and print them."""
    with Session(host="localhost", port=10000) as dh_session:
        taxi_data_table = import_taxi_records(dh_session)
        bottom_5_fares_table = demo_chained_table_ops(taxi_data_table)

        # download the table to the client in the form of pyarrow table and
        # convert it into a Pandas DataFrame
        snapshot_data = bottom_5_fares_table.snapshot()
        df = snapshot_data.to_pandas()

        # Use the fully-qualified option key: bare "max_columns" relies on
        # pandas' substring option matching, which fails if the pattern
        # ever becomes ambiguous.
        pd.set_option("display.max_columns", 20)
        print(df)
    def test_shared_tables(self):
        """A table bound in one session syncs into another session's scope."""
        # try/finally ensures both sessions get closed even if the test
        # fails (the original leaked both).
        session1 = Session(sync_fields=SYNC_REPEATED)
        try:
            session1.run_script('t = None')

            session2 = Session()
            try:
                t = session2.empty_table(10)
                session2.bind_table('t', t)

                @timeout_decorator.timeout(seconds=1)
                def wait_for_table():
                    # Busy-wait until the bound table shows up in session1's
                    # synced field list.
                    while 't' not in session1.tables:
                        pass

                try:
                    wait_for_table()
                except timeout_decorator.TimeoutError:
                    self.fail('table did not get synced to session1')
            finally:
                session2.close()
        finally:
            session1.close()
示例#11
0
class BaseTestCase(unittest.TestCase):
    """Shared fixture: a random CSV file on disk plus a Session per test."""

    # Path of the generated CSV fixture used by the tests.
    csv_file = 'test.csv'

    @classmethod
    def setUpClass(cls) -> None:
        """Silence deprecation noise and generate the random CSV fixture."""
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        # Touch the file first so it exists before being populated.
        if not os.path.exists(BaseTestCase.csv_file):
            open(BaseTestCase.csv_file, 'w').close()
        make_random_csv(5, 1000, output_file=BaseTestCase.csv_file)

    @classmethod
    def tearDownClass(cls) -> None:
        """Remove the CSV fixture created in setUpClass."""
        if os.path.exists(BaseTestCase.csv_file):
            os.remove(BaseTestCase.csv_file)

    def setUp(self) -> None:
        """Open a fresh client session for each test."""
        self.session = Session()

    def tearDown(self) -> None:
        """Close the session opened in setUp."""
        self.session.close()
def main():
    """Upload taxi data, run a server-side script over it, and print the result."""
    with Session(host="localhost", port=10000) as dh_session:
        taxi_data_table = import_taxi_records(dh_session)
        variable_name = "t"
        # NOTE(review): arguments are passed as (table, name) here — confirm
        # this matches the ``Session.bind_table`` signature of the client
        # version in use (newer versions take (name, table)).
        dh_session.bind_table(taxi_data_table, variable_name)

        bottom_5_fares_table = run_script(dh_session=dh_session)
        snapshot_data = bottom_5_fares_table.snapshot()
        df = snapshot_data.to_pandas()

        # Fully-qualified option key avoids pandas' fragile substring match.
        pd.set_option("display.max_columns", 20)
        print(df)
def main():
    """Compute top-5 and bottom-5 fares per vendor and print their union."""
    with Session(host="localhost", port=10000) as dh_session:
        taxi_data_table = import_taxi_records(dh_session)

        top_5_fares_table = demo_query(dh_session=dh_session, taxi_data_table=taxi_data_table)
        bottom_5_fares_table = demo_chained_table_ops(taxi_data_table)

        combined_fares_table = dh_session.merge_tables(tables=[top_5_fares_table, bottom_5_fares_table])
        snapshot_data = combined_fares_table.snapshot()
        df = snapshot_data.to_pandas()

        # Fully-qualified option key avoids pandas' fragile substring match.
        pd.set_option("display.max_columns", 20)
        print(df)
示例#14
0
    def test_multiple_sessions(self):
        """Many concurrent sessions can import and join tables independently."""
        sessions = [Session() for _ in range(100)]
        try:
            # Hoisted out of the loop: the CSV fixture never changes, so
            # parse it once instead of once per session.
            pa_table = csv.read_csv(self.csv_file)

            tables = []
            for session in sessions:
                table1 = session.import_table(pa_table)
                table2 = table1.group_by()
                self.assertEqual(table2.size, 1)
                tables.append(table1)

            # Joining on every column against the identically-built next
            # table must preserve the row count.
            for i, table in enumerate(tables[:-1]):
                j_table = table.natural_join(tables[i + 1],
                                             on=["a", "b", "c", "d", "e"])
                self.assertEqual(table.size, j_table.size)
        finally:
            # Close every session even if an assertion failed above.
            for session in sessions:
                session.close()
示例#15
0
 def setUp(self) -> None:
     """Create a fresh client Session before each test."""
     self.session = Session()
示例#16
0
 def test_close(self):
     """After close(), the session reports disconnected and not alive."""
     session = Session()
     session.close()
     # assertFalse is the idiomatic unittest check for boolean properties
     # (the original spelled it assertEqual(False, ...)).
     self.assertFalse(session.is_connected)
     self.assertFalse(session.is_alive)
示例#17
0
 def test_connect_failure(self):
     """Connecting to a non-Deephaven port must raise DHError."""
     with self.assertRaises(DHError):
         # The constructor call itself is what should raise; the unused
         # ``session =`` binding from the original was dropped.
         Session(port=80)
def main():
    """Run the as-of join demo and print a snapshot of the joined table."""
    with Session(host="localhost", port=10000) as dh_session:
        joined_table = demo_asof_join(dh_session)
        snapshot_frame = joined_table.snapshot().to_pandas()
        print(snapshot_frame)
示例#19
0
 def test_empty_table(self):
     """An empty table requested with 1000 rows reports size 1000."""
     session = Session()
     table = session.empty_table(1000)
     self.assertEqual(1000, table.size)
     session.close()
示例#20
0
 def test_time_table(self):
     """A time table is live, i.e. not static."""
     session = Session()
     ticking_table = session.time_table(period=100000)
     self.assertFalse(ticking_table.is_static)
     session.close()