def test_correct_return_type(self):
    '''
    Check that a request for the configured currency pair and start date
    hands back a ``bytes`` instance.
    '''
    payload = FXTickDataRequester(self.currency, self.start_date).request()
    self.assertIsInstance(payload, bytes)
def test_tabular_data_processor(self):
    '''
    Push five consecutive days through request -> parse -> process and
    check that the tabular processor can write each day's result into the
    test output directory without raising.
    '''
    # Always start from a freshly created output directory: remove a
    # leftover one from a previous run, then create it again.
    out_dir = self.data_output_path
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    # Five days, beginning at the configured start date.
    for offset in range(5):
        day = self.start_date + timedelta(offset)

        raw = FXTickDataRequester(self.currency, day).request()
        parsed = self.data_parser.parse(raw)

        # Convert the parsed ticks with the tabular processor.
        processor = FXTickDataProcessorTabular(self.currency, day)
        ticks = processor.process(parsed)

        # Any exception raised by the write is reported as a test
        # failure carrying the exception text.
        try:
            processor.write(ticks, out_dir)
        except Exception as e:
            self.fail(str(e))
def __call__(self, params: dict) -> bool:
    '''
    Run the request -> parse -> process/write pipeline for
    ``self.currency`` on ``self.request_date``.

    Every stage is guarded on its own: any ``Exception`` is logged through
    ``LOG`` and ends the run with ``False`` instead of propagating.

    :params opath: Path to output directory for writes.
    :params sep: Extracted from ``params`` together with ``opath``; it is
        not used anywhere else in this method.
    :returns flag: ``True`` once all three stages completed, ``False`` as
        soon as one of them raised.
    '''
    opath, sep = self._get_params(params, ['opath', 'sep'])

    # Stage 1: request the raw payload.
    try:
        LOG.info(
            f'Sending API requests for date {str(self.request_date)} to {opath}'
        )
        raw = FXTickDataRequester(self.currency, self.request_date).request()
    except Exception as e:
        LOG.error(
            f'Error requesting data on {self.request_date} for {self.currency}'
        )
        LOG.error(f'Error string: {str(e)}')
        return False

    # Stage 2: parse the payload.
    try:
        LOG.info(
            f'Parsing API response for date {str(self.request_date)} to {opath}'
        )
        parsed = FXTickDataParser().parse(raw)
    except Exception as e:
        LOG.error(
            f'Error parsing response data on {self.request_date} for {self.currency}'
        )
        LOG.error(f'Error string: {str(e)}')
        return False

    # Stage 3: process the parsed ticks and write them to ``opath``.
    try:
        LOG.info(
            f'Processing and writing parsed response for date {str(self.request_date)} to {opath}'
        )
        processor = FXTickDataProcessorTabular(self.currency,
                                               self.request_date)
        processed = processor.process(parsed)
        processor.write(processed, opath)
    except Exception as e:
        LOG.error(
            f'Error in the process/write stage for date {self.request_date} for {self.currency}'
        )
        LOG.error(f'Error string: {str(e)}')
        return False

    return True
def __call__(self, params: dict) -> bool:
    '''
    Run the request -> parse -> process/write pipeline for
    ``self.currency`` on ``self.request_date``, storing the result through
    the SQLite processor.

    Every stage is guarded on its own: any ``Exception`` is logged through
    ``LOG`` and ends the run with ``False`` instead of propagating.

    :params db: Local DB name.
    :params table: Table where data will be stored.
    :returns flag: Boolean flag indicating success or failure.
    '''
    db, table = self._get_params(params, ['db', 'table'])

    # Stage 1: request the raw payload.
    try:
        LOG.info(
            f'Sending API requests for date {str(self.request_date)} to {db}.{table}'
        )
        data_requester = FXTickDataRequester(self.currency,
                                             self.request_date)
        raw_ticks = data_requester.request()
    except Exception as e:
        LOG.error(
            f'Error requesting data on {self.request_date} for {self.currency}'
        )
        LOG.error(f'Error string: {str(e)}')
        return False

    # Stage 2: parse the payload.
    try:
        # The destination is written as ``db.table`` so this message
        # matches the ones logged by the other two stages.
        LOG.info(
            f'Parsing API response for date {str(self.request_date)} to {db}.{table}'
        )
        tick_data_parser = FXTickDataParser()
        parsed_ticks = tick_data_parser.parse(raw_ticks)
    except Exception as e:
        LOG.error(
            f'Error parsing response data on {self.request_date} for {self.currency}'
        )
        LOG.error(f'Error string: {str(e)}')
        return False

    # Stage 3: process the parsed ticks and write them to ``db``/``table``.
    try:
        LOG.info(
            f'Processing and writing parsed response for date {str(self.request_date)} to {db}.{table}'
        )
        tick_data_processor = FXTickDataProcessorSQLite(
            self.currency, self.request_date)
        processed_tick_data = tick_data_processor.process(parsed_ticks)
        tick_data_processor.write(processed_tick_data, db, table)
    except Exception as e:
        LOG.error(
            f'Error in the process/write stage for date {self.request_date} for {self.currency}'
        )
        LOG.error(f'Error string: {str(e)}')
        return False

    return True
def test_raise_exception_malformed_all_inputs(self):
    '''
    An empty currency pair combined with an empty date must make the
    requester raise, either on construction or on the request itself.
    '''
    with self.assertRaises(Exception):
        FXTickDataRequester('', '').request()
def test_raise_exception_malformed_currency_pair(self):
    '''
    An empty currency pair with an otherwise valid start date must make
    the requester raise, either on construction or on the request itself.
    '''
    with self.assertRaises(Exception):
        FXTickDataRequester('', self.start_date).request()
def test_raise_exception_malformed_date(self):
    '''
    An empty date with an otherwise valid currency pair must make the
    requester raise, either on construction or on the request itself.
    '''
    with self.assertRaises(Exception):
        FXTickDataRequester(self.currency, '').request()
def test_single_request(self):
    '''
    Check that one request for the start date comes back with a
    non-empty response.
    '''
    payload: bytes = FXTickDataRequester(self.currency,
                                         self.start_date).request()
    payload_size = len(payload)
    self.assertNotEqual(payload_size, 0)
def test_multiple_requests(self):
    '''
    Check that the module can serve three consecutive days of requests,
    each one returning a non-empty response.
    '''
    days = [self.start_date + timedelta(days=offset) for offset in range(3)]
    responses: list = [
        FXTickDataRequester(self.currency, day).request() for day in days
    ]
    self.assertTrue(all(len(resp) > 0 for resp in responses))
def test_parse_data_multiple_days(self):
    '''
    For five consecutive days, compare the processed ticks against the
    frame returned by ``self._load_data`` for the same date.
    '''
    for offset in range(5):
        day = self.start_date + timedelta(offset)

        # Freshly requested, parsed and processed ticks for this day.
        raw = FXTickDataRequester(self.currency, day).request()
        parsed = self.data_parser.parse(raw)
        ticks = FXTickDataProcessor(self.currency, day).process(parsed)

        # Reference data for the same day.
        expected = self._load_data(day)

        # Both frames must agree.
        assert_frame_equal(ticks, expected)
def setUp(self):
    '''
    Build the fixture used by the tests: filesystem locations, the
    currency pair and start date, and a requester and parser instance.
    '''
    # Filesystem locations: test data, write output, and the test DB.
    self.data_path: str = 'tests/data/'
    self.data_output_path: str = 'tests/data/outs/'
    self.path_to_test_db: str = 'tests/data/test.db'

    # Currency pair and first request date.
    self.currency = 'EURUSD'
    self.start_date = parser.parse('2018-10-01')

    # Collaborators built from the values above.
    self.requester = FXTickDataRequester(self.currency, self.start_date)
    self.data_parser = FXTickDataParser()
def setUp(self):
    '''
    Build the fixture used by the tests: the currency pair and start date,
    a parser instance, the raw response for the start date, and a
    hard-coded list of sample rows.
    '''
    self.currency = 'EURUSD'
    self.start_date = parser.parse('2018-10-01')
    self.data_parser = FXTickDataParser()

    # Five types, in the same int/int/int/float/float layout as the rows
    # of ``sample_data`` below.
    self.expected_types = [int, int, int, float, float]

    # Raw response for the start date.
    self.resp: bytes = FXTickDataRequester(self.currency,
                                           self.start_date).request()

    # Hard-coded sample rows, five fields each.
    self.sample_data: list = [
        (205, 116055, 116052, 1.0, 1.5700000524520874),
        (275, 116057, 116054, 1.0, 1.0),
        (797, 116058, 116053, 2.319999933242798, 5.510000228881836),
        (1070, 116054, 116053, 1.0, 1.0),
        (1264, 116053, 116049, 1.0, 1.7599999904632568),
        (2644, 116052, 116049, 1.5, 1.690000057220459),
        (3150, 116051, 116048, 1.5, 1.3899999856948853),
        (5480, 116051, 116047, 1.5, 5.889999866485596),
        (7738, 116051, 116048, 1.5, 1.7599999904632568),
        (8257, 116052, 116048, 3.369999885559082, 2.140000104904175),
        (8986, 116051, 116049, 1.0, 1.25),
        (11673, 116050, 116048, 1.0, 1.5),
        (12174, 116051, 116046, 2.25, 5.25),
        (13082, 116051, 116046, 2.25, 3.450000047683716),
        (13649, 116051, 116047, 2.440000057220459, 1.0),
        (14157, 116051, 116047, 2.25, 1.0),
        (14516, 116052, 116049, 1.100000023841858, 1.0),
        (15055, 116052, 116048, 1.690000057220459, 1.0),
        (16985, 116052, 116047, 1.0, 1.5700000524520874),
        (17651, 116052, 116047, 1.440000057220459, 6.449999809265137),
        (18700, 116050, 116048, 1.0, 1.1200000047683716),
        (19224, 116051, 116047, 2.25, 2.319999933242798),
        (19753, 116051, 116047, 2.25, 1.2000000476837158),
        (20289, 116051, 116046, 3.069999933242798, 5.320000171661377),
        (21907, 116050, 116047, 1.0, 1.5700000524520874)
    ]
def test_sqlite_data_processor_columns(self):
    '''
    For five consecutive days, check that the frame produced by the SQLite
    processor, after the currency pair column has been added, carries
    exactly the expected columns in the expected order.
    '''
    expected_columns: list = [
        'ts', 'pair', 'ask', 'bid', 'ask_volume', 'bid_volume'
    ]

    for offset in range(5):
        day = self.start_date + timedelta(offset)

        raw = FXTickDataRequester(self.currency, day).request()
        parsed = self.data_parser.parse(raw)

        # Process the ticks, then add the currency pair column.
        processor = FXTickDataProcessorSQLite(self.currency, day)
        ticks = processor.process(parsed)
        with_pair = processor._add_currency_pair_column(ticks)

        # Column names and their order must both match.
        actual_columns = list(with_pair.columns)
        self.assertEqual(actual_columns, expected_columns)
def test_sqlite_data_processor(self):
    '''
    Push five consecutive days through request -> parse -> process and
    check that the SQLite processor can write each day's result to the
    ``raw_ticks`` table of the test database without raising.
    '''
    self._make_test_database()

    for offset in range(5):
        day = self.start_date + timedelta(offset)

        raw = FXTickDataRequester(self.currency, day).request()
        parsed = self.data_parser.parse(raw)

        # Convert the parsed ticks with the SQLite processor.
        processor = FXTickDataProcessorSQLite(self.currency, day)
        ticks = processor.process(parsed)

        # Any exception raised by the write is reported as a test
        # failure carrying the exception text.
        try:
            processor.write(ticks, self.path_to_test_db, 'raw_ticks')
        except Exception as e:
            self.fail(str(e))