def test_use_entities_order_two_output_edges(self):
    """Check head edges produced with ``use_entities_order=True``.

    For the sample ids ``[1, 0, 1]`` / types ``[1, 0, 2]`` the producer is
    expected to return two rows that differ only in the first object id
    (1 and 2), identical type rows, and a target index of 0.
    """
    raw_dataset = RawDataset(
        dataset_id="dataset1",
        inference_mode=True,
        dataset_type=DatasetType.TRAINING,
        data_directory=self.DATASET_PATH,
        batch_size=1,
    )
    producer = EdgesProducer(
        raw_dataset.ids_of_entities,
        raw_dataset.graph_edges,
        use_entities_order=True,
    )

    sample = {
        "object_ids": np.array([1, 0, 1]),
        "object_types": np.array([1, 0, 2]),
    }
    head_edges, target_index = producer.produce_head_edges(sample=sample)

    expected_ids = [[1, 0, 1], [2, 0, 1]]
    expected_types = [[1, 0, 2], [1, 0, 2]]
    self.assertAllEqual(expected_ids, head_edges["object_ids"])
    self.assertAllEqual(expected_types, head_edges["object_types"])
    self.assertEqual(0, target_index)
def test_edge_pattern_in_existing_edges(self, randint_mock):
    """Check head edges for a sample with ``use_entities_order=False``.

    ``randint_mock`` is forced to return 0 (presumably it is injected by a
    patch decorator that is not visible here -- confirm against the
    enclosing class). The produced ``object_ids`` must have shape (3, 3),
    the first row must equal the input ids ``[0, 0, 1]``, and the target
    index must be 0.
    """
    randint_mock.return_value = 0

    raw_dataset = RawDataset(
        dataset_id="dataset1",
        inference_mode=True,
        dataset_type=DatasetType.TRAINING,
        data_directory=self.DATASET_PATH,
        batch_size=1,
    )
    producer = EdgesProducer(
        raw_dataset.ids_of_entities,
        raw_dataset.graph_edges,
        use_entities_order=False,
    )

    sample = {
        "object_ids": np.array([0, 0, 1]),
        "object_types": np.array([1, 0, 2]),
    }
    edges, target_index = producer.produce_head_edges(sample=sample)

    self.assertAllEqual((3, 3), edges["object_ids"].shape)
    self.assertAllEqual([0, 0, 1], edges["object_ids"][0])
    self.assertEqual(0, target_index)
def test_incremental_edges(self):
    """Check the known output/input edge maps of a validation dataset.

    Seeds NumPy's global RNG with 2, loads ``dataset1`` as a non-shuffled
    validation dataset, and compares ``known_entity_output_edges`` and
    ``known_entity_input_edges`` against fixed expected dictionaries.
    """
    np.random.seed(2)

    raw_dataset = RawDataset(
        dataset_id="dataset1",
        dataset_type=DatasetType.VALIDATION,
        data_directory=self.DATASET_PATH,
        shuffle_dataset=False,
        batch_size=1,
        inference_mode=False,
    )

    expected_output_edges = {0: [(1, 0)], 1: [(2, 1)]}
    self.assertDictEqual(
        raw_dataset.known_entity_output_edges, expected_output_edges
    )

    expected_input_edges = {1: [(0, 0)], 2: [(1, 1)]}
    self.assertDictEqual(
        raw_dataset.known_entity_input_edges, expected_input_edges
    )
def test_multiple_object_ids(self, randint_mock):
    """Check head edges for a five-element sample.

    ``randint_mock`` is forced to return 0 (presumably it is injected by a
    patch decorator that is not visible here -- confirm against the
    enclosing class). With ``use_entities_order=False`` the producer is
    expected to return two id rows that differ only in the first element
    (1 and 2), two identical type rows, and a target index of 0.
    """
    randint_mock.return_value = 0

    raw_dataset = RawDataset(
        dataset_id="dataset1",
        inference_mode=True,
        dataset_type=DatasetType.TRAINING,
        data_directory=self.DATASET_PATH,
        batch_size=1,
    )
    producer = EdgesProducer(
        raw_dataset.ids_of_entities,
        raw_dataset.graph_edges,
        use_entities_order=False,
    )

    sample = {
        "object_ids": np.array([1, 0, 1, 2, 3]),
        "object_types": np.array([1, 0, 2, 1, 0]),
    }
    edges, target_index = producer.produce_head_edges(sample=sample)

    expected_ids = [[1, 0, 1, 2, 3], [2, 0, 1, 2, 3]]
    expected_types = [[1, 0, 2, 1, 0], [1, 0, 2, 1, 0]]
    self.assertAllEqual(expected_ids, edges["object_ids"])
    self.assertAllEqual(expected_types, edges["object_types"])
    self.assertEqual(0, target_index)
def main() -> None:
    """Build the datasets stage by stage and train the RNN model.

    Parses the command-line arguments, constructs the raw, processed and
    featurized datasets in turn (printing the record count after each
    stage), then wraps an ``RNNModel`` in a ``ModelHandler`` with
    ``use_wandb=True`` and fits it for ``args.max_epochs``.
    """
    args = parse_args()

    raw_dataset = RawDataset(subsample=args.subsample)
    raw_count = len(raw_dataset.raw_df)
    print(f'Raw dataset has {raw_count} records.')

    processed_dataset = ProcessedDataset(raw_dataset)
    processed_count = len(processed_dataset.processed_df)
    print(f'Processed dataset has {processed_count} records.')

    featurized_dataset = FeaturizedDataset(processed_dataset)
    featurized_count = len(featurized_dataset.featurized_df)
    print(f'Featurized dataset has {featurized_count} records.')

    handler = ModelHandler(RNNModel(featurized_dataset), use_wandb=True)

    # NOTE(review): "Comensing" looks like a misspelling of "Commencing";
    # the message is left untouched so the program's output stays the same.
    print(f'Comensing training on {handler.device}')
    handler.fit(args.max_epochs)
    print('Done.')