def test_generate_and_save_package_topic_model_local(self):
    """Curate a package-topic model locally, persist it and diff it with the expected model."""
    in_store = LocalFileSystem("tests/data/data_gnosis/input-ptm-data")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem("tests/data/data_gnosis/output-ptm-data")
    self.assertIsNotNone(out_store)

    curated_model = GnosisPackageTopicModel.curate(
        data_store=in_store,
        filename="data_input_curated_package_topic/package_topic.json")
    self.assertIsNotNone(curated_model)
    actual_dict = curated_model.get_dictionary()
    self.assertIsNotNone(actual_dict)

    expected_model = GnosisPackageTopicModel.load(
        data_store=out_store,
        filename="data_package_topic/expected_package_topic.json")
    self.assertIsNotNone(expected_model)
    expected_dict = expected_model.get_dictionary()
    self.assertIsNotNone(expected_dict)

    self.assertDictEqual(actual_dict, expected_dict)
    curated_model.save(data_store=out_store,
                       filename="data_package_topic/package_topic.json")
def test_train_and_save_gnosis_ref_arch_local(self):
    """Train the Gnosis reference architecture locally and diff it with the expected output."""
    in_store = LocalFileSystem(
        "analytics_platform/kronos/gnosis/test/data/input-ra-data")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem(
        "analytics_platform/kronos/gnosis/test/data/output-ra-data")
    self.assertIsNotNone(out_store)

    trained_ra = GnosisReferenceArchitecture.train(data_store=in_store,
                                                   min_support_count=40,
                                                   min_intent_topic_count=2,
                                                   fp_num_partition=12)
    self.assertIsNotNone(trained_ra)
    actual_dict = trained_ra.get_dictionary()
    self.assertIsNotNone(actual_dict)

    expected_ra = GnosisReferenceArchitecture.load(
        data_store=out_store,
        filename="data_gnosis/expected_gnosis_ref_arch.json")
    self.assertIsNotNone(expected_ra)
    expected_dict = expected_ra.get_dictionary()
    self.assertIsNotNone(expected_dict)

    self.assertDictEqual(actual_dict, expected_dict)
    trained_ra.save(data_store=out_store,
                    filename="data_gnosis/gnosis_ref_arch.json")
def test_train_and_save_gnosis_ref_arch_local(self):
    """Test the train() method, the deserialization method and compare results."""
    in_store = LocalFileSystem("tests/data/data_gnosis/input-ra-data/")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem("tests/data/data_gnosis/output-ra-data/")
    self.assertIsNotNone(out_store)

    trained_ra = GnosisReferenceArchitecture.train(data_store=in_store,
                                                   min_support_count=40,
                                                   min_intent_topic_count=2,
                                                   fp_num_partition=12,
                                                   fp_tag_intent_limit=2)
    self.assertIsNotNone(trained_ra)
    actual_dict = trained_ra.get_dictionary()
    self.assertIsNotNone(actual_dict)

    expected_ra = GnosisReferenceArchitecture.load(
        data_store=out_store,
        filename="data_gnosis/expected_gnosis_ref_arch.json")
    self.assertIsNotNone(expected_ra)
    expected_dict = expected_ra.get_dictionary()
    self.assertIsNotNone(expected_dict)

    self.assertDictEqual(actual_dict, expected_dict)
    trained_ra.save(data_store=out_store,
                    filename="data_gnosis/gnosis_ref_arch.json")
def test_generate_and_save_pruned_list_local(self):
    """Test the method prune_tag_list() and the deserialization of the tag list."""
    in_store = LocalFileSystem("tests/data/data_apollo/")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem("tests/data/data_apollo/")
    self.assertIsNotNone(out_store)

    TagListPruner.prune_tag_list(in_store, out_store, additional_path="")

    pruned_obj = TagListPruner.load(
        data_store=out_store,
        filename=PACKAGE_LIST_INPUT_CURATED_FILEPATH + "package_topic.json")
    self.assertIsNotNone(pruned_obj)
    actual_packages = pruned_obj.package_list
    self.assertIsNotNone(actual_packages)

    expected_obj = TagListPruner.load(
        data_store=out_store,
        filename=PACKAGE_LIST_INPUT_CURATED_FILEPATH + "expected_output.json")
    self.assertIsNotNone(expected_obj)
    expected_packages = expected_obj.package_list
    self.assertIsNotNone(expected_packages)

    # Compare the generated tag dicts pairwise against the expected ones.
    for actual_tags, expected_tags in zip(actual_packages, expected_packages):
        self.assertDictEqual(actual_tags, expected_tags)
def test_generate_and_save_cooccurrence_matrices_local(self):
    """Generate co-occurrence matrices from local manifest files and compare
    the columns of each generated matrix with its expected counterpart."""
    input_data_store = LocalFileSystem(
        "tests/data/data_softnet/input-com-data")
    self.assertTrue(input_data_store is not None)
    output_data_store = LocalFileSystem(
        "tests/data/data_softnet/output-com-data")
    self.assertTrue(output_data_store is not None)
    # Kronos dependency dicts keyed by ecosystem, loaded from the input store.
    eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=input_data_store,
        additional_path="")
    self.assertTrue(eco_to_kronos_dependency_dict is not None)
    manifest_filenames = input_data_store.list_files(
        "data_input_manifest_file_list")
    self.assertTrue(manifest_filenames is not None)
    for manifest_filename in manifest_filenames:
        # Assumes path layout <root>/<user_category>/<file> — TODO confirm.
        user_category = manifest_filename.split("/")[1]
        manifest_content_json_list = input_data_store.read_json_file(
            filename=manifest_filename)
        self.assertTrue(manifest_content_json_list is not None)
        for manifest_content_json in manifest_content_json_list:
            self.assertTrue(manifest_content_json is not None)
            manifest_content_dict = dict(manifest_content_json)
            ecosystem = manifest_content_dict["ecosystem"]
            kronos_dependency_dict = eco_to_kronos_dependency_dict[
                ecosystem]
            list_of_package_list = manifest_content_dict.get(
                "package_list")
            cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                kronos_dependency_dict=kronos_dependency_dict,
                list_of_package_list=list_of_package_list)
            self.assertTrue(cooccurrence_matrix_obj is not None)
            output_filename = "data_co_occurrence_matrix" + "/" + str(
                user_category) + "/" + "cooccurrence_matrix" + "_" + str(
                    ecosystem) + ".json"
            cooccurrence_matrix_obj.save(data_store=output_data_store,
                                         filename=output_filename)
            expected_output_filename = "data_co_occurrence_matrix" + "/" + str(
                user_category
            ) + "/" + "expected_cooccurrence_matrix" + "_" + str(
                ecosystem) + ".json"
            expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                data_store=output_data_store,
                filename=expected_output_filename)
            self.assertTrue(expected_cooccurrence_matrix_obj is not None)
            # get_matrix_dictionary() apparently returns a pandas DataFrame
            # (it exposes .columns) — TODO confirm against the generator.
            cooccurrence_matrix_df = cooccurrence_matrix_obj.get_matrix_dictionary(
            )
            self.assertTrue(cooccurrence_matrix_df is not None)
            expected_cooccurrence_matrix_df = expected_cooccurrence_matrix_obj.get_matrix_dictionary(
            )
            expected_columns = set(expected_cooccurrence_matrix_df.columns)
            resultant_columns = set(cooccurrence_matrix_df.columns)
            self.assertTrue(resultant_columns == expected_columns)
            # NOTE(review): iterating a DataFrame yields its column labels, so
            # this subset check effectively repeats the column comparison above
            # rather than comparing cell values — verify this is the intent.
            self.assertTrue(
                set(cooccurrence_matrix_df).issubset(
                    set(expected_cooccurrence_matrix_df)))
def test_generate_and_save_cooccurrence_matrices_local(self):
    """Generate co-occurrence matrices from local manifests and compare each
    generated DataFrame frame-for-frame with the expected one."""
    in_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/input-com-data")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/output-com-data")
    self.assertIsNotNone(out_store)

    kronos_deps_by_eco = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=in_store, additional_path="")
    self.assertIsNotNone(kronos_deps_by_eco)

    manifest_paths = in_store.list_files("data_input_manifest_file_list")
    self.assertIsNotNone(manifest_paths)

    for manifest_path in manifest_paths:
        # Second path component carries the user category.
        category = manifest_path.split("/")[1]
        manifest_entries = in_store.read_json_file(filename=manifest_path)
        self.assertIsNotNone(manifest_entries)

        for entry in manifest_entries:
            self.assertIsNotNone(entry)
            entry_dict = dict(entry)
            eco = entry_dict["ecosystem"]
            dep_dict = kronos_deps_by_eco[eco]
            package_lists = entry_dict.get("package_list")

            matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                kronos_dependency_dict=dep_dict,
                list_of_package_list=package_lists)
            self.assertIsNotNone(matrix_obj)

            generated_path = ("data_co_occurrence_matrix/" + str(category) +
                              "/cooccurrence_matrix_" + str(eco) + ".json")
            matrix_obj.save(data_store=out_store, filename=generated_path)

            expected_path = ("data_co_occurrence_matrix/" + str(category) +
                             "/expected_cooccurrence_matrix_" + str(eco) +
                             ".json")
            expected_obj = CooccurrenceMatrixGenerator.load(
                data_store=out_store, filename=expected_path)
            self.assertIsNotNone(expected_obj)

            actual_df = matrix_obj.get_matrix_dictionary()
            self.assertIsNotNone(actual_df)
            expected_df = expected_obj.get_matrix_dictionary()
            self.assertIsNotNone(expected_df)

            # Column order may differ, so sort columns before comparing.
            assert_frame_equal(actual_df.sort_index(axis=1),
                               expected_df.sort_index(axis=1),
                               check_names=True)
def test_generate_and_save_kronos_dependency_local(self):
    """Generate per-ecosystem Kronos dependency graphs, persist them and diff
    each against its expected counterpart."""
    in_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/input-kd-data")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/output-kd-data")
    self.assertIsNotNone(out_store)

    ref_arch_json = in_store.read_json_file(
        filename="data_gnosis/gnosis_ref_arch.json")
    self.assertIsNotNone(ref_arch_json)
    ref_arch_dict = dict(ref_arch_json)
    self.assertIsNotNone(ref_arch_dict)

    topic_json = in_store.read_json_file(
        "data_package_topic/package_topic.json")
    self.assertIsNotNone(topic_json)
    topic_dict = dict(topic_json)
    self.assertIsNotNone(topic_dict)

    pkg_to_topic_by_eco = topic_dict["package_topic_map"]
    topic_to_pkg_by_eco = topic_dict["topic_package_map"]

    # Build one Kronos dependency object per ecosystem.
    kronos_deps = {}
    for eco in pkg_to_topic_by_eco:
        dep_obj = KronosDependencyGenerator.generate_kronos_dependency(
            gnosis_ref_arch_dict=ref_arch_dict,
            package_to_topic_dict=pkg_to_topic_by_eco.get(eco),
            topic_to_package_dict=topic_to_pkg_by_eco.get(eco))
        self.assertIsNotNone(dep_obj)
        kronos_deps[eco] = dep_obj

    # Save each graph and compare it with the expected serialized graph.
    for eco, dep_obj in kronos_deps.items():
        base_filename = "data_kronos_dependency/kronos_dependency.json"
        eco_filename = base_filename.replace(".", "_" + eco + ".")
        dep_obj.save(data_store=out_store, filename=eco_filename)

        actual_dict = dep_obj.get_dictionary()
        self.assertIsNotNone(actual_dict)

        expected_filename = eco_filename.replace("/", "/expected_")
        expected_obj = KronosDependencyGenerator.load(
            data_store=out_store, filename=expected_filename)
        self.assertIsNotNone(expected_obj)
        expected_dict = expected_obj.get_dictionary()
        self.assertIsNotNone(expected_dict)

        self.assertDictEqual(actual_dict, expected_dict)
def test_score_eco_user_package_dict(self):
    """Test the dependency dict deserialization, frequency list deserialization etc."""
    in_store = LocalFileSystem("tests/data/data_pgm/input-score-data/")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem("tests/data/data_pgm/output-score-data/")
    self.assertIsNotNone(out_store)
    freq_store = LocalFileSystem(src_dir="tests/data/data_apollo/")
    self.assertIsNotNone(freq_store)

    kronos_models = load_user_eco_to_kronos_model_dict(
        input_kronos_data_store=in_store, additional_path="")
    self.assertIsNotNone(kronos_models)
    kronos_deps = get_eco_to_kronos_dependency_dict(
        data_store=in_store, folderpath="data_kronos_dependency")
    self.assertIsNotNone(kronos_deps)

    request_payload = [{
        "ecosystem": "pypi",
        "comp_package_count_threshold": 10,
        "alt_package_count_threshold": 1,
        "outlier_probability_threshold": 0.61,
        "unknown_packages_ratio_threshold": 0.4,
        "outlier_package_count_threshold": 2,
        "package_list": ["p1", "p2", "p3", "np1"]
    }]

    freq_dict = freq_store.read_json_file(filename=KD_PACKAGE_FREQUENCY)
    self.assertIsNotNone(freq_dict)
    validator = RecommendationValidator.load_package_list_local(
        input_folder_name="tests/data/data_recom_valid/",
        additional_path="")

    result = score_eco_user_package_dict(
        request_payload,
        user_eco_kronos_dict=kronos_models,
        eco_to_kronos_dependency_dict=kronos_deps,
        all_package_list_obj=validator,
        package_frequency_dict=freq_dict,
        use_filters=USE_FILTERS)
    self.assertIsNotNone(result)

    out_store.write_json_file(filename="response.json", contents=result)
    expected = out_store.read_json_file(filename="expected_response.json")
    self.assertIsNotNone(expected)
    self.assertDictEqual(result[0], expected[0])
def test_package_tag_creation(self):
    """Every package in the curated topic map should yield at least one tag."""
    data_store = LocalFileSystem("tests/data/data_gnosis/input-ptm-data/")
    self.assertIsNotNone(data_store)
    topic_json = data_store.read_json_file(
        filename='data_input_curated_package_topic/package_topic.json')
    self.assertTrue(topic_json)
    for name in topic_json[0]['package_topic_map']:
        # At least one tag should be generated for each package.
        self.assertTrue(create_tags_for_package(name))
def test_manifest_missing_packages(self):
    """Packages listed in the manifest but absent from the topic dict are reported as unknown."""
    data_store = LocalFileSystem("tests/data/data_gnosis/")
    self.assertIsNotNone(data_store)
    manifest = data_store.read_json_file(
        filename='data_input_manifest_file_list/manifest_unknown_packages.json')
    self.assertTrue(manifest)
    self.assertIn("package_list", manifest[0])
    manifest_packages = manifest[0]['package_list']
    # With an empty topic dict, every manifest package is unknown.
    unknown = GnosisPackageTopicModel._get_unknown_packages_from_manifests(
        data_store, additional_path='', package_topic_dict={})
    self.assertListEqual(sorted(manifest_packages[0]), sorted(unknown.keys()))
def generate_kronos_dependency(cls, gnosis_ref_arch_dict, package_to_topic_dict,
                               topic_to_package_dict):
    """Generate the Kronos dependency graph from the Gnosis reference architecture.

    Combines the reference-architecture intents/edges with package-to-component
    edges derived from the package/topic mappings.

    :param gnosis_ref_arch_dict: Gnosis reference architecture dictionary
        (component class list, intent list, edge list, intent dependency map).
    :param package_to_topic_dict: mapping of package name to its topics.
    :param topic_to_package_dict: mapping of topic to its packages.
    :return: Object of class KronosDependencyGenerator.
    """
    _logger.info("Started kronos dependency graph generation")
    package_list = list(package_to_topic_dict.keys())
    component_class_list = gnosis_ref_arch_dict.get(
        softnet_constants.GNOSIS_RA_COMPONENT_CLASS_LIST)
    component_class_to_package_edge_list, component_class_to_package_dict = \
        cls._generate_component_class_to_package_edge_list_and_dict(
            package_list, component_class_list, package_to_topic_dict)
    gnosis_ref_arch_intent_list = gnosis_ref_arch_dict.get(
        softnet_constants.GNOSIS_RA_INTENT_LIST)
    # Kronos intents are the RA intents plus the component classes.
    kronos_intent_list = gnosis_ref_arch_intent_list + component_class_list
    kronos_node_list = package_list + kronos_intent_list
    gnosis_ref_arch_edge_list = gnosis_ref_arch_dict.get(
        softnet_constants.GNOSIS_RA_EDGE_LIST)
    # Full edge set: RA edges plus the component-class-to-package edges.
    kronos_dependency_edge_list = gnosis_ref_arch_edge_list + \
        component_class_to_package_edge_list
    parent_tuple_list = softnet_utils.generate_parent_tuple_list(
        kronos_node_list, kronos_dependency_edge_list)
    # Serialized to a string so it can live inside the JSON dictionary.
    parent_tuple_list_string = LocalFileSystem.convert_list_of_tuples_to_string(
        parent_tuple_list)
    similar_package_dict = cls._generate_similar_package_dict(
        package_to_topic_dict, topic_to_package_dict)
    # Assemble the serializable Kronos dependency dictionary.
    kronos_dependency_dict = dict()
    kronos_dependency_dict[
        softnet_constants.KD_PACKAGE_LIST] = package_list
    kronos_dependency_dict[
        softnet_constants.KD_INTENT_LIST] = kronos_intent_list
    kronos_dependency_dict[
        softnet_constants.
        KD_INTENT_DEPENDENCY_MAP] = gnosis_ref_arch_dict.get(
            softnet_constants.GNOSIS_RA_DICT)
    kronos_dependency_dict[
        softnet_constants.
        KD_COMPONENT_DEPENDENCY_MAP] = component_class_to_package_dict
    kronos_dependency_dict[
        softnet_constants.KD_PARENT_TUPLE_LIST] = parent_tuple_list_string
    kronos_dependency_dict[
        softnet_constants.KD_EDGE_LIST] = kronos_dependency_edge_list
    kronos_dependency_dict[
        softnet_constants.KD_SIMILAR_PACKAGE_MAP] = similar_package_dict
    kronos_dependency_dict[
        softnet_constants.KD_PACKAGE_TO_TOPIC_MAP] = package_to_topic_dict
    _logger.info("Ended Kronos dependency graph generation")
    return KronosDependencyGenerator(kronos_dependency_dict)
def load(cls, data_store):
    """Load a persisted chatbot model from the given data store.

    Rebuilds the tflearn network from the persisted word/class dictionary
    and restores the trained weights from the checkpoint files.

    :param data_store: LocalFileSystem or S3DataStore holding the artifacts.
    :return: ChatbotModel instance.
    :raises TypeError: if the data store type is not supported.
    """

    def _build_model(word_class_dict):
        # Network topology must match the one used at training time:
        # input -> 8 -> 8 -> softmax output.
        net = tflearn.input_data(
            shape=[None, int(word_class_dict["num_input"])])
        net = tflearn.fully_connected(net, 8)
        net = tflearn.fully_connected(net, 8)
        net = tflearn.fully_connected(net,
                                      int(word_class_dict["num_output"]),
                                      activation='softmax')
        net = tflearn.regression(net)
        return tflearn.DNN(net)

    if type(data_store) is LocalFileSystem:
        word_class_dict = data_store.read_pickle_file(
            filename=WORD_CLASS_DICT_FILENAME)
        dl_model = data_store.read_dl_model(
            data=_build_model(word_class_dict), filename=MODEL_FILENAME)
    elif type(data_store) is S3DataStore:
        # Stage the S3 artifacts locally, then load them via a temp store.
        for suffix in (".index", ".meta", ".data-00000-of-00001"):
            data_store.download_file(MODEL_FILENAME + suffix,
                                     "/tmp/" + MODEL_FILENAME + suffix)
        data_store.download_file(WORD_CLASS_DICT_FILENAME,
                                 "/tmp/" + WORD_CLASS_DICT_FILENAME)
        temp_data_store = LocalFileSystem("/tmp/")
        word_class_dict = temp_data_store.read_pickle_file(
            filename=WORD_CLASS_DICT_FILENAME)
        dl_model = temp_data_store.read_dl_model(
            data=_build_model(word_class_dict), filename=MODEL_FILENAME)
    else:
        # Bug fix: an unsupported store previously fell through and raised
        # NameError on the unbound locals below; fail fast and clearly.
        raise TypeError(
            "Unsupported data store type: %s" % type(data_store))
    return ChatbotModel(words=word_class_dict["words"],
                        classes=word_class_dict["classes"],
                        num_input=word_class_dict["num_input"],
                        num_output=word_class_dict["num_output"],
                        dl_model=dl_model,
                        response=word_class_dict["response"])
def load(cls, data_store):
    """Load the similarity matrix and movie list from the given data store.

    :param data_store: LocalFileSystem or S3DataStore holding the pickles.
    :return: ImdbRecSys instance.
    :raises TypeError: if the data store type is not supported.
    """
    if type(data_store) is LocalFileSystem:
        matrix = data_store.read_pickle_file(
            filename=SIMILARITY_MATRIX_FILENAME)
        movie_names = data_store.read_pickle_file(
            filename=MOVIE_LIST_FILENAME)
    elif type(data_store) is S3DataStore:
        # Stage the S3 pickles locally, then read them via a temp store.
        data_store.download_file(SIMILARITY_MATRIX_FILENAME,
                                 "/tmp/" + SIMILARITY_MATRIX_FILENAME)
        data_store.download_file(MOVIE_LIST_FILENAME,
                                 "/tmp/" + MOVIE_LIST_FILENAME)
        temp_data_store = LocalFileSystem("/tmp/")
        matrix = temp_data_store.read_pickle_file(
            filename=SIMILARITY_MATRIX_FILENAME)
        movie_names = temp_data_store.read_pickle_file(
            filename=MOVIE_LIST_FILENAME)
    else:
        # Bug fix: an unsupported store previously fell through and raised
        # NameError on the unbound locals below; fail fast and clearly.
        raise TypeError(
            "Unsupported data store type: %s" % type(data_store))
    return ImdbRecSys(matrix=matrix, movie_names=movie_names)
def test_generate_and_save_package_frequency_dict_local(self):
    """Generate the package frequency dict locally and verify it can be read back."""
    in_store = LocalFileSystem(src_dir="tests/data/data_gnosis/input-ra-data/")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem(src_dir="tests/data/data_apollo/")
    self.assertIsNotNone(out_store)

    generator = FrequencyDictGenerator.create_frequency_generator(
        input_data_store=in_store, additional_path="")
    self.assertIsNotNone(generator)
    generator.generate_and_save_frequency_dict(output_data_store=out_store,
                                               additional_path="")

    # Round-trip: the generated dict must be readable from the output store.
    freq_dict = out_store.read_json_file(filename=KD_PACKAGE_FREQUENCY)
    self.assertIsNotNone(freq_dict)
def save(self, data_store):
    """Persist the model weights and vocabulary metadata to the data store.

    :param data_store: LocalFileSystem or S3DataStore to write to.
    :return: None.
    :raises TypeError: if the data store type is not supported.
    """
    word_class_dict = {
        "words": self.words,
        "classes": self.classes,
        "num_input": self.num_input,
        "num_output": self.num_output,
        "response": self.response
    }
    if type(data_store) is LocalFileSystem:
        data_store.write_dl_model(data=self.dl_model,
                                  filename=MODEL_FILENAME)
        data_store.write_pickle_file(data=word_class_dict,
                                     filename=WORD_CLASS_DICT_FILENAME)
    elif type(data_store) is S3DataStore:
        # Write locally first, then upload each checkpoint shard to S3.
        temp_data_store = LocalFileSystem("/tmp/")
        temp_data_store.write_dl_model(data=self.dl_model,
                                       filename=MODEL_FILENAME)
        temp_data_store.write_pickle_file(
            data=word_class_dict, filename=WORD_CLASS_DICT_FILENAME)
        for suffix in (".index", ".meta", ".data-00000-of-00001"):
            data_store.upload_file("/tmp/" + MODEL_FILENAME + suffix,
                                   MODEL_FILENAME + suffix)
        data_store.upload_file("/tmp/" + WORD_CLASS_DICT_FILENAME,
                               WORD_CLASS_DICT_FILENAME)
    else:
        # Bug fix: an unsupported store was previously ignored silently,
        # so nothing was persisted; fail fast instead of losing the model.
        raise TypeError(
            "Unsupported data store type: %s" % type(data_store))
    return None
def load_package_list_local(cls, input_folder_name, additional_path,
                            input_ecosystem):
    """Generate the aggregated manifest list for a given ecosystem from a
    LocalFileSystem datasource.

    :param input_folder_name: The main directory where the manifest files are stored.
    :param additional_path: The directory to pick the manifest files from.
    :param input_ecosystem: The ecosystem for which the aggregated manifest
        list will be saved.
    :return: RecommendationValidator object.
    """
    # Wrap the local folder in a data-store object and delegate.
    manifest_store = LocalFileSystem(src_dir=input_folder_name)
    return cls.load_package_list(manifest_store, additional_path,
                                 input_ecosystem)
def test_train_and_save_kronos_list_local(self):
    """Train a Kronos PGM per (user category, ecosystem) pair and persist each model."""
    in_store = LocalFileSystem("tests/data/data_pgm/input-train-data/")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem("tests/data/data_pgm/output-train-data/")
    self.assertIsNotNone(out_store)

    kronos_deps_by_eco = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=in_store, additional_path="")
    self.assertIsNotNone(kronos_deps_by_eco)

    matrices_by_category = load_user_eco_to_co_occerrence_matrix_dict(
        input_co_occurrence_data_store=in_store, additional_path="")
    self.assertIsNotNone(matrices_by_category)

    for category, matrices_by_eco in matrices_by_category.items():
        for eco, matrix_df in matrices_by_eco.items():
            model = PGMPomegranate.train(
                kronos_dependency_dict=kronos_deps_by_eco[eco],
                package_occurrence_df=matrix_df)
            self.assertIsNotNone(model)
            # One model file per user category and ecosystem.
            model_path = os.path.join(
                "data_kronos_user_eco", str(category),
                "kronos" + "_" + str(eco) + ".json")
            model.save(data_store=out_store, filename=model_path)
def test_movie_recommender_with_local_data_store():
    """Train, persist, reload and query the movie recommender via a local store."""
    recommender = MovieRecommender.train(
        src_url="data/sample_movielens_ratings.txt")
    assert recommender is not None

    store = LocalFileSystem(src_dir="/tmp")
    recommender.save_to_data_store(data_store=store)

    reloaded = MovieRecommender.load_from_data_store(data_store=store)
    assert reloaded is not None

    # A known user should receive exactly ten recommendations.
    recommendations = reloaded.recommend_movies(user_id=25)
    assert recommendations is not None
    assert len(recommendations.items()) == 10
def load_package_list_local(cls, input_folder_name, additional_path):
    """Load manifest files from the local file system and build a validator.

    Generates the aggregated manifest list for a given ecosystem from a
    LocalFileSystem datasource.

    :param input_folder_name: The main directory where the manifest files are stored.
    :param additional_path: The directory to pick the manifest files from.
    :return: RecommendationValidator object.
    """
    # Wrap the local folder in a data-store object, then aggregate packages.
    manifest_store = LocalFileSystem(src_dir=input_folder_name)
    package_sets = load_package_list(input_data_store=manifest_store,
                                     additional_path=additional_path)
    return cls(all_list_of_package_set=package_sets)
def test_score_eco_user_package_dict(self):
    """Score a pypi request (with duplicate packages) and diff the response
    against the expected one."""
    in_store = LocalFileSystem("tests/data/data_pgm/input-score-data/")
    self.assertIsNotNone(in_store)
    out_store = LocalFileSystem("tests/data/data_pgm/output-score-data/")
    self.assertIsNotNone(out_store)

    kronos_models = load_user_eco_to_kronos_model_dict(
        input_kronos_data_store=in_store, additional_path="")
    self.assertIsNotNone(kronos_models)
    kronos_deps = get_eco_to_kronos_dependency_dict(
        data_store=in_store, folderpath="data_kronos_dependency")
    self.assertIsNotNone(kronos_deps)

    request_payload = [{
        "ecosystem": "pypi",
        "comp_package_count_threshold": 10,
        "alt_package_count_threshold": 1,
        "outlier_probability_threshold": 0.61,
        "unknown_packages_ratio_threshold": 0.4,
        "outlier_package_count_threshold": 2,
        "package_list": ["p1", "p2", "p3", "np1", "p2", "p3", "p1"]
    }]

    result = score_eco_user_package_dict(
        request_payload,
        user_eco_kronos_dict=kronos_models,
        eco_to_kronos_dependency_dict=kronos_deps,
        all_package_list_obj=None)
    self.assertIsNotNone(result)

    out_store.write_json_file(filename="response.json", contents=result)
    expected = out_store.read_json_file(filename="expected_response.json")
    self.assertIsNotNone(expected)
    self.assertDictEqual(result[0], expected[0])
def save(self, data_store):
    """Persist the similarity matrix and movie list to the data store.

    :param data_store: LocalFileSystem or S3DataStore to write to.
    :return: None.
    :raises TypeError: if the data store type is not supported.
    """
    if type(data_store) is LocalFileSystem:
        data_store.write_pickle_file(data=self.matrix,
                                     filename=SIMILARITY_MATRIX_FILENAME)
        data_store.write_pickle_file(data=self.movie_names,
                                     filename=MOVIE_LIST_FILENAME)
    elif type(data_store) is S3DataStore:
        # Write locally first, then upload both pickles to S3.
        temp_data_store = LocalFileSystem("/tmp/")
        temp_data_store.write_pickle_file(
            data=self.matrix, filename=SIMILARITY_MATRIX_FILENAME)
        temp_data_store.write_pickle_file(data=self.movie_names,
                                          filename=MOVIE_LIST_FILENAME)
        data_store.upload_file("/tmp/" + SIMILARITY_MATRIX_FILENAME,
                               SIMILARITY_MATRIX_FILENAME)
        data_store.upload_file("/tmp/" + MOVIE_LIST_FILENAME,
                               MOVIE_LIST_FILENAME)
    else:
        # Bug fix: an unsupported store was previously ignored silently,
        # so nothing was persisted; fail fast instead of losing the model.
        raise TypeError(
            "Unsupported data store type: %s" % type(data_store))
    return None
def crawl_local():
    """Run the crawler against the local rec_platform data directory."""
    local_store = LocalFileSystem(src_dir="./rec_platform/data/")
    crawl(data_store=local_store)
def train_and_save_rec_model_local():
    """Train the recommendation model using the local data directory and persist it there."""
    local_store = LocalFileSystem(src_dir="./rec_platform/data/")
    train_and_save_rec_model(data_store=local_store)
def load_rec_model_local():
    """Load the IMDB recommender from the local data directory.

    :return: ImdbRecSys instance.
    """
    local_store = LocalFileSystem(src_dir="./rec_platform/data")
    return ImdbRecSys.load(data_store=local_store)
def load_chatbot_model_local(src_dir):
    """Load a chatbot model from a local directory.

    :param src_dir: directory containing the persisted model files.
    :return: ChatbotModel instance.
    """
    local_store = LocalFileSystem(src_dir=src_dir)
    return ChatbotModel.load(data_store=local_store)
def load_credential_local(src_dir):
    """Read the credential JSON from a local directory.

    :param src_dir: directory containing the credential file.
    :return: credentials as a plain dict.
    """
    local_store = LocalFileSystem(src_dir=src_dir)
    return dict(local_store.read_json_file(CREDENTIAL_FILENAME))