def test_score_eco_user_package_dict(self):
    """Score a pypi request with a package frequency dict and check the response.

    The response is written to ``response.json`` in the output data store and
    its first entry is compared with the first entry of
    ``expected_response.json`` read from the same store.
    """
    # Data stores: scoring inputs, scoring outputs, package frequency data.
    score_input_store = LocalFileSystem(
        "tests/data/data_pgm/input-score-data/")
    self.assertIsNotNone(score_input_store)
    score_output_store = LocalFileSystem(
        "tests/data/data_pgm/output-score-data/")
    self.assertIsNotNone(score_output_store)
    frequency_store = LocalFileSystem(src_dir="tests/data/data_apollo/")
    self.assertIsNotNone(frequency_store)

    # Both the Kronos models and the dependency dicts come from the input store.
    kronos_models = load_user_eco_to_kronos_model_dict(
        input_kronos_data_store=score_input_store, additional_path="")
    self.assertIsNotNone(kronos_models)
    dependency_dicts = get_eco_to_kronos_dependency_dict(
        data_store=score_input_store, folderpath="data_kronos_dependency")
    self.assertIsNotNone(dependency_dicts)

    request_entry = {
        "ecosystem": "pypi",
        "comp_package_count_threshold": 10,
        "alt_package_count_threshold": 1,
        "outlier_probability_threshold": 0.61,
        "unknown_packages_ratio_threshold": 0.4,
        "outlier_package_count_threshold": 2,
        "package_list": ["p1", "p2", "p3", "np1"]
    }
    scoring_request = [request_entry]

    package_frequencies = frequency_store.read_json_file(
        filename=KD_PACKAGE_FREQUENCY)
    self.assertIsNotNone(package_frequencies)

    known_packages = RecommendationValidator.load_package_list_local(
        input_folder_name="tests/data/data_recom_valid/",
        additional_path="")

    actual = score_eco_user_package_dict(
        scoring_request,
        user_eco_kronos_dict=kronos_models,
        eco_to_kronos_dependency_dict=dependency_dicts,
        all_package_list_obj=known_packages,
        package_frequency_dict=package_frequencies,
        use_filters=USE_FILTERS)
    self.assertIsNotNone(actual)

    # Persist the actual response next to the expected one.
    score_output_store.write_json_file(filename="response.json",
                                       contents=actual)

    expected = score_output_store.read_json_file(
        filename="expected_response.json")
    self.assertIsNotNone(expected)

    self.assertDictEqual(actual[0], expected[0])
def test_generate_and_save_kronos_dependency_local(self):
    """Generate, save and verify a Kronos dependency for every ecosystem.

    For each ecosystem found in the package-topic map, a Kronos dependency
    object is generated from the Gnosis reference architecture and the
    package/topic maps. Each object is saved to the output data store and
    its dictionary is compared with the dictionary loaded from the
    matching ``expected_`` file in the same store.
    """
    input_data_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/input-kd-data")
    self.assertIsNotNone(input_data_store)

    output_data_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/output-kd-data")
    self.assertIsNotNone(output_data_store)

    gnosis_ref_arch_json = input_data_store.read_json_file(
        filename="data_gnosis/gnosis_ref_arch.json")
    self.assertIsNotNone(gnosis_ref_arch_json)
    gnosis_ref_arch_dict = dict(gnosis_ref_arch_json)
    self.assertIsNotNone(gnosis_ref_arch_dict)

    package_topic_json = input_data_store.read_json_file(
        "data_package_topic/package_topic.json")
    self.assertIsNotNone(package_topic_json)
    package_topic_dict = dict(package_topic_json)
    self.assertIsNotNone(package_topic_dict)

    eco_to_package_topic_dict = package_topic_dict["package_topic_map"]
    eco_to_topic_package_dict = package_topic_dict["topic_package_map"]

    # Step 1: build one Kronos dependency object per ecosystem.
    eco_to_kronos_dependency_dict = {}
    for ecosystem, package_to_topic_dict in eco_to_package_topic_dict.items():
        topic_to_package_dict = eco_to_topic_package_dict.get(ecosystem)

        kronos_dependency_obj = KronosDependencyGenerator.generate_kronos_dependency(
            gnosis_ref_arch_dict=gnosis_ref_arch_dict,
            package_to_topic_dict=package_to_topic_dict,
            topic_to_package_dict=topic_to_package_dict)
        self.assertIsNotNone(kronos_dependency_obj)

        eco_to_kronos_dependency_dict[ecosystem] = kronos_dependency_obj

    # Step 2: save every object and compare it with its expected counterpart.
    for ecosystem, kronos_dependency_obj in eco_to_kronos_dependency_dict.items():
        filename = "data_kronos_dependency/kronos_dependency.json"
        # Insert "_<ecosystem>" right before the file extension.
        filename_formatted = filename.replace(".", "_" + ecosystem + ".")
        kronos_dependency_obj.save(data_store=output_data_store,
                                   filename=filename_formatted)

        kronos_dependency_dict = kronos_dependency_obj.get_dictionary()
        self.assertIsNotNone(kronos_dependency_dict)

        # The expected file has the same name prefixed with "expected_".
        expected_filename_formatted = filename_formatted.replace(
            "/", "/expected_")
        expected_kronos_dependency_obj = KronosDependencyGenerator.load(
            data_store=output_data_store,
            filename=expected_filename_formatted)
        self.assertIsNotNone(expected_kronos_dependency_obj)

        expected_kronos_dependency_dict = \
            expected_kronos_dependency_obj.get_dictionary()
        self.assertIsNotNone(expected_kronos_dependency_dict)

        self.assertDictEqual(kronos_dependency_dict,
                             expected_kronos_dependency_dict)
def test_generate_and_save_cooccurrence_matrices_local(self):
    """Generate co-occurrence matrices from manifests and check their columns.

    For every manifest entry a co-occurrence matrix is generated from the
    ecosystem's Kronos dependency dict and the manifest's package lists,
    saved to the output data store, and compared by column set with the
    ``expected_`` matrix loaded from the same store.
    """
    input_data_store = LocalFileSystem(
        "tests/data/data_softnet/input-com-data")
    self.assertIsNotNone(input_data_store)

    output_data_store = LocalFileSystem(
        "tests/data/data_softnet/output-com-data")
    self.assertIsNotNone(output_data_store)

    eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=input_data_store,
        additional_path="")
    self.assertIsNotNone(eco_to_kronos_dependency_dict)

    manifest_filenames = input_data_store.list_files(
        "data_input_manifest_file_list")
    self.assertIsNotNone(manifest_filenames)

    for manifest_filename in manifest_filenames:
        # The second path component of the manifest name is the user category.
        user_category = manifest_filename.split("/")[1]
        manifest_content_json_list = input_data_store.read_json_file(
            filename=manifest_filename)
        self.assertIsNotNone(manifest_content_json_list)

        for manifest_content_json in manifest_content_json_list:
            self.assertIsNotNone(manifest_content_json)
            manifest_content_dict = dict(manifest_content_json)
            ecosystem = manifest_content_dict["ecosystem"]
            kronos_dependency_dict = eco_to_kronos_dependency_dict[ecosystem]
            list_of_package_list = manifest_content_dict.get("package_list")

            cooccurrence_matrix_obj = \
                CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
            self.assertIsNotNone(cooccurrence_matrix_obj)

            output_filename = \
                "data_co_occurrence_matrix/{}/cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
            cooccurrence_matrix_obj.save(data_store=output_data_store,
                                         filename=output_filename)

            expected_output_filename = \
                "data_co_occurrence_matrix/{}/expected_cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
            expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                data_store=output_data_store,
                filename=expected_output_filename)
            self.assertIsNotNone(expected_cooccurrence_matrix_obj)

            cooccurrence_matrix_df = \
                cooccurrence_matrix_obj.get_matrix_dictionary()
            self.assertIsNotNone(cooccurrence_matrix_df)
            expected_cooccurrence_matrix_df = \
                expected_cooccurrence_matrix_obj.get_matrix_dictionary()
            self.assertIsNotNone(expected_cooccurrence_matrix_df)

            # assertSetEqual reports the differing columns on failure.
            expected_columns = set(expected_cooccurrence_matrix_df.columns)
            resultant_columns = set(cooccurrence_matrix_df.columns)
            self.assertSetEqual(resultant_columns, expected_columns)

            # NOTE(review): if these are pandas DataFrames, iterating them
            # yields column labels, so this check overlaps with the column
            # comparison above - confirm whether values should be compared.
            self.assertTrue(
                set(cooccurrence_matrix_df).issubset(
                    set(expected_cooccurrence_matrix_df)))
def test_generate_and_save_cooccurrence_matrices_local(self):
    """Generate co-occurrence matrices from manifests and compare full frames.

    For every manifest entry a co-occurrence matrix is generated from the
    ecosystem's Kronos dependency dict and the manifest's package lists,
    saved to the output data store, and compared with the ``expected_``
    matrix loaded from the same store using ``assert_frame_equal``.
    """
    input_data_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/input-com-data")
    self.assertIsNotNone(input_data_store)

    output_data_store = LocalFileSystem(
        "analytics_platform/kronos/softnet/test/data/output-com-data")
    self.assertIsNotNone(output_data_store)

    eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=input_data_store,
        additional_path="")
    self.assertIsNotNone(eco_to_kronos_dependency_dict)

    manifest_filenames = input_data_store.list_files(
        "data_input_manifest_file_list")
    self.assertIsNotNone(manifest_filenames)

    for manifest_filename in manifest_filenames:
        # The second path component of the manifest name is the user category.
        user_category = manifest_filename.split("/")[1]
        manifest_content_json_list = input_data_store.read_json_file(
            filename=manifest_filename)
        self.assertIsNotNone(manifest_content_json_list)

        for manifest_content_json in manifest_content_json_list:
            self.assertIsNotNone(manifest_content_json)
            manifest_content_dict = dict(manifest_content_json)
            ecosystem = manifest_content_dict["ecosystem"]
            kronos_dependency_dict = eco_to_kronos_dependency_dict[ecosystem]
            list_of_package_list = manifest_content_dict.get("package_list")

            cooccurrence_matrix_obj = \
                CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
            self.assertIsNotNone(cooccurrence_matrix_obj)

            output_filename = \
                "data_co_occurrence_matrix/{}/cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
            cooccurrence_matrix_obj.save(data_store=output_data_store,
                                         filename=output_filename)

            expected_output_filename = \
                "data_co_occurrence_matrix/{}/expected_cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
            expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                data_store=output_data_store,
                filename=expected_output_filename)
            self.assertIsNotNone(expected_cooccurrence_matrix_obj)

            cooccurrence_matrix_df = \
                cooccurrence_matrix_obj.get_matrix_dictionary()
            self.assertIsNotNone(cooccurrence_matrix_df)
            expected_cooccurrence_matrix_df = \
                expected_cooccurrence_matrix_obj.get_matrix_dictionary()
            self.assertIsNotNone(expected_cooccurrence_matrix_df)

            # Sort columns on both sides so column order does not matter.
            assert_frame_equal(
                cooccurrence_matrix_df.sort_index(axis=1),
                expected_cooccurrence_matrix_df.sort_index(axis=1),
                check_names=True)
def test_package_tag_creation(self):
    """Check that at least one tag is created for every curated package.

    Package names are taken from the ``package_topic_map`` entry of the
    first element of the curated package-topic JSON file.
    """
    input_data_store = LocalFileSystem(
        "tests/data/data_gnosis/input-ptm-data/")
    self.assertIsNotNone(input_data_store)

    ptm_json = input_data_store.read_json_file(
        filename='data_input_curated_package_topic/package_topic.json')
    # Truthiness check: the file must yield a non-empty structure.
    self.assertTrue(ptm_json)

    package_names = ptm_json[0]['package_topic_map']
    for package_name in package_names:
        tag_list = create_tags_for_package(package_name)
        # At least one tag should be generated for each package; name the
        # offending package so a failure is easy to trace.
        self.assertTrue(
            tag_list,
            msg="No tags generated for package {!r}".format(package_name))
def test_manifest_missing_packages(self):
    """Check the unknown packages collected from the manifests.

    With an empty ``package_topic_dict``, the keys returned by
    ``GnosisPackageTopicModel._get_unknown_packages_from_manifests`` are
    expected to equal (ignoring order) the first package list of the
    first entry in ``manifest_unknown_packages.json``.
    """
    input_data_store = LocalFileSystem("tests/data/data_gnosis/")
    self.assertIsNotNone(input_data_store)

    manifest_json = input_data_store.read_json_file(
        filename='data_input_manifest_file_list/manifest_unknown_packages.json')
    # Truthiness check: the manifest must yield a non-empty structure.
    self.assertTrue(manifest_json)
    self.assertIn("package_list", manifest_json[0])
    package_list = manifest_json[0]['package_list']

    packages = GnosisPackageTopicModel._get_unknown_packages_from_manifests(
        input_data_store, additional_path='', package_topic_dict={})

    # Sort both sides so the comparison is order-independent.
    self.assertListEqual(sorted(package_list[0]), sorted(packages.keys()))
def test_score_eco_user_package_dict(self):
    """Score a pypi request without a package list object and check the response.

    The response is written to ``response.json`` in the output data store and
    its first entry is compared with the first entry of
    ``expected_response.json`` read from the same store.
    """
    input_data_store = LocalFileSystem(
        "tests/data/data_pgm/input-score-data/")
    self.assertIsNotNone(input_data_store)

    output_data_store = LocalFileSystem(
        "tests/data/data_pgm/output-score-data/")
    self.assertIsNotNone(output_data_store)

    user_eco_kronos_dict = load_user_eco_to_kronos_model_dict(
        input_kronos_data_store=input_data_store, additional_path="")
    self.assertIsNotNone(user_eco_kronos_dict)

    eco_to_kronos_dependency_dict = get_eco_to_kronos_dependency_dict(
        data_store=input_data_store, folderpath="data_kronos_dependency")
    self.assertIsNotNone(eco_to_kronos_dependency_dict)

    # NOTE(review): the package list repeats p1/p2/p3 - presumably to
    # exercise duplicate handling in the scorer; confirm.
    user_request = [{
        "ecosystem": "pypi",
        "comp_package_count_threshold": 10,
        "alt_package_count_threshold": 1,
        "outlier_probability_threshold": 0.61,
        "unknown_packages_ratio_threshold": 0.4,
        "outlier_package_count_threshold": 2,
        "package_list": ["p1", "p2", "p3", "np1", "p2", "p3", "p1"]
    }]

    response = score_eco_user_package_dict(
        user_request,
        user_eco_kronos_dict=user_eco_kronos_dict,
        eco_to_kronos_dependency_dict=eco_to_kronos_dependency_dict,
        all_package_list_obj=None)
    self.assertIsNotNone(response)

    # Persist the actual response next to the expected one.
    output_data_store.write_json_file(filename="response.json",
                                      contents=response)

    expected_response = output_data_store.read_json_file(
        filename="expected_response.json")
    self.assertIsNotNone(expected_response)

    self.assertDictEqual(response[0], expected_response[0])
def test_generate_and_save_package_frequency_dict_local(self):
    """Generate the package frequency dict, save it and read it back."""
    ref_arch_store = LocalFileSystem(
        src_dir="tests/data/data_gnosis/input-ra-data/")
    self.assertIsNotNone(ref_arch_store)

    apollo_store = LocalFileSystem(src_dir="tests/data/data_apollo/")
    self.assertIsNotNone(apollo_store)

    generator = FrequencyDictGenerator.create_frequency_generator(
        input_data_store=ref_arch_store, additional_path="")
    self.assertIsNotNone(generator)

    generator.generate_and_save_frequency_dict(
        output_data_store=apollo_store, additional_path="")

    # Reading the file back confirms that something was written to the store.
    saved_frequencies = apollo_store.read_json_file(
        filename=KD_PACKAGE_FREQUENCY)
    self.assertIsNotNone(saved_frequencies)
def load_credential_local(src_dir):
    """Read the credential JSON file via a local data store and return it as a dict.

    :param src_dir: passed as ``src_dir`` to ``LocalFileSystem``.
    :return: ``dict`` built from the JSON content of ``CREDENTIAL_FILENAME``.
    """
    return dict(
        LocalFileSystem(src_dir=src_dir).read_json_file(CREDENTIAL_FILENAME))