def test_empty_mod_type_annot_cove(self):
    """An empty module has nothing left unannotated, so coverage is 1.0."""
    # Use a context manager so the example file's handle is closed
    # deterministically (the original `open(...).read()` leaked it).
    with open('./examples/__init__.py', 'r') as f:
        empty_mod = Extractor().extract(f.read())
    self.assertEqual(1.0, empty_mod.type_annot_cove)
def process_project(self, i, project):
    """
    Run the full extraction pipeline for one project.

    Lists the project's source files, deduplicates them, extracts type/NL
    information per file (optionally enriched with pyre query results and a
    type-checker verdict), aggregates the project's type-annotation coverage,
    and saves the result as JSON. Per-file failures are logged and skipped so
    a single bad file does not abort the whole project.

    :param i: ordinal index of the project (used only for progress output)
    :param project: dict with at least "author" and "repo" keys
    :raises NullProjectException: caught internally when the project has no
        files after deduplication; logged rather than propagated.
    """
    project_id = f'{project["author"]}/{project["repo"]}'
    # Per-project result: one entry per source file plus aggregate coverage.
    project_analyzed_files: dict = {project_id: {"src_files": {},
                                                 "type_annot_cove": 0.0}}
    # The project's on-disk root; reused for file listing and all pyre calls.
    project_path = join(self.projects_path, project["author"], project["repo"])
    try:
        print(f'Running pipeline for project {i} {project_id}')
        project['files'] = []
        print(f'Extracting for {project_id}...')
        extracted_avl_types = None

        project_files = list_files(project_path)
        print(f"{project_id} has {len(project_files)} files before deduplication")
        project_files = [f for f in project_files if not self.is_file_duplicate(f)]
        print(f"{project_id} has {len(project_files)} files after deduplication")

        # Pair each absolute path with its path relative to the parent of the
        # projects directory (the key format used by split_dataset_files).
        project_files = [(f, str(Path(f).relative_to(Path(self.projects_path).parent)))
                         for f in project_files]
        # Attach the dataset split (e.g. train/valid/test) when known, else None.
        project_files = [(f, f_r, self.split_dataset_files.get(f_r))
                         for f, f_r in project_files]

        if len(project_files) != 0:
            if self.use_pyre:
                print(f"Running pyre for {project_id}")
                clean_pyre_config(project_path)
                pyre_server_init(project_path)
            for filename, f_relative, f_split in project_files:
                try:
                    pyre_data_file = pyre_query_types(project_path, filename) \
                        if self.use_pyre else None
                    # BUG FIX: the original non-NLP branch called
                    # `Extractor.extract(...)` on the class itself, binding the
                    # module source string as `self`; the resulting exception
                    # was silently absorbed by the broad handler below. Both
                    # branches now share one correct extraction call.
                    extracted = Extractor().extract(read_file(filename),
                                                    pyre_data_file).to_dict()
                    if self.nlp_transf:
                        extracted = self.apply_nlp_transf(extracted)
                    src_files = project_analyzed_files[project_id]["src_files"]
                    src_files[f_relative] = extracted
                    src_files[f_relative]['set'] = f_split
                    if self.use_tc:
                        print(f"Running type checker for file: (unknown)")
                        src_files[f_relative]['tc'] = \
                            type_check_single_file(filename, self.tc)
                    # Collect names available for import in this project
                    # (module imports plus locally declared classes).
                    extracted_avl_types = src_files[f_relative]['imports'] + \
                        [c['name'] for c in src_files[f_relative]['classes']]
                except ParseError as err:
                    # print(f"Could not parse file (unknown)")
                    traceback.print_exc()
                    self.logger.error("project: %s |file: %s |Exception: %s" %
                                      (project_id, filename, err))
                except UnicodeDecodeError:
                    print(f"Could not read file (unknown)")
                except Exception as err:
                    # Other unexpected exceptions; failure of a single file
                    # should not fail the entire project processing.
                    # TODO: a specialized exception raised by the extractor
                    # would let this handler be narrowed.
                    # print(f"Could not process file (unknown)")
                    traceback.print_exc()
                    self.logger.error("project: %s |file: %s |Exception: %s" %
                                      (project_id, filename, err))

            print(f'Saving available type hints for {project_id}...')
            if self.avl_types_dir is not None:
                if extracted_avl_types:
                    with open(join(self.avl_types_dir,
                                   f'{project["author"]}_{project["repo"]}_avltypes.txt'),
                              'w') as f:
                        for t in extracted_avl_types:
                            f.write("%s\n" % t)

            src_files = project_analyzed_files[project_id]["src_files"]
            if len(src_files) != 0:
                # Project coverage = mean of per-file coverage, 2 decimals.
                project_analyzed_files[project_id]["type_annot_cove"] = \
                    round(sum(s["type_annot_cove"] for s in src_files.values())
                          / len(src_files), 2)

            save_json(self.get_project_filename(project), project_analyzed_files)
            if self.use_pyre:
                pyre_server_shutdown(project_path)
        else:
            raise NullProjectException(project_id)
    except KeyboardInterrupt:
        # Stop the whole run immediately on Ctrl-C; `quit` relies on the
        # `site` module's builtins (NOTE(review): sys.exit would be sturdier).
        quit(1)
    except NullProjectException as err:
        self.logger.error(err)
        print(err)
    except Exception as err:
        print(f'Running pipeline for project {i} failed')
        traceback.print_exc()
        self.logger.error("project: %s | Exception: %s" % (project_id, err))
def setUpClass(cls):
    """Extract the qualified-types example once, shared by all tests in the class."""
    # `with` closes the example file's handle (the original leaked it).
    with open('examples/qualified_types.py', 'r') as f:
        cls.processed_f = Extractor().extract(f.read()).to_dict()
from libsa4py.cst_extractor import Extractor from libsa4py.representations import FunctionInfo, ModuleInfo, create_output_seq, validate_output_seq from libsa4py.nl_preprocessing import normalize_module_code from libsa4py.exceptions import OutputSequenceException from libsa4py.utils import read_file import unittest processed_f = Extractor().extract(open('./examples/representations.py', 'r').read()) class TestModuleRepresentations(unittest.TestCase): """ It tests the Dict-based representation of modules """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.maxDiff = None def test_mod_repr_dict_keys(self): mod_repr_dict_key_exp = ['untyped_seq', 'typed_seq', 'imports', 'variables', 'mod_var_occur', 'mod_var_ln', 'classes', 'funcs', 'set', 'tc', 'no_types_annot', 'type_annot_cove'] self.assertListEqual(mod_repr_dict_key_exp, list(processed_f.to_dict().keys())) def test_mod_repr_cls_dict(self): cls_repr_mod_exp = [{'name': 'MyClass', 'q_name': 'MyClass', 'cls_lc': ((12, 0), (23, 44)), 'variables': {'cls_var': 'builtins.int'}, 'cls_var_occur': {'cls_var': [['MyClass', 'cls_var', 'c', 'n']]}, 'cls_var_ln': {'cls_var': ((16, 4), (16, 11))}, 'funcs': [{'name': '__init__', 'q_name': 'MyClass.__init__', 'fn_lc': ((18, 4), (19, 18)), 'params': {'self': '', 'y': 'builtins.float'}, 'ret_exprs': [],
def setUpClass(cls):
    """Extract the assignments example once, shared by all tests in the class."""
    # `with` closes the example file's handle (the original leaked it).
    with open('./examples/assignments.py', 'r') as f:
        cls.processed_f = Extractor().extract(f.read()).to_dict()
def setUpClass(cls):
    """Extract the different-functions example once, shared by all tests in the class."""
    # `with` closes the example file's handle (the original leaked it).
    with open('./examples/different_fns.py', 'r') as f:
        cls.processed_f = Extractor().extract(f.read()).to_dict()
def setUpClass(cls):
    """Extract the vars/args-occurrences example once, shared by all tests in the class."""
    # `with` closes the example file's handle (the original leaked it).
    with open('./examples/vars_args_occur.py', 'r') as f:
        cls.processed_f = Extractor().extract(f.read()).to_dict()
def setUpClass(cls):
    """Run the extractor once over the pyre example, supplying its pre-computed pyre data."""
    src_path = './examples/vars_types_pyre.py'
    pyre_data_path = './examples/vars_types_pyre_data.json'
    cls.extractor_out = Extractor().extract(read_file(src_path),
                                            load_json(pyre_data_path))
def setUpClass(cls):
    """Prepare two extractor outputs over the representations example: the default one and one with seq2seq output disabled."""
    example = './examples/representations.py'
    cls.extractor_out = Extractor().extract(read_file(example))
    cls.extractor_out_wo_seq2seq = Extractor().extract(read_file(example),
                                                       include_seq2seq=False)