Example #1
    @classmethod
    def setUpClass(cls):
        # Input files to test the predict endpoint
        cls.test_file1 = read_file('./resources/test_file1.py')
        cls.test_file2 = read_file('./resources/test_file2.py')
        cls.test_file3 = read_file('./resources/test_file3.py')

        # Expected JSON response from the server
        cls.test_file1_exp = load_json('./resources/test_file1_exp.json')
        cls.test_file2_exp = load_json('./resources/test_file2_exp.json')
        cls.test_file3_exp = load_json('./resources/test_file3_exp.json')
Example #2
    @classmethod
    def setUpClass(cls):
        cls.out_p = cst.parse_module(read_file('examples/space_tokens.py'))

        v = Visitor()
        mw = cst.metadata.MetadataWrapper(cls.out_p, cache={cst.metadata.TypeInferenceProvider: {'types': []}})
        mw.visit(v)

        cls.out_untyped_s = cls.out_p.visit(SpaceAdder())
        cls.out_typed_s = cls.out_p.visit(TypeAdder(v.module_all_annotations)).visit(SpaceAdder())

        cls.exp_p = read_file('exp_outputs/added_space.py')
Example #3
    def process_project(self, proj_json_path: str):
        proj_json = load_json(proj_json_path)
        for p in proj_json.keys():
            for i, (f, f_d) in enumerate(proj_json[p]['src_files'].items()):
                f_read = read_file(join(self.projects_path, f))
                if len(f_read) != 0:
                    try:
                        f_parsed = cst.parse_module(f_read)
                        try:
                            f_parsed = cst.metadata.MetadataWrapper(
                                f_parsed).visit(
                                    TypeApplier(f_d, self.apply_nlp))
                            write_file(join(self.projects_path, f),
                                       f_parsed.code)
                        except KeyError as ke:
                            print(
                                f"A variable not found | project {proj_json_path} | file {f}",
                                ke)
                            traceback.print_exc()
                        except TypeError as te:
                            print(f"Project {proj_json_path} | file {f}", te)
                            traceback.print_exc()
                    except cst._exceptions.ParserSyntaxError as pse:
                        print(
                            f"Can't parse file {f} in project {proj_json_path}",
                            pse)
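For context, the loop above implies a particular shape for the processed JSON file: a mapping from a project name to its source files, where each file path maps to the per-file type data handed to TypeApplier. A hypothetical, minimal instance of that shape (names invented for illustration):

# Hypothetical shape of the JSON consumed by process_project, inferred from the key accesses above.
proj_json = {
    "some_author/some_repo": {
        "src_files": {
            "some_author/some_repo/module.py": {},  # per-file type data passed to TypeApplier
        }
    }
}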
Example #4
def run_benchmark(args):
    def req_post(f):
        return requests.post(T4PY_API_URL, f)

    f_read = read_file(args.f)
    start_t = time.time()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        res = [executor.submit(req_post, f_read) for i in range(args.r)]
        concurrent.futures.wait(res)
    
    print(f"Processed {args.r} reqeusts in {time.time()-start_t:.2f} sec.")
Example #5
    def test_type_apply_pipeline(self):
        ta = TypeAnnotatingProjects('./tmp_ta', None, apply_nlp=False)
        ta.process_project('./examples/type_apply_ex.json')

        exp_split = test_file_exp.splitlines()
        out_split = read_file('./tmp_ta/type_apply.py').splitlines()

        exp = """{}""".format("\n".join(exp_split[7:]))
        out = """{}""".format("\n".join(out_split[7:]))

        self.assertEqual(exp, out)
        # The typing imports (first 7 lines) may appear in any order; compare them order-insensitively
        self.assertEqual(Counter(" ".join(exp_split[0:7])),
                         Counter(" ".join(out_split[0:7])))
Example #6
    @classmethod
    def setUpClass(cls):
        cls.out_p = cst.parse_module(read_file('examples/comment_removal.py')).visit(CommentAndDocStringRemover())
Example #7
    def process_project(self, i, project):

        project_id = f'{project["author"]}/{project["repo"]}'
        project_analyzed_files: dict = {
            project_id: {
                "src_files": {},
                "type_annot_cove": 0.0
            }
        }
        try:
            print(f'Running pipeline for project {i} {project_id}')
            project['files'] = []

            print(f'Extracting for {project_id}...')
            extracted_avl_types = None

            project_files = list_files(
                join(self.projects_path, project["author"], project["repo"]))
            print(
                f"{project_id} has {len(project_files)} files before deduplication"
            )
            project_files = [
                f for f in project_files if not self.is_file_duplicate(f)
            ]
            print(
                f"{project_id} has {len(project_files)} files after deduplication"
            )

            project_files = [
                (f, str(Path(f).relative_to(Path(self.projects_path).parent)))
                for f in project_files
            ]
            project_files = [(f, f_r, self.split_dataset_files[f_r]
                              if f_r in self.split_dataset_files else None)
                             for f, f_r in project_files]

            if len(project_files) != 0:
                if self.use_pyre:
                    print(f"Running pyre for {project_id}")
                    clean_pyre_config(
                        join(self.projects_path, project["author"],
                             project["repo"]))
                    pyre_server_init(
                        join(self.projects_path, project["author"],
                             project["repo"]))

                for filename, f_relative, f_split in project_files:
                    try:
                        pyre_data_file = pyre_query_types(
                            join(self.projects_path, project["author"],
                                 project["repo"]),
                            filename) if self.use_pyre else None

                        project_analyzed_files[project_id]["src_files"][f_relative] = \
                            self.apply_nlp_transf(
                                Extractor().extract(read_file(filename), pyre_data_file).to_dict()) if self.nlp_transf \
                                else Extractor.extract(read_file(filename), pyre_data_file).to_dict()

                        project_analyzed_files[project_id]["src_files"][
                            f_relative]['set'] = f_split
                        if self.use_tc:
                            print(f"Running type checker for file: {filename}")
                            project_analyzed_files[project_id]["src_files"][f_relative]['tc'] = \
                                type_check_single_file(filename, self.tc)

                        extracted_avl_types = project_analyzed_files[project_id]["src_files"][f_relative]['imports'] + \
                                              [c['name'] for c in
                                               project_analyzed_files[project_id]["src_files"][f_relative]['classes']]
                    except ParseError as err:
                        # print(f"Could not parse file {filename}")
                        traceback.print_exc()
                        self.logger.error(
                            "project: %s |file: %s |Exception: %s" %
                            (project_id, filename, err))
                    except UnicodeDecodeError:
                        print(f"Could not read file {filename}")
                    except Exception as err:
                        # Other unexpected exceptions; Failure of single file should not
                        # fail the entire project processing.
                        # TODO: A better workaround would be to have a specialized exception thrown
                        # by the extractor, so that this exception is specialized.
                        #print(f"Could not process file {filename}")
                        traceback.print_exc()
                        self.logger.error(
                            "project: %s |file: %s |Exception: %s" %
                            (project_id, filename, err))
                        #logging.error("project: %s |file: %s |Exception: %s" % (project_id, filename, err))

                print(f'Saving available type hints for {project_id}...')
                if self.avl_types_dir is not None:
                    if extracted_avl_types:
                        with open(
                                join(
                                    self.avl_types_dir,
                                    f'{project["author"]}_{project["repo"]}_avltypes.txt'
                                ), 'w') as f:
                            for t in extracted_avl_types:
                                f.write("%s\n" % t)

                if len(project_analyzed_files[project_id]
                       ["src_files"].keys()) != 0:
                    project_analyzed_files[project_id]["type_annot_cove"] = \
                        round(sum([project_analyzed_files[project_id]["src_files"][s]["type_annot_cove"] for s in
                                   project_analyzed_files[project_id]["src_files"].keys()]) / len(
                            project_analyzed_files[project_id]["src_files"].keys()), 2)

                    save_json(self.get_project_filename(project),
                              project_analyzed_files)

                if self.use_pyre:
                    pyre_server_shutdown(
                        join(self.projects_path, project["author"],
                             project["repo"]))

            else:
                raise NullProjectException(project_id)

        except KeyboardInterrupt:
            quit(1)
        except NullProjectException as err:
            self.logger.error(err)
            print(err)
        except Exception as err:
            print(f'Running pipeline for project {i} failed')
            traceback.print_exc()
            self.logger.error("project: %s | Exception: %s" %
                              (project_id, err))
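The project argument is expected to be a dict carrying at least the author and repo keys used above; a hypothetical call (the pipeline instance and the values are placeholders, not the project's actual driver code) might look like:

# Hypothetical usage; 'pipeline' stands for whatever object exposes process_project.
pipeline.process_project(0, {"author": "some_user", "repo": "some_repo"})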
Example #8
    def test_normalized_module_code(self):
        self.assertEqual(normalize_module_code(processed_f.untyped_seq),
                         read_file('exp_outputs/normalized_mod_code.txt').strip())
Example #9
    @classmethod
    def setUpClass(cls):
        cls.out_p = cst.parse_module(read_file('examples/type_annot_removal.py')).visit(TypeAnnotationRemover())
Example #10
    def test_removed_strings_file(self):
        self.assertMultiLineEqual(read_file('exp_outputs/removed_num.py'), self.out_p.code)
Example #11
    @classmethod
    def setUpClass(cls):
        cls.out_p = cst.parse_module(read_file('examples/num_removal.py')).visit(NumberRemover())
Example #12
    @classmethod
    def setUpClass(cls):
        cls.out_p = cst.parse_module(read_file('examples/string_removal.py')).visit(StringRemover())
Example #13
    def test_removed_comments_docs_file(self):
        self.assertMultiLineEqual(read_file('exp_outputs/removed_com_docs.py'), self.out_p.code)
Example #14
    @classmethod
    def setUpClass(cls):
        cls.extractor_out = Extractor().extract(
            read_file('./examples/representations.py'))
        cls.extractor_out_wo_seq2seq = Extractor().extract(
            read_file('./examples/representations.py'), include_seq2seq=False)
Example #15
    @classmethod
    def setUpClass(cls):
        cls.extractor_out = Extractor().extract(
            read_file('./examples/vars_types_pyre.py'),
            load_json('./examples/vars_types_pyre_data.json'))
Example #16
    def test_propagated_types_file(self):
        # TODO: TypeAdder needs improvements to propagate all the types across the file
        self.assertMultiLineEqual(read_file('exp_outputs/propagated_types.py'), self.out_p.code)
Example #17
    @classmethod
    def setUpClass(cls):
        cls.out_p = cst.parse_module(read_file('examples/types_prop.py'))
        v = Visitor()
        mw = cst.metadata.MetadataWrapper(cls.out_p, cache={cst.metadata.TypeInferenceProvider: {'types': []}})
        mw.visit(v)
        cls.out_p = cls.out_p.visit(TypeAdder(v.module_all_annotations))
Example #18
    @classmethod
    def setUpClass(cls):
        cls.py_src_f = read_file("./examples/type_annot_count.py")
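All of the examples above rely on small I/O helpers such as read_file, load_json, write_file, and save_json. Their actual implementations are not shown here; a minimal sketch of what such helpers could look like (an assumption for illustration, not the project's real code):

import json

# Assumed helpers: read/write a text file and load/save a JSON file.
def read_file(path: str) -> str:
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()

def write_file(path: str, content: str) -> None:
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)

def load_json(path: str) -> dict:
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

def save_json(path: str, data: dict) -> None:
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f)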