def parse_project(project, allFiles):
    """Extract comments from every Java and C file in *allFiles* and append
    them to a per-source-file ``.txt`` under ``output_directory + project``.

    Args:
        project (str): Project name, used as the output sub-directory.
        allFiles (iterable): File paths to scan.
    """
    # (extension, comment_parser mime type, length of extension incl. dot);
    # the length is used to strip the extension from the output name.
    handlers = (
        (".java", "text/x-java-source", 5),
        (".c", "text/x-c", 2),
    )
    for fyle in allFiles:
        for ext, mime, ext_len in handlers:
            if not fyle.endswith(ext):
                continue
            # The original guarded only the .c branch; decode/empty-file
            # errors can occur for either language, so guard both.
            try:
                comments = comment_parser.extract_comments(fyle, mime=mime)
            except UnicodeDecodeError:
                print("Bad characters in files!" + fyle)
                continue
            except ValueError:
                print("The file is empty!")
                continue
            # Flatten the path into a dotted name and drop the prefix.
            # NOTE(review): hard-coded machine-specific path — consider
            # making this prefix configurable.
            name = fyle.replace("\\", ".").replace(
                "C:.Users.Edward.Documents.GitHub.", "")
            output = output_directory + project + "\\" + name[:-ext_len] + ".txt"
            # Mode "a" preserves the original append-across-runs behavior;
            # `with` guarantees the handle is closed even on write errors.
            with open(output, "a") as out:
                for comment in comments:
                    out.write(str(comment))
def get_comment_sents(filename):
    """Return cleaned comment sentences extracted from *filename*.

    The first comment block is skipped (assumed to be the copyright
    header); sentences with no alphabetic characters are dropped.

    Returns:
        list: Sentence strings.
    """
    comment_sents = []
    comment_blocks = comment_parser.extract_comments(filename)
    # Skip copyright section (first block).
    for comment_block in comment_blocks[1:]:
        # Use the public accessor rather than the private ``_text``
        # attribute the original reached into.
        comment_text = comment_block.text()
        # Strip comment decoration and abbreviations that confuse the
        # sentence tokenizer.
        for old, new in (('*', ''), ('\n', ''), ('\t', ''),
                         ('/', ' or '), ('--', ''), ('i.e.', '')):
            comment_text = comment_text.replace(old, new)
        for sent in tokenize.sent_tokenize(comment_text):
            # Preserved `is False` comparison: any non-False result
            # (including None) keeps the sentence, as before.
            if has_alphabets(sent) is False:
                continue
            comment_sents.append(sent)
    return comment_sents
def get_todos_java(self, path: str):
    """Collect TODO entries from the comments of the Java file at *path*.

    Returns:
        dict: Raw comment string -> parsed todo object.
    """
    comments = comment_parser.extract_comments(path, mime=self.MIME_JAVA)
    result = {}
    for comment in comments:
        parsed = self.comment2todo(comment.string)
        if parsed is not None:
            result[comment.string] = parsed
    return result
def parse_file(self, filename):
    """Parse *filename*: extract its comments, attach surrounding code
    context, and dispatch every recognized annotation via ``run_action``."""
    logger.debug("Parsing file {}".format(filename))
    lines = self.get_lines(filename)
    if not lines:
        return
    commented_line_numbers = []
    comments = []
    try:
        for comment in comment_parser.extract_comments(filename, self.mime):
            text = comment.text()
            line_no = comment.line_number()
            multiline = comment.is_multiline()
            if multiline:
                # A multi-line comment covers one line per newline-split part.
                offset = len(text.split("\n"))
                commented_line_numbers.extend(range(line_no, line_no + offset))
            else:
                offset = 0
                commented_line_numbers.append(line_no)
            comments.append({
                "text": text,
                "line": line_no,
                "offset": offset,
                "multiline": multiline,
            })
    except comment_parser.UnsupportedError as e:
        print(e)
        return
    for entry in comments:
        entry["text"] = entry["text"].strip()
        num_lines = 5  # Get 5 lines of code
        code = self.extract_comment_context(
            lines, commented_line_numbers,
            entry["line"] + entry["offset"], num_lines, entry["multiline"])
        source = {"code": code, "filename": filename}
        annotations = self.parse_comment(entry["text"])
        if not annotations:
            continue
        for data in annotations:
            source["line"] = data.pop("line")
            source["annotation"] = data.pop("annotation")
            self.run_action(data, source)
def parse_and_add_comments_to_file(class_obj, file_path, tokenizer=None):
    """Parse the comments of the C file at *file_path* and attach each one
    to the matching code element of *class_obj*.

    A comment ending directly above a known method/attribute/inner-classifier
    line becomes that element's ``comment``; a comment inside a method body is
    appended to that method's ``body``; comments before the first element are
    ignored.

    Args:
        class_obj: Classifier whose elements receive the comments (mutated).
        file_path (str): Path of the C source file.
        tokenizer: Tokenizer for top-level comment text. Defaults to a fresh
            ``WordTokenizer(Libest())`` per call. (The original default was
            built once at import time and shared by every call — the
            mutable-default-argument pitfall.)

    Returns:
        The mutated *class_obj*.
    """
    if tokenizer is None:
        tokenizer = WordTokenizer(Libest())
    file_name = FileUtil.get_filename_from_path(file_path)
    comment_list = comment_parser.extract_comments(file_path, mime="text/x-c")
    # Map line number -> code element, plus a sorted list of those lines.
    code_elem_dict = {}
    all_line_beginnings = []
    for method in class_obj.methods:
        code_elem_dict[method.line] = method
        all_line_beginnings.append(method.line)
    for attr in class_obj.attributes:
        code_elem_dict[attr.line] = attr
        all_line_beginnings.append(attr.line)
    for inner_classif in class_obj.inner_classifiers:
        code_elem_dict[inner_classif.line] = inner_classif
        all_line_beginnings.append(inner_classif.line)
    all_line_beginnings.sort()
    for comm in comment_list:
        comm_begin = comm.line_number()
        line_after_comm_end = comm_begin + 1  # one line comment
        # BUGFIX: the original assigned comm_text only inside the multiline
        # branch, so a single-line comment reused the previous iteration's
        # text (or raised NameError on the first). Assign unconditionally.
        comm_text = comm.text()
        if comm.is_multiline():  # multi line comment
            line_after_comm_end += comm_text.count("\n")
            if comm_text.startswith("\n"):
                comm_text = comm_text[1:]
        if line_after_comm_end < all_line_beginnings[0]:
            continue  # Comment is before the first function/typedef, but doesn't belong to it -> ignore
        elif line_after_comm_end in code_elem_dict:
            # Top level comment (not inside method body) that belongs to a function/typedef
            code_elem_dict[line_after_comm_end].comment = IdentifierString(
                file_name, *(tokenizer.tokenize_to_string_list(comm_text)))
        else:
            # comment inside method body
            containing_method = code_elem_dict[
                _find_closest_line_smaller_than(comm_begin, all_line_beginnings)]
            if isinstance(containing_method, Enum_):
                continue
            # Add comment to the method body.
            # NOTE(review): this branch intentionally(?) uses a plain
            # WordTokenizer() rather than the `tokenizer` parameter —
            # preserved as-is; confirm whether that is deliberate.
            containing_method.body += IdentifierString(
                file_name, *(WordTokenizer().tokenize_to_string_list(comm_text)))
    return class_obj
def fix_inline_comments(file_path, tc):
    """Remove commented code while leaving useful comments in-tact.

    Args:
        file_path (str): Path to a python file to clean
        tc (TextClassifier): TextClassifier object that is used to remove comments

    Returns:
        list: Contents of file after cleaning, list of str lines
    """
    assert file_path[-3:] == '.py', "Only python files are currently supported"
    # Load the file as a list of lines for in-place editing.
    with open(file_path, 'r') as fh:
        source_lines = fh.read().split('\n')
    # Only single-line comments are candidates for removal.
    all_comments = comment_parser.extract_comments(file_path, mime='text/x-python')
    single_line = [c for c in all_comments if not c.is_multiline()]
    emptied_lines = []
    for comment in tqdm(single_line):
        # Class 1 == the model believes the comment is commented-out code.
        prediction = tc.predict(str(comment)).cpu()
        if np.argmax(prediction, axis=1) != 1:
            continue
        line_idx = comment.line_number() - 1
        # Cut the line just before the '#' that starts the comment
        # (-1 to get the '#' as well).
        cut_at = (len(source_lines[line_idx]) - len(str(comment))) - 1
        remainder = source_lines[line_idx][0:cut_at]
        if not remainder.strip():
            emptied_lines.append(line_idx)
        source_lines[line_idx] = remainder
    # Delete emptied lines bottom-up so earlier indexes stay valid.
    for idx in reversed(emptied_lines):
        source_lines.pop(idx)
    return source_lines
def on_post_save_async(self, view):
    # Sublime Text event hook: fires (on a background thread) after a file
    # is saved; renders comment-"health" highlights for the saved view.
    ### temporary block of health analysis execute ###
    # NOTE(review): `if True:` below unconditionally disables this handler;
    # the commented-out expression shows the originally intended guard.
    if True:#(not self.c_supported) or (not ACTIVE):#True:
        return
    try:
        # Drop any highlight regions left over from the previous run.
        self.clear_colors(view)
        cs = None
        if self.p_supported:
            # Parser-supported file type: extract comments from disk.
            cs = comment_parser.extract_comments(self.f_name)
            self.p_health_render(view, cs)
        elif self.c_supported:
            # Fallback: diff the current buffer against the original
            # contents captured when the file was opened.
            current_text = view.substr(sublime.Region(0, view.size()))
            cs = compare(str(self.original_file_contents), str(current_text))
            print_comments(cs)
            self.c_health_render(view,cs)
    except Exception as e:
        # Surface errors in the status bar and log the full traceback;
        # never let an analysis failure break the save event.
        sublime.status_message("Error: " + str(e))
        print_to_log(traceback.format_exc())
        pass
def _get_test_module_info(cls, tc_path: str) -> dict:
    """
    This method gets the volume types for which the TC is to be run and
    the nature of a TC

    Args:
        tc_path (str): The path of the test case.

    Returns:
        test_flags (dict): This dictionary contains the volume types
                           for which the TC is to be run and the nature
                           of the TC, i.e. Disruptive / Non-Disruptive.
                           For example,
                           {
                             "tcNature" : "disruptive",
                             "volType" : [replicated, ...]
                           }
    """
    # The module's first comment encodes "<nature>;<volType,volType,...>".
    header = str(extract_comments(tc_path, mime="text/x-python")[0])
    parts = header.split(';')
    nature = parts[0].strip()
    vol_types = parts[1].split(',')
    # An empty volType field means the TC is generic.
    if vol_types == ['']:
        vol_types = ["Generic"]
    return {"tcNature": nature, "volType": vol_types}
universal_newlines=True, stdout=subprocess.PIPE) output2 = greps2.stdout text4.write(str(output2)) greps3 = subprocess.run(["sort", "-u", "js6.txt"], universal_newlines=True, stdout=subprocess.PIPE) output3 = greps3.stdout text5.write(str(output3)) os.remove("js4.txt") os.remove("js5.txt") os.remove("js6.txt") jsfiles = open("final-links.txt", "r") comment_file = open("js-comment.txt", "a") for jsfile in jsfiles: try: req = requests.get(jsfile, timeout=2) print("downloading " + jsfile) jscontent = open("jscontent.txt", "a") jscontent.write(req.text) comment = comment_parser.extract_comments("jscontent.txt", mime="text/x-javascript") comment_file.write(jsfile + "-" * len(jsfile) + "\n" + str(comment) + "\n" + "=" * 100 + "\n") print(comment) os.remove("jscontent.txt") except: pass jsfiles.close() comment_file.close()
def main():
    """Program Entry point.

    Parses CLI arguments, extracts comments from the input file or
    directory, and writes the generated swagger info to the output path.
    """
    version_string = f"%(prog)s {__version__}\n"
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=f"{module_name} (Version {__version__})")
    parser.add_argument("--input", "-i", dest="input",
                        help="Input path, this can be a directory or file.")
    parser.add_argument("--output", "-o", dest="output",
                        help="Output path, this must be a path point to a file.")
    parser.add_argument("--version", action="version", version=version_string,
                        help="Display version information and dependencies.")
    args = parser.parse_args()
    input_path = args_input(args.input)  # renamed: don't shadow builtin `input`
    output = args.output
    if output is None:
        dirname = None
        filename = None
    else:
        dirname = os.path.dirname(output)    # Target directory path.
        filename = os.path.basename(output)  # Target file to export as swagger yaml.
    is_file = os.path.isfile(input_path)
    is_dir = os.path.isdir(input_path)
    print("[SWAGGER-IT] Start checking input:::")
    if not is_file and not is_dir:
        raise ArgumentError('[ERROR] File input error:', input_path)
    print("[SWAGGER-IT] Start checking output:::")
    if not dirname and not filename:
        raise ArgumentError('[ERROR] File output error:', output)
    print("[SWAGGER-IT] Parsing comments and docstrings:::")
    comments = []
    if is_file:
        # Single file: parse it directly.
        extension = get_file_extension(input_path)
        mime_text = get_mime_type_by_extension(extension)
        comments = comment_parser.extract_comments(input_path, mime=mime_text)
    else:
        # Directory: walk it and parse every valid, non-ignored source file.
        for r, d, f in os.walk(input_path):
            for entry in f:
                filepath = os.path.join(r, entry).replace("\\", "/")
                if contain_in_list(filepath, ignore_dir) or \
                        not valid_source_file(filepath):
                    continue
                extension = get_file_extension(filepath)
                mime_text = get_mime_type_by_extension(extension)
                comments.extend(
                    comment_parser.extract_comments(filepath, mime=mime_text))
    comments = extract_swagger_identifier(comments)
    attr_lst = form_attribute_list(comments)
    print(comments)
    print("[SWAGGER-IT] Filling swagger informations:::")
    fill_swagger_info(attr_lst)
    print(swagger_info)
    mkdir_safe(dirname)  # Ensure the path exists.
    # `with` closes the handle deterministically (the original leaked it).
    with open(output, "w+") as out_file:
        out_file.write(str(swagger_info))
    print("[SWAGGER-IT] Done generate the file:::")
def _word_is_acceptable(word, spell_checker, prefixes, output_lvl):
    """Return True if *word* passes a secondary spelling check:
    prefix-stripped, camel-case split into valid sub-words, or possessive."""
    # Check if the bad word starts with a prefix.
    # If so, spell check the word without that prefix.
    for pre in prefixes:
        if not word.startswith(pre):
            continue
        # check if the word is only the prefix
        if len(pre) == len(word):
            return True
        wrd = word[len(pre):]  # remove the prefix
        if output_lvl > 1:
            print("Trying without prefix: ", word, wrd)
        try:
            if spell_checker.check(wrd):
                return True
        except Exception:
            # Narrowed from BaseException so Ctrl-C still interrupts.
            print("Caught an exception for word", word, wrd)
    # Try splitting camel case words and checking each sub-word.
    if output_lvl > 1:
        print("Trying splitting camel case word")
    sub_words = splitCamelCase(word)
    if len(sub_words) > 1 and all(spell_checker.check(s) for s in sub_words):
        return True
    # Check for possesive words.
    if word.endswith("'s") and spell_checker.check(word[:-2]):
        return True
    return False


def spell_check_file(filename, spell_checker, mimetype='', output_lvl=1,
                     prefixes=()):
    """Spell check the comments of *filename*.

    Args:
        filename (str): File whose comments are checked.
        spell_checker: enchant-style checker (set_text / check / suggest).
        mimetype (str): Mime type of the file; guessed from the name if empty.
        output_lvl (int): Verbosity, -1 (silent) .. 2 (debug).
        prefixes: Prefixes to strip before re-checking a word. (Changed
            from a mutable ``[]`` default to an immutable tuple.)

    Returns:
        list: Sorted ``[word, filename, line_number]`` entries.
    """
    if len(mimetype) == 0:
        mimetype = getMimeType(filename)
    if output_lvl > 0:
        print("spell_check_file:", filename, ",", mimetype)
    # Returns a list of comment_parser.parsers.common.Comments
    try:
        clist = comment_parser.extract_comments(filename, mime=mimetype)
    except Exception:
        print("Parser failed, skipping file\n")
        return []
    bad_words = []
    for c in clist:
        mistakes = []
        spell_checker.set_text(c.text())
        for error in spell_checker:
            if output_lvl > 1:
                print("Error:", error.word)
            if _word_is_acceptable(error.word, spell_checker, prefixes,
                                   output_lvl):
                continue
            if output_lvl > 1:
                msg = 'error: ' + '\'' + error.word + '\', ' \
                    + 'suggestions: ' + str(spell_checker.suggest())
            else:
                msg = error.word
            mistakes.append(msg)
        if len(mistakes):
            if output_lvl > 0:
                print("\nLine number", c.line_number())
            if output_lvl > 0:
                print(c.text())
            for m in mistakes:
                if output_lvl >= 0:
                    print(" ", m)
                # NOTE(review): collected unconditionally; assumed the
                # original appended at loop level with only the print
                # guarded — confirm against pre-collapse source.
                bad_words.append([m, filename, c.line_number()])
    bad_words = sorted(bad_words)
    if output_lvl > 1:
        print("\nResults")
        for x in bad_words:
            print(x)
    return bad_words
def run(project_id, repo_path, cursor, **options):
    """Metric: documentation quality by grammar.

    Counts every non-empty comment/docstring in the project's repo and the
    subset for which the GingerIt grammar checker finds no corrections;
    the metric is the ratio of the two.

    Args:
        project_id: Project whose repo name is looked up via QUERY.
        repo_path: Unused here; kept for the metric-runner interface.
        cursor: DB cursor used to resolve the repo name.
        **options: Must contain 'threshold' (float).

    Returns:
        tuple: (docs_ratio >= threshold, docs_ratio)
    """
    # Extension -> comment_parser mime type (replaces a 10-branch elif chain;
    # iteration order matches the original chain).
    ext_to_mime = {
        '.py': 'text/x-python',
        '.rb': 'text/x-ruby',
        '.c': 'text/x-c',
        '.cpp': 'text/x-c++',
        '.go': 'text/x-go',
        '.html': 'text/html',
        '.java': 'text/x-java-source',
        '.js': 'application/javascript',
        '.sh': 'text/x-shellscript',
        '.xml': 'text/xml',
    }
    num_core_doc_msgs = 0
    totalNumberOfdocmsgs = 0
    cursor.execute(QUERY.format(project_id))
    repoName = cursor.fetchone()[0]
    os.chdir("path/" + str(project_id) + "/")
    stri = os.getcwd()
    # BUGFIX: initialize before the loop — the original left docs_ratio
    # undefined (NameError at the return) when the repo was never found.
    docs_ratio = 0
    for repos in os.listdir():
        if repos == repoName:
            os.chdir(repos)
            stream = []
            print("----- METRIC: DOCUMENTATION QUALITY -----")
            print('os path: ', os.getcwd())
            for (root, dirs, files) in inner_os.walk(os.getcwd(), topdown=True):
                for fi in files:
                    for ext, mime in ext_to_mime.items():
                        if fi.endswith(ext):
                            stream += comment_parser.extract_comments(
                                os.path.join(root, fi), mime=mime)
                            break
            for docs in stream:
                docs = str(docs).lower()
                # Keep letters/spaces only, collapse whitespace.
                trim_doc = re.sub("[^a-zA-Z ]+", "", docs)
                trim_doc = ' '.join(trim_doc.split())
                if len(trim_doc) > 0:
                    totalNumberOfdocmsgs += 1
                    # A message is "core" when the grammar checker finds
                    # nothing to correct. NOTE(review): GingerIt is built
                    # per message as in the original — hoisting it looks
                    # safe but is unverified here.
                    ginger_parser = GingerIt()
                    ginger_grammar_results = ginger_parser.parse(trim_doc)
                    if len(ginger_grammar_results['corrections']) == 0:
                        num_core_doc_msgs += 1
            if totalNumberOfdocmsgs > 0:
                docs_ratio = float(num_core_doc_msgs) / float(
                    totalNumberOfdocmsgs * 1.0)
            print('documentation ratio: ', docs_ratio)
            break
    threshold = options['threshold']
    return (docs_ratio >= threshold, docs_ratio)
import glob
import json
from comment_parser import comment_parser

# Walk every Java file under the projects directory and dump its comments
# as JSON, one numbered output file per source file.
#
# Bug fixes vs. the original:
#  * the glob pattern was missing the '/' separator before '**'
#  * extract_comments() takes a file *path*, not an open file object
#  * the dict key must be the string 'text' (`text` was an undefined name)
#  * lists use append(), not add()
count = 0
for filename in glob.iglob("/home/pi/java_projects" + '/**/*.java',
                           recursive=True):
    print(filename)
    json_obj = {'text': []}
    all_comments = comment_parser.extract_comments(filename,
                                                   "text/x-java-source")
    for comment in all_comments:
        json_obj['text'].append(comment.text())
    with open("./results/" + str(count), 'w') as out_file:
        json.dump(json_obj, out_file)
    count += 1
        # NOTE(review): these two lines are the tail of a function whose
        # `def` lies above this chunk; indentation reconstructed — confirm.
        newtext += line + "\n"
    return newtext


# Jinja2 environment used to render the wrapper markdown templates.
env = jinja2.Environment(
    loader=jinja2.FileSystemLoader([TEMPLATES_PATH]),
    extensions=["jinja2.ext.with_"],
    trim_blocks=True,
    lstrip_blocks=True,
)

if __name__ == "__main__":
    for fn in filenames:
        # Derive the page slug from the wrapper's file name.
        slug = fn.split("/")[-1].replace("wrappingS-", "").replace(".js", "").lower()
        comments = comment_parser.extract_comments(fn, mime="text/x-javascript")
        # The description is the first comment containing an "ingroup" tag.
        description = [c.text() for c in comments if "ingroup" in c.text()]
        context = {}
        context["filename"] = fn
        context["title"] = titles.get(slug, slug)
        if description:
            # print("+ " + slug)
            context["description"] = comment_to_md(description[0])
        else:
            # meter título sem link no site
            # print("  " + slug)
            pass
        outfn = output_dir + slug + ".md"
        render_template_into_file(env, "wrapper.md", outfn, context=context)
def run(project_id, repo_path, cursor, **options):
    """Metric: documentation — spelling quality of comments combined with
    the comment/source line ratio of the repo.

    Args:
        project_id: Project whose repo name is looked up via QUERY.
        repo_path: Unused here; kept for the metric-runner interface.
        cursor: DB cursor used to resolve the repo name.
        **options: Must contain 'threshold' (float).

    Returns:
        tuple: (ratio >= threshold, ratio)
    """
    # Extension -> comment_parser mime type (replaces a 10-branch elif chain;
    # iteration order matches the original chain).
    ext_to_mime = {
        '.py': 'text/x-python',
        '.rb': 'text/x-ruby',
        '.c': 'text/x-c',
        '.cpp': 'text/x-c++',
        '.go': 'text/x-go',
        '.html': 'text/html',
        '.java': 'text/x-java-source',
        '.js': 'application/javascript',
        '.sh': 'text/x-shellscript',
        '.xml': 'text/xml',
    }
    num_core_doc_words = 0
    totalNumberOfdocWords = 0
    cursor.execute(QUERY.format(project_id))
    repoName = cursor.fetchone()[0]
    os.chdir("path/" + str(project_id) + "/")
    stri = os.getcwd()
    # BUGFIX: initialize before the loop — the original left docs_ratio
    # undefined when the repo was never found.
    docs_ratio = 0
    for repos in os.listdir():
        if repos == repoName:
            os.chdir(repos)
            stream = []
            print("----- METRIC: DOCUMENTATION QUALITY -----")
            print('os path: ', os.getcwd())
            for (root, dirs, files) in inner_os.walk(os.getcwd(), topdown=True):
                for fi in files:
                    for ext, mime in ext_to_mime.items():
                        if fi.endswith(ext):
                            stream += comment_parser.extract_comments(
                                os.path.join(root, fi), mime=mime)
                            break
            # Hoisted out of the loop: the checker is only queried via
            # known(), so one instance serves every comment.
            spell = SpellChecker()
            for docs in stream:
                docs = str(docs).lower()
                # Keep letters/spaces only, collapse whitespace.
                trim_doc = re.sub("[^a-zA-Z ]+", "", docs)
                trim_doc = ' '.join(trim_doc.split())
                totalNumberOfdocWords += len(trim_doc.split())
                # Drop 1- and 2-letter tokens before spell checking.
                trim_doc = re.sub(r"\b[a-zA-Z]\b", "", trim_doc)
                trim_doc = re.sub(r"\b[a-zA-Z][a-zA-Z]\b", "", trim_doc)
                num_core_doc_words += len(spell.known(trim_doc.split()))
            if totalNumberOfdocWords > 0:
                docs_ratio = float(num_core_doc_words) / float(
                    totalNumberOfdocWords * 1.0)
            break
    ratio = 0
    # Dictionary of language => metrics dictionary
    util = utilities.get_loc(stri)
    sloc = 0
    cloc = 0
    for lang, metrics in util.items():
        sloc += metrics['sloc']
        cloc += metrics['cloc']
    if sloc == 0:
        # No source code
        return False, ratio
    t_loc = sloc + cloc
    # Comment-density ratio, weighted by the spelling-quality ratio.
    ratio = (cloc / t_loc) if t_loc > 0 else 0
    ratio = ratio * docs_ratio
    print("----- METRIC: DOCUMENTATION -----")
    print('ratio: ', ratio)
    threshold = options['threshold']
    return (ratio >= threshold, ratio)
from comment_parser import comment_parser
import language_check
import os

# Collect the comments of every recognized source file under the current
# working directory into one flat list, choosing the comment_parser mime
# type from the file extension.
# NOTE(review): this chunk is cut off — the final `elif` branch's body
# continues beyond the visible source.
stream = []
for (root, dirs, files) in os.walk(os.getcwd(), topdown=True):
    for fi in files:
        if (fi.endswith('.py')):
            stream += comment_parser.extract_comments(os.path.join(root, fi), mime='text/x-python')
        elif (fi.endswith('.rb')):
            stream += comment_parser.extract_comments(os.path.join(root, fi), mime='text/x-ruby')
        elif (fi.endswith('.c')):
            stream += comment_parser.extract_comments(os.path.join(root, fi), mime='text/x-c')
        elif (fi.endswith('.cpp')):
            stream += comment_parser.extract_comments(os.path.join(root, fi), mime='text/x-c++')
        elif (fi.endswith('.go')):
            stream += comment_parser.extract_comments(os.path.join(root, fi), mime='text/x-go')
        elif (fi.endswith('.html')):
            stream += comment_parser.extract_comments(os.path.join(root, fi), mime='text/html')
        elif (fi.endswith('.java')):
            stream += comment_parser.extract_comments(
                os.path.join(root, fi), mime='text/x-java-source')
        elif (fi.endswith('.js')):
            stream += comment_parser.extract_comments(
                os.path.join(root, fi), mime='application/javascript')
        elif (fi.endswith('.sh')):
def get_comment(codeFilePath):
    """Return the list of comments extracted from the Java file at
    *codeFilePath*."""
    return comment_parser.extract_comments(codeFilePath,
                                           mime='text/x-java-source')
from comment_parser import comment_parser
import argparse


def output(all_comments, file_name):
    """Print a specificity report line for each comment in *all_comments*.

    The specificity model is not wired up yet: every comment currently
    passes the placeholder check and is reported with specificity 0.
    """
    for comment in all_comments:
        # give the comment to the model
        # get a value for the specificity
        if True:  # placeholder until the specificity model is integrated
            print("File \"{}\", Line {} ".format(file_name,
                                                 comment.line_number()))
            print("\"{}\"".format(comment.text()))
            print(
                "UnspecificCommentSuggestion: This comment ranks {0} in specificity\n"
                .format(0))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='gets comments from source code',
        prog="speci-comment.py")
    parser.add_argument("input_file", type=str, help="The input file")
    args = parser.parse_args()
    # (Removed the stray debug print("hello") that polluted report output.)
    all_comments = comment_parser.extract_comments(args.input_file,
                                                   "text/x-java-source")
    output(all_comments, args.input_file)
from comment_parser import comment_parser

# Ask the user for a C++ source file and dump its extracted comments.
cpp_path = input("Please enter path of the C++ file you want:")
a = comment_parser.extract_comments(cpp_path, mime='text/x-c++')
print("Comment(actual_comment, number_of_line, False 1 line comment/True multiple line comment)")
print(a)