def add_project_to_corpus(project):
    """Build a project and precompute its graph kernel file.

    Assumes that the project_dir contains a text file named
    build_command.txt that contains the build command(s) for the project
    in this directory, and a clean_command.txt that will clean the
    project.

    Returns the path of the generated kernel file.
    """
    common.clean_project(project)

    # Run dljc:
    #  - run Randoop to generate test sources
    #  - compile test sources
    #  - run daikon.Chicory on tests to create the dtrace file
    #  - precompute graph kernels that are independent of ontology stuff
    # (The original used bare string literals as comments here; they were
    # no-op expressions, not docstrings.)
    common.run_dljc(project,
                    ['dyntrace', 'graphtool'],
                    ['--graph-jar', common.get_jar('prog2dfg.jar'),
                     '--dyntrace-libs', common.LIBS_DIR])

    # Run petablox (currently disabled).
    # run_petablox(project_dir)

    # Run the graph kernel computation.
    project_dir = common.get_project_dir(project)
    kernel_file_path = common.get_kernel_path(project)
    graph_kernel_cmd = ['python',
                        common.get_simprog('precompute_kernel.py'),
                        project_dir,
                        kernel_file_path]
    common.run_cmd(graph_kernel_cmd)
    # BUG FIX: was a Python-2-only 'print' statement; with a single
    # argument, print(...) behaves identically on Python 2 and 3.
    print('Generated kernel file for {0}.'.format(project))
    return kernel_file_path
def run_inference(project):
    """Run dljc ontology inference on a project.

    Sets up the Checker Framework environment, extends CLASSPATH with the
    generic-type-inference-solver binaries, removes any previous
    annotation output, cleans the project, and runs dljc 'inference' in
    ROUNDTRIP mode with the MaxSat backend.
    """
    common.setup_checker_framework_env()

    solver_bin = os.path.join(os.environ['JSR308'],
                              'generic-type-inference-solver', 'bin')
    existing = os.environ.get('CLASSPATH')
    os.environ['CLASSPATH'] = (existing + ':' + solver_bin) if existing else solver_bin

    project_dir = common.get_project_dir(project)
    annotation_dir = os.path.join(project_dir, common.DLJC_OUTPUT_DIR,
                                  'annotations')
    # Start from a clean annotation output directory.
    if os.path.isdir(annotation_dir):
        shutil.rmtree(annotation_dir)

    with common.cd(project_dir):
        common.clean_project(project)
        common.run_dljc(project,
                        ['inference'],
                        ['--solverArgs=backEndType=maxsatbackend.MaxSat',
                         '--checker', 'ontology.OntologyChecker',
                         '--solver', 'constraintsolver.ConstraintSolver',
                         '-m', 'ROUNDTRIP',
                         '-afud', annotation_dir])
def run_inference(project_name):
    """Run dljc ontology inference (Z3 backend) and build the annotated jar.

    Extends CLASSPATH with the inference-solver and ontology binaries,
    clears previous annotation output, runs dljc 'inference' in ROUNDTRIP
    mode with caching, then builds the project's annotated JAR.
    """
    common.setup_checker_framework_env()

    jsr308 = os.environ['JSR308']
    solver_paths = [
        os.path.join(jsr308, 'generic-type-inference-solver', 'bin'),
        os.path.join(jsr308, 'ontology', 'bin'),
    ]
    extra_classpath = ':'.join(solver_paths)
    if os.environ.get('CLASSPATH'):
        os.environ['CLASSPATH'] += ':' + extra_classpath
    else:
        os.environ['CLASSPATH'] = extra_classpath

    project_dir = common.get_project_dir(project_name)
    annotation_dir = os.path.join(project_dir, common.DLJC_OUTPUT_DIR,
                                  'annotations')
    # Remove stale annotation output from earlier runs.
    if os.path.isdir(annotation_dir):
        shutil.rmtree(annotation_dir)

    common.run_dljc(project_name,
                    ['inference'],
                    ['--solverArgs=solver=Z3',
                     '--checker', 'ontology.OntologyChecker',
                     '--solver', 'ontology.solvers.backend.OntologySolverEngine',
                     '-m', 'ROUNDTRIP',
                     '--cache',
                     '-afud', annotation_dir])

    print("Building annotated JAR for {}".format(project_name))
    build_jar(project_name)
def generate_project_kernel(project, cluster_json=None):
    """Run the graph kernel computation for a project.

    Args:
        project: project name.
        cluster_json: optional clustering output passed through to the
            kernel precomputation script.
    """
    project_dir = common.get_project_dir(project)
    graph_dirs = dot.dot_dirs(project)
    # Nothing to do when no graphs were produced for the project.
    if not graph_dirs:
        print("No graphs generated for {}".format(project))
        return

    kernel_file_path = dot.get_kernel_path(project, graph_dirs[0])
    kernel_cmd = ['python',
                  common.get_simprog('precompute_kernel.py'),
                  project_dir,
                  kernel_file_path]
    if cluster_json:
        kernel_cmd.append(cluster_json)
    common.run_cmd(kernel_cmd, 'graphkernel')
def add_project_to_corpus(project):
    """Build a project and compute its graph kernel file.

    Assumes that the project_dir contains a text file named
    build_command.txt that contains the build command(s) for the project
    in this directory, and a clean_command.txt that will clean the
    project.

    Returns the path of the generated kernel file.
    """
    common.clean_project(project)

    # Run dljc: run Randoop to generate test sources, compile them, run
    # daikon.Chicory on the tests to create the dtrace file, and
    # precompute graph kernels that are independent of ontology stuff.
    common.run_dljc(project,
                    ['dyntrace', 'graphtool'],
                    ['--graph-jar', common.get_jar('prog2dfg.jar'),
                     '--dyntrace-libs', common.LIBS_DIR])

    # Petablox run is currently disabled.
    # run_petablox(project_dir)

    # Graph kernel computation.
    project_dir = common.get_project_dir(project)
    kernel_file_path = common.get_kernel_path(project)
    graph_kernel_cmd = ['python',
                        common.get_simprog('precompute_kernel.py'),
                        project_dir,
                        kernel_file_path]
    common.run_cmd(graph_kernel_cmd)
    # BUG FIX: replaced the Python-2-only 'print' statement; the
    # parenthesized single-argument form works on both Python 2 and 3.
    print('Generated kernel file for {0}.'.format(project))
    return kernel_file_path
def generate_project_kernel(project, cluster_json=None):
    """Run the graph kernel computation for a project.

    Args:
        project: project name.
        cluster_json: optional clustering output used for node
            relabeling; forwarded to the kernel precomputation script.
    """
    project_dir = common.get_project_dir(project)
    # BUG FIX: indexing dot.dot_dirs(project)[0] directly raised
    # IndexError when no graphs were generated; the sibling variant of
    # this function performs the same emptiness check.
    dot_dirs = dot.dot_dirs(project)
    if not dot_dirs:
        print("No graphs generated for {}".format(project))
        return
    out_dir = dot_dirs[0]
    kernel_file_path = dot.get_kernel_path(project, out_dir)

    graph_kernel_cmd = ['python',
                        common.get_simprog('precompute_kernel.py'),
                        project_dir,
                        kernel_file_path]
    if cluster_json:
        print("Using clustering output for node relabeling:")
        graph_kernel_cmd.append(cluster_json)
    common.run_cmd(graph_kernel_cmd, True)
    print("Generated kernel file for {0} in {1}.".format(project, kernel_file_path))
def clean():
    """Removes all .pyc files from the project directory."""
    project_dir = common.get_project_dir()
    for root, _, filenames in os.walk(project_dir):
        for filename in filenames:
            if filename.endswith('.pyc'):
                # os.path.join is portable, unlike manual '/' concatenation.
                os.remove(os.path.join(root, filename))
def run_petablox(project):
    """Run the Petablox cipa-0cfa-dlog analysis inside the project directory."""
    with common.cd(common.get_project_dir(project)):
        cmd = ['java',
               '-cp', common.get_jar('petablox.jar'),
               '-Dpetablox.reflect.kind=none',
               '-Dpetablox.run.analyses=cipa-0cfa-dlog',
               'petablox.project.Boot']
        common.run_cmd(cmd)
def run_petablox(project):
    """Invoke Petablox (cipa-0cfa-dlog analysis) from within the project dir."""
    with common.cd(common.get_project_dir(project)):
        jvm_args = ['-cp', common.get_jar('petablox.jar'),
                    '-Dpetablox.reflect.kind=none',
                    '-Dpetablox.run.analyses=cipa-0cfa-dlog']
        common.run_cmd(['java'] + jvm_args + ['petablox.project.Boot'])
def collect_jars(project_name, out_dir):
    """Move every .jar produced by a project build into out_dir/jars/<project>.

    Args:
        project_name: name of the project whose tree is scanned.
        out_dir: destination root; jars land in out_dir/jars/<project_name>.
    """
    jars_dir = os.path.join(out_dir, "jars", project_name)
    common.mkdir(jars_dir)
    project_dir = common.get_project_dir(project_name)
    for path, _, files in os.walk(project_dir):
        # Skip Maven wrapper/metadata directories.
        if ".mvn" in path:
            continue
        # Renamed loop variable: 'file' shadowed the builtin.
        for filename in files:
            if filename.endswith('.jar'):
                # Single-argument os.path.join(jars_dir) was a no-op;
                # move() accepts a destination directory directly.
                move(os.path.join(path, filename), jars_dir)
def find_methods_with_signature(corpus, return_annotation, param_annotation_list):
    """Find all methods in the corpus whose return value is annotated with
    'return_annotation' and whose parameters are annotated with
    'param_annotation_list'. A None filter matches anything.

    OUTPUT: List of tuples (project, package, class, method)
    """
    good_methods = []
    for project in corpus:
        project_dir = common.get_project_dir(project)
        jaif_file = os.path.join(project_dir, "default.jaif")
        has_param = False
        has_ret = False
        current_package = ""
        current_class = ""
        current_method = ""
        with open(jaif_file, 'r') as f:
            for line in f.readlines():
                if line.startswith("package "):
                    current_package = line[len("package "):line.find(":")]
                if line.startswith("class "):
                    current_class = line[len("class "):line.find(":")]
                if line.startswith("method "):
                    current_method = line[len("method "):line.find(":")]
                    # Reset per-method match state.
                    has_param = False
                    has_ret = False
                if param_annotation_list is not None:
                    if line.startswith("insert-annotation Method.parameter"):
                        s = line[len("insert-annotation Method.parameter "):]
                        param_idx = int(s[:s.find(",")])
                        if (len(param_annotation_list) > param_idx
                                and param_annotation_list[param_idx] in line):
                            has_param = True
                        elif len(param_annotation_list) <= param_idx:
                            has_param = False
                else:
                    # BUG FIX: the original assigned to a misspelled, dead
                    # variable ('hase_param'), so a None parameter filter
                    # could never match.
                    has_param = True
                if return_annotation is not None:
                    if (line.startswith("insert-annotation Method.type")
                            and return_annotation in line):
                        has_ret = True
                else:
                    has_ret = True
                if has_param and has_ret:
                    good_methods += [(project, current_package,
                                      current_class, current_method)]
                    print("Relevant Method: {}.{}".format(
                        current_class, current_method))
                    has_param = False
                    has_ret = False
    return good_methods
def insert_anno_to_project(project, jaif_file):
    """ Insert annotation info in the ${jaif_file} to ${project}. """
    project_dir = common.get_project_dir(project)
    with common.cd(project_dir):
        common.setup_checker_framework_env()
        # insert-annotations-to-source comes from the Checker Framework
        # environment set up above; -i keeps the modified sources in place.
        insert_cmd = ['insert-annotations-to-source', '-i', jaif_file]
        # using glob2.glob to recursive get java files under project dir
        # NOTE(review): if 'glob' is actually the stdlib module rather than
        # glob2, the '**' pattern does NOT recurse without recursive=True --
        # verify which module is imported at the top of the file.
        java_files = glob.glob('{}/**/*.java'.format(project_dir))
        insert_cmd.extend(java_files)
        common.run_cmd(insert_cmd, print_output=True)
def get_pylint_command(files_to_lint):
    """Build the pylint command line.

    Args:
        files_to_lint: A list of files to run pylint on.
    """
    # The rcfile must be passed explicitly: the config is a dotfile, while
    # pylint looks for a plain "pylintrc" by default.
    rcfile_flag = '--rcfile=%s/.pylintrc' % common.get_project_dir()
    return ['pylint', rcfile_flag] + files_to_lint
def get_files_to_lint():
    """Returns a list of all python files in the project's directory."""
    project_dir = common.get_project_dir()
    files_to_lint = []
    for root, _, filenames in os.walk(project_dir):
        for filename in filenames:
            # We use empty __init__ files for imports. They don't need
            # docstrings.
            if filename.endswith('.py') and filename != '__init__.py':
                # os.path.join is portable, unlike manual '/' concatenation.
                files_to_lint.append(os.path.join(root, filename))
    return files_to_lint
def find_methods_with_signature(corpus, return_annotation, param_annotation_list):
    """Find all methods in the corpus that have annotation
    'return_annotation' on the return value and parameters annotated with
    'param_annotation_list'. A None filter matches anything.

    OUTPUT: List of tuples (project, package, class, method)
    """
    good_methods = []
    for project in corpus:
        project_dir = common.get_project_dir(project)
        jaif_file = os.path.join(project_dir, "default.jaif")
        has_param = False
        has_ret = False
        current_package = ""
        current_class = ""
        current_method = ""
        with open(jaif_file, 'r') as f:
            for line in f.readlines():
                if line.startswith("package "):
                    current_package = line[len("package "):line.find(":")]
                if line.startswith("class "):
                    current_class = line[len("class "):line.find(":")]
                if line.startswith("method "):
                    current_method = line[len("method "):line.find(":")]
                    # New method: reset match state.
                    has_param = False
                    has_ret = False
                if param_annotation_list is not None:
                    if line.startswith("insert-annotation Method.parameter"):
                        s = line[len("insert-annotation Method.parameter "):]
                        param_idx = int(s[:s.find(",")])
                        if (len(param_annotation_list) > param_idx
                                and param_annotation_list[param_idx] in line):
                            has_param = True
                        elif len(param_annotation_list) <= param_idx:
                            has_param = False
                else:
                    # BUG FIX: 'hase_param = True' wrote to a misspelled,
                    # unused variable, so passing None as the parameter
                    # filter never matched any method.
                    has_param = True
                if return_annotation is not None:
                    if (line.startswith("insert-annotation Method.type")
                            and return_annotation in line):
                        has_ret = True
                else:
                    has_ret = True
                if has_param and has_ret:
                    good_methods += [(project, current_package,
                                      current_class, current_method)]
                    print("Relevant Method: {}.{}".format(current_class,
                                                          current_method))
                    has_param = False
                    has_ret = False
    return good_methods
def gather_kernels(projects, corpus_kernel_file):
    """Concatenate the kernel files of all projects into corpus_kernel_file.

    Args:
        projects: iterable of project names.
        corpus_kernel_file: path of the combined output file (overwritten).
    """
    print("Gathering kernels from projects {0}".format(" and ".join(projects)))
    with open(corpus_kernel_file, "w") as corpus_kernel_file_handle:
        for project in projects:
            # Removed unused local 'project_dir' from the original.
            out_dir = dot.dot_dirs(project)[0]  # only consider the first one
            project_kernel_file_path = dot.get_kernel_path(project, out_dir)
            if os.path.isfile(project_kernel_file_path):
                with open(project_kernel_file_path, "r") as fi:
                    corpus_kernel_file_handle.write(fi.read())
            else:
                # BUG FIX: message read "No kernel file find".
                print("No kernel file found for project {0}.\n {1} is not a file.".format(
                    project, project_kernel_file_path))
def run_tests(with_coverage=False):
    """Runs unit tests using nose and the NoseGAE plugin."""
    app_engine_dir = common.get_app_engine_dir()
    project_dir = common.get_project_dir()
    # NoseGAE wants us to be in the project directory.
    os.chdir(project_dir)
    # TODO(samking): Use https://github.com/jkrebs/nose-gae-index to
    # automatically update indexes when unit tests are run.
    command = ['nosetests', '--with-gae', '--without-sandbox',
               '--gae-lib-root=' + app_engine_dir, '--nologcapture',
               project_dir]
    if with_coverage:
        # Documentation for these flags is at
        # http://nose.readthedocs.org/en/latest/plugins/cover.html
        coverage_flags = ['--with-coverage', '--cover-package=ctc',
                          '--cover-inclusive', '--cover-erase',
                          '--cover-branches']
        command.extend(coverage_flags)
    subprocess.call(command)
def create_jaif_file(project, mappings):
    """Write a {project_name}.jaif file under the project directory.

    The jaif file contains the inserted-annotation info for the project.
    Note: if ``project`` is "corpus", a "corpus.jaif" is created under the
    corpus directory instead. Returns the path of the written file.
    """
    if project == "corpus":
        project_dir = common.CORPUS_DIR
    else:
        project_dir = common.get_project_dir(project)
    jaif_file = os.path.join(project_dir, "{}.jaif".format(project))
    print("Writing project {} annotated info to file {}".format(
        project, jaif_file))

    with open(jaif_file, 'w') as out_file:
        # write ontology package info
        out_file.write(JAIF_FILE_ONTOLOGY_HEADER)

        # Group annotation values as package -> class -> field -> {values}.
        jaif_dict = dict()
        for mapping in mappings:
            for qualified_field in mapping['fields']:
                (package, clazz, field) = parse_field(qualified_field)
                field_values = (jaif_dict.setdefault(package, dict())
                                         .setdefault(clazz, dict())
                                         .setdefault(field, set()))
                # assume 'label' at least has one element
                field_values.add(mapping['label'][0].upper())

        for package, classes in jaif_dict.items():
            out_file.write("\npackage {}:\n".format(package))
            for clazz, fields in classes.items():
                out_file.write(" class {}:\n".format(clazz))
                for field, value_set in fields.items():
                    out_file.write(" field {}:\n".format(field))
                    out_file.write(
                        " @Ontology(values={{{value_name}}})\n".format(
                            value_name=', '.join(value_set)))
    return jaif_file
def collect_stray_output(project_list, out_dir):
    """Gather per-project dljc output, jaif files and jars into out_dir."""
    dljc_out_dir = os.path.join(out_dir, common.DLJC_OUTPUT_DIR)
    common.mkdir(dljc_out_dir)
    jaif_out_dir = os.path.join(out_dir, "jaif")
    common.mkdir(jaif_out_dir)

    # Top-level class info produced in the working directory.
    move(os.path.join(common.WORKING_DIR, 'class_info.json'),
         os.path.join(out_dir, 'class_info.json'))

    for project in project_list:
        collect_jars(project, out_dir)
        copytree(common.get_dljc_dir(project),
                 os.path.join(dljc_out_dir, project))
        move(os.path.join(common.get_project_dir(project), 'default.jaif'),
             os.path.join(jaif_out_dir, "{}.jaif".format(project)))

    # Corpus-wide jaif file.
    move(os.path.join(common.CORPUS_DIR, 'corpus.jaif'),
         os.path.join(jaif_out_dir, 'corpus.jaif'))
def build_jar(project_name):
    """Build a project's jar via its configured 'jar' command.

    Dispatches on the build system (mvn/gradle) to set up dependencies
    first; skips the project when no jar command is configured.
    """
    project = common.project_info(project_name)
    project_dir = common.get_project_dir(project_name)

    if 'jar' not in project:
        # BUG FIX: the original printed the literal '{}' because the
        # .format(project_name) call was missing.
        print('No jar command available, skipping {}.'.format(project_name))
        return

    jar_cmd = project['jar'].strip().split()
    build_system = jar_cmd[0]
    if build_system == "mvn":
        add_mvn_deps(project_dir)
    elif build_system == "gradle":
        add_gradle_deps(project_dir)
    else:
        print("Don't know how to build jar file for {} projects".format(
            build_system))
        return

    with common.cd(project_dir):
        common.run_cmd(jar_cmd)
def run_inference(project):
    """Run dljc ontology inference (MaxSat backend) for a project."""
    common.setup_checker_framework_env()

    solver_classpath = os.path.join(os.environ['JSR308'],
                                    'generic-type-inference-solver', 'bin')
    current = os.environ.get('CLASSPATH')
    if current:
        os.environ['CLASSPATH'] = current + ':' + solver_classpath
    else:
        os.environ['CLASSPATH'] = solver_classpath

    project_dir = common.get_project_dir(project)
    annotation_dir = os.path.join(project_dir, common.DLJC_OUTPUT_DIR,
                                  'annotations')
    # Drop stale annotations from a previous run.
    if os.path.isdir(annotation_dir):
        shutil.rmtree(annotation_dir)

    with common.cd(project_dir):
        common.clean_project(project)
        dljc_args = ['--solverArgs=backEndType=maxsatbackend.MaxSat',
                     '--checker', 'ontology.OntologyChecker',
                     '--solver', 'constraintsolver.ConstraintSolver',
                     '-m', 'ROUNDTRIP',
                     '-afud', annotation_dir]
        common.run_dljc(project, ['inference'], dljc_args)
# NOTE(review): the next two statements appear to be the tail of an
# enclosing function whose 'def' is outside this view -- their original
# indentation cannot be determined from here.
logger.info(value)
f.write('%s\n' % value)


# Re-download the log files whose downloads failed.
if __name__ == '__main__':
    # Alternative configurations, kept commented out for manual switching:
    # classify_name = '自拍达人原创申请'
    # file_dir = common.get_project_dir() + 'p**n' + os.sep + 'all' + os.sep + 'zpdr_ycsq_all' + os.sep
    # file_path = file_dir + 'un_done.log'

    # classify_name = '兴趣分享'
    # file_dir = common.get_project_dir() + 'p**n' + os.sep + 'all' + os.sep + 'xqfx' + os.sep
    # file_path = file_dir + 'un_down.log'

    # Active configuration.
    classify_name = '我爱我妻'
    file_dir = common.get_project_dir(
    ) + 'p**n' + os.sep + 'all' + os.sep + 'wawq_all' + os.sep
    file_path = file_dir + 'un_down.log'

    # classify_name = '原创自拍区'
    # file_dir = common.get_project_dir() + 'p**n' + os.sep + 'all' + os.sep + 'yczp_all' + os.sep
    # file_path = file_dir + 'un_down.log'

    # classify_name = '自拍达人原创申请_JH'
    # file_dir = common.get_project_dir() + 'p**n' + os.sep + 'jh' + os.sep + 'zpdr_ycsq_jh' + os.sep
    # file_path = file_dir + 'un_done.log'

    # classify_name = '我爱我妻_JH'
    # file_dir = common.get_project_dir() + 'p**n' + os.sep + 'jh' + os.sep + 'wawq_jh' + os.sep
    # file_path = file_dir + 'un_down.log'

    # presumably reads the module-level classify_name/file_path set above --
    # verify against filter_delete's definition.
    filter_delete()
import sys # from urlparse import urlsplit from urllib.request import Request from urllib.request import urlopen import logging from bs4 import BeautifulSoup from common import DoConfig import random import common logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) project_dir = common.get_project_dir() class ProxyIp: __instance = None __init_flag = False def __new__(cls, *args, **kwargs): if cls.__instance is None: # logger.info(' proxyip not int ,start init') cls.__instance = object.__new__(cls) return cls.__instance else: # logger.info(' proxyip has init') return cls.__instance
def dot_dir(project_name):
    """Return the dljc 'dot' output directory for the given project."""
    project_dir = get_project_dir(project_name)
    return os.path.join(project_dir, DLJC_OUTPUT_DIR, "dot")
def refactor_multi_decl(project):
    """Run the multi-declaration refactoring script over a project's sources."""
    refactor_script = os.path.join(MAP_WORKING_DIR, "multiDeclRefactor",
                                   "run-refactor.sh")
    common.run_cmd([refactor_script, common.get_project_dir(project)])