Example #1
0
File: crawl.py  Project: Suvodeep90/SLACC
def crawl_link(url, key):
    """Download the zip archive at *url* and extract its Python files.

    The 'username' query parameter of *url* (letters only) names a folder
    under PYTHON_PROJECTS_HOME/DATASET/<key>/ that receives the extracted
    files. Non-Python files are deleted after extraction, and the whole
    folder is removed if it ends up containing no Python file (other than
    __init__.py).
    """
    LOGGER.info("Fetching URL: %s" % url)
    parsed_url = urlparse.urlparse(url)
    # Strip everything but letters from the username to get a safe folder name.
    user_name = re.sub('[^a-zA-Z]+', '',
                       urlparse.parse_qs(parsed_url.query)['username'][0])
    user_name_path = os.path.join(properties.PYTHON_PROJECTS_HOME, DATASET,
                                  key, user_name)
    cache.mk_package(user_name_path)
    file_handle, _ = urllib.urlretrieve(url)
    zip_file_object = zipfile.ZipFile(file_handle, 'r')
    try:
        for file_name in zip_file_object.namelist():
            f = zip_file_object.open(file_name)
            try:
                file_content = f.read()
            finally:
                # BUG FIX: member handles were never closed (resource leak).
                f.close()
            file_path = os.path.join(user_name_path, file_name)
            cache.write_file(file_path, file_content)
            if not cache.is_valid_python_file(file_path):
                LOGGER.info("Invalid Python File: %s. Deleting it .... " %
                            file_path)
                cache.delete_file(file_path)
    finally:
        zip_file_object.close()
    if len(
            cache.list_files(user_name_path,
                             False,
                             False,
                             ignores=["__init__.py"])) == 0:
        # BUG FIX: the original log call had a %s placeholder but passed no
        # argument, so the literal "%s" was logged instead of the folder path.
        LOGGER.info("Folder '%s' contains no python file. Deleting it" %
                    user_name_path)
        cache.delete_folder(user_name_path)
Example #2
0
def connected_components(dataset, base_folder):
    """Summarize clone clusters per epsilon into a markdown report.

    Reads every ``*.csv`` edge file under
    META_RESULTS_FOLDER/<dataset>/<base_folder> (file names like
    ``eps_0_01.csv``, where the underscored suffix encodes the epsilon),
    builds an undirected graph from the first two CSV columns, and writes
    the connected components of each graph to ``components.md`` in the
    same folder.
    """
    base_folder_path = os.path.join(properties.META_RESULTS_FOLDER, dataset,
                                    base_folder)
    contents = ["# Epsilons and methods"]
    for file_path in sorted(
            cache.list_files(base_folder_path, is_absolute=True)):
        if not file_path.endswith(".csv"):
            continue
        # e.g. "eps_0_01.csv" -> "0_01" -> "0.01"
        epsilon = cache.get_file_name(file_path).split(".")[0].split(
            "_", 1)[1].replace("_", ".")
        print(file_path)
        graph = networkx.Graph()
        contents.append("## eps = %s" % epsilon)
        with open(file_path) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=",")
            next(csv_reader, None)  # skip the header row
            for row in csv_reader:
                graph.add_edge(row[0], row[1])
            n_clusters = networkx.number_connected_components(graph)
            # BUG FIX: "\#" is an invalid escape sequence (SyntaxWarning on
            # modern Pythons); "\\#" produces the same markdown-escaped hash.
            contents.append("#### \\# Functionalities = %d" % n_clusters)
            contents.append("```")
            # Plain loop instead of a side-effect-only list comprehension.
            for i, component in enumerate(
                    networkx.connected_components(graph)):
                contents.append("%d: %s" % (i, ",\n\t".join(component)))
            contents.append("```")
        LOGGER.info("For epsilon = %s, # clusters = %d" %
                    (epsilon, n_clusters))
    write_file = os.path.join(base_folder_path, "components.md")
    cache.write_file(write_file, "\n".join(contents))
Example #3
0
def extract_metadata_for_folder(dataset, problem_id=None):
    """Save metadata for every executable generated function in a dataset.

    Scans PYTHON_PROJECTS_HOME/<dataset> (narrowed to <problem_id> when
    given) for files whose names carry the generated prefix, and persists
    name / source / input-key / path metadata for each function that
    is_executable_function accepts.
    """
    sys.path.append(properties.PYTHON_PROJECTS_HOME)
    function_store = get_function_store(dataset)
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset)
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for source_path in cache.list_files(root_folder,
                                        check_nest=True,
                                        is_absolute=True):
        if not cache.get_file_name(source_path).startswith(
                a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s' ..." % helper.get_simple_name(source_path))
        for generated_func in helper.get_generated_functions(source_path):
            valid, func_key = is_executable_function(dataset, generated_func,
                                                     False)
            print(generated_func.__name__, func_key, valid)
            if not valid:
                continue
            function_store.save_py_metadata({
                "name": generated_func.__name__,
                "body": inspect.getsource(generated_func),
                "inputKey": func_key,
                "filePath": source_path
            })
    sys.path.remove(properties.PYTHON_PROJECTS_HOME)
Example #4
0
 def load_functions(self):
   """Collect the functions stored in every .json result file of this dataset."""
   results_folder = lib.get_dataset_functions_results_folder(self.dataset)
   loaded = []
   for result_file in cache.list_files(results_folder, check_nest=True, is_absolute=True):
     if result_file.endswith(".json"):
       loaded.extend(self.__load_functions_for_class(result_file))
   return loaded
Example #5
0
def execute(dataset, root_folder):
    """Run get_meta_for_file on every hand-written source file under root_folder.

    Files named "__init__" and files carrying the generated prefix are skipped.
    """
    for source_path in cache.list_files(root_folder,
                                        check_nest=True,
                                        is_absolute=True):
        name = cache.get_file_name(source_path)
        skip = name == "__init__" or name.startswith(a_consts.GENERATED_PREFIX)
        if not skip:
            get_meta_for_file(dataset, source_path)
Example #6
0
def export_methods(dataset):
    """Generate code for every hand-written source file of a dataset problem.

    Skips "__init__" files and already-generated files (generated prefix).

    NOTE(review): `problem` (below) is neither a parameter nor a local of
    this function — presumably a module-level global set elsewhere; confirm
    it exists, otherwise this raises NameError on the first call.
    """
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset,
                               problem)
    for file_path in cache.list_files(root_folder,
                                      check_nest=True,
                                      is_absolute=True):
        file_name = cache.get_file_name(file_path)
        # Skip package markers and files this pipeline itself generated.
        if file_name == "__init__" or file_name.startswith(
                a_consts.GENERATED_PREFIX):
            continue
        generate.generate_for_file(dataset, file_path)
Example #7
0
def execute_problem(dataset, problem_id=None):
    """Execute every generated file under a dataset (optionally one problem).

    Walks PYTHON_PROJECTS_HOME/<dataset> (narrowed to <problem_id> when
    given) and calls execute_file on each file carrying the generated prefix.
    """
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset)
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for generated_path in cache.list_files(root_folder,
                                           check_nest=True,
                                           is_absolute=True):
        name = cache.get_file_name(generated_path)
        if name.startswith(a_consts.GENERATED_PREFIX):
            LOGGER.info("Processing '%s'" %
                        helper.get_simple_name(generated_path))
            execute_file(dataset, generated_path)
Example #8
0
def get_valid_functions_from_folder(dataset, problem_id=None):
    """Count and report the valid generated functions under the projects home.

    For each generated file, logs how many of its function keys are valid,
    accumulates the union of valid keys, and finally logs the grand total
    and prints the accumulated key set.

    NOTE(review): unlike the sibling folder walkers, `dataset` is not
    joined into root_folder here — it is only forwarded to
    get_valid_function_keys_from_file; confirm that is intentional.
    """
    root_folder = properties.PYTHON_PROJECTS_HOME
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    accessed_keys = set()
    total_valid_functions = 0
    for generated_path in cache.list_files(root_folder,
                                           check_nest=True,
                                           is_absolute=True):
        if not cache.get_file_name(generated_path).startswith(
                a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s'" % helper.get_simple_name(generated_path))
        valid_keys, n_generated_functions = get_valid_function_keys_from_file(
            dataset, generated_path)
        LOGGER.info("Valid Functions: %d / %d\n" %
                    (len(valid_keys), n_generated_functions))
        accessed_keys.update(valid_keys)
        total_valid_functions += len(valid_keys)
    LOGGER.info("Total valid functions: %d" % total_valid_functions)
    print(accessed_keys)