Пример #1
0
def statistic_dataset():
    """Print [max, min, avg] statistics of [python files, LOC, import lines] over the GitHub dataset."""
    print("Statistic Github dataset")
    dataset_root = EGO_GITHUB_ROOT
    metadata = read_object_from_file(os.path.join(dataset_root, "metadata.json"))
    statistic_all(dataset_root, metadata)
Пример #2
0
def statistic_pkgs_ego():
    """Count the packages installed by PyEGo-generated Dockerfiles."""
    print("Statistic PyEGo-installed packages")
    root = EGO_GITHUB_ROOT
    metadata = read_object_from_file(os.path.join(root, "metadata.json"))
    count_ego_all(root, metadata=metadata)
Пример #3
0
def statistic_pkgs_me():
    """Count the packages installed by DockerizeMe-generated Dockerfiles."""
    print("Statistic DockerizeMe-installed packages")
    # Metadata lives under the PyEGo dataset root; the package count itself
    # runs over the DockerizeMe (Python 3.9) root.
    metadata = read_object_from_file(os.path.join(EGO_GITHUB_ROOT, "metadata.json"))
    count_ego_all(ME_GITHUB_ROOT_39, metadata=metadata)
Пример #4
0
def test_time_ego():
    """Measure the wall-clock cost of PyEGo (generation only) on the GitHub dataset."""
    root = EGO_GITHUB_ROOT
    metadata = read_object_from_file(os.path.join(root, "metadata.json"))
    begin = datetime.now()
    batch_test(root, metadata, "PyEGo", generate_only=True)
    elapsed = (datetime.now() - begin).seconds
    # NOTE(review): the per-item average assumes exactly 100 items -- confirm
    # against the dataset size.
    print("total: {}s, avg: {}s/item".format(elapsed, 1. * elapsed / 100))
Пример #5
0
def test_time_pipreqs():
    """Measure the wall-clock cost of pipreqs (generation only) on the GitHub dataset."""
    # cannot test here, because os.popen is async function
    begin = datetime.now()
    root = REQS_GITHUB_ROOT_39
    metadata = read_object_from_file(os.path.join(root, "metadata.json"))
    batch_test(root, metadata, "pipreqs", generate_only=True)
    elapsed = (datetime.now() - begin).seconds
    # NOTE(review): the per-item average assumes exactly 100 items -- confirm
    # against the dataset size.
    print("total: {}s, avg: {}s/item".format(elapsed, 1. * elapsed / 100))
Пример #6
0
def commit(commit_message):
  ''' Commit current index to disk.

  Reads the staged index, stores it as a blob object named after a
  representative hash, appends (hash, blob path) to the commit log,
  persists the log, and removes the consumed index file.
  '''
  repo_path = find_pygit_repo()

  current_index_path = repo_path + '/' + globalVars.current_index_file_name

  commit_log_path = repo_path + '/' + globalVars.commit_log

  # Bug fix: the original test was inverted -- an *absent* index file means
  # there is nothing staged to commit.
  if not os.path.exists(current_index_path):
    sys.stdout.write('Nothing to commit.')
  else:
    # This should be a more interesting data structure.
    if os.path.exists(commit_log_path):
      commit_log_list = utils.read_object_from_file(commit_log_path)
    else:
      commit_log_list = []

    # do stuff with commit_log_list
    current_index_dict = utils.read_object_from_file(current_index_path)
    # Bug fix: original had invalid syntax `for k, v = d.iteritems():`.
    for filename, file_contents in current_index_dict.items():
      # do nothing for now. Compute hash later
      pass

    # Get first file name as representative hash string.
    # Bug fix: dict item views are not indexable -- take the first key via
    # next(iter(...)) instead of `iteritems()[0][0]`.
    representative_hash_string = utils.compute_string_hash(next(iter(current_index_dict)))

    if not os.path.exists(globalVars.blob_object_location):
      utils.write_error_message_and_exit("Broken pygit repo. Cannot find blob objects location")
    else:
      # Bug fix: original was missing the `+` and had a stray `)`.
      current_commit_file_name = globalVars.blob_object_location + '/' + representative_hash_string
      utils.write_object_to_file(current_commit_file_name, current_index_dict)

    commit_log_list.append((representative_hash_string, current_commit_file_name))

    # Bug fix: the object to persist was missing from the original call.
    utils.write_object_to_file(commit_log_path, commit_log_list)

    os.remove(current_index_path)
Пример #7
0
def compare_pkgs_ego_pipreqs():
    """Count packages installed in projects solved by BOTH PyEGo and pipreqs.

    Requires the execution logs of PyEGo and pipreqs.
    """
    # Bug fix: the original message read "Compare Pipreqs-3.9 with DockerizeMe",
    # which describes a different comparison than this function performs
    # (it compares PyEGo with pipreqs-3.9, per count_same_ego_reqs below).
    print("Compare PyEGo with Pipreqs-3.9")
    ego_root = EGO_GITHUB_ROOT
    ego_log = EGO_GITHUB_LOG
    reqs_root = REQS_GITHUB_ROOT_39
    reqs_log = REQS_GITHUB_LOG_39
    meta_path = os.path.join(ego_root, "metadata.json")
    metadata = read_object_from_file(meta_path)
    count_same_ego_reqs(ego_root,
                        ego_log,
                        reqs_root,
                        reqs_log,
                        metadata=metadata)
Пример #8
0
def generate_github_overview():
    """Write a CSV overview of all tools' results on the GitHub dataset."""
    # NOTE(review): the pipreqs entry passes "PyEGo" as its second argument,
    # unlike the DockerizeMe entry -- confirm this is intentional.
    ego_log = TestResults("PyEGo", "PyEGo", dataset="github", log_path=EGO_GITHUB_LOG)
    me_log_39 = TestResults("DockerizeMe-3.9", "DockerizeMe", dataset="github", log_path=ME_GITHUB_LOG_39)
    reqs_log_39 = TestResults("pipreqs-3.9", "PyEGo", dataset="github", log_path=REQS_GITHUB_LOG_39)

    metadata = read_object_from_file(os.path.join(EGO_GITHUB_ROOT, "metadata.json"))

    # Replace numeric project ids with human-readable names before reporting.
    logs = [convert_id_to_name(log, metadata)
            for log in (ego_log, me_log_39, reqs_log_39)]
    output = "./result_github.csv"
    generate_overview(logs, output)
Пример #9
0
def compare_pkgs_ego_me():
    """Count packages installed in projects solved by BOTH PyEGo and DockerizeMe.

    Requires the execution logs of PyEGo and DockerizeMe; results for
    DockerizeMe-3.8 and DockerizeMe-3.9 are the same.
    """
    print("Compare PyEGo with DockerizeMe")
    meta_path = os.path.join(EGO_GITHUB_ROOT, "metadata.json")
    metadata = read_object_from_file(meta_path)
    count_same_ego_me(EGO_GITHUB_ROOT,
                      EGO_GITHUB_LOG,
                      ME_GITHUB_ROOT_39,
                      ME_GITHUB_LOG_39,
                      dataset="github",
                      metadata=metadata)
Пример #10
0
def add(relative_file_path):
  '''Stage a file: record its path in the pygit index set.'''
  try:
    repo_root = utils.find_pygit_repo()
  except utils.RepoNotFoundException:
    sys.stderr.write('Could not find pygit repo.')
    sys.exit(41)

  # Refuse to stage a path that does not exist on disk.
  if not os.path.exists(relative_file_path):
    sys.stderr.write('File to add does not exist.')
    sys.exit(43)

  # Check if previously tracked and so forth for other status information.

  original_cwd = os.getcwd()
  os.chdir(repo_root + '/.pygit')
  staged = utils.read_object_from_file(globalVars.index_file_name)
  staged.add(relative_file_path)
  utils.write_object_to_file(globalVars.index_file_name, staged)
  os.chdir(original_cwd)
Пример #11
0
def run_test_pipreqs(pyver="3.8"):
    """Run pipreqs on the GitHub dataset; results go to log/github_test.<YYYYMMDD>.log.

    NOTE(review): the default pyver is "3.8" while the dataset root is the
    _39 variant -- confirm the intended Python version.
    """
    root = REQS_GITHUB_ROOT_39
    metadata = read_object_from_file(os.path.join(root, "metadata.json"))
    batch_test(root, metadata, "pipreqs", False, pyver)
Пример #12
0
def run_test_ego():
    """Run PyEGo on the GitHub dataset; results go to log/github_test.<YYYYMMDD>.log."""
    root = EGO_GITHUB_ROOT
    metadata = read_object_from_file(os.path.join(root, "metadata.json"))
    batch_test(root, metadata, "PyEGo")
Пример #13
0
import os
import re

import config
from ModuleParser.file_parser import parse_import_modules
from ModuleParser.folder_parser import extract_all_py_filepath, parse_custom_top_levels
from ModuleParser.module_filter import filter_custom_modules, apart_standard_modules
from utils import read_object_from_file, write_object_to_file
from ModuleParser.neo4j_reader import get_pyvers_by_module, get_all_pyvers, \
    get_python_features, get_pkgvers_by_module_pyvers, get_os_by_pkg, get_std_top_modules, get_rank_by_pkg

# Module-level cache of standard-library top-level module names.
# Loaded from the on-disk cache when present; otherwise rebuilt via
# get_std_top_modules() and written back so later runs skip the lookup.
std_top_levels = read_object_from_file(config.STD_TOP_CACHE_PATH)
if not std_top_levels:
    std_top_levels = get_std_top_modules()
    write_object_to_file(config.STD_TOP_CACHE_PATH, std_top_levels)


def parse_modules(root):
    # both file and folder are ok
    # extract all custom top level modules and python files
    custom_top_levels = list()
    all_py_filepath = list()
    if os.path.isdir(root):
        custom_top_levels.extend(parse_custom_top_levels(root, need_init=False))
        all_py_filepath.extend(extract_all_py_filepath(root))
    elif root.endswith(".py"):
        all_py_filepath.append(root)
    else:
        return None, None, None, None

    # extract top and second level modules used
Пример #14
0
        pkg, method = pkgmth.split("#")
        if method == "pip" or pkg == "python":
            continue
        dep_dict[pkgmth] = dict()
        ver_dict = pkgver_dict[pkgmth]
        for ver in ver_dict:
            dep_dict[pkgmth][ver] = list()
    return dep_dict


def convert_pkgvers_to_constrain_dict(pkgvers):
    """Flatten a {"pkg#method": versions} mapping into a constraint dict.

    pip-managed packages and the "python" entry are keyed by bare package
    name; every other entry keeps its original "pkg#method" key.
    """
    constraints = dict()
    for key, versions in pkgvers.items():
        name, method = key.split("#")
        use_bare_name = method == "pip" or name == "python"
        constraints[name if use_bare_name else key] = versions
    return constraints


# Module-level cache of pip package dependency data.
# Loaded from disk when available; otherwise rebuilt via get_pip_deps()
# (not defined in this chunk -- presumably a Neo4j query like the other
# readers; verify) and persisted for subsequent runs.
pip_deps_dict = read_object_from_file(config.PIP_DEPS_CACHE_PATH)
if not pip_deps_dict:
    print("Caching pip dependencies in file...")
    pip_deps_dict = get_pip_deps()
    write_object_to_file(config.PIP_DEPS_CACHE_PATH, pip_deps_dict)
    print("Dependencies cached.")