示例#1
0
def getDependencies(input_file, output_file, git_hub_key, hop, partitions):
    """Look up the dependencies of every listed repository with a Spark job.

    Each non-blank line of ``input_file`` is treated as a repository link of
    the form ``...com/<owner>/<repo>``.  For every link the package names
    reported by ``xkcd2347.GitHub.get_dependencies`` are collected, and the
    mapping ``link -> [package names]`` is written to ``output_file`` as
    indented JSON.  Links without any dependency map to ``["No dependency"]``.

    If the Spark job fails, the error is printed and an empty JSON object is
    written, so the output file always exists afterwards.

    Args:
        input_file: Path of the text file holding one repository link per line.
        output_file: Path of the JSON file to (over)write.
        git_hub_key: Key passed to ``xkcd2347.GitHub``.
        hop: Passed as ``depth`` to ``get_dependencies``.
        partitions: Passed to ``SparkContext.textFile`` as its second
            (minimum partitions) argument.
    """
    # NOTE(review): ``gh`` is captured by the closure handed to Spark --
    # confirm the client can be serialised for the workers.
    gh = xkcd2347.GitHub(key=git_hub_key)

    def process_line(link):
        # Turn one repository link into a (link, package names) pair.
        link = link.rstrip()
        owner, repo = link.split(".com/")[1].split("/")
        packages = [
            dep['packageName'] for dep in gh.get_dependencies(
                repo_owner=owner, repo_name=repo, depth=hop)
        ]
        return (link, packages if packages else ["No dependency"])

    results = []
    sc = None  # stays None when the context itself cannot be created
    try:
        sc = SparkContext("local[*]", "PySpark Tutorial")
        results = (sc.textFile(input_file, partitions)
                   # Blank lines carry no link and would break the parsing.
                   .filter(lambda line: bool(line.strip()))
                   .map(process_line)
                   .collect())
    except Exception as exc:
        # Best effort: report the failure and still write an (empty) result.
        print("Error running spark job:", exc)
    finally:
        # Always release the context, but only if it was actually created.
        if sc is not None:
            sc.stop()

    with open(output_file, 'w') as fp:
        json.dump(dict(results), fp, indent=4)
def getDependencies(input_file, output_file, git_hub_key, hop):
    """Look up the dependencies of every listed repository, one by one.

    Each non-blank line of ``input_file`` is treated as a repository link of
    the form ``...com/<owner>/<repo>``.  For every link with at least one
    dependency, a single-entry JSON object ``{link: [package names]}`` is
    appended to ``output_file`` on its own line.  Links without dependencies
    are collected and written as a JSON list to a sibling file named
    ``no_result_data_<output file name>`` in the same directory as
    ``output_file``.

    Args:
        input_file: Path of the text file holding one repository link per line.
        output_file: Path of the file results are appended to.
        git_hub_key: Key passed to ``xkcd2347.GitHub``.
        hop: Passed as ``depth`` to ``get_dependencies``.
    """
    # Local import: no top-of-file import block is visible from this block.
    import os

    gh = xkcd2347.GitHub(key=git_hub_key)
    no_dependency_data = []

    with open(input_file, 'r') as fp:
        gitLink = fp.readlines()

    # Append mode: existing content of the output file is kept.
    with open(output_file, 'a') as fp:
        for link in tqdm(gitLink):
            link = link.rstrip()
            if not link:
                # Blank lines carry no link and would break the parsing below.
                continue
            owner, repo = link.split(".com/")[1].split("/")
            result = [
                dep['packageName'] for dep in gh.get_dependencies(
                    repo_owner=owner, repo_name=repo, depth=hop)
            ]
            if result:
                json.dump({link: result}, fp)
                fp.write("\n")
            else:
                no_dependency_data.append(link)

    # Prefix only the file name so that an output path with a directory
    # component ("out/deps.json") does not yield a bogus directory name.
    out_dir, out_name = os.path.split(output_file)
    no_result_path = os.path.join(out_dir, 'no_result_data_' + out_name)
    with open(no_result_path, 'w') as fp:
        json.dump(no_dependency_data, fp)
示例#3
0
def test_cache():
    """Check that dependencies are returned when a disk cache is supplied.

    A ``test-cache`` directory left over from an earlier run is wiped and
    recreated before it is handed to ``diskcache.Cache``.
    """
    cache_path = pathlib.Path('test-cache')
    if cache_path.exists():
        # Start from an empty directory rather than stale cache entries.
        shutil.rmtree(cache_path)
        cache_path.mkdir()

    disk_cache = diskcache.Cache(cache_path)
    client = xkcd2347.GitHub(key=key, cache=disk_cache)

    found = list(client.get_dependencies('docnow', 'twarc'))
    assert found
    first = found[0]
    assert first['repository']['owner']['login']
示例#4
0
def getDependencies(input_file, output_file, git_hub_key, hop):
    """Look up the dependencies of the first repository link in a file.

    Only the first non-blank line of ``input_file`` is processed.  It is
    treated as a repository link of the form ``...com/<owner>/<repo>``.  The
    package names reported by ``xkcd2347.GitHub.get_dependencies`` are written
    to ``output_file`` as an indented JSON object ``{link: [package names]}``.
    An empty JSON object is written when the file holds no link or the
    repository has no dependencies.

    Args:
        input_file: Path of the text file holding repository links.
        output_file: Path of the JSON file to (over)write.
        git_hub_key: Key passed to ``xkcd2347.GitHub``.
        hop: Passed as ``depth`` to ``get_dependencies``.
    """
    dependency_data = {}

    # Stop reading at the first usable line instead of loading the whole file.
    with open(input_file, 'r') as fp:
        link = next((line.rstrip() for line in fp if line.strip()), None)

    # NOTE: only a single link is handled here, not the full list.
    if link is not None:
        gh = xkcd2347.GitHub(key=git_hub_key)
        owner, repo = link.split(".com/")[1].split("/")
        result = [
            dep['packageName'] for dep in gh.get_dependencies(
                repo_owner=owner, repo_name=repo, depth=hop)
        ]
        if result:
            dependency_data[link] = result

    with open(output_file, 'w') as fp:
        json.dump(dependency_data, fp, indent=4)
示例#5
0
def test_get_dependencies():
    """Check that ``docnow/twarc`` yields at least one dependency record.

    The first record must carry a truthy ``repository.owner.login`` value.
    """
    client = xkcd2347.GitHub(key=key)
    found = list(client.get_dependencies('docnow', 'twarc'))
    assert found
    first = found[0]
    assert first['repository']['owner']['login']