Exemplo n.º 1
0
def test_load_weblist():
    """Smoke-test loading a weblist for the github, npm and pypi registries.

    For each registry the same pipeline is run: load the weblist, call
    ``trim(10)``, fetch project metadata and the latest versions, update
    the dataset's name/version, back it up to disk, restore it from that
    backup, and export it as an input set.

    The backup and export files are removed in a ``finally`` clause so a
    failure in any step does not leave stale artifacts behind for later
    test runs.
    """
    backup_path = '../test.p'
    export_path = '../test.json'

    # (weblist name, registry, extra keyword arguments for load_web)
    # Only the github case passes a personal access token.
    cases = [
        ('top1kstarred', 'github', {'github_pat': os.getenv('GITHUB_PAT')}),
        ('allbydependents', 'npm', {}),
        ('top4kyear', 'pypi', {}),
    ]

    try:
        for name, registry, extra_kwargs in cases:
            ds = Dataset.load_web(name,
                                  registry=registry,
                                  from_type='list',
                                  cache_dir=CACHE_DIR,
                                  debug=True,
                                  **extra_kwargs)
            ds.trim(10)
            ds.get_projects_meta()
            ds.get_project_versions(historical='latest')
            ds.update(**{'name': 'test', 'version': '1.0'})
            ds.backup(backup_path)
            ds = Dataset.restore(backup_path)
            ds.export_inputset(export_path)
    finally:
        # cleanup files; guard with exists() so a failure before the files
        # were written is not masked by a FileNotFoundError from cleanup
        for path in (backup_path, export_path):
            if os.path.exists(path):
                os.remove(path)
Exemplo n.º 2
0
def get_fedora_packages():
    """Return the set of project names in the "fedora" portingdb weblist.

    The list is loaded through ``Dataset.load_web`` using
    ``R2C_WEB_CACHE`` as the cache directory; each project's
    ``get_name()`` result is collected into a set, so duplicate names
    collapse to one entry.
    """
    dataset = Dataset.load_web(
        name="fedora",
        registry="portingdb",
        from_type="list",
        cache_dir=R2C_WEB_CACHE,
    )
    return {package.get_name() for package in dataset.projects}
Exemplo n.º 3
0
def get_top_packages(kind="top4kmonth"):
    """Yield the name of every project in a pypi weblist.

    ``kind`` is the weblist name handed to ``Dataset.load_web``
    (default ``"top4kmonth"``). Because this is a generator, the list is
    not loaded until the first item is requested.
    """
    dataset = Dataset.load_web(
        name=kind,
        registry="pypi",
        from_type="list",
        cache_dir=R2C_WEB_CACHE,
    )
    yield from (package.get_name() for package in dataset.projects)
Exemplo n.º 4
0
def get_opensuse_packages(project):
    """Return the set of package names in an opensuse weblist.

    ``project`` is the weblist name handed to ``Dataset.load_web`` for
    the "opensuse" registry. Names are collected into a set so that
    entries producing the same ``get_name()`` value (the original
    comment cites python2-cmd2 and python-cmd2) appear only once.
    """
    dataset = Dataset.load_web(
        name=project,
        registry="opensuse",
        from_type="list",
        cache_dir=R2C_WEB_CACHE,
    )

    # A set avoids dups like python2-cmd2 and python-cmd2.
    return {package.get_name() for package in dataset.projects}
Exemplo n.º 5
0
def load(ctx, registry, from_type, name_or_path, fileargs):
    """Generates a dataset from a weblist name or file path.

    Args:
        ctx: Context object whose ``obj`` mapping holds the current
            dataset under the ``'dataset'`` key.
        registry: Registry name; the sentinel ``'noreg'`` is converted
            to ``None`` before loading.
        from_type: ``'file'`` loads via ``Dataset.load_file``; any other
            value is forwarded to ``Dataset.load_web``.
        name_or_path: Weblist name or file path to load.
        fileargs: Only used for file loads (a csv header string or a
            json parser handle, per the inline comment below).

    On success the new dataset replaces ``ctx.obj['dataset']`` and the
    module-level ``TEMP_SETTINGS`` dict is reset. On any exception the
    error is reported via ``print_error`` and the previous dataset is
    restored from a deep copy taken before loading. The exception is not
    re-raised.
    """
    global TEMP_SETTINGS

    backup_ds = None
    # Tracks whether the backup copy was actually made. If taking the
    # backup itself fails, restoring ``backup_ds`` (still None) would
    # wipe out the existing dataset instead of preserving it.
    backup_taken = False

    try:
        backup_ds = deepcopy(ctx.obj.get('dataset', None))
        backup_taken = True

        if registry == 'noreg':
            registry = None

        if from_type == 'file':
            # read in a file (fileargs is either a header string for csv
            # or a parser handle for json)
            ds = Dataset.load_file(name_or_path,
                                   registry,
                                   fileargs=fileargs,
                                   **TEMP_SETTINGS)

        else:
            # download a weblist or organization repo list
            ds = Dataset.load_web(name_or_path,
                                  registry,
                                  from_type=from_type,
                                  **TEMP_SETTINGS)

        ctx.obj['dataset'] = ds

        # reset the temporary api/metadata dict
        TEMP_SETTINGS = dict()

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset, but only if a backup exists;
        # otherwise leave whatever is currently stored untouched
        if backup_taken:
            ctx.obj['dataset'] = backup_ds