def do_import_languages(file_data: List[dict]):
    """Insert one ``ProgrammingLanguage`` row per distinct language classifier.

    Every classifier containing ``'Programming Language'`` is reduced to a
    short id built from its last two ``::``-separated segments (after
    dropping ``'Implementation ::'``); the untouched classifier text is stored
    as the description. Each distinct id is added and committed once per call.

    :param file_data: package records; each is a dict that may carry an
        ``'info'`` dict holding a ``'classifiers'`` list.
    """
    imported = set()
    print("Importing languages ... ", flush=True)

    # One session for the whole run, always released, rather than a fresh
    # never-closed session for every new language.
    session: Session = DbSession.factory()
    try:
        with progressbar.ProgressBar(max_value=len(file_data)) as bar:
            for idx, p in enumerate(file_data):
                # Records without 'info' / 'classifiers' are skipped instead
                # of failing on None.
                info = p.get('info') or {}
                for original in info.get('classifiers') or ():
                    if 'Programming Language' not in original:
                        continue

                    c = original.replace('Implementation ::', '').replace('::', ':')
                    text = c
                    parts = c.split(':')
                    if len(parts) > 1:
                        # NOTE(review): replace() is a single pass, so a run
                        # of three or more spaces is not fully collapsed.
                        # Left as-is because the result is the stored id.
                        text = ' '.join(parts[-2:]).strip().replace('  ', ' ')

                    if text in imported:
                        continue
                    imported.add(text)

                    lang = ProgrammingLanguage()
                    lang.description = original
                    lang.id = text
                    session.add(lang)
                    session.commit()

                bar.update(idx)
    finally:
        session.close()

    sys.stderr.flush()
    sys.stdout.flush()
def insert_a_package():
    """Interactively build a package with two releases and store it.

    Prompts on stdin for the package fields, then for the major / minor /
    build version of two releases (sizes fixed at 100_000 and 200_000), and
    commits the package together with its releases.

    :raises ValueError: if a version prompt is answered with a non-integer.
    """
    p = Package()
    p.id = input("Package name: ")
    p.summary = input("Package summary: ")
    p.author_name = input("Author name: ")
    p.author_email = input("Author email: ")
    p.license = input("license: ")

    # Both releases are entered the same way; only the label and the
    # hard-coded size differ.
    for number, size in enumerate((100_000, 200_000), start=1):
        release = Release()
        print("Release {}: ".format(number))
        release.major_ver = int(input('Major version:'))
        release.minor_ver = int(input('Minor version:'))
        release.build_ver = int(input('Build version:'))
        release.size = size
        p.releases.append(release)

    session = DbSession.factory()
    try:
        session.add(p)
        session.commit()
    finally:
        # Release the session even when the commit fails.
        session.close()
示例#3
0
def latest_releases(limit=10) -> List[Package]:
    """Return the packages behind the most recently created releases.

    The newest ``limit * 2`` releases are fetched (newest first) and mapped to
    their packages; the first ``limit`` entries are returned in release order.
    A package contributes one entry per release in that window, so the same
    package can appear more than once.

    NOTE(review): the ``limit * 2`` over-fetch looks like it was meant to
    leave room for de-duplication - confirm whether duplicates are wanted.

    :param limit: maximum number of entries to return; a non-positive value
        yields an empty list.
    :raises KeyError: if a release points at a package id that has no row.
    """
    if limit <= 0:
        # Nothing can be returned, so skip the database entirely.
        return []

    session: Session = DbSession.factory()
    try:
        # Materialise once: iterating the lazy query again would re-run it,
        # and doing so after close() would silently reopen the session.
        releases = session.query(Release) \
            .order_by(Release.created_date.desc()) \
            .limit(limit * 2) \
            .all()

        packages_in_order = [r.package_id for r in releases]
        if not packages_in_order:
            return []

        packages = {
            p.id: p
            for p in session.query(Package).filter(
                Package.id.in_(set(packages_in_order)))
        }
    finally:
        session.close()

    return [packages[package_id] for package_id in packages_in_order[:limit]]
def do_summary():
    """Print the row count of each imported table to stdout."""
    tables = (
        ("Users", User),
        ("Packages", Package),
        ("Releases", Release),
        ("Maintainers", Maintainer),
        ("Languages", ProgrammingLanguage),
        ("Licenses", License),
    )

    session = DbSession.factory()
    try:
        print("Final numbers:")
        for label, model in tables:
            print("{}: {:,}".format(label, session.query(model).count()))
    finally:
        # The session used to be left open after the report.
        session.close()
示例#5
0
def maintainers_for_packages(package_name: str) -> List[User]:
    """Return the users registered as maintainers of a package.

    Looks up the ``Maintainer`` rows whose ``package_id`` equals
    *package_name* and returns the ``User`` rows for their ``user_id`` values.

    :param package_name: package id to look up.
    :return: list of ``User`` objects (empty when there are no maintainers).
    """
    session: Session = DbSession.factory()
    try:
        user_ids = [
            r.user_id for r in session.query(Maintainer).filter(
                Maintainer.package_id == package_name)
        ]
        if not user_ids:
            # Avoid issuing a query with an empty IN () clause.
            return []
        return list(session.query(User).filter(User.id.in_(user_ids)))
    finally:
        session.close()
def load_package(data: dict, user_lookup: Dict[str, User]):
    """Store one package record with its releases and maintainers.

    Records without a usable ``'package_name'`` are ignored. Maintainers are
    linked only when their email is present in *user_lookup*. An
    ``OverflowError`` raised while building or saving the record (absurd
    sizes in the source data) causes the record to be skipped silently.

    :param data: raw package record with ``'package_name'``, ``'info'`` and
        ``'releases'`` entries.
    :param user_lookup: users keyed by email address.
    """
    # Validate the name before building anything; tolerate None as well as
    # a missing key.
    package_id = (data.get('package_name') or '').strip()
    if not package_id:
        return

    info = data.get('info') or {}

    try:
        p = Package()
        p.id = package_id

        p.author = info.get('author')
        p.author_email = info.get('author_email')

        releases = build_releases(p.id, data.get("releases", {}))

        if releases:
            p.created_date = releases[0].created_date

        maintainers_lookup = get_email_and_name_from_text(
            info.get('maintainer'), info.get('maintainer_email'))
        maintainers = []
        for email, _name in maintainers_lookup.items():
            user = user_lookup.get(email)
            if not user:
                continue

            m = Maintainer()
            m.package_id = p.id
            m.user_id = user.id
            maintainers.append(m)

        p.summary = info.get('summary')
        p.description = info.get('description')

        p.home_page = info.get('home_page')
        p.docs_url = info.get('docs_url')
        p.package_url = info.get('package_url')

        p.license = detect_license(info.get('license'))

        session = DbSession.factory()
        try:
            session.add(p)
            session.add_all(releases)
            if maintainers:
                session.add_all(maintainers)
            session.commit()
        finally:
            # Always release the session, including when the commit fails.
            session.close()
    except OverflowError:
        # What the heck, people just putting fake data in here
        # Size is terabytes...
        pass
示例#7
0
def create_user(email: str, name: str, password: str) -> User:
    """Create and persist a new user.

    The email is lower-cased and stripped before storing; the password is
    stored only as the value returned by ``hash_text``.

    :param email: email address of the new user.
    :param name: display name.
    :param password: plain-text password to hash.
    :return: the ``User`` object that was committed.
    """
    user = User()
    user.name = name
    user.email = email.lower().strip()
    user.hashed_password = hash_text(password)

    session: Session = DbSession.factory()
    try:
        session.add(user)
        session.commit()
    finally:
        # Release the session even if the commit fails.
        session.close()

    return user
def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
    """Insert one ``User`` per entry of *user_lookup* and return all users.

    :param user_lookup: display names keyed by email address.
    :return: every ``User`` row in the database keyed by email. The objects
        are detached from their session when returned.
    """
    print("Importing users ... ", flush=True)

    # A single session for the whole import, always closed, instead of one
    # never-closed session per user.
    session: Session = DbSession.factory()
    session.expire_on_commit = False
    try:
        with progressbar.ProgressBar(max_value=len(user_lookup)) as bar:
            for idx, (email, name) in enumerate(user_lookup.items()):
                user = User()
                user.email = email
                user.name = name
                session.add(user)

                session.commit()
                bar.update(idx)
    finally:
        session.close()

    print()
    sys.stderr.flush()
    sys.stdout.flush()

    session = DbSession.factory()
    try:
        # The dict is fully built before the session is released.
        return {u.email: u for u in session.query(User)}
    finally:
        session.close()
def main():
    """Initialise the database, import the data set if empty, print totals.

    The import (users, packages, languages, licenses) runs only when the
    user table has no rows; the summary is printed either way.
    """
    init_db()

    session = DbSession.factory()
    try:
        existing_users = session.query(User).count()
    finally:
        # Close the session even if the count query fails.
        session.close()

    if existing_users == 0:
        file_data = do_load_files()
        users = find_users(file_data)

        db_users = do_user_import(users)
        do_import_packages(file_data, db_users)

        do_import_languages(file_data)
        do_import_licenses(file_data)

    do_summary()
示例#10
0
def find_package_by_name(package_name: str) -> Optional[Package]:
    """Return the package whose id equals *package_name*, with releases loaded.

    :param package_name: package id to look up.
    :return: the first matching ``Package``, or ``None`` when nothing matches.
    """
    session: Session = DbSession.factory()
    try:
        query = session.query(Package)
        query = query.filter(Package.id == package_name)
        query = query.options(subqueryload(Package.releases))
        found = query.first()

        if found:
            # Touch the relationship while the session is still open.
            # noinspection PyUnusedLocal
            _ = found.releases

        return found
    finally:
        session.close()
示例#11
0
def login_user(email: str, password: str) -> Optional[User]:
    """Return the user matching *email* if *password* verifies, else ``None``.

    The email is lower-cased and stripped before the lookup. A missing,
    empty or whitespace-only email is rejected without querying the database.

    :param email: email address entered by the user.
    :param password: plain-text password checked with ``verify_hash``.
    """
    if not email:
        return None

    email = email.lower().strip()
    if not email:
        # Whitespace-only input: nothing meaningful to look up.
        return None

    session: Session = DbSession.factory()
    try:
        user = session.query(User).filter(User.email == email).first()
    finally:
        # Release the session even when the query fails.
        session.close()

    if not user:
        return None

    if not verify_hash(user.hashed_password, password):
        return None

    return user
示例#12
0
def do_import_licenses(file_data: List[dict]):
    """Insert one ``License`` row per distinct detected license.

    ``detect_license`` is applied to each record's ``info['license']``; every
    distinct non-empty result becomes a ``License`` id, with the raw license
    text of the first record that produced it stored as the description.

    :param file_data: package records; each is a dict that may carry an
        ``'info'`` dict with a ``'license'`` entry.
    """
    imported = set()
    print("Importing licenses ... ", flush=True)

    # One session for the whole run, always released, rather than a fresh
    # never-closed session for every new license.
    session: Session = DbSession.factory()
    try:
        with progressbar.ProgressBar(max_value=len(file_data)) as bar:
            for idx, p in enumerate(file_data):
                # A record without 'info' is treated as having no license.
                info = p.get('info') or {}
                raw_license = info.get('license')
                license_text = detect_license(raw_license)

                if license_text and license_text not in imported:
                    imported.add(license_text)

                    package_license = License()
                    package_license.id = license_text
                    package_license.description = raw_license

                    session.add(package_license)
                    session.commit()

                bar.update(idx)
    finally:
        session.close()

    sys.stderr.flush()
    sys.stdout.flush()
def init_db():
    """Pass the path of ``db/pypi.sqlite`` inside the ``pypi_vm.app`` package
    folder to ``DbSession.global_init``."""
    app_dir = os.path.dirname(pypi_vm.app.__file__)
    DbSession.global_init(os.path.join(app_dir, 'db', 'pypi.sqlite'))
示例#14
0
def find_user_by_id(user_id: int) -> Optional[User]:
    """Return the first user whose ``id`` equals *user_id*, or ``None``."""
    session: Session = DbSession.factory()
    try:
        matches = session.query(User).filter(User.id == user_id)
        found = matches.first()
    finally:
        session.close()
    return found
示例#15
0
def find_user_by_email(email: str) -> Optional[User]:
    """Return the first user whose ``email`` equals *email* exactly, or ``None``."""
    session: Session = DbSession.factory()
    try:
        matches = session.query(User).filter(User.email == email)
        found = matches.first()
    finally:
        session.close()
    return found
示例#16
0
def user_count() -> int:
    """Return the number of rows in the ``User`` table."""
    session: Session = DbSession.factory()
    try:
        total = session.query(User).count()
    finally:
        session.close()
    return total
示例#17
0
def package_count() -> int:
    """Return the number of rows in the ``Package`` table."""
    session: Session = DbSession.factory()
    try:
        total = session.query(Package).count()
    finally:
        session.close()
    return total
示例#18
0
def init_db():
    """Pass the absolute path of ``db/pypi.sqlite`` next to this module to
    ``DbSession.global_init``."""
    module_dir = os.path.dirname(__file__)
    sqlite_path = os.path.join(module_dir, 'db', 'pypi.sqlite')
    DbSession.global_init(os.path.abspath(sqlite_path))
示例#19
0
def all_packages(limit: int) -> List[Package]:
    """Return at most *limit* packages as a list."""
    session: Session = DbSession.factory()
    try:
        capped = session.query(Package).limit(limit)
        packages = capped.all()
    finally:
        session.close()
    return packages
示例#20
0
def release_count() -> int:
    """Return the number of rows in the ``Release`` table."""
    session: Session = DbSession.factory()
    try:
        total = session.query(Release).count()
    finally:
        session.close()
    return total