Example #1
import pytest
from pathlib import Path

# convert_pdf_to_web and PAGE_ONE are assumed importable from the module under test.


def test_convert_pdf_page_to_web(tmp_fixture_dir):
    """Convert a single-page PDF to its web version."""

    pdf = Path(tmp_fixture_dir) / 'PDF' / PAGE_ONE

    # Make new file
    opt = convert_pdf_to_web(pdf)

    # Default file location
    assert opt == pdf.parent / 'WEB' / PAGE_ONE
    assert opt.exists()

    # Conversion changes file content
    assert opt.stat().st_size != pdf.stat().st_size

    # Calling again should not create a new file
    mtime = opt.stat().st_mtime
    opt2 = convert_pdf_to_web(pdf)
    assert opt2.stat().st_mtime == mtime

    # If the PDF has changed, the conversion should rerun
    pdf.touch()
    opt3 = convert_pdf_to_web(pdf)
    assert opt3.stat().st_mtime > mtime

    # A nonexistent file should raise an error
    false_pdf = pdf.with_name('nonexisting')
    assert not false_pdf.is_file()
    with pytest.raises(FileNotFoundError):
        convert_pdf_to_web(false_pdf)
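
The test pins down a small contract: the output lands in a WEB directory next to the input, conversion is skipped while the output is newer than the input, and a missing input raises FileNotFoundError. Below is a minimal sketch of a conforming implementation; the Ghostscript invocation is an assumption, not the original converter.

import subprocess
from pathlib import Path

def convert_pdf_to_web(pdf: Path) -> Path:
    if not pdf.is_file():
        raise FileNotFoundError(pdf)
    out = pdf.parent / 'WEB' / pdf.name
    # Cache: redo the conversion only when the input is newer than the output.
    if out.exists() and out.stat().st_mtime >= pdf.stat().st_mtime:
        return out
    out.parent.mkdir(exist_ok=True)
    # Assumed converter: Ghostscript's /screen preset produces a smaller web copy.
    subprocess.run(['gs', '-sDEVICE=pdfwrite', '-dPDFSETTINGS=/screen',
                    '-o', str(out), str(pdf)], check=True)
    return out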
Example #2
def generate(db, base_dir: str, conf_common: PVConf,
             conf_branches: BranchesConf, force: bool):
    dist_dir = os.path.join(base_dir, 'dists.new')
    pool_dir = os.path.join(base_dir, 'pool')
    dist_dir_real = os.path.join(base_dir, 'dists')
    dist_dir_old = os.path.join(base_dir, 'dists.old')
    shutil.rmtree(dist_dir, ignore_errors=True)
    for key in conf_branches:
        i = PosixPath(pool_dir).joinpath(key)
        if not i.is_dir():
            continue
        branch_name = i.name
        realbranchdir = os.path.join(dist_dir_real, branch_name)
        inrel = PosixPath(realbranchdir).joinpath('InRelease')
        expire_renewal_period = timedelta(days=conf_branches[branch_name].get(
            "renew_in", 1)).total_seconds()
        if not force and inrel.is_file():
            # See if we can skip this branch altogether
            inrel_mtime = inrel.stat().st_mtime
            inrel_sec_to_expire = get_valid_until_from_release(
                inrel) - datetime.now().timestamp()
            cur = db.cursor()
            cur.execute(
                "SELECT coalesce(extract(epoch FROM max(mtime)), 0) "
                "FROM pv_repos WHERE branch=%s", (branch_name, ))
            db_mtime = cur.fetchone()[0]
            cur.close()
            # Skip this branch if
            # -  p-vector does not recognize it (usually the branch is empty), OR
            # -  the on-disk InRelease is newer than the last DB update AND will
            #    not expire within the renewal period (default: 1 day).
            if not db_mtime or (inrel_mtime > db_mtime and
                                inrel_sec_to_expire > expire_renewal_period):
                shutil.copytree(realbranchdir,
                                os.path.join(dist_dir, branch_name))
                logger_rel.info('Skip generating Packages and Contents for %s',
                                branch_name)
                continue
        component_name_list = []
        for j in PosixPath(pool_dir).joinpath(branch_name).iterdir():
            if not j.is_dir():
                continue
            component_name = j.name
            component_name_list.append(component_name)
            logger_rel.info('Generating Packages for %s-%s', branch_name,
                            component_name)
            gen_packages(db, dist_dir, branch_name, component_name)
            logger_rel.info('Generating Contents for %s-%s', branch_name,
                            component_name)
            gen_contents(db, branch_name, component_name, dist_dir)

        conf = conf_common.copy()
        conf.update(conf_branches[branch_name])
        logger_rel.info('Generating Release for %s', branch_name)
        gen_release(db, branch_name, component_name_list, dist_dir, conf)
    if PosixPath(dist_dir_real).exists():
        os.rename(dist_dir_real, dist_dir_old)
    os.rename(dist_dir, dist_dir_real)
    shutil.rmtree(dist_dir_old, ignore_errors=True)
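
The tail of generate() publishes the freshly built tree with a rename dance, distilled below as a hedged sketch (names are illustrative). Two renames keep the window in which dists is missing very small; a fully atomic swap would need Linux's renameat2 with RENAME_EXCHANGE.

import os
import shutil

def publish(new_dir: str, live_dir: str, old_dir: str) -> None:
    if os.path.exists(live_dir):
        os.rename(live_dir, old_dir)            # retire the current live tree
    os.rename(new_dir, live_dir)                # promote the freshly built tree
    shutil.rmtree(old_dir, ignore_errors=True)  # drop the retired tree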
Example #3
    def get_item_dict(self,
                      item: PosixPath,
                      relative: Optional[PosixPath] = None) -> Dict[str, Any]:
        is_file: bool = item.is_file()
        stat = item.stat()  # stat once instead of once per field
        _dict: Dict[str, Any] = {
            "name": item.name,
            "full_path": str(item),
            "type": "file" if is_file else "folder",
            "size": stat.st_size,
            # Note: on Unix st_ctime is the inode change time, not creation time.
            "date": datetime.fromtimestamp(stat.st_ctime).date(),
        }
        if is_file:
            _dict["suffix"] = item.suffix
            _dict["used"] = str(item) in self.db_files
        else:
            _dict["files"] = []

        if relative:
            _dict["relative_path"] = str(item.relative_to(relative.parent))

        self.found_files.add(str(item))

        return _dict
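
A hedged usage sketch: get_item_dict is assumed to live on a scanner-like class that tracks db_files (paths referenced by the DB) and found_files (paths seen on disk); the harness and sample paths below are illustrative.

from pathlib import Path

class Scanner:
    def __init__(self, db_files: set):
        self.db_files = db_files       # paths already referenced in the DB
        self.found_files: set = set()  # paths discovered on disk

    # get_item_dict(...) as defined above goes here

scanner = Scanner(db_files={'/data/report.pdf'})
for entry in Path('/data').iterdir():
    info = scanner.get_item_dict(entry)
    print(info['name'], info['type'], info['size'])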
Example #4
    def read_stream(self, stream_name, fn: PosixPath):
        stream = self._streams[stream_name]
        with stream.open() as fd, open(fn, 'wb') as file:
            total = 100
            pbar = tqdm(total=total)
            data_loaded = 0
            while True:
                data = fd.read(1024)  # may raise IOError("Read timeout")
                if not data:
                    break
                file.write(data)
                data_loaded += 1  # counts 1 KiB chunks
                if data_loaded % 1024 == 0:  # roughly every 1 MiB
                    pbar.update(1)
                    pbar.set_description(
                        f"loaded: {data_loaded // 1024}MB")
                    # The total is unknown, so grow the bar before it fills up.
                    if data_loaded // 1024 == total - 1:
                        total += 100
                        pbar.total = total
            pbar.close()
        file_size = fn.stat().st_size
        print(f"loaded file: {fn}, size: {file_size // 1024 // 1024}MB")
Example #5
    def _get_date(self, file_path: PosixPath, date: str = "") -> str:
        """Return the given date; if it is empty, fall back to the
        file's last-modified date."""
        if date:
            return date
        timestamp = file_path.stat().st_mtime
        return time.strftime('%Y-%m-%d', time.localtime(timestamp))
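
A hedged usage sketch (the helper instance is assumed): an explicit date wins; otherwise the file's mtime is formatted as YYYY-MM-DD.

helper._get_date(PosixPath('notes.txt'), date='2024-05-01')  # -> '2024-05-01'
helper._get_date(PosixPath('notes.txt'))                     # -> e.g. '2024-03-17'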
Example #6
def scan_dir(db, base_dir: str, branch: str, component: str, branch_idx: int):
    pool_path = PosixPath(base_dir).joinpath('pool')
    search_path = pool_path.joinpath(branch).joinpath(component)
    compname = '%s-%s' % (branch, component)
    comppath = '%s/%s' % (branch, component)
    cur = db.cursor()
    cur.execute("""SELECT p.package, p.version, p.repo, p.architecture,
          p.filename, p.size, p.mtime, p.sha256
        FROM pv_packages p
        INNER JOIN pv_repos r ON p.repo=r.name WHERE r.path=%s
        UNION ALL
        SELECT p.package, p.version, p.repo, p.architecture,
          p.filename, p.size, p.mtime, p.sha256
        FROM pv_package_duplicate p
        INNER JOIN pv_repos r ON p.repo=r.name WHERE r.path=%s""",
        (comppath, comppath))
    dup_pkgs = set()
    ignore_files = set()
    modified_repo = set()
    del_list = []
    # For each package/version/architecture we already know in the DB:
    for package, version, repopath, architecture, filename, size, mtime, sha256 in cur:
        fullpath = PosixPath(base_dir).joinpath(filename)
        if fullpath.is_file():
            # If a package with the same name exists:
            stat = fullpath.stat()
            sfullpath = str(fullpath)
            # The file is unchanged if the size matches and either the mtime
            # or the SHA-256 hash matches; unchanged files are ignored.
            if size == stat.st_size and (mtime == int(stat.st_mtime) or
                    internal_pkgscan.sha256_file(sfullpath) == sha256):
                ignore_files.add(sfullpath)
            else:
                # Consider the new file to be a duplicate and replace the old one
                dup_pkgs.add(filename)
                del_list.append((filename, package, version, repopath))
        else:
            # If the package has been deleted
            del_list.append((filename, package, version, repopath))
            logger_scan.info('CLEAN  %s', filename)
            module_ipc.publish_change(
                compname, package, architecture, 'delete', version, '')
    # For each package/version/arch/repo to be deleted:
    for row in del_list:
        cur.execute("DELETE FROM pv_packages WHERE filename=%s", (row[0],))
        modified_repo.add(row[-1])  # row[-1] is the repo path
    # Check for newly added files: recursively scan the pool dir and take note
    # of anything we haven't seen yet.
    check_list = []
    for fullpath in search_path.rglob('*.deb'):
        if not fullpath.is_file():
            continue
        stat = fullpath.stat()
        sfullpath = str(fullpath)
        if sfullpath in ignore_files:
            continue
        check_list.append((sfullpath, str(fullpath.relative_to(base_dir)),
                           stat.st_size, int(stat.st_mtime)))
    del ignore_files
    with multiprocessing.dummy.Pool(max(1, (os.cpu_count() or 1) - 1)) as mpool:
        for pkginfo, depinfo, sodeps, files in mpool.imap_unordered(scan_deb, check_list, 5):
            realname = pkginfo['architecture']
            validdeb = ('debtime' in pkginfo)
            if realname == 'all':
                realname = 'noarch'
            if component != 'main':
                realname = component + '-' + realname
            repo = '%s/%s' % (realname, branch)
            cur.execute("INSERT INTO pv_repos VALUES (%s,%s,%s,%s,%s,%s,%s,now()) "
                "ON CONFLICT DO NOTHING",
                (repo, realname, comppath, branch_idx,
                branch, component, pkginfo['architecture']))
            modified_repo.add(repo)
            pkginfo['repo'] = repo
            dbkey = (pkginfo['package'], pkginfo['version'], repo)
            if pkginfo['filename'] in dup_pkgs:
                if validdeb:
                    logger_scan.info('UPDATE %s', pkginfo['filename'])
                    module_ipc.publish_change(
                        compname, pkginfo['package'], pkginfo['architecture'],
                        'overwrite', pkginfo['version'], pkginfo['version']
                    )
            else:
                cur.execute("SELECT version, filename FROM pv_packages "
                    "WHERE package=%s AND repo=%s", (pkginfo['package'], repo))
                results = cur.fetchall()
                if results:
                    oldver = max(results, key=lambda x: dpkg_vercomp_key(x[0]))
                    vercomp = internal_dpkg_version.dpkg_version_compare(
                        oldver[0], pkginfo['version'])
                    if vercomp == -1:
                        if validdeb:
                            logger_scan.info('NEWER  %s %s %s >> %s',
                                pkginfo['architecture'], pkginfo['package'],
                                pkginfo['version'], oldver[0])
                            module_ipc.publish_change(
                                compname, pkginfo['package'],
                                pkginfo['architecture'], 'upgrade',
                                oldver[0], pkginfo['version']
                            )
                    elif vercomp:
                        logger_scan.warning('OLD    %s %s %s',
                            pkginfo['architecture'], pkginfo['package'],
                            pkginfo['version'])
                    else:
                        cur.execute("DELETE FROM pv_package_sodep "
                            "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_files "
                            "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_dependencies "
                            "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_duplicate "
                            "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("INSERT INTO pv_package_duplicate "
                            "SELECT * FROM pv_packages WHERE filename=%s",
                            (oldver[1],))
                        cur.execute("DELETE FROM pv_packages "
                            "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        logger_scan.error('DUP    %s == %s',
                            oldver[1], pkginfo['filename'])
                elif validdeb:
                    logger_scan.info('NEW    %s %s %s', pkginfo['architecture'],
                        pkginfo['package'], pkginfo['version'])
                    module_ipc.publish_change(
                        compname, pkginfo['package'], pkginfo['architecture'],
                        'new', '', pkginfo['version']
                    )
            keys, qms, vals = internal_db.make_insert(pkginfo)
            cur.execute("INSERT INTO pv_packages (%s) VALUES (%s)" %
                (keys, qms), vals)
            for row in depinfo.items():
                cur.execute("INSERT INTO pv_package_dependencies "
                    "VALUES (%s,%s,%s,%s,%s) "
                    "ON CONFLICT ON CONSTRAINT pv_package_dependencies_pkey "
                    "DO UPDATE SET value = %s",
                    dbkey + row + (row[1],))
            for row in sodeps:
                cur.execute("INSERT INTO pv_package_sodep VALUES "
                    "(%s,%s,%s,%s,%s,%s)", dbkey + row)
            for row in files:
                cur.execute("INSERT INTO pv_package_files VALUES "
                    "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", dbkey + row)
    for repo in modified_repo:
        cur.execute("UPDATE pv_repos SET mtime=now() WHERE name=%s", (repo,))