示例#1
0
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        """Download the repository's "primary" metadata file into statepath.

        Reads repodata/repomd.xml below self.url, takes the href of the
        location element of the data entry with type="primary", downloads
        that file, gunzips it and stores the result at statepath. The data
        is written to statepath + '.tmp' first and then moved into place
        with os.replace().

        Nothing is fetched (only a log line is emitted) when statepath is
        already a file and update is false.

        Args:
            statepath: path of the file to (re)create.
            update: when false, an existing statepath is left untouched.
            logger: object providing Log() and GetIndented().
        """
        tmppath = statepath + '.tmp'

        if not update and os.path.isfile(statepath):
            logger.Log('no update requested, skipping')
            return

        # repomd.xml is the index that names the primary metadata file
        index_url = self.url + 'repodata/repomd.xml'
        logger.Log('fetching metadata from ' + index_url)
        index_root = xml.etree.ElementTree.fromstring(
            Get(index_url, check_status=True).text)

        ns = '{http://linux.duke.edu/metadata/repo}'
        location = index_root.find(
            ns + 'data[@type="primary"]/' + ns + 'location')
        primary_url = self.url + location.attrib['href']

        logger.Log('fetching ' + primary_url)
        payload = Get(primary_url).content
        logger.GetIndented().Log('size is {} byte(s)'.format(len(payload)))

        logger.GetIndented().Log('decompressing with gzip')
        unpacked = gzip.decompress(payload)
        logger.GetIndented().Log(
            'size after decompression is {} byte(s)'.format(len(unpacked)))

        logger.GetIndented().Log('saving')
        with open(tmppath, 'wb') as statefile:
            statefile.write(unpacked)

        # swap the finished file into place in one step
        os.replace(tmppath, statepath)
示例#2
0
    def DoFetch(self, statepath, update, logger):
        """Fetch package info in batches and store each raw response.

        Downloads the package name list from self.url + 'packages.gz'
        (one name per line; blank lines and lines starting with '#' are
        ignored), then requests '/rpc/?v=5&type=info' for pagesize names at
        a time and writes each response body to '<page>.json' inside
        statepath. The request format looks like the AUR RPC interface --
        NOTE(review): confirm against the configured self.url.

        Args:
            statepath: directory the per-page JSON files are written into.
            update: not used by this method.
            logger: object providing Log() and GetIndented().
        """
        packages_url = self.url + 'packages.gz'
        logger.GetIndented().Log('fetching package list from ' + packages_url)
        data = Get(packages_url).text  # autogunzipped?

        package_names = [
            line
            for line in (raw.strip() for raw in data.split('\n'))
            if line and not line.startswith('#')
        ]

        logger.GetIndented().Log('{} package name(s) parsed'.format(
            len(package_names)))

        pagesize = 100

        # Ceiling division: `len // pagesize + 1` would add an extra, empty
        # request whenever the number of names is an exact multiple of
        # pagesize (including zero names).
        numpages = (len(package_names) + pagesize - 1) // pagesize

        for page in range(numpages):
            batch = package_names[page * pagesize:(page + 1) * pagesize]
            query = '&'.join(
                'arg[]=' + urllib.parse.quote(name) for name in batch)
            url = self.url + '/rpc/?v=5&type=info&' + query

            logger.GetIndented().Log('fetching page {}/{}'.format(
                page + 1, numpages))

            # Download before opening the output file so a failed request
            # does not leave an empty page file behind.
            content = Get(url).content
            with open(os.path.join(statepath, '{}.json'.format(page)),
                      'wb') as statefile:
                statefile.write(content)
示例#3
0
文件: file.py 项目: mojca/repology
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        """Download self.url, optionally decompress it, and store it at statepath.

        self.compression selects the decompressor: 'gz', 'bz2' or 'xz'. Any
        other value leaves the downloaded bytes as they are. The result is
        written to statepath + '.tmp' and then moved into place with
        os.replace().

        Nothing is fetched (only a log line is emitted) when statepath is
        already a file and update is false.

        Args:
            statepath: path of the file to (re)create.
            update: when false, an existing statepath is left untouched.
            logger: object providing Log() and GetIndented().

        Raises:
            lzma.LZMAError: for 'xz' data that is corrupt or truncated.
        """
        tmppath = statepath + '.tmp'

        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        logger.Log('fetching ' + self.url)
        data = Get(self.url).content

        logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

        if self.compression == 'gz':
            logger.GetIndented().Log('decompressing with gzip')
            data = gzip.decompress(data)
        elif self.compression == 'bz2':
            logger.GetIndented().Log('decompressing with bz2')
            data = bz2.decompress(data)
        elif self.compression == 'xz':
            logger.GetIndented().Log('decompressing with xz')
            # lzma.decompress() decodes every concatenated stream and raises
            # on truncated input; a bare LZMADecompressor stops after the
            # first stream and silently returns partial data.
            data = lzma.decompress(data)

        if self.compression:
            logger.GetIndented().Log('size after decompression is {} byte(s)'.format(len(data)))

        # Create the temporary file only once the data is ready, so a failed
        # download or decompression leaves no stray .tmp file behind.
        logger.GetIndented().Log('saving')
        with open(tmppath, 'wb') as statefile:
            statefile.write(data)

        os.replace(tmppath, statepath)
示例#4
0
 def DoFetch(self, statepath, logger):
     """Look up the primary metadata location in repomd.xml and wrap it.

     Downloads repodata/repomd.xml below self.url, finds the location
     element of the data entry with type='primary' and returns
     FileFetcher(<that element>). statepath and logger are not used here.

     NOTE(review): find() yields an Element (or None when nothing matches),
     not a URL string, and nothing is downloaded besides repomd.xml --
     confirm that FileFetcher is meant to receive the element itself.
     """
     repomd_text = Get(self.url + "repodata/repomd.xml",
                       check_status=True).text
     repomd_root = xml.etree.ElementTree.fromstring(repomd_text)

     ns = "{http://linux.duke.edu/metadata/repo}"
     primary_location = repomd_root.find(
         ns + "data[@type='primary']/" + ns + "location")

     return FileFetcher(primary_location)
示例#5
0
文件: file.py 项目: olevole/repology
    def DoFetch(self, statepath, update, logger):
        """Download every URL in self.sources and concatenate them into statepath.

        Each download is decompressed according to the first truthy flag
        among self.gz, self.bz2 and self.xz (checked in that order) and
        appended to the file at statepath, which is opened for writing
        (and thereby truncated) before the first download starts.

        Args:
            statepath: path of the output file.
            update: not used by this method.
            logger: object providing Log() and GetIndented().

        Raises:
            lzma.LZMAError: for xz data that is corrupt or truncated.
        """
        with open(statepath, "wb") as statefile:
            for source in self.sources:
                logger.Log("fetching " + source)
                data = Get(source).content

                logger.GetIndented().Log("size is {} byte(s)".format(
                    len(data)))

                if self.gz:
                    logger.GetIndented().Log("decompressing with gzip")
                    data = gzip.decompress(data)
                elif self.bz2:
                    logger.GetIndented().Log("decompressing with bz2")
                    data = bz2.decompress(data)
                elif self.xz:
                    logger.GetIndented().Log("decompressing with xz")
                    # lzma.decompress() decodes every concatenated stream
                    # and raises on truncated input; a bare
                    # LZMADecompressor stops after the first stream and
                    # silently returns partial data.
                    data = lzma.decompress(data)

                if self.gz or self.bz2 or self.xz:
                    logger.GetIndented().Log(
                        "size after decompression is {} byte(s)".format(
                            len(data)))

                logger.GetIndented().Log("saving")
                statefile.write(data)
示例#6
0
    def LoadSpec(self, package, statepath, logger):
        """Download one package's spec file into the statepath directory.

        Requests '<giturl>/<package>.git/plain/<package>.spec'. On HTTP 200
        the body is saved as '<package>.spec' inside statepath. Otherwise
        '<giturl>/<package>.git/plain/dead.package' is requested and either
        its text (newlines replaced by ';') or the original status code is
        logged; nothing is written in that case.

        Args:
            package: package name used to build the URLs and the file name.
            statepath: directory the spec file is written into.
            logger: object providing GetIndented().
        """
        specurl = self.giturl + '/{0}.git/plain/{0}.spec'.format(package)
        logger.GetIndented().Log('getting spec from {}'.format(specurl))

        response = Get(specurl, check_status=False)

        if response.status_code == 200:
            target = os.path.join(statepath, package + '.spec')
            with open(target, 'wb') as specfile:
                specfile.write(response.content)
            return

        # No spec available: see whether a dead.package marker explains why
        deadurl = self.giturl + '/{0}.git/plain/dead.package'.format(package)
        dead_response = Get(deadurl, check_status=False)

        if dead_response.status_code != 200:
            logger.GetIndented(2).Log('failed: {}'.format(response.status_code))  # XXX: check .dead.package, instead throw
        else:
            logger.GetIndented(2).Log('dead: ' + dead_response.text.replace('\n', ';'))
示例#7
0
    def DoFetch(self, statepath, update, logger):
        """Save one HTML page per letter a..z, plus '0-9', into statepath.

        For every page name the text of '<self.url>/<name>.html' is written
        as UTF-8 to '<name>.html' inside the statepath directory.

        Args:
            statepath: directory the page files are written into.
            update: not used by this method.
            logger: object providing Log().
        """
        pages = list('abcdefghijklmnopqrstuvwxyz') + ['0-9']

        for page in pages:
            logger.Log('fetching page ' + page)
            pageurl = self.url + '/' + page + '.html'
            target = os.path.join(statepath, page + '.html')
            with open(target, 'w', encoding='utf-8') as pagefile:
                pagefile.write(Get(pageurl).text)
示例#8
0
    def ParsePackages(self, statepath, logger):
        """Walk the paginated package listing and load every package's spec.

        Starting at page 1, requests '<apiurl>packages/?page=<n>', decodes
        the response as JSON and calls LoadSpec() with the 'name' of each
        item under 'packages'. Stops once the next page number would exceed
        the 'page_total' value of the page just processed.

        Args:
            statepath: passed through to LoadSpec().
            logger: object providing Log(); also passed to LoadSpec().
        """
        page = 1

        while True:
            pageurl = self.apiurl + 'packages/?page=' + str(page)
            logger.Log('getting page {} from {}'.format(page, pageurl))

            response = Get(pageurl)
            listing = json.loads(response.text)

            for item in listing['packages']:
                self.LoadSpec(item['name'], statepath, logger)

            page += 1
            if page > listing['page_total']:
                break
示例#9
0
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        """Locate the repository's primary metadata and fetch it via FileFetcher.

        Reads repodata/repomd.xml below self.repourl, takes the href of the
        location element of the data entry with type='primary', and hands
        the resulting URL to FileFetcher(..., gz=True).Fetch().

        Nothing is fetched (only a log line is emitted) when statepath is
        already a file and update is false.

        Args:
            statepath: passed through to FileFetcher.Fetch().
            update: when false, an existing statepath is left untouched.
            logger: object providing Log(); also passed to FileFetcher.

        Returns:
            Whatever FileFetcher.Fetch() returns, or None when skipping.
        """
        already_fetched = os.path.isfile(statepath)
        if already_fetched and not update:
            logger.Log("no update requested, skipping")
            return

        # repomd.xml is the index that names the primary metadata file
        index_url = self.repourl + "repodata/repomd.xml"
        logger.Log("fetching metadata from " + index_url)
        index_root = xml.etree.ElementTree.fromstring(
            Get(index_url, check_status=True).text)

        ns = "{http://linux.duke.edu/metadata/repo}"
        location = index_root.find(
            ns + "data[@type='primary']/" + ns + "location")
        primary_url = self.repourl + location.attrib['href']

        fetcher = FileFetcher(primary_url, gz=True)
        return fetcher.Fetch(statepath, update, logger)
示例#10
0
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        """Merge the releases listed at self.url into the JSON state file.

        The state is a dict keyed by release name. It is loaded from
        statepath when that file exists, otherwise it starts empty. Every
        item under 'releases' in the downloaded JSON is then added if its
        name is new, or replaces the stored entry when VersionCompare()
        ranks its version above the stored one. The merged state is written
        to statepath + '.tmp' and moved into place with os.replace().

        Nothing is fetched (only a log line is emitted) when statepath is
        already a file and update is false.

        Args:
            statepath: path of the JSON state file.
            update: when false, an existing statepath is left untouched.
            logger: object providing Log().
        """
        have_state = os.path.isfile(statepath)

        if have_state and not update:
            logger.Log('no update requested, skipping')
            return

        if have_state:
            with open(statepath, 'r', encoding='utf-8') as oldstatefile:
                state = json.load(oldstatefile)
            logger.Log('loaded old state, {} entries'.format(len(state)))
        else:
            state = {}
            logger.Log('starting with empty state')

        releases = json.loads(Get(self.url).text)['releases']

        # Entries are processed in the order the feed lists them; a stored
        # entry is only replaced by one with a strictly greater version.
        for entry in releases:
            if 'name' not in entry:
                logger.Log('skipping entry with no name')
                continue

            name = entry['name']

            if name not in state:
                logger.Log('adding entry "{}", version {}'.format(
                    name, entry['version']))
                state[name] = entry
                continue

            known = state[name]
            if VersionCompare(entry['version'], known['version']) > 0:
                logger.Log(
                    'replacing entry "{}", version changed {} -> {}'.format(
                        name, known['version'], entry['version']))
                state[name] = entry

        temppath = statepath + '.tmp'
        with open(temppath, 'w', encoding='utf-8') as newstatefile:
            json.dump(state, newstatefile)
        os.replace(temppath, statepath)

        logger.Log('saved new state, {} entries'.format(len(state)))
示例#11
0
    def DoFetch(self, statepath, update, logger):
        """Save every page of the package feed as '<n>.xml' inside statepath.

        Starts at '<self.url>Packages()?$filter=IsLatestVersion'. Each page
        is written to disk as UTF-8, then parsed as XML to find the Atom
        link element with rel="next"; its href becomes the next page to
        fetch. The walk ends at the first page without such a link.

        Args:
            statepath: directory the page files are written into.
            update: not used by this method.
            logger: object providing Log().
        """
        pageurl = self.url + 'Packages()?$filter=IsLatestVersion'
        numpage = 0

        while pageurl is not None:
            logger.Log('getting ' + pageurl)
            text = Get(pageurl).text

            pagepath = os.path.join(statepath, '{}.xml'.format(numpage))
            with open(pagepath, 'w', encoding='utf-8') as pagefile:
                pagefile.write(text)

            # the saved page also tells us where the next one is
            logger.Log('parsing ' + pageurl)
            feed = xml.etree.ElementTree.fromstring(text)
            next_link = feed.find('{http://www.w3.org/2005/Atom}link[@rel="next"]')

            pageurl = None if next_link is None else next_link.attrib['href']
            numpage += 1
示例#12
0
    def DoFetch(self, statepath, update, logger):
        """Save every page of the package feed as "<n>.xml" inside statepath.

        Starts at "<self.apiurl>Packages()?$filter=IsLatestVersion". Each
        page is written to disk as UTF-8, then parsed as XML to find the
        Atom link element with rel='next'; its href becomes the next page
        to fetch. The walk ends at the first page without such a link.

        Args:
            statepath: directory the page files are written into.
            update: not used by this method.
            logger: object providing Log().
        """
        pending_url = self.apiurl + "Packages()?$filter=IsLatestVersion"
        numpage = 0

        while pending_url is not None:
            current_url = pending_url
            logger.Log("getting " + current_url)
            text = Get(current_url).text

            pagepath = os.path.join(statepath, "{}.xml".format(numpage))
            with open(pagepath, "w", encoding="utf-8") as pagefile:
                pagefile.write(text)

            # the saved page also tells us where the next one is
            logger.Log("parsing " + current_url)
            feed = xml.etree.ElementTree.fromstring(text)
            next_link = feed.find(
                "{http://www.w3.org/2005/Atom}link[@rel='next']")

            if next_link is None:
                pending_url = None
            else:
                pending_url = next_link.attrib['href']
            numpage += 1