Exemplo n.º 1
0
def chrome_versions():
    """Scrape Chrome release data from Wikipedia's version-history page.

    Returns an empty list without downloading anything when ``is_ci`` is
    true. Otherwise the last ``tbody`` under ``#mw-content-text`` is walked
    row by row and a list of dicts with the keys ``date`` (``YYYY-MM-DD``),
    ``chrome_version`` and ``webkit_version`` is returned, in the reverse of
    the table's row order.
    """
    if is_ci:
        return []
    print('Getting chrome versions...')
    import html5lib
    raw = download_securely(
        'https://en.wikipedia.org/wiki/Google_Chrome_version_history').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    table = root.xpath('//*[@id="mw-content-text"]//tbody')[-1]
    ans = []
    for tr in table.iterchildren('tr'):
        cells = tuple(tr.iterchildren('td'))
        # Rows without <td> children (e.g. header rows) and rows too short to
        # hold version, date and engine columns are skipped instead of
        # raising IndexError on cells[2].
        if len(cells) < 3:
            continue
        if not cells[2].text or not cells[2].text.strip():
            continue
        # A cell without a style attribute yields None; treat it as an empty
        # style so the membership tests below cannot raise TypeError.
        s = cells[0].get('style') or ''
        # Stop at the first row whose first cell carries neither marker
        # (presumably the colours used for already-released versions).
        if '#a0e75a' not in s and 'salmon' not in s:
            break
        chrome_version = cells[0].text.strip()
        # Only the first whitespace-separated token of the date cell is
        # parsed; it is round-tripped through datetime to validate it.
        ts = datetime.strptime(cells[1].text.strip().split()[
                               0], '%Y-%m-%d').date().strftime('%Y-%m-%d')
        try:
            # The engine version is the second token of the third cell.
            webkit_version = cells[2].text.strip().split()[1]
        except IndexError:
            continue
        ans.append({'date': ts, 'chrome_version': chrome_version,
                    'webkit_version': webkit_version})
    return list(reversed(ans))
Exemplo n.º 2
0
 def download_vendor_release(self, tdir, url):
     """Download the tarball at *url* and unpack it into *tdir*.

     A failed download is re-raised immediately unless ``is_ci`` is true, in
     which case one more attempt is made after a two second pause.

     Returns the path of the single entry in *tdir* when the directory
     contains exactly one entry after extraction, otherwise *tdir* itself.
     """
     self.info('Downloading %s:' % self.TAR_NAME, url)
     try:
         payload = download_securely(url)
     except Exception:
         if not is_ci:
             raise
         self.info('Download failed, sleeping and retrying...')
         time.sleep(2)
         payload = download_securely(url)
     with tarfile.open(fileobj=BytesIO(payload)) as archive:
         archive.extractall(tdir)
         entries = os.listdir(tdir)
         return self.j(tdir, entries[0]) if len(entries) == 1 else tdir
Exemplo n.º 3
0
def common_user_agents():
    """Return a list of common browser user-agent strings.

    When ``is_ci`` is true a fixed list of IE 11 user agents is returned and
    nothing is downloaded. Otherwise the list is extracted from the
    ``get-the-list`` textarea of the techblog.willshouse.com page and passed
    through ``filter_ans``.

    Raises:
        ValueError: if the textarea cannot be found in the downloaded page or
            ``filter_ans`` returns an empty result.
    """
    if is_ci:
        return [
            # IE 11 - windows 10
            'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8.1
            'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8
            'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 7
            'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 10
            'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 8.1
            'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 7
            'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        ]
    print('Getting recent UAs...')
    raw = download_securely(
        'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/').decode('utf-8')
    match = re.search(
        r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw)
    # re.search returns None when the page no longer contains the textarea;
    # report that as the same failure as an empty list rather than letting
    # .group() raise AttributeError.
    if match is None:
        raise ValueError('Failed to download list of common UAs')
    ans = filter_ans(match.group(1).splitlines())
    if not ans:
        raise ValueError('Failed to download list of common UAs')
    return ans
Exemplo n.º 4
0
def common_user_agents():
    """Return up to 256 common browser user-agent strings.

    When ``is_ci`` is true a fixed list of IE 11 user agents is returned and
    nothing is downloaded. Otherwise the gzipped JSON list published in the
    intoli/user-agents repository is downloaded, entries whose ``userAgent``
    starts with ``Opera`` are dropped, and the first 256 items of what
    ``filter_ans`` returns are used.

    Raises:
        ValueError: if that final list is empty.
    """
    if is_ci:
        return [
            # IE 11 on Windows 10
            'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 on Windows 8.1
            'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 on Windows 8
            'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 on Windows 7
            'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
            # 32-bit IE 11 on 64-bit Windows 10
            'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32-bit IE 11 on 64-bit Windows 8.1
            'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32-bit IE 11 on 64-bit Windows 7
            'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        ]
    print('Getting recent UAs...')
    payload = download_securely(
        'https://raw.githubusercontent.com/intoli/user-agents/master/src/user-agents.json.gz'
    )
    entries = json.loads(gzip.GzipFile(fileobj=io.BytesIO(payload)).read())
    candidates = [
        entry['userAgent'] for entry in entries
        if not entry['userAgent'].startswith('Opera')
    ]
    ans = filter_ans(candidates)[:256]
    if ans:
        return ans
    raise ValueError('Failed to download list of common UAs')
Exemplo n.º 5
0
def chrome_versions():
    """Collect Chrome release information from Wikipedia.

    Nothing is downloaded and ``[]`` is returned when ``is_ci`` is true.
    Otherwise the rows of the last ``tbody`` inside ``#mw-content-text`` are
    scanned and one dict per usable row is produced with the keys ``date``
    (formatted ``YYYY-MM-DD``), ``chrome_version`` and ``webkit_version``.
    The result is returned in the reverse of the table's row order.
    """
    if is_ci:
        return []
    print('Getting chrome versions...')
    import html5lib
    raw = download_securely(
        'https://en.wikipedia.org/wiki/Google_Chrome_version_history').decode(
            'utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    table = root.xpath('//*[@id="mw-content-text"]//tbody')[-1]
    ans = []
    for tr in table.iterchildren('tr'):
        cells = tuple(tr.iterchildren('td'))
        # Skip rows with no <td> cells as before, and also rows with fewer
        # than three cells, which would otherwise raise IndexError below.
        if len(cells) < 3:
            continue
        if not cells[2].text or not cells[2].text.strip():
            continue
        # get() returns None for a cell with no style attribute; fall back to
        # '' so the substring checks do not raise TypeError.
        s = cells[0].get('style') or ''
        # The scan ends at the first row whose first cell has neither marker
        # in its style (presumably the colours marking released versions).
        if '#a0e75a' not in s and 'salmon' not in s:
            break
        chrome_version = cells[0].text.strip()
        # Parse and re-format the first token of the date cell so that only
        # well-formed dates are emitted.
        ts = datetime.strptime(cells[1].text.strip().split()[0],
                               '%Y-%m-%d').date().strftime('%Y-%m-%d')
        try:
            # Second whitespace-separated token of the third cell.
            webkit_version = cells[2].text.strip().split()[1]
        except IndexError:
            continue
        ans.append({
            'date': ts,
            'chrome_version': chrome_version,
            'webkit_version': webkit_version
        })
    return list(reversed(ans))
Exemplo n.º 6
0
 def run(self, opts):
     """Rebuild the contents of ``self.hyphenation_dir`` from a fresh download.

     The zip archive at ``URL`` is unpacked into a temporary directory and
     handed to ``process_dictionaries`` together with a second temporary
     output directory. From that output directory this method then writes
     into ``self.hyphenation_dir``:

     * ``dictionaries.tar.xz`` - a tar of all ``*.dic`` files (sorted by
       name) passed through ``compress_tar``,
     * ``sha1sum`` - the hex SHA-1 over the concatenated ``*.dic`` contents,
     * ``locales.json`` - copied as-is.
     """
     self.clean()
     os.makedirs(self.hyphenation_dir)
     self.info('Downloading hyphenation dictionaries...')
     with self.temp_dir() as src, ZipFile(
             BytesIO(download_securely(URL))) as archive, \
             self.temp_dir() as output_dir:
         archive.extractall(src)
         extracted = os.listdir(src)
         if len(extracted) == 1:
             # Everything was unpacked under one top-level entry: use it as
             # the source directory.
             src = os.path.join(src, extracted[0])
         process_dictionaries(src, output_dir)

         dic_names = sorted(
             name for name in os.listdir(output_dir) if name.endswith('.dic'))
         dic_paths = [os.path.join(output_dir, name) for name in dic_names]

         # Checksum over the dictionary contents, in sorted name order.
         digest = hashlib.sha1()
         for path in dic_paths:
             with open(path, 'rb') as dic_file:
                 digest.update(dic_file.read())
         checksum = type('')(digest.hexdigest())

         # Build the tar in memory with fixed metadata for every member
         # (presumably so the archive does not depend on the build machine).
         tar_buf = BytesIO()
         with tarfile.TarFile(fileobj=tar_buf, mode='w') as tf:
             for name, path in zip(dic_names, dic_paths):
                 with open(path, 'rb') as dic_file:
                     member = tf.gettarinfo(arcname=name, fileobj=dic_file)
                     member.mtime = 0
                     member.uid = member.gid = 1000
                     member.uname = member.gname = 'kovid'
                     tf.addfile(member, dic_file)

         archive_path = os.path.join(
             self.hyphenation_dir, 'dictionaries.tar.xz')
         with open(archive_path, 'wb') as out:
             compress_tar(tar_buf, out)
         with open(os.path.join(self.hyphenation_dir, 'sha1sum'), 'w') as out:
             out.write(checksum)
         shutil.copy(
             os.path.join(output_dir, 'locales.json'), self.hyphenation_dir)
Exemplo n.º 7
0
def common_user_agents():
    """Fetch the current list of common browser user-agent strings.

    Under CI (``is_ci`` true) no request is made and a hard-coded list of
    IE 11 user agents is returned. Otherwise the contents of the
    ``get-the-list`` textarea on the techblog.willshouse.com page are split
    into lines and handed to ``filter_ans``.

    Raises:
        ValueError: if the textarea is missing from the downloaded page or
            ``filter_ans`` yields nothing.
    """
    if is_ci:
        return [
            # IE 11 - windows 10
            'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8.1
            'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 8
            'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
            # IE 11 - windows 7
            'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 10
            'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 8.1
            'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
            # 32bit IE 11 on 64 bit win 7
            'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
        ]
    print('Getting recent UAs...')
    raw = download_securely(
        'https://techblog.willshouse.com/2012/01/03/most-common-user-agents/'
    ).decode('utf-8')
    found = re.search(r'<textarea.+"get-the-list".+>([^<]+)</textarea>',
                      raw)
    # A changed page layout makes re.search return None; surface that as the
    # function's own failure instead of an AttributeError from .group().
    if found is None:
        raise ValueError('Failed to download list of common UAs')
    ans = filter_ans(found.group(1).splitlines())
    if not ans:
        raise ValueError('Failed to download list of common UAs')
    return ans
Exemplo n.º 8
0
 def download_vendor_release(self, tdir, url):
     """Fetch the tarball at *url* and extract it into *tdir*.

     Returns the path of the only entry in *tdir* if the directory holds
     exactly one entry after extraction; otherwise returns *tdir*.
     """
     self.info('Downloading %s:' % self.TAR_NAME, url)
     tarball = download_securely(url)
     with tarfile.open(fileobj=BytesIO(tarball)) as archive:
         archive.extractall(tdir)
         contents = os.listdir(tdir)
         if len(contents) != 1:
             return tdir
         return self.j(tdir, contents[0])
Exemplo n.º 9
0
def firefox_versions():
    """Return Firefox versions scraped from mozilla.org's releases page.

    The text of every ``li/strong/a[@href]`` link inside the first ``<ol>``
    directly under ``<main id="main-content">`` is collected and passed
    through ``filter_ans``.

    Raises:
        ValueError: if the release list cannot be located in the page or
            ``filter_ans`` returns an empty result.
    """
    print('Getting firefox versions...')
    import html5lib
    raw = download_securely(
        'https://www.mozilla.org/en-US/firefox/releases/').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    lists = root.xpath('//main[@id="main-content"]/ol')
    # An empty result means the page layout changed; report it as the same
    # failure as an empty version list rather than raising IndexError.
    if not lists:
        raise ValueError('Failed to download list of firefox versions')
    ans = filter_ans(lists[0].xpath('descendant::li/strong/a[@href]/text()'))
    if not ans:
        raise ValueError('Failed to download list of firefox versions')
    return ans
Exemplo n.º 10
0
def firefox_versions():
    """Return Firefox versions scraped from mozilla.org's releases page.

    Under CI (``is_ci`` true) the fixed list ``['51.0', '50.0']`` is returned
    without any download. Otherwise the text of every ``li/strong/a[@href]``
    link inside the first ``<ol>`` directly under ``<div id="main-content">``
    is collected and passed through ``filter_ans``.

    Raises:
        ValueError: if the release list cannot be located in the page or
            ``filter_ans`` returns an empty result.
    """
    if is_ci:
        return '51.0 50.0'.split()
    print('Getting firefox versions...')
    import html5lib
    raw = download_securely(
        'https://www.mozilla.org/en-US/firefox/releases/').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    lists = root.xpath('//div[@id="main-content"]/ol')
    # No matching <ol> means the page layout changed; raise the function's
    # own error instead of an IndexError from indexing an empty list.
    if not lists:
        raise ValueError('Failed to download list of firefox versions')
    ans = filter_ans(lists[0].xpath('descendant::li/strong/a[@href]/text()'))
    if not ans:
        raise ValueError('Failed to download list of firefox versions')
    return ans
Exemplo n.º 11
0
 def run(self, opts):
     """Refresh the CA bundle stored at ``self.CA_PATH``.

     The bundle is downloaded from curl.haxx.se and compared with the bytes
     currently on disk (a missing file counts as empty). Only when they
     differ is the file rewritten and ``self.verify_ca_certs()`` called.

     Raises:
         RuntimeError: if the download returns no data.
         OSError: if reading the existing file fails for any reason other
             than the file not existing.
     """
     current = b''
     try:
         with open(self.CA_PATH, 'rb') as existing:
             current = existing.read()
     except OSError as err:
         # Only a missing file is acceptable here.
         if err.errno != errno.ENOENT:
             raise
     latest = download_securely('https://curl.haxx.se/ca/cacert.pem')
     if not latest:
         raise RuntimeError('Failed to download CA cert bundle')
     if latest == current:
         return
     self.info('Updating Mozilla CA certificates')
     with open(self.CA_PATH, 'wb') as out:
         out.write(latest)
     self.verify_ca_certs()
Exemplo n.º 12
0
 def run(self, opts):
     """Update the file at ``self.CA_PATH`` with the latest CA bundle.

     Reads the existing bundle (treating a non-existent file as empty),
     downloads the bundle from curl.haxx.se, and, if the two differ, writes
     the new bytes to ``self.CA_PATH`` and calls ``self.verify_ca_certs()``.

     Raises:
         RuntimeError: if the download returns no data.
         EnvironmentError: if the existing file cannot be read for a reason
             other than ``ENOENT``.
     """
     try:
         with open(self.CA_PATH, 'rb') as bundle:
             on_disk = bundle.read()
     except EnvironmentError as err:
         if err.errno != errno.ENOENT:
             raise
         # No bundle yet: compare against an empty one.
         on_disk = b''
     downloaded = download_securely('https://curl.haxx.se/ca/cacert.pem')
     if not downloaded:
         raise RuntimeError('Failed to download CA cert bundle')
     unchanged = downloaded == on_disk
     if not unchanged:
         self.info('Updating Mozilla CA certificates')
         with open(self.CA_PATH, 'wb') as bundle:
             bundle.write(downloaded)
         self.verify_ca_certs()
Exemplo n.º 13
0
 def download_vendor_release(self, tdir, url):
     """Download the tarball at *url* and unpack it into *tdir*.

     Up to five attempts are made when ``is_ci`` is true, with a two second
     pause after each failure; otherwise there is a single attempt. The
     exception from the final failed attempt is re-raised.

     Returns the path of the single entry in *tdir* when the directory
     contains exactly one entry after extraction, otherwise *tdir* itself.
     """
     self.info('Downloading %s:' % self.TAR_NAME, url)
     num = 5 if is_ci else 1
     for i in range(num):
         try:
             raw = download_securely(url)
         except Exception as err:
             if i == num - 1:
                 raise
             self.info(
                 f'Download failed with error "{err}" sleeping and retrying...'
             )
             time.sleep(2)
         else:
             # Stop after the first successful download; without this the
             # loop would fetch the same URL again on every remaining pass.
             break
     with tarfile.open(fileobj=BytesIO(raw)) as tf:
         tf.extractall(tdir)
         entries = os.listdir(tdir)
         if len(entries) == 1:
             return self.j(tdir, entries[0])
         return tdir
Exemplo n.º 14
0
 def get_list(self):
     """Return a list of common browser user-agent strings.

     When ``is_ci`` is true a fixed list of IE 11 user agents is returned and
     nothing is downloaded. Otherwise the contents of the ``get-the-list``
     textarea on the techblog.willshouse.com page are split into lines,
     stripped, and returned with blank lines removed.

     Raises:
         ValueError: if the textarea cannot be found in the downloaded page.
     """
     if is_ci:
         # Don't hammer the server from CI
         return [
             # IE 11 - windows 10
             'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
             # IE 11 - windows 8.1
             'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
             # IE 11 - windows 8
             'Mozilla/5.0 (Windows NT 6.2; Trident/7.0; rv:11.0) like Gecko',
             # IE 11 - windows 7
             'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
             # 32bit IE 11 on 64 bit win 10
             'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
             # 32bit IE 11 on 64 bit win 8.1
             'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
             # 32bit IE 11 on 64 bit win 7
             'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
         ]
     raw = download_securely('https://techblog.willshouse.com/2012/01/03/most-common-user-agents/').decode('utf-8')
     match = re.search(r'<textarea.+"get-the-list".+>([^<]+)</textarea>', raw)
     # re.search returns None when the textarea is missing; raise a clear
     # error instead of an AttributeError from .group().
     if match is None:
         raise ValueError('Failed to find the list of common user agents in the downloaded page')
     # Strip each line once, then drop the ones that end up empty.
     stripped = (line.strip() for line in match.group(1).splitlines())
     return [line for line in stripped if line]
Exemplo n.º 15
0
 def download_mathjax_release(self, tdir, url):
     """Download the MathJax zip archive at *url* and extract it into *tdir*.

     Returns the path ``<tdir>/MathJax-<self.MATH_JAX_VERSION>``; whether
     that directory exists after extraction is not checked here.
     """
     self.info('Downloading MathJax:', url)
     archive_bytes = download_securely(url)
     with ZipFile(BytesIO(archive_bytes)) as archive:
         archive.extractall(tdir)
         release_dir = 'MathJax-' + self.MATH_JAX_VERSION
         return os.path.join(tdir, release_dir)