Пример #1
0
    def get_songs_from_list(self, url):
        """Fetch *url* and parse every song entry on the page.

        Returns a list of SongInfo tuples; rows lacking a song-title
        link (empty placeholder items) are skipped.
        """
        doc = parse_document_from_requests(url, self.session)
        # elements whose class attribute contains the token "song-item"
        rows = doc.xpath(
            '//*[contains(concat(" ", normalize-space(@class), " "), " song-item ")]'
        )
        songs = []

        for tr in rows:
            try:
                a = tr.xpath('./span[@class="song-title"]/a')[0]
            except IndexError:
                # some lists contain empty items...
                # e.g. index 30 of this:
                # http://music.baidu.com/search/song?key=70%E5%90%8E&start=20&size=20
                continue
            href = a.get('href')
            # the song id is the last path component of the link
            sid = href.rsplit('/', 1)[-1]
            title = a.text_content()
            artists = tuple(
                a.text_content()
                for a in tr.xpath('./span[@class="singer"]/span/a'))
            try:
                album = tr.xpath('./span[@class="album-title"]/a'
                                 )[0].text_content().strip()
                # strip the decorative CJK book-title quotes around the name
                album = album.lstrip('《').rstrip('》')
            except IndexError:
                album = None
            song = SongInfo(sid, title, href, artists, album, None)
            songs.append(song)

        return songs
Пример #2
0
 def get_user_topic_ids(self, user_id):
   """Return the ids of all topics started by the given user."""
   resp = self.request(
     '/search.php?action=show_user_topics&user_id=%d' % user_id)
   page = parse_document_from_requests(resp)
   topic_ids = []
   for anchor in page.xpath('//td[@class="tcl"]/div[@class="tclcon"]/div//a'):
     # the topic id is the value after "=" in the link's query string
     topic_ids.append(int(anchor.get('href').split('=', 1)[-1]))
   return topic_ids
Пример #3
0
def download_official_pkgbuild(name: str) -> List[str]:
  """Download the PKGBUILD and auxiliary files of an official package.

  Files are written into the current directory; the list of downloaded
  file names is returned.
  """
  url = 'https://www.archlinux.org/packages/search/json/?name=' + name
  logger.info('download PKGBUILD for %s.', name)
  info = s.get(url).json()
  # pick the first non-[testing] result; repo/arch locate the svntogit tree
  r = [r for r in info['results'] if r['repo'] != 'testing'][0]
  repo = r['repo']
  arch = r['arch']
  if repo in ('core', 'extra'):
    gitrepo = 'packages'
  else:
    gitrepo = 'community'
  # previously recomputed with an identical filter over info['results'];
  # take it directly from the result we already selected
  pkgbase = r['pkgbase']

  tree_url = 'https://projects.archlinux.org/svntogit/%s.git/tree/repos/%s-%s?h=packages/%s' % (
    gitrepo, repo, arch, pkgbase)
  doc = parse_document_from_requests(tree_url, s)
  # anchors with the "ls-blob" class are the plain files in the tree listing
  blobs = doc.xpath('//div[@class="content"]//td/a[contains(concat(" ", normalize-space(@class), " "), " ls-blob ")]')
  files = [x.text for x in blobs]
  for filename in files:
    blob_url = 'https://projects.archlinux.org/svntogit/%s.git/plain/repos/%s-%s/%s?h=packages/%s' % (
      gitrepo, repo, arch, filename, pkgbase)
    with open(filename, 'wb') as f:
      logger.debug('download file %s.', filename)
      data = s.get(blob_url).content
      f.write(data)
  return files
Пример #4
0
  def search(self, q):
    """Search for *q* and return the matching songs as SongInfo objects."""
    url = 'http://www.xiami.com/search?key=' + q
    doc = parse_document_from_requests(url, self.session)
    # skip the header row of the result table
    rows = doc.xpath('//table[@class="track_list"]//tr')[1:]
    ret = []
    for tr in rows:
      # links without a "target" attribute are expand buttons, not songs
      names = tr.xpath('td[@class="song_name"]/a[@target]')
      if len(names) == 2:
        # a second link carries extra title information
        extra = names[1].text_content()
      else:
        extra = None
      name = names[0].text_content()
      href = names[0].get('href')

      # '/text()' in XPath get '.text', not '.text_content()'
      artist = tr.xpath('td[@class="song_artist"]/a')[0].text_content().strip()
      album = tr.xpath('td[@class="song_album"]/a')[0].text_content().strip()
      # strip the decorative CJK book-title quotes around the album name
      album = album.lstrip('《').rstrip('》')

      # the song id is the last path component of the link
      sid = href.rsplit('/', 1)[-1]
      song = SongInfo(sid, name, href, (artist,), album, extra)
      ret.append(song)

    return ret
Пример #5
0
  def delete_unverified_users(self, doc=None, *, msg=None, since=None):
    '''delete unverified users in the first page

    doc can be given if you have that page's parsed content already.
    return False if no such users are found.
    '''
    if doc is None:
      url = '/admin_users.php?find_user=&' \
          'order_by=username&direction=ASC&user_group=0&p=1'
      if since:
        # %S (not %s) is the zero-padded seconds field; %s is a
        # non-portable epoch-seconds extension and broke the timestamp
        url += '&registered_before=' + since.strftime('%Y-%m-%d %H:%M:%S')
      res = self.request(url)
      doc = parse_document_from_requests(res)
    trs = doc.xpath('//div[@id="users2"]//tbody/tr')
    if not trs:
      return False

    # checkbox names look like "users[123]"; slice out the numeric id
    users = [tr.xpath('td/input[@type="checkbox"]/@name')[0][6:-1]
             for tr in trs]
    users = ','.join(users)

    post = {
      'delete_users_comply': 'delete',
      'delete_posts': '1',
      'users': users,
    }
    res = self.request('/admin_users.php', data=post)
    res.text  # force the response body to be read
    return True
Пример #6
0
 def get_user_topic_ids(self, user_id):
   """Return the ids of the topics created by *user_id*."""
   res = self.request(
     '/search.php?action=show_user_topics&user_id=%d' % user_id)
   document = parse_document_from_requests(res)
   topic_links = document.xpath(
     '//td[@class="tcl"]/div[@class="tclcon"]/div/strong/a')
   # the topic id follows "=" in each link's href
   return [int(link.get('href').split('=', 1)[-1]) for link in topic_links]
Пример #7
0
def download_official_pkgbuild(name):
    """Download the PKGBUILD and auxiliary files of an official package.

    Files are written into the current directory; the list of downloaded
    file names is returned.
    """
    url = 'https://www.archlinux.org/packages/search/json/?name=' + name
    logger.info('download PKGBUILD for %s.', name)
    info = s.get(url).json()
    # pick the first non-[testing] result; repo/arch locate the svntogit tree
    r = [r for r in info['results'] if r['repo'] != 'testing'][0]
    repo = r['repo']
    arch = r['arch']
    if repo in ('core', 'extra'):
        gitrepo = 'packages'
    else:
        gitrepo = 'community'
    # previously recomputed with an identical filter over info['results'];
    # take it directly from the result we already selected
    pkgbase = r['pkgbase']

    tree_url = 'https://projects.archlinux.org/svntogit/%s.git/tree/repos/%s-%s?h=packages/%s' % (
        gitrepo, repo, arch, pkgbase)
    doc = parse_document_from_requests(tree_url, s)
    # anchors with the "ls-blob" class are the plain files in the listing
    blobs = doc.xpath(
        '//div[@class="content"]//td/a[contains(concat(" ", normalize-space(@class), " "), " ls-blob ")]'
    )
    files = [x.text for x in blobs]
    for filename in files:
        blob_url = 'https://projects.archlinux.org/svntogit/%s.git/plain/repos/%s-%s/%s?h=packages/%s' % (
            gitrepo, repo, arch, filename, pkgbase)
        with open(filename, 'wb') as f:
            logger.debug('download file %s.', filename)
            data = s.get(blob_url).content
            f.write(data)
    return files
Пример #8
0
  def get_songs_from_list(self, url):
    """Fetch *url* and parse every song entry on the page.

    Returns a list of SongInfo tuples; rows lacking a song-title link
    (empty placeholder items) are skipped.
    """
    doc = parse_document_from_requests(url, self.session)
    # elements whose class attribute contains the token "song-item"
    rows = doc.xpath(
      '//*[contains(concat(" ", normalize-space(@class), " "), " song-item ")]')
    songs = []

    for tr in rows:
      try:
        a = tr.xpath('./span[@class="song-title"]/a')[0]
      except IndexError:
        # some lists contain empty items...
        # e.g. index 30 of this:
        # http://music.baidu.com/search/song?key=70%E5%90%8E&start=20&size=20
        continue
      href = a.get('href')
      # the song id is the last path component of the link
      sid = href.rsplit('/', 1)[-1]
      title = a.text_content()
      artists = tuple(
        a.text_content() for a in
        tr.xpath('./span[@class="singer"]/span/a'))
      try:
        album = tr.xpath('./span[@class="album-title"]/a')[0].text_content().strip()
        # strip the decorative CJK book-title quotes around the name
        album = album.lstrip('《').rstrip('》')
      except IndexError:
        album = None
      song = SongInfo(sid, title, href, artists, album, None)
      songs.append(song)

    return songs
Пример #9
0
 def get_post_ids_from_topic(self, topic_id):
   """Return the ids of all posts shown on the topic's page."""
   resp = self.request('/viewtopic.php?id=%d' % topic_id)
   page = parse_document_from_requests(resp)
   post_ids = []
   for link in page.xpath('//div[@id]/h2//a'):
     # each href ends in "#p<id>"; drop the fragment's leading "p"
     post_ids.append(int(link.get('href').split('#', 1)[-1][1:]))
   return post_ids
Пример #10
0
    def delete_unverified_users(self, doc=None, *, msg=None, since=None):
        '''delete unverified users in the first page

        doc can be given if you have that page's parsed content already.
        return False if no such users are found.
        '''
        if doc is None:
            url = '/admin_users.php?find_user=&' \
                'order_by=username&direction=ASC&user_group=0&p=1'
            if since:
                # %S (not %s) is the zero-padded seconds field; %s is a
                # non-portable epoch-seconds extension and broke the timestamp
                url += '&registered_before=' + since.strftime(
                    '%Y-%m-%d %H:%M:%S')
            res = self.request(url)
            doc = parse_document_from_requests(res)
        trs = doc.xpath('//div[@id="users2"]//tbody/tr')
        if not trs:
            return False

        # checkbox names look like "users[123]"; slice out the numeric id
        users = [
            tr.xpath('td/input[@type="checkbox"]/@name')[0][6:-1] for tr in trs
        ]
        users = ','.join(users)

        post = {
            'delete_users_comply': 'delete',
            'delete_posts': '1',
            'users': users,
        }
        res = self.request('/admin_users.php', data=post)
        # the assignment to an unused local was dropped; reading .text
        # still forces the response body to be consumed
        res.text
        return True
Пример #11
0
 def get_post_ids_from_topic(self, topic_id):
   """Collect the post ids appearing on the given topic's page."""
   res = self.request('/viewtopic.php?id=%d' % topic_id)
   document = parse_document_from_requests(res)
   anchors = document.xpath('//div[@id]/h2//a')
   # hrefs end with "#p<id>"; strip the fragment's leading "p"
   return [int(a.get('href').split('#', 1)[-1][1:]) for a in anchors]
Пример #12
0
  def block_user(self, user_id):
    """Move *user_id* into group 4 and file a ban record for them."""
    r = self.request('/profile.php?section=admin&id=%d' % user_id)
    r.content  # force the response body to be read

    data = {
      'form_sent': '1',
      'group_id': '4',
      # submit-button label meaning "block user" (site UI is Chinese)
      'ban': '阻止用户',
    }
    r = self.request('/profile.php?section=admin&id=%d' % user_id, data=data)
    doc = parse_document_from_requests(r)
    # open the "add ban" form for this user and submit it with a message
    r = self.request('/admin_bans.php?add_ban=%d' % user_id)
    doc = parse_document_from_requests(r)
    form = doc.forms[0]
    form.fields['ban_message'] = 'spam'
    r = self.request(form.action, data=dict(form.fields))
    r.content  # force the response body to be read
Пример #13
0
  def block_user(self, user_id):
    """Put *user_id* into group 4, then submit a ban form for them."""
    r = self.request('/profile.php?section=admin&id=%d' % user_id)
    r.content  # consume the response body

    data = {
      'form_sent': '1',
      'group_id': '4',
      # submit-button label meaning "block user" (site UI is Chinese)
      'ban': '阻止用户',
    }
    r = self.request('/profile.php?section=admin&id=%d' % user_id, data=data)
    doc = parse_document_from_requests(r)
    # fetch the "add ban" form, fill in a reason, and submit it
    r = self.request('/admin_bans.php?add_ban=%d' % user_id)
    doc = parse_document_from_requests(r)
    form = doc.forms[0]
    form.fields['ban_message'] = 'spam'
    r = self.request(form.action, data=dict(form.fields))
    r.content  # consume the response body
Пример #14
0
 def get_login_things(self):
   """Fetch the login page and extract everything needed to log in.

   Returns a (once token, username field name, password field name) tuple.
   """
   resp = self.request(self.login_url)
   page = parse_document_from_requests(resp)
   token = page.xpath('//input[@name="once"]')[0].get('value')
   # the login form is the last form on the page
   login_form = page.xpath('//form')[-1]
   user_field = login_form.xpath('.//input[@type="text"]')[0].get('name')
   pass_field = login_form.xpath('.//input[@type="password"]')[0].get('name')
   return token, user_field, pass_field
Пример #15
0
 def get_login_things(self):
     """Collect login prerequisites from the login page.

     Returns the "once" token plus the names of the username and
     password inputs of the login form.
     """
     response = self.request(self.login_url)
     document = parse_document_from_requests(response)
     once_token = document.xpath('//input[@name="once"]')[0].get('value')
     # the last form on the page is the login form
     the_form = document.xpath('//form')[-1]
     username_name = the_form.xpath('.//input[@type="text"]')[0].get('name')
     password_name = the_form.xpath(
         './/input[@type="password"]')[0].get('name')
     return once_token, username_name, password_name
Пример #16
0
def _get_aur_packager(name: str) -> Tuple[Optional[str], str]:
    """Scrape the AUR page of *name* for its maintainer and last packager.

    The maintainer is None when the package is orphaned.
    """
    doc = parse_document_from_requests(
        f'https://aur.archlinux.org/packages/{name}/', s)
    maintainer_text = str(
        doc.xpath('//th[text()="Maintainer: "]/following::td[1]/text()')[0])
    last_packager = str(
        doc.xpath('//th[text()="Last Packager: "]/following::td[1]/text()')[0])
    # an orphaned package renders the literal string "None" in that cell
    maintainer: Optional[str]
    if maintainer_text == 'None':
        maintainer = None
    else:
        maintainer = maintainer_text
    return maintainer, last_packager
Пример #17
0
 def edit_post(self, post_id, body, *, subject=None, sticky=False):
     """Edit an existing post, keeping its subject unless a new one is given.

     Returns True when the forum answers with the redirect page that
     signals a successful edit.
     """
     r = self.request('/viewtopic.php?pid=%s' % post_id)
     post = parse_document_from_requests(r)
     # the current subject comes from the breadcrumb trail
     old_subject = post.xpath('//ul[@class="crumbs"]/li/strong/a')[0].text
     data = {
         'form_sent': '1',
         'req_message': body,
         'req_subject': subject or old_subject,
         # conditional expression replaces the fragile "and/or" idiom
         'stick_topic': '1' if sticky else '0',
     }
     url = '/edit.php?id=%s&action=edit' % post_id
     res = self.request(url, data=data)
     return b'http-equiv="refresh"' in res.content
Пример #18
0
 def edit_post(self, post_id, body, *, subject=None, sticky=False):
   """Edit an existing post, keeping its subject unless a new one is given.

   Returns True when the forum answers with the redirect page that
   signals a successful edit.
   """
   r = self.request('/viewtopic.php?pid=%s' % post_id)
   post = parse_document_from_requests(r)
   # the current subject comes from the breadcrumb trail
   old_subject = post.xpath('//ul[@class="crumbs"]/li/strong/a')[0].text
   data = {
     'form_sent': '1',
     'req_message': body,
     'req_subject': subject or old_subject,
     # conditional expression replaces the fragile "and/or" idiom
     'stick_topic': '1' if sticky else '0',
   }
   url = '/edit.php?id=%s&action=edit' % post_id
   res = self.request(url, data=data)
   return b'http-equiv="refresh"' in res.content
Пример #19
0
    def daily_mission(self):
        """Claim the daily login bonus.

        Raises NotLoggedIn when the session cookies are invalid,
        MissionNotAvailable when there is nothing to claim, and
        V2EXFailure when the final claim request does not succeed.
        """
        r = self.request(self.daily_url)
        # a sign-in link on the page means we are not logged in
        if 'href="/signin"' in r.text:
            raise NotLoggedIn

        doc = parse_document_from_requests(r)
        # "领取 X 铜币" = "collect X coins"; the button is absent once claimed
        buttons = doc.xpath('//input[@value = "领取 X 铜币"]')
        if not buttons:
            raise MissionNotAvailable

        button = buttons[0]
        # the claim URL is embedded in the button's onclick javascript
        url = button.get('onclick').split("'")[1]
        r = self.request(urljoin(self.index_url, url))
        # "已成功领取每日登录奖励" = "daily login reward claimed successfully"
        if '已成功领取每日登录奖励' not in r.text:
            raise V2EXFailure('daily mission failed', r)
Пример #20
0
  def daily_mission(self):
    """Claim the daily login bonus.

    Raises NotLoggedIn when the session cookies are invalid,
    MissionNotAvailable when there is nothing to claim, and
    V2EXFailure when the final claim request does not succeed.
    """
    r = self.request(self.daily_url)
    # a sign-in link on the page means we are not logged in
    if 'href="/signin"' in r.text:
      raise NotLoggedIn

    doc = parse_document_from_requests(r)
    # "领取 X 铜币" = "collect X coins"; the button is absent once claimed
    buttons = doc.xpath('//input[@value = "领取 X 铜币"]')
    if not buttons:
      raise MissionNotAvailable

    button = buttons[0]
    # the claim URL is embedded in the button's onclick javascript
    url = button.get('onclick').split("'")[1]
    r = self.request(urljoin(self.index_url, url))
    # "已成功领取每日登录奖励" = "daily login reward claimed successfully"
    if '已成功领取每日登录奖励' not in r.text:
      raise V2EXFailure('daily mission failed', r)
Пример #21
0
 def check_login(self):
     '''check if we have logged in already (by cookies)'''
     response = self.request('/')
     page = parse_document_from_requests(response)
     # the welcome box contains list items only for an active session
     items = page.xpath('//div[@id="brdwelcome"]/*[@class="conl"]/li')
     return len(items) > 0
Пример #22
0
 def get_once_value(self):
     """Return the one-time "once" token found on the login page."""
     resp = self.request(self.login_url)
     page = parse_document_from_requests(resp)
     once_input = page.xpath('//input[@name="once"]')[0]
     return once_input.get('value')
Пример #23
0
 def get_once_value(self):
   """Fetch the login page and extract its "once" token."""
   page = parse_document_from_requests(self.request(self.login_url))
   return page.xpath('//input[@name="once"]')[0].get('value')
Пример #24
0
 def check_login(self):
   '''check if we have logged in already (by cookies)'''
   response = self.request('/')
   page = parse_document_from_requests(response)
   # a non-empty welcome list means the cookies gave us a session
   welcome_items = page.xpath('//div[@id="brdwelcome"]/*[@class="conl"]/li')
   return len(welcome_items) > 0