def get_game_data(username):
    """Fetch a Steam user's games page and return its ``rgGames`` entries.

    Downloads the community "all games" page for ``username``, parses every
    ``<script language="javascript">`` element with ``parse_script`` and looks
    for a variable whose identifier is ``rgGames``.  When several scripts
    declare it, the last one found wins.

    Args:
        username: Steam community id interpolated into the profile URL.

    Returns:
        A list holding ``to_map(item)`` for each item of the ``rgGames``
        initializer, or an empty list when no script on the page declares
        ``rgGames``.
    """
    games_url = 'http://steamcommunity.com/id/%s/games/?tab=all' % (username,)
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(games_url) as response:
        html_data = response.read().decode('utf-8')

    doc = html.document_fromstring(html_data)
    row_selector = HTMLTranslator().css_to_xpath('script[language=javascript]')

    games = None
    for el in doc.xpath(row_selector):
        for variable in parse_script(el.text_content()):
            if variable.identifier.value == 'rgGames':
                games = variable

    # Without this guard a page lacking rgGames crashed with
    # "'NoneType' object has no attribute 'initializer'".
    if games is None:
        return []
    return [to_map(item) for item in games.initializer.items]
# Look up a word on etymonline.com and collect the result as fold-marked
# lines, after focusing (or creating) a Vim scratch window named "etymology".
base_url = "http://etymonline.com/index.php?term={}"
# The code below treats any value greater than -1 as "a window already shows
# the etymology buffer".
etymologynr = int(vim.eval("bufwinnr('^etymology$')"))
# NOTE(review): sys.argv[0] is normally the script name; presumably the Vim
# caller assigns sys.argv before running this script -- confirm.
word_to_look_up = sys.argv[0]
# After str.format the doubled braces collapse to "<term> {{{"; together with
# term_end they bracket each entry for the foldmethod=marker setting below.
term_start = "{} {{{{{{"
term_end = "}}}"

if etymologynr > -1:
    # Reuse the existing window.
    vim.command('{}wincmd w'.format(etymologynr))
else:
    # Open a new scratch split and configure it.
    vim.command('silent keepalt belowright split etymology')
    vim.command('setlocal noswapfile nobuflisted nospell nowrap modifiable')
    vim.command('setlocal buftype=nofile bufhidden=hide')
    vim.command('setlocal foldmethod=marker textwidth=80 wrapmargin=0')

term_xpath = etree.XPath(htmltrans.css_to_xpath('dt'))
linkfixes = etree.XPath(htmltrans.css_to_xpath("a.crossreference"))
foreignfixes = etree.XPath(htmltrans.css_to_xpath("span.foreign"))

definitions = html.parse(base_url.format(uparse.quote_plus(word_to_look_up)))
lines = []

for foreignfix in foreignfixes(definitions):
    # An element's .text is None when it has no leading text; formatting it
    # would put the literal word "None" between the markers, so skip it.
    if foreignfix.text is not None:
        foreignfix.text = "<<{}>>".format(foreignfix.text)

for linkfix in linkfixes(definitions):
    if linkfix.text is not None:
        linkfix.text = ">>{}<<".format(linkfix.text)

for term_el in term_xpath(definitions):
    # Prefer the text of the first child of the <dt>; fall back to the <dt>'s
    # own text content when it has no child or the child has no text, rather
    # than raising IndexError or printing "None".
    first_text = term_el[0].text if len(term_el) else None
    term_name = first_text if first_text is not None else term_el.text_content()
    lines.append(term_start.format(term_name))
    # A <dt> that is the last node has no following sibling to read from.
    definition_el = term_el.getnext()
    if definition_el is not None:
        lines.extend(definition_el.text_content().split('\r\n'))
    lines.append(term_end)
import lxml.html
from cssselect import HTMLTranslator
import re
from scraper.models import FilmDict
from scraper.utils import (decode_html, unicode_normalize, clean_string,
                           string_to_list, correct_countries_list)
from cinema.utils import (titlecase, country_title)

html_translator = HTMLTranslator()

# XPath expressions; the first two are translated from CSS selectors once at
# import time, the last two are written by hand.
META_XPATH = html_translator.css_to_xpath('header.carousel-caption > h6')
ANCHOR_XPATH = html_translator.css_to_xpath('ul.thumbnails > li .thumbnail > a:nth-of-type(1)')
SYNOPSIS_GRAPHS_XPATH = "//div[@class='lead']/p"
DESCRIPTION_GRAPHS_XPATH = '//article/h4[2]/following-sibling::p'

# "dir." followed by whitespace, capturing a run of non-digit characters.
DIRECTOR_REG = r'dir\.\s+([^\d]+)'
# A comma and whitespace, then a captured run that starts with a word
# character and continues with apostrophes, word characters or whitespace.
COUNTRIES_REG = r'(?:\,\s+(\w[\'\w\s]+)+)'


class HTMLScraper:
    """Hold a raw HTML document and expose a lazily built tree for it.

    ``tree`` relies on a ``make_tree`` method that is not defined in the
    visible part of this class; presumably a subclass or code elsewhere in
    the file provides it.
    """

    def __init__(self, raw_html, source_url=None):
        """Store the raw HTML and optional source URL; the tree starts unset."""
        super().__init__()
        self.raw_html = raw_html
        self.source_url = source_url
        self._tree = None

    @property
    def tree(self):
        """Return the cached tree, calling ``make_tree`` while it is still None."""
        if self._tree is not None:
            return self._tree
        self._tree = self.make_tree()
        return self._tree
# Look up a word on etymonline.com and collect the result as fold-marked
# lines, after focusing (or creating) a Vim scratch window named "etymology".
base_url = "http://etymonline.com/index.php?term={}"
# The code below treats any value greater than -1 as "a window already shows
# the etymology buffer".
etymologynr = int(vim.eval("bufwinnr('^etymology$')"))
# NOTE(review): sys.argv[0] is normally the script name; presumably the Vim
# caller assigns sys.argv before running this script -- confirm.
word_to_look_up = sys.argv[0]
# After str.format the doubled braces collapse to "<term> {{{"; together with
# term_end they bracket each entry for the foldmethod=marker setting below.
term_start = "{} {{{{{{"
term_end = "}}}"

if etymologynr > -1:
    # Reuse the existing window.
    vim.command('{}wincmd w'.format(etymologynr))
else:
    # Open a new scratch split and configure it.
    vim.command('silent keepalt belowright split etymology')
    vim.command('setlocal noswapfile nobuflisted nospell nowrap modifiable')
    vim.command('setlocal buftype=nofile bufhidden=hide')
    vim.command('setlocal foldmethod=marker textwidth=80 wrapmargin=0')

term_xpath = etree.XPath(htmltrans.css_to_xpath('dt'))
linkfixes = etree.XPath(htmltrans.css_to_xpath("a.crossreference"))
foreignfixes = etree.XPath(htmltrans.css_to_xpath("span.foreign"))

definitions = html.parse(base_url.format(uparse.quote_plus(word_to_look_up)))
lines = []

for foreignfix in foreignfixes(definitions):
    # An element's .text is None when it has no leading text; formatting it
    # would put the literal word "None" between the markers, so skip it.
    if foreignfix.text is not None:
        foreignfix.text = "<<{}>>".format(foreignfix.text)

for linkfix in linkfixes(definitions):
    if linkfix.text is not None:
        linkfix.text = ">>{}<<".format(linkfix.text)

for term_el in term_xpath(definitions):
    # Prefer the text of the first child of the <dt>; fall back to the <dt>'s
    # own text content when it has no child or the child has no text, rather
    # than raising IndexError or printing "None".
    first_text = term_el[0].text if len(term_el) else None
    term_name = first_text if first_text is not None else term_el.text_content()
    lines.append(term_start.format(term_name))
    # A <dt> that is the last node has no following sibling to read from.
    definition_el = term_el.getnext()
    if definition_el is not None:
        lines.extend(definition_el.text_content().split('\r\n'))
    lines.append(term_end)
def cssToXpath(css_selector, translator=None):
    """Translate a CSS selector into an XPath expression.

    Args:
        css_selector: The CSS selector to convert.
        translator: Object providing ``css_to_xpath``; when omitted or falsy
            a fresh ``HTMLTranslator`` is created for this call.

    Returns:
        Whatever ``translator.css_to_xpath(css_selector)`` returns.
    """
    active_translator = translator or HTMLTranslator()
    xpath_expression = active_translator.css_to_xpath(css_selector)
    return xpath_expression