def put(self, projectKey, key):
    """HTTP PUT handler: update an existing issue.

    Request fields that are present overwrite the stored values; absent
    fields leave the issue's current data untouched.  Responds with a
    ``Location`` header and the issue URL serialized as JSON.

    NOTE(review): ``Issue``/``Project`` lookups and ``json`` come from the
    surrounding module — presumably a GAE-style handler; confirm against
    the rest of the file.
    """
    issue = Issue().get(key)
    issue.project = Project().get(projectKey)
    # "or" keeps the stored value when the request omits the field.
    issue.summary = self.request.get('summary') or issue.summary
    issue.text = self.request.get('text') or issue.text
    closed = self.request.get('closed')
    if closed:
        # Flag arrives as text; only the literal "true" closes the issue.
        issue.closed = closed.lower() == "true"
    issue.put()
    url = issue.url(self.request.url)
    self.response.headers.add_header("Location", url)
    self.response.out.write(json(url))
class IssueScraper(Scraper):
    """Scraper for one issue page: builds an ArticleScraper per article.

    Populates ``self.model`` (an ``Issue``) with its title and the list of
    ``Article`` models created for each article found on the page.
    """

    # NOTE(review): this is a *class-level* Issue instance, shared by every
    # IssueScraper that does not get a fresh model assigned by its factory
    # (get_issue_scrapers does assign one).  Left as-is to avoid breaking
    # callers that read IssueScraper.model — confirm before removing.
    model = Issue()

    def scrap_and_assign(self):
        """Run every article scraper for this issue."""
        for article_scraper in self.get_articles_scrapers():
            article_scraper.scrap_and_assign()

    def get_articles_scrapers(self):
        """Yield an ArticleScraper per article listed on the issue page.

        Side effects: sets ``self.model.title`` and appends one ``Article``
        model per article to ``self.model.articles``.
        """
        # Issue number, title and cassette prefix are per-page invariants;
        # hoist them out of the per-article loop.
        iss = self.extract_issue(self.get_issue_number())
        cassette_prefix = self.generate_cassette_prefix(
            volume=iss.get("volume"),
            number=iss.get("number"),
            year=iss.get("year"))
        self.model.title = self.get_title()
        for li in self._extract_articles_html():
            url = PathConfig.ERUDIT_PATH + li.h6.a["href"]
            article_title = li.h6.a.text.strip()
            # Last non-empty path segment identifies the article.
            article = [seg for seg in url.split("/") if seg][-1]
            cassette_name = cassette_prefix + "/" + article
            scraper = ArticleScraper(revue=self.revue, url=url,
                                     cassette_name=cassette_name)
            scraper.model = Article()  # fix: not sure how not to do this
            self.model.articles.append(scraper.model)
            scraper.model.issue = self.model
            scraper.model.pages = self.get_pages(li)
            scraper.model.title = article_title
            yield scraper

    def get_title(self):
        """Return the issue theme title from the page header."""
        return self.soup.find_all(
            "span", {"class": "theme-title"})[0].text.strip()

    def get_issue_number(self):
        """Return the raw issue-number text (volume/number/year string)."""
        return self.soup.find_all("span", {"class": "issue-number"})[0].text

    def get_pages(self, li):
        """Return the page range for an article record, or None.

        Returns None both when the record has no pages element and when
        the element's text does not match ``REGEXP_PAGES`` (the original
        raised IndexError in the first case, contradicting its contract).
        """
        p = li.find("p", {"class": "bib-record__pages"})
        if p is None:
            return None
        pages = re.findall(self.REGEXP_PAGES, p.text)
        return next(iter(pages), None)

    def _extract_articles_html(self):
        """Yield the <li> element of each bibliographic record on the page."""
        for li in self.soup.find_all("li", {"class": "bib-record"}):
            yield li
def get_issue_scrapers(self):
    """Yield one IssueScraper per issue listed on the revue page.

    Side effects: appends a fresh ``Issue`` model (linked back to this
    revue's model) to ``self.model.issues`` for every issue found.
    """
    for li in self._extract_issues_html():
        url = PathConfig.ERUDIT_PATH + li.a["href"]
        parsed = self.extract_issue(li.span.text)
        volume = parsed.get("volume")
        number = parsed.get("number")
        year = parsed.get("year")
        cassette_name = self.generate_cassette_prefix(
            volume=volume, number=number, year=year)
        scraper = IssueScraper(revue=self.revue, url=url,
                               cassette_name=cassette_name)
        # Give each scraper its own model instance (original TODO: find a
        # cleaner way than assigning it from the outside).
        scraper.model = Issue()
        self.model.issues.append(scraper.model)
        scraper.model.revue = self.model
        scraper.model.volume = volume
        scraper.model.number = number
        scraper.model.year = year
        yield scraper