def insert_data(self, data):
    """Insert or update the comic described by ``data`` in the database.

    An existing ``Comic`` whose ``comic_id`` equals ``data.get('comic_id')``
    is updated in place; when the lookup raises ``NoResultFound`` a new
    ``Comic`` is created instead.  Every field listed below is then copied
    from ``data`` (a missing key is stored as ``None``) and the session is
    committed.

    This is best effort: any exception is logged with its traceback and is
    not re-raised, after rolling back the session if one was opened.

    Args:
        data: Mapping exposing ``.get()`` with the keys ``title``, ``alt``,
            ``comic_id``, ``source_file_location``, ``saved_file_location``,
            ``posted_at``, ``raw_json``, ``time_collected`` and
            ``transcript``.

    Returns:
        None.
    """
    # Keys in ``data`` that map one-to-one onto ``Comic`` attributes.
    field_names = (
        'title',
        'alt',
        'comic_id',
        'source_file_location',
        'saved_file_location',
        'posted_at',
        'raw_json',
        'time_collected',
        'transcript',
    )

    # Bound before the ``try`` so the handler can tell whether a session
    # exists; otherwise a failure inside DBSession() would surface as an
    # UnboundLocalError from the handler and hide the real error.
    db_session = None
    try:
        db_session = DBSession()

        # Check if comic is in database, if so update else create
        try:
            comic = db_session.query(Comic).filter(
                Comic.comic_id == data.get('comic_id')).one()
        except NoResultFound:
            comic = Comic()

        for field_name in field_names:
            setattr(comic, field_name, data.get(field_name))

        db_session.add(comic)
        db_session.commit()
        # self.track_stat('rows_added_to_db', rows_affected)
    except Exception:
        # Log first so the original error is recorded even if the rollback
        # itself fails.
        logger.exception("Error adding to db {data}".format(data=data))
        if db_session is not None:
            db_session.rollback()
def insert_data(self, data):
    """Insert or update the comic described by ``data`` in the database.

    The comic id is read from ``data['id']``, falling back to
    ``data['comic_id']`` when ``'id'`` is absent or ``None``.  An existing
    ``Comic`` with that ``comic_id`` is updated in place; when the lookup
    raises ``NoResultFound`` a new ``Comic`` is created instead.  The
    remaining fields are copied from ``data`` (a missing key is stored as
    ``None``) and the session is committed.

    This is best effort: any exception is logged with its traceback and is
    not re-raised, after rolling back the session if one was opened.

    Args:
        data: Mapping exposing ``.get()`` with the keys ``id`` (or
            ``comic_id``), ``title``, ``alt``, ``file_path``, ``posted_at``
            and ``time_collected``.

    Returns:
        None.
    """
    # Bound before the ``try`` so the handler can tell whether a session
    # exists; otherwise a failure inside DBSession() would surface as an
    # UnboundLocalError from the handler and hide the real error.
    db_session = None
    try:
        db_session = DBSession()

        # The row is stored under data['id'], so the lookup has to use the
        # same value; filtering on a different key can never find the row
        # written by a previous call and would insert a duplicate each time.
        # NOTE(review): the 'comic_id' fallback assumes some callers use that
        # key instead of 'id' -- confirm against the callers.
        comic_id = data.get('id')
        if comic_id is None:
            comic_id = data.get('comic_id')

        # Check if comic is in database, if so update else create
        try:
            comic = db_session.query(Comic).filter(
                Comic.comic_id == comic_id).one()
        except NoResultFound:
            comic = Comic()

        comic.title = data.get('title')
        comic.comic_id = comic_id
        comic.alt = data.get('alt')
        comic.file_path = data.get('file_path')
        comic.posted_at = data.get('posted_at')
        comic.time_collected = data.get('time_collected')

        db_session.add(comic)
        db_session.commit()
    except Exception:
        # Log first so the original error is recorded even if the rollback
        # itself fails.
        logger.exception("Error adding to db {data}".format(data=data))
        if db_session is not None:
            db_session.rollback()
def _strip_dd_markup(markup):
    """Return ``markup`` without one leading ``<dd>`` and one trailing ``</dd>``."""
    if markup.startswith('<dd>'):
        markup = markup[len('<dd>'):]
    if markup.endswith('</dd>'):
        markup = markup[:-len('</dd>')]
    return markup


def get_info(comic_num):
    """Build a ``Comic`` from the explainxkcd wiki page for ``comic_num``.

    The page is fetched with ``make_soup``.  The result gets the following
    attributes: ``og_title`` / ``title``, ``transcript``, ``og_ttext`` /
    ``title_text``, ``explanation`` and ``img_url``.  Text fields are split
    on whitespace and passed through ``clean_text``.

    The transcript and the explanation are scraped best effort: if either
    section cannot be extracted, that attribute is set to ``[]`` instead of
    raising.

    Args:
        comic_num: Comic number; it is interpolated into the wiki URL via
            ``str()``.

    Returns:
        The populated ``Comic``, or the string
        ``"Error: comic <comic_num> not found"`` when ``make_soup`` returns
        ``None``.
    """
    url = "https://www.explainxkcd.com/wiki/index.php/%s" % str(comic_num)
    soup = make_soup(url)
    if soup is None:
        # %s instead of %d: the URL above already accepts any str()-able
        # comic number, so the error path must not raise TypeError for a
        # string argument.
        return "Error: comic %s not found" % comic_num

    result = Comic(comic_num)

    # get title
    result.og_title = get_title(comic_num)
    result.title = clean_text(result.og_title.split())

    # get transcript
    try:
        transcript = soup.find("span", {"id": "Transcript"})
        result.transcript = []
        # Start at the element that wraps the heading span and walk its
        # following siblings until the next span is reached.
        cur = transcript.parent
        while cur:
            if cur.name == 'dl':
                for dd in cur:
                    # str.strip('<dd>') treats its argument as a set of
                    # characters and would also eat leading/trailing 'd',
                    # '<', '>' and '/' from the text itself; remove only the
                    # literal wrapper tags.
                    result.transcript.append(_strip_dd_markup(str(dd)))
            elif cur.name == 'span':
                break
            cur = cur.nextSibling
        result.transcript = clean_text((" ".join(result.transcript)).split())
    except Exception:
        # Best effort: e.g. no "Transcript" span on the page, in which case
        # ``transcript`` is None and ``.parent`` raises AttributeError.
        result.transcript = []

    # get title text
    result.og_ttext = get_ttext(comic_num)
    result.title_text = clean_text(result.og_ttext.split())

    # get explanation
    try:
        explanation = soup.find("span", {"id": "Explanation"})
        result.explanation = []
        # Same sibling walk as for the transcript, collecting paragraph text.
        cur = explanation.parent
        while cur:
            if cur.name == 'p':
                result.explanation.append(cur.text)
            elif cur.name == 'span':
                break
            cur = cur.nextSibling
        result.explanation = clean_text((" ".join(result.explanation)).split())
    except Exception:
        # Best effort, as for the transcript.
        result.explanation = []

    # get image URL
    result.img_url = get_img_url(comic_num)

    return result