def getDeepAuthorPosts(self, fic: Fic) -> Dict[str, Any]:
    """Collect every post written by the fic's author across all thread pages.

    Each url from `getDeepPageUrls` is fetched with `scrapeLike` and parsed
    with html5lib; post containers carrying the `message` class and a
    `data-author` equal to `fic.getAuthorName()` are kept.

    Returns:
        A dict mapping each matching post's `id` attribute to its parsed
        element. When the same id shows up more than once the last one wins.
    """
    from bs4 import BeautifulSoup

    postsById: Dict[str, Any] = {}
    for pageUrl in self.getDeepPageUrls(fic):
        page = BeautifulSoup(self.scrapeLike(pageUrl), 'html5lib')
        # the author name is looked up again for every page, after the fetch
        authorPosts = page.find_all(
            self.postContainer,
            {'class': 'message', 'data-author': fic.getAuthorName()},
        )
        postsById.update((post.get('id'), post) for post in authorPosts)
    return postsById
def getDeepAuthorPostUrls(self, fic: Fic) -> List[str]:
    """Find the urls of every post by the fic's author without parsing html.

    Every page from `getDeepPageUrls` is fetched with `scrapeLike` and split
    on '<'. A fragment counts as an author post when its tag starts with
    `li id=` or `article class=`, the tag text contains 'message', and it
    contains the author's name either literally or with apostrophes written
    as the `&#039;` entity. These are substring checks, so unrelated tags
    that merely contain the same text will also match.

    Returns:
        `<page url>#<post id>` for each distinct post id, in the order found.
        A leading `js-` on the id attribute is dropped before the id is used.
    """
    urls = self.getDeepPageUrls(fic)
    util.logMessage(f'XenForo.getDeepAuthorPostUrls|deep page urls: {urls}')
    # TODO this should probably be more comprehensive...
    author = fic.getAuthorName()
    # Attribute values in the raw page are html-escaped, so an apostrophe in
    # the author name appears as a numeric entity rather than a bare quote.
    altAuthor = author.replace("'", '&#039;')

    postUrls: List[str] = []
    seenIdStubs = set()
    for url in urls:
        pageContent = self.scrapeLike(url)
        # See getReaderPostUrls for a fully parsed version
        for b in pageContent.split('<'):
            e = b.find('>')
            if e == -1:
                continue
            s = b[:e]
            # TODO FIXME this is bad :(
            # looking for li or article (the post container)
            if not b.startswith(('li id=', 'article class=')):
                continue
            # check for 'message' -- simulates checking for message class
            if 'message' not in s:
                continue
            # to check the data-author we simply look for the author and hope
            # there aren't collisions
            if author not in s and altAuthor not in s:
                continue
            # loop over spaced tokens looking for an unspaced id attribute
            for sb in s.split():
                if not sb.startswith('id="') or not sb.endswith('"'):
                    continue
                idStub = sb[len('id="'):-1]
                if idStub.startswith('js-'):
                    idStub = idStub[len('js-'):]
                if idStub in seenIdStubs:
                    continue
                seenIdStubs.add(idStub)
                postUrls.append(url + '#' + idStub)
    util.logMessage(f'XenForo.getDeepAuthorPostUrls|postUrls: {postUrls}')
    return postUrls
def parseInfoInto(self, fic: Fic, wwwHtml: str) -> Fic:
    """Populate `fic` with metadata and chapter content from a thread page.

    Steps, in order:
      1. set defaults (fetched time, language, status, counters, rating);
      2. read the title from the page's single <title> element, dropping
         `self.titleSuffix` when present;
      3. resolve the author from the post returned by `getRealAuthorPost`;
      4. collect chapter post urls: threadmarks page first, then the reader
         posts, then `getDeepAuthorPostUrls` as the last resort;
      5. load each chapter post (reader cache first, otherwise the thread
         page it lives on) and extract its content div;
      6. upsert the fic and every chapter, then call `updateTitle`.

    Args:
        fic: the fic record to fill in; it is mutated and upserted.
        wwwHtml: html of the thread page, used for the title.

    Returns:
        The same `fic` object.

    Raises:
        Exception: when the title, author username, author url/id format,
            a chapter post, a chapter's content div, or the updated date
            cannot be determined.
    """
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(wwwHtml, 'html5lib')

    fic.fetched = OilTimestamp.now()
    fic.languageId = Language.getId("English")  # TODO: don't hard code?
    # only a missing or broken status is reset; any other status is kept
    if fic.ficStatus is None or fic.ficStatus == FicStatus.broken:
        fic.ficStatus = FicStatus.ongoing

    # default optional fields
    fic.reviewCount = 0
    fic.favoriteCount = 0
    fic.followCount = 0

    fic.ageRating = 'M'  # TODO?

    # grab title from <title> element
    titles = soup.find('head').find_all('title')
    if len(titles) != 1:
        raise Exception(f'error: cannot find title: {len(titles)}')
    ntitle = ''
    try:
        ntitle = titles[0].get_text()
    except:
        # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit;
        # consider narrowing to Exception
        pass  # TODO FIXME
    # keep a previously stored title when the page yields only whitespace
    if fic.title is None or len(ntitle.strip()) > 0:
        fic.title = ntitle
    if len(self.titleSuffix) > 0 and fic.title.endswith(self.titleSuffix):
        fic.title = fic.title[:-len(self.titleSuffix)]
    fic.title = fic.title.strip()

    # determine author
    authorPost = self.getRealAuthorPost(fic)
    authorPostUsernames = authorPost.find_all('a', {'class': 'username'})
    if len(authorPostUsernames) < 1:
        raise Exception('error: unable to find author username')
    author = authorPostUsernames[0].get_text()
    auth_href = authorPostUsernames[0].get('href')
    authorUrl = urllib.parse.urljoin(self.baseUrl, auth_href)
    if not authorUrl.startswith(self.baseUrl):
        raise Exception('error: unknown username href format')
    # the author id is the path segment right after 'members/'
    authorId = authorUrl[len(self.baseUrl):]
    if not authorId.startswith('members/'):
        raise Exception(f'error: unknown author id format: {authorId}')
    authorId = authorId.split('/')[1]
    self.setAuthor(fic, author, authorUrl, authorId)

    if fic.description is None:
        # TODO?
        # placeholder; replaced below when the threadmarks page has a blurb
        fic.description = htmlEscape(fic.title + ' by ' + fic.getAuthorName())

    # try grabbing reader version, fallback to full pages
    threadmarksHtml = None
    try:
        sep = '?' if self.baseUrl.find('?') < 0 else '&'
        url = f'{self.baseUrl}threads/{fic.localId}/threadmarks{sep}category_id=1'
        threadmarksHtml = self.scrapeLike(url)
        self.readerSoftScrape(fic)
    except:
        # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit;
        # consider narrowing to Exception
        # note: we do this before the threadmarks check for old-style fics
        # soft scrape all thread pages to ensure we have everything
        self.deepSoftScrape(fic)

    postSoups: Dict[str, Any] = {}
    postUrls: List[str] = []
    chapterTitles = {}
    try:
        # scrape the threadmarks page, assuming there is one
        # NOTE(review): threadmarksHtml is still None when the scrape above
        # failed; presumably the parse or lookups below then raise and we
        # land in the fallback handler -- TODO confirm
        threadmarksSoup = BeautifulSoup(threadmarksHtml, 'html5lib')

        # attempt to extract a fic description
        threadmarkExtraInfo = threadmarksSoup.find(
            'div', {'class': 'threadmarkListingHeader-extraInfo'})
        if threadmarkExtraInfo is not None:
            bbWrapper = threadmarkExtraInfo.find('div', {'class': 'bbWrapper'})
            if bbWrapper is not None:
                desc = bbWrapper.decode_contents()
                descView = HtmlView(desc, markdown=False)
                # one <p> per entry of descView.text
                fic.description = ''.join(
                    [f'<p>{l}</p>' for l in descView.text])

        # determine chapter count based on threadmarks
        threadmarkList = threadmarksSoup.find('div', {'class': 'threadmarkList'})
        threadmarks = None
        if threadmarkList is not None:
            threadmarks = threadmarkList.find_all(
                'li', {'class': 'threadmarkListItem'})
        else:
            # alternate markup: a block-body div holding li (or tr) rows
            threadmarkList = threadmarksSoup.find(
                'div', {'class': 'block-body--threadmarkBody'})
            if threadmarkList is None:
                raise Exception('error: unable to find threadmark menu')
            # an ellipsis icon means part of the list is collapsed
            if threadmarkList.find(class_='fa-ellipsis-h') is not None:
                raise Exception('unable to handle elided threamdarks')
            threadmarks = threadmarkList.find_all('li')
            if len(threadmarks) == 0:
                threadmarks = threadmarkList.find_all('tr')
            # NOTE(review): assumed to belong to this branch only -- confirm
            util.logMessage(
                f'XenForo|new threadmarks count|{len(threadmarks)}')

        for threadmark in threadmarks:
            # rows carrying a message-newIndicator span are skipped
            if threadmark.find(
                    'span', {'class': 'message-newIndicator'}) is not None:
                continue
            a = threadmark.find('a')
            purl = a.get('href')
            # make relative thread links absolute against baseUrl
            if purl.startswith('threads/'):
                purl = '{}{}'.format(self.baseUrl, purl)
            elif purl.startswith('/threads/'):
                purl = '{}{}'.format(self.baseUrl, purl[1:])
            postUrls += [purl]
            # chapter titles are keyed by 1-based chapter number
            chapterTitles[len(postUrls)] = a.getText().strip()

        try:
            postSoups, _ = self.getReaderPosts(fic)
        except Exception as ie:
            # FIXME oh boy:
            # https://forum.questionablequesting.com/threads/worm-cyoa-things-to-do-in-brockton-bay-when-youre-a-bored-demigod.1247/reader
            # Reader page says 36 threadmarks, but actual threadmark list says 33
            # First reader page abruptly stops at 27 threadmarks
            util.logMessage(
                'XenForoAdapter: unable to getReaderPosts: {}\n{}'.format(
                    ie, traceback.format_exc()))
    except Exception as e:
        util.logMessage(
            'XenForoAdapter: unable to parse threadmarks: {}\n{}'.format(
                e, traceback.format_exc()))
        # second attempt: take both urls and titles from the reader pages
        try:
            postUrls = self.getReaderPostUrls(fic)
            postSoups, chapterTitles = self.getReaderPosts(fic)
        except Exception as ie:
            util.logMessage(
                'XenForoAdapter: unable to parse reader posts: {}\n{}'.format(
                    ie, traceback.format_exc()))
            postUrls = self.getDeepAuthorPostUrls(fic)
            # if we fallback to here, don't immediately setup postSoups at all;
            # they'll be fetched as needed later

    fic.chapterCount = len(postUrls)

    chapterPosts: List[Optional[str]] = []
    chapterUrls: List[str] = []
    chapterPostIds: List[str] = []
    # one-page cache so consecutive chapters on the same thread page are
    # parsed only once
    lastSoupUrl: Optional[str] = None
    lastSoup: Optional[Any] = None

    for purl in postUrls:
        parts = purl.split('#')
        burl = parts[0]
        # a url without a fragment refers to the author's own post
        postId = authorPost.get('id') if len(parts) < 2 else parts[1]

        rawPost = None
        # first try getting the post from the reader pages
        if postId in postSoups and postSoups[postId] is not None:
            rawPost = str(postSoups[postId])
        else:
            # if needed, fallback to grabbing that page from the entire thread
            pageSoup = None
            if lastSoupUrl is not None and lastSoupUrl == burl:
                pageSoup = lastSoup
            else:
                pageContent = self.scrapeLike(burl)
                pageSoup = BeautifulSoup(pageContent, 'html5lib')
                lastSoupUrl = burl
                lastSoup = pageSoup
            assert (pageSoup is not None)
            if postId is not None:
                poss = pageSoup.find_all(self.postContainer, {'id': postId})
                if len(poss) != 1:
                    # XenForo2 often has js- prefixed on the actual id attr
                    poss = pageSoup.find_all(self.postContainer,
                                             {'id': 'js-' + postId})
                if len(poss) != 1:
                    raise Exception(
                        f'error: cannot find post for chapter {postId}')
                rawPost = str(poss[0])
            else:
                # no id at all: take the first message container on the page
                rawPost = str(
                    pageSoup.find_all(self.postContainer, {'class': 'message'})[0])

        chapterPosts += [rawPost]
        chapterUrls += [burl]
        chapterPostIds += [postId]

    # totals and dates are recomputed from the chapter posts below
    fic.wordCount = 0
    fic.published = None
    fic.updated = None

    chapterContents: List[str] = []
    for rawPost in chapterPosts:
        post = BeautifulSoup(rawPost, 'html5lib')
        content = post.find_all(
            'div', {'class': ['messageContent', 'message-content']})
        if len(content) != 1:
            raise Exception('error: cannot find content for chapter post')
        content = content[0]

        # put a <br> in front of each last-edit div inside the content;
        # the tag is created from the thread-page soup, not from `post`
        lastEditedDivs = content.find_all('div', {'class': 'message-lastEdit'})
        for lastEditedDiv in lastEditedDivs:
            br = soup.new_tag("br")
            lastEditedDiv.insert_before(br)

        chapterContents += [str(content)]
        # word count is whitespace-separated tokens of the serialized html
        fic.wordCount += len(str(content).split())

        uts = self.getPostUpdatedOrPublished(post)

        # first post sets published; every post overwrites updated, so the
        # last chapter's timestamp is the one that sticks
        if fic.published is None:
            fic.published = OilTimestamp(uts)
        fic.updated = OilTimestamp(uts)

    # still None only when there were no chapter posts at all
    if fic.updated is None:
        raise Exception(
            f'unable to determine updated date: {len(chapterPosts)} {len(postUrls)}'
        )

    fic.upsert()
    for cid in range(fic.chapterCount):
        chapter = fic.chapter(cid + 1)
        chapter.url = chapterUrls[cid]
        chapter.localChapterId = chapterPostIds[cid]
        if (cid + 1) in chapterTitles:
            chapter.title = chapterTitles[(cid + 1)]
        chapter.upsert()

        chapter.setHtml(str(chapterContents[cid]))

    # TODO: word count, published, updated can only be found once all chapters

    # each post is inside an li id="post-{number}" class="message"
    # each post has data-author="{author}"

    self.updateTitle(fic)

    return fic
def parseInfoInto(self, fic: Fic, html: str) -> Fic:
    """Fill `fic` from the html of an ao3 work page and return it.

    Reads title, summary, chapter/word/kudos counts, published and updated
    dates, completion status and author from the page. For multi-chapter
    works the chapter dropdown (`selected_id`) supplies each chapter's id,
    url and title. Fandom tags are translated to canonical fandoms; when
    exactly one fandom results, relationship tags are split on '/' and each
    part is added as a character of that fandom.

    Args:
        fic: the fic record to fill in; mutated, and upserted when it has
            more than one chapter.
        html: html of the work page.

    Returns:
        The same `fic` object.

    Raises:
        Exception: when the title is not found exactly once, the status
            label is unrecognised, the chapter dropdown size disagrees with
            the chapter count, or a relationship has more than 8 parts.
    """
    global ao3FandomsMap
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser')

    def _ddText(className):
        # space-joined, stripped children of the <dd> with this class
        return ' '.join(soup.find('dd', {'class': className}).contents).strip()

    fic.fetched = OilTimestamp.now()
    fic.languageId = Language.getId("English")  # TODO: don't hard code?

    headings = soup.findAll('h2', {'class': 'title heading'})
    if len(headings) != 1:
        raise Exception('unable to find ao3 title {}'.format(fic.url))
    fic.title = headings[0].get_text().strip()

    # summary: look page-wide first, then only inside the single preface group
    summaries = soup.findAll('div', {'class': 'summary module'})
    if len(summaries) != 1:
        prefaces = soup.findAll('div', {'class': 'preface group'})
        if len(prefaces) == 1:
            summaries = prefaces[0].findAll('div', {'class': 'summary module'})

    if len(summaries) == 1:
        quote = summaries[0].find('blockquote')
        fic.description = quote.decode_contents(formatter='html').strip()
    elif fic.description is None:
        fic.description = "{no summary}"
        # raise Exception('unable to find ao3 summary {}'.format(fic.localId))

    fic.ageRating = '<unkown>'

    # TODO: error handling
    # chapters read "<completed>/<total>", with '?' for an unknown total
    chapterParts = _ddText('chapters').split('/')
    completedChapters = int(chapterParts[0])
    totalChapters = None if chapterParts[1] == '?' else int(chapterParts[1])
    fic.chapterCount = completedChapters

    fic.wordCount = int(_ddText('words'))

    fic.reviewCount = 0
    fic.favoriteCount = 0
    kudosDd = soup.find('dd', {'class': 'kudos'})
    if kudosDd is not None:
        fic.favoriteCount = int(' '.join(kudosDd.contents).strip())
    fic.followCount = 0

    publishedText = _ddText('published')
    fic.published = OilTimestamp(
        util.parseDateAsUnix(publishedText, fic.fetched))

    if fic.updated is None:
        fic.updated = fic.published
    if fic.updated is not None:
        fic.updated = OilTimestamp(
            util.parseDateAsUnix(fic.updated, fic.fetched))

    fic.ficStatus = FicStatus.ongoing  # TODO chapter/chapters?
    if totalChapters is None or completedChapters < totalChapters:
        fic.ficStatus = FicStatus.ongoing

    # the status <dt> label decides complete vs ongoing; its <dd> holds the
    # date used as the updated timestamp
    statusDt = soup.find('dt', {'class': 'status'})
    if statusDt is not None:
        statusLabel = statusDt.contents[0]
        if statusLabel == 'Completed:':
            fic.ficStatus = FicStatus.complete
        elif statusLabel == 'Updated:':
            fic.ficStatus = FicStatus.ongoing
        else:
            raise Exception('unkown status: {}'.format(statusLabel))
        fic.updated = OilTimestamp(
            util.parseDateAsUnix(_ddText('status'), fic.fetched))

    byline = soup.find('h3', {'class': 'byline heading'})
    authorLink = byline.find('a')
    if authorLink is not None:
        authorUrl = authorLink.get('href')
        author = ' '.join(authorLink.contents)
        # the pseud doubles as the author id -- map pseudo to real?
        self.setAuthor(fic, author, authorUrl, author)
    elif fic.authorId is None or len(fic.getAuthorName()) <= 0:
        # first loaded after it was already set to anonymous; a fic that
        # already has an author keeps it when the work later goes anonymous
        self.setAuthor(fic, 'Anonymous', '', 'Anonymous')

    if fic.chapterCount > 1:
        fic.upsert()
        chapterOptions = soup.find(id='selected_id').findAll('option')
        # note: ao3 sometimes says there are less chapters than there really
        # are, possibly due to caching on their end. We just ensure there's
        # _at least_ chapterCount chapters, then fetch whatever the dropdown
        # tells us to
        if len(chapterOptions) > fic.chapterCount:
            fic.chapterCount = len(chapterOptions)
            fic.upsert()
        if len(chapterOptions) != fic.chapterCount:
            raise Exception('mismatching localChapterId count?')

        for cid, option in enumerate(chapterOptions, 1):
            chap = fic.chapter(cid)
            localChapterId = option.get('value')
            chap.url = '{}{}/chapters/{}?view_adult=true'.format(
                self.baseUrl, fic.localId, localChapterId
            )
            chap.localChapterId = localChapterId
            chap.title = option.getText().strip()
            if chap.title is not None:
                chap.title = util.cleanChapterTitle(chap.title, cid)
            chap.upsert()

    # TODO: this seriously needs reworked
    # (lower-cased ao3 fandom tags) -> (canonical fandom names to add, in
    # order). An empty name tuple means the tag is recognised but skipped.
    fandomGroups = [
        (('harry potter - fandom',
          'fantastic beasts and where to find them (movies)',
          'harry potter next generation - fandom'), ('Harry Potter',)),
        (('sherlock - fandom', 'sherlock (tv)',
          'sherlock holmes & related fandoms',
          'sherlock holmes - arthur conan doyle',
          'sherlock holmes (downey films)'), ('Sherlock Holmes',)),
        (('furry (fandom)', 'harry - fandom'), ()),  # skip
        (('fleurmione - fandom',), ()),  # skip
        (('skyfall (2012) - fandom',), ('James Bond',)),
        (('orphan black (tv)',), ('Orphan Black',)),
        (('naruto', 'naruto shippuden', 'naruto shippuuden - fandom'),
         ('Naruto',)),
        (('naruto/harry potter',), ('Naruto', 'Harry Potter')),
        (('bleach',), ('Bleach',)),
        (('iron man (movies)', 'iron man - all media types',
          'iron man (comic)', 'iron man - fandom', 'iron man (comics)'),
         ('Iron Man',)),
        (('the avengers (marvel) - all media types',
          'the avengers (marvel movies)',
          'the avengers - ambiguous fandom',
          'the avengers (2012)',
          'the avengers',
          'avengers (marvel) - all media types',
          'marvel avengers movies universe',
          'avengers'), ('Avengers',)),
        (('marvel 616',), ('Marvel', 'Marvel 616')),
        (('thor (movies)', 'thor - all media types'), ('Thor',)),
        (('captain america (movies)', 'captain america - all media types',
          'captain america (comics)'), ('Captain America',)),
        (('avatar: the last airbender', 'avatar: legend of korra',
          'avatar the last airbender - fandom'), ('Avatar',)),
        (('original work',), ('Original Work',)),
        (('stargate atlantis',), ('Stargate Atlantis',)),
        (('stargate sg-1',), ('Stargate SG-1',)),
        (('stargate - all series',), ('Stargate Atlantis', 'Stargate SG-1')),
        (('agents of s.h.i.e.l.d. (tv)',), ('Avengers',)),
        (('supernatural',), ('Supernatural',)),
        (('teen wolf (tv)',), ('Teen Wolf',)),
        (('grimm (tv)',), ('Grimm',)),
        (('the amazing spider-man (movies - webb)',
          'spider-man - all media types',
          'spider-man: homecoming (2017)'), ('Spiderman',)),
        (('x-men - all media types', 'x-men (movieverse)',
          'x-men (comicverse)'), ('X-Men',)),
        (('lord of the rings - j. r. r. tolkien',
          'the lord of the rings - j. r. r. tolkien'),
         ('Lord of the Rings',)),
        (('crisis core: final fantasy vii',
          'compilation of final fantasy vii',
          'final fantasy vii'), ('Final Fantasy VII', 'Final Fantasy')),
        (('sen to chihiro no kamikakushi | spirited away',),
         ('Spirited Away',)),
        (('howl no ugoku shiro | howl\'s moving castle',),
         ('Howl\'s Moving Castle',)),
        (('rise of the guardians (2012)',), ('Rise of the Guardians',)),
        (('doctor who', 'doctor who (2005)', 'doctor who & related fandoms'),
         ('Doctor Who',)),
        (('daredevil (tv)', 'daredevil (comics)'), ('DareDevil',)),
        (('labyrinth (1986)',), ('Labyrinth',)),
        (('gravity falls',), ('Gravity Falls',)),
        (('once upon a time (tv)',), ('Once Upon a Time',)),
        (('doctor strange (comics)',), ('Doctor Strange',)),
        (('the sentinel',), ('The Sentinel',)),
        (('teen titans (animated series)',), ('Teen Titans',)),
        (('dcu', 'dcu animated', 'dcu (comics)', 'dc extended universe',
          'dc animated universe'), ('DC',)),
        (('vampire hunter d',), ('Vampire Hunter D',)),
        (('homestuck',), ('Homestuck',)),
        (('one piece',), ('One Piece',)),
        (('batman (movies - nolan)',), ('Batman',)),
        (('die hard (movies)',), ('Die Hard',)),
        (('discworld - terry pratchett',), ('Discworld',)),
        (('gossip girl',), ('Gossip Girl',)),
        (('a song of ice and fire - george r. r. martin',
          'a song of ice and fire & related fandoms'),
         ('A Song of Ice and Fire',)),
        (('supergirl (tv 2015)',), ('Supergirl',)),
        (('merlin (tv)',), ('Merlin',)),
        (('star trek',), ('Star Trek',)),
        (('steven universe (cartoon)',), ('Steven Universe',)),
        (('hellsing',), ('Hellsing',)),
        (('the breaker',), ('The Breaker',)),
        (('smallville',), ('Smallville',)),
        (('베리타스 | veritas (manhwa)',), ('Veritas (manhwa)',)),
        (('guardians of childhood - william joyce',),
         ('Guardians of Childhood',)),
        (('person of interest (tv)',), ('Person of Interest',)),
        (('james bond (craig movies)',), ('James Bond',)),
        (('the bourne legacy (2012)',), ('Jason Bourne',)),
        (('numb3rs',), ('Numb3rs',)),
        (('temeraire - naomi novik',), ('Temeraire',)),
        (('twilight series - stephenie meyer',), ('Twilight',)),
        (('dungeons and dragons - fandom',), ('Dungeons and Dragons',)),
        (('american horror story', 'american horror story: cult'),
         ('American Horror Story',)),
        (('worm (web serial novel)',
          'worm - wildbow',
          'parahumans series - wildbow',
          'worm (web serial) | wildbow',
          'worm - fandom',
          'parahumans - fandom',
          'worm (parahumans)',
          'worm (web serial)',
          'worm | parahumans',
          'worm (web novel)'), ('Worm',)),
        (('toaru kagaku no railgun | a certain scientific railgun',),
         ('A Certain Scientific Railgun',)),
        (('toaru majutsu no index | a certain magical index',),
         ('A Certain Magical Index',)),
        (('cthulhu mythos - h. p. lovecraft',), ('Cthulhu',)),
        (('transformers - all media types',), ('Transformers',)),
        (('destiny (video game)',), ('Destiny',)),
        (('fandom - fandom', 'meta - fandom'), ()),  # >_>
        (('house m.d.',), ('House, M.D.',)),
        (('the hobbit (jackson movies)',), ('The Hobbit',)),
        (('doctor strange (2016)',), ('Doctor Strange',)),
        (('arrow (tv 2012)',), ('Arrow',)),
        (('the flash (tv 2014)',), ('Flash',)),
        (('senki zesshou symphogear',), ('Symphogear',)),
        (('fullmetal alchemist: brotherhood & manga',
          'fullmetal alchemist - all media types',
          'fullmetal alchemist (anime 2003)'), ('Fullmetal Alchemist',)),
        (('star wars - all media types',
          'star wars episode vii: the force awakens (2015)',
          'star wars prequel trilogy'), ('Star Wars',)),
        (('guardians of the galaxy (2014)',
          'guardians of the galaxy - all media types',
          'guardians of the galaxy (movies)'),
         ('Guardians of the Galaxy',)),
        (('ant man (2015)', 'ant-man (movies)'), ('Ant Man',)),
        (('the defenders (marvel tv)',), ('The Defenders',)),
        (('elementary (tv)',), ('Elementary',)),
        (('good omens - neil gaiman & terry pratchett',), ('Good Omens',)),
        (('danny phantom',), ('Danny Phantom',)),
        (('katekyou hitman reborn!',), ('Katekyo Hitman Reborn!',)),
        (('welcome to night vale',), ('Welcome to Night Vale',)),
        (('ncis',), ('NCIS',)),
        (('torchwood',), ('Torchwood',)),
        (('magic: the gathering',), ('Magic: The Gathering',)),
        (('overwatch (video game)',), ('Overwatch',)),
        (('detroit: become human (video game)',),
         ('Detroit: Become Human',)),
        (('greek and roman mythology',), ()),
        (('life is strange (video game)',),
         ('life is strange (video game)',)),
        (('akatsuki no yona | yona of the dawn',), ('Yona of the Dawn',)),
        (('僕のヒーローアカデミア | boku no hero academia | my hero academia',),
         ('My Hero Academia',)),
        (('voltron: legendary defender',), ('Voltron',)),
        (('selfie (tv)',), ('Selfie',)),
        (('suits (tv)',), ('Suits',)),
        (('fruits basket',), ('Fruits Basket',)),
        (('hetalia: axis powers',), ('Hetalia: Axis Powers',)),
        (('carmilla (web series)',), ('Carmilla',)),
        (('the dresden files - jim butcher',), ('Dresden Files',)),
        (('girl genius',), ('Girl Genius',)),
        (('unspecified fandom',), ()),  # TODO?
        (('nightwing (comics)',), ('Nightwing',)),
        (('books of the raksura - martha wells',), ('Books of the Raksura',)),
        (('fall of ile-rien - martha wells',), ('Fall of Ile-Rien',)),
        (('vorkosigan saga - lois mcmaster bujold',), ('Vorkosigan Saga',)),
        (('highlander: the series', 'highlander - all media types'),
         ('Highlander',)),
        (('yoroiden samurai troopers | ronin warriors',),
         ('Ronin Warriors',)),
        (('hockey rpf',), ('Hockey RPF',)),
        (('pacific rim (2013)',), ('Pacific Rim',)),
        (('enchanted forest chronicles - patricia wrede',),
         ('Enchanted Forest Chronicles',)),
        (('tortall - tamora pierce',), ('Tortall',)),
        (('protector of the small - tamora pierce',),
         ('Protector of the Small',)),
        (('leverage',), ('Leverage',)),
        (('valdemar series - mercedes lackey',), ('Valdemar Series',)),
        (('b.p.r.d.', 'bureau for paranormal research and defense'),
         ('B.P.R.D.',)),
        (('hellboy (comic)',), ('Hellboy',)),
        (('sga/avatar',), ('Stargate Atlantis', 'Avatar')),
        (('annihilation (2018 garland)',), ('Annihilation',)),
        (('craft sequence - max gladstone',), ('Craft Sequence',)),
        (('the good place (tv)',), ('The Good Place',)),
        (('jessica jones (tv)',), ('Jessica Jones',)),
        (('mad max series (movies)',), ('Mad Max',)),
        (('american gods (tv)',), ('American Gods',)),
        (('terminator: the sarah connor chronicles',),
         ('Terminator: The Sarah Connor Chronicles', 'Terminator')),
        (('wolf 359 (radio)',), ('Wolf 359',)),
        (('shadowrun: dragonfall',), ('Shadowrun',)),
        (('ars paradoxica (podcast)',), ('Ars Paradoxica',)),
        (('love is strange - fandom',), ('Love is Strange',)),
        (('dune - all media types',), ('Dune',)),
        (('dragon age: origins',), ('Dragon Age: Origins',)),
        (('game of thrones (tv)',), ('Game of Thrones',)),
        (('chronicles of amber - roger zelazny',), ('Chronicles of Amber',)),
        (('the southern reach trilogy - jeff vandermeer',),
         ('The Southern Reach Trilogy',)),
        (('continuum (tv)',), ('Continuum',)),
        (('mage: the ascension',), ('Mage: The Ascension',)),
        (('the good wife (tv)', 'good wife (tv)'), ('The Good Wife',)),
        (('alliance-union - c. j. cherryh',), ('Alliance-Union',)),
        (('indexing - seanan mcguire',), ('Indexing',)),
        (('ultraviolet (tv)',), ('Ultraviolet',)),
        (('veronica mars (tv)',), ('Veronica Mars',)),
        (('secret circle (tv)',), ('Secret Circle',)),
        (('mahou shoujo madoka magika | puella magi madoka magica',),
         ('Madoka Magica',)),
        (('agent carter (tv)',), ('Agent Carter',)),
        (('dracula & related fandoms',), ('Dracula',)),
        (('dragon ball',), ('Dragon Ball',)),
        (('mass effect - all media types',), ('Mass Effect',)),
        (('firefly', 'serenity (2005)'), ('Firefly',)),
    ]
    # flatten to tag -> names; setdefault keeps "first listed wins"
    knownFandoms = {}
    for aliases, canonicalNames in fandomGroups:
        for alias in aliases:
            knownFandoms.setdefault(alias, canonicalNames)

    fandomDd = soup.find('dd', {'class': 'fandom tags'})
    if fandomDd is not None:
        for fandomTag in fandomDd.findAll('a', {'class': 'tag'}):
            originalF = fandomTag.contents[0].strip()
            f = originalF.lower()

            # "harry potter <anything> rowling" is the one non-exact rule
            if f.startswith("harry potter ") and f.endswith("rowling"):
                canonicalNames = ('Harry Potter',)
            else:
                canonicalNames = knownFandoms.get(f)

            if canonicalNames is not None:
                for canonicalName in canonicalNames:
                    fic.add(Fandom.define(canonicalName))
                continue

            # not in the table above: consult the module-level
            # ao3FandomsMap, whose entries are (tag names, fandom names)
            matchedAny = False
            for fm in ao3FandomsMap:
                if not any(f == uf.lower().strip() for uf in fm[0]):
                    continue
                matchedAny = True
                for mf in fm[1]:
                    fic.add(Fandom.define(mf))
            if not matchedAny:
                util.logMessage(f'ao3|unknown fandom|{fic.url}|{originalF}')
                #raise Exception('unknown fandom: {} "{}"'.format(fic.url, originalF))

    ourDoms = fic.fandoms()
    # we have a canonical fandom, try to find our characters
    relationshipDd = None
    if len(ourDoms) == 1:
        relationshipDd = soup.find('dd', {'class': 'relationship tags'})
    if relationshipDd is not None:
        for relationshipTag in relationshipDd.findAll('a', {'class': 'tag'}):
            r = relationshipTag.contents[0]
            chars = r.split('/')
            if len(chars) > 8:  # TODO: sometimes more?
                raise Exception('unable to parse relationship: {}'.format(r))
            for char in chars:
                fic.add(Character.defineInFandom(ourDoms[0], char, self.ftype))

    return fic