Пример #1
0
 def getDeepAuthorPosts(self, fic: Fic) -> Dict[str, Any]:
     """Gather the author's post elements from every deep page of a fic.

     Each URL returned by ``getDeepPageUrls`` is fetched with
     ``scrapeLike`` and parsed with html5lib. Elements named
     ``self.postContainer`` whose class is ``message`` and whose
     ``data-author`` attribute equals ``fic.getAuthorName()`` are kept.

     Returns:
         A dict mapping each matching element's ``id`` attribute to the
         element itself. If the same id shows up on a later page, the
         later element replaces the earlier one.
     """
     from bs4 import BeautifulSoup
     postsById: Dict[str, Any] = {}
     for pageUrl in self.getDeepPageUrls(fic):
         html = self.scrapeLike(pageUrl)
         page = BeautifulSoup(html, 'html5lib')
         matches = page.find_all(
             self.postContainer,
             {'class': 'message', 'data-author': fic.getAuthorName()},
         )
         # key by the element id; later duplicates overwrite earlier ones
         postsById.update({match.get('id'): match for match in matches})
     return postsById
Пример #2
0
    def getDeepAuthorPostUrls(self, fic: Fic) -> List[str]:
        """Collect URLs of the author's posts by scanning raw thread pages.

        Every page returned by ``getDeepPageUrls`` is fetched with
        ``scrapeLike`` and scanned tag by tag as plain text, without a real
        HTML parser (see getReaderPostUrls for a fully parsed version). An
        opening tag is treated as one of the author's posts when it:

        * starts with ``li id=`` or ``article class=`` (the post container),
        * contains the text ``message`` (stands in for a class check), and
        * contains the author's name, either verbatim or with apostrophes
          written as a numeric character reference.

        Returns:
            ``<page url>#<post id>`` strings in discovery order, one per
            distinct post id. A leading ``js-`` is stripped from ids.
        """
        urls = self.getDeepPageUrls(fic)
        util.logMessage(
            f'XenForo.getDeepAuthorPostUrls|deep page urls: {urls}')
        # TODO this should probably be more comprehensive...
        author = fic.getAuthorName()
        # The page is matched as raw markup, so an apostrophe in the author's
        # name may be present as an HTML numeric character reference rather
        # than a literal quote. Accept both the short and zero-padded forms.
        authorForms = (
            author,
            author.replace("'", '&#39;'),
            author.replace("'", '&#039;'),
        )
        postUrls: List[str] = []
        seenIdStubs = set()
        for url in urls:
            pageContent = self.scrapeLike(url)

            # See getReaderPostUrls for a fully parsed version
            for b in pageContent.split('<'):
                e = b.find('>')
                if e == -1:
                    continue
                # s is the inside of the opening tag: name plus attributes
                s = b[:e]
                # TODO FIXME this is bad :(
                # looking for li or article (the post container)
                if not b.startswith(('li id=', 'article class=')):
                    continue
                # check for 'message' -- simulates checking for message class
                if 'message' not in s:
                    continue
                # to check the data-author we simply look for the author and
                # hope there aren't collisions
                if not any(form in s for form in authorForms):
                    continue
                # loop over spaced tokens looking for an unspaced id attribute
                for sb in s.split():
                    if not sb.startswith('id="') or not sb.endswith('"'):
                        continue
                    idStub = sb[len('id="'):-1]
                    if idStub.startswith('js-'):
                        idStub = idStub[len('js-'):]
                    # only the first sighting of an id yields a url
                    if idStub not in seenIdStubs:
                        postUrls.append(url + '#' + idStub)
                        seenIdStubs.add(idStub)
        util.logMessage(f'XenForo.getDeepAuthorPostUrls|postUrls: {postUrls}')
        return postUrls
Пример #3
0
    def parseInfoInto(self, fic: Fic, wwwHtml: str) -> Fic:
        """Fill ``fic`` with metadata and chapters parsed from a thread.

        Steps, in order:

        1. Title from the page's single ``<title>`` element, with
           ``self.titleSuffix`` removed when present.
        2. Author from the first ``a.username`` link inside the post
           returned by ``getRealAuthorPost``.
        3. Chapter post urls, trying in turn: the threadmarks page, the
           reader pages (``getReaderPostUrls`` / ``getReaderPosts``), and
           finally ``getDeepAuthorPostUrls``.
        4. Each chapter's post markup, taken from the reader posts when
           available and otherwise looked up by id on the thread page.
        5. Word count and published/updated timestamps derived from the
           chapter posts, after which the fic and its chapters are upserted.

        Args:
            fic: the fic record to populate; mutated in place.
            wwwHtml: html of the thread page; only the title is read from it.

        Returns:
            The same ``fic`` object.

        Raises:
            Exception: when the title, author link, a chapter post, a
                chapter's content div, or any updated date cannot be found,
                or when the author href has an unexpected format.
        """
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(wwwHtml, 'html5lib')

        fic.fetched = OilTimestamp.now()
        fic.languageId = Language.getId("English")  # TODO: don't hard code?
        if fic.ficStatus is None or fic.ficStatus == FicStatus.broken:
            fic.ficStatus = FicStatus.ongoing

        # default optional fields
        fic.reviewCount = 0
        fic.favoriteCount = 0
        fic.followCount = 0
        fic.ageRating = 'M'  # TODO?

        # grab title from <title> element
        titles = soup.find('head').find_all('title')
        if len(titles) != 1:
            raise Exception(f'error: cannot find title: {len(titles)}')
        ntitle = ''
        try:
            ntitle = titles[0].get_text()
        except Exception:
            # best effort: keep the empty title, but let KeyboardInterrupt
            # and SystemExit propagate instead of swallowing them
            pass  # TODO FIXME
        # a non-blank parsed title always wins; a blank one is only stored
        # when the fic has no title at all
        if fic.title is None or len(ntitle.strip()) > 0:
            fic.title = ntitle
        if len(self.titleSuffix) > 0 and fic.title.endswith(self.titleSuffix):
            fic.title = fic.title[:-len(self.titleSuffix)]
        fic.title = fic.title.strip()

        # determine author
        authorPost = self.getRealAuthorPost(fic)
        authorPostUsernames = authorPost.find_all('a', {'class': 'username'})
        if len(authorPostUsernames) < 1:
            raise Exception('error: unable to find author username')
        author = authorPostUsernames[0].get_text()
        auth_href = authorPostUsernames[0].get('href')
        authorUrl = urllib.parse.urljoin(self.baseUrl, auth_href)
        if not authorUrl.startswith(self.baseUrl):
            raise Exception('error: unknown username href format')
        # the author id is the path segment right after 'members/'
        authorId = authorUrl[len(self.baseUrl):]
        if not authorId.startswith('members/'):
            raise Exception(f'error: unknown author id format: {authorId}')
        authorId = authorId.split('/')[1]
        self.setAuthor(fic, author, authorUrl, authorId)

        if fic.description is None:
            # TODO?
            fic.description = htmlEscape(fic.title + ' by ' +
                                         fic.getAuthorName())

        # try grabbing reader version, fallback to full pages
        threadmarksHtml = None
        try:
            sep = '?' if self.baseUrl.find('?') < 0 else '&'
            url = f'{self.baseUrl}threads/{fic.localId}/threadmarks{sep}category_id=1'
            threadmarksHtml = self.scrapeLike(url)
            self.readerSoftScrape(fic)
        except Exception:
            # note: we do this before the threadmarks check for old-style fics
            # soft scrape all thread pages to ensure we have everything
            self.deepSoftScrape(fic)

        postSoups: Dict[str, Any] = {}

        postUrls: List[str] = []
        # 1-based chapter number -> chapter title
        chapterTitles = {}
        try:
            # scrape the threadmarks page, assuming there is one; if the fetch
            # above failed threadmarksHtml is still None and this raises into
            # the fallback handler below
            threadmarksSoup = BeautifulSoup(threadmarksHtml, 'html5lib')

            # attempt to extract a fic description
            threadmarkExtraInfo = threadmarksSoup.find(
                'div', {'class': 'threadmarkListingHeader-extraInfo'})
            if threadmarkExtraInfo is not None:
                bbWrapper = threadmarkExtraInfo.find('div',
                                                     {'class': 'bbWrapper'})
                if bbWrapper is not None:
                    desc = bbWrapper.decode_contents()
                    descView = HtmlView(desc, markdown=False)
                    fic.description = ''.join(
                        [f'<p>{l}</p>' for l in descView.text])

            # determine chapter count based on threadmarks
            threadmarkList = threadmarksSoup.find('div',
                                                  {'class': 'threadmarkList'})
            threadmarks = None
            if threadmarkList is not None:
                threadmarks = threadmarkList.find_all(
                    'li', {'class': 'threadmarkListItem'})
            else:
                # alternate markup: a block-body--threadmarkBody div holding
                # li (or, failing that, tr) entries
                threadmarkList = threadmarksSoup.find(
                    'div', {'class': 'block-body--threadmarkBody'})
                if threadmarkList is None:
                    raise Exception('error: unable to find threadmark menu')
                if threadmarkList.find(class_='fa-ellipsis-h') is not None:
                    raise Exception('unable to handle elided threamdarks')
                threadmarks = threadmarkList.find_all('li')
                if len(threadmarks) == 0:
                    threadmarks = threadmarkList.find_all('tr')
                util.logMessage(
                    f'XenForo|new threadmarks count|{len(threadmarks)}')

            for threadmark in threadmarks:
                # entries carrying a message-newIndicator span are skipped
                if threadmark.find(
                        'span', {'class': 'message-newIndicator'}) is not None:
                    continue
                a = threadmark.find('a')
                purl = a.get('href')
                # make site-relative hrefs absolute
                if purl.startswith('threads/'):
                    purl = '{}{}'.format(self.baseUrl, purl)
                elif purl.startswith('/threads/'):
                    purl = '{}{}'.format(self.baseUrl, purl[1:])
                postUrls += [purl]

                chapterTitles[len(postUrls)] = a.getText().strip()

            try:
                postSoups, _ = self.getReaderPosts(fic)
            except Exception as ie:
                # FIXME oh boy:
                # https://forum.questionablequesting.com/threads/worm-cyoa-things-to-do-in-brockton-bay-when-youre-a-bored-demigod.1247/reader
                # Reader page says 36 threadmarks, but actual threadmark list says 33
                # First reader page abruptly stops at 27 threadmarks
                util.logMessage(
                    'XenForoAdapter: unable to getReaderPosts: {}\n{}'.format(
                        ie, traceback.format_exc()))
        except Exception as e:
            util.logMessage(
                'XenForoAdapter: unable to parse threadmarks: {}\n{}'.format(
                    e, traceback.format_exc()))
            try:
                postUrls = self.getReaderPostUrls(fic)
                postSoups, chapterTitles = self.getReaderPosts(fic)
            except Exception as ie:
                util.logMessage(
                    'XenForoAdapter: unable to parse reader posts: {}\n{}'.
                    format(ie, traceback.format_exc()))
                postUrls = self.getDeepAuthorPostUrls(fic)
                # if we fallback to here, don't immediately setup postSoups at all;
                # they'll be fetched as needed later

        fic.chapterCount = len(postUrls)

        chapterPosts: List[Optional[str]] = []
        chapterUrls: List[str] = []
        chapterPostIds: List[str] = []

        # single-entry cache: the most recently fetched and parsed thread page
        lastSoupUrl: Optional[str] = None
        lastSoup: Optional[Any] = None

        for purl in postUrls:
            parts = purl.split('#')
            burl = parts[0]
            # a url without a fragment refers to the author's post itself
            postId = authorPost.get('id') if len(parts) < 2 else parts[1]

            rawPost = None
            # first try getting the post from the reader pages
            if postId in postSoups and postSoups[postId] is not None:
                rawPost = str(postSoups[postId])
            else:
                # if needed, fallback to grabbing that page from the entire thread
                pageSoup = None
                if lastSoupUrl is not None and lastSoupUrl == burl:
                    pageSoup = lastSoup
                else:
                    pageContent = self.scrapeLike(burl)
                    pageSoup = BeautifulSoup(pageContent, 'html5lib')
                    lastSoupUrl = burl
                    lastSoup = pageSoup
                assert (pageSoup is not None)
                if postId is not None:
                    poss = pageSoup.find_all(self.postContainer,
                                             {'id': postId})
                    if len(poss) != 1:
                        # XenForo2 often has js- prefixed on the actual id attr
                        poss = pageSoup.find_all(self.postContainer,
                                                 {'id': 'js-' + postId})
                    if len(poss) != 1:
                        raise Exception(
                            f'error: cannot find post for chapter {postId}')
                    rawPost = str(poss[0])
                else:
                    # no id to look for: take the first message on the page
                    rawPost = str(
                        pageSoup.find_all(self.postContainer,
                                          {'class': 'message'})[0])

            chapterPosts += [rawPost]
            chapterUrls += [burl]
            chapterPostIds += [postId]

        fic.wordCount = 0
        fic.published = None
        fic.updated = None

        chapterContents: List[str] = []
        for rawPost in chapterPosts:
            post = BeautifulSoup(rawPost, 'html5lib')
            content = post.find_all(
                'div', {'class': ['messageContent', 'message-content']})
            if len(content) != 1:
                raise Exception('error: cannot find content for chapter post')
            content = content[0]

            # put a <br> in front of every "last edited" div
            lastEditedDivs = content.find_all('div',
                                              {'class': 'message-lastEdit'})
            for lastEditedDiv in lastEditedDivs:
                br = soup.new_tag("br")
                lastEditedDiv.insert_before(br)

            chapterContents += [str(content)]
            # word count is a whitespace split of the content markup
            fic.wordCount += len(str(content).split())

            uts = self.getPostUpdatedOrPublished(post)

            # first chapter sets published; the last chapter processed leaves
            # its timestamp in updated
            if fic.published is None:
                fic.published = OilTimestamp(uts)
            fic.updated = OilTimestamp(uts)

        if fic.updated is None:
            raise Exception(
                f'unable to determine updated date: {len(chapterPosts)} {len(postUrls)}'
            )

        fic.upsert()
        for cid in range(fic.chapterCount):
            # chapters are numbered from 1; the parallel lists from 0
            chapter = fic.chapter(cid + 1)
            chapter.url = chapterUrls[cid]
            chapter.localChapterId = chapterPostIds[cid]
            if (cid + 1) in chapterTitles:
                chapter.title = chapterTitles[(cid + 1)]
            chapter.upsert()

            chapter.setHtml(str(chapterContents[cid]))

        # TODO: word count, published, updated can only be found once all chapters

        # each post is inside an li id="post-{number}" class="message"
        # each post has data-author="{author}"

        self.updateTitle(fic)

        return fic
Пример #4
0
	def parseInfoInto(self, fic: Fic, html: str) -> Fic:
		"""Fill ``fic`` with metadata parsed from an AO3 work page.

		Reads title, summary, chapter and word counts, kudos, published and
		updated dates, completion status, author, per-chapter ids/titles (only
		when there is more than one chapter), fandoms, and -- when exactly one
		fandom was resolved -- characters taken from the relationship tags.

		Args:
			fic: the fic record to populate; mutated in place.
			html: markup of the work page.

		Returns:
			The same ``fic`` object.

		Raises:
			Exception: when the title heading is not found exactly once, the
				status label is neither 'Completed:' nor 'Updated:', the
				chapter dropdown has fewer entries than the chapter count, or
				a relationship tag splits into more than 8 names.
		"""
		from bs4 import BeautifulSoup
		soup = BeautifulSoup(html, 'html.parser')

		fic.fetched = OilTimestamp.now()
		fic.languageId = Language.getId("English")  # TODO: don't hard code?

		# the title must be present exactly once
		titleHeadings = soup.findAll('h2', {'class': 'title heading'})
		if len(titleHeadings) != 1:
			raise Exception('unable to find ao3 title {}'.format(fic.url))
		fic.title = titleHeadings[0].get_text().strip()

		# if the page does not hold exactly one summary module, retry the
		# search inside the single 'preface group' div
		summaryModules = soup.findAll('div', {'class': 'summary module'})
		if len(summaryModules) != 1:
			prefaceGroups = soup.findAll('div', {'class': 'preface group'})
			if len(prefaceGroups) == 1:
				summaryModules = prefaceGroups[0].findAll(
					'div', {'class': 'summary module'}
				)

		if len(summaryModules) == 1:
			summaryBq = summaryModules[0].find('blockquote')
			fic.description = summaryBq.decode_contents(formatter='html').strip()
		elif fic.description is None:
			# an existing description is kept when no summary is found
			fic.description = "{no summary}"
			# raise Exception('unable to find ao3 summary {}'.format(fic.localId))

		fic.ageRating = '<unkown>'

		# TODO: error handling
		# the chapters field reads "<completed>/<total>", total may be '?'
		cText = ' '.join(soup.find('dd', {'class': 'chapters'}).contents).strip()
		ps = cText.split('/')
		completedChapters = int(ps[0])
		totalChapters = None if ps[1] == '?' else int(ps[1])
		fic.chapterCount = completedChapters

		wText = ' '.join(soup.find('dd', {'class': 'words'}).contents).strip()
		fic.wordCount = int(wText)

		fic.reviewCount = 0

		# kudos are stored as the favorite count; absent kudos means 0
		fic.favoriteCount = 0
		kDefinition = soup.find('dd', {'class': 'kudos'})
		if kDefinition is not None:
			kText = ' '.join(kDefinition.contents).strip()
			fic.favoriteCount = int(kText)

		fic.followCount = 0

		pText = ' '.join(soup.find('dd', {'class': 'published'}).contents).strip()
		publishedUts = util.parseDateAsUnix(pText, fic.fetched)
		fic.published = OilTimestamp(publishedUts)

		# default updated to published; a status entry below may override it
		if fic.updated is None:
			fic.updated = fic.published
		# NOTE(review): fic.updated can be an OilTimestamp at this point (it
		# was just copied from fic.published), whereas the other calls to
		# parseDateAsUnix in this method pass page text -- confirm the helper
		# accepts both
		if fic.updated is not None:
			updatedUts = util.parseDateAsUnix(fic.updated, fic.fetched)
			fic.updated = OilTimestamp(updatedUts)

		fic.ficStatus = FicStatus.ongoing  # TODO chapter/chapters?

		# NOTE(review): this assigns the same value as the default just above
		if totalChapters is None or completedChapters < totalChapters:
			fic.ficStatus = FicStatus.ongoing

		# the status label decides complete vs ongoing, and its dd holds the
		# date used for updated
		statusDt = soup.find('dt', {'class': 'status'})
		if statusDt is not None:
			if statusDt.contents[0] == 'Completed:':
				fic.ficStatus = FicStatus.complete
				cText = ' '.join(soup.find('dd', {'class': 'status'}).contents).strip()
				updatedUts = util.parseDateAsUnix(cText, fic.fetched)
				fic.updated = OilTimestamp(updatedUts)
			elif statusDt.contents[0] == 'Updated:':
				fic.ficStatus = FicStatus.ongoing
				uText = ' '.join(soup.find('dd', {'class': 'status'}).contents).strip()
				updatedUts = util.parseDateAsUnix(uText, fic.fetched)
				fic.updated = OilTimestamp(updatedUts)
			else:
				raise Exception('unkown status: {}'.format(statusDt.contents[0]))

		# author comes from the first link in the byline; a byline without a
		# link is treated as an anonymous work
		byline = soup.find('h3', {'class': 'byline heading'})
		authorLink = byline.find('a')
		if authorLink is None:
			if fic.authorId is not None and len(fic.getAuthorName()) > 0:
				pass  # updated author to anon, don't make changes
			else:
				# first loaded after it was already set to anonymous
				authorUrl = ''
				author = 'Anonymous'
				authorId = 'Anonymous'
				self.setAuthor(fic, author, authorUrl, authorId)
		else:
			authorUrl = authorLink.get('href')
			author = ' '.join(byline.find('a').contents)
			authorId = author  # map pseudo to real?
			self.setAuthor(fic, author, authorUrl, authorId)

		# per-chapter records are only built for multi-chapter works, from
		# the options under the element with id 'selected_id'
		if fic.chapterCount > 1:
			fic.upsert()
			localChapterIdSelect = soup.find(id='selected_id').findAll('option')
			# note: ao3 sometimes says there are less chapters than there really
			# are, possibly due to caching on their end. We just ensure there's _at
			# least_ chapterCount chapters, then fetch whatever the dropdown tells
			# us to
			if len(localChapterIdSelect) > fic.chapterCount:
				fic.chapterCount = len(localChapterIdSelect)
				fic.upsert()
			if len(localChapterIdSelect) != fic.chapterCount:
				raise Exception('mismatching localChapterId count?')

			# chapter ids are 1-based; the option list is 0-based
			for cid in range(1, fic.chapterCount + 1):
				chap = fic.chapter(cid)
				chap.url = '{}{}/chapters/{}?view_adult=true'.format(
					self.baseUrl, fic.localId, localChapterIdSelect[cid - 1].get('value')
				)
				chap.localChapterId = localChapterIdSelect[cid - 1].get('value')
				chap.title = localChapterIdSelect[cid - 1].getText().strip()
				if chap.title is not None:
					chap.title = util.cleanChapterTitle(chap.title, cid)
				chap.upsert()

		# map each fandom tag (compared lower-cased) onto the fandom names
		# used locally; tags not listed here fall through to ao3FandomsMap
		fandomDd = soup.find('dd', {'class': 'fandom tags'})
		if fandomDd is not None:
			fandomTags = fandomDd.findAll('a', {'class': 'tag'})
			for ft in fandomTags:
				originalF = ft.contents[0].strip()
				f = originalF.lower()
				# TODO: this seriously needs reworked
				if (
					(f.startswith("harry potter ") and f.endswith("rowling"))
					or f == 'harry potter - fandom'
					or f == 'fantastic beasts and where to find them (movies)'
					or f == 'harry potter next generation - fandom'
				):
					fic.add(Fandom.define('Harry Potter'))
				elif (
					f == 'sherlock - fandom' or f == 'sherlock (tv)'
					or f == 'sherlock holmes & related fandoms'
					or f == 'sherlock holmes - arthur conan doyle'
					or f == 'sherlock holmes (downey films)'
				):
					fic.add(Fandom.define('Sherlock Holmes'))
				elif f == 'furry (fandom)' or f == 'harry - fandom':
					continue  # skip
				elif f == 'fleurmione - fandom':
					continue  # skip
				elif f == 'skyfall (2012) - fandom':
					fic.add(Fandom.define('James Bond'))
				elif f == 'orphan black (tv)':
					fic.add(Fandom.define('Orphan Black'))
				elif (
					f == 'naruto' or f == 'naruto shippuden'
					or f == 'naruto shippuuden - fandom'
				):
					fic.add(Fandom.define('Naruto'))
				elif f == 'naruto/harry potter':
					fic.add(Fandom.define('Naruto'))
					fic.add(Fandom.define('Harry Potter'))
				elif f == 'bleach':
					fic.add(Fandom.define('Bleach'))
				elif (
					f == 'iron man (movies)' or f == 'iron man - all media types'
					or f == 'iron man (comic)' or f == 'iron man - fandom'
					or f == 'iron man (comics)'
				):
					fic.add(Fandom.define('Iron Man'))
				elif (
					f == 'the avengers (marvel) - all media types'
					or f == 'the avengers (marvel movies)'
					or f == 'the avengers - ambiguous fandom'
					or f == 'the avengers (2012)' or f == 'the avengers'
					or f == 'avengers (marvel) - all media types'
					or f == 'marvel avengers movies universe' or f == 'avengers'
				):
					fic.add(Fandom.define('Avengers'))
				elif f == 'marvel 616':
					fic.add(Fandom.define('Marvel'))
					fic.add(Fandom.define('Marvel 616'))
				elif f == 'thor (movies)' or f == 'thor - all media types':
					fic.add(Fandom.define('Thor'))
				elif (
					f == 'captain america (movies)'
					or f == 'captain america - all media types'
					or f == 'captain america (comics)'
				):
					fic.add(Fandom.define('Captain America'))
				elif (
					f == 'avatar: the last airbender' or f == 'avatar: legend of korra'
					or f == 'avatar the last airbender - fandom'
				):
					fic.add(Fandom.define('Avatar'))
				elif f == 'original work':
					fic.add(Fandom.define('Original Work'))
				elif f == 'stargate atlantis':
					fic.add(Fandom.define('Stargate Atlantis'))
				elif f == 'stargate sg-1':
					fic.add(Fandom.define('Stargate SG-1'))
				elif f == 'stargate - all series':
					fic.add(Fandom.define('Stargate Atlantis'))
					fic.add(Fandom.define('Stargate SG-1'))
				elif f == 'agents of s.h.i.e.l.d. (tv)':
					fic.add(Fandom.define('Avengers'))
				elif f == 'supernatural':
					fic.add(Fandom.define('Supernatural'))
				elif f == 'teen wolf (tv)':
					fic.add(Fandom.define('Teen Wolf'))
				elif f == 'grimm (tv)':
					fic.add(Fandom.define('Grimm'))
				elif (
					f == 'the amazing spider-man (movies - webb)'
					or f == 'spider-man - all media types'
					or f == 'spider-man: homecoming (2017)'
				):
					fic.add(Fandom.define('Spiderman'))
				elif (
					f == 'x-men - all media types' or f == 'x-men (movieverse)'
					or f == 'x-men (comicverse)'
				):
					fic.add(Fandom.define('X-Men'))
				elif (
					f == 'lord of the rings - j. r. r. tolkien'
					or f == 'the lord of the rings - j. r. r. tolkien'
				):
					fic.add(Fandom.define('Lord of the Rings'))
				elif (
					f == 'crisis core: final fantasy vii'
					or f == 'compilation of final fantasy vii' or f == 'final fantasy vii'
				):
					fic.add(Fandom.define('Final Fantasy VII'))
					fic.add(Fandom.define('Final Fantasy'))
				elif f == 'sen to chihiro no kamikakushi | spirited away':
					fic.add(Fandom.define('Spirited Away'))
				elif f == 'howl no ugoku shiro | howl\'s moving castle':
					fic.add(Fandom.define('Howl\'s Moving Castle'))
				elif f == 'rise of the guardians (2012)':
					fic.add(Fandom.define('Rise of the Guardians'))
				elif (
					f == 'doctor who' or f == 'doctor who (2005)'
					or f == 'doctor who & related fandoms'
				):
					fic.add(Fandom.define('Doctor Who'))
				elif f == 'daredevil (tv)' or f == 'daredevil (comics)':
					fic.add(Fandom.define('DareDevil'))
				elif f == 'labyrinth (1986)':
					fic.add(Fandom.define('Labyrinth'))
				elif f == 'gravity falls':
					fic.add(Fandom.define('Gravity Falls'))
				elif f == 'once upon a time (tv)':
					fic.add(Fandom.define('Once Upon a Time'))
				elif f == 'doctor strange (comics)':
					fic.add(Fandom.define('Doctor Strange'))
				elif f == 'the sentinel':
					fic.add(Fandom.define('The Sentinel'))
				elif f == 'teen titans (animated series)':
					fic.add(Fandom.define('Teen Titans'))
				elif (
					f == 'dcu' or f == 'dcu animated' or f == 'dcu (comics)'
					or f == 'dc extended universe' or f == 'dc animated universe'
				):
					fic.add(Fandom.define('DC'))
				elif f == 'vampire hunter d':
					fic.add(Fandom.define('Vampire Hunter D'))
				elif f == 'homestuck':
					fic.add(Fandom.define('Homestuck'))
				elif f == 'one piece':
					fic.add(Fandom.define('One Piece'))
				elif f == 'batman (movies - nolan)':
					fic.add(Fandom.define('Batman'))
				elif f == 'die hard (movies)':
					fic.add(Fandom.define('Die Hard'))
				elif f == 'discworld - terry pratchett':
					fic.add(Fandom.define('Discworld'))
				elif f == 'gossip girl':
					fic.add(Fandom.define('Gossip Girl'))
				elif (
					f == 'a song of ice and fire - george r. r. martin'
					or f == 'a song of ice and fire & related fandoms'
				):
					fic.add(Fandom.define('A Song of Ice and Fire'))
				elif f == 'supergirl (tv 2015)':
					fic.add(Fandom.define('Supergirl'))
				elif f == 'merlin (tv)':
					fic.add(Fandom.define('Merlin'))
				elif f == 'star trek':
					fic.add(Fandom.define('Star Trek'))
				elif f == 'steven universe (cartoon)':
					fic.add(Fandom.define('Steven Universe'))
				elif f == 'hellsing':
					fic.add(Fandom.define('Hellsing'))
				elif f == 'the breaker':
					fic.add(Fandom.define('The Breaker'))
				elif f == 'smallville':
					fic.add(Fandom.define('Smallville'))
				elif f == '베리타스 | veritas (manhwa)':
					fic.add(Fandom.define('Veritas (manhwa)'))
				elif f == 'guardians of childhood - william joyce':
					fic.add(Fandom.define('Guardians of Childhood'))
				elif f == 'person of interest (tv)':
					fic.add(Fandom.define('Person of Interest'))
				elif f == 'james bond (craig movies)':
					fic.add(Fandom.define('James Bond'))
				elif f == 'the bourne legacy (2012)':
					fic.add(Fandom.define('Jason Bourne'))
				elif f == 'numb3rs':
					fic.add(Fandom.define('Numb3rs'))
				elif f == 'temeraire - naomi novik':
					fic.add(Fandom.define('Temeraire'))
				elif f == 'twilight series - stephenie meyer':
					fic.add(Fandom.define('Twilight'))
				elif f == 'dungeons and dragons - fandom':
					fic.add(Fandom.define('Dungeons and Dragons'))
				elif f == 'american horror story' or f == 'american horror story: cult':
					fic.add(Fandom.define('American Horror Story'))
				elif (
					f == 'worm (web serial novel)' or f == 'worm - wildbow'
					or f == 'parahumans series - wildbow'
					or f == 'worm (web serial) | wildbow' or f == 'worm - fandom'
					or f == 'parahumans - fandom' or f == 'worm (parahumans)'
					or f == 'worm (web serial)' or f == 'worm | parahumans'
					or f == 'worm (web novel)'
				):
					fic.add(Fandom.define('Worm'))
				elif f == 'toaru kagaku no railgun | a certain scientific railgun':
					fic.add(Fandom.define('A Certain Scientific Railgun'))
				elif f == 'toaru majutsu no index | a certain magical index':
					fic.add(Fandom.define('A Certain Magical Index'))
				elif f == 'cthulhu mythos - h. p. lovecraft':
					fic.add(Fandom.define('Cthulhu'))
				elif f == 'transformers - all media types':
					fic.add(Fandom.define('Transformers'))
				elif f == 'destiny (video game)':
					fic.add(Fandom.define('Destiny'))
				elif f == 'fandom - fandom' or f == 'meta - fandom':
					pass  # >_>
				elif f == 'house m.d.':
					fic.add(Fandom.define('House, M.D.'))
				elif f == 'the hobbit (jackson movies)':
					fic.add(Fandom.define('The Hobbit'))
				elif f == 'doctor strange (2016)':
					fic.add(Fandom.define('Doctor Strange'))
				elif f == 'arrow (tv 2012)':
					fic.add(Fandom.define('Arrow'))
				elif f == 'the flash (tv 2014)':
					fic.add(Fandom.define('Flash'))
				elif f == 'senki zesshou symphogear':
					fic.add(Fandom.define('Symphogear'))
				elif (
					f == 'fullmetal alchemist: brotherhood & manga'
					or f == 'fullmetal alchemist - all media types'
					or f == 'fullmetal alchemist (anime 2003)'
				):
					fic.add(Fandom.define('Fullmetal Alchemist'))
				elif (
					f == 'star wars - all media types'
					or f == 'star wars episode vii: the force awakens (2015)'
					or f == 'star wars prequel trilogy'
				):
					fic.add(Fandom.define('Star Wars'))
				elif (
					f == 'guardians of the galaxy (2014)'
					or f == 'guardians of the galaxy - all media types'
					or f == 'guardians of the galaxy (movies)'
				):
					fic.add(Fandom.define('Guardians of the Galaxy'))
				elif f == 'ant man (2015)' or f == 'ant-man (movies)':
					fic.add(Fandom.define('Ant Man'))
				elif f == 'the defenders (marvel tv)':
					fic.add(Fandom.define('The Defenders'))
				elif f == 'elementary (tv)':
					fic.add(Fandom.define('Elementary'))
				elif f == 'good omens - neil gaiman & terry pratchett':
					fic.add(Fandom.define('Good Omens'))
				elif f == 'danny phantom':
					fic.add(Fandom.define('Danny Phantom'))
				elif f == 'katekyou hitman reborn!':
					fic.add(Fandom.define('Katekyo Hitman Reborn!'))
				elif f == 'welcome to night vale':
					fic.add(Fandom.define('Welcome to Night Vale'))
				elif f == 'ncis':
					fic.add(Fandom.define('NCIS'))
				elif f == 'torchwood':
					fic.add(Fandom.define('Torchwood'))
				elif f == 'magic: the gathering':
					fic.add(Fandom.define('Magic: The Gathering'))
				elif f == 'overwatch (video game)':
					fic.add(Fandom.define('Overwatch'))
				elif f == 'detroit: become human (video game)':
					fic.add(Fandom.define('Detroit: Become Human'))
				elif f == 'greek and roman mythology':
					pass
				elif f == 'life is strange (video game)':
					fic.add(Fandom.define('life is strange (video game)'))
				elif f == 'akatsuki no yona | yona of the dawn':
					fic.add(Fandom.define('Yona of the Dawn'))
				elif f == '僕のヒーローアカデミア | boku no hero academia | my hero academia':
					fic.add(Fandom.define('My Hero Academia'))
				elif f == 'voltron: legendary defender':
					fic.add(Fandom.define('Voltron'))
				elif f == 'selfie (tv)':
					fic.add(Fandom.define('Selfie'))
				elif f == 'suits (tv)':
					fic.add(Fandom.define('Suits'))
				elif f == 'fruits basket':
					fic.add(Fandom.define('Fruits Basket'))
				elif f == 'hetalia: axis powers':
					fic.add(Fandom.define('Hetalia: Axis Powers'))
				elif f == 'carmilla (web series)':
					fic.add(Fandom.define('Carmilla'))
				elif f == 'the dresden files - jim butcher':
					fic.add(Fandom.define('Dresden Files'))
				elif f == 'girl genius':
					fic.add(Fandom.define('Girl Genius'))
				elif f == 'unspecified fandom':
					pass  # TODO?
				elif f == 'nightwing (comics)':
					fic.add(Fandom.define('Nightwing'))
				elif f == 'books of the raksura - martha wells':
					fic.add(Fandom.define('Books of the Raksura'))
				elif f == 'fall of ile-rien - martha wells':
					fic.add(Fandom.define('Fall of Ile-Rien'))
				elif f == 'vorkosigan saga - lois mcmaster bujold':
					fic.add(Fandom.define('Vorkosigan Saga'))
				elif (
					f == 'highlander: the series' or f == 'highlander - all media types'
				):
					fic.add(Fandom.define('Highlander'))
				elif f == 'yoroiden samurai troopers | ronin warriors':
					fic.add(Fandom.define('Ronin Warriors'))
				elif f == 'hockey rpf':
					fic.add(Fandom.define('Hockey RPF'))
				elif f == 'pacific rim (2013)':
					fic.add(Fandom.define('Pacific Rim'))
				elif f == 'enchanted forest chronicles - patricia wrede':
					fic.add(Fandom.define('Enchanted Forest Chronicles'))
				elif f == 'tortall - tamora pierce':
					fic.add(Fandom.define('Tortall'))
				elif f == 'protector of the small - tamora pierce':
					fic.add(Fandom.define('Protector of the Small'))
				elif f == 'leverage':
					fic.add(Fandom.define('Leverage'))
				elif f == 'valdemar series - mercedes lackey':
					fic.add(Fandom.define('Valdemar Series'))
				elif (
					f == 'b.p.r.d.' or f == 'bureau for paranormal research and defense'
				):
					fic.add(Fandom.define('B.P.R.D.'))
				elif f == 'hellboy (comic)':
					fic.add(Fandom.define('Hellboy'))
				elif f == 'sga/avatar':
					fic.add(Fandom.define('Stargate Atlantis'))
					fic.add(Fandom.define('Avatar'))
				elif f == 'annihilation (2018 garland)':
					fic.add(Fandom.define('Annihilation'))
				elif f == 'craft sequence - max gladstone':
					fic.add(Fandom.define('Craft Sequence'))
				elif f == 'the good place (tv)':
					fic.add(Fandom.define('The Good Place'))
				elif f == 'jessica jones (tv)':
					fic.add(Fandom.define('Jessica Jones'))
				elif f == 'mad max series (movies)':
					fic.add(Fandom.define('Mad Max'))
				elif f == 'american gods (tv)':
					fic.add(Fandom.define('American Gods'))
				elif f == 'terminator: the sarah connor chronicles':
					fic.add(Fandom.define('Terminator: The Sarah Connor Chronicles'))
					fic.add(Fandom.define('Terminator'))
				elif f == 'wolf 359 (radio)':
					fic.add(Fandom.define('Wolf 359'))
				elif f == 'shadowrun: dragonfall':
					fic.add(Fandom.define('Shadowrun'))
				elif f == 'ars paradoxica (podcast)':
					fic.add(Fandom.define('Ars Paradoxica'))
				elif f == 'love is strange - fandom':
					fic.add(Fandom.define('Love is Strange'))
				elif f == 'dune - all media types':
					fic.add(Fandom.define('Dune'))
				elif f == 'dragon age: origins':
					fic.add(Fandom.define('Dragon Age: Origins'))
				elif f == 'game of thrones (tv)':
					fic.add(Fandom.define('Game of Thrones'))
				elif f == 'chronicles of amber - roger zelazny':
					fic.add(Fandom.define('Chronicles of Amber'))
				elif f == 'the southern reach trilogy - jeff vandermeer':
					fic.add(Fandom.define('The Southern Reach Trilogy'))
				elif f == 'continuum (tv)':
					fic.add(Fandom.define('Continuum'))
				elif f == 'mage: the ascension':
					fic.add(Fandom.define('Mage: The Ascension'))
				elif f == 'the good wife (tv)' or f == 'good wife (tv)':
					fic.add(Fandom.define('The Good Wife'))
				elif f == 'alliance-union - c. j. cherryh':
					fic.add(Fandom.define('Alliance-Union'))
				elif f == 'indexing - seanan mcguire':
					fic.add(Fandom.define('Indexing'))
				elif f == 'ultraviolet (tv)':
					fic.add(Fandom.define('Ultraviolet'))
				elif f == 'veronica mars (tv)':
					fic.add(Fandom.define('Veronica Mars'))
				elif f == 'secret circle (tv)':
					fic.add(Fandom.define('Secret Circle'))
				elif f == 'mahou shoujo madoka magika | puella magi madoka magica':
					fic.add(Fandom.define('Madoka Magica'))
				elif f == 'agent carter (tv)':
					fic.add(Fandom.define('Agent Carter'))
				elif f == 'dracula & related fandoms':
					fic.add(Fandom.define('Dracula'))
				elif f == 'dragon ball':
					fic.add(Fandom.define('Dragon Ball'))
				elif f == 'mass effect - all media types':
					fic.add(Fandom.define('Mass Effect'))
				elif f == 'firefly' or f == 'serenity (2005)':
					fic.add(Fandom.define('Firefly'))
				else:
					# not hard-coded above: consult ao3FandomsMap, whose
					# entries are used as (tag spellings, fandom names) pairs
					anyHere = False
					global ao3FandomsMap
					for fm in ao3FandomsMap:
						here = False
						for uf in fm[0]:
							if f == uf.lower().strip():
								here = True
								break
						if not here:
							continue
						anyHere = True
						for mf in fm[1]:
							fic.add(Fandom.define(mf))
					# unknown fandoms are logged rather than treated as errors
					if not anyHere:
						util.logMessage(f'ao3|unknown fandom|{fic.url}|{originalF}')
						#raise Exception('unknown fandom: {} "{}"'.format(fic.url, originalF))

		ourDoms = fic.fandoms()
		# we have a canonical fandom, try to find our characters
		if len(ourDoms) == 1:
			relationshipDd = soup.find('dd', {'class': 'relationship tags'})
			if relationshipDd is not None:
				relationshipTags = relationshipDd.findAll('a', {'class': 'tag'})
				for rt in relationshipTags:
					r = rt.contents[0]
					# each '/'-separated piece of a relationship tag is taken
					# as a character name
					chars = r.split('/')
					if len(chars) > 8:  # TODO: sometimes more?
						raise Exception('unable to parse relationship: {}'.format(r))
					for char in chars:
						fic.add(Character.defineInFandom(ourDoms[0], char, self.ftype))

		return fic