Пример #1
0
def v0_fic_all(urlId: str) -> Any:
    """Return a JSON-able payload with a fic's info and all chapter contents.

    Looks the fic up by its urlId, then decompresses, decodes and cleans each
    chapter's stored content.  Chapters whose content fails to process are
    skipped (best-effort), but a missing chapter row is a hard error.
    """
    fics = Fic.select({'urlId': urlId})
    if len(fics) != 1:
        return Err.urlId_not_found.get()
    fic = fics[0]
    if fic.chapterCount is None:
        print(f'err: fic has no chapter count: {fic.id}')
        return Err.urlId_not_found.get()
    # map chapterId -> FicChapter so the loop below is O(1) per chapter
    ficChapters = {
        fc.chapterId: fc
        for fc in FicChapter.select({'ficId': fic.id})
    }
    chapters = {}
    for cid in range(1, fic.chapterCount + 1):
        if cid not in ficChapters:
            return Err.cid_not_found.get({'arg': f'{fic.id}/{cid}'})
        chapter = ficChapters[cid]
        cres = chapter.toJSONable()
        try:
            content = cres['content']
            if content is not None:
                content = util.decompress(content)
                content = scrape.decodeRequest(content, f'{fic.id}/{cid}')
                # clean once and verify cleanHtml is idempotent on its own
                # output; the second pass is computed only once here
                cleaned = cleanHtml(content)
                if cleaned != cleanHtml(cleaned):
                    print(
                        f'v0_fic_all: {fic.id}/{cid} did not round-trip through cleanHtml'
                    )
                content = cleaned
            cres['content'] = content
            chapters[cid] = cres
        except Exception as e:
            # best-effort: skip chapters whose content fails to process, but
            # record why instead of silently swallowing the error (the old
            # bare `except: pass` also ate KeyboardInterrupt/SystemExit)
            print(f'v0_fic_all: failed to process {fic.id}/{cid}: {e}')

    res = fic.toJSONable()
    return Err.ok({'info': res, 'chapters': chapters})
Пример #2
0
    def get(self, localId: str) -> Fic:
        """Fetch the fic identified by (self.ftype, localId), creating it if absent."""
        matches = Fic.select({'sourceId': self.ftype, 'localId': localId})
        if len(matches) != 1:
            # no unique record yet: build a blank one and hand it to create()
            blank = Fic.new()
            blank.sourceId = self.ftype
            blank.localId = localId
            blank.created = OilTimestamp.now()
            return self.create(blank)
        return matches[0]
Пример #3
0
	def getFromZList(self, localId: int, ts: int, html: str) -> Fic:
		"""Look up (or lazily construct) the fic record, then populate it from a z-list entry."""
		rows = Fic.select({'sourceId': self.ftype, 'localId': str(localId)})
		if len(rows) == 1:
			fic = rows[0]
		else:
			# no unique existing record: start from a fresh one
			fic = Fic.new()
			fic.sourceId = self.ftype
			fic.localId = str(localId)
			fic.created = OilTimestamp.now()
		return self.createFromZList(fic, ts, html)
Пример #4
0
	def get(self, localId: str) -> Fic:
		"""Return the stored fic for localId, creating it on a miss.

		Raises Exception when this adapter is not cacheable and no record
		exists yet.
		"""
		rows = Fic.select({'sourceId': self.ftype, 'localId': localId})
		if len(rows) == 1:
			return rows[0]
		if not self.cacheable:
			raise Exception('cannot cache {}/{}'.format(localId, self.ftype))

		# cache miss: build a blank record and let create() fill it in
		blank = Fic.new()
		blank.sourceId = self.ftype
		blank.localId = localId
		blank.created = OilTimestamp.now()
		return self.create(blank)
Пример #5
0
	def tryParseUrl(self, url: str) -> Optional[FicId]:
		"""Resolve a url to a FicId by consulting already-stored chapters and fics."""
		# a chapter url pins down both the fic and the specific chapter
		chapterRows = FicChapter.select({'url': url})
		if len(chapterRows) == 1:
			parent = Fic.get((chapterRows[0].ficId, ))
			if parent is not None:
				return FicId(
					FicType(parent.sourceId), parent.localId,
					chapterRows[0].chapterId, False
				)

		# otherwise the url may identify the fic itself
		ficRows = Fic.select({'url': url})
		if len(ficRows) == 1:
			return FicId(FicType(ficRows[0].sourceId), ficRows[0].localId)

		raise NotImplementedError()
Пример #6
0
def v0_cache(urlId: str) -> Any:
    """Ensure every chapter of the fic identified by urlId is cached.

    Returns the fic's JSON-able info on success, or an error payload if the
    fic cannot be found or any chapter fails to cache.
    """
    fics = Fic.select({'urlId': urlId})
    if len(fics) != 1:
        return Err.urlId_not_found.get()
    fic = fics[0]
    if fic.chapterCount is None:
        print(f'err: fic has no chapter count: {fic.id}')
        return Err.urlId_not_found.get()
    for cid in range(1, fic.chapterCount + 1):
        try:
            chapter = fic.chapter(cid)
            chapter.cache()
        except Exception as e:
            # surface the underlying reason instead of discarding it
            print(f'v0_cache: failed to cache {fic.id}/{cid}: {e}')
            return Err.failed_to_cache_cid.get({'arg': f'{fic.id}/{cid}'})

    return Err.ok(fic.toJSONable())
Пример #7
0
def populateManualTemplate(url, chapterUrls, author):
	"""Build a dict template for a manually-tracked fic.

	The caller is expected to fill in the 'FILL IN MY ...' placeholder
	fields afterwards.  The manual lid is one past the number of existing
	manual fics.
	"""
	existingManual = Fic.select({'type': FicType.manual})
	lid = len(existingManual) + 1

	# snapshot the clock once so added/fetched/lastModified all agree
	# (repeated int(time.time()) calls could straddle a second boundary)
	now = int(time.time())

	manRename = {'id': None}
	manDefaults = {
		'fandoms': [],
		'characters': [],
		'tags': [],
		'genres': [],
		'authorUrl': url,
		'author': author,
		'authorId': author,
		'ageRating': 'M',
		'language': 'English',
		'favorites': 0,
		'follows': 0,
		'reviews': 0,
		'url': url,
		'lastUrl': url,
		'type': FicType.manual,
		'lid': lid,
		'ficStatus': Status.complete,
		'wordCount': -1,
		'description': 'FILL IN MY DESCRIPTION',
		'title': 'FILL IN MY TITLE',
		'published': 'FILL IN MY PUBLISHED DATE',
		'updated': 'FILL IN MY UPDATED DATE',
		'added': now,
		'fetched': now
	}

	fic = Fic.new().__dict__
	fic = inflateObject(fic, manRename, manDefaults)

	fic['chapters'] = {}
	fic['chapterCount'] = len(chapterUrls)

	# one stub entry per chapter url, 1-indexed to match chapter ids
	for cid in range(1, len(chapterUrls) + 1):
		fic['chapters'][cid] = {
			'lastModified': now,
			'status': Status.ongoing,
			'fetched': now,
			'url': chapterUrls[cid - 1],
		}

	return fic
Пример #8
0
	def tryParseUrl(self, url: str) -> Optional[FicId]:
		"""Try to map a site url onto a FicId, consulting stored data first."""
		if not url.startswith(self.baseUrl):
			return None

		# a stored chapter url identifies both the fic and the chapter
		chapterRows = FicChapter.select({'url': url})
		if len(chapterRows) == 1:
			parent = Fic.get((chapterRows[0].ficId, ))
			if parent is not None:
				sourceType = FicType(parent.sourceId)
				return FicId(
					sourceType, parent.localId, chapterRows[0].chapterId, False
				)

		# otherwise the url may identify a stored fic directly
		ficRows = Fic.select({'url': url})
		if len(ficRows) == 1:
			sourceType = FicType(ficRows[0].sourceId)
			return FicId(sourceType, ficRows[0].localId)

		# fall back to parsing the path: authors/{author}/{story}.html
		tail = url[len(self.baseUrl):]
		if not tail.endswith('.html'):
			return None

		segments = tail.split('/')
		if len(segments) != 3 or segments[0] != 'authors':
			return None

		author = segments[1]
		storyId = segments[2]
		# strip known filename suffixes in order; each removed at most once
		for suffix in ('01a.html', '.html'):
			if storyId.endswith(suffix):
				storyId = storyId[:-len(suffix)]

		# note: seems to be safe to lowercase these
		lid = (author + '/' + storyId).lower()
		# make lid author/story ?

		# TODO: we need some sort of local lid mapping...
		raise NotImplementedError()
Пример #9
0
def dumpDB():
	"""Dump fandoms, characters, genres, tags and all fics (with chapter
	metadata) into a plain-dict structure suitable for serialization.

	As a side effect, raw chapter content is written to
	./content/{type}/{localId}/{chapterId}/content.html.
	"""
	data = {}

	# id -> object maps so fic rows can resolve names without extra queries
	fandomMap = {f.id: f for f in Fandom.select()}
	characterMap = {c.id: c for c in Character.select()}
	genreMap = {g.id: g for g in Genre.select()}
	tagMap = {t.id: t for t in Tag.select()}

	data['fandoms'] = [f.name for f in fandomMap.values()]
	data['characters'] = [
		{
			'name': c.name,
			'fandom': fandomMap[c.fandom_id].name
		} for c in characterMap.values()
	]
	data['genres'] = [g.name for g in genreMap.values()]
	data['tags'] = [t.name for t in tagMap.values()]

	data['fics'] = {}

	# fields dropped (mapped to None) or renamed when flattening __dict__s
	frename = {'id': None, 'chapters': 'chapterCount'}
	crename = {
		'id': None,
		'ficId': None,
		'cid': None,
		'raw': None,
		'fic': None,
		'lastLine': None
	}
	# per-chapter defaults; url/lastModified are filled in per chapter below
	cdefaults = {
		'line': 0,
		'subLine': 0,
		'notes': None,
		'status': Status.ongoing,
		'fetched': None,
		'url': None
	}

	fics = Fic.select()
	for fic in fics:
		k = '{}/{}'.format(fic.type, fic.localId)
		o = fic.__dict__.copy()
		o = deflateObject(o, frename)

		o['fandoms'] = [f.name for f in fic.fandoms()]
		o['characters'] = [
			{
				'name': c.name,
				'fandom': fandomMap[c.fandom_id].name
			} for c in fic.characters()
		]
		o['tags'] = [t.name for t in fic.tags()]
		o['genres'] = [g.name for g in fic.genres()]

		co = {}
		ficChapters = FicChapter.select({'ficId': fic.id})
		for chapter in ficChapters:
			here = chapter.__dict__.copy()
			ffNetUrl = 'https://www.fanfiction.net/s/{}/{}/{}'.format(
				fic.localId, chapter.chapterId, util.urlTitle(fic.title)
			)
			# use a per-chapter copy instead of mutating the shared
			# cdefaults dict across loop iterations
			chapterDefaults = dict(cdefaults)
			chapterDefaults['url'] = ffNetUrl
			chapterDefaults['lastModified'] = here['fetched']
			here = deflateObject(here, crename, chapterDefaults)

			co[chapter.chapterId] = here
			if chapter.raw is None:
				continue

			contentPath = './content/{}/{}/{}/'.format(
				fic.type, fic.localId, chapter.chapterId
			)
			# exist_ok avoids the isdir-then-makedirs race
			os.makedirs(contentPath, exist_ok=True)
			with open(contentPath + 'content.html', 'w') as f:
				f.write(chapter.content())

		o['chapters'] = co

		data['fics'][k] = o

	return data
Пример #10
0
	def parseInfoInto(self, fic: Fic, wwwHtml: str) -> Fic:
		"""Parse an ffnet story page into `fic`, upsert it, and return the
		re-fetched Fic.

		Extracts title, description, author, stats, status, fandoms and
		chapter titles from the page's profile_top block.  If the page is a
		known "story deleted" variant, marks the fic abandoned (unless
		already complete) and returns early.
		"""
		from bs4 import BeautifulSoup  # type: ignore
		deletedFicTexts = [
			# probably deleted by user
			'Story Not FoundUnable to locate story. Code 1.',
			# probably deleted by admin
			'Story Not FoundUnable to locate story. Code 2.',
			# unknown
			'Story Not FoundStory is unavailable for reading. (A)',
		]
		soup = BeautifulSoup(wwwHtml, 'html5lib')
		profile_top = soup.find(id='profile_top')
		# story might've been deleted
		if profile_top is None:
			gui_warnings = soup.find_all('span', {'class': 'gui_warning'})
			for gui_warning in gui_warnings:
				for deletedFicText in deletedFicTexts:
					if gui_warning.get_text() == deletedFicText:
						# don't downgrade a fic we already know is complete
						if fic.ficStatus != FicStatus.complete:
							fic.ficStatus = FicStatus.abandoned
						fic.upsert()
						return fic

		# NOTE(review): if profile_top is None and no deleted-fic warning
		# matched above, the next line raises AttributeError — confirm
		# whether a clearer exception is wanted here
		text = profile_top.get_text()
		pt_str = str(profile_top)

		fic.fetched = OilTimestamp.now()
		fic.languageId = Language.getId("English")  # TODO: don't hard code?

		# the title is the lone <b class="xcontrast_txt"> in profile_top
		# (assumes every <b> here has a class list; b.get('class') returning
		# None would raise — TODO confirm against real pages)
		for b in profile_top.find_all('b'):
			b_class = b.get('class')
			if len(b_class) == 1 and b_class[0] == 'xcontrast_txt':
				fic.title = b.get_text()
				break
		else:
			raise Exception('error: unable to find title:\n{}\n'.format(pt_str))

		fic.url = self.constructUrl(fic.localId, 1, fic.title)

		# the description is the styled xcontrast_txt div
		descriptionFound = False
		for div in profile_top.find_all('div'):
			div_class = div.get('class')
			if (
				div.get('style') == 'margin-top:2px' and len(div_class) == 1
				and div_class[0] == 'xcontrast_txt'
			):
				fic.description = div.get_text()
				descriptionFound = True
				break
		if descriptionFound == False:
			raise Exception('error: unable to find description:\n{}\n'.format(pt_str))

		# default optional fields
		fic.reviewCount = 0
		fic.favoriteCount = 0
		fic.followCount = 0

		# TODO we should match this only on the section following the description
		# keys suffixed with '?' are optional; the second tuple element is
		# the coercion applied to the captured group
		matcher = RegexMatcher(
			text, {
				'ageRating': ('Rated:\s+Fiction\s*(\S+)', str),
				'chapterCount?': ('Chapters:\s+(\d+)', int),
				'wordCount': ('Words:\s+(\S+)', int),
				'reviewCount?': ('Reviews:\s+(\S+)', int),
				'favoriteCount?': ('Favs:\s+(\S+)', int),
				'followCount?': ('Follows:\s+(\S+)', int),
				'updated?': ('Rated:.*Updated:\s+(\S+)', str),
				'published': ('Published:\s+([^-]+)', str),
			}
		)
		matcher.matchAll(fic)

		# published/updated come back as date strings; convert to timestamps
		if fic.published is not None:
			publishedUts = util.parseDateAsUnix(fic.published, fic.fetched)
			fic.published = OilTimestamp(publishedUts)

		if fic.updated is None:
			# never-updated stories fall back to their publish date
			fic.updated = fic.published
		elif fic.updated is not None:
			updatedUts = util.parseDateAsUnix(fic.updated, fic.fetched)
			fic.updated = OilTimestamp(updatedUts)

		if fic.chapterCount is None:
			fic.chapterCount = 1

		# a 'Status: Complete' marker only appears after the stats block
		match = re.search(
			'(Rated|Chapters|Words|Updated|Published):.*Status:\s+(\S+)', text
		)
		if match is None:
			fic.ficStatus = FicStatus.ongoing
		else:
			status = match.group(2)
			if status == 'Complete':
				fic.ficStatus = FicStatus.complete
			else:
				raise Exception('unknown status: {}: {}'.format(fic.url, status))

		# the author link is the first /u/<id>/ anchor in profile_top
		for a in profile_top.find_all('a'):
			a_href = a.get('href')
			if a_href.startswith('/u/'):
				author = a.get_text()
				authorUrl = self.baseUrl + a_href
				authorId = a_href.split('/')[2]
				self.setAuthor(fic, author, authorUrl, authorId)
				break
		else:
			raise Exception('unable to find author:\n{}'.format(text))

		# fandom breadcrumbs live in #pre_story_links; classify each anchor
		preStoryLinks = soup.find(id='pre_story_links')
		preStoryLinksLinks = []
		if preStoryLinks is not None:
			preStoryLinksLinks = preStoryLinks.find_all('a')
		pendingFandoms: List[Fandom] = []
		for a in preStoryLinksLinks:
			href = a.get('href')
			hrefParts = href.split('/')

			# if it's a top level category
			if (
				len(hrefParts) == 3 and len(hrefParts[0]) == 0
				and len(hrefParts[2]) == 0
			):
				cat = hrefParts[1]
				if cat in ffNetFandomCategories:
					continue  # skip categories
				raise Exception('unknown category: {}'.format(cat))

			# if it's a crossover /Fandom1_and_Fandm2_Crossovers/f1id/f2id/
			if (
				len(hrefParts) == 5 and hrefParts[1].endswith("_Crossovers")
				and len(hrefParts[0]) == 0 and len(hrefParts[4]) == 0
			):
				fIds = [int(hrefParts[2]), int(hrefParts[3])]
				pendingFandoms += self.handleCrossoverFandom(
					fic, hrefParts[1], fIds, href
				)
				continue

			# if it's a regular fandom in some category
			if (
				len(hrefParts) == 4 and len(hrefParts[0]) == 0
				and len(hrefParts[3]) == 0
			):
				# ensure category is in our map
				if hrefParts[1] not in ffNetFandomCategories:
					raise Exception('unknown category: {}'.format(hrefParts[1]))

				pendingFandoms += self.handleFandom(fic, hrefParts[2])
				continue

			util.logMessage('unknown fandom {0}: {1}'.format(fic.id, href))

		# persist, then re-fetch so fic has a db identity before fandoms
		# are attached
		fic.upsert()
		poss = Fic.select({'sourceId': fic.sourceId, 'localId': fic.localId})
		if len(poss) != 1:
			raise Exception(f'unable to upsert fic?')
		fic = poss[0]
		for pfandom in pendingFandoms:
			fic.add(pfandom)

		if fic.chapterCount is None:
			return fic

		# multi-chapter stories expose titles via the chapter <select>
		chapterTitles = []
		if fic.chapterCount > 1:
			chapterSelect = soup.find(id='chap_select')
			chapterOptions = []
			if chapterSelect is not None:
				chapterOptions = chapterSelect.findAll('option')
			chapterTitles = [co.getText().strip() for co in chapterOptions]

		for cid in range(1, fic.chapterCount + 1):
			ch = fic.chapter(cid)
			ch.localChapterId = str(cid)
			ch.url = self.constructUrl(fic.localId, cid)
			if len(chapterTitles) > cid:
				ch.title = util.cleanChapterTitle(chapterTitles[cid - 1], cid)
			elif fic.chapterCount == 1 and cid == 1:
				# single-chapter stories reuse the fic title
				ch.title = fic.title
			ch.upsert()

		# the xgray span holds the raw meta line; parse it best-effort and
		# reconstruct a normalized extraMeta string from the parsed fields
		metaSpan = profile_top.find('span', {'class': 'xgray'})
		if metaSpan is not None:
			try:
				res = self.parseFicMetaSpan(metaSpan.decode_contents())
				#fic.language = res["language"]

				# reconstruct
				fields = [
					('rated', 'Rated: Fiction ZZZ'),
					('language', 'Language: ZZZ'),
					('genres', 'Genre: ZZZ'),
					('characters', 'Characters: ZZZ'),
					('reviews', 'Reviews: ZZZ'),
					('favorites', 'Favs: ZZZ'),
					('follows', 'Follows: ZZZ'),
				]
				rmeta = ' - '.join(
					[f[1].replace('ZZZ', res[f[0]]) for f in fields if f[0] in res]
				)

				fic.extraMeta = rmeta
				publishedUts = util.parseDateAsUnix(res['published'], fic.fetched)
				fic.published = OilTimestamp(publishedUts)
				fic.updated = fic.published
				if 'updated' in res:
					updatedUts = util.parseDateAsUnix(res['updated'], fic.fetched)
					fic.updated = OilTimestamp(updatedUts)
				fic.upsert()

			except Exception as e:
				# meta-span parsing is best-effort: log and keep the fic
				util.logMessage(
					f'FFNAdapter.parseInfoInto: .parseFicMetaSpan:\n{e}\n{traceback.format_exc()}'
				)
				util.logMessage(
					f'FFNAdapter.parseFicMetaSpan: {metaSpan.decode_contents()}'
				)
				pass

		return fic
Пример #11
0
    def __tryParse(ident: str) -> Optional['FicId']:
        """Heuristically resolve a user-supplied identifier into a FicId.

        Tries, in order: urls, bot link{site}(id) idents, numeric story ids,
        url ids, bare ffnet ids, story/chapter pairs, then fallback guessing.
        """
        if not ident.strip():
            return None

        # strip view-source from potential urls
        if ident.startswith('view-source:http'):
            ident = ident[len('view-source:'):]

        # guess url next
        if ident.startswith('http'):
            return FicId.tryParseUrl(ident)

        # check for link{site}(id) style idents
        for ftype, adapter in adapters.items():
            if adapter is None or adapter.botLinkSuffix is None:
                continue
            linkPrefix = 'link{}('.format(adapter.botLinkSuffix)
            if ident.startswith(linkPrefix) and ident.endswith(')'):
                inner = ident[len(linkPrefix):-1]
                if inner.isnumeric():
                    return FicId(ftype, inner, ambiguous=False)
                return FicId.tryParse(ident)

        # maybe it's an actual story id
        from store import Fic
        parts = ident.split('/')
        if parts[0].isnumeric():
            fic = Fic.get((int(parts[0]), ))
            if fic is not None:
                fid = fic.fid()
                if len(parts) == 2 and parts[1].isnumeric():
                    fid.chapterId = int(parts[1])
                    fid.ambiguous = False
                return fid

        # or maybe it's a url id...
        potential = Fic.select({'urlId': parts[0]})
        if len(potential) == 1:
            fid = potential[0].fid()
            if len(parts) == 2 and parts[1].isnumeric():
                fid.chapterId = int(parts[1])
                fid.ambiguous = False
            return fid

        # assume numeric is ffnet
        if ident.isnumeric():
            return FicId(FicType.ff_net, ident)

        # guess story/chapter on ffnet
        if len(parts) == 2 and parts[1].isnumeric():
            return FicId(FicType.ff_net, parts[0], int(parts[1]), ambiguous=False)

        # try prepending https protocol
        if '://' not in ident:
            return FicId.tryParseUrl('https://' + ident)

        # just guess manual adapter
        return FicId.tryParseFallback(ident)
Пример #12
0
def v0_fic(urlId: str) -> Any:
    """Return the JSON-able info for the fic with the given urlId."""
    matches = Fic.select({'urlId': urlId})
    if len(matches) == 1:
        return Err.ok(matches[0].toJSONable())
    return Err.urlId_not_found.get()