Example #1
        def inner(link, index) -> "Optional[Tuple[MetaRecord, int]]":
            """Fetch one Amazon product page and scrape book metadata.

            Returns a ``(MetaRecord, index)`` tuple so concurrent callers
            can restore the original search-result order, or ``None`` when
            the page cannot be fetched or is not a book page.
            """
            # NOTE(review): using the shared session as a context manager
            # closes it on __exit__; if inner() is invoked more than once
            # per provider instance this may invalidate the session --
            # confirm this is intended.
            with self.session as session:
                try:
                    r = session.get(f"https://www.amazon.com/{link}")
                    r.raise_for_status()
                except Exception as ex:
                    # Network/HTTP failure: log and skip this result.
                    log.warning(ex)
                    return None
                long_soup = BS(r.text, "lxml")  # ~4sec :/
                # Wrapper div that only appears on book product pages.
                soup2 = long_soup.find(
                    "div",
                    attrs={
                        "cel_widget_id":
                        "dpx-books-ppd_csm_instrumentation_wrapper"
                    })
                if soup2 is None:
                    return None
                try:
                    match = MetaRecord(
                        title="",
                        # FIX: authors is a list on the success path below;
                        # start with [] instead of "" for type consistency.
                        authors=[],
                        source=MetaSourceInfo(id=self.__id__,
                                              description="Amazon Books",
                                              link="https://amazon.com/"),
                        url=f"https://www.amazon.com{link}",
                        # The more searches the slower; these fields are too
                        # hard to find in reasonable time or may not exist.
                        publisher="",  # very unreliable
                        publishedDate="",  # very unreliable
                        id=None,  # ?
                        tags=[]  # dont exist on amazon
                    )

                    try:
                        # Join the description paragraphs; [:-9] trims a
                        # trailing "Read more" marker -- TODO confirm the
                        # fixed offset still matches Amazon's markup.
                        match.description = "\n".join(
                            soup2.find("div", attrs={"data-feature-name": "bookDescription"}).stripped_strings)\
                                                .replace("\xa0"," ")[:-9].strip().strip("\n")
                    except (AttributeError, TypeError):
                        # No description -> not a book; ignore this hit.
                        return None
                    try:
                        match.title = soup2.find("span",
                                                 attrs={
                                                     "id": "productTitle"
                                                 }).text
                    except (AttributeError, TypeError):
                        match.title = ""
                    try:
                        # First text node per author span that is not pure
                        # whitespace and not inline script ("{...}").
                        match.authors = [
                            next(
                                filter(
                                    lambda i: i != " " and i != "\n"
                                    and not i.startswith("{"),
                                    x.findAll(text=True))).strip()
                            for x in soup2.findAll("span",
                                                   attrs={"class": "author"})
                        ]
                    except (AttributeError, TypeError, StopIteration):
                        # FIX: fall back to an empty list (was ""), matching
                        # the list produced by the comprehension above.
                        match.authors = []
                    try:
                        match.rating = int(
                            soup2.find("span",
                                       class_="a-icon-alt").text.split(" ")
                            [0].split(".")[0])  # first number in string
                    except (AttributeError, ValueError):
                        match.rating = 0
                    try:
                        match.cover = soup2.find(
                            "img",
                            attrs={"class":
                                   "a-dynamic-image frontImage"})["src"]
                    except (AttributeError, TypeError):
                        match.cover = ""
                    return match, index
                except Exception as e:
                    # Catch-all boundary: any unexpected scraping error is
                    # logged and the hit is dropped.
                    log.error_or_exception(e)
                    return None
Example #2
    def _parse_single_book(self,
                           id: str,
                           generic_cover: str = "") -> Optional[MetaRecord]:
        """Fetch one Douban book page and parse it into a MetaRecord.

        :param id: Douban subject id (note: parameter shadows the builtin).
        :param generic_cover: fallback cover URL when the page has none.
        :return: populated record, or ``None`` when the request fails.
        """
        url = f"https://book.douban.com/subject/{id}/"

        try:
            r = self.session.get(url)
            r.raise_for_status()
        except Exception as e:
            # Network/HTTP failure: log and report "no result".
            log.warning(e)
            return None

        match = MetaRecord(
            id=id,
            title="",
            authors=[],
            url=url,
            source=MetaSourceInfo(
                id=self.__id__,
                description=self.DESCRIPTION,
                link=self.META_URL,
            ),
        )

        html = etree.HTML(r.content.decode("utf8"))

        match.title = html.xpath(self.TITTLE_XPATH)[0].text
        match.cover = html.xpath(
            self.COVER_XPATH)[0].attrib["href"] or generic_cover
        try:
            rating_num = float(html.xpath(self.RATING_XPATH)[0].text.strip())
        except Exception:
            rating_num = 0
        # ceil(rating_num / 2): the double negation turns floor division
        # into ceiling division, mapping Douban's 10-point score to 5 stars.
        match.rating = int(-1 * rating_num // 2 * -1) if rating_num else 0

        tag_elements = html.xpath(self.TAGS_XPATH)
        if len(tag_elements):
            match.tags = [tag_element.text for tag_element in tag_elements]

        description_element = html.xpath(self.DESCRIPTION_XPATH)
        if len(description_element):
            # Serialize the last matching description node and strip its
            # HTML via html2text.
            match.description = html2text(
                etree.tostring(description_element[-1],
                               encoding="utf8").decode("utf8"))

        info = html.xpath(self.INFO_XPATH)

        # The info block is a flat run of label elements; dispatch on the
        # label text and read the value from the element tail or siblings.
        for element in info:
            text = element.text
            if self.AUTHORS_PATTERN.search(text):
                # NOTE: local name shadows the next() builtin.
                next = element.getnext()
                # Author links follow the label until the first <br>.
                while next is not None and next.tag != "br":
                    match.authors.append(next.text)
                    next = next.getnext()
            elif self.PUBLISHER_PATTERN.search(text):
                match.publisher = element.tail.strip()
            elif self.SUBTITLE_PATTERN.search(text):
                match.title = f'{match.title}:' + element.tail.strip()
            elif self.PUBLISHED_DATE_PATTERN.search(text):
                match.publishedDate = self._clean_date(element.tail.strip())
            elif self.SUBTITLE_PATTERN.search(text):
                # NOTE(review): duplicate SUBTITLE_PATTERN test -- this
                # branch is unreachable (the earlier identical condition
                # always wins); a series pattern was likely intended.
                match.series = element.getnext().text
            elif i_type := self.IDENTIFIERS_PATTERN.search(text):
                match.identifiers[i_type.group()] = element.tail.strip()