def getLOMGeneral(self, response):
    """Fill the general LOM loader from the page's HTML head.

    The title is the first text node found below any ``<title>`` element and
    the language is the ``content`` attribute of the ``og:locale`` meta tag.
    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)

    page_title = response.xpath('//title//text()').get()
    loader.add_value('title', page_title)

    locale = response.xpath('//meta[@property="og:locale"]/@content').get()
    loader.add_value('language', locale)

    return loader
def getLOMGeneral(self, response):
    """Fill title and description from the ``/data`` document in *response*.

    Each field takes the first match of its XPath query. Returns the loader
    obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    xpath_by_field = (
        ("title", "/data/titel/text()"),
        ("description", "/data/beschreibung/text()"),
    )
    for field, query in xpath_by_field:
        loader.add_value(field, response.xpath(query).get())
    return loader
def getLOMGeneral(self, response):
    """Fill title, keywords and description on the general LOM loader.

    Title and description are looked up through ``self.get``; keywords come
    from ``self.getKeywords``. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response=response)

    title = self.get("title", response=response)
    loader.add_value("title", title)

    keywords = self.getKeywords(response)
    loader.add_value("keyword", keywords)

    description = self.get("description", response=response)
    loader.add_value("description", description)

    return loader
def getLOMGeneral(self, response):
    """Fill title, keywords and description from the item and the page.

    * title: the stripped ``Name`` entry of ``response.meta["item"]``.
    * keyword: every non-empty, stripped text node below ``topic-name``
      elements inside ``#ContentModuleApp``.
    * description: the non-empty, stripped paragraph texts of the
      ``inlinetext`` content modules, joined by newlines.

    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    loader.add_value("title", response.meta["item"].get("Name").strip())

    topic_texts = response.xpath(
        '//*[@id="ContentModuleApp"]//*[@class="topic-name"]//text()'
    ).getall()
    stripped_topics = (text.strip() for text in topic_texts)
    # Whitespace-only nodes collapse to "" after stripping and are dropped.
    loader.add_value("keyword", [topic for topic in stripped_topics if topic])

    paragraph_texts = response.xpath(
        '//*[@id="ContentModuleApp"]//*[@content-module-type="inlinetext"]//p//text()'
    ).getall()
    stripped_paragraphs = (text.strip() for text in paragraph_texts)
    paragraphs = [paragraph for paragraph in stripped_paragraphs if paragraph]
    loader.add_value("description", "\n".join(paragraphs).strip())

    return loader
def getLOMGeneral(self, response):
    """Fill identifier, title, keywords and language via ``self.get``.

    Each LOM field is read from the source key paired with it below.
    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    source_key_by_field = (
        ('identifier', 'id'),
        ('title', 'title'),
        ('keyword', 'keywords'),
        ('language', 'language'),
    )
    for field, source_key in source_key_by_field:
        loader.add_value(field, self.get(source_key, response=response))
    return loader
def getLOMGeneral(self, response):
    """Fill identifier, title and language for one feed item.

    Identifier and title are the first text nodes below the item's ``guid``
    and ``title`` elements; the language is taken from
    ``self.commonProperties``. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    item = response.meta['item']

    guid = item.xpath('guid//text()').get()
    loader.add_value('identifier', guid)

    title = item.xpath('title//text()').get()
    loader.add_value('title', title)

    loader.add_value('language', self.commonProperties['language'])
    return loader
def getLOMGeneral(self, response):
    """Fill identifier, title, keywords, language and description.

    All values are looked up through ``self.get`` using the source key
    paired with each LOM field. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    source_key_by_field = (
        ("identifier", "id"),
        ("title", "title"),
        ("keyword", "keywords"),
        ("language", "language"),
        ("description", "description"),
    )
    for field, source_key in source_key_by_field:
        loader.add_value(field, self.get(source_key, response=response))
    return loader
def getLOMGeneral(self, response: Response) -> items.LomGeneralItemloader:
    """Fill the general LOM loader from the row stored in ``response.meta``.

    Title and description are copied from the row, keywords are the row's
    ``keywords`` cell passed through ``self.parse_csv_field``, and the
    language comes from ``self.static_values``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    row = response.meta["row"]

    loader.add_value("title", row["title"])
    loader.add_value("description", row["description"])

    keywords = self.parse_csv_field(row["keywords"])
    loader.add_value("keyword", keywords)

    loader.add_value("language", self.static_values["language"])
    return loader
def getLOMGeneral(self, response):
    """Fill title, keywords and description on the general LOM loader.

    The title replaces any value already on the loader and is taken from
    ``response.meta["item"]``; keywords and description are read through
    ``self.getProperty``. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    loader.replace_value("title", response.meta["item"]["title"])

    keywords = self.getProperty("cclom:general_keyword", response)
    loader.add_value("keyword", keywords)

    description = self.getProperty("cclom:general_description", response)
    loader.add_value("description", description)

    return loader
def getLOMGeneral(self, response):
    """Fill title, description and a fixed aggregation level.

    Title and description are the first matches of their XPath queries on
    the ``/data`` document. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)

    title = response.xpath("/data/titel/text()").get()
    loader.add_value("title", title)

    description = response.xpath("/data/beschreibung/text()").get()
    loader.add_value("description", description)

    # Default aggregationLevel, so filtering queries can rely on the field.
    loader.add_value("aggregationLevel", "1")
    return loader
def getLOMGeneral(self, response):
    """Fill the general LOM loader from LRMI properties.

    Each LOM field is read through ``self.getLRMI`` using the property
    names paired with it below. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    lrmi_names_by_field = (
        ("identifier", ("identifier",)),
        ("title", ("name", "headline")),
        ("keyword", ("keywords",)),
        ("language", ("inLanguage",)),
        ("description", ("description", "about")),
    )
    for field, lrmi_names in lrmi_names_by_field:
        loader.add_value(field, self.getLRMI(*lrmi_names, response=response))
    return loader
def getLOMGeneral(self, response):
    """Fill title and keywords from the JSON item in ``response.meta``.

    The title is the HTML-unescaped ``title.rendered`` value and replaces
    any value already on the loader. Each entry of ``tags`` is translated
    through ``self.keywords`` and added as a keyword. Returns the loader
    obtained from ``LomBase.getLOMGeneral``.
    """
    # HTMLParser.unescape() was deprecated in 3.4 and removed in Python 3.9;
    # html.unescape() is the replacement. Imported locally so the block
    # does not depend on the file's import section.
    import html

    general = LomBase.getLOMGeneral(self, response)
    item = response.meta['item']

    general.replace_value(
        'title', html.unescape(self.get('title.rendered', json=item)))

    tag_ids = self.get('tags', json=item)
    if tag_ids:
        general.add_value('keyword', [self.keywords[tag] for tag in tag_ids])
    return general
def getLOMGeneral(self, response):
    """Fill the general LOM loader from the row in ``response.meta``.

    Cells are addressed by the ``CSVBase.COLUMN_*`` constants. Title and
    description use the cell's ``"text"`` entry, keywords use its ``"list"``
    entry, and the language replaces any value already on the loader.
    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    row = response.meta["row"]

    loader.add_value("title", row[CSVBase.COLUMN_TITLE]["text"])
    loader.replace_value("language", row[CSVBase.COLUMN_LANGUAGE]["text"])
    loader.add_value("keyword", row[CSVBase.COLUMN_KEYWORD]["list"])
    loader.add_value("description", row[CSVBase.COLUMN_DESCRIPTION]["text"])
    return loader
def getLOMGeneral(self, response: Response) -> items.LomGeneralItemloader:
    """Fill the general LOM loader from the item snippet and the row.

    The title comes from the item's ``snippet`` and the description from
    ``self.getDescription``. Keywords combine the row's ``keyword`` cell
    with the snippet's ``tags`` when that key is present. The language is
    the row's ``language`` cell. Both row cells go through
    ``self.parse_csv_field``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    snippet = response.meta["item"]["snippet"]

    loader.add_value("title", snippet["title"])
    loader.add_value("description", self.getDescription(response))

    row = response.meta["row"]
    loader.add_value("keyword", self.parse_csv_field(row["keyword"]))
    if "tags" in snippet:
        loader.add_value("keyword", snippet["tags"])

    loader.add_value("language", self.parse_csv_field(row["language"]))
    return loader
def getLOMGeneral(self, response):
    """Fill title and keywords from the item and the page.

    The title is the stripped ``Name`` entry of ``response.meta['item']``.
    Keywords are the non-empty, stripped text nodes below ``topic-name``
    elements inside ``#ContentModuleApp``. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    loader.add_value('title', response.meta['item'].get('Name').strip())

    topic_texts = response.xpath(
        '//*[@id="ContentModuleApp"]//*[@class="topic-name"]//text()'
    ).getall()
    stripped_topics = (text.strip() for text in topic_texts)
    # Whitespace-only nodes collapse to "" after stripping and are dropped.
    loader.add_value('keyword', [topic for topic in stripped_topics if topic])
    return loader
def getLOMGeneral(self, response):
    """Fill identifier, title and keywords from an OAI-PMH GetRecord reply.

    Namespaces are stripped from the response selector first (this mutates
    ``response.selector``) so the plain element paths below match. Returns
    the loader obtained from ``LomBase.getLOMGeneral``.
    """
    response.selector.remove_namespaces()
    record = response.xpath('//OAI-PMH/GetRecord/record')

    # Pass ``self`` explicitly: the base method is called unbound, so
    # omitting it would bind ``response`` as ``self``.
    general = LomBase.getLOMGeneral(self, response)

    general.add_value(
        'identifier',
        record.xpath('header/identifier//text()').extract_first())
    general.add_value(
        'title',
        record.xpath(
            'metadata/lom/general/title/string//text()').extract_first())

    keywords = record.xpath(
        'metadata/lom/general/keyword/string//text()').getall()
    general.add_value('keyword', keywords)
    return general
def getLOMGeneral(self, response):
    """Fill identifier, title, language and description for one feed item.

    Identifier and title are the first text nodes below the item's ``guid``
    and ``title`` elements (the title is stripped). The language is taken
    from ``self.commonProperties``. The description is the item's
    ``description`` text, falling back to a ``summary`` element of the item
    when that is empty or missing. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    general = LomBase.getLOMGeneral(self, response)
    item = response.meta["item"]

    general.add_value("identifier", item.xpath("guid//text()").get())
    general.add_value("title", item.xpath("title//text()").get().strip())
    general.add_value("language", self.commonProperties["language"])

    description = item.xpath("description//text()").get()
    if not description:
        # The leading "." keeps the query relative to this item. A bare
        # "//" is absolute and would return the first <summary> of the
        # whole document, i.e. possibly another item's text.
        description = item.xpath('.//*[name()="summary"]//text()').get()
    general.add_value("description", description)
    return general
def getLOMGeneral(self, response):
    """Fill title, keywords and description from the JSON item.

    All lookups read ``response.meta["item"]`` through ``self.get``:

    * title: ``parse.title`` (replaces any value already on the loader).
    * keyword: the ``"*"`` entry of every element of ``parse.links``.
    * description: the ``"*"`` entry of every ``parse.properties`` element
      whose ``name`` is ``"description"``.

    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    general = LomBase.getLOMGeneral(self, response)
    item = response.meta["item"]

    general.replace_value("title", self.get("parse.title", json=item))

    links = self.get("parse.links", json=item)
    if links:
        general.add_value("keyword", [link["*"] for link in links])

    # The properties lookup must read the same item as the lookups above;
    # the original call omitted ``json=`` here.
    props = self.get("parse.properties", json=item)
    if props:
        description = [
            prop["*"] for prop in props if prop["name"] == "description"
        ]
        general.add_value("description", description)
    return general
def getLOMGeneral(self, response):
    """Fill title, keywords and description from the JSON item.

    The title (``title.rendered``) and the description (``acf.short_text``)
    are HTML-unescaped; the title replaces any value already on the loader.
    Each entry of ``tags`` is translated through ``self.keywords`` and added
    as a keyword. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    # HTMLParser.unescape() was deprecated in 3.4 and removed in Python 3.9;
    # html.unescape() is the replacement. Imported locally so the block
    # does not depend on the file's import section.
    import html

    general = LomBase.getLOMGeneral(self, response)
    item = response.meta["item"]

    general.replace_value(
        "title", html.unescape(self.get("title.rendered", json=item)))

    tag_ids = self.get("tags", json=item)
    if tag_ids:
        general.add_value("keyword", [self.keywords[tag] for tag in tag_ids])

    general.add_value(
        "description", html.unescape(self.get("acf.short_text", json=item)))
    return general
def getLOMGeneral(self, response):
    """Fill title, description and keywords from the element dict.

    ``einzeltitel`` is required; ``kurzinhalt`` (description) and
    ``listeStichwort`` (keywords) are only added when present, and the
    keyword list only when it is non-empty. Returns the loader obtained
    from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)

    # The element travels with the response as a Python dict.
    element = response.meta["item"]

    # TODO: Decide which title to use. Should it be built by concatenating
    # several of the provided ones (einzeltitel, einzeluntertitel,
    # serientitel, serienuntertitel)?
    loader.add_value("title", element["einzeltitel"])

    if "kurzinhalt" in element:
        loader.add_value("description", element["kurzinhalt"])

    keywords = None
    if "listeStichwort" in element:
        keywords = element["listeStichwort"]
    if keywords is not None and len(keywords) > 0:
        loader.add_value("keyword", keywords)

    return loader
def getLOMGeneral(self, response):
    """Fill title, description and keywords from the JSON item.

    ``title.rendered`` and ``excerpt.rendered`` are passed through
    ``self.html2Text``; the title replaces any value already on the loader
    and the "… weiterlesen …" marker is removed from the excerpt. Each entry
    of ``categories`` is translated through ``self.categories`` and added as
    a keyword. Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    item = response.meta["item"]

    title_text = self.html2Text(self.get("title.rendered", json=item))
    loader.replace_value("title", title_text)

    excerpt_text = self.html2Text(self.get("excerpt.rendered", json=item))
    loader.add_value(
        "description", excerpt_text.replace("… weiterlesen …", ""))

    category_ids = self.get("categories", json=item)
    if category_ids:
        category_names = [self.categories[cat_id] for cat_id in category_ids]
        loader.add_value("keyword", category_names)

    return loader
def getLOMGeneral(self, response):
    """Fill title, language, keywords and description for one XML item.

    Title, keywords and description are HTML-unescaped. Keywords are the
    ``schlagwort`` text split on ``"; "``. A description starting with
    ``swiffyobject_`` is treated as invalid and skipped. Returns the loader
    obtained from ``LomBase.getLOMGeneral``.
    """
    # HTMLParser.unescape() was deprecated in 3.4 and removed in Python 3.9;
    # html.unescape() is the replacement. Imported locally so the block
    # does not depend on the file's import section.
    import html

    general = LomBase.getLOMGeneral(self, response)
    item = response.meta["item"]

    general.add_value(
        "title", html.unescape(item.xpath("titel//text()").get()))
    general.add_value("language", item.xpath("sprache//text()").get())
    general.add_value(
        "keyword",
        html.unescape(item.xpath("schlagwort//text()").get()).split("; "))

    desc = item.xpath("beschreibung//text()").get().strip()
    # Crude filter for invalid descriptions. Not perfect yet: these objects
    # also appear inside the content.
    if not desc.startswith("swiffyobject_"):
        general.add_value("description", html.unescape(desc))
    return general
def getLOMGeneral(self, response):
    """Fill title and description from the item, scraping if necessary.

    When the item carries no ``description``, one is built from the text
    nodes of the ``worksheet-pages`` list in the HTML returned by
    ``self.getUrlData``: the first four nodes are dropped if there are more
    than four, the rest is joined with spaces and cut to 1000 characters.
    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    item = response.meta["item"]
    loader.add_value("title", item["name"])

    if 'description' in item:
        loader.add_value("description", item["description"])
        return loader

    page_html = self.getUrlData(response.url)["html"]
    if not page_html:
        return loader

    texts = Selector(text=page_html).xpath(
        '//ul[contains(@class,"worksheet-pages")]//text()').getall()
    skip = 4
    if len(texts) > skip:
        del texts[:skip]

    loader.add_value("description", " ".join(texts)[:1000])
    return loader
def getLOMGeneral(self, response):
    """Fill title, aggregation level, description and keywords.

    ``title`` and ``aggregation_level`` are required keys of the element
    dict; ``kurzinhalt`` (description) and ``listeStichwort`` (keywords) are
    only added when present, and the keyword list only when it is non-empty.
    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)

    # The element travels with the response as a Python dict.
    element = response.meta["item"]

    loader.add_value("title", element["title"])
    loader.add_value("aggregationLevel", element["aggregation_level"])

    if "kurzinhalt" in element:
        loader.add_value("description", element["kurzinhalt"])

    keywords = None
    if "listeStichwort" in element:
        keywords = element["listeStichwort"]
    if keywords is not None and len(keywords) > 0:
        loader.add_value("keyword", keywords)

    return loader
def getLOMGeneral(self, response):
    """Fill identifier, title, description and keywords from OAI-PMH.

    Namespaces are stripped from the response selector first (this mutates
    ``response.selector``) so the plain element paths below match the
    GetRecord reply. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    response.selector.remove_namespaces()
    record = response.xpath("//OAI-PMH/GetRecord/record")

    # Pass ``self`` explicitly: the base method is called unbound, so
    # omitting it would bind ``response`` as ``self``.
    general = LomBase.getLOMGeneral(self, response)

    general.add_value(
        "identifier",
        record.xpath("header/identifier//text()").extract_first())
    general.add_value(
        "title",
        record.xpath(
            "metadata/lom/general/title/string//text()").extract_first())
    general.add_value(
        "description",
        record.xpath(
            "metadata/lom/general/description/string//text()"
        ).extract_first())

    keywords = record.xpath(
        "metadata/lom/general/keyword/string//text()").getall()
    general.add_value("keyword", keywords)
    return general
def getLOMGeneral(self, response):
    """Fill title, description, language and keywords from item attributes.

    The title is the HTML-unescaped ``@title`` and the description is
    ``@task`` passed through ``self.html2Text``. Keywords are ``@tags`` plus,
    on a best-effort basis, each ``@subcategory`` value translated through
    ``self.subcategories``. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    # HTMLParser.unescape() was deprecated in 3.4 and removed in Python 3.9;
    # html.unescape() is the replacement. Imported locally so the block
    # does not depend on the file's import section.
    import html

    general = LomBase.getLOMGeneral(self, response)
    item = response.meta["item"]

    general.add_value("title", html.unescape(item.xpath("@title").get()))
    general.add_value(
        "description", self.html2Text(item.xpath("@task").get()))
    general.add_value("language", item.xpath("@language").get())
    general.add_value("keyword", item.xpath("@tags").get())

    # TODO: Maybe map these through a vocabulary later.
    try:
        subcategory_names = [
            self.subcategories[code]
            for code in item.xpath("@subcategory").getall()
        ]
        general.add_value('keyword', subcategory_names)
    except Exception:
        # Still best-effort (an unknown subcategory must not fail the item),
        # but no longer swallows KeyboardInterrupt/SystemExit as the bare
        # ``except:`` did.
        pass
    return general
def getLOMGeneral(self, response):
    """Set the title from the ``name`` entry of ``response.meta['item']``.

    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    item_name = response.meta['item']['name']
    loader.add_value('title', item_name)
    return loader
def getLOMGeneral(self, response):
    """Fill title and keywords on the general LOM loader.

    The title is looked up through ``self.get`` and the keywords come from
    ``self.getKeywords``. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response=response)

    title = self.get('title', response=response)
    loader.add_value('title', title)

    keywords = self.getKeywords(response)
    loader.add_value('keyword', keywords)

    return loader
def getLOMGeneral(self, response):
    """Fill title, language and keywords for one XML item.

    Title and keywords are HTML-unescaped; keywords are the ``schlagwort``
    text split on ``'; '``. Returns the loader obtained from
    ``LomBase.getLOMGeneral``.
    """
    # HTMLParser.unescape() was deprecated in 3.4 and removed in Python 3.9;
    # html.unescape() is the replacement. Imported locally so the block
    # does not depend on the file's import section.
    import html

    general = LomBase.getLOMGeneral(self, response)
    item = response.meta['item']

    general.add_value(
        'title', html.unescape(item.xpath('titel//text()').get()))
    general.add_value('language', item.xpath('sprache//text()').get())
    general.add_value(
        'keyword',
        html.unescape(item.xpath('schlagwort//text()').get()).split('; '))
    return general
def getLOMGeneral(self, response):
    """Set the title from the first ``/data/titel`` text node.

    Returns the loader obtained from ``LomBase.getLOMGeneral``.
    """
    loader = LomBase.getLOMGeneral(self, response)
    title = response.xpath('/data/titel/text()').get()
    loader.add_value('title', title)
    return loader