def parse_record(self, line) -> Optional[DirectoryInfo]:
    """Parse one line of SciELO JSON into a DirectoryInfo record.

    Args:
        line: a JSON-encoded string for a single journal record.

    Returns:
        A populated DirectoryInfo (never None in practice).
    """
    record = json.loads(line)

    # Optional metadata; drop any keys whose value is missing.
    raw_extra = {
        "status": clean_str(record.get("current_status")),
        "first_year": record.get("first_year"),
        "collection": record.get("collection_acronym"),
    }
    extra = {key: val for key, val in raw_extra.items() if val is not None}

    # Only accept two-letter country codes; lowercase for consistency.
    country: Optional[str] = None
    publisher_country = record["publisher_country"]
    if publisher_country and len(publisher_country[0]) == 2:
        country = publisher_country[0].lower()

    # Keep only languages that parse_lang() recognizes.
    langs = []
    for raw_lang in record["languages"]:
        parsed = parse_lang(raw_lang)
        if parsed:
            langs.append(parsed)

    info = DirectoryInfo(
        directory_slug=self.source_slug,
        issne=clean_issn(record.get("electronic_issn") or ""),
        issnp=clean_issn(record.get("print_issn") or ""),
        custom_id=clean_str(record.get("scielo_issn")),
        name=clean_str(record.get("fulltitle")),
        publisher=clean_str((record.get("publisher_name") or [""])[0]),
        abbrev=clean_str(record["abbreviated_iso_title"]),
        platform="scielo",
        langs=langs,
        country=country,
        extra=extra,
    )

    if record["url"]:
        homepage = HomepageUrl.from_url(record["url"])
        if homepage:
            info.homepage_urls.append(homepage)
    return info
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Parse a CSV row (as a dict) into a DirectoryInfo.

    Returns None when the row has no title.
    """
    # HACK: some CSV exports have a UTF-8 BOM fused onto the first
    # header name, so "Title" shows up as "\ufeffTitle".
    if "\ufeffTitle" in record:
        record["Title"] = record["\ufeffTitle"]

    title = record["Title"]
    if not title:
        return None

    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(record["ISSN"]),
        issne=clean_issn(record["EISSN"]),
        name=clean_str(title),
    )

    homepage = HomepageUrl.from_url(record["URL"])
    if homepage is not None:
        info.homepage_urls.append(homepage)
    return info
def parse_record(self, row) -> Optional[DirectoryInfo]:
    """Parse one line of id.loc.gov JSON-LD into a DirectoryInfo.

    Args:
        row: a JSON-encoded string holding an array of metadata elements.

    Returns:
        A DirectoryInfo populated from whichever elements are present.
    """
    row = json.loads(row)
    info = DirectoryInfo(
        directory_slug=self.source_slug,
    )
    # format is an array of metadata elements
    for el in row:
        if "label" in el and el["@id"].startswith(
            "http://id.loc.gov/vocabulary/countries"
        ):
            value = el["label"]
            # Drop US state-level entries; normalize the LoC label for Russia.
            if "(State)" in value:
                value = ""
            if value == "Russia (Federation)":
                value = "Russia"
            # BUGFIX: previously passed the raw el["label"] here, so the
            # two normalizations above were dead code.
            info.country = parse_country(value)
        if "@type" not in el:
            continue
        if el["@type"] == "http://id.loc.gov/ontologies/bibframe/IssnL":
            info.issnl = clean_issn(el["value"])
        if "mainTitle" in el:
            # mainTitle may be a single string or a list of titles.
            if isinstance(el["mainTitle"], list):
                info.name = clean_str(el["mainTitle"][0])
            else:
                info.name = clean_str(el["mainTitle"])
        if el.get("format") == "vocabularies/medium#Print":
            info.issnp = clean_issn(el["issn"])
        elif el.get("format") == "vocabularies/medium#Electronic":
            info.issne = clean_issn(el["issn"])
        # "url" may be absent, a single string, or a list of strings.
        urls = el.get("url", [])
        if isinstance(urls, str):
            urls = [
                urls,
            ]
        for url in urls:
            homepage = HomepageUrl.from_url(url)
            if homepage:
                info.homepage_urls.append(homepage)
    return info
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Parse a CSV row (dict) into a DirectoryInfo.

    Returns None when the row has no journal name.
    """
    if not record["Journal Name"]:
        return None

    # A single language column; keep it only if parse_lang() recognizes it.
    language = parse_lang(record["Language(s)"])

    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(record["ISSN"]),
        issne=clean_issn(record["E-ISSN"]),
        name=clean_str(record["Journal Name"]),
        publisher=clean_str(record["Publisher"]),
        langs=[language] if language else [],
        country=parse_country(record["Country"]),
    )

    homepage = HomepageUrl.from_url(record["Internet Archive Link"])
    if homepage is not None:
        info.homepage_urls.append(homepage)
    return info
def parse_record(self, record) -> Optional[DirectoryInfo]:
    """Parse a row (dict) into a DirectoryInfo record."""
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(record["Issn"]),
        custom_id=record["JournalId"],
        name=clean_str(record["DisplayName"]),
        publisher=clean_str(record["Publisher"]),
    )

    # "Webpage" may be None; from_url gets an empty string in that case.
    url = record["Webpage"] or ""
    homepage = HomepageUrl.from_url(url)
    if homepage is not None:
        info.homepage_urls.append(homepage)
    return info
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    """Parse one KBART row into a KbartRecord, resolving the ISSN-L.

    Args:
        row: a dict of KBART column values for one title.
        issn_db: database used to map an ISSN to its ISSN-L.

    Returns:
        A populated KbartRecord.
    """
    issne: Optional[str] = clean_issn(row["online_identifier"] or "")
    issnp: Optional[str] = clean_issn(row["print_identifier"] or "")

    # Resolve ISSN-L preferring the electronic ISSN, falling back to print.
    issnl: Optional[str] = None
    if issne:
        issnl = issn_db.issn2issnl(issne)
    if issnp and not issnl:
        issnl = issn_db.issn2issnl(issnp)

    # Coverage years come from the first four characters of the dates.
    first_date = row["date_first_issue_online"]
    last_date = row["date_last_issue_online"]
    start_year: Optional[int] = int(first_date[:4]) if first_date else None
    end_year: Optional[int] = int(last_date[:4]) if last_date else None

    end_volume = row["num_last_vol_online"]
    # hack to handle open-ended preservation
    if end_year is None and end_volume and "(present)" in end_volume:
        end_year = THIS_YEAR

    record = KbartRecord(
        issnl=issnl,
        issnp=issnp,
        issne=issne,
        title=clean_str(row["publication_title"]),
        publisher=clean_str(row["publisher_name"]),
        url=HomepageUrl.from_url(row["title_url"]),
        embargo=clean_str(row["embargo_info"]),
        start_year=start_year,
        end_year=end_year,
        start_volume=clean_str(row["num_first_vol_online"]),
        end_volume=clean_str(row["num_last_vol_online"]),
        year_spans=[],
    )

    # Some sources emit the literal string "null" for missing volumes.
    if record.start_volume == "null":
        record.start_volume = None
    if record.end_volume == "null":
        record.end_volume = None
    return record
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    """Parse a row listing preserved years into a KbartRecord.

    The "Preserved Years" column is a ";"-separated list of individual
    years, which gets collapsed into merged year spans.
    """
    # NOTE(review): both of these read the same "ISSN" column, so the
    # "or" fallback below is redundant — presumably issne was meant to
    # come from an electronic-ISSN column; confirm against the source data.
    raw_issn = clean_issn(row["ISSN"])
    issne = clean_issn(row["ISSN"])
    issnl = issn_db.issn2issnl(raw_issn or issne or "")

    # convert list of years to a set of year spans
    year_tokens = row["Preserved Years"].split(";")
    years = [int(token.strip()) for token in year_tokens if token]
    year_spans = merge_spans([], [[year, year] for year in years])

    return KbartRecord(
        issnl=issnl,
        issne=issne,
        issnp=None,
        embargo=None,
        title=clean_str(row["Title"]),
        publisher=clean_str(row["Publisher"]),
        url=None,
        start_year=None,
        end_year=None,
        start_volume=None,
        end_volume=None,
        year_spans=year_spans,
    )
def parse_record(self, line) -> Optional[DirectoryInfo]:
    """Parse one JSON line into a DirectoryInfo record.

    Args:
        line: a JSON-encoded string with "identifiers", "title",
            "languages", and "url" fields.

    Returns:
        A populated DirectoryInfo.
    """
    record = json.loads(line)
    issn_info = record.get("identifiers", {}).get("issn", {})
    # sometimes is a list; take the first entry in that case
    for k in "generic", "electronic", "print":
        # isinstance() is the idiomatic type check (was: type(...) == list)
        if isinstance(issn_info.get(k), list):
            issn_info[k] = issn_info[k][0]
    info = DirectoryInfo(
        directory_slug=self.source_slug,
        raw_issn=clean_issn(issn_info.get("generic", "")),
        issne=clean_issn(issn_info.get("electronic", "")),
        issnp=clean_issn(issn_info.get("print", "")),
        name=clean_str(record.get("title")),
        # keep only languages that parse_lang() recognizes
        langs=[
            lang for lang in [parse_lang(s) for s in record["languages"]] if lang
        ],
    )
    if record["url"]:
        homepage = HomepageUrl.from_url(record["url"])
        if homepage:
            info.homepage_urls.append(homepage)
    return info
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    """Parse a row describing a single preserved volume/year.

    Each row covers exactly one volume, so start and end year/volume
    are identical.
    """
    raw_issn = clean_issn(row["ISSN"])
    issnl = issn_db.issn2issnl(raw_issn or "")

    # "Published" starts with a four-digit year.
    year = int(row["Published"][:4])
    volume = clean_str(row["Vol"])

    return KbartRecord(
        issnl=issnl,
        issne=None,
        issnp=None,
        embargo=None,
        title=clean_str(row["Title"]),
        publisher=clean_str(row["Publisher"]),
        url=HomepageUrl.from_url(row["Url"]),
        start_year=year,
        end_year=year,
        start_volume=volume,
        end_volume=volume,
        year_spans=[],
    )
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
    """Parse a rights-date row into a KbartRecord.

    The ISSN column may hold a comma-separated list; only the first
    entry is used. The rights date doubles as both start and end year.
    """
    raw_issn = clean_issn(row["issn"].split(",")[0])
    imprint = clean_str(row["imprint"])
    raw_date = row["rights_date_used"].strip()
    issnl = issn_db.issn2issnl(raw_issn or "")

    rights_date: Optional[int] = int(raw_date) if raw_date.isdigit() else None
    # 9999 appears to be a "no usable date" sentinel — treated as missing.
    start_year: Optional[int] = None if rights_date == 9999 else rights_date

    # Publisher is the imprint text up to the first '.', ',', or '['.
    publisher: Optional[str] = None
    if imprint:
        publisher = imprint.split(".")[0].split(",")[0].split("[")[0].strip()

    return KbartRecord(
        issnl=issnl,
        issne=None,
        issnp=None,
        embargo=None,
        title=clean_str(row["title"]),
        publisher=publisher,
        url=None,
        start_year=start_year,
        end_year=start_year,
        start_volume=None,
        end_volume=None,
        year_spans=[],
    )