def check_issns(row_object):
    """Fail the current test if the row's ISSN columns are invalid or inconsistent.

    Two groups of checks run against ``row_object.row``:

    1. Every ISSN column (``issn``, ``issn_print``, ``issn_electronic``,
       ``issn_l``) whose value is not ``"NA"`` must pass
       ``oat.is_wellformed_ISSN`` and ``oat.is_valid_ISSN``.
    2. If the row has an ``issn_l``, every entry stored in the module-level
       lookups ``issn_dict``, ``issn_p_dict`` and ``issn_e_dict`` under the
       row's corresponding ISSN must carry the same ``issn_l``.

    Args:
        row_object: Object exposing ``row`` (a mapping of column name to
            value), ``file_name`` and ``line_number``.

    Violations are reported through ``fail`` with a message prefixed by the
    file name and line number of the offending row.
    """
    # pytest convention: hide this helper's frame from failure tracebacks.
    __tracebackhide__ = True
    row = row_object.row
    line_str = '{}, line {}: '.format(row_object.file_name, row_object.line_number)

    for issn_column in [row["issn"], row["issn_print"], row["issn_electronic"], row["issn_l"]]:
        if issn_column == "NA":
            continue
        if not oat.is_wellformed_ISSN(issn_column):
            fail(line_str + 'value "' + issn_column + '" is not a well-formed ISSN')
        elif not oat.is_valid_ISSN(issn_column):
            fail(line_str + 'value "' + issn_column + '" is no valid ISSN (check digit mismatch)')

    issn_l = row["issn_l"]
    if issn_l == "NA":
        return

    # Only the template is passed through str.format(); the location prefix is
    # prepended afterwards so braces in a file name cannot be misread as
    # replacement fields.
    template = "Two entries share a common {} ({}), but the issn_l differs ({} vs {})"
    # (label used in the message, value in this row, lookup of known rows)
    lookups = (
        ("issn", row["issn"], issn_dict),
        ("issn_p", row["issn_print"], issn_p_dict),
        ("issn_e", row["issn_electronic"], issn_e_dict),
    )
    for label, value, known_rows in lookups:
        if value == "NA":
            continue
        for reduced_row in known_rows[value]:
            if reduced_row["issn_l"] != issn_l:
                fail(line_str + template.format(label, value, issn_l, reduced_row["issn_l"]))
def check_issns(row_object):
    """Fail the current test if any ISSN column of the row is malformed.

    The columns ``issn``, ``issn_print``, ``issn_electronic`` and ``issn_l``
    of ``row_object.row`` are inspected. Values equal to ``"NA"`` are
    skipped; every other value is passed to ``oat.is_wellformed_ISSN`` and
    ``oat.is_valid_ISSN``, and ``pytest.fail`` is called with a message
    prefixed by file name and line number for each check that does not pass.
    """
    # pytest convention: hide this helper's frame from failure tracebacks.
    __tracebackhide__ = True
    record = row_object.row
    location = '{}, line {}: '.format(row_object.file_name, row_object.line_number)

    # All four columns are read before any check runs.
    column_names = ("issn", "issn_print", "issn_electronic", "issn_l")
    candidates = [record[name] for name in column_names]

    for candidate in candidates:
        if candidate == "NA":
            continue
        if not oat.is_wellformed_ISSN(candidate):
            pytest.fail(location + 'value "' + candidate + '" is not a well-formed ISSN')
        if not oat.is_valid_ISSN(candidate):
            pytest.fail(
                location + 'value "' + candidate + '" is no valid ISSN (check digit mismatch)'
            )
def check_issns(row_object):
    """Validate the ISSN columns of a single data row.

    Reads ``issn``, ``issn_print``, ``issn_electronic`` and ``issn_l`` from
    ``row_object.row``. Each value other than ``"NA"`` is checked with
    ``oat.is_wellformed_ISSN`` and then ``oat.is_valid_ISSN``; a check that
    does not pass is reported via ``pytest.fail``, with the message prefixed
    by ``row_object.file_name`` and ``row_object.line_number``.
    """
    # pytest convention: hide this helper's frame from failure tracebacks.
    __tracebackhide__ = True
    data = row_object.row
    prefix = '{}, line {}: '.format(row_object.file_name, row_object.line_number)

    # Collect the four column values first, then drop the "NA" placeholders.
    values = [
        data["issn"],
        data["issn_print"],
        data["issn_electronic"],
        data["issn_l"],
    ]
    for value in (item for item in values if item != "NA"):
        wellformed = oat.is_wellformed_ISSN(value)
        if not wellformed:
            pytest.fail(prefix + 'value "' + value + '" is not a well-formed ISSN')
        valid = oat.is_valid_ISSN(value)
        if not valid:
            pytest.fail(prefix + 'value "' + value + '" is no valid ISSN (check digit mismatch)')
def _load_analysed_journals():
    """Return the rows of the existing results file, keyed by journal title.

    Returns an empty dict if ``JOURNALTOC_RESULTS_FILE`` does not exist. If a
    title occurs more than once, the first row wins.
    """
    analysed = {}
    if not os.path.isfile(JOURNALTOC_RESULTS_FILE):
        return analysed
    # newline="" is what the csv module requires for files it reads/writes.
    with open(JOURNALTOC_RESULTS_FILE, newline="") as results:
        for line in DictReader(results):
            analysed.setdefault(line["journal_full_title"], line)
    return analysed


def _collect_remaining_journals(analysed_journals):
    """Return journals from ``APC_DE_FILE`` that are not in *analysed_journals*.

    The result maps each title to a dict with the keys ``journal_full_title``,
    ``publisher``, ``is_hybrid`` and ``issns`` (well-formed ISSNs collected
    from all rows of that journal, without duplicates). ``is_hybrid`` becomes
    ``"FLIPPED"`` when rows of the same journal disagree on TRUE/FALSE.
    """
    remaining = {}
    with open(APC_DE_FILE, newline="") as apc_de:
        for line in DictReader(apc_de):
            title = line["journal_full_title"]
            if title in analysed_journals:
                continue
            if title not in remaining:
                remaining[title] = {
                    "journal_full_title": line["journal_full_title"],
                    "publisher": line["publisher"],
                    "is_hybrid": line["is_hybrid"],
                    "issns": [],
                }
            journal = remaining[title]
            for issn_type in ISSN_TYPES:
                issn = line[issn_type]
                if issn not in journal["issns"] and oat.is_wellformed_ISSN(issn):
                    journal["issns"].append(issn)
            is_hybrid = line["is_hybrid"]
            if is_hybrid in ("TRUE", "FALSE") and is_hybrid != journal["is_hybrid"]:
                journal["is_hybrid"] = "FLIPPED"
    return remaining


def _analyse_journal(title, fields):
    """Build the result entry for one journal by looking up its ISSNs.

    The ISSNs are tried in order via ``get_jtoc_metadata``; the first one
    yielding a non-``None`` ``jtoc_id`` fills the ``jtoc_*`` fields and ends
    the search. Fields that are never filled stay ``None``.
    """
    entry = {field: None for field in RESULTS_FILE_FIELDNAMES}
    entry["journal_full_title"] = title
    for key in ("publisher", "is_hybrid"):
        entry[key] = fields[key]
    entry["issns"] = "|".join(fields["issns"])
    msg = 'Analysing journal "{}" ({}), OpenAPC hybrid status is {}...'
    oat.print_b(msg.format(entry["journal_full_title"], entry["issns"], entry["is_hybrid"]))

    for issn in fields["issns"]:
        oat.print_y("Looking up ISSN " + issn + "...")
        jtoc_metadata = get_jtoc_metadata(issn)
        if jtoc_metadata["jtoc_id"] is not None:
            entry["in_jtoc"] = "TRUE"
            for key in ("jtoc_publisher", "jtoc_title"):
                entry[key] = jtoc_metadata[key]
            entry["jtoc_type"] = get_jtoc_journal_type(jtoc_metadata["jtoc_id"])
            msg = 'Journal found ("{}"), JournalTOCs type is {}'
            oat.print_g(msg.format(entry["jtoc_title"], entry["jtoc_type"]))
            break
    else:
        # Reached only when no ISSN produced a hit (the loop never hit break).
        oat.print_r("None of the associated ISSNS found in JTOCs!")
    return entry


def _write_results(analysed_journals):
    """Write all analysed journal entries to ``JOURNALTOC_RESULTS_FILE``."""
    # newline="" stops the platform from translating the csv writer's own
    # line terminators (which would yield "\r\r\n" on Windows).
    with open(JOURNALTOC_RESULTS_FILE, "w", newline="") as res_file:
        writer = DictWriter(res_file, fieldnames=RESULTS_FILE_FIELDNAMES)
        writer.writeheader()
        for entry in analysed_journals.values():
            writer.writerow(entry)


def main():
    """Analyse one batch of not-yet-analysed journals and rewrite the results file.

    Journals already present in ``JOURNALTOC_RESULTS_FILE`` are kept as they
    are. Of the journals in ``APC_DE_FILE`` that are still missing, at most
    ``BATCH_SIZE`` are analysed per run, with a 2 second pause after each one
    (presumably to throttle lookups -- confirm against the lookup helpers).
    The results file is then rewritten with old and new entries together.
    """
    analysed_journals = _load_analysed_journals()
    remaining_journals = _collect_remaining_journals(analysed_journals)

    msg = "{} unique journals found in OpenAPC core data file, {} already analysed, {} remaining."
    oat.print_g(
        msg.format(
            len(remaining_journals) + len(analysed_journals),
            len(analysed_journals),
            len(remaining_journals),
        )
    )

    for count, (title, fields) in enumerate(remaining_journals.items(), start=1):
        analysed_journals[title] = _analyse_journal(title, fields)
        if count >= BATCH_SIZE:
            break
        sleep(2)

    _write_results(analysed_journals)