def check(self, metadata):
    """Enrich and normalize items of the ``metadata`` dict, which holds
    journal metadata.

    The structure of ``metadata`` is the same returned by the JSON format
    of ``articlemeta.scielo.org``, e.g.:
    https://gist.github.com/gustavofonseca/92638fe6e1f85dd84bcebce72e83b76e

    Returns a shallow copy of ``metadata`` with ``code``, ``issns``,
    ``collection`` and a datetime-typed ``processing_date`` set.
    """
    metadata_copy = metadata.copy()
    journal = Journal(metadata_copy)
    # set literal deduplicates the ISSN variants
    issns = {
        journal.any_issn(priority=u'electronic'),
        journal.any_issn(priority=u'print'),
        journal.scielo_issn,
    }

    metadata_copy['code'] = journal.scielo_issn
    metadata_copy['issns'] = list(issns)
    metadata_copy['collection'] = journal.collection_acronym

    if not isinstance(journal.data['processing_date'], datetime):
        try:
            metadata_copy['processing_date'] = datetime.strptime(
                journal.data['processing_date'], '%Y-%m-%d')
        except (TypeError, ValueError):
            # was a bare except: only parsing failures (missing/malformed
            # date) should fall back to "now" — never KeyboardInterrupt etc.
            metadata_copy['processing_date'] = datetime.now()
    return metadata_copy
def pipeline_scieloorg(self):
    """Build a flat dict of journal attributes for the scielo.org payload.

    Every attribute access is guarded: a failure in any xylose accessor
    yields ``None`` for that field instead of aborting the pipeline.
    """
    journal = Journal(self._journal)

    def _safegetter(func):
        # Best-effort accessor: xylose raises on missing/odd metadata.
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # still propagate.
        try:
            return func()
        except Exception:
            return None

    return {
        'title': _safegetter(lambda: journal.title),
        'subtitle': _safegetter(lambda: journal.subtitle),
        'previous_title': _safegetter(lambda: journal.previous_title),
        'acronym': _safegetter(lambda: journal.acronym),
        'scielo_url': _safegetter(lambda: journal.url()),
        'institutional_url': _safegetter(lambda: journal.institutional_url),
        'subject_areas': _safegetter(lambda: journal.subject_areas),
        'wos_subject_areas': _safegetter(lambda: journal.wos_subject_areas),
        'publisher_city': _safegetter(lambda: journal.publisher_city),
        'publisher_country': _safegetter(lambda: journal.publisher_country),
        'publisher_name': _safegetter(lambda: journal.publisher_name),
        'status': _safegetter(lambda: journal.current_status)
    }
def check(self, metadata):
    """Enrich and normalize items of the ``metadata`` dict, which holds
    journal metadata.

    The structure of ``metadata`` is the same returned by the JSON format
    of ``articlemeta.scielo.org``, e.g.:
    https://gist.github.com/gustavofonseca/92638fe6e1f85dd84bcebce72e83b76e

    Returns a shallow copy of ``metadata`` with ``code``, ``issns``,
    ``collection`` and a datetime-typed ``processing_date`` set.
    """
    metadata_copy = metadata.copy()
    journal = Journal(metadata_copy)
    # set literal deduplicates the ISSN variants
    issns = {
        journal.any_issn(priority=u'electronic'),
        journal.any_issn(priority=u'print'),
        journal.scielo_issn,
    }

    metadata_copy['code'] = journal.scielo_issn
    metadata_copy['issns'] = list(issns)
    metadata_copy['collection'] = journal.collection_acronym

    if not isinstance(journal.data['processing_date'], datetime):
        try:
            metadata_copy['processing_date'] = datetime.strptime(
                journal.data['processing_date'], '%Y-%m-%d')
        except (TypeError, ValueError):
            # was a bare except: only parsing failures should trigger
            # the fallback to "now"
            metadata_copy['processing_date'] = datetime.now()
    return metadata_copy
def _check_journal_meta(self, metadata):
    """Enrich *metadata* in place: sets ``code`` to the deduplicated
    list of known ISSNs and ``collection`` to the collection acronym.
    Returns the same dict for convenience.
    """
    record = Journal(metadata)
    unique_issns = set([
        record.any_issn(priority=u"electronic"),
        record.any_issn(priority=u"print"),
    ])
    metadata["code"] = list(unique_issns)
    metadata["collection"] = record.collection_acronym
    return metadata
def journal(self, code, collection):
    """Fetch a journal from the articlemeta service and wrap it in a
    xylose ``Journal``.

    Returns ``None`` when the journal does not exist.
    Raises ``ServerError`` on a thrift server failure and ``ValueError``
    when the service payload is not valid JSON.
    """
    try:
        journal = self.client.get_journal(code=code, collection=collection)
    except self.ARTICLEMETA_THRIFT.ServerError:
        msg = 'Error retrieving journal: %s_%s' % (collection, code)
        raise ServerError(msg)

    if not journal:
        logger.warning('Journal not found for: %s_%s' % (collection, code))
        return None

    try:
        jjournal = json.loads(journal)
    except ValueError:
        # was a bare except; json.loads signals failure with ValueError
        # (JSONDecodeError). Also fixed the "retrienving" typo.
        msg = 'Fail to load JSON when retrieving journal: %s_%s' % (
            collection, code)
        raise ValueError(msg)

    xjournal = Journal(jjournal)
    logger.info('Journal loaded: %s_%s' % (collection, code))
    return xjournal
def get_journals_from_json(journals_file_path: str) -> dict:
    """Build a lookup from every journal ISSN to its ISSN ID, using the
    data extracted from the ISIS Title base into `journals_file_path`.

    Each ISSN — print, electronic or the SciELO ISSN ID itself — maps to
    the journal's ISSN ID. E.g. a journal with print ISSN `0101-0101`,
    online ISSN `0101-0X02` and ISSN ID `0101-0X02` produces::

        {
            ...
            "0101-0X02": "0101-0X02",
            "0101-0101": "0101-0X02",
            ...
        }
    """
    with open(journals_file_path) as fp:
        raw_journals = json.load(fp)

    issn_to_id = {}
    for raw in raw_journals:
        record = Journal(raw)
        for issn in (record.print_issn, record.electronic_issn,
                     record.scielo_issn):
            if issn:
                issn_to_id[issn] = record.scielo_issn
    return issn_to_id
def _check_journal_meta(self, metadata):
    """Enrich *metadata* in place with ``code`` (the SciELO ISSN), the
    deduplicated ``issns`` list and the ``collection`` acronym.
    Returns the same dict for convenience.
    """
    record = Journal(metadata)
    unique_issns = set([
        record.any_issn(priority=u'electronic'),
        record.any_issn(priority=u'print'),
        record.scielo_issn,
    ])
    metadata['code'] = record.scielo_issn
    metadata['issns'] = list(unique_issns)
    metadata['collection'] = record.collection_acronym
    return metadata
def conversion_journal_to_bundle(journal: dict) -> dict:
    """Convert a raw journal record into the data format persisted by
    Kernel in a MongoDB database.

    Fixed the return annotation: the function returns the converted
    bundle dict, it was previously annotated ``-> None``.
    """
    _journal = Journal(journal)
    _bundle = xylose_converter.journal_to_kernel(_journal)
    return _bundle
def journal(self, code, collection):
    """Return the first journal matching *code*/*collection* wrapped in
    a xylose ``Journal``, or ``None`` when the result set is empty.
    """
    records = self._request_journal(code, collection)
    try:
        first = records[0]
    except IndexError:
        return None
    return Journal(first)
def test_get_all_journal(self, mk_ext_journal, mk_ext_identifiers, mk_r):
    """get_all_journal() resolves each identifier code through
    ext_journal and returns the resulting Journal objects."""
    # Arrange: identifiers payload carries one code; ext_journal is
    # mocked to return a Journal built from the sample fixture.
    obj_journal = Journal(SAMPLES_JOURNAL)
    mk_ext_identifiers.return_value = {
        "objects": ["ANY", "ANY", {"code": "36341997000100001"}]
    }
    mk_ext_journal.return_value = obj_journal
    result = journal.get_all_journal()
    # the first returned item is exactly the mocked Journal instance
    self.assertEqual(result[0], obj_journal)
def ext_journal(issn):
    """Fetch journal metadata for *issn* from the ArticleMeta API and
    wrap the first record in a xylose ``Journal``.
    """
    params = {
        "collection": config.get("SCIELO_COLLECTION"),
        "issn": issn,
    }
    payload = request.get(
        "%s/journal" % config.get("AM_URL_API"), params=params
    ).json()
    return Journal(payload[0])
def test_get_all_articles_notXML(self, mk_ext_identifiers):
    """get_all_articles_notXML() returns the article codes found in the
    identifiers payload for the given ISSN.
    """
    # Removed the unused local `obj_journal = Journal(SAMPLES_JOURNAL)`:
    # nothing in this test consumed it.
    mk_ext_identifiers.return_value = {
        "objects": [
            {"code": "S0036-36341997000100001"},
            {"code": "S2237-96222017000400783"},
        ]
    }
    result = article.get_all_articles_notXML("0036-3634")
    self.assertEqual(result[0][0], "S0036-36341997000100001")
def pipeline_scieloorg(self):
    """Build a flat dict of journal attributes for the scielo.org payload.

    Every attribute access is guarded: a failure in any xylose accessor
    yields ``None`` for that field instead of aborting the pipeline.
    """
    journal = Journal(self._journal)

    def _safegetter(func):
        # Best-effort accessor — narrowed from a bare except so
        # SystemExit/KeyboardInterrupt still propagate.
        try:
            return func()
        except Exception:
            return None

    return {'title': _safegetter(lambda: journal.title),
            'subtitle': _safegetter(lambda: journal.subtitle),
            'previous_title': _safegetter(lambda: journal.previous_title),
            'acronym': _safegetter(lambda: journal.acronym),
            'scielo_url': _safegetter(lambda: journal.url()),
            'institutional_url': _safegetter(lambda: journal.institutional_url),
            'subject_areas': _safegetter(lambda: journal.subject_areas),
            'wos_subject_areas': _safegetter(lambda: journal.wos_subject_areas),
            'publisher_city': _safegetter(lambda: journal.publisher_city),
            'publisher_country': _safegetter(lambda: journal.publisher_country),
            'publisher_name': _safegetter(lambda: journal.publisher_name),
            'status': _safegetter(lambda: journal.current_status)}
def setUp(self):
    """Load journal fixtures and index them by journal code."""
    from fixtures.journals import journal_accesses, metadata
    # deep copies so each test may mutate the fixtures freely
    self.accesses = copy.deepcopy(journal_accesses)
    self.metadata = copy.deepcopy(metadata)
    # index the sample journal by its first "code" entry (the ISSN),
    # pairing the xylose Journal wrapper with its access counts
    self.journals = {
        metadata['code'][0]: {
            'metadata': Journal(self.metadata),
            'accesses': self.accesses
        }
    }
def process_journals(**context):
    """Process the list of journals produced by the MST base read task.

    Pulls the JSON payload from the ``read_title_mst`` task via XCom,
    converts each record to the Kernel format and registers (or updates)
    it on the Kernel journals endpoint.
    """
    journals = context["ti"].xcom_pull(task_ids="read_title_mst")
    journals = json.loads(journals)
    journals_as_kernel = [
        journal_as_kernel(Journal(journal)) for journal in journals
    ]
    for journal in journals_as_kernel:
        _id = journal.pop("_id")
        # removed the unused `response =` binding — the return value was
        # never consumed
        register_or_update(_id, journal, KERNEL_API_JOURNAL_ENDPOINT)
def ext_journal(issn):
    """Fetch journal metadata for *issn* from the ArticleMeta API.

    Returns the first record wrapped in a xylose ``Journal``; returns
    ``None`` (implicitly) when the HTTP request fails, logging the error.
    """
    params = {
        "collection": config.get("SCIELO_COLLECTION"),
        "issn": issn,
    }
    try:
        response = request.get(
            "%s/journal" % config.get("AM_URL_API"), params=params)
    except request.HTTPGetError:
        logger.error("Journal nao encontrado: %s: %s"
                     % (config.get("SCIELO_COLLECTION"), issn))
    else:
        return Journal(response.json()[0])
def journals_issns():
    """Collect every distinct, non-empty ISSN (SciELO, print and
    electronic) from the articlemeta ``journals`` Mongo collection.
    """
    collection = MongoClient(
        'node1-mongodb.scielo.org', 27000)['articlemeta']['journals']
    all_issns = []
    for record in collection.find():
        wrapped = Journal(record)
        candidates = (
            wrapped.scielo_issn,
            wrapped.print_issn,
            wrapped.electronic_issn,
        )
        all_issns.extend(issn for issn in candidates if issn)
    return set(all_issns)
def _check_journal_meta(self, metadata):
    """Check the given metadata and return the same dict enriched with
    ``code``, ``issns``, ``collection`` and a datetime-typed
    ``processing_date``.
    """
    journal = Journal(metadata)
    issns = {
        journal.any_issn(priority=u'electronic'),
        journal.any_issn(priority=u'print'),
        journal.scielo_issn,
    }

    metadata['code'] = journal.scielo_issn
    metadata['issns'] = list(issns)
    metadata['collection'] = journal.collection_acronym

    if not isinstance(journal.processing_date, datetime):
        try:
            metadata['processing_date'] = datetime.strptime(
                journal.processing_date, '%Y-%m-%d')
        except (TypeError, ValueError):
            # was a bare except: only parsing failures (missing or
            # malformed date) should fall back to "now"
            metadata['processing_date'] = datetime.now()
    return metadata
def journal(self, code, collection):
    """Query the articlemeta journal endpoint for *code*/*collection*.

    Returns a xylose ``Journal`` only when exactly one record matches;
    otherwise returns ``None``.
    """
    endpoint = urljoin(self.ARTICLEMETA_URL, self.JOURNAL_ENDPOINT)
    result = self._do_request(
        endpoint, {'issn': code, 'collection': collection})
    if not result or len(result) != 1:
        return None
    return Journal(result[0])
def journal(self, code, collection=None):
    """Fetch a journal by *code* (and optionally *collection*) and wrap
    the decoded JSON payload in a xylose ``Journal``. Returns ``None``
    when nothing is found.
    """
    query = {'code': code}
    if collection:
        query['collection'] = collection
    raw = self.client.get_journal(**query)
    if not raw:
        return None
    return Journal(json.loads(raw))
def process_journals(**context):
    """Process the journals loaded from the MST base read result:
    read the title JSON produced by the copy task, convert every record
    to the Kernel format and register/update it on the Kernel endpoint.
    """
    title_json_path = context["ti"].xcom_pull(
        task_ids="copy_mst_bases_to_work_folder_task", key="title_json_path"
    )

    with open(title_json_path, "r") as source:
        raw_payload = source.read()

    logging.info("reading file from %s." % (title_json_path))

    records = json.loads(raw_payload)
    kernel_journals = [
        journal_as_kernel(Journal(record)) for record in records
    ]

    for journal in kernel_journals:
        _id = journal.pop("_id")
        register_or_update(_id, journal, KERNEL_API_JOURNAL_ENDPOINT)
def journal(self, code, collection=None):
    """Fetch a journal through the dispatcher and wrap it in a xylose
    ``Journal``.

    Returns ``None`` when the journal does not exist; raises
    ``ValueError`` when the service payload is not valid JSON.
    """
    journal = self.dispatcher('get_journal', code, collection)

    if not journal:
        logger.info('Journal not found for: %s_%s', collection, code)
        return None

    try:
        jjournal = json.loads(journal)
    except ValueError:
        # was a bare except; json.loads signals failure with ValueError
        # (JSONDecodeError). Also fixed the "retrienving" typo and the
        # dead `jjournal = None` pre-initialization.
        msg = 'Fail to load JSON when retrieving journal: %s_%s' % (
            collection, code)
        raise ValueError(msg)

    xjournal = Journal(jjournal)
    logger.info('Journal loaded: %s_%s', collection, code)
    return xjournal
def setUp(self):
    """Build a raw ISIS-style journal record (tag -> occurrence list,
    as produced by isis2json) and wrap it in a xylose Journal for the
    tests."""
    # NOTE(review): field tags follow the ISIS Title base conventions;
    # v400/v435 carry the ISSNs (the id-related fields deleted by the
    # "journal hasn't id" test), v51 the status history — confirm the
    # remaining tags against the xylose Journal accessors.
    self.json_journal = {
        "v100": [{"_": "sample"}],
        "v68": [{"_": "spl"}],
        "v940": [{"_": "20190128"}],
        "v50": [{"_": "C"}],
        "v901": [
            {"l": "es", "_": "Publicar artículos"},
            {"l": "pt", "_": "Publicar artigos"},
            {"l": "en", "_": "To publish articles"},
        ],
        "v151": [{"_": "sample."}],
        "v150": [{"_": "sample"}],
        "v400": [{"_": "0001-3714"}],
        "v435": [{"t": "PRINT", "_": "0001"}, {"t": "ONLIN", "_": "2448-167X"}],
        "v51": [
            {
                "a": "20190128",
                "b": "C",
                "c": "20190129",
                "d": "S",
                "e": "suspended-by-editor",
            }
        ],
        "v441": [{"_": "Health Sciences"}],
        "v140": [{"_": "SCIELO"}],
        "v854": [{"_": "AREA"}],
        "v692": [{"_": "test.com"}],
        "v710": [{"_": "next journal"}],
        "v610": [{"_": "previous journal"}],
        "v64": [{"_": "*****@*****.**"}],
        "v63": [{"_": "Rua de exemplo, 1, São Paulo, SP, Brasil"}],
        "v480": [{"_": "Sociedade Brasileira de Medicina Tropical - SBMT"}],
        "v310": [{"_": "BR"}],
        "v320": [{"_": "MG"}],
        "v490": [{"_": "Uberaba"}],
    }
    self._journal = Journal(self.json_journal)
def journals(self, collection=None, issn=None):
    """Yield every journal (as a xylose ``Journal``) matching the given
    *collection*/*issn*, paging through the identifiers endpoint.
    """
    offset = 0
    while True:
        identifiers = self.client.get_journal_identifiers(
            collection=collection, issn=issn, limit=LIMIT, offset=offset)

        if len(identifiers) == 0:
            # PEP 479 fix: `raise StopIteration` inside a generator is
            # converted to RuntimeError on Python 3.7+; a plain return
            # ends the iteration cleanly.
            return

        for identifier in identifiers:
            journal = self.client.get_journal(
                code=identifier.code[0], collection=identifier.collection)
            jjournal = json.loads(journal)
            xjournal = Journal(jjournal)
            logger.info('Journal loaded: %s_%s' % (
                identifier.collection, identifier.code))
            yield xjournal

        # Advance by the page size actually requested. It was hard-coded
        # to 1000, which skips or repeats records whenever LIMIT != 1000.
        offset += LIMIT
def journal(self, code, collection=None, fmt='xylose'):
    """Fetch a journal by *code* (and optionally *collection*).

    Returns a xylose ``Journal`` when ``fmt == 'xylose'``, otherwise the
    raw service payload. Returns ``None`` when nothing is found; raises
    ``ServerError`` when the client call fails.
    """
    query = {'code': code}
    if collection:
        query['collection'] = collection

    try:
        journal = self.client.get_journal(**query)
    except Exception:
        # was a bare except: keep the translation into ServerError but
        # let SystemExit/KeyboardInterrupt propagate
        msg = 'Error retrieving document: %s_%s' % (collection, code)
        raise ServerError(msg)

    if not journal:
        return None

    if fmt == 'xylose':
        jjournal = json.loads(journal)
        xjournal = Journal(jjournal)
        logger.info('Journal loaded: %s_%s' % (collection, code))
        return xjournal

    logger.info('Journal loaded: %s_%s' % (collection, code))
    return journal
def journal_as_kernel(journal: Journal) -> dict:
    """Build the payload dict expected by the Kernel API from a xylose
    ``Journal`` parsed out of the isis2json output.
    """
    _payload = {}

    # the record id is any available ISSN, falling back to the SciELO ISSN
    _id = journal.any_issn()
    if not _id:
        _id = journal.scielo_issn
    _payload["_id"] = _id

    # mission: {lang: text} mapping -> list of {language, value} dicts
    if journal.mission:
        _payload["mission"] = [{
            "language": lang,
            "value": value
        } for lang, value in journal.mission.items()]
    else:
        _payload["mission"] = []

    # plain string fields default to "" when xylose returns None
    _payload["title"] = journal.title or ""
    _payload["title_iso"] = journal.abbreviated_iso_title or ""
    _payload["short_title"] = journal.abbreviated_title or ""
    _payload["acronym"] = journal.acronym or ""
    _payload["scielo_issn"] = journal.scielo_issn or ""
    _payload["print_issn"] = journal.print_issn or ""
    _payload["electronic_issn"] = journal.electronic_issn or ""

    _payload["status"] = {}
    if journal.status_history:
        # most recent entry; indexed as (date, status, reason) here
        _status = journal.status_history[-1]
        _payload["status"]["status"] = _status[1]
        if _status[2]:
            _payload["status"]["reason"] = _status[2]

    _payload["subject_areas"] = []
    if journal.subject_areas:
        for subject_area in journal.subject_areas:
            # TODO: some areas come in lower case — what should we do?
            # The MST base carries one major area that is considered
            # wrong, so the value must be normalized.
            if subject_area.upper() == "LINGUISTICS, LETTERS AND ARTS":
                subject_area = "LINGUISTIC, LITERATURE AND ARTS"
            _payload["subject_areas"].append(subject_area.upper())

    _payload["sponsors"] = []
    if journal.sponsors:
        _payload["sponsors"] = [{
            "name": sponsor
        } for sponsor in journal.sponsors]

    _payload["subject_categories"] = journal.wos_subject_areas or []
    _payload["online_submission_url"] = journal.submission_url or ""

    _payload["next_journal"] = {}
    if journal.next_title:
        _payload["next_journal"]["name"] = journal.next_title

    _payload["previous_journal"] = {}
    if journal.previous_title:
        _payload["previous_journal"]["name"] = journal.previous_title

    _payload["contact"] = {}
    if journal.editor_email:
        _payload["contact"]["email"] = journal.editor_email
    if journal.editor_address:
        _payload["contact"]["address"] = journal.editor_address

    return _payload
def transform(self):
    """Map the extracted journal record onto the transform model.

    Builds a xylose ``Journal`` from the cleaned source, copies every
    available attribute into ``self.transform_model_instance`` and
    registers the journal logo in the SSM asset store.

    Returns the populated transform model instance.
    """
    xylose_source = self.clean_for_xylose()
    xylose_journal = Journal(xylose_source)

    # jid: the extract model UUID doubles as the journal id
    uuid = self.extract_model_instance.uuid
    self.transform_model_instance['uuid'] = uuid
    self.transform_model_instance['jid'] = uuid

    # collection: resolved by case-insensitive acronym lookup
    transform_col = TransformCollection.objects.get(
        acronym__iexact=xylose_journal.collection_acronym)
    self.transform_model_instance['collection'] = transform_col.uuid

    # subject_categories
    if hasattr(xylose_journal, 'wos_subject_areas'):
        self.transform_model_instance[
            'subject_categories'] = xylose_journal.wos_subject_areas

    # study_areas
    if hasattr(xylose_journal, 'subject_areas'):
        self.transform_model_instance[
            'study_areas'] = xylose_journal.subject_areas

    # current_status
    if hasattr(xylose_journal, 'current_status'):
        self.transform_model_instance[
            'current_status'] = xylose_journal.current_status

    # publisher_city
    if hasattr(xylose_journal, 'publisher_loc'):
        self.transform_model_instance[
            'publisher_city'] = xylose_journal.publisher_loc

    # publisher_name: indexed access shows xylose exposes a sequence;
    # only the first entry is kept
    if hasattr(
            xylose_journal,
            'publisher_name') and len(xylose_journal.publisher_name) > 0:
        self.transform_model_instance[
            'publisher_name'] = xylose_journal.publisher_name[0]

    # eletronic_issn (sic — the model field name is misspelled)
    if hasattr(xylose_journal, 'electronic_issn'):
        self.transform_model_instance[
            'eletronic_issn'] = xylose_journal.electronic_issn

    # scielo_issn
    if hasattr(xylose_journal, 'scielo_issn'):
        self.transform_model_instance[
            'scielo_issn'] = xylose_journal.scielo_issn

    # print_issn
    if hasattr(xylose_journal, 'print_issn'):
        self.transform_model_instance[
            'print_issn'] = xylose_journal.print_issn

    # acronym
    if hasattr(xylose_journal, 'acronym'):
        self.transform_model_instance['acronym'] = xylose_journal.acronym

    # previous_title
    if hasattr(xylose_journal, 'previous_title'):
        self.transform_model_instance[
            'previous_journal_ref'] = xylose_journal.previous_title

    # title
    if hasattr(xylose_journal, 'title'):
        self.transform_model_instance['title'] = xylose_journal.title

    # editor_email
    if hasattr(xylose_journal, 'editor_email'):
        email = xylose_journal.editor_email
        if email:
            strip_email = email.strip()
            # NOTE(review): the email is stored only when
            # validate_email() is falsy — this looks inverted; confirm
            # validate_email's return convention before changing it.
            if not validate_email(strip_email):
                self.transform_model_instance['editor_email'] = strip_email
        else:
            self.transform_model_instance['editor_email'] = None

    # abbreviated_iso_title
    if hasattr(xylose_journal, 'abbreviated_iso_title'):
        self.transform_model_instance[
            'title_iso'] = xylose_journal.abbreviated_iso_title

    # next_title
    if hasattr(xylose_journal, 'next_title'):
        self.transform_model_instance[
            'next_title'] = xylose_journal.next_title

    # mission: {lang: description} -> list of dicts
    if hasattr(xylose_journal, 'mission'):
        missions = []
        for lang, des in xylose_journal.mission.items():
            missions.append({'language': lang, 'description': des})
        self.transform_model_instance['mission'] = missions

    # timeline: status_history entries indexed as (date, status, reason)
    if hasattr(xylose_journal, 'status_history'):
        timelines = []
        for status in xylose_journal.status_history:
            timelines.append({
                'reason': status[2],
                'status': status[1],
                'since': trydate(status[0]),
            })
        self.transform_model_instance['timeline'] = timelines

    # short_title
    if hasattr(xylose_journal, 'abbreviated_title'):
        self.transform_model_instance[
            'short_title'] = xylose_journal.abbreviated_title

    # index_at
    if hasattr(xylose_journal, 'wos_citation_indexes'):
        self.transform_model_instance[
            'index_at'] = xylose_journal.wos_citation_indexes

    # updated
    if hasattr(xylose_journal, 'update_date'):
        self.transform_model_instance['updated'] = trydate(
            xylose_journal.update_date)

    # created
    if hasattr(xylose_journal, 'creation_date'):
        self.transform_model_instance['created'] = trydate(
            xylose_journal.creation_date)

    # copyrighter
    if hasattr(xylose_journal, 'copyrighter'):
        self.transform_model_instance[
            'copyrighter'] = xylose_journal.copyrighter

    # publisher_country: second element of the pair is kept — presumably
    # (code, name); confirm against the xylose accessor
    if hasattr(xylose_journal, 'publisher_country') and len(
            xylose_journal.publisher_country) > 1:
        self.transform_model_instance[
            'publisher_country'] = xylose_journal.publisher_country[1]

    # online_submission_url
    if hasattr(xylose_journal, 'submission_url'):
        self.transform_model_instance[
            'online_submission_url'] = xylose_journal.submission_url

    # publisher_state
    if hasattr(xylose_journal, 'publisher_state'):
        self.transform_model_instance[
            'publisher_state'] = xylose_journal.publisher_state

    # sponsors
    if hasattr(xylose_journal, 'sponsors'):
        self.transform_model_instance['sponsors'] = xylose_journal.sponsors

    # other_titles
    if hasattr(xylose_journal, 'other_titles') and xylose_journal.other_titles:
        other_titles = []
        for title in xylose_journal.other_titles:
            other_titles.append({
                'title': title,
                'category': "other",
            })
        self.transform_model_instance['other_titles'] = other_titles

    # metrics: copied verbatim from the extract model when present
    if hasattr(self.extract_model_instance, 'metrics'):
        metrics = self.extract_model_instance.metrics
        self.transform_model_instance['metrics'] = metrics

    # logo_url
    def _open_logo(file_path, mode='rb'):
        """Open an asset as a (bytes) file-like object; logs and
        re-raises as a generic Exception on IOError."""
        try:
            return open(file_path, mode)
        except IOError as e:
            logger.error(u'Erro ao tentar abri o ativo: %s, erro: %s',
                         file_path, e)
            raise Exception(u'Erro ao tentar abri o ativo: %s', file_path)

    acron = xylose_journal.acronym.lower()
    logo_name = 'glogo.gif'
    file_path = '%s/%s/%s' % (
        config.OPAC_PROC_ASSETS_SOURCE_MEDIA_PATH, acron, logo_name)
    pfile = _open_logo(file_path)
    ssm_asset = SSMHandler(
        pfile, logo_name, 'img', {
            'issn': self.extract_model_instance.code,
            'pid': self.extract_model_instance.code,
            'collection': transform_col.acronym,
            'file_name': logo_name,
            'type': 'img',
            'bucket_name': acron,
            'journal': acron
        }, acron)
    # exists() returns (code, assets-with-same-filename); from the
    # branches below the codes appear to mean: 0 = new, 1 = already
    # registered, 2 = replace existing — confirm against SSMHandler
    code, existing_asset = ssm_asset.exists()
    if code == 2:
        logger.info(
            u"Lista de imagens com mesmo filename para o journal: %s",
            existing_asset)
        logger.info(u"Removendo a lista de images: %s", existing_asset)
        for asset in existing_asset:
            ssm_asset.remove(asset['uuid'])
    if code == 2 or code == 0:
        uuid = ssm_asset.register()
        logger.info(u'Registrado logo do períodico: %s, com uuid: %s',
                    acron, uuid)
        logo_url = ssm_asset.get_urls()['url_path']
        self.transform_model_instance['logo_url'] = logo_url
        logger.info(u'URL para logo do períodico: %s, com acrônimo: %s' % (
            logo_url, acron))
    if code == 1:
        # logo already registered: reuse the stored asset's URL
        for asset in existing_asset:
            self.transform_model_instance['logo_url'] = asset[
                'absolute_url']
    return self.transform_model_instance
def register_documents_in_documents_bundle(
    session_db, file_documents: str, file_journals: str
) -> None:
    """Group documents into their bundles (issue or AOP) and link each
    bundle's items through ``link_documents_bundles_with_documents``.

    Documents whose ISSN cannot be resolved, and bundles that fail to be
    fetched/created, are appended to an ``.err`` file instead of aborting
    the run.
    """
    journals = reading.read_json_file(file_journals)
    data_journal = {}
    # map every known ISSN (print, electronic, SciELO) to the ISSN ID
    for journal in journals:
        o_journal = Journal(journal)
        for _issn in (
            o_journal.print_issn, o_journal.electronic_issn,
            o_journal.scielo_issn
        ):
            if _issn:
                data_journal[_issn] = o_journal.scielo_issn

    def get_issn(document, data_journal=data_journal):
        """Return the ISSN ID of the journal the document belongs to,
        or None when no candidate ISSN resolves."""
        for issn_type in ("eissn", "pissn", "issn"):
            if document.get(issn_type) is not None:
                issn_value = document[issn_type].strip()
                if data_journal.get(issn_value) is not None:
                    return data_journal[issn_value]

    def get_bundle_info(issn, document):
        """Compute the bundle data for a document.

        Args:
            issn (str): resolved ISSN ID.
            document (dict): document data.

        Returns:
            tuple (bool, str): whether the bundle is an issue (as
            opposed to an ahead-of-print bundle), and the bundle id.
        """
        bundle_id = scielo_ids_generator.any_bundle_id(
            issn,
            document.get("year"),
            document.get("volume"),
            document.get("number"),
            document.get("supplement"),
        )
        aops_bundle_id = scielo_ids_generator.aops_bundle_id(issn)
        # if the generic id differs from the AOP id, issue data was present
        is_issue = bundle_id != aops_bundle_id
        return is_issue, bundle_id

    err_filename = os.path.join(
        config.get("ERRORS_PATH"), "insert_documents_in_bundle.err"
    )
    # file_documents is JSON-lines: one document per line
    with open(file_documents) as f:
        documents = f.readlines()
    documents_bundles = {}
    for document in documents:
        document = json.loads(document)
        issn_id = get_issn(document)
        if issn_id is None:
            logger.error("No ISSN in document '%s'", document["pid_v3"])
            files.write_file(err_filename, document["pid_v3"] + "\n", "a")
            continue
        is_issue, bundle_id = get_bundle_info(issn_id, document)
        documents_bundles.setdefault(bundle_id, {})
        documents_bundles[bundle_id].setdefault("items", [])
        documents_bundles[bundle_id]["items"].append(
            {"id": document.pop("pid_v3"), "order": document.get("order", "")}
        )
        documents_bundles[bundle_id]["data"] = {
            "is_issue": is_issue,
            "bundle_id": bundle_id,
            "issn": issn_id,
        }
    for documents_bundle in documents_bundles.values():
        data = documents_bundle["data"]
        items = documents_bundle["items"]
        try:
            documents_bundle = get_documents_bundle(
                session_db, data["bundle_id"], data["is_issue"], data["issn"]
            )
        except ValueError as exc:
            logger.error(
                "The bundle '%s' was not updated. During executions "
                "this following exception was raised '%s'.",
                data["bundle_id"],
                exc,
            )
            content = json.dumps({"issue": data["bundle_id"], "items": items})
            files.write_file(err_filename, content + "\n", "a")
        else:
            link_documents_bundles_with_documents(
                documents_bundle, items, session_db)
def transform(self):
    """Map the extracted journal record onto the transform model.

    Builds a xylose ``Journal`` from the cleaned source and copies every
    available attribute into ``self.transform_model_instance``.

    Returns the populated transform model instance.
    """
    xylose_source = self.clean_for_xylose()
    xylose_journal = Journal(xylose_source)

    # jid: the extract model UUID doubles as the journal id
    uuid = self.extract_model_instance.uuid
    self.transform_model_instance['uuid'] = uuid
    self.transform_model_instance['jid'] = uuid

    # collection: resolved by case-insensitive acronym lookup
    transform_col = TransformCollection.objects.get(
        acronym__iexact=xylose_journal.collection_acronym)
    self.transform_model_instance['collection'] = transform_col.uuid

    # subject_categories
    if hasattr(xylose_journal, 'wos_subject_areas'):
        self.transform_model_instance[
            'subject_categories'] = xylose_journal.wos_subject_areas

    # study_areas
    if hasattr(xylose_journal, 'subject_areas'):
        self.transform_model_instance[
            'study_areas'] = xylose_journal.subject_areas

    # current_status
    if hasattr(xylose_journal, 'current_status'):
        self.transform_model_instance[
            'current_status'] = xylose_journal.current_status

    # publisher_city
    if hasattr(xylose_journal, 'publisher_loc'):
        self.transform_model_instance[
            'publisher_city'] = xylose_journal.publisher_loc

    # publisher_name: indexed access shows xylose exposes a sequence;
    # only the first entry is kept
    if hasattr(
            xylose_journal,
            'publisher_name') and len(xylose_journal.publisher_name) > 0:
        self.transform_model_instance[
            'publisher_name'] = xylose_journal.publisher_name[0]

    # eletronic_issn (sic — the model field name is misspelled)
    if hasattr(xylose_journal, 'electronic_issn'):
        self.transform_model_instance[
            'eletronic_issn'] = xylose_journal.electronic_issn

    # scielo_issn
    if hasattr(xylose_journal, 'scielo_issn'):
        self.transform_model_instance[
            'scielo_issn'] = xylose_journal.scielo_issn

    # print_issn
    if hasattr(xylose_journal, 'print_issn'):
        self.transform_model_instance[
            'print_issn'] = xylose_journal.print_issn

    # acronym
    if hasattr(xylose_journal, 'acronym'):
        self.transform_model_instance['acronym'] = xylose_journal.acronym

    # previous_title
    if hasattr(xylose_journal, 'previous_title'):
        self.transform_model_instance[
            'previous_journal_ref'] = xylose_journal.previous_title

    # title
    if hasattr(xylose_journal, 'title'):
        self.transform_model_instance['title'] = xylose_journal.title

    # editor_email
    if hasattr(xylose_journal, 'editor_email'):
        email = xylose_journal.editor_email
        if email:
            strip_email = email.strip()
            # NOTE(review): the email is stored only when
            # validate_email() is falsy — this looks inverted; confirm
            # validate_email's return convention before changing it.
            if not validate_email(strip_email):
                self.transform_model_instance['editor_email'] = strip_email
        else:
            self.transform_model_instance['editor_email'] = None

    # abbreviated_iso_title
    if hasattr(xylose_journal, 'abbreviated_iso_title'):
        self.transform_model_instance[
            'title_iso'] = xylose_journal.abbreviated_iso_title

    # next_title
    if hasattr(xylose_journal, 'next_title'):
        self.transform_model_instance[
            'next_title'] = xylose_journal.next_title

    # mission: {lang: description} -> list of dicts
    if hasattr(xylose_journal, 'mission'):
        missions = []
        for lang, des in xylose_journal.mission.items():
            missions.append({'language': lang, 'description': des})
        self.transform_model_instance['mission'] = missions

    # timeline: status_history entries indexed as (date, status, reason)
    if hasattr(xylose_journal, 'status_history'):
        timelines = []
        for status in xylose_journal.status_history:
            timelines.append({
                'reason': status[2],
                'status': status[1],
                'since': trydate(status[0]),
            })
        self.transform_model_instance['timeline'] = timelines

    # short_title
    if hasattr(xylose_journal, 'abbreviated_title'):
        self.transform_model_instance[
            'short_title'] = xylose_journal.abbreviated_title

    # index_at
    if hasattr(xylose_journal, 'wos_citation_indexes'):
        self.transform_model_instance[
            'index_at'] = xylose_journal.wos_citation_indexes

    # updated
    if hasattr(xylose_journal, 'update_date'):
        self.transform_model_instance['updated'] = trydate(
            xylose_journal.update_date)

    # created
    if hasattr(xylose_journal, 'creation_date'):
        self.transform_model_instance['created'] = trydate(
            xylose_journal.creation_date)

    # copyrighter
    if hasattr(xylose_journal, 'copyrighter'):
        self.transform_model_instance[
            'copyrighter'] = xylose_journal.copyrighter

    # publisher_country: second element of the pair is kept — presumably
    # (code, name); confirm against the xylose accessor
    if hasattr(xylose_journal, 'publisher_country') and len(
            xylose_journal.publisher_country) > 1:
        self.transform_model_instance[
            'publisher_country'] = xylose_journal.publisher_country[1]

    # online_submission_url
    if hasattr(xylose_journal, 'submission_url'):
        self.transform_model_instance[
            'online_submission_url'] = xylose_journal.submission_url

    # publisher_state
    if hasattr(xylose_journal, 'publisher_state'):
        self.transform_model_instance[
            'publisher_state'] = xylose_journal.publisher_state

    # sponsors
    if hasattr(xylose_journal, 'sponsors'):
        self.transform_model_instance['sponsors'] = xylose_journal.sponsors

    # other_titles
    if hasattr(xylose_journal, 'other_titles') and xylose_journal.other_titles:
        other_titles = []
        for title in xylose_journal.other_titles:
            other_titles.append({
                'title': title,
                'category': "other",
            })
        self.transform_model_instance['other_titles'] = other_titles

    # metrics: copied verbatim from the extract model when present
    if hasattr(self.extract_model_instance, 'metrics'):
        metrics = self.extract_model_instance.metrics
        self.transform_model_instance['metrics'] = metrics

    return self.transform_model_instance
def test_raise_exception_if_journal_hasnt_id(self):
    """journal_to_kernel must raise ValueError when the journal record
    carries no ISSN at all."""
    # Remove both ISSN fields BEFORE entering assertRaises: setup code
    # inside the context could itself raise and make the test pass
    # spuriously. Only the call under test stays inside the block.
    del self.json_journal["v435"]
    del self.json_journal["v400"]
    with self.assertRaises(ValueError):
        journal_to_kernel(Journal(self.json_journal))
def register_documents_in_documents_bundle(session_db, file_documents: str,
                                           file_journals: str) -> None:
    """Group documents into their bundles (issue or ahead-of-print) and
    link each bundle's items through
    ``link_documents_bundles_with_documents``.

    Bundles that cannot be fetched/created are appended to an ``.err``
    file and collected in ``not_registered`` instead of aborting the run.
    """
    err_filename = os.path.join(config.get("ERRORS_PATH"),
                                "insert_documents_in_bundle.err")
    not_registered = []
    journals = reading.read_json_file(file_journals)
    documents = reading.read_json_file(file_documents)
    data_journal = {}
    # map every known ISSN (print, electronic, SciELO) to the ISSN ID
    for journal in journals:
        o_journal = Journal(journal)
        if o_journal.print_issn:
            data_journal[o_journal.print_issn] = o_journal.scielo_issn
        if o_journal.electronic_issn:
            data_journal[o_journal.electronic_issn] = o_journal.scielo_issn
        if o_journal.scielo_issn:
            data_journal[o_journal.scielo_issn] = o_journal.scielo_issn
    documents_bundles = {}
    for scielo_id, document in documents.items():
        # volume/number present => the document belongs to a real issue
        is_issue = bool(document.get("volume") or document.get("number"))
        issn = ""
        for issn_type in ("eissn", "pissn", "issn"):
            issn = document.get(issn_type)
            if issn:
                break
        if is_issue:
            bundle_id = scielo_ids_generator.issue_id(
                data_journal[issn],
                document.get("year"),
                document.get("volume"),
                document.get("number"),
                document.get("supplement"),
            )
        else:
            bundle_id = scielo_ids_generator.aops_bundle_id(data_journal[issn])
        documents_bundles.setdefault(bundle_id, {})
        documents_bundles[bundle_id].setdefault("items", [])
        documents_bundles[bundle_id]["items"].append({
            "id": scielo_id,
            "order": document.get("order", ""),
        })
        # BUG FIX: this previously looked up
        # data_journal[document.get("issn")], which raises KeyError when
        # the document only carries "eissn"/"pissn"; reuse the ISSN
        # resolved by the loop above (as the bundle_id branches do).
        documents_bundles[bundle_id]["data"] = {
            "is_issue": is_issue,
            "bundle_id": bundle_id,
            "issn": data_journal[issn],
        }
    for documents_bundle in documents_bundles.values():
        data = documents_bundle["data"]
        items = documents_bundle["items"]
        try:
            documents_bundle = get_documents_bundle(
                session_db, data["bundle_id"], data["is_issue"], data["issn"])
        except ValueError:
            files.write_file(err_filename, data["bundle_id"] + "\n", "a")
            not_registered.append(data["bundle_id"])
        else:
            link_documents_bundles_with_documents(
                documents_bundle, items, session_db)