def _parse_publication_date(raw_date):
    """Return ``raw_date`` as an ISO-8601 timestamp string (no timezone).

    Accepts ``yyyy-mm-dd``, ``yyyy-mm`` or ``yyyy``; a missing month or day
    is filled in with ``01``.

    Raises:
        ValueError: if ``raw_date`` matches none of the three layouts.
    """
    # Full date first; the raw value is passed untouched so a non-string
    # input fails exactly as ``strptime`` would report it.
    try:
        return datetime.strptime(raw_date, '%Y-%m-%d').isoformat()
    except ValueError:
        pass

    # Year and month only: pad the day.
    try:
        return datetime.strptime(
            "{0}-01".format(raw_date), '%Y-%m-%d').isoformat()
    except ValueError:
        pass

    # Year only: pad month and day. A failure here propagates to the caller.
    return datetime.strptime(
        "{0}-01-01".format(raw_date), '%Y-%m-%d').isoformat()


def _format_author_name(author):
    """Return "<given_names> <surname>" for ``author`` with control chars removed."""
    full_name = u"{0} {1}".format(author["given_names"], author["surname"])
    return remove_control_chars(full_name)


def _get_or_create_magazine(article):
    """Return the Magazine matching the article's journal title, creating it if absent."""
    journal = article.journal
    magazine_name = remove_control_chars(u"{0}".format(journal.title))
    # ``defaults`` are only used when a new row has to be created; an
    # existing magazine is returned unchanged.
    magazine, _created = Magazine.objects.get_or_create(
        magazine_name=magazine_name,
        defaults={
            "magazine_abbreviated_title": remove_control_chars(
                journal.abbreviated_title),
            "magazine_issn": journal.scielo_issn,
            "magazine_domain": article.scielo_domain,
            "magazine_acronym": journal.acronym,
        },
    )
    return magazine


def _link_categories(magazine, subject_areas):
    """Ensure a Category exists for each subject area and is linked to ``magazine``.

    Returns the list of category ids, in the order of ``subject_areas``
    (an empty list when ``subject_areas`` is ``None``).
    """
    category_ids = []
    if subject_areas is None:
        return category_ids

    for subject_area in subject_areas:
        # Category names are stored title-cased.
        category_name = remove_control_chars(
            u"{0}".format(subject_area)).title()
        category, _created = Category.objects.get_or_create(
            category_name_en=category_name)
        category_ids.append(category.id)

        # Ask the database whether the link exists instead of loading every
        # related category and comparing names in Python.
        if not magazine.categories.filter(pk=category.pk).exists():
            magazine.categories.add(category)
            # NOTE(review): the M2M ``add`` already persists the link; the
            # save is kept from the original in case save-time hooks
            # (signals, auto-updated fields) rely on it -- confirm and drop.
            magazine.save()

    return category_ids


def get_solr_args_from_article(document, indexed_date):
    """Build the dict of Solr fields for one article document.

    Besides building the dict, this function has database side effects: it
    gets or creates the ``Magazine`` for the article's journal, gets or
    creates a ``Category`` for each journal subject area, and links those
    categories to the magazine.

    Args:
        document: raw article data, passed straight to ``Article(...)``.
        indexed_date: value stored unchanged under the ``indexed_date`` key.

    Returns:
        dict: the fixed article/journal fields, plus

        * ``image_upload_path`` / ``image_upload_date`` / ``image_uploader``
          when a ``CoverArticle`` exists for the article id,
        * ``translated_abstracts_<lang>`` and ``translated_titles_<lang>``
          for each available translation,
        * ``keywords_<lang>`` (a list) for each keyword language.

    Raises:
        ValueError: if the article's publication date is not in
            ``yyyy-mm-dd``, ``yyyy-mm`` or ``yyyy`` form. This is raised
            before any database write happens.
    """
    article = Article(document)
    journal = article.journal

    # Parse the date before touching the database so that a malformed
    # document does not leave half-created magazine/category rows behind.
    publication_date = _parse_publication_date(article.publication_date)

    languages = list(article.languages())

    article_authors = article.authors
    if article_authors is not None:
        authors = [_format_author_name(author) for author in article_authors]
    else:
        authors = []

    article_first_author = article.first_author
    if article_first_author is not None:
        first_author = _format_author_name(article_first_author)
    else:
        first_author = ""

    magazine = _get_or_create_magazine(article)
    category_ids = _link_categories(magazine, journal.subject_areas)

    # The journal's ISSN, abbreviated title, acronym and domain are stored on
    # the Magazine row (referenced through ``journal_id``) and are therefore
    # not repeated in the Solr document.
    args = {
        "id": u"{0}{1}".format(article.publisher_id, article.collection_acronym),
        "any_issn": journal.any_issn(),
        "journal_title": remove_control_chars(journal.title),
        "journal_id": magazine.id,
        "journal_volume": article.volume,
        "journal_number": article.issue,
        "original_title": remove_control_chars(article.original_title()),
        "original_abstract": remove_control_chars(article.original_abstract()),
        "publication_date": "{0}Z".format(publication_date),
        "subject_areas": journal.subject_areas,
        "subject_areas_ids": category_ids,
        "wos_subject_areas": journal.wos_subject_areas,
        "original_language": article.original_language(),
        "languages": languages,
        "document_type": article.document_type,
        "authors": authors,
        "first_author": first_author,
        "corporative_authors": article.corporative_authors,
        "publisher_id": article.publisher_id,
        "collection_acronym": article.collection_acronym,
        "indexed_date": indexed_date
    }

    # Carry over an existing cover image when reindexing or updating.
    try:
        cover_article = CoverArticle.objects.get(article_id=args[u"id"])
    except CoverArticle.DoesNotExist:
        pass
    else:
        args[u"image_upload_path"] = cover_article.image
        args[u"image_upload_date"] = cover_article.upload_time
        args[u"image_uploader"] = cover_article.administrator.name

    translated_abstracts = article.translated_abstracts()
    if translated_abstracts is not None:
        for language, abstract in translated_abstracts.items():
            args[u"translated_abstracts_{0}".format(language)] = \
                remove_control_chars(abstract)

    translated_titles = article.translated_titles()
    if translated_titles is not None:
        for language, title in translated_titles.items():
            args[u"translated_titles_{0}".format(language)] = \
                remove_control_chars(title)

    article_keywords = article.keywords()
    if article_keywords is not None:
        for language, words in article_keywords.items():
            args[u"keywords_{0}".format(language)] = [
                remove_control_chars(word) for word in words
            ]

    return args