def fix_duplicate_orcids(p):
    """
    Clear ORCID information from authors of ``p`` that share an ORCID.

    For every ORCID carried by two or more authors, the ``Name`` whose
    researcher has that ORCID is looked up and ``most_similar_author`` is
    asked which entry of ``p.author_name_pairs()`` matches it best. That
    entry keeps the ORCID; every other author carrying the same ORCID gets
    ``'orcid'`` and ``'researcher_id'`` set to ``None`` in
    ``p.authors_list``.

    ``p`` is saved and re-indexed only when at least one best match was
    recorded.

    :param p: object exposing ``authors``, ``authors_list``,
        ``author_name_pairs()``, ``save()`` and ``update_index()``
        (presumably a paper model instance -- confirm against callers)
    """
    from collections import defaultdict
    from papers.name import most_similar_author

    # How many authors carry each (non-empty) ORCID.
    occurrences = defaultdict(int)
    for author in p.authors:
        if author.orcid:
            occurrences[author.orcid] += 1

    name_pairs = p.author_name_pairs()

    # Maps the position of the author allowed to keep a duplicated ORCID
    # to that ORCID.
    keepers = {}
    for orcid, seen in occurrences.items():
        if seen < 2:
            continue
        try:
            name = Name.objects.get(researcher__orcid=orcid)
            keeper_idx = most_similar_author(name.pair, name_pairs)
            keepers[keeper_idx] = orcid
        except Name.DoesNotExist:
            logger.exception('DUPLICATE ORCID WITH NO RESEARCHER_ID')
    logger.info(keepers)

    for position, author in enumerate(p.authors):
        orcid = author.orcid
        if not orcid or occurrences[orcid] < 2:
            continue
        if keepers.get(position) == orcid:
            # This author is the best match: the ORCID stays.
            continue
        # NOTE(review): when the lookup above failed for this ORCID, every
        # author carrying it is cleared here, yet nothing is saved below
        # unless another ORCID produced a best match -- confirm intended.
        record = p.authors_list[position]
        record['orcid'] = None
        record['researcher_id'] = None

    if keepers:
        p.save()
        p.update_index()
def get_form_initial_data(self, **kwargs):
    """
    Build the initial data used to pre-fill the deposit form.

    Starts from the parent implementation's initial data and adds:

    * ``first_name`` / ``last_name``: taken from ``self.user``;
    * ``abstract``: the paper abstract passed through ``kill_html``,
      only when the paper has one;
    * ``topic``: the result of ``self.predict_topic`` on the abstract
      (or on the paper title when no abstract is available), omitted
      when the prediction is ``'OTHER'``;
    * ``depositing_author``: the value returned by
      ``most_similar_author`` for the user's name among the paper's
      author name pairs, or ``None`` when the user has no first or
      last name.

    :param kwargs: optional keyword arguments, forwarded unchanged to
        the parent implementation
    :returns: the mapping returned by the parent implementation,
        updated in place with the keys above
    """
    data = super(HALProtocol, self).get_form_initial_data(**kwargs)

    data['first_name'] = self.user.first_name
    data['last_name'] = self.user.last_name

    # Abstract
    if self.paper.abstract:
        data['abstract'] = kill_html(self.paper.abstract)
    else:
        # No abstract on the paper: ask for a metadata consolidation
        # without waiting for it (presumably asynchronous -- confirm).
        self.paper.consolidate_metadata(wait=False)

    # Topic: 'abstract' may have been set just above or by the parent.
    if 'abstract' in data:
        topic_text = data['abstract']
    else:
        topic_text = self.paper.title
    data['topic'] = self.predict_topic(topic_text)
    if data['topic'] == 'OTHER':
        # 'OTHER' is not offered as an initial value.
        del data['topic']

    # Depositing author
    most_similar_idx = None
    first, last = (self.user.first_name, self.user.last_name)
    if first and last:
        most_similar_idx = most_similar_author(
            (first, last), self.paper.author_name_pairs())
    data['depositing_author'] = most_similar_idx

    return data
def get_form_initial_data(self, **kwargs):
    """
    Return the initial values for the deposit form.

    The parent implementation's data (called with ``kwargs``) is
    extended with the user's ``first_name`` and ``last_name``, the
    paper ``abstract`` (HTML removed with ``kill_html``) when there is
    one, a predicted ``topic`` unless the prediction is ``'OTHER'``,
    and ``depositing_author`` -- the result of ``most_similar_author``
    for the user's name, or ``None`` if the user lacks a first or last
    name.
    """
    initial = super(HALProtocol, self).get_form_initial_data(**kwargs)
    initial['first_name'] = self.user.first_name
    initial['last_name'] = self.user.last_name

    # Abstract
    raw_abstract = self.paper.abstract
    if not raw_abstract:
        # Nothing to show yet: request a metadata consolidation and
        # carry on without waiting for it.
        self.paper.consolidate_metadata(wait=False)
    else:
        initial['abstract'] = kill_html(self.paper.abstract)

    # Topic: predicted from the abstract when available, else the title
    source_text = (
        initial['abstract'] if 'abstract' in initial else self.paper.title
    )
    initial['topic'] = self.predict_topic(source_text)
    if initial['topic'] == 'OTHER':
        del initial['topic']

    # Depositing author
    first = self.user.first_name
    last = self.user.last_name
    if first and last:
        name_pairs = self.paper.author_name_pairs()
        initial['depositing_author'] = most_similar_author(
            (first, last), name_pairs)
    else:
        initial['depositing_author'] = None

    return initial
def affiliate_author_with_orcid(ref_name, orcid, authors, initial_orcids=None):
    """
    Given a reference name and an ORCiD for a researcher, find out which
    author in the list is the most likely to be that author. This function
    is run on author lists of papers listed in the ORCiD record so we expect
    that one of the authors should be the same person as the ORCiD holder.

    This just finds the most similar name (using ``most_similar_author``)
    and returns the resulting list of ORCiDs, one slot per author.

    :param ref_name: the reference name, passed to ``most_similar_author``
    :param orcid: the ORCiD to assign to the most similar author
    :param authors: the list of authors to search in
    :param initial_orcids: optional list of already-known ORCiDs; it is
        used as the starting point only when it is non-empty and has the
        same length as ``authors``, otherwise every slot starts as ``None``
    :returns: a new list with ``orcid`` at the index of the most similar
        author (when one is found) and the starting values elsewhere. The
        ``initial_orcids`` argument itself is never modified.
    """
    max_sim_idx = most_similar_author(ref_name, authors)
    if initial_orcids and len(initial_orcids) == len(authors):
        # Work on a copy so the caller's list is not mutated as a side
        # effect of the assignment below.
        orcids = list(initial_orcids)
    else:
        orcids = [None] * len(authors)
    if max_sim_idx is not None:
        orcids[max_sim_idx] = orcid
    return orcids