def associate_recids(citation_elements):
    """Attach a ``'recid'`` entry to every citation element, in place.

    Each element is handed to ``find_referenced_recid`` and one value is
    popped from whatever that call returns. If the lookup or the pop raises
    ``IndexError`` or ``KeyError``, ``None`` is stored instead.

    Returns the same ``citation_elements`` object that was passed in.
    """
    for element in citation_elements:
        try:
            recid = find_referenced_recid(element).pop()
        except (IndexError, KeyError):
            # No usable record id for this element.
            recid = None
        element['recid'] = recid
    return citation_elements
def split_citations(citation_elements):
    """Split a citation line into multiple citations.

    We handle the case where the author has put 2 citations in the same line
    but split with ; or some other method.

    A new citation is started when either:

    * the current element resolves (via ``find_referenced_recid``) to a
      record id different from the one tracked so far, or
    * the element's ``misc_txt`` contains ``;`` and ``valid_citation``
      accepts the elements collected so far. The text after the last ``;``
      is carried over and prepended to the next element's ``misc_txt``
      (or appended back to the last element if there is no next one).

    The element dicts are modified in place (``misc_txt`` may be rewritten).

    Returns a list of citations, each being a list of elements. The elements
    still pending when the input is exhausted are always appended as the last
    citation, so an empty input yields ``[[]]``.
    """
    splitted_citations = []
    new_elements = []
    current_recid = None

    def check_ibid(current_elements, trigger_el):
        """Copy the nearest preceding author element for an ibid element.

        When ``trigger_el`` is flagged ``is_ibid``, walk backwards through
        the current elements and then through the previous citation (if any)
        and copy the first ``AUTH`` element found into the current citation.
        """
        if trigger_el.get('is_ibid', False):
            if splitted_citations:
                els = chain(reversed(current_elements),
                            reversed(splitted_citations[-1]))
            else:
                els = reversed(current_elements)
            for el in els:
                if el['type'] == 'AUTH':
                    new_elements.append(el.copy())
                    break

    def start_new_citation():
        """Close the current citation and start collecting a new one."""
        # Store a copy, then clear in place so the closures keep working
        # on the same list object.
        splitted_citations.append(new_elements[:])
        del new_elements[:]

    # Text found after a ';' that still has to be attached to an element.
    to_merge = None
    for el in citation_elements:
        if to_merge:
            el['misc_txt'] = to_merge + " " + el.get('misc_txt', '')
            to_merge = None

        try:
            el_recid = find_referenced_recid(el).pop()
        except (IndexError, KeyError):
            el_recid = None

        if current_recid and el_recid and current_recid == el_recid:
            # Do not start a new citation
            pass
        elif current_recid and el_recid and current_recid != el_recid:
            start_new_citation()
            # Some authors may be found in the previous citation
            balance_authors(splitted_citations, new_elements)
        elif ';' in el.get('misc_txt', '') and valid_citation(new_elements):
            # 'misc_txt' is treated as optional above, so it is read with
            # .get() here too; the key is known to exist once ';' was found.
            el['misc_txt'], to_merge = el['misc_txt'].rsplit(';', 1)
            start_new_citation()

        if el_recid:
            current_recid = el_recid

        check_ibid(new_elements, el)
        new_elements.append(el)

    if to_merge:
        # The last element was split on ';' but nothing followed it: put the
        # trailing text back on that element.
        new_elements[-1]['misc_txt'] += " " + to_merge
        new_elements[-1]['misc_txt'] = new_elements[-1]['misc_txt'].strip()

    splitted_citations.append(new_elements)

    return splitted_citations