def _detect_coordinate_name(taxon, root_name):
    """Look up an already existing Name that is coordinate with ``taxon``.

    Under the Principle of Coordination a name is simultaneously available
    at the other ranks of its group, so a Name object may already have been
    created for a higher-ranked taxon.  The lookup itself is delegated to
    ``detect_super``; this helper only chooses the name and rank to ask for
    based on ``taxon.rank``.

    Returns whatever ``detect_super`` returns for the matching rank, or
    ``None`` when the taxon's rank has no coordinate rank handled here.
    """
    rank = taxon.rank
    if rank == FAMILY:
        return detect_super(root_name + 'oidea', SUPERFAMILY)
    if rank == SUBFAMILY:
        return detect_super(root_name + 'idae', FAMILY)
    if rank == TRIBE:
        return detect_super(root_name + 'inae', SUBFAMILY)
    if rank == SUBTRIBE:
        return detect_super(root_name + 'ini', TRIBE)
    if rank == SUBGENUS:
        return detect_super(root_name, GENUS)
    if rank == SPECIES:
        spg_name = helpers.spg_of_species(taxon.valid_name)
        return detect_super(spg_name, SPECIES_GROUP)
    if rank == SUBSPECIES and helpers.is_nominate_subspecies(taxon.valid_name):
        sp_name = helpers.species_of_subspecies(taxon.valid_name)
        return detect_super(sp_name, SPECIES)
    return None


def read_file(filename):
    """Import the taxa and names listed in a CSV file into the database.

    The first row is a header row: cell A1 holds the valid name of the taxon
    that is the parent of the file's root taxon (or is empty when there is no
    such parent).  Every following row is parsed with ``parse_row`` and turned
    into ``Taxon`` and/or ``Name`` records.

    Errors raised while processing an individual row are printed with
    ``traceback.print_exc()`` and the import continues with the next row.

    Args:
        filename: path of the CSV file to read.

    Returns:
        True if every row was processed without raising, False otherwise.

    Raises:
        StopIteration: if the file contains no rows at all.
        IndexError: if the first row is empty, or if cell A1 names a taxon
            that ``Taxon.filter`` does not find.
    """
    with codecs.open(filename, mode='r') as csv_file:
        reader = csv.reader(csv_file)
        # builtin next() works on both Python 2.6+ and Python 3, unlike the
        # Python 2-only reader.next() method
        first_line = next(reader)

        # maintain stack of taxa that are parents of the current taxon
        stack = []
        # name of parent of root taxon should be in cell A1
        root_name = first_line[0]
        if root_name != '':
            root_parent = Taxon.filter(Taxon.valid_name == root_name)[0]
            stack.append(root_parent)

        # current valid taxon (for synonyms)
        current_valid = None
        # whether current taxon should be marked as root of a page
        is_page_root = True
        error_occurred = False
        for row in reader:
            try:
                # ignore blank rows; csv.reader yields [] for a completely
                # blank line, so test for that before indexing into the row
                if not any(row):
                    continue
                if row[14] == '' and row[8] == '':
                    continue
                data = parse_row(row)

                # deal with "nHT", which is a pain. Some of these may need to
                # be manually readded to the DB
                if data['kind'] == 'nHT':
                    if data['valid_name'] in ignored_nHT:
                        continue
                    raise Exception("Unrecognized nHT: " + str(data))
                # nsgen is i.s., just ignore
                if data['kind'] == 'nsgen':
                    continue

                if data['status'] == STATUS_VALID:
                    # get stuff off the stack
                    rank = data['rank']
                    if rank == ROOT:
                        current_valid = Taxon.create(
                            valid_name=data['valid_name'],
                            age=data['age'],
                            rank=data['rank'],
                            is_page_root=True
                        )
                    else:
                        # TODO: make this somehow unranked-clade-aware
                        while stack and rank >= stack[-1].rank:
                            stack.pop()
                        if not stack:
                            # report the real problem instead of a bare
                            # IndexError from stack[-1]
                            raise Exception("No parent taxon found for: " + data['valid_name'])
                        # create new Taxon
                        current_valid = Taxon.create(
                            valid_name=data['valid_name'],
                            age=data['age'],
                            rank=data['rank'],
                            parent=stack[-1],
                            is_page_root=is_page_root
                        )
                    # only the first valid taxon of the file is a page root
                    if is_page_root:
                        is_page_root = False
                    stack.append(current_valid)

                # create new Name
                data['taxon'] = current_valid
                if data['status'] == STATUS_DUBIOUS:
                    # current system is inadequate for properly marking si species
                    # assume there's only genera and species
                    if ' ' in data['valid_name']:
                        data['group'] = GROUP_SPECIES
                    else:
                        data['group'] = GROUP_GENUS
                    # si species don't have a meaningful "valid name", but
                    # preserve what's there now
                    data['data']['si_valid_name'] = data['valid_name']
                else:
                    # this will be wrong in the few cases where a high-ranked
                    # name is listed as a synonym of a family-group name.
                    # Don't see a way to correct that programmatically.
                    data['group'] = helpers.group_of_rank(current_valid.rank)
                    if data['status'] == STATUS_SYNONYM:
                        if data['kind'] == 'synHT':
                            # drop the first space-separated word before
                            # comparing with the current valid name
                            valid_name = data['valid_name'].split(' ', 1)[1]
                        else:
                            valid_name = data['valid_name']
                        if valid_name != current_valid.valid_name:
                            raise Exception("Valid name of synonym does not match: " + data['valid_name'] + " and " + current_valid.valid_name)

                # shorten root name for family-group names
                if data['group'] == GROUP_FAMILY:
                    data['root_name'] = helpers.strip_rank(data['root_name'], current_valid.rank)
                del data['kind']
                data['data'] = json.dumps(remove_null(data['data']))

                # Detect whether a name object is already present
                # (Principle of Coordination)
                nm = None
                if data['status'] == STATUS_VALID:
                    nm = _detect_coordinate_name(current_valid, data['root_name'])
                    if nm is not None:
                        del data['taxon']
                        nm.add_additional_data(data)
                        # nm's Taxon should be the lowest-ranking one
                        nm.taxon = current_valid

                # create a new Name if none was found
                if nm is None:
                    nm = Name.create(**data)

                # set base_name field
                if data['status'] == STATUS_VALID:
                    current_valid.base_name = nm
                if 'additional_synonyms' in data:
                    group = helpers.group_of_rank(current_valid.rank)
                    for synonym in data['additional_synonyms']:
                        Name.create(taxon=current_valid, root_name=synonym, group=group, status=STATUS_SYNONYM)
            except Exception:
                # ignore error and happily go on with the next
                traceback.print_exc()
                error_occurred = True
    return not error_occurred
def create_root():
    """Create the taxon named 'root' with rank ROOT, flagged as a page root."""
    root_fields = {
        'rank': ROOT,
        'valid_name': 'root',
        'is_page_root': True,
    }
    Taxon.create(**root_fields)