def load_molecules(filename: str, dbname: str = '#') -> dict:
    """Load molecules from a FASTA file.

    The file is split into records on ``'>'``. For every record the first
    line is the header and the remaining lines, joined without newlines,
    form the sequence.

    The molecule name is taken from the space-separated header fields: the
    second field when the header has more than two fields, otherwise the
    first. An empty name is replaced by the literal string ``'None'``.

    Records whose name ends with one of the letters ``N``, ``L``, ``Q``,
    ``S``, ``A`` or ``C`` are skipped and reported on stdout.
    NOTE(review): these look like allele expression-status suffixes;
    confirm against the naming scheme of the input database.

    A name whose first ``':'`` sits past index 2 is cut down to its first
    two colon-separated fields. If a molecule with that shortened name was
    already loaded, the record is skipped, so the first record seen for a
    shortened name is the one that is kept.

    Args:
        filename: Path of the FASTA file to read.
        dbname: Database name forwarded to every ``Molecule``.

    Returns:
        A dict mapping molecule name to ``Molecule`` instance.
    """
    # Name suffixes marking records that must not be loaded. A tuple lets
    # str.endswith test all of them in a single call.
    ignorable_suffixes = ('N', 'L', 'Q', 'S', 'A', 'C')

    with open(filename) as file:
        content = file.read()

    molecules = {}
    # Anything before the first '>' is not a record, hence the [1:].
    for record in content.split('>')[1:]:
        # partition() never fails: a header without a following newline
        # simply yields an empty sequence instead of raising IndexError.
        header, _, body = record.partition('\n')
        sequence = body.replace('\n', '')

        fields = header.split(' ')
        name = fields[1] if len(fields) > 2 else fields[0]

        if name.endswith(ignorable_suffixes):
            print('ignorated ', name)
            continue

        if not name:
            name = 'None'
        elif name.find(':') > 2:
            name = ':'.join(name.split(':', 2)[:2])
            # Several records can collapse onto the same shortened name;
            # keep the first one and drop the rest.
            if name in molecules:
                continue

        molecules[name] = Molecule(dbname=dbname, name=name, seq=sequence)

    return molecules