receptor_binding_sites = [x-1 for x in [159,169,170,172,173,203,207]] sp=17 virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'H1N1', 'alignment_file':'data/H1N1_gisaid_epiflu_sequence.fasta', 'outgroup':'A/Tokyo/1/51', 'time_interval':(1990,2010), 'force_include':'source-data/H1N1_HI_strains.txt', 'force_include_all':True, 'date_spec':'year', 'max_global':True, # sample as evenly as possible from different geographic regions 'min_freq':0.10, 'cds':[0,None], # define the HA start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) #'clade_designations': {}, 'auspice_prefix':'H1N1_HI_', 'HI_fname':'source-data/H1N1_HI_titers.txt', 'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date, HI', 'gtplaceholder': 'HA1 positions...', 'freqdefault': '3c2.a, 3c3.a'}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, }) class H1N1_filter(flu_filter): def __init__(self,min_length = 987, **kwargs): '''
# Configuration overrides for the H1N1pdm build: input files, sampling
# switches and front-end display variables are merged into the shared
# ``virus_config`` mapping.
# NOTE(review): the file paths below are absolute paths on one developer's
# machine -- confirm before running anywhere else.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H1N1pdm',
    'alignment_file': '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/src/data/H1N1pdm_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Swine/Indiana/P12439/00',
    'force_include': '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/src/data/H1N1pdm_HI_strains.txt',
    'force_include_all': True,
    'date_spec': 'year',
    'max_global': True,  # sample as evenly as possible from different geographic regions
    'cds': [0, None],  # define the HA start in 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    #
    # The clade table is disabled with real comments. Wrapping it in a
    # triple-quoted string inside the dict literal does not comment it out:
    # adjacent string literals are concatenated, so the string fused with the
    # following 'HI_fname' literal and that key was never set.
    # 'clade_designations': {
    #     '2': [('HA1', 125, 'N'), ('HA1', 134, 'A'), ('HA1', 183, 'S'), ('HA1', 31, 'D'), ('HA1', 172, 'N'), ('HA1', 186, 'T')],
    #     '3': [('HA1', 134, 'T'), ('HA1', 183, 'P')],
    #     '4': [('HA1', 125, 'D'), ('HA1', 134, 'A'), ('HA1', 183, 'S')],
    #     '5': [('HA1', 87, 'N'), ('HA1', 205, 'K'), ('HA1', 216, 'V'), ('HA1', 149, 'L')],
    #     '6': [('HA1', 185, 'T'), ('HA1', 97, 'N'), ('HA1', 197, 'A')],
    #     '6c': [('HA1', 234, 'I'), ('HA1', 97, 'N'), ('HA1', 197, 'A'), ('HA1', 283, 'E')],
    #     '6b': [('HA1', 163, 'Q'), ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283, 'E')],
    #     '7': [('HA1', 143, 'G'), ('HA1', 97, 'D'), ('HA1', 197, 'T')],
    #     '8': [('HA1', 186, 'T'), ('HA1', 272, 'A')],
    #     '84N': [('HA1', 163, 'Q'), ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283, 'E'), ('SigPep', 13, 'T'), ('HA1', 84, 'N')]
    # },
    'HI_fname': '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/src/data/H1N1pdm_HI_titers.txt',
    'auspice_prefix': 'H1N1pdm_',
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '6b, 6c'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'layout': 'auspice',
})
from process import process, virus_config from Bio import SeqIO from Bio.Seq import Seq from Bio.Align import MultipleSeqAlignment import numpy as np from itertools import izip std_outgroup_file = '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/outgroups.fasta' virus_config.update({ # data source and sequence parsing/cleaning/processing 'fasta_fields': { 0: 'strain', 1: 'date', 2: 'isolate_id', 3: 'passage', 4: 'subtype', 5: 'ori_lab', 6: 'sub_lab', 7: 'submitter' }, 'cds': [0, None], # define the HA start i n 0 numbering 'auspice_prefix': 'H4_', 'verbose': 3 }) class mutation_tree(process, flu_filter, tree_refine, virus_clean): """docstring for mutation_tree""" def __init__(self, aln_fname, outgroup, outdir='./',
] else '0' for pos in xrange(1,1725)]) receptor_binding_sites = [159,169,170,172,173,203,207] virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'Vic', 'alignment_file':'data/Vic_gisaid_epiflu_sequence.fasta', 'outgroup':'B/HongKong/02/1993', #'force_include':'source-data/HI_strains.txt', 'force_include_all':False, 'max_global':True, # sample as evenly as possible from different geographic regions # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide 'clade_designations': { '1A': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')], '1B': [('HA1', 75,'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')] }, 'html_vars': {'coloring': 'lbi, dfreq, region, date', 'gtplaceholder': 'HA1 positions...', 'freqdefault': '1A, 1B'}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, }) class BVic_filter(flu_filter): def __init__(self,min_length = 987, **kwargs): ''' parameters
# H3N2 build settings merged into the shared ``virus_config`` mapping.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    "virus": "H3N2",
    "alignment_file": "data/H3N2_gisaid_epiflu_sequence.fasta",
    "outgroup": "A/Beijing/32/1992",
    # "force_include": "source-data/HI_strains.txt",
    "force_include_all": False,
    # sample as evenly as possible from different geographic regions
    "max_global": True,
    # define the HA1 start in 0 numbering
    "cds": [0, None],
    "n_iqd": 8,
    "min_mutation_frequency": 0.1,
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at
    # the HA1 start, add sp to obtain numbering from methionine.
    "clade_designations": {
        "3c3.a": [
            ("HA1", 128, "A"),
            ("HA1", 142, "G"),
            ("HA1", 159, "S"),
        ],
        "3c3": [
            ("HA1", 128, "A"),
            ("HA1", 142, "G"),
            ("HA1", 159, "F"),
        ],
        "3c2.a": [
            ("HA1", 144, "S"),
            ("HA1", 159, "Y"),
            ("HA1", 225, "D"),
            ("HA1", 311, "H"),
            ("HA2", 160, "N"),
        ],
        "3c2": [
            ("HA1", 144, "N"),
            ("HA1", 159, "F"),
            ("HA1", 225, "N"),
            ("HA2", 160, "N"),
            ("HA1", 142, "R"),
        ],
        "3c3.b": [
            ("HA1", 83, "R"),
            ("HA1", 261, "Q"),
            ("HA1", 62, "K"),
            ("HA1", 122, "D"),
        ],
    },
    # variables handed to the HTML / JS front end
    "html_vars": {
        "coloring": "ep, ne, rb, lbi, dfreq, region, date",
        "gtplaceholder": "HA1 positions...",
        "freqdefault": "3c2.a, 3c3.a",
    },
    "js_vars": {
        "LBItau": 0.0005,
        "LBItime_window": 0.5,
        "dfreq_dn": 2,
    },
})
# NOTE(review): presumably the signal peptide length ("sp") -- confirm
# against the code that reads it.
sp = 17

# Seasonal H1N1 (HI-titer variant) build settings merged into the shared
# ``virus_config`` mapping.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    "virus": "H1N1",
    "alignment_file": "data/H1N1_gisaid_epiflu_sequence.fasta",
    "outgroup": "A/Tokyo/1/51",
    "time_interval": (1990, 2010),
    "force_include": "source-data/H1N1_HI_strains.txt",
    "force_include_all": True,
    "date_spec": "year",
    # sample as evenly as possible from different geographic regions
    "max_global": True,
    "min_freq": 0.10,
    # define the HA start in 0 numbering
    "cds": [0, None],
    # define relevant clades in canonical HA1 numbering (+1)
    # "clade_designations": {},
    "auspice_prefix": "H1N1_HI_",
    "HI_fname": "source-data/H1N1_HI_titers.txt",
    "html_vars": {
        "coloring": "ep, ne, rb, lbi, dfreq, region, date, HI",
        "gtplaceholder": "HA1 positions...",
        # NOTE(review): these clade names are not defined in this block
        # (clade_designations is commented out) -- confirm they are intended.
        "freqdefault": "3c2.a, 3c3.a",
    },
    "js_vars": {
        "LBItau": 0.0005,
        "LBItime_window": 0.5,
        "dfreq_dn": 2,
    },
})
100, 132, 139 #Cb ] else '0' for pos in xrange(1, 1725) ]) receptor_binding_sites = [x - 1 for x in [159, 169, 170, 172, 173, 203, 207]] virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus': 'H1N1', 'alignment_file': 'data/H1N1_gisaid_epiflu_sequence.fasta', 'outgroup': 'A/Tokyo/1/51', 'time_interval': (1990, 2010), #'force_include':'source-data/HI_strains.txt', 'force_include_all': False, 'max_global': True, # sample as evenly as possible from different geographic regions 'cds': [0, None], # define the HA start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) 'clade_designations': {}, 'auspice_prefix': 'H1N1_', }) class H1N1_filter(flu_filter): def __init__(self, min_length=987, **kwargs): ''' parameters min_length -- minimal length for a sequence to be acceptable '''
receptor_binding_sites = [159, 169, 170, 172, 173, 203, 207] virus_config.update( { # data source and sequence parsing/cleaning/processing "virus": "Vic", "alignment_file": "data/Vic_gisaid_epiflu_sequence.fasta", "outgroup": "B/HongKong/02/1993", #'force_include':'source-data/HI_strains.txt', "force_include_all": False, "max_global": True, # sample as evenly as possible from different geographic regions "cds": [11, None], # define the translation start in 0 numbering # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide "clade_designations": { "1A": [(90, "K"), (73, "L"), (180, "K"), (604, "S")], "1B": [(90, "K"), (73, "P"), (180, "K")], }, "html_vars": { "coloring": "lbi, dfreq, region, date", "gtplaceholder": "HA1 positions...", "freqdefault": "1A, 1B", }, "js_vars": {"LBItau": 0.0005, "LBItime_window": 0.5, "dfreq_dn": 2}, } ) class BVic_filter(flu_filter): def __init__(self, min_length=987, **kwargs):
# H1N1pdm build settings merged into the shared ``virus_config`` mapping.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    "virus": "H1N1pdm",
    "alignment_file": "data/H1N1pdm_gisaid_epiflu_sequence.fasta",
    "outgroup": "A/Swine/Indiana/P12439/00",
    # "force_include": "source-data/HI_strains.txt",
    "force_include_all": False,
    # sample as evenly as possible from different geographic regions
    "max_global": True,
    # define the HA start in 0 numbering
    "cds": [0, None],
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at
    # methionine including the signal peptide.  Entries here are
    # (position, residue) pairs.
    "clade_designations": {
        "2": [
            (142, "N"),
            (151, "A"),
            (200, "S"),
            (48, "D"),
            (189, "N"),
            (203, "T"),
        ],
        "3": [(151, "T"), (200, "P")],
        "4": [(142, "D"), (151, "A"), (200, "S")],
        "5": [(104, "N"), (222, "K"), (233, "V"), (266, "L")],
        "6": [(202, "T"), (114, "N"), (214, "A")],
        "6c": [(251, "I"), (114, "N"), (214, "A"), (300, "E")],
        "6b": [(180, "Q"), (273, "T"), (214, "A"), (300, "E")],
        "7": [(160, "G"), (114, "D"), (214, "T")],
        "8": [(203, "T"), (289, "A")],
    },
    # variables handed to the HTML / JS front end
    "html_vars": {
        "coloring": "ep, ne, rb, lbi, dfreq, region, date",
        "gtplaceholder": "HA1 positions...",
        "freqdefault": "6b, 6c",
    },
    "js_vars": {
        "LBItau": 0.0005,
        "LBItime_window": 0.5,
        "dfreq_dn": 2,
    },
})
# Configuration overrides for the H10 build: input files, sampling switches
# and front-end display variables are merged into the shared
# ``virus_config`` mapping.
# NOTE(review): 'alignment_file' is an absolute path on one developer's
# machine -- confirm before running anywhere else.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H10',
    'alignment_file': '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H10_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/mallard-duck/ALB/302/1977',
    #'force_include':'H10_HI_strains.txt',
    'force_include_all': True,
    'date_spec': 'year',
    'max_global': True,  # sample as evenly as possible from different geographic regions
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    #
    # The clade table is disabled with real comments. Wrapping it in a
    # triple-quoted string inside the dict literal does not comment it out:
    # adjacent string literals are concatenated, so the string fused with the
    # following 'auspice_prefix' literal and that key was never set.
    # 'clade_designations': {
    #     '1A': [('HA1', 75, 'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')],
    #     '1B': [('HA1', 75, 'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')],
    #     '117V': [('HA1', 75, 'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K'), ('HA1', 129, 'D'), ('HA1', 117, 'V')]
    # },
    'auspice_prefix': 'H10_',
    'HI_fname': 'H10_HI_titers.txt',
    'html_vars': {
        'coloring': 'lbi, dfreq, region, date, cHI, HI_dist',
        'gtplaceholder': 'HA1 positions...',
        # NOTE(review): these clade names only appear in the disabled table
        # above -- confirm they are still meaningful.
        'freqdefault': '1A, 1B'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'layout': 'auspice',
})
from Bio.Seq import Seq from Bio.Align import MultipleSeqAlignment import numpy as np from itertools import izip virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'Zika', 'fasta_fields':{0:'strain', 2:'accession', 3:'date', 5:'country', 5:'region', 8:'db', 10:'authors'}, # 0 1 2 3 4 5 6 7 8 9 10 #>BeH818995|zika|KU365777|2015-XX-XX|south_america|brazil|?|?|genbank|genome|Azevedo et al 'alignment_file':'data/zika.fasta', 'outgroup':'H/PF/2013', 'aggregate_regions':[('global', None)], 'force_include_all':False, 'max_global':True, # sample as evenly as possible from different geographic regions 'cds':[0,None], # define the HA start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide 'min_mutation_frequency':0.499, 'min_genotype_frequency':0.499, 'html_vars': {'coloring': 'lbi, dfreq, region, date', 'gtplaceholder': 'Genomic positions...', 'freqdefault': ''}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, }) class zika_filter(virus_filter): def __init__(self,min_length = 987, **kwargs): '''
# Configuration overrides for the H7 build: input files, sampling switches
# and front-end display variables are merged into the shared
# ``virus_config`` mapping.
# NOTE(review): 'alignment_file' is an absolute path on one developer's
# machine -- confirm before running anywhere else.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H7',
    'alignment_file': '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/equine/Prague/2/1956',
    #'force_include':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_strains.txt',
    'force_include_all': False,
    'date_spec': 'year',
    'max_global': True,  # sample as evenly as possible from different geographic regions
    'cds': [0, None],  # define the translation start in 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    #
    # The clade table is disabled with real comments. Wrapping it in a
    # triple-quoted string inside the dict literal does not comment it out:
    # adjacent string literals are concatenated, so the string fused with the
    # following 'auspice_prefix' literal and that key was never set.
    # 'clade_designations': {
    #     '2': [('HA1', 48, 'K'), ('HA1', 108, 'A'), ('HA1', 150, 'S')],
    #     '3': [('HA1', 48, 'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I')],
    #     '3a': [('HA1', 37, 'A'), ('HA1', 298, 'E'), ('HA1', 48, 'R'), ('HA1', 105, 'P'), ('HA1', 150, 'I')],
    #     '172Q': [('HA1', 48, 'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I'), ('HA1', 116, 'K'), ('HA1', 172, 'Q')]
    # },
    'auspice_prefix': 'H7_',
    #'HI_fname':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_titers.txt',
    'html_vars': {
        'coloring': 'region, date',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': ''
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'layout': 'auspice',
})
receptor_binding_sites = [159, 169, 170, 172, 173, 203, 207] virus_config.update( { # data source and sequence parsing/cleaning/processing "virus": "Yam", "alignment_file": "data/Yam_gisaid_epiflu_sequence.fasta", "outgroup": "B/Singapore/11/94", #'force_include':'source-data/HI_strains.txt', "force_include_all": False, "max_global": True, # sample as evenly as possible from different geographic regions "cds": [11, None], # define the translation start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide "clade_designations": { "2": [("HA1", 48, "K"), ("HA1", 108, "A"), ("HA1", 150, "S")], "3": [("HA1", 48, "R"), ("HA1", 108, "P"), ("HA1", 150, "I")], "3a": [("HA1", 37, "A"), ("HA1", 298, "E"), ("HA1", 48, "R"), ("HA1", 105, "P"), ("HA1", 150, "I")], }, "html_vars": { "coloring": "lbi, dfreq, region, date", "gtplaceholder": "HA1 positions...", "freqdefault": "2, 3, 3a", }, "js_vars": {"LBItau": 0.0005, "LBItime_window": 0.5, "dfreq_dn": 2}, } ) class BYam_filter(flu_filter):
import numpy as np from itertools import izip path_to_augur = './' + '/'.join(sys.argv[0].split('/')[:-2]) std_outgroup_file_blast = path_to_augur + '/source-data/outgroups.fasta' std_outgroup_file_nuc = path_to_augur + '/source-data/outgroups_nucleotides_unspliced.fasta' no_raxml_threshold = 15000 virus_config.update({ # data source and sequence parsing/cleaning/processing 'fasta_fields': { 0: 'strain', 1: 'isolate_id', 2: 'date', 3: 'subtype', 4: 'country', 5: 'region', 7: 'host', 6: 'passage' }, 'cds': [0, None], # define the HA start i n 0 numbering 'verbose': 3 }) def get_date(strain): from datetime import datetime date_str = strain.split('|')[2] try: collection_date = datetime.strptime(date_str, '%Y-%m-%d') return collection_date.strftime('%Y-%m-%d')
# Configuration overrides for the H4 build: input files, sampling switches
# and front-end display variables are merged into the shared
# ``virus_config`` mapping.
# NOTE(review): 'alignment_file' is an absolute path on one developer's
# machine -- confirm before running anywhere else.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus':'H4',
    'alignment_file':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H4_gisaid_epiflu_sequence.fasta',
    'outgroup':'A/Duck/Czechoslovakia/1956',
    #'force_include':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H4_HI_strains.txt',
    'force_include_all':True,
    'date_spec':'year',
    'max_global':True,   # sample as evenly as possible from different geographic regions
    'cds':[0,None], # define the HA start in 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    #
    # The clade table is disabled with real comments. Wrapping it in a
    # triple-quoted string inside the dict literal does not comment it out:
    # adjacent string literals are concatenated (even across intervening
    # comment lines), so the string fused with the following 'auspice_prefix'
    # literal and that key was never set.
    # 'clade_designations': {
    #     '2': [('HA1', 125, 'N'), ('HA1', 134 ,'A'), ('HA1', 183, 'S'), ('HA1', 31,'D'), ('HA1', 172,'N'), ('HA1', 186,'T')],
    #     '3': [('HA1', 134 ,'T'), ('HA1', 183, 'P')],
    #     '4': [('HA1', 125, 'D'), ('HA1', 134 ,'A'), ('HA1', 183, 'S')],
    #     '5': [('HA1', 87, 'N'), ('HA1', 205, 'K'), ('HA1', 216, 'V'), ('HA1', 149, 'L')],
    #     '6': [('HA1', 185,'T'), ('HA1', 97, 'N'), ('HA1', 197, 'A')],
    #     '6c':[('HA1', 234,'I'), ('HA1', 97, 'N'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
    #     '6b':[('HA1', 163,'Q'), ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
    #     '7': [('HA1', 143,'G'), ('HA1', 97, 'D'), ('HA1', 197, 'T')],
    #     '8': [('HA1', 186,'T'), ('HA1', 272,'A')],
    #     '84N':[('HA1', 163,'Q'), ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283,'E'), ('SigPep', 13, 'T'), ('HA1', 84, 'N')]
    # },
    #'HI_fname':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H4_HI_titers.txt',
    'auspice_prefix':'H4_',
    'html_vars': {'coloring': 'region, date',
                  'gtplaceholder': 'HA1 positions...',
                  'freqdefault': ''},
    'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
    'layout':'auspice',
})
from Bio.Seq import Seq from Bio.Align import MultipleSeqAlignment import numpy as np from itertools import izip virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'Zika', 'fasta_fields':{0:'strain', 2:'accession', 3:'date', 5:'country', 5:'region', 8:'db', 10:'authors'}, # 0 1 2 3 4 5 6 7 8 9 10 #>BeH818995|Zika|KU365777|2015-07-21|SouthAmerica|Brazil|Para|Belem|Genbank|Genome|Azevedo et al|?| 'alignment_file':'data/Zika.fasta', 'outgroup':'H/PF/2013', 'aggregate_regions':[('global', None)], 'force_include_all':False, 'max_global':True, # sample as evenly as possible from different geographic regions 'cds':[0,None], # define the HA start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide 'min_mutation_frequency':0.499, 'min_genotype_frequency':0.499, 'html_vars': {'coloring': 'lbi, dfreq, region, date', 'gtplaceholder': 'Genomic positions...', 'freqdefault': ''}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, }) class zika_filter(virus_filter): def __init__(self,min_length = 987, **kwargs): '''
# H3N2 build settings (IRD sequence export) merged into the shared
# ``virus_config`` mapping.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    "virus": "H3N2",
    # earlier inputs: data/H3N2_gisaid_epiflu_sequence.fasta,
    #                 data/H3N2_IRD_sequence_clean.fasta
    "alignment_file": "data/H3N2_IRD_sequence_october_clean.fasta",
    "fasta_fields": {
        0: "strain",
        1: "isolate_id",
        3: "passage",
        5: "date",
        7: "lab",
        8: "accession",
    },
    # "alignment_file": "data/H3N2_gisaid_epiflu_sequence.fasta",
    "outgroup": "A/Beijing/32/1992",
    "force_include": "data/H3N2_HI_strains.txt",
    "force_include_all": False,
    "date_spec": "year",
    # sample as evenly as possible from different geographic regions
    "max_global": True,
    # define the HA1 start in 0 numbering
    "cds": [0, None],
    "n_iqd": 5,
    "min_mutation_frequency": 0.01,
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at
    # the HA1 start, add sp to obtain numbering from methionine.
    "clade_designations": {
        "3c3.a": [
            ("HA1", 128, "A"),
            ("HA1", 142, "G"),
            ("HA1", 159, "S"),
        ],
        "3c3": [
            ("HA1", 128, "A"),
            ("HA1", 142, "G"),
            ("HA1", 159, "F"),
        ],
        "3c2.a": [
            ("HA1", 144, "S"),
            ("HA1", 159, "Y"),
            ("HA1", 225, "D"),
            ("HA1", 311, "H"),
            ("HA2", 160, "N"),
        ],
        "3c2": [
            ("HA1", 144, "N"),
            ("HA1", 159, "F"),
            ("HA1", 225, "N"),
            ("HA2", 160, "N"),
            ("HA1", 142, "R"),
        ],
        "3c3.b": [
            ("HA1", 83, "R"),
            ("HA1", 261, "Q"),
            ("HA1", 62, "K"),
            ("HA1", 122, "D"),
        ],
    },
    "epitope_masks_fname": "source-data/H3N2_epitope_masks.tsv",
    "epitope_mask_version": "wolf",
    "HI_fname": "data/H3N2_HI_titers.txt",
    # variables handed to the HTML / JS front end
    "html_vars": {
        "coloring": "ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist",
        "gtplaceholder": "HA1 positions...",
        "freqdefault": "3c2.a, 3c3.a, 3c3.b",
    },
    "js_vars": {
        "LBItau": 0.0005,
        "LBItime_window": 0.5,
        "dfreq_dn": 2,
    },
    # previously also listed: 'nimr-sep-2010-table8', 'nimr-sep-2010-table8', 'NIMR_Sep2012_11.csv'
    "excluded_tables": ["NIMR_Sep2012_08.csv"],
    "layout": "auspice",
    "min_aamuts": 1,
    # "predictors": ["dfreq", "cHI"]   # estimate
    # fix predictor: [value, std deviation]
    "predictors": {
        "dfreq": [2.50, 2.84],
        "cHI": [1.68, 0.45],
    },
})
from process import process, virus_config from Bio import SeqIO, AlignIO from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.Align import MultipleSeqAlignment import numpy as np from itertools import izip path_to_augur = './' + '/'.join(sys.argv[0].split('/')[:-2]) std_outgroup_file_blast = path_to_augur+'/source-data/outgroups.fasta' std_outgroup_file_nuc = path_to_augur+'/source-data/outgroups_nucleotides_unspliced.fasta' no_raxml_threshold = 15000 virus_config.update({ # data source and sequence parsing/cleaning/processing 'fasta_fields':{0:'strain', 1:'isolate_id', 2:'date', 3:'subtype', 4:'country', 5:'region', 7:'host', 6:'passage'}, 'cds':[0,None], # define the HA start i n 0 numbering 'verbose':3 }) def get_date(strain): from datetime import datetime date_str = strain.split('|')[2] try: collection_date = datetime.strptime(date_str, '%Y-%m-%d') return collection_date.strftime('%Y-%m-%d') except: collection_date = datetime.strptime(date_str[:4], '%Y') return collection_date.strftime('%Y-%m-%d') class mutation_tree(process, flu_filter, tree_refine, virus_clean): """docstring for mutation_tree"""
receptor_binding_sites = [159,169,170,172,173,203,207] ''' virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'H7', 'alignment_file':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_gisaid_epiflu_sequence.fasta', 'outgroup':'A/equine/Prague/2/1956', #'force_include':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_strains.txt', 'force_include_all':False, 'date_spec':'year', 'max_global':True, # sample as evenly as possible from different geographic regions 'cds':[0,None], # define the translation start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide ''''clade_designations': { '2': [('HA1', 48,'K'), ('HA1', 108, 'A'), ('HA1', 150, 'S')], '3': [('HA1', 48,'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I')], '3a': [('HA1', 37,'A'), ('HA1', 298, 'E'), ('HA1', 48,'R'), ('HA1', 105, 'P'), ('HA1', 150, 'I')], '172Q': [('HA1', 48,'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I'), ('HA1', 116, 'K'), ('HA1', 172, 'Q')] },''' 'auspice_prefix':'H7_', #'HI_fname':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_titers.txt', 'html_vars': {'coloring': 'region, date', 'gtplaceholder': 'HA1 positions...', 'freqdefault': ''}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, 'layout':'auspice', }) class H7_filter(flu_filter):
epitope_mask = np.fromstring(sp*"0"+"0000000000000000000000000000000000000000000011111011011001010011000100000001001011110011100110101000001100000100000001000110101011111101011010111110001010011111000101011011111111010010001111101110111001010001110011111111000000111110000000101010101110000000000011100100000001011011100000000000001001011000110111111000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", dtype='S1') receptor_binding_sites = map(lambda x:x+sp-1, [145, 155, 156, 158, 159, 189, 193]) virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'H3N2', 'alignment_file':'data/H3N2_gisaid_epiflu_sequence.fasta', 'outgroup':'A/Beijing/32/1992', #'force_include':'source-data/HI_strains.txt', 'force_include_all':False, 'max_global':True, # sample as evenly as possible from different geographic regions 'cds':[0,None], # define the HA1 start i n 0 numbering 'n_iqd':6, # define relevant clades in canonical HA1 numbering (+1) # numbering starting at HA1 start, adding sp to obtain numbering from methionine 'clade_designations': { "3c3.a":[(128+sp,'A'), (142+sp,'G'), (159+sp,'S')], "3c3": [(128+sp,'A'), (142+sp,'G'), (159+sp,'F')], "3c2.a": [(144+sp,'S'), (159+sp,'Y'), (225+sp,'D'), (311+sp,'H'), (489+sp,'N')], "3c2": [(144+sp,'N'), (159+sp,'F'), (225+sp,'N'), (489+sp,'N'), (142+sp, 'R')], "3c3.b": [(83+sp,'R'), (261+sp,'Q'), (62+sp,'K'), (122+sp,'D')] }, 'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date', 'gtplaceholder': 'HA1 positions...', 'freqdefault': '3c2.a, 3c3.a'}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, }) class H3N2_filter(flu_filter): def __init__(self,min_length = 987, **kwargs):
receptor_binding_sites = [159,169,170,172,173,203,207] virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'H10', 'alignment_file':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H10_gisaid_epiflu_sequence.fasta', 'outgroup':'A/mallard-duck/ALB/302/1977', #'force_include':'H10_HI_strains.txt', 'force_include_all':True, 'date_spec':'year', 'max_global':True, # sample as evenly as possible from different geographic regions # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide ''''clade_designations': { '1A': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')], '1B': [('HA1', 75,'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')], '117V': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K'), ('HA1', 129, 'D'), ('HA1', 117, 'V')] },''' 'auspice_prefix':'H10_', 'HI_fname':'H10_HI_titers.txt', 'html_vars': {'coloring': 'lbi, dfreq, region, date, cHI, HI_dist', 'gtplaceholder': 'HA1 positions...', 'freqdefault': '1A, 1B'}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, 'layout':'auspice', }) class H10_filter(flu_filter): def __init__(self,min_length = 0, **kwargs):
# Influenza B/Yamagata ("Yam") build settings merged into the shared
# ``virus_config`` mapping.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    "virus": "Yam",
    "alignment_file": "data/yam.fasta",
    "outgroup": "B/Singapore/11/94",
    "force_include": "data/yam_hi_strains.tsv",
    "force_include_all": False,
    "date_spec": "year",
    # sample as evenly as possible from different geographic regions
    "max_global": True,
    # define the translation start in 0 numbering
    "cds": [11, None],
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at
    # methionine including the signal peptide.
    "clade_designations": {
        "2": [
            ("HA1", 48, "K"),
            ("HA1", 108, "A"),
            ("HA1", 150, "S"),
        ],
        "3": [
            ("HA1", 48, "R"),
            ("HA1", 108, "P"),
            ("HA1", 150, "I"),
        ],
        "3a": [
            ("HA1", 37, "A"),
            ("HA1", 298, "E"),
            ("HA1", 48, "R"),
            ("HA1", 105, "P"),
            ("HA1", 150, "I"),
        ],
        "172Q": [
            ("HA1", 48, "R"),
            ("HA1", 108, "P"),
            ("HA1", 150, "I"),
            ("HA1", 116, "K"),
            ("HA1", 172, "Q"),
        ],
    },
    "HI_fname": "data/yam_hi_titers.tsv",
    # variables handed to the HTML / JS front end
    "html_vars": {
        "coloring": "lbi, dfreq, region, date, cHI, HI_dist",
        "gtplaceholder": "HA1 positions...",
        "freqdefault": "2, 3, 3a",
    },
    "js_vars": {
        "LBItau": 0.0005,
        "LBItime_window": 0.5,
        "dfreq_dn": 2,
    },
    "layout": "auspice",
})
# Configuration overrides for the H9 build: input files, sampling switches,
# predictor settings and front-end display variables are merged into the
# shared ``virus_config`` mapping.
# NOTE(review): 'alignment_file' is an absolute path on one developer's
# machine -- confirm before running anywhere else.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus':'H9',
    'alignment_file':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_gisaid_epiflu_sequence.fasta',
    'outgroup':'A/duck/HongKong/147/1977',
    #'force_include':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_strains.txt',
    'force_include_all':False,
    'date_spec':'year',
    'max_global':True,   # sample as evenly as possible from different geographic regions
    #'max_globalh':True,
    'cds':[0,None], # define the HA1 start in 0 numbering
    'n_iqd':5,
    'min_mutation_frequency':0.01,
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at HA1 start, adding sp to obtain numbering from methionine
    #
    # The clade table is disabled with real comments. Wrapping it in a
    # triple-quoted string inside the dict literal does not comment it out:
    # adjacent string literals are concatenated (even across intervening
    # comment lines), so the string fused with the following 'auspice_prefix'
    # literal and that key was never set.
    # 'clade_designations': {
    #     "Y439":[('HA1',122,'F'), ('HA1',353,'P')],
    #     "Korea":[('HA1',107,'M'), ('HA1',122,'F'), ('HA1',127,'R'), ('HA1',130,'K'), ('HA1',132,'L'), ('HA1',134,'L'), ('HA1',179,'D'), ('HA1',212,'I'), ('HA1',299,'T'), ('HA1',353,'P'), ('HA1',473,'K')],
    #     "G1":[('HA1',353,'P'), ('HA1',473,'K')],
    #     "Ck-Bei":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
    #     "G9":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
    #     "Y280":[('HA1',299,'T'), ('HA1',473,'K')]
    # },
    #'epitope_masks_fname':'/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_epitope_masks.tsv',
    #'epitope_mask_version':'wolf',
    #'HI_fname':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_titers.txt',
    'auspice_prefix':'H9_',
    'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, host',
                  'gtplaceholder': 'HA1 positions...',
                  # NOTE(review): these clade names only appear in the
                  # disabled table above -- confirm they are still meaningful.
                  'freqdefault': 'Y439, Korea, G1, Ck-Bei, G9, Y280'},
    'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
    'excluded_tables': ['NIMR_Sep2012_08.csv'], #, 'nimr-sep-2010-table8', 'nimr-sep-2010-table8','NIMR_Sep2012_11.csv'],
    'layout':'auspice',
    'min_aamuts': 1,
    # 'predictors': ['dfreq', 'cHI']                                              # estimate
    'predictors': { 'dfreq': [2.50, 2.84], 'cHI': [1.68, 0.45] }                  # fix predictor: [value, std deviation]
})
# H3N2 build settings merged into the shared ``virus_config`` mapping.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    "virus": "H3N2",
    "alignment_file": "data/h3n2.fasta",
    "outgroup": "A/Beijing/32/1992",
    "force_include": "data/h3n2_hi_strains.tsv",
    "force_include_all": False,
    "date_spec": "year",
    # sample as evenly as possible from different geographic regions
    "max_global": True,
    # define the HA1 start in 0 numbering
    "cds": [0, None],
    "n_iqd": 5,
    "min_mutation_frequency": 0.01,
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at
    # the HA1 start, add sp to obtain numbering from methionine.
    "clade_designations": {
        "3c3.a": [
            ("HA1", 128, "A"),
            ("HA1", 142, "G"),
            ("HA1", 159, "S"),
        ],
        "3c3": [
            ("HA1", 128, "A"),
            ("HA1", 142, "G"),
            ("HA1", 159, "F"),
        ],
        "3c2.a": [
            ("HA1", 144, "S"),
            ("HA1", 159, "Y"),
            ("HA1", 225, "D"),
            ("HA1", 311, "H"),
            ("HA2", 160, "N"),
        ],
        "171K": [
            ("HA1", 144, "S"),
            ("HA1", 159, "Y"),
            ("HA1", 171, "K"),
            ("HA1", 225, "D"),
            ("HA1", 311, "H"),
            ("HA2", 77, "V"),
            ("HA2", 155, "E"),
            ("HA2", 160, "N"),
        ],
        "3c2": [
            ("HA1", 144, "N"),
            ("HA1", 159, "F"),
            ("HA1", 225, "N"),
            ("HA2", 160, "N"),
            ("HA1", 142, "R"),
        ],
        "3c3.b": [
            ("HA1", 83, "R"),
            ("HA1", 261, "Q"),
            ("HA1", 62, "K"),
            ("HA1", 122, "D"),
        ],
    },
    "epitope_masks_fname": "source-data/H3N2_epitope_masks.tsv",
    "epitope_mask_version": "wolf",
    "HI_fname": "data/h3n2_hi_titers.tsv",
    # variables handed to the HTML / JS front end
    "html_vars": {
        "coloring": "ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist",
        "gtplaceholder": "HA1 positions...",
        "freqdefault": "3c2.a, 3c3.a, 3c3.b",
    },
    "js_vars": {
        "LBItau": 0.0005,
        "LBItime_window": 0.5,
        "dfreq_dn": 2,
    },
    # previously also listed: 'nimr-sep-2010-table8', 'nimr-sep-2010-table8', 'NIMR_Sep2012_11.csv'
    "excluded_tables": ["NIMR_Sep2012_08.csv"],
    "layout": "auspice",
    "min_aamuts": 1,
    # "predictors": ["dfreq", "cHI"]   # estimate
    # fix predictor: [value, std deviation]
    "predictors": {
        "dfreq": [2.50, 2.84],
        "cHI": [1.68, 0.45],
    },
})
170,173,174,177,206,207,210,211,212,214,216, #Sb 183,187,191,196,221,225,254,258,288, #Ca1 154,157,158,159,161,163,238,239,242,243, #Ca2 87, 88, 90, 91, 92, 95, 96, 98, 99, 100, 132, 139 #Cb ] else '0' for pos in xrange(1,1725)]) receptor_binding_sites = [x-1 for x in [159,169,170,172,173,203,207]] virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'H1N1', 'alignment_file':'data/H1N1_gisaid_epiflu_sequence.fasta', 'outgroup':'A/Tokyo/1/51', 'time_interval':(1990,2010), #'force_include':'source-data/HI_strains.txt', 'force_include_all':False, 'max_global':True, # sample as evenly as possible from different geographic regions 'cds':[0,None], # define the HA start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) 'clade_designations': {}, 'auspice_prefix':'H1N1_', }) class H1N1_filter(flu_filter): def __init__(self,min_length = 987, **kwargs): ''' parameters min_length -- minimal length for a sequence to be acceptable ''' flu_filter.__init__(self, **kwargs)
receptor_binding_sites = [159,169,170,172,173,203,207] virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'Vic', 'alignment_file':'data/vic.fasta', 'outgroup':'B/HongKong/02/1993', 'force_include':'data/vic_hi_strains.tsv', 'force_include_all':False, 'date_spec':'year', 'max_global':True, # sample as evenly as possible from different geographic regions # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide 'clade_designations': { '1A': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')], '1B': [('HA1', 75,'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')], '117V': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K'), ('HA1', 129, 'D'), ('HA1', 117, 'V')] }, 'HI_fname':'data/vic_hi_titers.tsv', 'html_vars': {'coloring': 'lbi, dfreq, region, date, cHI, HI_dist', 'gtplaceholder': 'HA1 positions...', 'freqdefault': '1A, 1B'}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, 'layout':'auspice', }) class BVic_filter(flu_filter): def __init__(self,min_length = 987, **kwargs):
from tree_refine import tree_refine from virus_clean import virus_clean from virus_filter import flu_filter from collections import defaultdict from process import process, virus_config from Bio import SeqIO from Bio.Seq import Seq from Bio.Align import MultipleSeqAlignment import numpy as np from itertools import izip std_outgroup_file = '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/outgroups.fasta' virus_config.update({ # data source and sequence parsing/cleaning/processing 'fasta_fields':{0:'strain', 1:'date', 2:'isolate_id', 3:'passage', 4:'subtype', 5:'ori_lab', 6:'sub_lab', 7:'submitter'}, 'cds':[0,None], # define the HA start i n 0 numbering 'auspice_prefix':'H4_', 'verbose':3 }) class mutation_tree(process, flu_filter, tree_refine, virus_clean): """docstring for mutation_tree""" def __init__(self, aln_fname, outgroup, outdir = './', formats = ['pdf','svg','png'], verbose = 0, **kwargs): process.__init__(self, **kwargs) flu_filter.__init__(self, alignment_file = aln_fname, **kwargs) tree_refine.__init__(self, **kwargs) virus_clean.__init__(self, **kwargs) self.verbose = verbose self.formats = formats self.outdir = outdir.rstrip('/')+'/'
# Settings for the pandemic H1N1 (H1N1pdm) build; merged into the shared
# virus_config dictionary.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    'virus': 'H1N1pdm',
    'alignment_file': 'data/H1N1pdm_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Swine/Indiana/P12439/00',
    'force_include': 'data/H1N1pdm_HI_strains.txt',
    'force_include_all': False,
    'date_spec': 'year',
    # sample as evenly as possible from different geographic regions
    'max_global': True,
    # define the HA start in 0 numbering
    'cds': [0, None],

    # --- clade definitions ---
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at
    # the methionine and includes the signal peptide.
    # Each entry is a (gene, position, amino acid) tuple.
    'clade_designations': {
        '2': [
            ('HA1', 125, 'N'), ('HA1', 134, 'A'), ('HA1', 183, 'S'),
            ('HA1', 31, 'D'), ('HA1', 172, 'N'), ('HA1', 186, 'T'),
        ],
        '3': [
            ('HA1', 134, 'T'), ('HA1', 183, 'P'),
        ],
        '4': [
            ('HA1', 125, 'D'), ('HA1', 134, 'A'), ('HA1', 183, 'S'),
        ],
        '5': [
            ('HA1', 87, 'N'), ('HA1', 205, 'K'),
            ('HA1', 216, 'V'), ('HA1', 149, 'L'),
        ],
        '6': [
            ('HA1', 185, 'T'), ('HA1', 97, 'N'), ('HA1', 197, 'A'),
        ],
        '6c': [
            ('HA1', 234, 'I'), ('HA1', 97, 'N'),
            ('HA1', 197, 'A'), ('HA1', 283, 'E'),
        ],
        '6b': [
            ('HA1', 163, 'Q'), ('HA1', 256, 'T'),
            ('HA1', 197, 'A'), ('HA1', 283, 'E'),
        ],
        '7': [
            ('HA1', 143, 'G'), ('HA1', 97, 'D'), ('HA1', 197, 'T'),
        ],
        '8': [
            ('HA1', 186, 'T'), ('HA1', 272, 'A'),
        ],
        '6b.1': [
            ('HA1', 163, 'Q'), ('HA1', 256, 'T'),
            ('HA1', 197, 'A'), ('HA1', 283, 'E'),
            ('SigPep', 13, 'T'), ('HA1', 84, 'N'), ('HA1', 162, 'N'),
        ],
        '6b.2': [
            ('HA1', 163, 'Q'), ('HA1', 256, 'T'),
            ('HA1', 197, 'A'), ('HA1', 283, 'E'),
            ('HA2', 164, 'G'), ('HA1', 152, 'T'), ('HA2', 174, 'E'),
        ],
    },

    # --- HI titer input ---
    'HI_fname': 'data/H1N1pdm_HI_titers.txt',

    # --- values handed to the html / js front end ---
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '6b, 6c',
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2,
    },
    'layout': 'auspice',
})
# Settings for the H9 build; merged into the shared virus_config dictionary.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    'virus': 'H9',
    # NOTE(review): absolute, machine-specific path -- other builds use paths
    # relative to the working directory ('data/...'); confirm before sharing.
    'alignment_file': '/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/duck/HongKong/147/1977',
    #'force_include':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_strains.txt',
    'force_include_all': False,
    'date_spec': 'year',
    # sample as evenly as possible from different geographic regions
    'max_global': True,
    #'max_globalh':True,
    # define the HA1 start in 0 numbering
    'cds': [0, None],
    'n_iqd': 5,
    'min_mutation_frequency': 0.01,

    # --- clade definitions (currently disabled) ---
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at the
    # HA1 start, add sp to obtain numbering from the methionine.
    #
    # The block below used to be "disabled" by wrapping it in a triple-quoted
    # string. Inside a dict literal that string is implicitly concatenated
    # with the next string literal ('auspice_prefix'), which produced one
    # bogus key and left the real 'auspice_prefix' entry unset. It is now
    # disabled with genuine comments instead.
    #'clade_designations': {
    #    "Y439":[('HA1',122,'F'), ('HA1',353,'P')],
    #    "Korea":[('HA1',107,'M'), ('HA1',122,'F'), ('HA1',127,'R'), ('HA1',130,'K'),
    #             ('HA1',132,'L'), ('HA1',134,'L'), ('HA1',179,'D'), ('HA1',212,'I'),
    #             ('HA1',299,'T'), ('HA1',353,'P'), ('HA1',473,'K')],
    #    "G1":[('HA1',353,'P'), ('HA1',473,'K')],
    #    "Ck-Bei":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
    #    "G9":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
    #    "Y280":[('HA1',299,'T'), ('HA1',473,'K')]
    #},

    #'epiope_masks_fname':'/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_epitope_masks.tsv',
    #'epitope_mask_version':'wolf',
    #'HI_fname':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_titers.txt',
    'auspice_prefix': 'H9_',

    # --- values handed to the html / js front end ---
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, host',
        'gtplaceholder': 'HA1 positions...',
        # NOTE(review): these names match the disabled clade designations above.
        'freqdefault': 'Y439, Korea, G1, Ck-Bei, G9, Y280',
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2,
    },
    'excluded_tables': [
        'NIMR_Sep2012_08.csv',
    ],  #, 'nimr-sep-2010-table8', 'nimr-sep-2010-table8','NIMR_Sep2012_11.csv'],
    'layout': 'auspice',
    'min_aamuts': 1,
    # 'predictors': ['dfreq', 'cHI']  # estimate
    # fix predictor: [value, std deviation]
    'predictors': {
        'dfreq': [2.50, 2.84],
        'cHI': [1.68, 0.45],
    },
})
else '0' for pos in xrange(1,1725)]) receptor_binding_sites = [159,169,170,172,173,203,207] virus_config.update({ # data source and sequence parsing/cleaning/processing 'virus':'Yam', 'alignment_file':'data/Yam_gisaid_epiflu_sequence.fasta', 'outgroup':'B/Singapore/11/94', #'force_include':'source-data/HI_strains.txt', 'force_include_all':False, 'max_global':True, # sample as evenly as possible from different geographic regions 'cds':[11,None], # define the translation start i n 0 numbering # define relevant clades in canonical HA1 numbering (+1) # numbering starting at methionine including the signal peptide 'clade_designations': { '2': [(63,'K'), (123, 'A'), (165, 'S')], '3': [(63,'R'), (123, 'P'), (165, 'I')], '3a': [(52,'A'), (313, 'E'), (63,'R'), (123, 'P'), (165, 'I')], }, 'html_vars': {'coloring': 'lbi, dfreq, region, date', 'gtplaceholder': 'HA1 positions...', 'freqdefault': '2, 3, 3a'}, 'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2}, }) class BYam_filter(flu_filter): def __init__(self,min_length = 987, **kwargs): '''
# Settings for the pandemic H1N1 (H1N1pdm) build; merged into the shared
# virus_config dictionary.
virus_config.update({
    # --- data source and sequence parsing/cleaning/processing ---
    'virus': 'H1N1pdm',
    'alignment_file': 'data/H1N1pdm_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Swine/Indiana/P12439/00',
    #'force_include':'source-data/HI_strains.txt',
    'force_include_all': False,
    # sample as evenly as possible from different geographic regions
    'max_global': True,
    # define the HA start in 0 numbering
    'cds': [0, None],

    # --- clade definitions ---
    # Relevant clades in canonical HA1 numbering (+1); numbering starts at
    # the methionine and includes the signal peptide.
    # Each entry is a (gene, position, amino acid) tuple.
    'clade_designations': {
        '2': [
            ('HA1', 125, 'N'), ('HA1', 134, 'A'), ('HA1', 183, 'S'),
            ('HA1', 31, 'D'), ('HA1', 172, 'N'), ('HA1', 186, 'T'),
        ],
        '3': [
            ('HA1', 134, 'T'), ('HA1', 183, 'P'),
        ],
        '4': [
            ('HA1', 125, 'D'), ('HA1', 134, 'A'), ('HA1', 183, 'S'),
        ],
        '5': [
            ('HA1', 87, 'N'), ('HA1', 205, 'K'),
            ('HA1', 216, 'V'), ('HA1', 149, 'L'),
        ],
        '6': [
            ('HA1', 185, 'T'), ('HA1', 97, 'N'), ('HA1', 197, 'A'),
        ],
        '6c': [
            ('HA1', 234, 'I'), ('HA1', 97, 'N'),
            ('HA1', 197, 'A'), ('HA1', 283, 'E'),
        ],
        '6b': [
            ('HA1', 163, 'Q'), ('HA1', 256, 'T'),
            ('HA1', 197, 'A'), ('HA1', 283, 'E'),
        ],
        '7': [
            ('HA1', 143, 'G'), ('HA1', 97, 'D'), ('HA1', 197, 'T'),
        ],
        '8': [
            ('HA1', 186, 'T'), ('HA1', 272, 'A'),
        ],
    },

    # --- values handed to the html / js front end ---
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '6b, 6c',
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2,
    },
})