# Receptor binding site positions, each shifted down by one
# (presumably converting 1-based positions to 0-based indices -- TODO confirm).
receptor_binding_sites = [x-1 for x in [159,169,170,172,173,203,207]]



# NOTE(review): presumably the signal-peptide length used as a numbering
# offset -- confirm against the code that consumes `sp`.
sp=17
# Merge the H1N1 specific settings into the shared virus_config mapping.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H1N1',
	'alignment_file':'data/H1N1_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/Tokyo/1/51',
	'time_interval':(1990,2010),
	'force_include':'source-data/H1N1_HI_strains.txt',
	'force_include_all':True,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions
	'min_freq':0.10,
	'cds':[0,None], # define the HA start in 0 numbering
	# define relevant clades in canonical HA1 numbering (+1)
	#'clade_designations': {},
	'auspice_prefix':'H1N1_HI_',
	'HI_fname':'source-data/H1N1_HI_titers.txt',
	# NOTE(review): 'freqdefault' below names the clades '3c2.a, 3c3.a' although
	# 'clade_designations' is commented out in this config -- looks like a
	# left-over from another lineage's config; confirm it is intended here.
	'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date, HI',
				   'gtplaceholder': 'HA1 positions...',
					'freqdefault': '3c2.a, 3c3.a'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	})


class H1N1_filter(flu_filter):
	def __init__(self,min_length = 987, **kwargs):
		'''
示例#2
0
# Merge the H1N1pdm specific settings into the shared virus_config mapping.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H1N1pdm',
    'alignment_file': '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/src/data/H1N1pdm_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Swine/Indiana/P12439/00',
    'force_include': '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/src/data/H1N1pdm_HI_strains.txt',
    'force_include_all': True,
    'date_spec': 'year',
    'max_global': True,  # sample as evenly as possible from different geographic regions
    'cds': [0, None],  # define the HA start in 0 numbering

    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    #
    # The clade designations are disabled with real '#' comments. They used to
    # be wrapped in a triple-quoted string, but a string is not a comment:
    # inside the dict literal it was implicitly concatenated with the next key
    # string ('HI_fname'), so 'HI_fname' was never set and a bogus key was
    # inserted instead.
    # 'clade_designations': {
    #     '2': [('HA1', 125, 'N'), ('HA1', 134 ,'A'), ('HA1', 183, 'S'), ('HA1', 31,'D'), ('HA1', 172,'N'), ('HA1', 186,'T')],
    #     '3': [('HA1', 134 ,'T'), ('HA1', 183, 'P')],
    #     '4': [('HA1', 125, 'D'), ('HA1', 134 ,'A'), ('HA1', 183, 'S')],
    #     '5': [('HA1', 87, 'N'), ('HA1', 205, 'K'), ('HA1', 216, 'V'), ('HA1', 149, 'L')],
    #     '6': [('HA1', 185,'T'),  ('HA1', 97, 'N'), ('HA1', 197, 'A')],
    #     '6c':[('HA1', 234,'I'),  ('HA1', 97, 'N'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
    #     '6b':[('HA1', 163,'Q'),  ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
    #     '7': [('HA1', 143,'G'),  ('HA1', 97, 'D'), ('HA1', 197, 'T')],
    #     '8': [('HA1', 186,'T'),  ('HA1', 272,'A')],
    #     '84N':[('HA1', 163,'Q'),  ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283,'E'), ('SigPep', 13, 'T'), ('HA1', 84, 'N')]
    # },
    'HI_fname': '/Users/yujia_zhou/Documents/Work/H9_nextflu-master/augur/src/data/H1N1pdm_HI_titers.txt',
    'auspice_prefix': 'H1N1pdm_',
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '6b, 6c'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'layout': 'auspice',
})
示例#3
0
from process import process, virus_config
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Align import MultipleSeqAlignment
import numpy as np
from itertools import izip

std_outgroup_file = '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/outgroups.fasta'
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'fasta_fields': {
        0: 'strain',
        1: 'date',
        2: 'isolate_id',
        3: 'passage',
        4: 'subtype',
        5: 'ori_lab',
        6: 'sub_lab',
        7: 'submitter'
    },
    'cds': [0, None],  # define the HA start i n 0 numbering
    'auspice_prefix': 'H4_',
    'verbose': 3
})


class mutation_tree(process, flu_filter, tree_refine, virus_clean):
    """docstring for mutation_tree"""
    def __init__(self,
                 aln_fname,
                 outgroup,
                 outdir='./',
示例#4
0
									   ]
						else '0' for pos in xrange(1,1725)])

receptor_binding_sites = [159,169,170,172,173,203,207]


virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'Vic',
	'alignment_file':'data/Vic_gisaid_epiflu_sequence.fasta',
	'outgroup':'B/HongKong/02/1993',
	#'force_include':'source-data/HI_strains.txt',
	'force_include_all':False,
	'max_global':True,   # sample as evenly as possible from different geographic regions 
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	'clade_designations': {
		'1A': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')],
		'1B': [('HA1', 75,'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')]
	},
	'html_vars': {'coloring': 'lbi, dfreq, region, date',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': '1A, 1B'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	})


class BVic_filter(flu_filter):
	def __init__(self,min_length = 987, **kwargs):
		'''
		parameters
示例#5
0
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H3N2',
    'alignment_file': 'data/H3N2_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Beijing/32/1992',
    #'force_include':'source-data/HI_strains.txt',
    'force_include_all': False,
    'max_global':
    True,  # sample as evenly as possible from different geographic regions 
    'cds': [0, None],  # define the HA1 start i n 0 numbering
    'n_iqd': 8,
    'min_mutation_frequency': 0.1,
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at HA1 start, adding sp to obtain numbering from methionine
    'clade_designations': {
        "3c3.a": [('HA1', 128, 'A'), ('HA1', 142, 'G'), ('HA1', 159, 'S')],
        "3c3": [('HA1', 128, 'A'), ('HA1', 142, 'G'), ('HA1', 159, 'F')],
        "3c2.a": [('HA1', 144, 'S'), ('HA1', 159, 'Y'), ('HA1', 225, 'D'),
                  ('HA1', 311, 'H'), ('HA2', 160, 'N')],
        "3c2": [('HA1', 144, 'N'), ('HA1', 159, 'F'), ('HA1', 225, 'N'),
                ('HA2', 160, 'N'), ('HA1', 142, 'R')],
        "3c3.b": [('HA1', 83, 'R'), ('HA1', 261, 'Q'), ('HA1', 62, 'K'),
                  ('HA1', 122, 'D')]
    },
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '3c2.a, 3c3.a'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
})
示例#6
0
# NOTE(review): presumably the signal-peptide length used as a numbering
# offset -- confirm against the code that consumes `sp`.
sp = 17
# Merge the H1N1 specific settings into the shared virus_config mapping.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H1N1',
    'alignment_file': 'data/H1N1_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Tokyo/1/51',
    'time_interval': (1990, 2010),
    'force_include': 'source-data/H1N1_HI_strains.txt',
    'force_include_all': True,
    'date_spec': 'year',
    'max_global':
    True,  # sample as evenly as possible from different geographic regions
    'min_freq': 0.10,
    'cds': [0, None],  # define the HA start in 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    #'clade_designations': {},
    'auspice_prefix': 'H1N1_HI_',
    'HI_fname': 'source-data/H1N1_HI_titers.txt',
    # NOTE(review): 'freqdefault' below names the clades '3c2.a, 3c3.a' although
    # 'clade_designations' is commented out in this config -- looks like a
    # left-over from another lineage's config; confirm it is intended here.
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date, HI',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '3c2.a, 3c3.a'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
})

示例#7
0
        100,
        132,
        139  #Cb
    ] else '0' for pos in xrange(1, 1725)
])

receptor_binding_sites = [x - 1 for x in [159, 169, 170, 172, 173, 203, 207]]

virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H1N1',
    'alignment_file': 'data/H1N1_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Tokyo/1/51',
    'time_interval': (1990, 2010),
    #'force_include':'source-data/HI_strains.txt',
    'force_include_all': False,
    'max_global':
    True,  # sample as evenly as possible from different geographic regions 
    'cds': [0, None],  # define the HA start i n 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    'clade_designations': {},
    'auspice_prefix': 'H1N1_',
})


class H1N1_filter(flu_filter):
    def __init__(self, min_length=987, **kwargs):
        '''
		parameters
		min_length  -- minimal length for a sequence to be acceptable
		'''
示例#8
0
receptor_binding_sites = [159, 169, 170, 172, 173, 203, 207]


virus_config.update(
    {
        # data source and sequence parsing/cleaning/processing
        "virus": "Vic",
        "alignment_file": "data/Vic_gisaid_epiflu_sequence.fasta",
        "outgroup": "B/HongKong/02/1993",
        #'force_include':'source-data/HI_strains.txt',
        "force_include_all": False,
        "max_global": True,  # sample as evenly as possible from different geographic regions
        "cds": [11, None],  # define the translation start in 0 numbering
        # define relevant clades in canonical HA1 numbering (+1)
        # numbering starting at methionine including the signal peptide
        "clade_designations": {
            "1A": [(90, "K"), (73, "L"), (180, "K"), (604, "S")],
            "1B": [(90, "K"), (73, "P"), (180, "K")],
        },
        "html_vars": {
            "coloring": "lbi, dfreq, region, date",
            "gtplaceholder": "HA1 positions...",
            "freqdefault": "1A, 1B",
        },
        "js_vars": {"LBItau": 0.0005, "LBItime_window": 0.5, "dfreq_dn": 2},
    }
)


class BVic_filter(flu_filter):
    def __init__(self, min_length=987, **kwargs):
示例#9
0

virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H1N1pdm',
	'alignment_file':'data/H1N1pdm_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/Swine/Indiana/P12439/00',
	#'force_include':'source-data/HI_strains.txt',
	'force_include_all':False,
	'max_global':True,   # sample as evenly as possible from different geographic regions 
	'cds':[0,None], # define the HA start i n 0 numbering
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	'clade_designations': {
		'2':[(142, 'N'), (151 ,'A'), (200, 'S'), (48,'D'), (189,'N'), (203,'T')],
		'3':[(151 ,'T'), (200, 'P')],
		'4':[(142, 'D'), (151 ,'A'), (200, 'S')],
		'5':[(104, 'N'), (222, 'K'), (233, 'V'), (266, 'L')],
		'6':[(202,'T'),  (114, 'N'), (214, 'A')],
		'6c':[(251,'I'), (114, 'N'), (214, 'A'), (300,'E')],
		'6b':[(180,'Q'), (273, 'T'), (214, 'A'), (300,'E')],
		'7':[(160,'G'),  (114, 'D'), (214, 'T')],
		'8':[(203,'T'), (289,'A')],
		},
	'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': '6b, 6c'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	})

示例#10
0
# Merge the H10 specific settings into the shared virus_config mapping.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H10',
    'alignment_file': '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H10_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/mallard-duck/ALB/302/1977',
    #'force_include':'H10_HI_strains.txt',
    'force_include_all': True,
    'date_spec': 'year',
    'max_global': True,  # sample as evenly as possible from different geographic regions
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    #
    # The clade designations are disabled with real '#' comments. They used to
    # be wrapped in a triple-quoted string, but a string is not a comment:
    # inside the dict literal it was implicitly concatenated with the next key
    # string ('auspice_prefix'), so 'auspice_prefix' was never set and a bogus
    # key was inserted instead.
    # 'clade_designations': {
    #     '1A': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')],
    #     '1B': [('HA1', 75,'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')],
    #     '117V': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K'), ('HA1', 129, 'D'), ('HA1', 117, 'V')]
    # },
    'auspice_prefix': 'H10_',
    'HI_fname': 'H10_HI_titers.txt',
    'html_vars': {
        'coloring': 'lbi, dfreq, region, date, cHI, HI_dist',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '1A, 1B'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'layout': 'auspice',
})
示例#11
0
from Bio.Seq import Seq
from Bio.Align import MultipleSeqAlignment
import numpy as np
from itertools import izip

# Merge the Zika specific settings into the shared virus_config mapping.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'Zika',
	# Maps the position of each '|'-separated fasta header field to its name.
	# The index 5 used to appear twice ('country' and 'region'); in a dict
	# literal the later entry silently wins, so 'country' was dropped and
	# 'region' was read from the country column. Per the header layout shown
	# below, the region is field 4 and the country is field 5.
	'fasta_fields':{0:'strain', 2:'accession', 3:'date', 4:'region', 5:'country', 8:'db', 10:'authors'},
	# 0         1    2        3          4             5      6 7 8       9      10
	#>BeH818995|zika|KU365777|2015-XX-XX|south_america|brazil|?|?|genbank|genome|Azevedo et al
	'alignment_file':'data/zika.fasta',
	'outgroup':'H/PF/2013',
	'aggregate_regions':[('global', None)],
	'force_include_all':False,
	'max_global':True,   # sample as evenly as possible from different geographic regions
	'cds':[0,None], # define the HA start in 0 numbering
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	'min_mutation_frequency':0.499,
	'min_genotype_frequency':0.499,
	'html_vars': {'coloring': 'lbi, dfreq, region, date',
				   'gtplaceholder': 'Genomic positions...',
					'freqdefault': ''},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	})


class zika_filter(virus_filter):
	def __init__(self,min_length = 987, **kwargs):
		'''
示例#12
0
# Merge the H7 specific settings into the shared virus_config mapping.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H7',
    'alignment_file': '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/equine/Prague/2/1956',
    #'force_include':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_strains.txt',
    'force_include_all': False,
    'date_spec': 'year',
    'max_global': True,  # sample as evenly as possible from different geographic regions
    'cds': [0, None],  # define the translation start in 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    #
    # The clade designations are disabled with real '#' comments. They used to
    # be wrapped in a triple-quoted string, but a string is not a comment:
    # inside the dict literal it was implicitly concatenated with the next key
    # string ('auspice_prefix'), so 'auspice_prefix' was never set and a bogus
    # key was inserted instead.
    # 'clade_designations': {
    #     '2':  [('HA1', 48,'K'), ('HA1', 108, 'A'), ('HA1', 150, 'S')],
    #     '3':  [('HA1', 48,'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I')],
    #     '3a': [('HA1', 37,'A'), ('HA1', 298, 'E'), ('HA1', 48,'R'), ('HA1', 105, 'P'), ('HA1', 150, 'I')],
    #     '172Q': [('HA1', 48,'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I'), ('HA1', 116, 'K'), ('HA1', 172, 'Q')]
    # },
    'auspice_prefix': 'H7_',
    #'HI_fname':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_titers.txt',
    'html_vars': {
        'coloring': 'region, date',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': ''
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'layout': 'auspice',
})
示例#13
0
receptor_binding_sites = [159, 169, 170, 172, 173, 203, 207]


virus_config.update(
    {
        # data source and sequence parsing/cleaning/processing
        "virus": "Yam",
        "alignment_file": "data/Yam_gisaid_epiflu_sequence.fasta",
        "outgroup": "B/Singapore/11/94",
        #'force_include':'source-data/HI_strains.txt',
        "force_include_all": False,
        "max_global": True,  # sample as evenly as possible from different geographic regions
        "cds": [11, None],  # define the translation start i n 0 numbering
        # define relevant clades in canonical HA1 numbering (+1)
        # numbering starting at methionine including the signal peptide
        "clade_designations": {
            "2": [("HA1", 48, "K"), ("HA1", 108, "A"), ("HA1", 150, "S")],
            "3": [("HA1", 48, "R"), ("HA1", 108, "P"), ("HA1", 150, "I")],
            "3a": [("HA1", 37, "A"), ("HA1", 298, "E"), ("HA1", 48, "R"), ("HA1", 105, "P"), ("HA1", 150, "I")],
        },
        "html_vars": {
            "coloring": "lbi, dfreq, region, date",
            "gtplaceholder": "HA1 positions...",
            "freqdefault": "2, 3, 3a",
        },
        "js_vars": {"LBItau": 0.0005, "LBItime_window": 0.5, "dfreq_dn": 2},
    }
)


class BYam_filter(flu_filter):
示例#14
0
import numpy as np
from itertools import izip

path_to_augur = './' + '/'.join(sys.argv[0].split('/')[:-2])
std_outgroup_file_blast = path_to_augur + '/source-data/outgroups.fasta'
std_outgroup_file_nuc = path_to_augur + '/source-data/outgroups_nucleotides_unspliced.fasta'
no_raxml_threshold = 15000

virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'fasta_fields': {
        0: 'strain',
        1: 'isolate_id',
        2: 'date',
        3: 'subtype',
        4: 'country',
        5: 'region',
        7: 'host',
        6: 'passage'
    },
    'cds': [0, None],  # define the HA start i n 0 numbering
    'verbose': 3
})


def get_date(strain):
    from datetime import datetime
    date_str = strain.split('|')[2]
    try:
        collection_date = datetime.strptime(date_str, '%Y-%m-%d')
        return collection_date.strftime('%Y-%m-%d')
示例#15
0
# Merge the H4 specific settings into the shared virus_config mapping.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H4',
	'alignment_file':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H4_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/Duck/Czechoslovakia/1956',
	#'force_include':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H4_HI_strains.txt',
	'force_include_all':True,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions

	'cds':[0,None], # define the HA start in 0 numbering

	# define relevant clades in canonical HA1 numbering (+1)
	#
	# The clade designations are disabled with real '#' comments. They used to
	# be wrapped in a triple-quoted string, but a string is not a comment:
	# inside the dict literal it was implicitly concatenated with the next key
	# string ('auspice_prefix'), so 'auspice_prefix' was never set and a bogus
	# key was inserted instead.
	# 'clade_designations': {
	#	'2': [('HA1', 125, 'N'), ('HA1', 134 ,'A'), ('HA1', 183, 'S'), ('HA1', 31,'D'), ('HA1', 172,'N'), ('HA1', 186,'T')],
	#	'3': [('HA1', 134 ,'T'), ('HA1', 183, 'P')],
	#	'4': [('HA1', 125, 'D'), ('HA1', 134 ,'A'), ('HA1', 183, 'S')],
	#	'5': [('HA1', 87, 'N'), ('HA1', 205, 'K'), ('HA1', 216, 'V'), ('HA1', 149, 'L')],
	#	'6': [('HA1', 185,'T'),  ('HA1', 97, 'N'), ('HA1', 197, 'A')],
	#	'6c':[('HA1', 234,'I'),  ('HA1', 97, 'N'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
	#	'6b':[('HA1', 163,'Q'),  ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
	#	'7': [('HA1', 143,'G'),  ('HA1', 97, 'D'), ('HA1', 197, 'T')],
	#	'8': [('HA1', 186,'T'),  ('HA1', 272,'A')],
	#	'84N':[('HA1', 163,'Q'),  ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283,'E'), ('SigPep', 13, 'T'), ('HA1', 84, 'N')]
	#	},
	#'HI_fname':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H4_HI_titers.txt',
	'auspice_prefix':'H4_',
	'html_vars': {'coloring': 'region, date',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': ''},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'layout':'auspice',
	})
示例#16
0
from Bio.Seq import Seq
from Bio.Align import MultipleSeqAlignment
import numpy as np
from itertools import izip

# Merge the Zika specific settings into the shared virus_config mapping.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'Zika',
	# Maps the position of each '|'-separated fasta header field to its name.
	# The index 5 used to appear twice ('country' and 'region'); in a dict
	# literal the later entry silently wins, so 'country' was dropped and
	# 'region' was read from the country column. Per the header layout shown
	# below, the region is field 4 and the country is field 5.
	'fasta_fields':{0:'strain', 2:'accession', 3:'date', 4:'region', 5:'country', 8:'db', 10:'authors'},
	# 0         1    2        3          4            5      6    7     8       9      10
	#>BeH818995|Zika|KU365777|2015-07-21|SouthAmerica|Brazil|Para|Belem|Genbank|Genome|Azevedo et al|?|
	'alignment_file':'data/Zika.fasta',
	'outgroup':'H/PF/2013',
	'aggregate_regions':[('global', None)],
	'force_include_all':False,
	'max_global':True,   # sample as evenly as possible from different geographic regions
	'cds':[0,None], # define the HA start in 0 numbering
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	'min_mutation_frequency':0.499,
	'min_genotype_frequency':0.499,
	'html_vars': {'coloring': 'lbi, dfreq, region, date',
				   'gtplaceholder': 'Genomic positions...',
					'freqdefault': ''},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	})


class zika_filter(virus_filter):
	def __init__(self,min_length = 987, **kwargs):
		'''
示例#17
0
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H3N2',
	'alignment_file': 'data/H3N2_IRD_sequence_october_clean.fasta',
    # data/H3N2_gisaid_epiflu_sequence.fasta  # data/H3N2_IRD_sequence_clean.fasta
    'fasta_fields': {0: 'strain', 1: 'isolate_id', 3: 'passage', 5: 'date', 7: 'lab', 8: "accession"},
	#'alignment_file':'data/H3N2_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/Beijing/32/1992',
	'force_include':'data/H3N2_HI_strains.txt',
	'force_include_all':False,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions
	'cds':[0,None], # define the HA1 start i n 0 numbering
	'n_iqd':5,
	'min_mutation_frequency':0.01,
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at HA1 start, adding sp to obtain numbering from methionine
	'clade_designations': { "3c3.a":[('HA1', 128,'A'), ('HA1',142,'G'), ('HA1',159,'S')],
						   "3c3":   [('HA1', 128,'A'), ('HA1',142,'G'), ('HA1',159,'F')],
						   "3c2.a": [('HA1', 144,'S'), ('HA1',159,'Y'), ('HA1',225,'D'), ('HA1', 311,'H'), ('HA2', 160,'N')],
						   "3c2":   [('HA1', 144,'N'), ('HA1',159,'F'), ('HA1',225,'N'), ('HA2', 160,'N'), ('HA1', 142, 'R')],
						   "3c3.b": [('HA1',  83,'R'), ('HA1',261,'Q'), ('HA1',62,'K'),  ('HA1', 122,'D')]
							},
	'epitope_masks_fname':'source-data/H3N2_epitope_masks.tsv',
	'epitope_mask_version':'wolf',
	'HI_fname':'data/H3N2_HI_titers.txt',
	'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist',
				   'gtplaceholder': 'HA1 positions...',
					'freqdefault': '3c2.a, 3c3.a, 3c3.b'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'excluded_tables': ['NIMR_Sep2012_08.csv'], #, 'nimr-sep-2010-table8', 'nimr-sep-2010-table8','NIMR_Sep2012_11.csv'],
	'layout':'auspice',
	'min_aamuts': 1,
#	'predictors': ['dfreq', 'cHI']												# estimate
	'predictors': { 'dfreq': [2.50, 2.84], 'cHI': [1.68, 0.45] }				# fix predictor: [value, std deviation]
	})
示例#18
0
from process import process, virus_config
from Bio import SeqIO, AlignIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
import numpy as np
from itertools import izip

path_to_augur = './' + '/'.join(sys.argv[0].split('/')[:-2])
std_outgroup_file_blast = path_to_augur+'/source-data/outgroups.fasta'
std_outgroup_file_nuc = path_to_augur+'/source-data/outgroups_nucleotides_unspliced.fasta'
no_raxml_threshold = 15000

virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'fasta_fields':{0:'strain', 1:'isolate_id', 2:'date',  3:'subtype', 4:'country', 5:'region', 7:'host', 6:'passage'},
	'cds':[0,None], # define the HA start i n 0 numbering
	'verbose':3
	})

def get_date(strain):
	"""Return the date field of a '|'-separated strain header as 'YYYY-MM-DD'.

	The date is taken from the third '|'-separated field of ``strain``. When
	that field is not a complete '%Y-%m-%d' date, only its first four
	characters are parsed as a year, which yields January 1st of that year.

	Raises:
		IndexError: if ``strain`` has fewer than three '|'-separated fields.
		ValueError: if not even the leading four characters parse as a year.
	"""
	from datetime import datetime
	date_str = strain.split('|')[2]
	try:
		collection_date = datetime.strptime(date_str, '%Y-%m-%d')
	except ValueError:
		# Only a parse failure triggers the year-only fallback; a bare
		# ``except:`` would also swallow KeyboardInterrupt/SystemExit.
		collection_date = datetime.strptime(date_str[:4], '%Y')
	return collection_date.strftime('%Y-%m-%d')

class mutation_tree(process, flu_filter, tree_refine, virus_clean):
	"""docstring for mutation_tree"""
示例#19
0
receptor_binding_sites = [159,169,170,172,173,203,207]
'''

# Merge the H7 specific settings into the shared virus_config mapping.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H7',
	'alignment_file':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/equine/Prague/2/1956',
	#'force_include':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_strains.txt',
	'force_include_all':False,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions
	'cds':[0,None], # define the translation start in 0 numbering
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	#
	# The clade designations are disabled with real '#' comments. They used to
	# be wrapped in a triple-quoted string, but a string is not a comment:
	# inside the dict literal it was implicitly concatenated with the next key
	# string ('auspice_prefix'), so 'auspice_prefix' was never set and a bogus
	# key was inserted instead.
	# 'clade_designations': {
	#	'2':  [('HA1', 48,'K'), ('HA1', 108, 'A'), ('HA1', 150, 'S')],
	#	'3':  [('HA1', 48,'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I')],
	#	'3a': [('HA1', 37,'A'), ('HA1', 298, 'E'), ('HA1', 48,'R'), ('HA1', 105, 'P'), ('HA1', 150, 'I')],
	#	'172Q': [('HA1', 48,'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I'), ('HA1', 116, 'K'), ('HA1', 172, 'Q')]
	# },
	'auspice_prefix':'H7_',
	#'HI_fname':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H7_HI_titers.txt',
	'html_vars': {'coloring': 'region, date',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': ''},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'layout':'auspice',
	})


class H7_filter(flu_filter):
示例#20
0
epitope_mask = np.fromstring(sp*"0"+"0000000000000000000000000000000000000000000011111011011001010011000100000001001011110011100110101000001100000100000001000110101011111101011010111110001010011111000101011011111111010010001111101110111001010001110011111111000000111110000000101010101110000000000011100100000001011011100000000000001001011000110111111000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", dtype='S1')
# Receptor binding site positions, each shifted by sp-1.
# Built with a list comprehension instead of map(lambda ...): the result is a
# real list on every Python version (map() is a one-shot iterator on Python 3).
receptor_binding_sites = [x + sp - 1 for x in [145, 155, 156, 158, 159, 189, 193]]


virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H3N2',
	'alignment_file':'data/H3N2_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/Beijing/32/1992',
	#'force_include':'source-data/HI_strains.txt',
	'force_include_all':False,
	'max_global':True,   # sample as evenly as possible from different geographic regions 
	'cds':[0,None], # define the HA1 start i n 0 numbering
	'n_iqd':6,
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at HA1 start, adding sp to obtain numbering from methionine
	'clade_designations': { "3c3.a":[(128+sp,'A'), (142+sp,'G'), (159+sp,'S')],
						   "3c3":   [(128+sp,'A'), (142+sp,'G'), (159+sp,'F')],
						   "3c2.a": [(144+sp,'S'), (159+sp,'Y'), (225+sp,'D'), (311+sp,'H'), (489+sp,'N')],
						   "3c2":   [(144+sp,'N'), (159+sp,'F'), (225+sp,'N'), (489+sp,'N'), (142+sp, 'R')],
						   "3c3.b":   [(83+sp,'R'), (261+sp,'Q'), (62+sp,'K'), (122+sp,'D')]
							},
	'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date',
				   'gtplaceholder': 'HA1 positions...',
					'freqdefault': '3c2.a, 3c3.a'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	})


class H3N2_filter(flu_filter):
	def __init__(self,min_length = 987, **kwargs):
示例#21
0
receptor_binding_sites = [159,169,170,172,173,203,207]


# Merge the H10 specific settings into the shared virus_config mapping.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H10',
	'alignment_file':'/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/src/data/H10_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/mallard-duck/ALB/302/1977',
	#'force_include':'H10_HI_strains.txt',
	'force_include_all':True,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	#
	# The clade designations are disabled with real '#' comments. They used to
	# be wrapped in a triple-quoted string, but a string is not a comment:
	# inside the dict literal it was implicitly concatenated with the next key
	# string ('auspice_prefix'), so 'auspice_prefix' was never set and a bogus
	# key was inserted instead.
	# 'clade_designations': {
	#	'1A': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')],
	#	'1B': [('HA1', 75,'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')],
	#	'117V': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K'), ('HA1', 129, 'D'), ('HA1', 117, 'V')]
	# },
	'auspice_prefix':'H10_',
	'HI_fname':'H10_HI_titers.txt',
	'html_vars': {'coloring': 'lbi, dfreq, region, date, cHI, HI_dist',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': '1A, 1B'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'layout':'auspice',
	})


class H10_filter(flu_filter):
	def __init__(self,min_length = 0, **kwargs):
示例#22
0
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'Yam',
    'alignment_file': 'data/yam.fasta',
    'outgroup': 'B/Singapore/11/94',
    'force_include': 'data/yam_hi_strains.tsv',
    'force_include_all': False,
    'date_spec': 'year',
    'max_global':
    True,  # sample as evenly as possible from different geographic regions
    'cds': [11, None],  # define the translation start i n 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    'clade_designations': {
        '2': [('HA1', 48, 'K'), ('HA1', 108, 'A'), ('HA1', 150, 'S')],
        '3': [('HA1', 48, 'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I')],
        '3a': [('HA1', 37, 'A'), ('HA1', 298, 'E'), ('HA1', 48, 'R'),
               ('HA1', 105, 'P'), ('HA1', 150, 'I')],
        '172Q': [('HA1', 48, 'R'), ('HA1', 108, 'P'), ('HA1', 150, 'I'),
                 ('HA1', 116, 'K'), ('HA1', 172, 'Q')]
    },
    'HI_fname': 'data/yam_hi_titers.tsv',
    'html_vars': {
        'coloring': 'lbi, dfreq, region, date, cHI, HI_dist',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '2, 3, 3a'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'layout': 'auspice',
})
示例#23
0
# Merge the H9 specific settings into the shared virus_config mapping.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H9',
	'alignment_file':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/duck/HongKong/147/1977',
	#'force_include':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_strains.txt',
	'force_include_all':False,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions
	#'max_globalh':True,
	'cds':[0,None], # define the HA1 start in 0 numbering
	'n_iqd':5,
	'min_mutation_frequency':0.01,
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at HA1 start, adding sp to obtain numbering from methionine
	#
	# The clade designations are disabled with real '#' comments. They used to
	# be wrapped in a triple-quoted string, but a string is not a comment:
	# inside the dict literal it was implicitly concatenated with the next key
	# string ('auspice_prefix'), so 'auspice_prefix' was never set and a bogus
	# key was inserted instead.
	# 'clade_designations': { "Y439":[('HA1',122,'F'), ('HA1',353,'P')],
	#	"Korea":[('HA1',107,'M'), ('HA1',122,'F'), ('HA1',127,'R'), ('HA1',130,'K'), ('HA1',132,'L'), ('HA1',134,'L'), ('HA1',179,'D'), ('HA1',212,'I'), ('HA1',299,'T'), ('HA1',353,'P'), ('HA1',473,'K')],
	#	"G1":[('HA1',353,'P'), ('HA1',473,'K')],
	#	"Ck-Bei":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
	#	"G9":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
	#	"Y280":[('HA1',299,'T'), ('HA1',473,'K')]
	#	},
	#'epitope_masks_fname':'/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_epitope_masks.tsv',
	#'epitope_mask_version':'wolf',
	#'HI_fname':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_titers.txt',
	'auspice_prefix':'H9_',
	'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, host',
				   'gtplaceholder': 'HA1 positions...',
					'freqdefault': 'Y439, Korea, G1, Ck-Bei, G9, Y280'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'excluded_tables': ['NIMR_Sep2012_08.csv'], #, 'nimr-sep-2010-table8', 'nimr-sep-2010-table8','NIMR_Sep2012_11.csv'],
	'layout':'auspice',
	'min_aamuts': 1,
#	'predictors': ['dfreq', 'cHI']												# estimate
	'predictors': { 'dfreq': [2.50, 2.84], 'cHI': [1.68, 0.45] }				# fix predictor: [value, std deviation]
	})
示例#24
0
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H3N2',
	'alignment_file':'data/h3n2.fasta',
	'outgroup':'A/Beijing/32/1992',
	'force_include':'data/h3n2_hi_strains.tsv',
	'force_include_all':False,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions
	'cds':[0,None], # define the HA1 start i n 0 numbering
	'n_iqd':5,
	'min_mutation_frequency':0.01,
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at HA1 start, adding sp to obtain numbering from methionine
	'clade_designations': { "3c3.a":[('HA1',128,'A'), ('HA1',142,'G'), ('HA1',159,'S')],
						   "3c3":   [('HA1',128,'A'), ('HA1',142,'G'), ('HA1',159,'F')],
						   "3c2.a": [('HA1',144,'S'), ('HA1',159,'Y'), ('HA1',225,'D'), ('HA1',311,'H'), ('HA2',160,'N')],
						   "171K": [('HA1',144,'S'), ('HA1',159,'Y'), ('HA1',171,'K'), ('HA1',225,'D'), ('HA1',311,'H'), ('HA2',77,'V'), ('HA2',155,'E'), ('HA2',160,'N')],
						   "3c2":   [('HA1',144,'N'), ('HA1',159,'F'), ('HA1',225,'N'), ('HA2',160,'N'), ('HA1',142,'R')],
						   "3c3.b": [('HA1',83,'R'), ('HA1',261,'Q'), ('HA1',62,'K'),  ('HA1',122,'D')]
							},
	'epitope_masks_fname':'source-data/H3N2_epitope_masks.tsv',
	'epitope_mask_version':'wolf',
	'HI_fname':'data/h3n2_hi_titers.tsv',
	'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist',
				   'gtplaceholder': 'HA1 positions...',
					'freqdefault': '3c2.a, 3c3.a, 3c3.b'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'excluded_tables': ['NIMR_Sep2012_08.csv'], #, 'nimr-sep-2010-table8', 'nimr-sep-2010-table8','NIMR_Sep2012_11.csv'],
	'layout':'auspice',
	'min_aamuts': 1,
#	'predictors': ['dfreq', 'cHI']												# estimate
	'predictors': { 'dfreq': [2.50, 2.84], 'cHI': [1.68, 0.45] }				# fix predictor: [value, std deviation]
	})
										170,173,174,177,206,207,210,211,212,214,216,		 #Sb
										183,187,191,196,221,225,254,258,288,				 #Ca1
										154,157,158,159,161,163,238,239,242,243,			 #Ca2
										87, 88, 90, 91, 92, 95, 96, 98, 99, 100, 132, 139	 #Cb
									   ]
						else '0' for pos in xrange(1,1725)])

receptor_binding_sites = [x-1 for x in [159,169,170,172,173,203,207]]

virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H1N1',
	'alignment_file':'data/H1N1_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/Tokyo/1/51',
	'time_interval':(1990,2010),
	#'force_include':'source-data/HI_strains.txt',
	'force_include_all':False,
	'max_global':True,   # sample as evenly as possible from different geographic regions 
	'cds':[0,None], # define the HA start i n 0 numbering
	# define relevant clades in canonical HA1 numbering (+1)
	'clade_designations': {},
	'auspice_prefix':'H1N1_',
	})


# H1N1-specific subclass of flu_filter.
class H1N1_filter(flu_filter):
	def __init__(self,min_length = 987, **kwargs):
		'''
		Initialise the filter by delegating to flu_filter.__init__.

		parameters
		min_length  -- minimal length for a sequence to be acceptable
		               (default 987). NOTE(review): not stored or forwarded
		               in the lines visible here -- confirm where it is used.
		**kwargs    -- passed unchanged to flu_filter.__init__
		'''
		flu_filter.__init__(self, **kwargs)
示例#26
0
# Receptor-binding site positions, stored exactly as listed (no offset is
# applied in this block).
receptor_binding_sites = [159,169,170,172,173,203,207]


# Add the Vic-specific settings to virus_config through its update() call.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'Vic',
	'alignment_file':'data/vic.fasta',
	'outgroup':'B/HongKong/02/1993',
	'force_include':'data/vic_hi_strains.tsv',
	'force_include_all':False,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	# Each clade name maps to a list of (segment, position, residue) tuples.
	# '117V' repeats the three '1A' entries and adds two more.
	'clade_designations': {
		'1A': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K')],
		'1B': [('HA1', 75,'K'), ('HA1', 58, 'P'), ('HA1', 165, 'K')],
		'117V': [('HA1', 75,'K'), ('HA1', 58, 'L'), ('HA1', 165, 'K'), ('HA1', 129, 'D'), ('HA1', 117, 'V')]
	},
	'HI_fname':'data/vic_hi_titers.tsv',
	# 'coloring' and 'freqdefault' are comma-separated strings; the names in
	# 'freqdefault' are keys of clade_designations above.
	'html_vars': {'coloring': 'lbi, dfreq, region, date, cHI, HI_dist',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': '1A, 1B'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'layout':'auspice',
	})


class BVic_filter(flu_filter):
	def __init__(self,min_length = 987, **kwargs):
示例#27
0
from tree_refine import tree_refine
from virus_clean import virus_clean
from virus_filter import flu_filter
from collections import defaultdict
from process import process, virus_config
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Align import MultipleSeqAlignment
import numpy as np
from itertools import izip

# NOTE(review): absolute path into one user's home directory -- this will not
# resolve on any other machine; confirm whether a repository-relative path
# is intended.
std_outgroup_file = '/Users/yujiazhou/Documents/nextflu/H9_nextflu-master/augur/source-data/outgroups.fasta'
# Add the settings for this script to virus_config through its update() call.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	# maps an integer index to a field name (presumably the position of the
	# field within a FASTA header -- confirm against the parser)
	'fasta_fields':{0:'strain', 1:'date', 2:'isolate_id', 3:'passage', 4:'subtype', 5:'ori_lab', 6:'sub_lab', 7:'submitter'},
	'cds':[0,None], # define the HA start in 0 numbering
	'auspice_prefix':'H4_',
	'verbose':3
	})


class mutation_tree(process, flu_filter, tree_refine, virus_clean):
	"""Single object combining process, flu_filter, tree_refine and virus_clean.

	Each base class is initialised explicitly in __init__, in the order
	process, flu_filter, tree_refine, virus_clean.
	"""
	def __init__(self, aln_fname, outgroup, outdir = './', formats = None, verbose = 0, **kwargs):
		"""
		parameters
		aln_fname -- handed to flu_filter.__init__ as alignment_file
		outgroup  -- NOTE(review): not used in the lines visible here
		outdir    -- output directory; stored in self.outdir with all trailing
		             '/' stripped and exactly one '/' appended
		formats   -- list stored as self.formats; None (the default) selects
		             ['pdf','svg','png']
		verbose   -- stored as self.verbose
		**kwargs  -- forwarded unchanged to every base-class __init__
		"""
		process.__init__(self, **kwargs)
		flu_filter.__init__(self, alignment_file = aln_fname, **kwargs)
		tree_refine.__init__(self, **kwargs)
		virus_clean.__init__(self, **kwargs)
		self.verbose = verbose
		# Build the default list per instance: a list literal in the signature
		# is created once and shared, so changing self.formats on one instance
		# would silently change the default for every later instance.
		if formats is None:
			formats = ['pdf','svg','png']
		self.formats = formats
		self.outdir = outdir.rstrip('/')+'/'
示例#28
0
# Add the H1N1pdm-specific settings to virus_config through its update() call.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'H1N1pdm',
	'alignment_file':'data/H1N1pdm_gisaid_epiflu_sequence.fasta',
	'outgroup':'A/Swine/Indiana/P12439/00',
	'force_include':'data/H1N1pdm_HI_strains.txt',
	'force_include_all':False,
	'date_spec':'year',
	'max_global':True,   # sample as evenly as possible from different geographic regions

	'cds':[0,None], # define the HA start in 0 numbering

	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	# Each clade name maps to a list of (segment, position, residue) tuples;
	# the segments used here are 'HA1', 'HA2' and 'SigPep'.
	# '6b.1' and '6b.2' each repeat the four '6b' entries and add three more.
	'clade_designations': {
		'2': [('HA1', 125, 'N'), ('HA1', 134 ,'A'), ('HA1', 183, 'S'), ('HA1', 31,'D'), ('HA1', 172,'N'), ('HA1', 186,'T')],
		'3': [('HA1', 134 ,'T'), ('HA1', 183, 'P')],
		'4': [('HA1', 125, 'D'), ('HA1', 134 ,'A'), ('HA1', 183, 'S')],
		'5': [('HA1', 87, 'N'), ('HA1', 205, 'K'), ('HA1', 216, 'V'), ('HA1', 149, 'L')],
		'6': [('HA1', 185,'T'),  ('HA1', 97, 'N'), ('HA1', 197, 'A')],
		'6c':[('HA1', 234,'I'),  ('HA1', 97, 'N'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
		'6b':[('HA1', 163,'Q'),  ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283,'E')],
		'7': [('HA1', 143,'G'),  ('HA1', 97, 'D'), ('HA1', 197, 'T')],
		'8': [('HA1', 186,'T'),  ('HA1', 272,'A')],
		'6b.1':[('HA1', 163,'Q'),  ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283, 'E'), ('SigPep', 13, 'T'), ('HA1', 84, 'N'), ('HA1', 162, 'N')],
		'6b.2':[('HA1', 163,'Q'),  ('HA1', 256, 'T'), ('HA1', 197, 'A'), ('HA1', 283, 'E'), ('HA2', 164, 'G'), ('HA1', 152, 'T'), ('HA2', 174, 'E')]
		},
	'HI_fname':'data/H1N1pdm_HI_titers.txt',
	# 'coloring' and 'freqdefault' are comma-separated strings; the names in
	# 'freqdefault' are keys of clade_designations above.
	'html_vars': {'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, HI_dist',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': '6b, 6c'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},
	'layout':'auspice',
	})
示例#29
0
# Add the H9-specific settings to virus_config through its update() call.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H9',
    # NOTE(review): absolute path into one user's home directory -- confirm
    # whether a repository-relative path is intended.
    'alignment_file':
    '/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/duck/HongKong/147/1977',
    #'force_include':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_strains.txt',
    'force_include_all': False,
    'date_spec': 'year',
    'max_global': True,  # sample as evenly as possible from different geographic regions
    #'max_globalh':True,
    'cds': [0, None],  # define the HA1 start in 0 numbering
    'n_iqd': 5,
    'min_mutation_frequency': 0.01,
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at HA1 start, adding sp to obtain numbering from methionine
    #
    # The clade designations are disabled. They must stay as '#' comments:
    # a triple-quoted string placed here is a real string literal inside the
    # dict, and Python joins it with the next string literal
    # ('auspice_prefix'), producing one bogus key and leaving the real
    # 'auspice_prefix' key unset.
    #'clade_designations': { "Y439":[('HA1',122,'F'), ('HA1',353,'P')],
    #                        "Korea":[('HA1',107,'M'), ('HA1',122,'F'), ('HA1',127,'R'), ('HA1',130,'K'), ('HA1',132,'L'), ('HA1',134,'L'), ('HA1',179,'D'), ('HA1',212,'I'), ('HA1',299,'T'), ('HA1',353,'P'), ('HA1',473,'K')],
    #                        "G1":[('HA1',353,'P'), ('HA1',473,'K')],
    #                        "Ck-Bei":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
    #                        "G9":[('HA1',107,'M'), ('HA1',299,'T'), ('HA1',473,'K')],
    #                        "Y280":[('HA1',299,'T'), ('HA1',473,'K')]
    #                        },

    #'epitope_masks_fname':'/Users/yujiazhou/Documents/FluProject/augur/source-data/H9_epitope_masks.tsv',
    #'epitope_mask_version':'wolf',
    #'HI_fname':'/Users/yujiazhou/Documents/FluProject/augur/src/data/H9_HI_titers.txt',
    'auspice_prefix': 'H9_',
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date, cHI, host',
        'gtplaceholder': 'HA1 positions...',
        # NOTE(review): these names match the disabled clade designations
        # above; nothing in this block defines them.
        'freqdefault': 'Y439, Korea, G1, Ck-Bei, G9, Y280'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
    'excluded_tables': [
        'NIMR_Sep2012_08.csv'
    ],  #, 'nimr-sep-2010-table8', 'nimr-sep-2010-table8','NIMR_Sep2012_11.csv'],
    'layout': 'auspice',
    'min_aamuts': 1,
    #	'predictors': ['dfreq', 'cHI']												# estimate
    'predictors': {
        'dfreq': [2.50, 2.84],
        'cHI': [1.68, 0.45]
    }  # fix predictor: [value, std deviation]
})
示例#30
0
						else '0' for pos in xrange(1,1725)])

# Receptor-binding site positions, stored exactly as listed (no offset is
# applied in this block).
receptor_binding_sites = [159,169,170,172,173,203,207]


# Add the Yam-specific settings to virus_config through its update() call.
virus_config.update({
	# data source and sequence parsing/cleaning/processing
	'virus':'Yam',
	'alignment_file':'data/Yam_gisaid_epiflu_sequence.fasta',
	'outgroup':'B/Singapore/11/94',
	# no force_include file is configured for this lineage
	#'force_include':'source-data/HI_strains.txt',
	'force_include_all':False,
	'max_global':True,   # sample as evenly as possible from different geographic regions
	'cds':[11,None], # define the translation start in 0 numbering
	# define relevant clades in canonical HA1 numbering (+1)
	# numbering starting at methionine including the signal peptide
	# Each clade name maps to a list of 2-tuples (position, residue) -- note
	# there is no segment label in these entries.
	# '3a' repeats the three '3' entries and adds two more.
	'clade_designations': {
		'2':  [(63,'K'), (123, 'A'), (165, 'S')],
		'3':  [(63,'R'), (123, 'P'), (165, 'I')],
		'3a': [(52,'A'), (313, 'E'), (63,'R'), (123, 'P'), (165, 'I')],
	},
	# 'coloring' and 'freqdefault' are comma-separated strings; the names in
	# 'freqdefault' are keys of clade_designations above.
	'html_vars': {'coloring': 'lbi, dfreq, region, date',
				  'gtplaceholder': 'HA1 positions...',
				  'freqdefault': '2, 3, 3a'},
	'js_vars': {'LBItau': 0.0005, 'LBItime_window': 0.5, 'dfreq_dn':2},	
	})


class BYam_filter(flu_filter):
	def __init__(self,min_length = 987, **kwargs):
		'''
示例#31
0
# Add the H1N1pdm-specific settings to virus_config through its update() call.
virus_config.update({
    # data source and sequence parsing/cleaning/processing
    'virus': 'H1N1pdm',
    'alignment_file': 'data/H1N1pdm_gisaid_epiflu_sequence.fasta',
    'outgroup': 'A/Swine/Indiana/P12439/00',
    # no force_include file is configured in this variant
    #'force_include':'source-data/HI_strains.txt',
    'force_include_all': False,
    'max_global':
    True,  # sample as evenly as possible from different geographic regions
    'cds': [0, None],  # define the HA start in 0 numbering
    # define relevant clades in canonical HA1 numbering (+1)
    # numbering starting at methionine including the signal peptide
    # Each clade name maps to a list of (segment, position, residue) tuples;
    # every entry in this block uses the 'HA1' segment.
    'clade_designations': {
        '2': [('HA1', 125, 'N'), ('HA1', 134, 'A'), ('HA1', 183, 'S'),
              ('HA1', 31, 'D'), ('HA1', 172, 'N'), ('HA1', 186, 'T')],
        '3': [('HA1', 134, 'T'), ('HA1', 183, 'P')],
        '4': [('HA1', 125, 'D'), ('HA1', 134, 'A'), ('HA1', 183, 'S')],
        '5': [('HA1', 87, 'N'), ('HA1', 205, 'K'), ('HA1', 216, 'V'),
              ('HA1', 149, 'L')],
        '6': [('HA1', 185, 'T'), ('HA1', 97, 'N'), ('HA1', 197, 'A')],
        '6c': [('HA1', 234, 'I'), ('HA1', 97, 'N'), ('HA1', 197, 'A'),
               ('HA1', 283, 'E')],
        '6b': [('HA1', 163, 'Q'), ('HA1', 256, 'T'), ('HA1', 197, 'A'),
               ('HA1', 283, 'E')],
        '7': [('HA1', 143, 'G'), ('HA1', 97, 'D'), ('HA1', 197, 'T')],
        '8': [('HA1', 186, 'T'), ('HA1', 272, 'A')],
    },
    # 'coloring' and 'freqdefault' are comma-separated strings; the names in
    # 'freqdefault' are keys of clade_designations above.
    'html_vars': {
        'coloring': 'ep, ne, rb, lbi, dfreq, region, date',
        'gtplaceholder': 'HA1 positions...',
        'freqdefault': '6b, 6c'
    },
    'js_vars': {
        'LBItau': 0.0005,
        'LBItime_window': 0.5,
        'dfreq_dn': 2
    },
})