Exemplo n.º 1
0
 def test_ace_for_picrust_ml(self):
     """ace_for_picrust with method 'ML' returns the expected states and intervals.

     Both returned tables are compared with hand-built Tables through
     their tostring() output.
     """
     columns = ("nodes", "trait1", "trait2")
     states, intervals = ace_for_picrust(
         self.in_tree1_fp, self.in_trait1_fp, method="ML")

     wanted_state_rows = [
         ["14", "2.9737", "2.5436"],
         ["12", "2.3701", "2.7056"],
         ["11", "0.8370", "2.9706"],
         ["10", "4.4826", "2.1388"],
     ]
     wanted_states = Table(list(columns), wanted_state_rows)
     self.assertEqual(states.tostring(), wanted_states.tostring())

     # the interval table carries two extra rows: 'sigma' and 'loglik'
     wanted_interval_rows = [
         ["14", "1.4467|4.5007", "2.1979|2.8894"],
         ["12", "0.9729|3.7674", "2.3892|3.0219"],
         ["11", "0.147|1.527", "2.8143|3.1268"],
         ["10", "3.4227|5.5426", "1.8988|2.3788"],
         ["sigma", "1.9742|0.6981", "0.1012|0.0359"],
         ["loglik", "-6.7207", "5.1623"],
     ]
     wanted_intervals = Table(list(columns), wanted_interval_rows)
     self.assertEqual(intervals.tostring(), wanted_intervals.tostring())
Exemplo n.º 2
0
 def test_wagner_for_picrust_with_funky_tip_labels(self):
     """wagner_for_picrust handles a tree with quoted tip labels.

     The returned table is compared with a hand-built Table through
     tostring().
     """
     reconstructed = wagner_for_picrust(self.in_tree2_fp, self.in_trait3_fp)
     wanted_rows = [
         ['11', '1', '3'],
         ['12', '2', '3'],
         ['10', '5', '2'],
         ['14', '5', '3'],
     ]
     wanted = Table(['nodes', 'trait1', 'trait2'], wanted_rows)
     self.assertEqual(reconstructed.tostring(), wanted.tostring())
Exemplo n.º 3
0
 def test_wagner_for_picrust_with_funky_tip_labels(self):
     """wagner_for_picrust handles a tree with quoted tip labels.

     The returned table is compared with a hand-built Table through
     tostring().
     """
     reconstructed = wagner_for_picrust(self.in_tree2_fp, self.in_trait3_fp)
     wanted_rows = [
         ["11", "1", "3"],
         ["12", "2", "3"],
         ["10", "5", "2"],
         ["14", "5", "3"],
     ]
     wanted = Table(["nodes", "trait1", "trait2"], wanted_rows)
     self.assertEqual(reconstructed.tostring(), wanted.tostring())
Exemplo n.º 4
0
 def test_ace_for_picrust_pic_with_funky_tip_labels(self):
     """ace_for_picrust ('pic') handles a tree with underscores in tip labels.

     Only the reconstructed states are checked; the second return value
     is ignored.
     """
     states, _unused_ci = ace_for_picrust(
         self.in_tree2_fp, self.in_trait3_fp, method="pic")
     wanted_rows = [
         ["14", "2.9737", "2.5436"],
         ["12", "1.2727", "3"],
         ["11", "0.6667", "3"],
         ["10", "5", "2"],
     ]
     wanted = Table(["nodes", "trait1", "trait2"], wanted_rows)
     self.assertEqual(states.tostring(), wanted.tostring())
Exemplo n.º 5
0
 def test_ace_for_picrust_pic_single_trait(self):
     """ace_for_picrust ('pic') works with a single column trait table.

     Only the reconstructed states are checked; the second return value
     is ignored.
     """
     states, _unused_ci = ace_for_picrust(
         self.in_tree1_fp, self.in_trait2_fp, method="pic")
     wanted_rows = [
         ['14', '2.9737'],
         ['12', '1.2727'],
         ['11', '0.6667'],
         ['10', '5'],
     ]
     wanted = Table(['nodes', 'trait1'], wanted_rows)
     self.assertEqual(states.tostring(), wanted.tostring())
Exemplo n.º 6
0
 def test_ace_for_picrust_pic_with_funky_tip_labels(self):
     """ace_for_picrust ('pic') handles a tree with underscores in tip labels.

     Only the reconstructed states are checked; the second return value
     is ignored.
     """
     states, _unused_ci = ace_for_picrust(
         self.in_tree2_fp, self.in_trait3_fp, method="pic")
     wanted_rows = [
         ['14', '2.9737', '2.5436'],
         ['12', '1.2727', '3'],
         ['11', '0.6667', '3'],
         ['10', '5', '2'],
     ]
     wanted = Table(['nodes', 'trait1', 'trait2'], wanted_rows)
     self.assertEqual(states.tostring(), wanted.tostring())
Exemplo n.º 7
0
 def test_only_required_columns(self):
     """generate bedgraph from minimal data"""
     zero_rows = [['1', 100, stop, 0] for stop in range(101, 111)]
     ten_rows = [['1', 150, stop, 10] for stop in range(151, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0))

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0')
     expected = '\n'.join([track_line, '1\t100\t110\t0', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. Confirm the expected text before
     # switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 8
0
 def test_merged_overlapping_spans(self):
     """bedgraph merged overlapping spans, one chrom"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0))

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0')
     expected = '\n'.join([track_line, '1\t100\t120\t0', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. Confirm the expected text before
     # switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 9
0
 def test_only_required_columns(self):
     """generate bedgraph from minimal data"""
     zero_rows = [['1', 100, stop, 0] for stop in range(101, 111)]
     ten_rows = [['1', 150, stop, 10] for stop in range(151, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0))

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0')
     expected = '\n'.join([track_line, '1\t100\t110\t0', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. Confirm the expected text before
     # switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 10
0
 def test_merged_overlapping_spans(self):
     """bedgraph merged overlapping spans, one chrom"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0))

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0')
     expected = '\n'.join([track_line, '1\t100\t120\t0', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. Confirm the expected text before
     # switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 11
0
 def test_ace_for_picrust_pic(self):
     """ace_for_picrust with method 'pic' returns the expected states and intervals.

     Both returned tables are compared with hand-built Tables through
     their tostring() output.
     """
     columns = ('nodes', 'trait1', 'trait2')
     states, intervals = ace_for_picrust(
         self.in_tree1_fp, self.in_trait1_fp, method="pic")

     wanted_state_rows = [
         ['14', '2.9737', '2.5436'],
         ['12', '1.2727', '3'],
         ['11', '0.6667', '3'],
         ['10', '5', '2'],
     ]
     wanted_states = Table(list(columns), wanted_state_rows)
     self.assertEqual(states.tostring(), wanted_states.tostring())

     # interval cells hold two values separated by '|'
     wanted_interval_rows = [
         ['14', '0.7955|5.1519', '0.3655|4.7218'],
         ['12', '-1.1009|3.6464', '0.6264|5.3736'],
         ['11', '-0.4068|1.7402', '1.9265|4.0735'],
         ['10', '3.3602|6.6398', '0.3602|3.6398'],
     ]
     wanted_intervals = Table(list(columns), wanted_interval_rows)
     self.assertEqual(intervals.tostring(), wanted_intervals.tostring())
Exemplo n.º 12
0
 def test_int_correctly_formatted(self):
     """int should be correctly formatted"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0), smoothingWindow=10)

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0 '
                   'smoothingWindow=10')
     expected = '\n'.join([track_line, '1\t100\t110\t1', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. The expected data rows also look
     # inconsistent with the input rows (value 1 vs 0) -- confirm them
     # before switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 13
0
 def test_boolean_correctly_formatted(self):
     """boolean setting correctly formatted"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0), autoScale=True)

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0 '
                   'autoScale=on')
     expected = '\n'.join([track_line, '1\t100\t110\t1', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. The expected data rows also look
     # inconsistent with the input rows (value 1 vs 0) -- confirm them
     # before switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 14
0
 def test_int_correctly_formatted(self):
     """int should be correctly formatted"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0), smoothingWindow=10)

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0 '
                   'smoothingWindow=10')
     expected = '\n'.join([track_line, '1\t100\t110\t1', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. The expected data rows also look
     # inconsistent with the input rows (value 1 vs 0) -- confirm them
     # before switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 15
0
 def test_boolean_correctly_formatted(self):
     """boolean setting correctly formatted"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     bgraph = table.tostring(format='bedgraph', name='test track',
                             description='test of bedgraph',
                             color=(255, 0, 0), autoScale=True)

     track_line = ('track type=bedGraph name="test track" '
                   'description="test of bedgraph" color=255,0,0 '
                   'autoScale=on')
     expected = '\n'.join([track_line, '1\t100\t110\t1', '1\t150\t160\t10'])
     # NOTE(review): assertTrue treats its second argument as the failure
     # message, so `expected` is never compared with bgraph; only the
     # truthiness of bgraph is checked. The expected data rows also look
     # inconsistent with the input rows (value 1 vs 0) -- confirm them
     # before switching to assertEqual.
     self.assertTrue(bgraph, expected)
Exemplo n.º 16
0
 def test_ace_for_picrust_ml(self):
     """ace_for_picrust with method 'ML' returns the expected states and intervals.

     Both returned tables are compared with hand-built Tables through
     their tostring() output.
     """
     columns = ('nodes', 'trait1', 'trait2')
     states, intervals = ace_for_picrust(
         self.in_tree1_fp, self.in_trait1_fp, method="ML")

     wanted_state_rows = [
         ['14', '2.9737', '2.5436'],
         ['12', '2.3701', '2.7056'],
         ['11', '0.8370', '2.9706'],
         ['10', '4.4826', '2.1388'],
     ]
     wanted_states = Table(list(columns), wanted_state_rows)
     self.assertEqual(states.tostring(), wanted_states.tostring())

     # the interval table carries two extra rows: 'sigma' and 'loglik'
     wanted_interval_rows = [
         ['14', '1.4467|4.5007', '2.1979|2.8894'],
         ['12', '0.9729|3.7674', '2.3892|3.0219'],
         ['11', '0.147|1.527', '2.8143|3.1268'],
         ['10', '3.4227|5.5426', '1.8988|2.3788'],
         ['sigma', '1.9742|0.6981', '0.1012|0.0359'],
         ['loglik', '-6.7207', '5.1623'],
     ]
     wanted_intervals = Table(list(columns), wanted_interval_rows)
     self.assertEqual(intervals.tostring(), wanted_intervals.tostring())
Exemplo n.º 17
0
def gene_expr_diff_to_table(data_path, sep='\t', stable_id_label='',
        probeset_label='', exp_label='', sig_label='', pval_label='',
        allow_probeset_many_gene=False, validate=True):
    """
        Returns a Table built from a differential expression file.

        As per gene_expr_to_table() but with the addition of the
        sig_label and pval_label columns. Validation and multi-mapped
        probeset removal are only applied when the data file reports
        that probes are present.
    """
    run_record = RunRecord('gene_expr_diff_to_table')
    run_record.addInfo('Reading expression diff file', data_path)

    parsed = _read_data_file(
        data_path,
        sep=sep,
        stable_id_label=stable_id_label,
        probeset_label=probeset_label,
        exp_label=exp_label,
        sig_label=sig_label,
        pval_label=pval_label,
        is_diff=True)
    genes, probes, exp, sig, pval, probes_present = parsed

    if probes_present and validate:
        # genes whose probes and exp values are mismatched are dropped
        genes, probes, exp, sig, pval = _validate_probes_scores(
            genes, probes, exp, sig, pval)

    if probes_present and not allow_probeset_many_gene:
        # each probe should map to only one gene
        genes, probes, exp, sig, pval = _remove_multimapped_probesets(
            genes, probes, exp, sig, pval)

    rows = [list(record) for record in zip(genes, probes, exp, sig, pval)]
    return Table(header=DIFF_HEADER, rows=rows)
Exemplo n.º 18
0
 def __str__(self):
     """Return a sorted text table of common name, species name and Ensembl prefix."""
     rows = []
     for common_name in self._common_species:
         species_name = self._common_species[common_name]
         db_prefix = self._species_ensembl[species_name]
         rows.append([common_name, species_name, db_prefix])
     table = Table(['Common Name', 'Species Name', 'Ensembl Db Prefix'],
                   rows=rows, space=2)
     return str(table.sorted())
Exemplo n.º 19
0
 def test_wagner_for_picrust(self):
     """wagner_for_picrust returns the expected table for valid input.
     """
     reconstructed = wagner_for_picrust(self.in_tree1_fp, self.in_trait1_fp)
     wanted_rows = [
         ['11', '1', '3'],
         ['12', '2', '3'],
         ['10', '5', '2'],
         ['14', '5', '3'],
     ]
     wanted = Table(['nodes', 'trait1', 'trait2'], wanted_rows)
     # NOTE(review): this compares the Table objects themselves, so the
     # result depends on how Table implements equality; other tests of
     # these functions compare tostring() output instead -- confirm which
     # is intended.
     self.assertEqual(reconstructed, wanted)
Exemplo n.º 20
0
 def test_invalid_args_fail(self):
     """incorrect bedgraph args causes RuntimeError"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     # 'abc' is the argument the formatter is expected to reject
     self.assertRaises(RuntimeError,
                       table.tostring,
                       format='bedgraph',
                       name='test track',
                       description='test of bedgraph',
                       color=(255, 0, 0),
                       abc=None)
Exemplo n.º 21
0
def output_pca(PCA_matrix, eigvals, names):
    """Builds a table of principal coordinates analysis results.

    PCA_matrix and eigvals are generated with the get_principal_coordinates
    function. Names is a list of names that corresponds to the columns in the
    PCA_matrix. It is the order that samples were represented in the initial
    distance matrix.

    Eigenvectors are reported in order of their corresponding eigenvalues,
    from greatest to least. Two extra rows give the eigenvalues and the
    percentage each contributes to the eigenvalue total.

    returns a cogent Table object: the eigenvector table with the
    eigenvalue table appended under a 'Type' column"""

    # indices of the eigenvalues, largest eigenvalue first
    vector_order = list(argsort(eigvals))
    vector_order.reverse()

    # both tables share the same header
    vec_num_header = ['vec_num-%d' % i for i in range(len(eigvals))]
    header = ['Label'] + vec_num_header

    # one row per sample: its coordinate on each eigenvector
    rows = []
    for name_i, name in enumerate(names):
        row = [name]
        for vec_i in vector_order:
            row.append(PCA_matrix[vec_i, name_i])
        rows.append(row)
    eigenvectors = Table(header=header,
                         rows=rows,
                         digits=2,
                         space=2,
                         title='Eigenvectors')

    # eigenvalues and the percentage each contributes to their total
    rows = [['eigenvalues'] + [eigvals[vec_i] for vec_i in vector_order]]
    pcnts = (eigvals / sum(eigvals)) * 100
    rows += [['var explained (%)'] + [pcnts[vec_i] for vec_i in vector_order]]
    eigenvalues = Table(header=header,
                        rows=rows,
                        digits=2,
                        space=2,
                        title='Eigenvalues')

    return eigenvectors.appended('Type', eigenvalues, title='')
Exemplo n.º 22
0
 def test_raises_on_incorrect_format_val(self):
     """raise AssertionError when provide incorrect format value"""
     zero_rows = [['1', pos, pos + 1, 0] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10] for pos in range(150, 161)]
     table = Table(header=['chrom', 'start', 'end', 'value'],
                   rows=zero_rows + ten_rows)

     # 'sqrt' is the windowingFunction value expected to be rejected
     self.assertRaises(AssertionError,
                       table.tostring,
                       format='bedgraph',
                       name='test track',
                       description='test of bedgraph',
                       color=(255, 0, 0),
                       windowingFunction='sqrt')
Exemplo n.º 23
0
 def test_invalid_table_fails(self):
     """assertion error if table has > 4 columns"""
     zero_rows = [['1', pos, pos + 1, 0, 1] for pos in range(100, 121)]
     ten_rows = [['1', pos, pos + 1, 10, 1] for pos in range(150, 161)]
     five_columns = ['chrom', 'start', 'end', 'value', 'blah']
     table = Table(header=five_columns, rows=zero_rows + ten_rows)

     self.assertRaises(AssertionError,
                       table.tostring,
                       format='bedgraph',
                       name='test track',
                       description='test of bedgraph',
                       color=(255, 0, 0),
                       abc=None)
Exemplo n.º 24
0
 def test_ace_for_picrust_pic(self):
     """ace_for_picrust with method 'pic' returns the expected states and intervals.

     Both returned tables are compared with hand-built Tables through
     their tostring() output.
     """
     columns = ("nodes", "trait1", "trait2")
     states, intervals = ace_for_picrust(
         self.in_tree1_fp, self.in_trait1_fp, method="pic")

     wanted_state_rows = [
         ["14", "2.9737", "2.5436"],
         ["12", "1.2727", "3"],
         ["11", "0.6667", "3"],
         ["10", "5", "2"],
     ]
     wanted_states = Table(list(columns), wanted_state_rows)
     self.assertEqual(states.tostring(), wanted_states.tostring())

     # interval cells hold two values separated by '|'
     wanted_interval_rows = [
         ["14", "0.7955|5.1519", "0.3655|4.7218"],
         ["12", "-1.1009|3.6464", "0.6264|5.3736"],
         ["11", "-0.4068|1.7402", "1.9265|4.0735"],
         ["10", "3.3602|6.6398", "0.3602|3.6398"],
     ]
     wanted_intervals = Table(list(columns), wanted_interval_rows)
     self.assertEqual(intervals.tostring(), wanted_intervals.tostring())
Exemplo n.º 25
0
 def _get_allele_table_record(self):
     """Cache this variation's allele frequencies under the 'AlleleFreqs' key.

     Queries the allele table for rows matching the variation id. The
     cache entry is set to NULL_VALUE when there are no rows, or when
     no row has a truthy sample_id; otherwise it is a Table of allele,
     freq and sample_id sorted by sample_id then allele.
     """
     cache_key = 'AlleleFreqs'
     variation_id = self._table_rows['variation_feature']['variation_id']
     allele_table = self.allele_table
     query = sql.select([allele_table],
                        allele_table.c.variation_id == variation_id)
     records = list(query.execute())

     if not records:
         self._cached[cache_key] = self.NULL_VALUE
         return

     self._table_rows['allele_table'] = records
     # rows lacking a sample_id are left out of the frequency table
     data = []
     for rec in records:
         if rec['sample_id']:
             data.append((rec['allele'], rec['frequency'], rec['sample_id']))

     if not data:
         self._cached[cache_key] = self.NULL_VALUE
         return

     freqs = Table(header='allele freq sample_id'.split(), rows=data)
     self._cached[cache_key] = freqs.sorted(['sample_id', 'allele'])
Exemplo n.º 26
0
def gene_expr_to_table(data_path, sep='\t', stable_id_label='',
        probeset_label='', exp_label='', allow_probeset_many_gene=False,
        validate=True):
    """
        Returns a cogent table object

        Reads a simple delimited (tab by default) representation of gene
        expression data, which may have come from either micro-array or
        mRNA-seq experiments.

        Micro-array data has probeset information for each gene and a
        score matching each probe.

        RNA-seq data has no probes, only a single score per gene. In
        that case a fake probe of the form 'P' + a unique integer is
        created for each gene.

        Probeset ids and expression scores are separated by the pipe
        -- | -- character, and are converted to tuples of ints or floats
        respectively.

        Arguments:
            - probeset_label: name of column containing probesets
            - exp_label: name of column containing expression scores
            - stable_id_label: name of column containing Ensembl stable IDs
            - allow_probeset_many_gene: whether one probeset can map to
                multiple genes. If not, probes and scores that multi-map
                are removed.
            - validate: checks that stable IDs are unique in the file and
                that each row has as many probesets as expression
                scores. Offending gene entries are removed.

        Validation and multi-map removal only run when the data file
        reports that probes are present.
    """
    run_record = RunRecord('geneExprDataToTable')
    run_record.addInfo('Reading expression data', data_path)

    parsed = _read_data_file(
        data_path,
        sep=sep,
        stable_id_label=stable_id_label,
        probeset_label=probeset_label,
        exp_label=exp_label)
    genes, probes, exp, probes_present = parsed

    if probes_present and validate:
        # genes whose probes and scores are mismatched are dropped
        genes, probes, exp = _validate_probes_scores(genes, probes, exp)

    if probes_present and not allow_probeset_many_gene:
        # each probe should map to only one gene
        genes, probes, exp = _remove_multimapped_probesets(
            genes, probes, exp)

    rows = [list(record) for record in zip(genes, probes, exp)]
    return Table(header=EXPR_HEADER, rows=rows)
Exemplo n.º 27
0
 def __repr__(self):
     """Return the matrix as a text table labelled by the entries of Tags."""
     from cogent.util.table import Table

     def as_unique_label(position, tag):
         # non-string sequences are flattened to a comma separated string
         if hasattr(tag, '__len__') and not isinstance(tag, basestring):
             tag = ','.join(str(z) for z in tag)
         # Table needs unique labels, so the position is appended
         return "%s (%s)" % (tag, position)

     labels = [as_unique_label(i, tag) for (i, tag) in enumerate(self.Tags)]
     heading = [''] + labels
     body = []
     for (name, row) in zip(labels, self.Matrix):
         body.append([name] + list(row))
     return str(Table(header=heading, rows=body))
Exemplo n.º 28
0
 def __repr__(self):
     """print table format

     Returns an empty string when no feature levels are recorded for
     self.Species; otherwise a text table with one row per feature type
     and its levels joined by ', '.
     """
     if self.Species not in self._species_feature_levels:
         return ''

     feature_levels = self._species_feature_levels[self.Species]
     collate = [[feature, ', '.join(feature_levels[feature].levels)]
                for feature in feature_levels.keys()]
     return str(Table(['Type', 'Levels'], collate, title=self.Species))
Exemplo n.º 29
0
 def __repr__(self):
     """print table format

     One text table per species, titled with the species and listing
     each feature type with its levels; tables are joined by newlines.
     """
     header = ['Type', 'Levels']
     tables = []
     for species in self._species_feature_levels.keys():
         feature_levels = self._species_feature_levels[species]
         collate = [[feature, feature_levels[feature].levels]
                    for feature in feature_levels.keys()]
         tables.append(str(Table(header, collate, title=species)))
     return '\n'.join(tables)
Exemplo n.º 30
0
def output_pca(PCA_matrix, eigvals, names):
    """Builds a table of principal coordinates analysis results.

    PCA_matrix and eigvals are generated with the get_principal_coordinates
    function. Names is a list of names that corresponds to the columns in the
    PCA_matrix. It is the order that samples were represented in the initial
    distance matrix.

    Eigenvectors are reported in order of their corresponding eigenvalues,
    from greatest to least. Two extra rows give the eigenvalues and the
    percentage each contributes to the eigenvalue total.

    returns a cogent Table object: the eigenvector table with the
    eigenvalue table appended under a 'Type' column"""

    # indices of the eigenvalues, largest eigenvalue first
    vector_order = list(argsort(eigvals))
    vector_order.reverse()

    # both tables share the same header
    vec_num_header = ['vec_num-%d' % i for i in range(len(eigvals))]
    header = ['Label'] + vec_num_header

    # one row per sample: its coordinate on each eigenvector
    rows = []
    for name_i, name in enumerate(names):
        row = [name]
        for vec_i in vector_order:
            row.append(PCA_matrix[vec_i,name_i])
        rows.append(row)
    eigenvectors = Table(header=header,rows=rows,digits=2,space=2,
                    title='Eigenvectors')

    # eigenvalues and the percentage each contributes to their total
    rows = [['eigenvalues']+[eigvals[vec_i] for vec_i in vector_order]]
    pcnts = (eigvals/sum(eigvals))*100
    rows += [['var explained (%)']+[pcnts[vec_i] for vec_i in vector_order]]
    eigenvalues = Table(header=header,rows=rows,digits=2,space=2,
                    title='Eigenvalues')

    return eigenvectors.appended('Type', eigenvalues, title='')
Exemplo n.º 31
0
def parse_wagner_parsimony_output(raw_output_with_comments,remove_num_tips=0):
    '''Parses wagner parsimony output from Count and returns a Cogent Table object

    Only lines starting with '# FAMILY' are used. Each is split on tabs
    and its first field and last four fields are discarded. When
    remove_num_tips is non-zero, that many columns following the first
    remaining column are dropped as well (tip data rather than internal
    node data). The first retained line becomes the table header.
    '''
    kept_lines = []
    for line in raw_output_with_comments:
        # keep only lines with actual ASR count information
        if line[0:8] != '# FAMILY':
            continue
        kept_lines.append(line.split('\t')[1:-4])

    if remove_num_tips:
        # remove columns that contain trait data for tips
        first_kept = remove_num_tips + 1
        kept_lines = [[fields[0]] + fields[first_kept:]
                      for fields in kept_lines]

    header = kept_lines[0]
    body = kept_lines[1:]
    return Table(header, body)
Exemplo n.º 32
0
 def __str__(self):
     """Returns string representation of SummaryStatistics object.

     Builds a two column (Statistic, Value) table from whichever of the
     listed statistics can be read from the object. Statistics whose
     value is falsy (including 0) are left out, as are those whose
     lookup raises. Returns an empty string if nothing could be shown.
     """
     result = []
     for field in ["Count", "Sum", "Median", "Mean", "StandardDeviation",
                   "Variance", "SumSquares"]:
         try:
             val = getattr(self, field)
             if not val:
                 continue
             result.append([field, val])
         except Exception:
             # best effort: a statistic that cannot be computed is simply
             # omitted. Exception (rather than a bare except) lets
             # KeyboardInterrupt and SystemExit propagate.
             pass
     if not result:
         return ''
     return str(Table("Statistic Value".split(), result,
         column_templates={'Value': "%.4g"}))
Exemplo n.º 33
0
    def _get_method_link_species_set(self):
        """Return (and cache) a Table of alignment methods for this species set.

        Selects method_link rows whose class contains 'alignment', then
        the method_link_species_set rows that use one of those methods
        and one of the ids in self.species_set. The result is stored on
        self._method_species_link and reused on later calls.
        """
        if self._method_species_link is not None:
            return self._method_species_link

        ml_table = self.ComparaDb.getTable('method_link')
        is_alignment = ml_table.c['class'].like('%alignment%')
        methods = sql.select([ml_table], is_alignment).execute().fetchall()
        methods_by_id = dict((row['method_link_id'], row) for row in methods)

        mlss_table = self.ComparaDb.getTable('method_link_species_set')
        wanted = sql.and_(
            mlss_table.c.species_set_id.in_(self.species_set),
            mlss_table.c.method_link_id.in_(methods_by_id.keys()))
        records = sql.select([mlss_table], wanted).execute().fetchall()

        # columns: method_link_species_set_id, method_link_id,
        # species_set_id, the method_link type and the
        # method_link_species_set name
        header = [
            'method_link_species_set_id', 'method_link_id', 'species_set_id',
            'align_method', 'align_clade'
        ]
        rows = []
        for record in records:
            ml_id = record['method_link_id']
            sp_set_id = record['species_set_id']
            ml_sp_set_id = record['method_link_species_set_id']
            clade_name = record['name']
            aln_name = methods_by_id[ml_id]['type']
            rows.append([ml_sp_set_id, ml_id, sp_set_id, aln_name, clade_name])

        if not rows:
            # no matches: hand Table an empty array with the right width
            rows = empty((0, len(header)))

        align_table = Table(header=header,
                            rows=rows,
                            space=2,
                            row_ids=True,
                            title='Align Methods/Clades')
        self._method_species_link = align_table
        return align_table
Exemplo n.º 34
0
 def test_ace_for_picrust_pic(self):
     """ace_for_picrust with method 'pic' returns the expected states and intervals.

     Both returned tables are compared with hand-built Tables through
     their tostring() output.
     """
     columns = ('nodes', 'trait1', 'trait2')
     states, intervals = ace_for_picrust(
         self.in_tree1_fp, self.in_trait1_fp, method="pic")

     wanted_state_rows = [
         ['14', '2.9737', '2.5436'],
         ['12', '1.2727', '3'],
         ['11', '0.6667', '3'],
         ['10', '5', '2'],
     ]
     wanted_states = Table(list(columns), wanted_state_rows)
     self.assertEqual(states.tostring(), wanted_states.tostring())

     # interval cells hold two values separated by '|'
     wanted_interval_rows = [
         ['14', '0.7955|5.1519', '0.3655|4.7218'],
         ['12', '-1.1009|3.6464', '0.6264|5.3736'],
         ['11', '-0.4068|1.7402', '1.9265|4.0735'],
         ['10', '3.3602|6.6398', '0.3602|3.6398'],
     ]
     wanted_intervals = Table(list(columns), wanted_interval_rows)
     self.assertEqual(intervals.tostring(), wanted_intervals.tostring())
Exemplo n.º 35
0
 def test_ace_for_picrust_ml(self):
     """ace_for_picrust with method 'ML' returns the expected states and intervals.

     Both returned tables are compared with hand-built Tables through
     their tostring() output.
     """
     columns = ('nodes', 'trait1', 'trait2')
     states, intervals = ace_for_picrust(
         self.in_tree1_fp, self.in_trait1_fp, method="ML")

     wanted_state_rows = [
         ['14', '2.9737', '2.5436'],
         ['12', '2.3701', '2.7056'],
         ['11', '0.8370', '2.9706'],
         ['10', '4.4826', '2.1388'],
     ]
     wanted_states = Table(list(columns), wanted_state_rows)
     self.assertEqual(states.tostring(), wanted_states.tostring())

     # the interval table carries two extra rows: 'sigma' and 'loglik'
     wanted_interval_rows = [
         ['14', '1.4467|4.5007', '2.1979|2.8894'],
         ['12', '0.9729|3.7674', '2.3892|3.0219'],
         ['11', '0.147|1.527', '2.8143|3.1268'],
         ['10', '3.4227|5.5426', '1.8988|2.3788'],
         ['sigma', '1.9742|0.6981', '0.1012|0.0359'],
         ['loglik', '-6.7207', '5.1623'],
     ]
     wanted_intervals = Table(list(columns), wanted_interval_rows)
     self.assertEqual(intervals.tostring(), wanted_intervals.tostring())
Exemplo n.º 36
0
 def test_wagner_for_picrust_with_funky_tip_labels(self):
     """wagner_for_picrust handles a tree with quoted tip labels.

     The returned table is compared with a hand-built Table through
     tostring().
     """
     reconstructed = wagner_for_picrust(self.in_tree2_fp, self.in_trait3_fp)
     wanted_rows = [
         ['11', '1', '3'],
         ['12', '2', '3'],
         ['10', '5', '2'],
         ['14', '5', '3'],
     ]
     wanted = Table(['nodes', 'trait1', 'trait2'], wanted_rows)
     self.assertEqual(reconstructed.tostring(), wanted.tostring())
    fragment.annotations['element'] = possibilities
    

        
                    
                    
                    
# Print results table
def formatcol(value):
    """Return value as text: floats as '%.4e' notation, anything else via str()."""
    return "%.4e" % value if isinstance(value, float) else str(value)

# Build the results table from column_headings and rows; column_templates
# maps the YR_evalue, RT_evalue and MT_evalue columns to formatcol.
t = Table(header = column_headings, rows = rows, column_templates = dict(YR_evalue=formatcol, RT_evalue=formatcol, MT_evalue=formatcol))
# The title names the genome; the legend lists the numbered element layouts.
t.Title = "YR element hits in " + genome_code
t.Legend = 'Indications are structural and not phylogenetic.\nIndications depend on Ugene parameters.\n1. pat1-like: ==>=RT=MT=>>=>=YR=>>=.\n2. kangaroo: ==>=RT=MT=>>=>=RY=>>=.\n3. PAT: =>RT=MT=YR=>>=>=>>.\n4. dirs1-like: =>=RT=MT=YR=<>>=<<=.\n5. Ngaro or Viper: =>RT=YR=>>=>=>>=.'
# Print the table, then write it to the file named outnamestart + 'table.out'.
print t
t.writeToFile(outnamestart + 'table.out')
                    



#Write protein fasta

#If there are multiple RT or multiple MT domains on a single fragment, the distance between them is less than the allowed intron length, both of them are within the TE coordinates,
#and they are on the same strand, they will be concatenated in the fasta file.
#If they are both in the element but the distance between them is larger than the allowed intron length, they will be written to the fasta file as individual sequences, and only the
#sequence marked as 'serial_on_frag=1' will be concatenated with the other domains.
#A warning will be issued, and it is important to check manually because another solution may be better, e.g. choosing number 2 or concatenating anyway.
Exemplo n.º 38
0
 def test_ace_for_picrust_pic_single_trait(self):
     """ace_for_picrust ('pic') works with a single column trait table.

     Only the reconstructed states are checked; the second return value
     is ignored.
     """
     states, _unused_ci = ace_for_picrust(
         self.in_tree1_fp, self.in_trait2_fp, method="pic")
     wanted_rows = [
         ['14', '2.9737'],
         ['12', '1.2727'],
         ['11', '0.6667'],
         ['10', '5'],
     ]
     wanted = Table(['nodes', 'trait1'], wanted_rows)
     self.assertEqual(states.tostring(), wanted.tostring())
Exemplo n.º 39
0
 def test_ace_for_picrust_pic_with_funky_tip_labels(self):
     """ace_for_picrust ('pic') handles a tree with underscores in tip labels.

     Only the reconstructed states are checked; the second return value
     is ignored.
     """
     states, _unused_ci = ace_for_picrust(
         self.in_tree2_fp, self.in_trait3_fp, method="pic")
     wanted_rows = [
         ['14', '2.9737', '2.5436'],
         ['12', '1.2727', '3'],
         ['11', '0.6667', '3'],
         ['10', '5', '2'],
     ]
     wanted = Table(['nodes', 'trait1', 'trait2'], wanted_rows)
     self.assertEqual(states.tostring(), wanted.tostring())
Exemplo n.º 40
0
 def test_ace_for_picrust_pic_single_trait(self):
     """ace_for_picrust ('pic') works with a single column trait table.

     Only the reconstructed states are checked; the second return value
     is ignored.
     """
     states, _unused_ci = ace_for_picrust(
         self.in_tree1_fp, self.in_trait2_fp, method="pic")
     wanted_rows = [
         ["14", "2.9737"],
         ["12", "1.2727"],
         ["11", "0.6667"],
         ["10", "5"],
     ]
     wanted = Table(["nodes", "trait1"], wanted_rows)
     self.assertEqual(states.tostring(), wanted.tostring())
def run_asr_in_parallel(tree, table, asr_method, parallel_method='sge',tmp_dir='jobs/',num_jobs=100, verbose=False):
    '''Runs the ancestral state reconstructions in parallel, one job per trait.

    The trait table is split into single-trait tables, each written to a
    temporary file. One command line per trait is written to a jobs file,
    which is submitted through the launcher script for parallel_method.
    Once all per-trait output files exist they are combined, and every
    temporary file is removed.

    Arguments:
        - tree: tree argument passed to the reconstruction script (-t)
        - table: filename of the tab-delimited trait table (with header)
        - asr_method: method passed to the reconstruction script (-m)
        - parallel_method: 'sge', 'multithreaded' or 'torque'
        - tmp_dir: directory in which temporary files are created
        - num_jobs: passed through to submit_jobs
        - verbose: print progress messages

    Returns (combined_table, combined_ci_table) as Table objects.

    Raises RuntimeError if parallel_method is not recognised.
    '''
    # launcher script for each supported parallel backend
    launcher_scripts = {
        'sge': 'start_parallel_jobs_sge.py',
        'multithreaded': 'start_parallel_jobs.py',
        'torque': 'start_parallel_jobs_torque.py',
    }
    # validate first so a bad value fails before any file is created
    if parallel_method not in launcher_scripts:
        raise RuntimeError(
            "Unknown parallel_method %r; expected one of: %s"
            % (parallel_method, ', '.join(sorted(launcher_scripts))))

    scripts_dir = join(get_picrust_project_dir(), 'scripts')
    asr_script_fp = join(scripts_dir, 'ancestral_state_reconstruction.py')
    cluster_jobs_fp = join(scripts_dir, launcher_scripts[parallel_method])

    if verbose:
        print("Loading trait table...")

    trait_table = LoadTable(filename=table, header=True, sep='\t')
    num_columns = trait_table.Shape[1]

    created_tmp_files = []
    output_files = []
    ci_files = []

    # the jobs file holds one command per trait for the parallel launcher
    jobs_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='jobs_asr_')
    created_tmp_files.append(jobs_fp)

    if verbose:
        print("Creating temporary input files in:  %s" % (tmp_dir,))

    # 'with' closes the jobs file even if writing a trait file fails
    with open(jobs_fp, 'w') as jobs:
        # column 0 is kept in every single-trait table; traits start at 1
        for i in range(1, num_columns):
            single_col_table = trait_table.getColumns([0, i])

            single_col_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='in_asr_')
            single_col_table.writeToFile(single_col_fp, sep='\t')
            created_tmp_files.append(single_col_fp)

            tmp_output_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='out_asr_')
            output_files.append(tmp_output_fp)
            tmp_ci_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='out_asr_ci_')
            ci_files.append(tmp_ci_fp)

            cmd = "{0} -i {1} -t {2} -m {3} -o {4} -c {5}".format(
                asr_script_fp, single_col_fp, tree, asr_method,
                tmp_output_fp, tmp_ci_fp)
            jobs.write(cmd + "\n")

    created_tmp_files.extend(output_files)
    created_tmp_files.extend(ci_files)

    if verbose:
        print("Launching parallel jobs.")

    job_prefix = 'asr'
    submit_jobs(cluster_jobs_fp, jobs_fp, job_prefix, num_jobs=num_jobs)

    if verbose:
        print("Jobs are now running. Will wait until finished.")

    # block until every per-trait output file has been produced
    wait_for_output_files(output_files)

    if verbose:
        print("Jobs are done running. Now combining all tmp files.")

    combined_table = combine_asr_tables(output_files)
    combined_ci_table = combine_asr_tables(ci_files)

    # first combined row is the header, the rest are data rows
    combined_table = Table(header=combined_table[0], rows=combined_table[1:])
    combined_ci_table = Table(header=combined_ci_table[0], rows=combined_ci_table[1:])

    # clean up all tmp files
    for tmp_fp in created_tmp_files:
        remove(tmp_fp)

    return combined_table, combined_ci_table