Пример #1
0
def read_ratios(params, args_in):
    """Read the ratios matrix named by ``args_in.ratios``.

    The source is fetched with ``util.read_url`` when the name is an
    HTTP(S) URL and read with ``util.read_dfile`` otherwise. In both cases
    it is parsed with a header row and double-quote quoting.

    Args:
        params: configuration mapping; the ``'normalize_ratios'`` and
            ``'case_sensitive'`` entries are read.
        args_in: parsed arguments; the ``ratios`` and ``case_sensitive``
            attributes are read, and the object is passed on to
            ``test_data_change``.

    Returns:
        The matrix produced by ``dm.DataMatrixFactory.create_from``.
    """
    if not params['normalize_ratios']:
        ratio_filters = []
    elif test_data_change(params, args_in):
        # Turn off the nochange_filter if you're resuming a run and have
        # changed the data matrix.
        ratio_filters = [dm.center_scale_filter]
    else:
        ratio_filters = [dm.nochange_filter, dm.center_scale_filter]

    matrix_factory = dm.DataMatrixFactory(ratio_filters)
    matrix_filename = args_in.ratios

    # A tuple of prefixes lets https sources take the download path as well,
    # instead of being handed to read_dfile as if they were local files.
    # NOTE(review): assumes util.read_url can fetch https URLs - confirm.
    if matrix_filename.startswith(('http://', 'https://')):
        indata = util.read_url(matrix_filename).decode('utf-8')
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    # Either the configuration or the command line can request case
    # sensitivity; create_from always receives a plain True/False.
    case_sensitive = bool(params['case_sensitive'] or args_in.case_sensitive)
    return matrix_factory.create_from(infile, case_sensitive)
Пример #2
0
    def setUp(self):  # pylint: disable=C0103
        """Prepare the fixture shared by the tests of this case.

        Loads the halo ratios file through a factory using the nochange and
        center/scale filters, builds the organism with testutil.make_halo,
        and stores the configuration before the membership is read.
        """
        self.search_distances = dict(upstream=(-20, 150))
        self.scan_distances = dict(upstream=(-30, 250))

        filters = [dm.nochange_filter, dm.center_scale_filter]
        factory = dm.DataMatrixFactory(filters)
        ratios_file = util.read_dfile(
            'example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"')
        self.ratio_matrix = factory.create_from(ratios_file)
        self.organism = testutil.make_halo(
            self.search_distances, self.scan_distances, self.ratio_matrix)

        num_clusters = 43
        clusters_per_col = int(round(num_clusters * 2.0 / 3.0))
        self.config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'memb.clusters_per_row': 2,
            'memb.clusters_per_col': clusters_per_col,
            'num_clusters': num_clusters,
            'num_iterations': 2000,
        }

        # The membership reader relies on config_params, so it runs only
        # after the configuration has been assigned.
        self.membership = self.__read_members()
        self.iteration_result = dict(iteration=51)
Пример #3
0
 def test_simple_filter(self):
     """Check a factory that was built with the single filter times2.

     The matrix created from self.dfile must keep its 2x2 shape and its
     row/column names, and hold the values asserted below.
     """
     factory = dm.DataMatrixFactory([times2])
     matrix = factory.create_from(self.dfile)
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in unittest as of Python 3.12.
     self.assertEqual(2, matrix.num_rows)
     self.assertEqual(2, matrix.num_columns)
     self.assertEqual(matrix.column_names, ["H2", "H3"])
     self.assertEqual(matrix.row_names, ["R1", "R2"])
     # Element-wise comparison yields an array, hence the .all() reduction.
     self.assertTrue((matrix.values[0] == [2, 4]).all())
     self.assertTrue((matrix.values[1] == [6, 8]).all())
Пример #4
0
 def test_no_filters(self):
     """Check a factory that was built with an empty filter list.

     The matrix created from self.dfile must keep its 2x2 shape and its
     row/column names, and hold the values asserted below.
     """
     factory = dm.DataMatrixFactory([])
     matrix = factory.create_from(self.dfile)
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in unittest as of Python 3.12.
     self.assertEqual(2, matrix.num_rows)
     self.assertEqual(2, matrix.num_columns)
     self.assertEqual(matrix.column_names, ["H2", "H3"])
     self.assertEqual(matrix.row_names, ["R1", "R2"])
     # Element-wise comparison yields an array, hence the .all() reduction.
     self.assertTrue((matrix.values[0] == [1, 2]).all())
     self.assertTrue((matrix.values[1] == [3, 4]).all())
Пример #5
0
 def test_with_na_values(self):
     """Check a factory fed with a DelimitedFile containing NA values.

     The first column of the resulting matrix is expected to be NaN in
     both rows, while the second column holds 2.0 and 4.0.
     """
     factory = dm.DataMatrixFactory([])
     matrix = factory.create_from(self.dfile_with_na)
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in unittest as of Python 3.12.
     self.assertEqual(2, matrix.num_rows)
     self.assertEqual(2, matrix.num_columns)
     self.assertEqual(matrix.column_names, ["H2", "H3"])
     self.assertEqual(matrix.row_names, ["R1", "R2"])
     # NaN never compares equal to itself, so it is checked with np.isnan.
     self.assertTrue(np.isnan(matrix.values[0][0]))
     self.assertEqual(2.0, matrix.values[0][1])
     self.assertTrue(np.isnan(matrix.values[1][0]))
     self.assertEqual(4.0, matrix.values[1][1])
Пример #6
0
    def test_motif_scoring(self):
        """Run the MEME scoring function end to end on the halo example data.

        Builds the ratio matrix, organism and a fake membership, assembles
        the configuration and computes the scores for iteration 100.
        """
        search_distances = dict(upstream=(-20, 150))
        scan_distances = dict(upstream=(-30, 250))

        filters = [dm.nochange_filter, dm.center_scale_filter]
        matrix_factory = dm.DataMatrixFactory(filters)
        ratios_file = util.read_dfile(
            'example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"')
        ratio_matrix = matrix_factory.create_from(ratios_file)
        organism = testutil.make_halo(
            search_distances, scan_distances, ratio_matrix)
        membership = FakeMembership()

        # Per-section settings, kept in the same key order as the flat
        # literal they replace.
        meme_section = {
            'schedule': lambda i: True,
            'version': '4.3.0',
            'global_background': False,
            'arg_mod': 'zoops',
            'nmotifs_rvec':
                'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
            'use_revcomp': 'True',
            'max_width': 24,
            'background_order': 3,
        }
        motifs_section = {
            'schedule': lambda i: True,
            'scaling': ('scaling_const', 1.0),
        }
        config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'num_clusters': 1,
            'output_dir': 'out',
            'debug': {},
            'search_distances': dict(upstream=(-20, 150)),
            'num_iterations': 2000,
            'MEME': meme_section,
            'Motifs': motifs_section,
        }

        func = motif.MemeScoringFunction(
            organism, membership, ratio_matrix, config_params=config_params)
        iteration_result = dict(iteration=100)
        matrix = func.compute(iteration_result)
Пример #7
0
    def setUp(self):  # pylint: disable=C0103
        """Prepare the fixture shared by the tests of this case.

        Loads the halo ratios file through a factory using the nochange and
        center/scale filters, builds the organism with testutil.make_halo,
        and stores the full configuration (including the Columns, Rows,
        Motifs, MEME and Networks sections) before reading the membership.
        """
        self.search_distances = dict(upstream=(-20, 150))
        self.scan_distances = dict(upstream=(-30, 250))

        filters = [dm.nochange_filter, dm.center_scale_filter]
        factory = dm.DataMatrixFactory(filters)
        ratios_file = util.read_dfile(
            'example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"')
        self.ratio_matrix = factory.create_from(ratios_file)
        self.organism = testutil.make_halo(
            self.search_distances, self.scan_distances, self.ratio_matrix)

        # Per-section settings, kept in the same key order as the flat
        # literal they replace.
        columns_section = {'schedule': lambda i: True}
        rows_section = {
            'schedule': lambda i: True,
            'scaling': ('scaling_const', 6.0),
        }
        motifs_section = {
            'schedule': lambda i: True,
            'scaling':
                ('scaling_rvec', 'seq(0, 1, length=num_iterations*3/4)'),
        }
        meme_section = {
            'version': '4.3.0',
            'global_background': False,
            'schedule': lambda i: True,
            'nmotifs_rvec':
                'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
            'max_width': 24,
            'arg_mod': 'zoops',
            'background_order': 3,
            'use_revcomp': 'True',
        }
        networks_section = {
            'schedule': lambda i: True,
            'scaling':
                ('scaling_rvec', 'seq(1e-5, 0.5, length=num_iterations*3/4)'),
        }

        num_clusters = 43
        clusters_per_col = int(round(num_clusters * 2.0 / 3.0))
        self.config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'num_cores': None,
            'memb.clusters_per_row': 2,
            'memb.clusters_per_col': clusters_per_col,
            'num_clusters': num_clusters,
            'output_dir': 'out',
            'remap_network_nodes': False,
            'use_BSCM': False,
            'num_iterations': 2000,
            'debug': {},
            'search_distances': dict(upstream=(-20, 150)),
            'Columns': columns_section,
            'Rows': rows_section,
            'Motifs': motifs_section,
            'MEME': meme_section,
            'Networks': networks_section,
        }

        # The membership reader relies on config_params, so it runs only
        # after the configuration has been assigned.
        self.membership = self.__read_members()
        self.iteration_result = dict(iteration=51, score_means={})
Пример #8
0
def read_matrix(filename):
    """Load a matrix file and return it sorted by row name.

    The file is read with a header row and double-quote quoting, then
    turned into a matrix by a filterless factory called with
    case_sensitive=True.
    """
    parsed = util.read_dfile(filename, has_header=True, quote='\"')
    unfiltered_factory = dm.DataMatrixFactory([])
    matrix = unfiltered_factory.create_from(parsed, case_sensitive=True)
    return matrix.sorted_by_row_name()
Пример #9
0
                cond_id = cond_map[ratios.column_names[col]]
                value = ratios.values[row][col]
                outfile.write("%d\t%d\t%f\n" % (gene_id, cond_id, value))

if __name__ == '__main__':
    # Script entry point: reads a cMonkey/python result directory and starts
    # adding the run to the database.
    description = 'addnwportal.py - adding a cMonkey/python run to the database'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--resultdir', required=True, help='cMonkey result directory')
    parser.add_argument('--exptable', help='filename of expression table to generate',
                        default=None)
    args = parser.parse_args()
    # Both inputs are looked up under fixed names inside the result directory.
    resultdb = os.path.join(args.resultdir, 'cmonkey_run.db')
    ratiofile = os.path.join(args.resultdir, 'ratios.tsv.gz')

    # read the matrix
    matrix_factory = dm.DataMatrixFactory([dm.nochange_filter, dm.center_scale_filter])
    infile = util.read_dfile(ratiofile, has_header=True, quote='\"')
    ratios = matrix_factory.create_from(infile)

    # access the run information (only the first row of run_infos is used)
    conn = sqlite3.connect(resultdb)
    cursor = conn.cursor()
    cursor.execute('select organism, species, num_iterations, num_clusters from run_infos')
    orgcode, species, num_iterations, num_clusters = cursor.fetchone()
    # A single pre-formatted argument in parentheses prints the same text
    # whether print is a statement (Python 2) or a function (Python 3).
    print("organism: %s species: %s iterations: %d clusters: %d" % (
        orgcode, species, num_iterations, num_clusters))

    # start populating the database
    microbedb, organism = make_microbe(orgcode)
    ncbi_code = microbedb.rsat_info.taxonomy_id
Пример #10
0
 def __read_colscores_refresult(self):
     """Load the column score reference result as a matrix.

     Reads testdata/column_scores_refresult.tsv with a header row and
     double-quote quoting; no filters are applied and create_from is
     called with case_sensitive=True.
     """
     reference = util.read_dfile(
         'testdata/column_scores_refresult.tsv', has_header=True, quote='"')
     unfiltered_factory = dm.DataMatrixFactory([])
     return unfiltered_factory.create_from(reference, case_sensitive=True)
Пример #11
0
 def __read_ratios(self):
     """Load the row score test ratios as a matrix.

     Reads testdata/row_scores_testratios.tsv with a header row; no
     filters are applied and create_from is called with
     case_sensitive=True.
     """
     test_ratios = util.read_dfile(
         'testdata/row_scores_testratios.tsv', has_header=True)
     unfiltered_factory = dm.DataMatrixFactory([])
     return unfiltered_factory.create_from(test_ratios, case_sensitive=True)
Пример #12
0
def read_matrix(filename):
    """Load a tab-separated matrix file and return it sorted by row name.

    The file is read with a header row and double-quote quoting, and the
    matrix is created by a factory without filters.
    """
    read_options = dict(has_header=True, sep='\t', quote='"')
    contents = util.DelimitedFile.read(filename, **read_options)
    unsorted_matrix = dm.DataMatrixFactory([]).create_from(contents)
    return unsorted_matrix.sorted_by_row_name()