filename=data_dir + cell + "_" + str(wid)  + "bp_Classes.txt";
        classes=numpy.loadtxt(filename,delimiter='\t',dtype=object)
        filename=data_dir+ cell + "_Features.txt"
        features=numpy.loadtxt(filename,delimiter='\t',dtype=object)
              
        given=["A-E","I-E","A-P","I-P","A-X","I-X","UK"]
        #given=["A-E","I-E"]
        #given=["A-P","I-P"]
        #given=["A-E","A-P"]
        #given=["A-E","A-X"]
        #given=["A-P","A-X"]
        #given=["A-E","A-P","A-X"]
        #given=["A-E","I-E","A-P","I-P"]
        #given=["A-E","I-E","A-P","I-P","A-X","I-X"]
        #given=["I-E","I-P"]
        data,classes,_=cl.take_some_classes(data,classes,given=given,others=None)
        
        # balance the sample sizes of the classes
        rng=numpy.random.RandomState(1000)
        data,classes,others=cl.balance_sample_size(data,classes,others=None,min_size_given=None,rng=rng)

        print data.shape
        print numpy.unique(classes)

        #group=[["A-E"],["I-E"],["A-P"],["I-P"],["A-X"],["I-X"],["UK"]]
        #group=[["A-E","A-P"],["I-E","I-P","A-X","I-X","UK"]]
        #group=[["A-E","A-P","A-X"],["I-E","I-P","I-X","UK"]]
        group=[["A-E"],["A-P"],["I-E","I-P","A-X","I-X","UK"]]
        #group=[["A-E"],["A-P"],["A-X"],["I-E","I-P","I-X","UK"]]
        #group=[["A-E"],["I-E"]]
        #group=[["A-P"],["I-P"]]
Exemplo n.º 2
0
Each row is a string (no whitespace allowed) giving the feature name of the corresponding column in [1].
"""

##################################
# load your data here ...
##################################
# The three inputs are tab-delimited text files. `filename` is reused for
# each path and is left pointing at the features file afterwards.
filename = "/home/yifengli/research/dnashape/result/Data_1000bp.txt"
data = numpy.loadtxt(filename, delimiter='\t', dtype='float16')
filename = "/home/yifengli/research/dnashape/result/Classes_1000bp.txt"
classes = numpy.loadtxt(filename, delimiter='\t', dtype=str)
filename = "/home/yifengli/research/dnashape/result/Features.txt"
features = numpy.loadtxt(filename, delimiter='\t', dtype=str)

# change class labels
# NOTE(review): presumably keeps only the samples whose label is listed in
# `given` -- confirm against cl.take_some_classes.
given = ["Enhancer", "EnhancerFalse"]
data, classes = cl.take_some_classes(data, classes, given)

# NOTE(review): presumably maps each string label to the integer given
# here -- confirm against cl.change_class_labels_to_given.
given = {"Enhancer": 0, "EnhancerFalse": 1}
classes = cl.change_class_labels_to_given(classes, given)

# Split into train / validation / test sets with ratio (1, 1, 1).
(train_set_x_org, train_set_y_org,
 valid_set_x_org, valid_set_y_org,
 test_set_x_org, test_set_y_org) = cl.partition_train_valid_test(
    data, classes, ratio=(1, 1, 1))
# The full matrix is no longer referenced after the split; drop the name and
# invoke gc_collect (defined outside this section; presumably gc.collect).
del data
gc_collect()

# Fixed seed so that runs are repeatable.
rng = numpy.random.RandomState(1000)
# `numpy.warnings` was only an incidental alias of the standard-library
# module and no longer exists in NumPy >= 1.24, so use `warnings` directly.
# Imported here because the file's import block is outside this section.
import warnings
warnings.filterwarnings('ignore')
# train
classifier,training_time=convolutional_mlp.train_model( train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org,
                        valid_set_x_org=valid_set_x_org, valid_set_y_org=valid_set_y_org, 
                        n_row_each_sample=4,
Exemplo n.º 3
0
        classes = numpy.loadtxt(filename, delimiter='\t', dtype=object)
        filename = data_dir + cell + "_Features.txt"
        features = numpy.loadtxt(filename, delimiter='\t', dtype=object)

        given = ["A-E", "I-E", "A-P", "I-P", "A-X", "I-X", "UK"]
        #given=["A-E","I-E"]
        #given=["A-P","I-P"]
        #given=["A-E","A-P"]
        #given=["A-E","A-X"]
        #given=["A-P","A-X"]
        #given=["A-E","A-P","A-X"]
        #given=["A-E","I-E","A-P","I-P"]
        #given=["A-E","I-E","A-P","I-P","A-X","I-X"]
        #given=["I-E","I-P"]
        data, classes, _ = cl.take_some_classes(data,
                                                classes,
                                                given=given,
                                                others=None)

        # balance the sample sizes of the classes
        rng = numpy.random.RandomState(1000)
        data, classes, others = cl.balance_sample_size(data,
                                                       classes,
                                                       others=None,
                                                       min_size_given=None,
                                                       rng=rng)

        print data.shape
        print numpy.unique(classes)

        #group=[["A-E"],["I-E"],["A-P"],["I-P"],["A-X"],["I-X"],["UK"]]
        #group=[["A-E","A-P"],["I-E","I-P","A-X","I-X","UK"]]
Exemplo n.º 4
0
Each row is a string (no whitespace allowed) giving the feature name of the corresponding column in [1].
"""

##################################
# load your data here ...
##################################
# Data matrix, class labels and feature names are each read from a
# tab-delimited text file; `filename` is rebound for every path.
filename = "/home/yifengli/research/dnashape/result/Data_1000bp.txt"
data = numpy.loadtxt(filename, delimiter='\t', dtype='float16')
filename = "/home/yifengli/research/dnashape/result/Classes_1000bp.txt"
classes = numpy.loadtxt(filename, delimiter='\t', dtype=str)
filename = "/home/yifengli/research/dnashape/result/Features.txt"
features = numpy.loadtxt(filename, delimiter='\t', dtype=str)

# change class labels
# NOTE(review): take_some_classes presumably filters the samples down to the
# labels in `given` -- verify against the `cl` module.
given = ["Enhancer", "EnhancerFalse"]
data, classes = cl.take_some_classes(data, classes, given)

# NOTE(review): the dict presumably defines the string -> integer label
# mapping applied by change_class_labels_to_given -- verify.
given = {"Enhancer": 0, "EnhancerFalse": 1}
classes = cl.change_class_labels_to_given(classes, given)

# Partition into train / validation / test sets using ratio (1, 1, 1).
(train_set_x_org, train_set_y_org,
 valid_set_x_org, valid_set_y_org,
 test_set_x_org, test_set_y_org) = cl.partition_train_valid_test(
    data, classes, ratio=(1, 1, 1))
# Release the name bound to the unsplit matrix, then call gc_collect
# (defined outside this section; presumably gc.collect).
del data
gc_collect()

# Seeded generator so repeated runs behave the same.
rng = numpy.random.RandomState(1000)
# Use the standard-library module directly: `numpy.warnings` was an
# incidental alias that was removed in NumPy 1.24 and raises AttributeError
# there. Imported locally because the import block is outside this section.
import warnings
warnings.filterwarnings('ignore')
# train
classifier, training_time = convolutional_mlp.train_model(
    train_set_x_org=train_set_x_org,
    train_set_y_org=train_set_y_org,