def run(self, ko_file, wt_file, ts_file=None, kd_file=None, name=None): import numpy os.chdir(os.environ["gene_path"]) print "Reading in knockout data" knockout_storage = ReadData(ko_file, "knockout") knockout_storage.normalize() wildtype_storage = ReadData(wt_file, "wildtype") wildtype_storage.normalize() knockdown_storage = ReadData(kd_file, "knockdown") knockdown_storage.normalize() wildtype_storage.combine(knockdown_storage) timeseries_storage = None if ts_file != None: timeseries_storage = ReadData(ts_file, "timeseries") for ts in timeseries_storage: ts.normalize() settings = {} settings = ReadConfig(settings) # TODO: CHANGE ME settings["global"]["working_dir"] = os.getcwd() + '/' # Setup job manager print "Starting new job manager" jobman = JobManager(settings) # Make MCZ jobs mczjob = MCZ() mczjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, name) print "Queuing job..." jobman.queueJob(mczjob) print jobman.queue print "Running queue..." jobman.runQueue() jobman.waitToClear() print "Queue finished" job = jobman.finished[0] print job.alg.gene_list print job.alg.read_output(settings) jobnet = job.alg.network print "PREDICTED NETWORK:" print job.alg.network.network print jobnet.original_network return jobnet.original_network
def run(self, name, datafiles, goldnet_file): import numpy os.chdir(os.environ["gene_path"]) datastore = ReadData(datafiles[0], "steadystate") for file in datafiles[1:]: datastore.combine(ReadData(file, "steadystate")) datastore.normalize() settings = {} settings = ReadConfig(settings) # TODO: CHANGE ME settings["global"]["working_dir"] = os.getcwd() + '/' # Setup job manager print "Starting new job manager" jobman = JobManager(settings) # Make GENIE3 jobs genie3 = GENIE3() genie3.setup(datastore, settings, name) print "Queuing job..." jobman.queueJob(genie3) print jobman.queue print "Running queue..." jobman.runQueue() jobman.waitToClear() print "Queue finished" job = jobman.finished[0] print job.alg.gene_list print job.alg.read_output(settings) jobnet = job.alg.network print "PREDICTED NETWORK:" print job.alg.network.network print jobnet.original_network if goldnet_file != None: goldnet = Network() goldnet.read_goldstd(goldnet_file) print "GOLD NETWORK:" print goldnet.network print jobnet.analyzeMotifs(goldnet).ToString() print jobnet.calculateAccuracy(goldnet) return jobnet.original_network
# Replace each gold-standard filename in `goldnets` with its parsed
# Network object (in-place conversion of the dict values).
for key in goldnets.keys():
    goldnet = Network()
    goldnet.read_goldstd(goldnets[key])
    goldnets[key] = goldnet

# Queue 20 repeated GENIE3 runs per dataset on timeseries data combined
# with steady-state-converted timeseries and multifactorial data.
# NOTE(review): genie3nets is keyed by name only, so each repeat i
# overwrites the previous job for that name — confirm this is intended.
genie3nets = {}
for i in range(20):
    for name in data.keys():
        ts_storage = data[name]
        # delta_t spreads 1008 time units evenly over the experiments
        # of the first series (units unclear from here — TODO confirm).
        settings["global"]["time_series_delta_t"] = (1008.0 / (len(ts_storage[0].experiments)-1))
        combined = ReadData(exp_data_directory + '/' + name + '/' + timeseries_filename, "timeseries")[0]
        # Fold in series 1..10 converted to steady-state form.
        for ts in timeseries_as_steady_state[name][1:11]:
            combined.combine(ts)
        #combined.combine(knockouts[name])
        combined.combine(multifactorials[name])
        genie3job = GENIE3()
        genie3job.setup(combined, settings, "Genie3_TimeSeries_{0}_{1}".format(name, i))
        jobman.queueJob(genie3job)
        genie3nets[name] = genie3job
        # Attach the matching gold network for later scoring.
        genie3job.goldnet = goldnets[name]

jobman.runQueue()
jobman.waitToClear()

# Loop body continues beyond this chunk (truncated here).
for job in jobman.finished:
# Load the gold-standard network for the large benchmark.
goldnet = Network()
goldnet.read_goldstd(settings["global"]["large_network_goldnet_file"])

# Each settings entry may hold several whitespace-separated filenames;
# only the first filename of each list is read below.
ko_file = settings["global"]["large_network_knockout_file"].split()
kd_file = settings["global"]["large_network_knockdown_file"].split()
ts_file = settings["global"]["large_network_timeseries_file"].split()
wt_file = settings["global"]["large_network_wildtype_file"].split()

# Read data into program
# Where the format is "FILENAME" "DATATYPE"
knockout_storage = ReadData(ko_file[0], "knockout")
knockdown_storage = ReadData(kd_file[0], "knockdown")
timeseries_storage = ReadData(ts_file[0], "timeseries")
wildtype_storage = ReadData(wt_file[0], "wildtype")

# Fold every other dataset into the wildtype store, which is then fed
# to MCZ as the combined background data.
wildtype_storage.combine(knockout_storage)
wildtype_storage.combine(knockdown_storage)
wildtype_storage.combine(timeseries_storage)

# Setup job manager
jobman = JobManager(settings)

# Make BANJO jobs
# NOTE(review): comment says BANJO but an MCZ job is queued — confirm.
mczjob = MCZ()
mczjob.setup(knockout_storage, wildtype_storage, settings, None, "mcz-test-run-1")
jobman.queueJob(mczjob)

print jobman.queue
jobman.runQueue()
def get_network_results(name, settings, cache): print "STARTING", name if name in cache.keys(): print "CACHE HIT" return cache[name] ko_file, kd_file, ts_file, wt_file, mf_file, goldnet = get_example_data_files(name, settings) # Create date string to append to output_dir t = datetime.now().strftime("%Y-%m-%d_%H.%M.%S") settings["global"]["output_dir"] = settings["global"]["output_dir_save"] + "/" + \ settings["global"]["experiment_name"] + "-" + t + "-" + name + "/" os.mkdir(settings["global"]["output_dir"]) # Get a list of the multifactorial files # Read data into program # Where the format is "FILENAME" "DATATYPE" mf_storage = ReadData(mf_file[0], "multifactorial") knockout_storage = ReadData(ko_file[0], "knockout") knockdown_storage = ReadData(kd_file[0], "knockdown") wildtype_storage = ReadData(wt_file[0], "wildtype") timeseries_storage = ReadData(ts_file[0], "timeseries") gene_list = knockout_storage.gene_list # Setup job manager jobman = JobManager(settings) # MCZ mczjob = MCZ() mczjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, knockdown_storage, "MCZ") jobman.queueJob(mczjob) # CLR clrjob = CLR() clrjob.setup(knockout_storage, settings, "CLR", "plos", 6) jobman.queueJob(clrjob) # GENIE3 mf_storage.combine(knockout_storage) mf_storage.combine(wildtype_storage) mf_storage.combine(knockdown_storage) genie3job = GENIE3() genie3job.setup(mf_storage, settings, "GENIE3") jobman.queueJob(genie3job) ## TLCLR tlclrjob = TLCLR() tlclrjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, knockdown_storage, "TLCLR") jobman.queueJob(tlclrjob) #if sys.argv[1] != "dream4100": #cojob = ConvexOptimization() #cojob.setup(knockout_storage, settings, "ConvOpt_T-"+ str(0.01),None, None, 0.01) #jobman.queueJob(cojob) ### DFG4GRN dfg = DFG4GRN() settings["dfg4grn"]["eta_z"] = 0.01 settings["dfg4grn"]["lambda_w"] = 0.001 settings["dfg4grn"]["tau"] = 3 dfg.setup(timeseries_storage, TFList(timeseries_storage[0].gene_list), settings, 
"DFG", 20) jobman.queueJob(dfg) ### Inferelator ### NIR nirjob = NIR() nirjob.setup(knockout_storage, settings, "NIR", 5, 5) jobman.queueJob(nirjob) #### TDARACNE settings = ReadConfig(settings, "./config/default_values/tdaracne.cfg") bjob = tdaracne() settings["tdaracne"]["num_bins"] = 4 bjob.setup(timeseries_storage, settings, "TDARACNE") jobman.queueJob(bjob) print jobman.queue jobman.runQueue() jobman.waitToClear(name) SaveResults(jobman.finished, goldnet, settings, name) cache[name] = jobman.finished[:] return cache[name]
# Four replicate KNO3 dex-induction arrays; replicate 1 supplies the
# reference gene list used to filter every other data store below.
kno3_1 = ReadData("datasets/RootArrayData/KNO3norm1.csv", "dex")
kno3_2 = ReadData("datasets/RootArrayData/KNO3norm2.csv", "dex")
kno3_3 = ReadData("datasets/RootArrayData/KNO3norm3.csv", "dex")
kno3_4 = ReadData("datasets/RootArrayData/KNO3norm4.csv", "dex")

# Uneven intervals between successive time points (units not shown
# here — TODO confirm whether these are minutes or hours).
settings["global"]["time_series_delta_t"] = "3 3 3 3 3 5"

# Restrict every dataset to the genes present in the first replicate.
dex_storage.filter(kno3_1.gene_list)
dexcombined.filter(kno3_1.gene_list)
dex_storage2.filter(kno3_1.gene_list)
cnlo_storage.filter(kno3_1.gene_list)
cnlo_no3_storage.filter(kno3_1.gene_list)
no3_1_storage.filter(kno3_1.gene_list)
no3_2_storage.filter(kno3_1.gene_list)
no3_3_storage.filter(kno3_1.gene_list)

dexcombined.combine(dex_storage2)

# Merge the three NO3 replicates. NOTE: no3_storage aliases
# no3_1_storage, so the combines mutate no3_1_storage too.
no3_storage = no3_1_storage
no3_storage.combine(no3_2_storage)
no3_storage.combine(no3_3_storage)
cnlo_no3_storage.combine(no3_storage)
#all_storage.combine(cnlo_no3_storage)
#dex_storage.combine(cnlo_storage)
#dex_storage.combine(no3_storage)
#dex_storage.normalize()
no3_storage.normalize()
cnlo_storage.normalize()
cnlo_no3_storage.normalize()
def run(self, kofile, tsfile, wtfile, datafiles, name, goldnet_file, normalize=False): os.chdir(os.environ["gene_path"]) knockout_storage = ReadData(kofile, "knockout") print "Reading in knockout data" wildtype_storage = ReadData(wtfile, "steadystate") if datafiles == []: other_storage = None else: other_storage = ReadData(datafiles[0], "steadystate") for file in datafiles[1:]: other_storage.combine(ReadData(file, "steadystate")) timeseries_storage = None if tsfile != None: timeseries_storage = ReadData(tsfile, "timeseries") #for ts in timeseries_storage: #ts.normalize() #if normalize: #knockout_storage.normalize() #wildtype_storage.normalize() #other_storage.normalize() settings = {} settings = ReadConfig(settings) # TODO: CHANGE ME settings["global"]["working_dir"] = os.getcwd() + '/' # Setup job manager print "Starting new job manager" jobman = JobManager(settings) # Make inferelator jobs inferelatorjob = inferelator() inferelatorjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, other_storage, name) print "Queuing job..." jobman.queueJob(inferelatorjob) print jobman.queue print "Running queue..." jobman.runQueue() jobman.waitToClear() print "Queue finished" job = jobman.finished[0] #print job.alg.gene_list #print job.alg.read_output(settings) jobnet = job.alg.network #print "PREDICTED NETWORK:" #print job.alg.network.network print jobnet.original_network if goldnet_file != None: goldnet = Network() goldnet.read_goldstd(goldnet_file) #print "GOLD NETWORK:" #print goldnet.network #print jobnet.analyzeMotifs(goldnet).ToString() print jobnet.calculateAccuracy(goldnet) import AnalyzeResults tprs, fprs, rocs = AnalyzeResults.GenerateMultiROC(jobman.finished, goldnet ) ps, rs, precs = AnalyzeResults.GenerateMultiPR(jobman.finished, goldnet) print "Area Under ROC" print rocs print "Area Under PR" print precs return jobnet.original_network
# Restrict every condition's data store to the selected top genes.
c4d.filter(topgenes_list)
c4l.filter(topgenes_list)
c21d.filter(topgenes_list)
c21hl.filter(topgenes_list)
c21l.filter(topgenes_list)
c21ll.filter(topgenes_list)
c32l.filter(topgenes_list)
c32l2.filter(topgenes_list)
#for dataset in ts_storage:
    #dataset.normalize()

# Merge all filtered condition datasets into `combined`.
combined.filter(topgenes_list)
combined.combine(c4l)
combined.combine(c21d)
combined.combine(c21hl)
combined.combine(c21l)
combined.combine(c21ll)
combined.combine(c32l)
combined.combine(c32l2)

# Remove the last time point for testing
# (collects the final experiment of each series as a hold-out set).
leave_out = []
for i, ts in enumerate(ts_storage):
    leave_out.append(ts.experiments[-1])
#goldnet = Network()
def run(self, datafiles=None, name=None, goldnet_file=None, topd=None, restk=None): import numpy os.chdir(os.environ["gene_path"]) print "Reading in data" data_storage = ReadData(datafiles[0], "steadystate") for file in datafiles[1:]: data_storage.combine(ReadData(file, "steadystate")) settings = {} settings = ReadConfig(settings) # TODO: CHANGE ME settings["global"]["working_dir"] = os.getcwd() + "/" # Setup job manager print "Starting new job manager" jobman = JobManager(settings) # Make nir jobs nirjob = NIR() nirjob.setup(data_storage, settings, name, topd, restk) print "Queuing job..." jobman.queueJob(nirjob) print jobman.queue print "Running queue..." jobman.runQueue() jobman.waitToClear() print "Queue finished" job = jobman.finished[0] print job.alg.gene_list print job.alg.read_output(settings) jobnet = job.alg.network print "PREDICTED NETWORK:" print job.alg.network.network if goldnet_file != None: goldnet = Network() goldnet.read_goldstd(goldnet_file) # print "GOLD NETWORK:" # print goldnet.network # print jobnet.analyzeMotifs(goldnet).ToString() print jobnet.calculateAccuracy(goldnet) import AnalyzeResults tprs, fprs, rocs = AnalyzeResults.GenerateMultiROC( jobman.finished, goldnet, True, job.alg.output_dir + "/ROC.pdf" ) ps, rs, precs = AnalyzeResults.GenerateMultiPR( jobman.finished, goldnet, True, job.alg.output_dir + "/PR.pdf" ) print "Area Under ROC" print rocs print "Area Under PR" print precs return job.alg.network.network
jobman = JobManager(settings) # Make BANJO jobs mczjob = MCZ() mczjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, knockdown_storage, "MCZ_Alone") jobman.queueJob(mczjob) clrjob = CLR() clrjob.setup(knockout_storage, settings, "clr_" + t + "_Bins-" + str(6), "plos", 6) jobman.queueJob(clrjob) #cojob = ConvexOptimization() #cojob.setup(knockout_storage, settings, "ConvOpt_T-Plos",None, None, 0.04) #jobman.queueJob(cojob) mf_storage.combine(knockout_storage) mf_storage.combine(wildtype_storage) mf_storage.combine(knockdown_storage) genie3job = GENIE3() genie3job.setup(mf_storage, settings, "MF_KO_WT_KD") jobman.queueJob(genie3job) print jobman.queue jobman.runQueue() jobman.waitToClear() accs = [] precs = [] settings["dfg4grn"]["eta_z"] = 0.001 settings["dfg4grn"]["lambda_w"] = 0.01 settings["dfg4grn"]["tau"] = 3
# Read data into program # Where the format is "FILENAME" "DATATYPE" mf_storage = ReadData(mf_file[0], "multifactorial") ko_storage = ReadData(ko_file[0], "knockout") kd_storage = ReadData(kd_file[0], "knockdown") wt_storage = ReadData(wt_file[0], "wildtype") # Setup job manager jobman = JobManager(settings) # Make GENIE3 jobs genie3job = GENIE3() genie3job.setup(mf_storage, settings, "MF") jobman.queueJob(genie3job) mf_storage.combine(ko_storage) genie3job = GENIE3() genie3job.setup(mf_storage, settings, "MF_KO") jobman.queueJob(genie3job) mf_storage.combine(wt_storage) genie3job = GENIE3() genie3job.setup(mf_storage, settings, "MF_KO_WT") jobman.queueJob(genie3job) mf_storage.combine(kd_storage) genie3job = GENIE3() genie3job.setup(mf_storage, settings, "MF_KO_WT_KD") jobman.queueJob(genie3job) print jobman.queue
# Tail of a loop (header outside this chunk): collect gene1 as a TF
# whenever the gold network marks an edge gene1 -> gene2.
if goldnet.network[gene1][gene2] > 0:
    t.append(gene1)
tfs[name] = list(set(t))

# Use the first dataset's gold standard as the reference network.
goldnet = Network()
goldnet.read_goldstd(goldnets[data.keys()[0]])

# Queue 50 repeated GENIE3 runs per dataset on timeseries data combined
# with steady-state-converted series and multifactorial data.
# NOTE(review): genie3nets is keyed by name only, so each repeat i
# overwrites the previous job for that name — confirm this is intended.
genie3nets = {}
for name in data.keys():
    for i in range(50):
        ts_storage = data[name]
        # delta_t spreads 1008 time units evenly over the experiments
        # of the first series (units unclear from here — TODO confirm).
        settings["global"]["time_series_delta_t"] = (1008.0 / (len(ts_storage[0].experiments)-1))
        combined = ReadData(exp_data_directory + '/' + name + '/' + timeseries_filename, "timeseries")[0]
        for ts in timeseries_as_steady_state[name][1:]:
            combined.combine(ts)
        combined.combine(multifactorials[name])
        genie3job = GENIE3()
        genie3job.setup(combined, settings, "Genie3_TimeSeries_SS_{0}-{1}".format(name, i))
        jobman.queueJob(genie3job)
        genie3nets[name] = genie3job

jobman.runQueue()
jobman.waitToClear()

# Second 50-repeat pass; loop body continues beyond this chunk.
for name in data.keys():
    for i in range(50):
        ts_storage = data[name]
        settings["global"]["time_series_delta_t"] = (1008.0 / (len(ts_storage[0].experiments)-1))
# KNO3 dex-induction replicates 3 and 4 (replicates 1/2 are loaded
# outside this chunk; kno3_1 supplies the reference gene list below).
kno3_3 = ReadData("datasets/RootArrayData/KNO3norm3.csv", "dex")
kno3_4 = ReadData("datasets/RootArrayData/KNO3norm4.csv", "dex")

# Restrict every dataset to the genes present in the first replicate.
dex_storage.filter(kno3_1.gene_list)
cnlo_storage.filter(kno3_1.gene_list)
cnlo_no3_storage.filter(kno3_1.gene_list)
no3_1_storage.filter(kno3_1.gene_list)
no3_2_storage.filter(kno3_1.gene_list)
no3_3_storage.filter(kno3_1.gene_list)

# Merge the three NO3 replicates. NOTE: no3_storage aliases
# no3_1_storage, so the combines mutate no3_1_storage too.
no3_storage = no3_1_storage
no3_storage.combine(no3_2_storage)
no3_storage.combine(no3_3_storage)
cnlo_no3_storage.combine(no3_storage)
#all_storage.combine(cnlo_no3_storage)
#dex_storage.combine(cnlo_storage)
#dex_storage.combine(no3_storage)
#dex_storage.normalize()
no3_storage.normalize()
cnlo_storage.normalize()
cnlo_no3_storage.normalize()
#all_storage.normalize()

# Set delta_t to be without the last time point
settings["global"]["time_series_delta_t"] = "3 3 3 3 3"

# Remove the last time point from each of these