def addDataset(label, feature_matrix, associations, method, source, description, comments, configfile, results_path, ds_date, disease, contact):
    """Insert or replace one row of tcga.regulome_explorer_dataset.

    Parameters:
        label: dataset label; also used to derive a description and to
            locate the per-dataset result files under results_path.
        feature_matrix, associations: strings recorded in the input_files
            column as "{matrix:...,associations:...}".
        method, source, contact, ds_date, disease: stored as-is in the
            matching columns of the row.
        description: stored as-is unless it is "", in which case it is
            built from the cancer-type tokens found in label.
        comments: stored as-is unless it is "", in which case the
            input-files string is used.
        configfile: passed to db_util.getConfig to obtain the DB config.
        results_path: prefix that is concatenated directly with the result
            file names, so it has to end with a path separator.

    Raises:
        Any error other than ValueError hit while extracting "max_logpv"
        from the meta JSON is printed and re-raised (e.g. KeyError when the
        key is absent).
    """
    print("Adding " + source + " dataset to admin table with config " + configfile + " for label " + label)

    if description == "":
        # not general, revisit this to enter all TCGA known cancers
        # Plain substring tests: every group whose token occurs anywhere in
        # the label contributes its name, in this order.
        cancer_tokens = (
            (("brca", "BRCA"), "Breast"),
            (("ov", "OV"), "Ovarian"),
            (("gbm", "GBM"), "Glioblastoma"),
            (("coadread", "COAD", "coad", "crc", "CRC"), "ColoRectal"),
            (("cesc", "CESC"), "Cervical"),
            (("hnsc", "HNSC"), "HeadNeck"),
            (("kirc", "KIRC", "kirp", "KIRP"), "Kidney"),
            (("luad", "LUAD", "lusc", "LUSC"), "Lung"),
            (("stad", "STAD"), "Stomach"),
        )
        for tokens, cancer_name in cancer_tokens:
            if any(token in label for token in tokens):
                description = description + cancer_name
        # "nomask" contains "mask", so it is ruled out first.
        # NOTE(review): the " filtered" suffix is applied only to
        # auto-derived descriptions; confirm that a caller-supplied
        # description should not receive it as well.
        if "nomask" not in label and "mask" in label:
            description = description + " filtered"

    inputfiles = "{matrix:" + feature_matrix + ",associations:" + associations + "}"
    if comments == "":
        comments = inputfiles

    config = db_util.getConfig(configfile)

    # -1 marks "no maximum logged p-value available".
    max_logpv = -1.0
    meta_path = results_path + "edges_out_" + label + "_meta.json"
    if os.path.exists(meta_path):
        # The context manager closes the file even when parsing fails below.
        with open(meta_path, "r") as meta_json_file:
            metaline = meta_json_file.read()
        if len(metaline) > 1:
            try:
                max_logpv = json.loads(metaline)["max_logpv"]
            except ValueError:
                # okay that the max_logpv is not set
                max_logpv = -1
            except Exception:
                print("Unexpected error: %s" % (sys.exc_info()[0],))
                raise

    summary_json = ""
    summary_path = results_path + "feature_summary_" + label + ".json"
    if os.path.exists(summary_path):
        with open(summary_path, "r") as summary_file:
            summary_json = summary_file.read().strip()

    # NOTE(review): values are interpolated into the SQL text unescaped, so
    # a single quote in any field (summary_json included) breaks the
    # statement. Switch to parameter binding if db_util offers it.
    sql_template = (
        "replace into tcga.regulome_explorer_dataset "
        "(label,method,source,contact,comments,dataset_date,description,"
        "max_logged_pvalue, input_files, default_display,disease,summary_json) "
        "values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %f, '%s', '%i', '%s', '%s');"
    )
    insertSql = sql_template % (
        label, method, source, contact, comments, ds_date, description,
        max_logpv, inputfiles, 1, disease, summary_json,
    )
    print("updating regulome_explorer_dataset\n" + insertSql)
    db_util.executeInsert(config, insertSql)
def populate_sample_meta(sampleList, config):
    """Write one sample_meta row per entry of sampleList.

    sampleList needs to be a list of patients. Each entry becomes the
    sample_key of a row; its position in the list is stored as
    matrix_col_offset. The cancer type is the part of the module-level
    dataset_label that precedes the first underscore. Rows are written
    through db_util.executeInsert using the supplied config.
    """
    global dataset_label
    cancer_type = dataset_label.split("_")[0]

    row_template = "replace into sample_meta (sample_key,cancer_type,dataset_label,matrix_col_offset,meta_json) values ('%s', '%s', '%s', '%i', '%s');"
    # Fixed placeholder stored in meta_json for every sample.
    placeholder_meta = "{age:X,status:someStatus,comments:some comments}"
    # NOTE(review): the dataset_label column is always written as an empty
    # string although dataset_label is available here - confirm intended.
    blank_label = ""

    for column_offset, sample_key in enumerate(sampleList):
        row_sql = row_template % (sample_key, cancer_type, blank_label, column_offset, placeholder_meta)
        db_util.executeInsert(config, row_sql)

    print("Done populating sample list for " + dataset_label)
def populate_sample_meta(sampleList, config):
    """Write one sample_meta row per entry of sampleList.

    sampleList needs to be a list of patients. Each entry becomes the
    sample_key of a row; its position in the list is stored as
    matrix_col_offset. The cancer type is the part of the module-level
    dataset_label that precedes the first underscore. Rows are written
    through db_util.executeInsert using the supplied config.
    """
    global dataset_label
    cancer_type = dataset_label.split("_")[0]

    row_template = "replace into sample_meta (sample_key,cancer_type,dataset_label,matrix_col_offset,meta_json) values ('%s', '%s', '%s', '%i', '%s');"
    # Fixed placeholder stored in meta_json for every sample.
    placeholder_meta = "{age:X,status:someStatus,comments:some comments}"
    # NOTE(review): the dataset_label column is always written as an empty
    # string although dataset_label is available here - confirm intended.
    blank_label = ""

    for column_offset, sample_key in enumerate(sampleList):
        row_sql = row_template % (sample_key, cancer_type, blank_label, column_offset, placeholder_meta)
        db_util.executeInsert(config, row_sql)

    print("Done populating sample list for " + dataset_label)
sys.exit(-1) if operation.upper() == "ADD": pathwaymembers = raw_input("Enter pathway members(required and comma separated) e.g.\nTP53,GENE1,GENE2...\n") pathwayurl = raw_input("Enter pathway source url(optional)\n") if len(pathwaymembers) < 1: print "Invalid pathway defined, check your inputs" sys.exit(-1) # print "name %s\n members %s\n source %s\n url%s" %(pathwayname, pathwaymembers, pathwaysource, pathwayurl) insertSql = "insert into random_forest.pathways values('%s', '%s', '%s', '%s')" % ( pathwaysource, pathwayname, pathwayurl, pathwaymembers, ) rc = db_util.executeInsert(config, insertSql) if rc >= 0: print "%s added" % pathwayname else: print "Problems with adding - return code is %i" % rc elif operation.upper() == "DELETE": deleteSql = "delete from random_forest.pathways where pname = '%s' and psource = '%s'" % ( pathwayname, pathwaysource, ) rc = db_util.executeInsert(config, deleteSql) if rc >= 0: print "%s removed" % pathwayname else: print "Problems with deleting - return code is %i" % rc else:
# Interactive maintenance of random_forest.pathways: prompts for an
# operation (ADD or DELETE, case-insensitive), a pathway name and a pathway
# source, then runs the matching statement through db_util.executeInsert.
# Exits with status -1 when a required answer is empty.
# NOTE(review): the answers are interpolated into the SQL text unescaped; a
# single quote in any of them breaks the statement. Prefer parameter binding
# if db_util supports it.
operation = raw_input("ADD or DELETE pathways?(required)\n")
pathwayname = raw_input("Enter pathway name(required)\n")
pathwaysource = raw_input("Enter pathway source(required but custom okay)\n")

if not pathwayname or not pathwaysource:
    print("Invalid pathway defined, check your inputs")
    sys.exit(-1)

requested_action = operation.upper()
if requested_action == "ADD":
    pathwaymembers = raw_input("Enter pathway members(required and comma separated) e.g.\nTP53,GENE1,GENE2...\n")
    pathwayurl = raw_input("Enter pathway source url(optional)\n")
    if not pathwaymembers:
        print("Invalid pathway defined, check your inputs")
        sys.exit(-1)
    # Values are supplied positionally: source, name, url, members.
    insertSql = "insert into random_forest.pathways values('%s', '%s', '%s', '%s')" % (
        pathwaysource,
        pathwayname,
        pathwayurl,
        pathwaymembers,
    )
    rc = db_util.executeInsert(config, insertSql)
    if rc >= 0:
        print("%s added" % pathwayname)
    else:
        print("Problems with adding - return code is %i" % rc)
elif requested_action == "DELETE":
    deleteSql = "delete from random_forest.pathways where pname = '%s' and psource = '%s'" % (
        pathwayname,
        pathwaysource,
    )
    rc = db_util.executeInsert(config, deleteSql)
    if rc >= 0:
        print("%s removed" % pathwayname)
    else:
        print("Problems with deleting - return code is %i" % rc)
else:
    print("operation %s not supported" % (operation))
def addDataset(
    label,
    feature_matrix,
    associations,
    method,
    source,
    description,
    comments,
    configfile,
    results_path,
    ds_date,
    disease,
    contact,
):
    """Insert or replace one row of tcga.regulome_explorer_dataset.

    Parameters:
        label: dataset label; also used to derive a description and to
            locate the per-dataset result files under results_path.
        feature_matrix, associations: strings recorded in the input_files
            column as "{matrix:...,associations:...}".
        method, source, contact, ds_date, disease: stored as-is in the
            matching columns of the row.
        description: stored as-is unless it is "", in which case it is
            built from the cancer-type tokens found in label.
        comments: stored as-is unless it is "", in which case the
            input-files string is used.
        configfile: passed to db_util.getConfig to obtain the DB config.
        results_path: prefix that is concatenated directly with the result
            file names, so it has to end with a path separator.

    Raises:
        Any error other than ValueError hit while extracting "max_logpv"
        from the meta JSON is printed and re-raised (e.g. KeyError when the
        key is absent).
    """
    print("Adding " + source + " dataset to admin table with config " + configfile + " for label " + label)

    if description == "":
        # not general, revisit this to enter all TCGA known cancers
        # Plain substring tests: every group whose token occurs anywhere in
        # the label contributes its name, in this order.
        cancer_tokens = (
            (("brca", "BRCA"), "Breast"),
            (("ov", "OV"), "Ovarian"),
            (("gbm", "GBM"), "Glioblastoma"),
            (("coadread", "COAD", "coad", "crc", "CRC"), "ColoRectal"),
            (("cesc", "CESC"), "Cervical"),
            (("hnsc", "HNSC"), "HeadNeck"),
            (("kirc", "KIRC", "kirp", "KIRP"), "Kidney"),
            (("luad", "LUAD", "lusc", "LUSC"), "Lung"),
            (("stad", "STAD"), "Stomach"),
        )
        for tokens, cancer_name in cancer_tokens:
            if any(token in label for token in tokens):
                description = description + cancer_name
        # "nomask" contains "mask", so it is ruled out first.
        # NOTE(review): the " filtered" suffix is applied only to
        # auto-derived descriptions; confirm that a caller-supplied
        # description should not receive it as well.
        if "nomask" not in label and "mask" in label:
            description = description + " filtered"

    inputfiles = "{matrix:" + feature_matrix + ",associations:" + associations + "}"
    if comments == "":
        comments = inputfiles

    config = db_util.getConfig(configfile)

    # -1 marks "no maximum logged p-value available".
    max_logpv = -1.0
    meta_path = results_path + "edges_out_" + label + "_meta.json"
    if os.path.exists(meta_path):
        # The context manager closes the file even when parsing fails below.
        with open(meta_path, "r") as meta_json_file:
            metaline = meta_json_file.read()
        if len(metaline) > 1:
            try:
                max_logpv = json.loads(metaline)["max_logpv"]
            except ValueError:
                # okay that the max_logpv is not set
                max_logpv = -1
            except Exception:
                print("Unexpected error: %s" % (sys.exc_info()[0],))
                raise

    summary_json = ""
    summary_path = results_path + "feature_summary_" + label + ".json"
    if os.path.exists(summary_path):
        with open(summary_path, "r") as summary_file:
            summary_json = summary_file.read().strip()

    # NOTE(review): values are interpolated into the SQL text unescaped, so
    # a single quote in any field (summary_json included) breaks the
    # statement. Switch to parameter binding if db_util offers it.
    sql_template = (
        "replace into tcga.regulome_explorer_dataset "
        "(label,method,source,contact,comments,dataset_date,description,"
        "max_logged_pvalue, input_files, default_display,disease,summary_json) "
        "values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %f, '%s', '%i', '%s', '%s');"
    )
    insertSql = sql_template % (
        label,
        method,
        source,
        contact,
        comments,
        ds_date,
        description,
        max_logpv,
        inputfiles,
        1,
        disease,
        summary_json,
    )
    print("updating regulome_explorer_dataset\n" + insertSql)
    db_util.executeInsert(config, insertSql)