def analyze(meta, dbPath=None, resultPath=None, verbose=False):
    """Annotate the features of a result with Cancer Gene Index term data.

    Args:
        meta: Passed to ``result.getMeta``; the value it returns is the
            metadata object that gets annotated and returned.
        dbPath: Path of the database to query. Defaults to
            ``settings.CGI_DB_PATH`` when None.
        resultPath: If not None, the annotated metadata is saved there with
            ``result.saveMeta``.
        verbose: If true, print a progress line for each processed feature.

    Returns:
        The annotated metadata object.
    """
    meta = result.getMeta(meta)
    if dbPath is None:
        dbPath = settings.CGI_DB_PATH
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Analyzing %s" % (dbPath,))
    con = DB.connect(dbPath)
    # The connection is only needed inside this function, so release it even
    # if one of the queries or the save fails.
    try:
        result.sortFeatures(meta)
        features = meta["features"]
        numFeatures = len(features)
        nonSelected = []
        for count, featureName in enumerate(features, 1):
            feature = features[featureName]
            geneName = getGeneName(featureName)
            if isinstance(feature, int):
                # Features stored as a plain int are not annotated; their
                # gene names are only collected for the coverage summary.
                if geneName is not None:
                    nonSelected.append(geneName)
                continue
            if verbose:
                print("Processing feature %s %d/%d" % (featureName, count, numFeatures))
            if geneName is not None:
                mappings = getTermAnalysis(con, geneName, "disease")
                result.setValue(feature, "CancerGeneIndex", mappings)
                mappings = getTermAnalysis(con, geneName, "drug")
                result.setValue(feature, "CancerGeneDrug", mappings)
        # NOTE(review): the trailing argument of setValue looks like a parent
        # key (meta["analysis"] is read on the next line) -- confirm against
        # result.setValue.
        result.setValue(meta, "CancerGeneIndex", analyzeTermCoverage(features), "analysis")
        result.setValue(meta["analysis"], "non-selected", getCancerGeneCoverage(con, nonSelected), "CancerGeneIndex")
        if resultPath is not None:
            result.saveMeta(meta, resultPath)
    finally:
        con.close()
    return meta
def processGeneEntries(xmlFile, dbPath):
    """Insert the GeneEntry elements of an XML file into the database tables.

    For every table in ``settings.CGI_TABLES`` one row is built per list
    element (the table's "elements" path, or the GeneEntry itself when that
    key is absent). Each column value is the text of the XML element mapped
    to that column, or None when the element is missing. A table's optional
    "preprocess" callable may replace the rows before they are inserted.

    Args:
        xmlFile: XML source handed to ``iterparse``.
        dbPath: Path of the database to fill, handed to ``DB.connect``.
    """
    con = DB.connect(dbPath)
    # Close the connection even if parsing or insertion fails; the commit is
    # only reached when every entry was processed.
    try:
        tableValuePaths = OrderedDict()
        inserts = {}
        for tableName in sorted(settings.CGI_TABLES.keys()):
            columns = getColumns(tableName)
            inserts[tableName] = DB.defineSQLInsert(tableName, columns)
            # The settings map element path -> column name; invert it so the
            # paths can be listed in column order.
            columnToElement = {v: k for k, v in settings.CGI_TABLES[tableName]["columns"].items()}
            tableValuePaths[tableName] = [columnToElement[column[0]] for column in columns]

        # NOTE(review): iterparse is assumed to yield the GeneEntry elements
        # themselves (not (event, element) pairs) -- confirm against its
        # definition.
        for elem in iterparse(xmlFile, tag='GeneEntry'):
            for tableName, valuePaths in tableValuePaths.items():
                table = settings.CGI_TABLES[tableName]
                if "elements" in table:
                    listElemPath = table["elements"]
                    elemList = elem.findall(listElemPath)
                else:
                    listElemPath = ""
                    elemList = [elem]
                # Make the value paths relative to the list element once per
                # table instead of once per list element.
                relativePaths = [path.replace(listElemPath, "").strip("/") for path in valuePaths]
                valueLists = []
                for listElem in elemList:
                    values = []
                    for relativePath in relativePaths:
                        if not relativePath:
                            # The list element itself holds the value.
                            valueElem = listElem
                        elif relativePath.startswith("../"):
                            # Resolve against the GeneEntry rather than the
                            # list element.
                            # NOTE(review): strip("../") removes every leading
                            # and trailing '.' and '/' character, not just
                            # the "../" prefix; kept unchanged.
                            valueElem = elem.find(relativePath.strip("../"))
                        else:
                            valueElem = listElem.find(relativePath)
                        values.append(valueElem.text if valueElem is not None else None)
                    valueLists.append(values)
                if "preprocess" in table:
                    valueLists = table["preprocess"](tableName, elem, valueLists)
                con.executemany(inserts[tableName], valueLists)
            # Progress output: the gene symbol of the entry just processed.
            print(elem.find("HUGOGeneSymbol").text)
        con.commit()
    finally:
        con.close()
def initDB(dbPath, clear=True):
    """Create the database file with all tables from ``settings.CGI_TABLES``.

    Args:
        dbPath: Path of the database file.
        clear: If true, an existing file at ``dbPath`` is removed first.

    Returns:
        The open connection from ``DB.connect``; it is not closed here.
    """
    if clear and os.path.exists(dbPath):
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Removing existing database %s" % (dbPath,))
        os.remove(dbPath)
    # dirname() is "" for a bare file name, and os.makedirs("") raises, so
    # only create the directory when the path actually names one.
    dirName = os.path.dirname(dbPath)
    if dirName and not os.path.exists(dirName):
        os.makedirs(dirName)
    con = DB.connect(dbPath)
    for tableName in sorted(settings.CGI_TABLES.keys()):
        columns = getColumns(tableName)
        table = settings.CGI_TABLES[tableName]
        con.execute(DB.defineSQLTable(tableName, columns, table.get("primary_key", None)))
        if "indices" in table:
            DB.addIndices(con, tableName, table["indices"])
    return con
def makeCountTables(filename):
    """Build the per-gene term count tables "disease" and "drug".

    Each table groups the rows of the ``sentence`` table by gene symbol,
    term, concept code and organism, and stores the number of rows per group
    as ``term_count``. Both tables get indices on ``hugo_gene_symbol`` via
    ``DB.addIndices``.

    Args:
        filename: Path of the database, handed to ``DB.connect``.
    """
    con = DB.connect(filename)
    # Close the connection even if a statement fails (for example when a
    # table already exists); the commit is only reached on success.
    try:
        con.execute(
            " CREATE TABLE disease AS"
            " SELECT hugo_gene_symbol, matched_disease_term,"
            " nci_disease_concept_code, organism, COUNT(*) AS term_count"
            " FROM sentence"
            " WHERE matched_disease_term IS NOT NULL"
            " GROUP BY hugo_gene_symbol, matched_disease_term,"
            " nci_disease_concept_code, organism"
            " ORDER BY hugo_gene_symbol; "
        )
        DB.addIndices(con, "disease", ["hugo_gene_symbol"])
        con.execute(
            " CREATE TABLE drug AS"
            " SELECT hugo_gene_symbol, drug_term,"
            " nci_drug_concept_code, organism, COUNT(*) AS term_count"
            " FROM sentence"
            " WHERE drug_term IS NOT NULL"
            " GROUP BY hugo_gene_symbol, drug_term,"
            " nci_drug_concept_code, organism"
            " ORDER BY hugo_gene_symbol; "
        )
        DB.addIndices(con, "drug", ["hugo_gene_symbol"])
        con.commit()
    finally:
        con.close()