def getDataPackage():
    """Build the Tumor/Normal data package for the GDS2545 data set.

    Parses the GDS2545 SOFT file and the BioCarta gene-set (GMT) file,
    loads the SOFT data into a DataTable, and registers the table, the
    gene networks and the synonym file with a new dataPackager.  Columns
    are then split into two classifications, "Tumor" and "Normal", based
    on the text found in each column heading entry.

    Returns:
        The populated DataPackager.dataPackager instance.
    """
    softfile = "data/GDS2545.soft.gz"
    gnfile = "data/c2.biocarta.v2.5.symbols.gmt"
    synfile = "data/Homo_sapiens.gene_info.gz"

    gnf = GMTParser.GMTParser(gnfile)
    sp = SOFTParser.SOFTParser(softfile)

    # Sort column ids into groups by their heading text.
    # NOTE(review): each entry is presumably (sample id, description);
    # only entry[0] and entry[1] are used here -- confirm against SOFTParser.
    normal = []
    tumor = []
    for entry in sp.column_heading_info[0]:
        description = entry[1]
        # Substring tests instead of `string.find(...) > 0`: find() returns
        # 0 for a match at the very start of the text, so the old comparison
        # silently dropped descriptions that begin with the search phrase.
        if 'normal prostate tissue free' in description:
            normal.append(entry[0].strip())
        elif 'tumor' in description:
            # Checked second, so entries already taken by the "normal"
            # phrase above never land in the tumor group.
            tumor.append(entry[0].strip())

    dt = DataCleaner.DataTable()
    dt.getSOFTData(sp)

    dp = DataPackager.dataPackager()
    dp.addGeneNetwork(gnf.getAllNetworks())
    dp.addDataTable(dt)
    dp.addSynonyms(synfile)

    dp.createClassification("Tumor")
    dp.createClassification("Normal")
    for samp in tumor:
        dp.addToClassification("Tumor", dt.dt_id, samp)
    for samp in normal:
        dp.addToClassification("Normal", dt.dt_id, samp)

    return dp
# Third SOFT data set; `sp` and `sp2` are parsers created earlier in the file.
sp3 = SOFTParser.SOFTParser("data/GDS3329.soft.gz")
gnfile = "data/c2.biocarta.v2.5.symbols.gmt"

# Load each parsed SOFT file into its own data table.
print("creating Table")
dt1 = DataCleaner.DataTable()
print("importing parser")
dt1.getSOFTData(sp)

print("gene parser")
dt2 = DataCleaner.DataTable()
print("importing parser")
dt2.getSOFTData(sp2)

dt3 = DataCleaner.DataTable()
print("importing parser")
dt3.getSOFTData(sp3)

gnf = GMTParser.GMTParser(gnfile)

# Assemble the package: gene networks first, then the three tables.
print("data packager")
dp = DataPackager.dataPackager()
dp.addGeneNetwork(gnf.getAllNetworks())
print("adding data table 1")
dp.addDataTable(dt1)
print("adding data table 2")
dp.addDataTable(dt2)
dp.addDataTable(dt3)

dp.createClassification("first")
dp.createClassification("post")

# "first" takes the leading half of table 1's samples; "post" takes the
# trailing half of tables 2 and 3.  `//` is floor division, which is what
# `/` does for ints in Python 2.
for val in dt1.getSamples()[:len(dt1.getSamples()) // 2]:
    dp.addToClassification("first", dt1.dt_id, val)

for val in dt2.getSamples()[len(dt2.getSamples()) // 2:]:
    dp.addToClassification("post", dt2.dt_id, val)

for val in dt3.getSamples()[len(dt3.getSamples()) // 2:]:
    dp.addToClassification("post", dt3.dt_id, val)