Пример #1
0
    # Derive ids with ':' replaced by '.', index the frame by them, and
    # save the annotation table as a tab-separated file.
    cellFrm['mod_sig_id'] = cellFrm['distil_id'].str.replace(':', '.')
    cellFrm.index = cellFrm['mod_sig_id']
    cellFrm.to_csv(outF, sep='\t')
    ### make gene signature groups - gmt file
    # One gmt entry per x_mutation_status value; 'sig' holds the row index
    # labels (mod_sig_id) of that group.
    # (An earlier variant grouped on 'pert_mfc_desc' and used the group key
    # itself as 'desc'.)
    geneGrped = cellFrm.groupby('x_mutation_status')
    gmtList = [
        {
            'id': status,
            'desc': str(list(set(members['x_mutation_status']))),
            'sig': list(members.index.values),
        }
        for status, members in geneGrped
    ]
    gmtOut = cellDir + '/mutation_status_oe_sig_id.gmt'
    gmt.write(gmtList, gmtOut)

#########################
### Run NMF projection ##
#########################

# COMPZ.MODZ_SCORE
# Component count for the NMF projection step.
nComponents = 20
# Per-cell-line dimension tag.
# NOTE(review): the values look like 'n<signatures>x<probes>' — confirm.
dimDict = dict(
    A549='n4487x978',
    AALE='n2235x978',
    H1299='n1503x978',
    SALE='n2128x978',
)

# ZSPCINF
### add lines for gct headers
# The dimension line ("<rows>\t<cols - 1>") is added before the "#1.2" tag.
# NOTE(review): line_pre_adder presumably prepends to the file, which would
# leave "#1.2" as the first line — confirm against the helper.
line_pre_adder(outFile, '\t'.join([str(mtrx.shape[0]), str(mtrx.shape[1] - 1)]))
line_pre_adder(outFile, "#1.2")

### make gmts of gene perturbations
# One gmt entry per pert_id: id and desc are both the pert_id, and 'sig'
# lists the sig_ids annotated with it.
geneGrped = annt.groupby('pert_id')
gmtList = [
    {'id': pertId, 'desc': pertId, 'sig': list(members['sig_id'].values)}
    for pertId, members in geneGrped
]
# Output is named for the OE set; the shRNA variant wrote to
# wkdir + '/gene_shRNA_sig_id.gmt'.
gmtOut = wkdir + '/gene_oe_sig_id.gmt'
gmt.write(gmtList, gmtOut)

### load core drivers - save sig_ids to new gmt
gFile = wkdir + '/core_lung_drivers.gmt'
coreGMT = gmt.read(gFile)
# gmt.read results are consumed in this codebase as a list of
# {'id', 'desc', 'sig'} dicts (one per set), so indexing the result with
# 'sig' directly fails. Pool the members of every set in the file instead.
# NOTE(review): confirm gmt.read's return type against the gmt module.
coreOE = [pertId for geneSet in coreGMT for pertId in geneSet['sig']]
# Keep only annotation rows whose pert_id is one of the core drivers.
coreFrm = annt[annt.pert_id.isin(coreOE)]
sig_ids = list(coreFrm.sig_id.values)
# Single-set gmt holding the sig_ids of all core-driver signatures.
gmtDict = {
    'id': 'core_lung_drivers',
    'desc': 'core_lung_drivers',
    'sig': sig_ids,
}
gmtOut = wkdir + '/core_lung_drivers_sig_id.gmt'
gmt.write([gmtDict], gmtOut)

Пример #3
0
# Self-connectivity table (tab-separated) for the n368 compound-target groups.
cpd_targets_n368_file = '/xchip/cogs/sig_tools/sig_cliqueselect_tool/sample/cpd_targets_n368/summly/self_connectivity.txt'
n368 = pd.read_csv(cpd_targets_n368_file, sep='\t')
# Keep only rows whose median_rankpt reaches the threshold.
median_rnkpt_thresh = 73
cp_connected = n368.loc[n368['median_rankpt'] >= median_rnkpt_thresh]

# load in clique annotations and matrix
cFile = '/xchip/cogs/projects/pharm_class/rnwork/cliques/cpd_targets_n368.gmt'
cliqueGMT = gmt.read(cFile)
cliqFrm = pd.DataFrame(cliqueGMT)
# limit only to drug-gene groups that have coherence
# (i.e. whose id appears among the group_ids that passed the rankpt cut)
isCoherent = cliqFrm['id'].isin(cp_connected['group_id'])
cliqFrm = cliqFrm[isCoherent]

# write a new, shorter gmt file: entries are matched back by their desc
gmtUpdate = [
    entry for entry in cliqueGMT
    if entry['desc'] in cliqFrm['desc'].values
]
outF = basedir + '/n69_drug_targets.gmt'
gmt.write(gmtUpdate, outF)

### set parameters
probeSpace = 'lm_epsilon'  # probe space: 'lm_epsilon' or 'bing'
nDMSO = 50  # NOTE(review): presumably the number of DMSO signatures — confirm
nKeep = 2  # number of signatures per drug
for cell in cellList:
    print(cell)
    # Per-cell working directory: <basedir>/<cell>_drug_c9_<probeSpace>
    prefix = cell + '_drug_c9_' + probeSpace
    wkdir = basedir + '/' + prefix
    if not os.path.exists(wkdir):
        os.mkdir(wkdir)
    # set grouping structures: group id -> set of that group's members
    pclDict = {
        row['id']: set(row['sig'])
        for _, row in cliqFrm.iterrows()
    }
# Dump the matrix as tab-separated text.
mtrx.to_csv(outFile, sep='\t')
### add lines for gct headers
# The dimension line ("<rows>\t<cols - 1>") is added before the "#1.2" tag.
# NOTE(review): line_pre_adder presumably prepends to the file, which would
# leave "#1.2" as the first line — confirm against the helper.
line_pre_adder(outFile, '\t'.join([str(mtrx.shape[0]), str(mtrx.shape[1] - 1)]))
line_pre_adder(outFile, "#1.2")

### make gmts of gene perturbations
# One gmt entry per pert_id: id and desc are both the pert_id, and 'sig'
# lists the sig_ids annotated with it.
geneGrped = annt.groupby('pert_id')
gmtList = [
    {'id': pertId, 'desc': pertId, 'sig': list(members['sig_id'].values)}
    for pertId, members in geneGrped
]
# Output is named for the OE set; the shRNA variant wrote to
# wkdir + '/gene_shRNA_sig_id.gmt'.
gmtOut = wkdir + '/gene_oe_sig_id.gmt'
gmt.write(gmtList, gmtOut)

### load core drivers - save sig_ids to new gmt
gFile = wkdir + '/core_lung_drivers.gmt'
coreGMT = gmt.read(gFile)
# gmt.read results are consumed in this codebase as a list of
# {'id', 'desc', 'sig'} dicts (one per set), so indexing the result with
# 'sig' directly fails. Pool the members of every set in the file instead.
# NOTE(review): confirm gmt.read's return type against the gmt module.
coreOE = [pertId for geneSet in coreGMT for pertId in geneSet['sig']]
# Keep only annotation rows whose pert_id is one of the core drivers.
coreFrm = annt[annt.pert_id.isin(coreOE)]
sig_ids = list(coreFrm.sig_id.values)
# Single-set gmt holding the sig_ids of all core-driver signatures.
gmtDict = {
    'id': 'core_lung_drivers',
    'desc': 'core_lung_drivers',
    'sig': sig_ids,
}
gmtOut = wkdir + '/core_lung_drivers_sig_id.gmt'
gmt.write([gmtDict], gmtOut)
Пример #5
0
    # Finish the "up" entry (created earlier in this loop body) and collect it.
    gmtDictUp.update(desc=sig, sig=upProbes)
    gmtListUp.append(gmtDictUp)
    # Dn: matching "down" entry, keyed by the same signature name
    gmtDictDn = {'id': sig, 'desc': sig, 'sig': dnProbes}
    gmtListDn.append(gmtDictDn)
# make query directory (only if it is not already there)
queryDir = os.path.join(sig_path, 'cmap_query')
if not os.path.exists(queryDir):
    os.mkdir(queryDir)
# write gmt files: up set first, then down set
gmtOutUp = queryDir + '/EMT_signatures_up.gmt'
gmtOutDn = queryDir + '/EMT_signatures_dn.gmt'
for tagList, tagPath in ((gmtListUp, gmtOutUp), (gmtListDn, gmtOutDn)):
    gmt.write(tagList, tagPath)

### run cmap query
# Assemble the sig_query_tool command line from the up/down gmt paths and
# hand it to the shell; the exit status of os.system is not inspected.
metric = 'wtcs'
cmd = ' '.join([
    'rum -q local -f sig_query_tool',
    '--uptag ' + gmtOutUp,
    '--dntag ' + gmtOutDn,
    '--metric ' + metric,
    '--row_space full',
    '--column_space gold',
    '--out ' + queryDir,
    '--mkdir false',
    '--save_tail false',
])
os.system(cmd)
Пример #6
0
 # Finish the "up" entry (created earlier in this loop body) and collect it.
 gmtDictUp.update(desc=sigPrefix, sig=upProbes)
 gmtListUp.append(gmtDictUp)
 # Dn: matching "down" entry; id is the signature, desc its prefix
 gmtDictDn = {'id': sig, 'desc': sigPrefix, 'sig': dnProbes}
 gmtListDn.append(gmtDictDn)
 # make query directory (only if it is not already there)
 queryDir = os.path.join(sig_path, 'cmap_query')
 if not os.path.exists(queryDir):
     os.mkdir(queryDir)
 # write gmt files named after the current signature: up set, then down set
 gmtOutUp = os.path.join(queryDir, sig + '_up.gmt')
 gmtOutDn = os.path.join(queryDir, sig + '_dn.gmt')
 for tagList, tagPath in ((gmtListUp, gmtOutUp), (gmtListDn, gmtOutDn)):
     gmt.write(tagList, tagPath)
 ### run cmap query
 # Assemble the sig_query_tool command line from the up/down gmt paths and
 # hand it to the shell; the exit status of os.system is not inspected.
 metric = 'wtcs'
 cmd = ' '.join([
     'rum -q local -f sig_query_tool',
     '--uptag ' + gmtOutUp,
     '--dntag ' + gmtOutDn,
     '--metric ' + metric,
     '--row_space full',
     '--column_space gold',
     '--out ' + queryDir,
     '--mkdir false',
     '--save_tail false',
 ])
 os.system(cmd)
 ### run summly
Пример #7
0
    # Derive ids with ':' replaced by '.', index the frame by them, and
    # save the annotation table as a tab-separated file.
    cellFrm['mod_sig_id'] = cellFrm['distil_id_original'].str.replace(':', '.')
    cellFrm.index = cellFrm['mod_sig_id']
    cellFrm.to_csv(outF, sep='\t')
    ### make gene signature groups - gmt file
    # One gmt entry per value of mtch_field; id and desc are both the group
    # key and 'sig' holds the row index labels (mod_sig_id) of that group.
    # (Alternative desc: str(list(set(<group>[mtch_field]))).)
    mtch_field = 'pert_iname'
    geneGrped = cellFrm.groupby(mtch_field)
    gmtList = [
        {'id': pertName, 'desc': pertName, 'sig': list(members.index.values)}
        for pertName, members in geneGrped
    ]
    gmtOut = cellDir + '/actomyosin_kd_distil_id.gmt'
    gmt.write(gmtList, gmtOut)

#########################
### Run NMF projection ##
#########################

# COMPZ.MODZ_SCORE
# Component count for the NMF projection step named in the banner above.
nComponents = 20
# Disabled alternative: build dimDict from the per-cell groups
# ('n<row count>x978') instead of hard-coding the tags.
# dimDict = {}
# for grp in cell_grped:
#     dimDict[grp[0]] = 'n'+str(grp[1].shape[0])+'x978'
dimDict = {'A375': 'n1684x978',
 'A549': 'n1410x978',
 'ASC': 'n260x978',
 'HA1E': 'n1445x978',
 'HCC515': 'n1163x978',
# Entries of dose_set with more than 3 items count as "at dose".
# NOTE(review): assumes dose_set is a Series of collections indexed by
# compound id — confirm against where it is built.
dose_len = dose_set.apply(len)
is_at_dose = dose_len > 3
cps_at_dose = dose_set[is_at_dose]
# which PCL members are at dose?
PCL_members_dose = cps_at_dose[cps_at_dose.index.isin(brdAllGroups)]

### make dose GMT
# Trim every group to its at-dose members (the entry is updated in place)
# and drop groups that end up empty.
new_gmt = []
for x in cliqueGMT:
    brd_dose = [j for j in x['sig'] if j in PCL_members_dose.index]
    if not brd_dose:
        continue
    x['sig'] = brd_dose
    new_gmt.append(x)
cFile = source_dir + '/PCL_compounds_at_dose.gmt'
gmt.write(new_gmt, cFile)
# load in new file
cliqueGMT = gmt.read(cFile)
cliqFrm = pd.DataFrame(cliqueGMT)
# set grouping structures: group id -> set of that group's members
pclDict = {
    row['id']: set(row['sig'])
    for _, row in cliqFrm.iterrows()
}
# create list of all compounds members: every group's members plus 'DMSO',
# then de-duplicated through a set
brdAllGroups = [brd for members in pclDict.values() for brd in members]
brdAllGroups.append('DMSO')
brdAllGroups = list(set(brdAllGroups))
testGroups = cliqFrm['id'].values
# Self-connectivity table (tab-separated) for the n368 compound-target groups.
cpd_targets_n368_file = '/xchip/cogs/sig_tools/sig_cliqueselect_tool/sample/cpd_targets_n368/summly/self_connectivity.txt'
n368 = pd.read_csv(cpd_targets_n368_file, sep='\t')
# Keep only rows whose median_rankpt reaches the threshold.
median_rnkpt_thresh = 73
cp_connected = n368.loc[n368['median_rankpt'] >= median_rnkpt_thresh]

# load in clique annotations and matrix
cFile = '/xchip/cogs/projects/pharm_class/rnwork/cliques/cpd_targets_n368.gmt'
cliqueGMT = gmt.read(cFile)
cliqFrm = pd.DataFrame(cliqueGMT)
# limit only to drug-gene groups that have coherence
# (i.e. whose id appears among the group_ids that passed the rankpt cut)
isCoherent = cliqFrm['id'].isin(cp_connected['group_id'])
cliqFrm = cliqFrm[isCoherent]

# write a new, shorter gmt file: entries are matched back by their desc
gmtUpdate = [
    entry for entry in cliqueGMT
    if entry['desc'] in cliqFrm['desc'].values
]
outF = basedir + '/n69_drug_targets.gmt'
gmt.write(gmtUpdate, outF)

### set parameters
probeSpace = 'lm_epsilon'  # probe space: 'lm_epsilon' or 'bing'
nDMSO = 50  # NOTE(review): presumably the number of DMSO signatures — confirm
nKeep = 2  # number of signatures per drug
for cell in cellList:
    print(cell)
    # Per-cell working directory: <basedir>/<cell>_drug_c9_<probeSpace>
    prefix = cell + '_drug_c9_' + probeSpace
    wkdir = basedir + '/' + prefix
    if not os.path.exists(wkdir):
        os.mkdir(wkdir)
    # set grouping structures: group id -> set of that group's members
    pclDict = {
        row['id']: set(row['sig'])
        for _, row in cliqFrm.iterrows()
    }