def convert_to_refseq(assay_df, unit, data_type):
    """Rename the columns of ``assay_df`` to RefSeq identifiers, in place.

    Every distinct column label is passed to ``find_refseqs``. When the
    lookup yields at least one identifier the column is renamed to the
    first one yielded; all other columns keep their label.

    :param assay_df: data frame whose column labels are looked up; it is
        modified in place and also returned.
    :param unit: not used by this function.
    :param data_type: not used by this function.
    :return: tuple ``(assay_df, count)`` where ``count`` is the number of
        distinct column labels for which the lookup returned something.
    """
    new_names = {}
    count = 0
    # Iterating a data frame yields its column labels; the set removes
    # duplicated labels so each one is looked up (and counted) once.
    for gene in set(assay_df):
        # Materialise the lookup before indexing: ``[0]`` only works on a
        # sequence, and other callers of find_refseqs in this file wrap
        # its result in list() as well.  If the result is an unordered
        # collection, "first" is whatever its iteration order gives.
        candidates = list(find_refseqs(gene) or ())
        if candidates:
            new_names[gene] = candidates[0]
            count += 1
    # Labels without a match are simply left out of the mapping; rename()
    # does not touch columns that are not listed.
    assay_df.rename(columns=new_names, inplace=True)
    return assay_df, count
def convert_to_refseq(assay_df, platform):
    """Rename columns of ``assay_df`` to RefSeq identifiers known to ``platform``.

    For every distinct column label the identifiers produced by
    ``find_refseqs`` are scanned in order, and the first one for which
    ``identifier in platform`` holds becomes the new column label.
    Columns with no such identifier keep their label.

    :param assay_df: data frame whose columns are renamed in place.
    :param platform: container tested with ``in`` for each candidate.
    :return: tuple ``(assay_df, count)``; ``count`` is the number of
        distinct column labels that received a platform identifier.
    """
    from wrappers.input.utils import find_refseqs

    renames = {}
    matched = 0
    for column in set(assay_df):
        for candidate in list(find_refseqs(column)):
            if candidate in platform:
                # Only the first identifier present on the platform is used.
                renames[column] = candidate
                matched += 1
                break
    # Columns missing from the mapping are left untouched by rename().
    assay_df.rename(columns=renames, inplace=True)
    return assay_df, matched
def get_matrix_for_platform(self, exp, gene_list, mirna_list=None,
                            symmetrize=True, tolower=False):
    """Build a sparse interaction matrix addressed by list positions.

    The pairs returned by ``self.load_pairs()`` are grouped by their first
    element.  Both members of a pair are expanded with ``find_refseqs``
    and one entry is recorded for every expanded combination whose
    identifiers are present in the lookup tables built from the input
    lists.  Every entry is stored as 1; the third value of a pair is read
    but not used (see the TODO below).

    * ``mirna_list is None``: both members are looked up in ``gene_list``
      and the matrix is square, with side ``largest used index + 1``.
    * otherwise: the first member is looked up in ``mirna_list`` (rows)
      and the second in ``gene_list`` (columns); the shape is
      ``(largest row + 1, largest column + 1)``.

    When nothing matches, an empty matrix shaped by the lengths of the
    input lists is returned instead of failing on ``max()`` of an empty
    sequence.

    :param exp: object whose ``pk`` addresses the ``AllUpdated`` progress
        notifications; ``None`` switches the notifications off.
    :param gene_list: identifiers defining the gene positions.
    :param mirna_list: optional identifiers defining the row positions.
    :param symmetrize: add the transpose and reset all stored values to 1.
        NOTE(review): adding the transpose needs a square matrix, so with
        ``mirna_list`` this is expected to fail unless the shape happens
        to be square -- confirm callers pass ``symmetrize=False`` there.
    :param tolower: lower-case both identifiers before the index lookup.
    :return: a ``scipy.sparse`` matrix.
    """
    from collections import defaultdict
    from wrappers.input.utils import find_refseqs

    def notify(comment):
        # Progress messages are addressed by the experiment's primary key.
        if exp is not None:
            AllUpdated(
                exp.pk,
                comment=comment,
                silent=False,
                mode=NotifyMode.INFO
            ).send()

    hasht = dict(zip(gene_list, range(len(gene_list))))
    mirna_hasht = dict()
    if mirna_list is not None:
        mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))

    # Group the interaction partners by the first member of each pair.
    inter_hash = defaultdict(list)
    interactons = self.load_pairs()
    log.debug("transforming interactions")
    for ix in range(len(interactons)):
        a, b, val = interactons.iloc[ix]
        inter_hash[a].append([b, val])
    notify(u"Transforming interaction matrix done")
    log.debug("transformation of interactions done")

    # The first member of a pair is resolved against the miRNA table when
    # one is given and against the gene table otherwise; the second member
    # is always resolved against the gene table.
    source_index = hasht if mirna_list is None else mirna_hasht
    source_hits = []
    target_hits = []
    size_hash = len(inter_hash)
    for count, (key, value) in enumerate(inter_hash.items(), 1):
        if count % 500 == 0:
            log.debug("translating gene %d", count)
            notify(u"Translating gene %s of %s" % (count, size_hash))
        for refseq in find_refseqs(key):
            if refseq not in source_index:
                continue
            for (gene, strength) in value:
                for new_refseq in find_refseqs(gene):
                    if new_refseq not in hasht:
                        continue
                    gi = refseq
                    gj = new_refseq
                    if tolower:
                        # NOTE(review): membership was tested on the
                        # original spelling, so these lookups raise
                        # KeyError unless the lower-cased identifier is a
                        # key too -- confirm the intended behaviour.
                        gi = gi.lower()
                        gj = gj.lower()
                    source_hits.append(source_index[gi])
                    target_hits.append(hasht[gj])

    # Gene-gene mode stores the first member as the column and the second
    # as the row; miRNA mode stores the miRNA as the row.
    if mirna_list is None:
        rows, cols = target_hits, source_hits
    else:
        rows, cols = source_hits, target_hits

    notify(u"%d interactions were found." % len(cols))

    # TODO fix for custom value of interactions
    if not rows:
        # max() of an empty sequence raises, so size the empty matrix by
        # the input lists instead.
        if mirna_list is None:
            shape = (len(gene_list), len(gene_list))
        else:
            shape = (len(mirna_list), len(gene_list))
        inters_matr = sp.coo_matrix(shape)
    else:
        if mirna_list is None:
            size = max(max(rows), max(cols)) + 1
            shape = (size, size)
        else:
            shape = (max(rows) + 1, max(cols) + 1)
        inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), shape)

    if symmetrize:
        inters_matr = inters_matr + inters_matr.T
        # Dividing the stored values by themselves resets every one to 1.
        inters_matr.data /= inters_matr.data
    return inters_matr
def get_matrix_for_platform(self, exp, gene_list, mirna_list=None,
                            symmetrize=True, identifiers=True, tolower=False):
    """Build an interaction matrix for the given gene (and miRNA) lists.

    Steps performed:

    1. If fewer than half of ``gene_list`` match the RefSeq-like pattern,
       every gene is replaced by the first identifier ``find_refseqs``
       yields for it (genes without one are kept unchanged).  A given
       ``mirna_list`` is always translated the same way.
    2. The pairs from ``self.load_pairs()`` are grouped by one member; in
       miRNA mode with ``self.x2_unit == 'mirbase'`` the second member is
       the grouping key, otherwise the first.
    3. Both members are expanded with ``find_refseqs`` and one entry is
       recorded for every expanded combination found in the lookup
       tables.  Every entry is stored as 1 (see the TODO below).

    The matrix has shape ``(len(gene_list), len(gene_list))``, or
    ``(len(mirna_list), len(gene_list))`` when ``mirna_list`` is given,
    using the lists as they are after step 1.

    :param exp: object whose ``pk`` addresses the ``AllUpdated`` progress
        notifications; a falsy value switches them off.
    :param gene_list: identifiers defining the gene positions.
    :param mirna_list: optional identifiers defining the row positions.
    :param symmetrize: add the transpose and reset all stored values to 1.
        NOTE(review): adding the transpose needs a square matrix, so with
        ``mirna_list`` this is expected to fail unless both lists have the
        same length -- confirm callers pass ``symmetrize=False`` there.
    :param identifiers: when true, return a ``pd.SparseDataFrame`` labelled
        with the identifiers instead of the raw sparse matrix.
    :param tolower: lower-case both identifiers before the index lookup.
    :return: labelled sparse data frame, or a ``scipy.sparse`` matrix when
        ``identifiers`` is false.
    """
    from collections import defaultdict
    from wrappers.input.utils import find_refseqs

    def attach_debugger():
        # Development hook: connects to a remote pydevd debugger when the
        # CELERY_DEBUG setting is on.
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    def notify(comment):
        # Progress messages are addressed by the experiment's primary key.
        if exp:
            AllUpdated(exp.pk, comment=comment, silent=False, mode=NotifyMode.INFO).send()

    def to_refseq(names):
        # Replace each name by the first identifier found for it, keeping
        # the name itself when the lookup yields nothing.
        translated = []
        for name in names:
            rf = list(find_refseqs(name))
            translated.append(rf[0] if rf else name)
        return translated

    attach_debugger()
    log.debug(gene_list)
    if mirna_list:
        log.debug(mirna_list)

    regex = "^[A-Z][A-Z]_[a-zA-Z0-9.]*"
    refseq_like = sum(
        1 for gene in gene_list
        if re.match(regex, str(gene), re.IGNORECASE) is not None
    )
    if refseq_like < (len(gene_list) * 0.5):
        gene_list = to_refseq(gene_list)
    hasht = dict(zip(gene_list, range(len(gene_list))))

    mirna_hasht = dict()
    if mirna_list is not None:
        mirna_list = to_refseq(mirna_list)
        mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))

    # Group the interaction partners by the member that is resolved
    # against the row table.
    inter_hash = defaultdict(list)
    interactons = self.load_pairs()
    log.debug("transforming interactions")
    swap_members = mirna_list is not None and self.x2_unit == 'mirbase'
    for ix in range(len(interactons)):
        a, b, val = interactons.iloc[ix]
        if swap_members:
            inter_hash[b].append([a, val])
        else:
            inter_hash[a].append([b, val])
    notify(u"Transforming interaction matrix done")
    log.debug("transformation of interactions done")

    # The grouping key is resolved against the miRNA table when one is
    # given and against the gene table otherwise; the partner is always
    # resolved against the gene table.
    if mirna_list is None:
        source_index = hasht
        log_template = "translating gene %d"
        note_template = u"Translating gene %s of %s"
    else:
        source_index = mirna_hasht
        log_template = "translating miRNA %d"
        note_template = u"Translating miRNA %s of %s"

    source_hits = []
    target_hits = []
    size_hash = len(inter_hash)
    for count, (key, value) in enumerate(inter_hash.items(), 1):
        if count % 500 == 0:
            log.debug(log_template, count)
            notify(note_template % (count, size_hash))
        for refseq in find_refseqs(key):
            if refseq not in source_index:
                continue
            for (gene, strength) in value:
                for new_refseq in find_refseqs(gene):
                    if new_refseq not in hasht:
                        continue
                    gi = refseq
                    gj = new_refseq
                    if tolower:
                        # NOTE(review): membership was tested on the
                        # original spelling, so these lookups raise
                        # KeyError unless the lower-cased identifier is a
                        # key too -- confirm the intended behaviour.
                        gi = gi.lower()
                        gj = gj.lower()
                    source_hits.append(source_index[gi])
                    target_hits.append(hasht[gj])

    # Gene-gene mode stores the key as the column and the partner as the
    # row; miRNA mode stores the miRNA as the row.
    if mirna_list is None:
        rows, cols = target_hits, source_hits
        shape = (len(gene_list), len(gene_list))
    else:
        rows, cols = source_hits, target_hits
        shape = (len(mirna_list), len(gene_list))

    notify(u"%d interactions were found." % len(cols))

    # TODO fix for custom value of interactions
    inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), shape)

    if symmetrize:
        inters_matr = inters_matr + inters_matr.T
        # Dividing the stored values by themselves resets every one to 1.
        inters_matr.data /= inters_matr.data

    if not identifiers:
        return inters_matr

    inters_matr = inters_matr.tocsr()
    # NOTE(review): SparseDataFrame/SparseSeries only exist in pandas
    # releases before 1.0 -- confirm the pinned pandas version.
    sparse_df = pd.SparseDataFrame([
        pd.SparseSeries(inters_matr[i].toarray().ravel())
        for i in np.arange(inters_matr.shape[0])
    ])
    if mirna_list is None:
        index = gene_list[:sparse_df.shape[0]]
    else:
        index = mirna_list[:sparse_df.shape[0]]
    columns = gene_list[:sparse_df.shape[1]]
    attach_debugger()
    sparse_df.set_index([index], inplace=True)
    sparse_df.columns = columns
    return sparse_df
def get_matrix_for_platform(self, exp, gene_list, mirna_list=None,
                            symmetrize=True, identifiers=True, tolower=False):
    """Build an interaction matrix for the given gene (and miRNA) lists.

    Steps performed:

    1. If fewer than half of ``gene_list`` match the RefSeq-like pattern,
       the list is replaced by the first identifier ``find_refseqs``
       yields for each gene.  NOTE(review): genes without any identifier
       are dropped here, which shifts the positions of the genes after
       them -- confirm this is intended.
    2. The pairs from ``self.load_pairs()`` are grouped by one member; in
       miRNA mode with ``self.x2_unit == 'mirbase'`` the second member is
       the grouping key, otherwise the first.
    3. Both members are expanded with ``find_refseqs`` and one entry is
       recorded for every expanded combination found in the lookup
       tables.  Every entry is stored as 1 (see the TODO below).

    In gene-gene mode the matrix is square with side
    ``largest used index + 1``; in miRNA mode its shape is
    ``(largest row + 1, largest column + 1)``.  When nothing matches, an
    empty matrix shaped by the lengths of the lists is built instead of
    failing on ``max()`` of an empty sequence.

    :param exp: object whose ``pk`` addresses the ``AllUpdated`` progress
        notifications; ``None`` switches the notifications off.
    :param gene_list: identifiers defining the gene positions.
    :param mirna_list: optional identifiers defining the row positions.
    :param symmetrize: add the transpose and reset all stored values to 1.
        NOTE(review): adding the transpose needs a square matrix, so with
        ``mirna_list`` this is expected to fail unless the shape happens
        to be square -- confirm callers pass ``symmetrize=False`` there.
    :param identifiers: when true, return a dense data frame labelled with
        the identifiers instead of the raw sparse matrix.
    :param tolower: lower-case both identifiers before the index lookup.
    :return: data frame indexed by ``new_index``, or a ``scipy.sparse``
        matrix when ``identifiers`` is false.
    """
    if settings.CELERY_DEBUG:
        # Development hook: connects to a remote pydevd debugger.
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    from collections import defaultdict
    from wrappers.input.utils import find_refseqs

    def notify(comment):
        # Progress messages are addressed by the experiment's primary key.
        if exp is not None:
            AllUpdated(
                exp.pk,
                comment=comment,
                silent=False,
                mode=NotifyMode.INFO
            ).send()

    log.debug(gene_list)
    if mirna_list:
        log.debug(mirna_list)

    regex = "^[A-Z][A-Z]_[a-zA-Z0-9.]*"
    refseq_like = sum(
        1 for gene in gene_list
        if re.match(regex, str(gene), re.IGNORECASE) is not None
    )
    if refseq_like < (len(gene_list) * 0.5):
        new_g = []
        for gene in gene_list:
            rf = list(find_refseqs(gene))
            if rf:
                new_g.append(rf[0])
        gene_list = new_g
    hasht = dict(zip(gene_list, range(len(gene_list))))

    mirna_hasht = dict()
    if mirna_list is not None:
        mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))

    # Group the interaction partners by the member that is resolved
    # against the row table.
    inter_hash = defaultdict(list)
    interactons = self.load_pairs()
    log.debug("transforming interactions")
    swap_members = mirna_list is not None and self.x2_unit == 'mirbase'
    for ix in range(len(interactons)):
        a, b, val = interactons.iloc[ix]
        if swap_members:
            inter_hash[b].append([a, val])
        else:
            inter_hash[a].append([b, val])
    notify(u"Transforming interaction matrix done")
    log.debug("transformation of interactions done")

    # The grouping key is resolved against the miRNA table when one is
    # given and against the gene table otherwise; the partner is always
    # resolved against the gene table.
    if mirna_list is None:
        source_index = hasht
        log_template = "translating gene %d"
        note_template = u"Translating gene %s of %s"
    else:
        source_index = mirna_hasht
        log_template = "translating miRNA %d"
        note_template = u"Translating miRNA %s of %s"

    source_hits = []
    target_hits = []
    size_hash = len(inter_hash)
    for count, (key, value) in enumerate(inter_hash.items(), 1):
        if count % 500 == 0:
            log.debug(log_template, count)
            notify(note_template % (count, size_hash))
        for refseq in find_refseqs(key):
            if refseq not in source_index:
                continue
            for (gene, strength) in value:
                for new_refseq in find_refseqs(gene):
                    if new_refseq not in hasht:
                        continue
                    gi = refseq
                    gj = new_refseq
                    if tolower:
                        # NOTE(review): membership was tested on the
                        # original spelling, so these lookups raise
                        # KeyError unless the lower-cased identifier is a
                        # key too -- confirm the intended behaviour.
                        gi = gi.lower()
                        gj = gj.lower()
                    source_hits.append(source_index[gi])
                    target_hits.append(hasht[gj])

    # Gene-gene mode stores the key as the column and the partner as the
    # row; miRNA mode stores the miRNA as the row.
    if mirna_list is None:
        rows, cols = target_hits, source_hits
    else:
        rows, cols = source_hits, target_hits

    notify(u"%d interactions were found." % len(cols))

    # TODO fix for custom value of interactions
    if not rows:
        # max() of an empty sequence raises, so size the empty matrix by
        # the input lists instead.
        if mirna_list is None:
            shape = (len(gene_list), len(gene_list))
        else:
            shape = (len(mirna_list), len(gene_list))
        inters_matr = sp.coo_matrix(shape)
    else:
        if mirna_list is None:
            size = max(max(rows), max(cols)) + 1
            shape = (size, size)
        else:
            shape = (max(rows) + 1, max(cols) + 1)
        inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), shape)

    if symmetrize:
        inters_matr = inters_matr + inters_matr.T
        # Dividing the stored values by themselves resets every one to 1.
        inters_matr.data /= inters_matr.data

    if not identifiers:
        return inters_matr

    inters_matr = inters_matr.tocsr()
    # NOTE(review): SparseDataFrame/SparseSeries only exist in pandas
    # releases before 1.0 -- confirm the pinned pandas version.
    sparse_df = pd.SparseDataFrame([
        pd.SparseSeries(inters_matr[i].toarray().ravel())
        for i in np.arange(inters_matr.shape[0])
    ])
    sparse_df = sparse_df.to_dense()
    # Labels are taken before the helper column is added so that the
    # column count still equals the matrix width.
    if mirna_list is None:
        index = gene_list[:sparse_df.shape[0]]
    else:
        index = mirna_list[:sparse_df.shape[0]]
    columns = gene_list[:sparse_df.shape[1]]
    sparse_df['new_index'] = pd.Series(index, index=sparse_df.index)
    sparse_df.set_index(['new_index'], inplace=True)
    sparse_df.columns = columns
    return sparse_df