示例#1
0
class Ebseq(object):
    """
    Run an EBSeq differential-expression analysis through the embedded R
    session (``robjects.r``) and classify rows of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Store the analysis settings.
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: iterable with the names of the conditions (groups)
        :param repl: number of replicates, used as ``each=`` in R's rep()
        :param out: path of the file where R writes the status table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Index of the status field in a result row and the value that
        # marks the row as differentially expressed.
        self._exp_column = 1
        self._exp = "DE"

    def run_de(self, gene):
        """
        Tell whether a result row is flagged as differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when the status field equals the DE marker, else 0
        """
        return 1 if gene[self._exp_column] == self._exp else 0

    def run_ebseq(self):
        """
        Execute default analysis with EBSeq
        :return: None; the status table is written by R to the output path
        :raises RRuntimeError: re-raised after being reported
        """
        try:
            robjects.r('library("EBSeq")')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('m <- as.matrix(table)')
            # Quoted condition names, listed in the reverse of the order in
            # which they were supplied (same ordering as before).
            conditions = ", ".join(
                "'" + name + "'" for name in reversed(list(self._groups_name)))
            robjects.r('Sizes=MedianNorm(m)')
            robjects.r('EBOut=EBTest(Data=m, Conditions=as.factor(rep(c(' +
                       conditions + '),each=' + str(self._replic) +
                       ')), sizeFactors=Sizes, maxround=5)')
            robjects.r('EBDERes=GetDEResults(EBOut, FDR=0.05)')
            robjects.r('write.table(EBDERes$Status, file="' + self._output +
                       '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- EBSeq: is completed!")
        except RRuntimeError as rre:
            # The message used to name baySeq, which misled debugging.
            self._message.message_9("Error in EBSeq execution: " + str(rre))
            raise
示例#2
0
class LimmaVoom(object):
    """
    Run a limma-voom differential-expression analysis through the embedded
    R session (``robjects.r``) and classify rows of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Store the analysis settings.
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: iterable with the names of the conditions (groups)
        :param repl: number of replicates per condition
        :param out: path of the file where R writes the result table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Indexes of the log fold change and p-value fields in a result row,
        # followed by the thresholds applied to them in run_de.
        self._logfc_column = 2
        self._pvalue_column = 5
        self._logfc = 2
        self._pvalue = 0.05

    def run_de(self, gene):
        """
        Tell whether a result row is differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when |log fold change| reaches the threshold and the
                 p-value does not exceed its threshold, else 0
        """
        lfc = float(gene[self._logfc_column])
        pv = float(gene[self._pvalue_column])
        # Significant means a p-value at or BELOW the cut-off; the previous
        # ">=" test selected the non-significant rows instead.
        if (lfc >= self._logfc or lfc <= -self._logfc) and pv <= self._pvalue:
            return 1
        return 0

    def run_limmavoom(self):
        """
        Execute default analysis with Limma-voom
        :return: None; the result table is written by R to the output path
        :raises RRuntimeError: re-raised after being reported
        """
        if self._replic == 1:
            self._message.message_4(
                "limma-voom require more than one replics.")
            self._message.message_9("--- limma-voom: is kipped!")
            return
        try:
            robjects.r('library("edgeR")')
            robjects.r('library("limma")')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('m <- as.matrix(table)')
            robjects.r('nf = calcNormFactors(m, method = "TMM")')

            # One quoted label per sample: each group name repeated once
            # per replicate.
            labels = ",".join('"' + name + '"'
                              for name in self._groups_name
                              for _ in range(self._replic))
            robjects.r('condition = factor(c(' + labels + '))')

            # voom's argument is "lib.size"; it was misspelled "lib.ize".
            robjects.r(
                'voom.data <- voom(m, model.matrix(~factor(condition)), lib.size = colSums(m) * nf)'
            )
            robjects.r('voom.data$genes = rownames(m)')
            robjects.r(
                'voom.fitlimma = lmFit(voom.data, design=model.matrix(~factor(condition)))'
            )
            robjects.r('voom.fitbayes = eBayes(voom.fitlimma)')
            robjects.r('voom.pvalues = voom.fitbayes$p.value[, 2]')
            robjects.r(
                'voom.adjpvalues = p.adjust(voom.pvalues, method="BH")')
            design = ",".join(['1'] * self._replic + ['2'] * self._replic)
            robjects.r('design <- c(' + design + ')')
            # NOTE(review): "design" is a plain R vector, for which ncol()
            # yields NULL - confirm that coef=NULL is what is wanted here.
            robjects.r(
                'data <- topTable(voom.fitbayes, coef=ncol(design), number=1000000)'
            )
            robjects.r('write.table(data, file="' + self._output +
                       '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- limma-voom: is completed!")
        except RRuntimeError as rre:
            self._message.message_9("Error in limma-voom execution: " +
                                    str(rre))
            raise
示例#3
0
class EdgeR(object):
    """
    Run an edgeR differential-expression analysis through the embedded R
    session (``robjects.r``) and classify rows of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Define the edgeR object
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: iterable with the names of the conditions (groups)
        :param repl: number of replicates per condition (int)
        :param out: path of the file where R writes the result table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._column_result = [3, 4]
        self._min_result = []
        self._message = Message()
        # Indexes of the log fold change and p-value fields in a result row,
        # followed by the thresholds applied to them in run_de.
        self._logfc_colum = 1
        self._pvalue_colum = 3
        self._pvalue = 0.05
        self._logfc = 2

    def run_de(self, gene):
        """
        Tell whether a result row is differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when |log fold change| reaches the threshold and the
                 p-value does not exceed its threshold, else 0
        """
        lfc = float(gene[self._logfc_colum])
        pv = float(gene[self._pvalue_colum])
        # Significant means a p-value at or BELOW the cut-off; the previous
        # ">=" test selected the non-significant rows instead.
        if (lfc >= self._logfc or lfc <= -self._logfc) and pv <= self._pvalue:
            return 1
        return 0

    def run_edger(self):
        """
        Execute default analysis with edgeR
        :return: None; the result table is written by R to the output path
        :raises RRuntimeError: re-raised after being reported
        """
        try:
            finish_message = ""
            robjects.r('library("limma")')
            robjects.r('library("edgeR")')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE, sep = ",")')
            robjects.r('m <- as.matrix(table)')
            assert isinstance(self._replic, int)
            # One quoted label per sample: each group name repeated once
            # per replicate.
            labels = ", ".join("'" + name + "'"
                               for name in self._groups_name
                               for _ in range(self._replic))
            robjects.r('group <- c(' + labels + ')')
            robjects.r('y.dge <- DGEList(counts = m, group = group)')
            if self._replic < 1:
                self._message.message_4(" Replicates not found by edgeR. EdgeR should be executed manual form.")
            elif self._replic == 1:
                # Without replicates the dispersion cannot be estimated, so
                # a fixed value (bcv^2) is handed to exactTest.
                robjects.r('bcv <- 0.2')
                robjects.r('y.et <- exactTest(y.dge, dispersion = bcv^2)')
                robjects.r('y.tp <- topTags(y.et, n = 100000)')
                robjects.r('y.pvalues <- y.et$table$PValue')
                robjects.r('write.table(y.tp$table, "' + self._output +
                           '", sep = "\t", quote = FALSE)')
                finish_message = "--- edgeR without replicates is completed!"
            else:
                # These calls used a bare "r" name while the rest of the
                # class goes through robjects.r; use the same entry point.
                robjects.r('y.dge <- calcNormFactors(y.dge)')
                robjects.r('y.dge <- estimateDisp(y.dge)')
                robjects.r('y.dge <- estimateCommonDisp(y.dge)')
                robjects.r('y.et <- exactTest(y.dge)')
                robjects.r('y.tp <- topTags(y.et, n = 100000)')
                robjects.r('y.pvalues <- y.et$table$PValue')
                robjects.r('write.table(y.tp$table, "' + self._output +
                           '", sep = "\t", quote = FALSE)')
                finish_message = "--- edgeR with replicates is completed!"
            # Nothing ran when replic < 1, so there is no completion to report.
            if finish_message:
                self._message.message_9(finish_message)
        except RRuntimeError as rre:
            self._message.message_9("Error in edgeR execution: " + str(rre))
            raise
class ExperimentDao(object):
    """
    Object manager data of experiment: holds the experiment settings and
    loads them from a "KEY: value" configuration file.
    """
    def __init__(self):
        self._message = Message()
        self._file_conf = None
        # Keys recognised in the configuration file.
        self._name_par = "NAME"
        self._replic_parm = "REPLIC"
        self._group_number_parm = "GROUP_NUMBER"
        self._group_name_parm = "GROUP_NAMES"
        self._reference_parm = "REFERENCE_GENOME"
        self._read_directory_parm = "READS_DIRECTORY"
        self._group_directory_parm = "GROUP_DIRECTORIES"
        self._paired_end_parm = "PAIRED_END"
        self._threads_parm = "THREADS"
        self._count_mode_parm = "MODE"
        self._annotation_file_parm = "ANOTATION_FILE"
        self._annotation_type_parm = "ANOTATION_TYPE"
        self._output_parm = "OUTPUT"
        # Values used when a key is absent from the file.
        self._name = ""
        self._replic = []  # replaced by an int once REPLIC is read
        self._group_number = 0
        self._group_name = []
        self._reference = ""
        self._read_directory = ""
        self._group_directory = []
        self._paired_end = False
        self._threads = 0
        self._count_mode = ""
        self._annotation_file = ""
        self._annotation_type = ""
        self._output = ""

    @staticmethod
    def _split_list(text):
        """Split a comma-separated configuration value into a list."""
        return text.split(',')

    def read_configuration_file(self, file):
        """
        Read file and feed class attributes, any error terminates execution
        :param file: path to config file
        :return: void
        :raises ValueError: when REPLIC, GROUP_NUMBER or THREADS is not an
                            integer
        """
        self._message.message_9("- Reading configuration file.. ----")
        parms = {}

        # The context manager closes the file even if parsing fails.
        with open(file, 'r') as conf:
            for line in conf:
                entry = line.rstrip("\r\n")
                # Blank lines and comment lines carry no parameter.
                if not entry.strip() or entry.startswith("#"):
                    continue
                # Split on the first ": " only, so a value may itself
                # contain ": "; a key without value maps to "".
                key, _, value = entry.partition(": ")
                if key in parms:
                    self._message.message_9("Parameter  " + key +
                                            " is repeated!")
                else:
                    parms[key] = value

        # (configuration key, attribute to set, converter or None)
        # NOTE(review): PAIRED_END is stored as the raw string, so "False"
        # is truthy - confirm how callers interpret it.
        settings = (
            (self._name_par, "_name", None),
            (self._replic_parm, "_replic", int),
            (self._group_number_parm, "_group_number", int),
            (self._group_name_parm, "_group_name", self._split_list),
            (self._reference_parm, "_reference", None),
            (self._read_directory_parm, "_read_directory", None),
            (self._group_directory_parm, "_group_directory",
             self._split_list),
            (self._paired_end_parm, "_paired_end", None),
            (self._threads_parm, "_threads", int),
            (self._count_mode_parm, "_count_mode", None),
            (self._annotation_file_parm, "_annotation_file", None),
            (self._annotation_type_parm, "_annotation_type", None),
            (self._output_parm, "_output", None),
        )
        for key, attribute, convert in settings:
            if key in parms:
                raw = parms[key]
                setattr(self, attribute,
                        raw if convert is None else convert(raw))


# # # #================ CLASS TEST =====================================
# file = "dao/CONFIG_tool"
# exp = ExperimentDao()
# exp.read_configuration_file(file)
# print "---"
# print exp._name
示例#5
0
class DESeq (object):
    """
    Run DESeq analysis through the embedded R session (``robjects.r``) and
    classify rows of its result table.
    """
    def __init__(self, count, group, repl, out):
        """
        Define the DESeq object
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: iterable with the names of the conditions (groups)
        :param repl: number of replicates per condition (int)
        :param out: path of the file where R writes the result table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Indexes of the log fold change and p-value fields in a result row,
        # followed by the thresholds applied to them in run_de.
        self._logfc_column = 6
        self._pvalue_column = 7
        self._pvalue = 0.05
        self._logfc = 2

    def run_de(self, gene):
        """
        Tell whether a result row is differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when |log fold change| reaches the threshold and the
                 p-value does not exceed its threshold; 0 otherwise,
                 including when either field is not a number
        """
        try:
            fold = float(gene[self._logfc_column])
            p_value = float(gene[self._pvalue_column])
        except ValueError:
            return 0
        strong_change = fold >= self._logfc or fold <= -self._logfc
        if strong_change and p_value <= self._pvalue:
            return 1
        return 0

    def run_deseq(self):
        """
        Execute default analysis with DESeq
        :return: None; the result table is written by R to the output path
        :raises RRuntimeError: re-raised after being reported
        """
        try:
            for statement in ('library("parallel")',
                              'library("stats4")',
                              'library("BiocGenerics")',
                              'library("Biobase")',
                              'library("locfit")',
                              'library(DESeq)',
                              'library("lattice")'):
                robjects.r(statement)
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('m <- as.matrix(table)')
            assert isinstance(self._replic, int)
            quoted = ["'" + name + "'" for name in self._groups_name]
            # One label per sample: every group repeated once per replicate.
            per_sample = ", ".join(
                label for label in quoted for _ in range(self._replic))
            # The groups handed to nbinomTest, in reverse of the given order.
            contrast = ", ".join(reversed(quoted))
            robjects.r('condition = factor( c(' + per_sample + '))')
            robjects.r('cds <- newCountDataSet(m, condition)')
            robjects.r('cds <- estimateSizeFactors(cds)')
            # A single replicate gets the "blind" dispersion method.
            if self._replic == 1:
                robjects.r('cds <- estimateDispersions(cds, method="blind", fitType="local")')
            else:
                robjects.r('cds <- estimateDispersions(cds, fitType="local")')
            robjects.r('res <- nbinomTest(cds, ' + contrast + ')')
            robjects.r('write.table(res, file="' + self._output +
                       '", sep = "\t", quote = FALSE)')
        except RRuntimeError as rre:
            self._message.message_9("Error in DESeq execution: " + str(rre))
            raise rre

        self._message.message_9("--- DESeq: is completed!")

# =============================== CLASS TESTS ==================================
# inp = '/Volumes/SD128/bioconvergencia/reads_RNApa/kallisto_quant_RNApa_apa_1B_0B.csv'
# gr = ["0b", "pb"]
# rp = 2
# out = 'RNApa_apa_1B_0B-consexpression_deseq.csv'
# t = DESeq(inp, gr, rp, out)
# t.run_deseq() # We do not have DESeq in the required version
示例#6
0
class Noiseq(object):
    """
    Run a NOISeq analysis through the embedded R session (``robjects.r``)
    and classify rows of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Define the NOISeq object
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: sized collection with the names of the groups
        :param repl: number of replicates per group (int)
        :param out: path of the file where R writes the result table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # The index of the probability field depends on the group count.
        self._likelihood_column = len(group) + 3
        self._likelihood = 0.95

    def run_de(self, gene):
        """
        Tell whether a result row is differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when the probability field reaches the threshold; 0
                 otherwise, including when the field is not a number
        """
        try:
            probability = float(gene[self._likelihood_column])
        except ValueError:
            return 0
        return 1 if probability >= self._likelihood else 0

    def run_noiseq(self):
        """
        Execute default analysis with NOISeq
        :return: None; the result table is written by R to the output path
        :raises RRuntimeError: re-raised after being reported
        """
        try:
            for package in ("parallel", "splines", "Matrix",
                            "BiocGenerics", "Biobase", "NOISeq"):
                robjects.r('library("' + package + '")')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('table <- as.matrix(table)')
            assert isinstance(self._replic, int)
            tissues = []      # group name, once per sample
            tissue_runs = []  # group name + replicate number, per sample
            runs = []         # "R" + replicate number, per sample
            for name in self._groups_name:
                for number in range(1, self._replic + 1):
                    tissues.append("'" + name + "'")
                    tissue_runs.append("'" + name + str(number) + "'")
                    runs.append("'" + "R" + str(number) + "'")
            robjects.r('myfactors = data.frame(Tissue=c(' + ", ".join(tissues) +
                       '), TissueRun=c(' + ", ".join(tissue_runs) +
                       '), Run=c(' + ", ".join(runs) + '))')
            robjects.r('mydata <- readData(data = table, factors = myfactors)')
            robjects.r(
                'mynoiseq = noiseq(mydata, k = 0.5, factor = "Tissue", lc = 1, replicates = "technical")'
            )
            robjects.r('results <- head(mynoiseq@results)')
            # NOTE(review): R documents that write.csv ignores attempts to
            # set "sep" (with a warning) - confirm the output is meant to be
            # comma-separated.
            robjects.r('write.csv(results, file="' + self._output +
                       '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- NOISeq: is completed!")
        except RRuntimeError as rre:
            self._message.message_9("Error in NOISeq execution: " + str(rre))
            raise rre


#========================= CLASS TEST==============
# inp = 'UHR_vs_Brain_gencode_TopHat_NOISeq.csv'
# inp = 'consexpression_NOISeq.csv'
# grp = "g1", "g2"
# rep = 1
# out = 'consexpression_NOISeq_out.csv'
# b = Noiseq(inp, grp, rep, out)
# read_bay = open(inp, 'r')
# c_b = 0
# for line in iter(read_bay):
#     #print('--' + line)
#     if c_b > 0:
#        gene = line.split(",")
#        print(gene[0])
#        v = b.run_de(gene)
#        print('--> '+ str(v))
#     c_b += 1
示例#7
0
class SamSeq (object):
    """
    Run a SAMseq analysis (R package samr) through the embedded R session
    (``robjects.r``) and classify rows of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Init object SamSeq; also sets the R option warn=-1.
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: sized collection with the names of the groups
        :param repl: number of replicates, used as ``each=`` in R's rep()
        :param out: path of the file where R writes the result table
        """
        robjects.r['options'](warn=-1)
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        # R literal for resp.type; switched to "Multiclass" in run_samseq
        # when more than two groups are given.
        self._class = '"Two class unpaired"'
        self._message = Message()
        # Indexes of the fold change and q-value fields in a result row,
        # followed by the thresholds applied to them in run_de.
        self._fd_column = 4
        self._qvalue_column = 5
        self._qvalue = 1
        self._fd = 2

    def run_de(self, gene):
        """
        Tell whether a result row is differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when the fold change and the q-value are both within
                 their thresholds, else 0
        """
        fd = float(gene[self._fd_column])
        qv = float(gene[self._qvalue_column])
        # The q-value threshold must be applied to the q-value; it used to
        # be compared against the fold change, leaving qv unused.
        # NOTE(review): the fold change test is "<=" - confirm the intended
        # direction of this filter.
        if fd <= self._fd and qv <= self._qvalue:
            return 1
        return 0

    def run_samseq(self):
        """
        Execute default analysis with SAMSeq
        :return: None; the result table is written by R to the output path.
                 An RRuntimeError is reported through the message object
                 and not re-raised.
        """
        try:
            if len(self._groups_name) > 2:
                self._class = '"Multiclass"'

            robjects.r('library("samr")')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE, sep = ",")')
            robjects.r('m <- as.matrix(table)')

            labels = ",".join('"' + name + '"' for name in self._groups_name)
            robjects.r('SAMseq.test = SAMseq(m, as.factor(rep(c(' + labels +
                       '),each=' + str(self._replic) + ')), resp.type = ' +
                       self._class +
                       ', geneid = rownames(m), genenames = rownames(m), nperms = 100)')
            robjects.r('SAMseq.result.table = rbind(SAMseq.test$siggenes.table$genes.up, SAMseq.test$siggenes.table$genes.lo)')
            robjects.r('SAMseq.score = rep(0, nrow(m))')
            robjects.r('SAMseq.score[match(SAMseq.result.table[,1], rownames(m))] = as.numeric(SAMseq.result.table[,3])')
            robjects.r('SAMseq.FDR = rep(1, nrow(m))')
            robjects.r('SAMseq.FDR[match(SAMseq.result.table[,1], rownames(m))] = as.numeric(SAMseq.result.table[,5])/100')
            robjects.r('write.table(SAMseq.result.table, file="' +
                       self._output + '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- SAMSeq: is completed!")
        except RRuntimeError as rre:
            # Reported only: the re-raise was disabled in the original, so
            # a SAMseq failure does not abort the caller.
            self._message.message_9("Error in SAMSeq execution: " + str(rre))
示例#8
0
class BaySeq(object):
    """

    Commands to run BaySeq expression analysis through the embedded R
    session (``robjects.r``) and classify rows of its result table.
    """
    def __init__(self, count, group, repl, out):
        """
        Define the baySeq object
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: sized collection with the names of the groups
        :param repl: number of replicates per group (int)
        :param out: path of the file where R writes the result table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Both indexes are offset by the number of samples
        # (groups x replicates).
        self._likelihood_column = 2 + len(group) * repl
        self._fdr_de_column = 4 + len(group) * repl
        self._likelihood = 0.95
        self._fdr = 0.1

    def run_de(self, gene):
        """
        Tell whether a result row is differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when the FDR is within its threshold and the likelihood
                 is above its threshold; 0 otherwise, including when either
                 field is not a number
        """
        de = 0
        try:
            fdr = float(gene[self._fdr_de_column])
            like = float(gene[self._likelihood_column])
            if fdr <= self._fdr and like > self._likelihood:
                de = 1
        except ValueError:
            de = 0
        return de

    def _replicates_command(self):
        """Build the R assignment labelling every sample with its group."""
        labels = ", ".join("'" + name + "'"
                           for name in self._groups_name
                           for _ in range(self._replic))
        return 'replicates <- c(' + labels + ')'

    def _groups_command(self):
        """Build the R assignment holding the NDE and DE model vectors."""
        samples = len(self._groups_name) * self._replic
        # Each model needs one entry per sample. NDE used to get one entry
        # per group, which mismatched the data as soon as replic > 1.
        nde = ",".join(['1'] * samples)
        # NOTE(review): the DE model labels two groups only - confirm the
        # intended model when more than two groups are supplied.
        de = ",".join(['1'] * self._replic + ['2'] * self._replic)
        return 'groups <- list(NDE = c(' + nde + '), DE = c(' + de + '))'

    def run_bayseq(self):
        """
        Execute default analysis with baySeq
        :return: None; the result table is written by R to the output path.
                 An RRuntimeError is reported through the message object
                 and not re-raised.
        """
        try:
            for package in ("parallel", "stats4", "BiocGenerics",
                            "S4Vectors", "IRanges", "GenomeInfoDb", "abind",
                            "perm", "GenomicRanges", "baySeq"):
                robjects.r('library("' + package + '")')

            # R's null object is NULL; "NUL" is an undefined name and made
            # the fallback branch fail.
            robjects.r(
                'if(require("parallel")) cl <- makeCluster(4) else cl <- NULL')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors = FALSE)')
            robjects.r('m <- as.matrix(table)')
            assert isinstance(self._replic, int)
            robjects.r(self._replicates_command())
            robjects.r(self._groups_command())
            robjects.r(
                'CD <- new("countData", data = m, replicates = replicates, groups = groups)'
            )
            robjects.r('libsizes(CD) <- getLibsizes(CD)')
            robjects.r(
                'CD <- getPriors.NB(CD, samplesize = 1000, estimation = "QL", cl = cl, equalDispersions = TRUE)'
            )
            robjects.r(
                'CD <- getLikelihoods(CD, prs=c(0.5, 0.5), pET="BIC", cl=cl)')
            robjects.r(
                'write.table(topCounts(CD, group = "DE", number = 65000, normaliseData = TRUE), "'
                + self._output + '", sep="\t", quote = FALSE)')
            self._message.message_9("--- baySeq is completed!")
        except RRuntimeError as rre:
            # Reported only: the re-raise was disabled in the original, so
            # a baySeq failure does not abort the caller.
            self._message.message_9("Error in baySeq execution: " + str(rre))


#========================= CLASS TEST==============
# inp = '/home/juliana/Dropbox/UTFPR/PPGBIOINFO/Projeto/results_gencode/TopHat_results/bayseq/UHR_vs_Brain_gencode_TopHat_baySeq.csv'
# grp = "g1", "g2"
# rep = 7
# out = '/home/juliana/Documentos/Projeto_Juliana/Datasets/consexpression_baySeq_out.csv'
# b = BaySeq(inp, grp, rep, out)
# read_bay = open(inp, 'r')
# c_b = 1
# for line in iter(read_bay):
#     #print('--' + line)
#     if c_b > 0:
#        gene = line.split("\t")
#        print(gene[0])
#        v = b.run_de(gene)
#        print('--> '+ str(v))
#     c_b += 1
示例#9
0
class MappBo(object):
    """
    This class make rules of validate information and command, to execute Mapp tools
    """
    def __init__(self, mapp):
        """
        :param mapp: MappVo instance describing the mapping run
        """
        assert isinstance(mapp, MappVo)
        self._map_vo = mapp
        self._reads_file = []
        self.message = Message()

    def threads_conf(self, threads_vo):
        """
        Lower a thread request that exceeds what the system offers.
        :param threads_vo: number of threads
        :return: void
        """
        threads_sys = multiprocessing.cpu_count()
        # Only a request ABOVE the CPU count needs adjusting; the previous
        # "<" test rewrote every request that was already valid.
        if threads_vo > threads_sys:
            # Never drop to zero threads on a single-CPU system.
            adjusted = max(1, threads_sys - 1)
            self._map_vo._threads_value = adjusted
            self.message.message_9("The threads nunber defined is " +
                                   str(threads_vo) +
                                   ", but the system have only " +
                                   str(threads_sys))
            self.message.message_9("---> Number of threads was change to " +
                                   str(adjusted))
        self.message.message_9("Successful! Threads configuration is ok!")

    def execute_mapp(self):
        """
        Execute the command: 0 is ok, 1 is fail mapped task
        :return: int, the exit status of the mapping command
        """
        self.threads_conf(self._map_vo._threads_value)
        self._map_vo._index_name = self.make_bowtie2_index(
            self._map_vo._index_name)
        command = self._map_vo.to_string()
        # NOTE(review): the command is a single string run through the
        # shell - make sure its parts never come from untrusted input.
        return subprocess.call(command, shell=True)

    def make_bowtie2_index(self, index):
        """
        Execute command to make a bowtie2 index if do not exists
        :param index: fasta file reference to mapp
        :return: name of generated index, or "" when the build fails
        """
        # Look for the ".f*" extension in the file name only, so a ".f"
        # inside a directory name is not mistaken for it; when there is no
        # such extension the path is used unchanged (slicing with the -1
        # returned by rfind used to drop its last character).
        filename = os.path.basename(index)
        dot = filename.rfind('.f')
        if dot == -1:
            name = index
        else:
            name = index[:len(index) - len(filename) + dot]
        if os.path.isfile(name + ".1.bt2"):
            return name
        try:
            # Argument list instead of a shell string: paths with spaces or
            # shell metacharacters are passed through untouched.
            status = subprocess.call(["bowtie2-build", index, name])
        except OSError:
            # bowtie2-build could not be started; treat as a failed build.
            status = 1
        if status == 0:
            return name
        self.message.message_4("Error in index build")
        return ""


# #===== CLASS TESTS =====================
# name = "Bowtie2"
# index_name = "/home/juliana/Documents/Projeto_Juliana/Datasets/Referencias/GRCh38.p5/GCA_000001405.20_GRCh38.p5_genomic.fna"
# threads_value = 3
# reads1_name = "/home/juliana/Documents/Eliandro-UEL/E1_S1_L001_R1_001_prinseq_1.fastq"
# reads2_name = "/home/juliana/Documents/Eliandro-UEL/E1_S1_L001_R2_001_prinseq_2.fastq"
# output_name = "/home/juliana/Documents/Testes_RNATool/eliandro_uel.sam"
# map = vo.MappVo.MappVo(name,index_name,reads1_name, reads2_name, threads_value,output_name,"",False)
# mapbo = MappBo(map)
# mapbo.make_bowtie2_index(index_name)
# # map.parm_mapp()
# # teste = map.to_string()
# # print teste
# # print teste
示例#10
0
class BaySeq(object):
    """

    Commands to run BaySeq expression analysis through the embedded R
    session (``robjects.r``) and classify rows of its result table.
    """
    def __init__(self, count, group, repl, output):
        """
        Define the baySeq object
        :param count: path of the CSV count table loaded with R's read.csv
        :param group: sized collection with the names of the groups
        :param repl: number of replicates per group (int)
        :param output: path of the file where R writes the result table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = output
        self._message = Message()
        # Both indexes are offset by the number of samples
        # (groups x replicates).
        self._likelihood_column = 2 + len(group) * repl
        self._fdr_de_column = 4 + len(group) * repl
        self._likelihood = 0.95
        self._fdr = 0.1

    def run_de(self, gene):
        """
        Tell whether a result row is differentially expressed.
        :param gene: sequence with the fields of one result row
        :return: 1 when the FDR is within its threshold and the likelihood
                 is above its threshold; 0 otherwise, including when either
                 field is not a number
        """
        de = 0
        try:
            fdr = float(gene[self._fdr_de_column])
            like = float(gene[self._likelihood_column])
            if fdr <= self._fdr and like > self._likelihood:
                de = 1
        except ValueError:
            de = 0
        return de

    def _replicates_command(self):
        """Build the R assignment labelling every sample with its group."""
        labels = ", ".join("'" + name + "'"
                           for name in self._groups_name
                           for _ in range(self._replic))
        return 'replicates <- c(' + labels + ')'

    def _groups_command(self):
        """Build the R assignment holding the NDE and DE model vectors."""
        samples = len(self._groups_name) * self._replic
        # Each model needs one entry per sample. NDE used to get one entry
        # per group, which mismatched the data as soon as replic > 1.
        nde = ",".join(['1'] * samples)
        # NOTE(review): the DE model labels two groups only - confirm the
        # intended model when more than two groups are supplied.
        de = ",".join(['1'] * self._replic + ['2'] * self._replic)
        return 'groups <- list(NDE = c(' + nde + '), DE = c(' + de + '))'

    def run_bayseq(self):
        """
        Execute default analysis with baySeq
        :return: None; the result table is written by R to the output path
        :raises RRuntimeError: re-raised after being reported
        """
        try:
            for package in ("parallel", "stats4", "BiocGenerics",
                            "S4Vectors", "IRanges", "GenomeInfoDb", "abind",
                            "GenomicRanges", "baySeq"):
                robjects.r('library("' + package + '")')

            # R's null object is NULL; "NUL" is an undefined name and made
            # the fallback branch fail.
            robjects.r(
                'if(require("parallel")) cl <- makeCluster(4) else cl <- NULL')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors = FALSE)')
            robjects.r('m <- as.matrix(table)')
            assert isinstance(self._replic, int)
            robjects.r(self._replicates_command())
            # The leftover debug print of this command was removed.
            robjects.r(self._groups_command())
            robjects.r(
                'CD <- new("countData", data = m, replicates = replicates, groups = groups)'
            )
            robjects.r('libsizes(CD) <- getLibsizes(CD)')
            robjects.r(
                'CD <- getPriors.NB(CD, samplesize = 1000, estimation = "QL", cl = cl, equalDispersions = TRUE)'
            )
            robjects.r(
                'CD <- getLikelihoods(CD, prs=c(0.5, 0.5), pET="BIC", cl=cl)')
            robjects.r(
                'write.table(topCounts(CD, group = "DE", number = 65000, normaliseData = TRUE), "'
                + self._output + '", sep="\t", quote = FALSE)')
            self._message.message_9("--- baySeq is completed!")
        except RRuntimeError as rre:
            self._message.message_9("Error in baySeq execution: " + str(rre))
            raise