Python PlotGroupOfSNPs примеры использования

Язык программирования: Python

Пространство имен/Пакет: PlotGroupOfSNPs

Класс/Тип: PlotGroupOfSNPs

Примеров на hotexamples.com: 6

Python PlotGroupOfSNPs — найдено 6 примеров. Это лучшие примеры кода на Python для PlotGroupOfSNPs.PlotGroupOfSNPs, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

findOutWhichPhenotypeColumn(2)

Пример #1

Показать файл

    def removeUnPhenotypedSNPData(
        clf, snpData, header_phen, strain_acc_list_phen, data_matrix_phen, phenotype_method_id_ls
    ):
        """
        Drop the rows of snpData whose ecotype has no phenotype value at all.

        2010-2-25
            remove un-phenotyped ecotypes from the SNP data in order to keep the snp dataset small

        An ecotype counts as phenotyped when at least one of the selected
        phenotype columns holds something other than the string "NA".
        The selected columns are those matching phenotype_method_id_ls, or
        every column when that list is empty.

        Returns the SNPData produced by SNPData.keepRowsByRowID().
        """
        sys.stderr.write("Removing un-phenotyped ecotypes from the SNP data ...")
        phenData = SNPData(header=header_phen, strain_acc_list=strain_acc_list_phen, data_matrix=data_matrix_phen)
        if not phenotype_method_id_ls:
            # no restriction given: every phenotype column is considered
            which_phenotype_ls = range(len(phenData.col_id_ls))
        else:
            which_phenotype_ls = PlotGroupOfSNPs.findOutWhichPhenotypeColumn(phenData, Set(phenotype_method_id_ls))

        # collect the ecotypes that have a value in at least one selected column
        phenotyped_ecotype_id_set = set()
        for row_index, ecotype_id in enumerate(phenData.row_id_ls):
            # 2010-2-25 phenotype values are raw strings, so missing data is the literal "NA"
            has_phenotype = any(
                phenData.data_matrix[row_index][col_index] != "NA" for col_index in which_phenotype_ls
            )
            if has_phenotype:
                phenotyped_ecotype_id_set.add(ecotype_id)

        total_row_count = len(snpData.row_id_ls)
        # 2010-2-21 each SNP row id is a tuple: 1st element is ecotype_id, 2nd is array id
        row_ids_to_be_kept = set(
            row_id for row_id in snpData.row_id_ls if row_id[0] in phenotyped_ecotype_id_set
        )
        snpData = SNPData.keepRowsByRowID(snpData, row_ids_to_be_kept)
        sys.stderr.write("%s removed. Done.\n" % (total_row_count - len(row_ids_to_be_kept)))
        return snpData

Пример #2

Показать файл

    def readInData(
        cls, phenotype_fname, input_fname, eigen_vector_fname, phenotype_method_id_ls, test_type=1, report=0
    ):
        """
        Read the SNP and phenotype files and bundle them into one PassingData.

        2010-2-25
            call removeUnPhenotypedSNPData() to shrink the snp dataset by removing un-phenotyped ecotypes
        2009-3-20
            refactored out of run(), easy for MpiAssociation.py to call

        PC_matrix comes from eigen_vector_fname when given; otherwise it is
        computed via pca_module for test_type 4 and is None for other test types.

        Returns a PassingData with snpData (allele-index converted), phenData,
        PC_matrix, which_phenotype_ls and phenotype_method_id_ls.
        """
        header, strain_acc_list, category_list, data_matrix = read_data(input_fname)
        snpData = SNPData(
            header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix
        )

        phenotype_fields = read_data(phenotype_fname, turn_into_integer=0)
        header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = phenotype_fields
        snpData = cls.removeUnPhenotypedSNPData(
            snpData, header_phen, strain_acc_list_phen, data_matrix_phen, phenotype_method_id_ls
        )

        # 0 (NA) or -2 (untouched) is all converted to -2 as 0 is used to denote allele
        newSnpData, allele2index_ls = snpData.convertSNPAllele2Index(report)
        newSnpData.header = snpData.header

        # NOTE(review): strain_acc_list is the list read from input_fname, i.e. before
        # removeUnPhenotypedSNPData() filtered snpData, while the row labels below use
        # snpData.strain_acc_list. Confirm that both describe the same rows.
        data_matrix_phen = cls.get_phenotype_matrix_in_data_matrix_order(
            strain_acc_list, strain_acc_list_phen, data_matrix_phen
        )
        phenData = SNPData(header=header_phen, strain_acc_list=snpData.strain_acc_list, data_matrix=data_matrix_phen)

        if eigen_vector_fname:
            PC_matrix = cls.getPCFromFile(eigen_vector_fname).PC_matrix
        elif test_type == 4:
            # eigen_vector_fname not given for this test_type: calculate the PCs here
            import pca_module

            PC_matrix, _loadings, _explained_var = pca_module.PCA_svd(newSnpData.data_matrix, standardize=False)
        else:
            PC_matrix = None

        del snpData
        if not phenotype_method_id_ls:
            # no restriction given: take all phenotypes
            which_phenotype_ls = range(len(phenData.col_id_ls))
        else:
            which_phenotype_ls = PlotGroupOfSNPs.findOutWhichPhenotypeColumn(phenData, Set(phenotype_method_id_ls))

        return PassingData(
            snpData=newSnpData,
            phenData=phenData,
            PC_matrix=PC_matrix,
            which_phenotype_ls=which_phenotype_ls,
            phenotype_method_id_ls=phenotype_method_id_ls,
        )

Пример #3

Показать файл

    def inputNodePrepare(self, snp_info=None):
        """
        Load the SNP, SNP-context and phenotype inputs and pickle them for the other nodes.

        2009-2-16
            get phenData.phenotype_method_id_ls in the same order as phenData.col_id_ls
        2009-2-11
            refactored out of run()

        Side effect: sets self.phenotype_index_ls to the columns matching
        self.phenotype_method_id_ls, or to every phenotype column when none match.

        Arguments:
            snp_info -- stored unchanged in the PassingData objects built here.

        Returns a PassingData with snpData_pickle, other_data_pickle,
        output_node_data_pickle (cPickle strings, highest protocol) and params_ls.
        """
        header, strain_acc_list, category_list, data_matrix = read_data(
            self.input_fname)
        # category_list is not used to facilitate row-id matching
        snpData = SNPData(header=header, strain_acc_list=strain_acc_list,
                          data_matrix=data_matrix, turn_into_array=1)

        # NOTE(review): unpickling can execute arbitrary code, so snps_context_fname
        # must point at a trusted file.
        picklef = open(self.snps_context_fname)
        try:
            snps_context_wrapper = cPickle.load(picklef)
        finally:
            # close the handle explicitly instead of relying on garbage collection
            picklef.close()
        gene_id2snps_id_ls = self.get_gene_id2snps_id_ls(snps_context_wrapper)
        del snps_context_wrapper
        # sorted() also works when keys() does not return a list
        gene_id_ls = sorted(gene_id2snps_id_ls.keys())

        header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(
            self.phenotype_fname, turn_into_integer=0)
        phenData = SNPData(header=header_phen,
                           strain_acc_list=strain_acc_list_phen,
                           data_matrix=data_matrix_phen)
        # re-order the phenotype rows to follow the row order of the SNP matrix
        phenData.data_matrix = Kruskal_Wallis.get_phenotype_matrix_in_data_matrix_order(
            snpData.row_id_ls, phenData.row_id_ls, phenData.data_matrix)
        phenData.phenotype_method_id_ls = get_phenotype_method_id_lsFromPhenData(
            phenData)  # 2009-2-16

        self.phenotype_index_ls = PlotGroupOfSNPs.findOutWhichPhenotypeColumn(
            phenData, Set(self.phenotype_method_id_ls))

        if not self.phenotype_index_ls:
            # nothing selected: fall back to all phenotype columns
            self.phenotype_index_ls = range(len(phenData.col_id_ls))

        pdata = PassingData(gene_id_ls=gene_id_ls, gene_id2snps_id_ls=gene_id2snps_id_ls,
                            phenotype_index_ls=self.phenotype_index_ls, snp_info=snp_info)
        params_ls = self.generate_params(self.gene_id_fname, pdata,
                                         self.block_size)

        other_data = PassingData(gene_id2snps_id_ls=gene_id2snps_id_ls, gene_id_ls=pdata.gene_id_ls,
                                 phenData=phenData, phenotype_index_ls=self.phenotype_index_ls,
                                 snp_info=snp_info)
        other_data_pickle = cPickle.dumps(other_data, -1)
        del other_data

        output_node_data = PassingData(phenotype_label_ls=phenData.col_id_ls,
                                       phenotype_index_ls=self.phenotype_index_ls)
        output_node_data_pickle = cPickle.dumps(output_node_data, -1)

        snpData_pickle = cPickle.dumps(snpData, -1)
        # drop the unpickled copies once their pickled form exists
        del snpData, data_matrix
        return_data = PassingData(snpData_pickle=snpData_pickle, other_data_pickle=other_data_pickle,
                                  output_node_data_pickle=output_node_data_pickle, params_ls=params_ls)
        return return_data

Пример #4

Показать файл

	def inputNodePrepare(self, snp_info=None):
		"""
		Load the SNP, SNP-context and phenotype inputs and pickle them for the other nodes.

		2009-2-16
			get phenData.phenotype_method_id_ls in the same order as phenData.col_id_ls
		2009-2-11
			refactored out of run()

		Side effect: sets self.phenotype_index_ls to the columns matching
		self.phenotype_method_id_ls, or to every phenotype column when none match.

		Arguments:
			snp_info -- stored unchanged in the PassingData objects built here.

		Returns a PassingData with snpData_pickle, other_data_pickle,
		output_node_data_pickle (cPickle strings, highest protocol) and params_ls.
		"""
		header, strain_acc_list, category_list, data_matrix = read_data(self.input_fname)
		# category_list is not used to facilitate row-id matching
		snpData = SNPData(header=header, strain_acc_list=strain_acc_list, \
						data_matrix=data_matrix, turn_into_array=1)
		
		# NOTE(review): unpickling can execute arbitrary code, so snps_context_fname
		# must point at a trusted file.
		picklef = open(self.snps_context_fname)
		try:
			snps_context_wrapper = cPickle.load(picklef)
		finally:
			# close the handle explicitly instead of relying on garbage collection
			picklef.close()
		gene_id2snps_id_ls = self.get_gene_id2snps_id_ls(snps_context_wrapper)
		del snps_context_wrapper
		# sorted() also works when keys() does not return a list
		gene_id_ls = sorted(gene_id2snps_id_ls.keys())
		
		header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(self.phenotype_fname, turn_into_integer=0)
		phenData = SNPData(header=header_phen, strain_acc_list=strain_acc_list_phen, data_matrix=data_matrix_phen)
		# re-order the phenotype rows to follow the row order of the SNP matrix
		phenData.data_matrix = Kruskal_Wallis.get_phenotype_matrix_in_data_matrix_order(snpData.row_id_ls, phenData.row_id_ls, phenData.data_matrix)
		phenData.phenotype_method_id_ls = get_phenotype_method_id_lsFromPhenData(phenData)	#2009-2-16
		
		self.phenotype_index_ls = PlotGroupOfSNPs.findOutWhichPhenotypeColumn(phenData, Set(self.phenotype_method_id_ls))
		
		if not self.phenotype_index_ls:
			# nothing selected: fall back to all phenotype columns
			self.phenotype_index_ls = range(len(phenData.col_id_ls))
		
		pdata = PassingData(gene_id_ls=gene_id_ls, gene_id2snps_id_ls=gene_id2snps_id_ls, \
						phenotype_index_ls=self.phenotype_index_ls, snp_info=snp_info)
		params_ls = self.generate_params(self.gene_id_fname, pdata, self.block_size)
		
		other_data = PassingData(gene_id2snps_id_ls=gene_id2snps_id_ls, gene_id_ls=pdata.gene_id_ls, phenData=phenData, \
								phenotype_index_ls=self.phenotype_index_ls, snp_info=snp_info)
		other_data_pickle = cPickle.dumps(other_data, -1)
		del other_data
		
		output_node_data = PassingData(phenotype_label_ls=phenData.col_id_ls, \
								phenotype_index_ls=self.phenotype_index_ls)
		output_node_data_pickle = cPickle.dumps(output_node_data, -1)
		
		snpData_pickle = cPickle.dumps(snpData, -1)
		# drop the unpickled copies once their pickled form exists
		del snpData, data_matrix
		return_data = PassingData(snpData_pickle=snpData_pickle, other_data_pickle=other_data_pickle,\
								output_node_data_pickle=output_node_data_pickle, params_ls=params_ls)
		return return_data

Пример #5

Показать файл

    def run(self):
        """
        Convert the SNP matrix (and optionally one phenotype) into .geno/.ind/.snp/.pheno files.

        2008-12-02

        Files written, all named after self.output_fname_prefix:
            <prefix>_<phenotype label>.pheno -- one phenotype value per line; only written when
                both self.phenotype_fname and self.phenotype_method_id are set. Missing values
                become 9 for binary phenotypes and -100.0 otherwise.
            <prefix>.geno -- one line per SNP, one character per accession:
                '0' for allele 0, '2' for allele 1, '9' for anything else.
            <prefix>.ind -- one tab-delimited row per accession: id, 'U', 'Case'.
            <prefix>.snp -- one tab-delimited row per SNP: id, chromosome, 0.0, position,
                major allele, minor allele.
        NOTE(review): the layout looks like EIGENSTRAT input -- confirm against the consumer.

        Every output file is closed explicitly, also when an error interrupts the run.
        """
        if self.debug:
            import pdb
            pdb.set_trace()

        delimiter = figureOutDelimiter(self.input_fname, report=self.report)
        header, strain_acc_list, category_list, data_matrix = read_data(
            self.input_fname, delimiter=delimiter)

        if self.array_id_2nd_column:
            snpData = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list,
                              data_matrix=data_matrix)
        else:
            # ignore category_list
            snpData = SNPData(header=header, strain_acc_list=strain_acc_list,
                              data_matrix=data_matrix)

        newSnpData, allele_index2allele_ls = snpData.convert2Binary(
            self.report)

        if self.phenotype_fname and self.phenotype_method_id:
            header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(
                self.phenotype_fname, turn_into_integer=0)
            # row label is that of the SNP matrix, because the phenotype matrix is re-ordered that way
            phenData = SNPData(
                header=header_phen,
                strain_acc_list=newSnpData.strain_acc_list,
                data_matrix=data_matrix_phen
            )
            # tricky: the original phenotype row order (strain_acc_list_phen) is the source order here
            phenData.data_matrix = Kruskal_Wallis.get_phenotype_matrix_in_data_matrix_order(
                newSnpData.row_id_ls, strain_acc_list_phen,
                phenData.data_matrix)

            phenotype_col_index = PlotGroupOfSNPs.findOutWhichPhenotypeColumn(
                phenData, Set([self.phenotype_method_id]))[0]
            phenotype_label = phenData.col_id_ls[phenotype_col_index]
            phenotype_f = open(
                '%s_%s.pheno' %
                (self.output_fname_prefix, phenotype_label.replace('/', '_')),
                'w')
            try:
                for phenotype_value in phenData.data_matrix[:, phenotype_col_index]:
                    # binary and non-binary have different NA designator
                    if self.phenotype_is_binary:
                        if numpy.isnan(phenotype_value):
                            phenotype_value = 9
                        else:
                            phenotype_value = int(phenotype_value)
                    elif numpy.isnan(phenotype_value):
                        phenotype_value = -100.0
                    phenotype_f.write('%s\n' % phenotype_value)
            finally:
                phenotype_f.close()

        genotype_f = ind_f = snp_f = None
        try:
            genotype_f = open('%s.geno' % self.output_fname_prefix, 'w')
            ind_f = open('%s.ind' % self.output_fname_prefix, 'w')
            snp_f = open('%s.snp' % self.output_fname_prefix, 'w')
            ind_writer = csv.writer(ind_f, delimiter='\t')
            snp_writer = csv.writer(snp_f, delimiter='\t')

            # transpose it, so that each row is one SNP and each column one accession
            newSnpData = transposeSNPData(newSnpData)

            no_of_rows = len(newSnpData.data_matrix)
            no_of_cols = len(newSnpData.data_matrix[0])

            # write out the accessions once, up front
            for j in range(no_of_cols):
                ind_writer.writerow([newSnpData.col_id_ls[j], 'U', 'Case'])

            for i in range(no_of_rows):
                snp_id = newSnpData.row_id_ls[i]
                # named "chromosome" so the builtin chr() is not shadowed
                chromosome, pos = snp_id.split('_')
                allele1 = allele_index2allele_ls[i][0]  # major allele
                allele2 = allele_index2allele_ls[i][1]  # minor allele
                snp_writer.writerow([snp_id, chromosome, 0.0, pos, allele1, allele2])

                # collect the per-accession codes and join once instead of repeated +=
                geno_codes = []
                for j in range(no_of_cols):
                    allele = newSnpData.data_matrix[i][j]
                    if allele == 0:
                        geno_codes.append('0')
                    elif allele == 1:
                        geno_codes.append('2')
                    else:
                        geno_codes.append('9')
                genotype_f.write(''.join(geno_codes) + '\n')
        finally:
            # close whichever files were opened, even if a later step failed
            for output_f in (genotype_f, ind_f, snp_f):
                if output_f is not None:
                    output_f.close()

Пример #6

Показать файл

	def run(self):
		"""
		Convert the SNP matrix (and optionally one phenotype) into .geno/.ind/.snp/.pheno files.

		2008-12-02

		Files written, all named after self.output_fname_prefix:
			<prefix>_<phenotype label>.pheno -- one phenotype value per line; only written when
				both self.phenotype_fname and self.phenotype_method_id are set. Missing values
				become 9 for binary phenotypes and -100.0 otherwise.
			<prefix>.geno -- one line per SNP, one character per accession:
				'0' for allele 0, '2' for allele 1, '9' for anything else.
			<prefix>.ind -- one tab-delimited row per accession: id, 'U', 'Case'.
			<prefix>.snp -- one tab-delimited row per SNP: id, chromosome, 0.0, position,
				major allele, minor allele.
		NOTE(review): the layout looks like EIGENSTRAT input -- confirm against the consumer.

		Every output file is closed explicitly, also when an error interrupts the run.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		
		delimiter = figureOutDelimiter(self.input_fname, report=self.report)
		header, strain_acc_list, category_list, data_matrix = read_data(self.input_fname, delimiter=delimiter)
		
		if self.array_id_2nd_column:
			snpData = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list,\
							data_matrix=data_matrix)
		else:
			# ignore category_list
			snpData = SNPData(header=header, strain_acc_list=strain_acc_list,\
							data_matrix=data_matrix)
		
		newSnpData, allele_index2allele_ls = snpData.convert2Binary(self.report)
		
		if self.phenotype_fname and self.phenotype_method_id:
			header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(self.phenotype_fname, turn_into_integer=0)
			# row label is that of the SNP matrix, because the phenotype matrix is re-ordered that way
			phenData = SNPData(header=header_phen, strain_acc_list=newSnpData.strain_acc_list, data_matrix=data_matrix_phen)
			# tricky: the original phenotype row order (strain_acc_list_phen) is the source order here
			phenData.data_matrix = Kruskal_Wallis.get_phenotype_matrix_in_data_matrix_order(newSnpData.row_id_ls, strain_acc_list_phen, phenData.data_matrix)
			
			phenotype_col_index = PlotGroupOfSNPs.findOutWhichPhenotypeColumn(phenData, Set([self.phenotype_method_id]))[0]
			phenotype_label = phenData.col_id_ls[phenotype_col_index]
			phenotype_f = open('%s_%s.pheno'%(self.output_fname_prefix, phenotype_label.replace('/', '_')), 'w')
			try:
				for phenotype_value in phenData.data_matrix[:,phenotype_col_index]:
					# binary and non-binary have different NA designator
					if self.phenotype_is_binary:
						if numpy.isnan(phenotype_value):
							phenotype_value = 9
						else:
							phenotype_value = int(phenotype_value)
					elif numpy.isnan(phenotype_value):
						phenotype_value = -100.0
					phenotype_f.write('%s\n'%phenotype_value)
			finally:
				phenotype_f.close()
		
		genotype_f = ind_f = snp_f = None
		try:
			genotype_f = open('%s.geno'%self.output_fname_prefix, 'w')
			ind_f = open('%s.ind'%self.output_fname_prefix, 'w')
			snp_f = open('%s.snp'%self.output_fname_prefix, 'w')
			ind_writer = csv.writer(ind_f, delimiter='\t')
			snp_writer = csv.writer(snp_f, delimiter='\t')
			
			# transpose it, so that each row is one SNP and each column one accession
			newSnpData = transposeSNPData(newSnpData)
			
			no_of_rows = len(newSnpData.data_matrix)
			no_of_cols = len(newSnpData.data_matrix[0])
			
			# write out the accessions once, up front
			for j in range(no_of_cols):
				ind_writer.writerow([newSnpData.col_id_ls[j], 'U', 'Case'])
			
			for i in range(no_of_rows):
				snp_id = newSnpData.row_id_ls[i]
				# named "chromosome" so the builtin chr() is not shadowed
				chromosome, pos = snp_id.split('_')
				allele1 = allele_index2allele_ls[i][0]	#major allele
				allele2 = allele_index2allele_ls[i][1]	#minor allele
				snp_writer.writerow([snp_id, chromosome, 0.0, pos, allele1, allele2])
				
				# collect the per-accession codes and join once instead of repeated +=
				geno_codes = []
				for j in range(no_of_cols):
					allele = newSnpData.data_matrix[i][j]
					if allele==0:
						geno_codes.append('0')
					elif allele==1:
						geno_codes.append('2')
					else:
						geno_codes.append('9')
				genotype_f.write(''.join(geno_codes) + '\n')
		finally:
			# close whichever files were opened, even if a later step failed
			for output_f in (genotype_f, ind_f, snp_f):
				if output_f is not None:
					output_f.close()