Exemplo n.º 1
0
    def run(self):
        """
        Assign a new array id to every file found in self.input_dir.

        2008-04-11
            new way of handling raw cel files (as recorded by the original author):

            md5sum each array, check if each array is in db already.
            if yes:
                skip it
            else:
                assign a new id and insert an entry into array_info_table
                copy the original cel file to output_dir and put array_id in the
                beginning of the filename.
        2008-02-28

        The per-file work is delegated to ArrayInfo.assignNewIdToThisArray(); a
        return value of -1 from it is reported as a failure on stderr and the
        loop moves on to the next file. The transaction is committed only when
        self.commit is set. The database connection is always closed before
        returning, including when an exception propagates.
        """
        if self.debug:
            import pdb
            pdb.set_trace()

        import MySQLdb
        conn = MySQLdb.connect(db=self.dbname,
                               host=self.hostname,
                               user=self.user,
                               passwd=self.passwd)
        try:
            curs = conn.cursor()
            # NOTE: an older, disabled workflow (get_filename2array_id_in_db /
            # get_filename2array_id / submit_filename2array_id /
            # submit_all_array_data) used to live here as a string literal.
            arrayInfo = ArrayInfo(curs=curs,
                                  array_info_table=self.array_info_table,
                                  user=self.user,
                                  experimenter=self.experimenter,
                                  mapping_file=self.mapping_file)
            ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(
                curs, debug=self.debug)
            nativename2ecotypeid_ls = getNativename2EcotypeIDLs(curs)
            input_fname_ls = self.get_all_files_in_input_dir(self.input_dir)
            for filename in input_fname_ls:
                sys.stderr.write("Assigning new id to %s ... " % filename)
                return_value = arrayInfo.assignNewIdToThisArray(
                    filename,
                    self.output_dir,
                    ecotypeid2tg_ecotypeid=ecotypeid2tg_ecotypeid,
                    nativename2ecotypeid_ls=nativename2ecotypeid_ls)
                if return_value == -1:
                    sys.stderr.write("Failed.\n")
                else:
                    sys.stderr.write("\n")
            if self.commit:
                curs.execute("commit")
        finally:
            # Release the server-side connection deterministically instead of
            # relying on interpreter shutdown; anything not committed above is
            # discarded by the server when the connection closes.
            conn.close()
Exemplo n.º 2
0
    def run(self):
        """
        2009-5-28
            Read a phenotype matrix from self.input_fname and store it in the
            database through putPhenotypeIntoDB() (run_type 1) or
            putReplicatePhenotypeIntoDB() (run_type 2).

            Any other run_type is reported on stderr and nothing is stored.
            The session transaction is committed when self.commit is set and
            explicitly rolled back otherwise.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        db = Stock_250kDB(drivername=self.drivername,
                          username=self.db_user,
                          password=self.db_passwd,
                          hostname=self.hostname,
                          database=self.dbname,
                          schema=self.schema)
        db.setup(create_tables=False)

        # Lookup structures handed to straightenEcotypeID() below.
        nativename2tg_ecotypeid_set = getNativename2TgEcotypeIDSet(
            db.metadata.bind, turnUpperCase=True)
        ecotype_id_set_250k_in_pipeline = get_ecotype_id_set_250k_in_pipeline(
            ArrayInfo)
        ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(db.metadata.bind)

        #turn_into_integer=2 because it's not nucleotides
        header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(
            self.input_fname, turn_into_integer=2, matrix_data_type=float)
        data_matrix_phen = numpy.array(data_matrix_phen)

        #2009-8-19 The matrix is deliberately NOT re-ordered through
        #Association.get_phenotype_matrix_in_data_matrix_order(): strain_acc_list_phen
        #is not unique for each row, which caused replicates to have the same value.

        phenData = SNPData(header=header_phen,
                           strain_acc_list=strain_acc_list_phen,
                           data_matrix=data_matrix_phen)

        ecotype_id_ls = self.straightenEcotypeID(
            phenData.row_id_ls, nativename2tg_ecotypeid_set,
            ecotypeid2tg_ecotypeid, ecotype_id_set_250k_in_pipeline)

        session = db.session
        session.begin()
        if self.run_type == 1:
            self.putPhenotypeIntoDB(db, phenData, ecotype_id_ls)
        elif self.run_type == 2:
            self.putReplicatePhenotypeIntoDB(db, phenData, ecotype_id_ls)
        else:
            sys.stderr.write("Unsupported run type: %s.\n" % (self.run_type))
        if self.commit:
            session.commit()
        else:
            # Close the transaction opened by session.begin() explicitly rather
            # than leaving it dangling until the process exits.
            session.rollback()
Exemplo n.º 3
0
	def run(self):
		"""
		Assign a new array id to every file found in self.input_dir.

		2008-04-11
			new way of handling raw cel files (as recorded by the original author):

			md5sum each array, check if each array is in db already.
			if yes:
				skip it
			else:
				assign a new id and insert an entry into array_info_table
				copy the original cel file to output_dir and put array_id in the
				beginning of the filename.
		2008-02-28

		The per-file work is delegated to ArrayInfo.assignNewIdToThisArray(); a
		return value of -1 from it is reported as a failure on stderr and the
		loop moves on to the next file. The transaction is committed only when
		self.commit is set. The database connection is always closed before
		returning, including when an exception propagates.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		import MySQLdb
		conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
		try:
			curs = conn.cursor()
			# NOTE: an older, disabled workflow (get_filename2array_id_in_db /
			# get_filename2array_id / submit_filename2array_id /
			# submit_all_array_data) used to live here as a string literal.
			arrayInfo = ArrayInfo(curs=curs, array_info_table=self.array_info_table, user=self.user,
				experimenter=self.experimenter, mapping_file=self.mapping_file)
			ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(curs, debug=self.debug)
			nativename2ecotypeid_ls = getNativename2EcotypeIDLs(curs)
			input_fname_ls = self.get_all_files_in_input_dir(self.input_dir)
			for filename in input_fname_ls:
				sys.stderr.write("Assigning new id to %s ... "%filename)
				return_value = arrayInfo.assignNewIdToThisArray(filename, self.output_dir,
					ecotypeid2tg_ecotypeid=ecotypeid2tg_ecotypeid,
					nativename2ecotypeid_ls=nativename2ecotypeid_ls)
				if return_value==-1:
					sys.stderr.write("Failed.\n")
				else:
					sys.stderr.write("\n")
			if self.commit:
				curs.execute("commit")
		finally:
			# Release the server-side connection deterministically instead of
			# relying on interpreter shutdown; anything not committed above is
			# discarded by the server when the connection closes.
			conn.close()
Exemplo n.º 4
0
	def run(self):
		"""
		Store the haplotype groups read from self.input_fname in the database.

		Opens a StockDB session transaction, fetches the ecotypeid2tg_ecotypeid
		mapping, lets dropRedundantEcotypes() reduce the input to one row per
		tg_ecotypeid, and passes the result together with the SNP id list to
		putHaplotypeGroupIntoDB(). The transaction is committed (and the session
		cleared) only when self.commit is set; otherwise it is rolled back.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		stock_db = StockDB.StockDB(
			drivername=self.drivername,
			username=self.db_user,
			password=self.db_passwd,
			hostname=self.hostname,
			database=self.dbname,
			schema=self.schema,
		)
		stock_db.setup()

		db_session = stock_db.session
		db_session.begin()

		ecotype_id_map = get_ecotypeid2tg_ecotypeid(stock_db.metadata.bind, debug=self.debug)
		row_by_tg_ecotypeid = self.dropRedundantEcotypes(self.input_fname, ecotype_id_map)
		snp_ids = self.getSNPIDLs()
		self.putHaplotypeGroupIntoDB(
			db_session,
			self.input_fname,
			row_by_tg_ecotypeid,
			self.max_snp_typing_error_rate,
			snp_ids,
		)

		# Rolling back is the default outcome; persisting requires self.commit.
		if not self.commit:
			db_session.rollback()
			return

		db_session.commit()
		db_session.clear()
Exemplo n.º 5
0
	def run(self):
		"""
		2009-5-28
			Read a phenotype matrix from self.input_fname and store it in the
			database through putPhenotypeIntoDB() (run_type 1) or
			putReplicatePhenotypeIntoDB() (run_type 2).

			Any other run_type is reported on stderr and nothing is stored.
			The session transaction is committed when self.commit is set and
			explicitly rolled back otherwise.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB(drivername=self.drivername, username=self.db_user,
			password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)

		# Lookup structures handed to straightenEcotypeID() below.
		nativename2tg_ecotypeid_set = getNativename2TgEcotypeIDSet(db.metadata.bind, turnUpperCase=True)
		ecotype_id_set_250k_in_pipeline = get_ecotype_id_set_250k_in_pipeline(ArrayInfo)
		ecotypeid2tg_ecotypeid = get_ecotypeid2tg_ecotypeid(db.metadata.bind)

		#turn_into_integer=2 because it's not nucleotides
		header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(self.input_fname, turn_into_integer=2, matrix_data_type=float)
		data_matrix_phen = numpy.array(data_matrix_phen)

		#2009-8-19 The matrix is deliberately NOT re-ordered through
		#Association.get_phenotype_matrix_in_data_matrix_order(): strain_acc_list_phen
		#is not unique for each row, which caused replicates to have the same value.

		phenData = SNPData(header=header_phen, strain_acc_list=strain_acc_list_phen, data_matrix=data_matrix_phen)

		ecotype_id_ls = self.straightenEcotypeID(phenData.row_id_ls, nativename2tg_ecotypeid_set,
			ecotypeid2tg_ecotypeid, ecotype_id_set_250k_in_pipeline)

		session = db.session
		session.begin()
		if self.run_type==1:
			self.putPhenotypeIntoDB(db, phenData, ecotype_id_ls)
		elif self.run_type==2:
			self.putReplicatePhenotypeIntoDB(db, phenData, ecotype_id_ls)
		else:
			sys.stderr.write("Unsupported run type: %s.\n"%(self.run_type))
		if self.commit:
			session.commit()
		else:
			# Close the transaction opened by session.begin() explicitly rather
			# than leaving it dangling until the process exits.
			session.rollback()