Exemplo n.º 1
0
	def run(self):
		
		if self.debug:
			import pdb
			pdb.set_trace()
		
		db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname)
		db.setup(create_tables=False)
		session = db.session
		session.begin()
		
		self.cmp_data_filename = self.findOutCmpDataFilename(self.cmp_data_filename, self.QC_method_id, StockDB.QCMethod)
		header, strain_acc_list, category_list, data_matrix = read_data(self.cmp_data_filename)
		strain_acc_list = map(int, strain_acc_list)	#it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
		snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, \
							data_matrix=data_matrix)	#category_list is not used.
		
		readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
		session.save(readme)
		
		import MySQLdb
		conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user = self.db_user, passwd = self.db_passwd)
		curs = conn.cursor()
		from dbSNP2data import dbSNP2data
		snp_id2index, snp_id_list, snp_id2info = dbSNP2data.get_snp_id2index_m(curs, StockDB.Calls.table.name, StockDB.SNPs.table.name)
		strain_info_data = self.get_strain_id_info(self.QC_method_id)
		data_matrix = self.get_data_matrix(db, strain_info_data.strain_id2index, snp_id2index, StockDB.Calls.table.name)
		strain_acc_list = [strain_info_data.strain_id2acc[strain_id] for strain_id in strain_info_data.strain_id_list]
		category_list = [strain_info_data.strain_id2category[strain_id] for strain_id in strain_info_data.strain_id_list]
		header = ['ecotypeid', 'strainid']
		for snp_id in snp_id_list:
			snp_name, chromosome, position = snp_id2info[snp_id]
			header.append(snp_name)
		snpData1 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix, \
						snps_table='stock.snps')	#snps_table is set to the stock_250k snps_table
		
		twoSNPData = TwoSNPData(SNPData1=snpData1, SNPData2=snpData2, curs=curs, \
							QC_method_id=self.QC_method_id, user=self.db_user, row_matching_by_which_value=0, debug=self.debug)
		if self.run_type==1:
			row_id2NA_mismatch_rate = twoSNPData.cmp_row_wise()
		elif self.run_type==2:
			#twoSNPData.save_col_wise(session, readme)	#2008-08-18 need to implement a new one for 149SNP
			row_id2NA_mismatch_rate = {}
		else:
			sys.stderr.write("run_type=%s is not supported.\n"%self.run_type)
			sys.exit(5)
		if self.output_fname and self.run_type==1 and row_id2NA_mismatch_rate:
			self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate, self.output_fname)
		
		if self.run_type==1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
			#if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
			#row_id2NA_mismatch_rate might be None if it's method 0.
			self.submit_to_call_QC(session, row_id2NA_mismatch_rate, self.QC_method_id, self.db_user, \
								twoSNPData.row_id12row_id2, readme)
		if self.commit:
			session.commit()
		else:
			session.rollback()
    def run(self):
        """
		2008-08-11
			the database interface changed in variation.src.dbsnp
		2008-05-06
		"""
        import MySQLdb
        conn = MySQLdb.connect(db=self.dbname,
                               host=self.hostname,
                               user=self.user,
                               passwd=self.passwd)
        curs = conn.cursor()
        if self.debug:
            import pdb
            pdb.set_trace()

        db = DBSNP(username=self.user,
                   password=self.passwd,
                   hostname=self.hostname,
                   database=self.dbname)
        session = db.session
        session.begin()
        #transaction = session.create_transaction()

        snps_name2possible_mappings, snps_name2snps_id = self.get_snps_name2possible_mappings(
            db)

        from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
        header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix.read_data(
            self.input_fname1)
        snpData1 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix, \
             col_id2id=snps_name2snps_id, snps_table='dbsnp.snps')

        header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix.read_data(
            self.input_fname2)
        snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix,\
            snps_table='stock_250k.snps')

        twoSNPData = TwoSNPData384(SNPData1=snpData1,
                                   SNPData2=snpData2,
                                   curs=curs,
                                   user=self.user)

        readme = formReadmeObj(sys.argv, self.ad, README)
        session.save(readme)
        session.flush()
        twoSNPData.figureOutABMapping(session, readme,
                                      snps_name2possible_mappings)
        if self.commit:
            curs.execute("commit")
            session.commit()
        else:
            session.rollback()
Exemplo n.º 3
0
	def run(self):
		self.communicator = MPI.world.duplicate()
		node_rank = self.communicator.rank
		free_computing_nodes = range(1, self.communicator.size-1)	#exclude the 1st and last node
		free_computing_node_set = Set(free_computing_nodes)
		output_node_rank = self.communicator.size-1
		
		if node_rank not in free_computing_node_set:	#computing nodes don't need db connection
			db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
								password=self.db_passwd, hostname=self.hostname, database=self.dbname)
			db.setup(create_tables=False)
			session = db.session
		
		if node_rank == 0:
			twoSNPData = self.prepareTwoSNPData(db)
			twoSNPData_pickle = cPickle.dumps(twoSNPData, -1)
			for node in free_computing_nodes:	#send it to the computing_node
				sys.stderr.write("passing initial data to nodes from %s to %s ... "%(node_rank, node))
				self.communicator.send(twoSNPData_pickle, node, 0)
				sys.stderr.write(".\n")
			del twoSNPData_pickle
		elif node_rank in free_computing_node_set:
			data, source, tag = self.communicator.receiveString(0, 0)
			twoSNPData =  cPickle.loads(data)
			del data
			sys.stderr.write(".\n")
		else:
			readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
			session.save(readme)
		
		mw = MPIwrapper(self.communicator, debug=self.debug, report=self.report)
		mw.synchronize()
		if node_rank == 0:
			param_obj = PassingData(communicator=self.communicator, twoSNPData=twoSNPData, output_node_rank=output_node_rank, \
								QC_method_id=self.QC_method_id, output_fname=getattr(self, 'output_fname', None), report=self.report)
			self.input_node(param_obj, free_computing_nodes, self.message_size)
		elif node_rank in free_computing_node_set:
			computing_parameter_obj = PassingData(twoSNPData=twoSNPData, QC_method_id=self.QC_method_id)
			mw.computing_node(computing_parameter_obj, self.computing_node_handler)
		else:
			if getattr(self, 'output_fname', None):
				writer = csv.writer(open(self.output_fname, 'w'), delimiter='\t')
			else:
				writer = None
			param_obj = PassingData(writer=writer, session=session, commit=self.commit, QC_method_id=self.QC_method_id, readme=readme, is_header_written=False)
			mw.output_node(free_computing_nodes, param_obj, self.output_node_handler)
			del writer
		mw.synchronize()	#to avoid some node early exits
Exemplo n.º 4
0
	def run(self):
		"""
		2008-08-11
			the database interface changed in variation.src.dbsnp
		2008-05-06
		"""
		import MySQLdb
		conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user = self.user, passwd = self.passwd)
		curs = conn.cursor()
		if self.debug:
			import pdb
			pdb.set_trace()
		
		db = DBSNP(username=self.user,
				   password=self.passwd, hostname=self.hostname, database=self.dbname)
		session = db.session
		session.begin()
		#transaction = session.create_transaction()
		
		snps_name2possible_mappings, snps_name2snps_id = self.get_snps_name2possible_mappings(db)
		
		from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
		header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix.read_data(self.input_fname1)
		snpData1 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix, \
							col_id2id=snps_name2snps_id, snps_table='dbsnp.snps')
				
		header, strain_acc_list, category_list, data_matrix = FilterStrainSNPMatrix.read_data(self.input_fname2)
		snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix,\
						snps_table='stock_250k.snps')
		
		twoSNPData = TwoSNPData384(SNPData1=snpData1, SNPData2=snpData2, curs=curs, user=self.user)
		
		readme = formReadmeObj(sys.argv, self.ad, README)
		session.save(readme)
		session.flush()
		twoSNPData.figureOutABMapping(session, readme, snps_name2possible_mappings)
		if self.commit:
			curs.execute("commit")
			session.commit()
		else:
			session.rollback()
Exemplo n.º 5
0
	def run(self):
		if self.debug:
			import pdb
			pdb.set_trace()
		
		db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname)
		db.setup(create_tables=False)
		session = db.session
		#session.begin()
		
		readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
		session.save(readme)
		
		twoSNPData = self.prepareTwoSNPData(db, report=self.report)
		twoSNPData.cal_row_id2pairwise_dist()
		
		self.submitToQCCrossMatch(session, twoSNPData.row_id2pairwise_dist, self.QC_method_id, readme, self.commit,\
								max_mismatch_rate=self.max_mismatch_rate, min_no_of_non_NA_pairs=self.min_no_of_non_NA_pairs)
		
		"""
Exemplo n.º 6
0
    def run(self):
        if self.debug:
            import pdb
            pdb.set_trace()

        db = StockDB.StockDB(drivername=self.drivername,
                             username=self.db_user,
                             password=self.db_passwd,
                             hostname=self.hostname,
                             database=self.dbname)
        db.setup(create_tables=False)
        session = db.session
        #session.begin()

        readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
        session.save(readme)

        twoSNPData = self.prepareTwoSNPData(db, report=self.report)
        twoSNPData.cal_row_id2pairwise_dist()

        self.submitToQCCrossMatch(session, twoSNPData.row_id2pairwise_dist, self.QC_method_id, readme, self.commit,\
              max_mismatch_rate=self.max_mismatch_rate, min_no_of_non_NA_pairs=self.min_no_of_non_NA_pairs)
        """
Exemplo n.º 7
0
    def run(self):
        """
		2008-04-25
			return None if QC_method_id==0
		2008-04-20
			for plone to call it just to get row_id2NA_mismatch_rate
		"""
        # database connection and etc
        db = Stock_250kDB.Stock_250kDB(
            drivername=self.drivername,
            username=self.user,
            password=self.passwd,
            hostname=self.hostname,
            database=self.dbname,
        )
        db.setup()
        session = db.session
        session.begin()
        # transaction = session.create_transaction()

        self.cmp_data_filename = self.findOutCmpDataFilename(
            self.cmp_data_filename, self.QC_method_id, self.QCMethod_class
        )
        qm = self.QCMethod_class.query.get(self.QC_method_id)  # 2009-5-20

        import MySQLdb

        conn = MySQLdb.connect(db=self.dbname, host=self.hostname, user=self.user, passwd=self.passwd)
        curs = conn.cursor()
        self.curs = curs
        if self.debug:
            import pdb

            pdb.set_trace()

        readme = formReadmeObj(sys.argv, self.ad, Stock_250kDB.README)
        session.save(readme)

        QC_method_id2snps_table = self.QC_method_id2snps_table

        if self.QC_method_id == 0:
            self.cal_independent_NA_rate(db, self.min_probability, readme)
            row_id2NA_mismatch_rate = None
        else:
            # from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
            header, strain_acc_list, category_list, data_matrix = read_data(
                self.cmp_data_filename, ignore_het=qm.ignore_het
            )
            strain_acc_list = map(
                int, strain_acc_list
            )  # it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
            snpData2 = SNPData(
                header=header,
                strain_acc_list=strain_acc_list,
                data_matrix=data_matrix,
                snps_table=QC_method_id2snps_table.get(self.QC_method_id),
                ignore_het=qm.ignore_het,
            )  # category_list is not used. 05/20/09 ignore_het is useless cuz data_matrix is provided.
            """
			if self.input_dir and os.path.isdir(self.input_dir):
				#04/22/08 Watch: call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
				#no submission to db
				call_info_id2fname = self.get_array_id2fname(curs, self.input_dir)
			"""
            if self.input_dir and os.path.isfile(self.input_dir):  # it's file
                call_info_id2fname = None
            else:
                if self.run_type == 2:  # no filtering on call_info entries that have been QCed.
                    filter_calls_QCed = 0
                elif self.run_type == 1:
                    filter_calls_QCed = 1
                    self.max_call_info_mismatch_rate = 1  # don't use this when doing accession-wise QC
                else:
                    sys.stderr.write("run_type=%s is not supported.\n" % self.run_type)
                    sys.exit(5)
                call_data = self.get_call_info_id2fname(
                    db,
                    self.QC_method_id,
                    self.call_method_id,
                    filter_calls_QCed,
                    self.max_call_info_mismatch_rate,
                    self.debug,
                    min_no_of_non_NA_pairs=self.min_no_of_non_NA_pairs,
                    input_dir=self.input_dir,
                )
                call_info_id2fname = call_data.call_info_id2fname
                call_info_ls_to_return = call_data.call_info_ls_to_return
            if self.run_type == 2:
                snps_name2snps_id = self.get_snps_name2snps_id(db)
            else:
                snps_name2snps_id = None

            if call_info_id2fname:
                if self.one_by_one and self.run_type == 1:  # one_by_one only for QC by accession
                    row_id2NA_mismatch_rate = {}
                    row_id12row_id2 = {}
                    counter = 0
                    for call_info_id, value in call_info_id2fname.iteritems():
                        counter += 1
                        print "No", counter
                        tmp_dict = {}
                        tmp_dict[call_info_id] = value
                        pdata = self.read_call_matrix(
                            tmp_dict, self.min_probability
                        )  # 05/20/09 no need for qm.ignore_het because 250k is all h**o
                        passingdata = self.qcDataMatrixVSsnpData(
                            pdata, snps_name2snps_id, snpData2, curs, session, readme
                        )
                        row_id2NA_mismatch_rate.update(passingdata.row_id2NA_mismatch_rate)
                        row_id12row_id2.update(passingdata.row_id12row_id2)
                        del pdata

                        if self.debug and counter == 10:
                            break
                else:
                    pdata = self.read_call_matrix(
                        call_info_id2fname, self.min_probability
                    )  # 05/20/09 no need for qm.ignore_het because 250k is all h**o
                    passingdata = self.qcDataMatrixVSsnpData(pdata, snps_name2snps_id, snpData2, curs, session, readme)
                    row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
                    row_id12row_id2 = passingdata.row_id12row_id2
                    del pdata
            else:
                # input file is SNP by strain format. double header (1st two lines)
                header, snps_name_ls, category_list, data_matrix = read_data(
                    self.input_dir, double_header=1, ignore_het=qm.ignore_het
                )
                pdata = PassingData()
                pdata.ecotype_id_ls = header[0][2:]
                pdata.call_info_id_ls = header[1][2:]
                data_matrix = numpy.array(data_matrix)
                pdata.data_matrix = data_matrix.transpose()
                pdata.header = ["", ""] + snps_name_ls  # fake a header for SNPData
                passingdata = self.qcDataMatrixVSsnpData(pdata, snps_name2snps_id, snpData2, curs, session, readme)
                row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
                row_id12row_id2 = passingdata.row_id12row_id2
                del pdata

        if self.output_fname and self.run_type == 1 and row_id2NA_mismatch_rate:
            self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate, self.output_fname)

        if self.run_type == 1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
            # if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
            # row_id2NA_mismatch_rate might be None if it's method 0.
            self.submit_to_call_QC(
                session,
                row_id2NA_mismatch_rate,
                self.QC_method_id,
                self.user,
                self.min_probability,
                row_id12row_id2,
                self.call_method_id,
                readme,
            )
        if self.commit:
            curs.execute("commit")
            session.commit()
        else:
            session.rollback()

        self.row_id2NA_mismatch_rate = row_id2NA_mismatch_rate  # for plone to get the data structure
Exemplo n.º 8
0
    def run(self):
        self.communicator = MPI.world.duplicate()
        node_rank = self.communicator.rank
        free_computing_nodes = range(1, self.communicator.size -
                                     1)  #exclude the 1st and last node
        free_computing_node_set = Set(free_computing_nodes)
        output_node_rank = self.communicator.size - 1

        if node_rank not in free_computing_node_set:  #computing nodes don't need db connection
            db = StockDB.StockDB(drivername=self.drivername,
                                 username=self.db_user,
                                 password=self.db_passwd,
                                 hostname=self.hostname,
                                 database=self.dbname)
            db.setup(create_tables=False)
            session = db.session

        if node_rank == 0:
            twoSNPData = self.prepareTwoSNPData(db)
            twoSNPData_pickle = cPickle.dumps(twoSNPData, -1)
            for node in free_computing_nodes:  #send it to the computing_node
                sys.stderr.write(
                    "passing initial data to nodes from %s to %s ... " %
                    (node_rank, node))
                self.communicator.send(twoSNPData_pickle, node, 0)
                sys.stderr.write(".\n")
            del twoSNPData_pickle
        elif node_rank in free_computing_node_set:
            data, source, tag = self.communicator.receiveString(0, 0)
            twoSNPData = cPickle.loads(data)
            del data
            sys.stderr.write(".\n")
        else:
            readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
            session.save(readme)

        mw = MPIwrapper(self.communicator,
                        debug=self.debug,
                        report=self.report)
        mw.synchronize()
        if node_rank == 0:
            param_obj = PassingData(communicator=self.communicator, twoSNPData=twoSNPData, output_node_rank=output_node_rank, \
                 QC_method_id=self.QC_method_id, output_fname=getattr(self, 'output_fname', None), report=self.report)
            self.input_node(param_obj, free_computing_nodes, self.message_size)
        elif node_rank in free_computing_node_set:
            computing_parameter_obj = PassingData(
                twoSNPData=twoSNPData, QC_method_id=self.QC_method_id)
            mw.computing_node(computing_parameter_obj,
                              self.computing_node_handler)
        else:
            if getattr(self, 'output_fname', None):
                writer = csv.writer(open(self.output_fname, 'w'),
                                    delimiter='\t')
            else:
                writer = None
            param_obj = PassingData(writer=writer,
                                    session=session,
                                    commit=self.commit,
                                    QC_method_id=self.QC_method_id,
                                    readme=readme,
                                    is_header_written=False)
            mw.output_node(free_computing_nodes, param_obj,
                           self.output_node_handler)
            del writer
        mw.synchronize()  #to avoid some node early exits
Exemplo n.º 9
0
    def run(self):

        if self.debug:
            import pdb
            pdb.set_trace()

        db = StockDB.StockDB(drivername=self.drivername,
                             username=self.db_user,
                             password=self.db_passwd,
                             hostname=self.hostname,
                             database=self.dbname)
        db.setup(create_tables=False)
        session = db.session
        session.begin()

        self.cmp_data_filename = self.findOutCmpDataFilename(
            self.cmp_data_filename, self.QC_method_id, StockDB.QCMethod)
        header, strain_acc_list, category_list, data_matrix = read_data(
            self.cmp_data_filename)
        strain_acc_list = map(
            int, strain_acc_list
        )  #it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
        snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, \
             data_matrix=data_matrix) #category_list is not used.

        readme = formReadmeObj(sys.argv, self.ad, StockDB.README)
        session.save(readme)

        import MySQLdb
        conn = MySQLdb.connect(db=self.dbname,
                               host=self.hostname,
                               user=self.db_user,
                               passwd=self.db_passwd)
        curs = conn.cursor()
        from dbSNP2data import dbSNP2data
        snp_id2index, snp_id_list, snp_id2info = dbSNP2data.get_snp_id2index_m(
            curs, StockDB.Calls.table.name, StockDB.SNPs.table.name)
        strain_info_data = self.get_strain_id_info(self.QC_method_id)
        data_matrix = self.get_data_matrix(db,
                                           strain_info_data.strain_id2index,
                                           snp_id2index,
                                           StockDB.Calls.table.name)
        strain_acc_list = [
            strain_info_data.strain_id2acc[strain_id]
            for strain_id in strain_info_data.strain_id_list
        ]
        category_list = [
            strain_info_data.strain_id2category[strain_id]
            for strain_id in strain_info_data.strain_id_list
        ]
        header = ['ecotypeid', 'strainid']
        for snp_id in snp_id_list:
            snp_name, chromosome, position = snp_id2info[snp_id]
            header.append(snp_name)
        snpData1 = SNPData(header=header, strain_acc_list=strain_acc_list, category_list=category_list, data_matrix=data_matrix, \
            snps_table='stock.snps') #snps_table is set to the stock_250k snps_table

        twoSNPData = TwoSNPData(SNPData1=snpData1, SNPData2=snpData2, curs=curs, \
             QC_method_id=self.QC_method_id, user=self.db_user, row_matching_by_which_value=0, debug=self.debug)
        if self.run_type == 1:
            row_id2NA_mismatch_rate = twoSNPData.cmp_row_wise()
        elif self.run_type == 2:
            #twoSNPData.save_col_wise(session, readme)	#2008-08-18 need to implement a new one for 149SNP
            row_id2NA_mismatch_rate = {}
        else:
            sys.stderr.write("run_type=%s is not supported.\n" % self.run_type)
            sys.exit(5)
        if self.output_fname and self.run_type == 1 and row_id2NA_mismatch_rate:
            self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate,
                                                self.output_fname)

        if self.run_type == 1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
            #if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
            #row_id2NA_mismatch_rate might be None if it's method 0.
            self.submit_to_call_QC(session, row_id2NA_mismatch_rate, self.QC_method_id, self.db_user, \
                 twoSNPData.row_id12row_id2, readme)
        if self.commit:
            session.commit()
        else:
            session.rollback()
Exemplo n.º 10
0
    def run(self):
        """
		2008-04-25
			return None if QC_method_id==0
		2008-04-20
			for plone to call it just to get row_id2NA_mismatch_rate
		"""
        #database connection and etc
        db = Stock_250kDB.Stock_250kDB(drivername=self.drivername,
                                       username=self.user,
                                       password=self.passwd,
                                       hostname=self.hostname,
                                       database=self.dbname)
        db.setup(create_tables=False)
        session = db.session
        session.begin()
        #transaction = session.create_transaction()

        self.cmp_data_filename = self.findOutCmpDataFilename(
            self.cmp_data_filename, self.QC_method_id, self.QCMethod_class)
        qm = self.QCMethod_class.query.get(self.QC_method_id)  #2009-5-20

        import MySQLdb
        conn = MySQLdb.connect(db=self.dbname,
                               host=self.hostname,
                               user=self.user,
                               passwd=self.passwd)
        curs = conn.cursor()
        self.curs = curs
        if self.debug:
            import pdb
            pdb.set_trace()

        readme = formReadmeObj(sys.argv, self.ad, Stock_250kDB.README)
        session.add(readme)

        QC_method_id2snps_table = self.QC_method_id2snps_table

        if self.QC_method_id == 0:
            self.cal_independent_NA_rate(db, self.min_probability, readme)
            row_id2NA_mismatch_rate = None
        else:
            #from variation.src.FilterStrainSNPMatrix import FilterStrainSNPMatrix
            header, strain_acc_list, category_list, data_matrix = read_data(
                self.cmp_data_filename, ignore_het=qm.ignore_het)
            strain_acc_list = map(
                int, strain_acc_list
            )  #it's ecotypeid, cast it to integer to be compatible to the later ecotype_id_ls from db
            snpData2 = SNPData(header=header, strain_acc_list=strain_acc_list, \
                data_matrix=data_matrix, snps_table=QC_method_id2snps_table.get(self.QC_method_id),\
                ignore_het=qm.ignore_het) #category_list is not used. 05/20/09 ignore_het is useless cuz data_matrix is provided.
            """
			if self.input_dir and os.path.isdir(self.input_dir):
				#04/22/08 Watch: call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
				#no submission to db
				call_info_id2fname = self.get_array_id2fname(curs, self.input_dir)
			"""
            if self.input_dir and os.path.isfile(self.input_dir):  #it's file
                call_info_id2fname = None
            else:
                if self.run_type == 2:  #no filtering on call_info entries that have been QCed.
                    filter_calls_QCed = 0
                elif self.run_type == 1:
                    filter_calls_QCed = 1
                    self.max_call_info_mismatch_rate = 1  #don't use this when doing accession-wise QC
                else:
                    sys.stderr.write("run_type=%s is not supported.\n" %
                                     self.run_type)
                    sys.exit(5)
                call_data = self.get_call_info_id2fname(db, self.QC_method_id, self.call_method_id, \
                          filter_calls_QCed, self.max_call_info_mismatch_rate, self.debug,\
                          min_no_of_non_NA_pairs=self.min_no_of_non_NA_pairs, input_dir=self.input_dir)
                call_info_id2fname = call_data.call_info_id2fname
                call_info_ls_to_return = call_data.call_info_ls_to_return
            if self.run_type == 2:
                snps_name2snps_id = self.get_snps_name2snps_id(db)
            else:
                snps_name2snps_id = None

            if call_info_id2fname:
                db_id2chr_pos = db.getSNPID2ChrPos()  #2011-22
                from DB_250k2data import DB_250k2Data
                db_id2index = DB_250k2Data.getSNPID2index(
                    call_info_id2fname.values()[0][1], db_id2chr_pos)
                if self.one_by_one and self.run_type == 1:  #one_by_one only for QC by accession
                    row_id2NA_mismatch_rate = {}
                    row_id12row_id2 = {}
                    counter = 0
                    for call_info_id, value in call_info_id2fname.iteritems():
                        counter += 1
                        print "No", counter
                        tmp_dict = {}
                        tmp_dict[call_info_id] = value
                        pdata = self.read_call_matrix(
                            tmp_dict,
                            self.min_probability,
                            db_id2chr_pos=db_id2chr_pos,
                            db_id2index=db_id2index)
                        #05/20/09 no need for qm.ignore_het because 250k is all h**o
                        passingdata = self.qcDataMatrixVSsnpData(
                            pdata, snps_name2snps_id, snpData2, curs, session,
                            readme)
                        row_id2NA_mismatch_rate.update(
                            passingdata.row_id2NA_mismatch_rate)
                        row_id12row_id2.update(passingdata.row_id12row_id2)
                        del pdata

                        if self.debug and counter == 10:
                            break
                else:
                    pdata = self.read_call_matrix(call_info_id2fname,
                                                  self.min_probability,
                                                  db_id2chr_pos=db_id2chr_pos,
                                                  db_id2index=db_id2index)
                    #05/20/09 no need for qm.ignore_het because 250k is all h**o
                    passingdata = self.qcDataMatrixVSsnpData(
                        pdata, snps_name2snps_id, snpData2, curs, session,
                        readme)
                    row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
                    row_id12row_id2 = passingdata.row_id12row_id2
                    del pdata
            else:
                #input file is SNP by strain format. double header (1st two lines)
                header, snps_name_ls, category_list, data_matrix = read_data(
                    self.input_dir, double_header=1, ignore_het=qm.ignore_het)
                pdata = PassingData()
                pdata.ecotype_id_ls = header[0][2:]
                pdata.call_info_id_ls = header[1][2:]
                data_matrix = numpy.array(data_matrix)
                pdata.data_matrix = data_matrix.transpose()
                pdata.header = ['', ''
                                ] + snps_name_ls  #fake a header for SNPData
                passingdata = self.qcDataMatrixVSsnpData(
                    pdata, snps_name2snps_id, snpData2, curs, session, readme)
                row_id2NA_mismatch_rate = passingdata.row_id2NA_mismatch_rate
                row_id12row_id2 = passingdata.row_id12row_id2
                del pdata

        if self.output_fname and self.run_type == 1 and row_id2NA_mismatch_rate:
            self.output_row_id2NA_mismatch_rate(row_id2NA_mismatch_rate,
                                                self.output_fname)

        if self.run_type == 1 and self.commit and not self.input_dir and row_id2NA_mismatch_rate:
            #if self.input_dir is given, no db submission. call_info_id2fname here is fake, it's actually keyed by (array_id, ecotypeid)
            #row_id2NA_mismatch_rate might be None if it's method 0.
            self.submit_to_call_QC(session, row_id2NA_mismatch_rate, self.QC_method_id, self.user, self.min_probability, \
                 row_id12row_id2, self.call_method_id, readme)
        if self.commit:
            curs.execute("commit")
            session.commit()
        else:
            session.rollback()

        self.row_id2NA_mismatch_rate = row_id2NA_mismatch_rate  #for plone to get the data structure