def __init__(self, inputFnameLs=None, **keywords):
    """
    Initialise the job: run the base-class constructor, work out the
    taxonomy id, compile the regular expressions and register the FASTA
    description parsers.

    inputFnameLs and every extra keyword argument are handed unchanged to
    AbstractDBInteractingJob.__init__().

    2008-07-27 use option_default_dict
    2008-07-06 use the first line (header) of the fasta file to extract
        which chromosome. using filename is unreliable.
    """
    # self.connectDB() is called within the base class's __init__()
    AbstractDBInteractingJob.__init__(self, inputFnameLs=inputFnameLs, **keywords)

    # NOTE(review): db_user/db_passwd/hostname/dbname/organism are presumably
    # populated by the base-class constructor from the options -- confirm.
    self.FigureOutTaxID_ins = FigureOutTaxID(
        db_user=self.db_user,
        db_passwd=self.db_passwd,
        hostname=self.hostname,
        dbname=self.dbname
    )

    # self.tax_id is only assigned here when an organism was given.
    if self.organism is not None:
        from annot.bin.codense.common import org_short2long, org2tax_id

        long_name = org_short2long(self.organism)
        if long_name:
            # known short name: translate it via its long name
            self.tax_id = org2tax_id(long_name)
        else:
            # otherwise let FigureOutTaxID resolve the organism text
            self.tax_id = self.FigureOutTaxID_ins.returnTaxIDGivenSentence(self.organism)

    # An earlier, filename-based pattern was r'[a-zA-Z]+_chr(\w+).fa'.
    # The trailing ? makes the [,\n\r] terminator optional.
    self.p_chromosome = re.compile(r'chromosome (\w+)[,\n\r]?')
    self.p_acc_ver = re.compile(r'(\w+)\.(\d+)')

    # integer key -> bound method that parses one flavour of FASTA description
    self.parseFastaDescriptionDict = {
        1: self.parseFastaDescriptionForGenBank,
        2: self.parseFastaDescriptionForWUSTLVervetScaffolds,
        3: self.parseFastaDescriptionForFullVervetBACs,
        4: self.parseFastaDescriptionForWUSTLVervetChromosomeGenome,
    }
def __init__(self, hostname='zhoudb', dbname='mdb', schema='', inputfile=None,
             organism='hs', type=1, debug=0, report=0, commit=0):
    """
    Record the connection settings, the input file and the run flags, then
    derive the taxonomy id and build the parser dispatch table.

    hostname, dbname, schema, inputfile and organism are stored as given.
    type, debug, report and commit are coerced with int() before being
    stored. tax_id comes from org2tax_id(org_short2long(organism)).
    """
    # database location
    self.hostname = hostname
    self.dbname = dbname
    self.schema = schema

    # what to read and for which organism
    self.inputfile = inputfile
    self.organism = organism

    # numeric options; int() so that string values are accepted as well
    self.type = int(type)
    self.debug = int(debug)
    self.report = int(report)
    self.commit = int(commit)

    # short organism name -> long name -> taxonomy id
    long_name = org_short2long(self.organism)
    self.tax_id = org2tax_id(long_name)

    # integer key -> bound parser method
    self.parser_dict = {
        1: self.harbison2004_parse,
        2: self.cisred_parse,
        3: self.sgd_regulatory_parse,
        4: self.ucsc_tfbs_conserved_parse,
    }
def __init__(self, hostname='dl324b-1', dbname='yhdb', schema='dbsnp', input_fname=None,
             output_table=None, strain_info_table='strain_info', snp_locus_table='snp_locus',
             organism='hs', type=1, debug=0, report=0, commit=0):
    """
    Store the database settings, file/table names and run flags, derive the
    taxonomy id from the organism and compile the SNP accession pattern.

    hostname, dbname, schema, input_fname, output_table, strain_info_table
    and snp_locus_table are stored as given. organism itself is not kept;
    only tax_id = org2tax_id(org_short2long(organism)) is. type, debug,
    report and commit are coerced with int() before being stored.
    """
    self.hostname = hostname
    self.dbname = dbname
    self.schema = schema
    self.input_fname = input_fname
    self.output_table = output_table
    self.strain_info_table = strain_info_table
    self.snp_locus_table = snp_locus_table

    # short organism name -> long name -> taxonomy id
    self.tax_id = org2tax_id(org_short2long(organism))

    # int() so that string values are accepted as well
    self.type = int(type)
    self.debug = int(debug)
    self.report = int(report)
    self.commit = int(commit)

    # Raw string: the previous plain literal relied on the invalid escapes
    # "\-" and "\w", which newer Pythons warn about. The pattern text is
    # unchanged. Group 1 is a run of letters that may contain hyphens and
    # ends in a letter; it is followed by optional '-', '_' or ' '
    # separators and at least one word character.
    self.snp_acc_category_pattern = re.compile(r"([a-zA-Z]*[\-]*[a-zA-Z]+)[\-_ ]*[\w]+")
def __init__(self, hostname='dl324b-1', dbname='yhdb', schema='dbsnp', input_fname=None,
             output_table=None, strain_info_table='strain_info', snp_locus_table='snp_locus',
             organism='hs', type=1, debug=0, report=0, commit=0):
    """
    Keep the database settings, file/table names and run flags on the
    instance, translate the organism into a taxonomy id and compile the SNP
    accession pattern.

    hostname, dbname, schema, input_fname, output_table, strain_info_table
    and snp_locus_table are stored as given. organism is only used to
    compute tax_id = org2tax_id(org_short2long(organism)). type, debug,
    report and commit are coerced with int() before being stored.
    """
    self.hostname = hostname
    self.dbname = dbname
    self.schema = schema
    self.input_fname = input_fname
    self.output_table = output_table
    self.strain_info_table = strain_info_table
    self.snp_locus_table = snp_locus_table

    # short organism name -> long name -> taxonomy id
    self.tax_id = org2tax_id(org_short2long(organism))

    # int() so that string values are accepted as well
    self.type = int(type)
    self.debug = int(debug)
    self.report = int(report)
    self.commit = int(commit)

    # Written as a raw string so the backslashes reach the regex engine
    # without going through (invalid) string escapes "\-" and "\w"; the
    # compiled pattern is identical to the one the plain literal produced.
    # Group 1 is a run of letters that may contain hyphens and ends in a
    # letter; after it come optional '-', '_' or ' ' separators and at
    # least one word character.
    self.snp_acc_category_pattern = re.compile(
        r"([a-zA-Z]*[\-]*[a-zA-Z]+)[\-_ ]*[\w]+")