def _exit_if_paths_missing(util_path, dir_path):
    """Exit with status 1 on the first configured path that is unusable.

    Every value of util_path must exist on disk; every value of dir_path
    must exist and be a directory.  The offending path is printed before
    exiting.
    """
    # single-argument print(...) gives the same output under the Python 2
    # print statement as the comma-separated form it replaces
    for util in util_path.values():
        if not os.path.exists(util):
            print("%s %s" % (util, " not found "))
            sys.exit(1)

    for directory in dir_path.values():
        if not os.path.exists(directory):
            print("%s %s" % (directory, " not found "))
            sys.exit(1)
        if not os.path.isdir(directory):
            print("%s %s" % (directory, " is not a directory "))
            sys.exit(1)


def _store_name_value_table(cursor, table, value_column, entries):
    """Pass each (name, value) pair of entries to store_or_update for table.

    'name' is handed over as the fixed field and value_column as the
    field to update.
    """
    for name, value in entries.items():
        store_or_update(cursor, table, {'name': name}, {value_column: value})


def main():
    """Set up the exolocator_config database and the species name tables.

    Steps, in order:
      1. build the parameter, directory and utility path settings;
      2. exit(1) if any of the configured paths is missing;
      3. create the 'exolocator_config' database and its 'util_path',
         'dir_path' and 'parameter' tables when they are not found;
      4. store the settings in those tables;
      5. feed trivial species names and species name shorthands.

    The cursor and the connection are closed even if one of the steps
    raises.
    """
    # in case I ever have to handle multiple versions of ensembl
    # (but for now I don't have enough space)
    # note though that there are functions in el_utils/mysql.py that assume
    # that whatever ensembl stuff is available to the mysql server
    # corresponds to the same release
    release_number = '76'

    parameter = {
        'ensembl_release_number': release_number,
        # it will be used as a string when formatting the blastp cmd
        'blastp_e_value': "1.e-10",
        # minimum acceptable exon similarity
        'min_accptbl_exon_sim': 0.33333,
    }

    afs_root = '/afs/bii.a-star.edu.sg/dept/biomodel_design/Group/ivana/'
    dir_path = {
        'ensembl_fasta': '/mnt/ensembl-mirror/release-' + release_number + '/fasta',
        # local juggling of data from one database base to the other
        'afs_dumps': afs_root + 'ExoLocator/results/dumpster',
        'resources': afs_root + 'pypeworks/exolocator/resources',
        'scratch': '/tmp',
        'maxentscan': afs_root + 'pypeworks/exolocator/pl_utils/maxentscan',
    }

    util_path = {
        'mafft': '/usr/bin/mafft',
        'blastall': '/usr/bin/blastall',
        'fastacmd': '/usr/bin/fastacmd',
        'sw#': '/usr/bin/swsharp',
        'usearch': '/usr/bin/usearch',
        'score3': dir_path['maxentscan'] + '/score3.pl',
        'score5': dir_path['maxentscan'] + '/score5.pl',
    }

    # check if the paths are functioning (at this point at least)
    _exit_if_paths_missing(util_path, dir_path)

    db = connect_to_mysql()
    try:
        cursor = db.cursor()
        try:
            #######################################################
            # check if the config db exists -- if not, make it
            db_name = "exolocator_config"
            qry = "show databases like'%s'" % db_name
            rows = search_db(cursor, qry)
            if not rows:
                print("%s %s" % (db_name, "database not found"))
                qry = "create database %s " % db_name
                rows = search_db(cursor, qry)
                if rows:
                    # a non-empty result is treated as a failure here; the
                    # query is repeated verbosely to show what went wrong
                    print("some problem creating the database ...")
                    rows = search_db(cursor, qry, verbose=True)
            else:
                print("%s %s" % (db_name, "database found"))

            qry = "use %s " % db_name
            search_db(cursor, qry)

            # make tables
            for table in ['util_path', 'dir_path', 'parameter']:
                if check_table_exists(cursor, db_name, table):
                    print("%s %s %s" % (table, " found in ", db_name))
                else:
                    print("%s %s %s" % (table, " not found in ", db_name))
                    make_table(cursor, table)

            # fill the util_path, dir_path and parameter tables
            _store_name_value_table(cursor, 'util_path', 'path', util_path)
            _store_name_value_table(cursor, 'dir_path', 'path', dir_path)
            _store_name_value_table(cursor, 'parameter', 'value', parameter)

            #######################################################
            # add trivial names to ncbi_taxonomy.names
            # (the second element returned by get_species is not used here)
            [all_species, ensembl_db_name] = get_species(cursor)
            feed_trivial_names(cursor, all_species)

            #######################################################
            # add species shorthands (used in ENS* names formation;
            # though they will not be needed until the paralogue
            # alignment reconstruction point)
            feed_name_shorthands(cursor, all_species)
        finally:
            cursor.close()
    finally:
        db.close()
def feed_name_shorthands (cursor, all_species):
    """Store the species -> shorthand mapping in 'species_name_shorthands'.

    The table lives in the database named by get_compara_name(cursor) and
    is created (id, species, shorthand columns) when it is not found.

    Returns False as soon as one of the table-creation queries yields a
    non-empty result; otherwise runs to the end and returns None.
    NOTE(review): this assumes search_db returns something falsy on
    success -- confirm against its definition.
    """
    shorthand_of = {
        'ailuropoda_melanoleuca': 'AME',
        'anas_platyrhynchos': 'APL',
        'anolis_carolinensis': 'ACA',
        'astyanax_mexicanus': 'AMX',
        'bos_taurus': 'BTA',
        'callithrix_jacchus': 'CJA',
        'canis_familiaris': 'CAF',
        'cavia_porcellus': 'CPO',
        'choloepus_hoffmanni': 'CHO',
        'danio_rerio': 'DAR',
        'dasypus_novemcinctus': 'DNO',
        'dipodomys_ordii': 'DOR',
        'echinops_telfairi': 'ETE',
        'equus_caballus': 'ECA',
        'erinaceus_europaeus': 'EEU',
        'felis_catus': 'FCA',
        'ficedula_albicollis': 'FAL',
        'gadus_morhua': 'GMO',
        'gallus_gallus': 'GAL',
        'gasterosteus_aculeatus': 'GAC',
        'gorilla_gorilla': 'GGO',
        'homo_sapiens': '',
        'ictidomys_tridecemlineatus': 'STO',
        'latimeria_chalumnae': 'LAC',
        'lepisosteus_oculatus': 'LOC',
        'loxodonta_africana': 'LAF',
        'macaca_mulatta': 'MMU',
        'macropus_eugenii': 'MEU',
        'meleagris_gallopavo': 'MGA',
        'microcebus_murinus': 'MIC',
        'monodelphis_domestica': 'MOD',
        'mus_musculus': 'MUS',
        'mustela_putorius_furo': 'MPU',
        'myotis_lucifugus': 'MLU',
        'nomascus_leucogenys': 'NLE',
        'ochotona_princeps': 'OPR',
        'oreochromis_niloticus': 'ONI',
        'ornithorhynchus_anatinus': 'OAN',
        'oryctolagus_cuniculus': 'OCU',
        'oryzias_latipes': 'ORL',
        'ovis_aries': 'OAR',
        'otolemur_garnettii': 'OGA',
        'pan_troglodytes': 'PTR',
        'papio_anubis': 'PAN',
        'poecilia_formosa': 'PFO',
        'pelodiscus_sinensis': 'PSI',
        'petromyzon_marinus': 'PMA',
        'pongo_abelii': 'PPY',
        'procavia_capensis': 'PCA',
        'pteropus_vampyrus': 'PVA',
        'rattus_norvegicus': 'RNO',
        'sarcophilus_harrisii': 'SHA',
        'sorex_araneus': 'SAR',
        'sus_scrofa': 'SSC',
        'taeniopygia_guttata': 'TGU',
        'takifugu_rubripes': 'TRU',
        'tarsius_syrichta': 'TSY',
        'tetraodon_nigroviridis': 'TNI',
        'tupaia_belangeri': 'TBE',
        'tursiops_truncatus': 'TTR',
        'vicugna_pacos': 'VPA',
        'xenopus_tropicalis': 'XET',
        'xiphophorus_maculatus': 'XMA',
    }

    compara_db = get_compara_name (cursor)
    search_db (cursor, "use %s " % compara_db)

    table_name = 'species_name_shorthands'

    # if the table does not exist, make it: first the bare table with the
    # id column, then one column at a time
    if not check_table_exists (cursor, compara_db, table_name):
        setup_queries = [
            "CREATE TABLE " + table_name + " (id INT(10) PRIMARY KEY AUTO_INCREMENT)",
            "ALTER TABLE %s ADD %s VARCHAR(100)" % (table_name, 'species'),
            "ALTER TABLE %s ADD %s VARCHAR(10)" % (table_name, 'shorthand'),
        ]
        for setup_qry in setup_queries:
            # a non-empty result aborts the whole function
            if search_db (cursor, setup_qry):
                return False

    for species in all_species:
        if species not in shorthand_of:
            # single-argument print(...) gives the same output as the
            # Python 2 print statement with comma-separated items
            print("%s %s %s" % ("short for ", species, " not found "))
            # remember the miss: if the same name shows up again in
            # all_species it is stored with an empty shorthand
            shorthand_of[species] = ""
            continue
        store_or_update (cursor, table_name,
                         {'species': species},
                         {'shorthand': shorthand_of[species]})