Example #1
def runExtractUniProt(species,species_full,uniprot_filename_url,trembl_filename_url,force):
    global uniprot_ensembl_db;uniprot_ensembl_db={}
    global uniprot_db;uniprot_db={}; global species_name; global uniprot_fildir
    global secondary_to_primary_db; secondary_to_primary_db={}
    import update; reload(update)
    
    species_name = species_full
    
    import UI; species_names = UI.getSpeciesInfo()
    species_full = species_names[species]
    species_full = string.replace(species_full,' ','_')

    uniprot_file = string.split(uniprot_filename_url,'/')[-1]; uniprot_file = string.replace(uniprot_file,'.gz','')
    trembl_file = string.split(trembl_filename_url,'/')[-1]; trembl_file = string.replace(trembl_file,'.gz','')
    uniprot_fildir = 'AltDatabase/uniprot/'+species+'/'
    uniprot_download_fildir = 'AltDatabase/uniprot/'
    uniprot_ens_file = species+'_Ensembl-UniProt.txt'; uniprot_ens_location = uniprot_fildir+uniprot_ens_file
    uniprot_location = uniprot_download_fildir+uniprot_file
    trembl_location = uniprot_download_fildir+trembl_file

    add_trembl_annotations = 'no' ### Currently we don't need these annotations    
    try: importEnsemblUniprot(uniprot_ens_location)
    except IOError:
        try:
            ### Download the data from the AltAnalyze website (if there)
            update.downloadCurrentVersion(uniprot_ens_location,species,'txt')
            importEnsemblUniprot(uniprot_ens_location)
        except Exception: pass
    try:
        uniprot_ens_location_built = string.replace(uniprot_ens_location,'UniProt','Uniprot-SWISSPROT')
        uniprot_ens_location_built = string.replace(uniprot_ens_location_built,'uniprot','Uniprot-SWISSPROT')
        importEnsemblUniprot(uniprot_ens_location_built)
    except Exception: pass
    
    ### Import UniProt annotations
    counts = update.verifyFile(uniprot_location,'counts')
    if force == 'no' or counts > 8: import_uniprot_db(uniprot_location)
    else:
        ### Directly download the data from UniProt
        gz_filepath, status = update.download(uniprot_filename_url,uniprot_download_fildir,'')

        if status == 'not-removed':
            try: os.remove(gz_filepath) ### Not sure why this works now and not before
        except OSError: pass
        import_uniprot_db(uniprot_location)
        
    if add_trembl_annotations == 'yes':
        ### Import TreMBL annotations
        try:
            if force == 'yes': uniprot_location += '!!!!!' ### Force an IOError
            import_uniprot_db(trembl_location)
        except IOError:
            ### Directly download the data from UniProt
            update.download(trembl_filename_url,uniprot_download_fildir,'')
            import_uniprot_db(trembl_location)        
    export()
    exportEnsemblUniprot(uniprot_ens_location)
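For orientation, here is a minimal sketch of how this function might be invoked. The species code, URLs, and force flag below are illustrative assumptions, not values from the source project:

# Hypothetical invocation; every value below is a placeholder.
species = 'Hs'                 # assumed AltAnalyze-style species code
species_full = 'Homo sapiens'
sprot_url = 'http://example.com/uniprot_sprot_human.dat.gz'    # placeholder URL
trembl_url = 'http://example.com/uniprot_trembl_human.dat.gz'  # placeholder URL
runExtractUniProt(species, species_full, sprot_url, trembl_url, 'no')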
Example #3
def main(argv):
    parser = argparse.ArgumentParser(description="Utilities for Arabic Sentiment Analysis")
    subparsers = parser.add_subparsers(dest="cmd", help="Commands")

    import_parser = subparsers.add_parser("import", help="Import files to IR")
    import_parser.add_argument("-t", dest="type", choices=["csv", "xml", "flat"], required=True)
    import_parser.add_argument("-o", dest="output", required=True, help="Output IR file")
    import_parser.add_argument("input", help="Input file")

    tok_parser = subparsers.add_parser("tok", help="Tokenize and morphologically analyze documents in IR format")
    tok_parser.add_argument("-l", dest="lang", choices=["en", "ar"], default="ar", help="Tokenization language.")
    tok_parser.add_argument("-o", dest="output", required=True, help="Output IR file")
    tok_parser.add_argument("input", help="Input IR file")

    subsample_parser = subparsers.add_parser("sub", help="Subsample corpus in IR format")
    subsample_parser.add_argument("-n", dest="num", type=int, required=True, help="Number of documents to sample")
    subsample_parser.add_argument("-o", dest="output", required=True, help="Output IR file")
    subsample_parser.add_argument("input", help="Input IR file")

    cv_parser = subparsers.add_parser("cv", help="Generate CV folds from IR format")
    cv_parser.add_argument("-n", dest="num", type=int, required=True, help="Number of folds")
    cv_parser.add_argument("-s", dest="split", type=float, required=True, help="Split [0, 1]")
    cv_parser.add_argument("-o", dest="output", required=True, help="Output directory")
    cv_parser.add_argument("input", help="Input IR file")

    export_parser = subparsers.add_parser("export", help="Export from IR to desired format")
    export_parser.add_argument("-c", dest="cv", action="store_true", help="Specify that input cross validation.")
    export_parser.add_argument("-l", dest="lang", default="ar", help="Language (en, ar)")
    export_parser.add_argument("-n", dest="name", default="arbooks", help="Corpus name")
    export_parser.add_argument("-o", dest="output", required=True, help="Output file or directory")
    export_parser.add_argument("-t", dest="type", choices=["itm", "shlda", "svml", "mlt"], required=True)
    export_parser.add_argument("input", help="Input IR file")

    arguments = parser.parse_args(argv)
    #print arguments

    if arguments.cmd == "import":
        arasent_import(arguments.type, arguments.input, arguments.output)
    elif arguments.cmd == "tok":
        tokenize(arguments.input, arguments.output, arguments.lang)
    elif arguments.cmd == "sub":
        main_subsample(arguments.num, arguments.input, arguments.output)
    elif arguments.cmd == "cv":
        prep_cv(arguments.input, arguments.output, arguments.num, arguments.split)
    elif arguments.cmd == "export":
        export(arguments.cv, arguments.type, arguments.input, arguments.output, arguments.name, arguments.lang)
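A short sketch of how this parser would typically be wired up as a script entry point; the script name in the sample commands is an assumption:

import sys

if __name__ == "__main__":
    # Sample invocations (script name assumed):
    #   python arasent.py import -t csv -o corpus.ir reviews.csv
    #   python arasent.py cv -n 5 -s 0.8 -o folds/ corpus.ir
    main(sys.argv[1:])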
Example #4
def start(self, gauge):
    global PROGRESS_MAX
    count = 0
    while count < PROGRESS_MAX and gauge.isValid():
        wurl = self.url + '&start=' + str(count * 15)  ### Pages are offset in steps of 15 results
        try:
            getContent(wurl, self.rankList)
        except Timeout:
            continue  ### Retry the same page on timeout
        except HTTPError:
            break
        wx.CallAfter(gauge.UpdateGauge, count, "%i of %i" % (count, PROGRESS_MAX))
        count += 1
    if gauge.isValid():
        filepath = '../export_files/booklist_of_' + self.key_word
        export(self.rankList, filepath)
        box = wx.MessageDialog(None, 'Done!', 'Successfully Exported', wx.OK)
        box.ShowModal()
        box.Destroy()
    gauge.Destroy()
Example #5
def start(self, gauge):
    global counter
    index = 0
    counter = 0
    ### Fetch three result pages concurrently, with start offsets 10 apart
    thread0 = threading.Thread(target=self.getContent, args=(index, gauge))
    index += 10
    thread1 = threading.Thread(target=self.getContent, args=(index, gauge))
    index += 10
    thread2 = threading.Thread(target=self.getContent, args=(index, gauge))
    thread0.start()
    thread1.start()
    thread2.start()
    thread0.join()
    thread1.join()
    thread2.join()
    if gauge.isValid():
        filepath = '../export_files/booklist_of_' + self.key_word
        export(self.rankList, filepath)
        box = wx.MessageDialog(None, 'Done!', 'Successfully Exported', wx.OK)
        box.ShowModal()
        box.Destroy()
    gauge.Destroy()
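The three hand-rolled threads above could equally be built in a loop. A minimal sketch of the same fan-out, assuming it replaces everything from the first Thread(...) line through the last join():

threads = [threading.Thread(target=self.getContent, args=(i * 10, gauge))
           for i in range(3)]  # start offsets 0, 10, 20
for t in threads:
    t.start()
for t in threads:
    t.join()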
Example #6
## TODO: import notes into an array later and change GenerateNote to accept an array only; use *argv or something

#newInstrument()
#instrument1() #Change later to GenerateInstrument(number of instruments)
#addInstrumentToSong()

#instrument2()

#GenerateNote(64, 1567.98)
#GenerateNote(16,note("80"))
#addInstrumentToSong()

GenerateNote(instrumentC,
             [[32, [note("rest")]], [32, [note("C4")]], [32, [note("C5")]]])
#GenerateNote(randomInstrument(20),randomNotes(16,16))
export()

####Execution time####
print "My program took", time.time(
) - start_time, "seconds to run (Excluding plotting time)"
from evaluate import song
print "Computation time for song length", (len(song) / 44100) / (
    time.time() - start_time), "%"
####Execution time####

plot()

print ""
'''
    When you try to divide a plain Python list by a real number, Python refuses with a TypeError. A NumPy array behaves like a vector: dividing it by a real number divides each element by that number, which can be super useful.
'''
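A quick illustration of the point made in that comment, assuming NumPy (a sketch, not part of the source script):

import numpy as np

samples = [0.5, 1.0, -0.25]
# samples / 2.0              # a plain list: TypeError
arr = np.array(samples)
print arr / 2.0              # element-wise: [ 0.25   0.5   -0.125]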
Example #7

# (Tail of a module-level getContent(url, index, rankList); the opening
# lines of the function are missing from this listing.)
            except AttributeError:
                rate = 0
            dic = {'title': title, 'pub': pubinfo, 'read': pl, 'rate': rate}
            lock.acquire()
            try:
                rankList.append(dic)
            finally:
                lock.release()
        index += 1
        i += 1

if __name__ == '__main__':
    url = "https://book.douban.com/subject_search?search_text="
    key_word = raw_input('key word:')
    tag = quote(key_word.encode('utf-8'))
    url = url + tag
    rankList = []
    index = 0
    thread0 = threading.Thread(target=getContent, args=(url, index, rankList))
    index += 10
    thread1 = threading.Thread(target=getContent, args=(url, index, rankList))
    index += 10
    thread2 = threading.Thread(target=getContent, args=(url, index, rankList))
    thread0.start()
    thread1.start()
    thread2.start()
    thread0.join()
    thread1.join()
    thread2.join()
    export(rankList, key_word)
Example #8
def test_export(tmp_path, example_data):
    # Exercise the exporter on the myriad cases parametrized in example_data.
    documents = example_data()
    export(documents, tmp_path)
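One plausible shape for the example_data fixture implied by the call above; the factory pattern and the document schema are assumptions, not taken from the source:

import pytest

@pytest.fixture
def example_data():
    # Hypothetical factory fixture; the real one is parametrized elsewhere.
    def _make():
        return [{"id": 1, "text": "hello world"}]  # assumed document schema
    return _make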
Example #9
    PD_s_dev,
    "AUC": [
        PD_AUC_dev, PD_AUC_val, PD_s_val, PD_AUC_S, PD_AUC_p, "yes", 0, 0, 0,
        PD_s_dev
    ],
    "customer_migrations": [upper_MWB, lower_MWB],
    "concentration_rating_grades": [HI_init, HI_curr, cr_pval, HI_curr_exp],
    "stability_migration_matrix": [transition_matrix_freq, z, z_pval],
    "avg_PD":
    development_set.groupby("grade").PD.mean().values,
    "nb_cust":
    development_set.grade.value_counts().sort_index().values,
    "orgExp_Grade":
    development_set.groupby("grade").original_exposure.sum().values,
}
export().PD_toExcel(PD_excel_input)

LGD_excel_inputs = {
    "predictive_ability": [LGD_backtesting_ptf, LGD_backtesting_perGrade],
    "AUC": [
        LGD_gAUC_init, LGD_gAUC_curr, LGD_S, LGD_curr_var, LGD_init_var,
        LGD_p_val
    ],
    "stability_migration_matrix": [z_up, z_low, zUP_pval, zDOWN_pval],
}
export().LGD_toExcel(development_set, LGD_excel_inputs)

CCF_excel_inputs = {
    "predictive_ability": [CCF_backtesting_ptf, CCF_backtesting_perGrade],
    "AUC": [
        CCF_gAUC_init, CCF_gAUC_curr, CCF_S, CCF_curr_var, CCF_p_val,