Exemplo n.º 1
0
def test_bootstrap() :
    """Run the bootstrap keyword extraction over a parameter grid and export every result.

    A corpus of ids is read once from the sqlite dump, then for each
    combination of ``kwLimit`` and ``subCorpusSize`` the function calls
    ``bootstrap_subcorpuses`` and writes:

    * the relevant dictionary under ``res/conv_dico/``,
    * the aggregated relevant keywords under ``res/conv_kw/`` (list) and
      ``res/conv_tm/`` (numeric csv),
    * one pair of files per entry of ``allkw``, suffixed with ``_run<i>``.

    Returns nothing; the only effects are the exported files.
    """
    corpus = utils.get_data('SELECT id FROM refdesc WHERE abstract_keywords IS NOT NULL LIMIT 2000;','../../Data/dumps/20160126_cybergeo.sqlite3')
    # Same value for every grid point, so it is set once outside the loops.
    bootstrapSize = 25
    for kwLimit in [50,100,200] :
        for subCorpusSize in [100,500,1000,2000] :
            [relevantkw,relevant_dico,allkw] = bootstrap_subcorpuses(corpus,kwLimit,subCorpusSize,bootstrapSize)
            # Suffix shared by all keyword exports of this grid point.
            tag = 'kw_'+str(kwLimit)+'_subCorpusSize'+str(subCorpusSize)
            utils.export_dico_csv(relevant_dico,'res/conv_dico/bootstrap_relevantDico_kwLimit'+str(kwLimit)+'_subCorpusSize'+str(subCorpusSize)+'_bootstrapSize'+str(bootstrapSize),True)
            utils.export_list(relevantkw,'res/conv_kw/'+tag,False)
            utils.export_dico_num_csv(relevantkw,'res/conv_tm/'+tag,False)
            # Per-run exports: each entry of allkw is used as a mapping
            # (its keys go to the list export, the mapping itself to the csv).
            for i, local_kw in enumerate(allkw) :
                run_tag = tag+'_run'+str(i)
                utils.export_list(local_kw.keys(),'res/conv_kw/'+run_tag,False)
                utils.export_dico_num_csv(local_kw,'res/conv_tm/'+run_tag,False)
Exemplo n.º 2
0
def extract_relevant_cybergeo_fulltext(kwLimit):
    """Extract relevant keywords from cybergeo full texts and export them as csv.

    Args:
        kwLimit: forwarded to ``kwFunctions.extract_relevant_keywords`` and
            appended to the names of the exported files.

    Both queries run against the ``"mysql"`` source and keep only rows whose
    ``fulltext_keywords`` is neither NULL nor empty. Results are written
    under ``res/cybergeo_full/``; nothing is returned.
    """
    resdir = "res/cybergeo_full/"
    # NOTE(review): both queries use LIMIT 10 without ORDER BY, so they are
    # presumably expected to return the same ten rows - confirm.
    corpus = utils.get_data(
        "SELECT id FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!='' LIMIT 10;", "mysql"
    )
    occurence_dicos = utils.import_kw_dico_req(
        "SELECT id,fulltext_keywords FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!='' LIMIT 10;",
        "mysql",
    )
    [relevantkw, relevant_dico] = kwFunctions.extract_relevant_keywords(corpus, kwLimit, occurence_dicos)
    # export as csv
    utils.export_dico_csv(relevant_dico, resdir + "relevantDico_kw" + str(kwLimit), False)
    # Qualified with ``utils.`` like every other export call; the bare name
    # is not defined in the visible code.
    utils.export_dico_num_csv(relevantkw, resdir + "termhoods_kw" + str(kwLimit), False)
Exemplo n.º 3
0
def extract_relevant_cybergeo(kwLimit, database):
    """Extract relevant keywords from cybergeo abstracts and export them as csv.

    Args:
        kwLimit: forwarded to ``kwFunctions.extract_relevant_keywords`` and
            appended to the names of the exported files.
        database: forwarded unchanged to ``utils.get_data`` and
            ``utils.import_kw_dico_req`` as the data source.

    Only rows of ``refdesc`` joined with ``cybergeo`` whose
    ``abstract_keywords`` is neither NULL nor empty are considered. The
    intermediate query results are printed, and the two outputs are written
    under ``res/cybergeo/``. Nothing is returned.
    """
    ids_query = "SELECT cybergeo.id FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';"
    keywords_query = "SELECT cybergeo.id,abstract_keywords FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';"

    doc_ids = utils.get_data(ids_query, database)
    print(doc_ids)

    keyword_dicos = utils.import_kw_dico_req(keywords_query, database)
    print(keyword_dicos)

    extraction = kwFunctions.extract_relevant_keywords(doc_ids, kwLimit, keyword_dicos)
    # The extraction yields exactly two items: keywords, then dictionary.
    selected_kw, selected_dico = extraction

    dico_path = "res/cybergeo/relevantDico_kwLimit" + str(kwLimit)
    utils.export_dico_csv(selected_dico, dico_path, False)
    kw_path = "res/cybergeo/kw_" + str(kwLimit)
    utils.export_dico_num_csv(selected_kw, kw_path, False)
Exemplo n.º 4
0
def extract_relevant_cybergeo(kwLimit, database):
    """Run the relevant-keyword extraction on cybergeo abstracts.

    Args:
        kwLimit: passed to ``kwFunctions.extract_relevant_keywords``; its
            string form is also the suffix of both output file names.
        database: data source handed as-is to the two ``utils`` query helpers.

    Prints both query results, then exports the relevant dictionary and the
    relevant keywords under ``res/cybergeo/``. Has no return value.
    """
    # Rows kept: refdesc joined with cybergeo, abstract_keywords set and non-empty.
    sql_ids = "SELECT cybergeo.id FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';"
    sql_keywords = "SELECT cybergeo.id,abstract_keywords FROM refdesc INNER JOIN cybergeo ON cybergeo.id=refdesc.id WHERE abstract_keywords IS NOT NULL AND abstract_keywords!='';"

    references = utils.get_data(sql_ids, database)
    print(references)
    occurrences = utils.import_kw_dico_req(sql_keywords, database)
    print(occurrences)

    keywords, dico = kwFunctions.extract_relevant_keywords(
        references, kwLimit, occurrences)

    target_dir = 'res/cybergeo/'
    utils.export_dico_csv(
        dico, target_dir + 'relevantDico_kwLimit' + str(kwLimit), False)
    utils.export_dico_num_csv(
        keywords, target_dir + 'kw_' + str(kwLimit), False)
Exemplo n.º 5
0
def extract_relevant_cybergeo_fulltext(kwLimit):
    """Extract relevant keywords from cybergeo full texts and export them as csv.

    Args:
        kwLimit: passed to ``kwFunctions.extract_relevant_keywords``; its
            string form is also the suffix of both output file names.

    Reads from the ``'mysql'`` source, keeping only rows whose
    ``fulltext_keywords`` is neither NULL nor empty, and writes the results
    under ``res/cybergeo_full/``. Has no return value.
    """
    resdir = 'res/cybergeo_full/'
    # NOTE(review): LIMIT 10 without ORDER BY on both queries; presumably
    # they are expected to select the same rows - confirm.
    corpus = utils.get_data(
        'SELECT id FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!=\'\' LIMIT 10;',
        'mysql')
    occurence_dicos = utils.import_kw_dico_req(
        'SELECT id,fulltext_keywords FROM cybergeo WHERE fulltext_keywords IS NOT NULL AND fulltext_keywords!=\'\' LIMIT 10;',
        'mysql')
    [relevantkw, relevant_dico
     ] = kwFunctions.extract_relevant_keywords(corpus, kwLimit,
                                               occurence_dicos)
    # export as csv
    utils.export_dico_csv(relevant_dico,
                          resdir + 'relevantDico_kw' + str(kwLimit), False)
    # Called through ``utils`` like the other exports; the unqualified name
    # is not defined anywhere in the visible code.
    utils.export_dico_num_csv(relevantkw,
                              resdir + 'termhoods_kw' + str(kwLimit), False)