Exemplo n.º 1
0
def Flask_process_text(text0, module="H810", task="TMA01"):
    """Analyse an essay text and attach a unified 'nvl_data' n-gram summary.

    Calls ``top_level_procedure`` with "NVL" as its fifth argument, converts
    the key lemmas and key phrases found under ``essay['ke_data']`` with
    ``lemmaToJSON`` / ``ngramToJSON``, and adds position information for
    every keyword.

    Args:
        text0: The essay text handed to ``top_level_procedure``.
        module: Forwarded unchanged to ``top_level_procedure``.
        task: Forwarded unchanged to ``top_level_procedure``.

    Returns:
        The object returned by ``top_level_procedure`` with an extra
        ``'nvl_data'`` entry holding the keys ``'keywords'``, ``'bigrams'``,
        ``'trigrams'`` and ``'quadgrams'``. Each keyword entry additionally
        gets ``'dispersion'`` (indices of its occurrences in the flat lemma
        list) and ``'trend'`` (a 10-bin histogram of those indices).

    Side effects:
        Writes every keyword score into the module-level ``__mapkeyscore``.
    """
    essay = top_level_procedure(text0, None, None, None, "NVL", module, task)

    # Pull the keyword-extraction results that get reformatted below.
    ke_data = essay['ke_data']
    keylemmas = ke_data['keylemmas']
    bigram_keyphrases = ke_data['bigram_keyphrases']
    trigram_keyphrases = ke_data['trigram_keyphrases']
    quadgram_keyphrases = ke_data['quadgram_keyphrases']
    myarray_ke = ke_data['myarray_ke']
    scoresNfreqs = ke_data['scoresNfreqs']

    # Build an associative array (word -> score) out of the keywords list.
    # This runs before the *ToJSON calls; presumably they read
    # __mapkeyscore -- confirm against their definitions.
    for (word, score, _r, _c) in scoresNfreqs:
        __mapkeyscore[word] = score

    # Reformat n-grams into a unified structure.
    nvl_data = {
        'keywords': lemmaToJSON(keylemmas, myarray_ke),
        'bigrams': ngramToJSON(bigram_keyphrases, myarray_ke),
        'trigrams': ngramToJSON(trigram_keyphrases, myarray_ke),
        'quadgrams': ngramToJSON(quadgram_keyphrases, myarray_ke),
    }
    essay['nvl_data'] = nvl_data

    # Complete flat list of the text's lemmas (paragraph -> sentence -> lemma).
    lemmas = [
        lemma
        for para in essay['se_data']['se_parasenttok']
        for sent in para
        for lemma in sent['lemma']
    ]

    # Index the positions of every lemma in one pass so that each keyword is
    # a dictionary lookup instead of a full rescan of the lemma list.
    # Assumes lemmas are hashable (strings) -- TODO confirm.
    positions = {}
    for idx, lemma in enumerate(lemmas):
        positions.setdefault(lemma, []).append(idx)

    # Build dispersion arrays for the key lemmas.
    total = len(lemmas)
    for ngram in nvl_data['keywords']:
        head = ngram['ngram'][0]
        # Copy so that entries sharing a lemma never alias the same list.
        hits = list(positions.get(head, ()))
        ngram['dispersion'] = hits
        counts, _edges = np.histogram(hits, bins=10, range=(0, total))
        ngram['trend'] = counts.tolist()

    # build dispersion arrays for ngrams
    # TODO: dispersion does not work for key phrases; removed from data structure
    #setDispersionNgram(nvl_data['bigrams'],myarray_ke,lemmas)
    #setDispersionNgram(nvl_data['trigrams'],myarray_ke,lemmas)
    #setDispersionNgram(nvl_data['quadgrams'],myarray_ke,lemmas)

    return essay
Exemplo n.º 2
0
def Flask_process_text(text0, module="H810", task="TMA01"):
    """Run the essay analysis and add the unified "nvl_data" n-gram structure.

    ``top_level_procedure`` is called with "NVL" as its fifth argument. Its
    ``essay["ke_data"]`` results are converted with ``lemmaToJSON`` and
    ``ngramToJSON``, and each keyword is annotated with where it occurs in
    the text.

    Args:
        text0: The essay text handed to ``top_level_procedure``.
        module: Forwarded unchanged to ``top_level_procedure``.
        task: Forwarded unchanged to ``top_level_procedure``.

    Returns:
        The object returned by ``top_level_procedure``, extended with an
        ``"nvl_data"`` entry (keys ``"keywords"``, ``"bigrams"``,
        ``"trigrams"``, ``"quadgrams"``). Every keyword entry also receives
        ``"dispersion"`` (occurrence indices in the flat lemma list) and
        ``"trend"`` (a 10-bin histogram of those indices).

    Side effects:
        Stores every keyword score in the module-level ``__mapkeyscore``.
    """
    essay = top_level_procedure(text0, None, None, None, "NVL", module, task)

    # Keyword-extraction results that are reformatted below.
    ke_data = essay["ke_data"]
    keylemmas = ke_data["keylemmas"]
    bigram_keyphrases = ke_data["bigram_keyphrases"]
    trigram_keyphrases = ke_data["trigram_keyphrases"]
    quadgram_keyphrases = ke_data["quadgram_keyphrases"]
    myarray_ke = ke_data["myarray_ke"]
    scoresNfreqs = ke_data["scoresNfreqs"]

    # Build an associative array (word -> score) out of the keywords list.
    # Done ahead of the *ToJSON calls; presumably they consult
    # __mapkeyscore -- confirm against their definitions.
    for (word, score, _r, _c) in scoresNfreqs:
        __mapkeyscore[word] = score

    # Reformat n-grams into a unified structure.
    nvl_data = {
        "keywords": lemmaToJSON(keylemmas, myarray_ke),
        "bigrams": ngramToJSON(bigram_keyphrases, myarray_ke),
        "trigrams": ngramToJSON(trigram_keyphrases, myarray_ke),
        "quadgrams": ngramToJSON(quadgram_keyphrases, myarray_ke),
    }
    essay["nvl_data"] = nvl_data

    # Complete flat list of the text's lemmas (paragraph -> sentence -> lemma).
    lemmas = [
        lemma
        for para in essay["se_data"]["se_parasenttok"]
        for sent in para
        for lemma in sent["lemma"]
    ]

    # One pass over the lemmas records where each one occurs, so the keyword
    # loop below no longer rescans the whole text for every keyword.
    # Assumes lemmas are hashable (strings) -- TODO confirm.
    occurrences = {}
    for idx, lemma in enumerate(lemmas):
        occurrences.setdefault(lemma, []).append(idx)

    # Build dispersion arrays for the key lemmas.
    n_lemmas = len(lemmas)
    for ngram in nvl_data["keywords"]:
        first = ngram["ngram"][0]
        # Fresh list per keyword: entries must not share one mutable list.
        where = list(occurrences.get(first, ()))
        ngram["dispersion"] = where
        hist, _edges = np.histogram(where, bins=10, range=(0, n_lemmas))
        ngram["trend"] = hist.tolist()

    # build dispersion arrays for ngrams
    # TODO: dispersion does not work for key phrases; removed from data structure
    # setDispersionNgram(nvl_data['bigrams'],myarray_ke,lemmas)
    # setDispersionNgram(nvl_data['trigrams'],myarray_ke,lemmas)
    # setDispersionNgram(nvl_data['quadgrams'],myarray_ke,lemmas)

    return essay
Exemplo n.º 3
0
def Flask_process_text(text0, module="H810", task="TMA01"):
    """Analyse an essay text and attach the unified 'nvl_data' n-gram structure.

    Calls ``top_level_procedure`` with "NVL" as its fifth argument and
    reformats the key lemmas and key phrases found under
    ``essay['ke_data']`` using ``lemmaToJSON`` / ``ngramToJSON``.

    Args:
        text0: The essay text handed to ``top_level_procedure``.
        module: Forwarded unchanged to ``top_level_procedure``. Defaults to
            "H810", the value that used to be hard-coded here.
        task: Forwarded unchanged to ``top_level_procedure``. Defaults to
            "TMA01", the value that used to be hard-coded here.

    Returns:
        The object returned by ``top_level_procedure`` with an extra
        ``'nvl_data'`` entry holding the keys ``'keywords'``, ``'bigrams'``,
        ``'trigrams'`` and ``'quadgrams'``.

    Side effects:
        Writes every keyword score into the module-level ``__mapkeyscore``.
    """
    essay = top_level_procedure(text0, None, None, None, "NVL", module, task)

    # Keyword-extraction results that are reformatted below.
    ke_data = essay['ke_data']
    keylemmas = ke_data['keylemmas']
    bigram_keyphrases = ke_data['bigram_keyphrases']
    trigram_keyphrases = ke_data['trigram_keyphrases']
    quadgram_keyphrases = ke_data['quadgram_keyphrases']
    myarray_ke = ke_data['myarray_ke']
    scoresNfreqs = ke_data['scoresNfreqs']

    # Build an associative array (word -> score) out of the keywords list.
    # This runs before the *ToJSON calls; presumably they read
    # __mapkeyscore -- confirm against their definitions.
    for (word, score, _r, _c) in scoresNfreqs:
        __mapkeyscore[word] = score

    # Reformat n-grams into a unified structure.
    essay['nvl_data'] = {
        'keywords': lemmaToJSON(keylemmas, myarray_ke),
        'bigrams': ngramToJSON(bigram_keyphrases, myarray_ke),
        'trigrams': ngramToJSON(trigram_keyphrases, myarray_ke),
        'quadgrams': ngramToJSON(quadgram_keyphrases, myarray_ke),
    }
    return essay
Exemplo n.º 4
0
            essay_fname, 'r',
            encoding='utf-8')  # Open current essay file for reading
        essay_txt = f.read()  # Read in the essay and set to var 'essay_txt'
        f.close()  # Close the essay file
        string = essay_fname[:-4] + '_results' + '.txt'
        newfilename = os.path.join(tempdir1, string)
        nf = codecs.open(
            newfilename, 'w', encoding='utf-8'
        )  # Open 'newfilename' (for writing to) and set open file to var 'nf'

        #if dev == 'DGF':
        #nf2.write('\n') # Add blank lines to the essay results file
        nf2.write(str(
            essay_fname))  # Write the new file name to the essay results file
        nf2.write('; ')
        essay = top_level_procedure(essay_txt, essay_fname, nf, nf2, dev,
                                    "H810", "TMA01")

        ##        #############################
        ##        #############################
        ##        ### This section is for drawing figures. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'.
        ##        #############################
        ##        #############################
        ##        essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01")
        ##        string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png'
        ##        figurefilename = os.path.join(tempdir1, string)
        ##        #pos=nx.circular_layout(gr_se_sample)
        ##        #pos=nx.graphviz_layout(gr_se_sample,prog="neato")
        ##        plt.figure(1, figsize=(8,8))
        ##        x = gr_se_sample.nodes()
        ##        #plt.title(essay_fname)
        ##        nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\
Exemplo n.º 5
0
        )  # Open 'newfilename' (for writing to) and set open file to var 'nf'

        #if dev == 'DGF':
        #nf2.write('\n') # Add blank lines to the essay results file
        nf2.write(str(
            essay_fname))  # Write the new file name to the essay results file
        nf2.write('; ')
        #essay = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01")

        #############################
        #############################
        ### This section is for drawing diagrams. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'.
        #############################
        #############################
        #        essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") # xxxx 'essay' argument not needed here, only running EssayAnalyser not OpenEssayist, next line down is normal version
        gr_se_sample, gr_ke_sample = top_level_procedure(
            essay_txt, essay_fname, nf, nf2, dev, "H810", "TMA01")
        #print 'Key sentence subgraph for rainbow diagram'
        #print(gr_se_sample.adj)    # This is how you print a networkx graph
        string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png'
        figurefilename = os.path.join(tempdir1, string)
        #pos=nx.circular_layout(gr_se_sample)
        #pos=nx.graphviz_layout(gr_se_sample,prog="neato")
        plt.figure(1, figsize=(8, 8))
        x = gr_se_sample.nodes()
        print '\nNumber of nodes in key sentence subgraph for rainbow diagram = ', len(
            x)
        #print gr_se_sample.nodes(data = True)
        plt.title(essay_fname)
        nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\
        #nx.draw(gr_se_sample, font_size=5, font_color='b', font_weight='normal',\
                node_size=500,
Exemplo n.º 6
0
# Batch driver: run 'top_level_procedure' on every essay file named in 'filelist'.
# NOTE(review): only the start of the loop body is visible in this excerpt; 'nf' is
# opened here but not closed in these lines -- presumably closed further down; confirm.
for essay_fname in filelist: # For each file in the current directory...
    #startfiletime = time() # Set current time to a variable for later calculations
    if essay_fname[-3:] == 'txt': # If a file name ends in 'txt' (no dot is checked)...
        print '\n', essay_fname # Print to shell to monitor progress
        f = codecs.open(essay_fname, 'r',encoding='utf-8') # Open current essay file for reading
        essay_txt = f.read() # Read in the essay and set to var 'essay_txt'
        f.close() # Close the essay file
        string = essay_fname[:-4] + '_results' + '.txt' # Drop the last four characters (e.g. '.txt') and append '_results.txt'
        newfilename = os.path.join(tempdir1, string) # Per-essay results file is placed under 'tempdir1'
        nf = codecs.open(newfilename, 'w',encoding='utf-8') # Open 'newfilename' (for writing to) and set open file to var 'nf'

        #if dev == 'DGF':
            #nf2.write('\n') # Add blank lines to the essay results file            
        nf2.write(str(essay_fname)) # Write the current essay file name to the already-open 'nf2' file
        nf2.write('; ') # Separator written after the file name
        essay = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") # Analyse the essay; both open files and 'dev' are passed through

##        #############################
##        #############################
##        ### This section is for drawing figures. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'.
##        #############################
##        #############################
##        essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01")
##        string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png'
##        figurefilename = os.path.join(tempdir1, string)
##        #pos=nx.circular_layout(gr_se_sample)
##        #pos=nx.graphviz_layout(gr_se_sample,prog="neato")
##        plt.figure(1, figsize=(8,8))
##        x = gr_se_sample.nodes()
##        #plt.title(essay_fname)
##        nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\
Exemplo n.º 7
0
        newfilename = os.path.join(tempdir1, string)                     
        nf = codecs.open(newfilename, 'w',encoding='utf-8') # Open 'newfilename' (for writing to) and set open file to var 'nf'

        #if dev == 'DGF':
            #nf2.write('\n') # Add blank lines to the essay results file            
        nf2.write(str(essay_fname)) # Write the new file name to the essay results file
        nf2.write('; ')        
        #essay = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01")

        #############################
        #############################
        ### This section is for drawing diagrams. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'.
        #############################
        #############################
#        essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") # xxxx 'essay' argument not needed here, only running EssayAnalyser not OpenEssayist, next line down is normal version
        gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01")
        #print 'Key sentence subgraph for rainbow diagram'
        #print(gr_se_sample.adj)    # This is how you print a networkx graph
        string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png'
        figurefilename = os.path.join(tempdir1, string)
        #pos=nx.circular_layout(gr_se_sample)
        #pos=nx.graphviz_layout(gr_se_sample,prog="neato")
        plt.figure(1, figsize=(8,8))
        x = gr_se_sample.nodes()
        print '\nNumber of nodes in key sentence subgraph for rainbow diagram = ', len(x)      
        #print gr_se_sample.nodes(data = True)
        plt.title(essay_fname)
        nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\
        #nx.draw(gr_se_sample, font_size=5, font_color='b', font_weight='normal',\
                node_size=500,
                #stretch_factor=100,