def Flask_process_text(text0, module="H810", task="TMA01"):
    """Run the analysis on ``text0`` and attach a unified n-gram structure.

    ``top_level_procedure`` is called with ``text0``, three ``None``
    placeholders, the literal ``"NVL"`` and the given ``module``/``task``.
    The key lemmas and the bi-/tri-/quadgram key phrases found under
    ``essay['ke_data']`` are converted with ``lemmaToJSON``/``ngramToJSON``
    and stored under ``essay['nvl_data']``.  Every keyword entry additionally
    receives:

    * ``'dispersion'`` -- indices in the flat lemma list at which the
      entry's first ngram element occurs;
    * ``'trend'`` -- a 10-bin histogram of those indices over
      ``(0, len(lemmas))``, as a plain list.

    Side effect: the module-level ``__mapkeyscore`` mapping is updated with
    one ``word -> score`` pair per item of ``scoresNfreqs``.

    Returns the ``essay`` structure produced by ``top_level_procedure``
    (with ``'nvl_data'`` added).
    """
    essay = top_level_procedure(text0, None, None, None, "NVL", module, task)

    # Collect the key-extraction results that get reformatted below.
    ke_data = essay['ke_data']
    key_lemmas = ke_data['keylemmas']
    bigrams = ke_data['bigram_keyphrases']
    trigrams = ke_data['trigram_keyphrases']
    quadgrams = ke_data['quadgram_keyphrases']
    ke_array = ke_data['myarray_ke']
    scored_words = ke_data['scoresNfreqs']

    # Build an associative array (word -> score) out of the keywords list;
    # the last two fields of each 4-tuple are not needed here.
    for word, score, _r, _c in scored_words:
        __mapkeyscore[word] = score

    # Reformat the n-grams into one unified structure.
    essay['nvl_data'] = nvl_data = {
        'keywords': lemmaToJSON(key_lemmas, ke_array),
        'bigrams': ngramToJSON(bigrams, ke_array),
        'trigrams': ngramToJSON(trigrams, ke_array),
        'quadgrams': ngramToJSON(quadgrams, ke_array),
    }

    # Complete flat list of the text's lemmas (paragraph -> sentence -> lemma).
    lemmas = []
    for paragraph in essay['se_data']['se_parasenttok']:
        for sentence in paragraph:
            lemmas.extend(sentence['lemma'])
    lemma_count = len(lemmas)

    # Dispersion and trend arrays for every key lemma.
    for keyword in nvl_data['keywords']:
        head = keyword['ngram'][0]
        hits = []
        for position, lemma in enumerate(lemmas):
            if lemma == head:
                hits.append(position)
        keyword['dispersion'] = hits
        counts, _edges = np.histogram(hits, bins=10, range=(0, lemma_count))
        keyword['trend'] = counts.tolist()

    # TODO: dispersion does not work for key phrases; removed from the data
    # structure.  The disabled calls were:
    #   setDispersionNgram(nvl_data['bigrams'], myarray_ke, lemmas)
    #   setDispersionNgram(nvl_data['trigrams'], myarray_ke, lemmas)
    #   setDispersionNgram(nvl_data['quadgrams'], myarray_ke, lemmas)

    return essay
def Flask_process_text(text0, module="H810", task="TMA01"):
    """Process ``text0`` and add the ``"nvl_data"`` section to the result.

    Steps, in order:

    1. Call ``top_level_procedure(text0, None, None, None, "NVL", module,
       task)`` to obtain the essay structure.
    2. Record ``word -> score`` for every 4-tuple of
       ``essay["ke_data"]["scoresNfreqs"]`` in the module-level
       ``__mapkeyscore`` mapping.
    3. Convert key lemmas (``lemmaToJSON``) and bi-/tri-/quadgram key
       phrases (``ngramToJSON``) and store them under ``essay["nvl_data"]``
       with the keys ``"keywords"``, ``"bigrams"``, ``"trigrams"`` and
       ``"quadgrams"``.
    4. For each keyword entry, store the positions of its first ngram
       element within the flattened lemma list as ``"dispersion"``, and a
       10-bin histogram of those positions over ``(0, len(lemmas))`` as
       ``"trend"``.

    Returns the essay structure, modified in place.
    """
    essay = top_level_procedure(text0, None, None, None, "NVL", module, task)

    # Inputs for the unified n-gram structure.
    extraction = essay["ke_data"]
    keylemmas = extraction["keylemmas"]
    phrase_groups = (
        ("bigrams", extraction["bigram_keyphrases"]),
        ("trigrams", extraction["trigram_keyphrases"]),
        ("quadgrams", extraction["quadgram_keyphrases"]),
    )
    myarray_ke = extraction["myarray_ke"]
    scoresNfreqs = extraction["scoresNfreqs"]

    # Build an associative array out of the keywords list.
    for record in scoresNfreqs:
        word, score, _r, _c = record
        __mapkeyscore[word] = score

    # Keywords go through lemmaToJSON, every phrase group through ngramToJSON.
    nvl_data = {"keywords": lemmaToJSON(keylemmas, myarray_ke)}
    for label, phrases in phrase_groups:
        nvl_data[label] = ngramToJSON(phrases, myarray_ke)
    essay["nvl_data"] = nvl_data

    # Flatten paragraphs -> sentences -> lemmas into a single list.
    lemmas = [
        lemma
        for para in essay["se_data"]["se_parasenttok"]
        for sent in para
        for lemma in sent["lemma"]
    ]
    span = (0, len(lemmas))

    # Dispersion (raw positions) and trend (binned positions) per key lemma.
    for entry in nvl_data["keywords"]:
        target = entry["ngram"][0]
        positions = [i for i, lemma in enumerate(lemmas) if lemma == target]
        entry["dispersion"] = positions
        entry["trend"] = np.histogram(positions, bins=10, range=span)[0].tolist()

    # TODO: dispersion does not work for key phrases; removed from data
    # structure (the setDispersionNgram calls for bigrams, trigrams and
    # quadgrams are disabled).

    return essay
def Flask_process_text(text0, module="H810", task="TMA01"):
    """Run the analysis on ``text0`` and attach a unified n-gram structure.

    The module and task identifiers used to be hard-coded in the call to
    ``top_level_procedure``; they are now keyword parameters whose defaults
    reproduce the previous behaviour, so existing one-argument callers are
    unaffected.

    Parameters:
        text0:  the text handed to ``top_level_procedure`` as first argument.
        module: forwarded to ``top_level_procedure`` (default ``"H810"``).
        task:   forwarded to ``top_level_procedure`` (default ``"TMA01"``).

    Side effect: the module-level ``__mapkeyscore`` mapping is updated with
    one ``word -> score`` pair per item of ``scoresNfreqs``.

    Returns the essay structure produced by ``top_level_procedure`` with an
    added ``'nvl_data'`` entry holding ``'keywords'``, ``'bigrams'``,
    ``'trigrams'`` and ``'quadgrams'``.
    """
    essay = top_level_procedure(text0, None, None, None, "NVL", module, task)

    # Reformat n-grams into a unified structure.
    ke_data = essay['ke_data']
    keylemmas = ke_data['keylemmas']
    bigram_keyphrases = ke_data['bigram_keyphrases']
    trigram_keyphrases = ke_data['trigram_keyphrases']
    quadgram_keyphrases = ke_data['quadgram_keyphrases']
    myarray_ke = ke_data['myarray_ke']
    scoresNfreqs = ke_data['scoresNfreqs']

    # Build an associative array (word -> score) out of the keywords list;
    # the last two fields of each 4-tuple are not used here.
    for word, score, _r, _c in scoresNfreqs:
        __mapkeyscore[word] = score

    nvl_data = {}
    nvl_data['keywords'] = lemmaToJSON(keylemmas, myarray_ke)
    nvl_data['bigrams'] = ngramToJSON(bigram_keyphrases, myarray_ke)
    nvl_data['trigrams'] = ngramToJSON(trigram_keyphrases, myarray_ke)
    nvl_data['quadgrams'] = ngramToJSON(quadgram_keyphrases, myarray_ke)
    essay['nvl_data'] = nvl_data

    return essay
essay_fname, 'r', encoding='utf-8') # Open current essay file for reading essay_txt = f.read() # Read in the essay and set to var 'essay_txt' f.close() # Close the essay file string = essay_fname[:-4] + '_results' + '.txt' newfilename = os.path.join(tempdir1, string) nf = codecs.open( newfilename, 'w', encoding='utf-8' ) # Open 'newfilename' (for writing to) and set open file to var 'nf' #if dev == 'DGF': #nf2.write('\n') # Add blank lines to the essay results file nf2.write(str( essay_fname)) # Write the new file name to the essay results file nf2.write('; ') essay = top_level_procedure(essay_txt, essay_fname, nf, nf2, dev, "H810", "TMA01") ## ############################# ## ############################# ## ### This section is for drawing figures. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'. ## ############################# ## ############################# ## essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") ## string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png' ## figurefilename = os.path.join(tempdir1, string) ## #pos=nx.circular_layout(gr_se_sample) ## #pos=nx.graphviz_layout(gr_se_sample,prog="neato") ## plt.figure(1, figsize=(8,8)) ## x = gr_se_sample.nodes() ## #plt.title(essay_fname) ## nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\
) # Open 'newfilename' (for writing to) and set open file to var 'nf' #if dev == 'DGF': #nf2.write('\n') # Add blank lines to the essay results file nf2.write(str( essay_fname)) # Write the new file name to the essay results file nf2.write('; ') #essay = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") ############################# ############################# ### This section is for drawing diagrams. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'. ############################# ############################# # essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") # xxxx 'essay' argument not needed here, only running EssayAnalyser not OpenEssayist, next line down is normal version gr_se_sample, gr_ke_sample = top_level_procedure( essay_txt, essay_fname, nf, nf2, dev, "H810", "TMA01") #print 'Key sentence subgraph for rainbow diagram' #print(gr_se_sample.adj) # This is how you print a networkx graph string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png' figurefilename = os.path.join(tempdir1, string) #pos=nx.circular_layout(gr_se_sample) #pos=nx.graphviz_layout(gr_se_sample,prog="neato") plt.figure(1, figsize=(8, 8)) x = gr_se_sample.nodes() print '\nNumber of nodes in key sentence subgraph for rainbow diagram = ', len( x) #print gr_se_sample.nodes(data = True) plt.title(essay_fname) nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\ #nx.draw(gr_se_sample, font_size=5, font_color='b', font_weight='normal',\ node_size=500,
for essay_fname in filelist: # For each file in the current directory... #startfiletime = time() # Set current time to a variable for later calculations if essay_fname[-3:] == 'txt': # If a file name ends in 'txt'... print '\n', essay_fname # Print to shell to monitor progress f = codecs.open(essay_fname, 'r',encoding='utf-8') # Open current essay file for reading essay_txt = f.read() # Read in the essay and set to var 'essay_txt' f.close() # Close the essay file string = essay_fname[:-4] + '_results' + '.txt' newfilename = os.path.join(tempdir1, string) nf = codecs.open(newfilename, 'w',encoding='utf-8') # Open 'newfilename' (for writing to) and set open file to var 'nf' #if dev == 'DGF': #nf2.write('\n') # Add blank lines to the essay results file nf2.write(str(essay_fname)) # Write the new file name to the essay results file nf2.write('; ') essay = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") ## ############################# ## ############################# ## ### This section is for drawing figures. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'. ## ############################# ## ############################# ## essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") ## string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png' ## figurefilename = os.path.join(tempdir1, string) ## #pos=nx.circular_layout(gr_se_sample) ## #pos=nx.graphviz_layout(gr_se_sample,prog="neato") ## plt.figure(1, figsize=(8,8)) ## x = gr_se_sample.nodes() ## #plt.title(essay_fname) ## nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\
newfilename = os.path.join(tempdir1, string) nf = codecs.open(newfilename, 'w',encoding='utf-8') # Open 'newfilename' (for writing to) and set open file to var 'nf' #if dev == 'DGF': #nf2.write('\n') # Add blank lines to the essay results file nf2.write(str(essay_fname)) # Write the new file name to the essay results file nf2.write('; ') #essay = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") ############################# ############################# ### This section is for drawing diagrams. Comment it in, and comment out previous line. Also change 'return' line in 'top_level_procedure'. ############################# ############################# # essay, gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") # xxxx 'essay' argument not needed here, only running EssayAnalyser not OpenEssayist, next line down is normal version gr_se_sample, gr_ke_sample = top_level_procedure(essay_txt,essay_fname,nf,nf2,dev,"H810","TMA01") #print 'Key sentence subgraph for rainbow diagram' #print(gr_se_sample.adj) # This is how you print a networkx graph string = essay_fname[:-4] + '_gr_se_sample_nodes' + '.png' figurefilename = os.path.join(tempdir1, string) #pos=nx.circular_layout(gr_se_sample) #pos=nx.graphviz_layout(gr_se_sample,prog="neato") plt.figure(1, figsize=(8,8)) x = gr_se_sample.nodes() print '\nNumber of nodes in key sentence subgraph for rainbow diagram = ', len(x) #print gr_se_sample.nodes(data = True) plt.title(essay_fname) nx.draw(gr_se_sample, font_size=0, font_color='c', font_weight='normal',\ #nx.draw(gr_se_sample, font_size=5, font_color='b', font_weight='normal',\ node_size=500, #stretch_factor=100,