def create_tag_cloud(prm, filename_json):
    """Build and save a tag co-occurrence graph ("tag cloud") from a JSON file.

    ``prm`` selects how many tags to plot: 0 means all tags, a positive
    value means the ``prm`` most common tags, and a negative value means
    the ``|prm|`` least common tags.
    """
    prm_least_flag = prm < 0
    jsonData = format_json.loadjson(filename_json)
    taglist = [jsonData[key]["tags"] for key in jsonData.keys()]
    tag_count = collections.Counter(itertools.chain.from_iterable(taglist))
    # Remove the single most frequent tag before plotting — it would
    # otherwise dominate the graph.
    top_tag, _ = tag_count.most_common(1)[0]
    tag_count.pop(top_tag)
    G = nx.Graph()
    if prm == 0:
        selected = tag_count.most_common()
        str_prm = "all"
    else:
        if prm_least_flag:
            # Negative prm: keep only the tail (least common) of the ranking.
            selected = tag_count.most_common()[prm:]
        else:
            selected = tag_count.most_common(prm)
        str_prm = str(prm)
    G.add_nodes_from([(tag, {"count": count}) for tag, count in selected])
    G = add_edges(G, taglist)
    define_plt_pram(G)
    filename_plot = get_filename_savefig(filename_json) + "_CLOUD_" + str_prm
    plot_save_nx(G, filename_plot)
def loadjson(filename):
    """Load records from a JSON file as a list of dicts.

    If the file is missing (``format_json.loadjson`` returns -1), a new
    empty JSON file is created — adding a ``.json`` extension if the name
    lacks one — and an empty list is returned.

    Returns:
        list: one dict per top-level key of the JSON file (empty when the
        file did not exist).
    """
    list_dict_results = []
    dict_json = format_json.loadjson(filename)
    if dict_json == -1:
        print("error: Not Found file. Create new file")
        # Bug fix: the original substring test (".json" not in filename)
        # wrongly skipped appending the extension for names such as
        # "data.json.bak"; only the actual suffix matters.
        if not filename.endswith(".json"):
            filename += ".json"
        savejson(list_dict_results, filename)
    else:
        list_dict_results = [dict_json[key] for key in dict_json]
    return list_dict_results
def all_tags_list(filename_json):
    """Return every tag found in the JSON file, ordered most → least common."""
    jsonData = format_json.loadjson(filename_json)
    per_entry_tags = (jsonData[key]["tags"] for key in jsonData)
    counts = collections.Counter(itertools.chain.from_iterable(per_entry_tags))
    return [tag for tag, _ in counts.most_common()]
def create_year_map(filename_json):
    """Plot a year-vs-tag map: one node per (year, tag) pair, with edges
    linking the same tag across consecutive years, then save the figure.
    """
    jsonData = format_json.loadjson(filename_json)
    taglist = []
    for key in jsonData.keys():
        temp = jsonData[key]["tags"]
        # Last 4 characters of the "year" field — presumably a date string
        # ending in the year; entries whose year is the literal "None" are
        # skipped entirely.  TODO confirm the field format against the data.
        year = jsonData[key]["year"][-4:]
        if year != "None":
            # The int year is appended as the LAST element of each tag list;
            # the rest of this function relies on that position (e[-1]).
            temp.append(int(year))
            taglist.append(temp)
    min_year = 3000  # sentinel larger than any plausible year
    dict_taglist = {}  # year (int) -> set of tags seen in that year
    for e in taglist:
        if e[-1] in dict_taglist.keys():
            temp = dict_taglist[e[-1]]
            # NOTE(review): e[1:-1] drops the FIRST tag as well as the year —
            # presumably the first element is an identifier rather than a
            # real tag; confirm against the producer of "tags".
            set_temp = set(e[1:-1])
            temp = temp | set_temp
            dict_taglist[e[-1]] = temp
        else:
            temp = set(e[1:-1])
            dict_taglist[e[-1]] = temp
        if min_year > e[-1]:
            min_year = e[-1]
    tag_count = collections.Counter(itertools.chain.from_iterable(taglist))
    # NOTE(review): main_tag (second most common item) is computed but never
    # used below.
    main_tag = tag_count.most_common(2)[1][0]
    G = nx.Graph()
    all_tags = all_tags_list(filename_json)
    # Node ids are "<year>_<tag>" so the same tag can appear once per year.
    temp = []
    for e in dict_taglist.keys():
        tags = dict_taglist[e]
        for t in tags:
            temp.append((str(e) + "_" + t, {"year": e, "tag": t}))
    G.add_nodes_from(temp)
    plt.figure(figsize=(50, 50))
    #pos = nx.spring_layout(G, k=1.5)
    # Manual layout: x grows with the year, y is the tag's overall-frequency
    # rank — so each tag keeps a fixed horizontal lane across years.
    pos = {}
    for e in temp:
        pos[e[0]] = ((e[1]["year"] - min_year) * 1000, all_tags.index(e[1]["tag"]) * 100)
    # NOTE(review): font_weight/font_family are label options, not node
    # options — draw_networkx_nodes likely ignores (or rejects) them
    # depending on the networkx version; verify.
    nx.draw_networkx_nodes(G, pos, node_color='b', alpha=0.2, node_size=30, font_weight="bold", font_family='VL Gothic')
    # NOTE(review): the keyword is spelled "font_size" in networkx;
    # "fontsize" is presumably ignored here — verify.
    nx.draw_networkx_labels(G, pos, fontsize=9)
    # Connect identical tags between consecutive years (insertion order of
    # dict_taglist keys — assumed chronological; TODO confirm).
    keys = list(dict_taglist.keys())
    b_tags = dict_taglist[keys[0]]
    for i in range(1, len(keys)):
        a_tags = dict_taglist[keys[i]]
        same_tags = b_tags & a_tags
        for s in same_tags:
            G.add_edge(str(keys[i - 1]) + "_" + s, str(keys[i]) + "_" + s, weight=1)
        b_tags = a_tags
    nx.draw_networkx_edges(G, pos, alpha=0.4, edge_color='r', width=2)
    filename_plot = get_filename_savefig(filename_json)
    plot_save_nx(G, filename_plot)
# (continuation of a tagging function whose `def` line is above this view;
#  `jsondata`, `stopwords`, `symbols`, `stemmer` and `exclude_list` are
#  defined outside this fragment)
    for key in jsondata.keys():
        sentence = jsondata[key]["summary"]
        token = nltk.word_tokenize(sentence)
        text = nltk.Text(token)
        # Frequency of lowercased words, minus stopwords and symbols.
        fdist = nltk.FreqDist(w.lower() for w in text if w.lower() not in stopwords + symbols)
        # NOTE(review): pos_tag receives the FreqDist itself, i.e. it tags
        # the sequence of UNIQUE words, not the running text — POS accuracy
        # suffers without sentence context; confirm this is intended.
        tagged = nltk.pos_tag(fdist)
        # Keep stems of words whose POS tag is not excluded; stored as the
        # entry's "tags".
        temp = nltk.FreqDist(
            stemmer.stem(w[0]) for w in tagged if w[1] not in exclude_list)
        jsondata[key]["tags"] = temp
    return jsondata


# ---- ----
# main
if __name__ == "__main__":
    # CLI: first positional argument is the JSON file to (re)tag in place.
    parser = argparse.ArgumentParser()
    parser.add_argument('Filename', metavar='F', type=str, nargs='+', help='Filename for saving data')
    args = parser.parse_args()
    FILENAME_JSON = args.Filename[0]
    jsondata = format_json.loadjson(FILENAME_JSON)
    jsondata = auto_tagging(jsondata)
    format_json.savejson_dict(jsondata, FILENAME_JSON)