Exemplo n.º 1
0
def create_tag_cloud(prm, filename_json):
    """Build and save a tag co-occurrence graph ("tag cloud") from a JSON file.

    prm selects which tags to plot: 0 keeps every tag, a positive value
    keeps the prm most common tags, and a negative value keeps the |prm|
    least common tags.  The single most frequent tag is always dropped
    first (it would dominate the cloud).
    """
    use_least_common = prm < 0

    json_data = format_json.loadjson(filename_json)
    tags_per_entry = [json_data[key]["tags"] for key in json_data]
    counts = collections.Counter(itertools.chain.from_iterable(tags_per_entry))

    # Remove the single most common tag before selecting what to draw.
    top_tag, _top_count = counts.most_common(1)[0]
    counts.pop(top_tag)

    G = nx.Graph()
    if prm == 0:
        selected = counts.most_common()
        str_prm = "all"
    else:
        ranked = counts.most_common()
        # Negative prm slices the tail (least common); positive takes the head.
        selected = ranked[prm:] if use_least_common else ranked[:prm]
        str_prm = str(prm)
    G.add_nodes_from((tag, {"count": n}) for tag, n in selected)
    G = add_edges(G, tags_per_entry)
    define_plt_pram(G)

    filename_plot = get_filename_savefig(filename_json) + "_CLOUD_" + str_prm
    plot_save_nx(G, filename_plot)
def loadjson(filename):
  """Return the entries of a JSON file as a list of dicts.

  If format_json.loadjson signals a missing file by returning -1, print
  a warning, create a new empty JSON file at *filename* (appending the
  ".json" suffix when absent), and return an empty list.
  """
  dict_json = format_json.loadjson(filename)
  if dict_json != -1:
    # Flatten the top-level mapping into a list of its values, in key order.
    return [dict_json[key] for key in dict_json]
  print("error: Not Found file. Create new file")
  if ".json" not in filename:
    filename += ".json"
  savejson([], filename)
  return []
Exemplo n.º 3
0
def all_tags_list(filename_json):
    """Return every distinct tag in *filename_json*, most common first.

    Ordering follows collections.Counter.most_common(): descending
    frequency, ties broken by first-seen order.
    """
    jsonData = format_json.loadjson(filename_json)
    # One tag list per JSON entry; the Counter flattens them all.
    taglist = [jsonData[key]["tags"] for key in jsonData]
    tag_count = collections.Counter(itertools.chain.from_iterable(taglist))
    # Keep only the tag names, discarding the counts.
    return [tag for tag, _count in tag_count.most_common()]
Exemplo n.º 4
0
def create_year_map(filename_json):
    """Plot tags on a year/tag grid: x position is the entry's year, y
    position is the tag's overall-frequency rank; tags shared between
    consecutive years are linked by red edges. Saves the plot to disk."""
    jsonData = format_json.loadjson(filename_json)
    taglist = []
    for key in jsonData.keys():
        temp = jsonData[key]["tags"]
        # Last 4 characters of the "year" field, e.g. "2020"; entries
        # whose year string is "None" are skipped entirely.
        year = jsonData[key]["year"][-4:]
        if year != "None":
            # NOTE(review): this appends the year onto the entry's own
            # tag list, mutating jsonData in place — confirm no caller
            # reuses jsonData afterwards.
            temp.append(int(year))
            taglist.append(temp)

    min_year = 3000  # sentinel larger than any plausible year
    dict_taglist = {}  # year -> set of tags seen that year
    for e in taglist:
        # e layout: [tag0, tag1, ..., tagN, year]; year is always e[-1].
        if e[-1] in dict_taglist.keys():
            temp = dict_taglist[e[-1]]
            # NOTE(review): e[1:-1] drops the FIRST tag (e[0]) as well as
            # the year — looks like it may be intended as e[:-1]; confirm.
            set_temp = set(e[1:-1])
            temp = temp | set_temp
            dict_taglist[e[-1]] = temp
        else:
            temp = set(e[1:-1])
            dict_taglist[e[-1]] = temp
        if min_year > e[-1]:
            min_year = e[-1]

    # Counts include the appended year ints alongside tag strings.
    tag_count = collections.Counter(itertools.chain.from_iterable(taglist))
    # Second most common element; presumably the first is a ubiquitous
    # tag being skipped — TODO confirm. (main_tag is currently unused.)
    main_tag = tag_count.most_common(2)[1][0]

    G = nx.Graph()

    all_tags = all_tags_list(filename_json)
    temp = []
    # One node per (year, tag) pair, named "<year>_<tag>" so the same tag
    # in different years gets distinct nodes.
    for e in dict_taglist.keys():
        tags = dict_taglist[e]
        for t in tags:
            temp.append((str(e) + "_" + t, {"year": e, "tag": t}))
    G.add_nodes_from(temp)
    plt.figure(figsize=(50, 50))
    #pos = nx.spring_layout(G, k=1.5)

    # Manual layout: x grows with the year offset, y with the tag's
    # global frequency rank from all_tags_list.
    pos = {}
    for e in temp:
        pos[e[0]] = ((e[1]["year"] - min_year) * 1000,
                     all_tags.index(e[1]["tag"]) * 100)

    # NOTE(review): font_weight/font_family are label options, not node
    # options — recent networkx versions reject unknown kwargs here.
    nx.draw_networkx_nodes(G,
                           pos,
                           node_color='b',
                           alpha=0.2,
                           node_size=30,
                           font_weight="bold",
                           font_family='VL Gothic')
    # NOTE(review): networkx spells this parameter font_size; "fontsize"
    # may be silently ignored or rejected depending on the version.
    nx.draw_networkx_labels(G, pos, fontsize=9)

    # Connect each tag to itself across consecutive year buckets (in the
    # dict's insertion order, not sorted years — TODO confirm intended).
    keys = list(dict_taglist.keys())
    b_tags = dict_taglist[keys[0]]
    for i in range(1, len(keys)):
        a_tags = dict_taglist[keys[i]]
        same_tags = b_tags & a_tags
        for s in same_tags:
            G.add_edge(str(keys[i - 1]) + "_" + s,
                       str(keys[i]) + "_" + s,
                       weight=1)
        b_tags = a_tags

    nx.draw_networkx_edges(G, pos, alpha=0.4, edge_color='r', width=2)
    filename_plot = get_filename_savefig(filename_json)
    plot_save_nx(G, filename_plot)
Exemplo n.º 5
0
    for key in jsondata.keys():
        sentence = jsondata[key]["summary"]
        token = nltk.word_tokenize(sentence)
        text = nltk.Text(token)
        fdist = nltk.FreqDist(w.lower() for w in text
                              if w.lower() not in stopwords + symbols)
        tagged = nltk.pos_tag(fdist)
        temp = nltk.FreqDist(
            stemmer.stem(w[0]) for w in tagged if w[1] not in exclude_list)
        jsondata[key]["tags"] = temp
    return jsondata


# ---- ----
# main

if __name__ == "__main__":
    # CLI: one or more positional filenames; only the first is used.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('Filename',
                            metavar='F',
                            type=str,
                            nargs='+',
                            help='Filename for saving data')
    cli_args = arg_parser.parse_args()

    FILENAME_JSON = cli_args.Filename[0]

    # Load, auto-tag, and write the updated data back to the same file.
    jsondata = auto_tagging(format_json.loadjson(FILENAME_JSON))
    format_json.savejson_dict(jsondata, FILENAME_JSON)