Example no. 1
    def export_to_graphml(self, filename: str):
        """Save the network as .graphml file.

        Parameters
        ----------
        filename
            Specify filename for exporting the graph.

        """
        nx.write_graphml_lxml(self.graph, filename)
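A minimal usage sketch for this method, assuming a small wrapper class that keeps a networkx graph on self.graph and that lxml is installed (the class name and placeholder graph below are illustrative, not from the original source):

import networkx as nx

class Network:
    def __init__(self):
        # placeholder graph so the sketch runs; a real class would build this elsewhere
        self.graph = nx.karate_club_graph()

    def export_to_graphml(self, filename: str):
        """Save the network as .graphml file."""
        nx.write_graphml_lxml(self.graph, filename)

Network().export_to_graphml("network.graphml")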
Example no. 2
def main_entry_point(argv=None):
    arguments = docopt(__doc__, version=__version__, argv=argv)
    # Since there are no support for default positional arguments in
    # docopt yet. Might be useful for complex default values, too
    if arguments["<out-file>"] is None:
        arguments["<out-file>"] = "-"

    coref_tags = set([arguments["--tag"]])

    with smart_open(arguments["<in-file>"]) as in_stream:
        inpt_dict = json.load(in_stream)

    sys_antecedents = ((
        mention,
        ((
            antecedent["id"],
            float(antecedent["score"]),
            antecedent["gold"] in coref_tags,
        ) for antecedent in v["candidates"]
         if antecedent["sys"] in coref_tags),
    ) for mention, v in inpt_dict.items())

    if arguments["--algo"] == "best":
        sys_links = best_first(sys_antecedents)
    elif arguments["--algo"] == "all":
        sys_links = transitive(sys_antecedents)
    else:
        raise ValueError(f"Invalid algo: {arguments['--algo']}")

    S = nx.DiGraph()
    S.add_nodes_from((
        m,
        {
            "content":
            v["content"],
            "color": ("black" if any(a["gold"] in coref_tags
                                     for a in v["candidates"]) else "yellow"),
        },
    ) for m, v in inpt_dict.items())

    # S.add_edges_from(gold_antecedents(inpt_dict, coref_tags))
    S.add_edges_from(sys_links)
    with smart_open(arguments["<out-file>"], "wb") as out_stream:
        nx.write_graphml_lxml(S, out_stream)

    if arguments["--gold"] is not None:
        gold_links = [(m, a["id"]) for m, v in inpt_dict.items()
                      for a in v["candidates"] if a["gold"] in coref_tags]
        S = nx.DiGraph()
        S.add_nodes_from((m, {
            "content": v["content"]
        }) for m, v in inpt_dict.items())
        S.add_edges_from(gold_links)
        with smart_open(arguments["--gold"], "wb") as out_stream:
            nx.write_graphml_lxml(S, out_stream)
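For reference, the shape of the input JSON this script expects can be inferred from the fields it reads (content, candidates, id, score, sys, gold); a hand-written sketch of one entry, with all values invented:

inpt_dict = {
    "mention_12": {
        "content": "the company",
        "candidates": [
            # each candidate carries an id, a score, and the system/gold coreference tags
            {"id": "mention_3", "score": "0.83", "sys": "COREF", "gold": "COREF"},
            {"id": "mention_7", "score": "0.02", "sys": "NOT_COREF", "gold": "NOT_COREF"},
        ],
    },
}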
Example no. 3
def build_network_from_df(df, key, threshold=None, k1="k1", k2="k2",
                          save_path="/Users/iris/Documents/QMUL-2018/Individual_Project/coding/final_networks/"):
    if threshold is not None:
        df = df[df[key] > threshold]
    output_net = nx.from_pandas_edgelist(df, k1, k2, [key])

    # quick view of the networks
    nx.draw_networkx(output_net, with_labels=True)
    plt.show()
    nx.write_graphml_lxml(output_net, save_path + "net_{}.graphml".format(key))

    return output_net
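A quick way to exercise this helper on a toy edge list, assuming pandas is available as pd (and nx/plt as imported by the surrounding module); save_path is overridden because the default above points at the author's machine, and the column and key names are made up:

import pandas as pd

toy = pd.DataFrame({
    "k1": ["a", "a", "b"],
    "k2": ["b", "c", "c"],
    "jaccard": [0.9, 0.2, 0.5],
})
# keeps only edges with jaccard > 0.3, plots them, and writes ./net_jaccard.graphml
net = build_network_from_df(toy, "jaccard", threshold=0.3, save_path="./")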
Example no. 4
    def export_to_graphml(self, filename: str):
        """Save the network as .graphml file.

        Parameters
        ----------
        filename
            Specify filename for exporting the graph.

        """
        if not self.graph:
            raise ValueError("No network found. Make sure to first run .create_network() step")
        nx.write_graphml_lxml(self.graph, filename)
Example no. 5
def save(G, fName):
    """suggest different file formats"""
    nx.write_gml(G, fName + ".gml")
    # G = nx.read_gml(fName+".gml")
    if False:
        ox.save_graphml(G, filename=fName, folder=baseDir + "gis/graph/")
        ox.save_gdf_shapefile(G, filename=fName + ".gdf", folder=baseDir + "gis/graph/")
        ox.save_graphml(G, filename=gName, folder=baseDir + "gis/graph/")
        G = ox.load_graphml(filename=gName, folder=baseDir + "gis/graph/")
        ox.save_gdf_shapefile(G, filename=fName + ".gdf", folder=baseDir + "gis/graph/")
        G = ox.load_graphml(filename=fName + ".gdf", folder=baseDir + "gis/graph/")
        nx.readwrite.write_shp(G, baseDir + "gis/graph/berlin_street")
        nx.write_graphml_lxml(G, baseDir + fName + ".graphml")
Example no. 6
def dumpDataset(dataset_name, dataset, lxml=False):
    # add a folder for the dataset
    try:
        os.mkdir(os.getcwd() + '/' + dataset_name)
    except Exception:
        return
    # dump all graphml files
    for i, graph in enumerate(dataset):
        path = f"{dataset_name}/{dataset_name}_{i + 1}.graphml"
        if lxml:
            nx.write_graphml_lxml(graph, path)
        else:
            nx.write_graphml_xml(graph, path)
Example no. 7
    def load_graph(self, keep_isolates=False, output=False):
        path = os.path.join(self.dir, self.filename)
        df = pd.read_csv(path, index_col=0)
        if self.source == 'comentions':
            G = nx.from_pandas_adjacency(df)
        elif self.source == 'topics':
            G = nx.Graph()
            MPs = df.index.values.tolist()
            topics = list(df.columns)
            G.add_nodes_from(MPs, bipartite=0)
            G.add_nodes_from(topics, bipartite=1)
            weighted_edges = [(idx, topic, df.loc[idx, topic])
                              for topic in topics
                              for idx, row in df.iterrows()
                              if df.loc[idx, topic] > 0]
            G.add_weighted_edges_from(weighted_edges)
        if keep_isolates is False:
            G = remove_isolates(G)
        if output:
            nx.write_graphml_lxml(G, os.path.join(self.dir, f"{self.source}_{self.convocation_id}_graph.graphml"))
        self.G = G
Example no. 8
def main():
    # Get list of chunks as generic chunks divorced from type
    genericChunkList = readInputFile()
    typedChunkList = []

    # Process the generic chunks into their actual chunk types
    for chunk in genericChunkList:
        newChunk = processChunk(chunk)
        typedChunkList.append(newChunk)

    # Clear the generic chunk list to free up memory
    genericChunkList.clear()

    # DEBUG: Print out the chunks
    for chunk in typedChunkList:
        print(chunk)

    # Generate graph based on chunks
    graph = generateGraph(typedChunkList)
    networkx.write_graphml_lxml(graph, CONST_INPUT_FILE_NAME + ".graphml")
Example no. 9
def build_network_from_df(df, key, threshold=None, k1="k1", k2="k2",
                          save_path="/Users/iris/Documents/QMUL-2018/Individual_Project/coding/"):
    if threshold is not None:
        df = df[df[key] > threshold]
    output_net = nx.from_pandas_edgelist(df, k1, k2, [key])

    ### add edge weights to network
    kk1 = tuple(df[k1])
    # print type(kk1)
    kk2 = tuple(df[k2])
    # print type(kk2)
    kk3 = tuple(df[key])
    tuple_edgeweight = zip(kk1, kk2, kk3)

    output_net.add_weighted_edges_from(tuple_edgeweight, weight='weight')

    # quick view of the networks
    nx.draw_networkx(output_net, with_labels=True)
    plt.show()
    nx.write_graphml_lxml(output_net, save_path + "net_{}.graphml".format(key))

    return output_net
Example no. 10
            f.write('\t'.join([str(x) for x in line]) + '\n')
    print('[i] most frequent 10 changes')
    print(
        tabulate(
            [line[:-1] for line in table][:10],
            tablefmt='pipe',
            headers=['Source', 'Target', 'Frequency', 'Relative Frequency']))
    print('[i] most rare 10 changes')
    print(
        tabulate(
            [line[:-1] for line in table[::-1]][:10],
            tablefmt='pipe',
            headers=['Source', 'Target', 'Frequency', 'Relative Frequency']))

if "network" in argv:
    nx.write_graphml_lxml(G, argv[1][:-4] + '.graphml')

#if 'suffixes' in argv:
#    data = defaultdict(list)
#    for idx, doculect, tokens, relation, process in wl.iter_rows(
#            'doculect', 'tokens', 'relation', 'process'):
#        if doculect == "Indo-European" and relation == 'derivation':
#            if ',' in process:
#                grade, suffix = process.split(', ')
#                if suffix in ['suffix', 'prefix']:
#                    affix_type, affix_form = suffix.split(': ')
#                else:
#                    grade = ''
#            else:
#                grade, affix_type, affix_form = process, '', ''
#            if grade:
Example no. 11
def DRSToItem():
    import matplotlib.pyplot as plt
    # Declare relevant variables
    DRSGraph = None
    DRSLines = []
    # Read in DRS instructions from file
    DRSFile = open("DRS_read_in.txt", "r")
    for line in DRSFile:
        # Get DRS command and remove any leading and ending whitespace
        DRSLines.append(line.strip())
    # Get numbers of which lines are headers ([A, B, C, ...] and conditionals (=>) )
    symbolLines = getSymbolLines(DRSLines)

    categorizedDRSLines = categorizeDRSLines(DRSLines, symbolLines)

    # Get all if-then sets
    conditionalSets = getConditionals(DRSLines, categorizedDRSLines)

    # print(conditionalSets)
    # Set up the predicate switcher
    predSwitcher = predicateSwitcher()

    # Set up counter for question response
    questionCounter = 1

    # Iterate through the DRS instructions
    for index, currentInstruction in enumerate(DRSLines):
        # take next instruction or exit
        nextStep = ''

        # As long as no "exit" given
        if nextStep != 'exit':
            print(currentInstruction)
            # If the current line is an instruction
            if categorizedDRSLines.get(index) == CONST_INSTRUCTION_TAG:
                # Get the predicate type and contents
                instructionCountInMatchingIfBlock, conditionalWithMatchingIfBlock = \
                    checkCurrentInstructionIf(DRSLines, index, currentInstruction, conditionalSets)
                if instructionCountInMatchingIfBlock == 0:
                    DRSGraph = splitAndRun(currentInstruction, predSwitcher)

        # Break out of loop with exit
        else:
            break

    # On end of reading in instructions
    # process conditionals first:
    for conditional in conditionalSets:
        if not conditional.processed:
            DRSGraph = runFullConditional(conditional, predSwitcher, DRSGraph,
                                          conditionalSets)

    # Set up questionSwitcher
    qSwitcher = questionSwitcher()
    questionInput = input('Please enter a question')
    # "exit" is trigger word to end questioning
    while questionInput != 'exit':
        questionLines = APEWebserviceCall(questionInput)
        while questionLines is None:
            questionInput = input(
                'There was an error with the ACE entered - please try again.')
            questionLines = APEWebserviceCall(questionInput)

        for currentLine in questionLines:
            predicateSplit = currentLine.split('(', 1)
            predicateType = predicateSplit[0]
            predicateContents = predicateSplit[1]
            # print(categorizedDRSLines.get(index))
            qSwitcher.callFunction(predicateType, predicateContents, DRSGraph)
            print(currentInstruction)

        result = qSwitcher.resolveQuestion()
        if result:
            print("Question", str(questionCounter), "Answer: Yes")
        elif not result and result is not None:
            print("Question", str(questionCounter), "Answer: No")
        else:
            print("Question", str(questionCounter), "Answer: Unknown")
        questionCounter = questionCounter + 1
        # I have my doubts about these lines below but they seem to work
        DRSGraph = qSwitcher.returnDRSGraph()
        predSwitcher.updateDRSGraph(DRSGraph.graph)
        # Reset qSwitcher to be a new question switcher
        qSwitcher = questionSwitcher()
        questionInput = input('Please enter a DRS line for your question')

    # Once "exit" has been entered
    # At end of program, if an ontology was built at all, print it out and export it in GraphML
    if DRSGraph is not None:
        # networkx.draw(DRSGraph.graph, labels=networkx.get_node_attributes(DRSGraph.graph, CONST_NODE_VALUE_KEY))
        # plt.show()
        jsonFile = open("jsonFile.txt", "w")
        jsonSerializable = networkx.readwrite.json_graph.node_link_data(
            DRSGraph.graph)
        jsonOutput = json.dumps(jsonSerializable)
        jsonFile.write(jsonOutput)
        networkx.write_graphml_lxml(DRSGraph.graph, "DRSGraph.graphml")
Example no. 12
def write_adj_matrix(gnx, file_name):
    nx.write_graphml_lxml(gnx, file_name)
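Despite its name, this wrapper writes GraphML rather than an adjacency matrix; a small round-trip sketch, assuming networkx is imported as nx and lxml is installed:

import networkx as nx

gnx = nx.path_graph(4)
write_adj_matrix(gnx, "path.graphml")
restored = nx.read_graphml("path.graphml")  # node ids come back as strings
print(restored.number_of_nodes(), restored.number_of_edges())  # 4 3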
Example no. 13
# build network
import networkx as nx
net=nx.from_pandas_edgelist(relationships,create_using=nx.DiGraph())

# make sure isolates are in the network
net.add_nodes_from(twusers.twitter)

#add attributes from data frame
### data frame as dictionary
attributes=twusers.set_index('twitter').to_dict('index')
### add attributes of nodes to network
nx.set_node_attributes(net, attributes)

#%%

nx.write_graphml_lxml(net, "presiAmericas.graphml") 

#%%

nx.draw_networkx(net)


#%%


pos = nx.nx_pydot.graphviz_layout(net)
nx.draw_networkx(net,pos=pos)


#%%
import matplotlib.pyplot as plt
Example no. 14
def main(simulated_time):

    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    """
    TOPOLOGY from a json
    """

    t = Topology()
    t.G = nx.read_graphml("Euclidean.graphml")
    t.G = nx.convert_node_labels_to_integers(t.G,
                                             first_label=0,
                                             ordering='default',
                                             label_attribute=None)

    print("Nodes: %i" % len(t.G.nodes()))
    print("Edges: %i" % len(t.G.edges()))
    #MANDATORY fields of a link
    # Default values =  {"BW": 1, "PR": 1}
    valuesOne = dict(zip(t.G.edges(), np.ones(len(t.G.edges()))))

    nx.set_edge_attributes(t.G, name='BW', values=valuesOne)
    nx.set_edge_attributes(t.G, name='PR', values=valuesOne)

    centrality = nx.betweenness_centrality(t.G)
    nx.set_node_attributes(t.G, name="centrality", values=centrality)

    sorted_clustMeasure = sorted(centrality.items(),
                                 key=operator.itemgetter(1),
                                 reverse=True)

    top20_devices = sorted_clustMeasure[:20]
    main_fog_device = copy.copy(top20_devices[0][0])

    print("-" * 20)
    print("Top 20 centralised nodes:")
    for item in top20_devices:
        print(item)
    print("-" * 20)
    """
    APPLICATION
    """
    app1 = create_application("app1")
    """
    PLACEMENT algorithm
    """
    # There are no modules to place.
    placement = NoPlacementOfModules("NoPlacement")
    """
    POPULATION algorithm
    """
    number_generators = int(len(t.G) * 0.1)
    print(number_generators)

    # you can use whatever function to change the topology
    dStart = deterministicDistributionStartPoint(0, 100, name="Deterministic")
    dStart2 = exponentialDistributionStartPoint(500,
                                                100.0,
                                                name="Deterministic")
    pop = Pop_and_Failures(name="mttf-nodes",
                           srcs=number_generators,
                           activation_dist=dStart2)
    pop.set_sink_control({
        "ids": top20_devices,
        "number": 1,
        "module": app1.get_sink_modules()
    })

    dDistribution = deterministicDistribution(name="Deterministic", time=10)
    pop.set_src_control({
        "number": 1,
        "message": app1.get_message("M.Action"),
        "distribution": dDistribution
    })

    #In addition, a source includes a distribution function:
    """--
    SELECTOR algorithm
    """
    selectorPath = BroadPath()
    """
    SIMULATION ENGINE
    """
    s = Sim(t, default_results_path="Results_%s_exp" % (simulated_time))
    s.deploy_app(app1, placement, pop, selectorPath)

    s.run(simulated_time,
          test_initial_deploy=False,
          show_progress_monitor=False)
    # s.draw_allocated_topology() # for debugging
    print("Total nodes available in the topology %i" % len(s.topology.G.nodes()))
    print("Total edges available in the topology %i" % len(s.topology.G.edges()))

    print(pop.nodes_removed)
    nx.write_graphml_lxml(s.topology.G, "final_network.graphml")
Example no. 15
}, {
    "Reasoning": ["Reasoning", "Memory", "Learning"]
}, {
    "Sentiment": ["Language", "Memory", "Attention", "Emotion"]
}, {
    "Size": ["Spatial", "Perceptual", "Attention"]
}, {
    "State": ["Reasoning", "Categorization", "Perceptual"]
}, {
    "Numerosity": ["Reasoning", "Perceptual", "Attention"]
}]

graph = networkx.Graph()
graph.add_nodes_from(capacities)

for gapType in vqa_gaps:
    # Go through each gap type
    for gapTypeName in gapType:
        # Get the list of capabilities associated with it
        gapCapabilityList = gapType.get(gapTypeName)
        print(gapTypeName)
        print(gapCapabilityList)
        # Create a node for the gap type
        graph.add_node(gapTypeName)
        # Iterate through the capabilities and connect each one to the gap type via an edge
        for capability in gapCapabilityList:
            graph.add_edge(gapTypeName, capability, label="affects")

networkx.write_graphml_lxml(graph, fileLocation)
Example no. 16
def main():
    # params
    IEXCLOUD_TOKEN = os.getenv("IEXCLOUD_TOKEN")

    # Files that must be provided beforehand

    # Files that are generated automatically at run time
    output_folder = './outputs'
    downlaods_folder = './downloads'
    entities_json = f"{output_folder}/wd_entities.json"
    tk_csv = './downloads/bats_symbols_traded_byx.csv'
    tk_info_json = "./downloads/iex_ticker_info.json"
    urls_json = f"{output_folder}/wiki_urls.json"
    mentions_json = f"{output_folder}/wiki_mentions.json"
    sent_cooccurs_json = f"{output_folder}/corpus_mentions_sent_cooccurs.json"
    atk_cooccurs_json = f"{output_folder}/corpus_mentions_atk_cooccurs.json"
    atk_bags_json = f"{output_folder}/corpus_mentions_atk_bags.json"
    freqs_json = f"{output_folder}/corpus_mentions_freqs.json"

    # Wiki processor requires:
    explore_n_wk_depth: int = 2  # how many levels of wk to explore
    adpot_n_wk_depth: int = 1  # wk-titles within n levels are actually adopted (the rest are only used for graph computation)
    wkd_dump_json = "./downloads/latest-all.json.bz2"
    seeded_wk_titles = []
    sp500_csv = f"{downlaods_folder}/s_and_p_500.csv"

    # Wiki processor outputs:
    wk_titles_graphml = f"{output_folder}/wk_titles.graphml.bz2"
    wk_pagerank_json = f"{output_folder}/wk_pagerank.json"
    wk_cat_tags_json = f"{output_folder}/wk_cat_tags.json"
    # wk_tags_json = f"{output_folder}/wk_tags.json"
    wk_tags_pagerank_csv = f"{output_folder}/wk_tags_pagerank.csv"

    wkd_filtered_entities_json = f"{output_folder}/wkd_filtered_entities.json"
    wk_ranked_titles_json = f"{output_folder}/wk_ranked_titles.json"
    wkd_entites_by_ranked_titles_json = f"{output_folder}/wkd_entites_by_ranked_titles.json"

    pathlib.Path(output_folder).mkdir(exist_ok=True)

    # print(get_matched_wkd_entities(titles, wkd_dump_path=wkd_dump_json))
    # entities = load_or_run(wkd_entites_by_ranked_titles_json,
    #                     lambda: get_matched_wkd_entities(titles, wkd_dump_path=wkd_dump_json),
    #                     forcerun=True)

    # print("Get entities that have a symbol property from wikidata")
    # results = load_or_run(
    #     entities_json, lambda: query_wikidata_by_property())
    # comp_wdids = [e['item']['value'].split('/')[-1]
    #               for e in results['results']['bindings']]

    # print("Find the wikipage corresponding to each wikidata entity")
    # comp_titles = load_or_run(
    #     comp_titles_json, lambda: query_wikipage_title(comp_wdids))
    # return

    # print("Load tickers")
    # df = pd.read_csv(tk_csv)
    # tickers = list(df['Symbols'])
    # # tickers = ['ADBE', 'BA', 'RXT', 'TTOO']
    # print(f"Number of tickers loaded: {len(tickers)}")

    # print("Fetch ticker info from iexcloud")
    # infos = load_or_run(
    #     tk_info_json, lambda: download_ticker_info_from_iexcloud(tickers, IEXCLOUD_TOKEN))
    # names = [v['companyName'] for k, v in infos.items()]

    # print("Search for the wikipage matching each company name in the ticker info")
    # urls = load_or_run(
    #     urls_json, lambda: search_wikipage(names))

    #  Scan the wikipedia dump, starting from each company's wiki page, and collect the mentions inside
    #  Newly added mentions become next_entities; repeat the crawl n times (= crawl n levels)
    # print(f"Get the mentions associated with companies - {depth} levels")
    # titles = [v.split('/')[-1].replace("_", " ")
    #           for _, v in urls.items() if v is not None]

    print("# Connect to elasticsearch (used to store wiki-page and news-corpus data)")
    es.connect(["es:9200"])

    print(f"# Starting from the S&P500 wikipage, crawl '{explore_n_wk_depth}' levels of titles and build the graph")
    # seedtitles = ["List of S&P 500 companies"]
    seedtitles = ["Wilson (company)"]
    try:
        # raise FileNotFoundError
        g = gt.load_graph(wk_titles_graphml)
        print(f"File loaded: {wk_titles_graphml}")
    except FileNotFoundError:
        print(f"File not found, create new one")
        g = get_wktitles_graph(seedtitles, n_depth=explore_n_wk_depth)
        for n in g:
            g.nodes[n]['mentions'] = json.dumps(g.nodes[n]['mentions'],
                                                ensure_ascii=False,
                                                default=serialize_sets)
        nx.write_graphml_lxml(g, wk_titles_graphml)
        g = gt.load_graph(wk_titles_graphml)

    print("# Run pagerank on the full graph (switch to the graph-tool library to avoid running out of memory)")
    ranks = load_or_run(wk_pagerank_json,
                        lambda: calc_pagerank(g),
                        forcerun=True)

    print("# Pick out the wiki categories in the graph, then find the wiki title that mainly describes each category")

    def _cat_tags() -> Iterable[str]:
        _, wk_title, _ = zip(*ranks)
        cats = filter(lambda e: "Category:" in e, wk_title)
        # print(list(cats))
        # print([c for c in cats])
        tags = [es.get_corresponded_wktitles(cat_title=c) for c in cats]
        tags = set(itertools.chain(*tags))
        # tags &= set(tags)
        return tags

    cat_tags = load_or_run(wk_cat_tags_json,
                           lambda: _cat_tags(),
                           forcerun=True)

    print("# Look up the wkd-entity corresponding to each wk-title")

    # tags = ["Technology", "Internet", "Metal"]
    cattag_entity = get_matched_wkd_entities(cat_tags)
    ranks_by_tags = []
    for _, wk_title, pagerank in ranks:
        try:
            e = cattag_entity[wk_title]
            ranks_by_tags.append((e.entity_id, e.get_enwiki_title(),
                                  e.get_label("zh"), pagerank))
        except KeyError:
            pass

    print("# Save the ranks in csv format")
    wkd_id, wk_title, zh_label, pagerank = zip(*ranks_by_tags)
    tags = wk_title
    df = pd.DataFrame({
        'wkd_id': wkd_id,
        'wk_title': wk_title,
        'zh_label': zh_label,
        'pagerank': pagerank
    })
    df.to_csv(wk_tags_pagerank_csv, index=False)

    return

    print("# Find the tags for a single ticker")

    def get_neighbors(v: gt.Vertex, n_expands: int = 2):
        seeds = set([v])
        traveled = set()
        for i in range(n_expands):
            nextseeds = set()
            for v in seeds:
                nextseeds |= set(v.out_neighbors())
            nextseeds -= seeds
            traveled |= seeds
            seeds = nextseeds
        return traveled

    # tags = set(["joint venture"])
    tickers = ["Wilson (company)"]
    tags_by_tickers = []
    for tk in tickers:
        v = gt.find_vertex(g, g.vp['_graphml_vertex_id'], tk)[0]
        neighbors = get_neighbors(v, n_expands=2)
        neighbors = set([g.vp['_graphml_vertex_id'][v] for v in neighbors])
        tags_by_tickers.append((tk, tags & neighbors))
    print(tags_by_tickers)

    return

    print("Tag ordering, importance and redundancy (via max_flow, n_path and similar measures)")
    # for tk in tickers:
    #     neighbors = get_neighbors(tk)

    print("TODO: walk through all the news and compute mention term frequencies")

    # print("Walk through all the news and compute mention term frequencies")

    # TODO: expand synonyms (for flashtext)
    # print("Load S&P500 as the seed-wk-titles")
    # df = pd.read_csv(sp500_csv)
    # seedtitles = list(df['Name'])

    # print(f"Starting from the seed-wk-titles, crawl '{explore_n_wk_depth}' levels of wk-titles and build the graph")
    # try:
    #     # raise FileNotFoundError
    #     g = gt.load_graph(wk_titles_graphml)
    #     print(f"File loaded: {wk_titles_graphml}")
    # except FileNotFoundError:
    #     print(f"File not found, create new one")
    #     g = get_wktitles_graph(seedtitles, n_depth=explore_n_wk_depth)
    #     for n in g:
    #         g.nodes[n]['mentions'] = json.dumps(
    #             g.nodes[n]['mentions'], ensure_ascii=False, default=serialize_sets)
    #     nx.write_graphml_lxml(g, wk_titles_graphml)
    #     g = gt.load_graph(wk_titles_graphml)

    # print(f"Only adopt wk-titles within {adpot_n_wk_depth} depth")
    # vp_label = g.vp['_graphml_vertex_id']
    # vp_depth = g.vp['depth']
    # wktitles = [vp_label[v]
    #             for v in g.vertices() if vp_depth[v] <= adpot_n_wk_depth]

    # print("Scan the wkd dump and exclude wk-titles that have no Chinese label, have a location claim (very likely a place), or are people")
    # try:
    #     raise FileNotFoundError
    #     entities = WikidataJsonDump(wkd_filtered_entities_json)
    #     filtered_wktitles = set([e.get_enwiki_title() for e in entities])
    #     print(f"File loaded: {wkd_filtered_entities_json}")
    # except FileNotFoundError:
    #     print(f"File not found, create new one")
    #     entities = get_matched_wkd_entities(
    #         wktitles, wkd_dump_path=wkd_dump_json)
    #     dump_entities_to_json(entities, wkd_filtered_entities_json)
    #     filtered_wktitles = set([e.get_enwiki_title() for e in entities])

    # print("Run pagerank on the full graph (switch to the graph-tool library to avoid running out of memory)")
    # load_or_run(wk_filtered_pagerank_json,
    #             lambda: calc_pagerank(g, included_wktitles=filtered_wktitles), forcerun=True)

    return
Example no. 17
def generate_graph(start_number=None,
                   end_number=None,
                   graph="new",
                   blocks_path=os.path.dirname(os.path.realpath(__file__)),
                   continuation=True):

    # Add the ability to split a number of blocks into a many graphs then combine to save
    # memory and prevent hangups

    blocks_added = []  # list of added blocks for the graph_meta.json
    block_file_list = []  # file, so that added blocks are ignored
    #
    #debug vars__ to be deleted
    debug_zero_div = []

    os.chdir(blocks_path + "/blocks")
    if ("meta.json" in os.listdir()):
        print(str(len(os.listdir()) - 2) + " Blocks Found")
        block_file_list = natsorted(os.listdir())
        print("Popped", block_file_list.pop(),
              block_file_list.pop())  #popping the meta.json file

    else:
        print(str(len(os.listdir())) + "Blocks Found")

    if graph == "new":
        graph = nx.MultiDiGraph()
        graph.add_node("Coinbase")
        graph_meta = {}
        graph_meta["blocks_added"] = blocks_added

        if start_number is None:
            start_number = int(block_file_list[0].split('_')[1].split('.')[0])
        graph_meta["starting_block"] = start_number

        if end_number is None:
            end_number = int(block_file_list[-1].split('_')[1].split('.')[0])
        graph_meta["ending_block"] = end_number

        with open("graph_meta.json", "w+") as graph_meta_file:
            json.dump(graph_meta, graph_meta_file)
    else:
        with open("graph_meta.json") as graph_meta_file:
            graph_meta = json.load(graph_meta_file)
            blocks_added = graph_meta["blocks_added"]

    for block_file2open in block_file_list:

        # Make sure to remove the blocks from memory every X blocks

        block_number = block_file2open.split('_')[1].split('.')[0]

        #if graph != "new" and (block_number in blocks_added): # If block is already added
        #    continue                                          # and we aren't creating a
        # reimplemented                                        # new graph, then the
        #
        #
        #

        print("Graphing block " + block_number)

        with open(block_file2open) as block_file:
            current_block_file = json.load(block_file)
            current_ntxs = current_block_file['blocks'][0]['n_tx']
            current_block = current_block_file['blocks'][0]["height"]

            current_input_address = None
            current_input_value = None
            current_output_address = None
            current_output_value = None

            if (current_block in blocks_added) and (continuation == True):
                continue

            for tx in range(current_ntxs):  # Add support for coinbase txs,
                # Find out what to do with segregated
                # witness (are there other weird tx
                # types ?)

                #note to self: nx doesn't add duplicate nodes
                current_transaction = current_block_file['blocks'][0]["tx"][tx]
                inputs = current_block_file['blocks'][0]["tx"][tx]["inputs"]
                outputs = current_block_file['blocks'][0]["tx"][tx]["out"]
                n_of_inputs = int(current_block_file['blocks'][0]["tx"][tx]
                                  ["vin_sz"])  #faster than len?
                n_of_outputs = int(current_block_file['blocks'][0]["tx"][tx]
                                   ["vout_sz"])  #faster than len?
                # it is worse because all changes in inputs need to be manually adjusted

                is_coinbase = False

                input_vals = {}
                output_vals = {}
                tx_vals_temp = []
                tx_vals = []

                for inputx in range(len(inputs)):
                    try:

                        current_input_address = inputs[inputx]["prev_out"][
                            "addr"]
                        current_input_value = inputs[inputx]["prev_out"][
                            "value"]
                        input_vals[current_input_address] = current_input_value
                        graph.add_node(current_input_address)

                    except:

                        if (n_of_inputs == 1) and (n_of_outputs == 1):

                            #input_vals["Coinbase"] = outputs[0]["value"] #--> which is output value
                            is_coinbase = True
                            graph.add_weighted_edges_from([
                                ("Coinbase", outputs[0]["addr"],
                                 outputs[0]["value"])
                            ])

                        else:
                            continue  # Add checks for values of difficulty vs reward
                            # for checking authenticity and to catch simmilar txs that
                            # are not Coinbase

                        # No need to add Coinbase multiple times
                        # already added when creating graph
                    # only add inputs here then add outputs and edges together
                    # this seems to be rather inefficient O(n^2)
                    # Be careful not to assume seg witness et al are
                    # coinbase just because there might not be an input address
                    # or an input value

                    #Adding input address to graph, doesn't duplicate

                    if is_coinbase == True:  #Then skip output loop because we already added the Tx
                        break

                    for out in range(len(outputs)):
                        current_output_address = outputs[out]["addr"]
                        current_output_value = outputs[out]["value"]

                        if current_output_address in input_vals:  # remove from output list and remove the returned
                            # change value from the transaction
                            input_vals[
                                current_output_address] -= current_output_value  #Removed change from tx
                            n_of_outputs -= 1  #coinbase txs can still be recorded
                            continue  #don't add to output_vals and later to edges

                        else:  #If output is not a change tx, add node then generate edges
                            graph.add_node(current_output_address)
                            output_vals[
                                current_output_address] = current_output_value
                            tx_vals_temp.append([
                                current_input_address, current_output_address,
                                None
                            ])

                    #if n_of_outputs <= 0: # If it is one of the weird reorganizing txs
                    #    continue          # then skip (refer to block 546)
                    try:
                        current_input_value = current_input_value / n_of_outputs
                    except ZeroDivisionError:
                        debug_zero_div.append((block_number, tx))
                        print(
                            "alarm ____________________________________________ zero division error"
                        )

                    for tx_tuple in tx_vals_temp:
                        tx_tuple[2] = current_input_value
                        tx_vals.append(tuple(tx_tuple))

                    graph.add_weighted_edges_from(tx_vals)

        blocks_added.append(block_number)
        print("-----Nodes in graph: " + str(graph.number_of_nodes()))
        print("-----Edges in graph: " + str(graph.number_of_edges()))

    print("Zero div list ", debug_zero_div)  #The edge generating loop O(n^2)

    print("writing edgelist please wait as this might take a while")
    #Modify nx to add status updates in the write process?
    nx.readwrite.edgelist.write_edgelist(
        graph,
        "../" + str(start_number) + "to" + str(end_number) + ".edgelist")
    print("Edgelist written")

    print("writing graphML please wait as this might take a while")
    nx.write_graphml_lxml(
        graph, "../" + str(start_number) + "to" + str(end_number) + ".graphml")
Example no. 18
    iterations = int((percent / 100) * len(list(G.edges)))
    for x in range(iterations):
        edges = sorted(list(G.edges))
        non_edges = list(nx.non_edges(G))
        chosen_non_edge = random.choice(non_edges)
        print('Chosen edge to add: ', chosen_non_edge)
        print('List of current edges: ', edges)
        G.add_edge(*chosen_non_edge)

    print('Total edge count end: ', len(list(G.edges)))


for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    remove_percent_edges(G, 1)
    nx.write_graphml_lxml(G, "graphs/hiv-remove-1-" + str(x + 1) + ".graphml")

for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    remove_percent_edges(G, 5)
    nx.write_graphml_lxml(G, "graphs/hiv-remove-5-" + str(x + 1) + ".graphml")

for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    remove_percent_edges(G, 10)
    nx.write_graphml_lxml(G, "graphs/hiv-remove-10-" + str(x + 1) + ".graphml")

for x in range(10):
    G = nx.read_graphml('hiv.graphml')
    remove_percent_edges(G, 1)
    nx.write_graphml_lxml(G, "graphs/hiv-add-1-" + str(x + 1) + ".graphml")
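The remove_percent_edges function called in the loops above is not part of this fragment; a plausible counterpart to the edge-adding logic shown, removing a given percentage of randomly chosen edges in place, might look like this (an assumption, not the original implementation):

import random

def remove_percent_edges(G, percent):
    # remove `percent`% of the graph's current edges, chosen at random
    iterations = int((percent / 100) * len(list(G.edges)))
    for _ in range(iterations):
        chosen_edge = random.choice(list(G.edges))
        print('Chosen edge to remove: ', chosen_edge)
        G.remove_edge(*chosen_edge)
    print('Total edge count end: ', len(list(G.edges)))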
Example no. 19
C_2005_2013_btwness = C_2005_2013
C_2005_2013_sprding = C_2005_2013

C_2007_2015_eigenvec = C_2007_2015
C_2007_2015_harmnic = C_2007_2015
C_2007_2015_btwness = C_2007_2015
C_2007_2015_sprding = C_2007_2015

C_2010_2018_eigenvec = C_2010_2018
C_2010_2018_harmnic = C_2010_2018
C_2010_2018_btwness = C_2010_2018
C_2010_2018_sprding = C_2010_2018

# graph-tool conversions 
C_2010_2018_gt = C_2010_2018
nx.write_graphml_lxml(C_2010_2018_gt, "mea_2018_gt.graphml")
gt_2018_univ = graph_tool.load_graph("mea_2018_gt.graphml", fmt='auto', ignore_vp=None, ignore_ep=None, ignore_gp=None)

# extra
C_2010_2018_eigenvec_gt = C_2010_2018
C_2010_2018_harmnic_gt = C_2010_2018

# Eigenvector centrality graph-tool 
nx.write_graphml_lxml(C_2010_2018_eigenvec_gt, "mea_eigen_ctrlty_2018_gt.graphml")
gt_eigen_2018_new = graph_tool.load_graph("mea_eigen_ctrlty_2018_gt.graphml", fmt='auto', ignore_vp=None, ignore_ep=None, ignore_gp=None)

# harmonic centrality graph-tool 
nx.write_graphml_lxml(C_2010_2018_harmnic_gt, "mea_harmnic_ctrlty_2018_gt.graphml")
gt_harmnic_2018_new = graph_tool.load_graph("mea_harmnic_ctrlty_2018_gt.graphml", fmt='auto', ignore_vp=None, ignore_ep=None, ignore_gp=None)

# Eigenvector centrality
Example no. 20
sys.path.append('/scratch/kzltin001/sabcom/')
from SABCoModel import *

# load parameters
with open('parameters.json') as json_file:
    parameters = json.load(json_file)

# load neighbourhood data
with open('neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# Monte Carlo simulation
pos = int(os.getenv('SLURM_ARRAY_TASK_ID'))
seed = pos
# initialization
environment = EnvironmentNetwork(seed, parameters, neighbourhood_data)
# running the simulation
runner = Runner()
runner.baseline(environment, seed)

# save network
if not parameters["high_performance"]:
    for idx, network in enumerate(environment.infection_states):
        for i, node in enumerate(network.nodes):
            network.nodes[i]['agent'] = network.nodes[i]['agent'].status

        nx.write_graphml_lxml(
            network,
            "measurement/" + str(pos) + "/network_time{}.graphml".format(idx))
Example no. 21
def make_classyfire_network(
        classyfire_obo_filename='ChemOnt_2_1.obo',
        nodetable_filename='classyfire_chemont_nodetable.tab',
        network_filename="classyfire_ontology_network_template.graphml",
        parentchild_filename='classyfire_chemont_network.tab',
        save=True):
    from copy import deepcopy
    import pandas as pd
    import numpy as np
    import networkx as nx

    with open(classyfire_obo_filename, 'r') as fid:
        chemont = fid.read()
    chemont = chemont.split('[Term]')
    chemont = [c.strip() for c in chemont]
    chemont.pop(0)  #remove header

    print(('There are %d entries' % len(chemont)))

    # make an empty dict that has all possible chemont terms
    attributes = {}
    for c in chemont:
        for a in c.split('\n'):
            attributes[a.split(': ')[0]] = np.nan

    chemont_df = []
    for c in chemont:
        chemont_df.append(deepcopy(attributes))
        chemont_df[-1]['synonym'] = ''
        for a in c.split('\n'):
            split_str = a.split(': ')
            attr = split_str[0]
            value = split_str[-1]
            # there are many synonyms for each entry.  make delimited list
            if attr in ['synonym', 'xref']:
                chemont_df[-1][attr] = '%s; %s' % (chemont_df[-1]['synonym'],
                                                   value)

            if ' ! ' in value:
                chemont_df[-1][attr] = value.split(' ! ')[0].strip()
            else:
                chemont_df[-1][attr] = value.strip()

    chemont_df = pd.DataFrame(chemont_df)

    if save is True:
        chemont_df[['id', 'is_a']].to_csv(parentchild_filename,
                                          index=None,
                                          sep='\t')

    chemont_info = chemont_df[[
        'id', 'name', 'def', 'synonym', 'xref', 'alt_id', 'comment'
    ]]
    chemont_info = chemont_info.drop_duplicates(['id', 'name'])
    chemont_info.rename(columns={'name': 'ontology_name'}, inplace=True)
    if save is True:
        chemont_info.to_csv(nodetable_filename, index=None, sep='\t')

    G = nx.from_pandas_edgelist(chemont_df, 'is_a', 'id')
    nx.set_node_attributes(G, chemont_info.set_index('id').to_dict('index'))
    G.remove_node('CHEMONTID:0000000')
    if save is True:
        nx.write_graphml_lxml(G, network_filename)
    return G
Example no. 22
    def write_graphml(self, file_name):
        nx.write_graphml_lxml(self, file_name)
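Since this method passes self to the writer, it presumably lives on a class that subclasses a networkx graph type; a minimal sketch under that assumption (the class name is invented):

import networkx as nx

class ExportableGraph(nx.Graph):
    def write_graphml(self, file_name):
        nx.write_graphml_lxml(self, file_name)

g = ExportableGraph()
g.add_edge("a", "b")
g.write_graphml("exportable.graphml")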
Example no. 23
from examples.coviidnetwork.src.environment import Environment
from examples.coviidnetwork.src.runner import Runner
import networkx as nx

args = ["configs/environments/", "config_coviid", "log/", 1]

# initialization
environment_directory = str(args[0])
identifier = str(args[1])
log_directory = str(args[2])
runs = args[3]

# Monte Carlo Simulations
for i in range(runs):
    # initialize environment and runner from files
    environment = Environment(environment_directory, identifier, seed=i)
    runner = Runner(environment)
    # do the run
    runner.do_run(environment, seed=i)

    # save network
    for idx, network in enumerate(environment.infection_states):
        for idx2, node in enumerate(network.nodes):
            network.nodes[idx2]['agent'] = network.nodes[idx2]['agent'].status

        nx.write_graphml_lxml(
            network, "measurements/{}-network_time{}.graphml".format(i, idx))
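To inspect the per-timestep snapshots written above, they can be read back in numeric order; a small sketch assuming the same naming scheme and run index 0:

import glob
import networkx as nx

paths = glob.glob("measurements/0-network_time*.graphml")
paths.sort(key=lambda p: int(p.rsplit("time", 1)[1].split(".")[0]))  # order by timestep
snapshots = [nx.read_graphml(p) for p in paths]
print(len(snapshots), "snapshots loaded")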
Example no. 24
# load neighbourhood data
with open('parameters/lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist
    if not os.path.exists('measurement/lockdown/seed{}'.format(seed)):
        os.makedirs('measurement/lockdown/seed{}'.format(seed))

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.lock_down(environment, seed)

    # save network
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            for i, node in enumerate(network.nodes):
                network.nodes[i]['agent'] = network.nodes[i]['agent'].status

            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
Example no. 25
def dump_graph(_graph, out='transactions'):
    """ Dump networkx graph to graphml lxml """
    nx.write_graphml_lxml(_graph, f'output/{out}.graphml')
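A usage sketch for dump_graph; the output/ directory in the f-string must already exist, so it is created first here (the graph contents are invented):

import os
import networkx as nx

os.makedirs('output', exist_ok=True)
tx_graph = nx.DiGraph()
tx_graph.add_edge('addr_a', 'addr_b', value=0.5)
dump_graph(tx_graph, out='transactions_sample')  # -> output/transactions_sample.graphml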