Exemplo n.º 1
0
    def generatePaths(self, pathCount=1, pathLength=10):
        """
        Draw random paths from this model and collect them in a Paths object.

        Every path starts at an element chosen with random.choice from
        self.nodes. It is then extended step by step with a random entry of
        self.P[memory], where memory is made up of the last self.k elements
        of the path built so far.

        @param pathCount: the number of paths to generate

        @param pathLength: the (maximum) length of each path. A path ends
            early as soon as self.P holds no entry for the current memory.

        @return: the Paths object containing all generated paths
        """
        generated = pp.Paths()

        for _path_index in range(pathCount):
            # the randomly chosen start element doubles as the first memory
            walk = random.choice(self.nodes)
            memory = walk

            # extend the walk by at most pathLength - k + 1 further steps
            for _step in range(pathLength - self.k + 1):
                if len(self.P[memory]) == 0:
                    # no transition possible from here: conclude this path
                    break

                walk += (random.choice(self.P[memory]), )

                # the memory is always the last k elements of the walk
                memory = walk[-self.k:]

            generated.add_path(walk)

        return generated
Exemplo n.º 2
0
def test_estimate_order_strongly_connected():
    """
    Check component reduction and node/path mapping on a small example.

    The first-order network is expected to keep all 5 nodes and 8 links
    after reduction to its giant connected component, while the second-order
    network is expected to shrink to 4 nodes and 4 links.
    """
    paths = pp.Paths()
    for ngram in ('a,b,c', 'b,c,b', 'c,b,a', 'b,a,b',
                  'e,b,f', 'b,f,b', 'f,b,e', 'b,e,b'):
        paths.addPath(ngram)

    # first-order network
    first_order = pp.HigherOrderNetwork(paths, k=1)
    first_order.reduceToGCC()
    node_count = first_order.vcount()
    assert node_count == 5, \
        "Error, wrong number of nodes in first-order network"
    link_count = first_order.ecount()
    assert link_count == 8, \
        "Error, wrong number of links in first-order network"

    # second-order network
    second_order = pp.HigherOrderNetwork(paths, k=2)
    second_order.reduceToGCC()
    node_count = second_order.vcount()
    assert node_count == 4, \
        "Error, wrong number of nodes in second-order network"
    link_count = second_order.ecount()
    assert link_count == 4, \
        "Error, wrong number of links in second-order network"

    # mapping of higher-order nodes and paths back to first-order paths
    mapped_node = second_order.HigherOrderNodeToPath('a-b')
    assert mapped_node == ('a', 'b'), \
        "Error: mapping from higher-order node to first-order path failed"
    mapped_path = second_order.HigherOrderPathToFirstOrder(('a-b', 'b-c'))
    assert mapped_path == ('a', 'b', 'c'), \
        "Error: mapping from higher-order path to first-order path failed"
Exemplo n.º 3
0
def test_estimate_order_strongly_connected():
    """
    Check component reduction and node/path mapping on a small example.

    The first-order network is expected to keep all 5 nodes and 8 links
    after reduction to its giant connected component, while the second-order
    network is expected to shrink to 4 nodes and 4 links.
    """
    paths = pp.Paths()
    for ngram in ('a,b,c', 'b,c,b', 'c,b,a', 'b,a,b',
                  'e,b,f', 'b,f,b', 'f,b,e', 'b,e,b'):
        paths.add_path(ngram)

    reduce_to_gcc = pp.algorithms.components.reduce_to_gcc

    # first-order network
    first_order = pp.HigherOrderNetwork(paths, k=1)
    reduce_to_gcc(first_order)
    node_count = first_order.ncount()
    assert node_count == 5, \
        "Error, wrong number of nodes in first-order network"
    link_count = first_order.ecount()
    assert link_count == 8, \
        "Error, wrong number of links in first-order network"

    # second-order network
    second_order = pp.HigherOrderNetwork(paths, k=2)
    reduce_to_gcc(second_order)
    node_count = second_order.ncount()
    assert node_count == 4, \
        "Error, wrong number of nodes in second-order network"
    link_count = second_order.ecount()
    assert link_count == 4, \
        "Error, wrong number of links in second-order network"

    # mapping of higher-order nodes and paths back to first-order paths
    mapped_node = second_order.higher_order_node_to_path('a,b')
    assert mapped_node == ('a', 'b'), \
        "Error: mapping from higher-order node to first-order path failed"
    mapped_path = second_order.higher_order_path_to_first_order(('a,b', 'b,c'))
    assert mapped_path == ('a', 'b', 'c'), \
        "Error: mapping from higher-order path to first-order path failed"
Exemplo n.º 4
0
def test_estimate_order_2():
    """Example with second-order correlations: the estimated order must be 2."""
    paths = pp.Paths()

    # one observation of every link
    for link in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.add_path(link)

    # only two of the four possible paths of length two are ever observed
    for _ in range(4):
        paths.add_path('a,c,d')
        paths.add_path('b,c,e')

    model = pp.MultiOrderModel(paths, max_order=2)
    estimated = model.estimate_order()
    assert estimated == 2
Exemplo n.º 5
0
def test_estimate_order_1():
    """Example without second-order correlations: the estimated order must be 1."""
    paths = pp.Paths()

    # one observation of every link
    for link in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.add_path(link)

    # all four paths of length two are observed equally often
    for _ in range(4):
        for ngram in ('a,c,d', 'b,c,e', 'b,c,d', 'a,c,e'):
            paths.add_path(ngram)

    model = pp.MultiOrderModel(paths, max_order=2)
    estimated = model.estimate_order()
    assert estimated == 1, \
        "Error, wrongly detected higher-order correlations"
Exemplo n.º 6
0
def generate_random_path(size, rnd_seed):
    """
    Build a Paths object filled with `size` randomly generated n-grams.

    NumPy's global random generator is seeded with `rnd_seed` first. For each
    path, a frequency is drawn with np.random.randint(1, 4) and a length with
    np.random.randint(1, 10); the nodes are lowercase ASCII letters.
    """
    import string
    alphabet = string.ascii_lowercase

    def random_ngram(p_len, nodes):
        # draw p_len indices into `nodes` and join the picked nodes by commas
        picked = np.random.choice(len(nodes), p_len)
        return ','.join([nodes[idx] for idx in picked])

    np.random.seed(rnd_seed)
    result = pp.Paths()
    for _ in range(size):
        # the draw order (frequency, length, nodes) fixes the random stream
        frequency = np.random.randint(1, 4)
        path_length = np.random.randint(1, 10)
        ngram = random_ngram(path_length, alphabet)
        result.addPath(ngram, pathFrequency=frequency)

    return result
Exemplo n.º 7
0
def test_estimate_order_2():
    """
    Example with second-order correlations.

    Checks order estimation on the path data and on a random sequence, then
    the node and link counts of the first- and second-order networks and of
    the second-order giant connected component.
    """
    paths = pp.Paths()

    for link in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.addPath(link)

    for _ in range(4):
        paths.addPath('a,c,d')
        paths.addPath('b,c,e')

    multi_order = pp.MultiOrderModel(paths, maxOrder=2)
    detected = multi_order.estimateOrder(paths)
    assert detected == 2, "Error, did not detect second-order correlations"

    # a random sequence over ten symbols must be detected as first-order
    symbols = [str(value) for value in _np.random.choice(range(10), 100000)]
    sequence = pp.MarkovSequence(symbols)
    for criterion in ('BIC', 'AIC'):
        assert sequence.estimateOrder(maxOrder=2, method=criterion) == 1, \
            "Error, wrongly detected higher-order correlations"

    first_order = pp.HigherOrderNetwork(paths, k=1)
    node_count = first_order.vcount()
    assert node_count == 5, \
        "Error, wrong number of nodes in first-order network"
    link_count = first_order.ecount()
    assert link_count == 4, \
        "Error, wrong number of links in first-order network"

    second_order = pp.HigherOrderNetwork(paths, k=2)
    node_count = second_order.vcount()
    assert node_count == 4, \
        "Error, wrong number of nodes in second-order network"
    link_count = second_order.ecount()
    assert link_count == 2, \
        "Error, wrong number of links in second-order network"

    second_order.reduceToGCC()
    node_count = second_order.vcount()
    assert node_count == 1, \
        "Error, wrong number of nodes in giant connected component"
    link_count = second_order.ecount()
    assert link_count == 0, \
        "Error, wrong number of links in giant connected component"
"""
This is a test file that you can use to validate your pathpy installation and working directory.
"""

#%% validate that pathpy was installed correctly
import pathpy as pp
# creating a Paths object, adding one path and printing it exercises the package
paths = pp.Paths()
paths.add_path('a,b,c')
print(paths)

#%% validate that the kernel was started in the correct root directory
# the file is addressed by a relative path, so reading it only works when the
# current working directory contains the `data` folder
t = pp.TemporalNetwork.read_file('data/temporal_clusters.tedges')
print(t)
#%% In [1]
import pathpy as pp

#%% In [2]
# show the built-in documentation of the Paths class
help(pp.Paths)

#%% In [3]
# start from an empty Paths instance
toy_paths = pp.Paths()

#%% In [4]
# a path can be given as a tuple of nodes; `frequency` is passed as 10 here
toy_paths.add_path(('a', 'c', 'd'), frequency=10)

#%% In [5]
print(toy_paths)

#%% In [6]
# a path can also be given as one string, here with '-' passed as separator
ngram_paths = pp.Paths()
ngram_paths.add_path('b-c-e', separator='-', frequency=10)
print(ngram_paths)

#%% In [7]
# add the contents of ngram_paths to toy_paths in place
toy_paths += ngram_paths
print(toy_paths)

#%% In [8]
# store the combined paths; the relative path assumes a `data` folder in the
# current working directory
toy_paths.write_file('data/toy_paths.ngram')

#%% In [9]
# build a Network instance from the paths
toy_graph = pp.Network.from_paths(toy_paths)
print(toy_graph)
Exemplo n.º 10
0
#%% In [1]
import pathpy as pp

# toy example: two different paths, each added with the value 2 as second argument
toy_paths = pp.Paths()
toy_paths.add_path('a,c,d', 2)
toy_paths.add_path('b,c,e', 2)
print(toy_paths)

#%% In [2]
# model of the toy paths with the default order
hon_1 = pp.HigherOrderNetwork(toy_paths)
pp.visualisation.plot(hon_1)
print(hon_1.transition_matrix())

#%% In [3]
# likelihood of the toy paths under hon_1 (log=False: not log-transformed)
print(hon_1.likelihood(toy_paths, log=False))

#%% In [4]
# second-order model of the same paths
hon_2 = pp.HigherOrderNetwork(toy_paths, k=2)
print(hon_2.transition_matrix())
hon_2.likelihood(toy_paths, log=False)

#%% In [5]
# second-order null model; print ITS transition matrix (the original cell
# printed the matrix of hon_2 a second time, which was already shown above)
hon_2_null = pp.HigherOrderNetwork(toy_paths, k=2, null_model=True)
pp.visualisation.plot(hon_2_null)
print(hon_2_null.transition_matrix())
hon_2_null.likelihood(toy_paths, log=False)

#%% In [6]
from scipy.stats import chi2

# difference in degrees of freedom between the second-order model and hon_1
d = hon_2.degrees_of_freedom() - hon_1.degrees_of_freedom()
Exemplo n.º 11
0
def estimate_user_kopt(user, top_nodes):
    """
    Estimate the optimal model order for the click paths of a user.

    The tab-separated file PATH + FILENAME is read and its first row is
    treated as a header. In every other row, column 2 (0-based) is the user,
    column 3 the clicked article and column 4 the game. Consecutive accepted
    rows with the same game form one path; an article repeated immediately
    is only recorded once.

    The paths are then cut into runs with `listrun`, using the `top_nodes`
    most frequently occurring articles, and handed to a pathpy
    MultiOrderModel with max_order=2.

    @param user: value of the user column to select, or "all" to use the
        rows of every user
    @param top_nodes: number of most frequent articles passed to `listrun`

    @return: tuple (number of reduced paths, estimated optimal order)
    """
    ## PATH COLLECTION

    paths = []
    path = []
    # (article, game) of the previously accepted row, None before the first
    previous = None

    filename = PATH + FILENAME
    with open(filename, 'r', encoding='utf-8') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter='\t')
        print(f"Parsed file: {FILENAME}")

        for row_index, row in enumerate(csv_reader):
            # the first row is the header
            if row_index == 0:
                print(f'Columns: {", ".join(row)}')
                continue

            # ignore data from other users unless all users are requested
            if user != "all" and row[2] != user:
                continue

            article = row[3]
            game = row[4]

            if previous is not None and previous[1] == game:
                # same game: extend the path, skipping immediate repeats
                if previous[0] != article:
                    path.append(article)
            else:
                # a new game starts: store the finished path and start over
                if path:
                    paths.append(path)
                path = [article]

            previous = (article, game)

    # the path of the last game is still open when the file ends
    if path:
        paths.append(path)

    ## PATH FILTERING

    # articles ordered by decreasing number of occurrences over all paths
    occurrences = Counter(item for single_path in paths for item in single_path)
    sorted_nodes = [node for node, _ in occurrences.most_common()]
    top_sorted_nodes = sorted_nodes[0:top_nodes]

    paths_reduced = []
    for single_path in paths:
        paths_reduced.extend(listrun(single_path, top_sorted_nodes))

    ## Add paths to pathpy
    p = pp.Paths()
    for reduced_path in paths_reduced:
        p.add_path(reduced_path)
    print(p)

    mog = pp.MultiOrderModel(p, max_order=2)
    return (len(paths_reduced), mog.estimate_order())
Exemplo n.º 12
0
A broader overview of the research on higher-order models for complex systems is available in the following preprint:

- R Lambiotte, M Rosvall, I Scholtes: **Understanding Complex Systems: From Networks to Optimal Higher-Order Models**, preprint, June 2018,  [arXiv 1806.05977](https://arxiv.org/abs/1806.05977)
""")

#%%
md("""
The data analysis and modelling framework outlined in these works builds on a generalisation of standard, first-order networks to $k$-dimensional De Bruijn graph models for paths in complex networks.

The class `HigherOrderNetwork` allows us to generate such higher-order network models of paths. In the documentation, we find that the constructor takes a parameter `paths`, i.e. the statistics of the observed paths that we want to model. With the parameter `k` we specify the order $k$ of the higher-order model that we want to fit. To understand this better, let us do this for our toy example.

<span style="color:red">**TODO:** Read the toy example from unit 1.2 from the file `data/toy_paths.ngram`, generate a **first-order** model instance `hon_1` and print a summary of the resulting instance.</span>
""")

#%% In [2]
# build the toy example directly: two paths, each added with frequency=10
toy_paths = pp.Paths()
toy_paths.add_path('a,c,d', frequency=10)
toy_paths.add_path('b,c,e', frequency=10)

# first-order model (k=1) of the toy paths
hon_1 = pp.HigherOrderNetwork(toy_paths, k=1)
# bare expression: only produces output in an interactive cell
hon_1

#%%
md("""
This generates a first-order model of our paths, with five nodes $a,b,c,d$ and $e$, and four links $(a,c), (b,c), (c,d), (c,e)$. It is identical to the `Network` instance that we have previously created using `Network.from_paths`. Indeed, each `HigherOrderNetwork` instance is derived from the class `Network`, which means we can store edge and node attributes and visualise it by exactly the same methods.

<span style="color:red">**TODO:** Plot the `HigherOrderNetwork` instance `hon_1` and print the weight of all edges.</span>
""")

#%% In [3]
Exemplo n.º 13
0
def test_get_distance_matrix_empty():
    """The distance matrix computed for an empty Paths object has length 0."""
    empty_paths = pp.Paths()
    distances = pp.algorithms.shortest_paths.distance_matrix(empty_paths)
    entry_count = len(distances)
    assert entry_count == 0
Exemplo n.º 14
0
<span style="color:red">**TODO:** Use the `help` function to obtain a description of the class `Paths`.</span>
""")

#%% In [2]
# show the built-in documentation of the Paths class
help(pp.Paths)

#%%
md("""
In Visual Studio Code, the documentation of classes, methods, and properties is automatically shown as a tooltip as you type. If you use the browser-based `jupyter notebook` editor, you can bring up the documentation by pressing `Shift`+`Tab` as you type. You can try this with the `Paths` class. 

<span style="color:red">**TODO:** Create an empty `Paths` instance `toy_paths` by calling the constructor with no arguments.</span>
""")

#%% In [3]
# call the constructor without arguments and print the resulting instance
toy_paths = pp.Paths()
print(toy_paths)

#%%
md("""
We now have an empty `Paths` instance `toy_paths` that we can use to add path statistics to generate a small toy example. We can add paths using the method `add_path`. As the first parameter, it accepts any iterable (list, string, etc.) of `string` variables (or objects that can be cast to `string`), where each entry in the iterable is one step (i.e. node) on a path. The optional `frequency` parameter captures the number of times a specific path has been observed.

<span style="color:red">**TODO:** Add 10 observations of a path $a \rightarrow c \rightarrow d$ between three nodes $a$, $c$, and $d$ to the `toy_paths` instance.</span>
""")

#%% In [4]
# the path is given as a tuple of nodes; frequency=10 is passed along with it
toy_paths.add_path(('a', 'c', 'd'), frequency=10)

#%%
md("""
Each class in `pathpy` provides a properly formatted string representation, which can be shown by invoking `print` on an instance.
Exemplo n.º 15
0
def test_get_distance_matrix_empty():
    """The distance matrix of an empty Paths object has length 0."""
    empty_paths = pp.Paths()
    distances = empty_paths.getDistanceMatrix()
    entry_count = len(distances)
    assert entry_count == 0
Exemplo n.º 16
0
# Convert the path set into a list of lists.
print('extracting path information')
original_path_set = S.sequence()
# Join all entries with '+' and cut the result at every '|', so that each
# chunk holds the '+'-separated nodes of one path.
original_path_set = '+'.join(original_path_set)
original_path_set = original_path_set.split('|')
# drop the chunk that follows the last '|'
original_path_set.pop()
for i, joined_path in enumerate(original_path_set):
    temp = joined_path.split('+')
    # Remove one empty field from every chunk and a second one from every
    # chunk but the first (presumably the fields left over next to the '|'
    # separators; list.remove raises ValueError if no empty field exists).
    temp.remove('')
    if i != 0:
        temp.remove('')
    original_path_set[i] = temp

# Extract paths without redundant nodes.
real_path_set = pathpy.Paths()
print('generating pathset without redundant nodes...')
break_and_add_path(original_path_set, real_path_set)

print('information of the pathset without redundant nodes:')
print(real_path_set)

# The higher-order model is generated from the path set and stored in `model`.
# The maximum order is preliminarily set to 5. If the final estimated optimal
# order equals the maximum order, the maximum order should be increased.
max_order = 5
success = False
while not success:
    # Without this bound, the loop would keep lowering max_order forever if
    # the model can never be built.
    if max_order < 0:
        raise RuntimeError(
            'could not build a MultiOrderModel for any non-negative max_order')
    try:
        model = pathpy.MultiOrderModel(real_path_set, max_order=max_order)
        success = True
    except Exception:
        # Retry with a lower maximum order. Catching Exception instead of
        # using a bare except keeps Ctrl-C (KeyboardInterrupt) and SystemExit
        # working while the model is being built.
        max_order -= 1