def prepare_rank_feature(transactions, ds):
    """Add a PageRank feature column to ``ds`` in place.

    A graph is built from ``transactions`` with ``session_transition_graph``
    and scored with ``nx.pagerank``.  Each row of ``ds`` is then resolved to a
    URL through the module-level ``items`` frame (matching on ``item_id``) and
    receives the PageRank score of that URL.

    Rows keep the default value ``0.0`` when their ``item_id`` does not match
    exactly one row of ``items``, or when the item's URL has no PageRank
    score.

    Args:
        transactions: input passed unchanged to ``session_transition_graph``.
        ds: DataFrame with an ``item_id`` column; the column named by
            ``PAGE_RANK_FEATURE`` is created or overwritten on it.

    Returns:
        None. ``ds`` is modified in place.
    """
    g = session_transition_graph(transactions)
    page_ranks = nx.pagerank(g)
    print(len(page_ranks))

    # Build the item_id -> url lookup once instead of filtering ``items`` for
    # every row of ``ds``.  Only ids that occur exactly once in ``items`` are
    # kept, mirroring the original ``len(item_row) == 1`` requirement.
    unique_items = items[~items.item_id.duplicated(keep=False)]
    url_by_item_id = dict(zip(unique_items.item_id, unique_items.url))

    # Collect the scores positionally and assign the column in one step.
    # The previous per-row ``ds[label, PAGE_RANK_FEATURE] = value`` created a
    # new column keyed by the tuple instead of writing a single cell, so the
    # feature column was never filled.
    ranks = np.zeros(len(ds))
    for pos, item_id in enumerate(ds['item_id']):
        if item_id not in url_by_item_id:
            continue
        item_url = url_by_item_id[item_id]
        if item_url in page_ranks:
            ranks[pos] = page_ranks[item_url]

    ds.loc[:, PAGE_RANK_FEATURE] = pd.Series(ranks, index=ds.index)

# Group the log rows by user.
user_sessions = log.groupby('user_id')

# Number of log rows belonging to each user.  A comprehension replaces the
# former ``reduce`` with list concatenation, which copied the accumulated
# list on every step.
user_session_lengths = [len(rows) for rows in user_sessions.groups.values()]

# Maps a session length to the number of users having that length.
user_session_lengths_distribution = Counter(user_session_lengths)

user_df = pd.DataFrame(
    list(user_session_lengths_distribution.items()),
    columns=['session_length', 'user_cnt'],
)

"""
Plot distribution of counts for session lengths
"""
# plt.hist(user_df.session_length, list(range(1, 200)), weights=user_df.user_cnt); plt.show()

# Build the transition graph from the full log and check its size against
# the node and edge counts expected for this data set.
G = session_transition_graph(log)
assert G.number_of_nodes() == 14457
assert G.number_of_edges() == 27315

"""
Plot graph of user session paths
"""
# pos = nx.spring_layout(G); nx.draw_networkx(G, pos, with_labels=False, node_size=1); plt.show()

# Degree-correlation statistics of the transition graph.
print("degree_assortativity_coefficient %2.2f" % nx.degree_assortativity_coefficient(G))
print("degree_pearson_correlation_coefficient %2.2f" % nx.degree_pearson_correlation_coefficient(G))

# The graph is expected to consist of several disconnected components.
assert not nx.is_connected(G)
assert nx.number_connected_components(G) == 171

# Histogram: edge count of a component -> number of components with that
# edge count.  ``nx.connected_component_subgraphs`` was removed in
# NetworkX 2.4; taking ``G.subgraph`` of each node set returned by
# ``nx.connected_components`` works on both old and new releases.
counter = Counter(
    G.subgraph(nodes).number_of_edges() for nodes in nx.connected_components(G)
)