class PageRank(object):
    """Compute PageRank scores for the documents known to the link database.

    The scores are obtained by power iteration on a dense, column-stochastic
    transition matrix built from the forward links returned by the
    ``Datasource``.

    Args:
        jump_prob: Probability of jumping to a uniformly random document
            instead of following a link.
        e: Convergence tolerance; iteration stops once the L1 distance
            between two successive score vectors drops below this value.
        max_iter: Upper bound on the number of power-iteration steps, or
            ``None`` for no bound. It guards against tolerances that
            float32 arithmetic can never reach.
    """

    def __init__(self, jump_prob=0.15, e=math.exp(-8), max_iter=1000):
        self.db = Datasource()
        self.jump_prob = jump_prob
        self.e = e
        self.max_iter = max_iter

        # The first column of every row is read as the document id.
        self.doc_ids = sorted(
            int(row[0]) for row in self.db.get_all_link_docs())
        # Maps a document id to its row/column position in the matrix.
        self.id2idx = {doc_id: idx for idx, doc_id in enumerate(self.doc_ids)}

    def __call__(self):
        """Run the power iteration and return the scores.

        Returns:
            A list of ``(doc_id, [score])`` tuples ordered by ``doc_id``.
            The score is wrapped in a one-element list because the score
            vector is kept as a column vector. An empty list is returned
            when there are no documents.

        Raises:
            KeyError: If a forward link points at a document id that is not
                present in ``self.id2idx``.
        """
        doc_size = len(self.doc_ids)
        if doc_size == 0:
            # Nothing to rank; also avoids dividing by zero below.
            return []

        # NOTE(review): the keys of this dict are used as returned by the
        # datasource; they are assumed to compare equal to the int ids in
        # self.doc_ids -- TODO confirm.
        forward_link = self._linkdb_to_dict(self.db.get_forward_link())

        follow_prob = 1 - self.jump_prob
        # Every entry starts with the random-jump share.
        link_matrix = np.full((doc_size, doc_size),
                              self.jump_prob / doc_size,
                              dtype=np.float32)

        for i, doc_id in enumerate(self.doc_ids):
            links = forward_link.get(doc_id)
            if links:
                share = follow_prob / len(links)
                for target in links:
                    # Repeated targets accumulate, so the row still sums to 1.
                    link_matrix[i, self.id2idx[target]] += share
            else:
                # Dangling document (no row, or a row without targets):
                # spread its rank uniformly so no probability mass is lost.
                link_matrix[i, :] += follow_prob / doc_size

        # Rows were filled as "from -> to"; transpose so that
        # matrix @ scores propagates rank along the links.
        link_matrix = np.transpose(link_matrix)
        scores = np.full((doc_size, 1), 1.0 / doc_size, dtype=np.float32)

        delta = 1
        cnt = 0
        while delta >= self.e and (self.max_iter is None
                                   or cnt < self.max_iter):
            prev = scores
            scores = np.dot(link_matrix, scores)
            delta = np.sum(np.abs(prev - scores))
            cnt += 1
        print(f"\tPage rank iteration: {cnt}")
        return list(zip(self.doc_ids, scores.tolist()))

    def _linkdb_to_dict(self, data):
        """Convert ``(key, "id1,id2,...")`` rows into ``{key: [id1, id2, ...]}``.

        A value of ``None`` or an empty string yields an empty list, and
        empty tokens (for example from a trailing comma) are skipped.
        """
        result = {}
        for key, raw in data:
            if raw:
                result[key] = [
                    int(token) for token in raw.split(',') if token.strip()
                ]
            else:
                result[key] = []
        return result