Пример #1
0
def main():
    """Print the followers data set as a dense ``n x n`` matrix.

    Each row of ``DataSets.get_followers()``, after 1 is subtracted from
    every value, is used as an index into an ``n x n`` tensor, where ``n`` is
    the largest index plus one. Every listed position receives ``1.0``.
    """
    # presumably the -1 shifts 1-based node ids to 0-based -- confirm
    edges = DataSets.get_followers() - 1
    node_count = edges.max(axis=0).max() + 1

    initializer = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(initializer)

        index_rows = edges.values.tolist()
        ones = [1.0] * edges.shape[0]
        adjacency = tf.scatter_nd(index_rows, ones, [node_count, node_count])
        print(session.run(adjacency))
Пример #2
0
def main():
    """Run 20 PageRank iterations on the wiki-vote data and report the result.

    Builds a transition matrix from the rows of ``DataSets.get_wiki_vote()``
    with damping factor 0.85, repeatedly multiplies the rank vector by it,
    then prints the final vector and the ordering produced by ``ranked``.
    The ordering is also written to ``logs/test.csv`` and the graph is
    exported to ``logs/.`` as a summary event file.
    """
    steps = 20

    data_set = DataSets.get_wiki_vote()
    data_set -= 1  # presumably shifts 1-based node ids to 0-based -- confirm
    n_raw = data_set.max(axis=0).max() + 1

    beta = tf.constant(0.85, tf.float32, name="Beta")
    n = tf.constant(n_raw, tf.float32, name="NodeCounts")

    # Transposed scatter of the data rows: a[j, i] counts the (i, j) rows.
    # The original passed tf.float64 as the second positional argument, which
    # tf.Variable takes as `trainable`, not `dtype`; it never changed the
    # dtype, so it is dropped. The matrix keeps the dtype of the 1.0 updates.
    a = tf.Variable(
        tf.transpose(
            tf.scatter_nd(data_set.values.tolist(),
                          data_set.shape[0] * [1.0], [n_raw, n_raw])),
        name="AdjacencyMatrix")

    # Start from the uniform vector 1/n.
    v = tf.Variable(tf.fill([n_raw, 1], tf.pow(n, -1)), name="PageRankVector")

    # Column sums of `a`.
    o_degree = tf.reduce_sum(a, 0)

    condition = tf.not_equal(o_degree, 0)

    # tf.where with a 1-D condition picks whole rows, hence the transposes:
    # columns whose sum is zero are replaced by the uniform value 1/n, the
    # others by beta * (column / column sum) + (1 - beta) / n.
    transition = tf.transpose(
        tf.where(condition,
                 tf.transpose(beta * tf.div(a, o_degree) + (1 - beta) / n),
                 tf.fill([n_raw, n_raw], tf.pow(n, -1))))

    page_rank = tf.matmul(transition, v, a_is_sparse=True)

    run_iteration = tf.assign(v, page_rank)

    ranks = tf.transpose(tf.py_func(ranked, [-v], tf.int64))[0]
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        for _ in range(steps):
            sess.run(run_iteration)

        print(sess.run(v))
        # Evaluate the ranking once and reuse it for printing and saving.
        rank_values = sess.run(ranks)
        print(rank_values)
        np.savetxt('logs/test.csv', rank_values, fmt='%i')
        # Close the writer so the graph event is flushed to disk before exit.
        writer = tf.summary.FileWriter('logs/.', sess.graph)
        writer.close()
Пример #3
0
        return self.gain_list.idxmax(), self.data[self.gain_list.idxmax()]

    def gain(self, subdata, h_S):
        """Compute the information gain of every column of ``subdata``.

        For each column the gain is ``h_S`` minus the sum, over the column's
        distinct values, of the value's relative frequency times the entropy
        that ``sub_entropy`` reports for that value.

        :param subdata: DataFrame whose columns are scored.
        :param h_S: entropy value the weighted sums are subtracted from.
        :return: tuple ``(gains, subdata)``: a float Series indexed by column
            name, and the ``subdata`` argument itself.
        """
        # Explicit float dtype: a Series built without data defaults to
        # object dtype on pandas >= 2.0, which would keep the gains from
        # being treated as numbers by later reductions such as idxmax().
        result = pd.Series(index=subdata.columns, dtype=float)
        for column in subdata.columns:
            value_entropy = self.sub_entropy(subdata[column])
            counts = subdata[column].value_counts()
            weights = counts / counts.sum()
            # The product aligns both Series on the column's values.
            result[column] = h_S - (weights * value_entropy).sum()
        return result, subdata

    def entropy(self, subdata):
        """Return the base-2 entropy of the class column of ``subdata``.

        :param subdata: DataFrame containing the ``self.class_name`` column.
        :return: sum of ``p * log2(1 / p)`` over the relative frequencies
            ``p`` of the values in that column.
        """
        class_counts = subdata[self.class_name].value_counts()
        probabilities = class_counts / class_counts.sum()
        surprisal = np.log2(1 / probabilities)
        return (probabilities * surprisal).sum()

    def sub_entropy(self, subdata):
        """Compute the class entropy within each distinct value of a column.

        The column is joined with the class column of ``self.data``; for each
        distinct value, ``self.entropy`` is evaluated on the rows where the
        column equals that value.

        :param subdata: Series holding one attribute column.
        :return: float Series indexed by the distinct values of ``subdata``.
        """
        categories = subdata.unique()
        # Explicit float dtype: a Series built without data defaults to
        # object dtype on pandas >= 2.0, but the entries are entropies.
        result = pd.Series(index=categories, dtype=float)
        cross = pd.concat([subdata, self.data[self.class_name]], axis=1)
        for cat in categories:
            result[cat] = self.entropy(cross[subdata == cat])
        return result

    def __str__(self):
        """Return the string form of the stored ``gain_list``."""
        ranking = self.gain_list
        return str(ranking)


if __name__ == '__main__':
    # Rank the columns of the nominal "weber" data set, using its last
    # column as the class, and print the resulting ranking.
    weber = DataSets.get_weber_nominal()
    class_column = weber.columns[-1]
    ranking = GainRanking(weber, class_column)
    print(ranking)
from numerical.data_science.res import DataSets as ds
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier


def learn_function(a, b, c, d, e):
    """Return ``NAND(a, b, c, d) XOR e`` as a bool.

    The result is True when exactly one of these holds: at least one of
    ``a``, ``b``, ``c``, ``d`` is falsy, or ``e`` is truthy.
    """
    # De Morgan: not (a and b) or not (c and d) == not (a and b and c and d)
    any_input_false = not (a and b and c and d)
    return any_input_false != bool(e)


if __name__ == '__main__':
    # Fit a decision tree on data produced from learn_function: the last
    # column is the target, all preceding columns are the features.
    np_data = ds.generate_from_logic_method(learn_function).data

    # Hold out 33% of the rows; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(np_data[:, :-1],
                                                        np_data[:, -1],
                                                        test_size=0.33,
                                                        random_state=42)
    # Built once: the original also created a tree.DecisionTreeClassifier()
    # before the split and discarded it unused.
    clf = DecisionTreeClassifier()
    clf = clf.fit(X_train, y_train)
Пример #5
0
        return 'AGE_4'


def discretize_PRE4(value):
    """Map a PRE4 value to one of three bin labels.

    :param value: number to classify.
    :return: ``'PRE4_1'`` for ``value <= 2.66``, ``'PRE4_2'`` for
        ``2.66 < value <= 2.88``, otherwise ``'PRE4_3'``.
    """
    if value <= 2.66:
        return 'PRE4_1'
    # Reaching this point already rules out value <= 2.66.
    if value <= 2.88:
        return 'PRE4_2'
    return 'PRE4_3'


def discretize_PRE5(value):
    """Map a PRE5 value to one of two bin labels.

    :param value: number to classify.
    :return: ``'PRE5_1'`` for ``value <= 2.05``, otherwise ``'PRE5_2'``.
    """
    return 'PRE5_1' if value <= 2.05 else 'PRE5_2'


if __name__ == '__main__':
    pd_data = DataSets.get_thoraric_surgery()

    # Replace each listed column by its bin labels, stored as a categorical.
    # Series.apply takes no axis argument: the original's stray positional 1
    # landed on `convert_dtype` (truthy, i.e. the default), a parameter that
    # newer pandas deprecates and then removes, so it is dropped here.
    for column, discretize in (('PRE4', discretize_PRE4),
                               ('PRE5', discretize_PRE5),
                               ('AGE', discretize_AGE)):
        pd_data[column] = pd_data[column].apply(discretize).astype('category')

    print(pd_data)
    pd_data.to_csv('ThoraricSurgery_discrete.csv', index=False)
Пример #6
0
from numerical.data_science.res import DataSets
from numerical.data_science import GainRankingContinous
from numerical.data_science import ID3


class J48(ID3):
    """ID3 subclass that always uses ``GainRankingContinous`` as ranking."""

    def __init__(self, training_set, class_name):
        """Initialise the base ``ID3`` with the fixed ranking class.

        :param training_set: forwarded unchanged to ``ID3.__init__``.
        :param class_name: forwarded unchanged to ``ID3.__init__``.
        """
        super().__init__(training_set,
                         class_name,
                         ranking=GainRankingContinous)


if __name__ == '__main__':
    # Build a J48 model on the weather data set, using its last column as
    # the class, and print it.
    weather = DataSets.get_weather()
    class_column = weather.columns[-1]
    print(J48(weather, class_column))
        cuts = []
        for current in new_data[self.class_name]:
            if temp != current:
                cuts.append((new_data[subdata.name].iloc[i] +
                             new_data[subdata.name].iloc[i - 1]) / 2)
                temp = current
            i += 1
        alt = pd.DataFrame()
        for cut in cuts:
            alt = pd.concat([
                alt,
                subdata.apply(GainRankingContinous.discretize_split,
                              1,
                              args=(cut, )).astype('category').rename(cut)
            ],
                            axis=1)
        return alt[self.gain(alt, self.h_S)[0].idxmax()]

    @staticmethod
    def discretize_split(value, point):
        if value < point:
            return ' < ' + str(point)
        else:
            return '>= ' + str(point)


if __name__ == '__main__':
    # Drop the first column by position. .iloc replaces the .ix indexer,
    # which was deprecated and then removed in pandas 1.0.
    data_pd_2 = DataSets.get_weather_semi_nominal().iloc[:, 1:]

    # Rank the remaining columns, using the last one as the class.
    print(GainRankingContinous(data_pd_2, data_pd_2.columns[-1]))
Пример #8
0
            elif d.shape[1] == 1:
                tree.add_child(v1, " ".join(d[self.class_name].unique()))

            elif d.shape[0] == 0:
                tree.add_child(v1, None)

            else:
                tree.add_child(v1, self.generate_tree(d))

        return tree

    def __str__(self):
        """Return the string form of the stored ``tree``."""
        built_tree = self.tree
        return str(built_tree)


if __name__ == '__main__':
    # Build and print an ID3 model on the nominal "weber" data set, using
    # its last column as the class.
    data_pd = DataSets.get_weber_nominal()
    id3_tennis = ID3(data_pd, data_pd.columns[-1])
    print(id3_tennis)

    # Disabled example, kept for reference:
    # pd_careval = DataSets.get_car_eval()
    # id3_careval = ID3(pd_careval, pd_careval.columns[-1])
    # print(id3_careval)

    # Drop the first column by position, then use the new first column as
    # the class. .iloc replaces the .ix indexer, which was deprecated and
    # then removed in pandas 1.0.
    pd_credit = DataSets.get_credit().iloc[:, 1:]
    id3_credit = ID3(pd_credit, pd_credit.columns[0])
    print(id3_credit)