Example #1
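# Imports this excerpt needs (not shown in the original snippet; the module
# paths follow the standalone demos later in this section):
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.ncp import NCP_BCU
from tensorD.demo.DataGenerator import *
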
def ncp_run(N1, N2, N3, gR, dR, time):
    # ncp test
    X = synthetic_data_cp([N1, N2, N3], gR, 0)
    data_provider = Provider()
    data_provider.full_tensor = lambda: X
    env = Environment(data_provider, summary_path='/tmp/ncp_' + str(N1))
    ncp = NCP_BCU(env)
    args = NCP_BCU.NCP_Args(rank=dR, validation_internal=200)
    ncp.build_model(args)
    print('\n\nNCP with %dx%dx%d, gR=%d, dR=%d, time=%d' % (N1, N2, N3, gR, dR, time))
    loss_hist = ncp.train(6000)
    scale = str(N1) + '_' + str(gR) + '_' + str(dR)
    out_path = '/root/tensorD_f/data_out_tmp/python_out/ncp_' + scale + '_' + str(time) + '.txt'
    with open(out_path, 'w') as out:
        for loss in loss_hist:
            out.write('%.6f\n' % loss)
Example #2
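# Imports this excerpt needs (not shown in the original snippet). The CP_ALS
# module path is an assumption modeled on the other tensorD.factorization
# imports in this section:
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.cp import CP_ALS  # assumed module path
from tensorD.demo.DataGenerator import *
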
def cp_run(N1, N2, N3, gR, dR, time):
    # cp test
    X = synthetic_data_cp([N1, N2, N3], gR, 0)
    data_provider = Provider()
    data_provider.full_tensor = lambda: X
    env = Environment(data_provider, summary_path='/tmp/cp_' + str(N1))
    cp = CP_ALS(env)
    args = CP_ALS.CP_Args(rank=dR, validation_internal=50, tol=1.0e-4)
    cp.build_model(args)
    print('CP with %dx%dx%d, gR=%d, dR=%d, time=%d' % (N1, N2, N3, gR, dR, time))
    hist = cp.train(600)
    scale = str(N1) + '_' + str(gR) + '_' + str(dR)
    out_path = '/root/tensorD_f/data_out_tmp/python_out/cp_' + scale + '_' + str(time) + '.txt'
    with open(out_path, 'w') as out:
        for step in hist:
            loss = step[0]
            rel_res = step[1]
            out.write('%.10f, %.10f\n' % (loss, rel_res))
Example #3
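# Imports this excerpt needs (not shown in the original snippet; the module
# paths follow the Tucker demos later in this section):
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.tucker import HOOI
from tensorD.demo.DataGenerator import *
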
def tucker_run(N1, N2, N3, gR, dR, time):
    # tucker
    X = synthetic_data_tucker([N1, N2, N3], [gR, gR, gR])
    data_provider = Provider()
    data_provider.full_tensor = lambda: X
    env = Environment(data_provider, summary_path='/tmp/tucker_' + str(N1))
    hooi = HOOI(env)
    args = HOOI.HOOI_Args(ranks=[dR, dR, dR], validation_internal=200)
    hooi.build_model(args)
    print('\n\nTucker with %dx%dx%d, gR=%d, dR=%d, time=%d' %
          (N1, N2, N3, gR, dR, time))
    loss_hist = hooi.train(6000)
    scale = str(N1) + '_' + str(gR) + '_' + str(dR)
    out_path = '/root/tensorD_f/data_out_tmp/python_out/tucker_' + scale + '_' + str(
        time) + '.txt'
    with open(out_path, 'w') as out:
        for loss in loss_hist:
            out.write('%.6f\n' % loss)
Example #4
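# Imports this excerpt needs (not shown in the original snippet; the module
# paths follow the ntucker demo later in this section):
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.ntucker import NTUCKER_BCU
from tensorD.demo.DataGenerator import *
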
def ntucker_run(N1, N2, N3, gR, dR, time):
    # ntucker
    X = synthetic_data_tucker([N1, N2, N3], [gR, gR, gR], 0)
    data_provider = Provider()
    data_provider.full_tensor = lambda: X
    env = Environment(data_provider, summary_path='/tmp/ntucker_' + str(N1))
    ntucker = NTUCKER_BCU(env)
    args = NTUCKER_BCU.NTUCKER_Args(ranks=[dR, dR, dR],
                                    validation_internal=500,
                                    tol=1.0e-4)
    ntucker.build_model(args)
    print('\n\nNTucker with %dx%dx%d, gR=%d, dR=%d, time=%d' %
          (N1, N2, N3, gR, dR, time))
    loss_hist = ntucker.train(10000)
    scale = str(N1) + '_' + str(gR) + '_' + str(dR)
    out_path = '/root/tensorD_f/data_out_tmp/python_out/ntucker_' + scale + '_' + str(
        time) + '.txt'
    with open(out_path, 'w') as out:
        for loss in loss_hist:
            out.write('%.6f\n' % loss)
Example #5
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/4 PM8:41
# @Author  : Shiloh Leung
# @Site    :
# @File    : ncp_demo.py
# @Software: PyCharm Community Edition

from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.ncp import NCP_BCU
from tensorD.demo.DataGenerator import *

if __name__ == '__main__':
    print('=========Train=========')
    X = synthetic_data_cp([30, 30, 30], 10)
    data_provider = Provider()
    data_provider.full_tensor = lambda: X
    env = Environment(data_provider, summary_path='/tmp/ncp_demo_' + '30')
    ncp = NCP_BCU(env)
    args = NCP_BCU.NCP_Args(rank=10, validation_internal=1)
    ncp.build_model(args)
    ncp.train(100)
    factor_matrices = ncp.factors
    lambdas = ncp.lambdas
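    # A possible extra check (not in the original demo, and assuming tensorly
    # is installed): rebuild the rank-10 approximation from the learned
    # factors, following the tl.kruskal_to_tensor call pattern used in the
    # recommendation example (Example #7), and report the relative error.
    import numpy as np
    import tensorly as tl
    approx = tl.kruskal_to_tensor(factor_matrices)
    print('relative error: %.6f' %
          (np.linalg.norm(approx - X) / np.linalg.norm(X)))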
    print('Training ends.\n\n\n')
Example #6
# @Time    : 2018/1/17 PM4:30
# @Author  : Shiloh Leung
# @Site    :
# @File    : ml_ncp.py
# @Software: PyCharm Community Edition

from tensorD.dataproc.reader import TensorReader
import tensorflow as tf
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.ncp import NCP_BCU
from tensorD.demo.DataGenerator import *

if __name__ == '__main__':
    full_shape = [943, 1682, 31]
    base = TensorReader('/root/tensorD_f/data_out_tmp/u1.base.csv')
    base.read(full_shape=full_shape)
    with tf.Session() as sess:
        rating_tensor = sess.run(base.full_data)
    data_provider = Provider()
    data_provider.full_tensor = lambda: rating_tensor
    env = Environment(data_provider, summary_path='/tmp/ncp_ml')
    ncp = NCP_BCU(env)
    args = NCP_BCU.NCP_Args(rank=20, validation_internal=1)
    ncp.build_model(args)
    loss_hist = ncp.train(100)
    out_path = '/root/tensorD_f/data_out_tmp/python_out/ncp_ml_20.txt'
    with open(out_path, 'w') as out:
        for loss in loss_hist:
            out.write('%.6f\n' % loss)
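    # A possible follow-up (not in the original script, and assuming tensorly
    # is installed, as in Example #7): rebuild the rank-20 approximation of
    # the user x movie x day rating tensor and read off one predicted rating.
    import tensorly as tl
    approx = tl.kruskal_to_tensor(ncp.factors)
    print('predicted rating for user 0, movie 0, day 0: %.4f' % approx[0, 0, 0])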
Example #7
def main():
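    # Overview: for each query (a method annotation), retrieve similar
    # questions from the record corpus, build a binary 3-way tensor
    # (annotation words x method body x API), factorize it with nonnegative
    # CP (tensorD's NCP_BCU), or fall back to NMF on a matrix unfolding when
    # a mode is degenerate, then rank candidate APIs from the reconstructed
    # tensor. MRR, MAP, precision/recall@N and NDCG are written to a CSV.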
    w2v = gensim.models.Word2Vec.load(
        '../data/skip_w2v_model_stemmed')  # pre-trained word embedding
    idf = pickle.load(
        open('../data/my_idf',
             'rb'))  # pre-trained idf value of all words in the w2v dictionary
    records = pickle.load(open("../data/records_final.pkl", 'rb'))
    print(len(records))
    # get the questions that need recommendations
    experiments = util.get_class_experiments()
    print(len(experiments))

    csvfile_path = os.path.join(args.output_path,
                                "topclass_expand11-10.csv")  # output file
    csvfile = open(csvfile_path, 'w', newline="")
    writer = csv.writer(csvfile)
    writer.writerow(
        ["question_title", "top5", "ground_truth_intersection", "true_apis"])
    # the set of APIs across all questions; check whether an answer exists in it

    # count how many questions we can recommend for, and how many succeed
    recommend_num = 0
    recommend_success_num = 0
    processnum = 0
    # evaluation metrics
    mrr = 0.0
    map_score = 0.0  # renamed from "map" to avoid shadowing the built-in
    precision = 0
    recall = 0
    ndcg = 0.0

    rec_num = args.rec_num
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    for experiment in experiments:
        experiment_method_annotation = experiment.method_annotation

        # print(experiment_method_annotation)
        experiment_now_method_flat = experiment.now_method_flat
        experiment_true_api = experiment.true_api
        experiment_now_api = experiment.now_api
        # set difference: drop APIs already present in the current context
        experiment_true_api = set(experiment_true_api) - set(
            experiment_now_api)

        query = experiment_method_annotation
        query_words = WordPunctTokenizer().tokenize(query.lower())
        query_words = [
            SnowballStemmer('english').stem(word) for word in query_words
        ]
        query_matrix = similarity.init_doc_matrix(query_words, w2v)
        query_idf_vector = similarity.init_doc_idf_vector(query_words, idf)

        # retrieve the top-N similar questions
        top_questions = similarity.get_topk_questions(query_words,
                                                      query_matrix,
                                                      query_idf_vector,
                                                      records, 11, 0.0)
        # number of questions retrieved
        # print(top_questions)
        similar_questions_length = len(top_questions)
        # print("similar_questions_length:",similar_questions_length)
        # check whether the current question is among the similar questions;
        # if not, add it, otherwise build the tensor directly from them
        flag = False

        similar_records_list = list(top_questions.keys())
        for record in similar_records_list:
            if (record.title_words == query_words):
                flag = True
        processnum += 1
        # collect annotation words, method bodies, and APIs from the similar records
        record_method_annotation_words = list()
        record_method_flat = list()
        record_api = list()
        for record in similar_records_list:
            if record.title_words not in record_method_annotation_words:
                record_method_annotation_words.append(record.title_words)
            if record.method_block_flat not in record_method_flat:
                record_method_flat.append(record.method_block_flat)
            for api in record.method_api_sequence:
                if api not in record_api:
                    record_api.append(api)
        # add the APIs that already appear in the programming context
        for now_api in experiment_now_api:
            if now_api not in record_api:
                record_api.append(now_api)

        api_rec_all = []

        if flag:
            recommend_num += 1
            # build the tensor

            print(len(record_method_annotation_words), len(record_method_flat),
                  len(record_api))
            record_method_annotation_words_dict = dict(
                zip(range(len(record_method_annotation_words)),
                    record_method_annotation_words))
            record_method_flat_dict = dict(
                zip(range(len(record_method_flat)), record_method_flat))
            record_api_dict = dict(zip(range(len(record_api)), record_api))
            tensor = np.zeros((len(record_method_annotation_words),
                               len(record_method_flat), len(record_api)),
                              dtype=int)
            for record in similar_records_list:
                for concrete_api in record.method_api_sequence:
                    tensor[list(record_method_annotation_words_dict.keys(
                    ))[list(record_method_annotation_words_dict.values()).
                       index(record.title_words)],
                           list(record_method_flat_dict.keys()
                                )[list(record_method_flat_dict.values()).
                                  index(record.method_block_flat)],
                           list(record_api_dict.keys(
                           ))[list(record_api_dict.values()).index(concrete_api
                                                                   )]] = 1
            for api in experiment_now_api:
                if api in record_api_dict.values():
                    tensor[list(record_method_annotation_words_dict.keys(
                    ))[list(record_method_annotation_words_dict.values()).
                       index(query_words)], :,
                           list(record_api_dict.keys(
                           ))[list(record_api_dict.values()).index(api)]] = 1
            # handle degenerate cases where the data does not form a full 3-way tensor
            one = query_words
            if len(record_api) == 0:
                continue
            if (len(record_method_annotation_words) == 1
                    or len(record_method_flat) == 1 or len(record_api) == 1):
                if (len(record_method_annotation_words) == 1
                        and len(record_method_flat) == 1 or
                        len(record_method_flat) == 1 and len(record_api) == 1
                        or len(record_api) == 1
                        and len(record_method_annotation_words) == 1):
                    api_rec_all = record_api
                    for m in set(experiment_now_api):
                        if m in api_rec_all:
                            api_rec_all.remove(m)
                elif (len(record_api) == 1):
                    api_rec_all = record_api
                    for m in set(experiment_now_api):
                        if m in api_rec_all:
                            api_rec_all.remove(m)
                else:
                    if (len(record_method_annotation_words) == 1):
                        matrix = tl.unfold(tensor, mode=1)
                        nmf = nimfa.Nmf(matrix,
                                        max_iter=200,
                                        rank=round(min(matrix.shape) / 2),
                                        update='euclidean',
                                        objective='fro')
                        nmf_fit = nmf()
                        W = nmf_fit.basis()
                        H = nmf_fit.coef()
                        matrix = np.dot(W, H)
                        two = list(
                            similarity.get_topk_method_flat(
                                experiment_now_method_flat,
                                list(record_method_flat_dict.values()), 1, 1,
                                -1, 1).values())[0]
                        rec_combine_api_key = np.argsort(
                            -matrix[list(record_method_flat_dict.keys()
                                         )[list(record_method_flat_dict.values(
                                         )).index(two)], :]).tolist()[0]
                        api_rec_all = [
                            record_api_dict[i] for i in rec_combine_api_key
                        ]
                        for m in set(experiment_now_api):
                            if m in api_rec_all:
                                api_rec_all.remove(m)
                    elif (len(record_method_flat) == 1):
                        matrix = tl.unfold(tensor, mode=0)
                        nmf = nimfa.Nmf(matrix,
                                        max_iter=200,
                                        rank=round(min(matrix.shape) / 2),
                                        update='euclidean',
                                        objective='fro')
                        nmf_fit = nmf()
                        W = nmf_fit.basis()
                        H = nmf_fit.coef()
                        matrix = np.dot(W, H)
                        rec_combine_api_key = np.argsort(-matrix[
                            list(record_method_annotation_words_dict.keys(
                            ))[list(record_method_annotation_words_dict.values(
                            )).index(one)], :]).tolist()[0]
                        api_rec_all = [
                            record_api_dict[i] for i in rec_combine_api_key
                        ]
                        for m in set(experiment_now_api):
                            if m in api_rec_all:
                                api_rec_all.remove(m)

            else:
                # tensor factorization
                tf.reset_default_graph()
                tensor = tl.tensor(tensor).astype(np.float32)
                data_provider = Provider()
                data_provider.full_tensor = lambda: tensor
                env = Environment(data_provider, summary_path='/tensor/ncp_ml')
                ncp = NCP_BCU(env)
                arg = NCP_BCU.NCP_Args(rank=round(
                    min(len(record_method_annotation_words),
                        len(record_method_flat), len(record_api)) / 2),
                                       validation_internal=1)
                ncp.build_model(arg)
                loss_hist = ncp.train(100)
                factor_matrices = ncp.factors
                full_tensor = tl.kruskal_to_tensor(factor_matrices)

                two = list(
                    similarity.get_topk_method_flat(
                        experiment_now_method_flat,
                        list(record_method_flat_dict.values()), 1, 1, -1,
                        1).values())[0]

                rec_combine_api_key = np.argsort(
                    -full_tensor[list(record_method_annotation_words_dict.keys(
                    ))[list(record_method_annotation_words_dict.values()).
                       index(one)],
                                 list(record_method_flat_dict.keys()
                                      )[list(record_method_flat_dict.values()).
                                        index(two)], :]).tolist()
                # recommended API list; drop APIs already present in the context
                api_rec_all = [record_api_dict[i] for i in rec_combine_api_key]
                for m in set(experiment_now_api):
                    if m in api_rec_all:
                        api_rec_all.remove(m)

        # the current question is not among the similar questions
        else:
            similar_questions_length += 1

            # skip questions for which no similar question was found
            if similar_questions_length == 1:
                continue
            recommend_num += 1
            # append the incoming query
            record_method_annotation_words.append(query_words)
            print(len(record_method_annotation_words), len(record_method_flat),
                  len(record_api))
            # build the tensor
            record_method_annotation_words_dict = dict(
                zip(range(len(record_method_annotation_words)),
                    record_method_annotation_words))
            record_method_flat_dict = dict(
                zip(range(len(record_method_flat)), record_method_flat))
            record_api_dict = dict(zip(range(len(record_api)), record_api))
            tensor = np.zeros((len(record_method_annotation_words),
                               len(record_method_flat), len(record_api)),
                              dtype=int)
            for record in similar_records_list:
                for concrete_api in record.method_api_sequence:
                    tensor[list(record_method_annotation_words_dict.keys(
                    ))[list(record_method_annotation_words_dict.values()).
                       index(record.title_words)],
                           list(record_method_flat_dict.keys()
                                )[list(record_method_flat_dict.values()).
                                  index(record.method_block_flat)],
                           list(record_api_dict.keys(
                           ))[list(record_api_dict.values()).index(concrete_api
                                                                   )]] = 1

            for api in experiment_now_api:
                if api in record_api_dict.values():
                    tensor[list(record_method_annotation_words_dict.keys(
                    ))[list(record_method_annotation_words_dict.values()).
                       index(query_words)], :,
                           list(record_api_dict.keys(
                           ))[list(record_api_dict.values()).index(api)]] = 1
            # handle degenerate cases where tensor factorization does not apply
            one = query_words
            if len(record_api) == 0:
                continue
            if (len(record_method_annotation_words) == 1
                    or len(record_method_flat) == 1 or len(record_api) == 1):
                if (len(record_method_annotation_words) == 1
                        and len(record_method_flat) == 1 or
                        len(record_method_flat) == 1 and len(record_api) == 1
                        or len(record_api) == 1
                        and len(record_method_annotation_words) == 1):
                    api_rec_all = record_api
                    for m in set(experiment_now_api):
                        if m in api_rec_all:
                            api_rec_all.remove(m)
                elif (len(record_api) == 1):
                    api_rec_all = record_api
                    for m in set(experiment_now_api):
                        if m in api_rec_all:
                            api_rec_all.remove(m)
                else:
                    if (len(record_method_annotation_words) == 1):
                        matrix = tl.unfold(tensor, mode=1)
                        nmf = nimfa.Nmf(matrix,
                                        max_iter=200,
                                        rank=round(min(matrix.shape) / 2),
                                        update='euclidean',
                                        objective='fro')
                        nmf_fit = nmf()
                        W = nmf_fit.basis()
                        H = nmf_fit.coef()
                        matrix = np.dot(W, H)
                        two = list(
                            similarity.get_topk_method_flat(
                                experiment_now_method_flat,
                                list(record_method_flat_dict.values()), 1, 1,
                                -1, 1).values())[0]
                        rec_combine_api_key = np.argsort(
                            -matrix[list(record_method_flat_dict.keys()
                                         )[list(record_method_flat_dict.values(
                                         )).index(two)], :]).tolist()[0]
                        api_rec_all = [
                            record_api_dict[i] for i in rec_combine_api_key
                        ]
                        for m in set(experiment_now_api):
                            if m in api_rec_all:
                                api_rec_all.remove(m)
                    elif (len(record_method_flat) == 1):
                        matrix = tl.unfold(tensor, mode=0)
                        nmf = nimfa.Nmf(matrix,
                                        max_iter=200,
                                        rank=round(min(matrix.shape) / 2),
                                        update='euclidean',
                                        objective='fro')
                        nmf_fit = nmf()
                        W = nmf_fit.basis()
                        H = nmf_fit.coef()
                        matrix = np.dot(W, H)
                        rec_combine_api_key = np.argsort(-matrix[
                            list(record_method_annotation_words_dict.keys(
                            ))[list(record_method_annotation_words_dict.values(
                            )).index(one)], :]).tolist()[0]
                        api_rec_all = [
                            record_api_dict[i] for i in rec_combine_api_key
                        ]
                        for m in set(experiment_now_api):
                            if m in api_rec_all:
                                api_rec_all.remove(m)

            else:
                # tensor factorization
                tf.reset_default_graph()
                tensor = tl.tensor(tensor).astype(np.float32)
                data_provider = Provider()
                data_provider.full_tensor = lambda: tensor
                env = Environment(data_provider, summary_path='/tensor/ncp_ml')
                ncp = NCP_BCU(env)
                arg = NCP_BCU.NCP_Args(rank=round(
                    min(len(record_method_annotation_words),
                        len(record_method_flat), len(record_api)) / 2),
                                       validation_internal=1)
                ncp.build_model(arg)
                loss_hist = ncp.train(100)
                factor_matrices = ncp.factors
                full_tensor = tl.kruskal_to_tensor(factor_matrices)
                # one = query_words
                two = list(
                    similarity.get_topk_method_flat(
                        experiment_now_method_flat,
                        list(record_method_flat_dict.values()), 1, 1, -1,
                        1).values())[0]

                rec_combine_api_key = np.argsort(
                    -full_tensor[list(record_method_annotation_words_dict.keys(
                    ))[list(record_method_annotation_words_dict.values()).
                       index(one)],
                                 list(record_method_flat_dict.keys()
                                      )[list(record_method_flat_dict.values()).
                                        index(two)], :]).tolist()
                # recommended API list
                api_rec_all = [record_api_dict[i] for i in rec_combine_api_key]
                for m in set(experiment_now_api):
                    if m in api_rec_all:
                        api_rec_all.remove(m)
        # check whether the results appear among the similar questions
        # print(experiment_true_api)
        # print('----------------------------------')
        experiment_true_api = [
            true_api.split('.')[-2] for true_api in experiment_true_api
        ]
        experiment_true_api = removelist(experiment_true_api)
        experiment_now_api = [
            true_api.split('.')[-2] for true_api in experiment_now_api
        ]
        experiment_now_api = removelist(experiment_now_api)
        # remove experiment_now_api entries
        experiment_true_api = set(experiment_true_api) - set(
            experiment_now_api)
        record_api = [true_api.split('.')[-2] for true_api in record_api]
        record_api = removelist(record_api)
        api_rec_all = [true_api.split('.')[-2] for true_api in api_rec_all]
        api_rec_all = removelist(api_rec_all)
        for m in set(experiment_now_api):
            if m in api_rec_all:
                api_rec_all.remove(m)
        api_rec = api_rec_all[:rec_num]

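        # rank metrics over the top-rec_num recommendations:
        #   pos     - rank of the first correct API (for MRR)
        #   tmp_map - average precision for this query (for MAP)
        #   vector  - binary relevance vector (for NDCG)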
        pos = -1
        tmp_map = 0.0
        hits = 0.0
        vector = list()
        for i, api in enumerate(api_rec_all[:rec_num]):
            if api in set(experiment_true_api) and pos == -1:
                pos = i + 1
            if api in set(experiment_true_api):
                vector.append(1)
                hits += 1
                tmp_map += hits / (i + 1)
            else:
                vector.append(0)

        tmp_map /= len(set(experiment_true_api))
        tmp_mrr = 0.0
        if pos != -1:
            tmp_mrr = 1.0 / pos
        map_score += tmp_map
        mrr += tmp_mrr
        ndcg += calculateNDCG.ndcg_at_k(vector[:rec_num], rec_num)
        ground_truth_intersection = set(api_rec).intersection(
            set(experiment_true_api))
        if (len(ground_truth_intersection) > 0):
            recommend_success_num += 1
        precision += len(ground_truth_intersection) / rec_num
        recall += len(ground_truth_intersection) / len(
            set(experiment_true_api))
        writer.writerow([
            experiment_method_annotation, api_rec, ground_truth_intersection,
            experiment_true_api
        ])

    writer.writerow(["recommend_num", "recommend_success_num"])
    writer.writerow([recommend_num, recommend_success_num])
    writer.writerow([
        "mrr/recommend_num", "recommend_num", "map/recommend_num",
        "success_rate@N", "precision@N/recommend_num",
        "recall@N/recommend_num", "ndcg/recommend_num"
    ])
    writer.writerow([
        mrr / recommend_num, recommend_num, map_score / recommend_num,
        recommend_success_num / recommend_num, precision / recommend_num,
        recall / recommend_num, ndcg / recommend_num
    ])
    csvfile.close()
    end = time.perf_counter()  # time.clock() was removed in Python 3.8

    print('Running time: %s Seconds' % (end - start))

    logging.info("Finish")
Example #8
from tensorD.factorization.env import Environment
from tensorD.factorization.pitf_numpy import PITF_np
from tensorD.factorization.tucker import *
from tensorD.dataproc.provider import Provider
#from tensorD.dataproc.reader import TensorReader
import tensorflow as tf
import numpy as np

if __name__ == '__main__':
    data_provider = Provider()
    data_provider.full_tensor = lambda: tf.constant(
        np.random.rand(50, 50, 8) * 10, dtype=tf.float32)
    pitf_np_env = Environment(data_provider, summary_path='/tmp/tensord')
    pitf_np = PITF_np(pitf_np_env)
    sess_t = pitf_np_env.sess
    init_op = tf.global_variables_initializer()
    sess_t.run(init_op)
    tensor = pitf_np_env.full_data().eval(session=sess_t)
    args = PITF_np.PITF_np_Args(rank=5,
                                delt=0.8,
                                tao=12,
                                sample_num=100,
                                validation_internal=1,
                                verbose=False,
                                steps=500)
    y, X_t, Y_t, Z_t, Ef_t, If_t, Rf_t = pitf_np.exact_recovery(args, tensor)
    y = tf.convert_to_tensor(y)
    X = tf.convert_to_tensor(X_t)
    Y = tf.convert_to_tensor(Y_t)
    Z = tf.convert_to_tensor(Z_t)
    Ef = tf.convert_to_tensor(Ef_t)
Example #9
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/3 PM4:16
# @Author  : Shiloh Leung
# @Site    : 
# @File    : ntucker_demo.py
# @Software: PyCharm Community Edition

import tensorflow as tf
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.ntucker import NTUCKER_BCU
from tensorD.demo.DataGenerator import *

if __name__ == '__main__':
    print('=========Train=========')
    X = synthetic_data_tucker([20, 20, 20], [10, 10, 10])
    data_provider = Provider()
    data_provider.full_tensor = lambda: X
    env = Environment(data_provider, summary_path='/tmp/ntucker_demo')
    ntucker = NTUCKER_BCU(env)
    args = NTUCKER_BCU.NTUCKER_Args(ranks=[10, 10, 10], validation_internal=10)
    ntucker.build_model(args)
    ntucker.train(2000)
    factor_matrices = ntucker.factors
    core_tensor = ntucker.core
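    # A possible extra check (not in the original demo, and assuming tensorly
    # is installed and the factors are NumPy arrays): rebuild the tensor from
    # the Tucker core via mode products along every mode.
    import tensorly as tl
    approx = tl.tenalg.multi_mode_dot(core_tensor, factor_matrices)
    print('approximation shape:', approx.shape)  # expect (20, 20, 20)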
    print('Train ends.\n\n\n')
Example #10

from tensorD.dataproc.reader import TensorReader
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.tucker import HOOI
from tensorD.factorization.tucker import HOSVD
from tensorD.demo.DataGenerator import *
import tensorflow as tf  # needed for tf.Session below

if __name__ == '__main__':
    full_shape = [943, 1682, 31]
    base = TensorReader('/root/tensorD_f/data_out_tmp/u1.base.csv')
    base.read(full_shape=full_shape)
    with tf.Session() as sess:
        rating_tensor = sess.run(base.full_data)
    data_provider = Provider()
    data_provider.full_tensor = lambda: rating_tensor
    env = Environment(data_provider, summary_path='/tmp/tucker_ml')
    hooi = HOOI(env)
    args = HOOI.HOOI_Args(ranks=[20, 20, 20], validation_internal=1)
    hooi.build_model(args)
    hist = hooi.train(100)
    out_path = '/root/tensorD_f/data_out_tmp/python_out/hooi_ml_20.txt'
    with open(out_path, 'w') as out:
        for step in hist:
            loss = step[0]
            rel_res = step[1]
            out.write('%.10f, %.10f\n' % (loss, rel_res))


Example #11
# @File    : tucker_test.py
# @Software: PyCharm Community Edition
import numpy as np
import tensorflow as tf
from tensorD.factorization.env import Environment
from tensorD.dataproc.provider import Provider
from tensorD.factorization.tucker import HOSVD
from tensorD.factorization.tucker import HOOI

if __name__ == '__main__':
    data_provider = Provider()
    X = np.arange(60).reshape(3, 4, 5)
    data_provider.full_tensor = lambda: X

    print('====HOSVD test====')
    hosvd_env = Environment(data_provider, summary_path='/tmp/tensord')
    hosvd = HOSVD(hosvd_env)
    args = HOSVD.HOSVD_Args(ranks=[2, 2, 2])
    hosvd.build_model(args)
    hosvd.train()
    print(hosvd.full - X)

    print('\n\n\n====HOOI test====')
    hooi_env = Environment(data_provider, summary_path='/tmp/tensord')
    hooi = HOOI(hooi_env)
    args = HOOI.HOOI_Args(ranks=[2, 2, 2], validation_internal=5)
    hooi.build_model(args)
    hooi.train(100)
    print(hooi.full - X)
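    # A possible extra check (not in the original test): report the relative
    # reconstruction error instead of eyeballing the raw residual tensor.
    print('HOOI relative error: %.6f' %
          (np.linalg.norm(hooi.full - X) / np.linalg.norm(X)))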