def getRawLog(N, user_amount, item_amount, features):
    """Build a shuffled synthetic interaction log plus its users and items.

    ``user_amount`` ``User`` objects and ``item_amount`` ``Item`` objects are
    created (keyed ``'user <k>'`` / ``'item <k>'``) and initialised from
    ``features``. Every user then receives ``N`` records of the form
    ``(user_key, item_key, timestamp)``. Timestamps start at the current time
    and advance by 5 for each record generated, before the log is shuffled.

    Returns ``[]`` when ``N <= 0``; otherwise the tuple
    ``(raw_log, all_user_dic, all_item_dic)``.
    """
    if N <= 0:
        return []

    # Registry of generated users, keyed by their display name.
    all_user_dic = {}
    for user_no in range(user_amount):
        person = User('user ' + str(user_no))
        person.createProfile(features)
        all_user_dic['user ' + str(user_no)] = person

    # Registry of generated items, keyed by their display name.
    all_item_dic = {}
    for item_no in range(item_amount):
        thing = Item('item ' + str(item_no))
        thing.createContentVector(features)
        all_item_dic['item ' + str(item_no)] = thing

    start = time.time()
    step = 5
    raw_log = []
    for user_pos, uid in enumerate(all_user_dic):
        for k in range(N):
            picked = 'item ' + str(int(random.random() * item_amount))
            # Global record index: one slot of ``step`` per record.
            stamp = start + (user_pos * N + k) * step
            raw_log.append((uid, picked, stamp))

    shuffle(raw_log)
    return raw_log, all_user_dic, all_item_dic
    def spliDtata(self):
        """Shuffle ``self.train_data`` in place and split off the test set.

        The first ``ceil(len(train_data) * self.splitline)`` samples stay in
        ``self.train_data``; the remaining tail becomes ``self.test_data``.
        """
        shuffle(self.train_data)

        # Split into training samples (head) and test samples (tail).
        shuffled = self.train_data
        cut = int(np.ceil(len(shuffled) * self.splitline))
        self.test_data, self.train_data = shuffled[cut:], shuffled[:cut]
示例#3
0
    def test_shuffle_of_array_of_objects(self):
        # Test that permuting an array of objects will not cause
        # a segfault on garbage collection.
        # See gh-7719
        random.seed(1234)
        a = np.array([np.arange(1), np.arange(4)])

        for _ in range(1000):
            random.shuffle(a)

        # Force Garbage Collection - should not segfault.
        import gc
        gc.collect()
    def test_shuffle_of_array_of_different_length_strings(self):
        # Test that permuting an array of different length strings
        # will not cause a segfault on garbage collection
        # Tests gh-7710
        random.seed(1234)

        a = np.array(['a', 'a' * 1000])

        for _ in range(100):
            random.shuffle(a)

        # Force Garbage Collection - should not segfault.
        import gc
        gc.collect()
    def generateLog(self, N):
        """Generate a shuffled synthetic tagging log into ``self.raw_log``.

        For each of ``self.user_amount`` users, ``N`` records are produced of
        the form ``(user, item, tag, moment)``. Item and tag indices are drawn
        uniformly below ``self.item_amount`` / ``self.tag_amount``. ``moment``
        starts at the current time and advances by 2 for every record, so each
        record carries a distinct timestamp before the log is shuffled.
        """
        self.raw_log = []

        moment = time.time()
        gap = 2
        j = 0  # number of records emitted so far
        for u in range(self.user_amount):
            user = 'user ' + str(u)
            for _ in range(N):
                random_item = 'item ' + str(int(random.random() * self.item_amount))
                random_tag = 'tag ' + str(int(random.random() * self.tag_amount))
                random_moment = moment + gap * j
                self.raw_log.append((user, random_item, random_tag, random_moment))
                # Advance the counter; without this every record would share
                # the same timestamp.
                j += 1

        shuffle(self.raw_log)
示例#6
0
def getRawLog(N, user_amount, item_amount, tag_amount):
    """Return a shuffled list of random ``(user, item, tag)`` records.

    Each of the ``user_amount`` users contributes ``N`` records whose item and
    tag indices are drawn uniformly below ``item_amount`` and ``tag_amount``.
    An empty list is returned when ``N <= 0``.
    """
    if N <= 0:
        return []

    def pick(prefix, upper):
        # One uniform draw, truncated to an integer index below ``upper``.
        return prefix + str(int(random.random() * upper))

    # Tuple elements are evaluated left to right, so the item is drawn before
    # the tag for every record.
    raw_log = [
        ('user ' + str(uid), pick('item ', item_amount), pick('tag ', tag_amount))
        for uid in range(user_amount)
        for _ in range(N)
    ]

    shuffle(raw_log)
    return raw_log
示例#7
0
def getRawLog(N, user_amount, item_amount, tag_amount):
    """Produce a shuffled synthetic log of ``(user, item, tag)`` triples.

    ``N`` triples are generated per user for ``user_amount`` users; the item
    and tag of each triple are chosen at random below ``item_amount`` and
    ``tag_amount``. A non-positive ``N`` yields an empty list.
    """
    if N <= 0:
        return []

    records = []
    for user_index in range(user_amount):
        user_label = 'user {}'.format(user_index)
        for _ in range(N):
            # Draw the item first, then the tag.
            item_index = int(random.random() * item_amount)
            tag_index = int(random.random() * tag_amount)
            records.append(
                (user_label, 'item {}'.format(item_index), 'tag {}'.format(tag_index)))

    shuffle(records)
    return records
示例#8
0
def autoGetRawLog(N, user_amount, item_amount):
    """Generate a shuffled synthetic log of ``(user, item, time)`` records.

    Every one of the ``user_amount`` users gets ``N`` records with a random
    item below ``item_amount``. Timestamps begin at the current time and grow
    by 5 per generated record. Returns ``[]`` when ``N <= 0``.
    """
    if N <= 0:
        return []

    start = time.time()
    step = 5
    # One owner label per record, in generation order.
    owners = ('user ' + str(uid) for uid in range(user_amount) for _ in range(N))
    raw_log = [
        (owner, 'item ' + str(int(random.random() * item_amount)), start + seq * step)
        for seq, owner in enumerate(owners)
    ]
    shuffle(raw_log)
    return raw_log
def autoGetRawLog(N, user_amount, item_amount):
    """Create a shuffled list of random ``(user, item, time)`` log entries.

    ``N`` entries are emitted for each of ``user_amount`` users. Each entry
    picks a random item index below ``item_amount`` and a timestamp that is
    5 later than the previously generated entry, starting from the current
    time. A non-positive ``N`` gives an empty list.
    """
    if N <= 0:
        return []

    base_time = time.time()
    spacing = 5
    entries = []
    for user_no in range(user_amount):
        owner = 'user ' + str(user_no)
        for offset in range(N):
            chosen = 'item ' + str(int(random.random() * item_amount))
            # Position of this entry in overall generation order.
            position = user_no * N + offset
            entries.append((owner, chosen, base_time + position * spacing))
    shuffle(entries)
    return entries
示例#10
0
    def __init__(
            self,
            data_path=r'/home/xinye/workingdirectory/PyCodeFragment/data/resized_animal',
            splitline=0.9):
        """Load every image under ``data_path`` into memory and split train/test.

        This version reads the images straight into memory. For a large image
        set (around 4 GB), consider caching only the full image paths and
        reading each image right before it is fed.

        Args:
            data_path: Directory whose entries are treated as one folder per
                class.
            splitline: Fraction of the shuffled samples kept as training data;
                the remainder becomes the test data.
        """
        # Names of the individual image folders (one per class).
        animal_path_name = os.listdir(data_path)

        # Build the one-hot encoding for every class.
        labels = np.zeros((len(animal_path_name), len(animal_path_name)))
        for i in range(len(labels)):
            labels[i][i] = 1
            print(animal_path_name[i], '==>', labels[i])

        # Full paths of the image folders.
        animal_paths = [
            os.path.join(data_path, filename) for filename in animal_path_name
        ]

        self.train_data = list()
        for folder_name, folder_path, label in zip(animal_path_name,
                                                   animal_paths, labels):
            image_paths = self.getImagePaths(folder_path)
            print('获得%s' % folder_name)
            for filename in image_paths:
                # The context manager closes the file even if the conversion
                # raises; flatten() copies, so the pixels outlive the handle.
                with Image.open(filename) as pic:
                    pixels = np.asarray(pic).flatten()
                self.train_data.append([pixels, label])
                # height/width/channel are not defined in this method;
                # presumably module-level constants - verify.
                if len(pixels) != height * width * channel:
                    print('图片大小异常(检查图片格式)%d--%s' %
                          (len(pixels), filename))
        print('......\n图片提取完成\n')

        # Convert to numpy and shuffle. Each sample pairs a pixel vector with
        # a (normally shorter) label vector, i.e. the nested list is ragged;
        # dtype=object keeps this working on NumPy >= 1.24, which raises
        # ValueError for ragged input without an explicit dtype.
        self.train_data = np.array(self.train_data, dtype=object)
        print(self.train_data.shape)
        # The second shuffle is redundant but kept so the amount of random
        # state consumed is unchanged.
        shuffle(self.train_data)
        shuffle(self.train_data)

        # Split into training samples (head) and test samples (tail).
        split_index = int(np.ceil(len(self.train_data) * splitline))
        self.test_data = self.train_data[split_index:]
        self.train_data = self.train_data[:split_index]

        # Data cursor flag, initialised to 0.
        self.flag = 0
    def generateLog(self, N):
        """Fill ``self.raw_log`` with a shuffled synthetic tagging log.

        ``N`` records ``(user, item, tag, moment)`` are generated for each of
        ``self.user_amount`` users. Item and tag indices are random values
        below ``self.item_amount`` / ``self.tag_amount``. ``moment`` starts at
        the current time and increases by 2 with every record generated, so
        timestamps are distinct before the log is shuffled.
        """
        self.raw_log = []

        moment = time.time()
        gap = 2
        j = 0  # number of records emitted so far
        for u in range(self.user_amount):
            for _ in range(N):
                random_item = 'item ' + str(
                    int(random.random() * self.item_amount))
                random_tag = 'tag ' + str(
                    int(random.random() * self.tag_amount))
                random_moment = moment + gap * j
                self.raw_log.append(
                    ('user ' + str(u), random_item, random_tag, random_moment))
                # Advance the counter; without this every record would share
                # the same timestamp.
                j += 1

        shuffle(self.raw_log)
示例#12
0
def getRawLog(N, user_amount, item_amount):
    """Return a shuffled raw log whose records are ``(u, i, t)``.

    ``u`` is a user label, ``i`` a randomly chosen item label below
    ``item_amount`` and ``t`` a timestamp. ``N`` records are generated per
    user; timestamps start at the current time and advance by 5 for each
    record generated. Returns ``[]`` when ``N <= 0``.
    """
    if N <= 0:
        return []

    origin = time.time()
    interval = 5
    raw_log = []
    # Walk the records by their global sequence number; the owning user is the
    # sequence number divided by the per-user record count.
    for seq in range(user_amount * N):
        owner = 'user ' + str(seq // N)
        chosen = 'item ' + str(int(random.random() * item_amount))
        raw_log.append((owner, chosen, origin + interval * seq))

    shuffle(raw_log)
    return raw_log
示例#13
0
    def get_text_pairs(self):
        """Build shuffled, tab-separated text pairs for every ordered dataset pair.

        After ``self.get_datasets()`` has run, every ordered pair of keys of
        ``self.datasets`` is processed. The two ``'Scripture'`` sequences are
        zipped row by row; each row pair becomes one line
        ``"<text 1>\\t<text 2>\\n"`` with runs of whitespace collapsed to single
        spaces. The lines are shuffled and stored in ``self.data_pairs`` under
        a key derived from the two dataset names.

        Returns:
            ``self.data_pairs``.
        """
        self.get_datasets()

        k_pairs = list(permutations(self.datasets.keys(), 2))

        print('\nCreating pairs: ')
        print('Progress: #', end='')

        # Applied in order to str(pair): strip both file-name suffixes, then
        # the tuple punctuation, turning "('A - ...csv', 'B - ...csv')" into
        # "A - B".
        key_cleanup = (
            (r'\s[-]\sBíblia Completa.csv', ''),
            (r'\s[-]\sNovo Testamento.csv', ''),
            (r'\(', ''),
            (r'\)', ''),
            (r'[,]', ' -'),
            (r'[\']', ''),
        )

        for p in k_pairs:

            # Each substitution must feed the next one; restarting from
            # str(p) would discard the removal of the first suffix.
            key = str(p)
            for pattern, replacement in key_cleanup:
                key = re.sub(pattern, replacement, key)

            pair_text = []
            print('#', end='')
            # NOTE(review): the result of align() is discarded; if these are
            # pandas Series this call does not modify them in place - confirm
            # whether the aligned pair was meant to be used below.
            self.datasets[p[0]]['Scripture'].align(
                self.datasets[p[1]]['Scripture'])

            for r_1, r_2 in zip(self.datasets[p[0]]['Scripture'],
                                self.datasets[p[1]]['Scripture']):

                try:

                    pair_text.append(' '.join(str(r_1).split()) + '\t' +
                                     ' '.join(str(r_2).split()) + '\n')

                except AttributeError:
                    print(AttributeError)

                    # NOTE(review): debugging hook left in place; it drops
                    # into the debugger when a row cannot be processed.
                    breakpoint()

            shuffle(pair_text)

            self.data_pairs[key] = pair_text

        return self.data_pairs
示例#14
0
def doArena(n: mcts2.INeuralNet, mcts: mcts2.MCTS, doTrain=True):
    """Play five arena games and optionally train ``n`` on the collected examples.

    A "neural" player driven by ``mcts`` is pitted against a "neural OLD"
    player driven by ``mcts2.getActionProbabilities``. When ``doTrain`` is
    true, all examples in the arena's ``trainExamplesHistory`` are flattened
    and shuffled; if any exist, ``n`` is trained on them and a checkpoint is
    saved.
    """
    # Alternative opponents used previously: HumanPlayer ("Marcel") and
    # RandomPlayer ("random").
    def old_policy(x):
        return np.argmax(mcts2.getActionProbabilities(x, 0))

    def new_policy(x):
        return np.argmax(mcts.getActionProbabilities(x, 0))

    otherPlayer = Player("neural OLD", old_policy)
    neuralPlayer = Player("neural", new_policy)
    arena = Arena(neuralPlayer, otherPlayer, moaraGame, moara.args, mcts)

    arena.playGames(5, verbose=False)
    if not doTrain:
        return

    # Train the network on every example gathered during the arena games.
    examples = [sample
                for episode in arena.trainExamplesHistory
                for sample in episode]
    shuffle(examples)
    if not examples:
        return

    n.train(examples)
    n.save_checkpoint(folder=moara.args.checkpoint,
                      filename_no=moara.args.filename)
    def __init__(self, data_path=r'/home/xinye/workingdirectory/PyCodeFragment/data/resized_animal', splitline=0.9):
        """Load every image under ``data_path`` into memory and split train/test.

        This version reads the images straight into memory. For a large image
        set (around 4 GB), consider caching only the full image paths and
        reading each image right before it is fed.

        Args:
            data_path: Directory whose entries are treated as one folder per
                class.
            splitline: Fraction of the shuffled samples kept as training data;
                the remainder becomes the test data.
        """
        # Names of the individual image folders (one per class).
        animal_path_name = os.listdir(data_path)

        # Build the one-hot encoding for every class.
        labels = np.zeros((len(animal_path_name), len(animal_path_name)))
        for i in range(len(labels)):
            labels[i][i] = 1
            print(animal_path_name[i], '==>', labels[i])

        # Full paths of the image folders.
        animal_paths = [os.path.join(data_path, filename) for filename in animal_path_name]

        self.train_data = list()
        for folder_name, folder_path, label in zip(animal_path_name, animal_paths, labels):
            image_paths = self.getImagePaths(folder_path)
            print('获得%s' % folder_name)
            for filename in image_paths:
                # The context manager closes the file even if the conversion
                # raises; flatten() copies, so the pixels outlive the handle.
                with Image.open(filename) as pic:
                    pixels = np.asarray(pic).flatten()
                self.train_data.append([pixels, label])
                # height/width/channel are not defined in this method;
                # presumably module-level constants - verify.
                if len(pixels) != height * width * channel:
                    print('图片大小异常(检查图片格式)%d--%s' % (len(pixels), filename))
        print('......\n图片提取完成\n')

        # Convert to numpy and shuffle. Each sample pairs a pixel vector with
        # a (normally shorter) label vector, i.e. the nested list is ragged;
        # dtype=object keeps this working on NumPy >= 1.24, which raises
        # ValueError for ragged input without an explicit dtype.
        self.train_data = np.array(self.train_data, dtype=object)
        print(self.train_data.shape)
        # The second shuffle is redundant but kept so the amount of random
        # state consumed is unchanged.
        shuffle(self.train_data)
        shuffle(self.train_data)

        # Split into training samples (head) and test samples (tail).
        split_index = int(np.ceil(len(self.train_data) * splitline))
        self.test_data = self.train_data[split_index:]
        self.train_data = self.train_data[:split_index]

        # Data cursor flag, initialised to 0.
        self.flag = 0
示例#16
0
def train_test_split(samples_list, train_test_ratio=0.5):
    """Split samples into training and test lists, grouped by category.

    Rows are grouped by the value in column 2 ('uncontrolled', 'insitu',
    'controlled', in that order), each group is shuffled, and the groups are
    concatenated. The first ``int(len(samples_list) * train_test_ratio)`` rows
    of that sequence form the test set and the rest the training set, so the
    test set is filled from the 'uncontrolled' group first.

    Rows whose column 2 holds any other value are dropped, although they still
    count towards ``len(samples_list)`` when the test-set size is computed.

    Args:
        samples_list: 2-D array indexable as ``samples_list[:, 2]`` and by
            boolean mask (e.g. a NumPy array).
        train_test_ratio: Fraction of the samples assigned to the test set.
            Defaults to 0.5.

    Returns:
        ``(training_set_samples_list, test_set_samples_list)`` as plain lists
        of rows.
    """
    # Honour the caller-supplied ratio rather than a fixed 0.5.
    test_set_samples_count = int(len(samples_list) * train_test_ratio)

    rearranged_samples_list = []
    for category in ('uncontrolled', 'insitu', 'controlled'):
        # Boolean-mask indexing yields a copy, so shuffling it leaves
        # samples_list untouched.
        group = samples_list[samples_list[:, 2] == category]
        shuffle(group)
        rearranged_samples_list.extend(group)

    test_set_samples_list = rearranged_samples_list[:test_set_samples_count]
    training_set_samples_list = rearranged_samples_list[test_set_samples_count:]
    return training_set_samples_list, test_set_samples_list
示例#17
0
def cards():
    """Run one automated two-player card game and print its progress.

    A ``deck`` is created and shuffled, half of its cards are dealt to each
    player, and rounds are played while the global ``game`` equals ``'lol'``.
    In each round both players draw their top card and the higher
    ``getnum()`` takes both cards into that player's won pile; equal cards
    start the "war" loop below.

    Relies on names that are not defined in this function: ``deck``,
    ``shuffle``, and the globals ``check``, ``w1``, ``w2``, ``f1`` and ``f2``
    (presumably module-level - verify against the rest of the file).
    """
    global game
    global check
    feed = 0
    # Won-card piles for player 1 and player 2; stack() moves them back into
    # the players' hands.
    p1d = []
    p2d = []
    Bicicle = deck()
    Bicicle.shufffle()
    purple = Bicicle.getdeck()
    # Deal the first half of the deck to player 1, one card at a time.
    p1 = []
    for x in range((int(len(purple)/2))):
        p1.append(purple[0])
        del purple[0]
    print(p1)
    # Player 2 keeps whatever is left (p2 is the same list object as purple).
    p2 = purple
    print(p2)
    print(len(p1))
    print(len(p2))
    game = 'lol'
    print('hi')
    def stack(player):
            """Move every card from the player's won pile to the end of their hand."""
            global game
            if player == 1:
                for asd in range(len(p1d)):
                    p1.append(p1d[0])
                    del p1d[0] 
            if player == 2:
                for abc in range(len(p2d)):
                    p2.append(p2d[0])
                    del p2d[0]
    def chek(player,thresh,w1,w2):
        """Check that ``player`` can supply ``thresh`` cards.

        If hand plus won pile hold fewer than ``thresh`` cards, the game is
        ended (``game = 'xd'``), the opponent is announced as the winner and
        the opponent's global win counter (``f2`` / ``f1``) is incremented.
        Otherwise, if the hand alone is too short, the won pile is moved into
        the hand via ``stack``. ``w1`` and ``w2`` are not used in the body.
        """
        global game
        global f1
        global f2
        if player == 1:
            if len(p1)+len(p1d) < thresh:
                print("thresh: ", thresh)
                game = 'xd'
                print('PLAYER 2 WINS!!!')
                f2 = f2+1
                return
            if len(p1) < thresh:
                stack(1)
        if player == 2:
            if len(p2)+len(p2d) <thresh:
                game = 'xd'
                print("thresh", thresh)
                print('PLAYER 1 WINS!!!') 
                f1 = f1+1
                return
            if len(p2) < thresh:
                stack(2)
    # Main loop: one iteration per round, until chek() or the quit flag
    # changes ``game``.
    while game == 'lol':
        if game != 'lol':
            warq = 1
        # A global ``check`` of 'q' ends the game.
        if check == 'q':
            game = 'xd'
            gtry = 'no'
        # Make sure both players can draw one card.
        chek(1,1,w1,w2)
        chek(2,1,w1,w2)
        if game == 'lol':
            # Each player draws the top card of their hand.
            drew1 = p1[0]
            drew2 = p2[0]  
            del p1[0]
            del p2[0]
            if game != 'lol':
                warq = 1
            print(game)
            # The remaining hands are reshuffled every round.
            shuffle(p1)
            shuffle(p2)
            chek(1,1,w1,w2)
            chek(2,1,w1,w2)
            #print('Player 1 Drew: ', drew1)
            #print('Player 2 Drew: ', drew2)
            if drew1.getnum() > drew2.getnum():
                #print('PLAYER 1 PICKS UP CARDS! ')
                p1d.append(drew1)
                p1d.append(drew2)
                print(drew1,drew2)
                print('no')
            elif drew1.getnum() < drew2.getnum():
                #print('PLAYER 2 PICKS UP CARDS! ')
                p2d.append(drew1)
                p2d.append(drew2)
                print('yes')
            else:
                # Tie: enter the war loop. warq == 0 means the war is still
                # unresolved.
                warq = 0
                stwar = 0
                pp1 = 0
                pp2 = 0
                stw1 = 0
                stw2 = 0
                # NOTE(review): drew1/drew2 are rebound to empty lists here,
                # so the two tied cards (already removed from p1/p2) are no
                # longer held in any hand or pile - confirm this is intended.
                drew1 = []
                drew2 = []
                while warq == 0:
                    print('this is warq: ',warq)
                    # Each pass of the war raises the stake by 4 cards.
                    stwar = stwar + 4
                    stw1 = stwar
                    stw2 = stwar
                    print(stwar)
                    if game != 'lol':
                        warq = 1
                    if game == 'lol':
                        # Both players need stwar + 1 cards to continue.
                        chek(1,stwar+1,w1,w2)
                        chek(2,stwar+1,w1,w2)
                        if game != 'lol':
                            warq = 1
                        if game == 'lol':
                            #print('=====WAR!=====')
                            # Each player puts stwar cards on the table.
                            for x in range(stwar):
                                drew1.append(p1[0])
                                del p1[0]
                            for n in range(stwar):
                                drew2.append(p2[0])
                                del p2[0]
                            print(drew1)
                            print(drew2)
                            
                            #print('Player 1 Drew: ', drew1.toString())
                            #print('Player 2 Drew: ', drew2.toString())
                            # The last card laid down by each player decides
                            # the war.
                            if len(drew1) != 0 and len(drew2) != 0:
                                if drew1[-1].getnum() > drew2[-1].getnum():
                                    #print('Player 1 picks up cards! ')
                                    # NOTE(review): player 1 has the higher
                                    # card here, yet the cards are appended to
                                    # p2d, the same pile the player-2 branch
                                    # below uses - confirm whether p1d was
                                    # intended.
                                    for jp in range(len(drew1)):
                                        if game == 'lol':
                                            p2d.append(drew1[0])
                                            del drew1[0]
                                    for jh in range(len(drew2)):
                                        p2d.append(drew2[0])
                                        del drew2[0]
                                    warq = 1
                                    print('boi')
                            if len(drew1) != 0 and len(drew2) != 0:
                                if drew1[-1].getnum() < drew2[-1].getnum():
                                    #print('Player 2 picks up cards! ')
                                    for jz in range(len(drew1)):
                                        if game == 'lol':
                                            p2d.append(drew1[0])
                                            del drew1[0]
                                    for ja in range(len(drew2)):
                                        p2d.append(drew2[0])
                                        del drew2[0]
                                    warq = 1
                                    print('boi')
                            # If the deciding cards tie again, warq stays 0
                            # and the loop repeats with a larger stake.
                            chek(1,stw2+2,w1,w2)
                            chek(2,stw2+2,w1,w2)
示例#18
0
 def play(self, game):
     """Return a randomly chosen valid move for the game's current player.

     The list returned by ``game.getValidMoves`` is shuffled in place and its
     first element is returned.
     """
     current = game.getCrtPlayer()
     candidates = game.getValidMoves(current)
     shuffle(candidates)
     chosen = candidates[0]
     return chosen
def vectorize(normed):
    """Cut ``normed`` into overlapping windows and return them shuffled.

    One window of ``slidingWindowSize`` elements is taken at every start index
    from 0 up to (but excluding) ``len(normed) - slidingWindowSize``. The
    windows are returned as a plain list in random order.
    """
    window_count = len(normed) - slidingWindowSize
    sequences = []
    for start in range(window_count):
        stop = start + slidingWindowSize
        sequences.append(normed[start:stop])
    shuffle(sequences)
    return sequences
示例#20
0
# -*- coding: utf-8 -*-
import numpy as np
from numpy.random.mtrand import shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.tree import export_graphviz
import graphviz
import matplotlib.pyplot as plt
# Load and prepare the data.
filename = '../data/4.4.3-wine.csv'
# Open the file in a context manager so the handle is closed once the rows
# have been parsed.
with open(filename, "rb") as data_file:
    data = np.loadtxt(data_file, delimiter=",", skiprows=0)
shuffle(data)
# The last column is the label; every other column is a feature.
X = data[:, :-1]
y = data[:, -1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=99)

# Train the model.
clf = RandomForestClassifier(n_estimators=3)
# Meaning and settings of the various parameters!
clf.fit(x_train, y_train)
label_predict = clf.predict(x_test)
# print(clf.estimators_[0].tree_.n_node_samples)
# Evaluate the model.
from sklearn.metrics import classification_report
print(classification_report(y_test, label_predict))
    def sgd(
            self,
            training_data,
            epochs=30,
            mini_batch_size=10,
            alpha=3.0,
            lmbda=0.1,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False
    ):
        """
        Train the neural network using mini-batch stochastic gradient
        descent. The ``training_data`` is a list of tuples ``(x, y)``
        representing the training inputs and the desired outputs; it is
        shuffled in place at the start of every epoch. ``alpha`` is the
        learning rate passed to ``update_mini_batch`` and is multiplied
        by 0.9 after every epoch. ``lmbda`` is the regularization
        parameter. The method also accepts ``evaluation_data``, usually
        either the validation or test data. We can monitor the cost and
        accuracy on either the evaluation data or the training data, by
        setting the appropriate flags. The method returns a tuple
        containing four lists: the (per-epoch) costs on the evaluation
        data, the accuracies on the evaluation data, the costs on the
        training data, and the accuracies on the training data. All
        values are evaluated at the end of each training epoch. So, for
        example, if we train for 30 epochs, then the first element of the
        tuple will be a 30-element list containing the cost on the
        evaluation data at the end of each epoch. Note that the lists
        are empty if the corresponding flag is not set.
        """
        n_data = len(evaluation_data) if evaluation_data else None
        n = len(training_data)
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []

        for j in range(epochs):
            shuffle(training_data)

            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]

            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, alpha, lmbda, n)

            print("Epoch %s training complete" % j)

            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda)
                training_cost.append(cost)
                print("Cost on training data: {}".format(cost))

            if monitor_training_accuracy:
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)
                print(
                    "Accuracy on training data: {} / {}"
                    .format(accuracy, n)
                )
            if monitor_evaluation_cost:
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost.append(cost)
                print("Cost on evaluation data: {}".format(cost))
            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy.append(accuracy)
                # Report the value just computed instead of running a second
                # full accuracy pass over the evaluation data.
                print(
                    "Accuracy on evaluation data: {} / {}"
                    .format(accuracy, n_data)
                )
            alpha = alpha * 0.9  # slowly decrease alpha per training iteration
        return (
            evaluation_cost,
            evaluation_accuracy,
            training_cost,
            training_accuracy
        )