Example No. 1
def load_data(train_data_file, test_data):
    # Train data.
    word_avg = WordAverage.WordAverage(train_data_file).word_avg()
    X, y = PrepareData.Data(word_avg).train_data(train_data_file)
    # Test data.
    x = PrepareData.Data(word_avg).test_data(test_data)

    return X, y, x
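A minimal call-site sketch for the loader above, assuming the `WordAverage` and `PrepareData` modules it uses are importable from the surrounding project; the file paths below are placeholders, not paths from the original code.

# Hypothetical usage of load_data; the paths are placeholders.
X, y, x = load_data('train_reviews.txt', 'test_reviews.txt')
print('train samples:', len(y), '| test samples:', len(x))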
Example No. 2
def PrepareLearningData(StockFile):
    alist = PrepareData.GetBigCompany(StockFile)
    #alist = ['aapl','aap','orcl','f','azo']
    AllData = MyPredictDB.GetAllDataFromAList(alist, '2018-02-22', 600)
    GetPicturesAndLabels(AllData, KnownDays=KnownDays, PredictDays=10, StepDays=20, UpThreshold=0.05)
Example No. 3
def PreparePredictData(StockFile,TheDate):
    '''
    @param TheDate: 2018-02-02
    '''
    ###
    #   delete the old predict files first
    ###
    thedir = './data/Stock/predict/'
    for root, subdir, files in os.walk(thedir):
        for afile in files:
            os.remove(os.path.join(thedir,afile))
            
    alist = PrepareData.GetBigCompany(StockFile)
    #alist = ['aapl']
    AllData = MyPredictDB.GetAllDataFromAList(alist, TheDate, KnownDays+1)
    plt = myplt()
    convert = mdates.strpdate2num('%Y-%m-%d %H:%M:%S')  # note: strpdate2num is deprecated/removed in newer matplotlib
    # get all data
    
    #BigLists = ['AAPL']
    #print BigLists
    #AllData = MyPredictDB.GetAllDataFromAList(BigLists, '2017-07-21', 400)
    TotalSymbolNumber = len(AllData)
    CurrentSymbolNumber = 0
    for asymbol in AllData:
        print(asymbol+'\t'+str(CurrentSymbolNumber)+'/'+str(TotalSymbolNumber))
        TheData = AllData[asymbol]
        dates = []
        for anitem in TheData[0]:
            dates.append(convert(anitem.strftime('%Y-%m-%d %H:%M:%S')))
        dates.reverse()
        TheData[0].reverse()
        TheData[1].reverse()
        TheData[2].reverse()
        TheData[3].reverse()
        TheData[4].reverse()
        TheData[5].reverse()
        
        TheData[1] = _fix_zero_data(TheData[1])
        TheData[2] = _fix_zero_data(TheData[2])
        TheData[3] = _fix_zero_data(TheData[3])
        TheData[4] = _fix_zero_data(TheData[4])
        
        i = 0
        TotalNum = len(dates)
        f=open(os.path.join(thedir,asymbol+'.txt'),'w')
        while i+KnownDays<TotalNum:
            KnownDates = dates[i:i+KnownDays]
            KnownOpen = TheData[1][i:i+KnownDays]
            KnownClose = TheData[2][i:i+KnownDays]
            KnownHigh = TheData[3][i:i+KnownDays]
            KnownLow = TheData[4][i:i+KnownDays]
            KnownVolume = TheData[5][i:i+KnownDays]
            savename = os.path.join(thedir,asymbol+'_'+TheData[0][i+KnownDays-1].strftime('%Y-%m-%d')+'.png')
            plt.drawData(KnownDates, KnownOpen, KnownClose,KnownHigh,KnownLow,KnownVolume,True,savename)
            lastClose = KnownClose[-1]
            
            if lastClose == 0.0:
                print('Something is wrong')
                print(asymbol)
                print(KnownClose)
                print(KnownDates)
                return
            f.write(asymbol+'_'+TheData[0][i+KnownDays-1].strftime('%Y-%m-%d')+':::1:::0.0\n')
            i+=1
        f.close()
        CurrentSymbolNumber+=1
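Example No. 3 calls a helper `_fix_zero_data` that is not shown in the snippet. A minimal sketch of what such a helper might look like, assuming (as the later `lastClose == 0.0` guard suggests) that zero prices mark missing values to be forward-filled from the previous valid entry:

def _fix_zero_data(values):
    # Forward-fill: replace each zero entry with the most recent non-zero value.
    fixed = list(values)
    last_valid = 0.0
    for idx, value in enumerate(fixed):
        if value == 0.0:
            fixed[idx] = last_valid
        else:
            last_valid = value
    return fixed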
Example No. 4
            return [True, TheDatas[1][i]]
        # if all time low, sell
        elif ThisSymbolData[APreDate.strftime('%Y-%m-%d')] == True:
            i = 0
            while TheDatas[0][i] != aDateDate:
                i += 1

            return [True, TheDatas[1][i]]
        else:
            return [False, 0.0]


if __name__ == '__main__':

    Step = ''
    XLB = PrepareData.GetBigCompany("../data/XLB.txt")
    XLE = PrepareData.GetBigCompany("../data/XLE.txt")
    XLF = PrepareData.GetBigCompany("../data/XLF.txt")
    XLI = PrepareData.GetBigCompany("../data/XLI.txt")
    XLK = PrepareData.GetBigCompany("../data/XLK.txt")
    XLP = PrepareData.GetBigCompany("../data/XLP.txt")
    XLU = PrepareData.GetBigCompany("../data/XLU.txt")
    XLV = PrepareData.GetBigCompany("../data/XLV.txt")
    XLY = PrepareData.GetBigCompany("../data/XLY.txt")

    UpTrend = [XLB, XLF, XLI, XLK, XLV, XLY]

    BigLists = []
    for anitem in UpTrend:
        for aname in anitem:
            if not aname in BigLists:
Example No. 5
# -*- coding: utf-8 -*-

import sys
sys.path.append("..")
from src.svdd import SVDD
from src.visualize import Visualization as draw
from data import PrepareData as load

# load iris data
trainData, testData, trainLabel, testLabel = load.iris()

# set SVDD parameters
parameters = {
    "positive penalty": 0.9,
    "negative penalty": 0.8,
    "kernel": {
        "type": 'gauss',
        "width": 1 / 24
    },
    "option": {
        "display": 'on'
    }
}

# construct an SVDD model
svdd = SVDD(parameters)

# train SVDD model
svdd.train(trainData, trainLabel)

# test SVDD model
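The snippet is cut off right after the `# test SVDD model` comment. A hypothetical continuation is sketched below; the method names and return values are assumptions about this SVDD wrapper and the `draw` helper, not verified API.

# Assumed test/plot step; svdd.test and draw.testResult are assumptions, not verified API.
distance, accuracy = svdd.test(testData, testLabel)
draw.testResult(svdd, distance)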
Example No. 6
def ttest_model(args):
    EncDecAtt = pickle.load(open(args.setting_file, 'rb'))
    EncDecAtt.initModel(args)
    if args.setting_file and args.model:  # load the model here
        sys.stderr.write('Load model from: [%s]\n' % args.model)
        serializers.load_npz(args.model, EncDecAtt.model)
    else:
        assert 0, "ERROR"
    prepD = PrepareData(EncDecAtt)

    EncDecAtt.setToGPUs(args)
    sys.stderr.write('Finished loading model\n')

    sys.stderr.write('max_length is [%d]\n' % args.max_length)
    sys.stderr.write('w/o generating unk token [%r]\n' % args.wo_unk)
    sys.stderr.write('w/o generating the same word twice [%r]\n' %
                     args.wo_rep_w)
    sys.stderr.write('beam size is [%d]\n' % args.beam_size)
    sys.stderr.write('output is [%s]\n' %
                     (args.output if args.output else 'sys.stdout'))

    ####################################
    decMaxLen = args.max_length

    begin = time.time()
    counter = 0

    fout = open(args.output, 'w') if args.output else sys.stdout

    # TODO: some environments may raise an error unless codecs is used? Needs investigation; switch to io if codecs is unnecessary
    with open(args.src, encoding='utf-8') as f:
        # with codecs.open(args.encDataFile, encoding='utf-8') as f:
        for sentence in f:
            sentence = sentence.strip()  # don't forget to strip...
            # only processing one sentence at a time, in input order, is supported here
            sourceSentence = prepD.sentence2index(sentence,
                                                  EncDecAtt.encoderVocab,
                                                  input_side=True)
            sourceSentence = np.transpose(
                np.reshape(np.array(sourceSentence, dtype=np.int32),
                           (1, len(sourceSentence))))
            # since sentences are processed one at a time, the minibatch size is always 1 at test time
            cMBSize = len(sourceSentence[0])
            outputBeam = decodeByBeamFast(EncDecAtt, sourceSentence, cMBSize,
                                          decMaxLen, args.beam_size, args)
            wposi = 4
            outloop = 1
            # if args.outputAllBeam > 0:
            #    outloop = args.beam_size

            # length-based normalization; using this option is recommended
            if args.length_normalized:
                outputBeam = rerankingByLengthNormalizedLoss(outputBeam, wposi)

            for i in range(outloop):
                outputList = outputBeam[i][wposi]
                # score = outputBeam[i][0]
                if outputList[-1] != '</s>':
                    outputList.append('</s>')
                # if args.outputAllBeam > 0:
                # sys.stdout.write("# {} {} {}\n".format(i, score,
                # len(outputList)))

                print(' '.join(outputList[1:len(outputList) - 1]), file=fout)
                # sys.stdout.write('{}\n'.format(' '.join(outputList[1:len(outputList) - 1])))
                # charlenList = sum([ len(z)+1 for z in
                # -1 because the trailing space at the end of the sentence is not counted
                # outputList[1:len(outputList) - 1] ])-1
            counter += 1
            sys.stderr.write(
                '\rSent.Num: %5d %s  | words=%d | Time: %10.4f ' %
                (counter, outputList, len(outputList), time.time() - begin))
    fout.close()
    sys.stderr.write('\rDONE: %5d | Time: %10.4f\n' %
                     (counter, time.time() - begin))
Example No. 7
import sys
sys.path.append("..")
from src.svdd import SVDD
from src.visualize import Visualization as draw
from data import PrepareData as load
from sklearn.cluster import KMeans

# load data
trainData, testData, trainLabel, testLabel = load.banana()

# kernel list
kernelList = {
    "1": {
        "type": 'gauss',
        "width": 1 / 24
    },
    "2": {
        "type": 'linear',
        "offset": 0
    },
    "3": {
        "type": 'ploy',
        "degree": 2,
        "offset": 0
    },
    "4": {
        "type": 'tanh',
        "gamma": 1e-4,
        "offset": 0
    },
    "5": {
Example No. 8
def train_model(args):
    if args.setting_file:
        sys.stdout.write('# Loading initial data  config=[%s] model=[%s] \n' %
                         (args.setting_file, args.init_model_file))
        EncDecAtt = pickle.load(open(args.setting_file, 'rb'))
        data = PrepareData(EncDecAtt)
    else:
        data = PrepareData(args)
        encoderVocab = pickle.load(open(args.enc_vocab, 'rb'))
        decoderVocab = pickle.load(open(args.dec_vocab, 'rb'))
        EncDecAtt = EncoderDecoderAttention(encoderVocab, decoderVocab, args)

    if args.output_setting_file:
        fout = open(args.output_setting_file + '.setting', 'wb')
        pickle.dump(EncDecAtt, fout)
        fout.close()

    # initialize the model
    EncDecAtt.initModel(args)  # initialize the model once here
    args.embed_size = EncDecAtt.eDim  # forcibly overwrite, just in case
    args.hidden_size = EncDecAtt.hDim  # forcibly overwrite, just in case

    sys.stdout.write('#####################\n')
    sys.stdout.write('# [Params] {}\n'.format(args))
    sys.stdout.write('#####################\n')

    EncDecAtt.setToGPUs(args)  # place the model on the GPU(s) here

    optimizer = setOptimizer(args, EncDecAtt)
    if args.weight_decay:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay_v))
    if args.gradient_clipping:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(args.gradient_clipping_norm))

    ########################################
    # load a pretrained initial model here, if one exists
    if args.setting_file and args.init_model_file:
        sys.stderr.write('Load model from: [%s]\n' % args.init_model_file)
        serializers.load_npz(args.init_model_file, EncDecAtt.model)
    else:  # if there is no pretrained initial model, initialize all parameters
        EncDecAtt.setInitAllParameters(optimizer,
                                       init_type=args.initializer_type,
                                       init_scale=args.initializer_scale)

    encSentLenDict = data.makeSentenceLenDict(args.train_src,
                                              EncDecAtt.encoderVocab,
                                              input_side=True)
    decSentLenDict = data.makeSentenceLenDict(args.train_tgt,
                                              EncDecAtt.decoderVocab,
                                              input_side=False)
    if args.mode_data_shuffle == 0:  # default
        trainData = data.makeBatch4Train(encSentLenDict,
                                         decSentLenDict,
                                         args.batch_size,
                                         shuffle_flag=True)
    if args.valid_src and args.valid_tgt:
        encSentLenDictDevel = data.makeSentenceLenDict(args.valid_src,
                                                       EncDecAtt.encoderVocab,
                                                       input_side=True)
        decSentLenDictDevel = data.makeSentenceLenDict(args.valid_tgt,
                                                       EncDecAtt.decoderVocab,
                                                       input_side=False)
        develData = data.makeBatch4Train(encSentLenDictDevel,
                                         decSentLenDictDevel,
                                         args.batch_size,
                                         shuffle_flag=False)

    prev_loss_valid = 1.0e+100
    prev_acc_valid = 0
    prev_loss_train = 1.0e+100

    # training loop
    for epoch in range(args.epoch):
        ####################################
        # evaluation mode on the dev data
        if args.valid_src and args.valid_tgt:
            train_mode = 0
            begin = time.time()
            sys.stdout.write(
                '# Dev. data | total mini batch bucket size = {0}\n'.format(
                    len(develData)))
            tInfo = train_model_sub(train_mode, epoch, develData, EncDecAtt,
                                    None, begin, args)
            msgA = tInfo.print_strings(train_mode, epoch, 0, 0, 0, begin, args)
            dL = prev_loss_valid - float(tInfo.lossVal)
            sys.stdout.write('\r# Dev.Data | %s | diff: %e\n' %
                             (msgA, dL / max(1, tInfo.instanceNum)))
            # adjust the learning rate here if needed
            if args.optimizer == 'SGD':
                if epoch >= args.learning_rate_decay_from or (
                        epoch >= args.learning_rate_decay_from
                        and tInfo.lossVal > prev_loss_valid
                        and tInfo.corTot < prev_acc_valid):
                    optimizer.lr = max(
                        args.learning_rate * 0.01,
                        optimizer.lr * args.learning_rate_decay_rate)
                sys.stdout.write('SGD Learning Rate: %s  (initial: %s)\n' %
                                 (optimizer.lr, args.learning_rate))
            elif args.optimizer == 'Adam':
                if epoch >= args.learning_rate_decay_from or (
                        epoch >= args.learning_rate_decay_from
                        and tInfo.lossVal > prev_loss_valid
                        and tInfo.corTot < prev_acc_valid):
                    optimizer.alpha = max(
                        args.learning_rate * 0.01,
                        optimizer.alpha * args.learning_rate_decay_rate)
                sys.stdout.write(
                    'Adam Learning Rate: t=%s lr=%s ep=%s alpha=%s beta1=%s beta2=%s\n'
                    % (optimizer.t, optimizer.lr, optimizer.epoch,
                       optimizer.alpha, optimizer.beta1, optimizer.beta2))
            # store the dev loss and accuracy
            prev_loss_valid = tInfo.lossVal
            prev_acc_valid = tInfo.corTot
        ####################################
        # training mode
        # build all the mini batches while shuffling
        # when epoch == 0, process in length order (a workaround for low memory; optional)
        train_mode = 1
        begin = time.time()
        if args.mode_data_shuffle == 0:  # default
            # shuffle the batches grouped by encoder length
            random.shuffle(trainData)
        elif args.mode_data_shuffle == 1:  # shuffle including minibatch composition
            trainData = data.makeBatch4Train(encSentLenDict, decSentLenDict,
                                             args.batch_size, True)
        # shuffle including minibatch composition + first iteration in length order (for debugging)
        elif args.mode_data_shuffle == 2:
            trainData = data.makeBatch4Train(encSentLenDict, decSentLenDict,
                                             args.batch_size, (epoch != 0))
        else:
            assert 0, "ERROR"
        sys.stdout.write(
            '# Train | data shuffle | total mini batch bucket size = {0} | Time: {1:10.4f}\n'
            .format(len(trainData),
                    time.time() - begin))
        # the actual training step
        begin = time.time()
        tInfo = train_model_sub(train_mode, epoch, trainData, EncDecAtt,
                                optimizer, begin, args)
        msgA = tInfo.print_strings(train_mode, epoch, 0, 0, 0, begin, args)
        dL = prev_loss_train - float(tInfo.lossVal)
        sys.stdout.write('\r# Train END %s | diff: %e\n' %
                         (msgA, dL / max(1, tInfo.instanceNum)))
        prev_loss_train = tInfo.lossVal
        ####################################
        # save the model
        if args.output_setting_file:
            if epoch + 1 == args.epoch or (args.eval_frequency != 0 and
                                           (epoch + 1) % args.eval_frequency
                                           == 0):
                fout = args.output_setting_file + '.epoch%s' % (epoch + 1)
                try:
                    sys.stdout.write("#output model [{}]\n".format(fout))
                    serializers.save_npz(fout,
                                         copy.deepcopy(
                                             EncDecAtt.model).to_cpu(),
                                         compression=True)
                    # chaSerial.save_hdf5(
                    #    outputFileName, copy.deepcopy(
                    #        EncDecAtt.model).to_cpu(), compression=9)
                except Exception as e:
                    # even if a memory error or similar occurs, do not terminate;
                    # skip this sample and continue
                    sys.stdout.write('\r# SAVE Error? Skip! {} | {}\n'.format(
                        fout, type(e)))
                    sys.stdout.flush()
    ####################################
    sys.stdout.write('Done\n')
Example No. 9
            if Symbol in self.CurrentList:
                current = self.CurrentList[Symbol]
                current.append([NumberHold, BuyPrice, theDate])
                self.CurrentList[Symbol] = current
            else:
                self.CurrentList[Symbol] = [[NumberHold, BuyPrice, theDate]]

            print('sell ' + str(NumberHold) + ' ' + Symbol + ' with price ' +
                  str(BuyPrice) + ' at ' + theDate)
            print("we remain cash " + str(self.RemainMoney))
            print(self.CurrentList)


if __name__ == '__main__':
    '''BigLists=PrepareData.GetBigCompany("../data/BigCompany.txt")
    for i in [10,20,40,60,80,100,120,140,160,180,200]:
        astrategy = SellAllTimeLowWitList(6000,'2015-11-04','2016-02-04',i,0.05,BigLists)
        astrategy.RunAStrategy()
        print 'i==='+str(i)
        print '=====================================\n\n\n'
        '''

    BigLists = PrepareData.GetBigCompany("../data/BigCompany.txt")
    EndDate = '2017-06-27'
    astrategy = SellAllTimeLowWitList(6000, '2017-01-04', EndDate, 140, 0.03,
                                      BigLists)
    #astrategy.RunAStrategy()
    res = astrategy.GetBuyList(EndDate)

    #res = astrategy.ShouldBeSellNow('AAPL', '2017-06-15', '2017-05-03', 146)
print(res)
Example No. 10
# -*- coding: utf-8 -*-

import sys

sys.path.append("..")
from src.svdd import SVDD
from src.visualize import Visualization as draw
from data import PrepareData as load

# load TE process data
trainData, testData, trainLabel, testLabel = load.TE()

# set SVDD parameters
parameters = {
    "positive penalty": 0.9,
    "negative penalty": [],
    "kernel": {
        "type": 'gauss',
        "width": 1 / 80
    },
    "option": {
        "display": 'on'
    }
}

# construct an SVDD model
svdd = SVDD(parameters)

# train SVDD model
svdd.train(trainData, trainLabel)
Example No. 11
        tf.summary.histogram('histogram', var)


if __name__ == '__main__':

    import scipy.io as sio
    hsi_file = '../hsi_data/Pavia/PaviaU.mat'
    gnd_file = '../hsi_data/Pavia/PaviaU_gt.mat'

    img = sio.loadmat(hsi_file)['paviaU']
    gnd_img = sio.loadmat(gnd_file)['paviaU_gt']
    img = img.astype(np.float32)
    gnd_img = gnd_img.astype(np.int32)

    # prepare data
    pd = PrepareData(img, gnd_img)
    train_data = pd.train_data

    # prepare nets
    config_ = config()
    cdl = SAE(img, gnd_img, config_)

    # sess = tf.InteractiveSession()
    # sess.run(tf.global_variables_initializer())
    # x, y = train_data[0],train_data[1]
    # sess.run(cdl.input_layer, {cdl.input_: x})

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        cdl.load_AE_weights(sess, train_data)