def load_data(train_data_file, test_data):
    # Train data.
    word_avg = WordAverage.WordAverage(train_data_file).word_avg()
    X, y = PrepareData.Data(word_avg).train_data(train_data_file)
    # Test data.
    x = PrepareData.Data(word_avg).test_data(test_data)
    return X, y, x
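# A minimal usage sketch, not part of the original module: the file paths and
# the scikit-learn classifier below are illustrative assumptions only.
from sklearn.linear_model import LogisticRegression

X, y, x = load_data('train_data.csv', 'test_data.csv')  # hypothetical paths
clf = LogisticRegression().fit(X, y)  # stand-in downstream model, for illustration
predictions = clf.predict(x)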
def PrepareLearningData(StockFile):
    alist = PrepareData.GetBigCompany(StockFile)
    # alist = ['aapl', 'aap', 'orcl', 'f', 'azo']
    AllData = MyPredictDB.GetAllDataFromAList(alist, '2018-02-22', 600)
    GetPicturesAndLabels(AllData, KnownDays=KnownDays, PredictDays=10,
                         StepDays=20, UpThreshold=0.05)
def PreparePredictData(StockFile, TheDate):
    '''
    @param TheDate: e.g. 2018-02-02
    '''
    ###
    # delete the old predict files first
    ###
    thedir = './data/Stock/predict/'
    for root, subdir, files in os.walk(thedir):
        for afile in files:
            os.remove(os.path.join(thedir, afile))

    alist = PrepareData.GetBigCompany(StockFile)
    # alist = ['aapl']
    AllData = MyPredictDB.GetAllDataFromAList(alist, TheDate, KnownDays + 1)
    plt = myplt()
    convert = mdates.strpdate2num('%Y-%m-%d %H:%M:%S')

    # get all data
    # BigLists = ['AAPL']
    # AllData = MyPredictDB.GetAllDataFromAList(BigLists, '2017-07-21', 400)
    TotalSymbolNumber = len(AllData)
    CurrentSymbolNumber = 0
    for asymbol in AllData:
        print(asymbol + '\t' + str(CurrentSymbolNumber) + '/' + str(TotalSymbolNumber))
        TheData = AllData[asymbol]
        dates = []
        for anitem in TheData[0]:
            dates.append(convert(anitem.strftime('%Y-%m-%d %H:%M:%S')))
        dates.reverse()
        TheData[0].reverse()
        TheData[1].reverse()
        TheData[2].reverse()
        TheData[3].reverse()
        TheData[4].reverse()
        TheData[5].reverse()
        TheData[1] = _fix_zero_data(TheData[1])
        TheData[2] = _fix_zero_data(TheData[2])
        TheData[3] = _fix_zero_data(TheData[3])
        TheData[4] = _fix_zero_data(TheData[4])

        i = 0
        TotalNum = len(dates)
        f = open(os.path.join(thedir, asymbol + '.txt'), 'w')
        while i + KnownDays < TotalNum:
            KnownDates = dates[i:i + KnownDays]
            KnownOpen = TheData[1][i:i + KnownDays]
            KnownClose = TheData[2][i:i + KnownDays]
            KnownHigh = TheData[3][i:i + KnownDays]
            KnownLow = TheData[4][i:i + KnownDays]
            KnownVolume = TheData[5][i:i + KnownDays]
            savename = os.path.join(
                thedir,
                asymbol + '_' + TheData[0][i + KnownDays - 1].strftime('%Y-%m-%d') + '.png')
            plt.drawData(KnownDates, KnownOpen, KnownClose, KnownHigh,
                         KnownLow, KnownVolume, True, savename)
            lastClose = KnownClose[-1]
            if lastClose == 0.0:
                print('Something is wrong')
                print(asymbol)
                print(KnownClose)
                print(KnownDates)
                return
            f.write(asymbol + '_' +
                    TheData[0][i + KnownDays - 1].strftime('%Y-%m-%d') +
                    ':::1:::0.0\n')
            i += 1
        f.close()
        CurrentSymbolNumber += 1
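# The _fix_zero_data() helper used above is not shown in this file; a minimal
# sketch, under the assumption that it forward-fills zero entries (missing
# quotes) with the most recent non-zero value:
def _fix_zero_data(values):
    fixed = list(values)
    for i in range(1, len(fixed)):
        if fixed[i] == 0.0:
            fixed[i] = fixed[i - 1]
    return fixed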
            return [True, TheDatas[1][i]]
        # if all time low, sell
        elif ThisSymbolData[APreDate.strftime('%Y-%m-%d')] == True:
            i = 0
            while TheDatas[0][i] != aDateDate:
                i += 1
            return [True, TheDatas[1][i]]
        else:
            return [False, 0.0]


if __name__ == '__main__':
    Step = ''
    XLB = PrepareData.GetBigCompany("../data/XLB.txt")
    XLE = PrepareData.GetBigCompany("../data/XLE.txt")
    XLF = PrepareData.GetBigCompany("../data/XLF.txt")
    XLI = PrepareData.GetBigCompany("../data/XLI.txt")
    XLK = PrepareData.GetBigCompany("../data/XLK.txt")
    XLP = PrepareData.GetBigCompany("../data/XLP.txt")
    XLU = PrepareData.GetBigCompany("../data/XLU.txt")
    XLV = PrepareData.GetBigCompany("../data/XLV.txt")
    XLY = PrepareData.GetBigCompany("../data/XLY.txt")
    UpTrend = [XLB, XLF, XLI, XLK, XLV, XLY]
    BigLists = []
    for anitem in UpTrend:
        for aname in anitem:
            if aname not in BigLists:
                BigLists.append(aname)
# -*- coding: utf-8 -*-
import sys
sys.path.append("..")

from src.svdd import SVDD
from src.visualize import Visualization as draw
from data import PrepareData as load

# load iris data
trainData, testData, trainLabel, testLabel = load.iris()

# set SVDD parameters
parameters = {
    "positive penalty": 0.9,
    "negative penalty": 0.8,
    "kernel": {"type": 'gauss', "width": 1 / 24},
    "option": {"display": 'on'},
}

# construct an SVDD model
svdd = SVDD(parameters)

# train SVDD model
svdd.train(trainData, trainLabel)

# test SVDD model
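# A possible continuation for the "test SVDD model" step; svdd.test() and the
# Visualization helper below are assumptions inferred from the parallel SVDD
# examples in this repo, not confirmed API.
distance, accuracy = svdd.test(testData, testLabel)
draw.testResult(svdd, distance)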
def ttest_model(args):
    EncDecAtt = pickle.load(open(args.setting_file, 'rb'))
    EncDecAtt.initModel(args)
    if args.setting_file and args.model:
        # load the trained model here
        sys.stderr.write('Load model from: [%s]\n' % args.model)
        serializers.load_npz(args.model, EncDecAtt.model)
    else:
        assert 0, "ERROR"
    prepD = PrepareData(EncDecAtt)
    EncDecAtt.setToGPUs(args)
    sys.stderr.write('Finished loading model\n')
    sys.stderr.write('max_length is [%d]\n' % args.max_length)
    sys.stderr.write('w/o generating unk token [%r]\n' % args.wo_unk)
    sys.stderr.write('w/o generating the same words in twice [%r]\n' % args.wo_rep_w)
    sys.stderr.write('beam size is [%d]\n' % args.beam_size)
    sys.stderr.write('output is [%s]\n' %
                     (args.output if args.output else 'stdout'))

    ####################################
    decMaxLen = args.max_length
    begin = time.time()
    counter = 0
    fout = open(args.output, 'w') if args.output else sys.stdout
    # TODO: some environments may fail without codecs? needs investigation;
    # if not needed, switch to io
    with open(args.src, encoding='utf-8') as f:
        # with codecs.open(args.encDataFile, encoding='utf-8') as f:
        for sentence in f:
            sentence = sentence.strip()  # do not forget strip()
            # only one-sentence-at-a-time processing, in input order, is
            # supported here
            sourceSentence = prepD.sentence2index(
                sentence, EncDecAtt.encoderVocab, input_side=True)
            sourceSentence = np.transpose(
                np.reshape(np.array(sourceSentence, dtype=np.int32),
                           (1, len(sourceSentence))))
            # sentences are processed one by one, so at test time the
            # minibatch size is always 1
            cMBSize = len(sourceSentence[0])
            outputBeam = decodeByBeamFast(EncDecAtt, sourceSentence, cMBSize,
                                          decMaxLen, args.beam_size, args)
            wposi = 4
            outloop = 1
            # if args.outputAllBeam > 0:
            #     outloop = args.beam_size

            # length-based normalization; using this option is recommended
            if args.length_normalized:
                outputBeam = rerankingByLengthNormalizedLoss(outputBeam, wposi)

            for i in range(outloop):
                outputList = outputBeam[i][wposi]
                # score = outputBeam[i][0]
                if outputList[-1] != '</s>':
                    outputList.append('</s>')
                # if args.outputAllBeam > 0:
                #     sys.stdout.write("# {} {} {}\n".format(i, score,
                #                                            len(outputList)))
                print(' '.join(outputList[1:len(outputList) - 1]), file=fout)
                # sys.stdout.write('{}\n'.format(
                #     ' '.join(outputList[1:len(outputList) - 1])))
                # charlenList = sum([len(z) + 1 for z in
                #                    outputList[1:len(outputList) - 1]]) - 1
                # (-1 because the trailing space at the end of the sentence
                #  is not counted)
            counter += 1
            sys.stderr.write(
                '\rSent.Num: %5d %s | words=%d | Time: %10.4f ' %
                (counter, outputList, len(outputList), time.time() - begin))
    if args.output:
        fout.close()
    sys.stderr.write('\rDONE: %5d | Time: %10.4f\n' %
                     (counter, time.time() - begin))
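# rerankingByLengthNormalizedLoss() is referenced above but not defined here;
# a hypothetical sketch, assuming each beam entry keeps its accumulated loss
# at index 0 and its token list at index wposi, and that a lower
# length-normalized loss is better:
def rerankingByLengthNormalizedLoss(outputBeam, wposi):
    return sorted(outputBeam,
                  key=lambda entry: entry[0] / max(1, len(entry[wposi])))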
import sys
sys.path.append("..")

from src.svdd import SVDD
from src.visualize import Visualization as draw
from data import PrepareData as load
from sklearn.cluster import KMeans

# load data
trainData, testData, trainLabel, testLabel = load.banana()

# kernel list
kernelList = {
    "1": {"type": 'gauss', "width": 1 / 24},
    "2": {"type": 'linear', "offset": 0},
    "3": {"type": 'ploy', "degree": 2, "offset": 0},
    "4": {"type": 'tanh', "gamma": 1e-4, "offset": 0},
    "5": {
def train_model(args):
    if args.setting_file:
        sys.stdout.write(
            '# Loading initial data config=[%s] model=[%s] \n' %
            (args.setting_file, args.init_model_file))
        EncDecAtt = pickle.load(open(args.setting_file, 'rb'))
        data = PrepareData(EncDecAtt)
    else:
        data = PrepareData(args)
        encoderVocab = pickle.load(open(args.enc_vocab, 'rb'))
        decoderVocab = pickle.load(open(args.dec_vocab, 'rb'))
        EncDecAtt = EncoderDecoderAttention(encoderVocab, decoderVocab, args)
    if args.output_setting_file:
        fout = open(args.output_setting_file + '.setting', 'wb')
        pickle.dump(EncDecAtt, fout)
        fout.close()

    # initialize the model here
    EncDecAtt.initModel(args)
    args.embed_size = EncDecAtt.eDim    # forced overwrite, just in case
    args.hidden_size = EncDecAtt.hDim   # forced overwrite, just in case
    sys.stdout.write('#####################\n')
    sys.stdout.write('# [Params] {}'.format(args))
    sys.stdout.write('#####################\n')

    EncDecAtt.setToGPUs(args)  # attach the model to the GPU(s) here
    optimizer = setOptimizer(args, EncDecAtt)
    if args.weight_decay:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay_v))
    if args.gradient_clipping:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(args.gradient_clipping_norm))

    ########################################
    # load a pre-trained initial model here if one exists
    if args.setting_file and args.init_model_file:
        sys.stderr.write('Load model from: [%s]\n' % args.init_model_file)
        serializers.load_npz(args.init_model_file, EncDecAtt.model)
    else:
        # otherwise, initialize all parameters
        EncDecAtt.setInitAllParameters(optimizer,
                                       init_type=args.initializer_type,
                                       init_scale=args.initializer_scale)

    encSentLenDict = data.makeSentenceLenDict(args.train_src,
                                              EncDecAtt.encoderVocab,
                                              input_side=True)
    decSentLenDict = data.makeSentenceLenDict(args.train_tgt,
                                              EncDecAtt.decoderVocab,
                                              input_side=False)
    if args.mode_data_shuffle == 0:  # default
        trainData = data.makeBatch4Train(encSentLenDict, decSentLenDict,
                                         args.batch_size, shuffle_flag=True)
    if args.valid_src and args.valid_tgt:
        encSentLenDictDevel = data.makeSentenceLenDict(args.valid_src,
                                                       EncDecAtt.encoderVocab,
                                                       input_side=True)
        decSentLenDictDevel = data.makeSentenceLenDict(args.valid_tgt,
                                                       EncDecAtt.decoderVocab,
                                                       input_side=False)
        develData = data.makeBatch4Train(encSentLenDictDevel,
                                         decSentLenDictDevel,
                                         args.batch_size, shuffle_flag=False)

    prev_loss_valid = 1.0e+100
    prev_acc_valid = 0
    prev_loss_train = 1.0e+100

    # training loop
    for epoch in range(args.epoch):
        ####################################
        # evaluation mode on the dev data
        if args.valid_src and args.valid_tgt:
            train_mode = 0
            begin = time.time()
            sys.stdout.write(
                '# Dev. data | total mini batch bucket size = {0}\n'.format(
                    len(develData)))
            tInfo = train_model_sub(train_mode, epoch, develData, EncDecAtt,
                                    None, begin, args)
            msgA = tInfo.print_strings(train_mode, epoch, 0, 0, 0, begin, args)
            dL = prev_loss_valid - float(tInfo.lossVal)
            sys.stdout.write('\r# Dev.Data | %s | diff: %e\n' %
                             (msgA, dL / max(1, tInfo.instanceNum)))
            # change the learning rate here if needed
            if args.optimizer == 'SGD':
                if epoch >= args.learning_rate_decay_from or (
                        epoch >= args.learning_rate_decay_from and
                        tInfo.lossVal > prev_loss_valid and
                        tInfo.corTot < prev_acc_valid):
                    optimizer.lr = max(
                        args.learning_rate * 0.01,
                        optimizer.lr * args.learning_rate_decay_rate)
                    sys.stdout.write('SGD Learning Rate: %s (initial: %s)\n' %
                                     (optimizer.lr, args.learning_rate))
            elif args.optimizer == 'Adam':
                if epoch >= args.learning_rate_decay_from or (
                        epoch >= args.learning_rate_decay_from and
                        tInfo.lossVal > prev_loss_valid and
                        tInfo.corTot < prev_acc_valid):
                    optimizer.alpha = max(
                        args.learning_rate * 0.01,
                        optimizer.alpha * args.learning_rate_decay_rate)
                    sys.stdout.write(
                        'Adam Learning Rate: t=%s lr=%s ep=%s alpha=%s '
                        'beta1=%s beta2=%s\n' %
                        (optimizer.t, optimizer.lr, optimizer.epoch,
                         optimizer.alpha, optimizer.beta1, optimizer.beta2))
            # keep the dev loss and accuracy
            prev_loss_valid = tInfo.lossVal
            prev_acc_valid = tInfo.corTot

        ####################################
        # training mode
        # build all mini batches while shuffling
        # when epoch == 0, batches are in length order (a workaround for
        # running out of memory; optional)
        train_mode = 1
        begin = time.time()
        if args.mode_data_shuffle == 0:  # default
            # shuffle the batches bucketed by encoder length
            random.shuffle(trainData)
        elif args.mode_data_shuffle == 1:
            # shuffle including within mini batches
            trainData = data.makeBatch4Train(encSentLenDict, decSentLenDict,
                                             args.batch_size, True)
        elif args.mode_data_shuffle == 2:
            # shuffle including mini batches, but the first iteration is in
            # length order (for debugging)
            trainData = data.makeBatch4Train(encSentLenDict, decSentLenDict,
                                             args.batch_size, (epoch != 0))
        else:
            assert 0, "ERROR"
        sys.stdout.write(
            '# Train | data shuffle | total mini batch bucket size = {0} | '
            'Time: {1:10.4f}\n'.format(len(trainData), time.time() - begin))

        # the actual training
        begin = time.time()
        tInfo = train_model_sub(train_mode, epoch, trainData, EncDecAtt,
                                optimizer, begin, args)
        msgA = tInfo.print_strings(train_mode, epoch, 0, 0, 0, begin, args)
        dL = prev_loss_train - float(tInfo.lossVal)
        sys.stdout.write('\r# Train END %s | diff: %e\n' %
                         (msgA, dL / max(1, tInfo.instanceNum)))
        prev_loss_train = tInfo.lossVal

        ####################################
        # save the model
        if args.output_setting_file:
            if epoch + 1 == args.epoch or (
                    args.eval_frequency != 0 and
                    (epoch + 1) % args.eval_frequency == 0):
                fout = args.output_setting_file + '.epoch%s' % (epoch + 1)
                try:
                    sys.stdout.write("#output model [{}]\n".format(fout))
                    serializers.save_npz(
                        fout, copy.deepcopy(EncDecAtt.model).to_cpu(),
                        compression=True)
                    # chaSerial.save_hdf5(
                    #     outputFileName, copy.deepcopy(
                    #         EncDecAtt.model).to_cpu(), compression=9)
                except Exception as e:
                    # even if a memory error etc. occurs, do not terminate;
                    # skip this sample and continue
                    sys.stdout.write('\r# SAVE Error? Skip! {} | {}\n'.format(
                        fout, type(e)))
                    sys.stdout.flush()
    ####################################
    sys.stdout.write('Done\n')
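# setOptimizer() is called above but not shown here; a hypothetical sketch,
# assuming it simply maps args.optimizer to a Chainer optimizer (chainer is
# already imported in this module) and attaches the model:
def setOptimizer(args, EncDecAtt):
    if args.optimizer == 'SGD':
        optimizer = chainer.optimizers.SGD(lr=args.learning_rate)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learning_rate)
    else:
        assert 0, "ERROR: unsupported optimizer [%s]" % args.optimizer
    optimizer.setup(EncDecAtt.model)
    return optimizer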
        if Symbol in self.CurrentList:
            current = self.CurrentList[Symbol]
            current.append([NumberHold, BuyPrice, theDate])
            self.CurrentList[Symbol] = current
        else:
            self.CurrentList[Symbol] = [[NumberHold, BuyPrice, theDate]]
        print('sell ' + str(NumberHold) + ' ' + Symbol + ' with price ' +
              str(BuyPrice) + ' at ' + theDate)
        print("remaining cash " + str(self.RemainMoney))
        print(self.CurrentList)


if __name__ == '__main__':
    '''BigLists = PrepareData.GetBigCompany("../data/BigCompany.txt")
    for i in [10, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200]:
        astrategy = SellAllTimeLowWitList(6000, '2015-11-04', '2016-02-04',
                                          i, 0.05, BigLists)
        astrategy.RunAStrategy()
        print('i===' + str(i))
        print('=====================================\n\n\n')
    '''
    BigLists = PrepareData.GetBigCompany("../data/BigCompany.txt")
    EndDate = '2017-06-27'
    astrategy = SellAllTimeLowWitList(6000, '2017-01-04', EndDate, 140, 0.03,
                                      BigLists)
    # astrategy.RunAStrategy()
    res = astrategy.GetBuyList(EndDate)
    # res = astrategy.ShouldBeSellNow('AAPL', '2017-06-15', '2017-05-03', 146)
    print(res)
# -*- coding: utf-8 -*-
import sys
sys.path.append("..")

from src.svdd import SVDD
from src.visualize import Visualization as draw
from data import PrepareData as load

# load TE process data
trainData, testData, trainLabel, testLabel = load.TE()

# set SVDD parameters
parameters = {
    "positive penalty": 0.9,
    "negative penalty": [],
    "kernel": {"type": 'gauss', "width": 1 / 80},
    "option": {"display": 'on'},
}

# construct an SVDD model
svdd = SVDD(parameters)

# train SVDD model
svdd.train(trainData, trainLabel)
    tf.summary.histogram('histogram', var)


if __name__ == '__main__':
    import scipy.io as sio

    hsi_file = '../hsi_data/Pavia/PaviaU.mat'
    gnd_file = '../hsi_data/Pavia/PaviaU_gt.mat'
    img = sio.loadmat(hsi_file)['paviaU']
    gnd_img = sio.loadmat(gnd_file)['paviaU_gt']
    img = img.astype(np.float32)
    gnd_img = gnd_img.astype(np.int32)

    # prepare data
    pd = PrepareData(img, gnd_img)
    train_data = pd.train_data

    # prepare nets
    config_ = config()
    cdl = SAE(img, gnd_img, config_)

    # sess = tf.InteractiveSession()
    # sess.run(tf.global_variables_initializer())
    # x, y = train_data[0], train_data[1]
    # sess.run(cdl.input_layer, {cdl.input_: x})
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cdl.load_AE_weights(sess, train_data)