Example #1
def getInputData(batchsize):
    readdata = ReadData()
    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batchsize)
    example_batch = tf.reshape(features, [-1])
    # .eval() needs an active default tf.Session; dict1 (character -> index map) is built elsewhere (see Example #28)
    item = tf.string_split(example_batch, delimiter="").values.eval()
    return [dict1[alp.decode().lower()] for alp in list(item)]
Example #2
	def write2DB(self):
		from ReadData import ReadData

		#rd = ReadData(self.morning)

		ymd = str(self.date_time.year)+"-"+str(self.date_time.month)+"-"+str(self.date_time.day)+"-"+str(self.date_time.weekday())
		if self.morning==True:
			ymd = "m"+ymd
		file_bak = "../daily/" + ymd + ".txt"

		rd = ReadData(self.morning)
		rd.gbk2utf8("深沪A股.TXT", file_bak)
		f = open(file_bak)
		
		try:
			con = mdb.connect('localhost', 'root', '1', 'financedb', charset='utf8')
			cur = con.cursor()

			Regex1 = re.compile(r"\d")
			while True:
				line = f.readline()
				if not line:
					break
				if not re.search(Regex1, line):
					print line
					continue

				data = rd.splitItem(line)
				for w in data: print w,
				print ""

				create_time = self.date_time.strftime('%Y-%m-%d %H:%M:%S')
				sql = "INSERT INTO " + self.table_name + \
					"(idx, name, rise_rate, cur_price, rise_price, buy,\
					sale, total_stock, cur_stock, rise_v, exchange, day_begin,\
					up, down, yesterday, profit, total_price, quantity_ratio,\
					industry, area, amplitude, ave_price, inner_market,\
					outer_market, inner_outer_ratio, buy_volume, sale_volume,\
					currency_capital, market_cap, create_time)\
					VALUES( \'" + data[0] +"\',\'"+ data[1] +"\',"+ data[2] +","\
					+ data[3] +","+ data[4] +","+ data[5] +","+ data[6] +","\
					+ data[7] +","+ data[8] +","+ data[9] +","+ data[10] +","\
					+ data[11] +","+ data[12] +","+ data[13] +","+ data[14] +","\
					+ data[15] +","+ data[16] +","+ data[17] +",\'"+ data[18] +"\',\'"\
					+ data[19] +"\'," + data[20] +","+ data[21] +"," + data[22] +","\
					+ data[23] +","+ data[24] +","+ data[25] +","+ data[26] +","\
					+ data[27] +","+ data[28] +",\'"+ create_time + "\')"
				print sql
				cur.execute(sql)

		except mdb.Error, e:
			print "Error %d: %s" % (e.args[0], e.args[1])
			sys.exit(1)
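
A side note on the example above (not part of the original code): the INSERT statement is assembled by string concatenation, which forces manual quoting and is fragile. A hedged sketch of the same row insert using DB-API parameter binding, with the column list abbreviated for illustration:

# Hypothetical variant of the insert in write2DB using parameter binding.
# The column list is abbreviated; `data` is the list returned by rd.splitItem(line).
def insert_row(con, table_name, data, create_time):
    cols = "(idx, name, rise_rate, cur_price, create_time)"  # abbreviated
    sql = ("INSERT INTO " + table_name + " " + cols +
           " VALUES (%s, %s, %s, %s, %s)")
    cur = con.cursor()
    cur.execute(sql, (data[0], data[1], data[2], data[3], create_time))
    con.commit()
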
Example #3
def DenoiseByEMD():
    """
    Denoise the original signal by dropping its first `denoiseLevel` IMF
    components (the noisiest levels) and reconstructing from the remaining ones.
    """
    globalData = ReadData()
    removeLevel = Settings.denoiseLevel
    filename = './Cashe/globalData_EMD_{}.pickle'.format(removeLevel)
    # if the denoising is not used (removeLevel = 0)
    if removeLevel < 1:
        globalDataEMD = globalData
        with open(filename, 'wb') as f:
            pickle.dump(globalDataEMD, f)
        return globalDataEMD
    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            globalDataEMD = pickle.load(f)
        return globalDataEMD
    # Imfs
    globalDataImfs = GetImfs(globalData)
    globalDataEMD = {}
    for key in globalData:
        df = globalData[key]
        dfImfs = globalDataImfs[key]
        NLevels = dfImfs.shape[1]
        tsReons = dfImfs[range(Settings.denoiseLevel, NLevels)].sum(axis=1)
        tsReons.name = df.columns[0]
        globalDataEMD[key] =  pd.DataFrame(tsReons)
    # Save the data to the filename as a pickle file
    with open(filename, 'wb') as f:
        pickle.dump(globalDataEMD, f)
    return globalDataEMD
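
For context (not part of the original example): GetImfs and Settings above are project-specific helpers that are not shown here. A minimal sketch of the underlying reconstruction step, assuming the PyEMD package is available, might look like this:

# Hypothetical sketch of EMD denoising for a single 1-D signal: decompose into
# IMFs and rebuild the signal from IMF index `remove_level` onward.
import numpy as np
from PyEMD import EMD

def denoise_signal(signal, remove_level=1):
    signal = np.asarray(signal, dtype=float)
    if remove_level < 1:                      # denoising disabled
        return signal
    imfs = EMD().emd(signal)                  # shape: (n_imfs, n_samples)
    return imfs[remove_level:].sum(axis=0)    # drop the first, noisiest IMFs
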
Example #4
def main(args):
    #---set up path for training and test data (NUAA face liveness dataset)--------------
    model_name = args.model
    learning_rate = args.lr
    epoch = args.epoch
    with open(path) as file:  # `path` (a JSON config with per-model accuracy/checkpoint entries) is defined elsewhere
        print("Reading from json ... ")
        data = json.load(file)[model_name]
        accuracy = data['accuracy']
        model_file = data['file']
    print("Reading input from the NUAA dataset ... ")
    readd = ReadData()
    clientdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ClientNormalized/'
    imposterdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ImposterNormalized/'
    client_train_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_train_normalized.txt'
    imposter_train_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_train_normalized.txt'
    
    client_test_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_test_normalized.txt'
    imposter_test_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_test_normalized.txt'

    #---------------read training, test data----------------
    train_images, train_labels = readd.read_data(clientdir, imposterdir, client_train_normaized_file, imposter_train_normaized_file)
    test_images, test_labels = readd.read_data(clientdir, imposterdir, client_test_normaized_file, imposter_test_normaized_file)


    for i in range(0,1):

        #--pick one of the following models for face liveness detection---
        if model_name =='CNN':
            print("Selected CNN")
            cnn = CNNModel()  # simple CNN model for face liveness detection---
        else:
            print("Selected Inception")
            cnn = InceptionV4Model()  #Inception model for liveness detection

        if args.resume:
            print("Resuming from the best model")
            model = cnn.load_model(model_file)#to use pretrained model
        else:
            print("Starting from scratch by creating a new model")
            model = cnn.create_model(learning_rate)  # create and train a new model   
        print("Starting training ...")
        model = cnn.train_model(model, train_images,train_labels,test_images,test_labels, epoch, accuracy, model_file, model_name)
      
        test_loss, test_acc = cnn.evaluate(model, test_images,  test_labels)
        print('iteration = ' + str(i) + ' ---------------------------------------------========')
    print("**************************************Done***************************************")
Example #5
    def __init__(self):
        self.data = ReadData().please_read_data()

        self.cluster1 = [
        ]  #define cluster array so we can cluster elements to them
        self.cluster2 = []
        self.cluster3 = []
        self.centroid1 = []  #define centroid array so we can update them
        self.centroid2 = []
        self.centroid3 = []
Example #6
        '''Runs training until cost values converge to within some interval'''
        val = self.linreg(learning_rate, ind, dep)
        old_val = 0
        #Can change this variable to decide how much convergence is wanted
        while np.absolute(val-old_val) > 1:
            old_val = val
            val = self.linreg(learning_rate, ind, dep)
        self.getTheta()

    def getTheta(self):
        '''Prints out Value for current weight and bias variables'''
        print "Weight     Bias"
        print self.weight, self.bias

if __name__ == '__main__':
    #command line to run this properly
    #python NiceLinReg.py data.csv [2,3] 1
    np.random.seed(42)
    loader = ReadData()
    loader.load(sys.argv[1], sys.argv[2], int(sys.argv[3]))
    print "Temp Only"
    tempOnly = NiceLinReg()
    dailyTemp = loader.getInd(0)
    DOJIA = loader.getDep()
    tempOnly.train(.000005, dailyTemp, DOJIA)

    print "\nDiff in Temp and avg highest recorded temp"
    diff = NiceLinReg()
    diffList = loader.diff(0,1)
    diff.train(0.000000000049, diffList, DOJIA)
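
As a hedged aside (not the original author's code): the convergence loop above stops once the absolute change in cost drops below a fixed threshold of 1. If a relative tolerance is preferred, a small helper along these lines could replace that test:

# Hypothetical relative-tolerance stopping rule for the training loop above.
import numpy as np

def converged(val, old_val, rel_tol=1e-6):
    # stop when the change in cost is small relative to the previous cost
    return np.absolute(val - old_val) <= rel_tol * max(np.absolute(old_val), 1.0)
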
Example #7
    def get_HvM(self, ):

        # Read Meta
        Meta = ReadMeta(neuralfeaturesdir)
        DF_img = Meta.get_DF_img()
        DF_neu = Meta.get_DF_neu()
        times = Meta.get_times()

        # Read Neural data
        Data = ReadData(datadir, DF_neu)
        IT, V4 = Data.get_data()

        D = Mapping.get_Neu_trial_V36(IT[1:], [70, 170], times)
        image_indices = np.random.randint(low=0, high=D.shape[1], size=ni)
        D = D[:, image_indices, :]
        D = np.swapaxes(D, 0, 1)
        nf = D.shape[1]
        nt = D.shape[2]

        mu = np.zeros((self.nf, self.ni))
        sd = np.zeros((self.nf, self.ni))
        for f in range(self.nf):
            for i in range(self.ni):
                mu[f, i] = D[i, f, :].mean()
                sd[f, i] = D[i, f, :].std()
        hf = h5py.File(resultdir + 'HvM_stats.h5', 'w')
        hf.create_dataset('mu', data=mu)
        hf.create_dataset('sd', data=sd)
        hf.close()

        # #test synthetic as HvM
        # nf = 168
        # nt = 46
        # noise_dist = 'poisson'
        # sds = np.logspace(-1, 1, num=int(nf))
        # D = np.zeros((ni, nf, nt))
        # D_mean = np.random.rand(ni, nf)
        # for tr in range(nt):
        #     D[:, :, tr] = D_mean
        #
        # noise1 = np.zeros((ni, nf, int(nt * splitfract)))
        # noise2 = np.zeros((ni, nf, int(nt * splitfract)))
        # for i in range(ni):
        #     if noise_dist == 'normal':
        #         n = np.random.rand()
        #         noise1[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
        #         noise2[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
        #     elif noise_dist == 'poisson':
        #         n = np.random.rand()
        #         noise1[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
        #         noise2[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
        #
        #     D[:, :, :int(nt * splitfract)] = D[:, :, :int(nt * splitfract)] + noise1
        #     D[:, :, int(nt * splitfract):] = D[:, :, int(nt * splitfract):] + noise2

        # to test HvM as synthetic
        # hf = h5py.File(resultdir+'D.h5', 'w')
        # hf.create_dataset('D', data=D)
        # hf.close()

        sds = []
        Collinearity = 'HvM'
        noise_dist = 'HvM'

        return D
Example #8
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May  5 01:20:32 2018

@author: computer
"""

from ReadData import ReadData
from catboost import Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split
import pandas as pd
import gc 

data = ReadData()

data.X_train.drop(data.X_train.index[:int( 1e+8 )], inplace=True)
data.y_train.drop(data.y_train.index[:int( 1e+8 )], inplace=True)



X_train, X_valid, y_train, y_valid = train_test_split(data.X_train, data.y_train, test_size=0.1)

cat_features = X_train.columns.get_indexer_for(data.CATEGORICAL_FEATURES)

del data; gc.collect()

train_pool = Pool(X_train, y_train, cat_features=cat_features)

del X_train; del y_train; gc.collect()
Example #9
            5, 1, stride=2, activation_fn=tf.nn.sigmoid)).tensor


def get_generator_loss(D2):
    '''Loss for the generator: maximize the probability of generating images
    that the discriminator cannot differentiate from real ones.

    Returns:
        see the paper
    '''
    return tf.reduce_mean(
        tf.nn.relu(D2) - D2 + tf.log(1.0 + tf.exp(-tf.abs(D2))))
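
# Note added for clarity: with m = max(D2, 0), the expression above equals
#   m - D2 + log(1 + exp(-|D2|)) = log(1 + exp(D2)) - D2 = log(1 + exp(-D2)),
# i.e. softplus(-D2) = -log(sigmoid(D2)): the usual non-saturating generator
# loss on the discriminator logits D2, written so it cannot overflow for
# large |D2|.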


if __name__ == "__main__":
    rd = ReadData()

    input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, 32 * 32])

    with pt.defaults_scope(activation_fn=tf.nn.elu,
                           batch_normalize=True,
                           learned_moments_update_rate=0.0003,
                           variance_epsilon=0.001,
                           scale_after_normalization=True):
        with tf.variable_scope("model"):
            D1 = discriminator(input_tensor)  # positive examples
            D_params_num = len(tf.trainable_variables())
            G = generator()

        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
Example #10
from ReadData import ReadData
import tensorflow as tf
import os.path
import numpy as np

rd = ReadData()

input_dim = 1024  # 32 * 32
hidden_encoder_dim = 400
hidden_decoder_dim = 400
latent_dim = 20
lam = 0


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.001)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0., shape=shape)
    return tf.Variable(initial)


# input
x = tf.placeholder("float", shape=[None, input_dim])  #  input_dim=32*32
l2_loss = tf.constant(0.0)  # l2_loss is a number

# hidden W, b
W_encoder_input_hidden = weight_variable([input_dim,
                                          hidden_encoder_dim])  # [1024, 400]
Example #11
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = RecurrentCNN(no_filters=hidden_size, no_classes=args.no_classes)
elif args.model == 'bilstm_rcnn':
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = BiLSTMRecurrentCNN(hidden_size, no_classes=args.no_classes)

model = model.build(inputs)
model.load_weights(args.weights)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

reader = ReadData(dataset=args.dataset,
                  text_embedding_path=args.text_embedding,
                  video_feature_path=args.video_features,
                  data_shape=inputs,
                  train_val_split=1.)

results = []
labels = []

prog_bar = tqdm(total=int(reader.val_size / args.batch_size))

num_batches = int(reader.val_size / args.batch_size)

i = 0

for x, y in reader.generator_val(batch_size=args.batch_size):
    label = list(y)
    result = list(model.predict(x))
Example #12
from GA import GA
from ReadData import ReadData
from utils import *

data = ReadData("easy_01_tsp.txt")
params = {'popSize': 100, 'noGen': 100}
ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()
bestFitness = 0
bestDist = 0
bestChromoOverallRepres = None
for g in range(ga.getParam()['noGen']):
    ga.oneGenerationElitism()
    # ga.oneGeneration()
    # ga.oneGenerationSteadyState()
    bestChromo = ga.bestChromosome()
    if bestChromo.fitness > bestFitness:
        bestChromoOverallRepres = bestChromo.repres
        bestFitness = bestChromo.fitness
        bestDist = str(dist(bestChromo.repres, ga.getProblParam()))
    print('Best solution in generation ' + str(g) + ' is: ' +str(bestChromo.repres) + ' fitness = ' + str(bestChromo.fitness) + ' dist: ' + str(dist(bestChromo.repres,ga.getProblParam())))
print("\n")
print('Best solution overall is: ' +  str(bestChromoOverallRepres) + ' fitness = ' + str(bestFitness) + ' dist: ' + str(bestDist))

Example #13
__version__ = "0.0.1"
__maintainer__ = "Agniv Sen"
__email__ = "*****@*****.**"
__status__ = "Prototyping"

"""



# ****************************************************
# This is the entry point of this entire project. 
# For someone who wants to understand the code flow, please start from this point
# ****************************************************


rd = ReadData();    #Initializing File Reader Class



featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES,2))
featureMapProj = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES,param.PARTICLE_COUNT,3))
featureStore = np.zeros((param.FEATURE_SIZE))
world = np.zeros((param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z));

stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE + param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE + param.ANGULAR_VELOCITY_VECTOR_SIZE)
cameraState = np.zeros((stateVectorSize))

# Variables for archiving position vector, quaternion and features
_x = []
_y = []
_z = []
Example #14
'''
Created on 5 apr. 2020

@author: Alexandraah
'''
from GA import GA
from ReadData import ReadData
from utils import *
import matplotlib.pyplot as plt


data = ReadData("C:\\@Alexandra\\anul2\\semestrul2\\ai\\lab\\laborator4\\berlin.txt")
params = {'popSize': 500, 'noGen': 1000}
ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()

res=[]
res1=[]
for i in range(params['noGen']):
    #ga.oneGeneration()
    ga.oneGenerationElitism()
    #ga.oneGenerationSteadyState()
    best = ga.bestChromosome()
    fitnesses = [c.fitness for c in ga.population]
    avgFitness = sum(fitnesses) / len(fitnesses)
    res.append(avgFitness)
    for c in ga.population:
        res1.append(c.fitness)
        print("Fitness:" + str(c.fitness) + "\n")
    print('Generation: ' + str(i) + '\nBest chromosome: ' + str(best.repres) + '\nLocal best fitness: ' + str(best.fitness))
Example #15
__credits__ = []
__license__ = "GNU GPL"
__version__ = "0.0.1"
__maintainer__ = "Agniv Sen"
__email__ = "*****@*****.**"
__status__ = "Prototyping"

"""

# ****************************************************
# This is the entry point of this entire project.
# For someone who wants to understand the code flow, please start from this point
# ****************************************************

__name__ = '__main__'
rd = ReadData()
#Initializing File Reader Class

featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES, 2))
featureMapProj = np.zeros(
    (param.MAX_OBSERVATION, param.MAX_FEATURES, param.PARTICLE_COUNT, 3))
featureStore = np.zeros((param.FEATURE_SIZE))
world = np.zeros(
    (param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z))

stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE +
                   param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE +
                   param.ANGULAR_VELOCITY_VECTOR_SIZE)
cameraState = np.zeros((stateVectorSize))

# Variables for archiving position vector, quaternion and features
Example #16
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
############
from ReadData import ReadData
from SHE import SHE
#####Parameters of experimental device#########
global pixel_width  # pixel width, unit: nm
pixel_width = 320e3
lam_x = 0.124  # X-ray wavelength, unit: nm
dis_s2d = 5300e6  # sample-to-detector distance, unit: nm
############constant definition################
pi = math.pi
###############################################
init_data = ReadData("data/1.4/50k-132-1.401286.asc")
px_min = 122
px_max = 362
pz_min = 166
pz_max = 326
cen_px = (px_max + px_min) / 2
cen_pz = (pz_max + pz_min) / 2
I = init_data[pz_min:(pz_max + 1), px_min:(px_max + 1)]
plt.imshow(I[:, 40:200])
plt.axis('off')
plt.colorbar()
#EI = np.sqrt(I)
#I_max = 0.05*np.max(I)
#########################
#x = pixel_width*np.arange(px_min-cen_px,px_max-cen_px+1)
#z = pixel_width*np.arange(pz_min-cen_pz,pz_max-cen_pz+1)
Example #17
    H = np.dot(np.linalg.pinv(R), P).T
    x_est = np.dot(H, y)
    err = x - x_est
    g = lambda x: np.dot(H, x)
    R = np.dot(err, err.T) / l
    return g, R


if os.name == 'posix':
    trainPath = os.path.abspath('.') + '/Data/train.txt'
    testPath = os.path.abspath('.') + '/Data/test.txt'
elif os.name == 'nt':
    trainPath = os.path.abspath('.') + '\\Data\\train.txt'
    testPath = os.path.abspath('.') + '\\Data\\test.txt'

TrainX, TrainY = ReadData(trainPath, 'train')
# TestX, TestY = ReadData(testPath, 'test')

# two types of Test DataSet
# carve the last 500 columns off the training set as a held-out test set
# (take the test slice before truncating, so train and test do not overlap)
TestX = TrainX[:, -500:]
TestY = TrainY[:, -500:]
TrainX = TrainX[:, 0:-500]
TrainY = TrainY[:, 0:-500]

xTrainDim, TrainLen = np.shape(TrainX)
print(str(xTrainDim) + " " + str(TrainLen))
yTrainDim, TrainLen = np.shape(TrainY)
xTestDim, TestLen = np.shape(TestX)
print(str(yTrainDim) + " " + str(TestLen))

# kalman filter
Example #18
    default='nmt_logs')
parser.add_argument(
    '--inference',
    action="store_true",
    help='Whether to run inference or simply train the network')
parser.add_argument('--pretrained_path', help='Path to Pre-trained Weights')

args = parser.parse_args()

assert args.dataset.endswith('csv'), "Dataset File needs to be in CSV format"
assert 0. <= args.train_val_split < 1., "Train-vs-Validation Split need to be between [0, 1)"

latent_dim = args.latent_dim

# Reading and Preparing Training/Validation Dataset
reader = ReadData(args.dataset, args.train_val_split, args.language_1,
                  args.language_2)
(X_train, y_train), (X_val, y_val) = reader.prep_data()
train_samples = len(X_train)
val_samples = len(X_val)
num_encoder_tokens = reader.num_encoder_tokens
num_decoder_tokens = reader.num_decoder_tokens

# Loading Embedding Matrix
lang1_embedding = Word2Vec.load(args.lang1_embedding)
lang1_tok = Tokenizer()
lang1_tok.fit_on_texts(reader.language_1_text)

encoder_embedding_matrix = np.zeros((num_encoder_tokens, latent_dim))
for word, i in lang1_tok.word_index.items():
    try:
        embedding_vector = lang1_embedding[word]
Example #19
# ----------
# import original data
# ----------
Datapath = '../../methylation_imputation/data/'
DataSample_full = Datapath + 'intersected_final_chr1_cutoff_20_sample_full.bed'
DataSample_partial = Datapath + 'intersected_final_chr1_cutoff_20_sample_partial.bed'
DataTrain = Datapath + 'intersected_final_chr1_cutoff_20_train.bed'

# Training data
# use either one of the following three:
# DataTrain = ReadData(DataTrain)
# DataTrain = pd.read_csv('../result/Train_NaN_Meaned', sep = '\t')
DataTrain = pd.read_csv('../result/Train_NaN_Meaned_without_2627', sep = '\t')

# Sample data
DataSample_full = ReadData(DataSample_full)
DataSample_partial = ReadData(DataSample_partial)


# ----------
# Format Data so they are indexed by start position
# ----------
DataTrain.set_index('start', drop=False, inplace=True, verify_integrity=True)
DataSample_full.set_index('start', drop=False, inplace=True, verify_integrity=True)
DataSample_partial.set_index('start', drop=False, inplace=True, verify_integrity=True)


# ----------
# Read the Imputation result
# ----------
Resultpath = '../result/raw/'
Example #20
from ReadData import ReadData
import codecs

today = datetime.now()
ym = str(today.year)+str(today.month)
ymd = str(today.year)+"-"+str(today.month)+"-"+str(today.day)

table_name = "stock" + ym

try:
	con = mdb.connect('localhost', 'root', '1', 'financedb', charset='utf8')
	cur = con.cursor()
	createTable(cur, ym)
	
	rd = ReadData()
	rd.gbktoutf8("深沪A股.TXT")

	file_name = ymd + "." + "txt"
	f = open(file_name)
	title = f.readline()	#read the title
	print title
	while True:
		line = f.readline()
		print line
		if not line:	#EOF
			break
		if not re.search(r"\d", line):
			continue
		data = rd.splitItem(line)
		for w in data:
Example #21
    if args.model.endswith('lstm'):
        model = ConvLSTMModel1(hidden_states,
                               classes,
                               attention_size=attention_size,
                               use_attention=args.use_attention)
    elif args.model.endswith('deep'):
        model = ConvLSTMModel2(hidden_states,
                               classes,
                               attention_size=attention_size,
                               use_attention=args.use_attention)
    else:
        model = ConvModel(classes)

reader = ReadData(args.training_csv,
                  args.embedding,
                  args.classes,
                  batch_size=args.batch_size,
                  no_samples=args.no_samples,
                  train_val_split=args.train_val_split)

print('Reading Validation data.')
val_x, val_y = reader.read_all_val()
if args.model.startswith('cnn'):
    val_x = np.reshape(val_x, (val_x.shape[0], timesteps, embed_size, 1))

with tf.name_scope('Model'):
    prediction = model.model(x)

with tf.name_scope('Loss'):
    crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,
                                                          labels=y)
    cost_func = (tf.reduce_mean(crossent)) / args.batch_size
Example #22
                  optimizer=optimizer,
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
model.summary()

if args.check_build:
    exit()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}
if args.model == 'sentence_pair':
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=False)

print('Reading Validation Data ..')
val_x, val_y = reader.read_val()

train_generator = reader.generator()

log_dir = args.model
Example #23
def mainFunc():
    filepath = 'D:/Code/readfile/data.csv'
    d = ReadData(filepath)
    print(d.data.keys())
Example #24
        ax1.spines['bottom'].set_color("#5998ff")
        ax1.spines['top'].set_color("#5998ff")
        ax1.spines['left'].set_color("#5998ff")
        ax1.spines['right'].set_color("#5998ff")
        # set the color of the y-axis tick labels
        ax1.tick_params(axis='y', colors='w')
        plt.gca().yaxis.set_major_locator(mticker.MaxNLocator(prune='upper'))
        ax1.tick_params(axis='x', colors='w')
        plt.ylabel('Stock price and Volume')
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.suptitle("股票代码:{}".format(ts_code), color='w', fontsize=40)
        plt.show()

        print(stock_data)


if __name__ == "__main__":
    # stock (ts) code
    ts_code = "000001.SZ"
    # precomputed comparison result (matched window and its Pearson index)
    result = {
        'start_time': '20160412',
        'end_time': '20160707',
        'pearson_index': 0.945269566803306
    }
    read_data = ReadData()
    source_data = read_data.mysql_read_date(ts_code, result["start_time"],
                                            result["end_time"])
    k_plot = KLinePlot()
    k_plot.plot_k_line(source_data, ts_code)
Example #25
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from math import sqrt

from ReadData import ReadData
from ExtractSamples import ExtractSamples
from ColorModels import ColorModels
from CrossValidation import CrossValidation
from RunKMeans import RunKMeans
from RunEM_GMM import RunEM_GMM
from RunCommands import RunFCM, RunPCM
from sklearn.metrics import confusion_matrix, roc_curve
from scipy.stats import multivariate_normal

data_train, labels, locations = ReadData()  # Load all data
Data, ObjLabels = ExtractSamples(data_train, labels,
                                 locations)  # extract objects
plt.close("all")  # close all image plots
Data = Data / 255  #Normalize pixel values to be between 0 and 1
Data_HSV, Data_YIQ, Data_HLS = ColorModels(
    Data, ObjLabels)  # Transform RGB to different color spaces

DTrain, DVal, labelsTrain, labelsVal = CrossValidation(
    Data, ObjLabels, 0.8, 'RGB')  #80% of data for training and 20% for testing
DTrain_HSV, DVal_HSV, labelsTrain_HSV, labelsVal_HSV = CrossValidation(
    Data_HSV, ObjLabels, 0.8,
    'HSV')  #80% of data for training and 20% for testing
DTrain_YIQ, DVal_YIQ, labelsTrain_YIQ, labelsVal_YIQ = CrossValidation(
    Data_YIQ, ObjLabels, 0.8,
    'YIQ')  #80% of data for training and 20% for testing
Example #26
# logits = drnn(X,W,b)
# predict = tf.nn.softmax(logits)
# Y = predict
# # Cost & Optimizer
# loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=Y))
# optimizer = tf.train.GradientDescentOptimizer(learning_rate=learningrate)
# train_op = optimizer.minimize(loss_op)

# # build accuracy
# correct_pred = tf.equal(tf.argmax(predict,1),tf.argmax(Y,1))
# accuracy = tf.reduce_mean(correct_pred,tf.float32)

# Initiate Global variable
init = tf.global_variables_initializer()

readdata = ReadData()
trainingFiles, testingFiles = readdata.filePathConstructor()
features = readdata.input_pipeline(trainingFiles, batch_size)

# Start training
with tf.Session() as sess:
    # init session
    sess.run(init)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # loop training steps
    for step in range(training_steps):
        # read input data
        example_batch = tf.reshape(features, [-1])
Example #27
                similar_value = 0.5 * self.calc_pearson(
                    mul_open, atom_open) + 0.5 * self.calc_pearson(
                        mul_close, atom_close)
                if (result["pearson_index"] < similar_value):
                    result = {
                        "start_time": temp_compare[0]["trade_date"],
                        "end_time": temp_compare[-1]["trade_date"],
                        "pearson_index": similar_value
                    }
        # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S")
        # print("Calc Cost: {}".format(str((end_time - start_time).seconds)))
        return result


if __name__ == "__main__":
    read_data = ReadData()
    # set the comparison window length (days)
    num = 60
    # take the last `num` days of the target stock as the query window
    source_data = read_data.mysql_read_data("002936.SZ").iloc[-num:]
    if len(source_data) < num:
        num = len(source_data)
    # pick a stock to compare against and drop its last `num` days
    compare_data = read_data.mysql_read_data("000001.SZ").iloc[:-num]
    # instantiate the comparator; inputs are DataFrames
    compare = CompareSimilarKDynamic(source_data, compare_data, num)
    # return the window with the highest Pearson correlation
    result = compare.compare_dynamic()
    print(result)
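
A brief hedged sketch (this assumes calc_pearson, which is not shown in the snippet, computes the standard Pearson correlation of two equally long series):

# Hypothetical stand-in for CompareSimilarKDynamic.calc_pearson: plain Pearson r.
import numpy as np

def pearson(a, b):
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.corrcoef(a, b)[0, 1])
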
Example #28
def main():
    readdata = ReadData()

    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batch_size)

    with tf.Session() as sess:
        # Create the graph, etc.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        dict1 = {
            value: (int(key) + 1)
            for key, value in enumerate(list(string.ascii_lowercase))
        }
        dict1[' '] = 0
        dict1[';'] = -1
        dict1['-'] = -1
        vocab_size = len(dict1)
        for i in range(1):
            example_batch = tf.reshape(features, [-1])
            item = tf.string_split(example_batch, delimiter="").values.eval()
            chars = [dict1[alp.decode().lower()] for alp in list(item)]
            data_size = len(chars)
            print('Data has %d characters, %d unique.' %
                  (data_size, vocab_size))

            # # Hyper-parameters
            # hidden_size   = 100  # hidden layer's size
            # seq_length    = 25   # number of steps to unroll
            # learning_rate = 1e-1

            # inputs     = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="inputs")
            # targets    = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="targets")
            # init_state = tf.placeholder(shape=[1, hidden_size], dtype=tf.float32, name="state")

            # intializer = tf.random_normal_initializer(stddev=1.0)

            # with tf.variable_scope("RNN") as scope:
            #     hs_t = init_state
            #     ys = []
            #     for t,xs_t in enumerate(tf.split(inputs,seq_length,axis=0)):
            #         if t > 0:scope.reuse_variables()
            #         Wxh = tf.get_variable("Wxh",shape=[vocab_size,hidden_size],dtype=tf.float32,intializer=intializer)
            #         Whh = tf.get_variable("Whh",shape=[hidden_size,hidden_size],dtype=tf.float32,intializer=intializer)
            #         Why = tf.get_variable("Why",shape=[hidden_size,vocab_size],dtype=tf.float32,intializer=initializer)
            #         bh = tf.get_variable("bh",shape=[hidden_size],intializer=intializer)
            #         by = tf.get_variable("by",shape=[vocab_size],initializer=intializer)

            #         hs_t = tf.tanh(tf.matmul(xs_t,Wxh) + tf.matmul(hs_t,Whh) + bh)
            #         ys_t = tf.matmul(hs_t,Why) + by
            #         ys.append(ys_t)

            # h_prev = hs_t

            # output_softmax = tf.nn.softmax(ys[-1])

            # outputs = tf.concat(ys,axis=0)
            # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets,logits=outputs))

            # #optimizer
            # minimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            # grad_and_vars = minimizer.compute_gradients(loss)

            # pred = RNN(chars,weights,biases)
            # # Loss and optimizer
            # # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
            # # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

            # # # Model evaluation
            # # correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
            # # accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            # # print(example_batch)

        coord.request_stop()
        coord.join(threads)
Example #29
                    self.assignAspect(sentence)
            self.populateAspectWordMat()
            changed=self.calcChiSq()
        self.corpus.aspectSentences.clear()
        for review in self.corpus.allReviews:
            for sentence in review.sentences:
                self.assignAspect(sentence)
        print(self.corpus.aspectKeywords)
    
    # Saves the object into the given file
    def saveToFile(self,fileName,obj):
        with open(modelDataDir+fileName,'w') as fp:
            json.dump(obj,fp)
            fp.close()
            
rd = ReadData()
rd.readAspectSeedWords()
rd.readStopWords()
rd.readReviewsFromJson()
rd.removeLessFreqWords()
bootstrapObj = BootStrap(rd)
bootstrapObj.bootStrap()
bootstrapObj.populateLists()
bootstrapObj.saveToFile("wList.json",bootstrapObj.wList)
bootstrapObj.saveToFile("ratingsList.json",bootstrapObj.ratingsList)
bootstrapObj.saveToFile("reviewIdList.json",bootstrapObj.reviewIdList)
bootstrapObj.saveToFile("vocab.json",list(bootstrapObj.corpus.wordFreq.keys()))
bootstrapObj.saveToFile("aspectKeywords.json",bootstrapObj.corpus.aspectKeywords)
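
A small companion sketch (not from the original code) showing how the JSON artifacts written by saveToFile above can be read back; the directory default here is an assumption standing in for modelDataDir:

# Hypothetical loader for the JSON files written by BootStrap.saveToFile above.
import json

def load_from_file(file_name, model_data_dir="modelData/"):
    # model_data_dir mirrors the example's modelDataDir (assumed path)
    with open(model_data_dir + file_name) as fp:
        return json.load(fp)

aspect_keywords = load_from_file("aspectKeywords.json")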


Example #30
                atom_close = self.dynamic(atom_close,"close",index)
                similar_value = 0.5 * self.calc_pearson(mul_open,atom_open) + 0.5 * self.calc_pearson(mul_close,atom_close)
                if (result["pearson_index"] < similar_value):
                    result = {
                        "start_time": temp_compare[0]["trade_date"],
                        "end_time": temp_compare[-1]["trade_date"],
                        "pearson_index": similar_value
                    }
        # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S")
        # print("Calc Cost: {}".format(str((end_time - start_time).seconds)))
        return result


if __name__ =='__main__':
    read_data = ReadData()
    ts_code_list = read_data.mysql_read_ts_code()[:]
    print("----------------Complete ts_code reading--------------------:{}")
    start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print("--------Start cal----------:{0}".format(start_time))
    results_dict = {}
    source_data = read_data.mysql_read_data("000009.SZ").iloc[-60:]
    for ts_code in ts_code_list:
        #print(ts_code)
        compare_data = read_data.mysql_read_data(ts_code).iloc[:-60]
        if len(compare_data) < 60:
            print("该股票数据不足")
        else:
            compare = CompareSimilarKDynamic(source_data, compare_data, 60)
            result = compare.compare_dynamic()
            results_dict[ts_code] = result
Example #31
             #                                              ,test_size=test_size)
            #--------DROP ID column from train and test
            #if ISTRAIN == 1:
            tmpModel,df = trainingAlgo(X_train,y_train,X_test,y_test)
            model = tmpModel
            """
            _,acc,rocScore = models.evaluateModel(X_test,y_test,tmpModel)
            if roc < rocScore:
                roc = rocScore
                model = tmpModel
                print("Accurachy %f, ROC Score %f" % (acc,roc))
            """
        return model,df

#------Get feature set and create classes   
readData = ReadData(".","HomeCredit","sa","Pass@123")        
models = Models()

featureSet = readData.getData("dbo.FeatureSet")

featureSet = models.convertCategoricalVaribalesToOneHotEncoding(featureSet)
featureSet = models.addFeatures(featureSet)

train = featureSet[featureSet["TARGET"] != -1]
test = featureSet[featureSet["TARGET"] == -1]

test_ids = test["SK_ID_CURR"]

test.drop(["TARGET","SK_ID_CURR"],axis = 1,inplace = True)
train.drop(["SK_ID_CURR"],axis = 1,inplace = True)
train["TARGET"] = train["TARGET"].astype("category")
Example #32
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

model.load_weights(args.weights)

print('Model Loaded from {}.'.format(args.weights))

model.summary()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}
if args.model == 'sentence_pair':
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=False)

test_data = pd.read_excel(args.dataset, sheet_name=None)['Sheet1']
test_data = test_data.sample(frac=1.0).reset_index(drop=True)
test_data = test_data.head(int(len(test_data) * args.size))

print(test_data.columns)

assert len(test_data.columns) > 1, "Labels of Test set not available."
Example #33
def main(args):
    # validate the number of input arguments
    if len(args) != 6:
        print('Should Have Six Input Arguments')
        exit(0)

    # input parameters
    L = int(args[0])
    K = int(args[1])
    training_set_file_name = args[2]
    validation_set_file_name = args[3]
    test_set_file_name = args[4]
    to_print = True if args[5].lower() == 'yes' else False

    path = './' + DATA_DIRECTORY + '/'

    # read data from training set, test set, and validation set
    rd = ReadData()
    labels, training_set = rd.createDataSet(path + training_set_file_name)
    labels, validation_set = rd.createDataSet(path + validation_set_file_name)
    labels, test_set = rd.createDataSet(path + test_set_file_name)

    # build tree
    dt = DecisionTree()

    info_gain_tree_root = dt.buildDT(training_set, labels.copy(),
                                     'information_gain')
    pruned_info_gain_tree_root = dt.pruneTree(info_gain_tree_root, L, K,
                                              validation_set, labels)

    variance_impurity_tree_root = dt.buildDT(training_set, labels.copy(),
                                             'variance_impurity')
    pruned_variance_impurity_tree_root = dt.pruneTree(
        variance_impurity_tree_root, L, K, validation_set, labels)

    print()
    info_accuracy = dt.calAccuracy(test_set, info_gain_tree_root, labels)
    print('Accuracy of decision tree constructed using information gain: %s' %
          info_accuracy)
    variance_accuracy = dt.calAccuracy(test_set, variance_impurity_tree_root,
                                       labels)
    print('Accuracy of decision tree constructed using variance impurity: %s' %
          variance_accuracy)

    prune_info_accuracy = dt.calAccuracy(test_set, pruned_info_gain_tree_root,
                                         labels)
    print(
        'Accuracy of pruned decision tree constructed using information gain: %s'
        % prune_info_accuracy)

    pruned_variance_accuracy = dt.calAccuracy(
        test_set, pruned_variance_impurity_tree_root, labels)
    print(
        'Accuracy of pruned decision tree constructed using variance impurity: %s'
        % pruned_variance_accuracy)

    if (to_print):
        print()
        print('Build Decision Tree By Using Information Gain')
        info_gain_tree_root.printTree()

        print()

        print()
        print('Build Decision Tree By Using Variance Impurity')
        variance_impurity_tree_root.printTree()
        print()