def train(self, dataFile):
    '''Trains the Naive Bayes Sentiment Classifier.'''
    reader = DataReader(dataFile)
    # There may be a better way to get the two labels; we grab the first label
    # from the first document...
    for label, tokens, company, date, price, risklength in reader:
        self.label2 = label
        break
    # ...and then find the other label by iterating until we hit a label
    # different from the first one.
    for label, tokens, company, date, price, risklength in reader:
        if self.label2 != label:
            self.label1 = label
            break
    reader = DataReader(dataFile)
    # Iterate through all of the documents in the training set.
    for label, tokens, company, date, price, risklength in reader:
        # Check whether the document is positive or negative, so that we
        # update the corresponding dictionary.
        if label == self.label1:
            for token in tokens:
                # For each word: add one to its count in the dictionary,
                # add one to 'total*' (tracking the number of words seen in
                # positive documents), and register the word in allwords
                # (a no-op if the word is already present).
                self.positivedict[token] += 1
                self.positivedict['total*'] += 1
                self.allwords[token] = 0
        # Repeat for negative documents.
        if label == self.label2:
            for token in tokens:
                self.negativedict[token] += 1
                self.negativedict['total*'] += 1
                self.allwords[token] = 0
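# A minimal sketch (not from the source) of the classification side this trainer
# implies: add-one (Laplace) smoothing over the word counts above, with `allwords`
# supplying the vocabulary size. The method name and decision rule are hypothetical.
import math

def classify_sketch(self, tokens):
    vocab_size = len(self.allwords)
    log_pos = log_neg = 0.0
    for token in tokens:
        # Smoothing keeps unseen words from zeroing out either score.
        log_pos += math.log((self.positivedict.get(token, 0) + 1) /
                            float(self.positivedict.get('total*', 0) + vocab_size))
        log_neg += math.log((self.negativedict.get(token, 0) + 1) /
                            float(self.negativedict.get('total*', 0) + vocab_size))
    # label1 was the "positive" label during training.
    return self.label1 if log_pos > log_neg else self.label2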
def WalkThroughAllOptimizers(option):
    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX()
    Y = dataReader.NormalizeY()
    n_input, n_output = dataReader.num_feature, 1
    n_hidden = option[2]
    eta, batch_size, max_epoch = option[1], 10, 10000
    eps = 0.001
    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch, batch_size,
                         eps, InitialMethod.Xavier, option[0])
    loss_history = CLossHistory()
    net = TwoLayerNet(NetType.Fitting)
    wbs = net.train(dataReader, params, loss_history)
    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)
    print("wait for 10 seconds...")
    ShowResult(net, X, Y, title, trace.wb1, trace.wb2)
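# Hypothetical driver, inferred from how `option` is indexed above: option[0]
# is the optimizer, option[1] the learning rate, option[2] the hidden-layer
# size. Optimizer names beyond SGD are assumptions about the framework's enum.
if __name__ == '__main__':
    for option in [(OptimizerName.SGD, 0.1, 4),
                   (OptimizerName.Momentum, 0.1, 4),
                   (OptimizerName.Adam, 0.005, 4)]:
        WalkThroughAllOptimizers(option)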
def update_answer(self):
    dataset = db.DataReader()
    self.data2 = self.text2.get()
    if self.data2 in ("Positive", "positive"):
        value = "Pos"
    elif self.data2 in ("Negative", "negative"):
        value = "Neg"
    else:
        return  # no recognizable label; avoid storing an undefined value
    dataset.Store_data(value, self.data)
def train(self, dataFile):
    '''Trains the Naive Bayes Sentiment Classifier.'''
    reader = DataReader(dataFile)
    # go through all the docs in our corpus
    for doc in reader:
        (label, data) = doc
        self.total_docs += 1
        # if we haven't seen this label at all yet
        if label not in self.label_dictionary:
            self.label_dictionary[label] = 1  # note that we have seen one document of that label
            self.master_unigram_dictionary[label] = {}  # add a new dictionary to the master dictionary under that label
            self.master_bigram_dictionary[label] = {}
            self.unigram_dictionary[label] = 0  # note that we have seen 0 words of that label
            self.bigram_dictionary[label] = 0
        else:
            self.label_dictionary[label] += 1  # increment our count of the documents of this label
        # we now have to count the actual words in this doc
        unigram_dict = self.master_unigram_dictionary[label]
        bigram_dict = self.master_bigram_dictionary[label]
        # train on the unigrams
        for unigram in data:
            self.unigram_vocab.add(unigram)
            # if we have not seen this unigram under this label
            if unigram not in unigram_dict:
                unigram_dict[unigram] = 1  # note that we have seen the unigram once under this label
            else:
                unigram_dict[unigram] += 1  # increment the count of this unigram under this label
            self.unigram_dictionary[label] += 1  # increment our count of the total non-unique unigrams under this label
        # train on the bigrams
        for i in range(len(data) - 1):
            bigram = (data[i], data[i + 1])
            self.bigram_vocab.add(bigram)
            # if we have not seen this bigram under this label
            if bigram not in bigram_dict:
                bigram_dict[bigram] = 1  # note that we have seen the bigram once under this label
            else:
                bigram_dict[bigram] += 1  # increment the count of this bigram under this label
            self.bigram_dictionary[label] += 1  # increment our count of the total non-unique bigrams under this label
    self.save(dataFile + ".best.pickled")
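# A minimal sketch (not in the source) of turning the stored bigram counts into
# a smoothed log-probability for one label; the helper name is hypothetical,
# and add-one smoothing over `bigram_vocab` is an assumption.
import math

def bigram_logprob(self, label, data):
    bigram_dict = self.master_bigram_dictionary[label]
    total = self.bigram_dictionary[label]  # non-unique bigrams seen under this label
    vocab_size = len(self.bigram_vocab)
    logp = 0.0
    for i in range(len(data) - 1):
        count = bigram_dict.get((data[i], data[i + 1]), 0)
        logp += math.log((count + 1) / float(total + vocab_size))
    return logp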
def Main():
    dataReader = DataReader()
    allUserData = dataReader.loadData("DSL-StrongPasswordData")  # loads all users' data
    classifier = Classifier()
    scalar = 1.0
    scalarCap = 1.6
    dimDeviation = 1
    dimCap = 21
    while dimDeviation < dimCap:
        print("testing dims: " + str(dimDeviation))
        for k in range(0, 50):
            correct_person_accuracy = []
            wrong_person_accuracy = []
            owner_index = k  # index of the user that is to be tested
            first_time = True  # True until test_data_wrong is first created
            # print("testing for person " + str(k) + " created!")
            for i in range(0, 50):
                userDataRaw = allUserData[i]  # data from one user
                # Format the data (strip user and session ids etc.); returns a matrix.
                userData = dataReader.formatData(userDataRaw)
                if i == owner_index:
                    np.random.shuffle(userData)  # shuffle to get data from different sessions
                    person1 = DataCluster(userData[0:300], scalar)  # the person to be tested
                    test_data_right = userData[300:]
                    # print(test_data_right)
                else:
                    if first_time:
                        test_data_wrong = userData
                        first_time = False
                    else:
                        test_data_wrong = np.concatenate((test_data_wrong, userData), axis=0)
            correct_person_accuracy.append(
                classifier.compare_all(person1, test_data_right, True, dimDeviation))
            wrong_person_accuracy.append(
                classifier.compare_all(person1, test_data_wrong, False, dimDeviation))
            print("False recognition rate: " + str(1 - np.mean(correct_person_accuracy)))
            print("False acceptance rate: " + str(1 - np.mean(wrong_person_accuracy)))
        # scalar += 0.1
        dimDeviation += 1
def __init__(self, file_tree, search_box, setup):
    # initialize the global values
    self.file_tree = file_tree
    self.search_box = search_box
    self.setup = setup
    self.directory = ""
    self.selected_file_path = ""
    self.file_list = None
    self.search_list = None
    self.index = None
    self.DataReader = DataReader.DataReader()
def __init__(self, sess, img_h, img_w, img_c, op):
    # ---input setting---
    self.sess = sess
    self.op = op
    self.output_height, self.output_width = img_h, img_w
    self.c_dim = img_c
    self.orig_embed_size = 4800
    # ---training data---
    if op == "train":
        self.batch_size = 64
        print("loading training data......")
        # path = '/home/master/05/john81923/data/VLDS2018/hw4_dataset/hw4_data'
        train_path = os.path.join(path, 'train')
        self.data = DataReader.DataReader(batch_size=self.batch_size)
        self.data_objs = self.data.acgen_data()
    # ---testing data---
    if op == "test":
        self.batch_size = 1
        self.test_sent = tf.placeholder(tf.float32, shape=[1, self.orig_embed_size])
    # ---model network setting---
    self.gf_dim = 64
    self.df_dim = 64
    self.z_dim = 100
    self.embed_size = 128
    self.keep_prob = tf.placeholder(tf.float32)
    self.cat_dim = 1
    self.con_dim = 1
    self.rand_dim = self.z_dim
    # ---batch_norm of discriminator---
    self.d_bn0 = batch_norm(name="d_bn0")
    self.d_bn1 = batch_norm(name="d_bn1")
    self.d_bn2 = batch_norm(name="d_bn2")
    self.d_bn3 = batch_norm(name="d_bn3")
    self.d_bn4 = batch_norm(name="d_bn4")
    # ---batch_norm of generator---
    self.g_bn0 = batch_norm(name="g_bn0")
    self.g_bn1 = batch_norm(name="g_bn1")
    self.g_bn2 = batch_norm(name="g_bn2")
    self.g_bn3 = batch_norm(name="g_bn3")
    # ---build model---
    print("building model......")
    self.build_model()
def eval(sText):
    totalaccuracy_numer = 0
    totalaccuracy_denom = 0
    for test in range(0, 10):
        thisaccuracy_numer = 0
        thisaccuracy_denom = 0
        split(sText, "output")
        for doc in range(0, 5):
            print("i is: ", doc)
            totaldic = defaultdict(lambda: 0)
            totalcorrectdic = defaultdict(lambda: 0)
            bc = BayesClassifier()
            bc.train("output.train{0}".format(doc % 5))
            bc.train("output.train{0}".format((doc + 1) % 5))
            bc.train("output.train{0}".format((doc + 2) % 5))
            bc.train("output.train{0}".format((doc + 3) % 5))
            reader = DataReader("output.train{0}".format((doc + 4) % 5))
            correct = 0
            total = 0
            hold = 0
            for label, tokens, company, date, price, risklength in reader:
                print(label)
                tokenstring = " ".join(tokens)
                print(date)
                if risklength == 1:
                    print("invalid document; ignore")
                elif bc.classify(tokenstring, risklength, date) == "HOLD":
                # elif bc.classify(tokenstring, risklength) == "HOLD":
                    hold += 1
                else:
                    totaldic[label] += 1
                    total += 1
                    if bc.classify(tokenstring, risklength, date) == label:
                    # if bc.classify(tokenstring, risklength) == label:
                        correct += 1
                        totalcorrectdic[label] += 1
            print("Holds: ", hold)
            print("Accuracy:", correct / float(total))
            thisaccuracy_numer += correct / float(total)
            thisaccuracy_denom += 1
            for key in totaldic:
                print(totalcorrectdic[key], totaldic[key])
                print(key, " precision: ", totalcorrectdic[key] / float(totaldic[key]))
        print("This Round Accuracy: ", thisaccuracy_numer / thisaccuracy_denom)
        totalaccuracy_numer += thisaccuracy_numer
        totalaccuracy_denom += thisaccuracy_denom
    print("Total Accuracy: ", totalaccuracy_numer / totalaccuracy_denom)
def store_data(self):
    self.popup.destroy()
    dataset = db.DataReader()
    sta = Statistics.Stats(dataset.unique_pos, dataset.unique_neg)
    sa = Senti.SentimentalAnalysis()
    # evaluate each classifier once instead of twice
    naive = sa.NaiveBayes(self.data, sta)
    bayesian = sa.BayesianBayesResult(self.data, sta)
    if naive == "Positive" and bayesian == "Positive":
        value = "Pos"
    elif naive == "Negative" and bayesian == "Negative":
        value = "Neg"
    else:
        return  # the two classifiers disagree; store nothing
    dataset.Store_data(value, self.data)
def prepareData(inputFilePath):
    total_row_list = list()
    reader = DataReader()
    df = reader.mergeData(inputFilePath)
    print(df.columns.values)
    cols = df.columns.values
    print(df.head())
    for eachColName in cols:
        feature_data = df[eachColName]
        feature_data = feature_data[feature_data.notnull()]
        for colData in feature_data:
            each_row = generate_feature_list(colData, eachColName)
            total_row_list.append(each_row)
    features_df = pd.DataFrame(total_row_list, columns=output_field)
    return (cols, features_df)
def main():
    # Let's look at how profit (units * prices) depends on datetime for one product.
    UPC = input("enter UPC for your product: ")
    reader = dataReader.DataReader()
    DisplayProfitAmongPeriod(reader, UPC, '2009-01-14', '2011-12-28', '2009-2011')
    # Let's look at how profit depends on dates within one year, for three years
    # (2011, 2010, 2009).
    DisplayProfitAmongPeriod(reader, UPC, '2011-01-05', '2011-12-28', '2011')
    DisplayProfitAmongPeriod(reader, UPC, '2010-01-06', '2010-12-29', '2010')
    DisplayProfitAmongPeriod(reader, UPC, '2009-01-14', '2009-12-30', '2009')
    # Let's look at how units depend on prices for one week (2011-12-07).
    DisplayUnitsVsPrices(reader, UPC, '2011-12-07')
    # There are no obvious dependencies among these features, so the best price
    # is calculated as follows.
    price = GetBestPriceForNextTwoWeeks(reader, UPC)
    print(price)
def train(self, batch_size=64, num_batches=1000000):
    self.sess.run(tf.global_variables_initializer())
    # path = '/home/master/05/john81923/data/VLDS2018/hw4_dataset/hw4_data'
    train_path = os.path.join(path, 'train')
    data = DataReader.DataReader(batch_size=batch_size)
    data.get_data(train_path)
    start_time = time.time()
    for t in range(0, num_batches):
        d_iters = 5
        data_batch = data.minibatch()
        # if t % 500 == 0 or t < 25:
        #     d_iters = 100
        for i in range(0, d_iters):
            bx = data_batch[i]
            bz = np.random.uniform(-1, 1, [batch_size, self.z_dim]).astype(np.float32)
            self.sess.run(self.d_adam, feed_dict={self.x: bx, self.z: bz})
        bz = self.z_sampler(batch_size, self.z_dim)
        self.sess.run(self.g_adam, feed_dict={self.z: bz, self.x: bx})
        if t % 100 == 0:
            bx = data_batch[0]
            bz = np.random.uniform(-1, 1, [batch_size, self.z_dim]).astype(np.float32)
            d_loss = self.sess.run(self.d_loss, feed_dict={self.x: bx, self.z: bz})
            g_loss = self.sess.run(self.g_loss, feed_dict={self.z: bz})
            print('Iter [%8d] Time [%5.4f] d_loss [%.4f] g_loss [%.4f]' %
                  (t, time.time() - start_time, d_loss, g_loss))
        if t % 100 == 0:
            bz = self.z_sampler(batch_size, self.z_dim)
            bx = self.sess.run(self.x_, feed_dict={self.z: bz})
            bx = xs.data2img(bx)
            # fig = plt.figure(self.data + '.' + self.model)
            # grid_show(fig, bx, xs.shape)
            bx = self.grid_transform(bx, xs.shape)
            # integer division so the filename index is an int
            imsave('logs/{}/{}.png'.format(self.data, t // 100), bx)
def train(ne, batch, eta):
    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX(passthrough=True)
    Y = dataReader.NormalizeY()
    n_input, n_hidden, n_output = 1, ne, 1
    eta, batch_size, max_epoch = eta, batch, 10000
    eps = 0.001
    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch, batch_size,
                         eps, LossFunctionName.MSE, InitialMethod.Xavier)
    loss_history = CLossHistory(params)
    net = TwoLayerFittingNet()
    wb1, wb2 = net.train(dataReader, params, loss_history)
    return loss_history
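# Hypothetical sweep over the hidden-layer width `ne`, reusing the batch size
# and learning rate that appear elsewhere in this codebase; a sketch, not the
# original experiment script.
if __name__ == '__main__':
    for ne in (2, 4, 8):
        loss_history = train(ne, 10, 0.5)
        print(loss_history.GetMinimalLossData().toString())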
def Btn_Submit(self):
    StopWords = set(stopwords.words('english'))
    dataset = db.DataReader()
    sta = Statistics.Stats(dataset.unique_pos, dataset.unique_neg)
    self.data = self.text.get()
    words = self.data.split()
    # Note: temp ends up holding only the LAST word that is neither a stop word
    # nor punctuation; only that word is analyzed below.
    for word in words:
        if word in StopWords or word in string.punctuation:
            continue
        else:
            temp = word
            print(temp)
    sa = Senti.SentimentalAnalysis()
    self.textbox.insert(tk.END, self.data)
    self.textbox.insert(tk.END, "\nNaiveBayes Result :")
    self.textbox.insert(tk.END, sa.NaiveBayes(temp, sta))
    self.textbox.insert(tk.END, "\nBayesianBayes Result :")
    self.textbox.insert(tk.END, sa.BayesianBayesResult(temp, sta))
def train(init_method):
    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX(passthrough=True)
    Y = dataReader.NormalizeY()
    n_input, n_hidden, n_output = 1, 4, 1
    eta, batch_size, max_epoch = 0.5, 10, 30000
    eps = 0.001
    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch, batch_size,
                         eps, init_method, OptimizerName.SGD)
    loss_history = CLossHistory()
    net = TwoLayerNet(NetType.Fitting)
    net.train(dataReader, params, loss_history)
    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)
    ShowResult(net, X, YData, title, trace.wb1, trace.wb2)
def run(self):
    """ run in new thread """
    import Graph
    import DataReader
    dataReader = DataReader.DataReader(countriesData, worldData, amountOfTopCountries)
    yearsDict = init_years(dataReader)
    graph = Graph.Graph(dataReader, yearsDict, imageOutputFolder, videoOutputFolder)
    global FigureList
    FigureList = graph.getFigureList()
    global NumberOfFramesWouldBeRendered
    NumberOfFramesWouldBeRendered = graph.getNumberofFramesWouldBeRendered()
    graph.render()
def testClassifier(outputLabel):
    bc = BayesClassifier()
    bc.train(outputLabel + ".train")
    reader = DataReader(outputLabel + ".test")
    correctLabel = {}
    numberGuess = {}
    correct = 0.0
    total = 0.0
    for label, tokens in reader:
        if label not in correctLabel:
            correctLabel[label] = 0.0
        guess = bc.classify(" ".join(tokens))
        if guess not in numberGuess:
            numberGuess[guess] = 0.0
        if guess == label:
            correctLabel[guess] += 1
            correct += 1
        numberGuess[guess] += 1
        total += 1
    for label in correctLabel:
        print("Correct " + label, "-", correctLabel[label] / numberGuess[label])
    print("Total accuracy -", correct / total)
def WalkThroughAllOptimizers(option):
    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX()
    Y = dataReader.ToOneHot()
    n_input, n_output = dataReader.num_feature, dataReader.num_category
    n_hidden = 8
    eta, batch_size, max_epoch = option[1], 10, 10000
    eps = 0.06
    params = HyperParameters41(n_input, n_output, n_hidden, eta, max_epoch,
                               batch_size, eps, LossFunctionName.CrossEntropy3,
                               InitialMethod.Xavier, option[0])
    loss_history = CLossHistory()
    net = TwoLayerClassificationNet()
    # ShowData(XData, YData)
    net.train(dataReader, params, loss_history)
    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)
    print("wait for 10 seconds...")
    wbs_min = WeightsBias30(params)
    wbs_min.W1 = trace.dict_weights["W1"]
    wbs_min.W2 = trace.dict_weights["W2"]
    wbs_min.B1 = trace.dict_weights["B1"]
    wbs_min.B2 = trace.dict_weights["B2"]
    ShowAreaResult(X, wbs_min, net, title)
    ShowData(X, YData)
def WalkThroughAllOptimizers(option):
    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX()
    Y = dataReader.NormalizeY()
    n_input, n_output = dataReader.num_feature, 1
    n_hidden = 4
    eta, batch_size, max_epoch = option[1], 10, 10000
    eps = 0.001
    params = CParameters(n_input, n_output, n_hidden, eta, max_epoch, batch_size,
                         eps, LossFunctionName.MSE, InitialMethod.Xavier, option[0])
    loss_history = CLossHistory()
    net = TwoLayerFittingNet()
    # ShowData(XData, YData)
    wbs = net.train(dataReader, params, loss_history)
    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)
    print("wait for 10 seconds...")
    wbs_min = WeightsBias(params)
    wbs_min.W1 = trace.dict_weights["W1"]
    wbs_min.W2 = trace.dict_weights["W2"]
    wbs_min.B1 = trace.dict_weights["B1"]
    wbs_min.B2 = trace.dict_weights["B2"]
    ShowResult(X, Y, net, wbs_min, title)
def Train():
    dataReader = DataReader(x_data_name, y_data_name)
    dataReader.ReadData()
    dataReader.NormalizeX()
    dataReader.NormalizeY()
    n_input, n_hidden, n_output = 1, 3, 1
    eta, batch_size, max_epoch = 0.5, 10, 50000
    eps = 0.001
    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps)  # SGD, MiniBatch, FullBatch
    loss_history = CLossHistory()
    net = TwoLayerFittingNet()
    wb1, wb2 = net.train(dataReader, params, loss_history)
    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)
    ShowResult(net, dataReader.X, dataReader.Y, title, trace.wb1, trace.wb2)
    trace.wb1.Save("wb1")
    trace.wb2.Save("wb2")
import subprocess
import sys

# A rough and dirty check whether the raw data files are available;
# call the request script otherwise.
dustfiles = subprocess.check_output("ls dust_concentrations2018-04-1*grib", shell=True).split()
tempfiles = subprocess.check_output("ls temp_v_gh2018-04-1*grib", shell=True).split()
if len(dustfiles) < 4 or len(tempfiles) < 4:
    subprocess.call(("python", "request.py"))

if len(sys.argv) == 3:
    dates = eval(sys.argv[1])
    hours = eval(sys.argv[2])
else:
    dates, hours = (range(1, 22), (0, 6, 12, 18))

# Instantiate a data reader object to read in and preprocess
# files for the date range April 11-17, at 00:00 and 12:00 hours.
dr = DataReader((dates, hours), area=(80, 15, -80, 100))

# Instantiate a Euromap instance.
em = Euromap(width=5000)

# Create a series of maps showing the dust transport rate
# along the north-south axis.
# for idx, time in enumerate(dr.timeslots):
#     em.densitymap(savefilename="North_South_Dust_Transport_{}_{}.png".format(*time),
#                   formatsample=dr.dataformatsample[1:],
#                   title="North-South Dust Transport, 2018-04-{}, {}:00".format(*time),
#                   array3d=dr.aggregatedrate.data[idx],
#                   timeslot=time,
#                   scale=np.arange(-100, 110, 10) ** 3 * 0.00000005,
#                   name=dr.aggregatedrate.name)
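# A sketch of the same availability check using glob instead of shelling out to
# ls: subprocess.check_output raises CalledProcessError when ls matches nothing,
# which defeats the fallback, whereas glob simply returns an empty list.
import glob

dustfiles = glob.glob("dust_concentrations2018-04-1*grib")
tempfiles = glob.glob("temp_v_gh2018-04-1*grib")
if len(dustfiles) < 4 or len(tempfiles) < 4:
    subprocess.call(("python", "request.py"))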
        while iteration * self.hparams.batch_size < self.hparams.training_size:
            train_cost, train_accuracy = self.sess.run([self.train_loss, self.accuracy])
            print("iterations: [%2d] time: %4.4f, loss: %.8f, accuracy: %.8f" %
                  (iteration, time.time() - start_time, np.mean(train_cost), train_accuracy))
            iteration += 1  # advance the counter so the loop can terminate
        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    dataset_name = "cnn"
    dataset_dir = "../data_2"
    dr = DataReader()
    hparams = tf.flags
    hparams.DEFINE_integer("training_size", 381000, "total number of training samples")
    hparams.DEFINE_integer("number_of_epochs", 200, "Epoch to train [25]")
    hparams.DEFINE_integer("vocab_size", 10000, "The size of vocabulary [10000]")
    hparams.DEFINE_integer("batch_size", 32, "The size of batch images [32]")
    hparams.DEFINE_integer("depth", 1, "Depth [1]")
    hparams.DEFINE_integer("max_nsteps", 1000, "Max number of steps [1000]")
    hparams.DEFINE_integer("number_of_hidden_units", 512, "The size of hidden layers")
    hparams.DEFINE_float("learning_rate", 5e-5, "Learning rate [0.00005]")
    hparams.DEFINE_float("momentum", 0.9, "Momentum of RMSProp [0.9]")
    hparams.DEFINE_float("keep_prob", 0.7, "keep_prob [0.5]")
logs = os.path.join(directory, 'logs')
trainloss = os.path.join(logs, 'train_loss.txt')
if not os.path.isdir(logs):
    os.makedirs(logs)

# choose network, can be either DRN18 or DRN26
network = 'DRN26'

# set parameters
batch_size = 8
num_epochs = 100
use_weights = 1
num_classes = 5
image_dims = [500, 500, 3]

data = DataReader(directory, batch_size, num_epochs, use_weights=use_weights)
train_data = data.train_batch(train_file)
num_train_images = data.num_images
test_data = data.test_batch(test_file)
num_val_images = data.num_images

# determine number of iterations based on number of images
training_iterations = int(np.floor(num_train_images / batch_size))
validation_iterations = int(np.floor(num_val_images / batch_size))

handle = tf.placeholder(tf.string, shape=[])
# create iterator allowing us to switch between datasets
iterator = tf.data.Iterator.from_string_handle(handle, train_data.output_types,
                                               train_data.output_shapes)
next_element = iterator.get_next()
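# A sketch (TF1 style, under the assumption that train_data/test_data are
# tf.data.Dataset objects) of how the string handle above is typically consumed:
train_iterator = train_data.make_one_shot_iterator()
test_iterator = test_data.make_initializable_iterator()
with tf.Session() as sess:
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    # feed train_handle or test_handle to switch datasets at run time
    batch = sess.run(next_element, feed_dict={handle: train_handle})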
def _get_data_loader(self, data_conf):
    loader = DataReader(data_conf, self.logger, self.n_fold)
    return loader
from DataReader import *
from AGDSStructure import *
from AGDSKNearest import *
import numpy as np


def classify(data_holder, model, X):
    predicted_label = model.find_similarity(np.array(X))
    win_class = data_holder.get_real_label(predicted_label)
    print(win_class)


if __name__ == '__main__':
    data_reader = DataReader("IrisData.xls")
    agds_structure = AGDSStructure(data_reader.data_frame, data_reader.label)
    k_nearest = AGDSKNearest(agds_structure, 3)
    classify(data_reader, k_nearest, [4.5, 3.0, 1.1, 0.1])
    classify(data_reader, k_nearest, [7.0, 3.2, 4.7, 1.4])
    classify(data_reader, k_nearest, [5.0, 2.0, 4.0, 1.0])
    classify(data_reader, k_nearest, [5.7, 2.5, 4.8, 1.6])
def train(args):
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    training_epochs = args.training_epochs
    display_step = args.display_step
    checkpoint_step = args.checkpoint_step  # save training results every checkpoint step
    z_dim = args.z_dim  # number of latent variables
    path = args.indir  # input file

    if args.train:
        dirname = 'save_train'
    else:
        dirname = 'save'
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    with open(os.path.join(dirname, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)

    vae = ConvVAE(learning_rate=learning_rate, batch_size=batch_size,
                  z_dim=z_dim, train=args.train)

    # mnist = read_data_sets()
    # n_samples = mnist.num_examples
    celabdata = DataReader.DataReader(batch_size=batch_size)
    # path = '/home/master/05/john81923/data/VLDS2018/hw4_dataset/hw4_data'
    train_path = os.path.join(path, 'train')
    test_path = os.path.join(path, 'test')
    print(train_path)
    print(test_path)
    celabdata.get_data(train_path)
    test_data = celabdata.testdata(test_path)
    n_samples = celabdata.datanumb

    # load previously trained model if applicable
    ckpt = tf.train.get_checkpoint_state(dirname)
    if ckpt:
        vae.load_model(dirname)

    # training cycle
    step = 0
    steps = []
    KLD_fig = []
    MSE_fig = []
    print('plotting fig1_2... please wait.....')
    for epoch in range(training_epochs):
        avg_cost = 0.
        # mnist.shuffle_data()
        train_batch = celabdata.minibatch()
        total_batch = int(n_samples / batch_size)
        print(total_batch)
        # loop over all batches
        for i in range(total_batch):
            # batch_xs = mnist.next_batch(batch_size)
            batch_xs = train_batch[i]
            # fit training using batch data
            cost, mse, kl_loss, new_image, z_log_sigma_sq = vae.partial_fit(batch_xs)
            # display logs per epoch step
            if i % display_step == 0:
                scipy.misc.imsave('hat.jpg', new_image[0].reshape((64, 64, 3)))
                steps.append(step)
                KLD_fig.append(kl_loss)
                MSE_fig.append(mse)
            step += 1
            # compute average loss
            avg_cost += cost / n_samples * batch_size
        # save model
        if epoch >= 0 and epoch % checkpoint_step == 0:
            checkpoint_path = os.path.join('save', 'model.ckpt')
            vae.save_model(checkpoint_path, epoch)
            print("model saved to {}".format(checkpoint_path))

    save_path = 'repro/'
    fig = plt.figure()
    plt.title('KLD')
    plt.plot(steps, KLD_fig)
    plt.savefig(save_path + 'tmpfig1_2.jpg', format='png')
    fig = plt.figure()
    plt.title('MSE')
    plt.plot(steps, MSE_fig)
    plt.savefig(save_path + 'tmpfig1_2_.jpg', format='png')

    pillist = [save_path + 'tmpfig1_2.jpg', save_path + 'tmpfig1_2_.jpg']
    pilimages = []  # images in each folder
    for file in pillist:
        pilimages.append(Image.open(file))
        w, h = Image.open(file).size
    fig_1_2(pilimages, os.path.join(args.outdir, 'fig1_2.jpg'), w, h)

    # save model one last time, under zero label to denote finish
    # vae.save_model(checkpoint_path, 0)
    return vae
def test(args):
    learning_rate = args.learning_rate
    batch_size = 1  # args.batch_size
    training_epochs = args.training_epochs
    display_step = args.display_step
    checkpoint_step = args.checkpoint_step  # save training results every checkpoint step
    z_dim = args.z_dim  # number of latent variables

    dirname = 'save'
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    with open(os.path.join(dirname, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)

    vae = ConvVAE(learning_rate=learning_rate, batch_size=batch_size, z_dim=z_dim)

    # mnist = read_data_sets()
    # n_samples = mnist.num_examples
    celabdata = DataReader.DataReader(batch_size=batch_size)
    path = args.indir  # input file
    # train_path = os.path.join(path, 'train')
    test_path = os.path.join(path, 'test')
    # print(train_path)
    print(test_path)
    # celabdata.get_data(train_path)
    test_data = celabdata.testdata(test_path)
    n_samples = celabdata.datanumb

    # load previously trained model if applicable
    ckpt = tf.train.get_checkpoint_state(dirname)
    if ckpt:
        vae.load_model(dirname)

    UNIT_SIZE = 64
    target = Image.new('RGB', (UNIT_SIZE * 10, UNIT_SIZE * 2), 255)
    leftone = 0
    lefttwo = 0
    rightone = UNIT_SIZE
    righttwo = UNIT_SIZE
    avg_cost = 0.
    # mnist.shuffle_data()
    # train_batch = celabdata.minibatch()
    # total_batch = int(n_samples / batch_size)

    # reconstruct ten test images (fig 1_3)
    steps = []
    KLD_fig = []
    MSE_fig = []
    pillist = []
    for i in range(10):
        # batch_xs = mnist.next_batch(batch_size)
        new_image, z = vae.testing_fit(test_data[i].reshape((1, 64, 64, 3)))
        scipy.misc.imsave('repro/1_3out{}.jpg'.format(i), new_image[0].reshape((64, 64, 3)))
        scipy.misc.imsave('repro/1_3in{}.jpg'.format(i), test_data[i].reshape((64, 64, 3)))
        pillist.append('repro/1_3out{}.jpg'.format(i))
        pillist.append('repro/1_3in{}.jpg'.format(i))
    pilimages = []  # images in each folder
    for file in pillist:
        pilimages.append(Image.open(file))
    pinjie(pilimages, os.path.join(args.outdir, 'fig1_3.jpg'))

    # sample 32 images from random latent codes (fig 1_4)
    pillist_14 = []
    for i in range(32):
        # eps = tf.random_normal((10, 512), 0.0, 1.0, dtype=tf.float32)
        z = np.random.uniform(-1, 1, [1, 512]).astype(np.float32)
        new_image = vae.testing_1_4(z)
        scipy.misc.imsave('repro/1_4out{}.jpg'.format(i), new_image[0].reshape((64, 64, 3)))
        pillist_14.append('repro/1_4out{}.jpg'.format(i))
    pilimages2 = []  # images in each folder
    for file in pillist_14:
        pilimages2.append(Image.open(file))
    fig_1_4(pilimages2, os.path.join(args.outdir, 'fig1_4.jpg'))

    # t-SNE of the latent codes (fig 1_5)
    imgdata = celabdata.testdata_tsne(path)
    with open('repro/tsne_z.txt', 'w') as f:
        mse_sum = 0
        for i in range(len(imgdata)):
            _, mse, z = vae.mse_fit(imgdata[i].reshape((1, 64, 64, 3)))
            mse_sum += mse
            zin = ''
            for t in range(512):
                zin += '%.4f ' % z[0][t]
            f.write(zin + '\n')
    print('test set mse : ', mse_sum)

    fdata = "repro/tsne_z.txt"
    ftarget = "repro/tsne_lb.txt"
    iris = chj_load_file(fdata, ftarget)
    X_tsne = TSNE(n_components=2, learning_rate=100).fit_transform(iris.data)
    # X_pca = PCA().fit_transform(iris.data)
    print("finished!")
    plt.figure()
    # plt.subplot(121)
    plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=iris.target)
    plt.savefig(os.path.join(args.outdir, 'fig1_5.jpg'), format='png')
    return vae
import tensorflow as tf
from CellSeg_CNN import *
import numpy as np
import DataReader

# Reading the images
data_reader = DataReader.DataReader()
input_reader = data_reader.input_reader
training_images = data_reader.training_images
if input_reader.use_data_rotation:
    rotated_images = data_reader.pi_half_rotated_images
number_of_training_images = np.size(training_images, axis=0)
image_height = np.size(training_images, axis=1)
image_width = np.size(training_images, axis=2)
test_images = data_reader.test_images
number_of_test_images = np.size(test_images, axis=0)

# Reading the ground truth classes
[training_classes, training_defined_samples] = data_reader.training_classes
if input_reader.use_data_rotation:
    [rotated_classes, rotated_defined_mask] = data_reader.pi_half_rotated_classes_and_masks
[test_classes, test_defined_samples] = data_reader.test_classes

# Reading parameters
learning_rate = input_reader.learning_rate
regularisation_param = tf.constant(input_reader.regularisation_parameter)
n_epochs = input_reader.number_of_epochs
tensorboard_file_location = input_reader.tensorboard_location
input_patch_width = input_reader.input_patch_width
def ShowResult(net, X, Y, title, wb1, wb2):
    # draw train data
    plt.plot(X[0, :], Y[0, :], '.', c='b')
    # create and draw visualized validation data
    TX = np.linspace(0, 1, 100).reshape(1, 100)
    dict_cache = net.ForwardCalculationBatch(TX, wb1, wb2)
    TY = dict_cache["Output"]
    plt.plot(TX, TY, 'x', c='r')
    plt.title(title)
    plt.show()
# end def


if __name__ == '__main__':
    dataReader = DataReader(x_data_name, y_data_name)
    dataReader.ReadData()
    dataReader.NormalizeX()
    dataReader.NormalizeY()
    n_input, n_hidden, n_output = 1, 3, 1
    eta, batch_size, max_epoch = 0.5, 10, 50000
    eps = 0.001
    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps)  # SGD, MiniBatch, FullBatch
    loss_history = CLossHistory()
    net = TwoLayerFittingNet()
    wb1, wb2 = net.train(dataReader, params, loss_history)
# TrendsScraper
# YahooFinanceScraper
from pandas.io.data import DataReader
from datetime import datetime

goog = DataReader("GOOG", "yahoo", datetime(2000, 1, 1), datetime(2012, 1, 1))
goog["Adj Close"]
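# pandas.io.data was removed from pandas itself; the equivalent call now lives
# in the separate pandas-datareader package. A sketch, assuming that package is
# installed (the "yahoo" source has been unreliable in recent years):
from pandas_datareader.data import DataReader
from datetime import datetime

goog = DataReader("GOOG", "yahoo", datetime(2000, 1, 1), datetime(2012, 1, 1))
print(goog["Adj Close"].head())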