def __init__(self):
    self.test_data = load_data.LoadData().get_test_set()
    self.operator = calculate.Calculate()
    self.item_based = item_based.ItemBased()
def get_input(sample_type, shuffle_documents, pad, trained_sent2vec_model=None):
    # Returns X, Y
    # X: Each row is a sample
    # Y: A 1-D vector for ground truth
    # Also pads the sample input as per the mentioned value of INPUT_VECTOR_LENGTH if needed
    start = time.time()
    data_handler = DataHandler()
    print "==========================================="
    if sample_type == 1:
        # NOT SURE ABOUT THIS TYPE!
        # Get samples, each sample is a set of INPUT_VECTOR_LENGTH consecutive sentences.
        # No document information captured.
        sample_type, samples = data_handler.get_samples()
    elif sample_type == 2:
        # NOTE: this branch shadows the `sample_type in (2, 3)` branch below for type 2,
        # so that branch only ever handles type 3.
        ld = load_data.LoadData()
        sample_type, samples = ld.load_wikipedia_sequence()
    elif sample_type in (2, 3):
        # type2 : Get samples, each sample is a document (a set of sentences resulting in a sequence), or, (NUM_DOCUMENTS, NUM_SENTENCES, SENTENCE)
        # type3 : Same as type2, just merge the samples to remove the sequence information and treat it as a simple sentence classification problem, i.e. (TOTAL_NUM_SENTENCES, SENTENCE)
        # This processing will be done in cnn_clssifier.py itself.
        sample_type, samples = data_handler.get_sequence_samples(sample_type)
        #sample_type, samples = data_handler.get_sequence_samples_PARALLEL()  # Get samples, each sample is a document (a set of sentences resulting in a sequence)
    elif sample_type == 4:
        # type4: Clinical sequence of multiple samples
        # X.shape = (MULTIPLE_SAMPLES, TOTAL_SENTENCES)
        # Y.shape = (MULTIPLE_SAMPLES, TOTAL_SENTENCES, 1)
        ld = load_data.LoadData()
        sample_type, samples = ld.load_clinical_sequence()
    elif sample_type == 5:
        # type5: Biography sequence of a single sample
        # X.shape = (1, TOTAL_SENTENCES)
        # Y.shape = (TOTAL_SENTENCES, 1)
        ld = load_data.LoadData()
        sample_type, samples = ld.load_biography_sequence()
    elif sample_type == 6:
        # type6: Fiction sequence of multiple documents
        # X.shape = (NO_OF_BOOKS, TOTAL_SENTENCES)
        # Y.shape = (NO_OF_BOOKS, TOTAL_SENTENCES, 1)
        ld = load_data.LoadData()
        sample_type, samples = ld.load_fiction_sequence()
    elif sample_type == 7:
        # type7: Wiki sequence of multiple samples
        # Data format is just like the clinical sequence, as each line is a sentence
        # X.shape = (MULTIPLE_DOCUMENTS, TOTAL_SENTENCES)
        # Y.shape = (MULTIPLE_DOCUMENTS, TOTAL_SENTENCES, 1)
        ld = load_data.LoadData()
        sample_type, samples = ld.load_wikipedia_sequence()
    else:
        print "NOTE: INVALID SAMPLE_TYPE!"
        return None

    del data_handler
    print "Samples Loading took", time.time() - start, "seconds"

    model = trained_sent2vec_model
    if not trained_sent2vec_model:
        #model = TFIDF(samples)
        #model = MeanWord2vec()
        #model = TFIDFweightedMeanWord2vec(samples)
        model = CustomSent2vec()

    X, Y = [], []
    _total_samples, _start_time = len(samples), time.time()
    print len(samples)
    #pdb.set_trace()
    for _idx, sample in enumerate(samples):
        # Each sample is a document
        # Each sample is a list of tuples with each tuple as (sentence, groundTruth)
        sentences, groundTruths = zip(*sample)  # Unpack a sample

        ## Create Wikipedia test set
        CREATE_WIKI_TEST_SET = False
        if CREATE_WIKI_TEST_SET:
            wiki_prefix = "wiki_save/wiki_test"
            if _idx >= 300:
                break
            with open(wiki_prefix + "_" + str(_idx + 1) + ".ref", "a") as f:
                for (_s, _g) in sample:
                    if _g:
                        f.write("==========\r\n")
                    f.write(_s + "\r\n")
                f.write("==========\r\n")
        else:
            # Traditional code
            if not _idx % 50:
                progbar.simple_update("Converting doc to matrices", _idx + 1,
                                      _total_samples,
                                      time_elapsed=(time.time() - _start_time))
            if sample_type == 1:
                # Correct groundtruth sync problem here
                sentences, groundTruths = model.convert_sample_to_vec(
                    sentences, groundTruths)
            elif sample_type in (2, 3, 4, 5, 6, 7):
                sentences, groundTruths = model.convert_sequence_sample_to_vec(
                    sentences, groundTruths)
            else:
                print "Wrong Sample TYPE"
            if sentences is None:
                continue
            X.append(sentences)  # X[0].shape = matrix([[1,2,3,4.....]])
            Y.append(np.asarray(groundTruths))  # Y[0] = [1, 0, 0, ..... 0, 1, 0, 1....]

    progbar.simple_update("Creating a standalone matrix for samples...", -1, -1)
    X, Y = np.asarray(X), np.asarray(Y)
    progbar.end()
    print "Total samples: %d" % (len(X))

    if shuffle_documents:
        # Shuffle the X's and Y's if required
        # Both of them have to be shuffled in unison
        X, Y = unison_shuffled_copies(X, Y)
        print "SHUFFLE: Shuffled input document order! (X:", X.shape, ", Y:", Y.shape, ")"

    if sample_type == 2 and pad == False:
        print "NOTE: Sample type2 requires PADDING!"

    if pad:
        #### THIS PAD is messy!!!!
        ### Check once before padding
        if STATIC_PAD:
            max_len = AVERAGE_WORDS
        else:
            max_len = None  # Uses the max length of the sequences
        # NOTE: max_len is computed here but never passed to pad_sequences below
        doc_lengths = [len(doc) for doc in X]
        print "Padding sequences. Doc-lengths: Mean=%d, Std=%d" % (
            np.mean(doc_lengths), np.std(doc_lengths))
        X = pad_sequences(X, padding="post", truncating="post", value=0.0,
                          dtype=np.float32)
        Y = pad_sequences(Y, padding="post", truncating="post", value=0.0,
                          dtype=np.float32)
        print "Size of new X(after padding):", X.shape

    return sample_type, X, Y, model
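# --- Illustration only (not part of the original module) ---
# A minimal, self-contained sketch of the `pad` step above, assuming the
# pad_sequences used there is keras' implementation. The toy documents and
# shapes are made up purely to show the post-padding/truncating behaviour.
import numpy as np
from keras.preprocessing.sequence import pad_sequences

# Two "documents": 3 and 1 sentences, each sentence a 4-dim vector
doc_a = [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1.0, 1.1, 1.2]]
doc_b = [[1.3, 1.4, 1.5, 1.6]]

X_demo = pad_sequences([doc_a, doc_b], padding="post", truncating="post",
                       value=0.0, dtype=np.float32)
print(X_demo.shape)  # (2, 3, 4): the shorter document is zero-padded at the end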
def RunTest(
        params,
        model_name_template='models_3/{model}_{backbone}_{optimizer}_{augmented_image_size}-{padded_image_size}-{nn_image_size}_lrf{lrf}_{metric}_{CC}_f{test_fold_no}_{phash}'
):
    # # Params

    # In[ ]:
    DEV_MODE_RANGE = 0  # off

    # In[ ]:

    # In[ ]:
    def params_dict():
        return {
            x[0]: x[1]
            for x in vars(params).items() if not x[0].startswith('__')
        }

    def params_str():
        return '\n'.join([
            repr(x[0]) + ' : ' + repr(x[1]) + ','
            for x in vars(params).items() if not x[0].startswith('__')
        ])

    def params_hash(shrink_to=6):
        import hashlib
        import json
        return hashlib.sha1(
            json.dumps(params_dict(),
                       sort_keys=True).encode()).hexdigest()[:shrink_to]

    def params_save(fn, verbose=True):
        params_fn = fn + '.param.txt'
        with open(params_fn, 'w+') as f:
            s = params_str()
            hash = params_hash(shrink_to=1000)
            s = '{\n' + s + '\n}\nhash: ' + hash[:6] + ' ' + hash[6:]
            f.write(s)
            if verbose:
                print('params: ' + s + '\nsaved to ' + params_fn)

    # # Imports

    # In[ ]:
    import sys
    #sys.path.append(r'D:\Programming\3rd_party\keras')

    # In[ ]:
    import sys
    from imp import reload
    import numpy as np
    import keras
    import datetime
    import time
    from keras.models import Model, load_model
    from keras.layers import Input, Dropout, BatchNormalization, Activation, Add
    from keras.layers.core import Lambda
    from keras.layers.convolutional import Conv2D, Conv2DTranspose
    from keras.layers.pooling import MaxPooling2D
    from keras.layers.merge import concatenate
    from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
    from keras import backend as K
    import tensorflow as tf

    # # Load data

    # In[ ]:
    import load_data
    load_data = reload(load_data)
    import keras_unet_divrikwicky_model
    keras_unet_divrikwicky_model = reload(keras_unet_divrikwicky_model)

    # In[ ]:
    train_df = load_data.LoadData(train_data=True,
                                  DEV_MODE_RANGE=DEV_MODE_RANGE,
                                  to_gray=False)

    # In[ ]:
    train_df.images[0].shape

    # In[ ]:
    train_images, train_masks, validate_images, validate_masks = load_data.SplitTrainData(
        train_df, params.test_fold_no)
    train_images.shape, train_masks.shape, validate_images.shape, validate_masks.shape

    # # Reproducibility setup:

    # In[ ]:
    import random as rn
    import os
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(params.seed)
    rn.seed(params.seed)
    #session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    tf.set_random_seed(params.seed)
    #sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    sess = tf.Session(graph=tf.get_default_graph())
    K.set_session(sess)

    # # IOU metric

    # In[ ]:
    thresholds = np.array(
        [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95])

    def iou(img_true, img_pred):
        assert (img_true.shape[-1] == 1) and (len(img_true.shape) == 3) or (
            img_true.shape[-1] != 1) and (len(img_true.shape) == 2)
        i = np.sum((img_true * img_pred) > 0)
        u = np.sum((img_true + img_pred) > 0)
        if u == 0:
            return 1
        return i / u

    def iou_metric(img_true, img_pred):
        img_pred = img_pred > 0.5  # added by sgx 20180728
        if img_true.sum() == img_pred.sum() == 0:
            scores = 1
        else:
            scores = (thresholds <= iou(img_true, img_pred)).mean()
        return scores

    def iou_metric_batch(y_true_in, y_pred_in):
        batch_size = len(y_true_in)
        metric = []
        for batch in range(batch_size):
            value = iou_metric(y_true_in[batch], y_pred_in[batch])
            metric.append(value)
        #print("metric = ",metric)
        return np.mean(metric)

    # adapter for Keras
    def my_iou_metric(label, pred):
        metric_value = tf.py_func(iou_metric_batch, [label, pred], tf.float64)
        return metric_value

    # # Data generator

    # In[ ]:
    mean_val = np.mean(train_images.apply(np.mean))
    mean_std = np.mean(train_images.apply(np.std))
    mean_val, mean_std

    #####################################
    def FillCoordConvNumpy(imgs):
        print(imgs.shape)
        assert len(imgs.shape) == 4
        assert imgs.shape[3] == 3
        n = imgs.shape[2]
        hor_img = np.linspace(-1., 1., n).reshape((1, 1, n, 1))
        n = imgs.shape[1]
        ver_img = np.linspace(-1., 1., n).reshape((1, n, 1, 1))
        imgs[:, :, :, 0:1] = hor_img
        imgs[:, :, :, 2:3] = ver_img

    def FillCoordConvList(imgs):
        print(imgs.shape)
        assert len(imgs[0].shape) == 3
        assert imgs[0].shape[2] == 3
        for img in imgs:
            n = img.shape[1]
            hor_img = np.linspace(-1., 1., n).reshape((1, n, 1))
            n = img.shape[0]
            ver_img = np.linspace(-1., 1., n).reshape((n, 1, 1))
            img[:, :, 0:1] = hor_img
            img[:, :, 2:3] = ver_img

    if params.coord_conv:
        FillCoordConvList(train_images)
        FillCoordConvList(validate_images)
        print(train_images[0][0, 0, 0], train_images[0][0, 0, 2])
        assert train_images[0][0, 0, 0] == -1.
        assert train_images[0][0, 0, 2] == 1.
    ######################################

    from my_augs import AlbuDataGenerator

    # # model

    # In[ ]:
    sys.path.append('../3rd_party/segmentation_models')
    import segmentation_models
    segmentation_models = reload(segmentation_models)
    from segmentation_models.utils import set_trainable

    # In[ ]:
    if not hasattr(params, 'model_params'):
        params.model_params = {}

    if params.load_model_from:
        model = load_model(params.load_model_from,
                           custom_objects={'my_iou_metric': my_iou_metric})
        print('MODEL LOADED from: ' + params.load_model_from)
    else:
        model = None
        if params.model == 'FNN':
            model = segmentation_models.FPN(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                dropout=params.dropout,
                **params.model_params)
        if params.model == 'FNNdrop':
            model = segmentation_models.FPNdrop(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                dropout=params.dropout,
                **params.model_params)
        if params.model == 'Unet':
            model = segmentation_models.Unet(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                **params.model_params)
        if params.model == 'Linknet':
            model = segmentation_models.Linknet(
                backbone_name=params.backbone,
                input_shape=(None, None, params.channels),
                encoder_weights=params.initial_weightns,
                freeze_encoder=True,
                **params.model_params)
        if params.model == 'divrikwicky':
            model = keras_unet_divrikwicky_model.CreateModel(
                params.nn_image_size, **params.model_params)
            params.backbone = ''
    assert model

    for l in model.layers:
        if isinstance(
                l, segmentation_models.fpn.layers.UpSampling2D) or isinstance(
                    l, keras.layers.UpSampling2D):
            print(l)
            if hasattr(l, 'interpolation'):
                print(l.interpolation)
                if hasattr(params, 'model_params'
                           ) and 'interpolation' in params.model_params:
                    l.interpolation = params.model_params['interpolation']
            else:
                print('qq')

    if hasattr(params,
               'kernel_constraint_norm') and params.kernel_constraint_norm:
        for l in model.layers:
            if hasattr(l, 'kernel_constraint'):
                print('kernel_constraint for ', l, ' is set to ',
                      params.kernel_constraint_norm)
                l.kernel_constraint = keras.constraints.get(
                    keras.constraints.max_norm(params.kernel_constraint_norm))

    # In[ ]:
    model_out_file = model_name_template.format(
        lrf=params.ReduceLROnPlateau['factor'],
        metric=params.monitor_metric[0],
        CC='CC' if params.coord_conv else '',
        **vars(params)) + '_f{test_fold_no}_{phash}'.format(
            test_fold_no=params.test_fold_no, phash=params_hash())
    now = datetime.datetime.now()
    print('model: ' + model_out_file + ' started at ' +
          now.strftime("%Y.%m.%d %H:%M:%S"))
    assert not os.path.exists(model_out_file + '.model')
    params_save(model_out_file, verbose=True)
    log_out_file = model_out_file + '.log.csv'

    # In[ ]:
    #model = load_model(model1_file, )  #, 'lavazs_loss': lavazs_loss

    # # Train

    # In[ ]:
    optimizer = params.optimizer
    if optimizer == 'adam':
        optimizer = keras.optimizers.adam(**params.optimizer_params)
    elif optimizer == 'sgd':
        optimizer = keras.optimizers.sgd(**params.optimizer_params)
    model.compile(loss="binary_crossentropy",
                  optimizer=optimizer,
                  metrics=["acc", my_iou_metric])  #, my_iou_metric

    # In[ ]:
    if params.coord_conv:
        mean = ((0, mean_val, 0), (1, mean_std, 1))
    else:
        mean = (mean_val, mean_std)

    train_gen = AlbuDataGenerator(train_images,
                                  train_masks,
                                  batch_size=params.batch_size,
                                  nn_image_size=params.nn_image_size,
                                  mode=params.train_augmentation_mode,
                                  shuffle=True,
                                  params=params,
                                  mean=mean)
    val_gen = AlbuDataGenerator(validate_images,
                                validate_masks,
                                batch_size=params.test_batch_size,
                                nn_image_size=params.nn_image_size,
                                mode=params.test_augmentation_mode,
                                shuffle=False,
                                params=params,
                                mean=mean)

    # In[ ]:
    sys.path.append('../3rd_party/keras-tqdm')
    from keras_tqdm import TQDMCallback, TQDMNotebookCallback

    # In[ ]:
    start_t = time.clock()

    if params.epochs_warmup:
        history = model.fit_generator(
            train_gen,
            validation_data=None,
            epochs=params.epochs_warmup,
            callbacks=[TQDMNotebookCallback(leave_inner=True)],
            validation_steps=None,
            workers=5,
            use_multiprocessing=False,
            verbose=0)
        set_trainable(model)

    batches_per_epoch = len(train_images) // params.batch_size
    print("batches per epoch: ", batches_per_epoch)
    test_epochs = 30
    steps = test_epochs * batches_per_epoch
    val_period = steps // 1000
    print("steps: ", steps, " val_period", val_period)

    # EvalLrTest is assumed to be defined or imported elsewhere in this module
    lr_sheduler = EvalLrTest(log_out_file,
                             val_gen,
                             val_period=val_period,
                             steps=steps)

    history = model.fit_generator(
        train_gen,
        validation_data=None,
        epochs=params.epochs,
        initial_epoch=params.epochs_warmup,
        callbacks=[TQDMNotebookCallback(leave_inner=True), lr_sheduler],
        validation_steps=None,
        workers=5,
        use_multiprocessing=False,
        verbose=0)

    # In[ ]:
    print(params_str())
    print('done: ' + model_out_file)
    print('elapsed: {}s ({}s/iter)'.format(
        time.clock() - start_t,
        (time.clock() - start_t) / len(history.epoch)))

    return model
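# --- Illustration only (not part of the original script) ---
# A self-contained sketch of the thresholded-IoU metric defined above,
# evaluated on two toy binary masks. Shapes and values are made up.
import numpy as np

demo_thresholds = np.array([0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95])

img_true = np.zeros((4, 4)); img_true[:2, :2] = 1   # ground-truth mask, 4 positive pixels
img_pred = np.zeros((4, 4)); img_pred[:2, :3] = 1   # predicted mask, 6 positive pixels

i = np.sum((img_true * img_pred) > 0)               # intersection = 4
u = np.sum((img_true + img_pred) > 0)               # union = 6
score = (demo_thresholds <= i / u).mean()           # IoU ~= 0.667 -> 4 of 10 thresholds pass
print(i / u, score)                                 # 0.666..., 0.4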
class ZjdxDataLinux(object):
    """Load Zhejiang Telecom data.

    Load Zhejiang Telecom data into redis.

    Attributes:
        base_dir: input data path.
    """

    def __init__(self):
        """Initialization for ZjdxDataLinux.

        Args:
            no
        """
        self.dir_list = ['/home/zjdx/', '/home/jsdx/today_data/']
        # self.dir_list = ['D:/home/zjdx/', 'D:/home/jsdx/']
        self.mysqlconn = MySQLdb.connect(host='120.55.189.211',
                                         user='******',
                                         passwd='hadoop',
                                         db='stock')
        self.cursor = self.mysqlconn.cursor()

    def main(self):
        """Main function.

        Args:
            no
        """
        for p in self.dir_list:
            self.just_do_it(p)
        self.cursor.close()
        self.mysqlconn.close()

    def just_do_it(self, Path):
        """Process one input directory.

        Args:
            Path: directory to scan for new data files.
        """
        all_file = os.listdir(Path)
        file_list = []
        for file_name in all_file:
            result_1 = re.search(r'^kunyan_\d{10}$', file_name)
            result_2 = re.search(r'^jsdx_\d{10}$', file_name)
            result = result_1 or result_2
            if result:
                file_list.append(file_name)

        # old_file_list = []
        # new_file = open(dir+'files', 'a+')
        # new_file.close()
        #
        # file_old = open(dir+'files', 'r')
        # for lines in file_old:
        #     old_file_list.append(lines.strip('\n'))
        # file_old.close()

        try:
            self.cursor.execute("select * from unbacked_redis_data")
        except Exception, e:
            print e
        result = self.cursor.fetchall()
        old_file_list = []
        for line in result:
            old_file_list.append(line[0])

        tag = 0
        for line in file_list:
            if line not in old_file_list:
                tag = 1
                log = open(Path + 'log', 'a+')
                log_time = time.strftime('%Y-%m-%d %H:%M:%S')
                begin_out = line + " begin_time: " + log_time + "\n"
                log.write(begin_out)
                print begin_out
                log.close()

                load = load_data.LoadData(Path + line)
                load.main()

                try:
                    self.cursor.execute("INSERT INTO unbacked_redis_data"
                                        "(unbacked_redis_file) VALUES ('%s')" % line)
                    self.mysqlconn.commit()
                except Exception, e:
                    print e

                isexists = os.path.exists(Path + "unbacked_redis_files/")
                if not isexists:
                    os.makedirs(Path + "unbacked_redis_files/")
                    print Path + "unbacked_redis_files" + u' 创建成功\n'  # "created successfully"
                isexists = os.path.exists(Path + "unbacked_redis_files/" + line)
                if isexists:
                    file_size_1 = os.path.getsize(Path + "unbacked_redis_files/" + line)
                    file_size_2 = os.path.getsize(Path + line)
                    if file_size_1 < file_size_2:
                        os.remove(Path + "unbacked_redis_files/" + line)
                        shutil.move(Path + line, Path + "unbacked_redis_files")
                else:
                    shutil.move(Path + line, Path + "unbacked_redis_files")

                # file_new = open(dir+'files', 'a+')
                # file_new.write(line)
                # file_new.write('\n')
                # file_new.close()

                log = open(Path + 'log', 'a+')
                log_time = time.strftime('%Y-%m-%d %H:%M:%S')
                end_out = line + " end_time: " + log_time + "\n"
                log.write(end_out)
                print end_out
                log.close()
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import linear_model, datasets
from sklearn.cross_validation import train_test_split

import load_data

print("===================")
d = load_data.LoadData()

### Generate Y
y = d.loc[:, "IsDefault"].values
count_default = sum(y)
print "bad sample: {0}".format(count_default)
print "Default Rate: {0}%".format(100.0 * count_default / len(y))
print("===================")

### Generate X
# Columns (kept in the original Chinese): loan amount, loan term, interest rate,
# age, number of past successful loans, phone / household / video / education /
# credit-report / Taobao verification
x_data = d.loc[:, [
    u"借款金额", u"借款期限", u"借款利率", u"年龄", u"历史成功借款次数", u"手机认证", u"户口认证",
    u"视频认证", u"学历认证", u"征信认证", u"淘宝认证"
]]  #, u"初始评级"
x_original = x_data.values
# -*- coding: utf-8 -*-
import settings
import load_data
import json


class DumpData:
    def __init__(self, data_path, data):
        self.data_path = data_path
        self.data = data

    def dump_json(self):
        with open(self.data_path, 'w') as f:
            json.dump(self.data, f)


if __name__ == "__main__":
    json_train = load_data.LoadData(settings.DATA_TRAIN_PATH).get_json()
    DumpData(settings.DATA_TRAIN_JSON, json_train).dump_json()
    json_test = load_data.LoadData(settings.DATA_TEST_PATH).get_json()
    DumpData(settings.DATA_TEST_JSON, json_test).dump_json()
# This uses a hard-vote-esque method to determine the mismatch: it sums the
# mismatch predictions (1s) across models and divides by the number of models.
# If the resulting vote is greater than 0.5 the sample is flagged as a mismatch,
# otherwise it is not.
import learner_functions as lf
import load_data as ld

data = ld.LoadData()
mismatch_labels = data.mismatch['mismatch'].tolist()


def find_mismatch_indices_hard(models, data, labels, type="default"):
    predictionForEachModel = list()
    mismatchIndices = list()
    for model in models:
        predictionForEachModel.append(
            lf.make_test_prediction(model, data, labels, False))
    for index in range(len(predictionForEachModel[0])):
        predictionSum = 0
        for array in predictionForEachModel:
            predictionSum += array[index]
        finalPrediction = predictionSum / len(models)
        if (finalPrediction > 0.5):
            mismatchIndices.append(index)
    return mismatchIndices
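# --- Illustration only (not part of the original pipeline) ---
# A toy, self-contained example of the hard-vote rule described above: each
# model emits 0/1 mismatch predictions; the mean vote is thresholded at 0.5.
import numpy as np

model_predictions = [
    np.array([1, 0, 1, 0]),   # model A
    np.array([1, 1, 0, 0]),   # model B
    np.array([1, 0, 0, 0]),   # model C
]
votes = np.mean(model_predictions, axis=0)          # [1.0, 0.333, 0.333, 0.0]
mismatch_indices = [i for i, v in enumerate(votes) if v > 0.5]
print(mismatch_indices)                             # [0]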
import tensorflow as tf
from sklearn import cross_validation
import load_data as ld

data_X, data_Y = ld.LoadData()
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
    data_X, data_Y, test_size=0.2, random_state=0)

size = 100
sizes = [100, 100]
iter = 200
batch_size = 50
epoch = int(X_train.shape[0] / batch_size)


def Layer(input, in_size, out_size, active_function=None):
    W = tf.Variable(tf.zeros([in_size, out_size]))
    b = tf.Variable(tf.zeros([1, out_size]))
    output = tf.add(tf.matmul(input, W), b)
    if active_function is not None:
        output = active_function(output)
    return output


sess = tf.Session()
with sess.as_default():
    x = tf.placeholder("float", shape=[None, X_train.shape[1]])
    y_ = tf.placeholder("float")
    # l1 = Layer(x, X_train.shape[1], size)
    # l1 = tf.nn.dropout(l1,0.7)
    # y = Layer(l1, size, 1)
    l1 = Layer(x, X_train.shape[1], sizes[0], active_function=tf.nn.relu)
    l2 = Layer(l1, sizes[0], sizes[1])
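    # --- Hypothetical continuation (an assumption, not the original author's code) ---
    # The snippet above ends mid-definition. One plain way to finish it, assuming a
    # scalar regression target and that X_train / Y_train are numpy arrays, is a
    # final linear layer, an MSE loss, and a simple mini-batch TF1 training loop:
    y = Layer(l2, sizes[1], 1)
    loss = tf.reduce_mean(tf.square(y - y_))
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)
    sess.run(tf.global_variables_initializer())
    for i in range(iter):
        start = (i % epoch) * batch_size
        batch_x = X_train[start:start + batch_size]
        batch_y = Y_train[start:start + batch_size].reshape(-1, 1)  # (batch, 1) to match y
        sess.run(train_step, feed_dict={x: batch_x, y_: batch_y})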
def evaluate(args):
    # load test data
    data = DATA.LoadData(args.path, args.dataset, args.seed).data_test
    save_file = make_save_file(args)

    # load the graph
    weight_saver = tf.train.import_meta_graph(save_file + '.meta')
    pretrain_graph = tf.get_default_graph()

    # load tensors
    feature_embeddings = pretrain_graph.get_tensor_by_name(
        'feature_embeddings:0')
    nonzero_embeddings = pretrain_graph.get_tensor_by_name(
        'nonzero_embeddings:0')
    feature_bias = pretrain_graph.get_tensor_by_name('feature_bias:0')
    bias = pretrain_graph.get_tensor_by_name('bias:0')
    fm = pretrain_graph.get_tensor_by_name('fm:0')
    fm_out = pretrain_graph.get_tensor_by_name('fm_out:0')
    out = pretrain_graph.get_tensor_by_name('out:0')
    train_features = pretrain_graph.get_tensor_by_name('train_features_fm:0')
    train_labels = pretrain_graph.get_tensor_by_name('train_labels_fm:0')
    dropout_keep = pretrain_graph.get_tensor_by_name('dropout_keep_fm:0')
    train_phase = pretrain_graph.get_tensor_by_name('train_phase_fm:0')

    # restore session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    weight_saver.restore(sess, save_file)

    # start evaluation
    num_example = len(data['Y'])
    feed_dict = {
        train_features: data['X'],
        train_labels: [[y] for y in data['Y']],
        dropout_keep: 1.0,
        train_phase: False
    }
    ne, fe = sess.run((nonzero_embeddings, feature_embeddings),
                      feed_dict=feed_dict)
    _fm, _fm_out, predictions = sess.run((fm, fm_out, out),
                                         feed_dict=feed_dict)

    # calculate rmse
    y_pred = np.reshape(predictions, (num_example, ))
    y_true = np.reshape(data['Y'], (num_example, ))
    predictions_bounded = np.maximum(
        y_pred, np.ones(num_example) * min(y_true))  # bound the lower values
    predictions_bounded = np.minimum(
        predictions_bounded,
        np.ones(num_example) * max(y_true))  # bound the higher values
    RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))
    print("Test RMSE: %.4f" % (RMSE))

    # Unify into dataframe
    y_df = pd.DataFrame({'label': y_true, 'pred': y_pred})

    # Write
    if not os.path.exists(args.path_output):
        os.makedirs(args.path_output)
    fullpath_output = args.path_output + '/predictions_%s_%d.csv' % (
        args.dataset, args.factor_k)
    y_df.to_csv(fullpath_output, index=False)
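# --- Illustration only (not part of the original script) ---
# A standalone sketch of the prediction bounding used above: raw predictions are
# clipped to the observed label range before computing RMSE, so out-of-range
# outputs cannot inflate the error. Values are toy numbers.
import math
import numpy as np
from sklearn.metrics import mean_squared_error

demo_true = np.array([1.0, 2.0, 3.0, 5.0])
demo_pred = np.array([0.2, 2.5, 3.1, 6.7])          # 0.2 and 6.7 fall outside [1, 5]
bounded = np.maximum(demo_pred, np.ones(len(demo_true)) * demo_true.min())
bounded = np.minimum(bounded, np.ones(len(demo_true)) * demo_true.max())
print(math.sqrt(mean_squared_error(demo_true, bounded)))   # ~0.255 on the clipped predictions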
def train(args):
    # Dictionary of arguments
    argv = vars(args)

    # Data loading
    data = DATA.LoadData(args.path, args.dataset, args.seed)

    # Get arguments from data
    argv['features_p'] = data.features_p
    argv['col_m'] = data.col_m

    if args.verbose > 0:
        print(
            "FM: dataset=%s, factors=%d, #epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, keep=%.2f, optimizer=%s, batch_norm=%d"
            % (args.dataset, args.factor_k, args.epoch, args.batch_size,
               args.learning_rate, args.lamda_bilinear, args.dropout_keep_rate,
               args.optimizer_type, args.batch_norm))

    t1 = time()

    # Choose model
    if args.model_type == 'FM':
        model_class = model_classes.fm.FM
        model = model_class(features_p=data.features_p,
                            factor_k=args.factor_k,
                            col_m=data.col_m,
                            lamda_bilinear=args.lamda_bilinear,
                            dropout_keep_rate=args.dropout_keep_rate,
                            epoch=args.epoch,
                            batch_size=args.batch_size,
                            learning_rate=args.learning_rate,
                            optimizer_type=args.optimizer_type,
                            batch_norm=args.batch_norm,
                            pretrain_flag=args.pretrain,
                            save_file=make_save_file(args),
                            self_terminate=args.self_terminate,
                            verbose=args.verbose,
                            seed=1337)
    elif args.model_type == 'AFM':
        model_class = model_classes.afm.AFM
        model = model_class(features_p=data.features_p,
                            pretrain_flag=args.pretrain,
                            save_file=make_save_file(args),
                            attention=args.attention,
                            hidden_factor_1=args.hidden_factor_1,
                            hidden_factor_2=args.hidden_factor_2,
                            valid_dimension=data.col_m,
                            activation_function=args.activation,
                            freeze_fm=args.freeze_fm,
                            epoch=args.epoch,
                            batch_size=args.batch_size,
                            learning_rate=args.learning_rate,
                            lamda_attention=args.lamda_attention,
                            keep_1=args.keep_1,
                            keep_2=args.keep_2,
                            optimizer_type=args.optimizer_type,
                            batch_norm=args.batch_norm,
                            decay=args.decay,
                            verbose=args.verbose,
                            micro_level_analysis=args.mla,
                            random_seed=args.seed)
    else:
        print("=== Please select a model type.")
        return

    # Begin Training
    model.train(data.data_train, data.data_valid, data.data_test)

    # Find the best validation result across iterations
    best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s]" %
          (best_epoch + 1, model.train_rmse[best_epoch],
           model.valid_rmse[best_epoch], time() - t1))
import load_data
import numpy as np

if __name__ == "__main__":
    dataset = load_data.LoadData()
    (nf_slave_c_pos, nf_master_j_pos, nf_master_j_vel,
     nf_mcurr_load) = dataset.get_no_feedback()
    (tf_slave_c_pos, tf_master_j_pos, tf_master_j_vel,
     tf_mcurr_load) = dataset.get_torque_feedback()
    (pf_slave_c_pos, pf_master_j_pos, pf_master_j_vel,
     pf_mcurr_load) = dataset.get_position_feedback()

    print(len(nf_slave_c_pos))

    time = []
    for i in range(len(nf_slave_c_pos)):
        time.append(np.linspace(0, 1, len(nf_slave_c_pos[i])))

    aa = nf_slave_c_pos[0]
    # pos = np.array(aa['slave_c_pos'])
    # print(pos.shape)
import tensorflow as tf
import cv2
from tensorflow.python.platform import gfile
import dlib
from imutils import face_utils
from PIL import Image
import numpy as np
import configuration as cfg
import load_data as ld
import matplotlib.pyplot as plt

configuration = cfg.Configuration()
load = ld.LoadData()
configuration.pickle_data_file = 'training_images.pickle'
load.data(configuration)

classes_n = configuration.data.classes_count
classes = configuration.data.classes
label_images = configuration.data.label_image

video_capture = cv2.VideoCapture(0)

frozen_graph_filename = 'model/train_model.pb'
with gfile.FastGFile(frozen_graph_filename, "rb") as f:
    graph_def = tf.GraphDef()
    byte = f.read()
    graph_def.ParseFromString(byte)
    tf.import_graph_def(graph_def, name='')
    # for node in graph_def.node:
def main():
    # 1. Load the file; identify the date column, the item column to fill, and the target column
    data_name = input("Enter the full name of the file to convert (e.g. all_in_one.csv):\n")
    date_column = input("Enter the name of the date column:\n")
    target_column = input("Enter the name of the prediction target column:\n")
    type_column = input("Enter the item column to fill (currently only one is supported):\n")
    loader = load_data.LoadData()
    origin_data = loader.load_data(data_name)

    # 2. Generate the complete set of timestamps from the observed timestamp range
    timer = GetTime(origin_data, date_column)
    data, first_day, last_day = timer.get_time()
    data[date_column] = pd.to_datetime(data[date_column])
    generate_date = GenerateDate(first_day, last_day)
    dates = generate_date.generate()

    # 3. Combine the timestamps and item names into DataFrame_1 (see the standalone sketch after this function)
    name_list = list(set(data[type_column]))
    if str(name_list[0]) == "nan":
        name_list.pop(0)
    else:
        pass
    merge_list = merge(dates, name_list)
    df_merge = pd.DataFrame(merge_list, columns=[date_column, type_column])

    # 4. Outer-join DataFrame_1 with the original DataFrame
    df_total = pd.merge(df_merge, data, how="outer", on=[date_column, type_column])

    # 5. Extra options
    df_total[date_column] = pd.to_datetime(df_total[date_column])
    week_day_flag = input("Generate a day-of-week column? y(Default)/n\n")
    generate_week_day(df_total, date_column, week_day_flag)
    open_flag = input("Generate an 'open' column? y(Default)/n\n")
    generate_open(df_total, target_column, open_flag)
    print("Filling N/A...")
    df_total = df_total.fillna(0)
    print(df_total.head())

    # 6. Feature engineering
    feature_flag = input("Add statistical features? y(Default)/n\n")
    if feature_flag == "y" or feature_flag == "":
        period = input("Enter the window size for the statistical features (Default: 7):\n")
        factory_name = []
        for i in range(len(name_list)):
            factory_name.append(df_total[i::len(name_list)])
        if period == "":
            feature_process(factory_name, target_column)
        else:
            feature_process(factory_name, target_column, int(period))
        df_total = pd.concat(factory_name, axis=0)
        df_total = df_total.sort_values(by=[date_column, type_column])
        df_total = df_total.reset_index().drop(columns="index")
    else:
        pass

    # 7. Export
    export_name_first = data_name.split(".")[0] + "_out"
    export_name_end = data_name.split(".")[-1]
    export_name = export_name_first + "." + export_name_end
    if export_name_end == "xlsx":
        df_total.to_excel(export_name, index_label="ID")
    elif export_name_end == "csv":
        df_total.to_csv(export_name, index_label="ID")
    else:
        pass
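# --- Illustration only (separate from main() above) ---
# A small, self-contained sketch of the grid-building and outer-join steps:
# every date in the range is paired with every item, then the observed rows are
# attached with an outer merge. Column names and values are hypothetical.
import pandas as pd

demo_dates = pd.date_range("2020-01-01", "2020-01-03")
demo_items = ["item_a", "item_b"]
grid = pd.DataFrame([(d, i) for d in demo_dates for i in demo_items],
                    columns=["date", "item"])

observed = pd.DataFrame({"date": [pd.Timestamp("2020-01-02")],
                         "item": ["item_a"],
                         "sales": [10]})
full = pd.merge(grid, observed, how="outer", on=["date", "item"])
print(full.fillna(0))   # missing date/item combinations appear with sales = 0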