def visualize(self, predictions, model, save_dir='../../save/keras', plt_name='keras'):
    # Evaluate predictions using accuracy metrics
    accuracy = accuracy_score(self.y_test, predictions)
    print('{} Classification'.format(model))
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

    # Evaluate predictions using the classification report
    classification_report = metrics.classification_report(
        predictions, self.y_test,
        target_names=['NadaSportswear', 'Sportswear'])
    print(classification_report)

    # Calculate the confusion matrix
    cnf_matrix = confusion_matrix(self.y_test, predictions)
    np.set_printoptions(precision=2)

    # Plot module is used for plotting the confusion matrix and classification report
    plot = Plot()
    plot.plotly(cnf_matrix, classification_report, save_dir, plt_name)
def init_data():
    X, y = import_power_plant_data()
    X, y = X.to_numpy(), y.to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=True, random_state=1234)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    opt = SGD(lr=0.01)
    epoch = 10000
    regressor = LinearRegression(opt, epoch=epoch)
    x_plot = list(range(1, epoch + 1))

    # fit() returns the MSE recorded at every epoch
    all_mse = regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)
    mse_value = Metrics.mse(y_test, predicted)

    Plot.plot_time_series(x_plot, all_mse, "mse_plot",
                          "number of iterations",
                          "Mean Square Error (MSE)",
                          "MSE vs Number of iterations")
    plt.show()
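# A minimal sketch of the custom LinearRegression/SGD interface assumed above,
# i.e. fit() returning the per-epoch MSE history; names and internals here are
# hypothetical illustrations, not taken from the original project.
import numpy as np

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, weights, grads):
        # Plain gradient step
        return weights - self.lr * grads

class LinearRegression:
    def __init__(self, opt, epoch=1000):
        self.opt = opt
        self.epoch = epoch
        self.w = None
        self.b = 0.0

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        mse_history = []
        for _ in range(self.epoch):
            y_pred = X @ self.w + self.b
            error = y_pred - y
            # Gradients of the mean squared error
            grad_w = (2.0 / n_samples) * (X.T @ error)
            grad_b = (2.0 / n_samples) * error.sum()
            self.w = self.opt.update(self.w, grad_w)
            self.b = self.b - self.opt.lr * grad_b
            mse_history.append(np.mean(error ** 2))
        return mse_history

    def predict(self, X):
        return X @ self.w + self.b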
def __init__(self, sax_engine, export=True):
    self.se_instance = sax_engine
    self.data = sax_engine.sax_data
    self.process_data = []
    self.ps = None
    self.ploter = Plot(self)
    if export:
        self.export_format()
def __init__(self, dataset, net, common_params, solver_params):
    '''
    :param dataset:
    :param net:
    :param common_params:
    :param solver_params:
    '''
    self.learning_rate = solver_params['learning_rate']
    self.beta1 = float(solver_params['beta1'])
    self.beta2 = float(solver_params['beta2'])
    self.batch_size = int(common_params['batch_size'])
    self.width = common_params['width']
    self.height = common_params['height']
    self.depth = common_params['depth']
    self.channel = int(common_params['channel'])
    self.testing = common_params['testing']
    if self.testing:
        self.test_batch_size = common_params['test_batch_size']
    if 'pretrain_model_path' in solver_params:
        self.pretrain_path = solver_params['pretrain_model_path']
    else:
        self.pretrain_path = 'None'
    self.model_name = solver_params['model_name']
    self.train_dir = str(solver_params['train_dir'])
    self.max_iterators = int(solver_params['max_iterators'])
    self.eval_names = solver_params['eval_names']
    if 'keep_prob' in solver_params:
        self.keep_prob = solver_params['keep_prob']
    else:
        self.keep_prob = 1.0
    if 'net_input' in solver_params:
        self.net_input = solver_params['net_input']
    else:
        self.net_input = {}
    self.dataset = dataset
    self.net = net
    self.config = tf.ConfigProto()
    self.config.gpu_options.allow_growth = True
    self.construct_graph()
    self.do_plot = solver_params['plot']
    if self.do_plot:
        self.plot = Plot(solver_params['plot_params'])
    return
def __init__(self, ss):
    self.ts = None
    self.ts_clust = None
    self.ts_name = None
    self.ss = ss
    self.sampler = 168  # 24/d - 168/w - 744[31](720[30]-696[29]-672[28])/m - 8760(8784)/y
    self.ploter = Plot(self)
    self.n = 5
    self.capteurs_names = []
    self.from_save = False
    self.proto = []
    self.last_readed = {}
    self.store_path = "cluster/13_06/"
    self.name_file = None
    self.clust_name = "Master"
    self.metric = ""
    self.geo = Geo(self.ss.cwd)
    self.cluster_by_name = {}
    self.cluster_by_fullname = {}
    self.size_min = 0
    self.nb_capteur = []
    self.nb_week = []
def __init__(self, board_size: int, black: Agent, train_configs: List[Config],
             eval_configs: List[Config], test_configs: List[Config],
             human_configs: List[Config]) -> None:
    assert black.color is Color.BLACK, "Invalid black agent: black agent's color is not black"
    self.board_size: int = board_size
    self.black = black
    self.train_configs: List[Config] = train_configs
    self.eval_configs: List[Config] = eval_configs
    self.test_configs: List[Config] = test_configs
    self.human_configs: List[Config] = human_configs
    self.total_episodes: int = 0

    # initialize plot
    if isinstance(self.black, TrainableAgent):
        self.plot: Plot = Plot()
        self.scores: defaultdict = defaultdict(list)

    # initialize colors
    init()
running_batch_elapsed_time) / 60.0
print(
    "===== TRAINING STEP {} | ~{:.0f} MINUTES REMAINING =====".format(
        training_step, estimated_minutes_remaining))
print("CRITIC LOSS: {0}".format(running_critic_loss))
print("GENERATOR LOSS: {0}\n".format(running_generator_loss))

# Loss histories
critic_losses_per_vis_interval.append(running_critic_loss)
generator_losses_per_vis_interval.append(running_generator_loss)
running_critic_loss = 0.0
running_generator_loss = 0.0

Plot.plot_histories(
    [critic_losses_per_vis_interval], ["Critic"],
    "{0}critic_loss_history.png".format(MODEL_OUTPUT_DIR))
Plot.plot_histories(
    [generator_losses_per_vis_interval], ["Generator"],
    "{0}generator_loss_history.png".format(MODEL_OUTPUT_DIR))

# Save model at checkpoint
torch.save(generator.state_dict(), "{0}generator".format(MODEL_OUTPUT_DIR))
torch.save(critic.state_dict(), "{0}critic".format(MODEL_OUTPUT_DIR))

# Upsample and save samples
sample_tags = brainpedia.preprocessor.decode_label(labels_batch.data[0])
real_sample_data = real_brain_img_data_batch[0].cpu().data.numpy().squeeze()
class PrefixSpanManager:
    """
    Helper class for using PrefixSpan

    Parameters:
        * sax_engine: SaxEngine
            SAX preprocessing instance
        * export: Boolean
            Whether or not the data has already been exported to the expected format

    Variables:
        * se_instance: SaxEngine
            The SAX class instance
        * data: Array[]
            The data in SAX format
    """
    def __init__(self, sax_engine, export=True):
        self.se_instance = sax_engine
        self.data = sax_engine.sax_data
        self.process_data = []
        self.ps = None
        self.ploter = Plot(self)
        if export:
            self.export_format()

    def run(self):
        """
        Creates the PrefixSpan instance from the preprocessed data
        """
        self.ps = PrefixSpan(self.process_data)

    def export_format(self):
        """
        Converts the data to the format required by the PrefixSpan instance
        """
        tmp = []
        for elmt in self.data:
            tmp.append(elmt.ravel())
        self.process_data = tmp

    def topk(self, n, c=True):
        """
        Returns the most frequent patterns (largest support), closed ones by default

        Parameters:
            * n: int
                Number of patterns to return
        Returns:
            List of frequent patterns
        """
        return self.ps.topk(n, closed=c)

    def frequent(self, n):
        """
        Returns the patterns with support n

        Parameters:
            * n: int
                Minimal support
        Returns:
            List of patterns with minimal support n
        """
        return self.ps.frequent(n)

    def plot(self, l):
        self.ploter.plot_prefixspan(l)
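# A minimal usage sketch of PrefixSpanManager based on the methods above.
# `sax` is assumed to be an already-fitted SaxEngine exposing `sax_data`;
# it is a placeholder, not taken from the original project.
manager = PrefixSpanManager(sax)   # export=True converts the data via export_format()
manager.run()                      # build the PrefixSpan instance
top_patterns = manager.topk(10)    # 10 most frequent closed patterns
common = manager.frequent(5)       # all patterns with support >= 5
manager.plot(top_patterns)         # delegate plotting to the Plot helper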
def __init__(self, dataset, net, common_params, solver_params):
    '''
    :param dataset:
    :param net:
    :param common_params:
    :param solver_params:
    '''
    self.learning_rate = solver_params['learning_rate']
    self.beta1 = float(solver_params['beta1'])
    self.beta2 = float(solver_params['beta2'])
    self.batch_size = int(common_params['batch_size'])
    self.width = common_params['width']
    self.height = common_params['height']
    self.channel = int(common_params['channel'])
    self.testing = common_params['testing']
    if self.testing:
        self.test_batch_size = common_params['test_batch_size']
    if 'pretrain_model_path' in solver_params:
        self.pretrain_path = solver_params['pretrain_model_path']
    else:
        self.pretrain_path = 'None'
    self.model_name = solver_params['model_name']
    self.train_dir = str(solver_params['train_dir'])
    self.max_iterators = int(solver_params['max_iterators'])
    self.eval_names = solver_params['eval_names']
    if 'keep_prob' in solver_params:
        self.keep_prob = solver_params['keep_prob']
    else:
        self.keep_prob = 1.0
    if 'net_input' in solver_params:
        self.net_input = solver_params['net_input']
    else:
        self.net_input = {}
    if 'aug' in solver_params:
        self.aug = solver_params['aug']
    else:
        self.aug = None
    if 'label_type' in common_params:
        self.label_type = common_params['label_type']
    else:
        self.label_type = 'matrix'
    if self.label_type == 'array':
        if 'label_len' in common_params:
            self.label_len = common_params['label_len']
        else:
            raise Exception(
                'Label type is array while not given label length!')
    self.dataset = dataset
    self.net = net
    self.config = tf.ConfigProto()
    self.config.gpu_options.allow_growth = True
    self.construct_graph()
    self.do_plot = solver_params['plot']
    if self.do_plot:
        self.plot = Plot(solver_params['plot_params'])
    return
class Solver2D(Solver):
    '''2-D model solver
    '''

    def __init__(self, dataset, net, common_params, solver_params):
        '''
        :param dataset:
        :param net:
        :param common_params:
        :param solver_params:
        '''
        self.learning_rate = solver_params['learning_rate']
        self.beta1 = float(solver_params['beta1'])
        self.beta2 = float(solver_params['beta2'])
        self.batch_size = int(common_params['batch_size'])
        self.width = common_params['width']
        self.height = common_params['height']
        self.channel = int(common_params['channel'])
        self.testing = common_params['testing']
        if self.testing:
            self.test_batch_size = common_params['test_batch_size']
        if 'pretrain_model_path' in solver_params:
            self.pretrain_path = solver_params['pretrain_model_path']
        else:
            self.pretrain_path = 'None'
        self.model_name = solver_params['model_name']
        self.train_dir = str(solver_params['train_dir'])
        self.max_iterators = int(solver_params['max_iterators'])
        self.eval_names = solver_params['eval_names']
        if 'keep_prob' in solver_params:
            self.keep_prob = solver_params['keep_prob']
        else:
            self.keep_prob = 1.0
        if 'net_input' in solver_params:
            self.net_input = solver_params['net_input']
        else:
            self.net_input = {}
        if 'aug' in solver_params:
            self.aug = solver_params['aug']
        else:
            self.aug = None
        if 'label_type' in common_params:
            self.label_type = common_params['label_type']
        else:
            self.label_type = 'matrix'
        if self.label_type == 'array':
            if 'label_len' in common_params:
                self.label_len = common_params['label_len']
            else:
                raise Exception(
                    'Label type is array while not given label length!')
        self.dataset = dataset
        self.net = net
        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True
        self.construct_graph()
        self.do_plot = solver_params['plot']
        if self.do_plot:
            self.plot = Plot(solver_params['plot_params'])
        return

    def _train(self, lr):
        '''Train model using ADAM optimizer
        '''
        train = tf.train.AdamOptimizer(lr, self.beta1, self.beta2).minimize(
            self.loss,
            global_step=self.global_step,
            var_list=self.net.trainable_collection)
        #grads = opt.compute_gradients(self.loss)
        #train = opt.apply_gradients(grads, global_step=self.global_step)
        return train

    def construct_graph(self):
        self.global_step = tf.Variable(0, trainable=False)
        self.images = tf.placeholder(
            tf.float32, [None, self.height, self.width, self.channel])
        if self.label_type == 'binary':
            self.labels = tf.placeholder(tf.float32, [None, 1, 1, 1])
        elif self.label_type == 'array':
            self.labels = tf.placeholder(tf.float32,
                                         [None, 1, 1, self.label_len])
        else:
            self.labels = tf.placeholder(
                tf.float32, [None, self.height, self.width, self.channel])
        self.lr = tf.placeholder(tf.float32)
        self.keep_prob_holder = tf.placeholder(tf.float32)
        self.net_input['keep_prob'] = self.keep_prob_holder
        self.predicts = self.net.inference(self.images, **self.net_input)
        self.loss, self.evals = self.net.loss(self.predicts['out'],
                                              self.labels, self.eval_names)
        loss_summaries(self.loss)
        tf.summary.scalar('loss', self.loss)
        for key, value in self.evals.items():
            tf.summary.scalar(key, value)
        self.train_op = self._train(self.lr)

    def initialize(self):
        #saver = tf.train.Saver()
        try:
            init = tf.global_variables_initializer()
        except:
            init = tf.initialize_all_variables()
        self.sess = tf.Session(config=self.config)
        self.sess.run(init)
        if self.pretrain_path != 'None':
            saver = tf.train.Saver(self.net.pretrained_collection,
                                   write_version=1)
            saver.restore(self.sess, self.pretrain_path)

    def solve(self):
        saver = tf.train.Saver(self.net.all_collection, write_version=1)
        #saver = tf.train.Saver()
        summary_op = tf.summary.merge_all()
        write_dir = self.train_dir + '/' + self.model_name + '/' + str(
            datetime.now()) + '/'
        train_writer = tf.summary.FileWriter(write_dir + 'train',
                                             self.sess.graph)
        test_writer = tf.summary.FileWriter(write_dir + 'test',
                                            self.sess.graph)
        if self.testing:
            n_batch = self.dataset.get_n_test_batch()
        for step in xrange(self.max_iterators):
            start_time = time.time()
            np_images, np_labels = self.dataset.batch()
            if self.aug is not None:
                np_images = self.aug.process(np_images)
            _, loss, evals = self.sess.run(
                [self.train_op, self.loss, self.evals],
                feed_dict={
                    self.images: np_images,
                    self.labels: np_labels,
                    self.lr: self.learning_rate[step],
                    self.keep_prob_holder: self.keep_prob
                })
            duration = time.time() - start_time
            assert not np.isnan(loss), 'Model diverged with loss = NaN'
            if step % 10 == 0:
                examples_per_sec = self.dataset.batch_size / duration
                sec_per_batch = float(duration)
                print(
                    '%s: step %d, loss = %f (%.2f examples/sec; %.3f sec/batch)'
                    % (datetime.now(), step, loss, examples_per_sec,
                       sec_per_batch))
                print(evals)
                sys.stdout.flush()
                summary_str = self.sess.run(summary_op,
                                            feed_dict={
                                                self.images: np_images,
                                                self.labels: np_labels,
                                                self.keep_prob_holder: self.keep_prob
                                            })
                train_writer.add_summary(summary_str, step)
                t_images, t_labels = self.dataset.test_random_batch()
                test_summary = self.sess.run(summary_op,
                                             feed_dict={
                                                 self.images: t_images,
                                                 self.labels: t_labels,
                                                 self.keep_prob_holder: 1.0
                                             })
                test_writer.add_summary(test_summary, step)
                if self.do_plot:
                    self.plot.plot_train(step, loss, 0)
                    if 'precision' in self.eval_names:
                        self.plot.plot_train(step, evals['precision'], 1)
                    if 'recall' in self.eval_names:
                        self.plot.plot_train(step, evals['recall'], 2)
                    if 'dice' in self.eval_names:
                        self.plot.plot_train(step, evals['dice'], 3)
                    elif 'f1' in self.eval_names:
                        self.plot.plot_train(step, evals['f1'], 3)
            if step % 1000 == 999:
                saver.save(self.sess,
                           self.train_dir + '/' + self.model_name + '.cpkt',
                           global_step=self.global_step)
                if self.do_plot:
                    self.plot.save_fig()
            if self.testing:
                if (step % 500 == 0) & (step != 0):
                    temp_eval = {}
                    for name in self.eval_names:
                        temp_eval[name] = 0.0
                    temp_eval['loss'] = 0.0
                    for i in xrange(n_batch):
                        t_start_time = time.time()
                        t_images, t_labels = self.dataset.test_batch()
                        if self.aug is not None:
                            t_images = self.aug.process(t_images)
                        t_loss, t_evals, t_summary = self.sess.run(
                            [self.loss, self.evals, summary_op],
                            feed_dict={
                                self.images: t_images,
                                self.labels: t_labels,
                                self.keep_prob_holder: 1.0
                            })
                        t_duration = (time.time() - t_start_time)
                        print('%s: testing %d, loss = %f (%.3f sec/batch)' %
                              (datetime.now(), i, t_loss, t_duration))
                        print(t_evals)
                        temp_eval['loss'] += t_loss
                        for name in self.eval_names:
                            temp_eval[name] += t_evals[name]
                    for key, value in temp_eval.items():
                        temp_eval[key] /= float(n_batch)
                    print('testing finished.')
                    print(temp_eval)
                    if self.do_plot:
                        self.plot.plot_test(step, temp_eval['loss'], 0)
                        if 'precision' in temp_eval:
                            self.plot.plot_test(step, temp_eval['precision'], 1)
                        if 'recall' in temp_eval:
                            self.plot.plot_test(step, temp_eval['recall'], 2)
                        if 'dice' in temp_eval:
                            self.plot.plot_test(step, temp_eval['dice'], 3)
                        elif 'f1' in temp_eval:
                            self.plot.plot_test(step, temp_eval['f1'], 3)
        # self.sess.close()
        if self.do_plot:
            self.plot.save_fig()
        return

    def forward(self, input):
        '''
        :param input:
        :return:
        '''
        if len(input.shape) == 1:
            input.shape = [
                int(input.shape[0] / self.width / self.height / self.channel),
                self.width, self.height, self.channel
            ]
        elif len(input.shape) == 3:
            input.shape = [
                int(input.shape[0] / self.channel), input.shape[1],
                input.shape[2], self.channel
            ]
        i = 0
        if self.label_type == 'binary':
            predict = np.zeros([input.shape[0], 1, 1, 1], dtype=np.float32)
        elif self.label_type == 'array':
            predict = np.zeros([input.shape[0], 1, 1, self.label_len],
                               dtype=np.float32)
        else:
            predict = np.zeros(input.shape, dtype=np.float32)
        while i < input.shape[0]:
            images = input[i:i + self.test_batch_size, :, :, :]
            if self.aug is not None:
                images = self.aug.process(images)
            predict_temp = self.sess.run([self.predicts['out']],
                                         feed_dict={
                                             self.images: images,
                                             self.keep_prob_holder: 1.0
                                         })
            predict[i:i + self.test_batch_size, :, :, :] = predict_temp[0]
            i += self.test_batch_size
        return predict
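# A minimal configuration sketch for constructing Solver2D, inferred from the keys
# read in __init__ above; the concrete values, and the dataset/net objects, are
# placeholders rather than settings from the original project.
common_params = {
    'batch_size': 8,
    'width': 128,
    'height': 128,
    'channel': 1,
    'testing': True,
    'test_batch_size': 4,
    'label_type': 'matrix',  # 'binary' and 'array' (with 'label_len') are also supported
}
solver_params = {
    'learning_rate': [1e-4] * 20000,  # indexed by step in solve(), so one entry per iteration
    'beta1': 0.9,
    'beta2': 0.999,
    'model_name': 'unet2d',
    'train_dir': './train_log',
    'max_iterators': 20000,
    'eval_names': ['precision', 'recall', 'dice'],
    'keep_prob': 0.8,
    'plot': False,  # set True and supply 'plot_params' to enable the Plot helper
}
# solver = Solver2D(dataset, net, common_params, solver_params)
# solver.initialize()
# solver.solve()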
    type=int,
    default=640,
    help='the width of the input image to network')
parser.add_argument(
    '--input_vid',
    default=None,
    help='Input video file to process. Training will be turned off.')
opt = parser.parse_args()
print(opt)
print('=============================================================')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Object for class with visualization functions
plotter = Plot()

torch.manual_seed(opt.manual_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(opt.manual_seed)
np.random.seed(opt.manual_seed)

# Create reader to process input video if provided
# Training will be turned off in this case
if opt.input_vid is not None:
    if not os.path.exists(opt.input_vid):
        sys.exit('Error: ' + opt.input_vid + ' file does not exist.')
    reader = imageio.get_reader(opt.input_vid)
    opt.image_width, opt.image_height = reader.get_meta_data()['size']
    print('Video reader created. Frame Size: ({}, {})'.format(
        opt.image_height, opt.image_width))
import datetime
import json

import pandas as pd
import requests

from service.calendar import Calendar

server_ip = "http://140.115.87.197:8090/"
cal = Calendar('TW')
payloads = {
    'ticker_list': ['1524'],
    'start_date': cal.get_trade_date('2010-01-01', (1 + 30) * -1, 'd'),
    'end_date': cal.get_trade_date('2010-03-31', 1, 'd'),
}
response = requests.get(server_ip + "stk/get_ticker_period_stk",
                        params=payloads)
stk_dict = json.loads(response.text)['result']

stk_df = pd.DataFrame(stk_dict[payloads['ticker_list'][0]])
stk_df['date'] = [
    datetime.datetime.strptime(elm, "%Y-%m-%d") for elm in stk_df['date']
]
stk_df.set_index("date", inplace=True)
stk_df.columns = ['Close', 'High', 'Low', 'Open', 'Volume', 'outstanding_share']
stk_df = stk_df.drop('outstanding_share', axis=1)
stk_df = stk_df.dropna()
print(stk_df)

# BBANDS here matches the TA-Lib signature (timeperiod, nbdevup, nbdevdn, matype)
up_band, mid, down_band = BBANDS(
    stk_df['Close'], timeperiod=30, nbdevup=1.5, nbdevdn=1.5, matype=0
)

plot = Plot()
plot.plot_candlestick(df=stk_df, addplot_list=[up_band, mid, down_band])
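# A plausible sketch of what a candlestick helper like Plot.plot_candlestick could
# do with mplfinance; the real Plot class is project-specific and not shown here,
# so this stand-alone function is a hypothetical illustration only.
import mplfinance as mpf

def plot_candlestick_sketch(df, addplot_list):
    # Each overlay series (e.g. the Bollinger Bands above) becomes an addplot entry
    aps = [mpf.make_addplot(series) for series in addplot_list]
    # df needs Open/High/Low/Close (and Volume) columns with a DatetimeIndex
    mpf.plot(df, type='candle', volume=True, addplot=aps)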
        classifier_running_losses[2]))
print("NN CLASSIFIER TEST ACCURACY: {0:.2f}%".format(100.0 * accuracies[0]))
print("NN SYNTHETIC CLASSIFIER TEST ACCURACY: {0:.2f}%".format(
    100.0 * accuracies[1]))
print("NN MIXED CLASSIFIER TEST ACCURACY: {0:.2f}%\n\n".format(
    100.0 * accuracies[2]))

# Loss histories
for i in range(num_classifiers):
    classifier_losses[i].append(classifier_running_losses[i])
    classifier_running_losses[i] = 0.0

Plot.plot_histories(
    classifier_losses,
    ['[REAL] Loss', '[SYNTHETIC] Loss', '[REAL + SYNTHETIC] Loss'],
    "{0}loss_histories".format(args.output_dir))
Plot.plot_histories(classifier_accuracies, [
    '[REAL] Test Accuracy', '[SYNTHETIC] Test Accuracy',
    '[REAL + SYNTHETIC] Test Accuracy'
], "{0}accuracy_histories".format(args.output_dir))

# Save model at checkpoint
torch.save(classifiers[0].state_dict(),
           "{0}nn_classifier".format(args.output_dir))
torch.save(classifiers[1].state_dict(),
           "{0}synthetic_nn_classifier".format(args.output_dir))
torch.save(classifiers[2].state_dict(),
           "{0}mixed_nn_classifier".format(args.output_dir))

# Save final NN classifier results to results_f:
class ClusterTs:
    """Class providing the data transformation and manipulation methods used for clustering

    Parent class of:
        * :class:`kmean`
        * :class:`kshape`

    Parameters:
        * ss : SeriesSupp
            Instance of the time series manager

    Variables:
        * ts: Array[[[float]]]
            The time series in the format required for clustering
        * ts_clust: Array[int]
            Each integer is, by index, the cluster to which the corresponding entry of *ts* belongs
        * ts_name: Array[String]
            Name of the time series, i.e. the sensor at its granularity (year, month, week)
        * ss: SeriesSupp
            Instance of the time series manager
        * sampler: int
            Sampling size :func:`sampler`
        * ploter: :class:Plot
            Instance of a display object
        * n: int
            Number of clusters
        * capteurs_names: Array[String]
            Name of the time series, i.e. the sensor at its granularity (year, month, week) *To be removed soon*
        * from_save: Bool
            True if the information was read back from a saved cluster
        * proto: Array[[[float]]]
            Prototype of each cluster
        * last_readed: {Dict}
            Information read back from the saved 'Pickle' file of the cluster under study
        * store_path: String
            Path to the folder where saves are stored. No longer used since a file-search dialog box was implemented
        * name_file: String
            Absolute path to the 'Pickle' file
        * clust_name: String
            Name of the clustering technique of the instance
        * metric: String
            Name of the distance measure of the instance
        * geo: :class:Geo
            Geo instance
        * cluster_by_name: {Dict}
            Clustering of the time series by sensor name only, without redundancy
        * cluster_by_fullname: {Dict}
            Clustering of the time series by sensor name and granularity
        * size_min: int
            Minimal length a series must have to be kept during preprocessing
        * nb_capteur: {Dict}
            Clustering of the time series by sensor name only, with redundancy
        * nb_week: {Dict}
            When splitting by week, the per-sensor redundancy of the weeks

    Example:
        See: Cluster_engine.ipynb

    Notes:
        *Dependencies*:
            - tslearn
            - pandas
            - Pickle
    """

    def __init__(self, ss):
        self.ts = None
        self.ts_clust = None
        self.ts_name = None
        self.ss = ss
        self.sampler = 168  # 24/d - 168/w - 744[31](720[30]-696[29]-672[28])/m - 8760(8784)/y
        self.ploter = Plot(self)
        self.n = 5
        self.capteurs_names = []
        self.from_save = False
        self.proto = []
        self.last_readed = {}
        self.store_path = "cluster/13_06/"
        self.name_file = None
        self.clust_name = "Master"
        self.metric = ""
        self.geo = Geo(self.ss.cwd)
        self.cluster_by_name = {}
        self.cluster_by_fullname = {}
        self.size_min = 0
        self.nb_capteur = []
        self.nb_week = []
        #self.read_txt_line_info = {}

    def __repr__(self):
        """
        Representation of the instance as an explanatory string.

        Parameters:
            NA
        Returns:
            my_repr : str
                representation.
        """
        my_repr = [
            "Algorithm de clustering: " + self.clust_name,
            "Metric mesure: " + self.metric,
            "Espace de stockage: " + self.store_path,
            "Nombre de Clusters: " + str(self.n),
            "Sampler de taille : " + str(self.sampler)
        ]
        return '\n'.join('%s' % v for v in my_repr)

    def tslearn_format_export(self, other_data=None):
        """
        Exports the data variable to the format used by tslearn for clustering

        Parameters:
            NA
        Returns:
            NA
        """
        df = []
        dn = []
        if self.ss.days:
            size_max = 170
        else:
            size_max = 750
        if other_data is not None:
            data_dict = other_data
        else:
            data_dict = self.ss.get_data()
        for k, v in data_dict.items():
            if not self.check_equal(v["Valeur"].values):
                if len(v["Valeur"].values) > self.size_min and len(
                        v["Valeur"].values) < size_max:
                    df.append(v["Valeur"].values)
                    dn.append(k)
                    self.capteurs_names.append(k)
        df_set = to_time_series_dataset(df)
        if self.sampler != 0:
            df_set = TimeSeriesResampler(self.sampler).fit_transform(df_set)
        self.ts = df_set
        self.ts_name = dn

    def set_size_min(self, size):
        """
        Sets the minimal length a TS must have to be kept

        Parameters:
            * size: int
                Minimal length
        Returns:
            NA
        """
        self.size_min = size

    def check_equal(self, iterator):
        """
        Checks whether the TS stays on the same value the whole time

        Parameters:
            * iterator: iterator
                The TS
        Returns:
            Bool
        """
        iterator = iter(iterator)
        try:
            first = next(iterator)
        except StopIteration:
            return True
        return all(first == rest for rest in iterator)

    def show_info(self):
        """
        Displays the information read from the info txt of the cluster save linked to the instance

        Parameters:
            NA
        Returns:
            NA
        """
        file = open(str(self.name_file[:-4]) + ".txt", "r")
        print(file.read())
        file.close()
        #i = 0
        #with open(str(self.store_path) + str(self.name_file) + ".txt", "r") as f:
        #    self.read_txt_line_info[i] = f.readlines()
        #    i += 1

    def store_cluster(self, name):
        """
        Saves the instance's current clustering as a pickle file together with an information txt

        Parameters:
            * name: String
                File name, representing the main parameters of the clustering
        Returns:
            NA
        """
        info_dict = {}
        info_dict["trace"] = self.ts
        info_dict["classe"] = self.ts_clust
        info_dict["name"] = self.ts_name
        info_dict["proto"] = self.km.cluster_centers_
        info_dict["sample"] = self.sampler
        info_dict["years"] = self.ss.years
        info_dict["months"] = self.ss.months
        info_dict["days"] = self.ss.days
        info_dict["size_min"] = self.size_min
        info_dict["round"] = self.ss.rounded
        info_dict["smoothed"] = self.ss.smoothed
        outfile = open(self.store_path + name + ".pkl", "wb")
        pickle.dump(info_dict, outfile)
        outfile.close()
        file = open(self.store_path + name + ".txt", "w")
        file.write(str([i for i in self.ss.years]) + "\n")
        file.write(str([i for i in self.ss.months]) + "\n")
        file.write("Weeks split: " + str(self.ss.days) + "\n")
        file.write("Normalized: " + str(self.ss.norm) + "\n")
        file.write("min size of TS selected: " + str(self.size_min) + "\n")
        file.write("Sample size(0=None): " + str(self.sampler) + "\n")
        file.write("Algorithm used: " + str(self.clust_name) + "\n")
        file.write("nb cluster: " + str(self.n) + "\n")
        file.write("Distance measure: " + str(self.metric) + "\n")
        file.write("Rounded values: " + str(self.ss.rounded) + "\n")
        file.write("smoothed values: " + str(self.ss.smoothed) + "\n")
        file.close()

    def read_cluster(self, path=""):
        """
        Opens a pickle file (saved clustering), reads all its information and updates the instance's variables to match

        Parameters:
            * path: String
                Path to the file
        Returns:
            NA
        """
        infile = open(str(path), 'rb')
        info_dict = pickle.load(infile)
        infile.close()
        self.store_path = path
        self.name_file = path
        self.ts = info_dict["trace"]
        self.ts_clust = info_dict["classe"]
        self.ts_name = info_dict["name"]
        self.capteurs_names = info_dict["name"]
        self.proto = info_dict["proto"]
        self.n = len(info_dict["proto"])
        self.sampler = info_dict["sample"]
        self.from_save = True
        self.last_readed = info_dict
        try:
            self.ss.years = info_dict["years"]
            self.ss.months = info_dict["months"]
            self.ss.days = info_dict["days"]
        except:
            pass
        try:
            self.ss.rounded = info_dict["round"]
        except:
            self.ss.rounded = "no information"
        try:
            self.ss.smoothed = info_dict["smoothed"]
        except:
            self.ss.smoothed = "no information"

    def get_cluster_n(self, n):
        """
        Returns the TS of a cluster **n**

        Parameters:
            * n: int
                Desired cluster number
        Returns:
            res: Array[float]
                All the TS of the cluster
        """
        res = []
        for xx in self.ts[self.ts_clust == n]:
            res.append(xx)
        return res

    def capteur_parser(self):
        """
        Parses the sensor names, to keep in memory the sensor names and their date extension for each TS

        Parameters:
            NA
        Returns:
            NA
        """
        res = {}
        res_full = {}
        nb_capteur = {}
        nb_week = {}
        for i in range(0, self.n):
            res[i], res_full[i], nb_capteur[i], nb_week[i] = [], [], [], []
        for elmt in self.ts_name:
            non_parse = str(elmt)
            parse = str(elmt[0:2] + elmt[3:6])
            if parse not in res[self.ts_clust[self.ts_name.index(elmt)]]:
                res[self.ts_clust[self.ts_name.index(elmt)]].append(parse)
            nb_capteur[self.ts_clust[self.ts_name.index(elmt)]].append(parse)
            nb_week[self.ts_clust[self.ts_name.index(elmt)]].append(
                elmt[-2:].replace("_", "0"))
            res_full[self.ts_clust[self.ts_name.index(elmt)]].append(non_parse)
        self.cluster_by_name = res
        self.cluster_by_fullname = res_full
        self.nb_capteur = nb_capteur
        self.nb_week = nb_week

    def get_part_of_ts(self, data, elmt):
        """
        According to the parameters in elmt, retrieves a specific part of the data from data

        Parameters:
            * data: {Dict}
                Data from which a specific part should be retrieved
            * elmt: {Dict}
                Information related to the request (date)
        Returns:
            res_ts: Array[float]
                The requested TS
        """
        res_ts = data[elmt["capteur"]].copy()
        res_ts = res_ts.set_index("Date")
        if elmt["week"] and not elmt["month"]:
            res_ts = res_ts[str(elmt["year"])]
            res_ts = res_ts.groupby(pd.Grouper(freq='W'))
            for i in res_ts:
                if i[0].week == elmt["week"]:
                    res_ts = i[1]
        elif elmt["week"] and elmt["month"]:
            res_ts = res_ts[str(elmt["year"]) + "-" + str(elmt["month"])]
            res_ts = res_ts.groupby(pd.Grouper(freq='W'))
            for i in res_ts:
                if i[0].week == elmt["week"]:
                    res_ts = i[1]
        elif elmt["month"]:
            res_ts = res_ts[str(elmt["year"]) + "-" + str(elmt["month"])]
        else:
            res_ts = res_ts[str(elmt["month"])]
        res_ts = res_ts.reset_index()
        res_ts = self.ss.normalize(res_ts)
        return res_ts

    def clust_hoverview_rng(self, n):
        """
        DEPRECATED: Draws a random TS from a cluster **n** to give an idea of its members

        Parameters:
            * n: int
                The cluster number n
        Returns:
            NA
        """
        #r_RG, r_GW = ss.SeriesSupp(cwd, self.ss.factory, "RG24"), ss.SeriesSupp(cwd, factory, "GW")
        rng_elmt = self.cluster_by_fullname[n][0]
        elmt = self.parse_capteur_split(rng_elmt)
        gw = self.get_part_of_ts(self.ss.dataset, elmt)
        elmt2 = elmt.copy()
        elmt2["capteur"] = "24h_RG007"  # HARD-CODED: find the closest one
        rg = self.get_part_of_ts(self.ss.factory.get_RG24(), elmt2)
        self.ploter.plot_single_scatter({
            elmt["capteur"]: gw,
            elmt2["capteur"]: rg
        })

    def clust_hoverview(self, n):
        """
        Displays the TS of a given cluster **n**

        Parameters:
            * n: int
                Selected cluster
        Returns:
            NA
        """
        elmt_clust = self.cluster_by_fullname[n]
        all_clust_origin_ts = {}
        for elmt in elmt_clust:
            parse = self.parse_capteur_split(elmt)
            #print(parse)
            all_clust_origin_ts[elmt] = self.get_part_of_ts(
                self.ss.dataset, parse)
        self.ploter.plot_scatter(all_clust_origin_ts)

    def parse_capteur_split(self, elmt):
        """
        Extracts the information of a TS from its name, such as its sensor name and the date

        Parameters:
            * elmt: String
                Sensor name with info
        Returns:
            res: {Dict}
                The information split up and stored in a dict
        """
        elmt = elmt.split("_")
        capteur = elmt[0] + "_" + elmt[1]
        year = int(elmt[2])
        if len(elmt) > 3 and not self.ss.days:
            month = int(elmt[3])
        else:
            month = 0
        if len(elmt) > 4:
            week = int(elmt[4])
        else:
            week = 0
        if len(elmt) > 3 and self.ss.days:
            week = int(elmt[3])
        else:
            week = 0
        res = {}
        res["capteur"], res["year"], res["month"], res[
            "week"] = capteur, year, month, week
        return res

    def highlight_max(self, s):
        """
        Display setting: highlights the max value of each DataFrame row

        Parameters:
            * s: pandas
                Row of the table
        Returns:
            unnamed: pandas.style
                Display of the max values
        """
        is_max = s == s.max()
        return ['background-color: red' if v else '' for v in is_max]

    def style_df(self, opt, t):
        if opt == "max":
            t_style = t.style.apply(self.highlight_max, axis=1)
        return t_style

    def get_captor_distribution_in_cluster(self):
        """
        Returns the number of occurrences of the sensors in each of the clusters

        Parameters:
            NA
        Returns:
            unnamed: DataFrame
                Occurrence table
        """
        tot = {}
        for k, v in self.nb_capteur.items():
            tot[k] = Counter(v)
        return pd.DataFrame(tot)

    def get_ts_by_captor(self, cpt):
        """
        Retrieves the TS for a given sensor **cpt** and their distribution across the clusters

        Parameters:
            * cpt: String
                Target sensor
        Returns:
            res: tuple(String, {Dict})
                The String is the sensor and the dict the distribution of the sub-TS in each cluster (key = cluster)
        """
        res = (cpt, {})
        i = 0
        for elmt in range(len(self.proto)):
            res[1][i] = []
            i += 1
        i = 0
        for string in self.ts_name:
            if cpt in string:
                res[1][self.ts_clust[i]].append([string, self.ts[i].ravel()])
            i += 1
        return res

    def get_clust_part_for_captor(self, cpt):
        """
        Retrieves the clusters for a given sensor **cpt** and their distribution across the clusters

        Parameters:
            * cpt: String
                Target sensor
        Returns:
            res: String
                Only the date names for each of the clusters
        """
        res = (cpt, {})
        i = 0
        for elmt in range(len(self.proto)):
            res[1][i] = []
            i += 1
        i = 0
        for string in self.ts_name:
            if cpt in string:
                res[1][self.ts_clust[i]].append(string)
            i += 1
        return res

    def aff_color(self):
        """
        Displays the colors used in the clustering
        """
        c = COLORS[:self.n + 1]
        df = pd.DataFrame({'colors': c})
        print(df.T)
def run_pipeline(self):
    """
    run_pipeline function runs the actual pipeline.
    :return:
    """
    # Train & test data split using the sklearn train_test_split module
    X_train, X_test, y_train, y_test = train_test_split(
        self.data['url'],
        self.data['label'],
        test_size=0.33,
        random_state=21,
        stratify=self.data['label'])
    print(
        "*******************\nTrain set : {} \n Test set : {}\n*******************\n"
        .format(X_train.shape[0], X_test.shape[0]))

    # Running the pipeline
    model = self.pipeline.fit(X_train, y_train)
    print('Saving the {} model after fitting on training data.'.format(
        str(self.args.model).upper()))

    # Dumping the fitted pipeline
    joblib.dump(
        model,
        os.path.join(self.args.checkpoint_dir,
                     '{}.pickle'.format(self.args.model)))

    # Calculating time per prediction
    # Start time
    start = timeit.default_timer()

    # Predicting label and confidence probability on the test data set
    predictions = model.predict(X_test)
    predictions_prob = model.predict_proba(X_test)

    # Binary class values: rounding them to 0 or 1
    predictions = [round(value) for value in predictions]

    end = timeit.default_timer()
    # End time
    print('Time per prediction : {}'.format((end - start) / X_test.shape[0]))

    # Evaluate predictions using accuracy metrics
    accuracy = accuracy_score(y_test, predictions)
    print('{} Classification'.format(self.args.model))
    print("Accuracy: %.2f%%" % (accuracy * 100.0))

    # Evaluate predictions using the classification report
    classification_report = metrics.classification_report(
        predictions, y_test, target_names=['NadaSportswear', 'Sportswear'])
    print(classification_report)

    # Calculating the confusion matrix
    cnf_matrix = confusion_matrix(y_test, predictions)
    np.set_printoptions(precision=2)

    # Plot module is used for plotting the confusion matrix and classification report
    plot = Plot()
    plot.plotly(cnf_matrix, classification_report, self.args.save_dir,
                self.args.model)
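# A minimal sketch of the kind of confusion-matrix figure the custom Plot.plotly
# helper is assumed to produce; this stand-alone version uses only sklearn and
# matplotlib, and the function name is hypothetical.
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

def plot_confusion_matrix_sketch(cnf_matrix, save_path):
    disp = ConfusionMatrixDisplay(confusion_matrix=cnf_matrix,
                                  display_labels=['NadaSportswear', 'Sportswear'])
    disp.plot(cmap='Blues', values_format='d')
    plt.title('Confusion matrix')
    plt.savefig(save_path)
    plt.close()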