def plot_dataset(dataset_name, time=None):
    """Plot the named dataset for the configured subject."""
    data_model = DataProvider()
    if dataset_name not in data_model.supported_datatypes:
        raise ValueError("%s not in supported datatypes." % dataset_name)
    subject_name = SUBJECT_NAME or 'sample'
    # Look up the dataset attribute by name and delegate plotting to it.
    plot_target = getattr(data_model, dataset_name)
    plot_target.plot(subject_name)
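# A minimal usage sketch; 'train_stc' is a guess at a supported dataset name,
# based on the other scripts in this section, not a confirmed entry in
# DataProvider.supported_datatypes:
if __name__ == '__main__':
    plot_dataset('train_stc')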
import matplotlib.pyplot as plt

from data_provider.data_provider import DataProvider


def main():
    data = DataProvider()
    # plt.plot(data.train_stc.times, data.train_stc.data[::100, :].T)
    plt.plot(data.stc.times, data.stc.data[:, :].T)
    plt.xlabel('time (ms)')
    plt.ylabel('Source amplitude')
    plt.title('train_stc dataset')
    plt.show()
def check_column_types(self) -> list:
    """
    Override of the base method for the Rating di Legalità dataset: the last
    column (Scadenza Revoca Annullamento Sospensione) contains dates plus,
    where no date is specified, an anomalous '(*)' value that is already
    handled by the DB.

    Returns:
        list: list of types in np.dtypes() format
    """
    dp_copy = DataProvider(df=self.dp.df.copy(deep=True),
                           column_types=self.dp.column_types,
                           column_constraints=self.dp.column_constraints)
    column_to_mask_name = self.dp.df.columns.tolist()[-1]
    date_filter = self.dp.df[column_to_mask_name] == '(*)'
    # Replace the '(*)' placeholder with NaT so the column parses as dates;
    # assign the result back rather than masking in place through chained
    # indexing, which can modify a temporary copy and leave df unchanged.
    dp_copy.df[column_to_mask_name] = dp_copy.df[column_to_mask_name].mask(date_filter, pd.NaT)
    return dp_copy.get_column_types()
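# A minimal, self-contained sketch of the masking step above, assuming the
# real column holds date strings with '(*)' as the missing-value marker
# (the column name and values here are made up for illustration):
import pandas as pd

df = pd.DataFrame({'scadenza': ['2021-01-31', '(*)', '2022-06-30']})
masked = df['scadenza'].mask(df['scadenza'] == '(*)', pd.NaT)
print(pd.to_datetime(masked).dtype)  # datetime64[ns], with NaT where '(*)' was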
def train(self, sess, cfg):
    self.data = DataProvider(cfg)
    counter = 0
    self.model_initialization(sess, cfg)
    # Rolling window over recent test scores, so the reported accuracy is a
    # moving average rather than an all-time one.
    test_top1_col = collections.deque(maxlen=10000)
    while True:
        batch_images_lab, batch_labels_lab = self.data.load_label_batch(cfg.iBatchSize, counter)
        _, training_top1 = sess.run([self.d_optim, self.top1],
                                    feed_dict={self.input: batch_images_lab,
                                               self.lab_image_labels: batch_labels_lab})
        counter += 1
        # print('training_top1:%f' % training_top1)
        if np.mod(counter, 100) == 0 and cfg.bUseLabel:
            batch_images_lab, batch_labels_lab = self.data.load_test_batch(cfg.iBatchSize, counter // 100)
            test_top1 = sess.run(self.top1,
                                 feed_dict={self.input: batch_images_lab,
                                            self.lab_image_labels: batch_labels_lab})
            test_top1_col.append(test_top1)
            avg_test_top1 = np.mean(test_top1_col)
            print('iter %d, test_top1:%f' % (counter, float(avg_test_top1)))
        if np.mod(counter, 5000) == 0:
            self.save(sess, cfg, counter)
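# A minimal sketch of the rolling-average bookkeeping used above: a bounded
# deque keeps only the most recent scores, so np.mean reports a moving
# average (the scores below are made up):
import collections
import numpy as np

scores = collections.deque(maxlen=3)
for score in [0.50, 0.60, 0.70, 0.80]:
    scores.append(score)
    print('rolling mean: %.3f' % np.mean(scores))
# The final mean covers only the last 3 scores: (0.60 + 0.70 + 0.80) / 3 = 0.70.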
@classmethod
def setUpClass(cls):
    data = {
        'col1': ['00993883991', '00993884492', '00912383993', '00233883994'],
        'col2': ['UD', 'PN', 'TS', 'GO'],
        'col3': [0, 1, 1, 1],
        'col4': ([np.nan] * 3) + [0.24],
        'col5': pd.date_range(start=datetime.today(), periods=4)
    }
    df = pd.DataFrame(data)
    col_types = {0: 'object', 1: 'object', 2: 'int', 3: 'float', 4: 'date'}
    col_constraints = {0: False, 1: False, 2: True, 3: False, 4: False}
    cls.dp = DataProvider(df, col_types, col_constraints)
    cls.columns = [
        AcceptanceBuilder.Columns(nome='fiscalcode', tipologia=np.dtype('O'),
                                  lunghezza=19, nullable=False, pk=True),
        AcceptanceBuilder.Columns(nome='pv', tipologia=np.dtype('O'),
                                  lunghezza=2, nullable=False, pk=True),
        AcceptanceBuilder.Columns(nome='sedeul', tipologia=np.dtype('int64'),
                                  lunghezza=50, nullable=True, pk=False),
        AcceptanceBuilder.Columns(nome='id_istat_province', tipologia=np.dtype('float64'),
                                  lunghezza=None, nullable=True, pk=False),
        AcceptanceBuilder.Columns(nome='istat_province_prcode', tipologia=np.dtype('<M8[ns]'),
                                  lunghezza=2, nullable=True, pk=False)
    ]
    cls.builder = AcceptanceBuilder(dp=cls.dp, dp_file_extension='csv', columns=cls.columns)
from data_provider.data_provider import DataProvider
from net.seq2seq import Seq2Seq

# Inference entry point: translate a single Spanish sentence.
provider = DataProvider(1)
seq_seq = Seq2Seq(provider=provider)
seq_seq.predict(["hace mucho frio aqui."])
from data_provider.data_provider import DataProvider
from net.seq2seq import Seq2Seq

# Training entry point; the DataProvider is constructed with a batch of 64
# rather than the single-example provider used for prediction.
provider = DataProvider(64)
seq_seq = Seq2Seq(provider=provider)
seq_seq.train()
import collections
import os

import numpy as np
import tensorflow as tf

# The module path for DataProvider is assumed here; the original excerpt did
# not include its imports.
from data_provider import DataProvider


class ToyClassifier(object):

    def model_initialization(self, sess, cfg):
        batch_images_lab, batch_labels_lab = self.data.load_label_batch(cfg.iBatchSize, 0)

        def initialization():
            # Initialize variables one at a time, feeding a real batch so any
            # data-dependent init in the weight-normalized layers can run.
            var_list = tf.global_variables()
            for var in var_list:
                sess.run(tf.variables_initializer([var]),
                         feed_dict={self.input: batch_images_lab,
                                    self.lab_image_labels: batch_labels_lab})
                print(var.op.name)
            print('optimizer initialization')

        if cfg.bLoadCheckpoint:
            if self.load(sess, cfg):
                print(" [*] Load SUCCESS")
            else:
                print(" [!] Load failed...")
                initialization()
        else:
            initialization()

    def load(self, sess, cfg=None):
        print(" [*] Reading checkpoints...")
        ckpt = tf.train.get_checkpoint_state(cfg.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            self.saver.restore(sess, ckpt.model_checkpoint_path)
            return True
        else:
            return False

    def save(self, sess, cfg=None, step=None):
        model_name = "model"
        if not os.path.exists(cfg.checkpoint_dir):
            os.makedirs(cfg.checkpoint_dir)
        self.saver.save(sess, os.path.join(cfg.checkpoint_dir, model_name), global_step=step)

    def build_model(self, num_classes, cfg):
        self.input = tf.placeholder(tf.float32, [cfg.iBatchSize, 2], name='lab_images')
        self.lab_image_labels = tf.placeholder(tf.int32, [cfg.iBatchSize], name='lab_image_labels')

        with tf.variable_scope('ccccccc'):
            from ops_wn import linear as linear_wn
            cts = {}
            ldc = []
            h0 = self.input
            # MLP over the 2-D toy inputs: eight 1000-unit layers, then
            # 500/500 and 250/250/250, all weight-normalized with ReLU.
            for width in [1000] * 8 + [500] * 2 + [250] * 3:
                h0 = tf.nn.relu(linear_wn(h0, width, cts=cts, ldc=ldc))
            class_logit = linear_wn(h0, num_classes, cts=cts, ldc=ldc, init_scale=0.1)

        # Reduce the per-example cross-entropy to a scalar loss for minimize().
        self.d_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=class_logit,
                labels=tf.one_hot(self.lab_image_labels, num_classes)))
        self.prediction = tf.nn.softmax(class_logit)
        self.top1 = tf.reduce_mean(tf.to_float(tf.nn.in_top_k(self.prediction, self.lab_image_labels, 1)))

        with tf.variable_scope('optimizer/D'):
            self.d_global_step = tf.Variable(0, trainable=False, name='d_global_step')
            self.fLrD = tf.train.exponential_decay(cfg.fLrIni, self.d_global_step, cfg.iLrStep, cfg.fLrDecay)
            self.d_optim = tf.train.AdamOptimizer(self.fLrD, beta1=cfg.fBeta1D, beta2=cfg.fBeta2D) \
                .minimize(self.d_loss, global_step=self.d_global_step)

        self.saver = tf.train.Saver(max_to_keep=10000)

    def train(self, sess, cfg):
        self.data = DataProvider(cfg)
        counter = 0
        self.model_initialization(sess, cfg)
        test_top1_col = collections.deque(maxlen=10000)
        while True:
            batch_images_lab, batch_labels_lab = self.data.load_label_batch(cfg.iBatchSize, counter)
            _, training_top1 = sess.run([self.d_optim, self.top1],
                                        feed_dict={self.input: batch_images_lab,
                                                   self.lab_image_labels: batch_labels_lab})
            counter += 1
            # print('training_top1:%f' % training_top1)
            if np.mod(counter, 100) == 0 and cfg.bUseLabel:
                batch_images_lab, batch_labels_lab = self.data.load_test_batch(cfg.iBatchSize, counter // 100)
                test_top1 = sess.run(self.top1,
                                     feed_dict={self.input: batch_images_lab,
                                                self.lab_image_labels: batch_labels_lab})
                test_top1_col.append(test_top1)
                avg_test_top1 = np.mean(test_top1_col)
                print('iter %d, test_top1:%f' % (counter, float(avg_test_top1)))
            if np.mod(counter, 5000) == 0:
                self.save(sess, cfg, counter)
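# A minimal NumPy sketch of the learning-rate schedule built above with
# tf.train.exponential_decay (default staircase=False); the hyperparameter
# values below are made up, not taken from cfg:
import numpy as np

f_lr_ini, i_lr_step, f_lr_decay = 0.001, 1000, 0.9
for step in [0, 1000, 5000]:
    # decayed_lr = initial_lr * decay_rate ** (step / decay_steps)
    lr = f_lr_ini * f_lr_decay ** (step / i_lr_step)
    print('step %5d -> lr %.6f' % (step, lr))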
import matplotlib.pyplot as plt
import numpy as np

from data_provider.data_provider import DataProvider

d = DataProvider()
# Inspect the distinct source amplitudes from each time-sample offset onward.
for offset in (50, 100, 200, 300):
    print("uniques for %d:" % offset)
    print(np.unique(d.train_stc.data[:, offset:]))
    print("\n\n\n")

plt.plot(d.train_stc.times, d.train_stc.data.T)
plt.show()