def __init__(self, datapath):
        '''
        Initialization.
        By default the pinyin output size is 1423: 1421 pinyin symbols + 1 special symbol + 1 blank token.
        '''
        self.MS_OUTPUT_SIZE = 1423 # 1421 pinyin symbols + 1 special symbol (needed when preparing the audio data) + 1 blank token
        self.label_max_string_length = config.LABEL_LENGTH
        self.AUDIO_LENGTH = 1600
        self.AUDIO_FEATURE_LENGTH = config.AUDIO_FEATURE_LENGTH
        self.datasetmanager = DataSetManager()
        speechmodel = mobilebase_model.SpeechModel()
        #speechmodel = vggbase_model.SpeechModel()
        self.base_model, self._model = speechmodel.create_model(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, self.MS_OUTPUT_SIZE, self.label_max_string_length)

        self.datapath = datapath
Example #2

# In[6]:


mu, sigma = 0, 0.1 # mean and standard deviation
n_train_samples = 60000
numero_especies = 100

train_data = np.random.normal(mu, sigma, (n_train_samples,numero_especies))
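# train_data is a synthetic stand-in: a 60000 x 100 matrix of N(0, 0.1) draws,
# one row per sample and one column per "species" (numero_especies).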
print("Shape")
print(train_data.shape)
print("One samples mean")
print(np.mean(train_data[0,:]))

my_ds = DataSetManager(train_data, norm=False)


# In[7]:

session_saver = tf.train.Saver()

pre_trained  = 0


# Train loop
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    data_gen = inf_train_gen()
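    # NOTE: inf_train_gen() is assumed to be defined elsewhere; it is not shown
    # in this snippet.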

    
def train_gan(train_set, indices: List, samples_per_N:int, repetition_n:int, identifier:str,experiment_name:str,  batch_size: int = 256, desired_epochs: int = 2000, use_bot = False):
    """
    The GAN is trained for 1000 epochs. If a a set of 60k samples is trained with a batchsize of 256,
    then a epoch equals 226 iterations. A budget of 100,000 iterations would equals to 426

    """
    assert train_set.shape[0] > len(indices)

    print(train_set.shape)
    print(len(indices))

    my_ds = DataSetManager(train_set[indices])


    # print("Set number of iterations to train\n")
    v5 = (desired_epochs * train_set[indices].shape[0]) // batch_size + 1  # number of batches needed to cover desired_epochs

    print("ITERS "+str(v5))
    print("SIZE "+str(train_set[indices].shape))


    # print("Use pretrained model? (0 means No, some number different to 0 means yes)\n")
    decision_number = 0 #int( input() )

    # print("Type a name to save the model with?\n")
    model_tag = str(round(samples_per_N)) +'_'+ str(repetition_n)
    

    storing_path = 'data/'+ experiment_name + "/" + model_tag + '_data/'
    model_path = storing_path+ model_tag + '.ckpt'
    
    # Recall that os.mkdir isn't recursive, so it only creates one directory at a time
    try:
        # Create target Directory
        os.mkdir(storing_path)
        print("Directory " , storing_path ,  " Created ") 
    except FileExistsError:
        print("Directory " , storing_path ,  " already exists")

    # ===> Auxiliary functions <===
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    def save_history(files_prefix, gen_loss_record,disc_loss_record, jsd_error, current_epoch, epoch_record,my_ds,iter_, epochs, global_iters, BATCH_SIZE, low_lr, high_lr ):
        # Save losses per epoch

        df = pd.DataFrame(np.array(gen_loss_record))
        with open(files_prefix+'_gen_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(disc_loss_record))
        with open(files_prefix+'_disc_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(epoch_record))
        with open(files_prefix+'_epoch_record.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        # Save current iter and epochs

        training_history = {'epochs': [epochs + my_ds.epochs_completed],
                            'iters':  [global_iters + iter_],
                            'Batch Size': [BATCH_SIZE],
                            'low LR': [low_lr],
                            'high LR': [high_lr]}
        df = pd.DataFrame(training_history) 

        with open(files_prefix+'_training.csv', 'w+') as f:
            df.to_csv(f,  index=False) #, header=False, index=False

        with open(files_prefix+'_jsd_error.csv', 'a') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow([current_epoch, jsd_error])         

    def send_bot_message(bot,my_ds, iter_, ITERS, identifier ):
        """ 
        Not quite straightforward, since the critic draws many more batches than the generator.

        """

        message = "\nEpochs ["+str(my_ds.epochs_completed)+"] Iter: "+str(iter_)+";\t"+str(np.round(100* iter_/ITERS,2))+"% "
        message = message + identifier
        print(message)
        bot.set_status(message)
        # Send update message
        if bot.verbose:
            bot.send_message(message)                

        print("\n")

    def save_gen_samples(gen_op, disc_op, sess,path,  k, n = 4):
        """
        k: the number of epochs the generator has been trained for
        n: the number of batches of samples to draw
        """

        suffix = '_gen_samples_'+str(k)+'_epochs_'+'.csv'

        for _ in range(n):

            samples = sess.run(gen_op)
            df = pd.DataFrame(np.array(samples))
            with open(path+suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

            # Score the samples using the critic
            scores = sess.run(disc_op)
            df = pd.DataFrame(np.array(scores))
            with open(path+'scores_'+suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # ===> Model Parameters <=== 
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """

    DIM = 512  # model dimensionality
    GEN_DIM = 100  # output dimension of the generator
    DIS_DIM = 1  # output dimension of the discriminator
    FIXED_GENERATOR = False  # whether to hold the generator fixed at real data plus Gaussian noise, as in the plots in the paper
    LAMBDA = .1  # smaller lambda makes things faster for toy tasks, but isn't necessary if you increase CRITIC_ITERS enough
    BATCH_SIZE = batch_size   # batch size
    ITERS = v5 # 100000 # how many generator iterations to train for (overwritten below by total_iters)
    FREQ = 250  # how often (in iterations) to log, checkpoint and sample

    print("==>>Using batch size of "+str(BATCH_SIZE))
    CRITIC_ITERS = 5  # how many critic iterations per generator iteration


    def Generator_Softmax(n_samples,  name='gen'):

        with tf.variable_scope(name):
            noise = tf.random_normal([n_samples, GEN_DIM])
            output01 = tf_utils.linear(noise, 2*DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')
            
            output02 = tf_utils.linear(output01, 2*DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')
            
            output03 = tf_utils.linear(output02, 2*DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, GEN_DIM, name='fc-4')

            # Reminder: a logit can be modeled as a linear function of the predictors
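            # The softmax below keeps every generated sample on the probability
            # simplex (non-negative entries summing to 1), which the
            # "Sum-Simplex condition" check in the training loop relies on.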
            output05 = tf.nn.softmax(output04, name = 'softmax-1')

            return output05
            

    def Discriminator(inputs, is_reuse=True, name='disc'):
        with tf.variable_scope(name, reuse=is_reuse):
            print('is_reuse: {}'.format(is_reuse))
            output01 = tf_utils.linear(inputs, 2*DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')

            output02 = tf_utils.linear(output01, 2*DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')

            output03 = tf_utils.linear(output02, 2*DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, DIS_DIM, name='fc-4')
            
            return output04
        
    real_data = tf.placeholder(tf.float32, shape=[None, GEN_DIM])
    fake_data = Generator_Softmax(BATCH_SIZE)

    disc_real = Discriminator(real_data, is_reuse=False)
    disc_fake = Discriminator(fake_data)

    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = - tf.reduce_mean(disc_fake)

    # WGAN gradient penalty parameters
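    # WGAN-GP (Gulrajani et al., 2017): the critic is penalised when the norm
    # of its gradient, evaluated at random interpolations between real and fake
    # samples, deviates from 1; this enforces the 1-Lipschitz constraint
    # without weight clipping.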

    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
    interpolates = alpha*real_data + (1.-alpha) * fake_data
    disc_interpolates = Discriminator(interpolates)
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
    gradient_penalty = tf.reduce_mean((slopes - 1)**2)

    disc_cost += LAMBDA * gradient_penalty
        
    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='disc')
    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gen')



    disc_lr = tf.placeholder(tf.float32, shape=()) # 1e-4
    gen_lr = tf.placeholder(tf.float32, shape=()) # 1e-4

    disc_train_op = tf.train.AdamOptimizer(learning_rate=disc_lr, beta1=0.5, beta2=0.9).minimize(disc_cost, var_list=disc_vars)

    if len(gen_vars) > 0:
        gen_train_op = tf.train.AdamOptimizer(learning_rate=gen_lr, beta1=0.5, beta2=0.9).minimize(gen_cost, var_list=gen_vars)
    else:
        gen_train_op = tf.no_op()
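    # If the 'gen' scope contributed no trainable variables (e.g. with a fixed
    # generator), minimize() would raise, so a no-op stands in for the update.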


    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    # ===> Model Parameters <=== 
 

  
    
    session_saver = tf.train.Saver()

    # files_prefix = 'model/'+ model_tag 

    if decision_number == 0:
        pre_trained  = False


        gen_loss_record = []  # type: List[float]
        disc_loss_record = []  # type: List[float]
        epoch_record = []  # type: List[float]

        epochs = 0
        global_iters = 0

        df = pd.DataFrame(np.array(indices))
        with open(storing_path+'training_indices.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)


    else:
        pre_trained  = True



        print(storing_path)
        print(storing_path+'training_indices.csv')
        _indices = (pd.read_csv(storing_path+'training_indices.csv',header=None  ).values).tolist()

        
        print(len(_indices))
        print(train_set[indices].shape)
        print(train_set[_indices].squeeze().shape)
        assert train_set[_indices].squeeze().shape ==  train_set[indices].shape
        my_ds = DataSetManager(train_set[_indices].squeeze())

        temp = pd.read_csv(storing_path+'_training.csv',header=None  ).values
        
        epochs, global_iters = temp.flatten()

        my_ds.epochs_completed  = epochs

        gen_loss_record = (pd.read_csv(storing_path+'_gen_loss.csv',header=None  ).values).tolist()
        disc_loss_record = (pd.read_csv(storing_path+'_disc_loss.csv',header=None  ).values).tolist()
        epoch_record = (pd.read_csv(storing_path+'_epoch_record.csv',header=None  ).values).tolist()


        print("State has been restored")




    # Create a DLBot instance

    if use_bot:
        bot = DLBot(token=telegram_token, user_id=telegram_user_id)
        # Activate the bot
        bot.activate_bot()
        print("\nTelegram bot has been activated ")


    iters_per_epoch = my_ds.num_examples/BATCH_SIZE

    total_iters = int(np.ceil((desired_epochs*iters_per_epoch)/CRITIC_ITERS))

    critic_iters = np.round((5/6)*total_iters)
    gen_iters = np.round((1/6)*total_iters)
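    # With CRITIC_ITERS = 5, five out of every six gradient steps update the
    # critic and one updates the generator; critic_iters and gen_iters were
    # intended as step sizes for the (currently commented-out) CyclicLR schedules.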

    
    ITERS = total_iters

    # Train loop
    with tf.Session() as sess:
        
        if not pre_trained:  # False by default
            sess.run(tf.global_variables_initializer())
        else:
            session_saver.restore(sess, model_path)

        # Duct-tape solution: track the iteration counter manually
        iter_ = 0

        """
        while my_ds.epochs_completed < desired_epochs:
            iter_ +=1
        """
        # r=10**-4.72, max_lr=10**-3.72,
        lr_multiplier: int = 1
        low_lr =  10**-5
        high_lr = 10**-4

        lr1 = low_lr # lr_multiplier*low_lr
        lr2 = low_lr #lr_multiplier*high_lr

        gen_lr_ = low_lr # CyclicLR(base_lr= lr1, max_lr= lr2, step_size=gen_iters)
        disc_lr_ = low_lr # CyclicLR(base_lr= lr1, max_lr= lr2, step_size=critic_iters)

        for iter_ in range(ITERS):
            batch_data, disc_cost_ = None, None
            
            previous_epoch =  my_ds.epochs_completed 

            # train critic
            for i_ in range(CRITIC_ITERS):
                batch_data =  my_ds.next_batch(BATCH_SIZE) # data_gen.__next__()
                disc_cost_, _ =  sess.run([disc_cost, disc_train_op], feed_dict={real_data: batch_data, disc_lr:disc_lr_ }) # .clr()
                # disc_lr_.on_batch_end()

            # train generator
            sess.run(gen_train_op, feed_dict={gen_lr : gen_lr_})   #  gen_lr_.clr()
            # gen_lr_.on_batch_end()

            gen_cost2 = sess.run(gen_cost)   

            current_epoch =  my_ds.epochs_completed 

            condition2 = current_epoch % 5 == 0
            if current_epoch > previous_epoch and condition2:
                disc_loss_record.append(disc_cost_)
                gen_loss_record.append(gen_cost2)
                epoch_record.append(my_ds.epochs_completed ) 
                # print("Diff "+str(current_epoch - previous_epoch))

            if (np.mod(iter_, FREQ) == 0) or (iter_+1 == ITERS):
                
                """
                print("===> Debugging")
                print(disc_loss_record)
                print(gen_loss_record)
                """
                if use_bot:
                    bot.loss_hist.append(disc_cost_)

                fake_samples = sess.run(fake_data) # , feed_dict={real_data: batch_data}
                # print("\n==> Sum-Simplex condition: " +str(np.sum(fake_samples, axis=1))) 
                fake_population = np.array([sess.run(fake_data) for _ in range(40)]).reshape(40 * batch_size, train_set.shape[1])

                print(fake_population.shape)
                jsd_error = gan_error_all_species(fake_population, k3_test_set)  # gan_error_all_species and k3_test_set are assumed to be defined elsewhere

                print("JSD Error "+str(jsd_error))

                message = "\nEpochs ["+str(my_ds.epochs_completed)+"] Iter: "+str(iter_)+";\t"+str(np.round(100* iter_/ITERS,2))+"% "
                message = message + identifier
                print(message)

                if use_bot:
                    send_bot_message(bot,my_ds, iter_, ITERS, identifier)


                current_epoch = my_ds.epochs_completed

                session_saver.save(sess, model_path)
                save_history(storing_path, gen_loss_record,disc_loss_record, jsd_error, current_epoch, epoch_record, my_ds,iter_, epochs, global_iters, BATCH_SIZE, low_lr, high_lr)

                
                # save_gen_samples(fake_data, disc_fake ,sess, storing_path, k) # fake_data = Generator_Softmax(BATCH_SIZE)
                

            utils.tick()  #  _iter[0] += 1

        if iter_ == ITERS - 1:  # save a final checkpoint on the last iteration
            session_saver.save(sess, model_path)
        
        # Create gan samples
        n_samples = len(indices)

        k_iter = n_samples // BATCH_SIZE + 1
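        # Draw enough whole batches to cover n_samples rows; the surplus rows
        # are trimmed back to exactly n_samples after the session closes.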

        gan_samples_path = storing_path+"gan_samples_" +model_tag+'.csv'

        for k in range(k_iter):
            fake_samples = sess.run(fake_data)

            df = pd.DataFrame(fake_samples)
            with open(gan_samples_path, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # Clear variable values and reset the default graph

    tf.reset_default_graph()

    current_epoch = my_ds.epochs_completed
    save_history(storing_path, gen_loss_record,disc_loss_record, jsd_error, current_epoch, epoch_record, my_ds,iter_, epochs, global_iters, BATCH_SIZE, low_lr, high_lr)   
    if use_bot:
        bot.stop_bot()

    print("Training is done")

    # Duct tape: trim the GAN sample set to exactly n_samples rows without changing the TF graph

    temp1 = pd.read_csv(gan_samples_path, header=None).values
    temp1 = temp1[0:n_samples]
    df = pd.DataFrame(temp1)

    with open(gan_samples_path, 'w+') as f:
        df.to_csv(f, header=False, index=False)


    print("Training is done")
# ===> Model Parameters <===

print("==> Loading CSV")
v1 = 'data/' + str(archivero)  # 'archivero' (the CSV file name) is assumed to be defined elsewhere
print(v1)
train_data = pd.read_csv(
    v1, header=None
).values  # np.random.normal(mu, sigma, (n_train_samples,numero_especies))
mean_vec = np.zeros(100)  # vector of 100 zeros

print("Shape")
print(train_data.shape)
print("One samples mean")
print(np.mean(train_data[0, :]))

my_ds = DataSetManager(train_data, norm=False)

session_saver = tf.train.Saver()

files_prefix = 'model/' + model_tag

if decision_number == 0:
    pre_trained = False

    gen_loss_record = []
    disc_loss_record = []
    epoch_record = []

    epochs = 0
    global_iters = 0
class ModelSpeech(): # speech model class
    def __init__(self, datapath):
        '''
        Initialization.
        By default the pinyin output size is 1423: 1421 pinyin symbols + 1 special symbol + 1 blank token.
        '''
        self.MS_OUTPUT_SIZE = 1423 # 1421 pinyin symbols + 1 special symbol (needed when preparing the audio data) + 1 blank token
        self.label_max_string_length = config.LABEL_LENGTH
        self.AUDIO_LENGTH = 1600
        self.AUDIO_FEATURE_LENGTH = config.AUDIO_FEATURE_LENGTH
        self.datasetmanager = DataSetManager()
        speechmodel = mobilebase_model.SpeechModel()
        #speechmodel = vggbase_model.SpeechModel()
        self.base_model, self._model = speechmodel.create_model(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, self.MS_OUTPUT_SIZE, self.label_max_string_length)

        self.datapath = datapath

    def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = abspath + 'model_speech/m' + ModelName + '/speech_model'+ModelName):
        '''
        Train the model.
        Parameters:
            datapath: path where the data is stored
            epoch: number of training epochs
            save_step: save the model every save_step steps
            filename: default file name to save to, without the file extension
        '''
        yielddatas = self.datasetmanager.data_generator(batch_size, self.AUDIO_LENGTH)

        for epoch_idx in range(epoch): # number of training epochs
            print('[running] train epoch %d .' % epoch_idx)
            n_step = 0 # number of training steps completed in this epoch
            while True:
                try:
                    print('[message] epoch %d . Trained on %d+ batches so far' % (epoch_idx, n_step * save_step))
                    self._model.fit_generator(yielddatas, save_step) # the second positional argument is steps_per_epoch, so each call trains save_step batches
                    n_step += 1
                except StopIteration:
                    print('[error] generator error. please check data format.')
                    break

                self.SaveModel(comment='_e_' + str(epoch_idx) + '_step_' + str(n_step * save_step))
                self.TestModel(data_count=8)
                self.TestModel(data_count=8)

    def LoadModel(self,filename = abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName+'.model'):
        '''
        Load the model weights.
        '''
        self._model.load_weights(filename)
        self.base_model.load_weights(filename + '.base')

    def SaveModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName, comment=''):
        '''
        Save the model weights.
        '''
        self._model.save_weights(filename+comment+'.model')
        self._model.save(filename+comment+'.h5')
        self.base_model.save_weights(filename + comment + '.model.base')
        self.base_model.save(filename+comment+'.base.h5')
        with open('step' + ModelName + '.txt', 'w') as f:
            f.write(filename + comment)

    def TestModel(self, data_count = 32, io_step_print = 10, io_step_file = 10):
        '''
        Evaluate the model on test data.
        io_step_print
            adjust this parameter to reduce the stdout I/O overhead during testing
        io_step_file
            adjust this parameter to reduce the file I/O overhead during testing
        '''
        #num_data = data.GetDataNum() # get the number of data samples
        words_num = 0
        word_error_num = 0

        for x in range(0, data_count):
            test_data = self.datasetmanager.next_data()
            data_input, data_labels = test_data
            #data_input, data_labels = data.GetData((ran_num + i) % num_data)  # take a run of consecutive samples starting from a random index

            # If the input wav file is too long, skip it automatically and move on to the next one
            if data_input.shape[0] > self.AUDIO_LENGTH:
                continue

            pre = self.Predict(data_input, data_input.shape[0] // 8)
            words_n = data_labels.shape[0] # number of symbols in this sentence
            words_num += words_n # add it to the total symbol count
            edit_distance = GetEditDistance(data_labels, pre) # edit distance between labels and prediction
            if(edit_distance <= words_n): # if the edit distance is no larger than the sentence length
                word_error_num += edit_distance # use the edit distance as the error count
            else: # otherwise the prediction inserted a pile of garbage symbols
                word_error_num += words_n # so just count the whole sentence length as errors

        print('*[Test Result] Speech Recognition set word error ratio: ', word_error_num / words_num * 100, '%')

    def Predict(self, data_input, input_len):
        '''
        Run prediction.
        Returns the list of recognized pinyin symbols.
        '''
        batch_size = 1
        in_len = np.zeros((batch_size), dtype=np.int32)
        in_len[0] = input_len
        x_in = np.zeros((batch_size, self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)
        for i in range(batch_size):
            x_in[i, 0:len(data_input)] = data_input
        base_pred = self.base_model.predict(x=x_in)
        base_pred = base_pred[:, :, :]
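        # K.ctc_decode performs CTC decoding on the per-frame symbol
        # probabilities: with greedy=True it picks the best symbol per frame,
        # then collapses repeats and drops blanks (beam_width and top_paths
        # only matter for beam search).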
        r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
        r1 = K.get_value(r[0][0])
        r1 = r1[0]
        return r1

    def RecognizeSpeech(self, wavsignal, fs):
        '''
        The function that ultimately performs speech recognition on a wav signal.
        Note: there is still a bug here.
        '''
        # Extract the input features
        data_input = GetMfccFeature(wavsignal, fs, config.AUDIO_MFCC_FEATURE_LENGTH)
        #data_input = GetFrequencyFeature3(wavsignal, fs)
        input_length = len(data_input)
        input_length = input_length // 8
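        # The factor of 8 above is assumed to match the time-axis downsampling
        # of the acoustic model (the same factor appears in TestModel); this is
        # an assumption about mobilebase_model, not something shown here.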

        data_input = np.array(data_input, dtype=float)
        #print(data_input,data_input.shape)
        data_input = data_input.reshape(data_input.shape[0],data_input.shape[1],1)
        #t2=time.time()
        r1 = self.Predict(data_input, input_length)
        #t3=time.time()
        #print('time cost:',t3-t2)
        list_symbol_dic = self.datasetmanager.list_symbol # get the pinyin symbol list
        r_str = []
        for i in r1:
            r_str.append(list_symbol_dic[i])

        return r_str

    def RecognizeSpeech_FromFile(self, *fps):
        '''
        The function that ultimately performs speech recognition, on the wav files given by name.
        '''
        res = []
        for filename in fps:
            wavsignal,fs = read_wav_data(filename)
            print('read time: ', time.time())
            r = self.RecognizeSpeech(wavsignal, fs)
            print('reco time: ', time.time())
            res.append(r)
        return res

    @property
    def model(self):
        '''
        Returns the Keras model.
        '''
        return self._model
def train_gan(train_set, indices: List, samples_per_N: float,
              repetition_n: int):

    # print("Type a name to save the model with?\n")
    model_tag = str(round(samples_per_N)) + '_' + str(repetition_n)

    model_path = 'model/' + model_tag + '.ckpt'

    # print("Set number of iterations to train\n")
    v5 = 1000

    # print("Use pretrained model? (0 means No, some number different to 0 means yes)\n")
    decision_number = 0  #int( input() )

    storing_path = 'model/' + model_tag + '_data/'

    dirName = storing_path

    try:
        # Create target Directory
        os.mkdir(dirName)
        print("Directory ", dirName, " Created ")
    except FileExistsError:
        print("Directory ", dirName, " already exists")

    # ===> Auxiliary functions <===
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    def save_history(files_prefix, gen_loss_record, disc_loss_record,
                     epoch_record, my_ds, iter_, epochs, global_iters):
        # Save losses per epoch

        df = pd.DataFrame(np.array(gen_loss_record))
        with open(files_prefix + '_gen_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(disc_loss_record))
        with open(files_prefix + '_disc_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(epoch_record))
        with open(files_prefix + '_epoch_record.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        # Save current iter and epochs

        df = pd.DataFrame(
            np.array([epochs + my_ds.epochs_completed, global_iters + iter_]))

        with open(files_prefix + '_training.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

    def send_bot_message(bot, my_ds, iter_, ITERS):

        message = "\nEpochs [" + str(
            my_ds.epochs_completed) + "] Iter: " + str(iter_) + " , % " + str(
                100 * iter_ / ITERS)
        print(message)
        bot.set_status(message)
        # Send update message
        if bot.verbose:
            bot.send_message(message)

        print("\n")

    def save_gen_samples(gen_op, disc_op, sess, path, k, n=4):
        """
        k: the number of epochs the generator has been trained for
        n: the number of batches of samples to draw
        """

        suffix = '_gen_samples_' + str(k) + '_epochs_' + '.csv'

        for _ in range(n):

            samples = sess.run(gen_op)
            df = pd.DataFrame(np.array(samples))
            with open(path + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

            # Score the samples using the critic
            scores = sess.run(disc_op)
            df = pd.DataFrame(np.array(scores))
            with open(path + 'scores_' + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # ===> Model Parameters <===
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """

    DIM = 512  # model dimensionality
    GEN_DIM = 100  # output dimension of the generator
    DIS_DIM = 1  # output dimension of the discriminator
    FIXED_GENERATOR = False  # whether to hold the generator fixed at real data plus Gaussian noise, as in the plots in the paper
    LAMBDA = .1  # smaller lambda makes things faster for toy tasks, but isn't necessary if you increase CRITIC_ITERS enough
    BATCH_SIZE = 256  # batch size
    ITERS = v5  # 100000 # how many generator iterations to train for
    FREQ = 250  # how often (in iterations) to log, checkpoint and sample

    CRITIC_ITERS = 5  # how many critic iterations per generator iteration

    def Generator_Softmax(n_samples, name='gen'):

        with tf.variable_scope(name):
            noise = tf.random_normal([n_samples, GEN_DIM])
            output01 = tf_utils.linear(noise, DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')

            output02 = tf_utils.linear(output01, DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')

            output03 = tf_utils.linear(output02, DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, GEN_DIM, name='fc-4')

            # Reminder: a logit can be modeled as a linear function of the predictors
            output05 = tf.nn.softmax(output04, name='softmax-1')

            return output05

    def Discriminator(inputs, is_reuse=True, name='disc'):
        with tf.variable_scope(name, reuse=is_reuse):
            print('is_reuse: {}'.format(is_reuse))
            output01 = tf_utils.linear(inputs, DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')

            output02 = tf_utils.linear(output01, DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')

            output03 = tf_utils.linear(output02, DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, DIS_DIM, name='fc-4')

            return output04

    real_data = tf.placeholder(tf.float32, shape=[None, GEN_DIM])
    fake_data = Generator_Softmax(BATCH_SIZE)

    disc_real = Discriminator(real_data, is_reuse=False)
    disc_fake = Discriminator(fake_data)

    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = -tf.reduce_mean(disc_fake)

    # WGAN gradient penalty parameters

    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
    interpolates = alpha * real_data + (1. - alpha) * fake_data
    disc_interpolates = Discriminator(interpolates)
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
    gradient_penalty = tf.reduce_mean((slopes - 1)**2)

    disc_cost += LAMBDA * gradient_penalty

    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope='disc')
    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gen')

    #WGAN Training operations
    disc_train_op = tf.train.AdamOptimizer(learning_rate=1e-4,
                                           beta1=0.5,
                                           beta2=0.9).minimize(
                                               disc_cost, var_list=disc_vars)

    if len(gen_vars) > 0:
        gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4,
                                              beta1=0.5,
                                              beta2=0.9).minimize(
                                                  gen_cost, var_list=gen_vars)
    else:
        gen_train_op = tf.no_op()
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    # ===> Model Parameters <===

    print("==> Loading CSV")
    v1 = 'data/' + str(archivero)
    print(v1)
    train_data = pd.read_csv(
        v1, header=None
    ).values  # np.random.normal(mu, sigma, (n_train_samples,numero_especies))
    mean_vec = np.zeros(100)  # vector of 100 zeros

    print("Shape")
    print(train_data.shape)
    print("One samples mean")
    print(np.mean(train_data[0, :]))

    my_ds = DataSetManager(train_data, norm=False)

    session_saver = tf.train.Saver()

    files_prefix = 'model/' + model_tag

    if decision_number == 0:
        pre_trained = False

        gen_loss_record = []  # type: List[float]
        disc_loss_record = []  # type: List[float]
        epoch_record = []  # type: List[float]

        epochs = 0
        global_iters = 0

    else:
        pre_trained = True
        temp = pd.read_csv(storing_path + '_training.csv', header=None).values

        epochs, global_iters = temp.flatten()

        my_ds.epochs_completed = epochs

        gen_loss_record = (pd.read_csv(storing_path + '_gen_loss.csv',
                                       header=None).values).tolist()
        disc_loss_record = (pd.read_csv(storing_path + '_disc_loss.csv',
                                        header=None).values).tolist()
        epoch_record = (pd.read_csv(storing_path + '_epoch_record.csv',
                                    header=None).values).tolist()

    # Create a DLBot instance
    bot = DLBot(token=telegram_token, user_id=telegram_user_id)
    # Activate the bot
    bot.activate_bot()

    print("\nTelegram bot has been activated ")

    # Train loop
    with tf.Session() as sess:

        if not pre_trained:  # False by default
            sess.run(tf.global_variables_initializer())
        else:
            # tf.reset_default_graph()
            session_saver.restore(sess, model_path)

        for iter_ in range(ITERS):
            batch_data, disc_cost_ = None, None

            previous_epoch = my_ds.epochs_completed

            # train critic
            for i_ in range(CRITIC_ITERS):
                batch_data = my_ds.next_batch(
                    BATCH_SIZE)  # data_gen.__next__()
                disc_cost_, _ = sess.run([disc_cost, disc_train_op],
                                         feed_dict={real_data: batch_data})

            # train generator
            sess.run(gen_train_op)

            gen_cost2 = sess.run(gen_cost)

            current_epoch = my_ds.epochs_completed

            condition2 = current_epoch % 5 == 0
            if current_epoch > previous_epoch and condition2:
                disc_loss_record.append(disc_cost_)
                gen_loss_record.append(gen_cost2)
                epoch_record.append(my_ds.epochs_completed)
                # print("Diff "+str(current_epoch - previous_epoch))

            if (np.mod(iter_, FREQ) == 0) or (iter_ + 1 == ITERS):
                """
                print("===> Debugging")
                print(disc_loss_record)
                print(gen_loss_record)
                """

                bot.loss_hist.append(disc_cost_)

                fake_samples = sess.run(
                    fake_data)  # , feed_dict={real_data: batch_data}
                # print("\n==> Sum-Simplex condition: " +str(np.sum(fake_samples, axis=1)))
                send_bot_message(bot, my_ds, iter_, ITERS)

                session_saver.save(sess, model_path)
                save_history(storing_path, gen_loss_record, disc_loss_record,
                             epoch_record, my_ds, iter_, epochs, global_iters)

                k = my_ds.epochs_completed
                save_gen_samples(
                    fake_data, disc_fake, sess, storing_path,
                    k)  # fake_data = Generator_Softmax(BATCH_SIZE)

            utils.tick()  #  _iter[0] += 1

        if iter_ == ITERS - 1:  # save a final checkpoint on the last iteration
            session_saver.save(sess, model_path)

    save_history(storing_path, gen_loss_record, disc_loss_record, epoch_record,
                 my_ds, iter_, epochs, global_iters)
    k = my_ds.epochs_completed

    bot.stop_bot()

    print("Training is done")

    n_samples = len(indices)

    print("Training is done")