Example #1
def apply(df, config, header, dataset_features):

    debug = config['debug']

    #------------------------

    rows = df.shape[0]
    columns = df.shape[1]
    final_predictions = pd.DataFrame(np.zeros([rows, 1]),
                                     columns=['prediction'])

    worksheet = df.copy()
    worksheet['weight'] = 1  #/ rows

    tmp_df = df.copy()
    tmp_df['Decision'] = worksheet['weight'] * tmp_df[
        'Decision']  #normal distribution

    for i in range(0, 1):
        root = 1
        file = "outputs/rules/rules_" + str(i) + ".py"

        if debug == False: functions.createFile(file, header)

        #print(tmp_df)
        Training.buildDecisionTree(tmp_df, root, file, config,
                                   dataset_features)

    #print(final_predictions)
    """for row, instance in final_predictions.iterrows():
Example #2
def main():
    auto = Auto()

    if USER_SETTINGS.get('daily_bonus', 'on'):
        print_d("> Daily Bonus <")
        auto.get_daily_bonus()

    if USER_SETTINGS.get('bonus_from_partners', 'on'):
        print_d("> Bonus from Partners <")
        auto.get_bonus_from_partners()

    if USER_SETTINGS.get('club_sales', 'on'):
        print_d("> Club Sales <")
        auto.get_club_sales()

    if USER_SETTINGS.get('get_training_points', 'on'):
        print_d("> Training Points <")
        TP_settings = USER_SETTINGS.get_section_items('get_training_points')
        auto.get_training_points(**TP_settings)

    if USER_SETTINGS.get('morale', 'on'):
        print_d("> Morale <")
        morale_boost = MoraleBoost()
        morale_boost.__call__()

    if USER_SETTINGS.get('training', 'on'):
        print_d("> Training <")
        training = Training()
        training.__call__()

    if USER_SETTINGS.get('extra_training', 'on'):
        print_d("> Extra Training <")
        extra_training = ExtraTraining()
        extra_training.__call__()
Example #3
 def __init__(self, file_name, file_to_dump):
     try:
         self.file_name = "files/" + file_name
         self.file_to_dump = file_to_dump
         self.iterations = 2
         self.skipGram = Training(self.file_name)
         self.lexicon = FinanceLexicon()
         self.word_vectors = self.skipGram.load_vectors(self.file_to_dump)
     except:
         # do nothing
         print()
Example #4
File: academy.py Project: SeuRAUL/GAE-PD
  def add_training(self, t_day, t_time, t_instructor):

      t = Training(day = t_day, time = t_time, instructor = t_instructor)

      self.response.write("""
          <p>
            <b>Training added:</b><br/>
            <b>Day:</b> %s - %s <br/>
            <b>Instructor:</b> %s
          </p>
        """ %(t.day, t.time, t.instructor))

      t.put()
      Mailer.send_mail(self, "*****@*****.**", "New training", "New training registered:\n\tDay: %s - %s\n\tInstructor: %s" %(t_day, t_time, t_instructor))
Example #5
def handleOnKeyPress(event):
    if event.key == "enter":  # initialize and run the training algorithm
        if len(neuronas) > 0:
            algoritmo = Training(neuronas, ETA, TOLERANCIA, LIMITE_EPOCAS)
            algoritmo.run()
        else:
            print("[!] ERROR: there are no patterns in the list.\n")

    elif event.key == "backspace":  # clear the plot and the pattern list
        neuronas.clear()
        plt.cla()
        plt.title("Adaline")
        plt.grid(True)
        plt.xlim([-1, 4])
        plt.ylim([-1, 4])
        plt.draw()
Example #6
def apply(df, config, header, dataset_features):

    debug = config['debug']
    num_of_trees = config['num_of_trees']

    for i in range(0, num_of_trees):
        subset = df.sample(frac=1 / num_of_trees)

        root = 1

        file = "outputs/rules/rule_" + str(i) + ".py"

        if debug == False:
            functions.createFile(file, header)

        Training.buildDecisionTree(subset, root, file, config,
                                   dataset_features)
Example #7
File: main.py Project: kand/Neural_py
def learn():
    train = Training.readFromFile("training/AND")
    graph = train.train(0.5)
    print repr(graph)
    
    print "[0,0]", graph.activate([0,0],0.5)
    print "[1,0]", graph.activate([1,0],0.5)
    print "[0,1]", graph.activate([0,1],0.5)
    print "[1,1]", graph.activate([1,1],0.5)

    return graph
Example #8
def display_page(pathname):
    if pathname == '' or pathname == '/' or pathname == config.login_url:
        # print('--> login')
        return Login()
    elif pathname == config.home_url:
        # print('--> home')
        return Homepage()
    elif pathname == config.training_url:
        # print('--> training')
        return Training()
    elif pathname == config.classification_url:
        # print('--> classification')
        return Classification()
Example #9
 def training():
     try:
         run_time_logger.add_in_logs("Started Model Training")
         global overall_accuracy
         overall_accuracy = Training().train(path , data_base_name , data_base_table_name)
         overall_accuracy = round(overall_accuracy , 2)
         return render_template("done.html" , text = "Training completed successfully")
     except Exception as e:
         run_time_logger.add_in_logs("Faced an error")
         run_time_logger.add_in_logs( "Training")
         run_time_logger.add_in_logs("Error on line number : {}".format(sys.exc_info()[-1].tb_lineno))
         run_time_logger.add_in_logs((str(e)))
         return render_template("error.html"  , text = str(e))
Example #10
 def home_page():
     try:
         run_time_logger.add_in_logs("Started Model Training")
         global overall_accuracy
         overall_accuracy = Training().train(path , data_base_name , data_base_table_name)
         overall_accuracy = round(overall_accuracy , 2)
         run_time_logger.add_in_logs("Log in to application")
         return render_template("home_page.html")
     except Exception as e:
         run_time_logger.add_in_logs("Faced an error")
         run_time_logger.add_in_logs( "Home Page module")
         run_time_logger.add_in_logs("Error on line number : {}".format(sys.exc_info()[-1].tb_lineno))
         run_time_logger.add_in_logs(str(e))
         return render_template("error.html"  , text = str(e) )
Example #11
 def train(self):
     X_train, y_train, X_test, y_test = self.dataset_ops()
     self.mdl = self.baseline_model()
     # print(self.mdl.summary())
     Training(model=self.mdl,
              X_train=X_train,
              Y_train=y_train,
              X_test=X_test,
              Y_test=y_test,
              optimizer=keras.optimizers.RMSprop(lr=1e-4),
              loss='categorical_crossentropy',
              metrics=['acc', self.f1],
              epochs=3,
              summaries_directory="./summaries",
              tensorboard_write_grad=True).train()
     return self.mdl
Example #12
File: main.py Project: c0d3d/SAD
def main(f=None):
    args = parser.parse_args()
    if args.cmd_type == "embeddings":
        build_embeddings(args.data, args.output)
    elif args.cmd_type == "train":
        print("Loading embeddings ...")
        embeddings = EmbeddingsData.load(args.embeddings_data_file)
        print("Loading the data ...")
        the_data = Data.make_data(args.train_file, args.dev_file,
                                  args.batch_size)
        print("Building the model ...")
        model = build_model(embeddings, args.batch_size)
        train_sess = Training.make_training(model, the_data, args.epoch_count)
        while train_sess.has_more_epochs():
            print("Next epoch ...")
            train_sess.next_epoch()
        # TODO save model
    else:
        # ?
        exit(1)
    return 0
Example #13
def fit(df, config):

    target_label = df.columns[len(df.columns) - 1]
    if target_label != 'Decision':
        print("Expected: Decision, Existing: ", target_label)
        raise ValueError(
            'Please confirm that the name of the target column is "Decision" and that it is the right-most column in the pandas data frame'
        )

    #------------------------

    #initialize params and folders
    config = functions.initializeParams(config)
    functions.initializeFolders()

    #------------------------

    algorithm = config['algorithm']

    valid_algorithms = ['ID3', 'C4.5', 'CART', 'Regression']

    if algorithm not in valid_algorithms:
        raise ValueError('Invalid algorithm passed. You passed ', algorithm,
                         " but valid algorithms are ", valid_algorithms)

    #------------------------

    enableRandomForest = config['enableRandomForest']
    num_of_trees = config['num_of_trees']
    enableMultitasking = config['enableMultitasking']

    enableGBM = config['enableGBM']
    epochs = config['epochs']
    learning_rate = config['learning_rate']

    enableAdaboost = config['enableAdaboost']

    #------------------------
    raw_df = df.copy()
    num_of_rows = df.shape[0]
    num_of_columns = df.shape[1]

    if algorithm == 'Regression':
        if df['Decision'].dtypes == 'object':
            raise ValueError(
                'Regression trees cannot be applied for nominal target values! You can either change the algorithm or data set.'
            )

    if df['Decision'].dtypes != 'object':  #this must be regression tree even if it is not mentioned in algorithm
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'
        global_stdev = df['Decision'].std(ddof=0)

    if enableGBM == True:
        print("Gradient Boosting Machines...")
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'

    if enableAdaboost == True:
        for j in range(0, num_of_columns):
            column_name = df.columns[j]
            if df[column_name].dtypes == 'object':
                raise ValueError(
                    'Adaboost must be run on numeric data set for both features and target'
                )

    #-------------------------

    print(algorithm, " tree is going to be built...")

    dataset_features = dict(
    )  #initialize a dictionary. this is going to be used to check features numeric or nominal. numeric features should be transformed to nominal values based on scales.

    header = "def findDecision("
    header = header + "obj"
    header = header + "): #"

    num_of_columns = df.shape[1] - 1
    for i in range(0, num_of_columns):
        column_name = df.columns[i]
        dataset_features[column_name] = df[column_name].dtypes
        header = header + "obj[" + str(i) + "]: " + column_name
        if i != num_of_columns - 1:
            header = header + ", "

    header = header + "\n"

    #------------------------

    begin = time.time()

    trees = []
    alphas = []

    if enableAdaboost == True:
        trees, alphas = adaboost.apply(df, config, header, dataset_features)

    elif enableGBM == True:

        if df['Decision'].dtypes == 'object':  #transform classification problem to regression
            trees, alphas = gbm.classifier(df, config, header,
                                           dataset_features)
            classification = True

        else:  #regression
            trees = gbm.regressor(df, config, header, dataset_features)
            classification = False

    elif enableRandomForest == True:
        trees = randomforest.apply(df, config, header, dataset_features)
    else:  #regular decision tree building

        root = 1
        file = "outputs/rules/rules.py"
        functions.createFile(file, header)
        trees = Training.buildDecisionTree(df, root, file, config,
                                           dataset_features)

    print("finished in ", time.time() - begin, " seconds")

    obj = {"trees": trees, "alphas": alphas, "config": config}

    return obj
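
A hypothetical usage sketch for the fit() above, assuming it is importable and using only the config keys read in the function body; the toy data and values here are illustrative, and 'Decision' must be the right-most column:

import pandas as pd

df = pd.DataFrame({'Outlook': ['Sunny', 'Rain', 'Overcast', 'Rain'],
                   'Humidity': [85, 90, 78, 96],
                   'Decision': ['No', 'No', 'Yes', 'Yes']})

config = {'algorithm': 'C4.5', 'enableRandomForest': False, 'num_of_trees': 5,
          'enableMultitasking': False, 'enableGBM': False, 'epochs': 10,
          'learning_rate': 1, 'enableAdaboost': False}

model = fit(df, config)  # returns {"trees": ..., "alphas": ..., "config": ...}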
Example #14
from training import Training
from database import Database

workout = Database()
workout.import_data()
first_try = Training(90, "ENG")  #set duration and language ("PL"/"ENG")
pattern = (first_try.import_pattern())[0]
first_try.generate_plan(pattern)
first_try.print_training()

#saving (line below) is optional, it stores instructions in a txt file
#first_try.save_training()
Example #15
def main():
    X = []
    Y = []
    char2intDict = None
    int2charDict = None
    vocabulary = None
    config = FileHelper.load_config('config.json')

    seq_length = config['preprocessing']['sequence_chars_length']

    # Load data or preprocess
    if not config['preprocessing']['exec_preprocessing']:
        X = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['X_file'])
        Y = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['Y_file'])
        char2intDict = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['char2intDict_file'])
        int2charDict = FileHelper.load_object_from_file(
            config['preprocessing']['checkpoints']['int2charDict_file'])
    else:
        preprocessing = Preprocessing(config)
        X, Y, char2intDict, int2charDict = preprocessing.preprocess()
        FileHelper.save_object_to_file(
            config['preprocessing']['checkpoints']['X_file'], X)
        FileHelper.save_object_to_file(
            config['preprocessing']['checkpoints']['Y_file'], Y)

    vocabulary = FileHelper.load_object_from_file(
        config['preprocessing']['checkpoints']['vocabulary_file'])

    # Save the unshaped version of X because it's needed for generation later
    X_unshaped = X

    # Transform the data to the format the LTSM expects it [samples, timesteps, features]
    X = numpy.reshape(X, (len(X), seq_length, 1))
    # Normalize/rescale all integers to range 0-1
    X = X / float(len(vocabulary))
    # As usual do one-hot encoding for categorial variables to the output variables (vector of zeros with a single 1 --> 0..N-1 categories)
    Y = np_utils.to_categorical(Y)

    training = Training(config)
    # Define the model
    model = training.define_model(X, Y)

    if config['training']['exec_training']:
        # Train the model
        model = training.train(X, Y, char2intDict, vocabulary, model)
    else:
        # Just set the previously trained weights for the model
        model.load_weights(config['training']['load_weights_filename'])
        model.compile(loss='categorical_crossentropy', optimizer='adam')

    if config['generation']['exec_generation']:
        # Generate the random seed used as starting value for text generation
        seed = generate_random_seed(X_unshaped)
        generatedText = generate_text(
            config['generation']['text_chars_length'], int2charDict,
            vocabulary, seed, model)

        # Save the generated text to file
        outputFilename = config['generation']['foldername'] + '/' + \
            datetime.datetime.now().strftime('%Y%m%d_%H_%M_%S') + '.txt'
        FileHelper.write_data(outputFilename, generatedText)
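
A tiny illustration of the [samples, timesteps, features] reshape and vocabulary-based rescaling done above; the integer-encoded sequences and the vocabulary size of 4 are made up for the example:

import numpy

seq_length = 3
vocabulary_size = 4
X = [[0, 1, 2], [1, 2, 3]]                     # two integer-encoded sequences
X = numpy.reshape(X, (len(X), seq_length, 1))  # shape (2, 3, 1)
X = X / float(vocabulary_size)                 # rescale to the 0-1 range
print(X.shape, X.max())                        # (2, 3, 1) 0.75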
Example #16
def apply(df, config, header, dataset_features):

    models = []
    alphas = []

    initializeAlphaFile()

    num_of_weak_classifier = config['num_of_weak_classifier']

    #------------------------

    rows = df.shape[0]
    columns = df.shape[1]
    final_predictions = pd.DataFrame(np.zeros([rows, 1]),
                                     columns=['prediction'])

    worksheet = df.copy()
    worksheet['Weight'] = 1 / rows  #uniform distribution initially

    final_predictions = pd.DataFrame(np.zeros((df.shape[0], 2)),
                                     columns=['Prediction', 'Actual'])
    final_predictions['Actual'] = df['Decision']

    #for i in range(0, num_of_weak_classifier):
    pbar = tqdm(range(0, num_of_weak_classifier), desc='Adaboosting')
    for i in pbar:
        worksheet['Decision'] = worksheet['Weight'] * worksheet['Decision']

        root = 1
        file = "outputs/rules/rules_" + str(i) + ".py"

        functions.createFile(file, header)

        #print(worksheet)
        Training.buildDecisionTree(worksheet.drop(columns=['Weight']), root,
                                   file, config, dataset_features)

        #---------------------------------------

        moduleName = "outputs/rules/rules_" + str(i)
        fp, pathname, description = imp.find_module(moduleName)
        myrules = imp.load_module(moduleName, fp, pathname, description)
        models.append(myrules)

        #---------------------------------------

        df['Epoch'] = i
        worksheet['Prediction'] = df.apply(findPrediction, axis=1)
        df = df.drop(columns=['Epoch'])

        #---------------------------------------
        worksheet['Actual'] = df['Decision']
        worksheet['Loss'] = abs(worksheet['Actual'] -
                                worksheet['Prediction']) / 2
        worksheet[
            'Weight_Times_Loss'] = worksheet['Loss'] * worksheet['Weight']

        epsilon = worksheet['Weight_Times_Loss'].sum()
        alpha = math.log(
            (1 - epsilon) /
            epsilon) / 2  #use alpha to update weights in the next round
        alphas.append(alpha)

        #-----------------------------

        #store alpha
        addEpochAlpha(i, alpha)

        #-----------------------------

        worksheet['Alpha'] = alpha
        worksheet['New_Weights'] = worksheet['Weight'] * (
            -alpha * worksheet['Actual'] * worksheet['Prediction']).apply(
                math.exp)

        #normalize
        worksheet['New_Weights'] = worksheet['New_Weights'] / worksheet[
            'New_Weights'].sum()
        worksheet['Weight'] = worksheet['New_Weights']
        worksheet['Decision'] = df['Decision']

        final_predictions['Prediction'] = final_predictions[
            'Prediction'] + worksheet['Alpha'] * worksheet['Prediction']
        #print(final_predictions)
        worksheet = worksheet.drop(columns=[
            'New_Weights', 'Prediction', 'Actual', 'Loss', 'Weight_Times_Loss',
            'Alpha'
        ])

        mae = (np.abs(final_predictions['Prediction'].apply(functions.sign) -
                      final_predictions['Actual']) /
               2).sum() / final_predictions.shape[0]
        #print(mae)
        pbar.set_description("Epoch %d. Loss: %.4f. Process: " % (i + 1, mae))

    #------------------------------
    final_predictions['Prediction'] = final_predictions['Prediction'].apply(
        functions.sign)
    final_predictions['Absolute_Error'] = np.abs(
        final_predictions['Actual'] - final_predictions['Prediction']) / 2
    #print(final_predictions)
    mae = final_predictions['Absolute_Error'].sum(
    ) / final_predictions.shape[0]
    print("Loss (MAE) found ", mae, " with ", num_of_weak_classifier,
          ' weak classifiers')

    return models, alphas
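
The weight update in this example follows the standard AdaBoost rule. A minimal self-contained sketch of one boosting round, with toy +/-1 labels and predictions and independent of the chefboost helpers above:

import numpy as np

actual = np.array([1, 1, -1, -1])        # true labels
predicted = np.array([1, -1, -1, -1])    # weak learner output (one miss)
weights = np.full(4, 1 / 4)              # uniform initial distribution

loss = np.abs(actual - predicted) / 2    # 1 for a miss, 0 for a hit
epsilon = np.sum(weights * loss)         # weighted error of this round
alpha = 0.5 * np.log((1 - epsilon) / epsilon)

new_weights = weights * np.exp(-alpha * actual * predicted)
new_weights = new_weights / new_weights.sum()   # renormalize to a distribution
print(alpha, new_weights)                # the misclassified instance gains weight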
Example #17
## Set configuration options ##
config = Config(sys.argv[1])
vb.level = config.verbose_level
vb.initialize()

if not config.runTraining and not config.runInference:
    vb.ERROR("RUN :  No configuration set ")
    vb.ERROR(
        "RUN :  Please set the arguments 'runTraining' or 'runInference' to define workflow "
    )
    vb.ERROR("RUN :  Exiting.")
    sys.exit(1)

## Setup Deep Learning class
dnn = Training()

dnn.variable_labels = plb.variable_labels()
dnn.sample_labels = plb.sample_labels()

dnn.hep_data = config.hep_data
dnn.model_name = config.dnn_data
dnn.msg_svc = vb
dnn.treename = config.treename
dnn.useLWTNN = True
dnn.dnn_name = "dnn"
dnn.output_dim = config.output_dim
dnn.loss = config.loss
dnn.init = config.init
dnn.nNodes = config.nNodes
dnn.dropout = None
Example #18
    os.path.join(args.input, args.save_data + ".unlabel.pth"))

# Reading the word vocab file
with open(os.path.join(args.input, args.save_data + '.vocab.pickle'),
          'rb') as f:
    id2w = pickle.load(f)

# Reading the label vocab file
with open(os.path.join(args.input, args.save_data + '.label.pickle'),
          'rb') as f:
    id2label = pickle.load(f)

args.id2w = id2w
args.n_vocab = len(id2w)
args.id2label = id2label
args.num_classes = len(id2label)

object = Training(args, logger)

logger.info('Corpus: {}'.format(args.corpus))
logger.info('Pytorch Model')
logger.info(repr(object.embedder))
logger.info(repr(object.encoder))
logger.info(repr(object.clf))
logger.info(repr(object.clf_loss))
if args.lambda_ae:
    logger.info(repr(object.ae))

# Train the model
object(train_data, dev_data, test_data, unlabel_data)
Example #19
 def __init__(self):
     Training.__init__(self)
Example #20
def RunTraining(patterns):
    if len(patterns) > 0:
        algoritmo = Training(patterns)
        algoritmo.run()
    else:
        print("[!] ERROR. There are no patterns in the list.\n")
Example #21
def fit(df, config):

    target_label = df.columns[len(df.columns) - 1]
    if target_label != 'Decision':
        print("Expected: Decision, Existing: ", target_label)
        raise ValueError('Data error: please convert the data to the correct format!')

    #------------------------

    #initialize params and folders
    config = functions.initializeParams(config)
    functions.initializeFolders()

    algorithm = config['algorithm']

    RandomForest = config['RandomForest']
    num_of_trees = config['num_of_trees']

    #------------------------
    raw_df = df.copy()
    num_of_rows = df.shape[0]
    num_of_columns = df.shape[1]

    if algorithm == 'Regression':
        if df['Decision'].dtypes == 'object':
            raise ValueError(
                'Data error while building a Regression Tree')

    if df['Decision'].dtypes != 'object':
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'
        global_stdev = df['Decision'].std(ddof=0)

    #-------------------------

    print(algorithm, ": building the decision tree...")

    dataset_features = dict()  # dictionary

    header = "def findDecision("
    header = header + "obj"
    header = header + "): #"

    num_of_columns = df.shape[1] - 1
    for i in range(0, num_of_columns):
        column_name = df.columns[i]
        dataset_features[column_name] = df[column_name].dtypes
        header = header + "obj[" + str(i) + "]: " + column_name
        if i != num_of_columns - 1:
            header = header + ", "

    header = header + "\n"

    #------------------------

    begin = time.time()

    trees = []
    alphas = []

    if RandomForest == True:
        trees = randomforest.apply(df, config, header, dataset_features)
    else:
        root = 1
        file = "outputs/rules/rules.py"
        functions.createFile(file, header)
        trees = Training.buildDecisionTree(df, root, file, config,
                                           dataset_features)

    print("Algorithm finished in ", time.time() - begin, " seconds")

    obj = {"trees": trees, "alphas": alphas, "config": config}
    return obj
Example #22
preprocessor = Preprocessor()
if not os.path.exists(WORD2VEC_FILE):
    preprocessor.generateFixedLength(RAW_DATA_FILE, SEQUENCE_SIZE,
                                     TRAINING_DATE_RATE, TRAIN_FIXED_FILE,
                                     TEST_FIXED_FILE)
    W2V = preprocessor.makeW2Vfile(RAW_DATA_FILE, WORD2VEC_FILE, VECTOR_SIZE,
                                   SEQUENCE_SIZE, 0)
else:
    W2V = gensim.models.Word2Vec.load(WORD2VEC_FILE)

LEARNING_RATE = 0.01
BATCH_SIZE = 2059
ITER_NUM = 5
DROPOUT_RATE = 0.7
EARLY_STOP_COUNT = 3
""" training
X_DATA, Y_DATA = preprocessor.getVectorData(TRAIN_FIXED_FILE, W2V, SEQUENCE_SIZE)
training = Training(LEARNING_RATE, BATCH_SIZE, ITER_NUM, SEQUENCE_SIZE, VECTOR_SIZE, DROPOUT_RATE, EARLY_STOP_COUNT)
training.train(X_DATA, Y_DATA)
"""

sentence = "그책에는이별이야기가있을까어쩌면네가지금막귀퉁이를접고있는페이지에"
X_DATA = preprocessor.getXVectorData(sentence, W2V, SEQUENCE_SIZE)
predicting = Training(LEARNING_RATE,
                      1,
                      ITER_NUM,
                      SEQUENCE_SIZE,
                      VECTOR_SIZE,
                      drop_out_rate=1.0)
predicted_sentence = predicting.predict(X_DATA, sentence)
print(predicted_sentence)
Example #23
File: gbm.py Project: zzhsaga/chefboost
def classifier(df, config, header, dataset_features):

    models = []

    print("gradient boosting for classification")

    epochs = config['epochs']

    temp_df = df.copy()
    original_dataset = df.copy()
    worksheet = df.copy()

    classes = df['Decision'].unique()

    boosted_predictions = np.zeros([df.shape[0], len(classes)])

    pbar = tqdm(range(0, epochs), desc='Boosting')

    #store actual set, we will use this to calculate loss
    actual_set = pd.DataFrame(np.zeros([df.shape[0], len(classes)]),
                              columns=classes)
    for i in range(0, len(classes)):
        current_class = classes[i]
        actual_set[current_class] = np.where(df['Decision'] == current_class,
                                             1, 0)
    actual_set = actual_set.values  #transform it to numpy array

    #for epoch in range(0, epochs):
    for epoch in pbar:
        for i in range(0, len(classes)):
            current_class = classes[i]

            if epoch == 0:
                temp_df['Decision'] = np.where(df['Decision'] == current_class,
                                               1, 0)
                worksheet['Y_' + str(i)] = temp_df['Decision']
            else:
                temp_df['Decision'] = worksheet['Y-P_' + str(i)]

            predictions = []

            #change data type for decision column
            temp_df[['Decision']].astype('int64')

            root = 1
            file = "outputs/rules/rules-for-" + current_class + "-round-" + str(
                epoch) + ".py"

            functions.createFile(file, header)

            Training.buildDecisionTree(temp_df, root, file, config,
                                       dataset_features)
            #decision rules created
            #----------------------------

            #dynamic import
            moduleName = "outputs/rules/rules-for-" + current_class + "-round-" + str(
                epoch)
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname,
                                      description)  #rules0

            models.append(myrules)

            num_of_columns = df.shape[1]

            for row, instance in df.iterrows():
                features = []
                for j in range(0, num_of_columns - 1):  #iterate on features
                    features.append(instance[j])

                actual = temp_df.loc[row]['Decision']
                prediction = myrules.findDecision(features)

                predictions.append(prediction)

            #----------------------------
            if epoch == 0:
                worksheet['F_' + str(i)] = 0
            else:
                worksheet['F_' + str(i)] = pd.Series(predictions).values

            boosted_predictions[:, i] = boosted_predictions[:, i] + worksheet[
                'F_' + str(i)].values.astype(np.float32)

            #print(boosted_predictions[0:5,:])

            worksheet['P_' + str(i)] = 0

            #----------------------------
            temp_df = df.copy()  #restoration

        for row, instance in worksheet.iterrows():
            f_scores = []
            for i in range(0, len(classes)):
                f_scores.append(instance['F_' + str(i)])

            probabilities = functions.softmax(f_scores)

            for j in range(0, len(probabilities)):
                instance['P_' + str(j)] = probabilities[j]

            worksheet.loc[row] = instance

        for i in range(0, len(classes)):
            worksheet['Y-P_' +
                      str(i)] = worksheet['Y_' + str(i)] - worksheet['P_' +
                                                                     str(i)]

        prediction_set = np.zeros([df.shape[0], len(classes)])
        for i in range(0, boosted_predictions.shape[0]):
            predicted_index = np.argmax(boosted_predictions[i])
            prediction_set[i][predicted_index] = 1

        #----------------------------
        #find loss for this epoch: prediction_set vs actual_set
        classified = 0
        for i in range(0, actual_set.shape[0]):
            actual = np.argmax(actual_set[i])
            prediction = np.argmax(prediction_set[i])
            #print("actual: ",actual," - prediction: ",prediction)

            if actual == prediction:
                classified = classified + 1

        accuracy = str(100 * classified / actual_set.shape[0]) + "%"

        #----------------------------

        #print(worksheet.head())
        #print("round ",epoch+1)
        pbar.set_description("Epoch %d. Accuracy: %s. Process: " %
                             (epoch + 1, accuracy))

    return models, classes
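
The per-class probabilities above come from a softmax over the boosted F_i scores. A minimal stand-in for functions.softmax (an assumption, shown only to illustrate that step, with made-up scores):

import numpy as np

def softmax(scores):
    scores = np.asarray(scores, dtype=float)
    exps = np.exp(scores - scores.max())   # shift for numerical stability
    return exps / exps.sum()

f_scores = [0.2, 1.3, -0.5]                # one boosted score per class
probabilities = softmax(f_scores)
print(probabilities, probabilities.argmax())  # the class at index 1 wins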
Example #24
File: gbm.py Project: zzhsaga/chefboost
def regressor(df, config, header, dataset_features):
    models = []

    algorithm = config['algorithm']

    enableRandomForest = config['enableRandomForest']
    num_of_trees = config['num_of_trees']
    enableMultitasking = config['enableMultitasking']

    enableGBM = config['enableGBM']
    epochs = config['epochs']
    learning_rate = config['learning_rate']

    enableAdaboost = config['enableAdaboost']

    #------------------------------

    boosted_from = 0
    boosted_to = 0

    #------------------------------

    base_df = df.copy()

    #gbm will manipulate actuals. store its raw version.
    target_values = base_df['Decision'].values
    num_of_instances = target_values.shape[0]

    root = 1
    file = "outputs/rules/rules0.py"
    functions.createFile(file, header)

    Training.buildDecisionTree(df, root, file, config,
                               dataset_features)  #generate rules0

    df = base_df.copy()

    base_df['Boosted_Prediction'] = 0

    #------------------------------

    pbar = tqdm(range(1, epochs + 1), desc='Boosting')

    #for index in range(1,epochs+1):
    #for index in tqdm(range(1,epochs+1), desc='Boosting'):
    for index in pbar:
        #print("epoch ",index," - ",end='')
        loss = 0

        #run data(i-1) and rules(i-1), save data1

        #dynamic import
        moduleName = "outputs/rules/rules%s" % (index - 1)
        fp, pathname, description = imp.find_module(moduleName)
        myrules = imp.load_module(moduleName, fp, pathname,
                                  description)  #rules0

        models.append(myrules)

        new_data_set = "outputs/data/data%s.csv" % (index)
        f = open(new_data_set, "w")

        #put header in the following file
        columns = df.shape[1]

        mae = 0

        #----------------------------------------

        df['Epoch'] = index
        df['Prediction'] = df.apply(findPrediction, axis=1)

        base_df['Boosted_Prediction'] += df['Prediction']

        loss = (base_df['Boosted_Prediction'] -
                base_df['Decision']).pow(2).sum()

        if index == 1:
            boosted_from = loss / num_of_instances
        elif index == epochs:
            boosted_to = loss / num_of_instances

        df['Decision'] = int(learning_rate) * (df['Decision'] -
                                               df['Prediction'])
        df = df.drop(columns=['Epoch', 'Prediction'])

        #---------------------------------

        df.to_csv(new_data_set, index=False)
        #data(i) created

        #---------------------------------

        file = "outputs/rules/rules" + str(index) + ".py"

        functions.createFile(file, header)

        current_df = df.copy()
        Training.buildDecisionTree(df, root, file, config, dataset_features)
        df = current_df.copy(
        )  #numeric features require this restoration to apply findDecision function

        #rules(i) created

        loss = loss / num_of_instances
        #print("epoch ",index," - loss: ",loss)
        #print("loss: ",loss)
        pbar.set_description("Epoch %d. Loss: %d. Process: " % (index, loss))

        #---------------------------------

    print(num_of_instances, " instances are boosted from ", boosted_from,
          " to ", boosted_to, " in ", epochs, " epochs")

    return models
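
The epochs above implement plain gradient boosting on residuals. A minimal sketch of the same idea, with scikit-learn trees standing in for the generated rule files (illustrative only, not part of the project):

import numpy as np
from sklearn.tree import DecisionTreeRegressor

X = np.arange(20, dtype=float).reshape(-1, 1)
y = np.sin(X).ravel()

learning_rate = 0.1
prediction = np.zeros_like(y)
models = []

for epoch in range(10):
    residual = y - prediction                              # what is still unexplained
    tree = DecisionTreeRegressor(max_depth=2).fit(X, residual)
    models.append(tree)
    prediction += learning_rate * tree.predict(X)
    print(epoch, np.mean((y - prediction) ** 2))           # loss shrinks over epochs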
Example #25
np.random.seed(7)

if len(sys.argv) < 3:
    print("Usage blindspot data-file parameter-file test-suffix")
    exit()

mode = sys.argv[1]
if mode == "train" or mode == "onlyexport":
    data_file = sys.argv[2]
    parameter_file = sys.argv[3]
    examples_file = sys.argv[4]
    with codecs.open(parameter_file, 'r', 'utf-8') as f:
        params = json.load(f)
    with codecs.open(examples_file, 'r', 'utf-8') as f:
        examples = json.load(f)
    t = Training(data_file=data_file, examples=examples)

    for p in params:
        print("Trying:{}".format(p))
        p["only_export"] = (mode == "onlyexport")
        t.train(**p)

if mode == "predict":
    vocab_file = sys.argv[2]
    model_file = sys.argv[3]
    text = sys.argv[4]
    p = Prediction(model_file, vocab_file)
    print(p.predict(text))

if mode == "predict_server":
    data_file = sys.argv[2]
Example #26
def fit(df, config):

    #config parameters

    debug = config['debug']
    algorithm = config['algorithm']

    enableRandomForest = config['enableRandomForest']
    num_of_trees = config['num_of_trees']
    enableMultitasking = config['enableMultitasking']

    enableGBM = config['enableGBM']
    epochs = config['epochs']
    learning_rate = config['learning_rate']

    enableAdaboost = config['enableAdaboost']

    #------------------------
    if algorithm == 'Regression':
        if df['Decision'].dtypes == 'object':
            raise ValueError(
                'Regression trees cannot be applied for nominal target values! You can either change the algorithm or data set.'
            )

    if df['Decision'].dtypes != 'object':  #this must be regression tree even if it is not mentioned in algorithm
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'
        global_stdev = df['Decision'].std(ddof=0)

    if enableGBM == True:
        debug = False  #gbm needs rules files to iterate
        algorithm = 'Regression'
        config['algorithm'] = 'Regression'

    #-------------------------

    print(algorithm, " tree is going to be built...")

    dataset_features = dict(
    )  #initialize a dictionary. this is going to be used to check features numeric or nominal. numeric features should be transformed to nominal values based on scales.

    if (True):  #header of rules files
        header = "def findDecision("
        num_of_columns = df.shape[1] - 1
        for i in range(0, num_of_columns):
            if debug == True:
                if i > 0:
                    header = header + ","
                header = header + df.columns[i]

            column_name = df.columns[i]
            dataset_features[column_name] = df[column_name].dtypes

        if debug == False:
            header = header + "obj"

        header = header + "):\n"

        if debug == True:
            print(header, end='')

    #------------------------

    begin = time.time()

    if enableAdaboost == True:
        adaboost.apply(df, config, header, dataset_features)

    elif enableGBM == True:

        if df['Decision'].dtypes == 'object':  #transform classification problem to regression
            gbm.classifier(df, config, header, dataset_features)

        else:  #regression
            gbm.regressor(df, config, header, dataset_features)

    elif enableRandomForest == True:
        randomforest.apply(df, config, header, dataset_features)
    else:  #regular decision tree building

        root = 1
        file = "outputs/rules/rules.py"
        if debug == False: functions.createFile(file, header)
        Training.buildDecisionTree(df, root, file, config, dataset_features)

    print("finished in ", time.time() - begin, " seconds")
Example #27
mischievous_p = Perceptron(mischievous_weights, 3)
angry_p = Perceptron(angry_weights, 4)
perceptron_array = [happy_p, sad_p, mischievous_p, angry_p]

# The training starts.
percentage = 0
# Trains as long as the score is lower than 80%
while percentage < 0.8:
    print("Training...")
    total_training_result = []
    for i in range(len(image_array)):
        for j in range(len(perceptron_array)):
            # Calculates the output for every perceptron on every image, and trains it.
            perceptron_array[j].activate_1(image_array[i])

            session = Training(image_array[i], perceptron_array[j], facit[i])
            session.train()
        # Look which perceptron that was most active.
        winner = get_winner(perceptron_array)
        total_training_result.append(winner)

    # Calculates the percentage of correct answers.
    percentage = calc_points(total_training_result, facit)
    percentage = percentage / len(image_array)

    print("I got %.2f percent correct this training round." %
          (percentage * 100))
    time.sleep(1)

print("Let's do the test!")
print("________________________________")
Example #28
def apply(df, config, header, dataset_features):

    models = []

    num_of_trees = config['num_of_trees']

    pbar = tqdm(range(0, num_of_trees), desc='Bagging')

    for i in pbar:
        #for i in range(0, num_of_trees):
        pbar.set_description("Sub decision tree %d is processing" % (i + 1))
        subset = df.sample(frac=1 / num_of_trees)

        root = 1

        moduleName = "outputs/rules/rule_" + str(i)
        file = moduleName + ".py"

        functions.createFile(file, header)

        Training.buildDecisionTree(subset, root, file, config,
                                   dataset_features)

        #--------------------------------

        fp, pathname, description = imp.find_module(moduleName)
        myrules = imp.load_module(moduleName, fp, pathname, description)
        models.append(myrules)

    #-------------------------------
    #check regression or classification
    if df['Decision'].dtypes == 'object': problem_type = 'classification'
    else: problem_type = 'regression'

    actual_values = df['Decision'].values
    num_of_features = df.shape[1] - 1  #discard Decision
    number_of_instances = df.shape[0]

    global_predictions = []

    #if classification get the max number of prediction
    if problem_type == 'classification':
        for i in range(0, num_of_trees):

            moduleName = "outputs/rules/rule_" + str(i)
            fp, pathname, description = imp.find_module(moduleName)
            myrules = imp.load_module(moduleName, fp, pathname, description)

            predictions = []

            for index, instance in df.iterrows():
                params = []
                for j in range(0, num_of_features):
                    params.append(instance[j])

                #index row, i th column
                prediction = myrules.findDecision(params)
                predictions.append(prediction)
                #print(i,"th tree prediction: ",prediction)

            #print(predictions)
            global_predictions.append(predictions)

        #-------------------------------
        classified = 0
        for index, instance in df.iterrows():

            actual = actual_values[index]
            predictions = []
            for i in range(0, num_of_trees):
                prediction = global_predictions[i][index]
                if prediction is not None:  # why does None appear in some cases?
                    predictions.append(prediction)

            predictions = np.array(predictions)
            unique_values = np.unique(predictions)

            if unique_values.shape[0] == 1:
                prediction = unique_values[0]
            else:
                counts = []
                for unique in unique_values:
                    count = 0
                    for j in predictions:
                        if unique == j:
                            count = count + 1
                    counts.append(count)

                #print("unique: ",unique_values)
                #print("counts: ",counts)

                prediction = None

                if len(counts) > 0:
                    max_index = np.argmax(np.array(counts))
                    prediction = unique_values[max_index]

            #print(index,". actual: ",actual," - prediction: ", prediction)
            if actual == prediction:
                classified = classified + 1

        print("Accuracy: ", 100 * classified / number_of_instances, "% on ",
              number_of_instances, " instances")

    return models
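
A minimal sketch of the majority vote performed above, assuming each sub-tree has already produced one label per instance (toy values only):

import numpy as np

tree_predictions = np.array([['Yes', 'No',  'Yes'],   # tree 0
                             ['Yes', 'Yes', 'No'],    # tree 1
                             ['No',  'Yes', 'Yes']])  # tree 2

for column in tree_predictions.T:                     # one column per instance
    values, counts = np.unique(column, return_counts=True)
    print(values[np.argmax(counts)])                  # prints Yes, Yes, Yes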
Example #29
    def collect_gesture(self, capture, ges, photo_num):
        photo_num = photo_num
        vedeo = False
        predict = False
        count = 0
        # open the default camera
        cap = cv2.VideoCapture(capture)
        # set the capture mode
        cap.set(10, 200)
        # create and initialize the background subtractor
        bgModel = cv2.createBackgroundSubtractorMOG2(0, self.bgSubThreshold)

        while True:
            # read a video frame
            ret, frame = cap.read()
            # mirror the frame horizontally
            frame = cv2.flip(frame, 1)

            cv2.imshow('Original', frame)
            # bilateral filtering
            frame = cv2.bilateralFilter(frame, 5, 50, 100)

            # draw a rectangle: the first point is the top-left corner (x, y), the second the bottom-right corner
            # rec = cv2.rectangle(frame, (220, 50), (450, 300), (255, 0, 0), 2)
            rec = cv2.rectangle(frame, (self.x1, self.y1), (self.x2, self.y2),
                                (255, 0, 0), 2)

            # define the ROI region: the first slice is the y range, the second the x range
            # frame = frame[50:300, 220:450]
            frame = frame[self.y1:self.y2, self.x1:self.x2]

            # motion detection via background subtraction
            bg = bgModel.apply(frame, learningRate=0)
            # show the background-subtraction window
            cv2.imshow('bg', bg)
            # edge processing -- erosion
            fgmask = cv2.erode(bg, self.skinkernel, iterations=1)
            # show the eroded image
            cv2.imshow('erode', fgmask)
            # AND the original frame with the background-subtracted, eroded mask
            bitwise_and = cv2.bitwise_and(frame, frame, mask=fgmask)
            # show the masked image
            cv2.imshow('bitwise_and', bitwise_and)
            # convert to grayscale
            gray = cv2.cvtColor(bitwise_and, cv2.COLOR_BGR2GRAY)
            # Gaussian blur
            blur = cv2.GaussianBlur(gray, (self.blurValue, self.blurValue), 2)
            # cv2.imshow('GaussianBlur', blur)

            # adaptive threshold segmentation (adaptiveThreshold)
            thresh = cv2.adaptiveThreshold(blur, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY, 11, 2)
            cv2.imshow('th3', thresh)

            Ges = cv2.resize(thresh, (100, 100))
            # threshold the image (using Otsu)
            # _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
            # cv2.imshow('threshold1', thresh)

            if predict == True:

                # img = cv2.resize(thresh, (100, 100))
                img = np.array(Ges).reshape(-1, 100, 100, 1) / 255
                prediction = p_model.predict(img)
                final_prediction = [result.argmax()
                                    for result in prediction][0]
                ges_type = self.gesture[final_prediction]
                print(ges_type)
                cv2.putText(rec,
                            ges_type, (self.x1, self.y1),
                            fontFace=cv2.FONT_HERSHEY_COMPLEX,
                            fontScale=2,
                            thickness=3,
                            color=(0, 0, 255))
                # cv2.putText(rec, ges_type, (150, 220), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=1, thickness=3, color=(0, 0, 255))

            cv2.imshow('Original', rec)
            if vedeo is True and count < photo_num:
                # record the training set
                cv2.imencode(
                    '.jpg',
                    Ges)[1].tofile(self.train_path + '{}_{}.jpg'.format(
                        str(random.randrange(1000, 100000)), str(ges)))
                count += 1
                print(count)
            elif count == photo_num:
                print('Recorded {} gesture images; the test set for this gesture starts in 3 seconds, {} images in total'.format(
                    photo_num, photo_num * 0.43 - 1))
                time.sleep(3)
                count += 1
            elif vedeo is True and photo_num < count < photo_num * 1.43:
                cv2.imencode(
                    '.jpg',
                    Ges)[1].tofile(self.predict_path + '{}_{}.jpg'.format(
                        str(random.randrange(1000, 100000)), str(ges)))
                count += 1
                print(count)
            elif vedeo is True and count == photo_num * 1.43:
                vedeo = False
                ges += 1
                print('Finished recording this gesture; press l to record the next gesture, or t to stop recording and start training')

            k = cv2.waitKey(1)
            if k == 27:
                break

            elif k == ord('l'):  # record a gesture
                vedeo = True
                count = 0

            elif k == ord('p'):  # predict gestures
                predict = True
                while True:
                    model_name = input('Enter the model name\n')
                    if model_name == 'exit':
                        break
                    if model_name in os.listdir('./'):
                        print('Loading model {}'.format(model_name))
                        p_model = load_model(model_name)
                        break
                    else:
                        print('Invalid model name, please try again or type exit to quit')

            elif k == ord('r'):
                bgModel = cv2.createBackgroundSubtractorMOG2(
                    0, self.bgSubThreshold)
                print('Background reset complete')

            elif k == ord('t'):
                os.environ["CUDA_VISIBLE_DEVICES"] = "0"
                train = Training(batch_size=32,
                                 epochs=5,
                                 categories=len(self.gesture),
                                 train_folder=self.train_path,
                                 test_folder=self.predict_path,
                                 model_name=p_model)
                train.train()
                backend.clear_session()
Example #30
def processContinuousFeatures(algorithm, df, column_name, entropy, config):
	unique_values = sorted(df[column_name].unique())
	#print(column_name,"->",unique_values)
	
	subset_gainratios = []; subset_gains = []; subset_ginis = []; subset_red_stdevs = []
	
	for i in range(0, len(unique_values)-1):
		threshold = unique_values[i]
		
		subset1 = df[df[column_name] <= threshold]
		subset2 = df[df[column_name] > threshold]
		
		subset1_rows = subset1.shape[0]; subset2_rows = subset2.shape[0]
		total_instances = df.shape[0] #subset1_rows+subset2_rows
		
		subset1_probability = subset1_rows / total_instances
		subset2_probability = subset2_rows / total_instances
		
		if algorithm == 'ID3' or algorithm == 'C4.5':
			threshold_gain = entropy - subset1_probability*Training.calculateEntropy(subset1, config) - subset2_probability*Training.calculateEntropy(subset2, config)
			subset_gains.append(threshold_gain)
		
		if algorithm == 'C4.5': #C4.5 also need gain in the block above. That's why, instead of else if we used direct if condition here
			threshold_splitinfo = -subset1_probability * math.log(subset1_probability, 2)-subset2_probability*math.log(subset2_probability, 2)
			gainratio = threshold_gain / threshold_splitinfo
			subset_gainratios.append(gainratio)
				
		elif algorithm == 'CART':
			decision_for_subset1 = subset1['Decision'].value_counts().tolist()
			decision_for_subset2 = subset2['Decision'].value_counts().tolist()
			
			gini_subset1 = 1; gini_subset2 = 1
			
			for j in range(0, len(decision_for_subset1)):
				gini_subset1 = gini_subset1 - math.pow((decision_for_subset1[j]/subset1_rows),2)
			
			for j in range(0, len(decision_for_subset2)):
				gini_subset2 = gini_subset2 - math.pow((decision_for_subset2[j]/subset2_rows),2)
			
			gini = (subset1_rows/total_instances)*gini_subset1 + (subset2_rows/total_instances) * gini_subset2
			
			subset_ginis.append(gini)
		
		#----------------------------------
		elif algorithm == 'Regression':
			superset_stdev = df['Decision'].std(ddof=0)
			subset1_stdev = subset1['Decision'].std(ddof=0)
			subset2_stdev = subset2['Decision'].std(ddof=0)
			
			threshold_weighted_stdev = (subset1_rows/total_instances)*subset1_stdev + (subset2_rows/total_instances)*subset2_stdev
			threshold_reducted_stdev = superset_stdev - threshold_weighted_stdev
			subset_red_stdevs.append(threshold_reducted_stdev)
			
		#----------------------------------
	
	if algorithm == "C4.5":
		winner_one = subset_gainratios.index(max(subset_gainratios))
	elif algorithm == "ID3": #actually, ID3 does not support for continuous features but we can still do it
		winner_one = subset_gains.index(max(subset_gains))
	elif algorithm == "CART":
		winner_one = subset_ginis.index(min(subset_ginis))
	elif algorithm == "Regression":
		winner_one = subset_red_stdevs.index(max(subset_red_stdevs))
		
	winner_threshold = unique_values[winner_one]
	
	#print("theshold is ",winner_threshold," for ",column_name)
	df[column_name] = np.where(df[column_name] <= winner_threshold, "<="+str(winner_threshold), ">"+str(winner_threshold))
	
	return df
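
The loop above tries every unique value as a candidate threshold and keeps the best one per metric. A toy sketch of the CART (Gini) branch, with made-up data, could look like this:

import pandas as pd

df = pd.DataFrame({'Humidity': [65, 70, 75, 80, 85, 90],
                   'Decision': ['Yes', 'Yes', 'Yes', 'No', 'No', 'No']})

def gini(subset):
    probs = subset['Decision'].value_counts(normalize=True)
    return 1 - (probs ** 2).sum()

best = None
for threshold in sorted(df['Humidity'].unique())[:-1]:
    left = df[df['Humidity'] <= threshold]
    right = df[df['Humidity'] > threshold]
    weighted = (len(left) * gini(left) + len(right) * gini(right)) / len(df)
    if best is None or weighted < best[1]:
        best = (threshold, weighted)

print(best)  # (75, 0.0) -- a perfect split on this toy data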
Example #31

def get_params():
    checkpoint_dir = '/Users/Nolsigan/PycharmProjects/rlntm-tensorflow/checkpoints'
    max_length = 6
    rnn_cell = rnn.BasicLSTMCell
    rnn_hidden = 128
    learning_rate = 0.003
    optimizer = tf.train.AdamOptimizer()
    gradient_clipping = 5
    batch_size = 100
    epochs = 30
    epoch_size = 100
    num_symbols = 10
    dup_factor = 2
    mem_dim = 128
    mem_move_table = [-1, 0, 1]
    in_move_table = [-1, 0, 1]
    out_move_table = [0, 1]
    return AttrDict(**locals())


mode = sys.argv[1]

if mode == '--train':
    Training(get_params())()
elif mode == '--test':
    Testing(get_params())()
else:
    print('no mode specified, please use --train or --test as first argument')
Example #32
config['epochs'] = 2
config['lr'] = 1e-5
config['accumulated'] = 2
fine_tuning = False

zalo = ZaloDatasetProcessor()
zalo.load_from_path(dataset_path='dataset',
                    train_filename='combine.json',
                    test_filename='test.json',
                    dev_filename='dev.json')
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
features_train = zalo.convert_examples_to_features(zalo.train_data,
                                                   zalo.label_list, 256,
                                                   tokenizer)
features_test = zalo.convert_examples_to_features(zalo.test_data,
                                                  zalo.label_list, 256,
                                                  tokenizer)
features_dev = zalo.convert_examples_to_features(zalo.dev_data,
                                                 zalo.label_list, 256,
                                                 tokenizer)
if __name__ == "__main__":

    NUM_OF_INTENT = 2
    config_model = BertConfig.from_pretrained('bert-base-multilingual-cased',
                                              output_hidden_states=True)
    model = QAModel(config_model, NUM_OF_INTENT)
    if fine_tuning:
        model.load_state_dict(torch.load('models/model-squad1.bin'))
    training = Training(features_train, features_dev, model, logger,
                        zalo.label_list, config)
    training.train()