def eval_model(cfg):
    cnn = Classifier(input_shape=cfg["shape"])
    cnn.set_default_AlexNet_Model()

    assert cfg["pre_trained"] is not None
    dir_model = glob(
        os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*"))
    assert len(dir_model) == 1, "the length of the checkpoint list is {}".format(
        len(dir_model))
    model_file = os.path.join(dir_model[0], "cp-model.ckpt")
    # model_file = os.path.join(dir_model[0], "model-086-1.000000.ckpt")
    cnn.model.load_weights(model_file)

    dt = Data()
    sum_pred = np.zeros(6, )
    length_data = np.zeros(6, )
    for idx in range(dt.length['train']):
        image_file, label_gt = dt.get_image_file('train',
                                                 idx,
                                                 encode_label=True)
        label_est = cnn.model_predict(image_file, cfg["dip"])
        length_data[label_gt] += 1
        if label_est == label_gt:
            sum_pred[label_gt] += 1

        # Running per-class accuracy; classes not yet seen are reported as 0.
        acc = np.divide(sum_pred, length_data,
                        out=np.zeros_like(sum_pred), where=length_data > 0)
        print("{} - Acc:  {}".format((label_gt, label_est), acc))
Example No. 2
    def run_models(self):
        """run all the models defined in confi file
            Save model in pkl format,
            Save scores in csv files.
        
        Params:
            CONFIG (dict): configurations of task,
            SEARCH_SPACE (dict): hyperparameters of models,
            df (pandas dataframe): data frame read from raw data,
            model: Classifier instance,
            all_metrics (list): list to save all model training results.
        """

        CONFIG = self.load_config()
        DATA_PATH = self.DATA_PATH
        SEARCH_SPACE = self.SEARCH_SPACE
        df = self.load_data()
        model = Classifier(CONFIG, SEARCH_SPACE, df)
        all_metrics = []

        for model_name in self.CONFIG['RUN_MODELS']:
            model_name, grid_cv, best_model, X_test, y_test = model.train_model(
                model_name)
            metrics = model.gen_metrics(model_name, grid_cv, best_model,
                                        X_test, y_test)
            all_metrics.append(metrics)

            with open(
                    f'{self.CONFIG["MODEL_DIR"]}/{model_name}_best_model.pkl',
                    'wb') as f:
                pickle.dump(best_model, f)

        df_all_metrics = pd.concat(all_metrics)
        df_all_metrics.to_csv(
            f'{self.CONFIG["REPORT_DIR"]}/models_metrics.csv', index=False)
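A hedged sketch of reading back what run_models() writes; the directory and file names stand in for CONFIG["MODEL_DIR"] / CONFIG["REPORT_DIR"] and are assumptions.

import pickle
import pandas as pd

# Hypothetical paths; substitute the MODEL_DIR / REPORT_DIR values from the config file.
with open('models/random_forest_best_model.pkl', 'rb') as f:
    best_model = pickle.load(f)
metrics = pd.read_csv('reports/models_metrics.csv')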
Example No. 3
def eval_model(cfg):
    cnn = Classifier(input_shape=cfg["shape"])
    cnn.set_default_AlexNet_Model()

    assert cfg["pre_trained"] is not None
    dir_model = glob(
        os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*"))
    assert len(dir_model) == 1, "the length of the checkpoint list is {}".format(
        len(dir_model))
    model_file = os.path.join(dir_model[0], "cp-model.ckpt")
    # model_file = os.path.join(dir_model[0], "model-086-1.000000.ckpt")
    cnn.model.load_weights(model_file)

    dt = red_csv_file()
    filename = os.path.join(
        st.DATA_DIR, "eval_test_data_{}_{}.csv".format(cfg["pre_trained"],
                                                       cfg["extra"]))
    print(filename)
    # The header is written once; each prediction is then appended immediately,
    # so partial results survive an interrupted run.
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(("ID", "Label"))
    for idx in range(len(dt)):
        image_file = os.path.join(st.DATA_TEST_DIR, dt[idx])
        assert os.path.isfile(image_file)
        label_est = cnn.model_predict(image_file, cfg["dip"])

        with open(filename, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow((dt[idx], label_est))

        print("{0:} - progress:  {1:.1f}".format(idx, 100 * idx / len(dt)))
Example No. 4
def parte_a(train, test, numeric_attributes=IRIS_NUMERIC_ATTRIBUTES):
    classifier = Classifier(ID3(train, numeric_attributes))
    classifier.train()
    actual = []
    predicted = []
    for _, elem in test.iterrows():
        actual.append(elem.clazz)
        predicted.append(
            classifier.classify(elem.drop('clazz')))  # drop the label before classifying
    output_results(title='PARTE A', actual=actual, predicted=predicted)
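A hedged call sketch: parte_a() expects train/test DataFrames whose class labels live in a 'clazz' column; the iris_df name and split size are assumptions.

from sklearn.model_selection import train_test_split

# iris_df is a hypothetical DataFrame with the iris features plus a 'clazz' label column.
train, test = train_test_split(iris_df, test_size=0.3, random_state=0)
parte_a(train, test)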
Example No. 5
def run_train():
    parser = ArgumentParser()
    parser.add_argument('--settings', type=str, required=True,
                        help='Path to the training settings ini file')

    settings = configparser.ConfigParser()
    settings.read(parser.parse_args().settings)

    # create model
    predictor = ResNet50Layers(None)
    model = Classifier(predictor)

    # use selected gpu by id
    gpu = settings.getint('hardware', 'gpu')
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    label_handler, train_dataset, val_dataset = _create_datasets(settings['input_data'])

    train_iter = chainer.iterators.SerialIterator(train_dataset, settings.getint('trainer', 'batchsize'))
    val_iter = chainer.iterators.SerialIterator(val_dataset, settings.getint('trainer', 'batchsize'), repeat=False)

    output_dir = '{}/training_{}_{}'.format(settings.get('output_data', 'path'), settings.get('trainer', 'epochs'), settings.get('optimizer', 'optimizer'))

    # optimizer
    optimizer = _create_optimizer(settings['optimizer'])
    optimizer.setup(model)

    # trainer
    updater = chainer.training.updater.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = chainer.training.Trainer(updater, (settings.getint('trainer', 'epochs'), 'epoch'), output_dir)

    trainer.extend(extensions.LogReport())
    trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1))
    evaluator = Evaluator(val_iter, model, device=gpu)
    trainer.extend(evaluator)
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))

    trainer.run()

    # save model
    output_file_path = '{0}/resnet.model'.format(output_dir)
    chainer.serializers.save_npz(output_file_path, predictor)

    meta_output = {
        'trainer': settings._sections['trainer'],
        'optimizer': settings._sections['optimizer'],
        'train_data': train_dataset.get_meta_info(label_handler),
        'validation_data': val_dataset.get_meta_info(label_handler),
    }

    with open('{0}/meta.json'.format(output_dir), 'w') as f:
        json.dump(meta_output, f, indent=4)
Example No. 6
def main():
    directory = ''
    match = False
    inject = False
    db_name = 'certs.db'
    output = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:mib:o:',
                                   ['help', 'directory=', 'match', 'inject',
                                    'database=', 'output='])
    except getopt.GetoptError:
        usage()
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print('printing help')
            usage()
        elif opt in ('-d', '--directory'):
            directory = arg
            if(not directory.endswith('/')):
                directory += '/'
        elif opt in ('-m', '--match'):
            match = True
        elif opt in ('-i', '--inject'):
            inject = True
        elif opt in ('-b', '--database'):
            db_name = arg
        elif opt in ('-o', '--output'):
            output = True
            outputfile = arg
            if(outputfile == ''):
                outputfile = 'matches.txt'
    if directory:
        print("Grabbing data...")
        grabber = Grabber(directory)
        classifier = Classifier(grabber)
        print("Classifying...")
        classifier.classify()
        certs, keys = classifier.get_data()
    db = Database(db_name)
    if inject:
        try:
            print("Creating the database")
            db.create_db()
        except:
            print("Database already exists.")
        print("Injecting data into the database...")
        db.insert_keys(keys)
        db.insert_certs(certs)
    if match:
        print("Matching data...")
        db.match_cert_key()
    if output:
        db.export_matches(outputfile)
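A hedged invocation, assuming the script is saved as certs_tool.py: `python certs_tool.py -d certs/ -i -m -o matches.txt` grabs and classifies the certificates in certs/, injects them into certs.db, matches certificates to keys, and writes the matches to matches.txt.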
Example No. 7
def parte_b(train, test, numeric_attributes=IRIS_NUMERIC_ATTRIBUTES):
    classifiers = []
    classes = train.clazz.unique()
    for clazz in classes:
        classifier = Classifier(
            ID3(train, numeric_attributes, specific_class=clazz))
        classifier.train()
        classifiers.append(classifier)

    actual = []
    predicted = []
    for _, elem in test.iterrows():
        actual.append(elem.clazz)
        predicted.append(
            vote_classify(classifiers, elem.drop('clazz')))  # drop the label before voting
    output_results(title='PARTE B', actual=actual, predicted=predicted)
Example No. 8
    def __init__(self):
        # Classifier
        self.c = Classifier()
        self.c.load_classifier(TRAINING_SET)
        self.code = ''

        # Correios tracker
        self.t = Tracker()

        # Answers
        with open(GREETINGS_SET) as f:
            self.greetings_responses = f.readlines()
        with open(COMPLAIN_SET) as f:
            self.complain_responses = f.readlines()
        with open(HELP_SET) as f:
            self.help_responses = f.readlines()
        with open(UNKNOWN_SET) as f:
            self.unknown_responses = f.readlines()
        with open(WRONG_CODE_SET) as f:
            self.wrong_code_responses = f.readlines()
        with open(TRACKING_SET) as f:
            self.tracking_responses = f.readlines()
        with open(QUIT_SET) as f:
            self.quit_responses = f.readlines()
        with open(GOODBYE_SET) as f:
            self.goodbye_responses = f.readlines()
        with open(RESULTS_SET) as f:
            results_responses = f.readlines()
            self.offline_response = results_responses[0]
            self.fail_response = results_responses[1]
            self.sent_response = results_responses[2]
            self.forwarded_response = results_responses[3]
            self.delivering_response = results_responses[4]
            self.arrived_response = results_responses[5]

        # If it is expecting something
        self.waiting_for = ''
        self.attempts = 0
Example No. 9
dataset = [x for x in dataset if len(x[0].split()) > 0]

dataset = list(set(dataset))

classifiers = [(SVC(kernel='rbf', C=2.9, gamma=1), 'svm_rbf'),
               (SVC(kernel='linear'), 'svm_linear')]
               # (KNeighborsClassifier(), 'knn'),
               # (MultinomialNB(), 'naive_bayes'),
               # (Perceptron(), 'perceptron')]

vectorizers = [(TfidfVectorizer(min_df=0.0, max_df=1.0, sublinear_tf=True, use_idf=True), 'tfidf')]
               # (CountVectorizer(min_df=0.0, max_df=1.0), 'count'),
               # (HashingVectorizer(), 'hash')]


c = Classifier(classifier=classifiers[1][0], vectorizer=vectorizers[0][0])
x = list(map(lambda a: a[0], dataset))
y = list(map(lambda a: a[1], dataset))

c.train(x_train=x, y_train=y)

from sklearn.pipeline import make_pipeline
import eli5

pipe = make_pipeline(vectorizers[0][0], classifiers[1][0])
pipe.fit(x, y)


# Dump the linear SVM's feature weights to an HTML report (path is machine-specific).
with open('C:/Users/Gustavo/Desktop/batata.html', 'wb') as f:
    f.write(eli5.show_weights(classifiers[1][0], vec=vectorizers[0][0], top=1000).data.encode('utf8'))
Example No. 10
    # cv2.waitKey(0)
    return image


video_capture = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX

feelings_faces = []
for index, emotion in enumerate(EMOTIONS):
    feelings_faces.append(cv2.imread('../emojis/' + emotion + '.png', -1))

while True:
    # Capture frame-by-frame
    ret, frame = video_capture.read()
    # Predict result with network
    result = Classifier(format_image(frame)).data[0]

    # Draw face in frame
    # for (x,y,w,h) in faces:
    #   cv2.rectangle(frame, (x,y), (x+w,y+h), (255,0,0), 2)

    # Write results in frame
    if result is not None:
        for index, emotion in enumerate(EMOTIONS):
            cv2.putText(frame, emotion, (10, index * 20 + 20),
                        cv2.FONT_HERSHEY_PLAIN, 0.5, (0, 255, 0), 1)
            cv2.rectangle(frame, (130, index * 20 + 10),
                          (130 + int(result[index] * 100),
                           (index + 1) * 20 + 4), (255, 0, 0), -1)

        face_image = feelings_faces[np.argmin(result)]
Example No. 11
from src.stockProcessor import StockProcessor
from src.models import Models
from src.newsProcessor import NewsProcessor
from src.classifier import Classifier

if __name__ == '__main__':
    print('load Stock chart data')
    base_file_path = sys.argv[1]
    stockCharts = StockProcessor(base_file_path)
    print('Input stock price interval in minutes - 5, 15, 30, 60, 240, 1440')
    time_interval = input()
    amazon_stock_prices, apple_stock_prices = stockCharts.loadDataForInterval(time_interval)
    # For training only
    # document_vectors_amazon, document_vectors_apple = NewsProcessor(base_file_path).loadNewsArticles()
    # # AMAZON
    # classify = Classifier(base_file_path, 'amazon', time_interval, amazon_stock_prices, document_vectors_amazon)
    # classify.label_documents()
    # # APPLE
    # classify = Classifier(base_file_path, 'apple', time_interval, apple_stock_prices, document_vectors_apple)
    # classify.label_documents()
    amazon_model = Models(base_file_path, 'amazon', amazon_stock_prices, time_interval)
    amazon_model.naive_bayes_classifier()
    amazon_model.SVM_classifier()
    amazon_model.DT_classifier()
    amazon_model.SVM_poly_classifier()
    amazon_model.Logistic_Regression11_classifier()
    amazon_model.Logistic_Regression12_classifier()
    amazon_model.KNN_classifier()
    amazon_model.SGDC_classifier()
    amazon_model.accounting_factor()
Example No. 12
import os

import cv2 as cv
from os.path import join

from src.classifier import Classifier
from src.inference import draw_bboxes_with_classes
from src.preprocessing import prepare_image, extract_detection, IMG_WIDTH_SIZE
from util.constants import TEST_IMG_PATH, ORB, YOLO_CONFIDENCE, NMS_THRESHOLD
from src.object_detector import Yolo

TEMPLATE_IMGS = ["1uah.jpg", "2uah.jpg", "5uah.jpg", "10uah.jpg"]

if __name__ == '__main__':
    # Load the detector and classifier once and reuse them for every test image.
    yolo = Yolo(confidence=YOLO_CONFIDENCE, nms_threshold=NMS_THRESHOLD)
    yolo.load_model()
    clf = Classifier(ORB)
    for test_image in os.listdir(join(TEST_IMG_PATH, "5uah_heads")):
        image = cv.imread(join(TEST_IMG_PATH, "5uah_heads", test_image))
        prepared_image = prepare_image(image.copy())
        img_for_viz = image.copy()
        # prepared_image = image
        yolo.load_data(prepared_image)
        layer_outputs = yolo.detect()
        bboxes = yolo.process_outputs(layer_outputs)
        draw_bboxes_with_classes(img_for_viz, bboxes, clf)
        cv.imshow("Detections", img_for_viz)
        cv.waitKey(0)
    cv.destroyAllWindows()
Example No. 13
    return image


if __name__ == '__main__':
    parser = _parse_args()
    args = parser.parse_args()

    if args.image is None and args.video is None:
        raise ValueError(
            "Neither image nor video arguments are provided! Please,"
            " specify either the path to an image or video or both.")

    # TODO: supply nms threshold and confidence for YOLO as script arguments
    object_detector = Yolo(args.object_detector)
    classifier = Classifier(args.feature_detector, args.feature_descriptor,
                            args.feature_matcher,
                            args.outlier_rejection_method)
    if args.image:
        if not isfile(args.image):
            raise ValueError(
                "The provided path to an image does not exist! "
                "Please, provide a path relative to the project:) "
                "The directory with test images is test_images/")

        image = cv.imread(args.image)
        # Image resizing is not necessary,
        # OpenCV's YOLO interface takes care of that.
        prepared_image = prepare_image(image.copy(), resize=False)
        img_for_viz = image.copy()

        object_detector.load_data(prepared_image)
Example No. 14
class Chatbot:

    # Regular expressions for parsing responses
    RE_CODE = re.compile(r'.*(\w\w\d{9}\w\w)')
    RE_SENT = re.compile(r'.*postado.*')
    RE_FORWARDED = re.compile(r'.*encaminhado.*')
    RE_DELIVERING = re.compile(r'.*saiu.*')
    RE_ARRIVED = re.compile(r'.*entregue.*')

    # Retries when waiting for an action
    MAX_ATTEMPTS = 2

    def __init__(self):
        # Classifier
        self.c = Classifier()
        self.c.load_classifier(TRAINING_SET)
        self.code = ''

        # Correios tracker
        self.t = Tracker()

        # Answers
        with open(GREETINGS_SET) as f:
            self.greetings_responses = f.readlines()
        with open(COMPLAIN_SET) as f:
            self.complain_responses = f.readlines()
        with open(HELP_SET) as f:
            self.help_responses = f.readlines()
        with open(UNKNOWN_SET) as f:
            self.unknown_responses = f.readlines()
        with open(WRONG_CODE_SET) as f:
            self.wrong_code_responses = f.readlines()
        with open(TRACKING_SET) as f:
            self.tracking_responses = f.readlines()
        with open(QUIT_SET) as f:
            self.quit_responses = f.readlines()
        with open(GOODBYE_SET) as f:
            self.goodbye_responses = f.readlines()
        with open(RESULTS_SET) as f:
            results_responses = f.readlines()
            self.offline_response = results_responses[0]
            self.fail_response = results_responses[1]
            self.sent_response = results_responses[2]
            self.forwarded_response = results_responses[3]
            self.delivering_response = results_responses[4]
            self.arrived_response = results_responses[5]

        # If it is expecting something
        self.waiting_for = ''
        self.attempts = 0

    # Get text intent
    def get_intent(self, text):
        return self.c.classify(text)[0]

    # Return a random response from a set of responses
    def random_response(self, responses_set):
        return responses_set[random.randrange(len(responses_set))]

    # Return a response with formatted results
    def format_responses(self, code):
        info = self.t.track_latest(code)
        answer = ''
        status = ''

        if info['return'] == 'request_failed':
            answer = self.offline_response
        elif info['return'] == 'failure':
            answer = self.fail_response
        elif self.RE_SENT.match(info['status']):
            answer = self.sent_response
            answer = answer.format(code, info['when'])
        elif self.RE_FORWARDED.match(info['status']):
            answer = self.forwarded_response
            answer = answer.format(info['from'], info['to'])
        elif self.RE_DELIVERING.match(info['status']):
            answer = self.delivering_response
            answer = answer.format(info['where'])
        elif self.RE_ARRIVED.match(info['status']):
            answer = self.arrived_response
        return answer

    # Test if received a tracking code and respond
    def code_responses(self, text):
        # Respond to code input
        response = self.random_response(self.unknown_responses)

        # Try to parse a code
        test = self.RE_CODE.match(text)
        if test:
            # Return the results
            self.attempts = 0
            self.waiting_for = ''
            response = self.format_responses(test.group(0))
        else:
            # If failed respond
            if self.attempts == self.MAX_ATTEMPTS:
                # Quit after MAX_ATTEMPTS
                self.attempts = 0
                self.waiting_for = ''
                response = self.random_response(self.quit_responses)
            else:
                # Else, retry
                self.attempts = self.attempts + 1
                response = self.random_response(self.wrong_code_responses)
        return response

    # Get a response according to an intent
    def get_response(self, text):
        # Test for tracking code
        if self.waiting_for == 'code':
            return self.code_responses(text)

        # Else, get responses to chat
        intent = self.get_intent(text)
        if intent == 'T':
            self.waiting_for = 'code'
        responses = {
            'G': self.greetings_responses,
            'H': self.help_responses,
            'T': self.tracking_responses,
            'C': self.complain_responses,
            'U': self.unknown_responses,
            'B': self.goodbye_responses
        }.get(intent, self.unknown_responses)
        return self.random_response(responses)
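A hedged interactive loop around the Chatbot class above; it assumes the *_SET constants, Classifier and Tracker are already importable in the same module.

# Minimal hypothetical REPL for the chatbot.
bot = Chatbot()
while True:
    text = input('> ')
    print(bot.get_response(text))
    if bot.get_intent(text) == 'B':   # 'B' is the goodbye intent handled above
        break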
Example No. 15
import os
import sys
from src.classifier import Classifier
import tensorflow as tf

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def print_usage_and_exit():
    print("Usage: " + sys.argv[0] +
          " <fileClass1> <fileClass2> <corpusDir> <modelFileName>")
    sys.exit(0)


if __name__ == '__main__':
    if len(sys.argv) != 5:
        print_usage_and_exit()
    classifier = Classifier(class1_file=sys.argv[1], class2_file=sys.argv[2])
    classifier.train(sys.argv[3], sys.argv[4])
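A hedged command line matching the usage string above, with hypothetical file names: `python train_classifier.py class1.txt class2.txt corpus/ model.h5`.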
Example No. 16
def set_reproductible():
    # The below is necessary for starting Numpy generated random numbers
    # in a well-defined initial state.
    np.random.seed(17)
    # The below is necessary for starting core Python generated random numbers
    # in a well-defined state.
    rn.seed(12345)


if __name__ == "__main__":
    set_reproductible()
    datadir = "../data/"
    trainfile = datadir + "traindata.csv"
    devfile = datadir + "devdata.csv"
    testfile = None
    # Basic checking
    start_time = time.perf_counter()
    classifier = Classifier()
    print("\n")
    # Training
    print("1. Training the classifier...\n")
    classifier.train(trainfile)
    # Evaluation on the dev dataset
    print("\n2. Evaluation on the dev dataset...\n")
    slabels = classifier.predict(devfile)
    glabels = load_label_output(devfile)
    eval_list(glabels, slabels)
    if testfile is not None:
        # Evaluation on the test data
        print("\n3. Evaluation on the test dataset...\n")
        slabels = classifier.predict(testfile)
        glabels = load_label_output(testfile)
        eval_list(glabels, slabels)
Example No. 17
def run(save_loc="mlp/ANN", n_hid=2, loss=Loss.CCE, activation=nn.ReLU):

    print("\n----- Running {} -----".format(os.path.basename(__file__)))

    ####################################################
    # Configure datasets.
    ####################################################

    dataset = Dataset.MNIST

    if dataset != Dataset.MNIST:
        save_loc += "_{}".format(str(dataset).split(".")[-1])

    batch_size_train = 64
    batch_size_test = 1000

    input_scaling = 1

    ####################################################
    # Configure Networks.
    ####################################################

    if loss == Loss.MSE:
        output = None
        loss_str = "mse"
    elif loss == Loss.CCE:
        output = lambda: nn.LogSoftmax(-1)
        loss_str = "nll"
    else:
        raise ValueError("Unrecognised loss :", loss)

    default_net_args = {
        'n_hid': [128] * n_hid,
        'n_in': 784,
        'n_out': 10 if dataset != Dataset.EMNIST else 47,
        'activation': activation,
        'output': output,
    }

    ####################################################
    # Train classifiers
    ####################################################

    n_seeds = 5

    losses = {}
    corrects = {}
    valid_scores = {}

    for i in range(n_seeds):

        lab = 'seed{}'.format(i)

        network = LinNet(**default_net_args)

        train_loader, test_loader, validation_loader = get_dataset_loaders(
            dataset=dataset,
            train_batch=batch_size_train,
            test_batch=batch_size_test,
            unroll_img=True,
            max_value=input_scaling,
            get_validation=True)

        classifier = Classifier(network,
                                train_loader,
                                test_loader,
                                n_epochs=50,
                                learning_rate=5e-4,
                                init_weight_mean=0,
                                init_weight_std=0.1,
                                loss=loss_str,
                                weight_range=None,
                                weight_normalisation=weight_norm.NONE,
                                log_interval=25,
                                n_test_per_epoch=0,
                                save_path=os.path.join(save_loc, lab))

        train_losses, test_correct = classifier.train()

        losses[lab] = train_losses
        corrects[lab] = test_correct

        ####################################################
        # Validation
        ####################################################

        classifier.load(classifier.network_save_path)

        valid_loss, valid_correct = classifier.validate(validation_loader)

        print("Validation accuracy : {:.2f}%".format(
            100. * valid_correct / len(validation_loader.dataset)))
        valid_scores[lab] = 100. * valid_correct / len(
            validation_loader.dataset)

        validation_save_path = os.path.join(classifier.save_path,
                                            "validation_score.pkl")
        with open(validation_save_path, 'wb+') as output:
            pickle.dump(np.array([valid_loss, valid_correct]), output,
                        pickle.HIGHEST_PROTOCOL)
            print('Validation scores saved to {}'.format(validation_save_path))

    print("Validation scores are:")
    for lab, score in valid_scores.items():
        print("\t{} : {:.2f}%".format(lab, score))

    ####################################################
    # Plot results
    ####################################################

    fig_fname = os.path.join(save_loc, "training_performance")

    with plt.style.context('seaborn-paper', after_reset=True):

        fig, (ax1, ax2) = plt.subplots(1,
                                       2,
                                       figsize=(7, 2.5),
                                       gridspec_kw={'wspace': 0.3})

        window = 25
        avg_mask = np.ones(window) / window

        for lab, data in losses.items():
            ax1.plot(np.convolve(data[:, 0], avg_mask, 'valid'),
                     np.convolve(data[:, 1], avg_mask, 'valid'),
                     label=lab,
                     linewidth=0.75,
                     alpha=0.8)
        ax1.legend()
        ax1.set_xlabel("Epoch")
        ax1.set_ylabel("Losses")

        for lab, data in corrects.items():
            ax2.plot(data[:, 0],
                     data[:, 1] / len(test_loader.dataset),
                     label=lab)
            print("{}: Best score {}/{}".format(lab, np.max(data),
                                                len(test_loader)))
        ax2.legend()
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Accuracy")

        plt.savefig(fig_fname + ".png", bbox_inches='tight')
        plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
Example No. 18
    set_reproducible()
    n_runs = 5
    if len(sys.argv) > 1:
        n_runs = int(sys.argv[1])
    datadir = "../data/"
    trainfile = datadir + "traindata.csv"
    devfile = datadir + "devdata.csv"
    testfile = None
    # testfile = datadir + "testdata.csv"

    # Runs
    start_time = time.perf_counter()
    devaccs = []
    testaccs = []
    for i in range(1, n_runs+1):
        classifier = Classifier()
        devacc, testacc = train_and_eval(classifier, trainfile, devfile, testfile, i)
        devaccs.append(np.round(devacc,2))
        testaccs.append(np.round(testacc,2))
    print('\nCompleted %d runs.' % n_runs)
    total_exec_time = (time.perf_counter() - start_time)
    print("Dev accs:", devaccs)
    print("Test accs:", testaccs)
    print()
    print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
    print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
    print("\nExec time: %.2f s. ( %d per run )" % (total_exec_time, total_exec_time / n_runs))



Example No. 19
    def __init__(self, file_name: str, features: list):
        self.classifier = Classifier(file_name, filter=False)
        self.plotter = Plotter(file_name, features, self.house_array_function)
        self.plotter.plot_scatter()
Example No. 20
from os.path import join

import cv2 as cv

from src.classifier import Classifier
from src.detector_descriptor import DetectorDescriptor
from util.constants import DATA_PATH, TEST_IMG_PATH, ORB

if __name__ == '__main__':
    template_img = cv.imread(join(DATA_PATH, "1uah_heads.jpg"))
    test_img = cv.imread(join(TEST_IMG_PATH, "1uah_heads", "top.jpeg"))

    detector_descriptor = DetectorDescriptor(ORB, ORB)

    template_keypoints, template_descriptions = detector_descriptor.detect_describe(
        template_img)
    test_keypoints, test_descriptions = detector_descriptor.detect_describe(
        test_img)

    bf_matcher = cv.BFMatcher_create(cv.NORM_HAMMING, True)

    matches = bf_matcher.match(template_descriptions, test_descriptions)

    clf = Classifier()

    print(len(matches))

    best_matches = clf.ransac_outlier_rejection(template_keypoints,
                                                test_keypoints, matches)

    print(len(best_matches))
Example No. 21
def train(cfg):
    cnn = Classifier(input_shape=cfg["shape"], batch_size=cfg["batch"])
    if cfg["model"] == "AlexNet":
        cnn.set_default_AlexNet_Model()
    else:
        arch = cfg["arch"]
        cnn.set_custom_model(conv_layers=arch["conv"],
                             dense_layers=arch["dense"])
    if cfg["random"]:
        cnn.random_boost = True

    if cfg["pre_trained"] is not None:
        dir_model = glob(
            os.path.join(st.DIR_LOG, "RUNNING",
                         "*" + cfg["pre_trained"] + "*"))
        assert len(dir_model) == 1
        # model_file = os.path.join(dir_model[0], "model-086-1.000000.ckpt")
        # cnn.model.load_weights(model_file)
        cnn.model.load_weights(os.path.join(dir_model[0], "cp-model.ckpt"))

    val_data = cnn.get_data_generator(st.DATA_VALIDATION_DIR,
                                      dip_filter=cfg["dip"])
    train_data = cnn.get_data_generator(st.DATA_TRAIN_DIR,
                                        dip_filter=cfg["dip"])
    cnn.set_log_name(cfg)
    cnn.model_select = cfg["msk"]
    cnn.lr["lr"] = cfg["lr"]
    cnn.lr["decay_steps"] = cfg["dc_st"]
    cnn.lr["decay_rate"] = cfg["dc"]
    cnn.lr["momentum"] = cfg["mt"]

    cnn.callbacks.append(
        TensorBoard(log_dir=os.path.join(st.DIR_LOG, "RUNNING", cnn.log_name)))
    cnn.train(gen_train=train_data, gen_val=val_data, epochs=cfg["epochs"])
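A hedged cfg sketch for train(); the key names mirror the ones read above, all values are assumptions, and set_log_name() may consume further keys not shown here.

cfg = {
    "shape": (227, 227, 3), "batch": 32, "epochs": 50,
    "model": "AlexNet", "arch": None,          # "arch" is only read when model != "AlexNet"
    "random": False, "pre_trained": None,      # set to a run-id substring to resume training
    "dip": None, "msk": "val_accuracy",
    "lr": 1e-3, "dc_st": 1000, "dc": 0.9, "mt": 0.9,
}
train(cfg)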
Example No. 22
    def __init__(self, file_name: str):
        self.classifier = Classifier(file_name)
        self.plot_pair()
Example No. 23
def run():
    start_time = time()
    data_cand, data_part, full_data = load_data()
    # numeric_parties  = full_data.party.map(party_map)
    train_c, test_c = train_test_split(data_cand, test_size=0.2)
    train_p, test_p = train_test_split(data_part, test_size=0.2)
    candidatos_clf = Classifier(train_c.drop('candidatoId', axis=1), train_c.candidatoId)
    partidos_clf = Classifier(train_p.drop('idPartido', axis=1), train_p.idPartido)

    cand_solver = candidatos_clf._predict()
    n_cand, pca_cand_solver = candidatos_clf._pca()
    part_solver = partidos_clf._predict()
    n_part, pca_part_solver = partidos_clf._pca()

    cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1), test_c.candidatoId, cand_solver)
    pca_cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1), test_c.candidatoId, pca_cand_solver, n_cand)
    part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1), test_p.idPartido, part_solver)
    pca_part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1), test_p.idPartido, pca_part_solver, n_part)

    output_results(f'CANDIDATOS | {cand_solver}', test_c.candidatoId, cand_pred)
    output_results(f'CANDIDATOS_PCA | {pca_cand_solver}, {n_cand}', test_c.candidatoId, pca_cand_pred)
    output_results(f'PARTIDOS | {part_solver}', test_p.idPartido, part_pred)
    output_results(f'PARTIDOS_PCA | {pca_part_solver}, {n_part}', test_p.idPartido, pca_part_pred)
    cand_part_target, cand_part_pred = candidato_mapper(test_c.candidatoId, cand_pred)
    output_results(f'PARTIDOS CON CANDIDATO | {cand_solver}', cand_part_target, cand_part_pred)

    cm_cand = ConfusionMatrix(test_c.candidatoId, cand_pred)
    cm_pca_cand = ConfusionMatrix(test_c.candidatoId, pca_cand_pred)
    cm_part = ConfusionMatrix(test_p.idPartido, part_pred)
    cm_pca_part = ConfusionMatrix(test_p.idPartido, pca_part_pred)
    cm_cand_part = ConfusionMatrix(cand_part_target, cand_part_pred)

    elapsed_time = time() - start_time
    print(f'----------------------------------------')
    print(f'TOTAL TIME: {datetime.timedelta(seconds=elapsed_time)}')

    result = {
        'data': {
            'candidatos': (train_c, test_c),
            'partidos': (train_p, test_p),
        },
        'results': {
            'candidatos': (test_c.candidatoId, cand_pred),
            'candidatos_pca': (test_c.candidatoId, pca_cand_pred),
            'partidos': (test_p.idPartido, part_pred),
            'partidos_pca': (test_p.idPartido, pca_part_pred),
            'partidos_candidatos': (cand_part_target, cand_part_pred)
        },
        'matrices': {
            'candidatos': cm_cand,
            'candidatos_pca': cm_pca_cand,
            'partidos': cm_part,
            'partidos_pca': cm_pca_part,
            'partidos_candidatos': cm_cand_part
        }
    }
    return result
Example No. 24
def run(save_loc="cnn/ONN",
        loss=Loss.MSE,
        OD=10,
        gradient=Gradient.APPROXIMATE):

    print("\n----- Running {} -----".format(os.path.basename(__file__)))

    ####################################################
    # Configure datasets.
    ####################################################

    dataset = Dataset.MNIST

    if dataset != Dataset.MNIST:
        save_loc += "_{}".format(str(dataset).split(".")[-1])

    batch_size_train = 64
    batch_size_test = 1000

    ####################################################
    # Configure Networks.
    ####################################################

    sat_abs_nl_args = {
        'I_sat': 1,
        'OD': OD,
        'encoding': Encoding.AMPLITUDE,
        'gradient': gradient
    }

    SANL = lambda: SatAbsNL(**sat_abs_nl_args)

    if loss == Loss.MSE:
        output = None
        loss_str = "mse"
    elif loss == Loss.CCE:
        output = lambda: nn.LogSoftmax(-1)
        loss_str = "nll"
    else:
        raise ValueError("Unrecognised loss :", loss)

    net_args = {
        'n_ch_conv': [32, 64],
        'kernel_size_conv': [5, 5],
        'n_in_fc': 1024,
        'n_hid_fc': [128],
        'activation_conv': [SANL, SANL],
        'activation_fc': SANL,
        'dropout': lambda: nn.Dropout(0.4),
        'conv_args': {
            'stride': 1,
            'padding': 0,
            'bias': False
        },
        'pool_conv': lambda: nn.AvgPool2d(kernel_size=2, stride=2),
        'n_out': 10 if dataset != Dataset.EMNIST else 47,
        'bias_fc': False,
        'output': output
    }

    ####################################################
    # Train classifiers
    ####################################################

    n_seeds = 5

    losses = {}
    corrects = {}
    valid_scores = {}

    for i in range(n_seeds):
        lab = 'seed{}'.format(i)

        network = ConvNet(**net_args)

        train_loader, test_loader, validation_loader = get_dataset_loaders(
            dataset=dataset,
            train_batch=batch_size_train,
            test_batch=batch_size_test,
            unroll_img=False,
            max_value=15 if OD > 10 else 5,
            get_validation=True)

        classifier = Classifier(
            network,
            train_loader,
            test_loader,
            n_epochs=30 if dataset == Dataset.MNIST else 40,
            learning_rate=5e-4,
            init_weight_mean=0.,
            init_weight_std=0.01,
            init_conv_weight_std=0.1,
            loss=loss_str,
            weight_range=None,
            weight_normalisation=weight_norm.NONE,
            log_interval=25,
            n_test_per_epoch=0,
            save_path=os.path.join(save_loc, lab))

        train_losses, test_correct = classifier.train()

        losses[lab] = train_losses
        corrects[lab] = test_correct

        ####################################################
        # Validation
        ####################################################

        classifier.load(classifier.network_save_path)

        valid_loss, valid_correct = classifier.validate(validation_loader)

        print("Validation accuracy : {:.2f}%".format(
            100. * valid_correct / len(validation_loader.dataset)))
        valid_scores[lab] = 100. * valid_correct / len(
            validation_loader.dataset)

        validation_save_path = os.path.join(classifier.save_path,
                                            "validation_score.pkl")
        with open(validation_save_path, 'wb+') as output:
            pickle.dump(np.array([valid_loss, valid_correct]), output,
                        pickle.HIGHEST_PROTOCOL)
            print('Validation scores saved to {}'.format(validation_save_path))

    print("Validation scores are:")
    for lab, score in valid_scores.items():
        print("\t{} : {:.2f}%".format(lab, score))

    ####################################################
    # Plot results
    ####################################################

    fig_fname = os.path.join(save_loc, "training_performance")

    with plt.style.context('seaborn-paper', after_reset=True):

        fig, (ax1, ax2) = plt.subplots(1,
                                       2,
                                       figsize=(7, 2.5),
                                       gridspec_kw={'wspace': 0.3})

        window = 25
        avg_mask = np.ones(window) / window

        for lab, data in losses.items():
            ax1.plot(np.convolve(data[:, 0], avg_mask, 'valid'),
                     np.convolve(data[:, 1], avg_mask, 'valid'),
                     label=lab,
                     linewidth=0.75,
                     alpha=0.8)
        ax1.legend()
        ax1.set_xlabel("Epoch")
        ax1.set_ylabel("Losses")

        for lab, data in corrects.items():
            ax2.plot(data[:, 0],
                     data[:, 1] / len(test_loader.dataset),
                     label=lab)
            print("{}: Best score {}/{}".format(lab, np.max(data),
                                                len(test_loader)))
        ax2.legend()
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Accuracy")

        plt.savefig(fig_fname + ".png", bbox_inches='tight')
        plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
Example No. 25
    def clasify(self):
        print("step 6. classifier")
        from src.classifier import Classifier
        classifier = Classifier()
        classifier.parse_arguments(None)
        classifier.main()
Example No. 26
# Date: 11/04/2018
# Description: Main file

import os
from src.classifier import Classifier
from werkzeug.utils import secure_filename
from flask import Flask, render_template, request, session, jsonify, redirect, url_for, flash

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['SECRET_KEY'] = os.urandom(0x200)
ALLOWED_EXTENSIONS = set(['png', 'jpg', 'jpeg'])
PATH_TO_MODEL = 'ai/trained_models/cat_dog_3.h5'

classifier = Classifier(PATH_TO_MODEL)


@app.route('/')
def index():
    return render_template('index.html')


def allowed_file(filename):
    return '.' in filename and \
           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['POST'])
def upload():
    if 'file' not in request.files:
        flash('Upload an image')
Example No. 27
"""Generate keypoints and descriptions for template images and dump them."""
import cv2 as cv
import os
from os.path import join

from tqdm import tqdm

from src.classifier import Classifier
from src.preprocessing import prepare_image
from util.utils import CLASS_NAMES, FEATURE_DETECTORS, FEATURE_DESCRIPTORS, corner_case
from util.constants import DATA_PATH

if __name__ == '__main__':
    template_images = [(name,
                        prepare_image(cv.imread(join(DATA_PATH,
                                                     f"{name}.jpg"))))
                       for name in CLASS_NAMES]

    for detector in FEATURE_DETECTORS:
        for descriptor in FEATURE_DESCRIPTORS:
            if corner_case(detector, descriptor):
                continue
            clf = Classifier(detector, descriptor)
            for cls_name, image in tqdm(
                    template_images,
                    desc=f"Dumping features for {detector}_{descriptor}"):
                clf.dump_features(cls_name, image)
Example No. 28
client = MongoClient('localhost', 27017)
db = client['tcc']
collection = db['tweets']

events = collection.find({"classified": True})

dataset = pickle.load(open(conf.project_path + 'data\dataset_preprocessed.pickle', 'rb'))
dataset = [x for x in dataset if len(x[0].split()) > 0]

# positives = [x for x in dataset if x[1] == 'positive']
# negatives = [x for x in dataset if x[1] == 'negative']
#
# dataset = negatives + positives[0:len(negatives)]

vectorizer = TfidfVectorizer(min_df=0.0, max_df=1.0, sublinear_tf=True, use_idf=True)
classifier = SVC(kernel='rbf', C=2.9, gamma=1)
p = Preprocessor()

clf = Classifier(vectorizer=vectorizer, classifier=classifier)

x_train = [x[0] for x in dataset]
y_train = [x[1] for x in dataset]

clf.train(x_train=x_train, y_train=y_train)

print(clf.predict(p.preprocess('''''')))

# for ev in events:
#     print(ev['text'].replace('\n', '') + ' --> ' + clf.predict(p.preprocess(ev['text'])))
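A hedged prediction on fresh text with the pipeline trained above; the sample sentence is made up.

print(clf.predict(p.preprocess('o atendimento foi ótimo, recomendo')))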
Example No. 29
def run_train():
    parser = ArgumentParser()
    parser.add_argument(
        '--paths',
        type=str,
        nargs='+',
        required=True,
        help='Root paths of folders that contain images and pascal voc files')
    parser.add_argument('--label_names',
                        type=str,
                        required=True,
                        help='Path to label names file')
    parser.add_argument('--training_splitsize',
                        type=float,
                        default=0.9,
                        help='Splitsize of training data')
    parser.add_argument('--batchsize',
                        type=int,
                        default=20,
                        help='Learning minibatch size')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help='Numbers of epochs to train')
    parser.add_argument('--gpu',
                        type=int,
                        default=-1,
                        help='GPU ID, negative value indicates CPU')
    parser.add_argument('--out',
                        default='trainer_output',
                        help='Output directory of trainer')
    parser.add_argument('--val_batchsize',
                        type=int,
                        default=250,
                        help='Validation minibatch size')
    args = parser.parse_args()

    # create model
    predictor = ResNet50Layers(None)
    model = Classifier(predictor)

    # TODO: initmodel

    # use selected gpu by id
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # build datasets from paths
    label_handler = LabelHandler(args.label_names)

    # builder = LabeledImageDatasetBuilder(args.paths, label_handler)
    # train_dataset, val_dataset = builder.get_labeled_image_dataset_split(args.training_splitsize)
    builder = SortedImageDatasetBuilder(args.paths, label_handler)
    train_dataset, val_dataset = builder.get_sorted_image_dataset_split(
        args.training_splitsize)

    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val_dataset,
                                                args.val_batchsize,
                                                repeat=False)

    # optimizer
    learning_rate = 0.01
    momentum = 0.9
    optimizer = chainer.optimizers.MomentumSGD(learning_rate, momentum)
    optimizer.setup(model)

    # trainer
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       args.out)

    trainer.extend(extensions.LogReport())
    trainer.extend(chainer.training.extensions.ProgressBar(update_interval=10))

    trainer.run()

    # save model
    output_file_path = '{0}/resnet_{1}_{2}.model'.format(
        args.out, args.batchsize, args.epoch)
    chainer.serializers.save_npz(output_file_path, predictor)
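A hedged invocation of this training script with hypothetical paths: `python run_train.py --paths data/setA data/setB --label_names labels.txt --gpu 0 --epoch 20 --out trainer_output`.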