Example No. 1
def ejer5_loss(loss_fun, act_fun_last, label, nfig1, nfig2):
    print(label)
    reg1 = regularizador.L2(0.1)
    reg2 = regularizador.L2(0.1)

    proto = clasificador.Classifier(epochs=300, batch_size=50, eta=0.001)

    outputfile = "ejer5_" + label + "_v3.dat"

    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()

    mean_train = x_train.mean()

    n_clasifi = 10
    X, Y = clasificador.flattening(x_train, y_train, n_clasifi, mean_train)
    X_test, Y_test = clasificador.flattening(x_test, y_test, n_clasifi,
                                             mean_train)

    proto.fit(X,
              Y,
              X_test,
              Y_test,
              act_function1=act.ReLU(0),
              reg1=reg1,
              loss_function=loss_fun,
              act_function2=act_fun_last,
              reg2=reg2)

    # plt.figure(nfig1)
    # plt.ylabel("Accuracy [%]")
    # plt.plot(proto.acc_vect, label="Entrenamiento", c='red', alpha=0.6, ls='--')
    # plt.plot(proto.pres_vect, label="Validación", c='blue', alpha=0.6)
    # plt.legend(loc=0)
    # plt.savefig("ejer5_acc_"+label+".pdf")

    # plt.figure(nfig2)
    # plt.ylabel("Pérdida")
    # plt.plot(proto.loss_vect, label="Entrenamiento", c='red', alpha=0.6, ls='--')
    # plt.plot(proto.loss_test, label="Validación", c='blue', alpha=0.6)
    # plt.legend(loc=0)
    # plt.savefig("ejer5_loss_"+label+".pdf")
    # plt.show()
    #plt.close()
    #plt.clf()

    np.savetxt(
        outputfile,
        np.array([
            proto.acc_vect, proto.pres_vect, proto.loss_vect, proto.loss_test
        ]).T)
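The plotting block above is commented out, so only the raw curves reach disk. A minimal sketch for reloading and plotting them later (the filename is a placeholder for whatever `outputfile` was; the column order follows the np.savetxt call above):

import numpy as np
import matplotlib.pyplot as plt

# Columns, in order: train accuracy, validation accuracy, train loss, validation loss
acc_train, acc_val, loss_train, loss_val = np.loadtxt("ejer5_<label>_v3.dat", unpack=True)

plt.figure()
plt.ylabel("Accuracy [%]")
plt.plot(acc_train, label="Entrenamiento", c='red', alpha=0.6, ls='--')
plt.plot(acc_val, label="Validación", c='blue', alpha=0.6)
plt.legend(loc=0)
plt.show()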
Example No. 2
def evaluate_classifier(model_filename):
    with tf.Graph().as_default():
        classifier_net = classifier.Classifier()
        classification_model = classifier_net.get_model(122, 122)
        classification_model.load(model_filename)

        annotations = data.load_annotations()
        image_list = data.create_image_list(annotations)

        ok = 0
        n = 0

        with open(predicted_annotations_path) as data_file:
            bounding_box_data = json.load(data_file)

        #bounding_box_data = data.load_annotations()

        for filepath in image_list:
            x = int(bounding_box_data[filepath][0])
            w = int(bounding_box_data[filepath][1])
            y = int(bounding_box_data[filepath][2])
            h = int(bounding_box_data[filepath][3])

            # Extend the bounding box by 10% on each side
            x -= int(0.1 * w)
            w += int(0.2 * w)
            y -= int(0.1 * h)
            h += int(0.2 * h)

            crop = cv2.imread(data.get_image_path(filepath))
            if crop is None:  # unreadable or missing image
                continue
            # Clamp the extended box to the image before cropping
            x, y = max(x, 0), max(y, 0)
            crop = crop[y:y + h, x:x + w]
            height, width, _ = crop.shape
            if height == 0 or width == 0:
                continue
            crop = cv2.resize(crop, (122, 122))

            classification = classification_model.predict([crop])[0]

            if data.classes[np.argmax(classification)] == data.get_image_label(
                    filepath):
                ok += 1

            #print(data.classes[np.argmax(classification)] + " " + data.get_image_label(filepath))

            n += 1

            print(ok / n)
Example No. 3
def main():
    config = get_config()
    random.seed(config["algorithm"]["random_seed"])
    train_data, test_data = acq.extract_data(config)

    if config["verbose"]["enabled"]:
        acq.show_data(train_data[0], config["verbose"]["sample_timeout"])

    algo = classifier.Classifier(config["algorithm"]["random_seed"])
    algo.train(train_data[0], train_data[1])

    estimates = algo.test(test_data[0])
    algo.eval(estimates, test_data[1])

    show_code_in_action(test_data, algo)
    print("Finished algorithm.")
Example No. 4
def _custom_classifier(features):
    clsf = classifier.Classifier(CLASSES, TRAINING_SAMPLES_LIMIT, features, 'avg')

    _train_classifier_from_data(clsf)
    clsf.finalize()

    stats = _test_custom_classifier(clsf)
    for cls in sorted(stats.keys()):
        util.debug("Class {} => {}% correct ({} total)".format(cls, round(stats[cls]['accuracy'] * 100, 1), stats[cls]['total']))

    # And some machine-readable output suitable for appending to a CSV for
    # analysis
    feature_id = ','.join([ d['name'] for d in features ])
    print("decision_mode,features,class,total,accuracy")
    for cls in stats:
        print("{},\"{}\",{},{},{}".format(clsf.decision_mode, feature_id, cls, stats[cls]['total'], stats[cls]['accuracy']))
Example No. 5
def main():
    makeSub = True
    featureImportance = False
    cvfold = True
    df = pd.read_csv('../data/cprobTrain15NA.csv')

    X, y = np.array(pd.read_csv('../data/train.csv',
                                usecols=range(1, 9))), np.array(
                                    pd.read_csv('../data/train.csv').ACTION)
    X = np.hstack((X, np.array(df)))

    params = {
        'max_depth': 4,
        'subsample': 0.5,
        'verbose': 0,
        'random_state': 1337,
        'min_samples_split': 10,
        'min_samples_leaf': 10,
        'max_features': 10,
        'n_estimators': 350,
        'learning_rate': 0.05
    }

    clf = GradientBoostingClassifier(**params)
    prefix = 'lib/gbm350d4m10c15'
    if cvfold:
        c = classifier.Classifier(X, y)
        c.validate(clf, nFolds=10, out=prefix + 'Train.csv')

    if makeSub:
        Xt = np.array(pd.read_csv('../data/test.csv', usecols=range(1, 9)))
        Xt = np.hstack(
            (Xt, np.array(pd.read_csv('../data/cprobTest15NA.csv'))))
        clf.fit(X, y)
        y_ = clf.predict_proba(Xt)[:, 1]
        out = pd.read_csv('subs/nbBaseTest.csv')
        out.ACTION = y_
        out.to_csv(prefix + 'Test.csv', index=False)

    if featureImportance:
        print("Feature ranking:")
        importances = clf.feature_importances_
        indices = np.argsort(importances)[::-1]
        np.savetxt('indices.txt', indices, delimiter=',')
        for f in range(df.shape[1]):
            print("%d. feature (%s,%f)" % (f + 1, df.columns[indices[f]],
                                           importances[indices[f]]))
Example No. 6
def predict():
    """Predict the potential piece of equipment based on the request.

    Expected JSON format:
        {
            "body": {
                "imgsource": "<base64 value>"
            }
        }
    """
    # Extract information from the JSON
    bodyJson = request.get_json(force=True).get('body')
    data = bodyJson.get('imgsource')

    path = "./temp/test.png"
    # Decode the BASE64 image and save it into the temp file
    imgdata = base64.b64decode(str(data))
    image = Image.open(BytesIO(imgdata))
    img = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(path, img)

    r = requests.post(
        'https://api.remove.bg/v1.0/removebg',
        files={'image_file': open('./temp/test.png', 'rb')},
        data={
            'size': 'auto',
            'bg_color': 'white'
        },
        headers={'X-Api-Key': 'iTUtE8hsnt76HMLmfjPAi2hp'},
    )
    if r.status_code == requests.codes.ok:
        with open('./temp/test.png', 'wb') as out:
            out.write(r.content)
    else:
        print("Error:", r.status_code, r.text)

    # Call the image recognition function
    predictor = classifier.Classifier()
    predicted_label = predictor.image_recognition('./temp/test.png')
    print(predicted_label)

    max_index = np.argmax(predicted_label[0])
    probability = str(float(predicted_label[0][max_index] * 100))
    data_response = {MAP.get(max_index): probability}
    response_json = json.dumps(data_response, indent=4)

    return Response(response=response_json, status=200)
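A client-side sketch for exercising this endpoint. The payload shape comes from the docstring and the get_json calls above; the route, host/port, and input filename are illustrative assumptions:

import base64
import requests

with open("equipment.png", "rb") as f:  # hypothetical input image
    payload = {"body": {"imgsource": base64.b64encode(f.read()).decode("ascii")}}

resp = requests.post("http://localhost:5000/predict", json=payload)  # assumed route
print(resp.status_code, resp.json())  # e.g. {"<label>": "<probability>"}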
Example No. 7
def main(argv):
    help = 'main.py [-t -v -f --test]\n'
    help += '-t or --twitter : runs classifier within twitter GUI\n'
    help += '-v or --verbose : prints extra information\n'
    help += '-f [folds] or --folds=[folds] : set number of folds '
    help += 'for validation\n'
    help += '--test : runs test code\n'
    help += '--timing : runs timing code\n'
    try:
        opts, args = getopt.getopt(
            argv, "htcvf:",
            ['twitter', 'classifier', 'verbose', 'test', 'timing', 'folds='])
    except getopt.GetoptError:
        print(help)
        sys.exit(2)

    twitter = False
    verbose = False
    test = False
    timing = False
    folds = 5
    for opt, arg in opts:
        if opt == '-h':
            print(help)
            sys.exit()
        elif opt in ('-t', '--twitter'):
            twitter = True
        elif opt in ('-v', '--verbose'):
            verbose = True
        elif opt == '--test':
            test = True
        elif opt in ('-f', '--folds'):
            folds = int(arg)
        elif opt == '--timing':
            timing = True
    if test:
        subprocess.call(['pytest', '..\\Test\\test_net.py'])
    else:
        clf = classifier.Classifier(folds=folds, timing=timing)
        if verbose:
            clf.run(verb=True)
        else:
            clf.run()
        if twitter:
            win = twitterGUI.TwitWindow(clf)
            win.CreateWindow()
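Since main() receives argv directly and hands it to getopt, it can be driven programmatically as well as from the shell; a usage sketch (flag meanings taken from the help text above):

main(['-v', '-f', '10'])   # verbose run with 10-fold validation
main(['--test'])           # run the pytest suite instead of the classifier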
Example No. 8
 def init_classifier(self):
     '''
         SVM initialization, stores the list of SVM objects in a list
         at instance level
     '''
     svms = []
     print('\nSVM initialization:')
     startt = time.time()
     t_lab, trn, v_lab, val = self.init_ds()
     for n in self.names:
         tmp = classifier.Classifier(
             self.get_lab(n, t_lab), trn, self.get_lab(n, v_lab), val, n)
         print('\tSVM for %s initialized' % n)
         svms.append(tmp)
     print('Classifier initialized in %s sec.\n' % (time.time() - startt))
     self.svms = svms
Example No. 9
    def __init__(self, classifier_folder):
        self.class_name = classifier_folder
        self.image_size = (227, 227)
        net_model = classifier_folder + "/net.caffemodel"
        net_proto = classifier_folder + "/deploy.prototxt"
        net_label = classifier_folder + "/labels.txt"
        self.classifier = classifier.Classifier(image_size=self.image_size,
                                                net=net_proto,
                                                weight=net_model,
                                                class_label=net_label)
        labels = self.classifier.get_label_list()

        super(UI, self).__init__()
        self.setGeometry(300, 300, 720, 720)
        self.setWindowTitle(self.class_name)
        self.w = dragarea()
        self.w.dropEvent = self.load_images_dd
        self.root_vbox = QtGui.QVBoxLayout()
        self.w.setLayout(self.root_vbox)
        self.setCentralWidget(self.w)

        self.detail_btn = QtGui.QPushButton("Details")
        self.detail_btn.clicked.connect(self.click_detail)

        self.detail_w = detail_panel(labels)
        self.detail_page = QtGui.QWidget()
        self.controller_hbox = QtGui.QHBoxLayout()
        self.detail_page.setLayout(self.controller_hbox)
        self.prev_image = QtGui.QPushButton(" < prev")
        self.prev_image.clicked.connect(self.prev_detail)
        self.next_image = QtGui.QPushButton(" next >")
        self.next_image.clicked.connect(self.next_detail)
        self.return_table = QtGui.QPushButton(" score table ")
        self.return_table.clicked.connect(self.display_table)
        self.controller_hbox.addWidget(self.prev_image)
        self.controller_hbox.addWidget(self.return_table)
        self.controller_hbox.addWidget(self.next_image)

        self.image_added = False
        self.image_acceptable = True
        self.detail_view = False
        self.detail_index = -1
        self.table = None
        self.images = []
        self.scores = []
        self.show()
Example No. 10
def trainClassifer(third_dataset, learning_rate=0.0001, decay_rate=0.99,
                   batch_size=10, training_epochs=10, display_step=1,
                   n_samples=1000, noise=1):
    cl = classifier.Classifier(learning_rate=learning_rate)

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(n_samples / batch_size)

        for i in range(total_batch):
            batch_xs, batch_ys = third_dataset[i*batch_size: (i+1)*batch_size], train_labels[i*batch_size: (i+1)*batch_size]
            cl.train_step.run({cl.x: batch_xs, cl.y_: batch_ys})
            # Display logs per epoch step
        if epoch % 10 == 0:
            print("Epoch:", '%04d' % (epoch+1), \
                  "cross_entropy=", "{:.9f}".format(cl.cross_entropy.eval({cl.x: third_dataset, cl.y_: train_labels[0:n_samples]})))
        cl.learning_rate *= decay_rate

    # Test trained model
    print(cl.accuracy.eval({cl.x: third_dataset, cl.y_: train_labels[0:n_samples]}))
Example No. 11
def HyperSearch():
    # Courtesy of Miroslaw Horbal
    base = [127, 96, 53, 3, 103, 71, 151, 1, 65, 152]
    f = fileio.Preprocessed('../data/quads10Threshold.csv')
    f.encode(base)
    train, truth = f.transformTrain(base)
    print("Performing hyperparameter selection...")

    clf = LogisticRegression(C=2.3, class_weight='auto')
    # Hyperparameter selection loop
    score_hist = []
    Cvals = np.linspace(1, 4, 32)
    eval_ = classifier.Classifier(train, truth)
    for C in Cvals:
        clf.C = C
        score = eval_.holdout(clf, nFolds=10, fraction=0.2)
        score_hist.append((score, C))
        print("C: %f Mean AUC: %f" % (C, score))
    bestC = sorted(score_hist)[-1][1]
    print("Best C value: %f" % bestC)
Example No. 12
def classify_posts(posts):
    with classifier.Classifier() as c:
        for post in posts:
            timestamp = post['taken_at_timestamp']
            picture_url = post['display_url']

            print(f"checking url {picture_url}")

            if queries.image_exists(picture_url):
                print(f"\talready processed. skipping {picture_url}")
                continue

            print(f"\timage not loaded, adding to db {picture_url}")

            image = Image(url=picture_url, post_date=timestamp)

            print(f"\tclassifying image {picture_url}")
            c.classify(image)
            queries.add(image)

    queries.commit()
Example No. 13
def classification_test(verbose=True):
    rdr = reader.Reader()
    clssfr = classifier.Classifier()

    test_data_tables = rdr.get_classifier_test_data_tables()
    correct_count = 0
    total_count = 0
    for data_table in test_data_tables:
        col_idx = 0
        while True:
            col = data_table.get_col(col_idx)
            if col is None:  # out of columns to read
                break

            (header, records) = col
            classified_type = clssfr.classify(col_idx, header, records)
            correct_type = data_table.get_type(col_idx)
            if verbose or classified_type != correct_type:
                print("Column    :", data_table.csv_file,
                      "(Column " + str(col_idx) + ")")
                print("Classified:", classified_type)
                print("Correct   :", correct_type)
            if verbose and classified_type == correct_type:
                print()
            if classified_type == correct_type:
                correct_count += 1
            else:
                print("Header    :", repr(header))
                print("Records   :", records)
                print()
            total_count += 1

            col_idx += 1

    print("===================================================")
    print("Overall result:",
          str(correct_count) + "/" + str(total_count),
          "(" + str(round(correct_count / total_count * 100, 2)) + "%)",
          "correct classifications")
Example No. 14
def fleetclassify_status(status):
    #pp.pprint(status)
    image_url = get_image_url_from_status(status)
    print("image: " + str(image_url))
    if not image_url:
        return None

    image_filepath = download_image_file(image_url)
    print("image filepath: " + str(image_filepath))
    image_paths = [image_filepath]

    result_txt = ''

    classifier_ = classifier.Classifier(LEARNED_MODEL_FILEPATH)
    predictions = classifier_.classify(image_paths)
    for image_path, prediction in zip(image_paths, predictions):
        if prediction < 0.5:
            result_txt += "艦これ"
        else:
            result_txt += "アズールレーン"
        result_txt += ' (%.3f)' % (prediction)

    return result_txt
Example No. 15
ap = argparse.ArgumentParser()
ap.add_argument(
                "--yolo",
                default='make_model_classifier/yolo-coco',
                help="base path to YOLO directory")
ap.add_argument("-c",
                "--confidence",
                type=float,
                default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-t",
                "--threshold",
                type=float,
                default=0.3,
                help="threshold when applying non-maxima suppression")
args = vars(ap.parse_args())

car_color_classifier = classifier.Classifier()

# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join([args["yolo"], "coco.names"])
LABELS = open(labelsPath).read().strip().split("\n")

# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"])
configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"])

# load our YOLO object detector trained on COCO dataset (80 classes)
# print("[INFO] loading YOLO from disk...")
Example No. 16
pcad_index = int(list(sys.argv)[1])

window_sizes = [None, 5, 10, 50, 100, 200]
window_size = window_sizes[pcad_index % 6]
if pcad_index > 5:
    baselined = True
    output_txt_dir = os.path.join(
        out_dir,
        'increasing_ordered_PCA_comps_ws_%s_baselined.txt' % str(window_size))

else:
    baselined = False
    output_txt_dir = os.path.join(
        out_dir, 'increasing_ordered_PCA_comps_ws_%s.txt' % str(window_size))

test_class = cl.Classifier()
test_class.recordings = recordings
test_class.pre_trial_window = 2
test_class.post_trial_window = 2
test_class.make_unit_response(['20Hz_cor_AB', '20Hz_acor_BA', '20Hz_acor_AB'],
                              baseline=baselined)
test_class.test_size = 0.2
n_components = 599

pcad_response, y_var = test_class.make_pcad_response(
    n_components, ['20Hz_cor_AB', '20Hz_acor_AB', '20Hz_acor_BA'],
    reassign_y_var=[['20Hz_acor_AB', '20Hz_acor_BA']],
    window_size=window_size)

with open(output_txt_dir, 'a') as f:
    all_accs = []
Example No. 17
    def predict_aspect_file(self, inputFilePath, outputFilePath, mindmap,
                            language):
        config_path = 'sentiment_config.ini'
        import pickle
        # Bind the unpickled model to its own name so it does not shadow the
        # `classifier` module used just below.
        english_classifier = pickle.load(
            open("english_classifier.pickle", 'rb'),
            fix_imports=True, encoding="latin1")
        print(1)
        try:
            clf = classifier.Classifier(config_path=config_path,
                                        language=language)
        except:
            self.messageBoxSignal.emit("Error",
                                       "Failed to load the classifier file")
            self.clear()
            return

        #analyzer = aspect_detector.AspectDetector()
        asa = aspect_sentiment_analyzer.AspectSentimentAnalyzer()

        try:
            data, fieldName = utils.read_excel_dict(inputFilePath)
            #data.columns = map(str.lower, data.columns)
        except:
            self.messageBoxSignal.emit("Error",
                                       "Failed to read the input file")
            self.clear()
            return

        full_df = pd.DataFrame()
        '''
        try:
            verbatim = str(data[0]['verbatim'])
        except:
            self.messageBoxSignal.emit("Error","There is no 'verbatim' column in the input file")
            self.clear()
            return

        try:
            verbatim = str(data[0]['date'])
        except:
            self.messageBoxSignal.emit("Error","There is no 'date' column in the input file")
            self.clear()
            return
        '''
        # Guard against a zero step on short inputs (used with % below)
        progress_step = max(1, len(data) // 80)
        c = 0
        step = 20

        for row in data:
            verbatim = str(row['verbatim'])
            cleaned_verbatim = clf.clean_verbatim(verbatim)
            # return dictionary consist of key as sentences, and value as a tuple consist of category and subcategory

            predicted_categories, words_in_mindmap_per_sentence = asa.predict_verbatim_aspect(
                verbatim.lower(), mindmap)

            rest_data_frame_columns = pd.DataFrame(row, index=[0])

            rest_data_frame_columns = rest_data_frame_columns.loc[
                :, rest_data_frame_columns.columns != 'verbatim']

            words_in_mindmap_per_verbatim = utils.change_listoflist_to_list(
                words_in_mindmap_per_sentence)
            words_in_mindmap_per_verbatim = ','.join(
                words_in_mindmap_per_verbatim)
            #words_in_mindmap_per_sentence=utils.change_listoflist_to_listofstrings(words_in_mindmap_per_sentence)

            # get month from 'Response Date' Column
            if '/' in str(row['date']):
                date = str(row['date']).split("/")
            elif '-' in str(row['date']):
                date = str(row['date']).split("-")
            else:
                self.messageBoxSignal.emit(
                    "Error",
                    "Please check date format in the file , it should have '/' or '-' separator"
                )
                self.clear()
                return

            month = calendar.month_name[int(date[1])]
            #mindmap_index=0

            try:
                sentiment, cleaned_sentence = clf.predict_sentence(
                    cleaned_verbatim)
            except:
                self.messageBoxSignal.emit("Error",
                                           "Failed to predict sentiment")
                self.clear()
                return

            if len(predicted_categories) > 0:

                for sentence in predicted_categories:
                    category_subcategory = predicted_categories[sentence]

                    # check if after cleaning there are still any word in the sentence

                    rest_data_frame_columns['month'] = month
                    rest_data_frame_columns['original verbatim'] = verbatim
                    rest_data_frame_columns[
                        'cleaned verbatim'] = cleaned_verbatim
                    rest_data_frame_columns['sentiment'] = sentiment

                    for category, subcategory in category_subcategory:
                        rest_data_frame_columns['category'] = category
                        rest_data_frame_columns['subcategory'] = subcategory

                        #rest_data_frame_columns['words in mindmap'] = words_in_mindmap_per_verbatim + '| ' + words_in_mindmap_per_sentence[mindmap_index]
                        rest_data_frame_columns[
                            'words in mindmap'] = words_in_mindmap_per_verbatim
                        full_df = pd.concat([full_df, rest_data_frame_columns],
                                            sort=False,
                                            axis=0)

            elif cleaned_verbatim != '':
                rest_data_frame_columns['month'] = month
                rest_data_frame_columns['original verbatim'] = verbatim
                rest_data_frame_columns['cleaned verbatim'] = cleaned_verbatim
                rest_data_frame_columns['sentiment'] = sentiment

                rest_data_frame_columns['category'] = "Not Defined"
                rest_data_frame_columns['subcategory'] = "Not Defined"
                rest_data_frame_columns['words in mindmap'] = "No Words"
                full_df = pd.concat([full_df, rest_data_frame_columns],
                                    sort=False,
                                    axis=0)
                #mindmap_index+=1
            c += 1

            if c % progress_step == 0:
                step += 1
                if step <= 99:
                    self.countChanged.emit(step)

        try:
            self.countChanged.emit(100)
            full_df.drop_duplicates(inplace=True)
            full_df.to_csv(outputFilePath, encoding='utf-8', index=False)
            self.messageBoxSignal.emit("Done",
                                       "The file was written successfully")
            self.clear()
        except:
            self.messageBoxSignal.emit("Error",
                                       "Failed to write the file correctly")
            self.clear()
            return
Example No. 18
 if args.study:
     # To conduct a study with n number of trials as parameter and the type of the model
     study = paramstudy.conduct_study(args.n_trials, args.model)
     if args.plot:
         # Functionality for loading a study dump is dropped in the last build since it was not a necessity
         #   and only used during development, but it can be used by removing the comment sign on the next line.
         # study = joblib.load("Results/Study/convpool_study_6.pkl")
         paramstudy.generate_graphs_from_study(study)
     exit()
 # Load the cfg for the selected model
 if args.model == "capsnet":
     cfg = variables.capsnet_cfg
 elif args.model == "convpool":
     cfg = variables.convpool_cfg
 # Initialize the model
 TClassifier = classifier.Classifier(cfg)
 # If selected, load the weights from file
 if args.load_weights is not None:
     TClassifier.load_weights(variables.saved_weights_path + args.load_weights)
     print("Weights loaded.")
 #If train is selected, train the model
 if args.train:
     TClassifier.load_images()
     TClassifier.train(cfg["epochs"], cfg["mini_batch_size"], cfg["test_batch_size"])
     #Plot the results of the training if required
     if args.plot:
         TClassifier.plot_loss()
         TClassifier.plot_accuracy()
         TClassifier.plot_test_accuracy()
 
 # Evaluate the model
Example No. 19
n_steps = 100  # number of steps in game
C = 1.0  # classifier parameter
norm_trsh = 0.001
train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels = dp.read_data()
m = len(train_dataset[0])
print('data read complete')
steps = np.arange(n_initial, len(train_labels),
                  int(np.ceil((len(train_labels) - n_initial) / n_steps)))
svm1 = svm.SVC(kernel='linear', C=C).fit(train_dataset, train_labels)
err_best = 1 - accuracy_score(test_labels, svm1.predict(test_dataset))
print('best possible performance on test dataset is ', 1 - err_best)
classifiers = []

classifier1 = cl.Classifier(train_dataset[:n_initial, :],
                            train_labels[:n_initial], valid_dataset,
                            valid_labels, C, 8.2)
err_test1 = 1 - accuracy_score(test_labels, classifier1.predict(test_dataset))
print('classifier1: initiated, error on test dataset is ', err_test1)
classifiers.append(classifier1)

classifier2 = cl.Classifier(train_dataset[:n_initial, :],
                            train_labels[:n_initial], valid_dataset,
                            valid_labels, C, 8, 'asc')
err_test2 = 1 - accuracy_score(test_labels, classifier2.predict(test_dataset))
print('classifier2: initiated, error on test dataset is ', err_test2)
classifiers.append(classifier2)

classifier3 = cl.Classifier(train_dataset[:n_initial, :],
                            train_labels[:n_initial], valid_dataset,
                            valid_labels, C, 12, 'desc')
Example No. 20
import base64
import json
import os
import tempfile

import classifier
import picto_matcher

import web

urls = ('/post_capture', 'PostCapture')

db_file = './../synsets/database/synset.sqlite3'
db = web.database(dbn='sqlite', db=db_file)

cl = classifier.Classifier()

app = web.application(urls, globals())


class PostCapture(object):

    def POST(self):

        data = web.input()
        
        wnid = data["wnid"]
        imagestring = data["captured_picto"]

        fd, path = tempfile.mkstemp(suffix=".jpeg", prefix='pc_')
Example No. 21
#-------Samples of the training set to reduce computational time-------#
# n_train_s = 80000
# X_train_s, y_train_s = sampling(X_train, y_train, n_train_s, balanced=False)
# text+= '\n-sampled train = True (%s samples)'%n_train_s
#----------------

#print 'classRatio =  ', float(len(y_train[y_train == 1]))/len(y_train[y_train == 0])

#######################################################################################################################
#                                            Train and predict                                                        #
#######################################################################################################################

print('--Training \n')

clf = classifier.Classifier(verbose=True)

start = time.time()
#clf.fit(X_train_s, y_train_s)
clf.fit(X_train, y_train)
#weight = train_mrf(y_pred_svm_train, img_train, nb_class, max_map_iter, [alpha, beta, sigma_blur], threshold_learning, y_train, threshold_sensitivity, threshold_error)
training_time = time.time() - start

print('\n--Prediction \n')

start = time.time()
y_pred = parallized_pred(X_test, clf)
y_pred_train = parallized_pred(X_train, clf)
prediction_time = time.time() - start

weight = train_mrf(y_pred_train, img_train, nb_class, max_map_iter,
Example No. 22
params = {
    'learning_rate': 0.05,
    'n_estimators': 100,
    'max_depth': 4,
    'subsample': 0.5,
    'n_jobs': 4,
    'min_child_weight': 15
}

train_params = {'early_stopping_rounds': 10, 'verbose': 0}
xgb = XGBClassifier(**params)
xgb.set_params(**train_params)

xgb_kin = clf.Classifier(model=xgb,
                         cv=skf,
                         variables=var_kin,
                         model_name='XGBoost',
                         var_name='kinetic',
                         fig_name='xgb',
                         train_params=train_params)
xgb_kin.fit(train)
xgb_kin.check_ks_and_cvm(train,
                         check_agreement=check_agreement,
                         check_correlation=check_correlation)
xgb_kin.predict(data=test)
params = {
    'learning_rate': 0.05,
    'n_estimators': 200,
    'max_depth': 4,
    'subsample': 0.5,
    'n_jobs': 4,
    'min_child_weight': 15
Example No. 23
 def classify_context(self, raw_text=None, type="string"):
     cl_o = cl.Classifier()
     return cl_o.classifier_handler(raw_text)
Example No. 24
    else:
########## generate image features and classification evalution #########
        generator = Generator(latent_dim,class_embed_dim,feature_dim)
        if torch.cuda.is_available():
            generator = generator.cuda()
        checkpoint = torch.load(Model_GAN_path)
        print("===> Loading Wasserstein_GAN Model... Start Epoch:{}".format(checkpoint['epoch']))
        generator.load_state_dict(checkpoint['G_state_dict'])
        
        if validation:
            # for fake data generation
            unique_attributes_val, unique_labels_val = get_unique_vector(data.attributes_val,data.labels_val)
            gen_features,gen_labels = generate_img_feature(generator,unique_attributes_val,unique_labels_val)

            gen_labels = map_label(gen_labels.astype(int))
            cls = classifier.Classifier(gen_features,gen_labels,data,zsl_classifier_path,lr=0.0001,batch_size=64,epoch=100,validation=True,generalized=False)
            unseen_acc = cls.unseen_acc
        else:
            if generalized:
                #unique_attributes_trainval, unique_labels_trainval = get_unique_vector(data.attributes_train, data.labels_train)
                #gen_features_trainval,gen_labels_trainval = generate_img_feature(generator,unique_attributes_trainval,unique_labels_trainval)
                unique_attributes_test_unseen, unique_labels_test_unseen = get_unique_vector(data.attributes_test_unseen, data.labels_test_unseen)
                gen_features_test_unseen,gen_labels_test_unseen = generate_img_feature(generator,unique_attributes_test_unseen,unique_labels_test_unseen)

                features_train = np.concatenate((data.features_train, gen_features_test_unseen), axis=0)
                labels_train = np.concatenate((data.labels_train,gen_labels_test_unseen),axis=0)
                labels_train = np.where(labels_train.astype(int)==1)[1]
                cls = classifier.Classifier(features_train,labels_train,data,gzsl_classifier_path,lr=0.0001,batch_size=64,epoch=100,validation=False,generalized=True)
                best_H, seen_acc, unseen_acc = cls.best_H, cls.seen_acc, cls.unseen_acc
            else:
                unique_attributes_test_unseen, unique_labels_test_unseen = get_unique_vector(data.attributes_test_unseen, data.labels_test_unseen)
Example No. 25
import pandas as pd
from sklearn.cross_validation import StratifiedShuffleSplit
import classifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, roc_curve, auc

target_column_name = 'TARGET'
# point it to your training file
filename = '../data/public/train.csv'

if __name__ == '__main__':
    df = pd.read_csv(filename)
    y = df[target_column_name].values
    X = df.drop(target_column_name, axis=1).values
    skf = StratifiedShuffleSplit(y, n_iter=2, test_size=0.5, random_state=57)
    for valid_train_is, valid_test_is in skf:
        X_valid_train = X[valid_train_is]
        y_valid_train = y[valid_train_is]
        X_valid_test = X[valid_test_is]
        y_valid_test = y[valid_test_is]
        #clf = model.Classifier()
        #clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
        clf_c = classifier.Classifier()
        clf_c.fit(X_valid_train, y_valid_train)
        y_valid_pred = clf_c.predict(X_valid_test)
        y_valid_proba = clf_c.predict_proba(X_valid_test)
        fpr, tpr, _ = roc_curve(y_valid_test, y_valid_proba[:,1])
        print('auc = ', auc(fpr, tpr))
        print('accuracy = ', accuracy_score(y_valid_pred, y_valid_test))
Example No. 26
def get_news(sources=['spiegel','faz','welt','zeit']):
    '''
    Collects all news articles from political ressort of major German newspapers
    Articles are transformed to BoW vectors and assigned to a political party
    For better visualization, articles' BoW vectors are also clustered into topics

    INPUT
    sources     a list of strings for each newspaper for which a crawl is implemented
                default ['spiegel','faz','welt','zeit']

    '''
    import classifier
    from bs4 import BeautifulSoup
    from api import fetch_url
    import urllib2
    
    articles = []
    
    # the classifier for prediction of political attributes 
    clf = classifier.Classifier(train=False)
    
    for source in sources:

        if source == 'spiegel':
            # fetching articles from spiegel.de/politik
            url = 'http://www.spiegel.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("div", { "class" : "teaser" })
            urls = ['http://www.spiegel.de'+a.findNext('a')['href'] for a in titles]
         
        if source == 'faz':
            # fetching articles from faz.net/aktuell/politik
            url = 'http://www.faz.net/aktuell/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("a", { "class" : "TeaserHeadLink" })
            urls = ['http://www.faz.net'+a['href'] for a in titles]
         
        if source == 'welt':
            # fetching articles from welt.de/politik
            url = 'http://www.welt.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("a", { "class" : "as_teaser-kicker" })
            urls = [a['href'] for a in titles]
         
        if source == 'sz-without-readability':
            # fetching articles from sueddeutsche.de/politik
            url = 'http://www.sueddeutsche.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("div", { "class" : "teaser" })
            urls = [a.findNext('a')['href'] for a in titles]
       
        if source == 'zeit':
            # fetching articles from zeit.de/politik
            url = 'http://www.zeit.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            urls = [a['href'] for a in site.findAll("a", { "class" : "teaser-small__combined-link" })]

        print "Found %d articles on %s"%(len(urls),url)
         
        # predict party from url for this source
        print "Predicting %s"%source
        for url in urls:
            try:
                title,text = fetch_url(url)
                prediction = clf.predict(text)
                prediction['url'] = url
                prediction['source'] = source
                articles.append((title,prediction))
            except:
                print('Could not get text from %s'%url)
                pass

    # do some topic modeling
    topics = kpca_cluster(map(lambda x: x[1]['text'][0], articles))
  
    # remove original article text for faster web-frontend
    for a in articles:
        a[1]['text'] = 'deleted'

    # store current news and topics
    json.dump(articles,open('news.json','wb'))
    json.dump(topics,open('topics.json','wb'))
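A usage sketch for the pipeline above: one call crawls the chosen sources, predicts a party per article, clusters topics, and leaves the two JSON files on disk (the source list here is just an example subset):

import json

get_news(sources=['spiegel', 'zeit'])
articles = json.load(open('news.json'))   # list of [title, prediction] pairs
topics = json.load(open('topics.json'))
print('%d articles collected' % len(articles))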
Example No. 27
import labeler
import classifier

l1 = labeler.Labeler()
list1 = l1.extractAndPrint(
    "http://money.cnn.com/2018/03/21/technology/mark-zuckerberg-cambridge-analytica-response/index.html"
)
print(list1)
print("\n")

l2 = labeler.Labeler()
list2 = l2.extractAndPrint(
    "https://www.nytimes.com/2018/03/27/world/europe/whistle-blower-data-mining-cambridge-analytica.html"
)
print(list2)
print("\n")

c1 = classifier.Classifier()
c1.readAndCompare(list1, list2)
Example No. 28
        #roc_auc = auc(fpr, tpr)
        #mean_tpr /= len(cv)
        #mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)

        plt.plot(mean_fpr,
                 mean_tpr,
                 'k--',
                 label='Mean ROC (area = %0.2f) for diseases %d' %
                 (mean_auc, i),
                 lw=2)
        #plt.legend(loc="lower right")

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC and AUC for 3 diseases')
    plt.show()


if __name__ == '__main__':
    nb_clf = clf.Classifier('NB')
    tree_clf = clf.Classifier('Tree')

    nb_y_hat = cro_vld(nb_clf, 10)
    tree_y_hat = cro_vld(tree_clf, 10)

    plot_auc(nb_clf.y, nb_y_hat)
    plot_auc(tree_clf.y, tree_y_hat)
Example No. 29
        print('--------------------------')
        X_train_df = X_df.iloc[train_is].copy()
        y_train_df = y_df.iloc[train_is].copy()
        X_test_df = X_df.iloc[test_is].copy()
        y_test_df = y_df.iloc[test_is].copy()
        y_train_clf = y_train_df['molecule'].values
        y_train_reg = y_train_df['concentration'].values
        y_test_clf = y_test_df['molecule'].values
        y_test_reg = y_test_df['concentration'].values

        fe_clf = feature_extractor_clf.FeatureExtractorClf()
        fe_clf.fit(X_train_df, y_train_clf)
        X_train_array_clf = fe_clf.transform(X_train_df)
        X_test_array_clf = fe_clf.transform(X_test_df)

        clf = classifier.Classifier()
        clf.fit(X_train_array_clf, y_train_clf)
        y_proba_clf = clf.predict_proba(X_test_array_clf)
        y_pred_clf = labels[np.argmax(y_proba_clf, axis=1)]
        error = 1 - accuracy_score(y_test_clf, y_pred_clf)
        print('error = %s' % error)

        fe_reg = feature_extractor_reg.FeatureExtractorReg()
        for i, label in enumerate(labels):
            X_train_df.loc[:, label] = (y_train_df['molecule'] == label)
            X_test_df.loc[:, label] = y_proba_clf[:, i]
        fe_reg.fit(X_train_df, y_train_reg)
        X_train_array_reg = fe_reg.transform(X_train_df)
        X_test_array_reg = fe_reg.transform(X_test_df)

        reg = regressor.Regressor()
Example No. 30
def callback():
    # Auth Step 4: Requests refresh and access tokens
    access_token = request.args['access_token']

    # Auth Step 6: Use the access token to access Spotify API
    authorization_header = {"Authorization": "Bearer {}".format(access_token)}
    params = {"limit": "50"}

    # Get profile data
    user_profile_api_endpoint = "{}/me/top/artists".format(SPOTIFY_API_URL)
    profile_response = requests.get(user_profile_api_endpoint,
                                    params=params,
                                    headers=authorization_header)
    profile_data = json.loads(profile_response.text)

    print(profile_data)

    # Combine profile and playlist data to display
    display_arr = [profile_data]

    list_of_genres = [a['genres'] for a in profile_data['items']]

    BB = []
    CC = []

    for ll in list_of_genres:
        LL = list(set(ll))
        for kk in LL:
            BB.append(kk.split())
        CC.append(BB)
        BB = []

    flat_genres = [
        item2 for sublist in CC for item in sublist for item2 in item
    ]

    numOfGenres = len(genres)

    mean_genre = [0] * numOfGenres
    for genre in flat_genres:
        genre = clean_genre(genre)
        try:
            idx = genres.index(genre)
        except ValueError:
            continue
        mean_genre[idx] += 1

    normalizer = sum(mean_genre)
    if normalizer == 0:
        normalizer = 1
    for i in range(0, len(mean_genre)):
        mean_genre[i] = mean_genre[i] / normalizer
    clf = classifier.Classifier(genres, k)
    maxScores, pred = clf.classify(mean_genre, X, Y)

    result = []

    for j in range(0, 20):
        for i in range(0, k):
            for track in Y[1][pred[1][0][i]]:
                if track['id'] == maxScores[j][0]:
                    #print(track['permalink_url'])
                    #print(maxScores[j][1])
                    #print(track['id'])
                    #print(track['genre'])
                    result.append(track['stream_url'])
                    break

    return {"data": result}