Example #1
def case_worksheet_10B():
    # Code for this system: https://rsmopt.com/system/concrete-strength/

    # C: amount of cement (between 1.8 and 4.2 kg)
    # W: amount of water (between 0.4 and 1.1 L)

    c1 = c(2.5,
           3,
           2.5,
           3,
           center=2.75,
           range=[2.5, 3],
           name="cement",
           units='kg')
    w1 = c(0.5,
           0.5,
           0.9,
           0.9,
           center=0.7,
           range=[0.5, 0.9],
           name='Water',
           units='L')
    C1 = c1.to_coded()
    W1 = w1.to_coded()
    y1 = c(14476, 14598, 14616, 14465, name="Strength", units="-")
    expt1 = gather(C=C1, W=W1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ C * W", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "C", "W")

    # Predict the points, using the model:
    prediction_1 = predict(mod_base1, C=C1, W=W1)
    print(prediction_1)
    print(y1 - prediction_1)

    # Very nonlinear: saddle: up left, or bottom right
    # Bottom right: (C, W) = (2, -2)
    C2 = C1.extend([2])
    W2 = W1.extend([-2])

    # Predict at this point: 14794
    predict(mod_base1, C=C2, W=W2)
    c2 = C2.to_realworld()
    w2 = W2.to_realworld()

    # Actual: at c=3.25; w=0.4 (constraint): 14362. So wrong direction
    y1 = c(14476, 14598, 14616, 14465, name="Strength", units="-")
    expt1 = gather(C=C1, W=W1, y=y1, title="First experiment")

    # Try the other way: C, W= -2, 2
    C2 = C1.extend([-2])
    W2 = W1.extend([+2])

    # Predict at this point: 14830
    predict(mod_base1, C=C2, W=W2)
    c2 = C2.to_realworld()  # 2.25
    w2 = W2.to_realworld()  # 1.1
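
The coded/real-world conversions above follow the usual center and half-range convention (real value = center + coded value * half-range). A minimal illustration, using a hypothetical helper rather than the worksheet library, that reproduces the c = 3.25 kg figure quoted in the comments:

def coded_to_realworld(coded, center, half_range):
    # Hypothetical helper: real-world value = center + coded * half-range
    return center + coded * half_range

# Cement: center 2.75 kg, half-range 0.25 kg, so coded +2 -> 3.25 kg
print(coded_to_realworld(+2, 2.75, 0.25))  # 3.25
# Water: center 0.7 L, half-range 0.2 L, so coded -2 -> 0.3 L (clipped to the 0.4 L constraint above)
print(coded_to_realworld(-2, 0.7, 0.2))  # 0.3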
Example #2
def main():
    # generate data from physical model
    T = 16  # time interval
    dt = 0.1  # integration step
    # train data (oscillation with initial angle = pi/6):
    t, train = phys.simulate_pendulum(0, np.array([np.pi / 6, 0]), T, dt=dt)
    # test data (oscillation with initial angle = pi/4):
    _, test1 = phys.simulate_pendulum(0, np.array([np.pi / 4, 0]), T, dt=dt)
    # test data (fixed point with angle = 0):
    _, test2 = phys.simulate_pendulum(0, np.array([0.0, 0.0]), T, dt=dt)

    N = 5  # size of the history window (for LSTM)

    # train lstm model
    # model_lstm = models.train_lstm(train, N)
    # model_lstm.save('bin_models/pendulum_lstm')
    # or load pre-built one
    model_lstm = load_model('bin_models/pendulum_lstm')

    # train linear model
    # model_linear = models.train_linear(train, N)
    # model_linear.save('bin_models/pendulum_linear')
    # or load pre-built one
    model_linear = load_model('bin_models/pendulum_linear')

    for model, n, title in zip([model_lstm, model_linear], [N, 1], [
            "LSTM: non-physical predictions, training data are memorized",
            "Linear map: physical behaviour generalization"
    ]):
        # use model for prediction with training initial angle = pi/6
        pred = models.predict(model,
                              train[:n],
                              step_count=int(T / dt - n),
                              N=n)
        # use model for prediction with initial angle = pi/4
        pred1 = models.predict(model,
                               test1[:n],
                               step_count=int(T / dt - n),
                               N=n)
        # use model for prediction with initial angle = 0
        pred2 = models.predict(model,
                               test2[:n],
                               step_count=int(T / dt - n),
                               N=n)

        plot(title, train, test1, test2, pred, pred1, pred2)

    plt.show()
    return 0
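
For context, models.predict above is presumably an autoregressive rollout: it is seeded with the first n states and then iterated step_count = T/dt - n times. A minimal sketch of that idea, under the assumption that the model maps the last N states to the next one (the helper name and signature below are illustrative, not the repository's API):

import numpy as np

def rollout(step_fn, seed, step_count, N):
    # Illustrative autoregressive rollout: repeatedly feed the last N states back in.
    states = list(seed)  # seed holds the first N states
    for _ in range(step_count):
        window = np.asarray(states[-N:])  # history window of length N
        states.append(step_fn(window))  # step_fn returns the next state
    return np.asarray(states)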
Example #3
    def get(self):

        # GET PARAMS
        args = parser.parse_args()

        ## General
        user_query = args['content']
        user_model = args['model']
        user_lang = args['lang']

        if (args['content']):

            print('[INFO REQUEST] Received Parameters')
            print('[INFO REQUEST - content ] ', user_query)
            print('[INFO REQUEST - Model ] ', user_model)
            print('[INFO REQUEST - Lang ] ', user_lang)

            application.logger.info('[LOG] Query: %s', user_query)

            # PREDICT

            result_prediction = predict(user_query, user_lang, user_model)

            print(result_prediction)

            return result_prediction
        else:
            return "message : Missing Parameters"
Example #4
def get_file(name):
    times, _ = load_data('var_tec_reshape.npz')
    model = load_model('models/%s' % name)
    x = timestamp_to_features(times)
    p = predict(x, model)

    np.savez('data/'+name, times=times, var_tec_maps=p)
Example #5
def run(task):
    """Run the train/predict flow for `task`."""
    st.markdown(f'<h1 align="center">{task}</h1>', unsafe_allow_html=True)
    train_button = st.sidebar.button("Train")
    sidebar_train_message = st.sidebar.empty()
    main_train_message = st.empty()
    slug = slugify(task)
    trained = is_trained(slug)
    if trained:
        sidebar_train_message.success(TRAINED_MODEL_MESSAGE)
    else:
        sidebar_train_message.warning(NO_TRAINED_MODEL_MESSAGE)
        main_train_message.warning(NO_TRAINED_MODEL_MESSAGE)
    if train_button:
        sidebar_train_message.info(TRAINING_MESSAGE)
        main_train_message.info(TRAINING_MESSAGE)
        train(slug, sidebar_train_message, TRAINING_MESSAGE)
        sidebar_train_message.success(TRAINED_MODEL_MESSAGE)
        main_train_message.empty()
        trained = is_trained(slug)
    if trained:
        show_metrics = st.sidebar.checkbox("Show metrics")
        if show_metrics:
            metrics = get_metrics(slug)
            for key, value in metrics.items():
                st.sidebar.text(f"{key}: {value}")
        user_input = st.text_area("Input")
        st.text("Output")
        if user_input:
            output = predict(slug, user_input)
            display_function = DISPLAY_FUNCTIONS[slug]
            display_function(output)
Example #6
def hello_world():
    form = InputForm()
    res = {}
    if form.validate_on_submit():
        input_text = form.input_text.data
        res = predict(input_text)
    return render_template('index.html', form=form, res=res)
Example #7
def case_w2():
    """
    Teaching case week 2: https://yint.org/w2
    """
    # T = time used for baking:
    #      (-1) corresponds to 80 minutes and (+1) corresponds to 100 minutes
    T = c(-1, +1, -1, +1, lo=80, hi=100)

    # F = quantity of fat used:
    #      (-1) corresponds to 20 g and (+1) corresponds to 30 grams
    F = c(-1, -1, +1, +1, lo=20, hi=30)

    # Response y is the crispiness
    y = c(37, 57, 49, 53, units='crispiness')

    # Fit a linear model
    expt = gather(T=T, F=F, y=y)
    model_crispy = lm("y ~ T + F + T*F", expt)
    summary(model_crispy)

    # See how the two factors affect the response:
    contour_plot(model_crispy)
    #interaction_plot(T, F, y)
    #interaction_plot(F, T, y)

    # Make a prediction with this model:
    xT = +2  # corresponds to 110 minutes
    xF = -1  # corresponds to 20 grams of fat
    y_hat = predict(model_crispy, T=xT, F=xF)
    print(f'Predicted value is: {y_hat} crispiness.')
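
The coded levels map to real units through the midpoint and half-range of the lo/hi settings, which is where the comments' 110 minutes and 20 grams come from. A quick arithmetic check (plain Python, not library code):

# T: lo=80, hi=100 -> center 90 min, half-range 10 min; coded +2 -> 90 + 2*10 = 110 minutes
# F: lo=20, hi=30  -> center 25 g,  half-range 5 g;    coded -1 -> 25 - 1*5  = 20 grams
print(90 + 2 * 10, 25 + (-1) * 5)  # 110 20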
Example #8
    def nlp(self):
        # keyword extraction wrapper
        if not self.is_downloaded or not self.is_parsed:
            raise Exception('You should download and parse first!')
        tem = transform(self.text, self.mapping)
        result = predict(self.model, tem)
        self.set_category(result[0])
Example #9
def classify():
    results = None
    form = InputForm()
    if form.validate_on_submit():
        content = form.content.data
        results = predict(config, content)
        results = json.dumps(results)
        print(results)
    return render_template('classify.html', form=form, results=results)
Example #10
def main():
    # generate data from physical model
    T = 10 # time interval
    dt = 0.1 # integration step
    # train data:
    t, train = phys.simulate_epidemiology(0, np.array([0.99, 0.01, 0]), T, dt=dt)
    # test data 1st:
    _, test1 = phys.simulate_epidemiology(0, np.array([0.4, 0.1, 0]), T, dt=dt)
    # test data 2nd:
    _, test2 = phys.simulate_epidemiology(0, np.array([1, 0.0, 0]), T, dt=dt)


    N = 5 # size of the history window (for LSTM)

    # train lstm model
    model_lstm = models.train_lstm(train, N)
    model_lstm.save('bin_models/epidemiology_lstm')
    # or load pre-built one
    model_lstm = load_model('bin_models/epidemiology_lstm')


    # for training a polynomial neural network (matrix Lie transform) follow link
    # https://github.com/andiva/DeepLieNet/blob/master/demo/SIR_Identification.ipynb
    # load pre-built 3rd order Lie transform:
    model_linear = SIR_Lie_Transform()


    for model, n, title in zip([model_lstm, model_linear],
                               [N, 1],
                               ["LSTM: non-physical predictions, training data are memorized",
                                "Linear map: physical behaviour generalization"]):
        # use model for prediction with training initial conditions
        pred = models.predict(model, train[:n], step_count=int(T/dt-n), N=n)
        # use model for prediction with initial condition of test1
        pred1 = models.predict(model, test1[:n], step_count=int(T/dt-n), N=n)
        # use model for prediction with initial condition of test2
        pred2 = models.predict(model, test2[:n], step_count=int(T/dt-n), N=n)

        plot(title, train, test1, test2, pred, pred1, pred2)
    
    plt.show()
    return 0
Example #11
def competition_run():
    data = GalaxyData()

    (training_features, training_solutions) = data.get_training_data()
    (test_features, _) = data.get_test_data()

    # Predict
    (clf, columns) = models.default_model(training_features, training_solutions, 5)
    predicted_solutions = models.predict(clf, test_features, columns)

    data.save_solution(predicted_solutions)
Example #12
    def stream_train(train_dataset, stream_dataset):
        logger.info('---------------- stream train ----------------')

        logger.info('---------------- initial train ----------------')
        novelty_detector = train(train_dataset)
        logger.info('---------------- initial test ----------------')
        test(stream_dataset, novelty_detector)

        novelty_dataset = dataset.NoveltyDataset(train_dataset)
        iter_streamloader = enumerate(
            DataLoader(dataset=stream_dataset, batch_size=1, shuffle=True))
        buffer = []

        for i, (feature, label) in iter_streamloader:
            sample = (feature.squeeze(dim=0), label.squeeze(dim=0))
            with torch.no_grad():
                net.eval()
                feature, label = feature.to(net.device), label.item()
                feature, out = net(feature)
                predicted_label, distance = models.predict(feature, prototypes)
                prob = models.probability(feature,
                                          predicted_label,
                                          prototypes,
                                          gamma=config.gamma)
                detected_novelty = novelty_detector(predicted_label, distance)
                real_novelty = label not in novelty_detector.known_labels

            if detected_novelty:
                buffer.append(sample)

            logger.debug("[stream %5d]: %d, %d, %7.4f, %7.4f, %5s, %5s, %4d",
                         i + 1, label, predicted_label, prob, distance,
                         real_novelty, detected_novelty, len(buffer))

            if len(buffer) == 1000:
                logger.info("novelty dataset size before extending: %d",
                            len(novelty_dataset))

                # todo try different sample methods by YW.
                # novelty_dataset.extend(buffer, config.novelty_buffer_sample_rate)

                novelty_dataset.extend_by_select(
                    buffer, config.novelty_buffer_sample_rate, prototypes, net,
                    soft, use_log)

                logger.info("novelty dataset size after extending: %d",
                            len(novelty_dataset))
                logger.info(
                    '---------------- incremental train ----------------')
                novelty_detector = train(novelty_dataset)
                buffer.clear()

        return novelty_detector
Example #13
def index():
    if not request.is_json:
        return create_json(203, "Format is not a JSON. Check headers.")

    test = request.json
    missing = []

    for value in values_list:
        if value not in test.keys():
            missing.append(value)

    if len(missing) > 0:
        return create_json(204, "Missing values in request",
                           {"values": missing})

    from models import predict
    from keras.models import model_from_json
    # load json and create model
    json_file = open('model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    # load weights into new model
    model.load_weights("model.h5")

    try:
        req_data = request.get_json(force=True)
        id, summary, host, lat, lng, prop_type, room, accom, baths, bedrooms,\
        beds, dep, fee, extra, mini, cancel = \
        req_data['id'], req_data['summary'], req_data['host_is_superhost'], \
        req_data['latitude'], req_data['longitude'], req_data[
            'property_type'], \
        req_data['room_type'], req_data['accomodates'], req_data['bathrooms'], \
        req_data['bedrooms'], req_data['beds'], req_data['security_deposit'], \
        req_data['cleaning_fee'], req_data['extra_people'], req_data[
            'minimum_nights'], req_data['cancellation_policy']
    except Exception as e:
        return create_json(400, e)
    else:
        try:
            result = predict(id, summary, host, lat, lng, prop_type, room,
                             accom, baths, bedrooms, beds, dep, fee, extra,
                             mini, cancel, model)

            req_data['prediction'] = result[1]
            f = open("predict.log", "a")
            f.write(json.dumps(req_data) + "\n")
            f.close()
            t = {"listing_id": result[0], "listing_prediction": result[1]}
            return create_json(200, "Listing Updated", t)
        except Exception as e:
            return "{}".format(e)
    shutdown_server()
Example #14
def index():
    if request.method == 'GET':
        return render_template('index.html', data='')
    elif request.method == 'POST':
        data = [0] * 13
        for k, v in request.form.items():
            if 'v' in k:
                i = int(k[1:].strip()) - 1
                data[i] = int(v) if v else 0
        data = models.predict(data)[0]
        return render_template('index.html', data=data)
    else:
        return request.url, 404
Example #15
def login_page():
    error = ''
    try:
        c, conn = connection()
        if request.method == "POST":

            data = c.execute(
                "SELECT * FROM users WHERE username = '******'".format(
                    thwart(request.form['username'])))

            data = c.fetchone()[2]

            kd = pd.read_csv("/var/www/FlaskApp/data.csv")
            subjects = kd["subject"].unique()
            subject = subjects[2]
            vector = kd.loc[kd.subject == subject,
                            "H.period":"H.Return"].iloc[34].values
            pv = prepare_data(kd, subject)
            d = model(pv[0],
                      pv[2],
                      pv[1],
                      pv[3],
                      num_iterations=4000,
                      learning_rate=0.05,
                      print_cost=False)
            arr = np.array([vector, vector])
            lr_res = predict(d['w'], d['b'], arr.transpose())
            sc = StatisticClassifier(kd, 0.95)
            sc_res = sc.singleClassification(kd, vector)

            if (sha256_crypt.verify(request.form['password'], data)) and (
                (lr_res[0, 0] + sc_res) / 2 > 0.5):
                session['logged_in'] = True
                session['username'] = request.form['username']

                flash("You are now logged in")
                return redirect(url_for("reg"))

            else:
                error = "Invalid credentials, try again."

        gc.collect()

        return render_template("login.html", error=error)

    except Exception as e:
        flash(e)
        error = "Invalid credentials, try again."
        return render_template("login.html", error=error)
Example #16
def prediction(frame, answer, name):
    x = name.get()
    name.delete(0, "end")

    if len(x) == 0 or x == "None":
        answer.config(text="please fill every field", fg="red")
    else:
        out = predict(x)
        # out = x

        if out == 1:
            answer.config(text="ABUSIVE", fg="red")

        else:
            answer.config(text="NON ABUSIVE", fg="green")
Example #17
def pred_edges(df, pkl, baseline='n'):
    if baseline == 'y':
        print('Performing baseline segmentation...')
        df['edges'] = 0
        return

    # Build X for prediction and add the result to df under the 'edges' column
    feature_cols = df.columns[~df.columns.isin(exclude_cols)]
    X = df[feature_cols]
    # load_model
    obj = load_model(pkl)
    # predict
    grid = obj['grid']
    y_pred = predict(X, grid)
    df['edges'] = y_pred
    return
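
The feature selection above keeps every column of df that is not listed in exclude_cols. A tiny, self-contained pandas illustration of that pattern (toy frame and exclusion list, purely for clarity):

import pandas as pd

df = pd.DataFrame({"a": [1], "b": [2], "label": [0]})
exclude_cols = ["label"]
feature_cols = df.columns[~df.columns.isin(exclude_cols)]
print(list(feature_cols))  # ['a', 'b']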
Example #18
    def test(test_dataset, novelty_detector):
        logger.info('---------------- test ----------------')
        dataloader = DataLoader(dataset=test_dataset,
                                batch_size=1,
                                shuffle=False)

        logger.info("known labels: %s", novelty_detector.known_labels)
        logger.info("distance average: %s", novelty_detector.average_distances)
        logger.info("distance std: %s", novelty_detector.std_distances)
        logger.info("detector threshold: %s", novelty_detector.thresholds)

        detection_results = []

        with torch.no_grad():
            net.eval()
            for i, (feature, label) in enumerate(dataloader):
                feature, label = feature.to(net.device), label.item()
                feature, out = net(feature)
                predicted_label, distance = models.predict(feature, prototypes)
                prob = models.probability(feature,
                                          predicted_label,
                                          prototypes,
                                          gamma=config.gamma)
                detected_novelty = novelty_detector(predicted_label, distance)
                real_novelty = label not in novelty_detector.known_labels

                detection_results.append(
                    (label, predicted_label, real_novelty, detected_novelty))

                logger.debug("[test %5d]: %d, %d, %7.4f, %7.4f, %5s, %5s",
                             i + 1, label, predicted_label, prob, distance,
                             real_novelty, detected_novelty)

        tp, fp, fn, tn, cm, acc, acc_all = novelty_detector.evaluate(
            detection_results)
        precision = tp / (tp + fp + 1)
        recall = tp / (tp + fn + 1)

        logger.info("accuracy of known labels: %.4f", acc)
        logger.info("accuracy of all labels: %.4f", acc_all)
        logger.info("true positive: %d", tp)
        logger.info("false positive: %d", fp)
        logger.info("false negative: %d", fn)
        logger.info("true negative: %d", tn)
        logger.info("precision: %7.4f", precision)
        logger.info("recall: %7.4f", recall)
        logger.info("confusion matrix: \n%s", cm)
Example #19
def execute_trade(n_intervals, buy_sell_data, entry_exit_df, model):
    if entry_exit_df:
        entry_exit_df = pd.DataFrame.from_dict(entry_exit_df)
    is_sma = (model == 'SMA10')
    entry_exit_df = crypto_stream.generate_signals(
        crypto_stream.get_data_from_table())
    if not is_sma and len(entry_exit_df) > 20:
        entry_exit_df = models.predict(entry_exit_df, model, 20)
    if len(entry_exit_df) < 10:
        raise PreventUpdate
    else:
        account = buy_sell_data[-1]
        account = crypto_stream.execute_trade_strategy(entry_exit_df, account)
        print(account)
        if account:
            buy_sell_data.append(account)
    return buy_sell_data, entry_exit_df.to_dict('series'), get_trade_fig(
        entry_exit_df), get_sma_fig(entry_exit_df)
Example #20
def inference(params, dataloaders, use_tqdm=True):
    device = utils.get_device()
    
    # mc dropout
    predict = load_trained_model(params, device)
    ymc_hats, eta_1s = mc_dropout(params, predict, dataloaders['test'], device)
    
    # inherent noise
    predict.apply(dropout_off)
    for x,y in dataloaders['valid']:
        x,y = x.to(device), y.to(device)
        break
    eta_2sq = np.mean(cpu(predict((x, y[:,0,1:])))[:,0])
    
    # total noise
    etas = np.sqrt(eta_1s + eta_2sq)
    
    return ymc_hats, etas
Example #21
def mc_dropout(params, predict, dataloader, device, use_tqdm=True):
    predict = predict.apply(dropout_on)
    
    pbar = range(params['inference']['B'])
    if use_tqdm:
        from tqdm import tqdm
        pbar = tqdm(pbar)

    y_hats = []
    for b in pbar:
        for x,y in dataloader:
            x,y = x.to(device), y.to(device)
            break
        y_hat_b = predict((x, y[:,0,1:]))
        y_hats.append(cpu(y_hat_b))
    
    ymc_hats = np.mean(y_hats, axis=0)
    eta_1s   = np.mean((ymc_hats[:,0] - np.stack(y_hats)[:,:,0])**2, axis=0)
    return ymc_hats, eta_1s
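
As a self-contained check of what these two functions compute: ymc_hats is the mean prediction over the B stochastic dropout passes, eta_1s is the per-sample variance of the first output across those passes, and inference() then adds the inherent-noise variance in quadrature. Toy arrays, no model required:

import numpy as np

# Pretend B = 3 stochastic passes over a batch of 2 samples with 1 output each
y_hats = [np.array([[1.0], [2.0]]),
          np.array([[1.2], [1.8]]),
          np.array([[0.8], [2.2]])]

ymc_hats = np.mean(y_hats, axis=0)  # MC mean prediction, shape (2, 1)
eta_1s = np.mean((ymc_hats[:, 0] - np.stack(y_hats)[:, :, 0])**2, axis=0)
print(eta_1s)  # per-sample variance across passes: [0.02666667 0.02666667]

eta_2sq = 0.16  # a made-up inherent-noise variance, standing in for the estimate in inference()
print(np.sqrt(eta_1s + eta_2sq))  # total predictive standard deviation per sample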
Example #22
def run_training_test(model, verbose=0):
    """Entry Point to run models

    Args:
        model: model function to run.
    """
    # Load the data and split into training and validation sets
    data = GalaxyData(feature_extraction.raw_9, scale_features=False)

    (test_features, test_solutions) = data.get_test_data()
    (training_features, training_solutions) = data.get_training_data()

    # Train and Predict Model
    (clf, columns) = model(training_features, training_solutions, verbose)
    predicted_solutions = models.predict(clf, test_features, columns)

    # Evaluate Predictions
    score = evaluate.get_rmse(test_solutions, predicted_solutions)
    print(score)
Example #23
def run(model, verbose=0):
    """Entry Point to run models

    Args:
        model: model function to run.
    """
    # Load the data and split into training and validation sets
    data = GalaxyData(feature_extraction.hog_features, scale_features=False)

    (training_features, training_solutions,
     validation_features, validation_solutions) = data.split_training_and_validation_data(50)

    # Train and Predict Model
    (clf, columns) = model(training_features, training_solutions, verbose)
    predicted_validation_solutions = models.predict(clf, validation_features, columns)

    # Evaluate Predictions
    valid_rmse = evaluate.get_errors_clf(clf, validation_features, validation_solutions)
    train_rmse = evaluate.get_errors_clf(clf, training_features, training_solutions)
    print(" Validation RMSE: ", valid_rmse)
    print(" Training RMSE: ", train_rmse)
Example #24
def _predict(model_type, params):
    """ Prediction on the test set """

    _check_key(params, batch_size_k)
    batch_size = int(params[batch_size_k])

    # Predictions of a neural network model
    if model_type == NN_k:
        # Checking that specific attributes for NN are specified
        _check_key(params, last_layer_width_k)
        _check_key(params, depth_k)
        _check_key(params, hidden_act_k)
        _check_key(params, outlayer_act_k)

        last_layer_width = int(params[last_layer_width_k])
        depth = int(params[depth_k])
        hidden_act = params[hidden_act_k]
        outlayer_act = params[outlayer_act_k]

        return model_nn.predict(paths.model_loc, paths.test_prepared_input_loc, paths.test_labels_loc, batch_size,
                                last_layer_width, depth, hidden_act, outlayer_act)

    elif model_type == SVM_k or model_type == kernel_ridge_k or model_type == ridge_k:
        return models.predict(paths.model_loc, paths.test_prepared_input_loc, paths.test_labels_loc, batch_size)
Example #25
def train(sess, model, optimizer, log_dir, batch_size, num_sweeps_per_summary,
          num_sweeps_per_save, train_input_seqs, train_reset_seqs,
          train_label_seqs, test_input_seqs, test_reset_seqs, test_label_seqs):
    """ Train a model and export summaries.

    `log_dir` will be *replaced* if it already exists, so it certainly
    shouldn't be anything generic like `/home/user`.

    Args:
        sess: A TensorFlow `Session`.
        model: An `LSTMModel`.
        optimizer: An `Optimizer`.
        log_dir: A string. The full path to the log directory.
        batch_size: An integer. The number of sequences in a batch.
        num_sweeps_per_summary: An integer. The number of sweeps between
            summaries.
        num_sweeps_per_save: An integer. The number of sweeps between saves.
        train_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        train_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        train_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        test_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
    """

    ema = tf.train.ExponentialMovingAverage(decay=0.5)
    update_train_loss_ema = ema.apply([model.loss])
    train_loss_ema = ema.average(model.loss)
    tf.scalar_summary('train_loss_ema', train_loss_ema)

    train_accuracy = tf.placeholder(tf.float32, name='train_accuracy')
    train_edit_dist = tf.placeholder(tf.float32, name='train_edit_dist')
    test_accuracy = tf.placeholder(tf.float32, name='test_accuracy')
    test_edit_dist = tf.placeholder(tf.float32, name='test_edit_dist')
    values = [train_accuracy, train_edit_dist, test_accuracy, test_edit_dist]
    tags = [value.op.name for value in values]
    tf.scalar_summary('learning_rate', optimizer.learning_rate)
    tf.scalar_summary(tags, tf.pack(values))

    summary_op = tf.merge_all_summaries()

    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    summary_writer = tf.train.SummaryWriter(logdir=log_dir, graph=sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.initialize_all_variables())

    num_sweeps_visited = 0
    start_time = time.time()
    train_gen = data.sweep_generator(
        [train_input_seqs, train_reset_seqs, train_label_seqs],
        batch_size=batch_size, shuffle=True, num_sweeps=None)
    while num_sweeps_visited <= optimizer.num_train_sweeps:

        if num_sweeps_visited % num_sweeps_per_summary == 0:

            train_prediction_seqs = models.predict(
                sess, model, train_input_seqs, train_reset_seqs)
            train_accuracy_, train_edit_dist_ = metrics.compute_metrics(
                train_prediction_seqs, train_label_seqs)
            test_prediction_seqs = models.predict(
                sess, model, test_input_seqs, test_reset_seqs)
            test_accuracy_, test_edit_dist_ = metrics.compute_metrics(
                test_prediction_seqs, test_label_seqs)
            summary = sess.run(summary_op,
                               feed_dict={train_accuracy: train_accuracy_,
                                          train_edit_dist: train_edit_dist_,
                                          test_accuracy: test_accuracy_,
                                          test_edit_dist: test_edit_dist_})
            summary_writer.add_summary(summary, global_step=num_sweeps_visited)

            status_path = os.path.join(log_dir, 'status.txt')
            with open(status_path, 'w') as f:
                line = '%05.1f      ' % ((time.time() - start_time)/60)
                line += '%04d      ' % num_sweeps_visited
                line += '%.6f  %08.3f     ' % (train_accuracy_,
                                               train_edit_dist_)
                line += '%.6f  %08.3f     ' % (test_accuracy_,
                                               test_edit_dist_)
                print(line, file=f)

            label_path = os.path.join(log_dir, 'test_label_seqs.pkl')
            with open(label_path, 'w') as f:
                cPickle.dump(test_label_seqs, f)

            pred_path = os.path.join(log_dir, 'test_prediction_seqs.pkl')
            with open(pred_path, 'w') as f:
                cPickle.dump(test_prediction_seqs, f)

            vis_filename = 'test_visualizations_%06d.png' % num_sweeps_visited
            vis_path = os.path.join(log_dir, vis_filename)
            fig, axes = data.visualize_predictions(test_prediction_seqs,
                                                   test_label_seqs,
                                                   model.target_size)
            axes[0].set_title(line)
            plt.tight_layout()
            plt.savefig(vis_path)
            plt.close(fig)

        if num_sweeps_visited % num_sweeps_per_save == 0:
            saver.save(sess, os.path.join(log_dir, 'model.ckpt'))

        train_inputs, train_resets, train_labels = train_gen.next()
        # We squeeze here because otherwise the targets would have shape
        # [batch_size, duration, 1, num_classes].
        train_targets = data.one_hot(train_labels, model.target_size)
        train_targets = train_targets.squeeze(axis=2)

        _, _, num_sweeps_visited = sess.run(
            [optimizer.optimize_op,
             update_train_loss_ema,
             optimizer.num_sweeps_visited],
            feed_dict={model.inputs: train_inputs,
                       model.resets: train_resets,
                       model.targets: train_targets,
                       model.training: True})
Example #26
MODEL_PATH = './model/model'

X_TRAIN_PATH = sys.argv[1]
Y_TRAIN_PATH = sys.argv[2]
X_TEST_PATH = sys.argv[3]
ANS_PATH = sys.argv[4]

x = utils.load_data(X_TRAIN_PATH)
y = utils.load_data(Y_TRAIN_PATH).flatten()
x_test = utils.load_data(X_TEST_PATH)

x, max, min = utils.rescaling(x)
x_test = utils.scaling(x_test, max, min)

b, w = models.logistic_regression(x,
                                  y,
                                  lr=1,
                                  epoch=10000,
                                  validation_rate=0.1,
                                  optimizer='adagrad',
                                  early_stopping=True,
                                  patience=10)

y_pred = models.predict(x_test, b, w)

# print(y_pred)

# utils.save_ans(y_pred)
utils.save_ans_dir(y_pred, ANS_PATH)
# utils.save_model(b, w, MODEL_PATH)
# utils.save_scaler(max, min, SCALER_PATH)
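
utils.rescaling and utils.scaling presumably fit a min-max transform on the training features and then reuse the same max/min on the test set, which is why both statistics are passed along. A minimal sketch of that idea, with illustrative helper names rather than the repository's:

import numpy as np

def rescale_fit(x):
    # Min-max scale column-wise and return the statistics for later reuse
    x_max, x_min = x.max(axis=0), x.min(axis=0)
    return (x - x_min) / (x_max - x_min), x_max, x_min

def rescale_apply(x, x_max, x_min):
    # Apply a previously fitted min-max transform (e.g. to the test set)
    return (x - x_min) / (x_max - x_min)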
Example #27
def case_worksheet_10():

    # Price: 0 # 0.25 above and 0.25 $/part below
    p = c(0.75,
          0.75,
          0.65,
          0.85,
          0.65,
          0.85,
          center=0.75,
          range=[0.65, 0.85],
          name="Price",
          units='$/part')
    t = c(325,
          325,
          250,
          250,
          400,
          400,
          center=325,
          range=[250, 400],
          name='Throughput',
          units='parts/hour')
    P1 = p.to_coded()
    T1 = t.to_coded()
    y1 = c(7740,
           7755,
           5651,
           5812,
           7363,
           7397,
           name="Response: profit per hour",
           units="$/hour")
    expt1 = gather(P=P1, T=T1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ P * T", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "P", "T", show=False)

    # Predict the points, using the model:
    prediction_1 = predict(mod_base1, P=P1, T=T1)
    print(prediction_1)
    print(y1 - prediction_1)

    # We see clear non-linearity, especially when viewed in the direction of T

    # Try anyway to make a prediction, to verify it
    # P ~ 0.15 and T ~ 2.0:
    P2 = P1.extend([0.15])
    T2 = T1.extend([2.0])
    p2 = P2.to_realworld()
    t2 = T2.to_realworld()
    print(t2)  # 475
    print(p2)  # 0.765
    print(predict(mod_base1, P=P2, T=T2))

    # Should have a predicted profit of 8599, but actual is 4654.
    # Confirms our model is in a very nonlinear region in the T=Throughput
    # direction.

    # Perhaps our factorial was far too big. Make the range smaller in T.
    # Prior range = [250;400]; now try [287.5; ]

    # Second factorial: re-use some of the points
    # * Original center point become bottom left
    # * Original (+1, +1) become top right
    p3 = c(0.75,
           0.85,
           0.75,
           0.85,
           0.65,
           0.85,
           0.765,
           center=0.80,
           range=[0.75, 0.85],
           name="Price",
           units='$/part')
    t3 = c(325,
           325,
           400,
           400,
           400,
           250,
           475,
           center=(325 + 400) / 2,
           range=(325, 400),
           name='Throughput',
           units='parts/hour')

    # Responses for the second factorial:
    y3 = c(7755,
           7784,
           7373,
           7397,
           7363,
           5812,
           4654,
           name="Response: profit per hour",
           units="$/hour")
    P3 = p3.to_coded()
    T3 = t3.to_coded()
    expt3 = gather(P=P3, T=T3, y=y3, title="Smaller ranges")
    mod_base3 = lm("y ~ P * T", data=expt3)
    summary(mod_base3)
    contour_plot(mod_base3, "P", "T")

    # Predict directly from least squares model, the next experiment
    # at coded values of (+2, +2) seems good
    predict(mod_base3, P=+2, T=+2)
    # Prediction is 7855

    # In RW units that corresponds to: p=0.9 and t=437.5 = 438 parts/hour
    P4 = P3.extend([+2])
    T4 = T3.extend([+2])
    print(P4.to_realworld())
    print(T4.to_realworld())

    # ACTUAL value achieved is 6325. Not a good prediction yet either.
    # Add this point to the model. This point is below any of the base factorial
    # points!
    y4 = y3.extend([6325])
    expt4 = gather(P=P4, T=T4, y=y4, title="Adding the next exploration")
    mod_base4 = lm("y ~ P * T", data=expt4)
    contour_plot(mod_base4, "P", "T")

    # It is clear that this model does not meet our needs. We need a model with
    # quadratic fitting, nonlinear terms, to estimate the nonlinear surface.
    expt5 = expt4.copy()
    mod_base5 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt5)
    print(summary(mod_base5))

    # add the xlim input in a second round
    contour_plot(mod_base5, "P", "T", xlim=(-2, 4))

    # Run at (P=3, T=-0.3) for the next run
    P6 = P4.extend([+3])
    T6 = T4.extend([-0.3])
    print(P6.to_realworld())
    print(T6.to_realworld())

    # Corresponds to p = 0.95 $/part, t=351 parts/hour
    # Predict = 7939
    # Actual = 7969. Really good matching.
    # Update the model and check
    y6 = y4.extend([7969])
    expt6 = gather(P=P6,
                   T=T6,
                   y=y6,
                   title="After extrapolation, based on quadratic term")
    mod_base6 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt6)
    contour_plot(mod_base6, "P", "T", xlim=(-2, 5))

    # Extrapolate again to (P=5, T=-0.3) for the next run
    P7 = P6.extend([+5])
    T7 = T6.extend([-0.3])
    print(P7.to_realworld())
    print(T7.to_realworld())
    predict(mod_base6, P=5, T=-0.3)

    # to P = 1.05, T=351 parts/hour
    # Predict = 7982
    # Actual = 8018. Better than predicted. Perhaps surface is a steeper quadratic.
    # Update the model and check
    y7 = y6.extend([8018])
    expt7 = gather(P=P7, T=T7, y=y7, title="With 2 extrapolations")
    mod_base7 = lm("y ~ P*T + I(P**2) + I(T**2)", data=expt7)
    contour_plot(mod_base7, "P", "T", xlim=(-2, 148))
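
For reference, the real-world values quoted in the comments follow from the center and half-range of the second factorial (Price: center 0.80 $/part, half-range 0.05; Throughput: center 362.5 parts/hour, half-range 37.5). A quick arithmetic check of the (+2, +2) point:

print(0.80 + 2 * 0.05)   # 0.9 $/part
print(362.5 + 2 * 37.5)  # 437.5, i.e. about 438 parts/hour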
Example #28
def main():
    logger = setup_logger(filename='log.txt')

    train_epoch_number = 10
    batch_size = 100

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # dataset = np.loadtxt(models.Config.dataset_path, delimiter=',')
    # np.random.shuffle(dataset[:5000])

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # trainset = models.DataSet(dataset[:5000])
    trainset = torchvision.datasets.CIFAR10(root='./data/cifar10',
                                            train=True,
                                            download=False,
                                            transform=transform)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=24)

    # testset = models.DataSet(dataset[5000:])
    testset = torchvision.datasets.CIFAR10(root='./data/cifar10',
                                           train=False,
                                           download=False,
                                           transform=transform)
    testloader = DataLoader(dataset=testset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=24)

    prototypes = {}

    # net = models.CNNNet(device=device)
    net = models.DenseNet(device=device,
                          number_layers=8,
                          growth_rate=12,
                          drop_rate=0.0)
    logger.info("DenseNet Channels: %d", net.channels)

    gcpl = models.GCPLLoss(threshold=models.Config.threshold,
                           gamma=models.Config.gamma,
                           b=models.Config.threshold,
                           tao=1.0,
                           beta=0.5,
                           lambda_=0.001)
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    if not os.path.exists("pkl"):
        os.mkdir("pkl")

    if os.path.exists(models.Config.pkl_path):
        state_dict = torch.load(models.Config.pkl_path)
        try:
            net.load_state_dict(state_dict)
            logger.info("Load state from file %s.", models.Config.pkl_path)
        except RuntimeError:
            logger.error("Loading state from file %s failed.",
                         models.Config.pkl_path)

    for epoch in range(train_epoch_number):
        logger.info("Trainset size: %d, Epoch number: %d", len(trainset),
                    epoch + 1)

        running_loss = 0.0

        for i, (features, labels) in enumerate(trainloader):
            features = features.to(net.device)
            sgd.zero_grad()
            features = net(features).view(batch_size, 1, -1)
            loss = gcpl(features, labels, prototypes)
            loss.backward()
            sgd.step()

            running_loss += loss.item() / batch_size

            logger.debug("[%3d, %5d] loss: %7.4f", epoch + 1, i + 1,
                         loss.item() / batch_size)

        torch.save(net.state_dict(), models.Config.pkl_path)

        prototype_count = 0

        for c in prototypes:
            prototype_count += len(prototypes[c])

        logger.info("Prototypes Count: %d", prototype_count)

        # if (epoch + 1) % 5 == 0:
        distance_sum = 0.0
        correct = 0

        for i, (feature, label) in enumerate(testloader):
            feature = net(feature.to(net.device)).view(1, -1)
            predicted_label, probability, min_distance = models.predict(
                feature, prototypes)

            if label == predicted_label:
                correct += 1

            distance_sum += min_distance

            logger.debug(
                "%5d: Label: %d, Prediction: %d, Probability: %7.4f, Distance: %7.4f, Accuracy: %7.4f",
                i + 1, label, predicted_label, probability, min_distance,
                correct / (i + 1))

        logger.info("Distance Average: %7.4f", distance_sum / len(testloader))
        logger.info("Accuracy: %7.4f\n", correct / len(testloader))
Example #29
def train(sess, model, optimizer, log_dir, batch_size, num_sweeps_per_summary,
          num_sweeps_per_save, train_input_seqs, train_reset_seqs,
          train_label_seqs, test_input_seqs, test_reset_seqs, test_label_seqs):
    """ Train a model and export summaries.

    `log_dir` will be *replaced* if it already exists, so it certainly
    shouldn't be anything generic like `/home/user`.

    Args:
        sess: A TensorFlow `Session`.
        model: An `LSTMModel`.
        optimizer: An `Optimizer`.
        log_dir: A string. The full path to the log directory.
        batch_size: An integer. The number of sequences in a batch.
        num_sweeps_per_summary: An integer. The number of sweeps between
            summaries.
        num_sweeps_per_save: An integer. The number of sweeps between saves.
        train_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        train_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        train_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        test_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
    """

    ema = tf.train.ExponentialMovingAverage(decay=0.5)
    update_train_loss_ema = ema.apply([model.loss])
    train_loss_ema = ema.average(model.loss)
    tf.summary.scalar('train_loss_ema', train_loss_ema)

    train_accuracy = tf.placeholder(tf.float32, name='train_accuracy')
    train_edit_dist = tf.placeholder(tf.float32, name='train_edit_dist')
    test_accuracy = tf.placeholder(tf.float32, name='test_accuracy')
    test_edit_dist = tf.placeholder(tf.float32, name='test_edit_dist')
    #values = [train_accuracy, train_edit_dist, test_accuracy, test_edit_dist]
    #tags = [value.op.name for value in values]

    tf.summary.scalar('learning_rate', optimizer.learning_rate)
    for value in [
            train_accuracy, train_edit_dist, test_accuracy, test_edit_dist
    ]:
        tf.summary.scalar(value.op.name, value)

    #tf.summary.scalar(tags, tf.stack(values))

    summary_op = tf.summary.merge_all()

    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    summary_writer = tf.summary.FileWriter(logdir=log_dir, graph=sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())

    num_sweeps_visited = 0
    start_time = time.time()
    train_gen = data.sweep_generator(
        [train_input_seqs, train_reset_seqs, train_label_seqs],
        batch_size=batch_size,
        shuffle=True,
        num_sweeps=None)
    while num_sweeps_visited <= optimizer.num_train_sweeps:

        if num_sweeps_visited % num_sweeps_per_summary == 0:

            train_prediction_seqs = models.predict(sess, model,
                                                   train_input_seqs,
                                                   train_reset_seqs)
            train_accuracy_, train_edit_dist_ = metrics.compute_metrics(
                train_prediction_seqs, train_label_seqs)
            test_prediction_seqs = models.predict(sess, model, test_input_seqs,
                                                  test_reset_seqs)
            test_accuracy_, test_edit_dist_ = metrics.compute_metrics(
                test_prediction_seqs, test_label_seqs)
            summary = sess.run(summary_op,
                               feed_dict={
                                   train_accuracy: train_accuracy_,
                                   train_edit_dist: train_edit_dist_,
                                   test_accuracy: test_accuracy_,
                                   test_edit_dist: test_edit_dist_
                               })
            summary_writer.add_summary(summary, global_step=num_sweeps_visited)

            status_path = os.path.join(log_dir, 'status.txt')
            with open(status_path, 'w') as f:
                line = '%05.1f      ' % ((time.time() - start_time) / 60)
                line += '%04d      ' % num_sweeps_visited
                line += '%.6f  %08.3f     ' % (train_accuracy_,
                                               train_edit_dist_)
                line += '%.6f  %08.3f     ' % (test_accuracy_, test_edit_dist_)
                print(line, file=f)

            label_path = os.path.join(log_dir, 'test_label_seqs.pkl')
            with open(label_path, 'wb') as f:
                cPickle.dump(test_label_seqs, f)

            pred_path = os.path.join(log_dir, 'test_prediction_seqs.pkl')
            with open(pred_path, 'wb') as f:
                cPickle.dump(test_prediction_seqs, f)

            vis_filename = 'test_visualizations_%06d.png' % num_sweeps_visited
            vis_path = os.path.join(log_dir, vis_filename)
            fig, axes = data.visualize_predictions(test_prediction_seqs,
                                                   test_label_seqs,
                                                   model.target_size)
            axes[0].set_title(line)
            plt.tight_layout()
            plt.savefig(vis_path)
            plt.close(fig)

        if num_sweeps_visited % num_sweeps_per_save == 0:
            saver.save(sess, os.path.join(log_dir, 'model.ckpt'))

        train_inputs, train_resets, train_labels = train_gen.__next__()
        # We squeeze here because otherwise the targets would have shape
        # [batch_size, duration, 1, num_classes].
        train_targets = data.one_hot(train_labels, model.target_size)
        train_targets = train_targets.squeeze(axis=2)

        _, _, num_sweeps_visited = sess.run(
            [
                optimizer.optimize_op, update_train_loss_ema,
                optimizer.num_sweeps_visited
            ],
            feed_dict={
                model.inputs: train_inputs,
                model.resets: train_resets,
                model.targets: train_targets,
                model.training: True
            })
Example #30
def case_worksheet_10C():
    # Price: 0 # 0.05 above and 0.05 $/part below
    p1 = c(0.75,
           0.75,
           0.7,
           0.8,
           0.7,
           0.80,
           center=0.75,
           range=[0.70, 0.80],
           name="Price",
           units='$/part')
    t1 = c(325,
           325,
           300,
           300,
           350,
           350,
           center=325,
           range=[300, 350],
           name='Throughput',
           units='parts/hour')
    P1 = p1.to_coded()
    T1 = t1.to_coded()
    y1 = c(7082,
           7089,
           6637,
           6686,
           7181,
           7234,
           name="Response: profit per hour",
           units="$/hour")
    expt1 = gather(P=P1, T=T1, y=y1, title="First experiment")

    mod_base1 = lm("y ~ P * T", data=expt1)
    summary(mod_base1)
    contour_plot(mod_base1, "P", "T")

    # Predict the points, using the model:
    prediction_1 = predict(mod_base1, P=P1, T=T1)
    print(prediction_1)
    print(y1 - prediction_1)

    # We see clear non-linearity, especially when viewed in the direction of T

    # Try anyway to make a prediction, to verify it
    # P ~ 0.7 and T ~ 2.0:
    P2 = P1.extend([0.7])
    T2 = T1.extend([2.0])
    p2 = P2.to_realworld()
    t2 = T2.to_realworld()
    print(p2)  # 0.785
    print(t2)  # 375
    print(predict(mod_base1, P=P2, T=T2))

    # Should have a predicted profit of 7550, but actual is 7094.
    # Confirms our model is in a very nonlinear region in the T=Throughput
    # direction.

    # Add axial points, starting in the T direction:
    P3 = P2.extend([0, 0])
    T3 = T2.extend([1.68, -1.68])
    p3 = P3.to_realworld()
    t3 = T3.to_realworld()
    print(p3)  # 0.75, 0.75
    print(t3)  # 367, 283

    # Now build model with quadratic term in the T direction
    y3 = y1.extend([7094, 7174, 6258])
    expt3 = gather(P=P3, T=T3, y=y3, title="With axial points")
    mod_base3 = lm("y ~ P * T + I(T**2)", data=expt3)
    summary(mod_base3)
    contour_plot(mod_base3, "P", "T", xlim=(-1.5, 5))
    #

    #Try extrapolating far out: (P, T) = (4, 1)
    P4 = P3.extend([4])
    T4 = T3.extend([1])
    p4 = P4.to_realworld()
    t4 = T4.to_realworld()
    print(p4)  # 0.95
    print(t4)  # 350

    predict(mod_base3, P=P4, T=T4)  # 7301
    # Actual: 7291  # great! Keep going
    y4 = y3.extend([7291])

    #Try extrapolating far out: (P, T) = (6, 1)
    P5 = P4.extend([6])
    T5 = T4.extend([1])
    p5 = P5.to_realworld()
    t5 = T5.to_realworld()
    print(p5)  # 1.05
    print(t5)  # 350

    predict(mod_base3, P=P5, T=T5)  # 7344
    # Actual: 7324  # great! Keep going
    y5 = y4.extend([7324])

    # Visualize the model first
    expt5 = gather(P=P5, T=T5, y=y5, title="With extrapolated points")
    mod_base5 = lm("y ~ P * T + I(T**2)", data=expt5)
    summary(mod_base5)
    contour_plot(mod_base5, "P", "T", xlim=(-1.5, 18))

    #Try extrapolating further out: (P, T) = (10, 1)
    P6 = P5.extend([10])
    T6 = T5.extend([1])
    p6 = P6.to_realworld()
    t6 = T6.to_realworld()
    print(p6)  # 1.25
    print(t6)  # 350

    predict(mod_base3, P=P6, T=T6)  # 7431
    # Actual: 7378  # Not matching; rebuild the model eventually.
    y6 = y5.extend([7378])
Example #31
def predict(store=None, data=None, predict_index=None, **kwargs):
    if predict_index is None:
        raise ValueError("You must specify a predict_index kw arg")
    return models.predict(Configuration(**kwargs),
                          DataContext(store, data),
                          predict_index=predict_index)
Example #32
def predict(store=None, data=None, predict_index=None, **kwargs):
    if predict_index is None:
        raise ValueError("You must specify a predict_index kw arg")
    return models.predict(Configuration(**kwargs),
            DataContext(store, data), predict_index=predict_index)
Example #33
def modelo():
    if request.method == 'POST':
        file = request.files['file']
        test_run = request.form['test']

        X_train, X_test, y_train, y_test, df_recoleta = models.prepararDatos(
            file, test_run)

        data_input = {
            'X_train': X_train,
            'y_train': y_train,
            'X_test': X_test,
            'y_test': y_test,
            'test_run': test_run,
            'df': df_recoleta
        }

        result_lr = models.predict(data_input, 'lr')
        result_lasso = models.predict(data_input, 'lasso')
        result_ridge = models.predict(data_input, 'ridge')

        if (test_run == 'Simple'):
            data_complex = {
                'LinearRegression': {
                    'score': result_lr['score']
                },
                'Lasso': {
                    'score': result_lasso['score'],
                    'coef': {},
                    'alpha_': ''
                },
                'Ridge': {
                    'score': result_ridge['score'],
                    'coef': {},
                    'alpha_': ''
                }
            }
        else:
            data_complex = {
                'LinearRegression': {
                    'score': result_lr['score'].tolist()
                },
                'Lasso': {
                    'score': result_lasso['score'].tolist(),
                    'coef': result_lasso['coef_'],
                    'alpha_': result_lasso['alpha_']
                },
                'Ridge': {
                    'score': result_ridge['score'].tolist(),
                    'coef': result_ridge['coef_'],
                    'alpha_': result_ridge['alpha_']
                }
            }

        results = {
            'LinearRegression': {
                'score': data_complex['LinearRegression']['score'],
                'intercept': result_lr['intercept_'],
                'coef': result_lr['coef_'].tolist(),
                'r2': result_lr['r2'],
                'mae': result_lr['MAE'],
                'mse': result_lr['MSE'],
                'rmse': result_lr['RMSE']
            },
            'Lasso': {
                'score': data_complex['Lasso']['score'],
                'intercept': result_lasso['intercept_'],
                'coef': data_complex['Lasso']['coef'],
                'r2': result_lasso['r2'],
                'mae': result_lasso['MAE'],
                'mse': result_lasso['MSE'],
                'rmse': result_lasso['RMSE'],
                'alpha': data_complex['Lasso']['alpha_']
            },
            'Ridge': {
                'score': data_complex['Ridge']['score'],
                'intercept': result_ridge['intercept_'],
                'coef': data_complex['Ridge']['coef'],
                'r2': result_ridge['r2'],
                'mae': result_ridge['MAE'],
                'mse': result_ridge['MSE'],
                'rmse': result_ridge['RMSE'],
                'alpha': data_complex['Ridge']['alpha_']
            }
        }

        return jsonify({'result': results})
Example #34
    print_cv_metrics(metrics)

save_cv_metrics(metrics=results,
                file_name=args.results_folder +
                'exp1_raw-data_cv_cv-results.txt')

print('\nPlotting algorithm comparison...')
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(scores)
ax.set_xticklabels(names)
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
plt.show()
plt.savefig(args.graphs_folder + 'exp1_raw-data_alg-comparison.png')

print('\nPredicting...')
test_results = []
for name, model in SPOT_CHECK_MODELS:
    print("\nMaking predictions with %s..." % name)
    predictions = predict(model, X_train, y_train, X_test)
    metrics = calculate_metrics(y_test, predictions)
    metrics['model_name'] = name

    test_results.append(metrics)
    print_model_metrics(metrics)

save_model_metrics(metrics=test_results,
                   file_name=args.results_folder +
                   'exp1_raw-data_cv_model-results.txt')
Example #35
def run_from_config(args):
    # parse config file
    params = parse_config_file(main_folder_path + args.path)

    #### training set ####

    # if flag is set to true generate a training set to given path
    if params["make-training-set"]:
        # get pdb ids from file
        pdb_ids = params["training-ids"]
        # if the first id is equal to 'all' then use all ids in dataset
        if pdb_ids[0] == "all":
            pdb_ids = prot_dataset.get_prot_list()

        # generate examples
        res, X, y = prot_dataset.generate_random_examples(
            pdb_ids,
            short_win=params["short-window"],
            large_win=params["large-window"],
            contact_threshold=params["contact-threshold"],
            ex_per_chain=params["examples-per-chain"])
        # if balance flag is set to true
        if params["balance"]:
            # balance number of positive and negative examples
            res, X, y = prot_dataset.balance_neg_pos(res, X, y,
                                                     params["positive-lb"])
        # output dataset
        prot_dataset.training_set_out(
            X, y, main_folder_path + params["training-set-path"])

    #### model fitting ####

    # if flag is set to true fit the model

    if params["fit-model"]:
        # parse training set
        training_set = prot_dataset.training_set_in(
            main_folder_path + params["training-set-path"])

        # initialize the standard classifier
        predictor = models.make_predictor(model_type=params["model-type"],
                                          config=params[params["model-type"]],
                                          training_set=training_set,
                                          features=params["features"])

        models.model_out(predictor,
                         main_folder_path + params["trained-model-path"])

    #### prediction ####

    # if flag is set to true predict given ids
    if params["predict"]:

        # load model from file
        predictor = models.model_in(main_folder_path +
                                    params["trained-model-path"])
        pdb_ids = params["predict-ids"]
        # if specified on the command line, substitute those ids
        if args.pdb_ids:
            pdb_ids = args.pdb_ids
        # if the first element is 'all'
        if pdb_ids[0] == "all":
            # clear list
            pdb_ids = []
            # append every id that is not in the training set
            for pdb in prot_dataset.get_prot_list():
                if pdb not in params["training-ids"]:
                    pdb_ids.append(pdb)

        # check if pdb and ring file exists
        prot_dataset.download_pdb(pdb_ids)
        prot_dataset.download_ring(pdb_ids)

        # run predict command with given parameters
        models.predict(clf=predictor,
                       pdb_ids=pdb_ids,
                       features=params["features"],
                       short_win=params["short-window"],
                       large_win=params["large-window"],
                       contact_threshold=params["contact-threshold"],
                       path=main_folder_path + params["result-folder"],
                       blur=params["probability-blur"],
                       blur_w=params["probability-blur-len"])

    return