Example #1
from flask import jsonify  # this handler lives in a Flask app

# `sound`, `model`, and `preprocessor` are module-level objects defined
# elsewhere in the application
def predict():
    """Get the current sound data and predict the class and per-class probabilities."""
    y = sound.get_data()
    y1 = 0                      # default predicted class when no sound data is available
    _yy = [0, 0, 0, 0, 0, 0]    # default probabilities for the six classes
    if y:
        features = preprocessor(list(y))
        y1 = model.predict(features).tolist()[0]
        _yy = model.predict_proba(features).tolist()[0]

    # unpack the six per-class probabilities
    y2, y3, y4, y5, y6, y7 = _yy
    return jsonify(points=[y1, y2, y3, y4, y5, y6, y7])
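Note: for model.predict to accept the result, preprocessor must return a 2-D
feature array for the single sample. A minimal sketch of such a function,
assuming the raw sound data is a flat sequence of numbers (the real feature
extraction used by this app is not shown in the example):

import numpy as np

def preprocessor(samples):
    # hypothetical sketch: scikit-learn estimators expect shape
    # (n_samples, n_features), so reshape the flat sample list into one row
    return np.asarray(samples, dtype=float).reshape(1, -1)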
Example #2
def main():

    cfg = Config()

    data_dir = '/kaggle/input/lish-moa'
    save_path = './'
    load_path = '../input/model-resnet-tensorflow'
    runty = 'eval'
    assert runty in ('traineval', 'eval'), \
        "Run type is wrong. Should be 'traineval' or 'eval'"

    train_features = pd.read_csv(os.path.join(data_dir, 'train_features.csv'))
    train_targets_scored = pd.read_csv(
        os.path.join(data_dir, 'train_targets_scored.csv'))
    train_targets_nonscored = pd.read_csv(
        os.path.join(data_dir, 'train_targets_nonscored.csv'))
    test_features = pd.read_csv(os.path.join(data_dir, 'test_features.csv'))
    sub = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

    train_targets_scored = train_targets_scored.drop(['sig_id'], axis=1)
    train_targets_nonscored = train_targets_nonscored.drop(['sig_id'], axis=1)

    non_ctl_idx = train_features.loc[
        train_features['cp_type'] != 'ctl_vehicle'].index.to_list()
    train_features = train_features.drop(
        ['sig_id', 'cp_type', 'cp_time', 'cp_dose'], axis=1)
    train_features = train_features.iloc[non_ctl_idx]
    train_targets_scored = train_targets_scored.iloc[non_ctl_idx]
    train_targets_nonscored = train_targets_nonscored.iloc[non_ctl_idx]
    test_features = test_features.drop(['sig_id', 'cp_dose', 'cp_time'],
                                       axis=1)

    gs = train_features.columns.str.startswith('g-')
    cs = train_features.columns.str.startswith('c-')

    # read the main predictors
    with open('../input/src-resnet-tensorflow/main_predictors.json') as f:
        tmp = json.load(f)
        preds = tmp['start_predictors']

    oof = tf.constant(0.0)
    predictions = np.zeros(
        (test_features.shape[0], train_targets_scored.shape[1]))

    for seed in cfg.seeds:

        mskf = MultilabelStratifiedKFold(n_splits=cfg.nfolds,
                                         shuffle=True,
                                         random_state=seed)

        for f, (t_idx, v_idx) in enumerate(
                mskf.split(X=train_features, y=train_targets_scored)):
            x_train, x_valid = preprocessor(train_features.iloc[t_idx].values,
                                            train_features.iloc[v_idx].values,
                                            gs, cs)
            _, data_test = preprocessor(
                train_features.iloc[t_idx].values,
                test_features.drop('cp_type', axis=1).values, gs, cs)
            x_train_2, x_valid_2 = preprocessor_2(
                train_features.iloc[t_idx][preds].values,
                train_features.iloc[v_idx][preds].values)
            _, data_test_2 = preprocessor_2(
                train_features.iloc[t_idx][preds].values,
                test_features[preds].values)
            y_train_sc = train_targets_scored.iloc[t_idx].values
            y_train_ns = train_targets_nonscored.iloc[t_idx].values
            y_valid_sc = train_targets_scored.iloc[v_idx].values
            y_valid_ns = train_targets_nonscored.iloc[v_idx].values
            n_features = x_train.shape[1]
            n_features_2 = x_train_2.shape[1]

            trte = train_test(x_train=x_train,
                              x_valid=x_valid,
                              data_test=data_test,
                              x_train_2=x_train_2,
                              x_valid_2=x_valid_2,
                              data_test_2=data_test_2,
                              y_train_sc=y_train_sc,
                              y_train_ns=y_train_ns,
                              y_valid_sc=y_valid_sc,
                              y_valid_ns=y_valid_ns,
                              save_path=save_path,
                              load_path=load_path,
                              fold=f,
                              runty=runty)

            y_val, predictions_ = trte.run_k_fold(seed)
            oof += logloss(tf.constant(y_valid_sc, dtype=tf.float32),
                           tf.constant(y_val, dtype=tf.float32)) / (
                               cfg.nfolds * len(cfg.seeds))
            predictions += predictions_ / (cfg.nfolds * len(cfg.seeds))

    print("CV log_loss: ", oof)

    target_cols = train_targets_scored.columns

    sub.iloc[:, 1:] = predictions
    sub.loc[test_features['cp_type'] == 'ctl_vehicle', sub.columns[1:]] = 0

    # clip the submission
    # sub_c = sub_clip(sub, test_features)
    # sub_c.to_csv('submission.csv', index=False)

    sub.to_csv('submission_resnet.csv', index=False)
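The commented-out sub_clip call above suggests the author experimented with
clipping the predicted probabilities before writing the submission. A minimal
sketch of what such a helper might look like; the bounds and the body are
assumptions, not the original implementation:

import numpy as np

def sub_clip(sub, test_features, low=0.0005, high=0.9995):
    # hypothetical: bound probabilities away from 0 and 1 (log loss heavily
    # penalizes confident mistakes) and zero out the control-vehicle rows
    sub_c = sub.copy()
    sub_c.iloc[:, 1:] = np.clip(sub_c.iloc[:, 1:].values, low, high)
    sub_c.loc[test_features['cp_type'] == 'ctl_vehicle', sub_c.columns[1:]] = 0
    return sub_c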
    """ if (runty == 'train'):
Example #3
# This file creates the 'pipe' NLP model and saves it as model.joblib

# Import libraries
import pandas as pd
import joblib

from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from utils import preprocessor

tfidf = TfidfVectorizer()
classifier = LinearSVC()

if __name__ == "__main__":
    # you may need to change the following to your location of sentiments.csv
    df = pd.read_csv('DATA/sentiments.csv')
    pipe = make_pipeline(preprocessor(), tfidf, classifier)
    pipe.fit(df['text'], df['sentiment'])
    joblib.dump(pipe, 'model.joblib')

    # reload the pipeline and sanity-check it on a sample input
    newpipe = joblib.load('model.joblib')
    print('sentiment of "awesome place" is', newpipe.predict(pd.Series(['awesome place']))[0])
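For the pipeline above to work, utils.preprocessor() must return a
scikit-learn compatible transformer (note that it is called, not passed as a
function). A minimal sketch of one plausible implementation, assuming a simple
regex-based cleaner; the actual utils module may differ:

import re
from sklearn.preprocessing import FunctionTransformer

def _clean_texts(texts):
    # lower-case each document and collapse non-word characters to spaces
    return [re.sub(r'[\W]+', ' ', t.lower()).strip() for t in texts]

def preprocessor():
    # FunctionTransformer wraps the cleaner so it can sit inside a Pipeline
    return FunctionTransformer(_clean_texts)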
Example #4
import pandas as pd
import utils
from nltk.corpus import stopwords

df = pd.read_csv('./movie_data.csv')

# show the last 50 characters of the first review, raw and then preprocessed
print(df.loc[0, 'review'][-50:])

print(utils.preprocessor(df.loc[0, 'review'][-50:]))

# apply the preprocessor to every review in the dataset
df['review'] = df['review'].apply(utils.preprocessor)

stop = stopwords.words('english')
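This example assumes utils.preprocessor(text) cleans a single review string. A
common implementation for movie-review data strips HTML markup while keeping
emoticons; a sketch under that assumption, not necessarily the original utils
code:

import re

def preprocessor(text):
    # drop HTML tags
    text = re.sub(r'<[^>]*>', '', text)
    # keep emoticons such as :) or :-( so sentiment cues survive cleaning
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    # remove remaining non-word characters, then re-append the emoticons
    return (re.sub(r'[\W]+', ' ', text.lower()) +
            ' ' + ' '.join(emoticons).replace('-', '')).strip()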
            sql = """
            select `id`, `published`, `title`, `description`
            from zero_day19 
            where attackType = 'dos'
            and platform = 'windows'
            and published BETWEEN '2012-01-01' AND '{}-{}-{}'
            order by `id`;
            """.format(str(year), month[id], day[id])
        cur.execute(sql)

        results = cur.fetchall()

        for row in results:
            postid = [row[0] for row in results]
            postdatetime = [row[1] for row in results]
            threadtitle = [row[2] for row in results]
            postcontent = [row[3] for row in results]

        store = []

        for i in range(len(postid)):
            # post = [postid[i], str(postdatetime[i]), utils.preprocessor(str(threadtitle[i] + postcontent[i]))]
            post = [postid[i], str(postdatetime[i]), utils.preprocessor(str(threadtitle[i]))]
            store.append(post)
            # string = "'{0}', '{1}', '{2}'".format(str(post[0]), str(post[1]), str(post[2]))
            # allopenscedges.write("%s\n" % string)

        pickle.dump(store,preprocessedposts)
        preprocessedposts.close()

# output the pre-processed text into a plain text file (one record per line, comma separated)
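A minimal sketch of that export step, assuming store holds the
[postid, postdatetime, cleaned_title] records built above; the output filename
is an illustrative choice:

import csv

# write one record per line, comma separated
with open('preprocessed_posts.txt', 'w', newline='') as out:
    csv.writer(out).writerows(store)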