def main():
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()

    ap.add_argument("-d",
                    "--data_dir",
                    required=True,
                    help="Path to the images directory")
    ap.add_argument("-m",
                    "--model_path",
                    required=True,
                    help="Path to the the model")
    ap.add_argument("-i",
                    "--input",
                    type=int,
                    required=True,
                    default=299,
                    help="The input size")
    ap.add_argument("-o",
                    "--output",
                    required=True,
                    help="Path to the output file")

    args = vars(ap.parse_args())
    size = args['input']

    # model
    print("Loading model...")
    subdir = args["model_path"]
    # pick the last .h5 file (sorted so the choice is deterministic)
    model_path = sorted(glob.glob(subdir + '/*.h5'))[-1]
    model = load_model(model_path)

    # data
    print("Reading data...")
    filenames, _, _ = read_data(args["data_dir"])
    n_files = len(filenames)

    # encoding
    print("Encoding images...")
    index_to_filename = {}
    filename_to_path = {}
    features = np.zeros((n_files, model.output.shape[1]))
    for i in tqdm.tqdm(range(n_files)):
        image_id = extract_image_id(filenames[i])
        index_to_filename[i] = image_id
        filename_to_path[image_id] = filenames[i]
        #print("->", image_id)
        image = load_image(filenames[i], (size, size))
        image = image.reshape((1, ) + image.shape)

        features[i] = np.squeeze(model(image))

    # save transfer values
    np.save(args["output"], features)
    with open("index_to_filename.json", "w") as f:
        json.dump(index_to_filename, f, indent=4, ensure_ascii=False)
    with open("filename_to_path.json", "w") as f:
        json.dump(filename_to_path, f, indent=4, ensure_ascii=False)
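A minimal follow-up sketch (not part of the example above) of how the saved transfer values and the index_to_filename.json mapping might be loaded back and queried; "features.npy" stands in for whatever path was passed as --output.

import json
import numpy as np

features = np.load("features.npy")
with open("index_to_filename.json") as f:
    index_to_filename = json.load(f)  # json stores the integer indices as strings

# rank all images by cosine similarity to the first one
query = features[0]
scores = features @ query / np.maximum(
    np.linalg.norm(features, axis=1) * np.linalg.norm(query), 1e-12)
for idx in np.argsort(scores)[::-1][:5]:
    print(index_to_filename[str(idx)], round(float(scores[idx]), 4))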
Example #2
File: bw_cv.py  Project: tglauch/KDE_Tool
def cross_validate(args):
    assert len(args['bw_key']) == len(args['bw'])
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])
    args['phi0'] *= 1e-18  # correct units
    kf = KFold(n_splits=args['kfold'], random_state=args['rs'], shuffle=True)
    config = read_config()
    print('Load MC: {}'.format(config['IC_MC']['path']))
    mc = np.load(str(config['IC_MC']['path']))[:]
    mc = mc_cut(mc)
    if args['weights'] == 'pl':
        weights = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                       gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc['conv']
    elif args['weights'] == 'conv+pl':
        diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                           gamma=args['gamma'])
        weights = mc['conv'] + diff_weight
        print('Rates [1/yr]:')
        print(np.sum(mc['conv']) * np.pi * 1e7)
        print(np.sum(diff_weight) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(0)
    mc = append_fields(mc, 'cur_weight', weights)
    args['weights'] = 'default'
    model, mname = load_model(args['model'])
    bw_dict = dict()
    for i, key in enumerate(args['bw_key']):
        bw_dict[key] = args['bw'][i]
    lh_arr, zero_arr = [], []
    for train_index, val_index in kf.split(mc):
        args['no_save'] = True
        res_dict = create_KDE(args, mc=mc[train_index], bws=bw_dict)
        mc_val = mc[val_index]
        val_settings, grid = model.setup_KDE(mc_val)
        lh, zeros = do_validation(res_dict, val_settings, mc_val['cur_weight'])
        print('Number of zeros {}'.format(zeros))
        print('Likelihood Value {}'.format(lh))
        zero_arr.append(zeros)
        lh_arr.append(lh)
    fname = ''
    for i in range(len(args['bw'])):
        fname += '{}_{}_'.format(args['bw_key'][i], args['bw'][i])
    fname = fname[:-1] + '.npy'
    odict = {'zeros': zero_arr, 'lh': lh_arr}
    np.save(os.path.join(args['outfolder'], fname), odict)
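A hedged sketch (not part of the project) of how the saved cross-validation result might be read back and summarised; the file name below is only an illustrative assumption following the '{key}_{bw}_' naming pattern above.

import numpy as np

# np.save stored a plain dict, so allow_pickle and .item() are needed to recover it
odict = np.load('out/logE_0.3_sinDec_0.1.npy', allow_pickle=True).item()
print('mean likelihood over folds:', np.mean(odict['lh']))
print('total zero-probability events:', np.sum(odict['zeros']))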
Example #3
def main():
    #Args
    args1 = arg_parser_test()
    #Load the model
    model = load_model(args1.checkpoint)
    #label 
    with open(args1.cat_name_dir,'r') as json_file:
        cat_to_name = json.load(json_file)
    #Prediction
    probabilities = predict(args1.image_path, model, args1.top_k)
    labels = [cat_to_name[str(index + 1)] for index in np.array(probabilities[1][0])]
    probability = np.array(probabilities[0][0])
    i=0
    while i < args1.top_k:
        print("{} with a probability of {}".format(labels[i], probability[i]))
        i += 1
    print("Predictiion is done !")
Example #4
def main():
    #print("hello world") for luck

    in_arg = predict_input_args()

    print("   image =", in_arg.image_dir, "\n   model checkpoint =",
          in_arg.load_dir, "\n   top k =", in_arg.top_k, "\n   device =",
          in_arg.device, "\n   json =", in_arg.json)

    model, optimizer = load_model(in_arg.load_dir, in_arg.device)

    probs, classes, labels = predict(in_arg.image_dir, model, in_arg.json,
                                     in_arg.top_k, in_arg.device)

    results = dict(zip(labels, probs))
    print("-" * 40)
    for x in results:
        print("   {:20s}   {:.2f}%".format(x.title(), results[x] * 100))
Example #5
def main():
    args = parser_fun_test()

    with open(args.cat_to_name, 'r') as f:
        cat_to_name = json.load(f)

    # load the model
    loaded = torch.load("trained_model.pth")
    model = load_model(loaded)

    # prediction
    probabilities = predict(args.image_path, model, args.top_k, 'gpu')
    labels = [
        cat_to_name[str(index + 1)] for index in np.array(probabilities[1][0])
    ]
    probability = np.array(probabilities[0][0])

    i = 0
    while i < args.top_k:
        print("{} with a probability of {:.5f}".format(labels[i],
                                                       probability[i]))
        i += 1
    print("Predictiion is done !")
Example #6
File: predict.py  Project: rusoiba/OC_P7
def predict(id_curr):
    """returns the elements of prediction page"""
    sns.reset_orig()
    X_train = load_training_data()
    X_test = load_test_data()

    lgbm = load_model()
    print("id data", hex(id(X_train)))
    print("id lgbm", hex(id(lgbm)))
    explainer = compute_tree_explainer(lgbm, X_train)
    print("id explainer", hex(id(explainer)))

    ids_avail = X_test["SK_ID_CURR"]
    if (ids_avail == id_curr).sum() > 0:
        to_analyse = X_test.loc[X_test["SK_ID_CURR"] == id_curr, :].drop(
            columns=["SK_ID_CURR"])

        st.write("Default loan probability for client id", id_curr, "is",
                 predict_api(to_analyse.iloc[0, :]), "%")

        st.subheader("Score interpretation")
        st.write("The following plot must be intepreted as follows :")
        st.write(
            "- **Arrows are contribution** of each client attribute (family status, income, ...) on the **final score**, the bigger the arrow, the greater its contribution"
        )
        st.write(
            "- **Blue** arrows are **good contributions** : they tend to reduce client's default risk"
        )
        st.write(
            "- **Red** arrows are **bad contributions** : they tend to increase client's default risk"
        )
        st.write(
            "- Intersection of blue and red arrows is the predicted level of risk"
        )
        st.write(
            "- This intersection is surrounded feature contributions, from big to small as step aside from predicted value"
        )
        shap.initjs()

        shap_values = explainer.shap_values(to_analyse, check_additivity=True)

        shap.force_plot(explainer.expected_value,
                        shap_values[0],
                        to_analyse.round(2),
                        matplotlib=True,
                        link="logit")

        st.pyplot(bbox_inches='tight', dpi=500, pad_inches=0)

        shap_named = pd.Series(
            np.copy(shap_values[0]),
            index=X_test.drop(columns=["SK_ID_CURR"]).columns)

        most_imp_feat = abs(shap_named).sort_values(
            ascending=False).head(10).index
        displ_feat = shap_named[most_imp_feat].sort_values()
        variables = load_variable_description()

        info_feat = st.selectbox(
            "Select the variable you want to know more about",
            displ_feat.index)
        st.write(info_feat)
        st.write(
            to_analyse.loc[:, info_feat].values[0].round(2),
            variables.loc[variables["Row"] == info_feat,
                          "Description"].values[0])

    else:
        st.error("Solve error in the sidebar before accessing this module")
Example #7
            if not os.path.exists('logs'):
                os.mkdir('logs')
            # set logging to save log file to logs folder
            logging.basicConfig(
                filename=f"logs/{sys.argv[0].replace('.py', '')}-{datetime.now().strftime('%Y%d%m%H%M%S')}.log",
                filemode='w',
                level=logging.INFO)
        else:
            logging.basicConfig(level=logging.INFO)
    logging.info(
        f"using {m}.{e} model to calculate submitid {i}") if v else None

    # load word embedding model
    start = datetime.now()
    vectors = load_model(m, e)
    logging.info(f"model loaded in {datetime.now() - start}") if v else None

    # get source code and problem text from database that corresponds with input submit ID
    code, problem = get(i)

    # preprocessing includes normalization and tokenization
    logging.info("preprocessing code and problem text...") if v else None
    problem_processed, comments_processed, code_only = preprocess(
        problem, code)
    # count words in code comment
    comment_word_count_raw = 0
    for line in comments_processed:
        comment_word_count_raw += len(line)
    logging.info("preprocessing finished") if v else None
Example #8
"""
    Another useful tool is the T-distributed Stochastic Neighbor Embedding (TSNE)
    a nonlinear dimensionality reduction technique well-suited for embedding
    high-dimensional data for visualization in a low-dimensional space of two or
    three dimensions.
"""

from sklearn.manifold import TSNE
import pandas as pd
from functions import load_model

# load data generated in 3_Clustering and 4_PCA
pca_result = load_model('models/PCA.sav')
clustered = load_model('models/clustered.pkl')

# instantiate a TSNE object with two output components
tsne = TSNE(n_components=2)
# we fit (train) and transform into TSNE
tsne_result = tsne.fit_transform(pca_result)

# DataFrame creation with information generated
TSNE_df = pd.DataFrame(tsne_result)
# columns renamed
TSNE_df.columns = ['x1', 'x2']
# create cluster column
TSNE_df['cluster'] = clustered

# saving our DataFrame
TSNE_df.to_csv('data/TSNE.csv')
# displaying a 20 line preview of the DataFrame
print(TSNE_df.head(20))
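As a quick visual check (a sketch, not part of the original script; assumes matplotlib is installed), the two TSNE components can be scattered and coloured by cluster label:

import matplotlib.pyplot as plt
import pandas as pd

# reload the DataFrame written above (first column is the saved index)
TSNE_df = pd.read_csv('data/TSNE.csv', index_col=0)
plt.scatter(TSNE_df['x1'], TSNE_df['x2'], c=TSNE_df['cluster'], s=5, cmap='viridis')
plt.xlabel('x1')
plt.ylabel('x2')
plt.title('TSNE projection coloured by cluster')
plt.savefig('data/TSNE_scatter.png', dpi=150)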
Example #9
    and it is a tool commonly used for dimensionality reduction.

    The way it works is by projecting each data point onto only
    the first few principal components to obtain lower-dimensional
    data while preserving as much of the data's variation as possible.
"""

from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from functions import load_model
import pickle

# we load data generated in 2_Word2Vec_SentenceVectors and 3_Clustering
X = np.load('data/X.npy')
clustered = load_model('models/clustered.pkl')

# instantiate a PCA object with two principal components
pca = PCA(n_components=2)
# we fit (train) and transform into PCA
pca_result = pca.fit_transform(X)

# DataFrame creation with information generated
PCA_df = pd.DataFrame(pca_result)
# columns renamed
PCA_df.columns = ['x1', 'x2']
# create cluster column
PCA_df['cluster'] = clustered

# saving our DataFrame
PCA_df.to_csv('data/PCA.csv')
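A small hedged check (not in the original script) of how much variance the two principal components actually retain, reusing the same data/X.npy input:

import numpy as np
from sklearn.decomposition import PCA

X = np.load('data/X.npy')
pca = PCA(n_components=2).fit(X)
print('explained variance per component:', pca.explained_variance_ratio_)
print('total variance retained: {:.1%}'.format(pca.explained_variance_ratio_.sum()))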
Example #10
"""
    It's time to see the accuracy of our model by
    evaluating our own news, this news were manually
    downloaded from internet and saved in a txt file.

    This script only pre process the news, same as did before
"""

import numpy as np
import pickle
from glob import glob
from functions import prepare_news, load_model

# load data generated in 1_DataAnalysis_Cleanup, 2_Word2Vec_SentenceVectors and 3_Clustering
processed_data = np.load('data/processed_data.npy', allow_pickle=True)
model = load_model('models/model.pkl')
kmeans = load_model('models/kmeans.pkl')

# getting a list of all news paths to analyze
glob_list = glob('news/*.txt')
# getting a list of all news in string format
news_list = [open(new, 'r', encoding='utf8').read() for new in glob_list]

# processing news and saving result
pickle.dump(prepare_news(news_list), open('news/news_list.pkl', 'wb'))
Example #11
def logger(metadata):
    with open('online.log', 'a') as f:
        f.write(metadata)


if logging:
    logger('timestamp(s) : dns bucket : predictions : diverged')

if __name__ == '__main__':

    gravity = fn.load_gravity()['domain'].values
    tokenizer = fn.load_yttm()
    max_timestamp = 1600397395  # arbitrary recent timestamp

    model = fn.load_model()  # model that learns online
    ref_model = tf.keras.models.clone_model(model)  # reference model
    ref_model.set_weights(model.get_weights())

    model.optimizer.lr.assign(1e-4)
    df = fn.load_gravity(table='domainlist')
    epsilon = 0.1
    i = 0
    bad_domains_all = []
    while True:
        queries, pos_samples, neg_samples, anchor_pos_samples, anchor_neg_samples, parsed_df, max_timestamp = fn.run_all(
            tokenizer=tokenizer, timestamp=max_timestamp)
        if i == 0:
            i += 1
            continue
        print('{0} pieces of matter entering the photon sphere..'.format(
Example #12
def create_KDE(args, inds=None, bws={}, mc=None):
    if 'mc' not in args.keys():
        args['mc'] = None
    if 'phi0' not in args.keys():
        args['phi0'] = 1
    if args['outfolder'] is None:
        args['outfolder'] = os.path.join(os.path.dirname(args['model']), 'out')
    args['phi0'] *= 1e-18  # correct units
    t0 = time.time()
    model, mname = load_model(args['model'])
    print('---- Run KDE with args:')
    print(args)
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])

    print('Load and Update the Monte Carlo')
    config = read_config()
    cfg_keys = config['keys']
    if mc is None:
        if args['mc'] is not None:
            mc_path = args['mc']
        else:
            mc_path = str(config['IC_MC']['path'])
        mc = np.load(str(mc_path))
        mc = mc_cut(mc, config)
        if inds is not None:
            print('Cut on given indices..')
            mc = mc[inds]
    settings, grid = model.setup_KDE(mc, cfg_keys)
    mc_conv = len(mc)
    print('Use {} mc events'.format(mc_conv))
    for key in settings.keys():
        settings[key]['name'] = key
    for key in bws.keys():
        settings[key]['bandwidth'] = bws[key]

    plaw = np.vectorize(powerlaw)

    # create binned pdf
    if args['weights'] == 'default':
        print('Use pre-calculated input weights')
        weights = mc['cur_weight']
    elif args['weights'] == 'pl':
        weights = mc[cfg_keys['ow']] * plaw(
            mc[cfg_keys['trueE']], phi0=args['phi0'], gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc[cfg_keys['conv']]
    elif args['weights'] == 'conv+pl':
        #diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
        #                                   gamma=args['gamma'])
        weights = mc[cfg_keys['conv']] + mc[cfg_keys['astro']]
        print('Rates [1/yr]:')
        print(np.sum(mc[cfg_keys['conv']]) * np.pi * 1e7)
        print(np.sum(mc[cfg_keys['astro']]) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(0)

    inp_arr = [settings[key] for key in settings.keys()]
    if args['adaptive']:
        m_input = meerkat_input(inp_arr, weights, mc_conv=mc_conv)
        m_kde4d_fb = meerkat_kde(m_input)
        adtv_input = meerkat_input(inp_arr,
                                   weights,
                                   pdf_seed=m_kde4d_fb.kde,
                                   adaptive=True,
                                   mc_conv=mc_conv)
        m_kde4d = meerkat_kde(adtv_input)
    else:
        m_input = meerkat_input(inp_arr, weights, mc_conv=mc_conv)
        m_kde4d = meerkat_kde(m_input)

    nbins = args['eval_bins']
    eval_grid = OrderedDict()
    if grid is None:
        grid = {}

    for key in settings.keys():
        if key in grid.keys():
            if isinstance(grid[key], list):
                eval_grid[key] = np.linspace(grid[key][0], grid[key][1], nbins)
            elif isinstance(grid[key], np.ndarray):
                eval_grid[key] = grid[key]
        else:
            eval_grid[key] = np.linspace(settings[key]['range'][0],
                                         settings[key]['range'][1], nbins)
    print(eval_grid.keys())
    out_bins = [eval_grid[key] for key in settings.keys()]
    coords = np.array(list(itertools.product(*out_bins)))
    bws = [settings[key]['bandwidth'] for key in settings.keys()]

    print('Evaluate KDEs:')
    pdf_vals = np.asarray([m_kde4d.eval_point(coord) for coord in coords])
    shpe = np.ones(len(settings.keys()), dtype=int) * nbins
    pdf_vals = pdf_vals.reshape(*shpe)

    add_str = ''
    if args['weights'] != 'pl':
        add_str = '_' + args['weights']
    else:
        add_str = '_' + 'g_{}'.format(args['gamma'])
    if args['save_str'] != '':
        add_str = add_str + '_' + args['save_str']

    odict = dict({
        'vars': list(eval_grid.keys()),
        'bins': out_bins,
        'coords': coords,
        'pdf_vals': pdf_vals,
        'bw': bws
    })

    if not args['no_save']:
        with open(os.path.join(args['outfolder'], mname + add_str + '.pkl'),
                  'wb') as fp:
            pickle.dump(odict, fp)
    t1 = time.time()
    print('Finished after {} minutes'.format((t1 - t0) / 60))
    return odict
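A hedged sketch (not part of the project) of how the pickled output of create_KDE might be turned back into an evaluable PDF with SciPy; the file name is an illustrative assumption and scipy is assumed to be available.

import pickle
import numpy as np
from scipy.interpolate import RegularGridInterpolator

with open('out/model_conv+pl.pkl', 'rb') as fp:
    odict = pickle.load(fp)

# 'bins' holds one 1-D grid per variable and 'pdf_vals' the KDE values on that grid
interp = RegularGridInterpolator(tuple(odict['bins']), odict['pdf_vals'],
                                 bounds_error=False, fill_value=0.0)
centre = np.array([0.5 * (b[0] + b[-1]) for b in odict['bins']])
print('pdf value at the grid centre:', interp(centre))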
Example #13
    if len(sys.argv) > -1:
        ap.add_argument('--modeldir', required=True, help="Path to the model")
        ap.add_argument('--datasetdir', required=False, help="Path to the dataset")
        ap.add_argument('--nepochs', required=False, help="Number of training epochs")
        ap.add_argument('--lr', required=False, help="Learning rate")
    args = vars(ap.parse_args())
    model_directory = args['modeldir']
    dataset_directory = value_or_default(args['datasetdir'], os.path.join('..', 'dataset'))
    nepochs = value_or_default(args['nepochs'], 100)
    lr = float(value_or_default(args['lr'], 1e-3))

    architecture_file = os.path.join(model_directory, 'architecture.json')
    parameters_file = os.path.join(model_directory, 'parameters.json')
    assert(os.path.isfile(architecture_file) and os.path.isfile(parameters_file)), 'No architecture or parameters found in the specified directory.'
    
    model = load_model(model_directory)
    with open(parameters_file) as f:
        parameters = json.load(f)

    date = datetime.datetime.now().strftime("%Y%m%dT_%H%M%S")
    session_directory = os.path.join(model_directory, f"session_{date}_epochs_{nepochs}")
    if not os.path.isdir(session_directory):
        os.makedirs(session_directory)

    training_generator = generator(filenames=load_filenames(os.path.join(dataset_directory, 'training')), batch_size=parameters['batchsize'], dim=[*parameters['shape']])
    validation_generator = generator(filenames=load_filenames(os.path.join(dataset_directory, 'validation')), batch_size=parameters['batchsize'], dim=[*parameters['shape']])

    
    checkpoint = ModelCheckpoint(os.path.join(session_directory, "weights.h5"), monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    earlystop = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1, mode='auto', min_delta=0.01, cooldown=0, min_lr=1e-9)
    callbacks_list = [checkpoint, earlystop, reduce_lr]
Example #14
image = results.image_path

top_k = results.topk

gpu = results.gpu

cat_names = results.cat_name_dir

with open(cat_names, 'r') as f:
    cat_to_name = json.load(f)
    
model = getattr(models, pt_model)(pretrained=True)

# Load model
loaded_model = load_model(model, save_dir, gpu)

# Preprocess image
processed_image = process_image(image)

# Define top K likely classes with probabilities
probs, classes = predict(processed_image, loaded_model, top_k, gpu)

# Define names for Classes
names = [cat_to_name[i] for i in classes]

# Print out top K classes and probabilities
print(f"Top {top_k} classes are: {classes}, with assocatied probabilities: {probs}")

# Print out most likely output
print(f"The most likely outcome is a: '{names[0]} ({round(probs[0]*100, 2)}%)'")
Example #15

# actual code starts from here
# logging setup (tensorboard, log.txt)
logging_handlers = [logging.StreamHandler()]
if args.save:
    logging_handlers += [logging.FileHandler(f'{args.save_path}/log.txt')]
    if args.tensorboard:
        writer = SummaryWriter('runs/' + args.suffix + "_" +
                               args.timestamp[-5:])
logging.basicConfig(level=logging.INFO,
                    format='%(message)s',
                    handlers=logging_handlers)

# load model & tokenizer
model, tokenizer = load_model(args, BERT_DIR, device)
if args.verbose:
    if args.bert_path is not None:
        logging.info(
            f"* loaded a bert model from {args.bert_path} and its tokenizer")
        if args.load_from:
            logging.info(f"* loaded a bert params from {args.load_from}")
    else:
        logging.info(f"* loaded {args.bert_model} model and its tokenizer")

# load data
if args.verbose:
    logging.info(f"loading data from {args.data_path}")
emo_mapping_e2i = {e: i for i, e in enumerate(emotions)}
target_emotion_idx = emo_mapping_e2i[args.target_emotion]
data, src_data, tgt_data = import_data(args.data_path, args.source,
Example #16
# Img uploader
img = st.file_uploader(label="Load X-Ray Chest image", type=['jpeg', 'jpg', 'png'], key="xray")

if img is not None:
    # Preprocessing Image
    p_img = functions.preprocess_image(img)

    if st.checkbox('Zoom image'):
        image = np.array(Image.open(img))
        st.image(image, use_column_width=True)
    else:
        st.image(p_img)

    # Loading model
    loading_msg = st.empty()
    loading_msg.text("Predicting...")
    model = functions.load_model()

    # Predicting result
    prob, prediction = functions.predict(model, p_img)

    loading_msg.text('')

    if prediction:
        st.markdown(unsafe_allow_html=True, body="<span style='color:red; font-size: 50px'><strong><h4>Pneumonia! :slightly_frowning_face:</h4></strong></span>")
    else:
        st.markdown(unsafe_allow_html=True, body="<span style='color:green; font-size: 50px'><strong><h3>Healthy! :smile: </h3></strong></span>")

    st.text(f"*Probability of pneumonia is {round(prob[0][0] * 100, 2)}%")

Example #17
"""
    Here we predict the now processed news
    and see results in a DataFrame
"""

import pandas as pd
from functions import load_model

# load data generated in 3_Clustering and 7_TestNewsProcessing
kmeans = load_model('models/kmeans.pkl')
news_list = load_model('news/news_list.pkl')

# printing predictions
prediction = kmeans.predict(news_list)
print(prediction)

# printing a DataFrame with results obtained
df = pd.DataFrame({
    'Sentence':
    ['news' + str(num + 1).zfill(2) for num, item in enumerate(news_list)],
    'Prediction':
    prediction
})
print(df)