Example #1
def main():
    global done
    t = threading.Thread(target=animate)
    t.start()

    current_date = datetime.today().strftime('%Y%m%d')
    root_directory = utilities.get_root_dir()
    file_directory = "{}/{}".format(root_directory, USERS)
    files = utilities.get_data(file_directory)
    import_report_df = check_users(files)
    print("\tDONE: Check Users import")

    file_directory = "{}/{}".format(root_directory, CONTACT_ROLES)
    files = utilities.get_data(file_directory)
    import_report_df = check_contact_roles(import_report_df, files)
    print("\tDONE: Check Contact Roles import")

    file_directory = "{}/{}".format(root_directory, DO_NOT_FLAGS)
    files = utilities.get_data(file_directory)
    import_report_df = check_do_not_flags(import_report_df, files)
    print("\tDONE: Check Do Not Flag import")

    output_file = "{}/{}_import_results.csv".format(root_directory,
                                                    current_date)
    utilities.df_to_csv(import_report_df, output_file)
    done = True
    print("\nIMPORT REPORT: {}".format(output_file))
    os.system("open {}".format(output_file))
Example #2
def upload_file():
    if request.method == "POST":
        subject_id = int(request.form['subject_id'])
        data_no = int(request.form['data_no'])
        start_time = time.time()
        query_subject = get_data(subject_no=subject_id, data_no=data_no)
        query_subject = np.reshape(query_subject, (query_subject.shape[0], 1))
        input_pair_group = np.zeros((109, 2, 3000, 1), dtype=np.float64)
        count = 0

        for i, input_data in enumerate(subject_data):
            input_data = np.reshape(input_data, (input_data.shape[0], 1))
            input_pair_group[i, 0, :, :] = query_subject
            input_pair_group[i, 1, :, :] = input_data

        pred = loaded_model([input_pair_group[:, 0], input_pair_group[:, 1]])
        pred = 1.0 - np.reshape(pred, (pred.shape[0], ))

        subject_results = []

        for i, j in enumerate(pred):
            subject_results.append((i, j))

        subject_results.sort(key=lambda e: e[1], reverse=True)
        end_time = time.time()
        final_result = [i for i in subject_results[:6] if i[1] > 0.7]
        response = {
            "results": final_result,
            "inference_time": round(end_time - start_time, 2)
        }
        return json.dumps(str(response))
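
# --- Added sketch (not from the original source) ---
# The view above returns json.dumps(str(response)), i.e. a JSON-encoded string of the
# dict's repr rather than a JSON object. A common alternative is to cast the numpy
# scalars to native Python types first; a minimal, self-contained illustration
# (dummy predictions stand in for the model output):
import json

import numpy as np

pred = np.array([0.91, 0.85, 0.42], dtype=np.float64)
subject_results = sorted(enumerate(pred), key=lambda e: e[1], reverse=True)
final_result = [(int(i), float(p)) for i, p in subject_results[:6] if p > 0.7]
response = {"results": final_result, "inference_time": 0.12}
print(json.dumps(response))  # {"results": [[0, 0.91], [1, 0.85]], "inference_time": 0.12}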
Example #3
def validate(model):
    directory = settings.directory
    datasource = utilities.get_data(settings.testsetpath)
    datagen = utilities.limited_gen_data(datasource)
    settings.saveMean = False

    #model = cnn_lstm.create_cnn_lstm(weightsfile)
    #sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    #model.compile(optimizer=sgd, loss='categorical_crossentropy')
    print('\nin validate method!!!!!!')
    print(type(model))
    posxs = []
    posqs = []
    howmanyaccepted = 0
    counter = 0
    print('looping on test set!')
    for ims, xs, qs in datagen:
        print(len(ims))
        howmanyaccepted += 1
        print(howmanyaccepted)
        inputs = np.zeros([1, 3, 3, 224, 224])
        inputs[0, :] = ims
        out = model.predict(inputs)
        posx = out[0][0][1]  #.mean(0)#xyz
        posq = out[1][0][1]  #.mean(0)#wpqr
        actualx = xs[1]  #.mean(0)
        actualq = qs[1]  #.mean(0)
        errx, theta = getError(posx, posq, actualx, actualq)
        posxs.append(errx)
        posqs.append(theta)
        print('error should report here!')
        print('errx', errx, 'm and', 'errq', theta, 'degrees')
    return np.median(posxs), np.median(posqs), howmanyaccepted
Example #4
def get_data(self):
    # TODO Return the eye position
    print("client.py/Client.get_data")
    data = utilities.get_data()
    if len(data):
        return data
    else:
        return None
Example #5
def evaluateModel(model_name, model_path):
    clf = get_model(model_name)
    x_train, x_test, y_train, y_test = get_data()
    print('------------- Training Started -------------')
    clf.fit(x_train, y_train)
    print('------------- Training Ended -------------')
    score = clf.score(x_test, y_test)
    print("accuracy: {:.2f}%".format(score * 100.))
    util.save_speaker_model(model_path, clf)
Example #6
def main():
    """
    Usage: python main.py [ -c config_file -s config_section ]
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config_file', default="default.ini")
    parser.add_argument('-s', '--config_section', default="DEFAULT")
    args = parser.parse_args()
    config_file = args.config_file  #"default.ini"
    config_section = args.config_section  #"DEFAULT" # "DISJOINT" "TEST"
    CONFIG = configuration.Configuration(config_file, config_section)
    n_seed = int(CONFIG.get("random_seed"))
    if n_seed != -1:
        random.seed(n_seed)  # for reproducibility
    else:
        n_seed = None
    n_run = int(CONFIG.get("n_run"))
    knn = int(CONFIG.get("knn"))
    model_type = CONFIG.get("model_type")
    prediction_type = CONFIG.get("prediction_type")
    features = set(CONFIG.get("features").split("|"))
    recalculate_similarity = CONFIG.get_boolean("recalculate_similarity")
    disjoint_cv = CONFIG.get_boolean("disjoint_cv")
    try:
        split_both = CONFIG.get_boolean("pairwise_disjoint")
    except Exception:
        split_both = False
    output_file = CONFIG.get("output_file")
    n_fold = int(CONFIG.get("n_fold"))
    n_proportion = int(CONFIG.get("n_proportion"))
    n_subset = int(CONFIG.get("n_subset"))  # for faster results - subsampling
    drug_disease_file = CONFIG.get("drug_disease_file")
    drug_side_effect_file = CONFIG.get("drug_side_effect_file")
    drug_structure_file = CONFIG.get("drug_structure_file")
    drug_target_file = CONFIG.get("drug_target_file")
    # Get data
    data = get_data(drug_disease_file, drug_side_effect_file,
                    drug_structure_file, drug_target_file)
    # Check prediction accuracy of ML classifier on the data set using the parameters above
    check_ml(data,
             n_run,
             knn,
             n_fold,
             n_proportion,
             n_subset,
             model_type,
             prediction_type,
             features,
             recalculate_similarity,
             disjoint_cv,
             split_both,
             output_file,
             model_fun=None,
             n_seed=n_seed)
    return
Example #7
def evaluateModel(model_name):
    """
    Generate a model, train it, test it, and display its metrics.
    :param model_name:
    """
    clf = get_model(model_name)
    x_train, x_test, y_train, y_test = get_data()
    print('------------- Training Started -------------')
    clf.fit(x_train, y_train)
    print('------------- Training Ended -------------')
    y_pred = clf.predict(x_test)
    display_metrics(y_pred, y_test)
Example #8
def get_data(self):
    # print "client.py/Client.get_data"
    # data = str(self.tempName) + ": " + str(self.tempCounter)
    # self.tempCounter += 1
    data = utilities.get_data()
    if len(data):
        if data != self.current_data:
            self.current_data = data
            return data
        else:  # ignore duplicates
            return None
    else:
        return None
Example #9
def main():
    in_arg = get_input_args()  # Creates and returns command line arguments

    print('\nData Directory:\n', in_arg.data_directory, '\n')

    print('Optional Command Line Arguments:\n',
          'Save Checkpoint [--save_dir]: ', in_arg.save_dir, '\n',
          'Pretrained Network [--arch]: ', in_arg.arch, '\n',
          'Learning Rate [--learning_rate]: ', in_arg.learning_rate, '\n',
          'Hidden Units [--hidden_units]: ', in_arg.hidden_units, '\n',
          'Epochs [--epochs]: ', in_arg.epochs, '\n', 'GPU [--gpu]: ',
          in_arg.gpu, '\n')

    # make a checkpoints folder if it doesn't already exist
    if 'checkpoints' not in listdir():
        mkdir('checkpoints')

    train_dir, valid_dir, test_dir = util.get_data(
        in_arg.data_directory
    )  # Returns Train, Validation and Test Directories

    transformed_train, transformed_valid, transformed_test = mod.transform_data(
        train_dir, valid_dir, test_dir)  # Returns transformed datasets

    train_loader, valid_loader, test_loader = mod.load_data(
        transformed_train, transformed_valid,
        transformed_test)  # Returns Data loaders

    model = mod.build_model(
        util.label_count(train_dir), in_arg.hidden_units, in_arg.arch,
        transformed_train.class_to_idx)  # Returns built model

    epochs = in_arg.epochs  # Epochs initially set by command line argument in_arg.epochs.  Can be changed with m.load_checkpoint()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=in_arg.learning_rate)

    use_gpu = mod.use_gpu(model,
                          in_arg.gpu)  # Returns True or False for GPU use

    mod.train(
        model, criterion, optimizer, train_loader, valid_loader, use_gpu,
        in_arg.epochs
    )  # Trains the model.  Prints Training Loss, Validation Loss & Validation Accuracy

    mod.save_checkpoint(
        in_arg.arch,
        model.classifier.state_dict(), transformed_train.class_to_idx,
        util.label_count(train_dir), in_arg.hidden_units, in_arg.epochs,
        in_arg.save_dir
    )  # Saves classifier and other model parameters to checkpoint
Example #10
def training(mode, model_path, dataset_folder, class_labels):
    # Read data
    global x_train, y_train, x_test, y_test
    x_train, x_test, y_train, y_test = get_data(dataset_folder,
                                                class_labels,
                                                flatten=False)
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)

    model_id = mode - 1
    if model_id == 0:
        # Model is CNN so have to reshape the data
        in_shape = x_train[0].shape
        x_train = x_train.reshape(x_train.shape[0], in_shape[0], in_shape[1],
                                  1)
        x_test = x_test.reshape(x_test.shape[0], in_shape[0], in_shape[1], 1)
    elif model_id > len(models):
        sys.stderr.write('Model Not Implemented yet')
        sys.exit(-1)

    model = get_model(class_labels, models[model_id], x_train[0].shape)

    accuracy = evaluateModel(model, model_path)
    return accuracy
Example #11
import numpy as np
from logistic_regression import LogisticRegression
from utils import bin_feat_heart, con_feat_heart, name_features_heart
import utilities

TRAIN = 'heart_train.csv'
TEST = 'heart_test.csv'

if __name__ == '__main__':
    path = utilities.get_path()
    X_train, y_train = utilities.get_data(path / TRAIN, 10)
    X_test, y_test = utilities.get_data(path / TEST, 10)

    encoder = utilities.OneHotEncoder()
    scaler = utilities.StandardScaler()

    encoder.fit(X_train[:, bin_feat_heart])

    X_train_new = np.hstack(
        (encoder.transform(X_train[:, bin_feat_heart]), X_train[:, con_feat_heart]))

    X_test_new = np.hstack(
        (encoder.transform(X_test[:, bin_feat_heart]), X_test[:, con_feat_heart]))

    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)
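
# --- Added sketch (not from the original source) ---
# The script above prepares X_train_scaled / X_test_scaled but stops before training.
# A minimal end-to-end illustration using a scikit-learn estimator (the custom
# LogisticRegression imported above may expose a different API); random arrays
# stand in for the preprocessed features:
import numpy as np
from sklearn.linear_model import LogisticRegression as SkLogisticRegression

rng = np.random.default_rng(0)
X_train_scaled = rng.normal(size=(100, 13))
y_train = rng.integers(0, 2, size=100)
X_test_scaled = rng.normal(size=(20, 13))
y_test = rng.integers(0, 2, size=20)

clf = SkLogisticRegression(max_iter=1000)
clf.fit(X_train_scaled, y_train)
print("test accuracy: {:.2f}".format(clf.score(X_test_scaled, y_test)))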
Example #12
def data_prep(full_path):
    (im_list_tr, att_list_tr, im_list_val, att_list_val,
     im_list_test, att_list_test) = get_data(full_path)
    save2lists(im_list_tr, att_list_tr, 'training_list.lst')
    save2lists(im_list_val, att_list_val, 'valid_list.lst')
    save2lists(im_list_test, att_list_test, 'testing_list.lst')
Example #13
import numpy as np
from linear_regression import LinearRegression
from utils import mapper, bin_feat_reg, con_feat_reg, name_features_insurance
import utilities

TRAIN = 'insurance_train.csv'
TEST = 'insurance_test.csv'

if __name__ == '__main__':
    path = utilities.get_path()

    X_train, y_train = utilities.get_data(path / TRAIN, 2, mapper)
    X_test, y_test = utilities.get_data(path / TEST, 2, mapper)

    encoder = utilities.OneHotEncoder()
    scaler = utilities.StandardScaler()

    encoder.fit(X_train[:, bin_feat_reg])

    X_train_new = np.hstack(
        (encoder.transform(X_train[:, bin_feat_reg]), X_train[:, con_feat_reg]))

    X_test_new = np.hstack(
        (encoder.transform(X_test[:, bin_feat_reg]), X_test[:, con_feat_reg]))

    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)

    model = LinearRegression(learning_rate=10e-5, penalty='l2')
Example #14
import os
import cv2
import pickle
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append("/home/hank/libsvm-3.24/python")
from svmutil import *

from utilities import get_data, plot_heatmap, plot_res
from bag_of_features import bag_of_features

train_x, train_y, test_x, test_y = get_data(gray=False)

if os.path.isfile('vocab.pkl'):
    with open('vocab.pkl', 'rb') as handle:
        voc = pickle.load(handle)
    with open('train_features.pkl', 'rb') as handle:
        im_features = pickle.load(handle)
    with open('test_features.pkl', 'rb') as handle:
        test_features = pickle.load(handle)
else:
    im_features, test_features, voc = bag_of_features(train_x, test_x, k=400)
    with open('vocab.pkl', 'wb') as handle:
        pickle.dump(voc, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open('train_features.pkl', 'wb') as handle:
        pickle.dump(im_features, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open('test_features.pkl', 'wb') as handle:
        pickle.dump(test_features, handle, protocol=pickle.HIGHEST_PROTOCOL)
Example #15
from networks import *
import utilities as util
import time
import numpy as np
from skimage.exposure import equalize_hist

imgs, labels, test_imgs, test_labels = util.get_data(40)

start_time = time.time()
print(imgs.shape)
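# NOTE (added): the two timing loops below rebind the loop variable `img`, so the
# filtered / equalized results are discarded; only the elapsed time is measured.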
for img in imgs:
    img = util.filter_anisotropic_diffusion(img, n_iter=1, gamma=0.01, kappa=1)

print("time for diffusion filter {}".format(time.time() - start_time))

t1 = time.time()
for img in imgs:
    img = equalize_hist(img)
print("time for HE {}".format(time.time() - t1))

t2 = time.time()
imgs = imgs.reshape(-1, 256 * 256)

for img in imgs:
    img /= np.std(img) + 1e-5
    img -= np.mean(img)
imgs = imgs.reshape(-1, 256, 256, 1)
print("time for rescaling: {}".format(time.time() - t2))

print("overall time for preprocessing: {}".format(time.time() - start_time))
Example #16
def train():

    try:
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)
    except Exception as e:
        print(e)
        num_api = numerapi.NumerAPI(PUBLIC_KEY, SECRET_GUY, verbosity="info")
        num_api.download_current_dataset(dest_path='../data/')
        feature_names = utilities.get_feature_names(TRAIN_PATH)
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)

    feature_names = utilities.get_feature_names(train_data)

    #use pca for dimensionality reduction
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(train_data[feature_names])
    x_train_pca = pca.transform(train_data[feature_names])
    x_test_pca = pca.transform(test_data[feature_names])

    #corrupt dataset using gaussian noise
    mu, sigma = 0, 0.1
    noise = np.random.normal(mu, sigma, x_train_pca.shape)
    x_train_pca_noise = x_train_pca + noise

    #train an LGBMRegressor model - use random search for parameter tuning
    #with cross validation
    lgb = LGBMRegressor()
    lgb_randomsearch = RandomizedSearchCV(estimator=lgb,
                                          cv=CV,
                                          param_distributions=params,
                                          n_iter=100)
    lgb_model = lgb_randomsearch.fit(x_train_pca_noise[:100],
                                     train_data['target'][:100])
    lgb_model_best = lgb_model.best_estimator_
    lgb_model_best = lgb_model_best.fit(x_train_pca_noise[:100],
                                        train_data['target'][:100])

    print("Generating all predictions...")
    train_data['prediction'] = lgb_model_best.predict(x_train_pca_noise)
    test_data['prediction'] = lgb_model_best.predict(x_test_pca)

    train_corrs = (evaluation.per_era_score(train_data))
    print('train correlations mean: {}, std: {}'.format(
        train_corrs.mean(), train_corrs.std(ddof=0)))
    #print('avg per-era payout: {}'.format(evaluation.payout(train_corrs).mean()))

    valid_data = test_data[test_data.data_type == 'validation']
    valid_corrs = evaluation.per_era_score(valid_data)
    #valid_sharpe = evaluation.sharpe(valid_data)
    print('valid correlations mean: {}, std: {}'.format(
        valid_corrs.mean(), valid_corrs.std(ddof=0)))
    #print('avg per-era payout {}'.format(evaluation.payout(valid_corrs.mean())))
    #print('valid sharpe: {}'.format(valid_sharpe))

    #live_data = test_data[test_data.data_type == "test"]
    #live_corrs = evaluation.per_era_score(test_data)
    #test_sharpe = evaluation.sharpe(test_data)
    #print('live correlations - mean: {}, std: {}'.format(live_corrs.mean(),live_corrs.std(ddof=0)))
    #print('avg per-era payout is {}'.format(evaluation.payout(live_corrs).mean()))
    #print('live Sharpe: {}'.format(test_sharpe))

    #pickle and save the model
    with open('lgbm_model_round_253.pkl', 'wb') as f:
        pickle.dump(lgb_model, f)

    #save down predictions
    valid_corrs.to_csv('valid_predictions.csv')
Example #17
def main():
    parser = ArgumentParser()
    parser.add_argument(
        "--output-dir", help="output directory", type=str, required=True
    )
    parser.add_argument("--month", help="month", type=str, required=True)
    parser.add_argument("--year", help="year", type=str, required=True)
    parser.add_argument(
        "--import-csv",
        help="yes/no: whether to import an existing csv file. Default is 'no'",
        type=str,
        default="no",
    )
    args = parser.parse_args()
    main_dir = args.output_dir
    n_month, n_year = args.month.lower(), args.year
    work_dir = main_dir + "//" + n_month + "_" + n_year
    create_directory(main_dir)
    create_directory(work_dir)
    log_file_write = open(work_dir + "//scrape_en-hi_log_file.txt", mode="w")
    log_file_write.write(f"{n_month,n_year}\n")

    if args.import_csv.lower() == "yes":
        set_import = True
    elif args.import_csv.lower() == "no":
        set_import = False
    else:
        log_file_write.write("\nPlease enter a valid option for import-csv")
        log_file_write.close()
        raise SystemExit("--import-csv must be 'yes' or 'no'")

    scrape_loc_en = work_dir + "//" + "scrape_file_en_" + n_month + "_" + n_year
    scrape_loc_hi = work_dir + "//" + "scrape_file_hi_" + n_month + "_" + n_year
    create_directory(scrape_loc_hi)
    create_directory(scrape_loc_en)
    url_file_loc = "file:///" + HTML_FOLDER + "//Press Information Bureau."
    filename_url_en = url_file_loc + "_en_" + n_month + "_" + n_year + ".html"
    filename_url_hi = url_file_loc + "_hi_" + n_month + "_" + n_year + ".html"

    ministy_pa_list = pd.read_csv(
        MINISTRY_NAME_PARALLEL_LOCATION,
        encoding="utf-16",
    )
    parse_url_en = get_html(filename_url_en)
    parse_url_hi = get_html(filename_url_hi)
    no_of_result_en = int(
        (parse_url_en.find("div", {"class": "search_box_result"}).contents[0]).split()[
            1
        ]
    )
    no_of_result_hi = int(
        (parse_url_hi.find("div", {"class": "search_box_result"}).contents[0]).split()[
            1
        ]
    )
    log_file_write.write(f"\nNo of search result in {n_month} of {n_year}:")
    log_file_write.write(f"\n English: {no_of_result_en} \n Hindi: {no_of_result_hi}")
    log_file_write.write(
        f"\nNo of Ministry in English search result:\
                         {len(parse_url_en.findAll('h3',{'class':'font104'}))}"
    )
    log_file_write.write(
        f"\nNo of Ministry in Hindi search result:\
                         {len(parse_url_hi.findAll('h3',{'class':'font104'}))}"
    )

    # Import or Create english dataframe
    df_en = get_data(
        n_month,
        n_year,
        filename_url_en,
        ministy_pa_list,
        "en",
        log_file_write,
        import_data=set_import,
        import_data_dir=work_dir,
    )
    if "PRID" not in df_en.columns.tolist():
        df_en["PRID"] = df_en["Link"].apply(lambda x: x.split("=")[-1])
    log_file_write.write("\nEnglish Dataframe\n")
    log_file_write.write("\nDataframe Info:\n")
    df_en.info(buf=log_file_write)

    # Write the English Dataframe
    df_en.to_csv(
        os.path.join(work_dir, "English_data_" + n_month + "_" + n_year + ".csv"),
        index=False,
        encoding="utf-16",
    )

    # Scraping English Documents
    iter_f = df_en.shape[0]
    log_file_write.write("\nStarting scraping for English Document")
    for i in range(iter_f):
        en_scrape_file = (
            scrape_loc_en
            + "//"
            + str(i).zfill(4)
            + "_en_"
            + "_".join(df_en.loc[i, ["English_Ministry_Name"]].values[0].split())
            + "_"
            + df_en.loc[i, ["Posting_Date"]].values[0].strftime("%Y-%m-%d")
            + "_"
            + str(df_en.loc[i, ["PRID"]].values[0])
            + ".txt"
        )
        m = 0
        while m == 0:
            try:
                b = get_html(df_en.Link[i], "lxml")
                m = b.body.form.find(
                    "div", {"class": "innner-page-main-about-us-content-right-part"}
                )
            except Exception:
                log_file_write.write("\nerror: retrying")
                m = 0
        if m is None:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_en.Link[i]}, no English content found"
            )
            continue
        k_en = [
            str(k.get_text()).strip()
            for k in m.findAll(
                [
                    "div",
                    "tr",
                    "td",
                    "p",
                    "ol",
                    "h2",
                    "h3",
                    "h4",
                    "ul",
                    "pre",
                    "span",
                    "li",
                ]
            )
            if len(
                k.find_parents(["p", "ol", "h2", "h3", "h4", "ul", "pre", "span", "li"])
            )
            == 0
        ]
        if len(k_en) == 0:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_en.Link[i]}, no English content in various tags"
            )
            continue
        log_file_write.write(f"\nindex: {i}, number of lines: {len(k_en)}")
        write_scrape_text_file(en_scrape_file, k_en, df_en.English_Ministry_Name[i])
    log_file_write.write(f"\nDone scraping for English Document")

    # Import or Create hindi dataframe
    df_hi = get_data(
        n_month,
        n_year,
        filename_url_hi,
        ministy_pa_list,
        "hi",
        log_file_write,
        import_data=set_import,
        import_data_dir=work_dir,
    )
    if "PRID" not in df_hi.columns.tolist():
        df_hi["PRID"] = df_hi["Link"].apply(lambda x: x.split("=")[-1])
    log_file_write.write("\nHindi Dataframe\n")
    log_file_write.write("\nDataframe Info:\n")
    df_hi.info(buf=log_file_write)

    # Write the Hindi Dataframe
    df_hi.to_csv(
        os.path.join(work_dir, "Hindi_data_" + n_month + "_" + n_year + ".csv"),
        index=False,
        encoding="utf-16",
    )

    # Scraping Hindi Documents
    iter_f = df_hi.shape[0]
    log_file_write.write("\nStarting scraping for Hindi Document")
    for i in range(iter_f):
        hi_scrape_file = (
            scrape_loc_hi
            + "//"
            + str(i).zfill(4)
            + "_hi_"
            + "_".join(df_hi.loc[i, ["English_Ministry_Name"]].values[0].split())
            + "_"
            + df_hi.loc[i, ["Posting_Date"]].values[0].strftime("%Y-%m-%d")
            + "_"
            + str(df_hi.loc[i, ["PRID"]].values[0])
            + ".txt"
        )
        m = 0
        while m == 0:
            try:
                b = get_html(df_hi.Link[i], "lxml")
                m = b.body.form.find(
                    "div", {"class": "innner-page-main-about-us-content-right-part"}
                )
            except Exception:
                log_file_write.write("\nerror: retrying")
                m = 0
        if m is None:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_hi.Link[i]}, no Hindi content found"
            )
            continue
        k_hi = [
            str(k.get_text()).strip()
            for k in m.findAll(
                [
                    "div",
                    "tr",
                    "td",
                    "p",
                    "ol",
                    "h2",
                    "h3",
                    "h4",
                    "ul",
                    "pre",
                    "span",
                    "li",
                ]
            )
            if len(
                k.find_parents(["p", "ol", "h2", "h3", "h4", "ul", "pre", "span", "li"])
            )
            == 0
        ]
        if len(k_hi) == 0:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_hi.Link[i]}, no Hindi content in various tags"
            )
            continue
        log_file_write.write(f"\nindex: {i}, number of lines: {len(k_hi)}")
        write_scrape_text_file(hi_scrape_file, k_hi, df_hi.Hindi_Ministry_Name[i])
    log_file_write.write("\nDone scraping for Hindi Document")
    log_file_write.close()
Example #18
import numpy as np
#import matplotlib.pyplot as plt
#import os
import sys
#import cgs as cgs #this is my own script that contains cgs constants
import utilities as util
#import re
#import glob
AU = 149597870700e-3  #km

rp = sys.argv[1]
#N = int(sys.argv[2])#desired number of particles
R = float(sys.argv[2])  #in km
R /= AU

rp, t = util.get_data(rp, units='raw')


def calc_N(rp, R):
    r = np.sqrt(rp["x"]**2 + rp["y"]**2 + rp["z"]**2)
    indices = np.where(r < R)
    N = len(rp['index1'][indices])
    return N


def carve(rp, R):
    r = np.sqrt(rp["x"]**2 + rp["y"]**2 + rp["z"]**2)
    indices = np.where(r < R)
    util.write_data(rp, 'carved', indices)
    return 1
Example #19
from keras.layers import Dense, Dropout, Conv1D, Flatten, BatchNormalization, Activation, MaxPooling1D
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam, SGD
from keras.models import Sequential  # used below to build the model
from keras.utils import np_utils  # used below for to_categorical
import numpy as np
import os
import shutil
import time
from utilities import get_data

dataset_path = '3_emotion'

print('Dataset path:',dataset_path)
print('Emotion:',os.listdir(dataset_path))
print('Num emotion:',len(os.listdir(dataset_path)))

x_train, x_test, y_train, y_test = get_data(dataset_path=dataset_path, max_duration = 4.0)

y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
print('x_train:',x_train.shape)
print('y_train:',y_train.shape)

#create model
model = Sequential()

Ckernel_size = 3
Cstrides = 1
Ppool_size = 2
Pstrides = 2
padding = 'SAME'
acti = 'relu'
Example #20
def __init__(self):
    self.image_data = get_data()
    self.camera = str
    self.predict = PREDICT()
Example #21
print("Filtering for rater consensus of: {}%".format(REQUIRED_AGREEMENT_PERCENT)
      if REQUIRED_AGREEMENT_PERCENT else "Using rater average")
print("Minimum word count threshold:     {}".format(WORD_COUNT_MIN))

##########################
##########################
#     SELECT DATASET     #
##########################
##########################
dataset_name = "clickdata"
# dataset_name = "moviedata"
traindata, testdata = get_data(dataset_name=dataset_name,
                               model_technique=MODEL_TECHNIQUE,
                               manual_class_centers=MANUAL_CLASS_CENTERS,
                               num_kmeans_classes=NUM_KMEANS_CLASSES,
                               test_split_percent=TEST_SPLIT_PERCENT,
                               rerandomize=RERANDOMIZE,
                               training_sentence_max=TRAINING_SENTENCE_MAX)

#############
# PLOT DATA #
#############
# traindata.plot_valences()
# traindata.plot_all_data("Traindata (Indiv. User Ratings) ({})".format(traindata.num_ratings()))
# traindata.plot_mean_data("Traindata (Mean Ratings) ({})".format(traindata.num_sentences()))
# if TEST_SPLIT_PERCENT > 0:
#     testdata.plot_all_data("Testdata (Indiv. User Ratings) ({})".format(testdata.num_ratings()))
#     testdata.plot_mean_data("Testdata (Mean Ratings) ({})".format(testdata.num_sentences()))

# raw_input("enter")
Example #22
def rotation_loss3(y_true, y_pred):
    print("####### IN THE ROTATION LOSS FUNCTION #####")
    return BETA * K.sqrt(K.sum(K.square((y_true - y_pred))))


#batchSize=25
nb_epochs = 30000
print("creating the model")
model = cnn_lstm.create_cnn_lstm(startweight)
sgd = settings.optimizer
#sgd = SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss=[pose_loss3, rotation_loss3])

#for e in range(nb_epoch):
#print("epoch %d" % e)
datasource = utilities.get_data(settings.traindata)

data_gen = utilities.gen_data_batch(datasource)
for i in range(nb_epochs):

    X_batch, Y_batch = next(data_gen)
    #model.train(X_batch,Y_batch)
    #history = model.fit(X_batch, Y_batch,batch_size=32,shuffle=True,nb_epoch=1)
    #print Y_batch[0].shape
    #print Y_batch[1].shape
    #print len(Y_batch)
    history = model.fit(X_batch,
                        Y_batch,
                        nb_epoch=1,
                        batch_size=utilities.batchSize)
    #history = model.fit(X_batch,{'pose_wpqr': Y_batch[1], 'pose_xyz': Y_batch[0]},
Example #23
                        tuple(text_origin + label_size)],
                       fill="white")
        draw.text(text_origin, label, fill=(0, 0, 0), font=font, color="b")
        del draw


#     plt.imshow(image)
#     image = image.resize((image.size[0]//2,image.size[1]//2))
    plt.imshow(image)
    plt.show()
    ts = int(datetime.now().timestamp() * 10000)
    #     plt.imsave(TMP_MOVIE+str(ts)+".png",image)
    plt.close()
    return image
if __name__ == "__main__":
    image_data = get_data()

    for camera in image_data["camera"].unique():
        images = image_data[image_data["camera"] == camera]["path"].values
        images = np.sort(images)
        img_train = images[:len(images) // 2]
        park_data = create_boxes(img_train)
        park_slots = look_for_slots(park_data,
                                    img=img_train,
                                    plot=False,
                                    PRUNE_TH=1,
                                    PRUNE_STEP=10,
                                    MERGE_STEP=50,
                                    MERGE_TH=0.8)
        park_slots.drop(park_slots[park_slots["found"] < 3].index,
                        inplace=True)
Example #24
import cv2
import math
import numpy as np
import glob
from datetime import datetime
from utilities import get_data, plot_res, plot_heatmap

training_set, train_ans, testing_set, test_ans = get_data(gray=True,
                                                          size=16,
                                                          normal=True)

training_set = np.array([x.flatten() for x in training_set])
testing_set = np.array([x.flatten() for x in testing_set])

print('=====KNN with cv2.ml.KNearest function=====')

knn = cv2.ml.KNearest_create()
knn.train(training_set, cv2.ml.ROW_SAMPLE, train_ans)

for k in range(5):
    start = datetime.now()
    ret, results, neighbours, dist = knn.findNearest(testing_set, k + 1)

    count = 0
    hit = 0
    for i in range(len(testing_set)):
        if results[i] == test_ans[i]:
            hit += 1
        count += 1
    end = datetime.now()
    print('accu with ', k + 1, ' neighbors = ', hit / count)
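
# --- Added sketch (not from the original source) ---
# The per-sample accuracy loop above can be expressed as a single vectorized
# comparison; dummy arrays stand in for knn.findNearest's Nx1 result matrix
# and the true labels:
import numpy as np

results = np.array([[0.0], [1.0], [2.0], [1.0]])
test_ans = np.array([0, 1, 1, 1])
accuracy = np.mean(results.flatten() == test_ans)
print('accu =', accuracy)  # 0.75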
Example #25
    ("Weekly Comparison", "Post Comparison", "Annual Forecast"))

# choose metric to display in "Weekly Comparison" and "Annual Forecast" views
metric = st.sidebar.radio("Metric:", ("Pageviews", "RPM", "Earnings"))

# choose to display confidence interval in
# "Weekly Comparison" and "Annual Forecast" views
low_hi = st.sidebar.checkbox("Show Low & High Forecast", value=False)

# choose time period for comparison in "Post Comparison" view
comparison = st.sidebar.radio("Comparison Period (Post Comparison View Only)",
                              ("Last Week", "Last Year"))

# LOGIC TO DISPLAY CHARTS
# load latest data
df_rpm, df_views, df_holiday = get_data()

# fit prophet model and make 365 days of predictions
forecast_views, forecast_rpm = fit_predict(df_rpm, df_views, df_holiday)

# merge views and rpm together for earnings forecast
df = merge_forecast(forecast_rpm, forecast_views, df_rpm, df_views)

# create four dfs for weekly plot comparison
# next week, this week, last week, last year during the same week
next_wk = df_between_dates(df, 0, 1)
this_wk = df_between_dates(df, -1, 0)
last_wk = df_between_dates(df, -2, -1)
last_yr_wk = df_between_dates(df, -53, -52)

# plot weekly comparison chart and percentage table
Example #26
config.gpu_options.allow_growth = True
session = tf.Session(config=config)


def plot_history(acc, val_acc):
    epochs = range(1, len(acc) + 1)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and Validation acc')
    plt.legend()
    plt.show()


## AlexNet

train_x, train_y, test_x, test_y = get_data(size=150)
train_y = to_categorical(train_y)
test_y = to_categorical(test_y)
train_x.shape


def modeling(input_shape):
    model = Sequential()
    model.add(Conv2D(64, (3, 3), padding='same', input_shape=input_shape))
    model.add(
        BatchNormalization(momentum=0.5,
                           epsilon=1e-5,
                           gamma_initializer="uniform"))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(
Example #27
    "ss.[0-9]*[0-9]"))  #get list of strings with full paths to each ss file

target, impactor, t = util.get_sorted_data(ss_files[0], units='cgs')

M_T = np.sum(target['mass'])
R_T = np.max(np.sqrt(target['x']**2 + target['y']**2 + target['z']**2))
r_T = np.mean(target['radius'])

#calculate approx kinetic and potential energy of each particle in each frame
f = open(output, 'w')

for frame in ss_files:
    sys.stdout.write('Current Frame: {0} of {1}\r'.format(frame, ss_files[-1]))
    sys.stdout.flush()
    #print("Current Frame: {0}".format(frame))
    data, t = util.get_data(frame, units='cgs')
    v2 = data['xdot']**2 + data['ydot']**2 + data['zdot']**2
    r = np.sqrt(data['x']**2 + data['y']**2 + data['z']**2)
    E_k = 0.5 * data['mass'] * v2
    E_pot = -cgs.G * data['mass'] * M_T / r
    E = E_k + E_pot
    bound_ind = np.where(
        (E <= 0.0) & (r > R_T + r_T)
    )  # indices where a particle is on a bound orbit but not touching the surface (approx.)
    unbound_ind = np.where(E > 0.0)
    bound_ind = bound_ind[0]
    unbound_ind = unbound_ind[0]
    N_esc = len(unbound_ind)
    M_esc = np.sum(data['mass'][unbound_ind])
    N_disk = len(bound_ind)
    M_disk = np.sum(data['mass'][bound_ind])
Example #28
import numpy as np
import old_cnn_lstm as cnn_lstm
from scipy.misc import imread, imresize
from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation
from keras.models import Model
from keras.regularizers import l2
from keras.optimizers import SGD
from custom_layers import PoolHelper  # ,LRN
#import caffe
import cv2
import utilities
from LRN2D import LRN2D as LRN
import settings
from similarityMeasures import getError
directory = settings.directory  # "/usr/prakt/w065/posenet/sm/"
datasource = utilities.get_data(settings.testsetpath)
datagen = utilities.limited_gen_data(datasource)
settings.saveMean = False
#outputDirectory = "/usr/prakt/w065/posenet/TFData/"
#meanFileLocation = 'smmean.binaryproto'
# 'tfsmtrainedweights.h5'#'75batbhessmtrainedweights.h5'#'smtrainedweights.h5'
weightsfile = settings.testweights




# weightsfile='shoptrainedweights.h5'
#poses = []  # will contain poses followed by qs
#images = []
#settings.oldmean=True
# limitingCounter=3
Example #29
#batchSize=25
nb_epochs = 30000
print("creating the model")
model = posenet.create_posenet(startweight)
sgd = settings.optimizer
#sgd = SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd,
              loss=[
                  pose_loss12, rotation_loss12, pose_loss12, rotation_loss12,
                  pose_loss3, rotation_loss3
              ])

#for e in range(nb_epoch):
#print("epoch %d" % e)
datasource = utilities.get_data()

data_gen = utilities.gen_data_batch(datasource)
print("beta=", BETA)
for i in range(nb_epochs):

    X_batch, Y_batch = next(data_gen)
    #model.train(X_batch,Y_batch)
    #history = model.fit(X_batch, Y_batch,batch_size=32,shuffle=True,nb_epoch=1)

    history = model.fit(X_batch, {
        'cls1_fc_pose_wpqr': Y_batch[1],
        'cls1_fc_pose_xyz': Y_batch[0],
        'cls2_fc_pose_wpqr': Y_batch[1],
        'cls2_fc_pose_xyz': Y_batch[0],
        'cls3_fc_pose_wpqr': Y_batch[1],
Example #30
if __name__ == "__main__":

    if len(sys.argv) != 2:
        sys.stderr.write('Invalid arguments\n')
        sys.stderr.write('Usage python2 train_DNN.py <model_number>\n')
        sys.stderr.write('1 - CNN\n')
        sys.stderr.write('2 - LSTM\n')
        sys.exit(-1)

    n = int(sys.argv[1]) - 1
    print('model given', models[n])

    # Read data
    global x_train, y_train, x_test, y_test
    x_train, x_test, y_train, y_test = get_data(flatten=False)
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)

    if n == 0:
        # Model is CNN so have to reshape the data
        in_shape = x_train[0].shape
        print(x_train.shape)
        print(in_shape)
        x_train = x_train.reshape(x_train.shape[0], in_shape[0], in_shape[1], 1)
        x_test = x_test.reshape(x_test.shape[0], in_shape[0], in_shape[1], 1)
    elif n > len(models):
        sys.stderr.write('Model Not Implemented yet')
        sys.exit(-1)

    model = get_model(models[n], x_train[0].shape)
Example #31
def train():

    try:
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)
    except Exception as e:
        print(e)
        num_api = numerapi.NumerAPI(PUBLIC_KEY, SECRET_GUY, verbosity="info")
        num_api.download_current_dataset(dest_path='../data/')
        feature_names = utilities.get_feature_names(TRAIN_PATH)
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)

    feature_names = utilities.get_feature_names(train_data)
    x_train = train_data[feature_names]
    x_test = test_data[feature_names]
    # call autoencoder for dimensionality reduction
    ae = AutoEncoder(x_train.shape, N_COMPONENTS)
    model = ae.build()
    model.compile(optimizer=OPT, loss=LOSS)
    history = model.fit(x_train, x_train,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        verbose=2,
                        validation_data=(x_test, x_test))
    
    #get the autoencoder representation
    x_train_ae = model.predict(x_train)
    x_test_ae = model.predict(x_test)

    #corrupt dataset using gaussian noise
    #mu,sigma=0,0.1
    #noise=np.random.normal(mu,sigma,x_train_pca.shape)
    #x_train_pca_noise=x_train_pca+noise

    #train an LGBMRegressor model - use random search for parameter tuning
    #with cross validation
    lgb = LGBMRegressor()
    lgb_randomsearch = RandomizedSearchCV(estimator=lgb,
                                          cv=CV,
                                          param_distributions=params,
                                          n_iter=100)
    lgb_model = lgb_randomsearch.fit(x_train_ae[:100], train_data['target'][:100])
    lgb_model_best = lgb_model.best_estimator_
    lgb_model_best = lgb_model_best.fit(x_train_ae[:100], train_data['target'][:100])
    
    print("Generating all predictions...")
    train_data['prediction'] = lgb_model.predict(x_train_ae)
    test_data['prediction'] = lgb_model.predict(x_test_ae)

    train_corrs = (evaluation.per_era_score(train_data))
    print('train correlations mean: {}, std: {}'.format(train_corrs.mean(), train_corrs.std(ddof=0)))
    #print('avg per-era payout: {}'.format(evaluation.payout(train_corrs).mean()))

    valid_data = test_data[test_data.data_type == 'validation']
    valid_corrs = evaluation.per_era_score(valid_data)
    #valid_sharpe = evaluation.sharpe(valid_data)
    print('valid correlations mean: {}, std: {}'.format(valid_corrs.mean(), valid_corrs.std(ddof=0)))
    #print('avg per-era payout {}'.format(evaluation.payout(valid_corrs.mean())))
    #print('valid sharpe: {}'.format(valid_sharpe))

    #live_data = test_data[test_data.data_type == "test"]
    #live_corrs = evaluation.per_era_score(test_data)
    #test_sharpe = evaluation.sharpe(test_data)
    #print('live correlations - mean: {}, std: {}'.format(live_corrs.mean(),live_corrs.std(ddof=0)))
    #print('avg per-era payout is {}'.format(evaluation.payout(live_corrs).mean()))
    #print('live Sharpe: {}'.format(test_sharpe))
    
    #pickle and save the model
    with open('lgbm_model_round_253.pkl', 'wb') as f:
        pickle.dump(lgb_model,f)

    #save down predictions
    valid_corrs.to_csv('valid_predictions.csv')