Example #1
def add_face_to_data():
    cam_hd = cv2.VideoCapture(0)
    cam_ld = cv2.VideoCapture(1)
    face_cascade = cv2.CascadeClassifier(
        './haarcascade_frontalface_default.xml')
    
    dir_name = get_dir()

    i, k = 0, 0  # counters for face crops saved from the HD and LD cameras
    while True:
        gray_hd, gray_ld, faces_hd, faces_ld = cap_get_faces(cam_hd, cam_ld,
                                                             face_cascade)
        show_face(gray_hd, gray_ld, faces_hd, faces_ld)
        if len(faces_hd) > 0 and i < 10:
            x, y, w, h = faces_hd[0]
            save_img(gray_hd[y: y+h, x: x+w], dir_name)
            i += 1
        if len(faces_ld) > 0 and k < 10:
            x, y, w, h = faces_ld[0]
            save_img(gray_ld[y: y+h, x: x+w], dir_name, cam='ld')
            k += 1

        if i >= 9 and k >= 9:
            break
        print('HD picks:', i)
        print('LD picks:', k)
        sleep(1)

    cam_hd.release()
    cam_ld.release()

    main()
    main_loop()
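The helpers get_dir, cap_get_faces, show_face and save_img come from the surrounding project and are not shown here. A minimal sketch of what cap_get_faces might look like with the OpenCV calls already used above (the signature and parameters are assumptions, not the project's actual code):

import cv2

def cap_get_faces(cam_hd, cam_ld, face_cascade):
    # Hypothetical helper: read one frame per camera, convert it to
    # grayscale and run the Haar cascade detector on both frames.
    _, frame_hd = cam_hd.read()
    _, frame_ld = cam_ld.read()
    gray_hd = cv2.cvtColor(frame_hd, cv2.COLOR_BGR2GRAY)
    gray_ld = cv2.cvtColor(frame_ld, cv2.COLOR_BGR2GRAY)
    faces_hd = face_cascade.detectMultiScale(gray_hd, scaleFactor=1.3,
                                             minNeighbors=5)
    faces_ld = face_cascade.detectMultiScale(gray_ld, scaleFactor=1.3,
                                             minNeighbors=5)
    return gray_hd, gray_ld, faces_hd, faces_ld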
Example #2
def main():
    """
        Run the west nile virus prediction, and intervention recommendation model.
    """
    print('Running model...')
    process.main()  # process data
    if not os.path.isfile(
            'models/wnv_predict.pkl'):  # check if trained model exists
        train.main()  # train model if one does not exist
    predict.main()  # generate wnv predictions
    recommend.main()  # generate recommendations
    print('Done.')
Example #3
def main():
    """
        Train prediction model

    """
    print('Training model...')
    data_file = "processed_data/processed_train.csv"
    try:
        model_data = pd.read_csv(data_file)
    except FileNotFoundError:
        print('Processed data missing.\nProcessing raw data.')
        process.main()
        model_data = pd.read_csv(data_file)

    model_data = model_data.drop(['date', 'nummosquitos'], axis=1)
    y = model_data.pop('wnvpresent')
    X = model_data
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    try:
        forest = joblib.load('models/wnv_predict.pkl')
        print('Existing model found.')
    except FileNotFoundError:
        forest = RandomForestClassifier(class_weight='balanced_subsample',
                                        n_jobs=-1,
                                        random_state=42)

        param_grid = dict(max_depth=np.random.randint(1, 10, 10),
                          min_samples_split=np.random.sample(5),
                          min_samples_leaf=np.random.sample(5)/2,
                          min_weight_fraction_leaf=np.random.sample(5)/2,
                          max_features=np.random.sample(5))

        grid = GridSearchCV(forest,
                            param_grid,
                            cv=10,
                            scoring='neg_log_loss',
                            n_jobs=-1,
                            verbose=True)

        grid.fit(X_train, y_train)
        forest = grid.best_estimator_
        print(forest)
        joblib.dump(forest, 'models/wnv_predict.pkl')

    probabilities = forest.predict_proba(X)

    probs = pd.DataFrame(probabilities[:, 1], columns=['wnv_probability'])
    probs.index.rename('Id', inplace=True)
    probs.index = probs.index + 1
    probs.to_csv('data/prediction_probabilities.csv')
    print('Training complete.')
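Not shown here is how the persisted estimator is consumed later (predict.main in Example #2). A small sketch of reloading the pickled forest and scoring a processed feature file; the model path and dropped columns mirror this snippet, while the helper name and test CSV location are assumptions:

import joblib
import pandas as pd

def predict_from_saved_model(csv_path='processed_data/processed_test.csv'):
    # Hypothetical helper: reload the persisted random forest and return
    # a West Nile virus probability for each row of a processed CSV.
    forest = joblib.load('models/wnv_predict.pkl')
    features = pd.read_csv(csv_path)
    # drop columns that were excluded during training, if they are present
    features = features.drop(columns=['date', 'nummosquitos'], errors='ignore')
    return pd.DataFrame(forest.predict_proba(features)[:, 1],
                        columns=['wnv_probability'])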
Example #4
def get_country_clicks():
    auth_header = request.headers.get('Authorization')
    # check if authorization header is included and parse token
    if auth_header:
        if 'Bearer' not in auth_header:
            return jsonify({
                'status': 'fail',
                'message': 'Token must be Bearer Token'
            })

        try:
            access_token = auth_header.split(' ')[1]
        except IndexError:
            return jsonify({'status': 'fail', 'message': 'Token malformed'})
    else:
        return jsonify({
            'status': 'fail',
            'message': 'Token empty. Please provide a valid bearer token'
        })

    country_id = None
    if 'country_id' in request.args:
        country_id = request.args['country_id']

    country_clicks = process_data.main(access_token,
                                       country_id=country_id)['response']
    return jsonify(country_clicks)
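A quick way to exercise this endpoint from a client; the route path, token and country_id below are placeholders, not values taken from the snippet:

import requests

# Hypothetical client call against a local Flask server.
resp = requests.get('http://localhost:5000/country_clicks',
                    headers={'Authorization': 'Bearer <access_token>'},
                    params={'country_id': 'US'})
print(resp.json())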
Example #5
    def test_main_without_days(self, dir_path, data_path, input_path,
                               template_path, output_path, aws_session,
                               check_available_days, get_available_files,
                               output_dict, add_location_to_stop_data,
                               create_csv_data, write_info_to_kepler_file,
                               config):
        dir_path.return_value = self.data_path
        data_path.return_value = self.data_path
        input_path.return_value = self.data_path
        template_path.return_value = self.data_path
        output_path.return_value = self.data_path
        check_available_days.return_value = []

        with self.assertRaises(SystemExit) as cm:
            process_data.main(
                ['process_data', '2020-05-08', '2020-05-08', 'output'])

        self.assertEqual(cm.exception.code, 1)
Example #6
    def test_main(self, dir_path, data_path, input_path, template_path,
                  output_path, aws_session, check_available_days,
                  get_available_files, output_dict, add_location_to_stop_data,
                  add_location_to_metro_data,
                  add_location_to_metrotren_station_data, create_csv_data,
                  write_info_to_kepler_file, config):
        dir_path.return_value = self.data_path
        data_path.return_value = self.data_path
        input_path.return_value = self.data_path
        template_path.return_value = self.data_path
        output_path.return_value = self.data_path
        check_available_days.return_value = [
            datetime.strptime('2020-02-05', "%Y-%m-%d")
        ]
        output_dict.return_value = [
            mock.MagicMock(),
            mock.MagicMock(),
            mock.MagicMock()
        ]
        process_data.main(
            ['process_data', '2020-05-08', '2020-05-08', 'output'])
Example #7
def main():
    process_data.main()
    sparse_user_item = load_npz("./output/sparse_user_item.npz")
    train_data, test_data, users_altered = test_train_split(sparse_user_item)
    als_model, user_vecs, item_vecs = train_model(
        train_data.T
    )  # the parameter to train_model should be item - user matrix
    print(
        "implicit_recomm_auc,popularity_auc",
        evaluate_model(train_data, users_altered,
                       [csr_matrix(user_vecs),
                        csr_matrix(item_vecs.T)], test_data))

    directory = './output'
    if not os.path.exists(directory):
        os.makedirs(directory)
    np.save('./output/item_vecs', item_vecs)
    np.save('./output/user_vecs', user_vecs)

    with open('./output/als_model', 'wb') as file:
        pickle.dump(als_model, file)
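The saved factor matrices can be reused for scoring without unpickling the ALS model at all. A minimal NumPy sketch (the file names follow the np.save calls above; the top-k selection itself is only an illustration):

import numpy as np

def top_items_for_user(user_id, k=10):
    # Hypothetical usage: score every item for one user from the saved
    # latent factors and return the indices of the k highest scores.
    user_vecs = np.load('./output/user_vecs.npy')
    item_vecs = np.load('./output/item_vecs.npy')
    scores = user_vecs[user_id] @ item_vecs.T
    return np.argsort(scores)[::-1][:k]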
Example #8
def main():
    train_features, test_features, train_targets, test_targets = process_data.main(
    )

    tree_acc = tree_learner(train_features, test_features, train_targets,
                            test_targets)

    svm_acc = svm_learner(train_features, test_features, train_targets,
                          test_targets)

    nb_acc = nb_learner(train_features, test_features, train_targets,
                        test_targets)

    print(tree_acc)  # 0.9809
    print(svm_acc)  # 0.9826
    print(nb_acc)  # 0.9287
Example #9
def main():
    # output_dir = "./output_wmgm"  # TODO: be able to set with argument
    file_output = "results_all.csv"  # csv output
    # fdata2 = "data2.nii.gz"

    # Get list of files in folder1
    folder1, folder2 = folder_data
    fname_csv_list = sorted(glob.glob(os.path.join(folder1, "*.csv")))

    # initialize dataframe
    results_all = pd.DataFrame(columns=['WM',
                                        'GM',
                                        'Noise',
                                        'Smooth',
                                        'SNR',
                                        'Contrast',
                                        'Sharpness'])

    # loop and process
    i = 0
    for fname_csv in fname_csv_list:
        # get file name (pandas.Series.from_csv was removed in pandas 1.0,
        # so read the two-column CSV with read_csv instead)
        metadata = pd.read_csv(fname_csv, header=None,
                               index_col=0).squeeze("columns").to_dict()
        file_data = metadata["File"]
        # get fname of each nifti file
        fname1 = os.path.join(folder1, file_data)
        fname2 = os.path.join(folder2, file_data)
        # display
        print("\nData #1: " + fname1)
        print("Data #2: " + fname2)
        # process pair of data
        results = process_data.main([fname1, fname2], file_seg, file_gmseg, register=register, verbose=verbose)
        # append to dataframe
        results_all = results_all.append({'WM': metadata['WM'],
                                          'GM': metadata['GM'],
                                          'Noise': metadata['Noise'],
                                          'Smooth': metadata['Smooth'],
                                          'SNR_single': results.loc['SNR_single'][0],
                                          'SNR_diff': results.loc['SNR_diff'][0],
                                          'Contrast': results.loc['Contrast'][0]}, ignore_index=True)
                                          # 'Sharpness': results.loc['Sharpness'][0]}, ignore_index=True)
    results_all.to_csv(file_output)
Example #10
def main(new_exp, p_raw_files, raw_f_subfold, p_stages, p_ref_csv_files,
         wrk_dir, f_vars_to_extract, f_pattern_ref, tests, spinup, lclean,
         ltestsuite, lverbose):

    # init logger
    logger_config.init_logger(lverbose, __file__)

    log.banner('Start sanity checker')

    # make all paths from user to absolute paths
    wrk_dir = utils.abs_path(wrk_dir)
    p_stages = utils.abs_path(p_stages)
    p_ref_csv_files = utils.abs_path(p_ref_csv_files)
    f_pattern_ref = utils.abs_path(f_pattern_ref)

    # create directories
    os.makedirs(p_stages, exist_ok=True)
    os.makedirs(wrk_dir, exist_ok=True)

    # go to working directory
    os.chdir(wrk_dir)
    log.info('Working directory is {}'.format(wrk_dir))

    # data processing takes a while, check that no step is done twice
    actions = utils.determine_actions_for_data_processing(
        new_exp, tests, p_stages, lclean)

    # create dataframe out of raw data
    results_data_processing = process_data.main(
        new_exp,
        actions,
        tests,
        spinup,
        p_raw_files=p_raw_files,
        p_stages=p_stages,
        raw_f_subfold=raw_f_subfold,
        f_vars_to_extract=f_vars_to_extract,
        f_pattern_ref=f_pattern_ref)

    results_test, references = perform_test.main(
        new_exp,
        results_data_processing=results_data_processing,
        p_stages=p_stages,
        tests=tests,
        p_ref_csv_files=p_ref_csv_files,
        ltestsuite=ltestsuite,
        f_vars_to_extract=f_vars_to_extract)

    if 'welch' in tests:
        test = 'welch'
        plt.plt_welchstest(references[test].append(
            results_data_processing[test], sort=False),
                           new_exp,
                           results_test[test],
                           p_stages=p_stages)

    # Add experiment to the reference pool
    #--------------------------------------------------------------------
    log.banner('')
    log.banner('Check results again before adding to reference pool')
    log.banner('')

    for test in tests:
        test_cfg = test_config.get_config_of_current_test(test)
        utils.print_warning_if_testresult_is_bad(test, results_test[test],
                                                 test_cfg.metric_threshold,
                                                 test_cfg.metric)

    if ltestsuite:
        asw = 'YES'
    else:
        asw = input('If you are happy with this experiment, '
                    'do you want to add it to the reference pool? '
                    '(yes/[No])\n')

    if (asw.strip().upper() == 'YES') or (asw.strip().upper() == 'Y'):
        add_exp_to_ref.main(new_exp,
                            tests,
                            p_stages=p_stages,
                            ltestsuite=ltestsuite,
                            p_ref_csv_files=p_ref_csv_files)
    else:
        args_for_manual_execution = \
            utils.derive_arguments_for_add_exp_to_ref(new_exp,
                                                      tests,
                                                      p_stages,
                                                      p_ref_csv_files)

        log.info('The experiment {} is NOT added to '
                 'the reference pool \n'.format(new_exp))
        log.info('If you want to add the experiment {} '
                 'to the reference pool later on, type '
                 'the following line when you are ready:'.format(new_exp))

        log.info('')
        log.info(
            'python add_exp_to_ref.py {}'.format(args_for_manual_execution))

    log.banner('')
    log.banner('Sanity test finished')
    log.banner('')
Example #11
    for filename in files:
        fetch(address, filename, version, HTTPBasicAuth(username, password))
    reformat_data(version)


if __name__ == "__main__":
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "version",
        nargs="?",
        help=
        ("Experiment version. This corresponds to the experiment_code_version "
         "parameter in the psiTurk config.txt file that was used when the "
         "data was collected."))

    c = configparser.ConfigParser()
    c.read('config.txt')
    sp = c['Server Parameters']

    url = 'https://' + sp['adserver_revproxy_host'] + '/data'

    version = parser.parse_args().version
    if version is None:
        version = c["Task Parameters"]["experiment_code_version"]
        print("Fetching data for current version: ", version)
    main(version, url, sp['login_username'], sp['login_pw'])

    sys.path.append('bin')
    import process_data
    process_data.main(version)
Example #12
            if now.minute % burst_int == 0 and now.second == 0:

                logger.info("starting burst")
                #create file name
                fname = dataDir + floatID + '_GPS_' + "{:%d%b%Y_%H%M%SUTC.dat}".format(
                    datetime.utcnow())
                logger.info("file name: %s" % fname)
                #call record_gps
                u, v, z, lat, lon = record_gps(ser, fname)

                #check if the burst completed with 2048 points
                try:
                    if os.path.isfile(fname) and os.path.getsize(fname) > 0:
                        #call data processing script
                        logger.info('starting to process data')
                        print(u.shape)
                        process_data.main(u, v, z, lat, lon)
                    else:
                        logger.info(
                            'data file does not exist or does not contain enough data for processing'
                        )

                except OSError as e:
                    logger.info(e)
                    sys.exit(1)
            else:
                sleep(0.25)
    else:
        logger.info("GPS not initialized, exiting")
        sys.exit(1)
Example #13
import process_data
from elasticsearch import Elasticsearch, helpers

#load data
recipes_dict, ingredients_df = process_data.main()

#elasticsearch should be up and running on localhost:9200
es = Elasticsearch(host="localhost", port=9200)

actions = [{
    "_index": "recipe_index",
    "_id": recipes_dict['id'][i],
    "Url": recipes_dict['Url'][i],
    "Instructions": recipes_dict['Instructions'][i],
    "Ingredients": recipes_dict['Ingredients'][i],
    "Ingredients_unique": ingredients_df[ingredients_df.id == recipes_dict['id'][i]].ingredient.values.tolist(),
    "Name": recipes_dict['Name'][i]
} for i in range(len(recipes_dict['Url'].keys()))]

#bulk insert to index
helpers.bulk(es, actions)

#view the data
#localhost:9200/recipe_index/_search?pretty
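Once the bulk insert has finished, the same check can be done from Python instead of the browser URL above. A minimal sketch with the es client created earlier (the queried field and search term are only an illustration):

# search the index for recipes whose ingredients mention "garlic"
hits = es.search(index="recipe_index",
                 body={"query": {"match": {"Ingredients": "garlic"}}})
for hit in hits["hits"]["hits"]:
    print(hit["_source"]["Name"])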
Example #14
#Get and transform user's resume or linkedin
user_input = st.text_area("copy and paste your resume or linkedin here", '')

user_input = str(user_input)
user_input = re.sub(r'[^a-zA-Z0-9.]', ' ', user_input)  # keep letters, digits and periods
user_input = user_input.lower()

user_input = pd.Series(user_input)

#load NLP + classification models

topic_model = pickle.load(open('topic_model.sav', 'rb'))
classifier = pickle.load(open('classification_model.sav', 'rb'))
vec = pickle.load(open('job_vec.sav', 'rb'))

classes, prob = pda.main(user_input, topic_model, classifier, vec)

data = pd.DataFrame(zip(classes.T, prob.T), columns=['jobs', 'probability'])


#Plot probability of person belonging to a job class
def plot_user_probability():
    #plt.figure(figsize = (2.5,2.5))
    plt.barh(data['jobs'], data['probability'], color='r')
    plt.title('Percent Match of Job Type')
    st.pyplot()


#Plot where user fits in with other job clusters
def plot_clusters():
    st.markdown(