Example #1
def harvest_ratio(in_path, out_path, classifier):
    relevants = {}
    num_pages = {}

    for site in DOMAINS.iterkeys():
        relevants[site] = 0
        num_pages[site] = 0

    count = 0
    for site, html in read_file_multiple(in_path):
        count += 1
        print count
        num_pages[site] += 1
        if classifier.classify(html):
            relevants[site] += 1

    rows = [[
        'Domain', '# Relevant pages', '# Downloaded pages', '# Harvest Ratio'
    ]]

    total_pages = 0
    total_relevants = 0

    for site in DOMAINS.iterkeys():
        total_pages += num_pages[site]
        total_relevants += relevants[site]
        domain_hr = float(relevants[site]) / num_pages[site]
        rows.append([site, relevants[site], num_pages[site], domain_hr])

    hr = float(total_relevants) / total_pages
    rows.append(['Total', total_relevants, total_pages, hr])

    save_csv(out_path, rows)
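
The save_csv helper behind this call is not part of the listing; a minimal Python 3 sketch of a save_csv(path, rows) helper that would satisfy the call above, assuming rows is a list of row lists (names and behavior are assumptions, not the original utils implementation):

import csv

def save_csv(path, rows):
    # Hypothetical helper (assumed signature): write a list of row lists to a CSV file.
    with open(path, "w", newline="") as csv_file:
        csv.writer(csv_file).writerows(rows)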
Example #2
def report(self, n=None, sort='v', directory=None, filename=None):
    if self._detail is None:
        raise Exception('Nothing to report')
    else:
        vars = [
            'experiment', 'alg', 'batch_size', 'learning_rate',
            'learning_rate_sched', 'learning_rate_sched_label',
            'stop_metric', 'stop_metric_label', 'time_decay', 'step_decay',
            'step_epochs', 'exp_decay', 'precision', 'i_s', 'maxiter',
            'epochs', 'iterations', 'duration',
            'final_costs', 'final_mse'
        ]
        df = self._summary
        df = df[vars]
        if sort == 't':
            df = df.sort_values(by=['final_costs', 'duration'])
        else:
            df = df.sort_values(by=['final_mse', 'duration'])
        if directory:
            if filename is None:
                filename = self._alg + ' Grid Search.csv'
            save_csv(df, directory, filename)
        if n:
            df = df.iloc[:n]
        return df
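
The save_csv used by this report method (and by the detail, eval, and summary methods in later examples) receives a DataFrame plus a directory and filename. That helper is not shown; a minimal sketch compatible with the call, assuming pandas DataFrames and that the target directory may need to be created, might be:

import os

def save_csv(df, directory, filename):
    # Hypothetical helper (assumed behavior): persist a DataFrame to directory/filename.
    os.makedirs(directory, exist_ok=True)
    df.to_csv(os.path.join(directory, filename), index=False)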
Example #3
def preprocessing_main(filepath, model_name, subset_name):
    columns = ["ID", "tweet", "affect_dimension", "intensity_class"]
    df = read_csv(filepath, columns=columns)
    
    df = pandas_explode_column(df, "intensity_class", "intensity_scores", "intensity_descriptions", delimiter=":")
    
    if model_name == "dl":
        preprocessed_tweet_list = prep_dl_data(df)
        preprocessed_filename = "dl_" + subset_name
        preprocessed_filepath = "data/preprocessed_data/DL_data"
    
    elif model_name == "ml":
        preprocessed_tweet_list = prep_ml_data(df)
        preprocessed_tweet_list = [' '.join(x) for x in preprocessed_tweet_list]  # join token lists back into strings
        preprocessed_filename = "ml_" + subset_name
        preprocessed_filepath = "data/preprocessed_data/ML_data"

    else:
        print("No preprocessing for models other than dl and ml")
        return  # nothing to save for unknown model names

    df["tweets"] = preprocessed_tweet_list
    save_csv(df, preprocessed_filepath, preprocessed_filename, "\t")
    
    print("Finished Preprocessing Programme")
Example #4
def main():
    args = parser.parse_args()
    header, dataset = utils.load_csv(args.input)
    if len(dataset) == 0:
        parser.error("Invalid input: file does not exist or is empty.")

    normalized = standardize(dataset)
    dendrogram_info = clusterize(normalized, args.linkage)

    fig = plot(dendrogram_info)
    fig.savefig(args.output + "_full.png", format="png")
    plt.show()

    weights = [args.average_weight, args.sd_weight]
    trees = cut(dendrogram_info, weights, args.class_range)
    fig = plot(trees)
    fig.savefig(args.output + ".png", format="png")
    plt.show()

    print("%d clusters were generated." % len(trees))
    classified = [header + ["Classification"]]
    clusters = get_clusters(trees)
    for i in range(len(dataset)):
        classified.append(dataset[i] + [clusters[i]])
    utils.save_csv(args.output + ".csv", classified)
Example #5
def codebook_to_csv(k=128, des_name=constants.ORB_FEAT_NAME):
    if not os.path.exists(constants.FILES_DIR_NAME):
        os.makedirs(constants.FILES_DIR_NAME)
    codebook = utils.load(filenames.codebook(k, des_name))
    filename = "{0}/codebook_{1}_{2}.csv".format(constants.FILES_DIR_NAME, k, des_name)
    utils.save_csv(filename, codebook)
    print("Copied codebook into the file with name {0}. Press any key to exit...".format(filename))
    cv2.waitKey()
Example #6
def save_locations(match_locations):
    header_locations = ["id", "name", "latitude", "longitude"]
    formatted_results = []
    # Flatten the dictionary
    for location in match_locations:
        formatted_results.append(
            [match_locations[location].get(h, None) for h in header_locations])

    save_csv(CSV_OUTPUT_LOCATIONS, header_locations, formatted_results)
Example #7
def detail(self, directory=None, filename=None):
    if self._alg is None:
        raise Exception('No algorithm selected.')
    else:
        if directory is not None:
            if filename is None:
                filename = self._alg + ' Detail.csv'
            save_csv(self._detail, directory, filename)
        return self._detail
Example #8
def eval(self, directory=None, filename=None):
    if self._eval is None:
        raise Exception('No search results to report.')
    else:
        if directory is not None:
            if filename is None:
                filename = self._alg + ' Evaluation.csv'
            save_csv(self._eval, directory, filename)
        return self._eval
Example #9
def summary(self, directory=None, filename=None):
    if self._summary is None:
        raise Exception('No search results to report.')
    else:
        if directory is not None:
            if filename is None:
                filename = self._alg + ' Summary.csv'
            save_csv(self._summary, directory, filename)
        return self._summary
Example #10
def compliance_csv(reports, path):
    header = [
        "Type", "Name", "Total Domains", "Uses HTTPS", "Enforces HTTPS",
        "HSTS", "Uses HTTPS (%)", "Enforces HTTPS (%)", "HSTS (%)"
    ]
    rows = []
    for report in reports:
        rows.append(compliance_csv_row(report))

    utils.save_csv(header, rows, path)
Example #11
def compare_classifiers():
    db = tools.load_database()
    X, Y = tools.prepare_database(db)

    bag_of_words, bag_of_words_vectors = tools.create_bag_of_words(X)
    tfidf, tfidf_vectors = tools.create_TfIdf(X)

    print "Testing Bag of Words"

    file_path = os.path.join(consts.RESULTS_DIR, 'bag-of-words-results.csv')

    rows = [[
        'Algorithm', 'Training Time', 'Accuracy', 'Precision', 'Recall',
        'F1-Measure'
    ]]

    features_train, features_test, labels_train, labels_test = tools.split_dataset(
        bag_of_words_vectors, Y)

    rows += try_naive_bayes(features_train, features_test, labels_train,
                            labels_test)
    rows += try_regression(features_train, features_test, labels_train,
                           labels_test)
    rows += try_random_forest(features_train, features_test, labels_train,
                              labels_test)
    rows += try_svm(features_train, features_test, labels_train, labels_test)
    rows += try_ada_boost(features_train, features_test, labels_train,
                          labels_test)
    rows += try_knn(features_train, features_test, labels_train, labels_test)

    save_csv(file_path, rows)

    print "Testing TF-IDF"

    file_path = os.path.join(consts.RESULTS_DIR, 'tf-idf-results.csv')
    rows = [[
        'Algorithm', 'Training Time', 'Accuracy', 'Precision', 'Recall',
        'F1-Measure'
    ]]

    features_train, features_test, labels_train, labels_test = tools.split_dataset(
        tfidf_vectors, Y)

    rows += try_naive_bayes(features_train, features_test, labels_train,
                            labels_test)
    rows += try_regression(features_train, features_test, labels_train,
                           labels_test)
    rows += try_random_forest(features_train, features_test, labels_train,
                              labels_test)
    rows += try_svm(features_train, features_test, labels_train, labels_test)
    rows += try_ada_boost(features_train, features_test, labels_train,
                          labels_test)
    rows += try_knn(features_train, features_test, labels_train, labels_test)

    save_csv(file_path, rows)
Example #12
def codebook_to_csv(k=128, des_name=constants.ORB_FEAT_NAME):
    if not os.path.exists(constants.FILES_DIR_NAME):
        os.makedirs(constants.FILES_DIR_NAME)
    codebook = utils.load(filenames.codebook(k, des_name))
    filename = "{0}/codebook_{1}_{2}.csv".format(constants.FILES_DIR_NAME, k,
                                                 des_name)
    utils.save_csv(filename, codebook)
    print(
        "Copied codebook into the file with name {0}. Press any key to exit..."
        .format(filename))
    cv2.waitKey()
Example #13
def save_players(players):
    header_players = [
        "id",
        "name",
    ]
    formatted_results = []
    # Flatten the dictionary
    for p in players:
        formatted_results.append([players[p][h] for h in header_players])

    save_csv(CSV_OUTPUT_PLAYERS, header_players, formatted_results)
Example #14
def save_teams(teams):
    header_teams = [
        "id",
        "country",
    ]
    formatted_results = []
    # Flatten the dictionary
    for t in teams:
        formatted_results.append([teams[t][h] for h in header_teams])

    save_csv(CSV_OUTPUT_TEAMS, header_teams, formatted_results)
Example #15
def summary(self, nbest=0, directory=None, filename=None):
    if self._summary is None:
        raise Exception("No summary to report")
    else:
        if directory is not None:
            if filename is None:
                filename = self._alg + ' Lab Summary.csv'
            save_csv(self._summary, directory, filename)
        if nbest:
            s = self._summary.sort_values(by=['final_costs', 'duration'])
            return s.head(nbest)
        return self._summary
Example #16
def main():

    js_projects = np.array(list(get_repos_for_code_search(QUERY_ALL_JS)))
    js_projects_with_express = np.array(
        list(get_repos_for_code_search(QUERY_WITH_EXPRESS)))
    js_projects_with_helmet = np.array(
        list(get_repos_for_code_search(QUERY_WITH_HELMET)))

    node_based_docker_projects = np.array(
        list(get_repos_for_code_search(QUERY_NODE_BASED_DOCKER_IMAGE)))
    node_based_docker_projects_2 = np.array(
        list(get_repos_for_code_search(QUERY_NODE_BASED_DOCKER_IMAGE_2)))

    concatenated_node_based_docker_projects = np.concatenate(
        (node_based_docker_projects, node_based_docker_projects_2))

    all_concatenated = np.concatenate(
        (js_projects, js_projects_with_express, js_projects_with_helmet,
         concatenated_node_based_docker_projects))

    all_projects = [
        dict(t) for t in {tuple(d.items())
                          for d in all_concatenated}
    ]
    all_projects_sorted = sorted(all_projects, key=lambda item: item['name'])

    fieldnames = [
        'Reference', 'Uses Express', 'Node-based docker image',
        'Likely nodejs app', 'Uses Helmet'
    ]

    def likely_node_app(item):
        return (item in concatenated_node_based_docker_projects) or (
            item in js_projects_with_express)

    rows = [{
        fieldnames[0]: item['name'],
        fieldnames[1]: 'X' if item in js_projects_with_express else '',
        fieldnames[2]: 'X' if item in concatenated_node_based_docker_projects else '',
        fieldnames[3]: 'X' if likely_node_app(item) else '',
        # column 4 is 'Uses Helmet', so check the Helmet query results
        fieldnames[4]: 'X' if item in js_projects_with_helmet else '',
    } for item in all_projects_sorted]

    save_csv(OUTPUT_FILE, fieldnames, rows)
Example #17
def process_dataset(dataset, colors):
    y = np.load("./data/" + dataset + '_labels.npy')
    pred = np.load("./data/" + dataset + '_clasification.npy')
    segments = np.load("./results/" + dataset + '_segments.npy')
    test_mask = np.load("./data/" + dataset + '_test_mask.npy').reshape(
        y.shape)
    sc_pred = classify_segments(pred, segments)
    sc_score = utils.balanced_score(y[test_mask], sc_pred[test_mask])
    sc_cm = utils.confusion_matrix(y[test_mask], sc_pred[test_mask])
    utils.save_json({"sc": sc_score}, dataset + "_sc_score")
    utils.save_csv(sc_cm, dataset + "_sc_cm")
    color_map = color_true_map(sc_pred, labels_colors=colors)
    save_image(color_map, dataset + "_sc_clasification")
Example #18
def main():
    logger.info(' Preprocessing ...')

    data_path = 'data/netflix-prize-data/combined_data_1.txt'
    min_num_users = 50
    min_num_movies = 15
    t_start_0 = time.time()
    data_matrix, uid_idx_map, mid_idx_map = filtering(
        read_combined_data(data_path),
        min_num_users=min_num_users,
        min_num_movies=min_num_movies)
    logger.debug('after preprocessing, time used {:.2f} sec.'.format(
        time.time() - t_start_0))
    logger.debug('# of data = {}, user x movie = {}'.format(
        data_matrix.nnz, data_matrix.shape))

    print()
    logger.info(' Training ...')
    t_start = time.time()
    k = 50
    max_iter = 100
    W, H = rs.non_negative_matrix_factorization(data_matrix,
                                                k=k,
                                                max_iter=max_iter)
    del data_matrix
    logger.debug('after NMF, time used {:.2f} sec.'.format(time.time() -
                                                            t_start))
    logger.info('Total time used {:.2f} sec.'.format(time.time() - t_start_0))

    print('>> W ( shape={} )'.format(W.shape))
    print('>> H ( shape={} )'.format(H.shape))

    # testing qualifying
    logger.info(' Testing ...')
    test_path = 'data/netflix-prize-data/qualifying.txt'
    test_df = read_combined_data(test_path, mode='test')
    test_matrix, filtered_test_df = filtering_test(test_df, uid_idx_map,
                                                   mid_idx_map)
    del test_df

    scored_test_matrix = test_matrix.toarray() * np.inner(W, H.T)
    test_scores = [
        scored_test_matrix[uid][mid]
        for uid, mid in zip(test_matrix.row, test_matrix.col)
    ]
    filtered_test_df['rating'] = test_scores

    save_csv(
        filtered_test_df,
        'result/result_nu{}_nm{}.csv'.format(min_num_users, min_num_movies))
Example #19
def detail(self, nbest=0, directory=None, filename=None):
    if self._detail is None:
        raise Exception("No detail to report")
    else:
        if directory is not None:
            if filename is None:
                filename = self._alg + ' Lab Detail.csv'
            save_csv(self._detail, directory, filename)
        if nbest:
            s = self.summary(nbest=nbest)
            d = self._detail
            d = d.loc[d['experiment'].isin(s['experiment'])]
            return d
        return self._detail
Example #20
    def run_training_testing(self, model_weight_path, gpu_memory_fraction):

        # train the network
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction

        train_generator_obj = self.obj.train_generator()

        with tf.Session(config=config) as sess:
            summary_writer = tf.summary.FileWriter('./checkpoints/', sess.graph)
            saver = tf.train.Saver(max_to_keep=2)
            self.model.optimize()           

            sess.run(tf.global_variables_initializer())

            self.model.load_weight(sess, model_weight_path)
            
            loss = 0
            true_positives = 0
            for epochs in range(1, self.num_epochs+1):
                start_time = time.time()
                for step in range(len(self.obj.train_list)//self.batch_size + 1):
                    x_batch, y_batch = get_batch(train_generator_obj, 'train', height=self.model.height, width=self.model.width)
                    #temp1 = sess.run([self.pool] , feed_dict={self.model.x:x_batch, self.model.y:y_batch})
                    #print(temp1.shape)
                    _, loss_curr, predicted = sess.run([self.model.optimizer, self.model.loss, self.model.pred] , feed_dict={self.model.x:x_batch, self.model.y:y_batch})
                    loss = 0.9*loss + 0.1*loss_curr
                    true_positives = true_positives + np.sum(predicted == np.argmax(y_batch,1))

                end_time = time.time()
                print('time_taken', end_time - start_time)
                print('epochs:', epochs, ' train-loss:', loss, 'train-acc:', true_positives*100.0/len(self.obj.train_list))
                true_positives = 0

                saver.save(sess, './checkpoints/', global_step=step)
                self.evaluate(sess, 'val')
                print('')


        # predict values for test dataset
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
        
        with tf.Session(config=config) as sess:
            saver.restore(sess, tf.train.latest_checkpoint('./checkpoints/'))
            model_pred = self.predict(sess, 'test')    


        #save the results in the required csv format
        save_csv(model_pred, self.obj)
Example #21
def save_match_player_relations(matches):
    header_rel = [
        "player_id",
        "match_id",
        "is_substitute",
        "num_goals",
        "team_id",
    ]
    formatted_results = []
    # Flatten the dictionary
    for m in matches:
        formatted_results.append([matches[m][h] for h in header_rel])

    save_csv(CSV_OUTPUT_MATCH_PLAYERS_REL, header_rel, formatted_results)
Example #22
def upload_file(request):
    file = request.FILES["file"]
    response = utils.save_csv(file)
    if response == "address column not found":
        return utils.JsonResponse({'response': response}, status=400)
    return utils.JsonResponse({'response': response})
Example #23
def run(
    dataset: List[List[Any]] = None,
    config: Dict = CONFIG,
):
    """Run the module.

    Args:
        config (dict): Dictionary containing configuration options.
        dataset (list): Source dataset to transform and save
    """
    transformed: List[List[Any]] = transform(
        source=dataset,
        include_scorer_skills=config['include_scorer_skills'],
    )
    save_csv(array=transformed, path=config['output_file_path'])
    print('Saved to: ' + config['output_file_path'])
Example #24
def main():

    username = input(
        "Digite o seu nome de usuário, sem a arroba, e aperte < ENTER >. Exemplo: jack\n"
    )

    api = utils.login()

    favs = utils.fetch_favs(api, username)

    if not os.path.exists("downloads/"):
        os.makedirs("downloads/")

    now = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-")
    fpath = "downloads/favs-by-" + username + "-" + now + ".csv"

    utils.save_csv(favs, fpath)
Example #25
def calc_thiessen(hidroweb_dir, inventory, list_ids, shp, poly, attr, buffer,
                  dates, dir_out):

    loc_stations = pre_process(hidroweb_dir, inventory)

    if list_ids:
        IDS = pd.read_csv(list_ids)
        stations_in = loc_stations[np.where(loc_stations == IDS.values)[0]]

    else:
        if not attr:
            attr = 'ID'  # default attribute name when none is given

        if buffer:
            buffer = float(buffer)
        else:
            buffer = False  # default when not given

        # extract the vertices of the polygon (poly) from the given shapefile
        vertices = getvert(shp, poly, attr=attr, buffer=buffer)

        # check which stations fall inside the polygon
        isin = isinpoly3(loc_stations[:,2], loc_stations[:,1], vertices)

        # keep only the stations inside the polygon
        stations_in = loc_stations[isin,:]

    # convert the dates from string to datetime
    dates = pd.to_datetime(dates, format='%d/%m/%Y')
    dates = pd.date_range(dates[0], dates[1])

    # extract precipitation of the stations inside the polygon for each date
    pr_med = []
    for date in dates:
        pr_estations = open_files(stations_in, hidroweb_dir, date)

        # mean precipitation using the Thiessen method
        pr_med.append([date, thiessen(pr_estations[:,1], pr_estations[:,0],
                                      vertices[:,0], vertices[:,1],
                                      pr_estations[:,2])])

    # save the mean precipitation as .csv
    save_csv(dir_out, pr_med, date, poly)

    return None
Example #26
def save_matches(matches):
    header_matches = [
        "id",
        "home_team_id",
        "away_team_id",
        "match_date",
        "location_id",
        "competition",
        "winning_team_id",
        "home_team_score",
        "away_team_score",
    ]
    formatted_results = []
    # Flatten the dictionary
    for m in matches:
        formatted_results.append([matches[m][h] for h in header_matches])

    save_csv(CSV_OUTPUT_MATCHES, header_matches, formatted_results)
Example #27
def _recommend(cfg: dict):
    top_n = cfg['top_n']
    cfg_model, cfg_dataset = cfg['model'], cfg['dataset']
    cfg_results = cfg['results']

    _, col_files = cfg_dataset['cols']
    x_df = utils \
        .read_csv(cfg_dataset['path'], usecols=cfg_dataset['cols']) \
        .pipe(utils.to_list_of_strings, col=col_files)
    model_recommender = model.deserialize(cfg_model['path'])

    y_df = model.recommend(model_recommender,
                           x_df,
                           cfg_dataset['cols'],
                           top_n=top_n)

    if cfg_results.get('save', True):
        utils.save_csv(cfg_results['out'], y_df)
Example #28
def main():
    args = get_args()
    if args.use_dropout == 0:
        args.use_dropout = False

    for x in vars(args).items():
        print(x)
    #from utils import data_transforms
    #print(data_transforms)

    if args.lr_sch == 5 and torch.__version__ != '0.4.0':
        print("for cosine annealing, change to torch==0.4.0 in setup.py")
        raise AssertionError()
    elif args.lr_sch != 5 and torch.__version__ == '0.4.0':
        print("warning: this is torch version {}! nsml report will not be recorded".format(torch.__version__))


    model, optimizer, scheduler = model_all.get_model(args)

    if args.use_gpu:
        if torch.cuda.device_count() > 1:
            print("[gpu] Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            model = torch.nn.DataParallel(model)
        elif torch.cuda.device_count() == 1:
            print("[gpu] Let's use", torch.cuda.device_count(), "GPUs!")
        else:
            print("[gpu] no available gpus")
        model = model.cuda()
    

    nsml.bind(infer=infer, model=model, optimizer=optimizer)

    if args.pause:
        nsml.paused(scope=locals())

    nsml.save()
    if args.mode == 'train':
        dataloaders, dataset_sizes = utils.data_loader(args, train=True, batch_size=args.batch_size)
        model = train.train_test(model, optimizer, scheduler, dataloaders, dataset_sizes, args)
    
    utils.save_model(model, 'model_state')
    with open('args.pickle', 'wb') as farg:
        pickle.dump(args, farg)

    loader = utils.data_loader(args, train=False, batch_size=1)
    predict, acc = utils.get_forward_result(model, loader, args)
    predict = torch.cat(predict, 0)
    nsml.bind(save=lambda x: utils.save_csv(x,
                                            data_csv_fname=os.path.join(DATASET_PATH, 'train', 'test') + '/test_data',
                                            results=predict,
                                            test_loader=loader))
    nsml.save('result')
Example #29
def process_dataset(dataset, colors):
    y = np.load("./data/" + dataset + '_labels.npy')
    pred = np.load("./data/" + dataset + '_clasification.npy')
    test_mask = np.load("./data/" + dataset + '_test_mask.npy').reshape(
        y.shape)
    mv_sizes = [3, 5, 9]
    mv_pred = [majority_vote(pred, size) for size in mv_sizes]
    mv_scores = [
        utils.balanced_score(y[test_mask], p[test_mask]) for p in mv_pred
    ]
    mv_cm = [
        utils.confusion_matrix(y[test_mask], p[test_mask]) for p in mv_pred
    ]
    keys = ["_mv_{}".format(size) for size in mv_sizes]
    utils.save_json(dict(zip(keys, mv_scores)), dataset + "_mv_scores")
    for i in range(len(mv_sizes)):
        utils.save_csv(mv_cm[i], dataset + keys[i] + "_cm")
        color_map = color_true_map(mv_pred[i], labels_colors=colors)
        save_image(color_map, dataset + keys[i] + "_clasification")
Example #30
def main():

    username = input(
        "Digite o nome do usuário que você quer baixar, sem a arroba, e aperte < ENTER >. Exemplo: jack\n"
    )

    api = utils.login()

    output = utils.fetch_tweets(api, username)

    if not os.path.exists("downloads/"):
        os.makedirs("downloads/")

    now = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-")
    fpath = "downloads/tweets-by-" + username + "-" + now + ".csv"

    utils.save_csv(output, fpath)

    print("Pronto!")
Example #31
def main():

    js_projects = np.array(list(get_repos_for_repo_search(QUERY_JS)))
    ts_projects = np.array(list(get_repos_for_repo_search(QUERY_TS)))
    concatenated = np.concatenate((js_projects, ts_projects))
    all_projects = [dict(t) for t in {tuple(d.items()) for d in concatenated}]
    all_projects_sorted = sorted(all_projects, key=lambda item: item['name'])

    fieldnames = ['Reference', 'JS', 'TS', 'Archived']
    rows = list(
        map(
            lambda item: {
                fieldnames[0]: item['name'],
                fieldnames[1]: 'X' if (item in js_projects) else '',
                fieldnames[2]: 'X' if (item in ts_projects) else '',
                fieldnames[3]: 'X' if item['archived'] else ''
            }, all_projects_sorted))

    save_csv(OUTPUT_FILE, fieldnames, rows)
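
Examples #16 and #31 call save_csv with an output path, a field-name list, and rows that are dicts keyed by those field names. The underlying helper is not shown; a minimal csv.DictWriter-based sketch matching that call pattern (all names and behavior assumed) could be:

import csv

def save_csv(path, fieldnames, rows):
    # Hypothetical helper (assumed signature): write dict rows under a fixed column order.
    with open(path, "w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)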
Example #32
def main(is_interactive=True, k=64, des_option=constants.ORB_FEAT_OPTION, svm_kernel=cv2.SVM_LINEAR):
    if not is_interactive:
        experiment_start = time.time()
    # Check for the dataset of images
    if not os.path.exists(constants.DATASET_PATH):
        print("Dataset not found, please copy one.")
        return
    dataset = Dataset(constants.DATASET_PATH)
    dataset.generate_sets()

    # Check for the directory where stores generated files
    if not os.path.exists(constants.FILES_DIR_NAME):
        os.makedirs(constants.FILES_DIR_NAME)

    if is_interactive:
        des_option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n")
        k = input("Enter the number of cluster centers you want for the codebook.\n")
        svm_option = input("Enter [1] for using SVM kernel Linear or [2] to use RBF.\n")
        svm_kernel = cv2.SVM_LINEAR if svm_option == 1 else cv2.SVM_RBF

    des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME

    log = Log(k, des_name, svm_kernel)

    codebook_filename = filenames.codebook(k, des_name)
    if is_interactive:
        codebook_option = input("Enter [1] for generating a new codebook or [2] to load one.\n")
    else:
        codebook_option = constants.GENERATE_OPTION
    if codebook_option == constants.GENERATE_OPTION:
        # Calculate all the training descriptors to generate the codebook
        start = time.time()
        des = descriptors.all_descriptors(dataset, dataset.get_train_set(), des_option)
        end = time.time()
        log.train_des_time(end - start)
        # Generates the codebook using K Means
        print("Generating a codebook using K-Means with k={0}".format(k))
        start = time.time()
        codebook = descriptors.gen_codebook(dataset, des, k)
        end = time.time()
        log.codebook_time(end - start)
        # Stores the codebook in a file
        utils.save(codebook_filename, codebook)
        print("Codebook saved in {0}".format(codebook_filename))
    else:
        # Load a codebook from a file
        print("Loading codebook ...")
        codebook = utils.load(codebook_filename)
        print("Codebook with shape = {0} loaded.".format(codebook.shape))

    # Train and test the dataset
    classifier = Classifier(dataset, log)
    svm = classifier.train(svm_kernel, codebook, des_option=des_option, is_interactive=is_interactive)
    print("Training ready. Now beginning with testing")
    result, labels = classifier.test(codebook, svm, des_option=des_option, is_interactive=is_interactive)

    # Store the results from the test
    classes = dataset.get_classes()
    log.classes(classes)
    log.classes_counts(dataset.get_classes_counts())
    result_filename = filenames.result(k, des_name, svm_kernel)
    test_count = len(dataset.get_test_set()[0])
    result_matrix = np.reshape(result, (len(classes), test_count))
    utils.save_csv(result_filename, result_matrix)

    # Create a confusion matrix
    confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32)
    for i in range(len(result)):
        predicted_id = int(result[i])
        real_id = int(labels[i])
        confusion_matrix[real_id][predicted_id] += 1

    print("Confusion Matrix =\n{0}".format(confusion_matrix))
    log.confusion_matrix(confusion_matrix)
    log.save()
    print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel)))
    if not is_interactive:
        experiment_end = time.time()
        elapsed_time = utils.humanize_time(experiment_end - experiment_start)
        print("Total time during the experiment was {0}".format(elapsed_time))
    else:
        # Show a plot of the confusion matrix on interactive mode
        utils.show_conf_mat(confusion_matrix)
        raw_input("Press [Enter] to exit ...")