Example #1
 def __init__(self, ids=["oxfess"], file="posts.csv", depth=5, delay=2):
     self.ids = ids
     self.out_file = file
     self.depth = depth + 1
     self.delay = delay
     # browser instance
     self.browser = webdriver.Chrome(executable_path=GECKODRIVER)
     utils.create_csv(self.out_file)
Example #2
File: search.py  Project: kage08/PGM_1
def greedy_learn(steps=5,
                 seed=None,
                 splits=10,
                 save=True,
                 savepath='submissions/greedy.csv',
                 failed_runs=10000):
    rg = np.random.RandomState(seed)
    toposort = list(rg.permutation(22))
    extra_edges = set()
    nodes = 23

    #Initial Edges
    edges = [(22, i) for i in range(22)]
    train = get_train()
    #Get score on naive Bayes
    max_score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train,
                                   splits)

    st = 0
    f = 0
    while st < steps and f < failed_runs:
        while True:
            #Select Edges
            u = rg.randint(22)
            v = rg.randint(22)
            if u == v or (u, v) in edges or (v, u) in edges: continue
            cand_edge = ((u, v) if toposort.index(u) < toposort.index(v)
                         else (v, u))
            break
        edges.append(cand_edge)

        #Get Score with new network
        score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train,
                                   splits)

        #Check if addition of edge increased performance
        if score > max_score:
            extra_edges.add(cand_edge)
            max_score = score
            st += 1
            print('Step', st, ':', max_score, 'Extra Edges:', extra_edges)
        else:
            edges.pop()
            f += 1
    print('Max Score:', max_score)
    print('Extra Edges:', extra_edges)
    model = BayesN(nodes=nodes, edges=edges)
    model.fit(train)
    if save:
        test_data = read_csv(utils.TEST_PATH)[:, 1:]
        y_pred = model.predict(test_data)
        create_csv(y_pred, savepath)
    return model
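Both greedy_learn variants on this page score candidate networks with a get_kfold_accuracy helper that is not shown. The following is a minimal sketch of what such a helper might look like, assuming BayesN exposes fit/predict and that the class label sits in column 0 of the training array (as the submission code above suggests); the project's actual implementation may differ.

import copy

import numpy as np
from sklearn.model_selection import KFold


def get_kfold_accuracy(model, data, splits=10, seed=0):
    """Mean accuracy of `model` over `splits` folds of `data` (label assumed in column 0)."""
    accuracies = []
    for train_idx, val_idx in KFold(n_splits=splits, shuffle=True,
                                    random_state=seed).split(data):
        fold_model = copy.deepcopy(model)      # fresh copy of the candidate network per fold
        fold_model.fit(data[train_idx])        # fit on the training fold (label included)
        y_true = data[val_idx, 0]
        y_pred = fold_model.predict(data[val_idx, 1:])
        accuracies.append(np.mean(y_pred == y_true))
    return float(np.mean(accuracies))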
Example #3
 def __init__(self, ids=["oxfess"], file="posts.csv", depth=5, delay=2):
     self.ids = ids
     self.out_file = file
     self.depth = depth + 1
     self.delay = delay
     # browser instance
     self.browser = webdriver.Firefox(
         executable_path=GECKODRIVER,
         firefox_binary=FIREFOX_BINARY,
         firefox_profile=PROFILE,
     )
     utils.create_csv(self.out_file)
Example #4
def gen_training_accuracy(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)
    
    # csv
    headers = ["epoch", "acc_train", "acc_test"]
    csv_path = utils.create_csv(args.model_dir, 'accuracy.csv', headers)

    for epoch, ckpt_path in enumerate(ckpt_paths):
        if epoch % 5 != 0:
            continue
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        # load data
        train_transforms = tf.load_transforms('test')
        trainset = tf.load_trainset(params['data'], train_transforms, train=True)
        trainloader = DataLoader(trainset, batch_size=500, num_workers=4)
        train_features, train_labels = tf.get_features(net, trainloader, verbose=False)

        test_transforms = tf.load_transforms('test')
        testset = tf.load_trainset(params['data'], test_transforms, train=False)
        testloader = DataLoader(testset, batch_size=500, num_workers=4)
        test_features, test_labels = tf.get_features(net, testloader, verbose=False)

        acc_train, acc_test = svm(args, train_features, train_labels, test_features, test_labels)
        utils.save_state(args.model_dir, epoch, acc_train, acc_test, filename='accuracy.csv')
    print("Finished generating accuracy.")
Example #5
def gen_testloss(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"]
    ckpt_paths = np.sort(ckpt_paths)
    
    # csv
    headers = ["epoch", "step", "loss", "discrimn_loss_e", "compress_loss_e", 
        "discrimn_loss_t",  "compress_loss_t"]
    csv_path = utils.create_csv(args.model_dir, 'losses_test.csv', headers)
    print('writing to:', csv_path)

    # load data
    test_transforms = tf.load_transforms('test')
    testset = tf.load_trainset(params['data'], test_transforms, train=False)
    testloader = DataLoader(testset, batch_size=params['bs'], shuffle=False, num_workers=4)
    
    # save loss
    criterion = MaximalCodingRateReduction(gam1=params['gam1'], gam2=params['gam2'], eps=params['eps'])
    for epoch, ckpt_path in enumerate(ckpt_paths):
        net, epoch = tf.load_checkpoint(args.model_dir, epoch=epoch, eval_=True)
        for step, (batch_imgs, batch_lbls) in enumerate(testloader):
            features = net(batch_imgs.cuda())
            loss, loss_empi, loss_theo = criterion(features, batch_lbls,
                                            num_classes=testset.num_classes)
            utils.save_state(args.model_dir, epoch, step, loss.item(), 
                *loss_empi, *loss_theo, filename='losses_test.csv')
    print("Finished generating test loss.")
Example #6
        return ans

    def predict(self, features):
        y_pred = []
        for i in range(features.shape[0]):
            probabs = [
                self.infer_logprobab(features[i], y) for y in self.classes
            ]
            y = self.classes[np.argmax(np.array(probabs))]
            y_pred.append(y)
        return np.array(y_pred)


if __name__ == "__main__":
    laplacians = [1, 0.5, 0.1, 2]
    from utils import read_csv, create_csv
    import utils

    for l in laplacians:
        model = NBayes(l)
        train_data = read_csv(utils.TRAIN_PATH)
        x_train = train_data[:, 1:]
        y_train = train_data[:, 0]

        model.fit(x_train, y_train)

        test_data = read_csv(utils.TEST_PATH)[:, 1:]
        y_pred = model.predict(test_data)

        create_csv(y_pred, 'submissions/NBayes_' + str(l) + '.csv')
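The kage08/PGM_1 snippets on this page (Examples #2, #6 and #9) import read_csv and create_csv from the project's utils module, which is not shown. A minimal sketch, assuming the datasets are purely numeric, read_csv returns a 2-D numpy array, and create_csv writes one prediction per line; the real helpers may handle headers or an id column differently.

import os

import numpy as np


def read_csv(path):
    """Load a numeric csv (no header assumed) as a 2-D numpy array."""
    return np.loadtxt(path, delimiter=',')


def create_csv(y_pred, path):
    """Write predictions to `path`, one value per line."""
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    np.savetxt(path, np.asarray(y_pred), fmt='%g', delimiter=',')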
Example #7
                if verb_print == True:
                    print('\033[94m' + df.columns[feature_1] + '\033[0m',
                          end='')
                    print(' vs ', end='')
                    print('\033[96m' + df.columns[feature_2] + '\033[0m')

                    print('Accuracy: ', end='')
                    if ac > min_accuracy:
                        print('\033[92m', end='')
                    else:
                        print('\033[91m', end='')
                    print(str(ac) + '\033[0m\n')
                if verb_standardize == True and i == 0:
                    utils.show_standardize(x, y, house[i], df, feature_1,
                                           feature_2, theta)
                if verb_cost == True and i == 0:
                    utils.show_cost(history_err)
                if ac >= min_accuracy:
                    row_list.append([
                        house[i], df.columns[feature_1], df.columns[feature_2],
                        theta[0], theta[1], theta[2], mean[0], mean[1], std[0],
                        std[1], ac
                    ])
    utils.create_csv(row_list, 'weights.csv')
    accuracies = [row[10] for row in row_list[1:]]
    print('\033[92m' + 'Average Accuracy: ' + '\033[0m' +
          str(sum(accuracies) / len(accuracies)))
Example #8
    def __init__(self,
                 name,
                 start_date=None,
                 end_date=None,
                 headers=None,
                 response=None,
                 delay=DELAY,
                 log=None,
                 proxies=None,
                 bot_last_status=None,
                 requests=requests,
                 *args,
                 **kwargs):
        # the name of the package is used to resolve resources from inside the
        # package or the folder the module is contained in.
        self.name = name

        # start date and end date are needed so that the parser has a set time
        # limit within which the bot will parse the data in the pages. They
        # also determine the names of the csv files and the log files where
        # the respective information will go.
        self.start_date = start_date
        self.end_date = end_date

        # These will need to be defined by the user, who will have already
        # seen the page and will know which headers should be in the csv file.
        # self.headers = UserAgentRotator().generate_header()
        self.headers = {
            "Connection":
            "close",
            "User-Agent":
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"
        }

        # The class that is used for response objects. In the present scenario
        # this is a response object of the requests class.
        self.response = response

        # a filename needs to be decided; it determines the names of the csv
        # and log files that will be created.
        filename = self.name

        # the log handler is set up, with a file name based on the start date
        # and the name of the bot as given by the user. if the logging flag is
        # set to stdout, no log file is initialised and output goes to stdout.
        log_date = convert_to_filenameable(self.start_date)
        self.log = log

        filename = "{filename}_{log_date}".format(filename=filename,
                                                  log_date=log_date)
        if scraper_logging == "stdout":
            log_file = None
        elif scraper_logging == "file":
            log_file = "log/{filename}.log".format(filename=filename)
        else:
            raise ValueError("please set correct logging flag for"
                             "scraper_logging")
        if log_date:
            self.log = setup_logging(log_file)
        else:
            raise ValueError("cannot create log file, "
                             "log date is not resolved. "
                             "log_date: {log_date}".format(log_date=log_date))

        # a delay of `DELAY` seconds will be set for the bot. This is 1 second
        # by default. You can change this by changing the delay parameter.
        self.delay = delay

        # an instance will have a default proxy that
        # will be fixed for the particular instance and will act as a random
        # agent for the bot
        if not proxies:
            self.proxies = None  #ProxyRotator().proxies
        else:
            self.proxies = proxies

        # create a csv file if it does not exist, or reuse one that already
        # exists.
        self.csvfile = create_csv(filename)

        # bot_last_status keeps track of the last status of the bot. It is
        # expected to be a dictionary.
        self.bot_last_status = bot_last_status

        # dependency injection for requests
        self.requests = requests
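The constructor above delegates file creation to a create_csv helper that only receives the base filename. A minimal sketch under the same assumptions as the comments above; the csv/ output directory is a hypothetical choice mirroring the log/ directory used for log files.

import os


def create_csv(filename, directory="csv"):
    """Ensure <directory>/<filename>.csv exists and return its path."""
    os.makedirs(directory, exist_ok=True)             # "csv" output directory is an assumption
    csv_path = os.path.join(directory, f"{filename}.csv")
    if not os.path.exists(csv_path):
        open(csv_path, 'w').close()                   # create an empty file on first run
    return csv_path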
Example #9
File: search.py  Project: kage08/PGM_1
def greedy_learn2(steps=5,
                  seed=None,
                  splits=10,
                  save=True,
                  savepath='submissions/greedy.csv',
                  failed_runs=10000):
    rg = np.random.RandomState(seed)
    g = nx.DiGraph()
    for i in range(22):
        g.add_edge(22, i)
    nodes = 23

    #initialize edges as Naive Bayes
    edges = set([(22, i) for i in range(22)])
    train = get_train()

    #Get score for Naive Bayes
    max_score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train,
                                   splits)
    print('Step 0:', max_score)
    st = 0
    f = 0
    while st < steps and f < failed_runs:
        #Select r
        r = rg.randint(3)
        gr = copy.deepcopy(g)
        ed = copy.deepcopy(edges)

        #Add random edge
        if r == 0:
            while True:
                u = rg.randint(22)
                v = rg.randint(22)
                if u == v or (u, v) in g.edges: continue
                g.add_edge(u, v)
                #Reject if G is not a DAG
                if not nx.is_directed_acyclic_graph(g):
                    g.remove_edge(u, v)
                    continue
                break
            if nx.is_directed_acyclic_graph(g):
                edges.add((u, v))
        #Delete a random edge
        elif r == 1:
            if len(edges) <= 3: continue
            del_edge = random.sample(list(edges), 1)[0]
            edges.remove(del_edge)
            g.remove_edge(*del_edge)

        #Reverse orientation of random edge
        else:
            if len(edges) <= 3: continue
            for i in range(len(edges)):
                act_edge = random.sample(list(edges), 1)[0]
                rev_edge = (act_edge[1], act_edge[0])
                g.remove_edge(*act_edge)
                g.add_edge(*rev_edge)
                if nx.is_directed_acyclic_graph(g):
                    break
                else:
                    g.remove_edge(*rev_edge)
                    g.add_edge(*act_edge)
            if nx.is_directed_acyclic_graph(g):
                edges.remove(act_edge)
                edges.add(rev_edge)

        #Get Score on new BN
        score = get_kfold_accuracy(BayesN(nodes=nodes, edges=edges), train,
                                   splits)
        if score > max_score:
            max_score = score
            st += 1
            print('Step', st, ':', max_score, 'Edges:', edges)
        else:
            edges = ed
            g = gr
            f += 1
    print('Max Score:', max_score)
    print('Edges:', edges)
    model = BayesN(nodes=nodes, edges=edges)
    model.fit(train)
    if save:
        test_data = read_csv(utils.TEST_PATH)[:, 1:]
        y_pred = model.predict(test_data)
        create_csv(y_pred, savepath)
    return model
Example #10
import argparse

from datetime import datetime
import math

from utils import parse_xls, create_csv

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Find distance")
    parser.add_argument("-i",
                        "--input",
                        help="Input file (default: ./files/test.xlsx)",
                        default="./files/test.xlsx")
    parser.add_argument("-s",
                        "--slice",
                        help="Slice number of final list (default: None)",
                        default=None)
    args = parser.parse_args()

    slice_num = int(args.slice) if args.slice is not None else None
    places = parse_xls(args.input, slice_num)
    create_csv(places)
Example #11
results = {}
image_filenames = get_files_in_directory(f"{dataset_dir}/test")

# make a square image while keeping aspect ratio and filling with fill_color
def make_square(im, min_size=256, fill_color=(0, 0, 0, 0)):
    x, y = im.size
    size = max(min_size, x, y)
    new_im = Image.new('RGB', (size, size), fill_color)
    new_im.paste(im, (int((size - x) / 2), int((size - y) / 2)))
    return new_im

for image_name in image_filenames:
    img = Image.open(f"{dataset_dir}/test/{image_name}").convert('RGB')
    # NN input is 256, 256
    img = img.resize((256, 256))
    img_array = np.expand_dims(np.array(img), 0) 
    # Normalize
    img_array = img_array / 255.

    # Get prediction
    softmax = model.predict(x=img_array)
    # Get predicted class (index with max value)
    prediction = tf.argmax(softmax, 1)
    # Get tensor's value
    prediction = tf.keras.backend.get_value(prediction)[0]

    results[image_name] = prediction

create_csv(results, MODEL_NAME)
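The create_csv(results, MODEL_NAME) helper called above is not defined in the snippet. A hedged sketch, assuming results maps each test image filename to a predicted class index and the output is a two-column submission file named after the model; the column names here are an assumption, not from the source.

import csv


def create_csv(results, model_name):
    """Write {filename: prediction} pairs to <model_name>.csv."""
    with open(f"{model_name}.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["Id", "Category"])   # assumed column names
        for filename, prediction in results.items():
            writer.writerow([filename, prediction])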
Example #12
              'python3.9 logreg_predict.py dataset_test.csv weights.csv')
        sys.exit()
    df = pd.read_csv(sys.argv[1])
    weights = pd.read_csv(sys.argv[2])
    show_chart = True

    row_list = [["Index", "Hogwarts House"]]

    for i in range(len(df)):
        tmp = predict_house(df.loc[i], weights.to_numpy())
        student_results.append(tmp)
        row_list.append([i, tmp])
    if (contain_value_from_train(df)):
        accuracy = get_accuracy(df["Hogwarts House"].tolist(), student_results)
        print("Accuracy: ", end='')
        if accuracy >= 0.98:
            print('\033[92m', end='')
        else:
            print('\033[91m', end='')
        print(accuracy, '\033[0m')

        if (show_chart == True):
            utils.show_repartition(student_results,
                                   df["Hogwarts House"].tolist())
    else:
        print(
            '\033[93m' +
            '⚠ Houses are missing, pls provide a csv with them if you want to see Accuracy.'
            + '\033[0m')
    utils.create_csv(row_list, "houses.csv")
Example #13
 def print_generations_makespan(self):
     print("POPULATION MAKESPAN\n")
     self.populations_makespan[1] = self.populations_makespan[1][-1]
     print(self.populations_makespan)
     create_csv(self.num_jobs, self.num_machines, self.pop, self.iteration,
                self.populations_makespan, self.instance_name)