Example #1
def main():
    # Read our list of sites (numpy as np plus the repo's Crawler,
    # BaseModel (bm), SKIP and detected() are assumed to be in scope)
    sites = np.genfromtxt("site_full.txt", dtype="str")
    bot = Crawler(bm.BaseModel(), SKIP)
    bot.downloadHTML(sites)
    results = bot.parseCache("cache")

    # Output results
    print(results)
    # Output counts
    print(detected(results))

    # Generate a results file with proper names
    names = []
    for i in results:
        # i[0] is a cached file name (presumably "<index>.html"); dropping
        # the 5-character ".html" suffix recovers the row index into sites
        names.append((sites[int(i[0][:-5])], i[1]))

    # Save results as a text file
    np.savetxt("results.txt", names, fmt="%s")
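The slice in the loop above only works if every cached file is named "<index>.html" (exactly five trailing characters). A sketch of a more defensive equivalent, using only the standard library:

import os

def site_index(filename):
    # "42.html" -> 42; handles extensions of any length, unlike [:-5]
    return int(os.path.splitext(filename)[0])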
Example #2
def GET(self):
    basemodel_instance = basemodel.BaseModel()
    all_commits = basemodel_instance.get_data()
    return render.commits(all_commits)
Example #3
def GET(self):
    basemodel_instance = basemodel.BaseModel()
    commit_instance = commit.Commit()
    all_commits = commit_instance.findAll()
    return render.commits(all_commits)
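Examples #2 and #3 read like web.py GET handlers lifted out of their classes. A minimal sketch of how such a handler is wired into a web.py application, assuming a templates/commits.html template exists (the Commits class name and the /commits route are hypothetical):

import web
import basemodel

urls = ("/commits", "Commits")
render = web.template.render("templates/")

class Commits:
    def GET(self):
        # Same pattern as Example #2: fetch everything, hand it to the template
        all_commits = basemodel.BaseModel().get_data()
        return render.commits(all_commits)

if __name__ == "__main__":
    web.application(urls, globals()).run()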
Example #4
import json
import basemodel
from pprint import pprint

all_data = basemodel.BaseModel().get_data()

# get_data() apparently returns a list whose second element holds the projects
all_projects = all_data[1]['projects']


class Project:
    def __init__(self):
        self.id = None
        self.title = None
        self.description = None

    def find_all(self):
        return all_projects

    def find(self, key, value):
        projects_found = [
            project for project in all_projects if project[key] == value
        ]
        return projects_found

    def find_one(self, key, value):
        # Returns the first match; raises IndexError when nothing matches
        project_found = [
            project for project in all_projects if project[key] == value
        ]
        return project_found[0]


p = Project()
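A quick usage sketch for the Project class above; the key names and values are hypothetical, since they depend on what BaseModel().get_data() actually returns:

p = Project()
print(p.find_all())           # every project dict
print(p.find("id", 3))        # all projects whose "id" field equals 3
print(p.find_one("id", 3))    # first such project; IndexError if none match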
Example #5
import argparse
import gc
import time

import torch
from torch.utils.data import DataLoader

# Repo-local modules, inferred from the references below
import basemodel
import utils

# torch.backends.cudnn.benchmark = True
# torch.backends.cudnn.enabled = True


parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--write_pred_file', default=None, type=str, help='prediction file')
args = parser.parse_args()

history_length = int(utils.config.get('train', 'historyLength'))
input_data = utils.CoQADataset(utils.config.get('train', 'devFile'), history_length)
test_loader = DataLoader(input_data[0:10], batch_size=1, shuffle=False)


CUDA = torch.cuda.is_available()
model = basemodel.BaseModel(384, 64, 30, use_gpu=eval(utils.config.get('train', 'useGPU')))
state_dict = torch.load('resources/model.pth')
model.load_state_dict(state_dict)
if CUDA and eval(utils.config.get('train', 'useGPU')):
    model = model.cuda()
model.eval()
start = time.time()
batch_size = int(utils.config.get('train', 'batchSize'))

with torch.no_grad():
    for step, input_batch in enumerate(test_loader):
        gc.collect()
        print("Step " + str(step))
        para_tokens = [x[0] for x in input_batch["para_tokens"]]
        batch_data = []
        for q_a in input_batch["question_answer_list"]:
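The snippet above is cut off mid-loop on the source page; Example #6 below contains the complete version of the same batching loop. One pattern worth flagging in both: boolean options such as useGPU are read with eval() on the raw config string. If utils.config is a standard-library configparser.ConfigParser, getboolean() is the safer equivalent; a sketch (the coqa.ini file name is hypothetical):

import configparser

config = configparser.ConfigParser()
config.read("coqa.ini")

history_length = config.getint("train", "historyLength")
use_gpu = config.getboolean("train", "useGPU")  # accepts true/false, yes/no, 1/0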
Example #6
# copy, gc, time, torch (with nn and optim), DataLoader, basemodel and
# utils are assumed to be imported at module level, as in Example #5.
def train_model():
    history_length = int(utils.config.get('train', 'historyLength'))
    input_data = utils.CoQADataset(utils.config.get('train', 'trainFile'),
                                   history_length)
    train_loader = DataLoader(input_data, batch_size=1, shuffle=True)
    model = basemodel.BaseModel(384,
                                64,
                                30,
                                use_gpu=eval(
                                    utils.config.get('train', 'useGPU')))

    CUDA = torch.cuda.is_available()
    if CUDA and eval(utils.config.get('train', 'useGPU')):
        model = model.cuda()
    model.train()
    start = time.time()
    batch_size = int(utils.config.get('train', 'batchSize'))
    lr = float(utils.config.get('train', 'learningRate'))
    beta1 = float(utils.config.get('train', 'beta1'))
    beta2 = float(utils.config.get('train', 'beta2'))
    l2_weight_decay = float(utils.config.get('train', 'l2WeightDecay'))
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           betas=(beta1, beta2),
                           weight_decay=l2_weight_decay)
    # optimizer = optim.SGD(model.parameters(), lr=lr,  momentum=0.9)
    criterion = nn.NLLLoss()

    for step, input_batch in enumerate(train_loader):
        gc.collect()
        print("Step " + str(step))
        para_tokens = [x[0] for x in input_batch["para_tokens"]]
        batch_data = []
        for q_a in input_batch["question_answer_list"]:
            if q_a["invalid_flag"]:
                continue
            q_a["question"] = [
                q_a["question"][0], [x[0] for x in q_a["question"][1]]
            ]
            for i in range(len(q_a["history_questions"])):
                q_a["history_questions"][i] = [
                    q_a["history_questions"][i][0],
                    [x[0] for x in q_a["history_questions"][i][1]]
                ]
            for i in range(len(q_a["history_answers_input"])):
                q_a["history_answers_input"][i] = [
                    q_a["history_answers_input"][i][0],
                    [x[0] for x in q_a["history_answers_input"][i][1]]
                ]
            for i in range(len(q_a["history_answers_span"])):
                q_a["history_answers_span"][i] = [
                    q_a["history_answers_span"][i][0],
                    [x[0] for x in q_a["history_answers_span"][i][1]]
                ]
            q_a["para_tokens"] = para_tokens

            batch_data.append(copy.deepcopy(q_a))

        print("batch start - " + str(len(batch_data)))
        # Step through batch_data in slices of batch_size
        for i in range(0, len(batch_data), batch_size):
            batch_input = batch_data[i:i + batch_size]
            optimizer.zero_grad()
            (prob_start, prob_end, prob_ans, start_pos, end_pos,
             ans_type) = model.forward(batch_input)

            if eval(utils.config.get('train', 'useGPU')):
                start_pos = torch.tensor(start_pos).cuda()
                end_pos = torch.tensor(end_pos).cuda()
                ans_type = torch.tensor(ans_type).cuda()
            else:
                start_pos = torch.tensor(start_pos)
                end_pos = torch.tensor(end_pos)
                ans_type = torch.tensor(ans_type)

            log_prob_start = torch.log(prob_start)
            log_prob_end = torch.log(prob_end)
            log_prob_ans = torch.log(prob_ans)
            loss_start = criterion(log_prob_start, start_pos)
            loss_end = criterion(log_prob_end, end_pos)
            loss_ans = criterion(log_prob_ans, ans_type)
            overall_loss = loss_start + loss_end + loss_ans

            # eval() the string: the raw config value "False" would otherwise
            # be truthy, matching how useGPU is read everywhere else
            if eval(utils.config.get('train', 'useGPU')):
                del start_pos, end_pos, ans_type, log_prob_start, log_prob_end, log_prob_ans, loss_start, loss_end, loss_ans
                torch.cuda.empty_cache()

            overall_loss.backward()
            optimizer.step()

            del batch_input, prob_start, prob_end, prob_ans
            gc.collect()
            torch.cuda.empty_cache()

        print("batch end")

        if (step > 0
                and step % int(utils.config.get('train', 'saveModelFreq')) == 0
                and eval(utils.config.get('train', 'saveModel'))):
            torch.save(model.state_dict(),
                       utils.config.get('train', 'savePath'))
            print("Model saved after %d steps" % step)

    if eval(utils.config.get('train', 'saveModel')):
        torch.save(model.state_dict(), utils.config.get('train', 'savePath'))
    print("Time taken: " + str(time.time() - start))