Example #1
def create_thread(slug):
    json_data = request.get_json()
    user = User().get_by_nickname(json_data['author'], hide_id=False)
    forum = Forum().get_by_slug_with_id(slug)
    if not (user and forum):
        return json_response({
            "message": "Can't find user or forum"
        }, 404)
    if 'slug' in json_data:
        thread_exists = Thread.get_serialised_with_forum_user_by_id_or_slug(slug=json_data['slug'])
        if thread_exists:
            thread_exists['created'] = format_time(thread_exists['created'])
            return json_response(thread_exists, 409)
    thread = Thread.create_and_get_serialized(
        user_id=user['id'],
        forum_id=forum['id'],
        title=json_data['title'],
        message=json_data['message'],
        user_nickname=user['nickname'],
        forum_slug=forum['slug'],
        created=json_data.get('created'),
        slug=json_data.get('slug'),
    )
    thread['author'] = user['nickname']
    thread['forum'] = forum['slug']
    thread['created'] = format_time(thread['created'])
    return Response(
        response=json.dumps(thread),
        status=201,
        mimetype="application/json"
    )
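All of these handlers call a json_response helper that never appears in the listing. Judging by the explicit Response construction above, it is presumably a thin wrapper along the following lines (a minimal sketch; the actual helper may differ):

import json
from flask import Response

def json_response(payload, status):
    # Serialise the payload and wrap it in a Flask Response with a JSON
    # mimetype, mirroring the explicit Response construction in Example #1.
    return Response(response=json.dumps(payload), status=status,
                    mimetype="application/json")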
Example #2
def create_thread(slug_or_id):
    # Despite its name, this handler bulk-creates posts in an existing thread
    posts_data = request.get_json()
    thread = Thread.get_by_slug_or_id_with_forum_id(slug_or_id)
    if not thread:
        # Check the thread up front: thread['id'] is dereferenced below
        return json_response({'message': 'thread not found'}, 404)
    usernames = [i['author'] for i in posts_data]
    authors = User.get_user_ids_by_slug(usernames)
    posts_ids = {i['parent'] for i in posts_data if 'parent' in i}
    if posts_ids:
        posts = Post.get_posts_by_id_in_thread(thread['id'], posts_ids)
        if len(posts) != len(posts_ids):
            return json_response({'message': 'Parent in other thread'}, 409)
    if len(authors) != len(set(usernames)):
        return json_response({'message': 'user not found'}, 404)
    authors_by_nickname = {a['nickname']: a for a in authors}
    for post in posts_data:
        author = authors_by_nickname.get(post['author'].lower())
        if author:
            post['author_id'] = author['id']
            post['author_nickname'] = author['nickname']
        post['parent_id'] = post.get('parent', 0)

    posts = Post.create_posts(posts_data, thread['id'], thread['forum_id'])
    for item in posts:
        item['created'] = format_time(item['created'])
    return json_response(posts, 201)
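For context, this handler expects the request body to be a JSON array of post objects. The field names are taken from the code above; the nicknames and the parent id below are made up:

posts_payload = [
    {"author": "j.sparrow", "message": "root post"},            # no parent: top-level post
    {"author": "captain", "message": "a reply", "parent": 1},   # parent is an existing post id
]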
Example #3
    def get_post(cls, post_id):
        sql = """
                    SELECT
                      m.nickname as author,
                      p.created as created,
                      f.slug as forum,
                      p.id as id,
                      p.message as message,
                      p.thread_id as thread,
                      p.parent_id as parent,
                      p.is_edited as isEdited
                    FROM {tbl_name} AS p
                    JOIN {u_tbl_name} AS m ON m.id = p.user_id
                    JOIN {t_tbl_name} AS t ON t.id = p.thread_id
                    JOIN {f_tbl_name} AS f ON f.id = t.forum_id
                    WHERE p.id = %(post_id)s
                    """.format_map({
            'tbl_name': cls.tbl_name,
            'u_tbl_name': User.tbl_name,
            't_tbl_name': Thread.tbl_name,
            'f_tbl_name': Forum.tbl_name,
        })

        post = DbConnector.execute_get(sql, {'post_id': post_id})
        if post:
            post[0]['created'] = format_time(post[0]['created'])
        return post[0] if post else []
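Examples #3 and #6 run their SQL through a DbConnector.execute_get helper that is not shown. A minimal sketch of what it plausibly does, assuming psycopg2 with a dict cursor (the connection string here is hypothetical):

import psycopg2
import psycopg2.extras

class DbConnector:
    dsn = 'dbname=forum'  # hypothetical connection settings

    @classmethod
    def execute_get(cls, sql, params):
        # Execute a SELECT and return every row as a dict. Note that
        # PostgreSQL folds unquoted column aliases to lowercase, which is
        # why Example #6 restores the camelCase 'isEdited' key by hand.
        with psycopg2.connect(cls.dsn) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                return cur.fetchall()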
Example #4
def update_thread(slug_or_id):
    json_data = request.get_json()
    thread = Thread.get_by_slug_or_id(slug_or_id)
    if not thread:
        return json_response({'message': 'Thread not found'}, 404)
    thread = Thread.update_thread(thread['id'], json_data, thread)
    thread['created'] = format_time(thread['created'])
    return json_response(thread, 200)
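The format_time helper that every forum handler applies to 'created' is also not shown. It presumably renders the database timestamp as an ISO 8601 string; a plausible one-liner, assuming a timezone-aware datetime comes back from the driver (a sketch, not the original):

def format_time(created):
    # Render a datetime as an ISO 8601 string with millisecond precision
    return created.isoformat(timespec='milliseconds')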
Example #5
def vote_thread(slug_or_id):
    post_data = request.get_json()
    thread_id, user_id = Thread.check_user_and_thread(
        thread_slug_or_id=slug_or_id, nickname=post_data['nickname'])
    if not (user_id and thread_id):
        return json_response({'message': 'Thread or user not found'}, 404)
    thread = Vote.vote_for_thread(user_id, post_data['voice'], thread_id)
    thread['created'] = format_time(thread['created'])
    return json_response(thread, 200)
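The vote endpoint reads two fields from the request body. An illustrative payload, with the nickname made up (the API presumably accepts 1 for an upvote and -1 for a downvote):

vote_payload = {"nickname": "j.sparrow", "voice": 1}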
Example #6
    def get_info(cls, post_id, related):
        sql = """
            SELECT
              m.nickname as author,
              p.created as created,
              f.slug as forum,
              p.id as id,
              p.message as message,
              p.thread_id as thread,
              p.parent_id as parent,
              p.is_edited as isEdited
            FROM {tbl_name} AS p
            JOIN {u_tbl_name} AS m ON m.id = p.user_id
            JOIN {t_tbl_name} AS t ON t.id = p.thread_id
            JOIN {f_tbl_name} AS f ON f.id = t.forum_id
            WHERE p.id = %(post_id)s
            """.format_map({
            'tbl_name': cls.tbl_name,
            'u_tbl_name': User.tbl_name,
            't_tbl_name': Thread.tbl_name,
            'f_tbl_name': Forum.tbl_name,
        })

        post = DbConnector.execute_get(sql, {'post_id': post_id})
        if post:
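            # PostgreSQL folds the unquoted isEdited alias to lowercase,
            # so restore the camelCase key by hand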
            post[0]['isEdited'] = post[0]['isedited']
            post[0]['created'] = format_time(post[0]['created'])
        data = {'post': post[0] if post else None}
        if related and post:
            related = related.split(',')
            if 'user' in related:
                data['author'] = User().get_by_nickname(post[0]['author'])
            if 'thread' in related:
                data['thread'] = Thread.get_serialised_with_forum_user_by_id_or_slug(
                    id=post[0]['thread'])
                data['thread']['created'] = format_time(
                    data['thread']['created'])
            if 'forum' in related:
                data['forum'] = Forum().get_by_slug(post[0]['forum'])
        return data
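An illustrative call and result shape, assuming the method lives on the Post model (the class name and post id are made up):

data = Post.get_info(42, 'user,thread')
# data now looks like {'post': {...}, 'author': {...}, 'thread': {...}}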
Example #7
def get_thread_messages(slug_or_id):
    sort = request.args.get('sort')
    since = request.args.get('since')
    limit = request.args.get('limit')
    desc = request.args.get('desc')
    thread = Thread.get_by_slug_or_id(slug_or_id)
    posts = []
    if not thread:
        return json_response({'message': 'Thread not found'}, 404)
    if sort == "flat" or sort is None:
        posts = Thread.get_posts_flat_sorted(thread['id'], since, limit, desc)
    elif sort == "tree":
        posts = Thread.get_posts_tree_sorted(thread['id'], since, limit, desc)
    elif sort == "parent_tree":
        posts = Thread.get_posts_parent_tree_sorter(thread['id'], since, limit,
                                                    desc)
    for post in posts:
        post['created'] = format_time(post['created'])
    return json_response(posts, 200)
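For reference, the sort query parameter dispatches to one of three orderings, matching the branches above: flat (the default), tree, and parent_tree. An illustrative request, with the route made up:

# GET /threads/some-thread/posts?limit=10&since=5&sort=tree&desc=true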
Example #8
def get_threads_list(slug):
    limit = request.args.get('limit')
    since = request.args.get('since')
    desc = request.args.get('desc')
    forum = Forum().get_by_slug(slug)
    if not forum:
        # Check the forum first, before querying and formatting threads
        return json_response(
            {'message': 'not found'},
            404
        )
    threads = Thread.get_threads_list(slug, limit, since, desc)
    for thread in threads:
        thread['created'] = format_time(thread['created'])
    return Response(
        response=json.dumps(threads),
        status=200,
        mimetype="application/json"
    )
Example #9
def bert_train_val(model, dataloaders, starting_epoch, optimizer, scheduler,
                   epochs, device):
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()

    # Define running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_acc_history = []
    val_acc_history = []

    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0
        train_acc = 0
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to model
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            labels = batch[2].to(device)

            model.zero_grad()

            outputs = model(input_ids,
                            labels=labels,
                            attention_mask=attention_masks)
            loss = outputs.loss
            logits = outputs.logits

            batch_loss = loss.item()
            train_loss += batch_loss

            logits = logits.detach().cpu().numpy()
            labels = labels.to('cpu').numpy()

            predictions = np.argmax(logits, axis=1).flatten()

            # Mean accuracy over the batch, vectorised instead of a Python loop
            batch_accuracy = np.mean(predictions == labels)
            train_acc += batch_accuracy

            if step % 100 == 0:
                print("Epoch: ", epoch + 1, "/", epochs, "Batch: ", step + 1,
                      "/", len(dataloaders['train_dataloader']), "Loss: ",
                      train_loss / (step + 1), "Accuracy: ", batch_accuracy)

            loss.backward()
            # Apply gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimzer/Learning rate schedular step
            optimizer.step()
            scheduler.step()

            torch.cuda.empty_cache()

        # Loss and accuracy results by epoch
        end_epoch_time = time.time()
        epoch_train_accuracy = train_acc / len(dataloaders['train_dataloader'])
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_acc_history.append(epoch_train_accuracy)

        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, train accuracy: {epoch_train_accuracy:.6f}, train time:{epoch_train_time}'
        )

        # Switch to evaluation mode and run validation
        print("Validating...")

        start_val_time = time.time()
        model.eval()
        val_loss = 0
        val_acc = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                # Load and feed data to model
                input_ids = batch[0].to(device)
                attention_masks = batch[1].to(device)
                labels = batch[2].to(device)

                outputs = model(input_ids,
                                labels=labels,
                                attention_mask=attention_masks)
                loss = outputs.loss
                logits = outputs.logits

                batch_loss = loss.item()
                val_loss += batch_loss

                logits = logits.detach().cpu().numpy()
                labels = labels.to('cpu').numpy()

                predictions = np.argmax(logits, axis=1).flatten()

                # Mean accuracy over the batch, vectorised instead of a Python loop
                batch_accuracy = np.mean(predictions == labels)
                val_acc += batch_accuracy

                torch.cuda.empty_cache()

        end_val_time = time.time()
        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_acc = val_acc / len(dataloaders['val_dataloader'])
        val_loss_history.append(epoch_val_loss)
        val_acc_history.append(epoch_val_acc)

        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, val accuracy: {epoch_val_acc:.6f}, val_time: {epoch_val_time}'
        )

        # Record results to dictionary to return
        performance_history = {
            'train_loss': train_loss_history,
            'val_loss': val_loss_history,
            'train_accuracy': train_acc_history,
            'val_accuracy': val_acc_history,
            'num_epochs': epochs
        }

        # Save model checkpoint at end of train_val run, also saves performance history
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint,
                            f"./BERTcheckpoint_{checkpoint['epoch']}.pth.tar")
        print("")
        print("Training Finished")

    return performance_history
Example #10
def gpt2_train_val(model, dataloaders, tokenizer, starting_epoch, optimizer,
                   scheduler, epochs, device):
    print("\n\n" + "-" * 15)
    print("| TRAINING... |")
    print("-" * 15)
    set_seed()
    start_training_time = time.time()

    # Define running history for train and val
    train_loss_history = []
    val_loss_history = []
    train_perplexity_history = []
    val_perplexity_history = []

    # Training loop
    for epoch in range(starting_epoch, epochs):
        train_loss = 0
        model.train()
        for step, batch in tqdm(enumerate(dataloaders['train_dataloader']),
                                total=len(dataloaders['train_dataloader'])):
            # Load and feed data to model
            input_ids = batch.to(device)
            model.zero_grad()
            outputs = model(input_ids, labels=input_ids)

            loss = outputs[0]
            batch_loss = loss.item()
            train_loss += batch_loss

            if step % 200 == 199:
                # Report the running mean loss; don't reset train_loss here,
                # or the epoch-level average computed below would be wrong
                print("Epoch:", epoch + 1, "/", epochs, "Batch:", step + 1,
                      "/", len(dataloaders['train_dataloader']), "Loss",
                      train_loss / (step + 1))

            # Generates a model output including special tokens in order to visualise the training process and model learning
            model.eval()
            if step % 100 == 0 and step != 0:
                samples = model.generate(  # decoder_start_token_id=50258,
                    bos_token_id=50257,
                    do_sample=True,
                    top_k=50,
                    max_length=50,
                    min_length=15,
                    top_p=0.95,
                    num_return_sequences=1,
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=2,
                    temperature=1.1)

                for sample in samples:
                    print(tokenizer.decode(sample, skip_special_tokens=False))

            # Return to train mode and back propagate loss
            model.train()
            loss.backward()
            # Apply gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            # Optimizer/Learning rate scheduler step
            optimizer.step()
            scheduler.step()

            torch.cuda.empty_cache()

        # Loss and perplexity results by epoch
        end_epoch_time = time.time()
        epoch_train_loss = train_loss / len(dataloaders['train_dataloader'])
        epoch_train_perplexity = torch.exp(torch.tensor(epoch_train_loss))
        epoch_train_time = format_time(start_training_time, end_epoch_time)
        train_loss_history.append(epoch_train_loss)
        train_perplexity_history.append(epoch_train_perplexity)

        print(
            f' epoch: {epoch + 1}, train loss: {epoch_train_loss:.6f}, train ppl: {epoch_train_perplexity:.6f}, train time:{epoch_train_time}'
        )

        # Switch to evaluation mode and run validation
        print("Validating...")

        start_val_time = time.time()
        model.eval()
        val_loss = 0
        val_steps = 0
        with torch.no_grad():
            for step, batch in tqdm(enumerate(dataloaders['val_dataloader']),
                                    total=len(dataloaders['val_dataloader'])):
                input_ids = batch.to(device)  # same batch layout as in the train loop
                outputs = model(input_ids, labels=input_ids)
                loss = outputs[0]  # outputs is (loss, logits) when labels are given
                batch_loss = loss.item()
                val_loss += batch_loss

                torch.cuda.empty_cache()

        end_val_time = time.time()
        epoch_val_time = format_time(start_val_time, end_val_time)
        epoch_val_loss = val_loss / len(dataloaders['val_dataloader'])
        epoch_val_perplexity = torch.exp(torch.tensor(epoch_val_loss))
        val_loss_history.append(epoch_val_loss)
        val_perplexity_history.append(epoch_val_perplexity)

        print(
            f' epoch: {epoch + 1}, val loss: {epoch_val_loss:.6f}, val ppl: {epoch_val_perplexity:.6f}, val_time: {epoch_val_time}'
        )

        # Record results to dictionary to return
        performance_history = {
            'train_loss': train_loss_history,
            'val_loss': val_loss_history,
            'train_perplexity': train_perplexity_history,
            'val_perplexity': val_perplexity_history,
            'num_epochs': epochs
        }

        # Save model checkpoint at end of train_val run, also saves performance history
        if epoch == epochs - 1:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'performance_history': performance_history,
                'epoch': epoch + 1,
            }
            save_checkpoint(checkpoint,
                            f"./checkpoint_{checkpoint['epoch']}.pth.tar")
        print("")
        print("Training Finished")

    return performance_history
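Both training loops lean on a few helpers defined elsewhere: set_seed, save_checkpoint, and a two-argument format_time that, unlike the forum helper of the same name, turns a pair of timestamps into an elapsed-time string. Minimal sketches under those assumptions (the default seed value is made up):

import datetime
import random

import numpy as np
import torch

def set_seed(seed=42):
    # Seed every RNG the training loops touch so runs are reproducible
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

def save_checkpoint(state, filename):
    # Persist the checkpoint dict (weights, optimizer, scheduler, history)
    torch.save(state, filename)

def format_time(start, end):
    # Format the elapsed wall-clock time between two time.time() readings
    return str(datetime.timedelta(seconds=int(end - start)))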
Example #11
def get_thread_details(slug_or_id):
    thread = Thread.get_by_slug_or_id(slug_or_id)
    if thread:
        thread['created'] = format_time(thread['created'])
        return json_response(thread, 200)
    return json_response({'message': 'thread not found'}, 404)