示例#1
0
def train_ei_reg(emotion, pretrained=None, finetune=True, unfreeze=0):
    """
    1. Task EI-reg: Detecting Emotion Intensity (regression)

    Given:

        - a tweet
        - an emotion E (anger, fear, joy, or sadness)

    Task: determine the  intensity of E that best represents the mental state of
    the tweeter—a real-valued score between 0 and 1:

        - a score of 1: highest amount of E can be inferred
        - a score of 0: lowest amount of E can be inferred

    For each language: 4 training sets and 4 test sets: one for each emotion E.

    (Note that the absolute scores have no inherent meaning --
    they are used only as a means to convey that the instances
    with higher scores correspond to a greater degree of E
    than instances with lower scores.)

    :param emotion: emotions = ["anger", "fear", "joy", "sadness"]
    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_EIREG
    task = 'EI-reg'
    X_train, y_train = parse(task=task, emotion=emotion, dataset="train")
    X_dev, y_dev = parse(task=task, emotion=emotion, dataset="dev")
    X_test, y_test = parse(task=task, emotion=emotion, dataset="gold")

    # keep only scores
    y_train = [y[1] for y in y_train]
    y_dev = [y[1] for y in y_dev]
    y_test = [y[1] for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"] + "_" + emotion
    trainer = define_trainer("reg", config=model_config, name=name,
                             datasets=datasets,
                             monitor="dev",
                             pretrained=pretrained, finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = emotion
    if pretrained is not None:
        desc += "_PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(model_config["name"], desc)
示例#2
0
def train_v_reg(pretrained=None, finetune=True, unfreeze=0):
    """
    3. Task V-reg: Detecting Valence or Sentiment Intensity (regression)

    Given:
     - a tweet

    Task: determine the intensity of sentiment or valence (V)
    that best represents the mental state of the tweeter—a real-valued score
    between 0 and 1:

        a score of 1: most positive mental state can be inferred
        a score of 0: most negative mental state can be inferred

    For each language: 1 training set, 1 test set.

    (Note that the absolute scores have no inherent meaning --
    they are used only as a means to convey that the instances
    with higher scores correspond to a greater degree of positive sentiment
    than instances with lower scores.)

    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_VREG

    task = 'V-reg'
    # load the dataset and split it in train and val sets
    X_train, y_train = parse(task=task, dataset="train")
    X_dev, y_dev = parse(task=task, dataset="dev")
    X_test, y_test = parse(task=task, dataset="gold")

    # keep only scores
    y_train = [y[1] for y in y_train]
    y_dev = [y[1] for y in y_dev]
    y_test = [y[1] for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"]
    trainer = define_trainer("reg", config=model_config, name=name,
                             datasets=datasets,
                             monitor="dev",
                             pretrained=pretrained, finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = ""
    if pretrained is not None:
        desc += "PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
示例#3
0
def train_ei_oc(emotion, pretrained=None, finetune=True, unfreeze=0):
    """
    2. Task EI-oc: Detecting Emotion Intensity (ordinal classification)

    Given:

    a tweet
    an emotion E (anger, fear, joy, or sadness)

    Task: classify the tweet into one of four ordinal classes of intensity of E
    that best represents the mental state of the tweeter:

        0: no E can be inferred
        1: low amount of E can be inferred
        2: moderate amount of E can be inferred
        3: high amount of E can be inferred

    For each language: 4 training sets and 4 test sets: one for each emotion E.

    :param emotion: emotions = ["anger", "fear", "joy", "sadness"]
    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_EIOC

    task = 'EI-oc'
    X_train, y_train = parse(task=task, emotion=emotion, dataset="train")
    X_dev, y_dev = parse(task=task, emotion=emotion, dataset="dev")
    X_test, y_test = parse(task=task, emotion=emotion, dataset="gold")

    # keep only scores
    y_train = [y[1] for y in y_train]
    y_dev = [y[1] for y in y_dev]
    y_test = [y[1] for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"] + "_" + emotion
    trainer = define_trainer("clf", config=model_config, name=name,
                             datasets=datasets,
                             monitor="dev",
                             ordinal=True,
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze,
                   checkpoint=True)

    desc = emotion
    if pretrained is not None:
        desc += "_PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
示例#4
0
def train_e_c(pretrained=None, finetune=True, unfreeze=0):
    """
    5. Task E-c: Detecting Emotions (multi-label classification)
    -- This is a traditional Emotion Classification Task

    Given:
     - a tweet

    Task: classify the tweet as 'neutral or no emotion' or as one, or more,
    of eleven given emotions that best represent the mental state
    of the tweeter:

        - anger (also includes annoyance and rage) can be inferred
        - anticipation (also includes interest and vigilance) can be inferred
        - disgust (also includes disinterest, dislike and loathing) can be
            inferred
        - fear (also includes apprehension, anxiety, concern, and terror)
            can be inferred
        - joy (also includes serenity and ecstasy) can be inferred
        - love (also includes affection) can be inferred
        - optimism (also includes hopefulness and confidence) can be inferred
        - pessimism (also includes cynicism and lack of confidence) can be
            inferred
        - sadness (also includes pensiveness and grief) can be inferred
        - surprise (also includes distraction and amazement) can be inferred
        - trust (also includes acceptance, liking, and admiration) can be
            inferred

    For each language: 1 training set, 1 test set.

    (Note that the set of emotions includes the eight basic emotions
    as per Plutchik (1980), as well as a few other emotions that are common
    in tweets (love, optimism, and pessimism).)

    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_EC
    task = 'E-c'
    # load the dataset and split it in train and val sets
    X_train, y_train = parse(task=task, dataset="train")
    X_dev, y_dev = parse(task=task, dataset="dev")
    X_test, y_test = parse(task=task, dataset="gold")

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"]
    trainer = define_trainer("mclf", config=model_config, name=name,
                             datasets=datasets,
                             monitor="dev",
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = ""
    if pretrained is not None:
        desc += "PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
示例#5
0
def train_v_oc(pretrained=None, finetune=True, unfreeze=0):
    """
    4. Task V-oc: Detecting Valence (ordinal classification)
    -- This is the traditional Sentiment Analysis Task

    Given:
     - a tweet

    Task: classify the tweet into one of seven ordinal classes,
    corresponding to various levels of positive and negative sentiment
    intensity, that best represents the mental state of the tweeter:

        3: very positive mental state can be inferred
        2: moderately positive mental state can be inferred
        1: slightly positive mental state can be inferred
        0: neutral or mixed mental state can be inferred
        -1: slightly negative mental state can be inferred
        -2: moderately negative mental state can be inferred
        -3: very negative mental state can be inferred

    For each language: 1 training set, 1 test set.

    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_VOC

    task = 'V-oc'
    # load the dataset and split it in train and val sets
    X_train, y_train = parse(task=task, dataset="train")
    X_dev, y_dev = parse(task=task, dataset="dev")
    X_test, y_test = parse(task=task, dataset="gold")

    # keep only scores
    y_train = [str(y[1]) for y in y_train]
    y_dev = [str(y[1]) for y in y_dev]
    y_test = [str(y[1]) for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    # pass explicit mapping
    label_map = {"-3": 0, "-2": 1, "-1": 2, "0": 3, "1": 4, "2": 5, "3": 6, }
    inv_label_map = {v: int(k) for k, v in label_map.items()}
    transformer = LabelTransformer(label_map, inv_label_map)

    name = model_config["name"]
    trainer = define_trainer("clf", config=model_config, name=name,
                             datasets=datasets,
                             monitor="dev",
                             ordinal=True,
                             label_transformer=transformer,
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = ""
    if pretrained is not None:
        desc += "PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
示例#6
0
def train_ei_oc(emotion, pretrained=None, finetune=True, unfreeze=0):
    """
    2. Task EI-oc: Detecting Emotion Intensity (ordinal classification)

    Given:

    a tweet
    an emotion E (anger, fear, joy, or sadness)

    Task: classify the tweet into one of four ordinal classes of intensity of E
    that best represents the mental state of the tweeter:

        0: no E can be inferred
        1: low amount of E can be inferred
        2: moderate amount of E can be inferred
        3: high amount of E can be inferred

    For each language: 4 training sets and 4 test sets: one for each emotion E.

    :param emotion: emotions = ["anger", "fear", "joy", "sadness"]
    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_EIOC

    task = 'EI-oc'
    X_train, y_train = parse(task=task, emotion=emotion, dataset="train")
    X_dev, y_dev = parse(task=task, emotion=emotion, dataset="dev")
    X_test, y_test = parse(task=task, emotion=emotion, dataset="gold")

    # keep only scores
    y_train = [y[1] for y in y_train]
    y_dev = [y[1] for y in y_dev]
    y_test = [y[1] for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"] + "_" + emotion
    trainer = define_trainer("clf",
                             config=model_config,
                             name=name,
                             datasets=datasets,
                             monitor="dev",
                             ordinal=True,
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer,
                   model_config["epochs"],
                   unfreeze=unfreeze,
                   checkpoint=True)

    desc = emotion
    if pretrained is not None:
        desc += "_PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
from dataloaders.task3 import parse
from model.params import TASK3_A, TASK3_B
from utils.train import define_trainer, model_training

TASK = 'a'
# select config by args.task
if TASK == "a":
    model_config = TASK3_A
else:
    model_config = TASK3_B

X_train, y_train = parse(task=TASK, dataset="train")
X_test, y_test = parse(task=TASK, dataset="gold")

datasets = {
    "train": (X_train, y_train),
    "gold": (X_test, y_test),
}

name = "_".join([model_config["name"], model_config["token_type"]])

trainer = define_trainer("clf",
                         config=model_config,
                         name=name,
                         datasets=datasets,
                         monitor="gold")

model_training(trainer, model_config["epochs"])

trainer.log_training(name, model_config["token_type"])
示例#8
0
def train_e_c(pretrained=None, finetune=True, unfreeze=0):
    """
    5. Task E-c: Detecting Emotions (multi-label classification)
    -- This is a traditional Emotion Classification Task

    Given:
     - a tweet

    Task: classify the tweet as 'neutral or no emotion' or as one, or more,
    of eleven given emotions that best represent the mental state
    of the tweeter:

        - anger (also includes annoyance and rage) can be inferred
        - anticipation (also includes interest and vigilance) can be inferred
        - disgust (also includes disinterest, dislike and loathing) can be
            inferred
        - fear (also includes apprehension, anxiety, concern, and terror)
            can be inferred
        - joy (also includes serenity and ecstasy) can be inferred
        - love (also includes affection) can be inferred
        - optimism (also includes hopefulness and confidence) can be inferred
        - pessimism (also includes cynicism and lack of confidence) can be
            inferred
        - sadness (also includes pensiveness and grief) can be inferred
        - surprise (also includes distraction and amazement) can be inferred
        - trust (also includes acceptance, liking, and admiration) can be
            inferred

    For each language: 1 training set, 1 test set.

    (Note that the set of emotions includes the eight basic emotions
    as per Plutchik (1980), as well as a few other emotions that are common
    in tweets (love, optimism, and pessimism).)

    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_EC
    task = 'E-c'
    # load the dataset and split it in train and val sets
    X_train, y_train = parse(task=task, dataset="train")
    X_dev, y_dev = parse(task=task, dataset="dev")
    X_test, y_test = parse(task=task, dataset="gold")

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"]
    trainer = define_trainer("mclf",
                             config=model_config,
                             name=name,
                             datasets=datasets,
                             monitor="dev",
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = ""
    if pretrained is not None:
        desc += "PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
示例#9
0
def train_ei_reg(emotion, pretrained=None, finetune=True, unfreeze=0):
    """
    1. Task EI-reg: Detecting Emotion Intensity (regression)

    Given:

        - a tweet
        - an emotion E (anger, fear, joy, or sadness)

    Task: determine the  intensity of E that best represents the mental state of
    the tweeter—a real-valued score between 0 and 1:

        - a score of 1: highest amount of E can be inferred
        - a score of 0: lowest amount of E can be inferred

    For each language: 4 training sets and 4 test sets: one for each emotion E.

    (Note that the absolute scores have no inherent meaning --
    they are used only as a means to convey that the instances
    with higher scores correspond to a greater degree of E
    than instances with lower scores.)

    :param emotion: emotions = ["anger", "fear", "joy", "sadness"]
    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_EIREG
    task = 'EI-reg'
    X_train, y_train = parse(task=task, emotion=emotion, dataset="train")
    X_dev, y_dev = parse(task=task, emotion=emotion, dataset="dev")
    X_test, y_test = parse(task=task, emotion=emotion, dataset="gold")

    # keep only scores
    y_train = [y[1] for y in y_train]
    y_dev = [y[1] for y in y_dev]
    y_test = [y[1] for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"] + "_" + emotion
    trainer = define_trainer("reg",
                             config=model_config,
                             name=name,
                             datasets=datasets,
                             monitor="dev",
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = emotion
    if pretrained is not None:
        desc += "_PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(model_config["name"], desc)
示例#10
0
def train_v_oc(pretrained=None, finetune=True, unfreeze=0):
    """
    4. Task V-oc: Detecting Valence (ordinal classification)
    -- This is the traditional Sentiment Analysis Task

    Given:
     - a tweet

    Task: classify the tweet into one of seven ordinal classes,
    corresponding to various levels of positive and negative sentiment
    intensity, that best represents the mental state of the tweeter:

        3: very positive mental state can be inferred
        2: moderately positive mental state can be inferred
        1: slightly positive mental state can be inferred
        0: neutral or mixed mental state can be inferred
        -1: slightly negative mental state can be inferred
        -2: moderately negative mental state can be inferred
        -3: very negative mental state can be inferred

    For each language: 1 training set, 1 test set.

    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_VOC

    task = 'V-oc'
    # load the dataset and split it in train and val sets
    X_train, y_train = parse(task=task, dataset="train")
    X_dev, y_dev = parse(task=task, dataset="dev")
    X_test, y_test = parse(task=task, dataset="gold")

    # keep only scores
    y_train = [str(y[1]) for y in y_train]
    y_dev = [str(y[1]) for y in y_dev]
    y_test = [str(y[1]) for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    # pass explicit mapping
    label_map = {
        "-3": 0,
        "-2": 1,
        "-1": 2,
        "0": 3,
        "1": 4,
        "2": 5,
        "3": 6,
    }
    inv_label_map = {v: int(k) for k, v in label_map.items()}
    transformer = LabelTransformer(label_map, inv_label_map)

    name = model_config["name"]
    trainer = define_trainer("clf",
                             config=model_config,
                             name=name,
                             datasets=datasets,
                             monitor="dev",
                             ordinal=True,
                             label_transformer=transformer,
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = ""
    if pretrained is not None:
        desc += "PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
示例#11
0
def train_v_reg(pretrained=None, finetune=True, unfreeze=0):
    """
    3. Task V-reg: Detecting Valence or Sentiment Intensity (regression)

    Given:
     - a tweet

    Task: determine the intensity of sentiment or valence (V)
    that best represents the mental state of the tweeter—a real-valued score
    between 0 and 1:

        a score of 1: most positive mental state can be inferred
        a score of 0: most negative mental state can be inferred

    For each language: 1 training set, 1 test set.

    (Note that the absolute scores have no inherent meaning --
    they are used only as a means to convey that the instances
    with higher scores correspond to a greater degree of positive sentiment
    than instances with lower scores.)

    :param pretrained:
    :param finetune:
    :param unfreeze:
    :return:
    """
    model_config = TASK1_VREG

    task = 'V-reg'
    # load the dataset and split it in train and val sets
    X_train, y_train = parse(task=task, dataset="train")
    X_dev, y_dev = parse(task=task, dataset="dev")
    X_test, y_test = parse(task=task, dataset="gold")

    # keep only scores
    y_train = [y[1] for y in y_train]
    y_dev = [y[1] for y in y_dev]
    y_test = [y[1] for y in y_test]

    datasets = {
        "train": (X_train, y_train),
        "dev": (X_dev, y_dev),
        "gold": (X_test, y_test),
    }

    name = model_config["name"]
    trainer = define_trainer("reg",
                             config=model_config,
                             name=name,
                             datasets=datasets,
                             monitor="dev",
                             pretrained=pretrained,
                             finetune=finetune)

    model_training(trainer, model_config["epochs"], unfreeze=unfreeze)

    desc = ""
    if pretrained is not None:
        desc += "PT:" + pretrained
        if finetune is not None:
            desc += "_FT:" + str(finetune)

    trainer.log_training(name, desc)
示例#12
0
from model.params import SEMEVAL_2017
from utils.train import define_trainer, model_training

config = SEMEVAL_2017
train = load_data_from_dir(os.path.join(DATA_DIR, 'semeval_2017_4A'))
X = [obs[1] for obs in train]
y = [obs[0] for obs in train]

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.05,
                                                    stratify=y,
                                                    random_state=42)

# pass a transformer function, for preparing tha labels for training
label_map = {label: idx for idx, label in
             enumerate(sorted(list(set(y_train))))}
inv_label_map = {v: k for k, v in label_map.items()}
transformer = LabelTransformer(label_map, inv_label_map)

datasets = {
    "train": (X_train, y_train),
    "val": (X_test, y_test),
}

name = "_".join([config["name"], config["token_type"]])

trainer = define_trainer("clf", config=config, name=name, datasets=datasets,
                         monitor="val", label_transformer=transformer)

model_training(trainer, config["epochs"], checkpoint=True)
示例#13
0
args = parser.parse_args()
print(args)

if args.context not in ["none", "mean", "last"]:
    raise ValueError("Invalid attention type!")

model_config = TASK2_A
model_config["attention_context"] = args.context
model_config["embed_finetune"] = args.finetune

X_train, y_train = load_task2("train")
X_trial, y_trial = load_task2("trial")
X_test, y_test = load_task2("test")

datasets = {
    "train": (X_train, y_train),
    "trial": (X_trial, y_trial),
    "gold": (X_test, y_test)
}

name = "_".join([model_config["name"], model_config["token_type"]])

trainer = define_trainer("clf", config=model_config, name=name,
                         datasets=datasets, monitor="gold")

model_training(trainer, model_config["epochs"])

desc = "context:{}, finetuning:{}".format(args.context, str(args.finetune))
trainer.log_training(name, desc)
示例#14
0
from dataloaders.task2 import load_task2
from model.params import TASK2_A
from utils.train import define_trainer, model_training

model_config = TASK2_A

X_train, y_train = load_task2("train")
X_trial, y_trial = load_task2("trial")
X_test, y_test = load_task2("test")

datasets = {
    "train": (X_train, y_train),
    "trial": (X_trial, y_trial),
    "gold": (X_test, y_test),
}

name = "_".join([model_config["name"]])

trainer = define_trainer("clf",
                         config=model_config,
                         name=name,
                         datasets=datasets,
                         monitor="gold",
                         absolute_path=False,
                         embedding_size_auto_detect=True)

model_training(trainer, model_config["epochs"], checkpoint=True)