Example #1
from ai_harness.configuration import Arguments, configclass, field, ComplexArguments, export, merge_fields, \
    to_json_string
from ai_harness import configclasses
from ai_harness import harnessutils as aiutils
from ai_harness.fileutils import join_path, FileLineReader

log = aiutils.getLogger('transformersx')


@configclass()
class ArgumentsBase:
    pass
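
The base class above is empty; concrete argument classes extend it with the field() helper. A minimal sketch of such a subclass (the class and field names below are hypothetical, modelled on the field() usage in Example #2):

@configclass()
class ModelArguments(ArgumentsBase):
    # hypothetical fields, for illustration only
    model_name: str = field(default='bert-base-chinese', help='pretrained model name or path')
    max_seq_length: int = field(default=128, help='maximum input sequence length')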
Example #2
from typing import Optional

from ai_harness.configclasses import configclass, field, fields, is_configclass
from ai_harness import harnessutils as utils

log = utils.getLogger('aiharness')


@configclass
class Address:
    phone: str = field("139", "phone number")


@configclass
class Config:
    name: str = field(default='TestName', help="name help")
    age: int = field(default=10, help="age help")
    value: Optional[int] = field(None, "value")
    address: Address = field(Address(), "Address help")

    def update(self):
        self.name = 1000


class Test_Dataclasses:
    def test_field(self):
        config = Config()
        config.update()
        config.age = '1000'
        for f in fields(config):
            if str(type(f.type)) == "typing.Union":
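
For orientation, a minimal usage sketch of the Config class defined above (the expected values simply restate the declared field defaults):

config = Config()
assert config.name == 'TestName'        # default from field(default='TestName', ...)
assert config.age == 10
assert config.address.phone == '139'    # nested configclass default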
Example #3
from ai_harness import xml2object
from ai_harness import harnessutils as utils

log = utils.getLogger('test')


def test_xml2object():
    obj = xml2object.parse('configuration.xml')
    assert len(obj.configuration.group) == 2
    assert obj.configuration.group[0]['name'] == 'model'
    assert obj.configuration.group[0].arg[0]['name'] == 'test1'
    assert hasattr(obj.configuration, 'group')
    assert isinstance(obj.configuration.group, list)
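
A configuration.xml satisfying these assertions could look like the sketch below; only the parts checked by the test are known, the second group's name and any further args are assumptions:

SAMPLE_XML = """
<configuration>
    <group name="model">
        <arg name="test1"/>
    </group>
    <group name="data"/>  <!-- second group's name is an assumption -->
</configuration>
"""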
Example #4
from tqdm.auto import tqdm, trange
from transformers import PreTrainedTokenizer, torch_distributed_zero_first, \
    RobertaTokenizer, RobertaTokenizerFast, XLMRobertaTokenizer
from typing import List, Optional, Union
from torch.utils.data.dataset import Dataset  # base class for TaskDataset below

from transformers.tokenization_utils import BatchEncoding

from .data_args import DataArguments
from .dataprocessor import DataProcessor

###TODO: The Dataset should be refactored to support multiple data sources.
###TODO: Behind the dataset there should be data sources, so the data processor should act as the data source.
from ..train.trainer_utils import InputFeatures, InputExample
from ai_harness import harnessutils as aiutils

log = aiutils.getLogger('task')


class TaskDataset(Dataset):
    """
    This will be superseded by a framework-agnostic approach
    soon.
    """

    args: DataArguments
    features: List[InputFeatures]

    def __init__(self,
                 args: DataArguments,
                 tokenizer: PreTrainedTokenizer,
                 processor: DataProcessor,
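
The rest of the constructor is truncated above. A hedged sketch of how such a dataset is typically handed to a transformers Trainer; only the args, tokenizer and processor parameter names come from the visible signature, everything else (model, training_args, data_args and the completeness of the constructor call) is an assumption:

from transformers import Trainer

dataset = TaskDataset(args=data_args, tokenizer=tokenizer, processor=processor)  # hypothetical call, remaining parameters truncated above
trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
trainer.train()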
Example #5
import torch.nn as nn
from transformers import ElectraPreTrainedModel, ElectraModel
from transformers.file_utils import add_start_docstrings_to_callable
from transformers.modeling_electra import ELECTRA_INPUTS_DOCSTRING
from ai_harness import harnessutils as utils

log = utils.getLogger('task')


class ElectraForSequenceClassificationX(ElectraPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.electra = ElectraModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()

    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
    ):
        r"""