from ai_harness.configuration import Arguments, configclass, field, ComplexArguments, export, merge_fields, \
    to_json_string
from ai_harness import configclasses
from ai_harness import harnessutils as aiutils
from ai_harness.fileutils import join_path, FileLineReader

log = aiutils.getLogger('transformersx')


@configclass()
class ArgumentsBase:
    pass
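
# A minimal usage sketch (an assumption, not from the repository): concrete
# argument classes presumably subclass ArgumentsBase so they all share the
# @configclass machinery. The TrainArguments name and its fields below are
# hypothetical; the field(default, help) signature follows the tests elsewhere
# in this repo.
@configclass()
class TrainArguments(ArgumentsBase):
    model_name: str = field('bert-base-chinese', 'pretrained model name')
    batch_size: int = field(8, 'training batch size')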
from typing import Optional

from ai_harness.configclasses import configclass, field, fields, is_configclass
from ai_harness import harnessutils as utils

log = utils.getLogger('aiharness')


@configclass
class Address:
    phone: str = field("139", "phone number")


@configclass
class Config:
    name: str = field(default='TestName', help="name help")
    age: int = field(default=10, help="age help")
    value: Optional[int] = field(None, "value")
    address: Address = field(Address(), "Address help")

    def update(self):
        self.name = 1000


class Test_Dataclasses:
    def test_field(self):
        config = Config()
        config.update()
        config.age = '1000'
        for f in fields(config):
            # Optional[int] fields show up as typing.Union[int, None]
            if str(type(f.type)) == "typing.Union":
                pass  # body truncated in the original source
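
# A hedged sketch (not part of the original tests) showing how the otherwise
# unused is_configclass import and the nested Address default could be
# exercised; the exact semantics of is_configclass are an assumption here.
def example_nested_config():
    config = Config()
    assert is_configclass(Config)           # class was built by @configclass
    assert config.address.phone == "139"    # nested configclass default value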
from ai_harness import xml2object
from ai_harness import harnessutils as utils

log = utils.getLogger('test')


def test_xml2object():
    obj = xml2object.parse('configuration.xml')
    assert len(obj.configuration.group) == 2
    assert obj.configuration.group[0]['name'] == 'model'
    assert obj.configuration.group[0].arg[0]['name'] == 'test1'
    assert hasattr(obj.configuration, 'group')
    assert isinstance(obj.configuration.group, list)
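
# For reference, a configuration.xml shape that would satisfy the assertions
# above (reconstructed from the test, an assumption rather than the actual
# fixture) might look like:
#
#   <configuration>
#       <group name="model">
#           <arg name="test1"/>
#       </group>
#       <group name="...">
#           ...
#       </group>
#   </configuration>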
from tqdm.auto import tqdm, trange

from torch.utils.data.dataset import Dataset
from transformers import PreTrainedTokenizer, torch_distributed_zero_first, \
    RobertaTokenizer, RobertaTokenizerFast, XLMRobertaTokenizer
from typing import List, Optional, Union
from transformers.tokenization_utils import BatchEncoding

from .data_args import DataArguments
from .dataprocessor import DataProcessor

### TODO: The Dataset should be refactored to support many data sources.
### TODO: Behind the dataset there should be data sources, so the data processor should act as the data source.

from ..train.trainer_utils import InputFeatures, InputExample
from ai_harness import harnessutils as aiutils

log = aiutils.getLogger('task')


class TaskDataset(Dataset):
    """
    This will be superseded by a framework-agnostic approach soon.
    """

    args: DataArguments
    features: List[InputFeatures]

    def __init__(self, args: DataArguments, tokenizer: PreTrainedTokenizer, processor: DataProcessor,
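
# A hedged sketch (an assumption mirroring transformers' GlueDataset, not the
# truncated original body) of the caching pattern the imports above suggest:
# torch_distributed_zero_first lets rank 0 build and cache the features while
# the other distributed ranks wait, e.g.
#
#   with torch_distributed_zero_first(local_rank):
#       examples = processor.get_examples(...)      # hypothetical accessor
#       self.features = ...                         # tokenize into InputFeatures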
import torch.nn as nn

from transformers import ElectraPreTrainedModel, ElectraModel
from transformers.file_utils import add_start_docstrings_to_callable
from transformers.modeling_electra import ELECTRA_INPUTS_DOCSTRING

from ai_harness import harnessutils as utils

log = utils.getLogger('task')


class ElectraForSequenceClassificationX(ElectraPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.electra = ElectraModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()

    @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING)
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
    ):
        r"""
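
# A hedged sketch (an assumption following the usual transformers
# sequence-classification pattern, not the truncated original body) of how
# forward presumably continues: take the hidden state at the first token,
# classify it, and add a cross-entropy loss when labels are provided.
#
#   discriminator_hidden_states = self.electra(
#       input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds)
#   pooled_output = self.dropout(discriminator_hidden_states[0][:, 0])
#   logits = self.classifier(pooled_output)
#   outputs = (logits,)
#   if labels is not None:
#       loss = nn.CrossEntropyLoss()(logits.view(-1, self.config.num_labels), labels.view(-1))
#       outputs = (loss,) + outputs
#   return outputs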