Exemplo n.º 1
0
 def __call__(self, func, *args, **kwargs):
     """Apply ``func`` to the wrapped data and wrap the result in a new DataProcessor.

     Args:
         func: Either a ``FilterInterface`` instance, or any object that
             ``apply_func`` accepts as its second argument.
         *args: Extra positional arguments forwarded to ``apply_func``
             (ignored when ``func`` is a ``FilterInterface``).
         **kwargs: Extra keyword arguments forwarded to ``apply_func``
             (ignored when ``func`` is a ``FilterInterface``).

     Returns:
         A new ``DataProcessor`` built from the selected or transformed data.

     Raises:
         TypeError: If ``apply_func`` returns something that is not an
             instance of the wrapped data's type.
     """
     if isinstance(func, FilterInterface):
         # The filter's apply() result is used as a ``.loc`` selector on the data.
         selected = self._data.loc[func.apply(self._data)]
         return DataProcessor(selected)

     ret_value = apply_func(self._data, func, *args, **kwargs)
     if isinstance(ret_value, type(self._data)):
         return DataProcessor(ret_value)

     # The result must keep the wrapped data's type so it can be re-wrapped.
     raise TypeError(
         f'Call to DataProcessor should return type {type(self._data)} but returned {type(ret_value)}')
Exemplo n.º 2
0
                                             "id":
                                             row.get(id_labels_column),
                                             "text":
                                             row.get(text_column),
                                             "lang":
                                             row.get(lang_column),
                                             "concept_id":
                                             row.get(concept_id_column),
                                             "precision":
                                             row.get(precision_column)
                                         }]).dict()


def train_model(client):
    """Start training for ``model_id`` and return the awaited result as a string.

    Args:
        client: Object exposing ``model.train`` and ``model.wait_training``.

    Returns:
        ``str()`` of whatever ``client.model.wait_training(model_id)`` returns.
    """
    # ``model_id`` is a module-level name defined outside this function.
    client.model.train(model_id)
    training_result = client.model.wait_training(model_id)
    return str(training_result)


# Build the API client from this recipe's configuration.
cl_client = get_client(get_recipe_config())

# NOTE(review): init_model is defined outside this excerpt; presumably it makes
# sure the model exists before anything is uploaded -- confirm.
init_model(cl_client)
# Concepts are handed to apply_func first, then labels, each with its own
# input/output dataset names.
# NOTE(review): apply_func's behaviour is not visible here; presumably it runs
# the given function over the input dataset and writes to the output dataset
# -- confirm against utils.apply_func.
apply_func(upload_concepts,
           client=cl_client,
           input_dataset="concepts_input_dataset",
           output_dataset="concepts_output_dataset")
apply_func(upload_labels,
           client=cl_client,
           input_dataset="labels_input_dataset",
           output_dataset="labels_output_dataset")
# Train after both uploads; the string returned by train_model is discarded.
train_model(cl_client)
Exemplo n.º 3
0
        for k, v in row.items() if k.startswith(variation_prefix)
    }
    return client.faq.create_questions(model_id,
                                       questions=[{
                                           "id":
                                           row.get(questions_id_column),
                                           "variations":
                                           variations,
                                           "answer_id":
                                           row.get(answer_id_column)
                                       }]).dict()


def train_model(client):
    """Start training for ``model_id`` and return the awaited result as a string.

    Args:
        client: Object exposing ``model.train`` and ``model.wait_training``.

    Returns:
        ``str()`` of whatever ``client.model.wait_training(model_id)`` returns.
    """
    # ``model_id`` is a module-level name defined outside this function.
    client.model.train(model_id)
    training_result = client.model.wait_training(model_id)
    return str(training_result)


# Build the API client from this recipe's configuration.
cl_client = get_client(get_recipe_config())

# NOTE(review): init_model is defined outside this excerpt; presumably it makes
# sure the model exists before anything is uploaded -- confirm.
init_model(cl_client)
# Answers are handed to apply_func first, then questions, each with its own
# input/output dataset names.
# NOTE(review): apply_func's behaviour is not visible here; presumably it runs
# the given function over the input dataset and writes to the output dataset
# -- confirm against utils.apply_func.
apply_func(upload_answers,
           client=cl_client,
           input_dataset="answers_input_dataset",
           output_dataset="answers_output_dataset")
apply_func(upload_questions,
           client=cl_client,
           input_dataset="questions_input_dataset",
           output_dataset="questions_output_dataset")
# Train after both uploads; the string returned by train_model is discarded.
train_model(cl_client)
Exemplo n.º 4
0
from dataiku.customrecipe import get_recipe_config
from utils import apply_func

# Value of the "url_column" recipe setting, read once at import time; call_api
# passes it to row.get(...) to pick the URL out of a row.
url_column = get_recipe_config().get("url_column")


def call_api(client, row):
    """Extract the article at the row's URL and return it as a dict.

    Args:
        client: Object exposing ``html.extract_article``.
        row: Mapping-like object; the URL is read with ``row.get(url_column)``.

    Returns:
        The result of calling ``.dict()`` on the extraction response.
    """
    # Resolve the endpoint before reading the row, matching the evaluation
    # order of a single chained call expression.
    extract_article = client.html.extract_article
    article = extract_article(row.get(url_column))
    return article.dict()


# Hand call_api to apply_func with no explicit client or dataset names.
# NOTE(review): apply_func (from utils) presumably supplies the client and the
# default input/output datasets itself -- confirm.
apply_func(call_api)
Exemplo n.º 5
0
    except ModelNotFound:
        client.model.create(model_id, "clf")


def call_api(client, row):
    """Create one classifier document from ``row`` and return the response as a dict.

    Args:
        client: Object exposing ``classifier.create_documents``.
        row: Mapping-like object; fields are read with ``row.get`` using the
            module-level ``*_column`` names.

    Returns:
        The result of calling ``.dict()`` on the ``create_documents`` response.
    """
    create_documents = client.classifier.create_documents
    # A missing column yields None for that field (row.get default).
    document = {
        "text": row.get(text_column),
        "lang": row.get(lang_column),
        "class_id": row.get(class_id_column),
        "id": row.get(id_column),
    }
    response = create_documents(model_id, documents=[document])
    return response.dict()


def train_model(client):
    """Start training for ``model_id`` and return the awaited result as a string.

    ``model_id``, ``model_type`` and ``train_ratio`` are module-level names
    defined outside this function.

    Args:
        client: Object exposing ``model.train`` and ``model.wait_training``.

    Returns:
        ``str()`` of whatever ``client.model.wait_training(model_id)`` returns.
    """
    client.model.train(model_id,
                       model_type=model_type,
                       model_config={"train_ratio": train_ratio})
    # Wait on the client that was passed in, not the module-level ``cl_client``,
    # so the function does not depend on that global existing.
    return str(client.model.wait_training(model_id))


# Build the API client from this recipe's configuration.
cl_client = get_client(get_recipe_config())

# NOTE(review): init_model's full body is outside this excerpt; presumably it
# creates the model when it does not exist yet -- confirm.
init_model(cl_client)
# NOTE(review): apply_func's behaviour is not visible here; presumably it runs
# call_api over the recipe's input rows using the given client -- confirm.
apply_func(call_api, client=cl_client)
# Train after the documents are submitted; the string returned by train_model
# is discarded.
train_model(cl_client)