Example #1
def test_meta_pipeline_graph():
    # Test List algorithm generation
    build_pipeline_graph(input=List(Word()),
                         output=List(Word()),
                         registry=[WordToWordAlgorithm])
    
    # Test Tuple breakdown feature
    build_pipeline_graph(input=Tuple(Word(), Matrix()),
                         output=Text(),
                         registry=[WordToWordAlgorithm])
    
    # Test Tuple breakdown feature and List algorithm generation
    build_pipeline_graph(input=Tuple(List(Word()), Matrix()),
                         output=List(Word()),
                         registry=[WordToWordAlgorithm])
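The WordToWordAlgorithm placed in the registry is not defined in this snippet. Judging by its name and by the annotated run convention the later examples use, a minimal sketch could look like the following (the module path and the body are assumptions):

from autogoal.kb import Word  # module path assumed


class WordToWordAlgorithm:
    def run(self, input: Word()) -> Word():
        # Hypothetical body: any word-to-word transformation works here;
        # only the type annotations matter to build_pipeline_graph.
        return input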
Example #2
def _build_pipeline():
    builder = build_pipelines(input=Tuple(List(List(Flags())),
                                          List(List(Category()))),
                              output=List(List(Category())),
                              registry=find_classes(include="CRF"))

    return builder.sample(sampler=Sampler(random_state=0))
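Here find_classes(include="CRF") presumably restricts the registry to algorithm classes whose names match "CRF", and Sampler(random_state=0) seeds the sampler so that the same pipeline is drawn on every call.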
Example #3
def test_save_load_tuple():
    TupleClass = build_composite_tuple(
        1,
        input_type=Tuple(MatrixContinuousDense(), CategoricalVector()),
        output_type=Tuple(MatrixContinuousDense(), CategoricalVector()),
    )
    algorithm = TupleClass(DummyAlgorithm)

    fp = BytesIO()

    Pickler(fp).dump(algorithm)
    fp.seek(0)

    algorithm2 = Unpickler(fp).load()

    assert repr(algorithm) == repr(algorithm2)
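The snippet omits its imports. BytesIO, Pickler, and Unpickler match the standard library's io and pickle APIs; a hedged import block under that assumption:

from io import BytesIO
from pickle import Pickler, Unpickler

# Library-side names; exact module paths are assumptions:
# from autogoal.kb import build_composite_tuple, Tuple, \
#     MatrixContinuousDense, CategoricalVector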
Example #4
    def _make_pipeline_builder(self):
        registry = self.registry or find_classes(include=self.include_filter,
                                                 exclude=self.exclude_filter)

        return build_pipelines(
            input=Tuple(self.input, self.output),
            output=self.output,
            registry=registry,
        )
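Note the supervised-learning convention here: the pipeline's input is Tuple(self.input, self.output), i.e. an (X, y) pair at training time, while output is the prediction type alone. Example #12 below makes the same convention explicit by unpacking X, y = input.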
Example #5
    def run(self, input: Sentence()) -> Tuple(List(Word()), List(Flags())):
        tokenized = self.nlp(input)

        tokens = []
        flags = []

        for token in tokenized:
            token_flags = {}
            if self.extract_lemma:
                token_flags["lemma"] = token.lemma_
            if self.extract_pos_tag:
                token_flags["pos"] = token.pos_

                for kv in token.tag_.split("|"):
                    kv = kv.split("=")
                    if len(kv) == 2:
                        token_flags["tag_" + kv[0]] = kv[1]
                    else:
                        token_flags["tag_" + kv[0]] = True

            if self.extract_dep:
                token_flags["dep"] = token.dep_
            if self.extract_entity:
                token_flags["ent_type"] = token.ent_type_
                token_flags["ent_kb_id"] = token.ent_kb_id_
            if self.extract_details:
                token_flags["is_alpha"] = token.is_alpha
                token_flags["is_ascii"] = token.is_ascii
                token_flags["is_digit"] = token.is_digit
                token_flags["is_lower"] = token.is_lower
                token_flags["is_upper"] = token.is_upper
                token_flags["is_title"] = token.is_title
                token_flags["is_punct"] = token.is_punct
                token_flags["is_left_punct"] = token.is_left_punct
                token_flags["is_right_punct"] = token.is_right_punct
                token_flags["is_space"] = token.is_space
                token_flags["is_bracket"] = token.is_bracket
                token_flags["is_quote"] = token.is_quote
                token_flags["is_currency"] = token.is_currency
                token_flags["like_url"] = token.like_url
                token_flags["like_num"] = token.like_num
                token_flags["like_email"] = token.like_email
                token_flags["is_oov"] = token.is_oov
                token_flags["is_stop"] = token.is_stop
            if self.extract_sentiment:
                token_flags["sentiment"] = token.sentiment

            tokens.append(token.text)
            flags.append(token_flags)

        return tokens, flags
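This tokenizer wraps a spaCy pipeline: lemma_, pos_, dep_ and the is_*/like_* predicates are standard spaCy Token attributes, and in some models tag_ packs morphological features as |-separated key=value pairs, which the inner loop expands into individual tag_* flags.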
Example #6
import logging

logging.basicConfig(level=logging.DEBUG)


@nice_repr
class A:
    def run(self, input: Sentence()) -> List(Word()):
        pass


@nice_repr
class B:
    def run(self, input: List(Word())) -> List(Vector()):
        pass


@nice_repr
class C:
    def run(self, input: List(Vector())) -> Matrix():
        pass


builder = build_pipelines(input=Tuple(Sentence(), Vector()),
                          output=Matrix(),
                          registry=[A, B, C])

pipeline = builder.sample()
print(pipeline)
print(pipeline.run([[[True], [False, True]]]))
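Given these annotations, the only type-correct chain from Sentence() to Matrix() is A -> B -> C, so sampling here chiefly exercises pipeline construction; the run bodies are stubs, so no real computation happens.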
Example #7
    def run(
        self, input: Tuple(Sentence(), List(Tuple(Entity(), Entity(), Category())))
    ) -> Tuple(List(Vector()), CategoricalVector()):
        pass
Example #8
    def run(
        self, input: Tuple(Sentence(), List(Entity()))
    ) -> Tuple(List(Word()), List(Postag())):
        pass
Example #9
    def run(
        self, input: Tuple(List(MatrixContinuousDense()), List(List(Postag())))
    ) -> List(List(Postag())):
        return super().run(input)
Example #10
    def run(
        self, input: Tuple(Tensor3(), CategoricalVector())
    ) -> CategoricalVector():
        return super().run(input)
Example #11
    def run(
        self, input: Tuple(MatrixContinuousDense(), CategoricalVector())
    ) -> CategoricalVector():
        return super().run(input)
Example #12
    def run(
        self, input: Tuple(MatrixContinuousDense(), CategoricalVector())
    ) -> CategoricalVector():
        X, y = input
        return y
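This looks like a dummy estimator: it unpacks the (X, y) input pair and returns the labels unchanged, which is enough to exercise the pipeline's type checking without doing any learning.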
Example #13
    def run(
        self, input: Tuple(List(List(Flags())), List(List(Category())))
    ) -> List(List(Category())):
        return SklearnEstimator.run(self, input)
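Calling SklearnEstimator.run(self, input) explicitly is equivalent to the super().run(input) form of Examples #9 to #11 whenever SklearnEstimator is the next class in the method resolution order; in both styles it is the annotated signature that the pipeline builder consumes.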