Example #1
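    # Both parameters use AutoGOAL's `algorithm(X, Y)` annotation: any algorithm
    # whose `run` method maps an X to a Y can be injected here, and the framework
    # picks a concrete implementation when sampling pipelines.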
    def __init__(
        self,
        tokenizer: algorithm(Document(), List(Sentence())),
        feature_extractor: algorithm(Sentence(), Flags()),
    ):
        self.tokenizer = tokenizer
        self.feature_extractor = feature_extractor
Example #2
    def __init__(
        self,
        tokenizer: algorithm(Sentence(), List(Word())),
        feature_extractor: algorithm(Word(), Flags()),
        include_text: Boolean(),
    ):
        self.tokenizer = tokenizer
        self.feature_extractor = feature_extractor
        self.include_text = include_text
Example #3
    def run(self, input: Sentence()) -> Flags():
        tokens = self.tokenizer.run(input)
        flags = [self.feature_extractor.run(w) for w in tokens]

        if self.include_text:
            return {
                f"{w}|{f}": v for w, flag in zip(tokens, flags) for f, v in flag.items()
            }
        else:
            return {f: v for flag in flags for f, v in flag.items()}
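
# Illustrative values (hedged, not from the source): with tokens ["cat", "sat"]
# and flags [{"pos": "NOUN"}, {"pos": "VERB"}], include_text=True yields
# {"cat|pos": "NOUN", "sat|pos": "VERB"}, whereas include_text=False yields
# {"pos": "VERB"}, since later tokens overwrite duplicate feature names.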
Example #4
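# This `run` method wraps a spaCy pipeline stored in `self.nlp`. The attributes
# read below (lemma_, pos_, tag_, dep_, ent_type_, is_alpha, ...) are standard
# spaCy Token attributes; in some spaCy models the fine-grained `token.tag_`
# packs morphological features as "key=value" pairs separated by "|", which the
# loop below splits into individual "tag_*" flags.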
    def run(self, input: Sentence()) -> Tuple(List(Word()), List(Flags())):
        tokenized = self.nlp(input)

        tokens = []
        flags = []

        for token in tokenized:
            token_flags = {}
            if self.extract_lemma:
                token_flags["lemma"] = token.lemma_
            if self.extract_pos_tag:
                token_flags["pos"] = token.pos_

                for kv in token.tag_.split("|"):
                    kv = kv.split("=")
                    if len(kv) == 2:
                        token_flags["tag_" + kv[0]] = kv[1]
                    else:
                        token_flags["tag_" + kv[0]] = True

            if self.extract_dep:
                token_flags["dep"] = token.dep_
            if self.extract_entity:
                token_flags["ent_type"] = token.ent_type_
                token_flags["ent_kb_id"] = token.ent_kb_id_
            if self.extract_details:
                token_flags["is_alpha"] = token.is_alpha
                token_flags["is_ascii"] = token.is_ascii
                token_flags["is_digit"] = token.is_digit
                token_flags["is_lower"] = token.is_lower
                token_flags["is_upper"] = token.is_upper
                token_flags["is_title"] = token.is_title
                token_flags["is_punct"] = token.is_punct
                token_flags["is_left_punct"] = token.is_left_punct
                token_flags["is_right_punct"] = token.is_right_punct
                token_flags["is_space"] = token.is_space
                token_flags["is_bracket"] = token.is_bracket
                token_flags["is_quote"] = token.is_quote
                token_flags["is_currency"] = token.is_currency
                token_flags["like_url"] = token.like_url
                token_flags["like_num"] = token.like_num
                token_flags["like_email"] = token.like_email
                token_flags["is_oov"] = token.is_oov
                token_flags["is_stop"] = token.is_stop
            if self.extract_sentiment:
                token_flags["sentiment"] = token.sentiment

            tokens.append(token.text)
            flags.append(token_flags)

        return tokens, flags
Example #5
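# This transformer combines scikit-learn's CountVectorizer (aliased here as
# _CountVectorizer) with AutoGOAL-sampled components: `lowercase` and `binary`
# are forwarded to the vectorizer, while the tokenizer, stemmer, and stopword
# remover are themselves algorithms chosen by the framework.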
    def __init__(
            self,
            lowercase: Boolean(),
            stopwords_remove: Boolean(),
            binary: Boolean(),
            inner_tokenizer: algorithm(Sentence(), List(Word())),
            inner_stemmer: algorithm(Word(), Stem()),
            inner_stopwords: algorithm(List(Word()), List(Word())),
    ):
        self.stopwords_remove = stopwords_remove
        self.inner_tokenizer = inner_tokenizer
        self.inner_stemmer = inner_stemmer
        self.inner_stopwords = inner_stopwords

        SklearnTransformer.__init__(self)
        _CountVectorizer.__init__(self, lowercase=lowercase, binary=binary)
Example #6
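# This snippet uses the pre-1.0 AutoGOAL API. The imports below are a hedged
# reconstruction of what the script relies on; `args` comes from an argparse
# parser defined elsewhere, and `f1_score` is assumed to be defined or imported
# elsewhere as well (e.g. from sklearn.metrics).

from autogoal.contrib import find_classes
from autogoal.kb import (
    CategoricalVector,
    List,
    Matrix,
    Sentence,
    Tuple,
    Vector,
    build_pipelines,
)
from autogoal.ml import AutoML
from autogoal.search import PESearch
from autogoal.utils import nice_repr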
# The next line will print all the algorithms that AutoGOAL found
# in the `contrib` library, i.e., anything that could be potentially used
# to solve an AutoML problem.

for cls in find_classes():
    print("Using: %s" % cls.__name__)

# ## Experimentation

# Instantiate the classifier.
# Note that the input and output types here are defined to match the problem statement,
# i.e., text classification.

classifier = AutoML(
    search_algorithm=PESearch,
    input=List(Sentence()),
    output=CategoricalVector(),
    search_iterations=args.iterations,
    score_metric=f1_score,
    search_kwargs=dict(
        pop_size=args.popsize,
        search_timeout=args.global_timeout,
        evaluation_timeout=args.timeout,
        memory_limit=args.memory * 1024**3,
    ),
)
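
# A minimal usage sketch (hedged: X_train, y_train, X_test, y_test are
# hypothetical datasets not defined in this snippet):
#
#     classifier.fit(X_train, y_train)
#     print(classifier.best_pipeline_, classifier.best_score_)
#     print(classifier.score(X_test, y_test))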

# This custom logger is used for debugging purposes, to be able later to recover
# the best pipelines and all the errors encountered in the experimentation process.

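# A minimal sketch of such a logger, assuming the `Logger` base class in
# `autogoal.search` exposes `error` and `update_best` hooks (hedged: hook
# signatures vary across versions, and the file names are illustrative):

from autogoal.search import Logger

class CustomLogger(Logger):
    def error(self, e, solution):
        # Append every failing pipeline together with the raised exception.
        with open("errors.log", "a") as fp:
            fp.write(f"solution={repr(solution)}\nerror={e}\n\n")

    def update_best(self, new_best, new_fn, *args, **kwargs):
        # Append each new best pipeline and its fitness as soon as it is found.
        with open("best.log", "a") as fp:
            fp.write(f"solution={repr(new_best)}\nfitness={new_fn}\n\n")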
import logging

logging.basicConfig(level=logging.DEBUG)
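
# To illustrate how pipelines are assembled from type annotations, three toy
# algorithms are declared below: each `run` signature declares its input and
# output semantic types, and `build_pipelines` chains algorithms from the
# registry so that the overall input and output types match.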


@nice_repr
class A:
    def run(self, input: Sentence()) -> List(Word()):
        pass


@nice_repr
class B:
    def run(self, input: List(Word())) -> List(Vector()):
        pass


@nice_repr
class C:
    def run(self, input: List(Vector())) -> Matrix():
        pass


builder = build_pipelines(input=Tuple(Sentence(), Vector()),
                          output=Matrix(),
                          registry=[A, B, C])

pipeline = builder.sample()
print(pipeline)
print(pipeline.run([[[True], [False, True]]]))
Example #9
    def run(
        self, input: Tuple(Sentence(), List(Tuple(Entity(), Entity(), Category())))
    ) -> Tuple(List(Vector()), CategoricalVector()):
        pass
Example #10
    def __init__(self,
        tokenizer: algorithm(Sentence(), List(Word())),
        token_feature_extractor: algorithm(Word(), Flags()),
        # token_sentence_encoder: algorithm(Word(), )
    ):
        pass
Example #11
    def run(
        self, input: Tuple(Sentence(), List(Entity()))
    ) -> Tuple(List(Word()), List(Postag())):
        pass
Example #12
    def __init__(self, tokenizer: algorithm(Sentence(), List(Word()))):
        self.tokenizer = tokenizer
Example #13
    def run(self, input: List(Sentence())) -> MatrixContinuousSparse():
        return SklearnTransformer.run(self, input)
Example #14
    def run(self, input: List(Sentence())) -> Document():
        pass
Example #15
    def run(self, input: List(Word())) -> Sentence():
        pass