Пример #1
0
    def __init__(self, tokenize):
        """Collect per-class statistics from the training documents.

        Stores ``tokenize`` on the instance and fills ``self.dict`` with one
        entry per class (``"pos"`` and ``"neg"``), each holding:

        * ``"counter"``    -- the token counts ``vocabulary`` returned for
          that class;
        * ``"nof_tokens"`` -- the sum of those counts;
        * ``"log_prob"``   -- natural log of the share of documents yielded
          by ``docs("train", <class>)`` that belong to the class.

        ``self.dict["voc_size"]`` is the number of distinct tokens seen in
        either class.

        Args:
            tokenize: callable forwarded unchanged to ``vocabulary``.
        """
        self.tokenize = tokenize

        labels = ("pos", "neg")

        # Count documents per class without materialising them.
        doc_counts = {}
        for label in labels:
            doc_counts[label] = sum(1 for _ in docs("train", label))
        total_docs = sum(doc_counts.values())

        token_counts = vocabulary(self.tokenize)

        self.dict = {}
        for label in labels:
            counter = token_counts[label]
            self.dict[label] = {
                "counter": counter,
                "nof_tokens": sum(counter.values()),
                "log_prob": math.log(doc_counts[label] / total_docs),
            }

        # Distinct tokens across both classes.
        all_tokens = set(token_counts["pos"]) | set(token_counts["neg"])
        self.dict["voc_size"] = len(all_tokens)
Пример #2
0
def measure(classifier, dataset: str = "test", alpha: int = 1):
    """Classify every document of ``dataset`` and print a metrics table.

    ``classifier.is_pos(doc, alpha)`` is called for each document yielded by
    ``docs(dataset, "pos")`` and then ``docs(dataset, "neg")``.  The two
    lists of marks are handed to ``measure_metrics`` and its micro- and
    macro-averaged precision, recall and F-measure are printed.

    Args:
        classifier: object exposing ``is_pos(doc, alpha)``.
        dataset: name of the document split passed to ``docs``.
        alpha: value forwarded as the second argument of ``is_pos``.

    Returns:
        Tuple ``(marks_pos, marks_neg)`` with the classifier's answers for
        the positive and the negative documents respectively.
    """
    marks_pos = [classifier.is_pos(doc, alpha) for doc in docs(dataset, "pos")]
    marks_neg = [classifier.is_pos(doc, alpha) for doc in docs(dataset, "neg")]
    metrics = measure_metrics(marks_pos, marks_neg)

    # One table row per averaging mode: (row label, precision, recall, F keys).
    rows = (
        (" Micro", "P_micro", "R_micro", "F_micro"),
        (" Macro", "P_macro", "R_macro", "F_macro"),
    )

    print("       | Precision | Recall | F-measure")
    for label, p_key, r_key, f_key in rows:
        precision = metrics[p_key]
        recall = metrics[r_key]
        f_measure = metrics[f_key]
        print(f"{label} | {precision:9.2f} | {recall:6.2f} | {f_measure:8.2f}")
    print()

    return marks_pos, marks_neg
Пример #3
0
def vocabulary(tokenize):
    """Accumulate token counts per class over the training documents.

    Args:
        tokenize: callable applied to each document; its result is added to
            a ``Counter`` with ``+=``, so it has to be Counter-compatible
            (it must provide ``.items()``).

    Returns:
        Dict with the keys ``"pos"`` and ``"neg"``, each mapping to the
        ``Counter`` summed over ``docs("train", <class>)``.
    """
    vocab = {}
    for feel in ("pos", "neg"):
        totals = Counter()
        for doc in docs("train", feel):
            # In-place Counter addition; entries with a non-positive count
            # are discarded by Counter after each addition.
            totals += tokenize(doc)
        vocab[feel] = totals
    return vocab
def dump_debug():
    """Return the template variables for the debug screen (active routes).

    Calls ``abort(400, "Access Denied")`` unless ``app.settings.debug`` is
    truthy.  Otherwise ``app.template_vars`` is updated with the page title
    and the output of ``utils.docs`` for the default Bottle app, and then
    returned.
    """
    if not app.settings.debug:
        abort(400, "Access Denied")

    debug_vars = {
        'title': 'Debug information',
        'modules': utils.docs(bottle_default_app()),
    }
    app.template_vars.update(debug_vars)
    return app.template_vars
Пример #5
0
def dump_debug():
    """Return the template variables for the debug screen (active routes).

    Calls ``abort(400, "Access Denied")`` unless ``app.settings.debug`` is
    truthy.  Otherwise ``app.template_vars`` is updated with the page title
    and the output of ``utils.docs`` for the default Bottle app, and then
    returned.
    """
    if not app.settings.debug:
        abort(400, "Access Denied")

    debug_vars = {
        'title': 'Debug information',
        'modules': utils.docs(bottle_default_app()),
    }
    app.template_vars.update(debug_vars)
    return app.template_vars
Пример #6
0
def index_word_appearancefraction():
    """Build a per-word document-frequency index for both sentiment classes.

    Every document yielded by ``docs(feel=...)`` is converted to its token
    set with ``tokens``.  For each class the function counts in how many
    documents every word occurs.  The counts are gathered in a single pass
    over the documents instead of re-scanning every stored document set once
    per vocabulary word, and the per-document token sets are not retained.

    Returns:
        Dict with two entries:

        * ``"index"`` maps each word seen in either class to a tuple
          ``(pos_docs, neg_docs)`` -- the number of ``"pos"`` and ``"neg"``
          documents containing the word;
        * ``"total_nof_docs"`` is the tuple ``(pos, neg)`` with the number
          of documents read per class.
    """
    # Function-scope import keeps the block self-contained.
    from collections import Counter

    feels = ("pos", "neg")
    doc_freq = tuple(Counter() for _ in feels)
    nof_docs = [0] * len(feels)
    words = set()

    for i, feel in enumerate(feels):
        for doc in docs(feel=feel):
            toks = tokens(doc)
            words |= toks
            # ``toks`` is set-like (it is merged with ``|=`` above), so each
            # word contributes at most one count per document.
            doc_freq[i].update(toks)
            nof_docs[i] += 1

    pos_freq, neg_freq = doc_freq
    return {
        # A Counter yields 0 for words never seen in that class.
        "index": {word: (pos_freq[word], neg_freq[word]) for word in words},
        "total_nof_docs": tuple(nof_docs),
    }
Пример #7
0
def send_doc_wrapper(path):
    """Render documentation for a specific API module.

    Looks ``path`` up in the mapping returned by ``utils.docs()``.

    Args:
        path: key identifying the module whose documentation is requested.

    Returns:
        Dict with ``"title"`` (``path`` in title case) and ``"docs"`` (the
        entry stored under ``path``) when the key exists; otherwise
        ``abort(404, "Not Found")`` is called.
    """
    available = utils.docs()
    if path in available:
        page_title = path.title()
        return {"title": page_title, "docs": available[path]}
    abort(404, "Not Found")
Пример #8
0
def send_doc_wrapper(path):
    """Render documentation for a specific API module.

    Looks ``path`` up in the mapping returned by ``utils.docs()``.

    Args:
        path: key identifying the module whose documentation is requested.

    Returns:
        Dict with ``"title"`` (``path`` in title case) and ``"docs"`` (the
        entry stored under ``path``) when the key exists; otherwise
        ``abort(404, "Not Found")`` is called.
    """
    available = utils.docs()
    if path in available:
        page_title = path.title()
        return {"title": page_title, "docs": available[path]}
    abort(404, "Not Found")
def dump_debug():
    """Return the template variables for the debug screen (active routes).

    Calls ``abort(400, "Access Denied")`` unless ``app.config.debug`` is
    truthy.  Otherwise ``app.template_vars`` is updated with the page title
    and the output of ``utils.docs`` for the default Bottle app, and then
    returned.
    """
    if not app.config.debug:
        abort(400, "Access Denied")

    debug_vars = {
        "title": "Debug information",
        "modules": utils.docs(bottle_default_app()),
    }
    app.template_vars.update(debug_vars)
    return app.template_vars