def setup():
    global pages
    global urlalias
    global revurlalias
    global knn
    pages = dict()
    urlalias = dict()
    revurlalias = dict()
    knn = KNN()
    # Read the Drupal URL aliases so taxonomy/node paths can be mapped to friendly URLs.
    db = MySQLdb.connect(host="192.168.200.26", user="******", passwd="xxxsecretxxx", db="pla")
    cur = db.cursor()
    cur.execute("select source, alias from url_alias")
    for row in cur.fetchall():
        urlalias[row[1]] = row[0]
        revurlalias[row[0]] = row[1]
    # Train the classifier on taxonomy term names and descriptions.
    cur.execute("select tid, name, description, vid from taxonomy_term_data;")
    for row in cur.fetchall():
        url = 'taxonomy/term/' + str(row[0])
        pages[url] = row[1]
        if url in revurlalias:
            pages[revurlalias[url]] = row[1]
            url = revurlalias[url]
        if row[3] == 3:
            soup = bs4.BeautifulSoup(row[2])
            the_text = re.sub(r'[\n\r]+', r' ', soup.get_text(' ')).lower()
            knn.train(Document(the_text, stemmer=PORTER), url)
            knn.train(Document(row[1].lower()), url)
    # Train on the body text and title of nodes tagged with a practice area.
    cur.execute(
        "select a.tid, c.body_value, d.title from taxonomy_term_data as a "
        "inner join field_data_field_practice_areas as b "
        "on (a.tid=b.field_practice_areas_tid and b.entity_type='node' "
        "and b.bundle != 'professionals' and b.deleted=0) "
        "inner join field_data_body as c "
        "on (b.entity_id=c.entity_id and b.entity_type=c.entity_type) "
        "inner join node as d on (c.entity_id=d.nid);"
    )
    for row in cur.fetchall():
        url = 'taxonomy/term/' + str(row[0])
        if url in revurlalias:
            url = revurlalias[url]
        soup = bs4.BeautifulSoup(row[1])
        the_text = re.sub(r'[\n\r]+', r' ', soup.get_text(' ')).lower()
        knn.train(Document(the_text, stemmer=PORTER), url)
        knn.train(Document(row[2].lower()), url)
    # Record the titles of all published nodes.
    cur.execute("select nid, title from node where status=1;")
    for row in cur.fetchall():
        url = 'node/' + str(row[0])
        pages[url] = row[1]
        if url in revurlalias:
            pages[revurlalias[url]] = row[1]
    db.close()
    # Also train on previously logged queries and their targets
    # (conn is a separate database connection assumed to be opened elsewhere).
    pgcur = conn.cursor()
    pgcur.execute(
        "select query, target from website_queries where target is not null "
        "group by query, target"
    )
    for row in pgcur.fetchall():
        words = re.split(r'[\n\r,;]+ *', row[1])
        for word in words:
            print("training on " + row[0].lower() + " for " + word)
            knn.train(Document(row[0].lower()), word)
    conn.commit()
    pgcur.close()
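# A minimal usage sketch (not part of the original code): once setup() has run,
# the trained global knn can map a free-text query onto one of the trained URLs.
# The function name lookup() is hypothetical; knn.classify() and the pages dict
# come from the code above.
def lookup(query):
    url = knn.classify(Document(query.lower(), stemmer=PORTER))
    if url is None:
        return None  # query did not resemble any trained page
    return url, pages.get(url)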
def train(cls, train_file, model_file):
    sents_dic = (json.loads(jsonl) for jsonl in SoftSkills.load(train_file))
    model = KNN()
    for sent in sents_dic:
        text = sent['text']
        v = count([word for word, pos in tag(text)])  # {'sweet': 1}
        if v:
            model.train(v, type=sent['soft skill'])
    model.save(model_file)
    return model
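# A rough sketch of how the saved model might be used later (assumed helper, not
# from the original source); it mirrors the feature extraction used in train()
# and relies on pattern's Classifier.save()/load() round-trip used above.
from pattern.en import tag
from pattern.vector import KNN, count

def predict(model_file, text):
    model = KNN.load(model_file)                   # reload the pickled classifier
    v = count([word for word, pos in tag(text)])   # same bag-of-words features as training
    return model.classify(v)                       # predicted 'soft skill' label, or None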
s = Sentence(parse(s))                # parse tree with part-of-speech tags
s = search('JJ', s)                   # adjectives in the tweet
s = [match[0].string for match in s]  # adjectives as a list of strings
s = " ".join(s)                       # adjectives as string
if len(s) > 0:
    m.append(Document(s, type=p, stemmer=None))

# Train k-Nearest Neighbor on the model.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
for document in m:               # (classify unknown documents with the most frequent type).
    classifier.train(document)

# These are the adjectives the classifier has learned:
print sorted(classifier.features)
print

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print classifier.classify('sweet potato burger')  # yields 'WIN'
print classifier.classify('stupid autocorrect')   # yields 'FAIL'

# "What can I do with it?"
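# A small helper sketch (not in the original example): apply the same adjective
# extraction to an unseen tweet before classifying it, so that prediction uses
# the same kind of features the classifier was trained on. classify_tweet() is
# a hypothetical name.
def classify_tweet(tweet):
    t = Sentence(parse(tweet))
    adjectives = " ".join(match[0].string for match in search('JJ', t))
    return classifier.classify(Document(adjectives, stemmer=None))

print classify_tweet('that new burger place is awesome')  # e.g. 'WIN'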
s = Sentence(parse(s))  # parse analyzes & gives strings that are annotated with specified tags
s = search('JJ', s)     # searches for adjectives in tweets (JJ = adjective)
s = [match[0].string for match in s]
s = ' '.join(s)
if len(s) > 0:
    corpus.append(Document(s, type=p))
    corpus.append(Document(s, type=m))

classifier = KNN()  # k-nearest neighbor classifier = K-NN
objects = []
for document in corpus:         # documents are an unordered bag of given sentences.
    classifier.train(document)  # adjective vectors in corpus train the classifier
objects.append(classifier.classify('awesome'))  # predicts awesome as win
objects.append(classifier.classify('cool'))     # predicts cool as win
objects.append(classifier.classify('damn'))     # predicts damn as fail
objects.append(classifier.classify('sucks'))    # predicts sucks as fail
print objects

wincounter = 0
failcounter = 0
for thing in objects:
    if thing == 'WIN':
        wincounter += 1
    elif thing == 'FAIL':
        failcounter += 1
    else:
        pass
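# A small follow-up sketch (not in the original): report the share of WIN
# predictions from the tally above.
total = wincounter + failcounter
if total > 0:
    print 'WIN ratio: %.2f' % (float(wincounter) / total)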
s = search('JJ', s)                   # adjectives in the tweet
s = [match[0].string for match in s]  # adjectives as a list of strings
s = " ".join(s)                       # adjectives as string
if len(s) > 0:
    m.append(Document(s, type=p, stemmer=None))

# Train k-Nearest Neighbor on the model.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
                                 # (classify unknown documents with the most frequent type).
for document in m:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify('sweet potato burger'))  # yields 'WIN'
print(classifier.classify('stupid autocorrect'))   # yields 'FAIL'

# "What can I do with it?"
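# A minimal evaluation sketch, assuming pattern.vector's Classifier.test()
# classmethod mentioned in the comment above; it runs k-fold cross-validation
# over the labeled documents in m and returns overall scores.
accuracy, precision, recall, f1 = KNN.test(m, folds=10)
print(accuracy, precision, recall, f1)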