# -- Indexer setup (fragment): the ZMQ sockets `receiver` and `controller`, and the
# Xapian-related names (XAPIAN_DB_PATH, SCHEMA_VERSION, XapianIndex, xapian_index_forever)
# are defined or imported earlier in the full script.
poller = zmq.Poller()
poller.register(receiver, zmq.POLLIN)
poller.register(controller, zmq.POLLIN)

parser = ArgumentParser()
parser.add_argument('-r', '--remote_stub', action='store_true', help='remote stub')
args = parser.parse_args(sys.argv[1:])
remote_stub = args.remote_stub

dbpath = XAPIAN_DB_PATH
xapian_indexer = XapianIndex(dbpath, SCHEMA_VERSION, remote_stub)

# Hooks applied to every weibo item before it is written to the Xapian index.
fill_field_funcs = []

from consts import XAPIAN_EXTRA_FIELD
from triple_sentiment_classifier import triple_classifier


def fill_sentiment(item):
    # Classify the weibo's sentiment and store it in the extra Xapian field.
    sentiment = triple_classifier(item)
    item[XAPIAN_EXTRA_FIELD] = sentiment
    return item

fill_field_funcs.append(fill_sentiment)

s = load_scws()


def cut_text(item):
    # Segment the weibo text with SCWS and store the resulting terms for indexing.
    text = item['text'].encode('utf-8')
    item['terms'] = cut(s, text, cx=False)
    return item

fill_field_funcs.append(cut_text)

xapian_index_forever(xapian_indexer, receiver, controller, poller, fill_field_funcs=fill_field_funcs)
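# Illustrative sketch (not part of the original script): each fill_field_func takes a
# weibo dict and returns it with extra fields filled in; the sample text below is
# hypothetical, and xapian_index_forever presumably applies these hooks to every
# received item before indexing it:
#
#   item = {'text': u'今天天气不错'}
#   item = fill_sentiment(item)   # adds item[XAPIAN_EXTRA_FIELD], the sentiment label
#   item = cut_text(item)         # adds item['terms'], the SCWS-segmented terms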
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import eventlet
from eventlet import wsgi
from xapian_weibo.utils import load_scws
import json
import urllib

JSON_HEADER = [('Content-Type', 'application/json;charset=UTF-8'),
               ('Access-Control-Allow-Origin', '*'),
               ('Server', 'WDC-eventlet')]

s = load_scws()


def cut(text, f=None):
    # Segment `text` with SCWS; if `f` is given, keep only tokens whose part-of-speech
    # tag is in `f`. A token is kept when it is plain alphanumeric or longer than
    # 3 bytes (i.e. at least two CJK characters), which drops punctuation and
    # single-character tokens.
    global s
    if f:
        return [token[0].decode('utf-8') for token in s.participle(text)
                if token[1] in f and (token[0].isalnum() or len(token[0]) > 3)]
    else:
        return [token[0].decode('utf-8') for token in s.participle(text)
                if token[0].isalnum() or len(token[0]) > 3]


def word_seg(env, start_response):
import nltk
import re
from gensim import corpora, models, similarities
import math
import string
from nltk import probability
from nltk.probability import FreqDist
import cPickle as pickle
import leveldb
from xapian_weibo.xapian_backend import XapianSearch
from xapian_weibo.xapian_backend_extra import _load_weibos_from_xapian
from xapian_weibo.utils import load_scws
from xapian_weibo.utils import cut

cut_str = load_scws()

# Sentiment category labels
HAPPY = 1
ANGRY = 2
SAD = 3


def emoticon(zan_set, angry_set, sad_set, text):
    """`text` is the weibo text, not a keyword."""
    emotion_pattern = r'\[(\S+?)\]'
    remotions = re.findall(emotion_pattern, text)
    zan = 0
    angry = 0
    sad = 0
# -*- coding: utf-8 -*-
# Build the emoticon vocabulary (simplified + traditional Chinese) and check weibos against it.
from __future__ import division
import re
import opencc
import os
from gensim import corpora
import cPickle as pickle
from xapian_weibo.utils import load_scws, cut, load_emotion_words

AB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
cut_str = load_scws()
cc = opencc.OpenCC('s2t', opencc_path='/usr/bin/opencc')

# Load the emotion word list, add the traditional-Chinese variants, and build a lookup set.
emotions_words = load_emotion_words()
emotions_words = [unicode(e, 'utf-8') for e in emotions_words]
t_emotions_words = [cc.convert(e) for e in emotions_words]
emotions_words.extend(t_emotions_words)
emotions_words = [w.encode('utf-8') for w in emotions_words]
emotions_words_set = set(emotions_words)
emotion_pattern = re.compile(r'\[(\S+?)\]')


def if_emoticoned_weibo(r):
    # Does the weibo contain any emoticon from the specified set?
    emotions = re.findall(emotion_pattern, r['text'])
    is_emoticoned = 1 if set(emotions) & emotions_words_set else 0
    return is_emoticoned
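# Illustrative usage (not from the original file): the emoticon name below is
# hypothetical and only matches if load_emotion_words() actually contains it.
#
#   if_emoticoned_weibo({'text': '今天很开心[哈哈]'})   # -> 1 if '哈哈' is an emotion word
#   if_emoticoned_weibo({'text': '平淡的一天'})          # -> 0, no [..] emoticon present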