Python SEG.SEG примеры использования

Язык программирования: Python

Пространство имен/Пакет: smallseg

Класс/Тип: SEG

Метод/Функция: SEG

Примеров на hotexamples.com: 5

Python SEG.SEG — найдено 5 примеров. Это лучшие примеры кода на Python для smallseg.SEG.SEG, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

cut(6)

SEG(5)

set(2)

Основные методы

cut (6)

SEG (5)

set (2)

Пример #1

Показать файл

Файл: myseg.py Проект: chaoswork/web-classify

def seg(text):
    """Segment *text* with ``SEG`` and count how often each token occurs.

    Tokens shorter than two characters are skipped.

    Args:
        text: The text passed to ``SEG.cut``.

    Returns:
        The ``items()`` of a dict mapping each kept token to its number of
        occurrences.
    """
    # Local renamed so it no longer shadows this function's own name.
    segmenter = SEG()
    word_nums = {}

    for w in segmenter.cut(text):
        if len(w) < 2:
            continue
        # dict.get replaces dict.has_key, which no longer exists in Python 3.
        word_nums[w] = word_nums.get(w, 0) + 1

    return word_nums.items()

Пример #2

Показать файл

def _load_labeled_texts(seg, dirPath, fileNames, label, progressPrefix):
    """Segment every listed file and return a list of ``(words, label)`` pairs.

    Args:
        seg: Segmenter object exposing ``cut(text)``.
        dirPath: Directory prefix that is concatenated with each file name.
        fileNames: Names of the files inside *dirPath* to read.
        label: Label stored alongside each file's word list.
        progressPrefix: Text printed before the running file counter.

    Returns:
        One ``(words, label)`` tuple per file, where ``words`` holds the
        UTF-8 encoded tokens that are present in ``pornDict``.
    """
    samples = []
    for c, fileName in enumerate(fileNames, 1):
        #if c > 100: break
        print(progressPrefix + str(c))
        # Use a context manager so the handle is closed right after reading.
        with open(dirPath + fileName) as f:
            content = f.read()
        text = []
        for word in seg.cut(content):
            encoded = word.encode('utf-8')  # encode once, reuse for test and result
            if encoded in pornDict:
                text.append(encoded)
        samples.append((text, label))
    return samples


def get_data():
    '''
    Load the labeled text datasets from two local folders.

    Positive and negative samples are stored in different folders. While
    loading, every file is segmented with the smallseg tool and only the
    tokens found in ``pornDict`` are kept.

    Returns:
        A 2-tuple whose two elements are the SAME list of
        ``(words, label)`` pairs (positive samples first, then negative).
        NOTE(review): the same list is returned twice; presumably it is
        used as both training and test set -- confirm with the caller.
    '''
    posPath = '/home/zhouxc/skindetector/AdultWebsiteText/'
    negPath = '/home/zhouxc/skindetector/NormalWebsiteText/'
    posFiles = os.listdir(posPath)
    negFiles = os.listdir(negPath)

    seg = SEG()
    seg.set(dic)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('---------------------Read Positive DataSet-----------------')
    trainingData = _load_labeled_texts(
        seg, posPath, posFiles, 'Positive', "PositiveData")
    print('---------------------Positive DataSet done-----------------')

    print('---------------------Read Negative DataSet-----------------')
    trainingData.extend(
        _load_labeled_texts(
            seg, negPath, negFiles, 'Negative', "NegativeData"))
    print('--------Negative DataSet  done-----------------------------------')

    return trainingData, trainingData

Пример #3

Показать файл

    def get(self):
        """Handle one scheduled request: post timed tweets, then scan the timeline.

        Steps, in order:
          1. Inspect the request headers for ``X-Appengine-Cron: true``.
          2. Depending on the current time (UTC+8) or the ``debug`` request
             parameter, post a good-morning, good-night, or command-line
             tip tweet.
          3. Fetch the home timeline (since the stored ``SinceID`` if any),
             and for each tweet containing a word from ``config.RT_LIST``
             either reply via ``TalkBot`` or retweet it.

        Returns:
            None. Errors while replying/retweeting are logged, not raised.
        """
        # NOTE(review): this flag starts as True, so the denial branch below
        # can never run. It looks like it was meant to start as False, but
        # changing it could lock out legitimate triggers -- confirm first.
        Access_CronJob = True
        headers = self.request.headers.items()

        for key, value in headers:
            if (key == 'X-Appengine-Cron') and (value == 'true'):
                Access_CronJob = True
                break
        # If the request did not come from the cron job, log it and give up.
        if (not Access_CronJob):
            logging.debug('CronJobCheck() access denied!')
            logging.critical(
                '如果这个请求不是由你手动触发的话，这意味者你的CronJobKey已经泄漏！请立即修改CronJobKey以防被他人利用')
            return

        # Shift UTC by +8 hours before reading the hour and minute.
        mydate = datetime.utcnow() + timedelta(hours=+8)
        ts_hour = mydate.time().hour
        ts_min = mydate.time().minute

        dbug = self.request.get('debug')
        logging.debug(dbug)

        # Good-morning tweet (fires at 7:30-7:32, or when debug == 'morning').
        if (((ts_hour == 7) and (30 <= ts_min <= 32))
                or (dbug == 'morning')):
            error = False
            try:
                wther = weather.weather()
            except weather.FetchError:
                logging.error("Weather Fetch Error!")
                error = True
            msg_idx = random.randint(0, len(config.MSG_GET_UP) - 1)
            if error:
                msg = '%s%s' % (config.MSG_GET_UP[msg_idx], config.BOT_HASHTAG)
            else:
                msg = '%s 今天%s的天气是:%s %s' % \
                    (config.MSG_GET_UP[msg_idx], config.CITY, wther, config.BOT_HASHTAG)

            OAuth_UpdateTweet(msg)  # good morning, world
            if error:
                # wther is unbound when the fetch failed; log the tweet only.
                logging.info(msg)
            else:
                # %s (not %d): wther is interpolated as text in the tweet above.
                logging.info("%s:%s" % (msg, wther))

        # Good-night tweet at 23:30-23:32.
        elif ((ts_hour == 23) and (30 <= ts_min <= 32)):
            msg_idx = random.randint(0, len(config.MSG_SLEEP) - 1)
            msg = '%s%s' % (config.MSG_SLEEP[msg_idx], config.BOT_HASHTAG)
            OAuth_UpdateTweet(msg)  # good night, world
            logging.info(msg)

        # One command-line tip per hour (minutes 15-17), or debug == 'cli'.
        elif (((7 <= ts_hour <= 23) and (15 <= ts_min <= 17))
              or (dbug == 'cli')):
            msg = command.random()
            if msg is not None:
                msg = msg.replace(
                    "# commandlinefu.com by David Winterbottom\n\n#", "//")
                msg = '%s %s' % ("叮咚！小bot教CLI时间到了！", msg[:-1])
                msg += "#commandlinefu #xdlinux"
                logging.info(msg)
                OAuth_UpdateTweet(msg)

        # Scan the timeline and retweet / reply.
        auth = tweepy.OAuthHandler(config.CONSUMER_KEY, config.CONSUMER_SECRET)
        auth.set_access_token(config.ACCESS_TOKEN, config.ACCESS_SECRET)
        api = tweepy.API(auth)

        # Resume from the stored since-id when one exists.
        tweetid = SinceID.all().get()
        logging.info(tweetid)
        if tweetid is None:
            logging.warning("Initial!")
            tweetid = SinceID()
            timeline = api.home_timeline()
        else:
            logging.info("Since ID is: %d" % tweetid.since_id)
            timeline = api.home_timeline(since_id=tweetid.since_id)

        #self.response.out.write('GETTING TIMELINE<br />')
        regx = re.compile(config.RT_REGEX, re.I | re.M)
        mgc = re.compile(config.MGC, re.I | re.M)
        talk_to_me = re.compile(config.TALK, re.I | re.M)
        tweets = timeline[::-1]  # the timeline arrives in reverse time order
        if not tweets:
            logging.info("no new tweets!")
            return

        seg = SEG()
        for tweet in tweets:
            # Reset per tweet: previously a reply built for an earlier tweet
            # stayed set and was re-sent to every later tweet.
            msg = None

            user = tweet.user.screen_name
            if user == 'xdtuxbot':
                continue
            text = tweet.text
            # Skip tweets matching the config.MGC pattern.
            if mgc.search(text) is not None:
                continue

            # Skip @-mentions unless they match the config.TALK pattern.
            t = talk_to_me.search(text)
            if (not t) and text.startswith('@'):
                continue

            wlist = seg.cut(text.encode('utf-8'))
            logging.info(' '.join(wlist))
            # Only act on tweets containing at least one word from RT_LIST.
            if not any(w in config.RT_LIST for w in wlist):
                continue

            if t:
                bot = TalkBot()
                reply = bot.respond(talk_to_me.sub("", text)).decode('UTF-8')
                if reply != '':
                    msg = u"@%s %s" % (user, reply)

            try:
                if msg:
                    OAuth_UpdateTweet(msg, reply=tweet.id)  # send to Twitter
                    logging.info('Send Tweet: %s' % (msg))
                else:
                    api.retweet(tweet.id)
            except tweepy.TweepError as e:
                logging.error('Tweepy Error:%s' % e)
            except Exception as e:
                logging.error('Uknow Error:%s' % e)

Пример #4

Показать файл

Файл: zh.py Проект: JustMeliyu/Anger_Sphinx

 def init(self, options):
     """Print a loading notice and store a new ``SEG`` instance on ``self.seg``.

     ``options`` is accepted but not used in this method.
     """
     print("reading Chiniese dictionary")
     segmenter = SEG()
     self.seg = segmenter

Пример #5

Показать файл

def idf(word, documentList):
    """Return the inverse document frequency of *word* over *documentList*.

    Computed as ``log(len(documentList) / (0.01 + n))`` where ``n`` is the
    value returned by ``numDocsContaining(word, documentList)``.
    """
    total_docs = len(documentList)
    # The 0.01 offset keeps the denominator from being exactly zero when
    # the containing-document count is 0.
    denominator = 0.01 + numDocsContaining(word, documentList)
    return math.log(total_docs / denominator)


def tfidf(document, documentList):
    """Return a dict mapping each distinct word of *document* to its TF-IDF.

    The term frequency is the word's occurrence count divided by the
    document length; it is multiplied by ``idf(word, documentList)``.

    Args:
        document: Sequence of words (must support ``len``).
        documentList: Collection of documents passed through to ``idf``.

    Returns:
        ``{word: tf * idf}`` for every distinct word; empty for an empty
        document.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    total = float(len(document))
    retdict = {}
    # Count all words in one pass instead of calling document.count() for
    # every occurrence, which was quadratic and repeated work for duplicates.
    for word, occurrences in Counter(document).items():
        retdict[word] = occurrences / total * idf(word, documentList)
    return retdict


if __name__ == '__main__':
    seg = SEG()
    documentList = []
    documentList.append(
        seg.cut(
            """新华网布鲁塞尔3月24日电 (记者张伟)北约秘书长拉斯穆森24日晚宣布，北约成员国当天决定在利比亚设立禁飞区，北约将在数天内从美国手中接管对利比亚军事行动指挥权。
　　当天，北约28个成员国大使在布鲁塞尔举行会议，拉斯穆森在会后发表声明宣布了上述决定。
　　声明说，北约所采取的行动是“广泛国际行动的一部分”，旨在保护利比亚平民的安全。声明还说，北约成员国均致力于履行联合国安理会决议所规定的义务，“这也是北约决定承担禁飞区责任的原因”。
　　本月17日，联合国安理会通过第1973号决议，同意在利比亚设立禁飞区。从19日开始，法国、美国和英国等国对利比亚展开军事行动。目前，这一行动由美国指挥，但美方已明确表示希望在本周末把指挥权移交出去。
　　拉斯穆森24日晚在接受美国有线电视新闻网采访时说，北约已做好必要的准备，将在“未来数天内”从美国手中接管禁飞区的行动指挥权，行动将统归北约最高军事长官、欧洲盟军最高司令詹姆斯·斯塔夫里迪斯指挥。
　　拉斯穆森解释说，北约成员国目前只是决定执行设立禁飞区的任务，并正在考虑承担“更为广泛的责任”，但目前尚未做出决定。
　　北约22日决定对利比亚实施武器禁运，来自北约7个成员国的16艘海军舰艇参与这一行动。此前，土耳其、法国和德国一直反对北约在利比亚设立禁飞区，谈判一度陷入僵局。"""
        ))
    words = {}
    for document in documentList:
        words = tfidf(document, documentList)
    for item in sorted(words.items(), key=itemgetter(1)):