Example #1
def type_clean(val, type):
    # Coerce val to the requested type (Python 2: relies on the unicode builtin).
    if type == bool:
        if val in (False, 0, '0', 'f', 'false', 'False', 'no', 'n'):
            return False
        if val in (True, 1, '1', 't', 'true', 'True', 'yes', 'y'):
            return True
        raise ValueError("bad bool value %s" % repr(val))
    if type == str or type == unicode:
        # nope no strings, you're gonna get unicode instead!
        return util.unicodify(val)
    return type(val)
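
A quick usage sketch (Python 2, since the function depends on the unicode builtin; util is TweetMotif's helper module, imported in the later examples):

type_clean('yes', bool)   # True
type_clean('0', bool)     # False
type_clean('42', int)     # 42
type_clean('x', str)      # u'x' -- always unicode, never str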
Example #2
def truncate_at(s, max=40):
    # Truncate s to at most max characters, marking the cut with an ellipsis.
    # The literal must be u"…": concatenating a non-ASCII byte string onto
    # unicode raises UnicodeDecodeError under Python 2.
    s = util.unicodify(s)
    if len(s) > max:
        s = s[:max] + u"…"
    return s
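
For instance (truncated results are max + 1 characters long, since the ellipsis is appended after the cut):

truncate_at("short")             # u'short', unchanged
truncate_at("x" * 50, max=10)    # u'xxxxxxxxxx…'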
Example #3
def output(s):
    # Python 2 print statement; coerce to unicode first so any input prints uniformly.
    print util.unicodify(s)
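
A usage sketch (assumes util.fix_stdio() has wrapped stdout so non-ASCII prints cleanly, as the script in Example #4 does):

output(u"caf\u00e9")   # prints café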
Example #4
import sys
sys.path.insert(0, '/usr2/corpora/tweets/tweetmotif')
import twokenize, util, bigrams
util.fix_stdio()
from sane_re import *

# Rejoin contractions the tokenizer split apart: "' s" / "' t" / "' m" -> "'s" etc.
AposFix = _R(r"( |^)(' [stm])( |$)")

for line in sys.stdin:
    # Input is tab-separated; the tweet text sits in the last field.
    parts = util.unicodify(line[:-1]).split("\t")
    text = parts[-1]
    toks = twokenize.simple_tokenize(text)
    toked = " ".join(toks)
    #print "\t".join(parts[:-1]) + "\t" + toked
    #try: AposFix.show_match(toked)
    #except: pass
    # Close up the split contraction: group 2 is "' s", "' t", or "' m".
    featstr = AposFix.gsub(toked,
                           lambda m: m[1] + m[2].replace(" ", "") + m[3])
    featstr = featstr.lower()
    toks = featstr.split()
    # Unigram features plus underscore-joined bigram features.
    feats = [ug[0] for ug in bigrams.filtered_unigrams(toks)]
    feats += ["_".join(ng) for ng in bigrams.filtered_bigrams(toks)]

    print "\t".join(parts[:-1]) + "\t" + util.unicodify(" ".join(feats))
Example #5
from __future__ import print_function
import json
import tweepy
import datetime  # used for time stamping program start/stop
import traceback

from util import unicodify

# Attributes I don't want
UNWANTED_ATTR = [
    "contributors", "current_user_retweet", "favorited", "geo", "id",
    "in_reply_to_status_id", "lang", "quoted_status_id", "retweeted", "source",
    "in_reply_to_user_id", "entities", "extended_entities",
    "in_reply_to_screen_name"
]
UNWANTED_ATTR = unicodify(UNWANTED_ATTR)

# Nested attributes that I want
WANTED_NESTED_ATTR = {
    "user": [
        "followers_count", "friends_count", "geo_enabled", "id_str",
        "location", "protected", "time_zone", "statuses_count", "created_at"
    ],
    "extended_tweet": ["full_text"]
}
WANTED_NESTED_ATTR = unicodify(WANTED_NESTED_ATTR)


def trim_tweet(tweet_json):
    """Trims and returns a tweet (JSON object).