Пример #1
0
#coding=utf-8




from b2 import system2
import xlrd


# NOTE(review): reload_utf8 is a helper from the project-local b2.system2
# module; presumably it switches the interpreter's default encoding to
# UTF-8 -- confirm against the b2 package.
system2.reload_utf8()

# Walk every sheet of the workbook and print, for each row, the value stored
# in that row's right-most column.
workbook = xlrd.open_workbook("texts/成都女司机data_3.xlsx")
for sheet in workbook.sheets():
    last_col = sheet.ncols - 1
    for row_idx in xrange(sheet.nrows):
        # A parenthesised single-argument print is equivalent to the
        # Python 2 print statement used by the rest of this script.
        print(sheet.cell(row_idx, last_col).value)
Пример #2
0
#coding=utf-8

import sys
import os
import re
import collections
import json
import math
from b2 import file2
from optparse import OptionParser
from b2 import system2

# NOTE(review): reload_utf8 is a helper from the project-local b2.system2
# module; presumably it makes UTF-8 the interpreter's default encoding so the
# encode/decode calls below behave consistently -- confirm against b2.
system2.reload_utf8()


def print_msg(msg):
    """Write ``msg`` to standard output as one newline-terminated line.

    ``msg`` is encoded to UTF-8 through its ``encode`` method, interpolated
    into a ``"%s\\n"`` template and emitted with a single ``write`` call.
    """
    utf8_text = msg.encode("utf-8")
    line = "%s\n" % utf8_text
    sys.stdout.write(line)


def word_split(input_file_path, save_path, ngram=2):
    word_freq = collections.defaultdict(int)
    if input_file_path == "stdin":
        inputs = sys.stdin
    else:
        inputs = file2.FilesRead([input_file_path])
    for line in inputs:
        words = line.rstrip().replace(",", " ").replace("。", " ").split()
        for word in words:
            try:
                word = word.decode("utf-8")
            except Exception: