from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
# Build the stop word set once; calling stopwords.words('english') per token is slow.
stop_words = set(stopwords.words('english'))

# Replace stop words with a "*" placeholder instead of dropping them,
# so token positions (and therefore window distances) are preserved.
# "tokens" is assumed to come from word_tokenize(text), as in the second snippet below.
tokens_filtered = []
for token in tokens:
    if token in stop_words:
        tokens_filtered += ["*"]
    else:
        tokens_filtered += [token]

# stemming
#normalized_tokens = [stemmer.stem(token) for token in tokens if token not in stop_words]
normalized_tokens = [stemmer.stem(token) for token in tokens_filtered]
print("Token set filtered and stemmed:", normalized_tokens)

# Slide a window over the token stream and count every pair inside it,
# weighted by window_size - distance + 1, so adjacent tokens score highest
# and the farthest pair in a window scores 2.
window_size = 10
matrix = WordMatrix()
win_start = 0
while win_start + window_size <= len(normalized_tokens):
    window = normalized_tokens[win_start:win_start + window_size]
    first = 0
    while first < len(window):
        second = first + 1
        while second < len(window):
            matrix.add(window[first], window[second], window_size - second + first + 1)
            second += 1
        first += 1
    win_start += 1
print("Co-occurrence counted")
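# WordMatrix itself is not shown in this section. Below is a minimal sketch
# of what both snippets assume: an accumulator whose add(first, second, weight)
# sums weights per word pair. The name and call signature come from the code
# above; the internals (including symmetric storage) are an assumption.
from collections import defaultdict

class WordMatrix:
    def __init__(self):
        # counts[w1][w2] holds the accumulated co-occurrence weight
        self.counts = defaultdict(lambda: defaultdict(int))

    def add(self, first, second, weight):
        # Stored symmetrically here so lookups are order-independent;
        # the original class may instead keep directed counts.
        self.counts[first][second] += weight
        self.counts[second][first] += weight

    def get(self, first, second):
        return self.counts[first][second]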
print()
# maybe we should first do sent_tokenize, then word_tokenize
# "text" is the raw input string, assumed to be defined earlier.
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

tokens = word_tokenize(text)

normalized_tokens = []
# Porter: I chose the stemmer everybody knows.
stemmer = PorterStemmer()
# tokenization and stemming
for token in tokens:
    normalized_tokens += [stemmer.stem(token)]

# Same sliding-window counting as above, with a smaller window.
window_size = 5
matrix = WordMatrix()
win_start = 0
while win_start + window_size <= len(normalized_tokens):
    window = normalized_tokens[win_start:win_start + window_size]
    first = 0
    while first < len(window):
        second = first + 1
        while second < len(window):
            matrix.add(window[first], window[second], window_size - second + first + 1)
            second += 1
        first += 1
    win_start += 1
# todo: tab alignment, nicer printing
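# Minimal smoke test of the window/weight logic, using the WordMatrix sketch
# above. The sentence is made up; nltk plus its 'punkt' tokenizer data must be
# installed for word_tokenize to work. "quick" and "fox" sit two positions
# apart and fall together into two length-5 windows; each window adds
# weight 5 - 2 + 1 = 4, so the expected total is 8.
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

text = "the quick brown fox jumps over the lazy dog"
stemmer = PorterStemmer()
toks = [stemmer.stem(t) for t in word_tokenize(text)]

window_size = 5
matrix = WordMatrix()
for win_start in range(len(toks) - window_size + 1):
    window = toks[win_start:win_start + window_size]
    for first in range(len(window)):
        for second in range(first + 1, len(window)):
            matrix.add(window[first], window[second], window_size - second + first + 1)

print("weight(quick, fox):", matrix.get("quick", "fox"))  # expect 8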