Пример #1
0
def linesfilter(postName, commentName, postfix, maxlen=200):
    """Filter a pair of line-aligned post/comment files and write the results.

    Both files are read in full and must contain the same number of lines.
    The lines of each file are passed through ``_getTextList``; the pair at
    a given index is dropped when either text is longer than ``maxlen``.
    Surviving texts are run through ``Filter.urlFilter`` and then
    ``Filter.spaceFilter`` and written, one per line, to
    ``postName + postfix`` and ``commentName + postfix``.

    Args:
        postName: Path of the input post file.
        commentName: Path of the input comment file.
        postfix: Suffix appended to each input path to form the output path.
        maxlen: Largest allowed ``len()`` of a post or comment text; pairs
            exceeding it are skipped and reported on stdout.

    Raises:
        AssertionError: If the two input files differ in line count.
    """
    # Context managers guarantee the handles are closed even if a read fails.
    with open(postName) as fpost, open(commentName) as fcomment:
        postLine = fpost.readlines()
        commentLine = fcomment.readlines()

    # Kept as an assert so callers still see AssertionError on a mismatch.
    assert len(postLine) == len(commentLine), (
        'line count mismatch: %d posts vs %d comments'
        % (len(postLine), len(commentLine)))

    postTextLines = _getTextList(postLine)
    commentTextLines = _getTextList(commentLine)
    postLineWrite = []
    commentLineWrite = []
    for i, postText in enumerate(postTextLines):
        # Indexed lookup (not zip) so a shorter comment list still raises
        # instead of silently truncating the output.
        commentText = commentTextLines[i]
        if len(postText) > maxlen or len(commentText) > maxlen:
            # Single-argument print() emits the same text on Python 2 and 3.
            print('%s %s' % (postText, len(postText)))
            print('too long  %d' % i)
            continue
        postLineWrite.append(
            Filter.spaceFilter(Filter.urlFilter(postText)) + '\n')
        commentLineWrite.append(
            Filter.spaceFilter(Filter.urlFilter(commentText)) + '\n')

    filteredPostName = postName + postfix
    filteredCommentName = commentName + postfix
    print(filteredPostName)
    # Closing via ``with`` flushes both outputs before the function returns.
    with open(filteredPostName, 'w') as fpost, \
            open(filteredCommentName, 'w') as fcomment:
        fpost.writelines(postLineWrite)
        fcomment.writelines(commentLineWrite)
Пример #2
0
def testSpaceFilter(contents):
    """Return ``contents`` after passing it through ``Filter.spaceFilter``.

    Args:
        contents: Value handed to ``Filter.spaceFilter`` unchanged.

    Returns:
        Whatever ``Filter.spaceFilter`` returns for ``contents``.
    """
    # Pass the parameter itself; the earlier body referenced ``text``, a name
    # this function never defines, so the argument was ignored.
    return Filter.spaceFilter(contents)