def test_count_char1(self):
     """
     Special-character count test (escape characters).

     Writes a short string made of escape characters to
     ``test_count_char1.txt`` in the current working directory and passes
     the file name on to ``WordCount.file_read_out``. Nothing is asserted
     in this method itself.
     """
     filename = "test_count_char1.txt"
     sample = "1\r\n2\n3\'\t4\"5\f\a"
     target = "/".join((os.getcwd(), filename))
     # NOTE(review): the file is opened in text mode without ``newline=``,
     # so newline characters are translated to the platform separator on
     # write; the bytes on disk may differ between operating systems.
     # Confirm this is intended for a character-count test.
     with open(target, "w", encoding="utf-8") as out:
         out.write(sample)
     WordCount.file_read_out(filename, "testout_count_char1")
 def test_count_row1(self):
     """
     Line-count test (input with blank-looking lines holding only whitespace).

     The sample has two text lines separated by a tab-only line and a
     spaces-only line. It is written to ``test_count_row1.txt`` in the
     current working directory, then the file name is handed to
     ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_row1.txt"
     sample = "whuihu\n\t\n     \nwww"
     target = "{}/{}".format(os.getcwd(), filename)
     with open(target, "w", encoding="utf-8") as out:
         out.write(sample)
     WordCount.file_read_out(filename, "testout_count_row1")
 def test_count_row(self):
     """
     Line-count test (empty lines that consist of a newline only).

     Writes two text lines separated by empty lines to
     ``test_count_row.txt`` in the current working directory and passes the
     file name to ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_row.txt"
     sample = "whuihu\n\n\nwww"
     target = "%s/%s" % (os.getcwd(), filename)
     with open(target, "w", encoding="utf-8") as out:
         out.write(sample)
     WordCount.file_read_out(filename, "testout_count_row")
Exemplo n.º 4
0
 def test_count_words(self):
     """
     Word-frequency counting test.

     Writes an English paragraph to ``test_count_words.txt`` in the current
     working directory and calls ``WordCount.count_file`` with that path and
     the name ``output_count_words.txt``. Nothing is asserted here.

     :return: None
     """
     # ResourceWarning is silenced process-wide from this point on.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_words.txt"
     paragraph = "I have a brother.have a brother. He is four years older than me. Now he is fifteen years old, and he is a student of Grade Nine. He is tall and handsome. His classmates like playing with him. He works hard in study. His teachers speak highly of him. Besides, basketball and running are his favorites."
     source_path = "/".join((os.getcwd(), filename))
     with open(source_path, "w", encoding='utf-8') as out:
         out.write(paragraph)
     WordCount.count_file(source_path, "output_count_words.txt")
Exemplo n.º 5
0
 def test_count_word(self):
     """
     Word-count test, including special characters.

     Writes a sentence mixed with punctuation, a tab, a carriage return and
     a newline to ``test_count_word.txt`` in the current working directory,
     then calls ``WordCount.count_file`` with that path and the name
     ``output_count_word.txt``. Nothing is asserted here.

     :return: None
     """
     # ResourceWarning is silenced process-wide from this point on.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_word.txt"
     sample = "I have! a \t \r brother? \n He is four years older than me. "
     source_path = "{}/{}".format(os.getcwd(), filename)
     with open(source_path, "w", encoding='utf-8') as out:
         out.write(sample)
     WordCount.count_file(source_path, "output_count_word.txt")
Exemplo n.º 6
0
 def test_count_char(self):
     """
     Character-count test, including special characters.

     Writes a short string containing punctuation, a tab, a carriage return
     and a newline to ``test_count_char.txt`` in the current working
     directory, then calls ``WordCount.count_file`` with that path and the
     name ``output_count_char.txt``. Nothing is asserted here.

     :return: None
     """
     # ResourceWarning is silenced process-wide from this point on.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_char.txt"
     sample = "I have! a \t \r brother? \n"
     source_path = "%s/%s" % (os.getcwd(), filename)
     with open(source_path, "w", encoding="utf-8") as out:
         out.write(sample)
     WordCount.count_file(source_path, "output_count_char.txt")
Exemplo n.º 7
0
 def test_count_line(self):
     """
     Count the number of lines in a file.

     Writes a multi-line sample (with tabs and a carriage return mixed in)
     to ``test_count_line.txt`` in the current working directory, then calls
     ``WordCount.count_file`` with that path and the name
     ``output_count_line.txt``. Nothing is asserted here.

     :return: None
     """
     # ResourceWarning is silenced process-wide from this point on.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_line.txt"
     sample = "hello\nworld\t\nwwwwI \thave! a \t \r brother? \naaaaa\n"
     source_path = "/".join([os.getcwd(), filename])
     with open(source_path, "w", encoding="utf-8") as out:
         out.write(sample)
     WordCount.count_file(source_path, "output_count_line.txt")
    def _test_word_count(self, projname, running_median_method):
        """Run ``WordCount.main`` for one project and compare its output files.

        ``WordCount.main`` is called with the project's input and output
        directories and the given ``running_median_method``. Afterwards the
        files named by ``self.exp_wc_filename`` and ``self.exp_rm_filename``
        are compared between the expected directory and the output
        directory using ``self.assertFilesEqual``.
        """
        # Produce the output files.
        WordCount.main(
            indir=self._indir(projname),
            outdir=self._outdir(projname),
            running_median_method=running_median_method,
        )

        # Compare each produced file with its expected counterpart, in the
        # same order as before: first the "wc" file, then the "rm" file.
        for attr_name in ("exp_wc_filename", "exp_rm_filename"):
            expected_path = os.path.join(self._expdir(projname),
                                         getattr(self, attr_name))
            produced_path = os.path.join(self._outdir(projname),
                                         getattr(self, attr_name))
            self.assertFilesEqual(expected_path, produced_path)
 def test_count_word2(self):
     """
     Word-count test (deciding whether a token counts as a word).

     Writes the same line of mixed tokens 100 times to
     ``test_count_word2.txt`` in the current working directory and passes
     the file name to ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_word2.txt"
     line = "123file;wwww;wWwW,file123;file;fil,\n"
     target = "/".join((os.getcwd(), filename))
     with open(target, "w", encoding="utf-8") as out:
         for _ in range(100):
             out.write(line)
     WordCount.file_read_out(filename, "testout_count_word2")
Exemplo n.º 10
0
 def test_count_word1(self):
     """
     Word-count test (English letter case is not distinguished).

     Writes a line holding differently-cased spellings of the same letters
     100 times to ``test_count_word1.txt`` in the current working directory
     and passes the file name to ``WordCount.file_read_out``. Nothing is
     asserted here.
     """
     filename = "test_count_word1.txt"
     line = "WwwW;wwww;wWwW,yyyyyy\n"
     target = "{}/{}".format(os.getcwd(), filename)
     with open(target, "w", encoding="utf-8") as out:
         out.writelines(line for _ in range(100))
     WordCount.file_read_out(filename, "testout_count_word1")
Exemplo n.º 11
0
 def test_count_word(self):
     """
     Word-count test (general case).

     Writes the same comma-separated line 100 times to
     ``test_count_word.txt`` in the current working directory and passes the
     file name to ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_word.txt"
     line = "file123,filE,ahbyfgy12\n"
     target = "%s/%s" % (os.getcwd(), filename)
     with open(target, "w", encoding="utf-8") as out:
         for _ in range(100):
             out.write(line)
     WordCount.file_read_out(filename, "testout_count_word")
Exemplo n.º 12
0
 def test_huge_data(self):
     """
     Large-input test: 100000 lines.

     The original note records "0.645s" next to the line count; presumably
     a measured run time (unverified). Writes the same two-word line 100000
     times to ``test_huge_data.txt`` in the current working directory and
     passes the file name to ``WordCount.file_read_out``. Nothing is
     asserted here.
     """
     filename = "test_huge_data.txt"
     line = "test test\n"
     target = "/".join([os.getcwd(), filename])
     with open(target, "w", encoding="utf-8") as out:
         for _ in range(100000):
             out.write(line)
     WordCount.file_read_out(filename, "testout_huge_data")
Exemplo n.º 13
0
 def test_count_fword3(self):
     """
     Top-10 most frequent words and their frequencies (letter-case test).

     Writes a line of mixed-case tokens 20 times to
     ``test_count_fword3.txt`` in the current working directory and passes
     the file name to ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_fword3.txt"
     line = 'WWWWW EGFERGeeeeeWW\n'
     target = "{}/{}".format(os.getcwd(), filename)
     with open(target, "w", encoding="utf-8") as out:
         for _ in range(20):
             out.write(line)
     WordCount.file_read_out(filename, "testout_count_fword3")
Exemplo n.º 14
0
 def test_count_fword(self):
     """
     Top-10 most frequent words and their frequencies (no more than 10 words).

     Writes the same line of words 20 times to ``test_count_fword.txt`` in
     the current working directory and passes the file name to
     ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_fword.txt"
     line = 'filr yyyyt  NUgYTR OOOO NUGYTR ttyw buiygy TCTihrr\n'
     target = "%s/%s" % (os.getcwd(), filename)
     with open(target, "w", encoding="utf-8") as out:
         out.writelines(line for _ in range(20))
     WordCount.file_read_out(filename, "testout_count_fword")
Exemplo n.º 15
0
 def test_count_fword2(self):
     """
     Top-10 most frequent words and their frequencies (ties).

     Per the original note: for words with the same frequency, the word
     that comes first in dictionary order should be output first. Writes
     the same line 20 times to ``test_count_fword2.txt`` in the current
     working directory and passes the file name to
     ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_fword2.txt"
     line = 'windows95 windows95 windows98 windows96 windows2000\n'
     target = "/".join((os.getcwd(), filename))
     with open(target, "w", encoding="utf-8") as out:
         for _ in range(20):
             out.write(line)
     WordCount.file_read_out(filename, "testout_count_fword2")
Exemplo n.º 16
0
 def test_word(self):
     """
     Test the total word count on its own.

     Writes a sample sentence to ``s_test_count_word.txt`` in the current
     working directory, obtains a result from ``WordCount.count_word`` for
     that path, then calls ``WordCount.clear_file`` and
     ``WordCount.out_file`` on ``s_output_count_word.txt`` with the result.
     Nothing is asserted here.

     :return: None
     """
     # ResourceWarning is silenced process-wide from this point on.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "s_test_count_word.txt"
     sample = "I have! a \t \r brother? \n He is four years older than me. "
     source_path = "/".join((os.getcwd(), filename))
     with open(source_path, "w", encoding='utf-8') as out:
         out.write(sample)
     word_total = WordCount.count_word(source_path)
     WordCount.clear_file("s_output_count_word.txt")
     WordCount.out_file("s_output_count_word.txt", word_total)
Exemplo n.º 17
0
 def test_chars(self):
     """
     Test the total character count on its own.

     Writes a sample string to ``s_test_count_char.txt`` in the current
     working directory, obtains a result from ``WordCount.count_chars`` for
     that path, then calls ``WordCount.clear_file`` and
     ``WordCount.out_file`` on ``s_output_count_chars.txt`` with the result.
     Nothing is asserted here.

     :return: None
     """
     # ResourceWarning is silenced process-wide from this point on.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "s_test_count_char.txt"
     sample = "I have! a \t \r brother? \n"
     source_path = "{}/{}".format(os.getcwd(), filename)
     with open(source_path, "w", encoding="utf-8") as out:
         out.write(sample)
     char_total = WordCount.count_chars(source_path)
     WordCount.clear_file("s_output_count_chars.txt")
     WordCount.out_file("s_output_count_chars.txt", char_total)
Exemplo n.º 18
0
    def _test_word_count(self, projname, running_median_method):
        """Run ``WordCount.main`` for ``projname`` and check both output files.

        The files named by ``self.exp_wc_filename`` and
        ``self.exp_rm_filename`` in the output directory are compared, via
        ``self.assertFilesEqual``, with the files of the same name in the
        expected directory.
        """
        # Run the program under test.
        main_kwargs = {
            "indir": self._indir(projname),
            "outdir": self._outdir(projname),
            "running_median_method": running_median_method,
        }
        WordCount.main(**main_kwargs)

        # First output file: expected vs. produced.
        wc_expected = os.path.join(self._expdir(projname), self.exp_wc_filename)
        wc_produced = os.path.join(self._outdir(projname), self.exp_wc_filename)
        self.assertFilesEqual(wc_expected, wc_produced)

        # Second output file: expected vs. produced.
        rm_expected = os.path.join(self._expdir(projname), self.exp_rm_filename)
        rm_produced = os.path.join(self._outdir(projname), self.exp_rm_filename)
        self.assertFilesEqual(rm_expected, rm_produced)
Exemplo n.º 19
0
 def test_count_fword1(self):
     """
     Top-10 most frequent words and their frequencies (more than 10 words).

     Per the original note: with more than 10 distinct words, only the 10
     most frequent should be output. Writes the same long line 20 times to
     ``test_count_fword1.txt`` in the current working directory and passes
     the file name to ``WordCount.file_read_out``. Nothing is asserted here.
     """
     filename = "test_count_fword1.txt"
     line = ('windows95 windows95 windows98 windows96 '
             'windows2000 teee file123 123file file325 file666 '
             'filr yyyyt  NUGYTR OOOO NUGYTR ttyw buiygy TCTihrr\n')
     target = "%s/%s" % (os.getcwd(), filename)
     with open(target, "w", encoding="utf-8") as out:
         for _ in range(20):
             out.write(line)
     WordCount.file_read_out(filename, "testout_count_fword1")
Exemplo n.º 20
0
def count(string='basic statistics',
          tBeg=TimeConversion.default_begin_time(),
          tEnd=TimeConversion.default_end_time()):
    '''
    Counts the number of scores of word string.

    Reads the rows returned by ``csvFile.read_file()``, skipping the first
    row (presumably a header; confirm), and splits columns 0-3 into ids,
    reviews, rates and time stamps. When ``string`` is the literal
    ``'basic statistics'`` the data is passed to ``WordCount.basic_count``;
    any other value goes to ``WordCount.word_count``.

    NOTE: the ``tBeg`` / ``tEnd`` defaults are evaluated once, when this
    function is defined, not on every call.
    '''

    # Get data: one list per column, built in column order 0, 1, 2, 3.
    records = csvFile.read_file()[1:]
    Ids, reviews, rates, timeStamp = (
        [record[column] for record in records] for column in range(4)
    )

    # Pick the counting routine, then call it with the shared arguments.
    counter = (WordCount.basic_count if string == 'basic statistics'
               else WordCount.word_count)
    return counter(string, tBeg, tEnd, Ids, reviews, timeStamp, rates)
Exemplo n.º 21
0
def count(string='basic statistics',
          tBeg=TimeConversion.default_begin_time(),
          tEnd=TimeConversion.default_end_time()):
    '''
    Counts the number of scores of word string.

    The rows from ``csvFile.read_file()`` (minus the first row, presumably
    a header; confirm) are split by column into ids (0), reviews (1),
    rates (2) and time stamps (3). ``WordCount.basic_count`` handles the
    literal ``'basic statistics'``; every other ``string`` is handled by
    ``WordCount.word_count``.

    NOTE: the ``tBeg`` / ``tEnd`` defaults are evaluated once, when this
    function is defined, not on every call.
    '''

    # Get data.
    data_rows = csvFile.read_file()[1:]
    columns = [[row[index] for row in data_rows] for index in (0, 1, 2, 3)]
    Ids, reviews, rates, timeStamp = columns

    # Guard clause for the general word query; the basic statistics path
    # is the fall-through.
    if string != 'basic statistics':
        return WordCount.word_count(string, tBeg, tEnd, Ids, reviews,
                                    timeStamp, rates)
    return WordCount.basic_count(string, tBeg, tEnd, Ids, reviews,
                                 timeStamp, rates)
Exemplo n.º 22
0
 def test_count_char(self):
     """
     Multi-character count test (general case).

     Writes 20 lines, each made of five ``test,`` tokens followed by a
     newline, to ``test_count_char.txt`` in the current working directory
     and passes the file name to ``WordCount.file_read_out``. Nothing is
     asserted here.
     """
     filename = "test_count_char.txt"
     tokens_per_line = 5
     total_tokens = 100
     # One finished line: five tokens and a trailing newline.
     line = "test," * tokens_per_line + "\n"
     target = "/".join((os.getcwd(), filename))
     with open(target, "w", encoding="utf-8") as out:
         # 100 tokens at five per line gives exactly 20 full lines, with no
         # partial line left over to flush.
         for _ in range(total_tokens // tokens_per_line):
             out.write(line)
     WordCount.file_read_out(filename, "testout_count_char")
 def test_add(self):
     """Check ``WordCount.WordCount`` against sentences of one to seven words."""
     # (sentence, expected result) pairs, checked in this order.
     cases = (
         ("I am testing this function", 5),
         ("But this function assumes proper grammar", 6),
         ("So", 1),
         ("I must", 2),
         ("Maintain the proper", 3),
         ("Grammar or else the", 4),
         ("Program totally will not work for me", 7),
     )
     for sentence, expected in cases:
         self.assertEqual(WordCount.WordCount(sentence), expected)
def WordCount_case(x):
    """Assert ``WordCount`` results for a fixed set of phrases.

    The parameter ``x`` is not used by the body.
    """
    # (phrase, expected result) pairs, checked in this order.
    expectations = (
        ("Hello,", 1),
        ("Can you hear me?", 4),
        ("I'm in California", 3),
        ("Dreaming of who we used to be", 7),
        ("When we were younger", 4),
        ("and free", 2),
    )
    for phrase, expected in expectations:
        assert WordCount(phrase) == expected
def sepfreq(path, filename, speCharList):
    """Return the combined frequency of the given characters in a file.

    The file is read as ISO-8859-1 text and its length is measured after
    removing space characters. The result is the sum of
    ``WordCount.word_count(path, filename, ch)`` over every ``ch`` in
    ``speCharList``, divided by that length.

    Changes from the previous version: the ``return`` used to sit inside
    the loop, so only the first entry of ``speCharList`` was ever counted
    and an empty list returned ``None``; the file handle was never closed.

    Args:
        path: Directory part of the file location. It is joined to
            ``filename`` with a backslash, as before.
            NOTE(review): this separator only forms a directory path on
            Windows; confirm whether other platforms must be supported.
        filename: Name of the file inside ``path``.
        speCharList: Iterable of characters (or strings) to count.

    Returns:
        ``0`` when the file holds no non-space characters, otherwise the
        summed count divided by the number of non-space characters.
    """
    # Read-only access is enough; ``with`` guarantees the handle is closed.
    with open(path + '\\' + filename, 'r', encoding="ISO-8859-1") as source:
        text = source.read()

    # Spaces do not count towards the total number of characters.
    total_chars = len(text.replace(' ', ''))
    if total_chars == 0:
        # Nothing to divide by; report a frequency of zero.
        return 0

    alltime = sum(
        WordCount.word_count(path, filename, spechar)
        for spechar in speCharList
    )
    return alltime / total_chars
Exemplo n.º 26
0
 def __init__(self):
     """Create the word counter, an empty word dictionary and the filter list."""
     # Counter object supplied by the WordCount module.
     self.wc = WordCount.WordsCount()
     # Starts empty; filled elsewhere.
     self.wordsDic = {}
     # Presumably words to be excluded from counting; confirm against usage.
     self.filter = ['a', 'the', 'to']
 def test_missing_input_directory(self):
     """``WordCount.main`` must raise when ``indir`` is 'does_not_exist'."""
     missing_dir = 'does_not_exist'
     with self.assertRaises(Exception):
         WordCount.main(indir=missing_dir)
Exemplo n.º 28
0
        '--output', type=str, help='output file that contains all the logging (Default: logs.txt)', default="logs.txt")
    args = parser.parse_args()

    wiki_reader = WikiReader(args.input)
    macroCMS = {}
    mapping_distribution = {}
    log_file = open(args.output, 'w', encoding='utf-8')

    for cat in macro_categories:
        macroCMS[cat] = CountMinSketch(
            fraction=0.0005, tolerance=0.0001, allowed_failure_probability=0.01)
        mapping_distribution[cat] = 0

    cnt = 0
    time_start = time.time()
    mrJob = WordCount.WikiWordCount(args=[article_list])
    for page_dict in wiki_reader:
        with open(tmp_file, 'w', encoding='utf-8') as f:
            if page_dict['revision']['text'].startswith('#REDIRECT'):
                continue
            f.write(page_dict['revision']['text'])

        cnt += 1
        if cnt < int(args.skip):
            continue

        if cnt > int(args.parse):
            break

        open(output_file, 'w').close()
        mrJob.run_job()
Exemplo n.º 29
0
 def test_three(self):
     """Passing the integer 2 to ``WordCount.wordCount`` should raise AttributeError."""
     non_string_input = 2
     with pytest.raises(AttributeError):
         assert WordCount.wordCount(non_string_input)
Exemplo n.º 30
0
            ##每日公告赋值完成
            ###计数
            NowFile = NowFile + 1
            print('%d/%d' % (NowFile, TotalFile))
            ###

    ###此时DateAnncDict已经准备完成
    try:
        del AnncSingleData  #释放寄存字典
    except UnboundLocalError:
        pass
    ###由{日期:{标题:[词,词,词],标题:[词,词,词]}}
    ###获得
    ###{日期:{词:词数,词:词数}}
    ###每日字典更新
    DateDict = WordCount.WordCount(DateAnncDict)
    return DateDict, DateAnncDict
    ###


# ##测试
# if __name__ == "__main__":
#     str="没门"
#     #str=str.encode('utf-8')
#     # DateAnncDict={'20170101':str}
#     DateCrawled=['20190531']#要处理的日期
#     DIR="爬取文件"#路径
#     DateDict,DateAnncDict=InfoPreprocessor(DIR,DateCrawled)
#     for DateAnnc in DateAnncDict['20190531']:
#         # for Annc in DateAnnc.keys():
#         #     print Annc
Exemplo n.º 31
0
 def test_one(self):
     """``WordCount.wordCount`` should return 4 for a four-word sentence."""
     sentence = "This is an activity"
     word_total = WordCount.wordCount(sentence)
     assert word_total == 4
Exemplo n.º 32
0
 def test_two(self):
     """``WordCount.wordCount`` called without arguments should return 0."""
     word_total = WordCount.wordCount()
     assert word_total == 0
Exemplo n.º 33
0
def callRiskTimes(path, filename, functionList):
    """Sum ``WordCount.word_count`` results for every entry of ``functionList``.

    Each entry is passed, together with ``path`` and ``filename``, to
    ``WordCount.word_count``; the individual results are added up.

    Returns:
        The total of all results, or ``0`` when ``functionList`` is empty.
    """
    return sum(
        WordCount.word_count(path, filename, func) for func in functionList
    )