Exemplos de WordCount em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_char1(self):
     """
     Character-count test with special characters (escape sequences).

     Writes a string of escape characters to a file in the current working
     directory and runs ``WordCount.file_read_out`` on it. This method
     itself makes no assertion.
     """
     filename = "test_count_char1.txt"
     content = "1\r\n2\n3\'\t4\"5\f\a"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         handle.write(content)
     WordCount.file_read_out(filename, "testout_count_char1")

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_row1(self):
     """
     Line-count test with blank lines.

     The input contains lines made only of whitespace (a tab, a run of
     spaces) between two lines of text. This method itself makes no
     assertion.
     """
     filename = "test_count_row1.txt"
     content = "whuihu\n\t\n     \nwww"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         handle.write(content)
     WordCount.file_read_out(filename, "testout_count_row1")

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_row(self):
     """
     Line-count test where the blank lines hold only a newline.

     Writes the sample text to a file in the current working directory and
     runs ``WordCount.file_read_out`` on it. This method itself makes no
     assertion.
     """
     filename = "test_count_row.txt"
     content = "whuihu\n\n\nwww"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         handle.write(content)
     WordCount.file_read_out(filename, "testout_count_row")

Exemplo n.º 4

0

Exibir arquivo

 def test_count_words(self):
     '''
     Word-frequency counting test.

     Writes an English paragraph to a file in the current working
     directory and passes its full path to ``WordCount.count_file``.
     This method itself makes no assertion.
     :return:
     '''
     # Silence ResourceWarning for the duration of the run.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_words.txt"
     paragraph = "I have a brother.have a brother. He is four years older than me. Now he is fifteen years old， and he is a student of Grade Nine. He is tall and handsome. His classmates like playing with him. He works hard in study. His teachers speak highly of him. Besides， basketball and running are his favorites."
     input_path = "/".join((os.getcwd(), filename))
     with open(input_path, "w", encoding='utf-8') as handle:
         handle.write(paragraph)
     WordCount.count_file(input_path, "output_count_words.txt")

Exemplo n.º 5

0

Exibir arquivo

 def test_count_word(self):
     '''
     Word-count test, including special characters.

     The sample text mixes punctuation, a tab, a carriage return and a
     newline. It is written to the current working directory and handed
     to ``WordCount.count_file``. This method itself makes no assertion.
     :return:
     '''
     # Silence ResourceWarning for the duration of the run.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_word.txt"
     sample = "I have! a \t \r brother? \n He is four years older than me. "
     input_path = "/".join((os.getcwd(), filename))
     with open(input_path, "w", encoding='utf-8') as handle:
         handle.write(sample)
     WordCount.count_file(input_path, "output_count_word.txt")

Exemplo n.º 6

0

Exibir arquivo

 def test_count_char(self):
     '''
     Character-count test, including special characters.

     Writes a short text containing a tab, a carriage return and a newline
     to the current working directory and hands the file to
     ``WordCount.count_file``. This method itself makes no assertion.
     :return:
     '''
     # Silence ResourceWarning for the duration of the run.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_char.txt"
     sample = "I have! a \t \r brother? \n"
     input_path = "/".join((os.getcwd(), filename))
     with open(input_path, "w", encoding="utf-8") as handle:
         handle.write(sample)
     WordCount.count_file(input_path, "output_count_char.txt")

Exemplo n.º 7

0

Exibir arquivo

 def test_count_line(self):
     '''
     Line-count test for a file.

     Writes a multi-line sample (four newline characters) to the current
     working directory and hands the file to ``WordCount.count_file``.
     This method itself makes no assertion.
     :return:
     '''
     # Silence ResourceWarning for the duration of the run.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "test_count_line.txt"
     sample = "hello\nworld\t\nwwwwI \thave! a \t \r brother? \naaaaa\n"
     input_path = "/".join((os.getcwd(), filename))
     with open(input_path, "w", encoding="utf-8") as handle:
         handle.write(sample)
     WordCount.count_file(input_path, "output_count_line.txt")

Exemplo n.º 8

0

Exibir arquivo

Arquivo: TestWordCount.py Projeto: thomas-schreiter/Insight-Coding-Challenge

    def _test_word_count(self, projname, running_median_method):
        """
        Run ``WordCount.main`` for one project and compare its output files.

        ``projname`` selects the input, output and expected directories via
        ``self._indir`` / ``self._outdir`` / ``self._expdir``;
        ``running_median_method`` is passed through to ``WordCount.main``.
        """
        # call
        WordCount.main(
            indir=self._indir(projname),
            outdir=self._outdir(projname),
            running_median_method=running_median_method,
        )

        # assert: each produced file must equal its expected counterpart
        for result_name in (self.exp_wc_filename, self.exp_rm_filename):
            expected_path = os.path.join(self._expdir(projname), result_name)
            produced_path = os.path.join(self._outdir(projname), result_name)
            self.assertFilesEqual(expected_path, produced_path)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_word2(self):
     """
     Word-count test: deciding whether a token qualifies as a word.

     Writes the same line of mixed letter/digit tokens 100 times and runs
     ``WordCount.file_read_out`` on the file. This method itself makes no
     assertion.
     """
     filename = "test_count_word2.txt"
     line = "123file;wwww;wWwW,file123;file;fil,\n"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(100):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_count_word2")

Exemplo n.º 10

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_word1(self):
     """
     Word-count test: English letter case is not distinguished.

     Writes a line holding differently-cased spellings of the same letters
     100 times and runs ``WordCount.file_read_out`` on the file. This
     method itself makes no assertion.
     """
     filename = "test_count_word1.txt"
     line = "WwwW;wwww;wWwW,yyyyyy\n"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(100):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_count_word1")

Exemplo n.º 11

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_word(self):
     """
     Word-count test (general case).

     Writes one comma-separated line 100 times and runs
     ``WordCount.file_read_out`` on the file. This method itself makes no
     assertion.
     """
     filename = "test_count_word.txt"
     line = "file123,filE,ahbyfgy12\n"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(100):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_count_word")

Exemplo n.º 12

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_huge_data(self):
     """
     Large-input test: 100000 lines (the original note recorded 0.645s).

     Writes "test test" on 100000 lines and runs
     ``WordCount.file_read_out`` on the file. This method itself makes no
     assertion.
     """
     filename = "test_huge_data.txt"
     line = "test test\n"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(100000):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_huge_data")

Exemplo n.º 13

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_fword3(self):
     """
     Top-10 most frequent words and their frequencies (letter-case test).

     Writes one mixed-case line 20 times and runs
     ``WordCount.file_read_out`` on the file. This method itself makes no
     assertion.
     """
     filename = "test_count_fword3.txt"
     line = 'WWWWW EGFERGeeeeeWW\n'
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(20):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_count_fword3")

Exemplo n.º 14

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_fword(self):
     """
     Top-10 most frequent words and their frequencies (no more than 10 words).

     Writes one line of eight tokens 20 times and runs
     ``WordCount.file_read_out`` on the file. This method itself makes no
     assertion.
     """
     filename = "test_count_fword.txt"
     line = 'filr yyyyt  NUgYTR OOOO NUGYTR ttyw buiygy TCTihrr\n'
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(20):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_count_fword")

Exemplo n.º 15

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_fword2(self):
     """
     Top-10 most frequent words and their frequencies (tie-breaking).

     Per the original note, words with equal frequency should be output in
     lexicographic order. Writes one line 20 times and runs
     ``WordCount.file_read_out`` on the file. This method itself makes no
     assertion.
     """
     filename = "test_count_fword2.txt"
     line = 'windows95 windows95 windows98 windows96 windows2000\n'
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(20):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_count_fword2")

Exemplo n.º 16

0

Exibir arquivo

 def test_word(self):
     '''
     Test the total word count on its own.

     Writes a sample text, obtains a value from ``WordCount.count_word``
     and stores it via ``WordCount.clear_file`` / ``WordCount.out_file``.
     This method itself makes no assertion.
     :return:
     '''
     # Silence ResourceWarning for the duration of the run.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "s_test_count_word.txt"
     output_name = "s_output_count_word.txt"
     sample = "I have! a \t \r brother? \n He is four years older than me. "
     input_path = "/".join((os.getcwd(), filename))
     with open(input_path, "w", encoding='utf-8') as handle:
         handle.write(sample)
     word_total = WordCount.count_word(input_path)
     WordCount.clear_file(output_name)
     WordCount.out_file(output_name, word_total)

Exemplo n.º 17

0

Exibir arquivo

 def test_chars(self):
     '''
     Test the total character count on its own.

     Writes a sample text, obtains a value from ``WordCount.count_chars``
     and stores it via ``WordCount.clear_file`` / ``WordCount.out_file``.
     This method itself makes no assertion.
     :return:
     '''
     # Silence ResourceWarning for the duration of the run.
     warnings.simplefilter('ignore', ResourceWarning)
     filename = "s_test_count_char.txt"
     output_name = "s_output_count_chars.txt"
     sample = "I have! a \t \r brother? \n"
     input_path = "/".join((os.getcwd(), filename))
     with open(input_path, "w", encoding="utf-8") as handle:
         handle.write(sample)
     char_total = WordCount.count_chars(input_path)
     WordCount.clear_file(output_name)
     WordCount.out_file(output_name, char_total)

Exemplo n.º 18

0

Exibir arquivo

    def _test_word_count(self, projname, running_median_method):
        """
        Execute ``WordCount.main`` for ``projname`` and verify both outputs.

        The file named by ``self.exp_wc_filename`` and the file named by
        ``self.exp_rm_filename`` in the output directory are each compared
        with the same-named file in the expected directory.
        """
        # call
        WordCount.main(indir=self._indir(projname),
                       outdir=self._outdir(projname),
                       running_median_method=running_median_method)

        # assert: file named by exp_wc_filename
        expected_wc = os.path.join(self._expdir(projname), self.exp_wc_filename)
        produced_wc = os.path.join(self._outdir(projname), self.exp_wc_filename)
        self.assertFilesEqual(expected_wc, produced_wc)

        # assert: file named by exp_rm_filename
        expected_rm = os.path.join(self._expdir(projname), self.exp_rm_filename)
        produced_rm = os.path.join(self._outdir(projname), self.exp_rm_filename)
        self.assertFilesEqual(expected_rm, produced_rm)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_fword1(self):
     """
     Top-10 most frequent words and their frequencies (more than 10 words).

     Per the original note, only the 10 most frequent words should be
     output. Writes one long line 20 times and runs
     ``WordCount.file_read_out`` on the file. This method itself makes no
     assertion.
     """
     filename = "test_count_fword1.txt"
     line = ('windows95 windows95 windows98 windows96 '
             'windows2000 teee file123 123file file325 file666 '
             'filr yyyyt  NUGYTR OOOO NUGYTR ttyw buiygy TCTihrr\n')
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         for _ in range(20):
             handle.write(line)
     WordCount.file_read_out(filename, "testout_count_fword1")

Exemplo n.º 20

0

Exibir arquivo

Arquivo: main.py Projeto: xuxx09/CustomerEvaluationSystem

def count(string='basic statistics',
          tBeg=TimeConversion.default_begin_time(),
          tEnd=TimeConversion.default_end_time()):
    '''
    Count scores for the word ``string`` over the CSV review data.

    With the default ``'basic statistics'`` the work goes to
    ``WordCount.basic_count``; any other value goes to
    ``WordCount.word_count``. Both receive the same arguments.

    NOTE(review): the ``tBeg``/``tEnd`` defaults are evaluated once, when
    this function is defined, not on every call.
    '''

    # Get data; the first row is skipped (presumably a header -- confirm).
    rows = csvFile.read_file()[1:]
    Ids = [row[0] for row in rows]
    reviews = [row[1] for row in rows]
    rates = [row[2] for row in rows]
    timeStamp = [row[3] for row in rows]

    # Pick the counting routine, then call it with the shared arguments.
    counter = (WordCount.basic_count if string == 'basic statistics'
               else WordCount.word_count)
    return counter(string, tBeg, tEnd, Ids, reviews, timeStamp, rates)

Exemplo n.º 21

0

Exibir arquivo

Arquivo: main.py Projeto: xuxx09/CustomerEvaluationSystem

def count(string='basic statistics',
          tBeg=TimeConversion.default_begin_time(),
          tEnd=TimeConversion.default_end_time()):
    '''
    Counts the number of scores of word string.

    Reads the CSV rows, splits the first four columns into separate lists
    and forwards them to ``WordCount.basic_count`` (when ``string`` is
    ``'basic statistics'``) or to ``WordCount.word_count`` (otherwise).

    NOTE(review): the ``tBeg``/``tEnd`` defaults are computed once, at
    definition time.
    '''

    # Get data; the first row is skipped (presumably a header -- confirm).
    records = csvFile.read_file()[1:]
    Ids = [record[0] for record in records]
    reviews = [record[1] for record in records]
    rates = [record[2] for record in records]
    timeStamp = [record[3] for record in records]

    if string != 'basic statistics':
        return WordCount.word_count(string, tBeg, tEnd, Ids, reviews,
                                    timeStamp, rates)
    return WordCount.basic_count(string, tBeg, tEnd, Ids, reviews,
                                 timeStamp, rates)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: liuruij/PersonalProject-Java

 def test_count_char(self):
     """
     Multi-character count test (general case).

     Writes 100 "test," tokens, five per line (20 lines in total), and
     runs ``WordCount.file_read_out`` on the file. This method itself
     makes no assertion.
     """
     filename = "test_count_char.txt"
     tokens_per_row = 5
     row = "test," * tokens_per_row + "\n"
     target_path = "/".join((os.getcwd(), filename))
     with open(target_path, "w", encoding="utf-8") as handle:
         # 100 tokens in rows of five gives 20 identical rows.
         for _ in range(100 // tokens_per_row):
             handle.write(row)
     WordCount.file_read_out(filename, "testout_count_char")

Exemplo n.º 23

0

Exibir arquivo

Arquivo: test_WordCount2.py Projeto: hotchkma/CS362_ICA_Unittest_Pytest

 def test_add(self):
     """Check ``WordCount.WordCount`` against sentences of one to seven words."""
     expected_counts = (
         ("I am testing this function", 5),
         ("But this function assumes proper grammar", 6),
         ("So", 1),
         ("I must", 2),
         ("Maintain the proper", 3),
         ("Grammar or else the", 4),
         ("Program totally will not work for me", 7),
     )
     for sentence, expected in expected_counts:
         self.assertEqual(WordCount.WordCount(sentence), expected)

Exemplo n.º 24

0

Exibir arquivo

Arquivo: test_WordCount.py Projeto: hotchkma/CS362_ICA_Unittest_Pytest

def WordCount_case(x):
    """
    Assert ``WordCount`` results for a handful of short phrases.

    The parameter ``x`` is not used by the body.
    """
    phrase_expectations = (
        ("Hello,", 1),
        ("Can you hear me?", 4),
        ("I'm in California", 3),
        ("Dreaming of who we used to be", 7),
        ("When we were younger", 4),
        ("and free", 2),
    )
    for phrase, expected in phrase_expectations:
        assert WordCount(phrase) == expected

Exemplo n.º 25

0

Exibir arquivo

Arquivo: speChar.py Projeto: HBDforme/Malicious-Javascript-code-dectector

def sepfreq(path, filename, speCharList):
    """
    Return the frequency of the given special characters in a file.

    The result is the sum of ``WordCount.word_count(path, filename, c)``
    over every ``c`` in ``speCharList``, divided by the number of
    characters in the file once spaces are removed.

    :param path: directory of the file (joined to ``filename`` with a
        backslash).
    :param filename: name of the file to inspect.
    :param speCharList: iterable of special characters to count.
    :return: 0 when the file has no non-space characters, otherwise the
        ratio described above (0 for an empty ``speCharList``).
    """
    # The context manager closes the handle; it was previously leaked.
    with open(path + '\\' + filename, 'r+', encoding="ISO-8859-1") as file:
        content = file.read()

    # Remove spaces; only the remaining characters count toward the total.
    stripped = content.replace(' ', '')

    # Accumulate over ALL special characters. Previously the return
    # statement sat inside the loop, so only the first one was counted.
    alltime = 0
    for spechar in speCharList:
        alltime = alltime + WordCount.word_count(path, filename, spechar)

    if not stripped:
        return 0
    return alltime / len(stripped)

Exemplo n.º 26

0

Exibir arquivo

 def __init__(self):
     """Create the ``WordsCount`` helper, an empty word dictionary and the filter list."""
     self.wc = WordCount.WordsCount()
     self.wordsDic = {}
     self.filter = [
         'a',
         'the',
         'to',
     ]

Exemplo n.º 27

0

Exibir arquivo

Arquivo: TestWordCount.py Projeto: thomas-schreiter/Insight-Coding-Challenge

 def test_missing_input_directory(self):
     """``WordCount.main`` must raise when given the input directory 'does_not_exist'."""
     missing_dir = 'does_not_exist'
     with self.assertRaises(Exception):
         WordCount.main(indir=missing_dir)

Exemplo n.º 28

0

Exibir arquivo

Arquivo: main.py Projeto: cenh/Wikipedia-Heavy-Hitters

        '--output', type=str, help='output file that contains all the logging (Default: logs.txt)', default="logs.txt")
    args = parser.parse_args()

    wiki_reader = WikiReader(args.input)
    macroCMS = {}
    mapping_distribution = {}
    log_file = open(args.output, 'w', encoding='utf-8')

    for cat in macro_categories:
        macroCMS[cat] = CountMinSketch(
            fraction=0.0005, tolerance=0.0001, allowed_failure_probability=0.01)
        mapping_distribution[cat] = 0

    cnt = 0
    time_start = time.time()
    mrJob = WordCount.WikiWordCount(args=[article_list])
    for page_dict in wiki_reader:
        with open(tmp_file, 'w', encoding='utf-8') as f:
            if page_dict['revision']['text'].startswith('#REDIRECT'):
                continue
            f.write(page_dict['revision']['text'])

        cnt += 1
        if cnt < int(args.skip):
            continue

        if cnt > int(args.parse):
            break

        open(output_file, 'w').close()
        mrJob.run_job()

Exemplo n.º 29

0

Exibir arquivo

Arquivo: test_pytest_wordCount.py Projeto: Bazzikk/activityUnPy

 def test_three(self):
     """Passing the integer 2 to ``wordCount`` is expected to raise AttributeError."""
     not_a_string = 2
     with pytest.raises(AttributeError):
         assert WordCount.wordCount(not_a_string)

Exemplo n.º 30

0

Exibir arquivo

Arquivo: InfoPreprocessor.py Projeto: goddess5321/Relevance-Miner

            ##每日公告赋值完成
            ###计数
            NowFile = NowFile + 1
            print('%d/%d' % (NowFile, TotalFile))
            ###

    ###此时DateAnncDict已经准备完成
    try:
        del AnncSingleData  #释放寄存字典
    except UnboundLocalError:
        pass
    ###由{日期：{标题：[词，词，词]，标题：[词，词，词]}}
    ###获得
    ###{日期：{词:词数,词:词数}}
    ###每日字典更新
    DateDict = WordCount.WordCount(DateAnncDict)
    return DateDict, DateAnncDict
    ###


# ##测试
# if __name__ == "__main__":
#     str="没门"
#     #str=str.encode('utf-8')
#     # DateAnncDict={'20170101':str}
#     DateCrawled=['20190531']#要处理的日期
#     DIR="爬取文件"#路径
#     DateDict,DateAnncDict=InfoPreprocessor(DIR,DateCrawled)
#     for DateAnnc in DateAnncDict['20190531']:
#         # for Annc in DateAnnc.keys():
#         #     print Annc

Exemplo n.º 31

0

Exibir arquivo

Arquivo: test_pytest_wordCount.py Projeto: Bazzikk/activityUnPy

 def test_one(self):
     """A four-word sentence is expected to give a count of 4."""
     assert WordCount.wordCount("This is an activity") == 4

Exemplo n.º 32

0

Exibir arquivo

Arquivo: test_pytest_wordCount.py Projeto: Bazzikk/activityUnPy

 def test_two(self):
     """Calling ``wordCount`` with no argument is expected to return 0."""
     result = WordCount.wordCount()
     assert result == 0

Exemplo n.º 33

0

Exibir arquivo

def callRiskTimes(path,filename,functionList):
    """
    Sum ``WordCount.word_count(path, filename, func)`` over ``functionList``.

    Returns 0 when ``functionList`` is empty.
    """
    return sum(
        WordCount.word_count(path, filename, func) for func in functionList
    )

Exemplos de WordCount, resume-categorizer em Python