示例#1
0
def preprocess_file(file_name):
    """Aggregate per-word and per-word-pair counts from a zipped input file.

    Reads the first member of the zip archive at
    ``server.zip_file_path(file_name)`` line by line.  Each line is split on
    whitespace; token 0 is treated as the word, token 1 as the second word
    and token 3 as an integer count.  Counts are summed over consecutive
    lines that share the same word (written with ``append_word_to_file`` to
    the file at ``server.count_file_path(file_name)``) and over consecutive
    lines that share the same (word, second word) pair (written with
    ``append_word_to_short_file`` to the file at
    ``server.short_file_path(file_name)``).

    Grouping is by *consecutive* lines only, so the totals are per-run;
    presumably the input is sorted by word and then second word -- confirm
    against the producer of the archive.

    All handles are managed by ``with`` so they are closed even when a
    malformed line or an empty archive raises part-way through.

    Args:
        file_name: Name passed to the ``server`` path helpers to locate the
            zip archive and the two output files.

    Raises:
        IndexError: If the archive has no members, or a line has fewer than
            four whitespace-separated tokens (including blank lines).
        ValueError: If token 3 of a line is not an integer literal.
    """
    # Both output files are opened (and truncated) before the archive, in
    # the same order as before, so existing on-disk behavior is unchanged.
    with open(server.short_file_path(file_name), 'w') as sf, \
            open(server.count_file_path(file_name), 'w') as output_file, \
            zipfile.ZipFile(server.zip_file_path(file_name), 'r') as z:
        # Only the first member of the archive is processed.
        internal_name = z.namelist()[0]

        word = ''
        word2 = ''
        count = 0
        count_pair = 0

        with z.open(internal_name) as f:
            for line in f:
                tokens = line.split()
                if tokens[0] != word:
                    # New word: flush the finished word total and the
                    # finished pair total, then start both accumulators
                    # afresh.  On the very first line this flushes the
                    # initial empty word with a zero count; that call is
                    # kept as-is because the helpers' handling of it is
                    # not visible here.
                    append_word_to_file(output_file, word, count)
                    append_word_to_short_file(sf, word, word2, count_pair)
                    count = 0
                    word = tokens[0]
                    word2 = tokens[1]
                    count_pair = 0
                elif tokens[1] != word2:
                    # Same word, new second word: only the pair total is
                    # flushed and restarted.
                    append_word_to_short_file(sf, word, word2, count_pair)
                    word2 = tokens[1]
                    count_pair = 0
                count += int(tokens[3])
                count_pair += int(tokens[3])

        # Flush the last run, which no later line will trigger.
        append_word_to_file(output_file, word, count)
        append_word_to_short_file(sf, word, word2, count_pair)
示例#2
0
def word_from_file(file_name, word, index):
    """Pick the second word for ``word`` at a cumulative-count position.

    Scans the file at ``server.short_file_path(file_name)``.  Each line is
    split on whitespace into ``(first, second, times, ...)``.  For lines
    whose first token equals ``word``, ``times`` (token 2, parsed as an
    int) is compared with the remaining ``index``: if ``times >= index``
    the line's second token is returned, otherwise ``times`` is subtracted
    from ``index`` and the scan continues.

    For lines with a different first token, the scan stops as soon as
    ``count_word.is_word_less_than_candidate(word, tokens[0])`` is true --
    presumably because the file is sorted and ``word`` cannot appear later;
    confirm against that helper.

    Args:
        file_name: Name passed to ``server.short_file_path`` to locate the
            file to scan.
        word: First token to look for.
        index: Position within the summed counts of the matching lines;
            the first matching line covers positions up to its ``times``.

    Returns:
        The second token of the selected line, or ``None`` when no matching
        line covers ``index``.

    Raises:
        IndexError: If a line is blank, or a matching line has fewer than
            three tokens.
        ValueError: If token 2 of a matching line is not an integer literal.
    """
    # ``with`` closes the file on the early return as well as on fall-through.
    with open(server.short_file_path(file_name), 'r') as f:
        for line in f:
            tokens = line.split()
            if tokens[0] == word:
                times = int(tokens[2])
                if times >= index:
                    return tokens[1]
                index -= times
            elif count_word.is_word_less_than_candidate(word, tokens[0]):
                break
    # No zip archive is opened in this function, so there is nothing else to
    # close; a miss is reported as None.
    return None