Exemplo n.º 1
0
  def __init__(self, seed=None):
    """Initialize the object from a seed file of heuristics and file counts.

    The seed file is consumed two lines at a time: a heuristic name followed
    by the number of files for that heuristic.  For every heuristic the files
    "<heuristic>/0.txt" .. "<heuristic>/<count-1>.txt" are wrapped in
    Document objects, and only documents whose ``text`` is truthy are kept.

    Attributes set once the seed file has been opened:
      self.dict        -- heuristic name -> list of kept Document objects
    Attributes set once the whole seed file has been read:
      self.word_counts -- starts as an empty dict (presumably filled by
                          consolidate/transform; confirm against those methods)
      self.log_values  -- starts as an empty dict (same caveat)

    Args:
      seed: value handed to Util.open_file to locate the seed file.

    Failure handling (a message is printed in every case):
      * seed is None, or the seed cannot be opened: returns early, leaving
        the instance without the attributes above.
      * a heuristic line has no following count line: calls sys.exit().
      * the count line is not an integer: returns early; self.dict holds the
        heuristics read so far.
    The seed file is closed on every one of these paths.
    """
    # a seed is mandatory; without one there is nothing to load
    if seed is None:
      print("Initializing an Analyzer object requires a seed.  Please try again.")
      return
    # ---USER FEEDBACK---
    print("\nAttempting to read seed file...")
    # open the seed file (presumably Util.open_file returns a falsy value
    # when the file cannot be opened -- confirm against Util)
    seed_file = Util.open_file(seed)
    if not seed_file:
      print("Initialization failed; please verify that the seed exists then try again.")
      return
    # publish the mapping before reading so that an empty seed file still
    # leaves self.dict defined for the analysis loop further down
    heuristics = {}
    self.dict = heuristics
    # try/finally guarantees the handle is released on return and sys.exit too
    # NOTE(review): assumes the object returned by Util.open_file has close()
    try:
      while True:
        # read heuristic line; an empty string means end of file
        line = seed_file.readline()
        if not line:
          break
        current_heuristic = Util.strip(line)
        # ---USER FEEDBACK---
        print("Reading files for heuristic '" + current_heuristic + "'...")
        # every heuristic line must be followed by a file-count line
        next_line = seed_file.readline()
        if not next_line:
          print("Incorrect seed structure.  Exiting")
          sys.exit()
        # the count line must parse as an integer
        try:
          num_files = int(next_line)
        except ValueError:
          print("Seed file is of incorrect format.  Please try again.")
          return
        # load the numbered files that belong to this heuristic
        docs = []
        for i in range(num_files):
          filename = current_heuristic + "/" + str(i) + ".txt"
          new_doc = Document(None, filename)
          # keep only documents whose text was successfully initialized
          if new_doc.text:
            docs.append(new_doc)
        heuristics[current_heuristic] = docs
    finally:
      seed_file.close()

    # ---USER FEEDBACK---
    print("Done reading files!\n")

    # calculate required values
    # initialize dictionaries
    self.word_counts = {}
    self.log_values = {}
    # analyze for each heuristic found
    for key in self.dict:
      self.consolidate(key)
      self.transform(key, True)

    # ---USER FEEDBACK---
    print("Analyzer object initialized!\n")