class UserwiseDivergenceAnalysis:
  """Compute per-user, per-month divergences against reference data.

  The class wires a DataHandler (data access and distribution building) to a
  DistComparer (divergence computation). Every ``prepareUserDivergences*``
  method compares each month of a user's data with an "initial" and a
  "matured" reference; the methods differ only in where those references
  come from (the user, the user's active forum, or the "AllTalk" forum).
  """

  # Stop-word list used when the constructor is not given an explicit path.
  DEFAULT_STOP_WORDS_FILE = "/usr0/home/pgadde/Work/Ethnic/Hoodup/DataExploration/SampledPosts/Regression/stopWords"

  def __init__(self, dataFile, userJoins, stopWordsFile=None):
    """Build the helpers, load the active forums and preprocess the vocabulary.

    dataFile, userJoins -- forwarded unchanged to DataHandler.
    stopWordsFile       -- optional path to a stop-word file with one word per
                           line; defaults to DEFAULT_STOP_WORDS_FILE.
    """
    sys.stderr.write('In Constructor\n')
    if stopWordsFile is None:
      stopWordsFile = self.DEFAULT_STOP_WORDS_FILE
    self.stopWordsFile = stopWordsFile
    self.distComparer = DistComparer()
    self.dataHandler = DataHandler(dataFile, userJoins)
    self.dataHandler.loadActiveForums()
    self.__loadData()
    self.sampledUsers = set()

  def __loadData(self):
    """Read the stop-word file and hand the word set to the data handler."""
    # The context manager closes the file even if reading fails part-way.
    with open(self.stopWordsFile) as stopWordsIn:
      stopWords = set(line.strip() for line in stopWordsIn)
    self.dataHandler.preprocessVocab(stopWords)

  def sampleUsers(self):
    """Replace ``self.sampledUsers`` with the data handler's user sample."""
    self.sampledUsers = self.dataHandler.sampleUsers()

  def doDivergenceAnalysisPerUser(self, outFile):
    """Write one tab-separated line per (user, month) to the file ``outFile``.

    Each line holds: user, month, divergence from the initial reference,
    divergence to the matured reference. ``outFile`` is a path; the file is
    created or truncated.
    """
    with open(outFile, 'w') as out:
      for user in self.sampledUsers:
        # NOTE(review): the statements that originally produced
        # ``userDivergences`` and ``month`` were missing from this method, so
        # it raised NameError. The per-user variant is assumed here; switch to
        # prepareUserDivergencesActive/Background if that was the intent
        # (the Active variant can return -1 and would need a skip).
        userDivergences = self.prepareUserDivergences(user)
        for month in userDivergences:
          fromInitial, toMatured = userDivergences[month]
          fields = (user, month, fromInitial, toMatured)
          out.write('\t'.join(str(field) for field in fields) + '\n')

  def _monthlyDivergences(self, userNum, userMonths, initialDist, maturedDist):
    """Return {month: (div(initial, month), div(month, matured))} for a user."""
    divergences = {}
    for userMonth in userMonths:
      monthDist = self.dataHandler.makeDist(
        self.dataHandler.getUserDataForDivergence(userNum, userMonth))
      divergences[userMonth] = (
        self.distComparer.jsDivergence(initialDist, monthDist),
        self.distComparer.jsDivergence(monthDist, maturedDist))
    return divergences

  def prepareUserDivergencesActive(self, userNum):
    """Divergences of a user's months against the user's active forum.

    Returns -1 when the active forum's name does not contain "Talk";
    otherwise a dict mapping each user month to a 2-tuple
    (divergence from forum initial data, divergence to forum matured data).
    """
    userMonths = self.dataHandler.getUserMonths(userNum)
    activeForum = self.dataHandler.getActiveForum(userNum)
    if "Talk" not in activeForum:
      return -1
    # Reuse the forum looked up above instead of querying it again.
    initialDist = self.dataHandler.makeDist(self.dataHandler.getForumInitialData(activeForum))
    maturedDist = self.dataHandler.makeDist(self.dataHandler.getForumMaturedData(activeForum))
    return self._monthlyDivergences(userNum, userMonths, initialDist, maturedDist)

  def prepareUserDivergencesBackground(self, userNum):
    """Divergences of a user's months against the "AllTalk" forum data.

    Returns a dict mapping each user month to a 2-tuple
    (divergence from forum initial data, divergence to forum matured data).
    """
    # Was ``self.__dataHandler``: name mangling turned that into an attribute
    # that is never assigned, so the call always raised AttributeError.
    userMonths = self.dataHandler.getUserMonths(userNum)
    initialDist = self.dataHandler.makeDist(self.dataHandler.getForumInitialData("AllTalk"))
    maturedDist = self.dataHandler.makeDist(self.dataHandler.getForumMaturedData("AllTalk"))
    return self._monthlyDivergences(userNum, userMonths, initialDist, maturedDist)

  def prepareUserDivergences(self, userNum):
    """Divergences of a user's months against the user's own reference data.

    Returns a dict mapping each user month to a 2-tuple
    (divergence from user initial data, divergence to user matured data).
    """
    userMonths = self.dataHandler.getUserMonths(userNum)
    initialDist = self.dataHandler.makeDist(self.dataHandler.getUserInitialData(userNum))
    maturedDist = self.dataHandler.makeDist(self.dataHandler.getUserMaturedData(userNum))
    return self._monthlyDivergences(userNum, userMonths, initialDist, maturedDist)