class preProcessor:
    def __init__(self, dataFile, userJoins):
        self.dataHandler = DataHandler(dataFile, userJoins)
        self.dataHandler.loadActiveForums()
        self.tokenizedData = []

    def prepareTokenizedCSV(self):
        self.tokenizedData = self.dataHandler.getTokenizedCSV()

    def prepareTokenizedUserMonthCSV(self):
        self.tokenizedData = self.dataHandler.getTokenizedUserMonthCSV()

    def prepareTokenizedUserMonthForumCSV(self):
        self.tokenizedData = self.dataHandler.getTokenizedUserMonthForumCSV()

    def printDataForTMT(self, outFile):
        outFile = csv.writer(open(outFile, "w"))
        index = 1
        for record in self.tokenizedData:
            record.insert(0, index)
            outFile.writerow(record)
            index += 1

    def getRequiredDataFromRecord(self, record):
        indices = [0]

    def initializeUserMonthRecord(self, user, month):
        return self.dataHandler.getBasicUserMonthRecord(user, month)

    def isProperUnicode(self, text):
        try:
            dummy = unicode(text)
            return True
        except:
            return False

    def printInferDataForTMT(self, outFile):
        f = codecs.open(outFile, encoding="utf-8", mode="w+")
        outFile = csv.writer(f)
        index = 1
        for user in self.tokenizedData.iterkeys():
            for month in self.tokenizedData[user].iterkeys():
                numPosts = 0
                userMonthRecord = self.initializeUserMonthRecord(user, month)
                for recordText in self.tokenizedData[user][month]:
                    if self.isProperUnicode(recordText):
                        userMonthRecord[-1].append(recordText)
                        numPosts += 1
                userMonthRecord[-1] = " ".join(userMonthRecord[-1])
                userMonthRecord.insert(0, index)
                userMonthRecord.append(numPosts)
                try:
                    outFile.writerow([unicode(s).encode("utf-8") for s in userMonthRecord])
                    index += 1
                except:
                    pass

    def printInferDataForTMTWithForum(self, outFile):
        f = codecs.open(outFile, encoding="utf-8", mode="w+")
        outFile = csv.writer(f)
        index = 1
        for user in self.tokenizedData.iterkeys():
            for month in self.tokenizedData[user].iterkeys():
                totalPosts = 0
                allForumsRecord = self.initializeUserMonthRecord(user, month)
                for forum in self.tokenizedData[user][month]:
                    numPosts = 0
                    userMonthRecord = self.initializeUserMonthRecord(user, month)
                    for recordText in self.tokenizedData[user][month][forum]:
                        if self.isProperUnicode(recordText):
                            userMonthRecord[-1].append(recordText)
                            numPosts += 1
                    forumPosts = copy.deepcopy(userMonthRecord[-1])
                    userMonthRecord[-1] = " ".join(userMonthRecord[-1])
                    userMonthRecord.insert(0, index)
                    userMonthRecord.append(numPosts)
                    userMonthRecord.append(forum)
                    try:
                        outFile.writerow([unicode(s).encode("utf-8") for s in userMonthRecord])
                        index += 1
                        totalPosts += numPosts
                        allForumsRecord[-1].extend(forumPosts)
                    except:
                        pass
                allForumsRecord[-1] = " ".join(allForumsRecord[-1])
                allForumsRecord.insert(0, index)
                index += 1
                allForumsRecord.append(totalPosts)
                allForumsRecord.append("AllForums")
                outFile.writerow([unicode(s).encode("utf-8") for s in allForumsRecord])