Exemplo n.º 1
0
def generateJSONForD3(filemanager, mergedSet):
    """
    Generates the data formatted nicely for the d3 visualization library.

    The files used are the ones whose IDs were posted in the 'segmentlist' form field. If no
    IDs were posted, every file in filemanager.files whose `active` attribute is truthy is used.

    Form fields read (from the module-level `request`; presumably Flask's request object):
        segmentlist: Zero or more integer file IDs.
        minlength: Optional integer; only words strictly longer than this are merged (default 0).
        vizmaxwords: If present, the merged counts are trimmed to the most frequent words.
        maxwords: Number of words to keep when trimming; a blank value means 100.

    Args:
        filemanager: Object whose `files` attribute can be indexed by integer file ID and
            supports `.values()`.
        mergedSet: Boolean saying whether to merge all files into one dataset or, if false,
            create a list of datasets.

    Returns:
        If mergedSet is true, the result of general_functions.generateD3Object for the merged
        word counts; otherwise a list with one generateD3JSONObject result per file.

    Raises:
        ValueError: If a 'segmentlist' or 'minlength' value, or a non-blank 'maxwords' value,
            cannot be parsed as an integer.
    """
    chosenFileIDs = [int(x) for x in request.form.getlist('segmentlist')]

    if chosenFileIDs:
        activeFiles = [filemanager.files[ID] for ID in chosenFileIDs]
    else:
        activeFiles = [lFile for lFile in filemanager.files.values() if lFile.active]

    if not mergedSet:  # Create a JSON object for each chunk
        return [lFile.generateD3JSONObject(wordLabel="text", countLabel="size")
                for lFile in activeFiles]

    # Create one JSON Object across all the chunks
    minimumLength = int(request.form['minlength']) if 'minlength' in request.form else 0
    masterWordCounts = {}
    for lFile in activeFiles:
        wordCounts = lFile.getWordCounts()
        for key in wordCounts:
            # Words whose length does not exceed the minimum are left out of the merge.
            if len(key) > minimumLength:
                masterWordCounts[key] = masterWordCounts.get(key, 0) + wordCounts[key]

    if 'vizmaxwords' in request.form:
        # A blank input would make int() raise, so fall back to a default of 100 words.
        rawMaxWords = request.form['maxwords'].strip()
        maxNumWords = int(rawMaxWords) if rawMaxWords else 100

        # Sorted ascending by count, so the words to discard are the leading entries.
        sortedwordcounts = sorted(masterWordCounts, key=masterWordCounts.__getitem__)
        # Clamp at zero: a negative slice bound would wrongly drop words when fewer than
        # maxNumWords exist.
        numToDrop = max(len(sortedwordcounts) - maxNumWords, 0)
        for word in sortedwordcounts[:numToDrop]:
            del masterWordCounts[word]

    # NOTE: Objects in JSON are dictionaries in Python, but Lists are Arrays are Objects as well.
    return general_functions.generateD3Object(masterWordCounts, objectLabel="tokens", wordLabel="name",
                                              countLabel="size")
Exemplo n.º 2
0
    def generateD3JSONObject(self, wordLabel, countLabel):
        """
        Builds the d3-formatted object for this file.

        The file's word counts and its label are handed to
        general_functions.generateD3Object together with the two key names.

        Args:
            wordLabel: Key name under which each word is stored in the sub-objects.
            countLabel: Key name under which each count is stored in the sub-objects.

        Returns:
            Whatever general_functions.generateD3Object produces for this file's counts.
        """
        counts = self.getWordCounts()
        d3Object = general_functions.generateD3Object(counts, self.label, wordLabel, countLabel)
        return d3Object
Exemplo n.º 3
0
def generateJSONForD3(filemanager, mergedSet):
    """
    Generates the data formatted nicely for the d3 visualization library.

    The files used are the ones whose IDs were posted in the 'segmentlist' form field. If no
    IDs were posted, every file in filemanager.files whose `active` attribute is truthy is used.

    Form fields read (from the module-level `request`; presumably Flask's request object):
        segmentlist: Zero or more integer file IDs.
        minlength: Optional integer; only words strictly longer than this are merged (default 0).
        vizmaxwords: If present, the merged counts are trimmed to the most frequent words.
        maxwords: Number of words to keep when trimming; blank or whitespace-only means 100.

    Args:
        filemanager: Object whose `files` attribute can be indexed by integer file ID and
            supports `.values()`.
        mergedSet: Boolean saying whether to merge all files into one dataset or, if false,
            create a list of datasets.

    Returns:
        If mergedSet is true, the result of general_functions.generateD3Object for the merged
        word counts; otherwise a list with one generateD3JSONObject result per file.

    Raises:
        ValueError: If a 'segmentlist' or 'minlength' value, or a non-blank 'maxwords' value,
            cannot be parsed as an integer.
    """
    chosenFileIDs = [int(x) for x in request.form.getlist('segmentlist')]

    if chosenFileIDs:
        activeFiles = [filemanager.files[ID] for ID in chosenFileIDs]
    else:
        activeFiles = [lFile for lFile in filemanager.files.values() if lFile.active]

    if not mergedSet:  # Create a JSON object for each chunk
        return [lFile.generateD3JSONObject(wordLabel="text", countLabel="size")
                for lFile in activeFiles]

    # Create one JSON Object across all the chunks
    minimumLength = int(request.form['minlength']) if 'minlength' in request.form else 0
    masterWordCounts = {}
    for lFile in activeFiles:
        wordCounts = lFile.getWordCounts()
        for key in wordCounts:
            # Words whose length does not exceed the minimum are left out of the merge.
            if len(key) > minimumLength:
                masterWordCounts[key] = masterWordCounts.get(key, 0) + wordCounts[key]

    if 'vizmaxwords' in request.form:
        # Make sure there is a number in the input form; whitespace-only counts as blank,
        # because int() would raise on it just as it does on an empty string.
        rawMaxWords = request.form['maxwords'].strip()
        maxNumWords = int(rawMaxWords) if rawMaxWords else 100

        # Sorted ascending by count, so the words to discard are the leading entries.
        sortedwordcounts = sorted(masterWordCounts, key=masterWordCounts.__getitem__)
        # Clamp at zero: a negative slice bound would wrongly drop words when fewer than
        # maxNumWords exist.
        numToDrop = max(len(sortedwordcounts) - maxNumWords, 0)
        for word in sortedwordcounts[:numToDrop]:
            del masterWordCounts[word]

    # NOTE: Objects in JSON are dictionaries in Python, but Lists are Arrays are Objects as well.
    return general_functions.generateD3Object(masterWordCounts, objectLabel="tokens", wordLabel="name",
                                              countLabel="size")