import sys
import sm_libs.scriptParams as params
import sm_libs.tcMatrix as tcMatrix
##############################################################################
# Default settings. Each one can be supplied on the command line through the
# flag named in its comment.

# -c: the directory containing the corpus information
corpusDirectory = 0
# -m: the directory that will be created and where the matrix info will be stored
matrixDirectory = "tcMatrix"
# -T: the path of the file containing the list of targets to use
targetFilename = 0
# -D: the path of the file containing the (sub)list of documents to use
documentFilename = 0
# -W: the window size over which co-occurrences will be counted
windowSize = 0
##############################################################################

# Hand the command-line arguments and the defaults above to the parameter
# parser; the values it returns replace the defaults.
parsedSettings = params.createCollapsedTCMatrix(
    sys.argv[1:],
    corpusDirectory,
    matrixDirectory,
    targetFilename,
    documentFilename,
    windowSize,
)
(corpusDirectory,
 matrixDirectory,
 targetFilename,
 documentFilename,
 windowSize) = parsedSettings

# Create the matrix object and initialize it from the settings.
# NOTE(review): the meaning of the trailing 0 passed to initializeMatrix is
# not visible in this script -- confirm against sm_libs.tcMatrix.
the_tcMatrix = tcMatrix.tcMatrix()
the_tcMatrix.initializeMatrix(corpusDirectory, matrixDirectory, windowSize, 0)

# Load the target list and the document list from their files.
the_tcMatrix.getTargetList(targetFilename)
the_tcMatrix.getDocumentList(documentFilename)

# Count the corpus.
the_tcMatrix.processCorpusCollapsed()

# Write out the target info, the collapsed co-occurrence matrix, and the
# collapsed matrix info, in that order.
the_tcMatrix.outputTargetInfo()
the_tcMatrix.outputCollapsedCoocMatrix()
the_tcMatrix.outputCollapsedMatrixInfo()
# Example no. 2
# 0
featureFilename = 0  # -F: presumably the path of a file listing the features to use -- TODO confirm
documentFilename = 0  # -D: presumably the path of a file listing the documents to use -- TODO confirm
normalizationMethod = 1  # -N: how to normalize the vectors. See documentation for choices (1: row sum; 2: row length; 13: PosPMI; 14: Coals)

##############################################################################
# import python libraries
import sys
import sm_libs.tcMatrix as tcMatrix
import sm_libs.sentiments as sent
import sm_libs.scriptParams as params
##############################################################################

# process script parameters: the command-line arguments and the current
# settings are passed to the parameter parser, and the values it returns
# replace the settings
# NOTE(review): matrixDirectory, outputDirectory and targetFilename are read
# here but are not assigned in the visible part of this script -- confirm they
# are defined earlier in the file
matrixDirectory, outputDirectory, targetFilename, featureFilename, documentFilename, normalizationMethod = params.calcSentiments(
    sys.argv[1:], matrixDirectory, outputDirectory, targetFilename,
    featureFilename, documentFilename, normalizationMethod)

# import the matrix data: matrix info is read from matrixDirectory, followed
# by the target info and the context info
# NOTE(review): the meaning of the (0, 0) arguments is not visible here --
# confirm against sm_libs.tcMatrix
the_tcMatrix = tcMatrix.tcMatrix()
the_tcMatrix.importMatrixInfo(matrixDirectory)
the_tcMatrix.importTargetInfo(0, 0)
the_tcMatrix.importContextInfo(0, 0)

# create the sentiment model, initialize it with the script settings, then
# call getTargetList and getFeatureList with the imported matrix
the_sentiments = sent.sentimentModel()
the_sentiments.initializeModel(matrixDirectory, outputDirectory,
                               targetFilename, featureFilename,
                               documentFilename, normalizationMethod)
the_sentiments.getTargetList(the_tcMatrix)
the_sentiments.getFeatureList(the_tcMatrix)