# Command-line script: counts a corpus with sm_libs.tcMatrix and writes out the
# target info, the collapsed co-occurrence matrix, and the matrix info.
import sys

import sm_libs.scriptParams as params
import sm_libs.tcMatrix as tcMatrix

##############################################################################
# Default option values, handed to the parameter parser below.

# -c: the directory containing the corpus information
corpusDirectory = 0
# -m: the directory that will be created and where the matrix info is stored
matrixDirectory = "tcMatrix"
# -T: the path of the file containing the list of targets to use
targetFilename = 0
# -D: the path of the file containing the (sub)list of documents to use
documentFilename = 0
# -W: the window size over which co-occurrences will be counted
windowSize = 0

##############################################################################
# Parse the command-line arguments (everything after the script name); the
# parser receives the defaults above and returns the values to use.
(
    corpusDirectory,
    matrixDirectory,
    targetFilename,
    documentFilename,
    windowSize,
) = params.createCollapsedTCMatrix(
    sys.argv[1:],
    corpusDirectory,
    matrixDirectory,
    targetFilename,
    documentFilename,
    windowSize,
)

# Set up the matrix object, then load the target list and the document list.
the_tcMatrix = tcMatrix.tcMatrix()
# NOTE(review): the meaning of the trailing 0 argument is not visible here;
# check tcMatrix.initializeMatrix before changing it.
the_tcMatrix.initializeMatrix(corpusDirectory, matrixDirectory, windowSize, 0)
the_tcMatrix.getTargetList(targetFilename)
the_tcMatrix.getDocumentList(documentFilename)

# Count the corpus.
the_tcMatrix.processCorpusCollapsed()

# Write out the results.
the_tcMatrix.outputTargetInfo()
the_tcMatrix.outputCollapsedCoocMatrix()
the_tcMatrix.outputCollapsedMatrixInfo()
# NOTE(review): matrixDirectory, outputDirectory and targetFilename are used
# below but are not given defaults in this section; they must already be bound
# before this point -- confirm against the full script.

# -F
featureFilename = 0
# -D
documentFilename = 0
# -N: how to normalize the vectors. See documentation for choices
#     (1: row sum; 2: row length; 13: PosPMI; 14: Coals)
normalizationMethod = 1

##############################################################################
# Python libraries
import sys

import sm_libs.tcMatrix as tcMatrix
import sm_libs.sentiments as sent
import sm_libs.scriptParams as params

##############################################################################
# Parse the command-line arguments (everything after the script name); the
# parser receives the current option values and returns the values to use.
(
    matrixDirectory,
    outputDirectory,
    targetFilename,
    featureFilename,
    documentFilename,
    normalizationMethod,
) = params.calcSentiments(
    sys.argv[1:],
    matrixDirectory,
    outputDirectory,
    targetFilename,
    featureFilename,
    documentFilename,
    normalizationMethod,
)

# Load the stored matrix: matrix info first, then target and context info.
the_tcMatrix = tcMatrix.tcMatrix()
the_tcMatrix.importMatrixInfo(matrixDirectory)
# NOTE(review): the meaning of the (0, 0) arguments is not visible here;
# check the tcMatrix import methods before changing them.
the_tcMatrix.importTargetInfo(0, 0)
the_tcMatrix.importContextInfo(0, 0)

# Create and configure the sentiment model, then take its target and feature
# lists from the loaded matrix.
the_sentiments = sent.sentimentModel()
the_sentiments.initializeModel(
    matrixDirectory,
    outputDirectory,
    targetFilename,
    featureFilename,
    documentFilename,
    normalizationMethod,
)
the_sentiments.getTargetList(the_tcMatrix)
the_sentiments.getFeatureList(the_tcMatrix)