def main(argv):
    __usage__ = '''
    usage: python sentiment.py logs/fileprefix ClassifierName,s methodName,s ngramVal,s negtnVal,s
        ClassifierName,s: %s
        methodName,s: %s
        ngramVal,s: %s
        negtnVal,s: %s
    ''' % (str(LIST_CLASSIFIERS), str(LIST_METHODS), str([1, 3]), str([0, 1]))

    import sanderstwitter02
    import stanfordcorpus
    import stats

    fileprefix = ''
    if len(argv) >= 1:
        fileprefix = str(argv[0])
    else:
        fileprefix = 'logs/run'

    classifierNames = []
    if len(argv) >= 2:
        classifierNames = [name for name in argv[1].split(',')
                           if name in LIST_CLASSIFIERS]
    else:
        classifierNames = ['NaiveBayesClassifier']

    methodNames = []
    if len(argv) >= 3:
        methodNames = [name for name in argv[2].split(',')
                       if name in LIST_METHODS]
    else:
        methodNames = ['1step']

    ngramVals = []
    if len(argv) >= 4:
        ngramVals = [int(val) for val in argv[3].split(',') if val.isdigit()]
    else:
        ngramVals = [1]

    negtnVals = []
    if len(argv) >= 5:
        negtnVals = [bool(int(val)) for val in argv[4].split(',')
                     if val.isdigit()]
    else:
        negtnVals = [False]

    if (len(fileprefix) == 0 or len(classifierNames) == 0
            or len(methodNames) == 0 or len(ngramVals) == 0
            or len(negtnVals) == 0):
        print(__usage__)
        return

    tweets1 = sanderstwitter02.getTweetsRawData('sentiment.csv')
    tweets2 = stanfordcorpus.getNormalisedTweets(
        'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.5000.norm.csv')
    #random.shuffle(tweets1)
    #random.shuffle(tweets2)
    tweets = tweets1 + tweets2
    random.shuffle(tweets)
    #tweets = tweets[:100]
    sys.stderr.write('\nlen( tweets ) = ' + str(len(tweets)))

    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets1, fileprefix='logs/stats_'+TIME_STAMP+'/TSC' )
    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets2, fileprefix='logs/stats_'+TIME_STAMP+'/STAN' )
    #sys.stderr.write( '\n' )
    #stats.stepStats( tweets, fileprefix='logs/stats_'+TIME_STAMP+'/Both' )

    #generateARFF(tweets, fileprefix)
    #print classifierNames, methodNames, ngramVals, negtnVals

    TIME_STAMP = get_time_stamp()
    for (((cname, mname), ngramVal), negtnVal) in grid(
            grid(grid(classifierNames, methodNames), ngramVals), negtnVals):
        try:
            trainAndClassify(tweets, classifier=cname, method=mname,
                             feature_set={'ngram': ngramVal, 'negtn': negtnVal},
                             fileprefix=fileprefix + '_' + TIME_STAMP)
        except Exception as e:
            print(e)
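# The nested grid(...) calls in each main() variant build the full sweep of
# (classifier, method, ngram, negation) combinations, but grid itself is not
# defined in this section. A minimal sketch of what it is assumed to do --
# pair every element of the first list with every element of the second -- is:

def grid(alist, blist):
    # Cross product as a list of (a, b) pairs; nesting grid calls yields the
    # tuples unpacked above, e.g. (((cname, mname), ngramVal), negtnVal).
    return [(a, b) for a in alist for b in blist]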
def main(argv):
    __usage__ = '''
    usage: python sentiment.py --logstore logs/fileprefix --classifier ClassifierName,s --negate negtnVal,s
           --since date(yyyy-mm-aa) --until date(yyyy-mm-aa) --querysearch search_text --near city_of_tweets
           --maxtweets max_number --output filename_for_csv("Rajasthan_BJP.csv") --party partyname
           --sample sample_size_for_training
        ClassifierName,s: %s
        negtnVal,s: %s
    ''' % (str(LIST_CLASSIFIERS), str([0, 1]))

    import stanfordcorpus
    import electiontweets
    import stats
    import Exporter

    if len(argv) == 0:
        print('You must pass some parameters. Use "-h" to help.')
        return

    if len(argv) == 1 and argv[0] == '-h':
        print(__usage__)
        return

    opts, args = getopt.getopt(argv, "", (
        "logstore=", "classifier=", "negate=", "username=", "near=",
        "within=", "since=", "until=", "querysearch=", "toptweets",
        "maxtweets=", "output=", "party=", "sample="))

    fileprefix = 'logs/run'
    classifierNames = ['NaiveBayesClassifier']
    methodNames = ['1step']
    ngramVals = [1]
    negtnVals = [False]
    electionfile = 'output_got.csv'
    # Kept as a string: it is spliced into file names below and converted
    # with int() where a number is needed.
    sample = '1000000'

    for opt, arg in opts:
        if opt == '--logstore':
            fileprefix = arg
        if opt == '--classifier':
            classifierNames = [name for name in arg.split(',')
                               if name in LIST_CLASSIFIERS]
        if opt == '--negate':
            negtnVals = [bool(int(val)) for val in arg.split(',')
                         if val.isdigit()]
        if opt == '--output':
            electionfile = str(arg)
        if opt == '--sample':
            sample = str(arg)

    if not os.path.isfile(electionfile):
        Exporter.main(argv)

    if (len(fileprefix) == 0 or len(classifierNames) == 0
            or len(methodNames) == 0 or len(ngramVals) == 0
            or len(negtnVals) == 0):
        print(__usage__)
        return

    if not os.path.isfile('stanfordcorpus/' + stanfordcorpus.FULLDATA
                          + '.' + sample + '.norm.csv'):
        stanfordcorpus.randomSampleCSV(
            'stanfordcorpus/' + stanfordcorpus.FULLDATA,
            'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.sample.csv',
            K=int(sample))
        stanfordcorpus.getNormalisedCSV(
            'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.sample.csv',
            'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.norm.csv')

    tweets = stanfordcorpus.getNormalisedTweets(
        'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.norm.csv')
    tweets3 = electiontweets.getTweetsRawData(electionfile)
    random.shuffle(tweets)
    sys.stderr.write('\nlen( tweets ) = ' + str(len(tweets)))

    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets1, fileprefix='logs/stats_'+TIME_STAMP+'/TSC' )
    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets2, fileprefix='logs/stats_'+TIME_STAMP+'/STAN' )
    #sys.stderr.write( '\n' )
    #stats.stepStats( tweets, fileprefix='logs/stats_'+TIME_STAMP+'/Both' )

    #generateARFF(tweets, fileprefix)
    #print classifierNames, methodNames, ngramVals, negtnVals

    TIME_STAMP = get_time_stamp()
    for (((cname, mname), ngramVal), negtnVal) in grid(
            grid(grid(classifierNames, methodNames), ngramVals), negtnVals):
        try:
            trainAndClassify(tweets, tweets3, classifier=cname, method=mname,
                             feature_set={'ngram': ngramVal, 'negtn': negtnVal},
                             fileprefix=fileprefix + '_' + TIME_STAMP,
                             ansfile=electionfile)
        except Exception as e:
            print(e)
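# Example invocation of the getopt-based variant above, using only flags from
# its usage string; the query text, city, dates, and sample size are
# illustrative placeholders, not values taken from the original code:
#
#   python sentiment.py --logstore logs/run --classifier NaiveBayesClassifier \
#       --negate 0 --querysearch "BJP" --near "Jaipur" --since 2018-01-01 \
#       --until 2018-12-01 --maxtweets 500 --output Rajasthan_BJP.csv \
#       --party BJP --sample 10000
#
# If the --output CSV is not already on disk, the function forwards the same
# argv to Exporter.main() to collect the tweets before training and
# classification run.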
def main(argv):
    #####################################################################
    """python sentimenttry.py logs/fileprefix NaiveBayesClassifier,MaxentClassifier,DecisionTreeClassifier,SvmClassifier 1step,2step 1,3 0"""
    __usage__ = '''
    usage: python sentiment.py logs/fileprefix ClassifierName,s methodName,s ngramVal,s negtnVal,s
    example: python sentimenttry.py logs/fileprefix NaiveBayesClassifier,MaxentClassifier,DecisionTreeClassifier,SvmClassifier 1step,2step 1,3 0
        ClassifierName,s: %s
        methodName,s: %s
        ngramVal,s: %s
        negtnVal,s: %s
    ''' % (str(LIST_CLASSIFIERS), str(LIST_METHODS), str([1, 3]), str([0, 1]))

    fileprefix = ''
    if len(argv) >= 1:
        fileprefix = str(argv[0])
    else:
        fileprefix = 'logs/run'

    classifierNames = []
    if len(argv) >= 2:
        classifierNames = [name for name in argv[1].split(',')
                           if name in LIST_CLASSIFIERS]
    else:
        classifierNames = ['NaiveBayesClassifier']

    methodNames = []
    if len(argv) >= 3:
        methodNames = [name for name in argv[2].split(',')
                       if name in LIST_METHODS]
    else:
        methodNames = ['1step']

    ngramVals = []
    if len(argv) >= 4:
        ngramVals = [int(val) for val in argv[3].split(',') if val.isdigit()]
        """Equivalent loop form:
        ngramVals = []
        for val in argv[3].split(','):
            if val.isdigit():
                ngramVals.append(int(val))"""
    else:
        ngramVals = [2]

    negtnVals = []
    if len(argv) >= 5:
        negtnVals = [bool(int(val)) for val in argv[4].split(',')
                     if val.isdigit()]
    else:
        negtnVals = [False]

    print(classifierNames, methodNames, ngramVals, negtnVals)

    if (len(fileprefix) == 0 or len(classifierNames) == 0
            or len(methodNames) == 0 or len(ngramVals) == 0
            or len(negtnVals) == 0):
        print(__usage__)
        return

    tweets2 = stanfordcorpus.getNormalisedTweets(
        './stanfordcorpus/' + stanfordcorpus.FULLDATA + '.100000.norm.csv')
    random.shuffle(tweets2)
    sys.stderr.write("starting sentimental analysis")
    sys.stderr.write('\nlen( tweets ) = ' + str(len(tweets2)))
    sys.stderr.write('\n')
    sys.stdout.flush()
    try1.preprocessingStats(tweets2,
                            fileprefix='logs/stats_' + TIME_STAMP + '/STAN')
    #print(tweets2)
    #tweets2 = ["gave my mother her mother's day present. she loved it ", 'pos', 'NO_QUERY', []]
    """for ((k,x),y) in grid(grid( classifierNames, methodNames),ngramVals):
        print((k,x),y)
    output for above
    ('NaiveBayesClassifier', '1step') 1
    ('NaiveBayesClassifier', '1step') 3
    ('NaiveBayesClassifier', '2step') 1
    ('NaiveBayesClassifier', '2step') 3
    ('MaxentClassifier', '1step') 1
    ('MaxentClassifier', '1step') 3
    ('MaxentClassifier', '2step') 1
    ('MaxentClassifier', '2step') 3
    ('DecisionTreeClassifier', '1step') 1
    ('DecisionTreeClassifier', '1step') 3
    ('DecisionTreeClassifier', '2step') 1
    ('DecisionTreeClassifier', '2step') 3
    ('SvmClassifier', '1step') 1
    ('SvmClassifier', '1step') 3
    ('SvmClassifier', '2step') 1
    ('SvmClassifier', '2step') 3

    for (((k,x),y),p) in grid(grid(grid( classifierNames, methodNames),ngramVals),negtnVals):
        print(((k,x),y),p)
    (('NaiveBayesClassifier', '1step'), 1) False
    (('NaiveBayesClassifier', '1step'), 3) False
    (('NaiveBayesClassifier', '2step'), 1) False
    (('NaiveBayesClassifier', '2step'), 3) False
    (('MaxentClassifier', '1step'), 1) False
    (('MaxentClassifier', '1step'), 3) False
    (('MaxentClassifier', '2step'), 1) False
    (('MaxentClassifier', '2step'), 3) False
    (('DecisionTreeClassifier', '1step'), 1) False
    (('DecisionTreeClassifier', '1step'), 3) False
    (('DecisionTreeClassifier', '2step'), 1) False
    (('DecisionTreeClassifier', '2step'), 3) False
    (('SvmClassifier', '1step'), 1) False
    (('SvmClassifier', '1step'), 3) False
    (('SvmClassifier', '2step'), 1) False
    (('SvmClassifier', '2step'), 3) False"""

    TIME_STAMP1 = get_time_stamp()  # defined above
    for (((cname, mname), ngramVal), negtnVal) in grid(
            grid(grid(classifierNames, methodNames), ngramVals), negtnVals):
        try:
            #print(classifierNames, methodNames, ngramVals, negtnVals)
            print("Attempting trainAndClassify with these parameters:",
                  cname, mname, ngramVal, negtnVal)
            trainAndClassify(tweets2, classifier=cname, method=mname,
                             feature_set={'ngram': ngramVal, 'negtn': negtnVal},
                             fileprefix=fileprefix + '_' + TIME_STAMP1)
        except Exception as e:
            print(e)