def main(argv):
    __usage__ = '''
    usage: python sentiment.py logs/fileprefix ClassifierName,s methodName,s ngramVal,s negtnVal,s
        ClassifierName,s: %s
        methodName,s: %s
        ngramVal,s: %s
        negtnVal,s: %s
    ''' % (str(LIST_CLASSIFIERS), str(LIST_METHODS), str([1, 3]), str([0, 1]))

    import sanderstwitter02
    import stanfordcorpus
    import stats

    fileprefix = ''
    if len(argv) >= 1:
        fileprefix = str(argv[0])
    else:
        fileprefix = 'logs/run'

    classifierNames = []
    if len(argv) >= 2:
        classifierNames = [name for name in argv[1].split(',')
                           if name in LIST_CLASSIFIERS]
    else:
        classifierNames = ['NaiveBayesClassifier']

    methodNames = []
    if len(argv) >= 3:
        methodNames = [name for name in argv[2].split(',')
                       if name in LIST_METHODS]
    else:
        methodNames = ['1step']

    ngramVals = []
    if len(argv) >= 4:
        ngramVals = [int(val) for val in argv[3].split(',') if val.isdigit()]
    else:
        ngramVals = [1]

    negtnVals = []
    if len(argv) >= 5:
        negtnVals = [bool(int(val)) for val in argv[4].split(',')
                     if val.isdigit()]
    else:
        negtnVals = [False]

    if (len(fileprefix) == 0 or len(classifierNames) == 0
            or len(methodNames) == 0 or len(ngramVals) == 0
            or len(negtnVals) == 0):
        print(__usage__)
        return

    tweets1 = sanderstwitter02.getTweetsRawData('sentiment.csv')
    tweets2 = stanfordcorpus.getNormalisedTweets(
        'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.5000.norm.csv')
    #random.shuffle(tweets1)
    #random.shuffle(tweets2)
    tweets = tweets1 + tweets2
    random.shuffle(tweets)
    #tweets = tweets[:100]
    sys.stderr.write('\nlen( tweets ) = ' + str(len(tweets)))

    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets1, fileprefix='logs/stats_'+TIME_STAMP+'/TSC' )
    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets2, fileprefix='logs/stats_'+TIME_STAMP+'/STAN' )
    #sys.stderr.write( '\n' )
    #stats.stepStats( tweets, fileprefix='logs/stats_'+TIME_STAMP+'/Both' )

    #generateARFF(tweets, fileprefix)
    #print classifierNames, methodNames, ngramVals, negtnVals

    TIME_STAMP = get_time_stamp()
    for (((cname, mname), ngramVal), negtnVal) in grid(
            grid(grid(classifierNames, methodNames), ngramVals), negtnVals):
        try:
            trainAndClassify(tweets, classifier=cname, method=mname,
                             feature_set={'ngram': ngramVal, 'negtn': negtnVal},
                             fileprefix=fileprefix + '_' + TIME_STAMP)
        except Exception as e:
            print(e)
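# The nested grid(...) calls in each main() variant build the full sweep of
# (classifier, method, ngram, negation) combinations, but grid itself is not
# defined in this section. A minimal sketch of what it is assumed to do --
# pair every element of the first list with every element of the second -- is:

def grid(alist, blist):
    # Cross product as a list of (a, b) pairs; nesting grid calls yields the
    # tuples unpacked above, e.g. (((cname, mname), ngramVal), negtnVal).
    return [(a, b) for a in alist for b in blist]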
def main(argv):
    __usage__ = '''
    usage: python sentiment.py --logstore logs/fileprefix --classifier ClassifierName,s --negate negtnVal,s
           --since date(yyyy-mm-aa) --until date(yyyy-mm-aa) --querysearch search_text --near city_of_tweets
           --maxtweets max_number --output filename_for_csv("Rajasthan_BJP.csv") --party partyname
           --sample sample_size_for_training
        ClassifierName,s: %s
        negtnVal,s: %s
    ''' % (str(LIST_CLASSIFIERS), str([0, 1]))

    import stanfordcorpus
    import electiontweets
    import stats
    import Exporter

    if len(argv) == 0:
        print('You must pass some parameters. Use "-h" to help.')
        return

    if len(argv) == 1 and argv[0] == '-h':
        print(__usage__)
        return

    opts, args = getopt.getopt(argv, "", (
        "logstore=", "classifier=", "negate=", "username=", "near=",
        "within=", "since=", "until=", "querysearch=", "toptweets",
        "maxtweets=", "output=", "party=", "sample="))

    fileprefix = 'logs/run'
    classifierNames = ['NaiveBayesClassifier']
    methodNames = ['1step']
    ngramVals = [1]
    negtnVals = [False]
    electionfile = 'output_got.csv'
    # Kept as a string: it is spliced into file names below and converted
    # with int() where a number is needed.
    sample = '1000000'

    for opt, arg in opts:
        if opt == '--logstore':
            fileprefix = arg
        if opt == '--classifier':
            classifierNames = [name for name in arg.split(',')
                               if name in LIST_CLASSIFIERS]
        if opt == '--negate':
            negtnVals = [bool(int(val)) for val in arg.split(',')
                         if val.isdigit()]
        if opt == '--output':
            electionfile = str(arg)
        if opt == '--sample':
            sample = str(arg)

    if not os.path.isfile(electionfile):
        Exporter.main(argv)

    if (len(fileprefix) == 0 or len(classifierNames) == 0
            or len(methodNames) == 0 or len(ngramVals) == 0
            or len(negtnVals) == 0):
        print(__usage__)
        return

    if not os.path.isfile('stanfordcorpus/' + stanfordcorpus.FULLDATA
                          + '.' + sample + '.norm.csv'):
        stanfordcorpus.randomSampleCSV(
            'stanfordcorpus/' + stanfordcorpus.FULLDATA,
            'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.sample.csv',
            K=int(sample))
        stanfordcorpus.getNormalisedCSV(
            'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.sample.csv',
            'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.norm.csv')

    tweets = stanfordcorpus.getNormalisedTweets(
        'stanfordcorpus/' + stanfordcorpus.FULLDATA + '.' + sample + '.norm.csv')
    tweets3 = electiontweets.getTweetsRawData(electionfile)
    random.shuffle(tweets)
    sys.stderr.write('\nlen( tweets ) = ' + str(len(tweets)))

    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets1, fileprefix='logs/stats_'+TIME_STAMP+'/TSC' )
    #sys.stderr.write( '\n' )
    #stats.preprocessingStats( tweets2, fileprefix='logs/stats_'+TIME_STAMP+'/STAN' )
    #sys.stderr.write( '\n' )
    #stats.stepStats( tweets, fileprefix='logs/stats_'+TIME_STAMP+'/Both' )

    #generateARFF(tweets, fileprefix)
    #print classifierNames, methodNames, ngramVals, negtnVals

    TIME_STAMP = get_time_stamp()
    for (((cname, mname), ngramVal), negtnVal) in grid(
            grid(grid(classifierNames, methodNames), ngramVals), negtnVals):
        try:
            trainAndClassify(tweets, tweets3, classifier=cname, method=mname,
                             feature_set={'ngram': ngramVal, 'negtn': negtnVal},
                             fileprefix=fileprefix + '_' + TIME_STAMP,
                             ansfile=electionfile)
        except Exception as e:
            print(e)
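# Example invocation of the getopt-based variant above, using only flags from
# its usage string; the query text, city, dates, and sample size are
# illustrative placeholders, not values taken from the original code:
#
#   python sentiment.py --logstore logs/run --classifier NaiveBayesClassifier \
#       --negate 0 --querysearch "BJP" --near "Jaipur" --since 2018-01-01 \
#       --until 2018-12-01 --maxtweets 500 --output Rajasthan_BJP.csv \
#       --party BJP --sample 10000
#
# If the --output CSV is not already on disk, the function forwards the same
# argv to Exporter.main() to collect the tweets before training and
# classification run.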
def main(argv):
    #####################################################################
    """python sentimenttry.py logs/fileprefix NaiveBayesClassifier,MaxentClassifier,DecisionTreeClassifier,SvmClassifier 1step,2step 1,3 0"""
    __usage__ = '''
    usage: python sentiment.py logs/fileprefix ClassifierName,s methodName,s ngramVal,s negtnVal,s
    example: python sentimenttry.py logs/fileprefix NaiveBayesClassifier,MaxentClassifier,DecisionTreeClassifier,SvmClassifier 1step,2step 1,3 0
        ClassifierName,s: %s
        methodName,s: %s
        ngramVal,s: %s
        negtnVal,s: %s
    ''' % (str(LIST_CLASSIFIERS), str(LIST_METHODS), str([1, 3]), str([0, 1]))

    fileprefix = ''
    if len(argv) >= 1:
        fileprefix = str(argv[0])
    else:
        fileprefix = 'logs/run'

    classifierNames = []
    if len(argv) >= 2:
        classifierNames = [name for name in argv[1].split(',')
                           if name in LIST_CLASSIFIERS]
    else:
        classifierNames = ['NaiveBayesClassifier']

    methodNames = []
    if len(argv) >= 3:
        methodNames = [name for name in argv[2].split(',')
                       if name in LIST_METHODS]
    else:
        methodNames = ['1step']

    ngramVals = []
    if len(argv) >= 4:
        ngramVals = [int(val) for val in argv[3].split(',') if val.isdigit()]
        """Equivalent loop form:
        ngramVals = []
        for val in argv[3].split(','):
            if val.isdigit():
                ngramVals.append(int(val))"""
    else:
        ngramVals = [2]

    negtnVals = []
    if len(argv) >= 5:
        negtnVals = [bool(int(val)) for val in argv[4].split(',')
                     if val.isdigit()]
    else:
        negtnVals = [False]

    print(classifierNames, methodNames, ngramVals, negtnVals)

    if (len(fileprefix) == 0 or len(classifierNames) == 0
            or len(methodNames) == 0 or len(ngramVals) == 0
            or len(negtnVals) == 0):
        print(__usage__)
        return

    tweets2 = stanfordcorpus.getNormalisedTweets(
        './stanfordcorpus/' + stanfordcorpus.FULLDATA + '.100000.norm.csv')
    random.shuffle(tweets2)
    sys.stderr.write("starting sentimental analysis")
    sys.stderr.write('\nlen( tweets ) = ' + str(len(tweets2)))
    sys.stderr.write('\n')
    sys.stdout.flush()
    try1.preprocessingStats(tweets2,
                            fileprefix='logs/stats_' + TIME_STAMP + '/STAN')
    #print(tweets2)
    #tweets2 = ["gave my mother her mother's day present. she loved it ", 'pos', 'NO_QUERY', []]
    """for ((k,x),y) in grid(grid( classifierNames, methodNames),ngramVals):
        print((k,x),y)
    output for above
    ('NaiveBayesClassifier', '1step') 1
    ('NaiveBayesClassifier', '1step') 3
    ('NaiveBayesClassifier', '2step') 1
    ('NaiveBayesClassifier', '2step') 3
    ('MaxentClassifier', '1step') 1
    ('MaxentClassifier', '1step') 3
    ('MaxentClassifier', '2step') 1
    ('MaxentClassifier', '2step') 3
    ('DecisionTreeClassifier', '1step') 1
    ('DecisionTreeClassifier', '1step') 3
    ('DecisionTreeClassifier', '2step') 1
    ('DecisionTreeClassifier', '2step') 3
    ('SvmClassifier', '1step') 1
    ('SvmClassifier', '1step') 3
    ('SvmClassifier', '2step') 1
    ('SvmClassifier', '2step') 3

    for (((k,x),y),p) in grid(grid(grid( classifierNames, methodNames),ngramVals),negtnVals):
        print(((k,x),y),p)
    (('NaiveBayesClassifier', '1step'), 1) False
    (('NaiveBayesClassifier', '1step'), 3) False
    (('NaiveBayesClassifier', '2step'), 1) False
    (('NaiveBayesClassifier', '2step'), 3) False
    (('MaxentClassifier', '1step'), 1) False
    (('MaxentClassifier', '1step'), 3) False
    (('MaxentClassifier', '2step'), 1) False
    (('MaxentClassifier', '2step'), 3) False
    (('DecisionTreeClassifier', '1step'), 1) False
    (('DecisionTreeClassifier', '1step'), 3) False
    (('DecisionTreeClassifier', '2step'), 1) False
    (('DecisionTreeClassifier', '2step'), 3) False
    (('SvmClassifier', '1step'), 1) False
    (('SvmClassifier', '1step'), 3) False
    (('SvmClassifier', '2step'), 1) False
    (('SvmClassifier', '2step'), 3) False"""

    TIME_STAMP1 = get_time_stamp()  # defined above
    for (((cname, mname), ngramVal), negtnVal) in grid(
            grid(grid(classifierNames, methodNames), ngramVals), negtnVals):
        try:
            #print(classifierNames, methodNames, ngramVals, negtnVals)
            print("Attempting trainAndClassify with these parameters:",
                  cname, mname, ngramVal, negtnVal)
            trainAndClassify(tweets2, classifier=cname, method=mname,
                             feature_set={'ngram': ngramVal, 'negtn': negtnVal},
                             fileprefix=fileprefix + '_' + TIME_STAMP1)
        except Exception as e:
            print(e)