def detectEvents(limit=300,similarityType=MED_SIM,minimalTermPerTweet=MIN_TERM_OCCURENCE,remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,printEvents=True) :
    mongoDBHandler=MongoDBHandler()
    tweets=mongoDBHandler.getAllTweets(limit=limit)

    if similarityType==LED_SIM :
        s=LEDSimilarityMatrixBuilder(timeThreshold=TIME_RESOLUTION,distanceThreshold=DISTANCE_RESOLUTION)
        eventDetector=EventDetector(tweets,s)
        events=eventDetector.getEvents(minimalTermPerTweet=minimalTermPerTweet,remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
    elif similarityType==MED_SIM :
        s=MEDSimilarityMatrixBuilder(timeResolution=TIME_RESOLUTION,distanceResolution=DISTANCE_RESOLUTION,scaleNumber=SCALE_NUMBER,minSimilarity=MIN_SIMILARITY)
        eventDetector=EventDetector(tweets,s)
        events=eventDetector.getEvents(minimalTermPerTweet=minimalTermPerTweet,remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
    else :
        eventDetector=OptimisedEventDetectorMEDBased(tweets,timeResolution=TIME_RESOLUTION,distanceResolution=DISTANCE_RESOLUTION,scaleNumber=SCALE_NUMBER,minSimilarity=MIN_SIMILARITY)
        events=eventDetector.getEvents(minimalTermPerTweet=minimalTermPerTweet,remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
        
    print ""
    print "-"*40
    print "{0} Event detected : ".format(len(events))
    print "-"*40

    if printEvents :
        eventDetector.showTopEvents(top=10)

    return events
def detectEvents(limit=300,
                 minimalTermPerTweet=MIN_TERM_OCCURENCE,
                 remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,
                 printEvents=True):
    mongoDBHandler = MongoDBHandler()
    tweets = mongoDBHandler.getAllTweets(limit=limit)

    eventDetector = OptimisedEventDetectorMEDBased(
        tweets,
        timeResolution=TIME_RESOLUTION,
        distanceResolution=DISTANCE_RESOLUTION,
        scaleNumber=SCALE_NUMBER,
        minSimilarity=MIN_SIMILARITY)
    events = eventDetector.getEvents(
        minimalTermPerTweet=minimalTermPerTweet,
        remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    print ""
    print "-" * 40
    print "{0} Event detected : ".format(len(events))
    print "-" * 40

    if printEvents: eventDetector.showTopEvents(top=10)

    return events
Пример #3
0
def showTweetsSpaceDistribution(limit=300,
                                mongoDBName='Twitter',
                                mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsInSpaceDistribution(tweets)
Пример #4
0
def showTermSpaceDistributionByOrder(limit=300,
                                     topTermOrder=0,
                                     mongoDBName='Twitter',
                                     mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistributionWithOrder(tweets, topTermOrder=topTermOrder)
Пример #5
0
def showTermSpaceDistributionByTerm(limit=300,
                                    term="#shopping",
                                    mongoDBName='Twitter',
                                    mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistribution(tweets, term)
Пример #6
0
def showTermOccurenceDistribution(limit=300,
                                  mongoDBName='Twitter',
                                  mongoCollectionName="tweets",
                                  useOnlyHashtags=False):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermOccurencesDistribution(tweets, useOnlyHashtags=useOnlyHashtags)
Пример #7
0
def getTweetsFromTwitterAndSave(count=100,
                                export=False,
                                mongoDBName='Twitter',
                                mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    api = MyTwitterAPI("twitter_config_file.txt")
    tweets = api.getTweets(count=count, export=export)
    mongoDBHandler.saveTweets(tweets)
Пример #8
0
def showTweetsNumberSignal(limit=300,
                           granularity=3600,
                           dyadic=True,
                           mongoDBName='Twitter',
                           mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsApparitionInTime(tweets, granularity=granularity, dyadic=dyadic)
Пример #9
0
def showTermOccurenceSignalByOrder(limit=300,
                                   topTermOrder=0,
                                   granularity=3600,
                                   dyadic=True):
    mongoDBHandler = MongoDBHandler()
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTimeWithOrder(tweets,
                                      topTermOrder=topTermOrder,
                                      granularity=granularity,
                                      dyadic=dyadic)
Пример #10
0
def showTermOccurenceSignalByTerm(limit=300,
                                  term="#shopping",
                                  granularity=3600,
                                  dyadic=True):
    mongoDBHandler = MongoDBHandler()
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTime(tweets,
                             term,
                             granularity=granularity,
                             dyadic=dyadic)
Пример #11
0
def showTermOccurenceSignalByTerm(limit=300,
                                  term="#shopping",
                                  granularity=3600,
                                  dyadic=True,
                                  mongoDBName='Twitter',
                                  mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTime(tweets,
                             term,
                             granularity=granularity,
                             dyadic=dyadic)
Пример #12
0
def showTermOccurenceSignalByOrder(limit=300,
                                   topTermOrder=0,
                                   granularity=3600,
                                   dyadic=True,
                                   mongoDBName='Twitter',
                                   mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTimeWithOrder(tweets,
                                      topTermOrder=topTermOrder,
                                      granularity=granularity,
                                      dyadic=dyadic,
                                      mongoDBName='Twitter',
                                      mongoCollectionName="tweets")
def detectEvents(limit=300,similarityType=MED_SIM,minimalTermPerTweet=MIN_TERM_OCCURENCE,remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,printEvents=True,printInFile="events.txt",useOnlyHashtags=False,mongoDBName='Twitter',mongoCollectionName="tweets") :
    staringTime=time.time()
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)

    if similarityType==LED_SIM :
        s=LEDSimilarityMatrixBuilder(timeThreshold=TIME_RESOLUTION,distanceThreshold=DISTANCE_RESOLUTION,useOnlyHashtags=useOnlyHashtags)
        eventDetector=EventDetector(tweets,s)
        events=eventDetector.getEvents(minimalTermPerTweet=minimalTermPerTweet,remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
    elif similarityType==MED_SIM :
        s=MEDSimilarityMatrixBuilder(timeResolution=TIME_RESOLUTION,distanceResolution=DISTANCE_RESOLUTION,scaleNumber=SCALE_NUMBER,minSimilarity=MIN_SIMILARITY,useOnlyHashtags=useOnlyHashtags)
        eventDetector=EventDetector(tweets,s)
        events=eventDetector.getEvents(minimalTermPerTweet=minimalTermPerTweet,remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
    else :
        eventDetector=OptimisedEventDetectorMEDBased(tweets,timeResolution=TIME_RESOLUTION,distanceResolution=DISTANCE_RESOLUTION,scaleNumber=SCALE_NUMBER,minSimilarity=MIN_SIMILARITY,useOnlyHashtags=useOnlyHashtags)
        events=eventDetector.getEvents(minimalTermPerTweet=minimalTermPerTweet,remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
        
    print ""
    print "-"*40
    print "{0} Event detected : ".format(len(events))
    print "-"*40

    if printEvents :
        eventDetector.showTopEvents(top=len(events))

    if printInFile :
        txtFile=open(printInFile, 'w')
        elapsed_time=(time.time()-staringTime)
        txtFile.write("Total elapsed time : {0}s\n".format(elapsed_time))
        txtFile.write("-"*40+"\n")
        SEPARATOR="\t|"
        HEADER="|"+SEPARATOR.join(["Median time","estimated duration (s)","mean latitude","mean longitude","radius (m)","user number","tweets number","top hashtags"])+SEPARATOR+"\n"
        txtFile.write(HEADER)
        txtFile.write("-"*40+"\n")
        for event in events :
            line=eventDetector.getStringOfEvent(event)
            txtFile.write(line+"\n")
            txtFile.write("-"*40+"\n")
        txtFile.close();
        
    return events
Пример #14
0
def getTweetsFromJSONRepositoryAndSave(repositoryPath="E:\\tweets",
                                       mongoDBName='Twitter',
                                       mongoCollectionName="tweets"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    mongoDBHandler.saveTweetsFromJSONRepository(repositoryPath)
def showTweetsNumberSignal(limit=300,granularity=3600, dyadic=True) :
    mongoDBHandler=MongoDBHandler()
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsApparitionInTime(tweets, granularity=granularity, dyadic=dyadic)
def showTermOccurenceSignalByOrder(limit=300,topTermOrder=0, granularity=3600, dyadic=True) :
    mongoDBHandler=MongoDBHandler()
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTimeWithOrder(tweets,topTermOrder=topTermOrder, granularity=granularity, dyadic=dyadic)
def showTweetsSpaceDistribution(limit=300,mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsInSpaceDistribution(tweets)
def showTermOccurenceDistribution(limit=300,mongoDBName='Twitter',mongoCollectionName="tweets",useOnlyHashtags=False) :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermOccurencesDistribution(tweets,useOnlyHashtags=useOnlyHashtags)
def showTermSpaceDistributionByOrder(limit=300,topTermOrder=0,mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistributionWithOrder(tweets,topTermOrder=topTermOrder)
def getTweetsFromTwitterAndSave(count=100,export=False) :
    mongoDBHandler=MongoDBHandler()
    api = MyTwitterAPI("twitter_config_file.txt")
    tweets = api.getTweets(count=count,export=export)
    mongoDBHandler.saveTweets(tweets)
def showTermSpaceDistributionByTerm(limit=300,term="#shopping") :
    mongoDBHandler=MongoDBHandler()
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistribution(tweets,term)
def showTermOccurenceSignalByOrder(limit=300,topTermOrder=0, granularity=3600, dyadic=True,mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTimeWithOrder(tweets,topTermOrder=topTermOrder, granularity=granularity, dyadic=dyadic,mongoDBName='Twitter',mongoCollectionName="tweets")
def showTweetsNumberSignal(limit=300,granularity=3600, dyadic=True,mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsApparitionInTime(tweets, granularity=granularity, dyadic=dyadic)
def getTweetsFromCSVFileAndSave(csvFilePath="D:\\PRJS\\Data\\final.csv",mongoDBName='Twitter',mongoCollectionName="tweetsMehdi") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    mongoDBHandler.saveTweetsFromCSVFile(csvFilePath)
def getTweetsFromJSONRepositoryAndSave(repositoryPath="E:\\tweets",mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    mongoDBHandler.saveTweetsFromJSONRepository(repositoryPath)
def getTweetsFromTwitterAndSave(count=100,export=False,mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    api = MyTwitterAPI("twitter_config_file.txt")
    tweets = api.getTweets(count=count,export=export)
    mongoDBHandler.saveTweets(tweets)
Пример #27
0
def getTweetsFromCSVFileAndSave(csvFilePath="D:\\PRJS\\Data\\final.csv",
                                mongoDBName='Twitter',
                                mongoCollectionName="tweetsMehdi"):
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    mongoDBHandler.saveTweetsFromCSVFile(csvFilePath)
Пример #28
0
def showTermSpaceDistributionByOrder(limit=300, topTermOrder=0):
    mongoDBHandler = MongoDBHandler()
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistributionWithOrder(tweets, topTermOrder=topTermOrder)
Пример #29
0
def detectEvents(limit=300,
                 similarityType=MED_SIM,
                 minimalTermPerTweet=MIN_TERM_OCCURENCE,
                 remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,
                 printEvents=True,
                 printInFile="events.txt",
                 useOnlyHashtags=False,
                 mongoDBName='Twitter',
                 mongoCollectionName="tweets"):
    staringTime = time.time()
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)

    if similarityType == LED_SIM:
        s = LEDSimilarityMatrixBuilder(timeThreshold=TIME_RESOLUTION,
                                       distanceThreshold=DISTANCE_RESOLUTION,
                                       useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, s)
        events = eventDetector.getEvents(
            minimalTermPerTweet=minimalTermPerTweet,
            remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
    elif similarityType == MED_SIM:
        s = MEDSimilarityMatrixBuilder(timeResolution=TIME_RESOLUTION,
                                       distanceResolution=DISTANCE_RESOLUTION,
                                       scaleNumber=SCALE_NUMBER,
                                       minSimilarity=MIN_SIMILARITY,
                                       useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, s)
        events = eventDetector.getEvents(
            minimalTermPerTweet=minimalTermPerTweet,
            remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)
    else:
        eventDetector = OptimisedEventDetectorMEDBased(
            tweets,
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY,
            useOnlyHashtags=useOnlyHashtags)
        events = eventDetector.getEvents(
            minimalTermPerTweet=minimalTermPerTweet,
            remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    print ""
    print "-" * 40
    print "{0} Event detected : ".format(len(events))
    print "-" * 40

    if printEvents:
        eventDetector.showTopEvents(top=len(events))

    if printInFile:
        txtFile = open(printInFile, 'w')
        elapsed_time = (time.time() - staringTime)
        txtFile.write("Total elapsed time : {0}s\n".format(elapsed_time))
        txtFile.write("-" * 40 + "\n")
        SEPARATOR = "\t|"
        HEADER = "|" + SEPARATOR.join([
            "Median time", "estimated duration (s)", "mean latitude",
            "mean longitude", "radius (m)", "user number", "tweets number",
            "top hashtags"
        ]) + SEPARATOR + "\n"
        txtFile.write(HEADER)
        txtFile.write("-" * 40 + "\n")
        for event in events:
            line = eventDetector.getStringOfEvent(event)
            txtFile.write(line + "\n")
            txtFile.write("-" * 40 + "\n")
        txtFile.close()

    return events
Пример #30
0
def getTweetsFromTwitterAndSave(count=100, export=False):
    mongoDBHandler = MongoDBHandler()
    api = MyTwitterAPI("twitter_config_file.txt")
    tweets = api.getTweets(count=count, export=export)
    mongoDBHandler.saveTweets(tweets)
def getTweetsFromJSONRepositoryAndSave(repositoryPath="E:\\tweets") :
    mongoDBHandler=MongoDBHandler()
    mongoDBHandler.saveTweetsFromJSONRepository(repositoryPath)
Пример #32
0
def showTweetsNumberSignal(limit=300, granularity=3600, dyadic=True):
    mongoDBHandler = MongoDBHandler()
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsApparitionInTime(tweets, granularity=granularity, dyadic=dyadic)
Пример #33
0
def showTweetsSpaceDistribution(limit=300):
    mongoDBHandler = MongoDBHandler()
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsInSpaceDistribution(tweets)
def showTermSpaceDistributionByTerm(limit=300,term="#shopping",mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistribution(tweets,term)
Пример #35
0
def showTermSpaceDistributionByTerm(limit=300, term="#shopping"):
    mongoDBHandler = MongoDBHandler()
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistribution(tweets, term)
def showTermSpaceDistributionByOrder(limit=300,topTermOrder=0) :
    mongoDBHandler=MongoDBHandler()
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermInSpaceDistributionWithOrder(tweets,topTermOrder=topTermOrder)
Пример #37
0
def getTweetsFromJSONRepositoryAndSave(repositoryPath="E:\\tweets"):
    mongoDBHandler = MongoDBHandler()
    mongoDBHandler.saveTweetsFromJSONRepository(repositoryPath)
def showTweetsSpaceDistribution(limit=300) :
    mongoDBHandler=MongoDBHandler()
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTweetsInSpaceDistribution(tweets)
def showTermOccurenceSignalByTerm(limit=300,term="#shopping", granularity=3600, dyadic=True) :
    mongoDBHandler=MongoDBHandler()
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTime(tweets,term, granularity=granularity, dyadic=dyadic)
def showTermOccurenceSignalByTerm(limit=300,term="#shopping", granularity=3600, dyadic=True,mongoDBName='Twitter',mongoCollectionName="tweets") :
    mongoDBHandler=MongoDBHandler(database_name=mongoDBName,collection_name=mongoCollectionName)
    tweets=mongoDBHandler.getAllTweets(limit=limit)
    plotTermApparitionInTime(tweets,term, granularity=granularity, dyadic=dyadic)