Пример #1
0
 def generateStatsToDetermineFixedWindowLength():
     """Record next-day AUCM for every classifier length available on each day.

     Steps one day at a time from Settings.startTime through Settings.endTime
     (inclusive). For each length yielded by
     Utilities.getClassifierLengthsByDay(day, maxLength), the matching
     unigram FixedWindowClassifier is loaded, scored with getAUCM against one
     day of TestDocuments dated the following day, and the resulting record
     is handed to Utilities.writeAsJsonToFile together with
     Settings.stats_to_determine_fixed_window_length.
     """
     global maxLength
     unigram = DocumentType.typeRuuslUnigram
     oneDay = timedelta(days=1)
     day = Settings.startTime
     while day <= Settings.endTime:
         for windowLength in Utilities.getClassifierLengthsByDay(day, maxLength):
             model = FixedWindowClassifier(
                 currentTime=day,
                 numberOfExperts=Settings.numberOfExperts,
                 dataType=unigram,
                 noOfDays=windowLength)
             model.load()
             record = {
                 'day': datetime.strftime(day, Settings.twitter_api_time_format),
                 'classifier_length': windowLength,
                 'metric': 'aucm',
                 'number_of_experts': Settings.numberOfExperts,
                 'data_type': unigram,
                 'test_data_days': 1,
             }
             # The classifier for `day` is evaluated on the documents of the day after it.
             nextDayDocuments = TestDocuments(
                 currentTime=day + oneDay,
                 numberOfExperts=Settings.numberOfExperts,
                 dataType=unigram,
                 noOfDays=1)
             record['value'] = model.getAUCM(nextDayDocuments.iterator())
             Utilities.writeAsJsonToFile(record, Settings.stats_to_determine_fixed_window_length)
         day += oneDay
Пример #2
0
 def generateStatsForTopFeatures():
     """Dump the most informative features of the one-day classifier for each day.

     Steps one day at a time from Settings.startTime through Settings.endTime
     (inclusive). For each day the unigram FixedWindowClassifier with
     noOfDays=1 is loaded, and the result of showMostInformativeFeatures(2000)
     is stored under 'features' in a record passed to
     Utilities.writeAsJsonToFile together with
     Settings.stats_for_most_informative_features.
     """
     windowLength = 1
     unigram = DocumentType.typeRuuslUnigram
     oneDay = timedelta(days=1)
     day = Settings.startTime
     while day <= Settings.endTime:
         model = FixedWindowClassifier(
             currentTime=day,
             numberOfExperts=Settings.numberOfExperts,
             dataType=unigram,
             noOfDays=windowLength)
         model.load()
         record = {
             'day': datetime.strftime(day, Settings.twitter_api_time_format),
             'classifier_length': windowLength,
             'number_of_experts': Settings.numberOfExperts,
             'data_type': unigram,
         }
         # presumably 2000 caps the number of features returned -- confirm against the classifier
         record['features'] = model.showMostInformativeFeatures(2000)
         Utilities.writeAsJsonToFile(record, Settings.stats_for_most_informative_features)
         day += oneDay
Пример #3
0
 def generateStatsToCompareDifferentDocumentTypes():
     """Record next-day AUCM of the ideal-length classifier for each document type.

     Steps one day at a time from Settings.startTime through Settings.endTime
     (inclusive). A day is processed only when idealModelLength is among the
     lengths returned by Utilities.getClassifierLengthsByDay(day, maxLength).
     For every document type in the list below, the matching
     FixedWindowClassifier is loaded, scored with getAUCM against one day of
     TestDocuments dated the following day, and the record is passed to
     Utilities.writeAsJsonToFile together with
     Settings.stats_to_compare_different_document_types.
     """
     global maxLength, idealModelLength
     documentTypes = [
         DocumentType.typeRuuslUnigram,
         DocumentType.typeCharBigram,
         DocumentType.typeCharTrigram,
         DocumentType.typeRuuslBigram,
         DocumentType.typeRuuslTrigram,
         DocumentType.typeRuuslSparseBigram,
         DocumentType.typeRuuslUnigramNouns,
         DocumentType.typeRuuslUnigramWithMeta,
         DocumentType.typeRuuslUnigramNounsWithMeta,
     ]
     oneDay = timedelta(days=1)
     day = Settings.startTime
     while day <= Settings.endTime:
         # Keep the ideal length only if a classifier of that length exists for this day.
         availableLengths = set(Utilities.getClassifierLengthsByDay(day, maxLength))
         selectedLengths = list(set([idealModelLength]).intersection(availableLengths))
         for windowLength in selectedLengths:
             for documentType in documentTypes:
                 # Single pre-formatted argument: prints the same space-separated line
                 # as the Python 2 statement `print a, b, c`.
                 print('%s %s %s' % (day, windowLength, documentType))
                 model = FixedWindowClassifier(
                     currentTime=day,
                     numberOfExperts=Settings.numberOfExperts,
                     dataType=documentType,
                     noOfDays=windowLength)
                 model.load()
                 record = {
                     'day': datetime.strftime(day, Settings.twitter_api_time_format),
                     'classifier_length': windowLength,
                     'metric': 'aucm',
                     'number_of_experts': Settings.numberOfExperts,
                     'data_type': documentType,
                     'test_data_days': 1,
                 }
                 nextDayDocuments = TestDocuments(
                     currentTime=day + oneDay,
                     numberOfExperts=Settings.numberOfExperts,
                     dataType=documentType,
                     noOfDays=1)
                 record['value'] = model.getAUCM(nextDayDocuments.iterator())
                 Utilities.writeAsJsonToFile(record, Settings.stats_to_compare_different_document_types)
         day += oneDay
Пример #4
0
 def generateStatsForDiminishingAUCM():
     """Record AUCM of fixed classifiers on test days that move further into the future.

     For each of 5 consecutive training days starting at 2011-03-26, and for
     each of the 19 test days that follow the training day, unigram
     FixedWindowClassifier instances of length 1, 4 and 8 days are loaded for
     the training day and scored with getAUCM on one day of TestDocuments
     dated the test day. Each result is passed to Utilities.writeAsJsonToFile
     together with Settings.stats_for_diminishing_aucm.

     Processing is best effort per training day: if anything raises an
     Exception, the remaining work for that training day is abandoned, the
     traceback is logged, and the run continues with the next training day.
     KeyboardInterrupt and SystemExit are not intercepted, so the run can
     still be stopped.
     """
     # Function-scope import: the file's import block is not visible from here.
     import logging

     classifierLengths = [1, 4, 8]
     oneDay = timedelta(days=1)
     currentDay = datetime(2011, 3, 26)
     for _ in range(5):
         # Parenthesised single argument prints identically under Python 2 and 3.
         print(currentDay)
         try:
             testDay = currentDay + oneDay
             for daysInFuture in range(1, 20):
                 for noOfDay in classifierLengths:
                     # NOTE(review): the classifier depends only on (currentDay, noOfDay)
                     # but is rebuilt and reloaded for every test day; it could probably
                     # be cached per length if load()/getAUCM() keep no state -- confirm.
                     classifier = FixedWindowClassifier(
                         currentTime=currentDay,
                         numberOfExperts=Settings.numberOfExperts,
                         dataType=DocumentType.typeRuuslUnigram,
                         noOfDays=noOfDay)
                     classifier.load()
                     data = {
                         'day': datetime.strftime(currentDay, Settings.twitter_api_time_format),
                         'test_day': datetime.strftime(testDay, Settings.twitter_api_time_format),
                         'classifier_length': noOfDay,
                         'metric': 'aucm',
                         'number_of_experts': Settings.numberOfExperts,
                         'data_type': DocumentType.typeRuuslUnigram,
                         'test_data_days': 1,
                         'no_of_days_in_future': daysInFuture,
                     }
                     testDocuments = TestDocuments(
                         currentTime=testDay,
                         numberOfExperts=Settings.numberOfExperts,
                         dataType=DocumentType.typeRuuslUnigram,
                         noOfDays=1)
                     data['value'] = classifier.getAUCM(testDocuments.iterator())
                     Utilities.writeAsJsonToFile(data, Settings.stats_for_diminishing_aucm)
                 testDay += oneDay
         except Exception:
             # Previously a bare `except: pass`, which hid every failure and also
             # swallowed KeyboardInterrupt/SystemExit. Keep the skip-and-continue
             # behaviour, but leave a traceback behind.
             logging.exception('Skipping remaining diminishing-AUCM stats for training day %s', currentDay)
         currentDay += oneDay
Пример #5
0
 def fixedWindowOfDifferentLengthsAndDataTypes():
     """Train and save the ideal-length classifier for each day in the configured range.

     Steps one day at a time from Settings.startTime through Settings.endTime
     (inclusive). A day is processed only when idealModelLength is among the
     lengths returned by Utilities.getClassifierLengthsByDay(day, maxLength).
     For every document type in the list below, a FixedWindowClassifier is
     built for that day and length and its trainAndSave() method is called.
     The day and the selected lengths are printed as progress output.
     """
     global maxLength, idealModelLength
     documentTypes = [DocumentType.typeRuuslUnigramNounsWithMeta]
     oneDay = timedelta(days=1)
     day = Settings.startTime
     while day <= Settings.endTime:
         # Keep the ideal length only if a classifier of that length exists for this day.
         availableLengths = set(Utilities.getClassifierLengthsByDay(day, maxLength))
         selectedLengths = list(set([idealModelLength]).intersection(availableLengths))
         # Single pre-formatted argument: prints the same space-separated line
         # as the Python 2 statement `print a, b`.
         print('%s %s' % (day, selectedLengths))
         for windowLength in selectedLengths:
             for documentType in documentTypes:
                 model = FixedWindowClassifier(
                     currentTime=day,
                     numberOfExperts=Settings.numberOfExperts,
                     dataType=documentType,
                     noOfDays=windowLength)
                 model.trainAndSave()
         day += oneDay