예제 #1
0
def printSuggRankedDocs(smoothness, usedTestFiles):
    """Rank test documents by total smoothness score and write the ranking
    (document id -> 1-based rank, best first) as JSON into today's
    SUGG_GOOGLENEWS engine folder.

    smoothness    : 2-D score matrix; row i corresponds to usedTestFiles[i].
    usedTestFiles : list of document identifiers.
    Returns True on success, False on any failure.
    """
    result = False
    write_directory = ""  # defined up front so the except handler can log it
    try:
        # Collapse each row to one per-document score.
        testScore = smoothness.sum(axis=1)
        testMapping = {}
        for i, docName in enumerate(usedTestFiles):
            testMapping[docName] = testScore[i]
        # Highest smoothness score gets rank 1.
        sorted_x = sorted(testMapping.items(),
                          key=operator.itemgetter(1),
                          reverse=True)
        todayDateFolder = util.getTodayDateFolder()
        write_directory = os.path.join(Constants.ROOT_FOLDER,
                                       Constants.RECOMMENDATION_DIR,
                                       Constants.ENGINE_DIR, todayDateFolder,
                                       Constants.SUGG_GOOGLENEWS)
        if not os.path.exists(write_directory):
            os.makedirs(write_directory)
        json_write = {}
        count = 1
        for (key, val) in sorted_x:
            json_write[key] = count
            count = count + 1
        # with-statement closes the handle even when dump fails (the
        # original leaked it on error).
        with open(os.path.join(write_directory, Constants.SMOOTHNESS_FILE),
                  'w') as outfile:
            json.dump(json_write, outfile)
        result = True
    except Exception as e:
        # BUG FIX: was util.logger.eror (typo), which raised AttributeError
        # inside the handler and masked the real failure.
        util.logger.error(
            "Exception at printing Smoothness GoogleSugg docs for data : %s" %
            write_directory)
    # BUG FIX: `result` was computed but never returned.
    return result
예제 #2
0
파일: Relevance.py 프로젝트: weberna/SuPrIA
def printSuggRankedDocs(testMapping):
    """Write the relevance ranking (document id -> 1-based rank, highest
    score first) as JSON into today's SUGG_GOOGLENEWS engine folder.

    testMapping : dict mapping document id -> relevance score.
    Returns True on success, False on any failure.
    """
    result = False
    write_directory = ""  # defined up front so the except handler can log it
    try:
        # Highest relevance score gets rank 1.
        sorted_x = sorted(testMapping.items(),
                          key=operator.itemgetter(1),
                          reverse=True)
        todayDateFolder = util.getTodayDateFolder()
        write_directory = os.path.join(Constants.ROOT_FOLDER,
                                       Constants.RECOMMENDATION_DIR,
                                       Constants.ENGINE_DIR, todayDateFolder,
                                       Constants.SUGG_GOOGLENEWS)
        if not os.path.exists(write_directory):
            os.makedirs(write_directory)
        json_write = {}
        count = 1
        for (key, val) in sorted_x:
            json_write[key] = count
            count = count + 1
        # with-statement closes the handle even when dump fails (the
        # original leaked it on error).
        with open(os.path.join(write_directory, Constants.RELEVANCE_FILE),
                  'w') as outfile:
            json.dump(json_write, outfile)
        result = True
    except Exception as e:
        util.logger.error(
            "Exception at printing Relevance docs for GoogleSugg : %s" %
            write_directory)
    # BUG FIX: `result` was computed but never returned.
    return result
예제 #3
0
def RemoveBoiler():
    """Run boilerplate removal for today's news, suggested news, and the
    recent training-data folders, resuming data boiling from the last
    folder recorded in LAST_BOILER_DATA_DIR.

    NOTE(review): this snippet is truncated in the visible source — the
    function body is cut off after the final `else:` — so the code below
    is kept byte-identical and only documented.
    """

    todayDate = util.getTodayDateFolder()
    # Last dates each boiling step completed; None when never run.
    lastSuggNewsBoiled = util.loadSettings(
        Constants.LAST_BOILER_SUGGGOOGLENEWS)
    lastNewsBoiled = util.loadSettings(Constants.LAST_BOILER_GOOGLENEWS)
    lastDataBoiled = util.loadSettings(Constants.LAST_BOILER_DATA_DIR)

    if lastNewsBoiled:
        util.logger.info("Google news last boiled for =" + lastNewsBoiled)
    else:
        util.logger.info("Google news last boiled for None")

    if lastSuggNewsBoiled:
        util.logger.info("Sugg Google news last boiled for =" +
                         lastSuggNewsBoiled)
    else:
        util.logger.info("Sugg Google news last boiled for None")

    if lastDataBoiled:
        util.logger.info("data last boiled for =" + lastDataBoiled)
    else:
        util.logger.info("data last boiled for = None")

    # Each step defaults to success so skipped steps don't fail the run.
    boilerNewsStatus = True
    boilerSuggNewsStatus = True
    boilerDataStatus = True

    #Check whether today links have been extracted or not
    if lastNewsBoiled != todayDate:
        boilerNewsStatus = BoilerNews(todayDate)
    else:
        util.logger.info("Boiler news already done for today :" + todayDate)

    #Check whether today sugg links have been extracted or not
    if lastSuggNewsBoiled != todayDate:
        boilerSuggNewsStatus = BoilerSuggNews(todayDate)
    else:
        util.logger.info("Sugg Boiler news already done for today :" +
                         todayDate)

    trainingFolders = util.findTrainingDays()
    anyTrainingFolderBoiled = True
    trainingDoneForDays = 0
    if lastDataBoiled == trainingFolders[0]:
        util.logger.info("Boiler data already done for today :" +
                         lastDataBoiled)
    else:
        anyTrainingFolderBoiled = False
        folderIndex = 0
        if lastDataBoiled != None:
            try:
                # Resume from the folder boiled last time; ValueError means
                # it aged out of the training window.
                folderIndex = trainingFolders.index(lastDataBoiled)
                anyTrainingFolderBoiled = True
            except Exception, e:
                folderIndex = Constants.MAX_PREVIOUS_DAYS
        else:
            # NOTE(review): body truncated in the scraped source from here on.
예제 #4
0
def printRecommendedDocs(recDocs, downloadDate):
    """Select the top combined-rank GoogleSugg documents and write the final
    recommendation JSON to both the FINAL and FEEDBACK directories.

    recDocs      : dict mapping document id -> combined rank (lower = better).
    downloadDate : date folder whose links JSON is read.
    Returns True on success, False when the links JSON is missing or any
    step fails.
    """
    jsonData = readLinksJson(downloadDate)
    if jsonData is None:
        return False

    result = False
    write_directory = ""  # defined up front so the except handler can log it
    jsonData['suggestGoogle'][Constants.BASELINE] = []
    recommInfo = {}

    # Index the successfully downloaded google links by their html file id.
    googleLinks = jsonData['suggestGoogle'][Constants.GOOGLE]
    for linkObj in googleLinks:
        download = linkObj['download']
        htmlFile = linkObj['id']
        if download == "yes":
            recommInfo[htmlFile] = linkObj
    try:
        # Ascending combined rank: best documents first.
        sorted_x = sorted(recDocs.items(), key=operator.itemgetter(1))
        todayDateFolder = util.getTodayDateFolder()
        write_directory = os.path.join(Constants.ROOT_FOLDER,
                                       Constants.FINAL_DIR, todayDateFolder)
        if not os.path.exists(write_directory):
            os.makedirs(write_directory)
        outfile = open(os.path.join(write_directory, Constants.ULTIMATE_FILE),
                       'w')

        feedback_directory = os.path.join(Constants.ROOT_FOLDER,
                                          Constants.FEEDBACK_DIR,
                                          todayDateFolder)
        if not os.path.exists(feedback_directory):
            os.makedirs(feedback_directory)
        feedbackfile = open(
            os.path.join(feedback_directory, Constants.ULTIMATE_FILE), 'w')

        count = 1
        for (key, val) in sorted_x:
            if key in recommInfo:
                linkObj = recommInfo[key]
                linkObj['rank'] = -1
                jsonData['suggestGoogle'][Constants.BASELINE].append(linkObj)
                count = count + 1
                # NOTE(review): with count starting at 1 this stops after
                # RECOMMENDED_LINKS - 1 appends; the sibling implementation
                # starts count at 0 — confirm which cutoff is intended.
                if count >= Constants.RECOMMENDED_LINKS:
                    break
            else:
                util.logger.error(
                    "Key not found in baseline suggestgoogle news = " + key)
        json.dump(jsonData, outfile)
        outfile.close()
        json.dump(jsonData, feedbackfile)
        feedbackfile.close()
        result = True
    except Exception as e:
        util.logger.error(
            "Exception at writing final GoogleSugg Recommendation docs for data : %s"
            % write_directory)
    # BUG FIX: RecommendationMetric tests this return value against True;
    # the original fell off the end and returned None, so the
    # LAST_RECOMMENDATION_DONE setting was never saved.
    return result
예제 #5
0
def RecommendationMetric():
    """Combine today's relevance, smoothness and clarity rankings and hand
    the merged ranking to printRecommendedDocs.

    Each signal file maps document id -> rank; a Counter sums the ranks per
    document (no linear weighting for now). Skips work when recommendation
    was already done today. Returns True when done/succeeded, False on
    failure.
    """
    todayDateFolder = util.getTodayDateFolder()
    lastRecommended = util.loadSettings(Constants.LAST_RECOMMENDATION_DONE)
    if todayDateFolder == lastRecommended:
        return True
    result = False
    try:
        engine_dir = os.path.join(Constants.ROOT_FOLDER,
                                  Constants.RECOMMENDATION_DIR,
                                  Constants.ENGINE_DIR, todayDateFolder)

        def _load_signal(file_name):
            # Best-effort load of one signal file; missing file -> {}.
            path = os.path.join(engine_dir, file_name)
            if os.path.isfile(path):
                with open(path) as json_data:
                    return json.loads(json_data.read())
            return {}

        #Get Relevance json
        relevance_json = _load_signal(Constants.RELEVANCE_FILE)
        #Get Smoothness json
        smoothness_json = _load_signal(Constants.SMOOTHNESS_FILE)
        #Get Clarity json
        clarity_json = _load_signal(Constants.CLARITY_FILE)

        #Lets not apply any linear weight formula for now
        cou = Counter()
        cou.update(relevance_json)
        cou.update(smoothness_json)
        cou.update(clarity_json)

        #Convert above back to a dictionary
        final_json = dict(cou)
        result = printRecommendedDocs(final_json, todayDateFolder)
        if result == True:
            util.saveSettings(Constants.LAST_RECOMMENDATION_DONE,
                              todayDateFolder)
            util.logger.info("Recommended links done for =" + todayDateFolder)
    except Exception as e:
        # BUG FIX: traceback.print_exc was passed uncalled, so the log showed
        # the function object instead of the traceback text.
        util.logger.error(
            "Exception at recommending links for : %s Exception = %s"
            % (todayDateFolder, traceback.format_exc()))
    return result
예제 #6
0
def RecommendationMetric():
    """Combine today's Google-news relevance, smoothness and clarity
    rankings and hand the merged ranking to printRecommendedDocs.

    Signal files live under the GOOGLENEWS subfolder of today's engine
    directory; each maps document id -> rank, and a Counter sums the ranks
    per document (no linear weighting for now). Returns True when done or
    succeeded, False on failure.
    """
    todayDateFolder = util.getTodayDateFolder()
    lastRecommended = util.loadSettings(Constants.LAST_RECOMMENDATION_DONE)
    if todayDateFolder == lastRecommended:
        return True
    result = False
    try:
        engine_dir = os.path.join(Constants.ROOT_FOLDER,
                                  Constants.RECOMMENDATION_DIR,
                                  Constants.ENGINE_DIR, todayDateFolder,
                                  Constants.GOOGLENEWS)

        def _load_signal(file_name):
            # Best-effort load of one signal file; missing file -> {}.
            path = os.path.join(engine_dir, file_name)
            if os.path.isfile(path):
                with open(path) as json_data:
                    return json.loads(json_data.read())
            return {}

        #Get Relevance json
        relevance_json = _load_signal(Constants.RELEVANCE_FILE)
        #Get Smoothness json
        smoothness_json = _load_signal(Constants.SMOOTHNESS_FILE)
        #Get Clarity json
        clarity_json = _load_signal(Constants.CLARITY_FILE)

        #Lets not apply any linear weight formula for now
        cou = Counter()
        cou.update(relevance_json)
        cou.update(smoothness_json)
        cou.update(clarity_json)

        #Convert above back to a dictionary
        final_json = dict(cou)
        result = printRecommendedDocs(final_json, todayDateFolder)
        if result == True:
            util.saveSettings(Constants.LAST_RECOMMENDATION_DONE,
                              todayDateFolder)
            util.logger.info("Recommended Google links done for =" +
                             todayDateFolder)
    except Exception as e:
        # BUG FIX: traceback.print_exc was passed uncalled, so the log showed
        # the function object instead of the traceback text.
        util.logger.error(
            "Exception at recommending google links for : %s Exception = %s"
            % (todayDateFolder, traceback.format_exc()))
    return result
예제 #7
0
def ConnectionClarity():
    """Compute today's clarity signal over the test corpus and persist the
    ranked result; skips the work when it already ran today.

    Returns True when already done or the ranking was written successfully.
    """
    todayDate = util.getTodayDateFolder()
    lastClarityDate = util.loadSettings(Constants.LAST_CLARITY_DIR)
    if lastClarityDate == todayDate:
        util.logger.info("Clarity signal done for today =" + todayDate)
        return True
    # Build the train/test corpora from the files currently on disk.
    trainCorpus, usedTrainFiles = util.findCorpus(util.findTrainingFiles())
    testCorpus, usedTestFiles = util.findCorpus(util.findTestFiles())
    clarityScore = Clarity(trainCorpus, testCorpus).ClarityScore()
    ret = printRankedDocs(clarityScore, usedTestFiles)
    if ret == True:
        util.saveSettings(Constants.LAST_CLARITY_DIR, todayDate)
        util.logger.info("Clarity info just completed for =" + todayDate)
    return ret
예제 #8
0
파일: NER.py 프로젝트: shashankneo/projects
def printRankedDocs(smoothness, usedTestFiles):
    """Rank test documents by summed smoothness score and write the ranking
    (document id -> 1-based rank, best first) as JSON into today's engine
    folder.

    smoothness    : 2-D score matrix; row i corresponds to usedTestFiles[i].
    usedTestFiles : list of document identifiers.
    """
    # One aggregate score per document.
    perDocScore = smoothness.sum(axis=1)
    testMapping = {}
    for idx, docName in enumerate(usedTestFiles):
        testMapping[docName] = perDocScore[idx]
    # Highest score first.
    ranked = sorted(testMapping.items(), key=operator.itemgetter(1),
                    reverse=True)
    todayDateFolder = util.getTodayDateFolder()
    write_directory = os.path.join(Constants.ROOT_FOLDER,
                                   Constants.RECOMMENDATION_DIR,
                                   Constants.ENGINE_DIR, todayDateFolder)
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    outfile = open(os.path.join(write_directory, Constants.SMOOTHNESS_FILE),
                   'w')
    json_write = {}
    for rank, (docName, score) in enumerate(ranked, 1):
        json_write[docName] = rank
    json.dump(json_write, outfile)
    outfile.close()
예제 #9
0
def printRankedDocs(testMapping):
    """Write the relevance ranking (document id -> 1-based rank, highest
    score first) as JSON into today's engine folder.

    testMapping : dict mapping document id -> relevance score.
    Returns True on success, False on any failure.
    """
    result = False
    write_directory = ""  # defined up front so the except handler can log it
    try:
        # BUG FIX: in the original the statements below were not indented
        # under the try, which made the function an IndentationError.
        sorted_x = sorted(testMapping.items(), key=operator.itemgetter(1),
                          reverse=True)
        todayDateFolder = util.getTodayDateFolder()
        write_directory = os.path.join(Constants.ROOT_FOLDER,
                                       Constants.RECOMMENDATION_DIR,
                                       Constants.ENGINE_DIR, todayDateFolder)
        if not os.path.exists(write_directory):
            os.makedirs(write_directory)
        outfile = open(os.path.join(write_directory, Constants.RELEVANCE_FILE),
                       'w')
        json_write = {}
        count = 1
        for (key, val) in sorted_x:
            json_write[key] = count
            count = count + 1
        json.dump(json_write, outfile)
        outfile.close()
        result = True
    except Exception as e:
        util.logger.error(
            "Exception at printing Relevance docs for data : %s" %
            write_directory)
    # BUG FIX: callers (e.g. Relevance) compare the return value to True;
    # the original never returned `result`.
    return result
예제 #10
0
def GoogleNews():
    """Download today's Google links and news unless already done today.

    Each step defaults to success when it was already completed; returns
    True only when both the links step and the news step are ok.
    """
    downloadedLinks = []  # NOTE(review): unused in the visible code
    todayDate = util.getTodayDateFolder()
    lastNewsDownloaded = util.loadSettings(Constants.LAST_GOOGLENEWS_DOWNLOAD)
    lastLinksDownloaded = util.loadSettings(Constants.LAST_GOOGLELINKS_DOWNLOAD)

    googleLinksStatus = True
    googleNewsStatus = True

    # Links step: only runs when today's links are missing.
    if todayDate == lastLinksDownloaded:
        util.logger.info("Google links downloaded successfully for = " + todayDate)
    else:
        googleLinksStatus = getGoogleLinks(todayDate)

    # News step: only runs when today's news is missing.
    if lastNewsDownloaded == todayDate:
        util.logger.info("Google news already downloaded successfully for = " + todayDate)
    else:
        googleNewsStatus = downloadGoogleNews(todayDate)

    return googleLinksStatus & googleNewsStatus
예제 #11
0
def Relevance():
    """Compute today's relevance signal: TF-IDF similarity of each test
    document against the training corpus, then persist the ranked scores
    via printRankedDocs.

    Skips the work when it already ran today. Returns True when already
    done or the ranking was written successfully.
    """
    todayDate = util.getTodayDateFolder()
    lastRelevanceDate = util.loadSettings(Constants.LAST_RELEVANCE_DIR)
    if todayDate == lastRelevanceDate:
        util.logger.info("Relevance signal already done for today :" + todayDate)
        return True
    trainFiles = util.findTrainingFiles()
    testFiles = util.findTestFiles()
    trainCorpus, usedTrainFiles = util.findCorpus(trainFiles)
    testCorpus, usedTestFiles = util.findCorpus(testFiles)
    # Drop hapax legomena (words seen exactly once across the training set).
    # PERF FIX: the original called all_tokens.count(word) per unique word,
    # which is O(n^2); a Counter does it in one pass.
    token_counts = Counter()
    for text in trainCorpus:
        token_counts.update(text)
    tokens_once = set(word for word, cnt in token_counts.items() if cnt == 1)
    texts = [[word for word in text if word not in tokens_once]
             for text in trainCorpus]
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    tfidf = models.TfidfModel(corpus=corpus, id2word=dictionary,
                              normalize=True)
    index = similarities.SparseMatrixSimilarity(tfidf[corpus],
                                                num_features=len(dictionary))
    testJson = {}
    for count, text in enumerate(testCorpus):
        vec = dictionary.doc2bow(text)
        sims = index[tfidf[vec]]
        # Aggregate similarity of this test doc against all training docs.
        testJson[usedTestFiles[count]] = sum(sims)
    ret = printRankedDocs(testJson)
    if ret == True:
        util.saveSettings(Constants.LAST_RELEVANCE_DIR, todayDate)
        util.logger.info("Relevance info just completed for =" + todayDate)
    return ret
예제 #12
0
def printRankedDocs(clarityScore, usedTestFiles):
    """Rank test documents by summed clarity score and write the ranking
    (document id -> 1-based rank, best first) as JSON into today's engine
    folder.

    clarityScore  : 2-D score matrix; row i corresponds to usedTestFiles[i].
    usedTestFiles : list of document identifiers.
    Returns True on success, False on any failure.
    """
    result = False
    write_directory = ""  # defined up front so the except handler can log it
    try:
        # BUG FIX: the original try body had inconsistent indentation (the
        # first statements were outside the try and the open() line was
        # over-indented), which is a syntax error in Python.
        testScore = clarityScore.sum(axis=1)
        testMapping = {}
        for files in range(len(usedTestFiles)):
            testMapping[usedTestFiles[files]] = testScore[files]
        sorted_x = sorted(testMapping.items(), key=operator.itemgetter(1),
                          reverse=True)
        todayDateFolder = util.getTodayDateFolder()
        write_directory = os.path.join(Constants.ROOT_FOLDER,
                                       Constants.RECOMMENDATION_DIR,
                                       Constants.ENGINE_DIR, todayDateFolder)
        if not os.path.exists(write_directory):
            os.makedirs(write_directory)
        outfile = open(os.path.join(write_directory, Constants.CLARITY_FILE),
                       'w')
        json_write = {}
        count = 1
        for (key, val) in sorted_x:
            json_write[key] = count
            count = count + 1
        json.dump(json_write, outfile)
        outfile.close()
        result = True
    except Exception as e:
        util.logger.error(
            "Exception at printing Clarity docs for data : %s" %
            write_directory)
    # BUG FIX: callers (ConnectionClarity) compare the return value to True;
    # the original never returned `result`.
    return result
예제 #13
0
def RemoveBoiler():
    """Strip boilerplate from the recent training-data folders, resuming
    after the folder recorded in LAST_BOILER_DATA_DIR.

    News boiling is currently disabled (boilerNewsStatus stays True).
    Returns truthy when boiling succeeded for at least one training folder
    (or it was already done for the newest folder).
    """
    todayDate = util.getTodayDateFolder()
    lastNewsBoiled = util.loadSettings(Constants.LAST_BOILER_GOOGLENEWS)
    lastDataBoiled = util.loadSettings(Constants.LAST_BOILER_DATA_DIR)

    boilerNewsStatus = True  # news boiling step disabled for now
    boilerDataStatus = True

    trainingFolders = util.findTrainingDays()
    anyTrainingFolderBoiled = True
    if lastDataBoiled == trainingFolders[0]:
        util.logger.info("Boiler data already done for today :" + lastDataBoiled)
    else:
        anyTrainingFolderBoiled = False
        folderIndex = 0
        if lastDataBoiled != None:
            # BUG FIX: .index raises an uncaught ValueError when the stored
            # folder has aged out of the training window; fall back to the
            # full window instead (mirrors the sibling RemoveBoiler).
            try:
                folderIndex = trainingFolders.index(lastDataBoiled)
            except ValueError:
                folderIndex = Constants.TRAINING_DAY
        else:
            folderIndex = Constants.TRAINING_DAY
        if folderIndex < 0:
            folderIndex = 0
            util.logger.info("Boiler data for none of the last %d days have been downloaded" % Constants.TRAINING_DAY)
        # Boil the remaining folders from oldest-pending to newest.
        for folder in range(folderIndex - 1, -1, -1):
            boilerDataStatus = BoilerData(trainingFolders[folder])
            if boilerDataStatus == False:
                util.logger.error("Boiler data not done for today :" + trainingFolders[folder])
            else:
                anyTrainingFolderBoiled = True

    return boilerNewsStatus & anyTrainingFolderBoiled
예제 #14
0
def printRecommendedDocs(recDocs, downloadDate):
    """Write the top-ranked recommended documents (list 'algo1') for the
    given download date as JSON into the FINAL directory.

    recDocs      : dict mapping document id -> rank score (lower = better).
    downloadDate : date folder whose links JSON is read.
    Returns True on success, False when the links JSON is missing or any
    step fails.
    """
    jsonData = readLinksJson(downloadDate)
    if jsonData is None:
        return False

    result = False
    write_directory = ""  # defined up front so the except handler can log it
    final_json = {}
    algo1List = []
    try:
        # Ascending rank score: best documents first.
        sorted_x = sorted(recDocs.items(), key=operator.itemgetter(1))
        todayDateFolder = util.getTodayDateFolder()
        write_directory = os.path.join(Constants.ROOT_FOLDER,
                                       Constants.FINAL_DIR, todayDateFolder)
        if not os.path.exists(write_directory):
            os.makedirs(write_directory)
        count = 0
        for (key, val) in sorted_x:
            if key not in jsonData:
                continue
            linkObj = {}
            linkObj['url'] = jsonData[key]['url']
            linkObj['content'] = jsonData[key]['content']
            linkObj['title'] = jsonData[key]['title']
            linkObj['id'] = key
            linkObj['use'] = 'NA'
            algo1List.append(linkObj)
            count = count + 1
            if count >= Constants.RECOMMENDED_LINKS:
                break
        final_json['algo1'] = algo1List
        # with-statement closes the handle even when dump fails (the
        # original leaked it on error).
        with open(os.path.join(write_directory, Constants.ULTIMATE_FILE),
                  'w') as outfile:
            json.dump(final_json, outfile)
        result = True
    except Exception as e:
        util.logger.error("Exception at writing final Recommendation docs for data : %s" % write_directory)
    # BUG FIX: callers test the return value against True; the original
    # fell off the end and returned None.
    return result
예제 #15
0
def GoogleNews():
    """Download today's Google links and news when missing for today.

    Returns True only when both the links step and the news step are ok;
    a step already completed today counts as ok.
    """
    downloadedLinks = []  # NOTE(review): unused in the visible code
    todayDate = util.getTodayDateFolder()
    lastNewsDownloaded = util.loadSettings(Constants.LAST_GOOGLENEWS_DOWNLOAD)
    lastLinksDownloaded = util.loadSettings(
        Constants.LAST_GOOGLELINKS_DOWNLOAD)

    googleLinksStatus = True
    googleNewsStatus = True

    # Links step: skip when today's links were already fetched.
    if todayDate == lastLinksDownloaded:
        util.logger.info("Google links downloaded successfully for = " +
                         todayDate)
    else:
        googleLinksStatus = getGoogleLinks(todayDate)

    # News step: skip when today's news was already fetched.
    if lastNewsDownloaded == todayDate:
        util.logger.info("Google news already downloaded successfully for = " +
                         todayDate)
    else:
        googleNewsStatus = downloadGoogleNews(todayDate)

    return googleLinksStatus & googleNewsStatus