def Main(site, db_name, runDate, targetDate):
    """Crawl news for ``targetDate``, upload it to MongoDB and report to Slack.

    The crawl itself is delegated to ``Main_Naver(targetDate)``; its two
    results are stored in the ``newsNaver2018`` and ``comments2018``
    collections of the database named ``db_name``. Progress and a final
    summary (row counts, dates, timings) are posted to Slack.

    Args:
        site: Label for the crawled site; only used in the Slack messages.
        db_name: Name of the MongoDB database to upload into.
        runDate: Date of this run; must support ``strftime`` (it is
            formatted as ``%Y%m%d`` for the Slack summary).
        targetDate: Date to crawl; passed to ``Main_Naver`` unchanged and
            echoed in the Slack messages.

    Returns:
        None.

    Note:
        ``Main_Naver`` is assumed to return two pandas DataFrames
        (news, comments) -- the code only relies on ``to_dict('records')``
        and ``len``. TODO confirm against its definition.
    """
    mongodb = dh.ToMongoDB(*dh.AWS_MongoDB_Information())
    try:
        use_db = dh.Use_Database(mongodb, db_name)
        slack = cb.Slacker(cb.slacktoken())
        slack.chat.post_message(
            '# general',
            'Start : {}, targetData : {} '.format(site, targetDate))

        start_time = datetime.now()
        newsDf, commentsDf = Main_Naver(targetDate)
        middle_time = datetime.now()
        # Keep the crawl-finished timestamp separate from the crawl duration:
        # the upload duration below needs the timestamp, not the timedelta.
        running_time = middle_time - start_time

        print('Start Uploading')
        news_records = newsDf.to_dict('records')
        comment_records = commentsDf.to_dict('records')
        # insert_many rejects an empty document list, so skip the call when
        # the crawl produced nothing instead of aborting the whole run.
        if news_records:
            dh.Use_Collection(use_db, 'newsNaver2018').insert_many(news_records)
        if comment_records:
            dh.Use_Collection(use_db, 'comments2018').insert_many(comment_records)
        print('End Uploading')

        end_time = datetime.now()
        upload_time = end_time - middle_time

        outcome_info = '{}, news : {}, comment : {}'.format(
            site, len(newsDf), len(commentsDf))
        date_info = 'run date : {}, target date : {}'.format(
            runDate.strftime('%Y%m%d'), targetDate)
        time_info = 'running time : {}, uploading time : {}'.format(
            running_time, upload_time)
        for message in (outcome_info, date_info, time_info,
                        'Complete Upload In AWS Mongodb'):
            slack.chat.post_message('# general', message)
    finally:
        # Release the connection even when crawling, uploading or Slack fails.
        mongodb.close()
示例#2
0
            element = WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'tag_relate')))
            keywords = driver.find_elements_by_class_name('tag_relate')
            keywords = list(map(lambda x: x.text, keywords))
            keywords = list(map(lambda x: re.sub('#', '', x), keywords))
        driver.quit()
    return keywords


if __name__ == '__main__':
    site = 'Naver'
    collection = 'newsNaver'
    mongodb = dh.ToMongoDB(*dh.AWS_MongoDB_Information())
    dbname = 'hy_db'
    useDb = dh.Use_Database(mongodb, dbname)
    slack = cb.Slacker(cb.slacktoken())
    useCollection = dh.Use_Collection(useDb, collection)
    dataList = useCollection.find({'site': site})
    for data in dataList:
        if not 'keywords' in data.keys():
            keywords = SearchKeywordsFromDaumForNaver2(data['title'])
            useCollection.update({"_id": data['_id']},
                                 {'$set': {
                                     "keywords": keywords
                                 }})
            print(keywords)

        elif 'keywords' in data.keys() and data['keywords'] == 'NaN':
            keywords = SearchKeywordsFromDaumForNaver2(data['title'])
            useCollection.update({"_id": data['_id']},
                                 {'$set': {