import http.client
import time

# ---------------------------------------------------------------------------
# Elasticsearch endpoint used by this script.
# ---------------------------------------------------------------------------
es_ip = "211.39.140.49"
es_port = 9200

# ---------------------------------------------------------------------------
# Sentiment labels: maps the POSITIVE / NEGATIVE / ETC keys to the Korean
# label stored for each one.
# ---------------------------------------------------------------------------
sentiment_score = { "POSITIVE" : "긍정", "NEGATIVE" : "부정", "ETC" : "중립" }

# ---------------------------------------------------------------------------
# Logging: no console handler, rotating file handler enabled, DEBUG level.
# The endpoint in use is logged once at import time.
# ---------------------------------------------------------------------------
logger = myLogger.getMyLogger('posneg', hasConsoleHandler=False, hasRotatingFileHandler=True, logLevel=logging.DEBUG)
logger.info("ES Connection %s %d" % (es_ip, es_port) )

# ---------------------------------------------------------------------------
# Slack notification settings.
# NOTE(review): the webhook URL is a credential committed to source control;
# consider loading it from configuration and rotating the hook.
# ---------------------------------------------------------------------------
slack = slackweb.Slack("https://hooks.slack.com/services/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")
slackChannel = "#dmap_error_alert"
slackUserName = "******"
slackIconEmoji = ":ghost:"

# ---------------------------------------------------------------------------
# Search options.
# NOTE(review): INDEX_EMOTIONS points at the "documents-*" index pattern, not
# an emotions index — confirm this is intended.
# ---------------------------------------------------------------------------
MAX_TOUSFLUX_NUM=10
PAGE_SIZE=10
INDEX_EMOTIONS="documents-*"
TYPE_DOC="doc"
''' Created on 2017. 6. 13. @author: Holly ''' import http.client as hc import logging import json from com.wisenut.enums.query import Query import traceback from com.wisenut import myLogger ############# setting logging logger = myLogger.getMyLogger('esclient', False, True, logging.DEBUG) ############# Elasticsearch 정보 세팅 es_ip = "211.39.140.96" es_port = 9201 es_conn = hc.HTTPConnection(es_ip, es_port, timeout=60) attr_dict = {"brand": "브랜드", "region": "지명", "person": "인명"} def clear_scroll(scroll_id): try: es_conn.request("DELETE", "/_search/scroll", json.dumps({"scroll_id": scroll_id}), {"Content-Type": "application/json"}) except OSError as oserror: ex = traceback.format_exc() logger.error("[clear_scroll] OS error : %s. Traceback >> %s " %
'''
MariaDB client module settings.

Builds the module logger and a Slack webhook client, and reads every MariaDB
connection parameter from ``Config``.

Created on 2017. 5. 30.

@author: Holly
'''
import pymysql
import slackweb
from com.wisenut.config import Config
from com.wisenut import myLogger
import logging

# ---------------------------------------------------------------------------
# Configuration object; all DB settings below are read from it.
# ---------------------------------------------------------------------------
conf = Config()

# ---------------------------------------------------------------------------
# Logging.
# NOTE(review): positional flags are presumably
# (hasConsoleHandler=True, hasRotatingFileHandler=False) — confirm against
# myLogger.getMyLogger.
# ---------------------------------------------------------------------------
logger = myLogger.getMyLogger('mariadbclient', True, False, logging.DEBUG)

# ---------------------------------------------------------------------------
# Slack webhook client.
# NOTE(review): the webhook URL is a credential committed to source control;
# consider moving it into Config next to the DB settings and rotating the hook.
# ---------------------------------------------------------------------------
slack = slackweb.Slack( "https://hooks.slack.com/services/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" )

# ---------------------------------------------------------------------------
# Database connection settings, taken from the configuration object.
# ---------------------------------------------------------------------------
mariadb_ip = conf.get_mariadb_ip()
mariadb_port = conf.get_mariadb_port()
mariadb_user = conf.get_mariadb_user()
mariadb_password = conf.get_mariadb_password()
mariadb_db = conf.get_mariadb_db()
mariadb_charset = conf.get_mariadb_charset()
''' Created on 2017. 5. 30. @author: Holly ''' # -*- coding : utf-8 -*- import logging import sys, os import zipfile from com.wisenut.utils import file_util from com.wisenut.config import Config from com.wisenut import myLogger ############# setting logging logger = myLogger.getMyLogger('excel_downloader', True, False, logging.DEBUG) # zip file directory conf = Config() BASE_EXCEL_DIRECTORY = conf.get_report_home() if __name__ == '__main__': logger.info("excel downloader starts.") if len(sys.argv) < 4: print("[ Usage ]") print( "\texcel_downloader <target_file_path> <save_file_path> <save_file_name>" ) print("") exit
# -*- coding: utf-8 -*- ''' Created on 2017. 6. 13. @author: Holly ''' import http.client as hc import logging import json, re import com.wisenut.dao.mariadbclient as mariadb from com.wisenut.enums.query import Query import traceback from com.wisenut import myLogger ############# logger 세팅 logger = myLogger.getMyLogger("esclient", False, True, logging.DEBUG) ############# Elasticsearch 정보 세팅 #es_ip = "ec2-13-124-161-198.ap-northeast-2.compute.amazonaws.com" es_ip = "211.39.140.96" es_port = 9200 es_conn = hc.HTTPConnection(es_ip, es_port, timeout=300) attr_dict = {"brand": "브랜드", "region": "지명", "person": "인명"} class EsRejectedExecutionException(Exception): pass def clear_scroll(scroll_id):
#for e in arr: m.update(repr(arr).encode('utf-8')) return m.hexdigest() if __name__ == '__main__': mode = sys.argv[1] start_date = sys.argv[2] end_date = sys.argv[3] project_seqs = mariadbclient.get_all_projectseqs_of( 'kdic') # DB에서 kdic에 해당하는 project를 전체 가져와야함. ############# setting logging logger = myLogger.getMyLogger('kdic-topics-' + mode, hasConsoleHandler=False, hasRotatingFileHandler=True, logLevel=logging.DEBUG) logger.info( "=================================================================") logger.info("- ES Connection %s %d" % (es_ip, es_port)) logger.info("- mode\t\t:\t%s" % mode) logger.info("- project_seqs\t:\t%s" % ','.join(str(seq[0]) for seq in project_seqs)) logger.info("- start_date\t:\t%s" % start_date) logger.info("- end_date\t:\t%s" % end_date) logger.info( "=================================================================") #for project_seq in project_seqs.split(","): #logger.info(">>>>> project_seq %s" % project_seq)
# -*- coding: utf-8 -*- ''' Created on 2017. 6. 13. @author: Holly ''' import http.client as hc import logging import json, re import com.wisenut.dao.mariadbclient as mariadb from com.wisenut.enums.query import Query import traceback from com.wisenut import myLogger ############# logger 세팅 logger = myLogger.getMyLogger("esclient", False, True, logging.DEBUG) ############# Elasticsearch 정보 세팅 #es_ip = "ec2-13-124-161-198.ap-northeast-2.compute.amazonaws.com" es_ip="211.39.140.96" es_port = 9200 es_conn = hc.HTTPConnection(es_ip, es_port, timeout=300) attr_dict = { "brand" : "브랜드", "region" : "지명", "person" : "인명" }
class Report:
    """Common state and helpers for the Excel report builders.

    The constructor reads one request mapping (``params``) and derives the
    display names, the compact date range and the output file name from it.

    ``cover_page`` uses ``self.workbook``, ``self.header`` and
    ``self.default``.  None of them is assigned in this class, so they must be
    set on the instance before ``cover_page`` is called.
    """

    # Per-instance values; the class-level defaults are overwritten in
    # __init__ (file_path is set by create_file_path).
    seq = -1
    reg_dt = ""
    report_type = ""
    project_name = ""
    channel = ""
    start_date = ""
    end_date = ""
    dataset_names = ""
    compare = False
    queryObj = None
    file_name = ""
    file_path = ""

    # Root directory for generated reports, read from the configuration.
    conf = Config()
    BASE_EXCEL_DIRECTORY = conf.get_report_home()

    # Field names and the Korean column headers that go with them.
    DOCUMENTS_FIELDS = ['doc_datetime', 'doc_writer', 'doc_url', 'doc_title', 'doc_content', 'depth1_nm', 'depth2_nm', 'depth3_nm']
    DOCUMENTS_FIELDS_KOREAN = ['게시일', '작성자', 'URL', '제목', '내용', '채널1', '채널2', '채널3']
    EMOTIONS_FIELDS = ['conceptlevel1', 'conceptlevel2', 'conceptlevel3', 'emotion_type', 'matched_text.string']
    EMOTIONS_FIELDS_KOREAN = ['대분류', '중분류', '소분류', '감성', '분석문장']

    # Cell format dictionaries for header cells and ordinary cells.
    HEADER_FORMAT = {'bold': True, 'font_size': 9, 'bg_color': '#F2F2F2', 'align': 'center', 'border': 1}
    DEFAULT_FORMAT = {'font_size': 9, 'border': 1}

    logger = myLogger.getMyLogger("report", False, True, logging.DEBUG)

    # Characters Windows does not allow in a file name: \ / : * ? " < > |
    _WINDOWS_INVALID_CHARS = re.compile(r'[\\/:*?"<>|]')

    # Separators stripped from date/time strings to get a compact digit form.
    _DATE_SEPARATORS = re.compile(r"[-:\s]")

    def __init__(self, params):
        """Populate the report attributes and file name from ``params``.

        Keys read from ``params``: ``compare_yn``, ``start_date``,
        ``end_date``, ``seq``, ``reg_dt``, ``type_cd``, ``project_seq``,
        ``channels`` and ``datasets`` (dataset sequences joined by ``^``).
        Every key/value pair of ``params`` is also written to the log.
        """
        self.compare = params['compare_yn'] == 'Y'
        # Keep only the leading 8 characters (YYYYMMDD) of the stripped dates.
        self.start_date = self._DATE_SEPARATORS.sub("", params['start_date'])[:8]
        self.end_date = self._DATE_SEPARATORS.sub("", params['end_date'])[:8]
        self.seq = params['seq']
        self.reg_dt = self._DATE_SEPARATORS.sub("", params['reg_dt'])
        # e.g. type code RSP -> report type display name
        self.report_type = db.get_exceltype_name(params['type_cd'])
        self.project_name = db.get_project_name(params['project_seq'])

        if not params['channels'] or params['channels'] == 'all':
            self.channel = '전체'
        else:
            self.channel = "채널일부"

        # e.g. "6^7^15" -> "name6,name7,name15"; a sequence without a name
        # becomes 'unknown'.  Each name is looked up exactly once.
        dataset_seqs = params['datasets'].split("^") if params['datasets'] else []
        dataset_labels = []
        for dataset_seq in dataset_seqs:
            dataset_label = db.get_dataset_name(dataset_seq)
            dataset_labels.append(dataset_label if dataset_label is not None else 'unknown')
        self.dataset_names = ",".join(dataset_labels)

        # The dataset names become part of the file name, so on Windows every
        # illegal character is replaced, wherever it occurs in the string.
        if os.name == 'nt':
            self.dataset_names = self._WINDOWS_INVALID_CHARS.sub("_", self.dataset_names)

        self.queryObj = Query()

        period_label = "동일기간비교" if self.compare else "해당기간"
        period_parts = [self.start_date, self.end_date, period_label]
        if not params['datasets']:
            # Search-trend report: no channel or dataset in the name.
            name_parts = [str(self.seq), self.report_type] + period_parts
        elif len(dataset_seqs) > 1:
            # Social-monitoring report over several datasets.
            name_parts = [str(self.seq), self.report_type, self.channel] + period_parts
        else:
            # Social-monitoring report over one dataset: include its name.
            titled_type = self.report_type + "(" + self.dataset_names + ")"
            name_parts = [str(self.seq), titled_type, self.channel] + period_parts
        self.file_name = "_".join(name_parts) + ".xlsx"

        rule = "======================================================================================="
        self.logger.info(rule)
        for key, value in params.items():
            self.logger.info("%s :\t\t%s", key, value)
        self.logger.info(rule)

    def get_file_name(self):
        """Return the output file name computed by the constructor."""
        return self.file_name

    def create_file_path(self):
        """Set ``self.file_path`` to ``<BASE_EXCEL_DIRECTORY>/<reg_dt>``.

        Returns whatever ``file_util.search_create_directory`` returns for
        that path.
        """
        self.file_path = os.path.join(self.BASE_EXCEL_DIRECTORY, self.reg_dt)
        return file_util.search_create_directory(self.file_path)

    @staticmethod
    def _parse_date(text):
        """Build a ``date`` from the year/month/day digits at 0:4, 5:7, 8:10."""
        return date(int(text[0:4]), int(text[5:7]), int(text[8:10]))

    # Cover sheet
    def cover_page(self, params):
        """Write the cover worksheet: project, menu, datasets, period, channel.

        Reads ``compare_yn``, ``start_date``, ``end_date`` and ``channels``
        from ``params``.  When only some channels are selected and more than
        one is listed, ``self.channel`` is extended in place with the
        parenthesised list of depth-1 channel names.
        """
        worksheet = self.workbook.add_worksheet('표지')

        row_titles = ('프로젝트명', '분석메뉴', '데이터셋', '기간(당기)', '채널')
        for row, title in enumerate(row_titles):
            worksheet.write(row, 0, title, self.header)

        worksheet.write(0, 1, self.project_name, self.default)
        worksheet.write(1, 1, self.report_type, self.default)
        worksheet.write(2, 1, self.dataset_names, self.default)

        if params['compare_yn'] == 'Y':
            # Reference period taken from the request.
            start_date = self._parse_date(params['start_date'])
            end_date = self._parse_date(params['end_date'])
            interval = end_date - start_date
            # Consecutive periods are shifted back by the interval plus one
            # day, so they do not overlap.
            shift = interval + timedelta(days=1)
            periods = []
            for index in range(4):
                period_end = end_date - shift * index
                periods.append("%s ~ %s" % ((period_end - interval).strftime('%Y.%m.%d(%a)'), period_end.strftime('%Y.%m.%d(%a)')))
            worksheet.write(3, 1, ", ".join(periods), self.default)
        else:
            worksheet.write(3, 1, "~".join([self.start_date, self.end_date]), self.default)

        selected_channels = params['channels'].split(";") if self.channel == "채널일부" else []
        if len(selected_channels) > 1:
            channel_labels = []
            for entry in selected_channels:
                channel_info = db.get_channel_name(Channel.DEPTH1.value, entry.split("^")[0])
                if channel_info:
                    channel_labels.append(channel_info[0])
            self.channel += "(" + ",".join(channel_labels) + ")"

        worksheet.write(4, 1, self.channel, self.default)
# -*- coding : utf-8 -*- ''' Created on 2017. 5. 30. @author: Holly ''' import com.wisenut.dao.mariadbclient as db from com.wisenut.reports.report_emotions import ReportEmotions from com.wisenut.reports.report_stats import ReportStatistics from com.wisenut.reports.report_count import ReportCount from com.wisenut.reports.report_trend import ReportTrend import traceback import logging from com.wisenut import myLogger logger = myLogger.getMyLogger("excel_maker", False, True, logging.DEBUG) if __name__ == '__main__': #1. 엑셀 다운로드 요청 목록을 테이블에서 가져옴. for req in db.get_excel_request(): logger.debug(req) #2. 리포트 타입별로 엑셀 꾸미기 if req['type_cd']=='RSS': # 수집문서통계 report = ReportStatistics(req) elif req['type_cd']=='RSE': # 감성분석 report = ReportEmotions(req) elif req['type_cd']=='RTC': # 검색트렌드 - 조회수 report = ReportCount(req) elif req['type_cd']=='RTT': # 검색트렌드 - 트렌드 report = ReportTrend(req)
import math from datetime import datetime as dt from com.wisenut.enums.query import Query from datetime import timedelta from com.wisenut.config import Config import sys import xlsxwriter import copy import pymysql import time import socket from com.wisenut import myLogger import logging ############# setting logging logger = myLogger.getMyLogger('kdic-report-maker', False, True, logging.DEBUG) MODE_DOCUMENTS='documents' MODE_TOPICS='topics' MODE_EMOTIONS='emotions' MODE_TREND='trend' INDEX_DOCUMENTS="documents-*" INDEX_TOPICS="topics-*" INDEX_EMOTIONS="emotions-*" RETRY_TIMES=5 SLEEP_FOR_WHEN_RETRY=30 # seconds class ReportKDICDocuments: mode = ""
import aiohttp import urllib3 import traceback import math PAGE_SIZE=1000 # =========== elasticsearch =========== es_ip = "211.39.140.96" es_port = 9201 INDEX_TOPICS="topics" INDEX_DOCUMENTS="documents" TYPE_DOC="doc" # =========== Logger =========== logger = myLogger.getMyLogger('related_word_sticker', hasConsoleHandler=False, hasRotatingFileHandler=True, logLevel=logging.DEBUG) class EsError(Exception): pass class NoMecabAvailable(Exception): pass async def isNoun(text, debug=False): from subprocess import run, PIPE
# -*- coding : utf-8 -*- ''' Created on 2017. 5. 30. @author: Holly ''' import com.wisenut.dao.mariadbclient as db from com.wisenut.reports.report_emotions import ReportEmotions from com.wisenut.reports.report_stats import ReportStatistics from com.wisenut.reports.report_count import ReportCount from com.wisenut.reports.report_trend import ReportTrend import traceback import logging from com.wisenut import myLogger logger = myLogger.getMyLogger("excel_maker", False, True, logging.DEBUG) if __name__ == '__main__': #1. 엑셀 다운로드 요청 목록을 테이블에서 가져옴. for req in db.get_excel_request(): logger.debug(req) #2. 리포트 타입별로 엑셀 꾸미기 if req['type_cd'] == 'RSS': # 수집문서통계 report = ReportStatistics(req) elif req['type_cd'] == 'RSE': # 감성분석 report = ReportEmotions(req) elif req['type_cd'] == 'RTC': # 검색트렌드 - 조회수 report = ReportCount(req) elif req['type_cd'] == 'RTT': # 검색트렌드 - 트렌드 report = ReportTrend(req)
# -*- coding: utf-8 -*- ''' Created on 2017. 5. 30. @author: Holly ''' import pymysql import re import logging from com.wisenut import myLogger ############# logger 세팅 logger = myLogger.getMyLogger("mariadbclient", False, True, logging.DEBUG) ############# DB 정보 세팅 mariadb_ip = "211.39.140.249" mariadb_port = 3306 mariadb_user = "******" mariadb_password = "******" mariadb_db = "dmap_base" mariadb_charset = "utf8" ''' 조회수 쿼리 ''' def get_data_for_report_count(type_cd, trend_grp_seq, start_date, end_date, trend_dataset_seq=0,
# -*- coding: utf-8 -*- ''' Created on 2017. 5. 30. @author: Holly ''' import pymysql import re import logging from com.wisenut import myLogger ############# logger 세팅 logger = myLogger.getMyLogger("mariadbclient", False, True, logging.DEBUG) ############# DB 정보 세팅 mariadb_ip="211.39.140.249" mariadb_port=3306 mariadb_user="******" mariadb_password="******" mariadb_db="dmap_base" mariadb_charset="utf8" ''' 조회수 쿼리 ''' def get_data_for_report_count(type_cd, trend_grp_seq, start_date, end_date, trend_dataset_seq=0, trend_keyword_seq=0): conn = pymysql.connect(host=mariadb_ip, port=mariadb_port, user=mariadb_user, password=mariadb_password, db=mariadb_db, charset=mariadb_charset, connect_timeout=60) curs = conn.cursor() sql = ''' SELECT TG.name AS trend_grp
m = hashlib.md5() #for e in arr: m.update(repr(arr).encode('utf-8')) return m.hexdigest() if __name__ == '__main__': process_name = sys.argv[1] project_seqs = sys.argv[2] # only one project_seq per emotional analysis start_date = sys.argv[3] end_date = sys.argv[4] ############# setting logging logger = myLogger.getMyLogger(process_name, hasConsoleHandler=False, hasRotatingFileHandler=True, logLevel=logging.DEBUG) logger.info( "=================================================================") logger.info("- ES Connection %s %d" % (es_ip, es_port)) logger.info("- process_name\t:\t%s" % process_name) logger.info("- project_seqs\t:\t%s" % project_seqs) logger.info("- start_date\t:\t%s" % start_date) logger.info("- end_date\t:\t%s" % end_date) logger.info( "=================================================================") for project_seq in project_seqs.split(","): logger.info( "================================================================="