def messageReceived(destination, message, headers):
    """
    Handle an incoming availability message.

    Queries biac_availability* for the requested period, derives a 'month'
    column and pushes one KPI dataframe per equipment group back to
    Elasticsearch via es_helper.

    Parameters
    ----------
    destination
        Topic/queue the message arrived on (logged only).
    message
        JSON payload; must contain 'start_ts' (epoch milliseconds).
    headers
        Message headers (logged only).
    """
    global es
    logger.info("==> " * 10)
    logger.info("Message Received %s" % destination)
    logger.info(headers)
    # Presumably gives upstream writers time to finish indexing before we
    # query — TODO confirm the 3s delay is still required.
    time.sleep(3)
    now = datetime.now()
    obj = json.loads(message)
    start_ts = obj['start_ts']
    # end_ts = obj['end_ts']
    # Query window ends at "now + 2 hours", expressed in epoch milliseconds.
    end_ts = int(datetime.timestamp(now) + 7200) * 1000
    query = {"query": {"bool": {"must": [
        {"query_string": {"query": "-equipment:obw AND lot:5",
                          "analyze_wildcard": True}},
        {"range": {"@timestamp": {"gte": start_ts,
                                  "lte": end_ts,
                                  "format": "epoch_millis"}}}
    ]}}}
    logger.info(query)
    df_ret = es_search_with_scroll(es, "biac_availability*", "doc", query,
                                   10000, '2m')
    df_from_es = df_ret.copy()
    df_from_es['month'] = pd.to_datetime(df_from_es['@timestamp'], unit='ms')
    df_from_es['month'] = df_from_es['month'].apply(
        lambda x: x.strftime("%Y-%m"))

    # One KPI dataframe per equipment group (kept in the original order).
    kpi_groups = [
        ('kpi1', ['gth_sorter_a', 'gth_sorter_b']),
        ('kpi2', ['gth_oog']),
        ('kpi3a', ['gth_reclaim']),
        ('kpi3b', ['gth_transfer_inbound', 'gtha inbound infeed area']),
        ('kpi4', ['gth_belt_island']),
        ('kpi5', ['gth_checkin_rows']),
    ]
    dfs = [getKPI(name, equipment, df_from_es)
           for name, equipment in kpi_groups]
    for df in dfs:
        es_helper.dataframe_to_elastic(es, df)
    logger.info("<== " * 10)
def create_default_records_2(es):
    """
    Create default record (01JAN2019) 0/0 100% in biac_month_2_kizeo.

    Parameters
    ----------
    es
        elasticsearch connection
    """
    logger.info('^¨' * 80)
    logger.info('create_default_records_2')

    kpis = ['201', '202', '203', '204', '205', '207', '208', '209', '210',
            '211', '213', '216', '217', '303']
    lots = ['1', '2', '3', '4']
    technics = ['acces', 'fire fighting', 'cradle', 'dnb', 'sanitair',
                'hvac pa', 'elektriciteit', 'hvac pb']

    # Build the full technic x lot x kpi cross product via a shared dummy
    # 'merge' key, then drop the key.
    records = (pd.DataFrame({'technic': technics, 'merge': 1})
               .merge(pd.DataFrame({'lot': lots, 'merge': 1}), how='outer')
               .merge(pd.DataFrame({'kpi': kpis, 'merge': 1}), how='outer'))
    del records['merge']

    records['screen_name'] = records.apply(extract_screen_name, axis=1)
    records['contract'] = records.apply(extract_contract, axis=1)

    # Neutral defaults: nothing checked yet, 100% conform, for January 2019.
    records['check_conform'] = 0
    records['check_no_conform'] = 0
    records['check_number'] = 0
    records['month'] = '2019-01'
    records['percentage_conform'] = 100
    records['percentage_no_conform'] = 0
    records['last_update'] = tzlocal.get_localzone().localize(
        datetime(2019, 1, 1, 0, 0, 0, 0))

    records['_index'] = 'biac_month_2_kizeo'
    records['_id'] = records.apply(
        lambda row: row['lot'] + '_' + str(row['kpi']) + '_'
        + str(row['screen_name']),
        axis=1)
    del records['technic']
    es_helper.dataframe_to_elastic(es, records)
def test_empty_attr(self):
    """Documents indexed from a DataFrame keep only their own columns:
    NaN-valued attributes belonging to other rows must not appear in
    the stored _source."""
    global server, pwd, user
    print("==> test_empty_attr")
    host_params1 = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
    es = ES([host_params1], connection_class=RC,
            http_auth=(user, pwd), use_ssl=True, verify_certs=False)
    print(es.info())
    # Start from a clean index; it is fine if it does not exist yet.
    try:
        es.indices.delete('test_empty_attr')
    except elasticsearch.NotFoundError:
        pass
    arr = [
        {'_id': 't1', 'attr1': 'test'},
        {'_id': 't2', 'attr2': 'test'},
        {'_id': 't3', 'attr3': 'test'},
    ]
    df = pd.DataFrame(arr)
    INDEX_NAME = 'test_empty_attr'
    df['_index'] = INDEX_NAME
    es_helper.dataframe_to_elastic(es, df)
    time.sleep(1)  # give Elasticsearch time to make the documents visible
    rec = es.get(index=INDEX_NAME, id='t1', doc_type='doc')
    self.assertTrue("attr1" in rec['_source'])
    self.assertTrue("attr2" not in rec['_source'])
    self.assertTrue("attr3" not in rec['_source'])
def compute_kpi102_monthly(df_kpi102):
    """
    Aggregate raw KPI102 'ronde' records into one monthly document per lot
    and rewrite the biac_month_kpi102 index.

    Parameters
    ----------
    df_kpi102
        DataFrame with at least '_timestamp' (epoch milliseconds),
        'ronde_number' and 'lot' columns.
    """
    global es
    starttime = time.time()
    df_kpi102['month'] = pd.to_datetime(df_kpi102['_timestamp'],
                                        unit='ms').dt.strftime('%Y-%m')

    # Skeleton of every (month, lot) pair from the first month of data up to
    # the current month, so months without records still get a 0% document.
    # df_index = pd.date_range(start=min(df_kpi102['month']), end=max(df_kpi102['month']), freq='MS')
    df_index = pd.date_range(start=min(df_kpi102['month']),
                             end=datetime.now().strftime('%Y-%m'), freq='MS')
    df_index = df_index.strftime('%Y-%m')
    # (a dead `df_empty = pd.DataFrame(df_index)` that was immediately
    # overwritten has been removed)
    df_empty = pd.DataFrame(list(product(df_index, [2, 3])))
    df_empty.columns = ['month', 'lot']

    # Count distinct rondes per (month, lot): first collapse duplicates of a
    # ronde within the month, then count rondes per month/lot.
    df_kpi102 = df_kpi102.groupby(['month', 'ronde_number', 'lot']).count()[['_timestamp']] \
        .rename(columns={'_timestamp': 'count'}).reset_index()
    df_kpi102 = df_kpi102.groupby(
        ['month', 'lot']).count().reset_index()[['month', 'lot', 'count']]

    # Lot 2 expects 16 rondes per month, lot 3 expects 6.
    df_kpi102.loc[df_kpi102['lot'] == 2, 'percent'] = round(
        (df_kpi102['count'] / 16) * 100, 2)
    df_kpi102.loc[df_kpi102['lot'] == 3, 'percent'] = round(
        (df_kpi102['count'] / 6) * 100, 2)
    df_kpi102['percent'] = df_kpi102['percent'].apply(
        lambda x: '{0:g}'.format(float(x)))

    df_kpi102 = df_empty.merge(df_kpi102, how='left',
                               left_on=['month', 'lot'],
                               right_on=['month', 'lot'])
    df_kpi102.fillna(0, inplace=True)

    df_kpi102['_timestamp'] = pd.to_datetime(df_kpi102['month'], format='%Y-%m')
    df_kpi102['_index'] = 'biac_month_kpi102'
    df_kpi102['_id'] = df_kpi102['month'] + '_lot' + df_kpi102['lot'].astype(str)

    # Full rebuild: drop the old monthly index before re-inserting.
    es.indices.delete(index='biac_month_kpi102', ignore=[400, 404])
    es_helper.dataframe_to_elastic(es, df_kpi102)
    endtime = time.time()
    log_message(
        "Compute monthly KPI102 (process biac_import_kpi102.py) finished. Duration: %d Records: %d."
        % (endtime - starttime, df_kpi102.shape[0]))
def create_default_records(es):
    """
    Create default record (01JAN2019) 0/0 100% in biac_month_kizeo.

    Parameters
    ----------
    es
        elasticsearch connection
    """
    kpi_codes = ['201', '202', '204', '205', '207', '208', '216', '217', '303']
    lot_codes = ['1', '2', '3', '4']
    technic_names = ['acces', 'fire fighting', 'cradle', 'dnb', 'sanitair',
                     'hvac pa', 'elektriciteit', 'hvac pb']

    # A shared dummy 'merge' column yields the full technic x lot x kpi
    # cross product; the helper column is dropped afterwards.
    defaults = (pd.DataFrame({'technic': technic_names, 'merge': 1})
                .merge(pd.DataFrame({'lot': lot_codes, 'merge': 1}),
                       how='outer')
                .merge(pd.DataFrame({'kpi': kpi_codes, 'merge': 1}),
                       how='outer'))
    del defaults['merge']

    defaults['screen_name'] = defaults.apply(extract_screen_name, axis=1)
    defaults['contract'] = defaults.apply(extract_contract, axis=1)

    # Neutral defaults: nothing checked yet, 100% conform, period W1/S1.
    defaults['check_conform'] = 0
    defaults['check_no_conform'] = 0
    defaults['check_number'] = 0
    defaults['computation_period'] = 'W1'
    defaults['computation_period_fr'] = 'S1'
    defaults['percentage_conform'] = 100
    defaults['percentage_no_conform'] = 0
    # 01 Jan 2019 noon, expressed in epoch milliseconds.
    defaults['last_update'] = datetime(2019, 1, 1, 12).timestamp() * 1000

    defaults['_index'] = 'biac_month_kizeo'
    defaults['_id'] = defaults.apply(
        lambda row: '_'.join([row['lot'], str(row['kpi']),
                              str(row['screen_name'])]),
        axis=1)
    del defaults['technic']
    es_helper.dataframe_to_elastic(es, defaults)
def compute_kpi103_monthly(start, end):
    """
    Called three seconds after an import with the first day and last day
    of the previous import.

    Builds one document per (month, ronde_number) with the number of days
    the ronde was done and the resulting percentage, and writes them to
    'biac_month_kpi103'.

    Parameters
    ----------
    start
        Date of the first imported record
    end
        Date of the last imported record
    """
    starttime = time.time()
    logger.info(start)
    logger.info(end)
    # Widen the window to whole months: first day of start's month up to
    # the last day of end's month.
    start = mkFirstOfMonth(start)
    end = end.replace(day=calendar.monthrange(end.year, end.month)[1])
    logger.info(start)
    logger.info(end)
    # df_kpi103 = etp.genericIntervalSearch(es, 'biac_kpi103', query='*', start=start, end=end,timestampfield="date")
    df_kpi103 = es_helper.elastic_to_dataframe(es, 'biac_kpi103', query='*', start=start, end=end,timestampfield="date")
    #logger.info(df_kpi103['date'].dt)
    df_kpi103['month'] = pd.to_datetime(df_kpi103['date2']).dt.strftime('%Y-%m')
    # Number of days on which each ronde was done, per month.
    df_kpi103=df_kpi103.groupby(['month', 'ronde_number']).count()[['date']].rename(columns={'date': 'count'}).reset_index()
    min_date = datetime.strptime(min(df_kpi103['month']), '%Y-%m')
    max_date = datetime.strptime(max(df_kpi103['month']), '%Y-%m')
    # Enumerate every month in the observed range so that months without
    # records still produce a (0%) document.
    months = []
    current = min_date
    while current <= max_date:
        months.append(current.strftime('%Y-%m'))
        current += relativedelta(months=1)
    df_months = pd.DataFrame(months)
    df_months['join']=1
    df_months.columns=['month', 'join']
    # Two rondes are expected; cross join months x rondes for the default grid.
    rondes = [1, 2]
    df_rondes = pd.DataFrame(rondes)
    df_rondes['join']=1
    df_rondes.columns=['ronde_number', 'join']
    df_default=df_months.merge(df_rondes, left_on='join', right_on='join')
    del df_default['join']
    # Denominator: days already elapsed in each month (handles the partial
    # current month).
    df_default['number_of_days'] = df_default['month'].apply(lambda x: get_days_already_passed(x))
    df_default['_id'] = df_default['month'] + '_' + df_default['ronde_number'].astype(str)
    df_kpi103['_id'] = df_kpi103['month'] + '_' + df_kpi103['ronde_number'].astype(str)
    del df_kpi103['month']
    del df_kpi103['ronde_number']
    df_merged = df_default.merge(df_kpi103, left_on='_id', right_on='_id', how="outer")
    df_merged = df_merged.fillna(0)
    df_merged['percent'] = round(100*df_merged['count'] / df_merged['number_of_days'], 2)
    df_merged['_index'] = 'biac_month_kpi103'
    df_merged['_timestamp'] = pd.to_datetime(df_merged['month'], format='%Y-%m')
    # NOTE(review): this rename relies on the exact post-merge column order
    # ('count' becomes 'ronde_done') — fragile if the merge output changes.
    df_merged.columns=['month', 'ronde_number', 'number_of_days', '_id', 'ronde_done', 'percent', '_index', '_timestamp']
    df_merged['ronde_done'] = df_merged['ronde_done'].astype(int)
    es_helper.dataframe_to_elastic(es, df_merged)
    endtime = time.time()
    log_message("Compute monthly KPI103 (process biac_import_kpi103.py) finished. Duration: %d Records: %d." % (endtime-starttime, df_merged.shape[0]))
def loadKPI103():
    """
    Main function that reads the two forms from kizeo every hour.

    Logs into the Kizeo REST API, downloads every "LOT 1 - Gebouw" form
    (excluding 'Wateranalyse') as an Excel export, extracts the ronde
    records and indexes them into 'biac_kpi103'.
    """
    try:
        starttime = time.time()
        logger.info(">>> LOADING KPI103")
        logger.info("==================")
        url_kizeo = 'https://www.kizeoforms.com/rest/v3'
        kizeo_user = os.environ["KIZEO_USER"]
        kizeo_password = os.environ["KIZEO_PASSWORD"]
        kizeo_company = os.environ["KIZEO_COMPANY"]
        payload = {
            "user": kizeo_user,
            "password": kizeo_password,
            "company": kizeo_company
        }
        r = requests.post(url_kizeo + '/login', json=payload)
        if r.status_code != 200:
            logger.error('Unable to reach Kizeo server. Code:'
                         + str(r.status_code) + " Reason:" + str(r.reason))
            return
        response = r.json()
        token = response['data']['token']
        logger.info('>Token: ' + str(token))

        r = requests.get(url_kizeo + '/forms?Authorization=' + token)
        form_list = r.json()['forms']
        logger.info('>Form List: ')
        #logger.info(form_list)

        df_all = pd.DataFrame()
        for i in form_list:
            if re.findall("LOT 1 - Gebouw .*$", i['name'].strip()) \
                    and 'Wateranalyse' not in i['name']:
                logger.info('MATCH')
                logger.info(i['name'])
                form_id = i['id']
                # NOTE(review): 'start' is 30 days in the FUTURE and 'end'
                # 60 days in the past; both are only logged since the
                # filtered export call below is commented out — confirm
                # before relying on them.
                start = (datetime.now()
                         + timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
                logger.info("Start %s" % (start))
                end = (datetime.now()
                       - timedelta(days=60)).strftime("%Y-%m-%d %H:%M:%S")
                logger.info("End %s" % (end))
                #post={"onlyFinished":False,"startDateTime":start,"endDateTime":end,"filters":[]}
                #r = requests.post(url_kizeo + '/forms/' + form_id + '/data/exports_info?Authorization='+token,post)
                r = requests.get(url_kizeo + '/forms/' + form_id
                                 + '/data/exports_info?Authorization=' + token)
                if r.status_code != 200:
                    logger.error('something went wrong...')
                    logger.error(r.status_code, r.reason)
                elif r.text == '':
                    logger.info('Empty response')
                else:
                    ids = [rec["id"] for rec in r.json()["data"]]
                    logger.info(ids)
                    payload = {"data_ids": ids}
                    posturl = ("%s/forms/%s/data/multiple/excel_custom"
                               % (url_kizeo, form_id))
                    headers = {'Content-type': 'application/json',
                               'Authorization': token}
                    r = requests.post(posturl, data=json.dumps(payload),
                                      headers=headers)
                    if r.status_code != 200:
                        logger.error('something went wrong...')
                        logger.error(r.status_code, r.reason)
                    logger.info("Handling Form. Content Size:"
                                + str(len(r.content)))
                    if len(r.content) > 0:
                        # Persist the Excel export, then load the columns
                        # we need (1: date, 2: creation date).
                        with open("./tmp/" + i['name'] + ".xlsx", "wb") as fh:
                            fh.write(r.content)
                        df = pd.read_excel("./tmp/" + i['name'] + ".xlsx",
                                           header=None)
                        logger.info(i["name"])
                        # The form name determines the ronde number.
                        if "16" in i['name']:
                            ronde = '16'
                        else:
                            ronde = '702'
                        df = df[[1, 2]]
                        df[5] = ronde
                        df_all = df_all.append(df)

        if len(df_all) > 0:
            df_all.columns = ["date", "creation_date", "ronde_number"]
            df_all['_id'] = 'biac_kpi103' + "_" \
                + df_all['date'].dt.strftime("%d%B%Y") + "_" \
                + df_all['ronde_number'].astype(str)  # +"_"+df_all["operator"]
            # BUGFIX: the normalized ids were computed but never assigned
            # back, leaving mixed-case ids containing spaces.
            df_all['_id'] = df_all['_id'].apply(
                lambda x: x.lower().replace(" ", ""))
            df_all['_index'] = "biac_kpi103"
            df_all['creation_date'] = df_all['creation_date'].dt.tz_localize(
                tz='Europe/Paris')
            df_all['date'] = df_all['date'].dt.tz_localize(tz='Europe/Paris')
            es_helper.dataframe_to_elastic(es, df_all)
            obj = {
                'start': min(df_all['date']),
                'end': max(df_all['date']),
                'ronde_number': ronde
            }
            # conn.send_message('/topic/BIAC_KPI103_IMPORTED', str(obj))
            time.sleep(3)
            # compute_kpi103_monthly(obj['start'], obj['end'])
            endtime = time.time()
            log_message("Import of KPI103 from Kizeo finished. Duration: %d Records: %d."
                        % (endtime - starttime, df_all.shape[0]))
    except Exception as e:
        logger.error("Unable to read Kizeo.", exc_info=True)
        endtime = time.time()
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        # exc_type below is ignored on 3.5 and later
        traceback.print_exception(exc_type, exc_value, exc_traceback,
                                  limit=2, file=sys.stdout)
def messageReceivedDOCX(destination, message, headers):
    """
    Handle a feedback Word document received via message queue.

    Decodes the base64 docx payload, extracts KPI comments of the form
    '@KPI <nr>: <comment>' from its paragraphs, stores them in the
    'biac_feedback_comment' index and sends a confirmation mail message.

    Parameters
    ----------
    destination
        Queue/topic the message arrived on (logged only).
    message
        Base64-encoded .docx content.
    headers
        Message headers; 'file', 'user' and/or 'From' are used when present.
    """
    global es
    starttime = time.time()
    logger.info("==> " * 10)
    logger.info("DOC Message Received %s" % destination)
    logger.info(headers)
    df_comment = pd.DataFrame()
    entityObj = getEntityObj(es)
    logger.info(entityObj)
    if "CamelSplitAttachmentId" in headers:
        headers["file"] = headers["CamelSplitAttachmentId"]
    if "file" in headers:
        logger.info("File:%s" % headers["file"])
        log_message("Import of file [%s] started." % headers["file"])
    docbytes = base64.b64decode(message)
    f = open('./tmp/word.docx', 'wb')
    f.write(docbytes)
    f.close()
    try:
        doc = Document('./tmp/word.docx')
        dict_comment = []
        user = ''
        user_id = ''
        # Identify the sender either from an explicit 'user' header or from
        # the mail 'From' address (resolved against the nyx_user index).
        if 'user' in headers:
            user_obj = json.loads(headers['user'])
            user = user_obj['firstname'] + ' ' + user_obj['lastname']
            user_id = user_obj['id']
        if 'From' in headers:
            mailfrom = headers['From'].split("<")[1][:-1]
            user_obj = es.get(index="nyx_user", id=mailfrom, doc_type="doc")
            user_id = user_obj['_id']
            user_obj = user_obj["_source"]
            user = user_obj['firstname'] + ' ' + user_obj['lastname']
        logger.info(user)
        lot = 0
        contract = ''
        technic = ''
        key = ''
        report_date = None
        title = ''
        # First pass: locate the entity header paragraph and the report
        # date paragraph ('KPI <month name> <year>').
        for paragraph in doc.paragraphs:
            if paragraph.text.strip() != '' and paragraph.text.strip() != '\\n':
                print(">>>>" + paragraph.text + "<<<<<")
                finalp = paragraph.text.strip()
                if finalp in entityObj:
                    lot = entityObj[finalp]['lot']
                    contract = entityObj[finalp]['contract']
                    technic = entityObj[finalp]['technic']
                    key = entityObj[finalp]['key']
                    title = finalp
                regex = 'KPI [a-zA-Z]{3,10} [0-9]{4}'
                x = re.search(regex, paragraph.text.strip())
                if x is not None:
                    report_date = datetime.strptime(paragraph.text,
                                                    'KPI %B %Y')
        logger.info('key : ' + key)
        logger.info('title : ' + title)
        logger.info('lot : ' + str(lot))
        logger.info('contract : ' + contract)
        logger.info('technic : ' + technic)
        logger.info('date : ' + str(report_date))
        # NOTE(review): the next two log calls were corrupted ('******') in
        # the original source and have been reconstructed.
        logger.info('user : ' + user)
        logger.info('user_id : ' + str(user_id))
        maanden = [
            'Januari', 'Februari', 'Maart', 'April', 'Mei', 'Juni', 'Juli',
            'Augustus', 'September', 'Oktober', 'November', 'December'
        ]
        reportdateNL = maanden[report_date.month - 1]
        results = []
        if key != '':
            key = key.replace("Lot3 (All)", "Lot3 (BACEXT)") \
                     .replace("Lot1 (All)", "Lot1 (BACHEA)")
        # Second pass: collect '@KPI <nr>: <comment>' entries.
        for paragraph in doc.paragraphs:
            if paragraph.text.strip() != '' and paragraph.text.strip() != '\\n':
                regex = '@([kK][pP][iI])? *([0-9]{1,3}[a-zA-Z]?) *:(.*)'
                print(">>>>" + paragraph.text.strip())
                matches = re.findall(regex, paragraph.text.strip())
                if len(matches) > 0:
                    for x in matches:
                        kpi = x[1]
                        comment = x[2].strip()
                        logger.info(' KPI COMMENT: ' + kpi)
                        logger.info(' COMMENT: ' + comment)
                        obj = {
                            'key': key,
                            'title': title,
                            'lot': lot,
                            'contract': contract,
                            'technic': technic,
                            'report_date': report_date,
                            'creation_date': datetime.now(),
                            'kpi': kpi,
                            'comment': comment,
                            'user': user,
                            'user_id': user_id,
                        }
                        results.append({'kpi': kpi, 'comment': comment})
                        dict_comment.append(obj)
        df_comment = pd.DataFrame(dict_comment)
        logger.info(df_comment)
        if len(df_comment) > 0:
            df_comment['creation_date'] = df_comment[
                'creation_date'].dt.tz_localize(tz='Europe/Paris')
            df_comment['report_date'] = df_comment[
                'report_date'].dt.tz_localize(tz='Europe/Paris')
            df_comment['_index'] = 'biac_feedback_comment'
            es_helper.dataframe_to_elastic(es, df_comment)
            scorebody = "".join([
                "<li><b>KPI:</b>" + str(x["kpi"]).replace('.0', '')
                + " <b>Commentaar:</b>" + str(x["comment"]) + "</li>"
                for x in results
            ])
            scorebody = "<ul>" + scorebody + "</ul>"
            returnmail = {
                "mail": user_id,
                "results": results,
                "user": user,
                'reportdate_nl': reportdateNL,
                "reportdate": report_date.strftime("%d/%m/%Y"),
                "scorebody": scorebody,
                "title": title
            }
            logger.info(json.dumps(returnmail))
            set_docx_status(es, user, key, report_date)
            conn.send_message("/queue/BAC_FEEDBACK_RETURNMAIL_DOCX",
                              json.dumps(returnmail))
        else:
            returnmail = {
                "mail": user_id,
                "results": results,
                "user": user,
                "reportdate": report_date.strftime("%d/%m/%Y"),
                'reportdate_nl': reportdateNL,
                "scorebody": "<div>- No Remarks Found</div>",
                "title": title
            }
            set_docx_status(es, user, key, report_date)
            conn.send_message("/queue/BAC_FEEDBACK_RETURNMAIL_DOCX",
                              json.dumps(returnmail))
    except Exception as e:
        endtime = time.time()
        logger.error(e, exc_info=e)
        log_message("Import of file [%s] failed. Duration: %d Exception: %s."
                    % (headers["file"], (endtime - starttime), str(e)))
    endtime = time.time()
    try:
        log_message("Import of file [%s] finished. Duration: %d Records: %d."
                    % (headers["file"], (endtime - starttime),
                       df_comment.shape[0]))
    except:
        log_message("Import of file [%s] finished. Duration: %d."
                    % (headers["file"], (endtime - starttime)))
def messageReceivedXLSX(destination, message, headers):
    """
    Handle a KPI result Excel file received via message queue.

    Decodes the base64 xlsx payload, extracts the negotiated KPI scores
    (the sheet layout differs per lot), stores them in the
    'biac_feedback_result' index and sends a confirmation mail message.

    Parameters
    ----------
    destination
        Queue/topic the message arrived on (logged only).
    message
        Base64-encoded .xlsx content.
    headers
        Message headers; 'file', 'user' and/or 'From' are used when present.
    """
    global es
    starttime = time.time()
    logger.info("==> " * 10)
    logger.info("XLS Message Received %s" % destination)
    logger.info(headers)
    if "CamelSplitAttachmentId" in headers:
        headers["file"] = headers["CamelSplitAttachmentId"]
    if "file" in headers:
        logger.info("File:%s" % headers["file"])
        log_message("Import of file [%s] started." % headers["file"])
    try:
        xlsbytes = base64.b64decode(message)
        f = open('./tmp/excel.xlsx', 'wb')
        f.write(xlsbytes)
        f.close()
        # Identify the sender either from an explicit 'user' header or from
        # the mail 'From' address (resolved against the nyx_user index).
        if 'user' in headers:
            user_obj = json.loads(headers['user'])
            user = user_obj['firstname'] + ' ' + user_obj['lastname']
            user_id = user_obj['id']
        if 'From' in headers:
            mailfrom = headers['From'].split("<")[1][:-1]
            user_obj = es.get(index="nyx_user", id=mailfrom, doc_type="doc")
            user_id = user_obj['_id']
            user_obj = user_obj["_source"]
            user = user_obj['firstname'] + ' ' + user_obj['lastname']
        logger.info(user_obj)
        logger.info(user)
        logger.info(user_id)
        # Column 7 of the first header row identifies the sheet layout.
        dftop = pd.read_excel("./tmp/excel.xlsx", skiprows=0)
        if dftop.columns[7] == "LOT5":
            df = pd.read_excel("./tmp/excel.xlsx", skiprows=8)
            dforg = df.copy()
            df = df[[df.columns[0], df.columns[7]]]
            df.columns = ["KPI", "NEGO"]
            nscore = df[pd.notnull(df['KPI']) & pd.notnull(df['NEGO'])]
            print(nscore)
            reporttype = dftop.columns[7]
            if (reporttype == "LOT5"):
                reporttype = "Lot5"
            reportdate = datetime.strptime(dftop.columns[6], "%d/%m/%Y")
            entity = {"lot": 5, "contract": "Lot5", "title": "Lot5"}
        elif dftop.columns[7] == "LOT6":
            df = pd.read_excel("./tmp/excel.xlsx", skiprows=7)
            dforg = df.copy()
            df = df[[df.columns[0], df.columns[11]]]
            df.columns = ["KPI", "NEGO"]
            nscore = df[pd.notnull(df['KPI']) & pd.notnull(df['NEGO'])]
            print(nscore)
            reporttype = dftop.columns[7]
            if (reporttype == "LOT6"):
                reporttype = "Lot6"
            reportdate = datetime.strptime(dftop.columns[6], "%d/%m/%Y")
            entity = {"lot": 6, "contract": "Lot6", "title": "Lot6"}
        elif dftop.columns[7] == "LOT7":
            df = pd.read_excel("./tmp/excel.xlsx", skiprows=7)
            dforg = df.copy()
            df = df[[df.columns[0], df.columns[11]]]
            df.columns = ["KPI", "NEGO"]
            nscore = df[pd.notnull(df['KPI']) & pd.notnull(df['NEGO'])]
            print(nscore)
            reporttype = dftop.columns[7]
            if (reporttype == "LOT7"):
                reporttype = "Lot7"
            reportdate = datetime.strptime(dftop.columns[6], "%d/%m/%Y")
            entity = {"lot": 7, "contract": "Lot7", "title": "Lot7"}
        else:
            # Lots 1-4: different column positions; report date location
            # also varies (the except branch handles the LOT 4 layout).
            df = pd.read_excel("./tmp/excel.xlsx", skiprows=7)
            dforg = df.copy()
            df = df[[df.columns[1], df.columns[10]]]
            df.columns = ["KPI", "NEGO"]
            nscore = df[pd.notnull(df['KPI']) & pd.notnull(df['NEGO'])]
            print(nscore)
            df = pd.read_excel("./tmp/excel.xlsx")
            # print(df)
            try:
                reporttype = df.columns[8]
                reportdate = datetime.strptime(df.columns[7], "%d/%m/%Y")
            except:
                # LOT 4
                logger.info("Unable to decode report date")
                print(df.columns)
                reporttype = df.columns[7]
                reportdate = datetime.strptime(df.columns[6], "%d/%m/%Y")
            dforg = dforg[[dforg.columns[2], dforg.columns[14]]]
            dforg.columns = ["KPI", "NEGO"]
            #dforg=dforg[dforg['NEGO']!="/"]
            nscore = dforg[(pd.notnull(dforg['KPI']))
                           & (pd.notnull(dforg['NEGO']))]
            print(nscore)
            reporttype = reporttype.replace("Lot4 (DNB)", "Lot4 (BACDNB)")
            entity = getEntityObjXLS(es, reporttype)
        maanden = [
            'Januari', 'Februari', 'Maart', 'April', 'Mei', 'Juni', 'Juli',
            'Augustus', 'September', 'Oktober', 'November', 'December'
        ]
        reportdateNL = maanden[reportdate.month - 1]
        logger.info(entity)
        results = [{"kpi": x[1][0], "result": x[1][1]}
                   for x in nscore.iterrows()]
        scorebody = "".join([
            "<li><b>KPI:</b>" + str(x["kpi"]).replace('.0', '')
            + " <b>Resultaat:</b>" + str(x["result"]) + "</li>"
            for x in results
        ])
        scorebody = "<ul>" + scorebody + "</ul>"
        returnmail = {
            "mail": user_id,
            "results": results,
            "user": user,
            'reportdate_nl': reportdateNL,
            "reportdate": reportdate.strftime("%d/%m/%Y"),
            "scorebody": scorebody,
            "entity": entity
        }
        logger.info(json.dumps(returnmail))
        dict_comment = []
        for result in results:
            obj = {
                'key': reporttype,
                'title': entity["title"],
                'lot': entity["lot"],
                'contract': entity["contract"],
                'technic': entity.get("technic", ""),
                'report_date': reportdate,
                'creation_date': datetime.now(),
                'kpi': cleankpi(result["kpi"]),
                'result': result["result"],
                'user': user,
                'user_id': user_id,
            }
            dict_comment.append(obj)
        df_score = pd.DataFrame(dict_comment)
        logger.info(df_score)
        if len(df_score) > 0:
            df_score['creation_date'] = df_score[
                'creation_date'].dt.tz_localize(tz='Europe/Paris')
            df_score['report_date'] = df_score['report_date'].dt.tz_localize(
                tz='Europe/Paris')
            df_score['_index'] = 'biac_feedback_result'
            #df_score['_id']=(reportdate.strftime("%d%m%Y")+"_"+reporttype+"_"+df_score["kpi"])#.lower().replace(" ","").replace(")","").replace("(","_")
            df_score['_id'] = df_score["kpi"].apply(lambda x: (
                reportdate.strftime("%d%m%Y") + "_" + reporttype + "_" + str(x)
            ).lower().replace(" ", "").replace(")", "").replace("(", "_"))
            logger.info(df_score)
            es_helper.dataframe_to_elastic(es, df_score)
        set_xlsx_status(es, user, reporttype, reportdate)
        conn.send_message("/queue/BAC_FEEDBACK_RETURNMAIL_XLSX",
                          json.dumps(returnmail))
    except Exception as e:
        endtime = time.time()
        logger.error(e, exc_info=e)
        log_message("Import of file [%s] failed. Duration: %d Exception: %s."
                    % (headers["file"], (endtime - starttime), str(e)))
    endtime = time.time()
    try:
        # BUGFIX: the success log referenced 'df_comment', which is never
        # defined in this function (copy-paste from the DOCX handler) and
        # always forced the record-less fallback; use df_score instead.
        log_message("Import of file [%s] finished. Duration: %d Records: %d."
                    % (headers["file"], (endtime - starttime),
                       df_score.shape[0]))
    except:
        log_message("Import of file [%s] finished. Duration: %d."
                    % (headers["file"], (endtime - starttime)))
def cleanData(filename, mes):
    """
    Clean a raw GTC CSV payload and index it into Elasticsearch.

    For the COGLTS contract, the raw gas measurements are first conditioned
    (condType1/2/3 helpers) and a set of derived Thiopaq/Cogen operating
    conditions is appended before indexing.

    Parameters
    ----------
    filename
        Name of the received file; decodeMetaData(filename) must return 0
        and is assumed to set the module-level 'contract' and 'site' read
        below — TODO confirm.
    mes
        Raw CSV content (';' or ',' separated, no header row).
    """
    global es
    logger.info("===> CLEANING DATA <===")
    if decodeMetaData(filename) == 0:
        logger.info("===> ENTER IF <===")
        # NOTE(review): 'indexname' is unused; the target index is computed
        # per-row via getIndex(...) at the bottom.
        indexname = "OPT_CLEANED_DATA-%s" % (datetime.now().strftime("%Y-%m"))
        dataasio = StringIO(mes)
        # Pick the separator by simple majority of ';' versus ','.
        nofsemicolons = mes.count(';')
        nofcommas = mes.count(',')
        if nofsemicolons > nofcommas:
            df = pd.read_csv(dataasio, sep=";", header=None)
        else:
            df = pd.read_csv(dataasio, sep=",", header=None)
        # Column 0 holds the measurement name; column 3 holds the value.
        dfindexed = df.set_index(0)
        if contract == 'COGLTS':
            dfindexed = df.set_index(0)
            # Raw gas quality measurements (value in column 3).
            H2SApThiopaq = dfindexed.loc['LUTOSA_H2S_Ap_Thiopaq'].at[3]
            H2SAvCogen = dfindexed.loc['LUTOSA_H2S_Av_Cogen'].at[3]
            H2SAvThiopaq = dfindexed.loc['LUTOSA_H2S_Av_Thiopaq'].at[3]
            CH4ApThiopaq = dfindexed.loc['LUTOSA_CH4_Ap_Thiopaq'].at[3]
            CH4AvCogen = dfindexed.loc['LUTOSA_CH4_Av_Cogen'].at[3]
            CH4AvThiopaq = dfindexed.loc['LUTOSA_CH4_Av_Thiopaq'].at[3]
            O2ApThiopaq = dfindexed.loc['LUTOSA_O2_Ap_Thiopaq'].at[3]
            O2AvCogen = dfindexed.loc['LUTOSA_O2_Av_Cogen'].at[3]
            O2AvThiopaq = dfindexed.loc['LUTOSA_O2_Av_Thiopaq'].at[3]
            DebitBiogazThiopaq = dfindexed.loc[
                'COGLTS_BIOLTS_Valeur_Debit_Biogaz_Thiopaq'].at[3]
            DebitBiogazCogen = dfindexed.loc[
                'COGLTS_BIOLTS_Valeur_Debit_Biogaz_Cogen'].at[3]
            # Condition each measurement against the matching biogas flow;
            # the numeric arguments of condType3 are per-signal thresholds.
            H2SApThiopaq = condType1(H2SApThiopaq, DebitBiogazThiopaq)
            H2SAvCogen = condType3(H2SAvCogen, DebitBiogazCogen, 20)
            H2SAvThiopaq = condType2(H2SAvThiopaq, DebitBiogazThiopaq)
            CH4ApThiopaq = condType3(CH4ApThiopaq, DebitBiogazThiopaq, 35)
            CH4AvCogen = condType3(CH4AvCogen, DebitBiogazCogen, 40)
            CH4AvThiopaq = condType3(CH4AvThiopaq, DebitBiogazThiopaq, 45)
            O2ApThiopaq = condType3(O2ApThiopaq, DebitBiogazThiopaq, 1.5)
            O2AvCogen = condType3(O2AvCogen, DebitBiogazCogen, 1.5)
            O2AvThiopaq = condType3(O2AvThiopaq, DebitBiogazThiopaq, 1.5)
            # Write the conditioned values back in place.
            dfindexed.at['LUTOSA_H2S_Ap_Thiopaq', 3] = H2SApThiopaq
            dfindexed.at['LUTOSA_H2S_Av_Cogen', 3] = H2SAvCogen
            dfindexed.at['LUTOSA_H2S_Av_Thiopaq', 3] = H2SAvThiopaq
            dfindexed.at['LUTOSA_CH4_Ap_Thiopaq', 3] = CH4ApThiopaq
            dfindexed.at['LUTOSA_CH4_Av_Cogen', 3] = CH4AvCogen
            dfindexed.at['LUTOSA_CH4_Av_Thiopaq', 3] = CH4AvThiopaq
            dfindexed.at['LUTOSA_O2_Ap_Thiopaq', 3] = O2ApThiopaq
            dfindexed.at['LUTOSA_O2_Av_Cogen', 3] = O2AvCogen
            dfindexed.at['LUTOSA_O2_Av_Thiopaq', 3] = O2AvThiopaq
            # Timestamp and extra parameters reused for the derived rows.
            datefile = dfindexed.at['LUTOSA_Cpt_Ther_HT', 1]
            param1 = dfindexed.at['LUTOSA_Cpt_Ther_HT', 2]
            param2 = dfindexed.at['LUTOSA_Cpt_Ther_HT', 4]
            dffinal = dfindexed.reset_index()
            # Thiopaq operating conditions: flow > 120, inlet pressure in
            # (15, 36), and biogas authorization flag.
            fctThiopaqCond1 = 1 if dfindexed.loc[
                'COGLTS_BIOLTS_Valeur_Debit_Biogaz_Thiopaq'].at[3] > 120 else 0
            fctThiopaqCond2 = 1 if (
                dfindexed.loc['COGLTS_BIOLTS_Valeur_Pression_Thiopaq_Entree'].at[3] > 15
                and
                dfindexed.loc['COGLTS_BIOLTS_Valeur_Pression_Thiopaq_Entree'].at[3] < 36) else 0
            fctThiopaqCond3 = dfindexed.loc['LUTOSA_Etat_Autor_Biog'].at[3]
            # Global flag: all three conditions must hold.
            fctThiopaqCondGlobal = 1 if fctThiopaqCond1 + fctThiopaqCond2 + fctThiopaqCond3 == 3 else 0
            # Cogen operating conditions: at least two consumers available,
            # HT return temperature, flow, motor availability and H2S level.
            consoDispos = dfindexed.loc['LUTOSA_Etat_Peleur1_Fct'].at[
                3] + dfindexed.loc['LUTOSA_Etat_Peleur2_Fct'].at[
                    3] + dfindexed.loc['LUTOSA_Etat_Blancheur_1'].at[
                        3] + dfindexed.loc['LUTOSA_Etat_Blancheur_2'].at[3]
            fctCogenCond1 = 1 if consoDispos >= 2 else 0
            fctCogenCond2 = 1 if dfindexed.loc['LUTOSA_TempRetour_Boucle_HT'].at[
                3] <= 80 else 0
            fctCogenCond3 = 1 if dfindexed.loc[
                'COGLTS_BIOLTS_Valeur_Debit_Biogaz_Thiopaq'].at[3] > 220 else 0
            fctCogenCond4 = dfindexed.loc['LUTOSA_Moteur_Disponible'].at[3]
            fctCogenCond5 = 1 if dfindexed.loc['LUTOSA_H2S_Av_Cogen'].at[
                3] <= 5 else 0
            # Global flag: all five conditions must hold.
            fctCogenCondGlobal = 1 if fctCogenCond1 + fctCogenCond2 + fctCogenCond3 + fctCogenCond4 + fctCogenCond5 == 5 else 0
            # Derived condition rows, shaped like the raw CSV rows.
            data = [{
                0: 'LUTOSA_Cond_Fct_1_Thiopaq',
                1: datefile,
                2: param1,
                3: fctThiopaqCond1,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_2_Thiopaq',
                1: datefile,
                2: param1,
                3: fctThiopaqCond2,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_3_Thiopaq',
                1: datefile,
                2: param1,
                3: fctThiopaqCond3,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_Global_Thiopaq',
                1: datefile,
                2: param1,
                3: fctThiopaqCondGlobal,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_1_Cogen',
                1: datefile,
                2: param1,
                3: fctCogenCond1,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_2_Cogen',
                1: datefile,
                2: param1,
                3: fctCogenCond2,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_3_Cogen',
                1: datefile,
                2: param1,
                3: fctCogenCond3,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_4_Cogen',
                1: datefile,
                2: param1,
                3: fctCogenCond4,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_5_Cogen',
                1: datefile,
                2: param1,
                3: fctCogenCond5,
                4: param2
            }, {
                0: 'LUTOSA_Cond_Fct_Global_Cogen',
                1: datefile,
                2: param1,
                3: fctCogenCondGlobal,
                4: param2
            }]
            dffinal = dffinal.append(data, ignore_index=True, sort=True)
        else:
            dffinal = dfindexed.reset_index()
        # Keep name/date/value, then add routing metadata for Elasticsearch.
        dffinal = dffinal[[0, 1, 3]]
        dffinal.columns = ['name', 'date', 'value']
        dffinal['src'] = 'gtc'
        dffinal['area_name'] = site + '_' + dffinal['name']
        dffinal['client_area_name'] = contract + '_' + dffinal['area_name']
        dffinal['client'] = contract
        dffinal['area'] = site
        dffinal["date"] = dffinal['date'].apply(convertToDate)
        dffinal["date"] = dffinal['date'].apply(lambda x: utc_to_local(x))
        dffinal['@timestamp'] = dffinal['date'].apply(
            lambda x: getTimestamp(x) * 1000)
        dffinal['date'] = dffinal['date'].apply(
            lambda x: x.strftime('%Y-%m-%d %H:%M:00'))
        dffinal['_index'] = dffinal['date'].apply(lambda x: getIndex(x))
        dffinal['_id'] = dffinal.apply(lambda row: getId(row), axis=1)
        es_helper.dataframe_to_elastic(es, dffinal)
def compute_targets(year):
    """
    Enrich the daily_cogen_lutosa documents of a year with daily targets
    and target ratios, then write them back to Elasticsearch.

    Yearly targets (biogas, electricity, heat, cogen runtime) are spread
    evenly over the plant's opening days — taken from the LUTOSA calendar
    when available, otherwise every day of the year.

    Parameters
    ----------
    year
        Calendar year to process.

    Returns
    -------
    None when there is no daily data for the year.
    """
    global es
    # Fetch the yearly targets once instead of four separate lookups.
    targets = ch.get_targets(es)
    target_prod_biogaz = targets['biogas']
    target_prod_elec = targets['elec']
    target_prod_heat = targets['heat']
    target_runtime_cogen = targets['runtime']

    start, end = datetime(year, 1, 1), datetime(year, 12, 31, 23, 59, 59)
    logger.info(start)
    logger.info(end)
    df_calendar = es_helper.elastic_to_dataframe(es, start=start, end=end,
                                                 index='nyx_calendar',
                                                 query='type:LUTOSA',
                                                 datecolumns=['date'],
                                                 timestampfield='date')
    # Default: every day of the year counts as an opening day.
    open_days = (datetime(year, 12, 31) - datetime(year, 1, 1)).days + 1
    logger.info('size df_calendar: ' + str(len(df_calendar)))
    if len(df_calendar) > 0:
        open_days = df_calendar.loc[df_calendar['on'], 'on'].count()
        # df_calendar['date']=pd.to_datetime(df_calendar['date'], utc=True).dt.tz_convert('Europe/Paris').dt.date
        df_calendar['date'] = pd.to_datetime(df_calendar['date']).dt.date
        del df_calendar['_id']
        del df_calendar['_index']
    logger.info('opening days: ' + str(open_days))

    daily_target_prod_biogaz = target_prod_biogaz / open_days
    daily_target_prod_elec = target_prod_elec / open_days
    daily_target_prod_heat = target_prod_heat / open_days
    daily_target_runtime_cogen = target_runtime_cogen / open_days

    df_daily_lutosa = es_helper.elastic_to_dataframe(
        es, start=start, end=end, index='daily_cogen_lutosa',
        datecolumns=['@timestamp'], query='*')
    if len(df_daily_lutosa) == 0:
        return None
    if 'on' in df_daily_lutosa.columns:
        del df_daily_lutosa['on']
    df_daily_lutosa['date'] = pd.to_datetime(
        df_daily_lutosa['@timestamp']).dt.date
    if len(df_calendar) > 0:
        logger.info('merge calendar')
        df_daily_lutosa = df_daily_lutosa.merge(df_calendar[['date', 'on']],
                                                left_on='date',
                                                right_on='date',
                                                how='left')

    # NOTE(review): everything below reads the 'on' column, which only
    # exists after the calendar merge — confirm a calendar always exists.
    df_daily_lutosa['entry_biogas_thiopaq_target_MWh'] = 0
    df_daily_lutosa.loc[
        df_daily_lutosa['on'],
        'entry_biogas_thiopaq_target_MWh'] = daily_target_prod_biogaz
    df_daily_lutosa.loc[
        df_daily_lutosa['on'], 'entry_biogas_thiopaq_ratio_target'] = round(
            (df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'entry_biogas_thiopaq_MWh'] /
             df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'entry_biogas_thiopaq_target_MWh']), 2)

    df_daily_lutosa['daily_avail_motor_target_hour'] = 0
    df_daily_lutosa.loc[
        df_daily_lutosa['on'],
        'daily_avail_motor_target_hour'] = daily_target_runtime_cogen
    df_daily_lutosa.loc[
        df_daily_lutosa['on'], 'daily_avail_motor_ratio_target'] = round(
            (df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'daily_avail_motor_hour'] /
             df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'daily_avail_motor_target_hour']), 2)

    df_daily_lutosa['out_elec_cogen_target_MWh'] = 0
    df_daily_lutosa.loc[df_daily_lutosa['on'],
                        'out_elec_cogen_target_MWh'] = daily_target_prod_elec
    df_daily_lutosa.loc[
        df_daily_lutosa['on'], 'out_elec_cogen_ratio_target'] = round(
            (df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'out_elec_cogen_MWh'] /
             df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'out_elec_cogen_target_MWh']), 2)

    df_daily_lutosa['out_therm_cogen_target_MWh'] = 0
    df_daily_lutosa.loc[df_daily_lutosa['on'],
                        'out_therm_cogen_target_MWh'] = daily_target_prod_heat
    df_daily_lutosa.loc[
        df_daily_lutosa['on'], 'out_therm_cogen_ratio_target'] = round(
            (df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'out_therm_cogen_MWh'] /
             df_daily_lutosa.loc[df_daily_lutosa['on'],
                                 'out_therm_cogen_target_MWh']), 2)

    # Drop merge/calendar helper columns before indexing.
    for column in ('on_x', 'on_y', 'weekday', 'date', 'type'):
        if column in df_daily_lutosa.columns:
            del df_daily_lutosa[column]

    # On closing days the ratios are meaningless: blank them out.
    for column in ('out_therm_cogen_ratio_target',
                   'out_elec_cogen_ratio_target',
                   'daily_avail_motor_ratio_target',
                   'entry_biogas_thiopaq_ratio_target',
                   'avail_cogen_ratio',
                   'avail_thiopaq_ratio'):
        df_daily_lutosa.loc[df_daily_lutosa['on'] == False, column] = np.nan

    es_helper.dataframe_to_elastic(es, df_daily_lutosa)
def loadKPI105():
    """
    Load KPI 105 ("Dagelijkse ronde" daily rounds) from the Kizeo Forms REST API
    into the 'biac_kpi105' Elasticsearch index.

    Flow: login to Kizeo, list all forms, match the daily-round forms by name,
    download each form's data as an Excel export into ./tmp, concatenate the
    sheets, then index the result and trigger the monthly/stat recomputations.

    Side effects: writes ./tmp/*.xlsx, toto.xlsx and toto2.xlsx debug dumps,
    writes to Elasticsearch, sends a STOMP message, deletes biac_kib_kpi105.
    Errors are caught, printed to stdout and swallowed.
    """
    try:
        starttime = time.time()
        logger.info(">>> LOADING KPI105")
        logger.info("==================")
        url_kizeo = 'https://www.kizeoforms.com/rest/v3'
        # Credentials come from the container environment.
        kizeo_user = os.environ["KIZEO_USER"]
        kizeo_password = os.environ["KIZEO_PASSWORD"]
        kizeo_company = os.environ["KIZEO_COMPANY"]
        payload = {
            "user": kizeo_user,
            "password": kizeo_password,
            "company": kizeo_company
        }
        r = requests.post(url_kizeo + '/login', json=payload)
        if r.status_code != 200:
            logger.error('Unable to reach Kizeo server. Code:' +
                         str(r.status_code) + " Reason:" + str(r.reason))
            return
        response = r.json()
        token = response['data']['token']
        logger.info('>Token: ' + str(token))
        r = requests.get(url_kizeo + '/forms?Authorization=' + token)
        form_list = r.json()['forms']
        logger.info('>Form List: ')
        #logger.info(form_list)
        df_all = pd.DataFrame()
        for i in form_list:
            # Only the "LOT n - Dagelijkse ronde ..." forms are imported.
            if re.findall("LOT [1-9] - Dagelijkse ronde .*", i['name'].strip()):
                logger.info('MATCH')
                logger.info(i['name'])
                form_id = i['id']
                # NOTE(review): start/end are computed (start is now+30d, end
                # now-160d) but only logged — the exports_info GET below does
                # not use them, so all available data ids are fetched.
                start = (datetime.now() +
                         timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
                logger.info("Start %s" % (start))
                end = (datetime.now() -
                       timedelta(days=160)).strftime("%Y-%m-%d %H:%M:%S")
                logger.info("End %s" % (end))
                #post={"onlyFinished":False,"startDateTime":start,"endDateTime":end,"filters":[]}
                r = requests.get(url_kizeo + '/forms/' + form_id +
                                 '/data/exports_info?Authorization=' + token)
                if r.status_code != 200:
                    logger.error('something went wrong...')
                    logger.error(r.status_code, r.reason)
                elif r.text == '':
                    logger.info('Empty response')
                else:
                    # Collect all record ids, then request one Excel export
                    # covering all of them.
                    ids = []
                    for rec in r.json()["data"]:
                        ids.append(rec["id"])
                    logger.info(ids)
                    payload = {"data_ids": ids}
                    posturl = ("%s/forms/%s/data/multiple/excel_custom" %
                               (url_kizeo, form_id))
                    headers = {
                        'Content-type': 'application/json',
                        'Authorization': token
                    }
                    r = requests.post(posturl,
                                      data=json.dumps(payload),
                                      headers=headers)
                    if r.status_code != 200:
                        logger.error('something went wrong...')
                        logger.error(r.status_code, r.reason)
                    logger.info("Handling Form. Content Size:" +
                                str(len(r.content)))
                    if len(r.content) > 0:
                        # Persist the export under ./tmp using a slugified
                        # form name, then load it with pandas.
                        file_name = "./tmp/" + i['name'].replace(
                            '/', '').replace(' ', '').lower() + ".xlsx"
                        file = open(file_name, "wb")
                        file.write(r.content)
                        file.close()
                        df = pd.read_excel(file_name)
                        print("======*" * 100)
                        print(file_name)
                        df.columns = [
                            'ronde', 'date', 'date_modification',
                            'date_answer', 'record_number'
                        ]
                        logger.info(df)
                        df_all = df_all.append(df)
        if len(df_all) > 0:
            # Debug dump of the raw concatenation.
            with pd.ExcelWriter('toto.xlsx') as writer:
                df_all.to_excel(writer)
            df_all['lot'] = df_all['ronde'].apply(lambda x: extract_lot(x))
            df_all['ronde_number'] = df_all['ronde'].apply(
                lambda x: extract_ronde_number(x))
            # NOTE(review): 'lot' is recomputed a second time here (no-op).
            df_all['lot'] = df_all['ronde'].apply(lambda x: extract_lot(x))
            # df_all['_timestamp'] = df_all['date_answer'].dt.date
            # df_all['_timestamp'] = df_all['_timestamp'].apply(lambda x:datetime(x.year, x.month, x.day))
            df_all['_timestamp'] = df_all['date']
            # Deterministic document id: date + lot + round number.
            df_all['_id'] = df_all['_timestamp'].astype(str) + '_' + df_all[
                'lot'].astype(str) + '_' + df_all['ronde_number']
            df_all['_index'] = 'biac_kpi105'
            # Debug dump of the enriched frame.
            with pd.ExcelWriter('toto2.xlsx') as writer:
                df_all.to_excel(writer)
            es_helper.dataframe_to_elastic(es, df_all)
            obj = {
                'start': min(df_all['_timestamp']),
                'end': max(df_all['_timestamp']),
            }
            conn.send_message('/topic/BIAC_KPI105_IMPORTED', str(obj))
            # Give Elasticsearch time to index before recomputing aggregates.
            time.sleep(3)
            compute_kpi105_monthly(obj['start'], obj['end'])
            time.sleep(1)
            es.indices.delete(index='biac_kib_kpi105', ignore=[400, 404])
            for lot in df_all['lot'].unique():
                computeStats105(int(lot))
        endtime = time.time()
        log_message(
            "Import of KPI105 from Kizeo finished. Duration: %d Records: %d."
            % (endtime - starttime, df_all.shape[0]))
    except Exception as e:
        endtime = time.time()
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        # exc_type below is ignored on 3.5 and later
        traceback.print_exception(exc_type,
                                  exc_value,
                                  exc_traceback,
                                  limit=2,
                                  file=sys.stdout)
def messageReceived(destination, message, headers):
    """
    Handle an incoming KPI502 message: either a recompute request or a
    base64-encoded "Safety Register" Excel workbook to import.

    Parameters
    ----------
    destination : str
        STOMP topic; "/topic/RECOMPUTE_502" short-circuits into
        recomputeKPI502() only.
    message : str
        Base64-encoded xlsx content.
    headers : dict
        Transport headers; the original file name is expected under "file"
        (or "CamelSplitAttachmentId").

    Side effects: writes ./tmp/excel.xlsx, deletes/writes documents in the
    biac_kpi502 and biac_month_kpi502 indices, flips their 'active' flags,
    and emits log_message notifications. The reporting month ('goodmonth',
    a global, format "MM-YYYY") is derived from the file name minus one month.
    """
    global es, goodmonth
    records = 0
    starttime = time.time()
    logger.info("==> " * 10)
    logger.info("Message Received %s" % destination)
    # Pure recompute request: no payload to import.
    if destination == "/topic/RECOMPUTE_502":
        recomputeKPI502()
        return
    logger.info(headers)
    filepath = ""
    file = ""
    if "CamelSplitAttachmentId" in headers:
        headers["file"] = headers["CamelSplitAttachmentId"]
    if "file" in headers:
        logger.info("File:%s" % headers["file"])
        log_message("Import of file [%s] started." % headers["file"])
        file = headers["file"]
        #file= './tmp/Safety Register - KPI502 - Lots_1_2_3 - 2019-04.xlsx'
        #file="Safety Register - KPI502 - Lots_1_2_3 - 2019-0.xlsx"
        logger.info("FILE :" + file)
        filename = file
        logger.info("FILE NAME :" + filename)
        # The file name is expected to end with "YYYY-MM.xlsx"; take the 7
        # chars before the extension and swap to "MM-YYYY".
        filedate = file[1:].split(".")[0][-7:]
        goodmonth = filedate.split("-")[1] + "-" + filedate.split("-")[0]
        logger.info("MONTH(BEF) :" + goodmonth)
        # The workbook reports on the previous month.
        gm = datetime.strptime(goodmonth, "%m-%Y")
        gm = gm - relativedelta(months=1)
        goodmonth = datetime.strftime(gm, "%m-%Y")
        # Decode the payload to a scratch file read back below.
        xlsbytes = base64.b64decode(message)
        f = open('./tmp/excel.xlsx', 'wb')
        f.write(xlsbytes)
        f.close()
        filename = './tmp/excel.xlsx'
    # exit()
    #filename = './tmp/KPI502 - Lot2- 2019-01.xlsx'
    #file="KPI502 - Lot2- 2019-01.xlsx"
    #filename = './tmp/KPI502 - Lot2 - 2019-03.xlsx'
    # logger.info("MONTH(AFT) :"+goodmonth)
    try:
        # ---- 1. Read the four lot sheets into one dataframe -------------
        logger.info("Opening XLS...")
        dfdata = pd.DataFrame()
        for sheet in ["Lot 1", "Lot 2", "Lot 3", "Lot 4"]:
            logger.info(">>>>>>>>>> LOADING:" + sheet)
            try:
                dfd = pd.read_excel(filename,
                                    sheet_name=sheet,
                                    skiprows=2,
                                    index_col=0)
            except:
                # A missing sheet is tolerated.
                logger.error("Unable to read sheet.")
                continue
            dfd["Sheet"] = sheet
            #logger.info(dfd.describe())
            dfdata = dfdata.append(dfd)  #,ignore_index = True)
            #break
        #dfdata=dfdata.reset_index()
        logger.info("Done.")
        #dfdata = dfdata.drop('Unnamed: 0', axis=1)
        dfdata = dfdata.reset_index()
        del dfdata["index"]
        #print(dfdata.columns)
        logger.info(dfdata.columns)
        # ---- 2. Normalize column names and enrich -----------------------
        newcols = [
            'Month', 'SRid', 'Status', 'Opm. number', 'Definition',
            'Building', 'Floor', 'Place', 'Technic', 'Sub-technic',
            'materials', 'AssetCode', 'Device', 'BACid', 'Frequency',
            'Control', 'Report', 'Report Date', 'Reference Date',
            'Reference Year', 'Reference source date', 'Last shipment',
            'Repeat', 'Point of interest', 'KPI timing', 'GroupNum', 'Type',
            'FB Name', 'FB date', 'FB', 'Orig label', 'Orig Definition',
            'Control organism', 'To', 'Cc 1', 'Cc 2', 'Cc 3', 'Cc 4', 'Cc 5',
            'Cc 6', 'Cc 7', 'Cc 8', 'Cc 9', 'BAC Dept', 'BAC Sub Dept',
            'BAC Service', 'Group', 'Contractor', 'Lot', 'KPI Type',
            'ShortStatus', 'ShortStatusFU', 'ConcatShortStatus&OrigStatus',
            'LongStatus', 'Classification nr (O/I)', 'Classification (O/I)',
            'Show graph', 'Report link', 'Status (M-1)', 'Report Date (M-1)',
            'FB Date (M-1)', 'Deleted vs M-1', "KPI Type Nr",
            "CheckArchived", 'Sheet'
        ]
        logger.info(newcols)
        dfdata.columns = newcols
        dfdata["KPI Type Nr"] = 502
        dfdata["MonthFU"] = dfdata.apply(computeOverdue, axis=1)
        dfdata2 = dfdata.reset_index(drop=True)
        dfdata2.fillna('', inplace=True)
        # Rows without a month are excel padding; drop them.
        dfdata2 = dfdata2.drop(dfdata2[dfdata2["Month"] == ''].index)
        dfdata2['_index'] = "biac_kpi502"
        #dfdata2['_timestamp'] = dfdata2['Reference Date'].apply(lambda x: getTimestamp(x)*1000)
        #dfdata2['_timestamp'] = dfdata2['Reference Date'].apply(lambda x: int(x.timestamp()*1000))
        dfdata2['_timestamp'] = dfdata2.apply(lambda row: fixDate(row),
                                              axis=1)
        dfdata2['_id'] = dfdata2.apply(
            lambda row: get_id(row['Month'], row['SRid']), axis=1)
        dfdata2['_id'] = dfdata2['_id'].apply(lambda x: goodmonth + "-" + x)
        logger.info(dfdata2)
        filedate = file[1:].split(".")[0][-7:]
        dfdata2["filedate"] = filedate
        dfdata2['key'] = dfdata2.apply(computeReport, axis=1)
        dfdata2["ShortStatus"] = dfdata2["LongStatus"].apply(
            computeShortStatus)
        logger.info(len(dfdata2))
        # Only statuses 4 (remarks) and 5 (breaches) are kept.
        dfdata2 = dfdata2[(dfdata2["ShortStatus"] == 4) |
                          (dfdata2["ShortStatus"] == 5)]
        logger.info(len(dfdata2))
        dfdata2.drop_duplicates('_id', inplace=True)
        # Preserve the original per-row month before overwriting 'Month'.
        dfdata2["Month_"] = dfdata2["Month"]
        logger.info("BEFORE CLEANING")
        logger.info(dfdata2.shape)
        # Drop historical rows (2010-2017 and Jan-Apr 2018).
        for x in [str(x) for x in range(2010, 2018)]:
            dfdata2 = dfdata2[~dfdata2["Month_"].str.contains(x)]
        dfdata2 = dfdata2[~dfdata2["Month_"].
                          isin(["2018-01", "2018-02", "2018-03", "2018-04"])]
        logger.info("AFTER CLEANING")
        logger.info(dfdata2.shape)
        dfdata2["Month"] = goodmonth
        dfdata2["ValueCount"] = 1
        logger.info(filedate)
        # ---- 3. Zero-value placeholder rows per follow-up month ---------
        # Remarks (status 4) look back 12 months (6 for Lot4).
        #res4=compute_previous_months(int(filedate.split("-")[1]),int(filedate.split("-")[0]),12,skip=0)
        res4 = compute_previous_months(int(goodmonth.split("-")[0]),
                                       int(goodmonth.split("-")[1]),
                                       12,
                                       skip=0)
        res4lot4 = compute_previous_months(int(goodmonth.split("-")[0]),
                                           int(goodmonth.split("-")[1]),
                                           6,
                                           skip=0)
        res4table = []
        for key in dfdata2['key'].unique():
            print("===>" * 30)
            print(key)
            if key.startswith("Lot4"):
                res4touse = res4lot4
                print("LOT4" * 100)
            else:
                res4touse = res4
            for rec in res4touse:
                res4table.append({
                    "Month": goodmonth,
                    "MonthFU": rec,
                    "ShortStatus": 4,
                    "Status": 4,
                    "ValueCount": 0,
                    "_id": goodmonth + rec + "-S4-" + str(hash(key)),
                    "_index": "biac_kpi502",
                    "key": key,
                    "filedate": filedate
                })
        res4tabledf = pd.DataFrame(res4table)
        #print(res4tabledf)
        es_helper.dataframe_to_elastic(es, res4tabledf)
        #A/0
        # Breaches (status 5) look back 6 months (3 for Lot4).
        #res5=compute_previous_months(int(filedate.split("-")[1]),int(filedate.split("-")[0]),6,skip=0)
        res5 = compute_previous_months(int(goodmonth.split("-")[0]),
                                       int(goodmonth.split("-")[1]),
                                       6,
                                       skip=0)
        res5lot4 = compute_previous_months(int(goodmonth.split("-")[0]),
                                           int(goodmonth.split("-")[1]),
                                           3,
                                           skip=0)
        res5table = []
        for key in dfdata2['key'].unique():
            if key.startswith("Lot4"):
                res5touse = res5lot4
                print("LOT4" * 100)
            else:
                res5touse = res5
            for rec in res5touse:
                res5table.append({
                    "Month": goodmonth,
                    "MonthFU": rec,
                    "ShortStatus": 5,
                    "Status": 5,
                    "ValueCount": 0,
                    "_id": goodmonth + rec + "-S5-" + str(hash(key)),
                    "_index": "biac_kpi502",
                    "key": key,
                    "filedate": filedate
                })
        # ---- 4. Replace this month's detail records ---------------------
        # DELETE OLD DATA
        deletequery = {
            "query": {
                "bool": {
                    "must": [{
                        "query_string": {
                            "query": "Month: \"" + goodmonth + "\""
                        }
                    }]
                }
            }
        }
        logger.info(
            ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Deleting ORIGINALS records")
        logger.info(deletequery)
        try:
            resdelete = es.delete_by_query(body=deletequery,
                                           index="biac_kpi502")
            logger.info(resdelete)
        except Exception as e3:
            logger.info(e3)
            logger.info("Unable to delete records.")
        logger.info("Waiting for deletion to finish")
        time.sleep(3)
        # WRITE DATA
        res5tabledf = pd.DataFrame(res5table)
        es_helper.dataframe_to_elastic(es, res5tabledf)
        es_helper.dataframe_to_elastic(es, dfdata2)
        ## NOW COMPUTE MONTH
        logger.info("Waiting for records to be written...")
        time.sleep(3)
        # Clear this month's previously computed monthly aggregates.
        deletequery = {
            "query": {
                "bool": {
                    "must": [{
                        "query_string": {
                            "query": "Month: \"" + goodmonth + "\""
                        }
                    }]
                }
            }
        }
        logger.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Deleting records")
        logger.info(deletequery)
        try:
            resdelete = es.delete_by_query(body=deletequery,
                                           index="biac_month_kpi502")
            logger.info(resdelete)
        except Exception as e3:
            logger.info(e3)
            logger.info("Unable to delete records.")
        logger.info("Waiting for deletion to finish")
        time.sleep(3)
        goodmonth_dt = datetime.strptime(goodmonth, '%m-%Y')
        # ---- 5. Global overdue counts (logged only) ---------------------
        df_kpi502 = es_helper.elastic_to_dataframe(
            es,
            "biac_kpi502",
            query='ShortStatusFU: Actievereist AND Month: ' + goodmonth)
        df_kpi502_4 = df_kpi502[df_kpi502["ShortStatus"] == 4]
        df_kpi502_4_overdue = df_kpi502_4[df_kpi502_4["MonthFU"].str.contains(
            "OVERDUE")].shape[0]
        logger.info("4 overdue:%d" % (df_kpi502_4_overdue))
        kpi502_4 = df_kpi502_4_overdue / df_kpi502_4.shape[0]
        # NOTE(review): bare expressions below are leftover notebook
        # statements with no effect.
        kpi502_4
        df_kpi502_5 = df_kpi502[df_kpi502["ShortStatus"] == 5]
        df_kpi502_5_overdue = df_kpi502_5[df_kpi502_5["MonthFU"].str.contains(
            "OVERDUE")].shape[0]
        kpi502_5 = df_kpi502_5_overdue / df_kpi502_5.shape[0]
        logger.info("5 overdue:%d" % (df_kpi502_5_overdue))
        #df_kpi502_4_overdue
        df_kpi502.shape[0]
        df_kpi502_4_obj = {
            "total": df_kpi502_4.shape[0],
            "overdue": df_kpi502_4_overdue
        }
        df_kpi502_5_obj = {
            "total": df_kpi502_5.shape[0],
            "overdue": df_kpi502_5_overdue
        }
        # ---- 6. Per-key monthly summaries and stat records --------------
        # From here on the freshly imported data is used, not the ES copy.
        df_kpi502 = dfdata2.copy()
        recs = []
        recsfire = []
        recsokko = []
        for key in df_kpi502.key.unique():
            #logger.info(key)
            #goodmonth=filedate.split('-')[1]+"-"+filedate.split('-')[0]
            newrec = {
                "key": key,
                "type": "summary",
                "_id": goodmonth + "_" + key,
                "_index": "biac_month_kpi502",
                "filedate": filedate,
                "Month": goodmonth
            }
            # A row whose original month equals the limit month is a NEW
            # overdue; older ones are pre-existing overdues. Lot4 has
            # shorter look-back windows.
            if key.startswith("Lot4"):
                limitmonth4 = (goodmonth_dt -
                               relativedelta(months=6)).strftime('%Y-%m')
                limitmonth5 = (goodmonth_dt -
                               relativedelta(months=3)).strftime('%Y-%m')
            else:
                limitmonth4 = (goodmonth_dt -
                               relativedelta(months=12)).strftime('%Y-%m')
                limitmonth5 = (goodmonth_dt -
                               relativedelta(months=6)).strftime('%Y-%m')
            logger.info(">>>>>KEY:" + key)
            logger.info(">>>>>LIMIT4:" + limitmonth4)
            logger.info(">>>>>LIMIT5:" + limitmonth5)
            df_kpi502_4 = df_kpi502[(df_kpi502["ShortStatus"] == 4)
                                    & (df_kpi502["key"] == key)]
            newoverdues_4 = 0
            oldoverdues_4 = 0
            for index, row in df_kpi502_4.iterrows():
                newrec_okko = {
                    "key": key,
                    "type": "stat",
                    "_index": "biac_month_kpi502",
                    "filedate": filedate,
                    "Month": goodmonth
                }
                if "OVERDUE" in row["MonthFU"]:
                    if row["Month_"] == limitmonth4:
                        newrec_okko["sub_type"] = "new_overdue_remarks"
                        newoverdues_4 += 1
                    else:
                        newrec_okko["sub_type"] = "overdue_remarks"
                        oldoverdues_4 += 1
                else:
                    newrec_okko["sub_type"] = "ok_remarks"
                newrec_okko["@timestamp"] = datetime.utcnow().isoformat()
                recsokko.append(newrec_okko)
            logger.info(">>>>>" + str(df_kpi502_4[df_kpi502_4["Month_"] ==
                                                  limitmonth4].shape[0]))
            #df_kpi502_4_overdue=df_kpi502_4[df_kpi502_4["MonthFU"].str.contains("OVERDUE")].shape[0]
            df_kpi502_4_overdue = newoverdues_4
            # KPI = share of non-overdue among rows not already overdue
            # before this month; 100% when the denominator is empty.
            kpi502_4_percentage = 1
            if df_kpi502_4.shape[0] - oldoverdues_4 > 0:
                kpi502_4_percentage = (df_kpi502_4.shape[0] - oldoverdues_4 -
                                       newoverdues_4) / (
                                           df_kpi502_4.shape[0] -
                                           oldoverdues_4)
            else:
                kpi502_4_percentage = 1
            df_kpi502_4_obj = {
                "total": df_kpi502_4.shape[0],
                "overdue": df_kpi502_4_overdue,
                "percentage": round(kpi502_4_percentage * 100, 2)
            }
            newrec["Total_Remarks"] = df_kpi502_4.shape[0]
            newrec["Overdues_Remarks"] = df_kpi502_4_overdue
            newrec["Old_Overdues_Remarks"] = oldoverdues_4
            newrec["New_Overdues_Remarks"] = newoverdues_4
            newrec["KPI_Remarks"] = round(kpi502_4_percentage * 100, 2)
            # Same computation for breaches (status 5).
            df_kpi502_5 = df_kpi502[(df_kpi502["ShortStatus"] == 5)
                                    & (df_kpi502["key"] == key)]
            newoverdues_5 = 0
            oldoverdues_5 = 0
            for index, row in df_kpi502_5.iterrows():
                newrec_okko = {
                    "key": key,
                    "type": "stat",
                    "_index": "biac_month_kpi502",
                    "filedate": filedate,
                    "Month": goodmonth
                }
                if "OVERDUE" in row["MonthFU"]:
                    if row["Month_"] == limitmonth5:
                        newrec_okko["sub_type"] = "new_overdue_breaches"
                        newoverdues_5 += 1
                    else:
                        newrec_okko["sub_type"] = "overdue_breaches"
                        oldoverdues_5 += 1
                else:
                    newrec_okko["sub_type"] = "ok_breaches"
                newrec_okko["@timestamp"] = datetime.utcnow().isoformat()
                recsokko.append(newrec_okko)
            #df_kpi502_5_overdue=df_kpi502_5[df_kpi502_5["MonthFU"].str.contains("OVERDUE")].shape[0]
            df_kpi502_5_overdue = newoverdues_5
            kpi502_5_percentage = 1
            if df_kpi502_5.shape[0] - oldoverdues_5 > 0:
                kpi502_5_percentage = (df_kpi502_5.shape[0] - oldoverdues_5 -
                                       newoverdues_5) / (
                                           df_kpi502_5.shape[0] -
                                           oldoverdues_5)
            else:
                kpi502_5_percentage = 1
            # logger.info(df_kpi502_5)
            newrec["Total_Breaches"] = df_kpi502_5.shape[0]
            newrec["Overdues_Breaches"] = df_kpi502_5_overdue
            newrec["KPI_Breaches"] = round(kpi502_5_percentage * 100, 2)
            newrec["Old_Overdues_Breaches"] = oldoverdues_5
            newrec["New_Overdues_Breaches"] = newoverdues_5
            newrec["Total"] = df_kpi502_4.shape[0] + df_kpi502_5.shape[0]
            newrec["Overdues"] = df_kpi502_4_overdue + df_kpi502_5_overdue
            percentage = 0
            if df_kpi502_4.shape[0] + df_kpi502_5.shape[0] > 0:
                percentage = (df_kpi502_4_overdue + df_kpi502_5_overdue) / (
                    df_kpi502_4.shape[0] + df_kpi502_5.shape[0])
            newrec["KPI"] = round(percentage * 100, 2)
            # Fire-brigade keys are aggregated separately below.
            if "BACFIR" not in key:
                recs.append(newrec)
            else:
                recsfire.append(newrec)
        # ---- 7. Aggregate all BACFIR keys into one "ALL (BACFIR)" record
        pdfire = pd.DataFrame(recsfire)
        firerec = json.loads(pdfire.sum().to_json())
        firerec["filedate"] = filedate
        firerec["Month"] = goodmonth
        firerec["type"] = "summary"
        firerec["_index"] = "biac_month_kpi502"
        firerec["key"] = "ALL (BACFIR)"
        if firerec["Total_Breaches"] - firerec["Old_Overdues_Breaches"] > 0:
            firerec["KPI_Breaches"] = round(
                100 *
                (firerec["Total_Breaches"] -
                 firerec["Old_Overdues_Breaches"] -
                 firerec["New_Overdues_Breaches"]) /
                (firerec["Total_Breaches"] -
                 firerec["Old_Overdues_Breaches"]), 2)
        else:
            firerec["KPI_Breaches"] = 100
        if firerec["Total_Remarks"] > 0:
            firerec["KPI_Remarks"] = round(
                100 * (firerec["Total_Remarks"] -
                       firerec["Old_Overdues_Remarks"] -
                       firerec["New_Overdues_Remarks"]) /
                (firerec["Total_Remarks"] -
                 firerec["Old_Overdues_Remarks"]), 2)
        else:
            firerec["KPI_Remarks"] = 100
        df_month_kpi502 = pd.DataFrame(recs + [firerec])
        es_helper.dataframe_to_elastic(es, df_month_kpi502)
        df_month_kpi502_stats = pd.DataFrame(recsokko)
        es_helper.dataframe_to_elastic(es, df_month_kpi502_stats)
    except Exception as e:
        endtime = time.time()
        logger.error(e, exc_info=True)
        log_message("Import of file [%s] failed. Duration: %d Exception: %s."
                    % (file, (endtime - starttime), str(e)))
    # ---- 8. Reset 'active' flags, then activate the current month -------
    logger.info(">>>>Set Active Fields")
    logger.info("Reset active records ")
    logger.info("=====================")
    time.sleep(3)
    updatequery = {
        "script": {
            "inline": "ctx._source.active=0",
            "lang": "painless"
        },
        'query': {
            'bool': {
                'must': [{
                    'query_string': {
                        'query': '*'
                    }
                }]
            }
        }
    }
    logger.info("*=" * 30)
    logger.info(json.dumps(updatequery))
    try:
        resupdate = es.update_by_query(body=updatequery, index="biac_kpi502")
        logger.info(resupdate)
    except Exception as e3:
        logger.error(e3)
        logger.error("Unable to update records biac_month_kpi502.")
    try:
        resupdate = es.update_by_query(body=updatequery,
                                       index="biac_month_kpi502")
        logger.info(resupdate)
    except Exception as e3:
        logger.error(e3)
        logger.error("Unable to update records biac_month_kpi502.")
    # Activating current month
    logger.info("Waiting for update...")
    time.sleep(3)
    logger.info("Done")
    updatequery = {
        "script": {
            "inline": "ctx._source.active=1",
            "lang": "painless"
        },
        'query': {
            'bool': {
                'must': [{
                    'query_string': {
                        'query': 'Month: "' + goodmonth + '"'
                    }
                }]
            }
        }
    }
    logger.info("Update active records")
    logger.info(updatequery)
    try:
        resupdate = es.update_by_query(body=updatequery, index="biac_kpi502")
        logger.info(resupdate)
    except Exception as e3:
        logger.error(e3)
        logger.error("Unable to update records biac_month_kpi502.")
    try:
        resupdate = es.update_by_query(body=updatequery,
                                       index="biac_month_kpi502")
        logger.info(resupdate)
    except Exception as e3:
        logger.error(e3)
        logger.error("Unable to update records biac_month_kpi502.")
    recomputeKPI502()
    endtime = time.time()
    try:
        # NOTE(review): 'df' is never assigned in this function, so this
        # raises NameError and the fallback message below is always used —
        # probably dfdata2.shape[0] was intended.
        log_message("Import of file [%s] finished. Duration: %d Records: %d."
                    % (file, (endtime - starttime), df.shape[0]))
    except:
        log_message("Import of file [%s] finished. Duration: %d." %
                    (file, (endtime - starttime)))
    logger.info("<== " * 10)
def loadKizeo():
    """
    Load Lot 4 spot-check forms ("SPOTCHECK ~ Lot 4") from Kizeo into the
    'biac_spot_lot4' Elasticsearch index.

    The index is wiped (delete_by_query match_all) before re-import; when no
    data was downloaded the index is wiped anyway. On success two STOMP
    messages announce the imported time range. Exceptions are logged and
    swallowed.
    """
    try:
        global es
        starttime = time.time()
        logger.info(">>> LOADING SPOTCHECKS LOT4")
        logger.info("==================")
        url_kizeo = 'https://www.kizeoforms.com/rest/v3'
        # Credentials come from the container environment.
        kizeo_user = os.environ["KIZEO_USER"]
        kizeo_password = os.environ["KIZEO_PASSWORD"]
        kizeo_company = os.environ["KIZEO_COMPANY"]
        payload = {
            "user": kizeo_user,
            "password": kizeo_password,
            "company": kizeo_company
        }
        r = requests.post(url_kizeo + '/login', json=payload)
        if r.status_code != 200:
            logger.error('Unable to reach Kizeo server. Code:' +
                         str(r.status_code) + " Reason:" + str(r.reason))
            return
        response = r.json()
        token = response['data']['token']
        logger.info('>Token: ' + str(token))
        r = requests.get(url_kizeo + '/forms?Authorization=' + token)
        form_list = r.json()['forms']
        logger.info('>Form List: ')
        df_all = pd.DataFrame()
        for i in form_list:
            # Skip test forms; only the production Lot 4 spotcheck forms.
            if 'SPOTCHECK ~ Lot 4' in i['name'] and 'test' not in i['name']:
                logger.info(i['name'])
                form_id = i['id']
                # NOTE(review): start/end are computed but only logged — the
                # exports_info GET below fetches all available record ids.
                start = (datetime.now() +
                         timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
                logger.info("Start %s" % (start))
                end = datetime(2019, 1, 1)
                logger.info("End %s" % (end))
                #post={"onlyFinished":False,"startDateTime":start,"endDateTime":end,"filters":[]}
                #r = requests.post(url_kizeo + '/forms/' + form_id + '/data/exports_info?Authorization='+token,post)
                r = requests.get(url_kizeo + '/forms/' + form_id +
                                 '/data/exports_info?Authorization=' + token)
                if r.status_code != 200:
                    logger.info('something went wrong...')
                    logger.info(r.status_code, r.reason)
                elif r.text == '':
                    logger.info('Empty response')
                else:
                    # Request one Excel export covering every record id.
                    ids = []
                    for rec in r.json()["data"]:
                        ids.append(rec["id"])
                    logger.info(ids)
                    payload = {"data_ids": ids}
                    posturl = ("%s/forms/%s/data/multiple/excel_custom" %
                               (url_kizeo, form_id))
                    headers = {
                        'Content-type': 'application/json',
                        'Authorization': token
                    }
                    r = requests.post(posturl,
                                      data=json.dumps(payload),
                                      headers=headers)
                    if r.status_code != 200:
                        logger.info('something went wrong...')
                        logger.info(r.status_code, r.reason)
                    logger.info("Handling Form. Content Size:" +
                                str(len(r.content)))
                    if len(r.content) > 0:
                        file = open("./tmp/excel.xlsx", "wb")
                        file.write(r.content)
                        file.close()
                        df_all = df_all.append(
                            pd.read_excel("./tmp/excel.xlsx"))
        if len(df_all) > 0:
            # Keep only the 3-digit KPI number from the KPI label.
            df_all['KPI'] = df_all['KPI'].str.extract(r'([0-9]{3})').astype(
                int)
            # Combine the separate date and time columns into one localized
            # datetime.
            df_all['datetime'] = df_all.apply(lambda row: localtz.localize(
                datetime.combine(row['Datum Controle'].date(), row['Uur'])),
                                              axis=1)
            del df_all['Datum Controle']
            del df_all['Uur']
            df_all.columns = [
                'record_nummer', 'contract', 'kpi', 'madeBy', 'building',
                'floor', 'comment', 'check', 'conform', 'not_conform',
                'percentage', 'qr_code_1', 'qr_code_1_cabin_name',
                'qr_code_2', 'qr_code_2_cabin_name', 'qr_code_3',
                'qr_code_3_cabin_name', 'datetime'
            ]
            df_all['_timestamp'] = df_all['datetime']
            df_all['_index'] = 'biac_spot_lot4'
            df_all['_id'] = df_all['record_nummer']
            df_all['lot'] = 4
            # Full refresh: empty the index before writing the new data.
            try:
                # es.indices.delete('biac_spot_lot4')
                es.delete_by_query(index='biac_spot_lot4',
                                   doc_type='',
                                   body={"query": {
                                       "match_all": {}
                                   }})
            except:
                logger.warn('unable to delete biac_spot_lot4')
                pass
            es_helper.dataframe_to_elastic(es, df_all)
            obj = {
                'start_ts': int(datetime(2019, 1, 1).timestamp()),
                'end_ts': int(datetime.now().timestamp())
            }
            conn.send_message('/topic/BIAC_KIZEO_IMPORTED', json.dumps(obj))
            conn.send_message('/topic/BIAC_KIZEO_IMPORTED_2', json.dumps(obj))
        else:
            # Nothing downloaded: make sure the index is empty anyway.
            try:
                # es.indices.delete('biac_spot_lot4')
                es.delete_by_query(index='biac_spot_lot4',
                                   doc_type='',
                                   body={"query": {
                                       "match_all": {}
                                   }})
            except:
                logger.info('already no data')
                pass
    except Exception as e:
        endtime = time.time()
        exc_type, exc_value, exc_traceback = sys.exc_info()
        logging.error('Ooops', exc_info=True)
def update_kib_kpi304(es, lot, tec, date):
    """
    Rebuild the 'biac_kib_kpi304' records for one lot/technic over the
    month containing `date`.

    Reads the raw daily 'biac_kpi304' records, expands every day into one
    record per staff type ('hoofd' plus either 'tech' or 'tech1'/'tech2'),
    and pushes the result to Elasticsearch. Week-end days (weekday 5/6) and
    days flagged 'off' get the sentinel value -1.

    Parameters
    ----------
    es : Elasticsearch connection
    lot : lot number used in the lucene query filter
    tec : technic name used in the lucene query filter
    date : any date inside the month to (re)compute
    """
    tz = pytz.timezone(tzlocal.get_localzone().zone)
    month_start = tz.localize(datetime(date.year, date.month, 1))
    last_day = calendar.monthrange(date.year, date.month)[1]
    month_end = tz.localize(
        datetime(date.year, date.month, last_day, 23, 59, 59))

    query = 'lot:' + str(lot) + ' AND tec:' + tec
    logger.info('query' * 100)
    logger.info(query)
    logger.info(month_start)
    logger.info(month_end)
    logger.info(query)

    df = es_helper.elastic_to_dataframe(es,
                                        index='biac_kpi304',
                                        datecolumns=["@timestamp"],
                                        query=query,
                                        start=month_start,
                                        end=month_end)

    # Guarantee the optional 'off' flag exists and holds no NaN.
    if 'off' not in df:
        df['off'] = 0
    df['off'] = df['off'].fillna(0)
    df['week_day'] = df['@timestamp'].dt.weekday
    logger.info(df.shape)
    df.head()

    # Dutch display label per staff type.
    labels = {
        'hoofd': 'Verantwoordelijke',
        'tech': 'Technieker',
        'tech1': 'Technieker 1',
        'tech2': 'Technieker 2',
    }

    records = []
    for _, row in df.iterrows():
        # Saturday/Sunday or an explicit day off → sentinel value.
        day_off = (row['week_day'] == 5 or row['week_day'] == 6
                   or int(row['off']) == 1)

        # A missing or NaN 'tech1' (NaN != NaN) means this lot reports a
        # single 'tech' column instead of tech1/tech2.
        if 'tech1' not in row or row['tech1'] != row['tech1']:
            staff_types = ['hoofd', 'tech']
        else:
            staff_types = ['hoofd', 'tech1', 'tech2']

        for staff in staff_types:
            rec = {
                'type': staff,
                'lot': row['lot'],
                'kpi304_technic': row['tec'],
                '@timestamp': row['@timestamp'],
                'value': -1 if day_off else row[staff],
            }
            # Deterministic id: lot + technic + staff type + epoch millis.
            rec['_id'] = 'lot' + str(
                row['lot']) + '_' + row['tec'] + '_' + staff + '_' + str(
                    int(rec['@timestamp'].timestamp() * 1000))
            rec['type_nl'] = labels[staff]
            records.append(rec)

    df_to_push = pd.DataFrame(records)
    df_to_push['_index'] = 'biac_kib_kpi304'
    logger.info(df_to_push.shape)
    es_helper.dataframe_to_elastic(es, df_to_push)
def get_kpi304_values(es, lot, tec, date):
    """
    Return the KPI304 records of the month containing `date` as a JSON
    string (list of records).

    When the month has no records yet, a default staffing dataframe is
    generated (one row per calendar day, zeroed on week-ends), written to
    the 'biac_kpi304' index, a delayed update_kib_kpi304 run is scheduled,
    and the defaults are returned.

    Parameters
    ----------
    es : Elasticsearch connection
    lot : lot number as a string ('1'..'4')
    tec : technic name (appended to the query only for lot '2')
    date : any date inside the requested month
    """
    query = 'lot:' + lot
    # Only lot 2 distinguishes between technics in its stored records.
    if lot == '2':
        query += ' AND tec:' + tec
    print(query)

    tz = pytz.timezone(tzlocal.get_localzone().zone)
    month_start = tz.localize(datetime(date.year, date.month, 1))
    last_day = calendar.monthrange(date.year, date.month)[1]
    month_end = tz.localize(
        datetime(date.year, date.month, last_day, 23, 59, 59))
    print(month_start)
    print(month_end)

    dataframe = es_helper.elastic_to_dataframe(es,
                                               index="biac_kpi304",
                                               datecolumns=["@timestamp"],
                                               query=query,
                                               start=month_start,
                                               end=month_end)

    if len(dataframe) != 0:
        # Data already present: return it sorted, dates as plain strings.
        dataframe.sort_values('@timestamp', inplace=True)
        dataframe['@timestamp'] = dataframe['@timestamp'].dt.date.astype(str)
        return dataframe.to_json(orient='records')

    print('dataframe empty we create in DB')
    defaults = pd.DataFrame(pd.date_range(start=month_start, end=month_end),
                            columns=['_timestamp'])
    # Lots 1 and 2 have two technicians (tech1/tech2); the others one.
    if lot == '1' or lot == '2':
        defaults['tech'] = 2
        defaults['tech1'] = 1
        defaults['tech2'] = 1
    else:
        defaults['tech'] = 1
    defaults['hoofd'] = 1
    # Zero expected staffing on Saturdays and Sundays.
    defaults['dayofweek'] = defaults['_timestamp'].dt.dayofweek
    defaults.loc[defaults['dayofweek'] >= 5, 'tech'] = 0
    defaults.loc[defaults['dayofweek'] >= 5, 'hoofd'] = 0
    defaults['total'] = defaults['tech'] + defaults['hoofd']
    defaults['tec'] = tec
    defaults['lot'] = lot
    defaults['_index'] = 'biac_kpi304'
    # Deterministic id: lot + technic + day.
    defaults['_id'] = defaults['lot'] + '_' + defaults[
        'tec'] + '_' + defaults['_timestamp'].astype(str)
    del defaults['dayofweek']
    es_helper.dataframe_to_elastic(es, defaults)
    defaults['_timestamp'] = defaults['_timestamp'].dt.date.astype(str)
    logger.info('query' * 100)
    # Refresh the derived 'biac_kib_kpi304' index a few seconds later, once
    # Elasticsearch has indexed the defaults.
    thr = Timer(5, update_kib_kpi304, (es, lot, tec, date))
    thr.start()
    return defaults.rename({
        '_timestamp': '@timestamp'
    }, axis='columns').to_json(orient='records')
def messageReceived(destination, message, headers):
    """
    Recompute the lot 7 monthly availability aggregates.

    The message body is JSON with a 'start_ts' (epoch millis); all
    'biac_availability*' documents for lot 7 from start_ts until now+2h are
    scrolled out of Elasticsearch, aggregated per month/equipment/category,
    enriched with week labels and a per-month 'global' KPI row (share of
    equipments with availability in [98, 100]), then written to the yearly
    'biac_month_availability-<year>' indices.

    Parameters
    ----------
    destination : str   STOMP topic (logged only)
    message : str       JSON payload containing 'start_ts'
    headers : dict      transport headers (logged only)
    """
    global es
    records = 0
    starttime = time.time()
    logger.info("==> " * 10)
    logger.info("Message Received %s" % destination)
    logger.info(headers)
    now = datetime.now()
    local_timezone = tzlocal.get_localzone()
    obj = json.loads(message)
    start_ts = obj['start_ts']
    # Upper bound: now plus two hours, converted to epoch millis.
    end_ts = int(datetime.timestamp(now) + 7200) * 1000
    query = {
        "query": {
            "bool": {
                "must": [{
                    "query_string": {
                        "query": "-equipment:obw AND lot:7",
                        "analyze_wildcard": True
                    }
                }, {
                    "range": {
                        "@timestamp": {
                            "gte": start_ts,
                            "lte": end_ts,
                            "format": "epoch_millis"
                        }
                    }
                }]
            }
        }
    }
    df_ret = es_search_with_scroll(es, "biac_availability*", "doc", query,
                                   10000, '2m')
    df_from_es = df_ret.copy()
    # Epoch millis -> "YYYY-MM" month bucket.
    df_from_es['month'] = pd.to_datetime(df_from_es['@timestamp'], unit='ms')
    df_from_es['month'] = df_from_es['month'].apply(
        lambda x: x.strftime("%Y-%m"))
    # Aggregate per month/equipment/category: record count, mean
    # availability, and the latest week numbers.
    df_grouped = df_from_es.groupby(['month', 'equipment', 'category']) \
        .agg({'@timestamp': 'max', 'equipment': 'size', 'value': 'mean',
              'weekOfMonth': 'max', 'weekOfYear': 'max'}) \
        .rename(columns={'equipment': 'count', 'value': 'availability',
                         'weekOfMonth': 'max_week',
                         'weekOfYear': 'max_week_year'}).reset_index()
    df_grouped['week'] = df_grouped['max_week_year']
    df_grouped['year_week'] = df_grouped['week']
    df_grouped['year_week'] = df_grouped['year_week'].astype(int)
    df_grouped['year_week'] = df_grouped['year_week'].apply(lambda x: str(x))
    # Index (and later _id) = equipment + month without the dash.
    df_grouped.index = df_grouped['equipment'] + \
        df_grouped['month'].str.replace('-', '')
    df_grouped['year'] = df_grouped['month'].apply(lambda x: x[:4])
    # Human-readable week labels (EN + FR variants).
    df_grouped['str_max_week'] = df_grouped['max_week'].apply(
        get_str_max_week)
    df_grouped['str_max_week_abs'] = df_grouped.apply(
        lambda row: get_str_weeks(row), axis=1)
    df_grouped['str_max_week_abs_fr'] = df_grouped.apply(
        lambda row: get_str_weeks_fr(row), axis=1)
    df_grouped['str_range_week'] = df_grouped.apply(
        lambda row: get_str_range_week(row), axis=1)
    df_grouped['str_range_week_fr'] = df_grouped.apply(
        lambda row: get_str_range_week_fr(row), axis=1)
    df_grouped['type'] = 'equipment'
    df_grouped.loc[df_grouped['equipment'] == 'global', 'type'] = 'global'
    # NOTE(review): bare expression below has no effect (leftover notebook
    # statement) — the filter result is discarded.
    df_grouped[df_grouped['equipment'] != 'global']
    df_grouped['interval'] = 'week'
    maxWeek = int(df_grouped['week'].max())
    df_grouped['display'] = df_grouped['week'].apply(
        lambda x: getDisplayWeek(x, maxWeek))
    df_grouped['_id'] = df_grouped.index
    # Drop technical/sub-system equipments excluded from the KPI.
    regex = r"^gtx_gv_trs"
    dffiltered = df_grouped[~df_grouped.equipment.str.
                            contains(regex, regex=True)]
    regex = r"^gtx_td_qsb"
    dffiltered = dffiltered[~dffiltered.equipment.str.
                            contains(regex, regex=True)]
    regex = r"^gtx_kpi"
    dffiltered = dffiltered[~dffiltered.equipment.str.
                            contains(regex, regex=True)]
    dffiltered = dffiltered.sort_values(['availability'])
    badeq = [
        'gtx_ana', 'gtx_aws', 'gtx_eac', 'gtx_eds', 'gtx_ema', 'gtx_eq',
        'gtx_etd', 'gtx_kro', 'gtx_md', 'gtx_opt', 'gtx_rx', 'gtx_samd',
        'gtx_td', 'gtx_trs'
    ]
    dffiltered = dffiltered[~dffiltered['equipment'].isin(badeq)]
    dffiltered['_id'] = dffiltered.apply(lambda row: getID(row), axis=1)
    months = dffiltered.month.unique()
    lastmonth = months.max()
    prevmonth = getPreviousMonth(lastmonth)
    dfs = []
    for month in months:
        df = dffiltered[dffiltered['month'] == month]
        df = df.reset_index(drop=True)
        totalequipments = df.shape[0]
        # KPI = percentage of equipments with availability in [98, 100].
        goodequipments = df[df.availability.between(98, 100)]
        totalgoodequipments = goodequipments.shape[0]
        kpi = totalgoodequipments / totalequipments
        kpi = kpi * 100
        print(kpi)
        # Append a synthetic 'global' row cloned from the last row, carrying
        # the month KPI as its availability.
        copyrow = df.loc[totalequipments - 1]
        copyrow.availability = kpi
        copyrow.equipment = 'global'
        copyrow._id = 'globalkpilot7-' + month
        df.loc[totalequipments] = copyrow
        df['lot'] = 7
        # One index per year: biac_month_availability-<year>.
        df['_index'] = df['year'].apply(
            lambda x: 'biac_month_availability-' + str(x))
        df['previousmonth'] = df['month'].apply(lambda x: 1
                                                if x == prevmonth else 0)
        dfs.append(df)
    for df in dfs:
        es_helper.dataframe_to_elastic(es, df)
    logger.info("<== " * 10)
def loadKPI102():
    """Import the "LOT 2/3 - Maandelijkse ronde" forms from Kizeo into the
    ``biac_kpi102`` index, then refresh the monthly KPI and heat map.

    Credentials come from the KIZEO_USER / KIZEO_PASSWORD / KIZEO_COMPANY
    environment variables. Every matching form's records are exported to
    Excel, concatenated, normalized (column names, lot number, ronde
    number) and bulk-indexed after a full index refresh. Failures are
    logged; the function never raises.
    """
    global es
    try:
        starttime = time.time()
        logger.info(">>> LOADING KPI102")
        logger.info("==================")
        url_kizeo = 'https://www.kizeoforms.com/rest/v3'
        kizeo_user = os.environ["KIZEO_USER"]
        kizeo_password = os.environ["KIZEO_PASSWORD"]
        kizeo_company = os.environ["KIZEO_COMPANY"]

        payload = {
            "user": kizeo_user,
            "password": kizeo_password,
            "company": kizeo_company
        }
        r = requests.post(url_kizeo + '/login', json=payload)
        if r.status_code != 200:
            logger.error('Unable to reach Kizeo server. Code:' +
                         str(r.status_code) + " Reason:" + str(r.reason))
            return
        response = r.json()
        token = response['data']['token']
        logger.info('>Token: ' + str(token))

        r = requests.get(url_kizeo + '/forms?Authorization=' + token)
        form_list = r.json()['forms']
        logger.info('>Form List: ')
        #logger.info(form_list)

        df_all = pd.DataFrame()
        for i in form_list:
            if 'LOT 3 - Maandelijkse ronde N°' in i[
                    'name'] or 'LOT 2 - Maandelijkse ronde N°' in i['name']:
                print(i['name'])
                form_id = i['id']
                # NOTE(review): 'start' is now+30d while 'end' is 2019-01-01,
                # i.e. start > end — presumably the Kizeo API tolerates or
                # ignores this; confirm before changing.
                start = (datetime.now() +
                         timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
                logger.info("Start %s" % (start))
                end = datetime(2019, 1, 1)
                logger.info("End %s" % (end))
                post = {
                    "onlyFinished": False,
                    "startDateTime": start,
                    "endDateTime": end,
                    "filters": []
                }
                r = requests.post(
                    url_kizeo + '/forms/' + form_id +
                    '/data/exports_info?Authorization=' + token, post)
                if r.status_code != 200:
                    logger.info('something went wrong...')
                    # FIX: logger.info(r.status_code, r.reason) passed an int
                    # as the message, crashing the log formatter; use a
                    # proper lazy format string.
                    logger.info("%s %s", r.status_code, r.reason)
                elif r.text == '':
                    logger.info('Empty response')
                else:
                    logger.info(r.json())
                    ids = r.json()['data']["dataIds"]
                    logger.info(ids)
                    payload = {"data_ids": ids}
                    posturl = ("%s/forms/%s/data/multiple/excel_custom" %
                               (url_kizeo, form_id))
                    headers = {
                        'Content-type': 'application/json',
                        'Authorization': token
                    }
                    r = requests.post(posturl,
                                      data=json.dumps(payload),
                                      headers=headers)
                    if r.status_code != 200:
                        logger.info('something went wrong...')
                        # FIX: same logging-misuse fix as above.
                        logger.info("%s %s", r.status_code, r.reason)
                    logger.info("Handling Form. Content Size:" +
                                str(len(r.content)))
                    if len(r.content) > 0:
                        # Write the Excel export to disk, then read it back.
                        # FIX: 'with' guarantees the handle is closed even if
                        # the write raises.
                        with open("./tmp/excel.xlsx", "wb") as file:
                            file.write(r.content)
                        # FIX: DataFrame.append is removed in pandas >= 2.0;
                        # pd.concat is the equivalent supported call.
                        df_all = pd.concat(
                            [df_all, pd.read_excel("./tmp/excel.xlsx")])

        if len(df_all) > 0:
            # Older exports used slightly different column names; coalesce
            # them into the canonical ones before renaming.
            if 'Datum / Date' in df_all.columns:
                df_all['Datum/Date'].fillna(df_all['Datum / Date'],
                                            inplace=True)
                del df_all['Datum / Date']
            if 'Lot 3' in df_all.columns:
                df_all['Ronde'].fillna(df_all['Lot 3'], inplace=True)
                del df_all['Lot 3']
            # Default lot is 3; rows whose ronde mentions "Lot 2" are lot 2.
            df_all['lot'] = 3
            df_all.loc[df_all['Ronde'].str.contains('Lot 2'), 'lot'] = 2
            df_all['ronde_number'] = df_all['Ronde'].str.extract(
                r'N° ([0-9]*)')
            df_all.rename(columns={
                'Ronde': 'ronde',
                'Datum/Date': '_timestamp',
                'Answer date': 'answer_date',
                'Record number': 'record_number'
            },
                          inplace=True)
            df_all['_id'] = df_all['_timestamp'].astype(str) + '_lot' + df_all[
                'lot'].astype(str) + '_' + df_all['ronde_number']
            df_all['_index'] = 'biac_kpi102'
            # Full refresh: drop the index before re-indexing everything.
            es.indices.delete(index='biac_kpi102', ignore=[400, 404])
            es_helper.dataframe_to_elastic(es, df_all)
            compute_kpi102_monthly(df_all)
            compute_kib_heatmap(df_all)

        endtime = time.time()
        log_message(
            "Import of KPI102 from Kizeo finished. Duration: %d Records: %d."
            % (endtime - starttime, df_all.shape[0]))
    except Exception as e:
        endtime = time.time()
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        # exc_type below is ignored on 3.5 and later
        traceback.print_exception(exc_type,
                                  exc_value,
                                  exc_traceback,
                                  limit=2,
                                  file=sys.stdout)
def loadKPI102():
    """Import the "LOT 4 - HS Cabines ~ Wekelijkse Ronde" forms from Kizeo
    into the ``biac_kpi102_lot4`` index and refresh the KIB index.

    NOTE(review): this redefines ``loadKPI102`` — if the lot-2/3 variant
    above lives in the same module, this definition shadows it; consider
    renaming one of them (kept unchanged here so callers keep working).

    Credentials come from the KIZEO_USER / KIZEO_PASSWORD / KIZEO_COMPANY
    environment variables. Failures are logged; the function never raises.
    """
    try:
        starttime = time.time()
        logger.info(">>> LOADING KPI102 LOT4")
        logger.info("==================")
        url_kizeo = 'https://www.kizeoforms.com/rest/v3'
        kizeo_user = os.environ["KIZEO_USER"]
        kizeo_password = os.environ["KIZEO_PASSWORD"]
        kizeo_company = os.environ["KIZEO_COMPANY"]

        payload = {
            "user": kizeo_user,
            "password": kizeo_password,
            "company": kizeo_company
        }
        r = requests.post(url_kizeo + '/login', json=payload)
        if r.status_code != 200:
            logger.error('Unable to reach Kizeo server. Code:' +
                         str(r.status_code) + " Reason:" + str(r.reason))
            return
        response = r.json()
        token = response['data']['token']
        logger.info('>Token: ' + str(token))

        r = requests.get(url_kizeo + '/forms?Authorization=' + token)
        form_list = r.json()['forms']
        logger.info('>Form List: ')
        #logger.info(form_list)

        df_all = pd.DataFrame()
        for i in form_list:
            if 'LOT 4 - HS Cabines ~ Wekelijkse Ronde ' in i[
                    'name'] and 'test' not in i['name']:
                logger.info(i['name'])
                form_id = i['id']
                start = (datetime.now() +
                         timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
                logger.info("Start %s" % (start))
                end = datetime(2019, 1, 1)
                logger.info("End %s" % (end))
                #post={"onlyFinished":False,"startDateTime":start,"endDateTime":end,"filters":[]}
                #r = requests.post(url_kizeo + '/forms/' + form_id + '/data/exports_info?Authorization='+token,post)
                r = requests.get(url_kizeo + '/forms/' + form_id +
                                 '/data/exports_info?Authorization=' + token)
                if r.status_code != 200:
                    logger.info('something went wrong...')
                    # FIX: logger.info(r.status_code, r.reason) passed an int
                    # as the message, crashing the log formatter; use a
                    # proper lazy format string.
                    logger.info("%s %s", r.status_code, r.reason)
                elif r.text == '':
                    logger.info('Empty response')
                else:
                    #logger.info(r.json())
                    #ids=r.json()['data']["dataIds"]
                    # Unlike the lot-2/3 export endpoint, this one returns a
                    # list of records; collect their ids.
                    ids = []
                    for rec in r.json()["data"]:
                        # print(rec)
                        ids.append(rec["id"])
                    # FIX: a duplicated logger.info(ids) line was removed.
                    logger.info(ids)
                    payload = {"data_ids": ids}
                    posturl = ("%s/forms/%s/data/multiple/excel_custom" %
                               (url_kizeo, form_id))
                    headers = {
                        'Content-type': 'application/json',
                        'Authorization': token
                    }
                    r = requests.post(posturl,
                                      data=json.dumps(payload),
                                      headers=headers)
                    if r.status_code != 200:
                        logger.info('something went wrong...')
                        # FIX: same logging-misuse fix as above.
                        logger.info("%s %s", r.status_code, r.reason)
                    logger.info("Handling Form. Content Size:" +
                                str(len(r.content)))
                    if len(r.content) > 0:
                        # FIX: 'with' guarantees the handle is closed even if
                        # the write raises.
                        with open("./tmp/excel.xlsx", "wb") as file:
                            file.write(r.content)
                        # FIX: DataFrame.append is removed in pandas >= 2.0;
                        # pd.concat is the equivalent supported call.
                        df_all = pd.concat(
                            [df_all, pd.read_excel("./tmp/excel.xlsx")])

        if len(df_all) > 0:
            # Normalize the export's positional columns, then coalesce the
            # duplicated ronde/date columns.
            df_all.columns = [
                'answer_time', 'date', 'date_2', 'record_number', 'ronde_1',
                'ronde', 'modif_time'
            ]
            df_all.loc[df_all['ronde'].isnull(), 'ronde'] = df_all.loc[
                df_all['ronde'].isnull(), 'ronde_1']
            df_all.loc[df_all['date'].isnull(), 'date'] = df_all.loc[
                df_all['date'].isnull(), 'date_2']
            df_all = df_all[[
                'answer_time', 'date', 'record_number', 'ronde', 'modif_time'
            ]]
            # Keep only the round letter from the full form title.
            df_all['ronde_letter'] = df_all['ronde'].str \
                .replace(' \(Rondier\)', '') \
                .str.replace('LOT 4 - HS Cabines - Wekelijkse Ronde ', '')
            df_all['answer_time'] = pd.to_datetime(
                df_all['answer_time'], utc=False).dt.tz_localize('Europe/Paris')
            df_all['modif_time'] = pd.to_datetime(
                df_all['modif_time'], utc=False).dt.tz_localize('Europe/Paris')
            df_all['date'] = pd.to_datetime(
                df_all['date'], utc=False).dt.tz_localize('Europe/Paris')
            df_all['@timestamp'] = df_all['date']
            df_all['_index'] = 'biac_kpi102_lot4'
            # Epoch seconds used only to build a unique document id.
            df_all['ts'] = df_all['@timestamp'].values.astype(
                np.int64) // 10 ** 9
            df_all['_id'] = df_all['record_number'].astype(str) + '_' + \
                df_all['ronde_letter'] + '_' + df_all['ts'].astype(str)
            df_all['lot'] = 4
            del df_all['ts']
            es_helper.dataframe_to_elastic(es, df_all)
            compute_kib_index(es, df_all)

        endtime = time.time()
        log_message(
            "Import of KPI102 LOT4 from Kizeo finished. Duration: %d Records: %d."
            % (endtime - starttime, df_all.shape[0]))
    except Exception as e:
        endtime = time.time()
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        # exc_type below is ignored on 3.5 and later
        traceback.print_exception(exc_type,
                                  exc_value,
                                  exc_traceback,
                                  limit=2,
                                  file=sys.stdout)
def messageReceived(destination, message, headers):
    """Aggregate Kizeo check records into monthly KPI documents.

    Triggered after biac_import_kizeo.py has indexed raw checks. Reads the
    ``biac_kizeo`` index over the interval derived from the message's
    ``end_ts``, sums conform/non-conform/total checks per (lot, kpi,
    contract, screen_name, month) — with an extra BACFIR roll-up and a
    lot-4 KPI 302 series from ``biac_spot_lot4`` — computes conformity
    percentages and writes everything to ``biac_month_2_kizeo``.
    """
    global es
    starttime = time.time()
    logger.info("==> " * 10)
    logger.info("Message Received %s" % destination)
    logger.info(headers)
    local_timezone = tzlocal.get_localzone()
    try:
        logger.info(
            'waiting 5 sec before doing the request to be sure date are correctly inserted by biac_import_kizeo.py'
        )
        time.sleep(5)
        logger.info('message: ' + str(message))
        obj = json.loads(message)
        last_update = datetime.fromtimestamp(obj['end_ts'])
        interval = define_interval(last_update)
        start_dt = interval['dt_start']
        end_dt = interval['dt_end']
        logger.info(start_dt)
        logger.info(end_dt)

        df = es_helper.elastic_to_dataframe(es,
                                            index="biac_kizeo",
                                            scrollsize=1000,
                                            start=start_dt,
                                            end=end_dt,
                                            datecolumns=["@timestamp"])
        df['month'] = df['@timestamp'].dt.strftime('%Y-%m')

        # Monthly totals per screen.
        df_grouped = df.groupby(['lot', 'kpi', 'contract', 'screen_name', 'month']) \
            .agg({'check_conform': 'sum', 'check_no_conform': 'sum',
                  'check_number': 'sum', '@timestamp': 'max'}) \
            .reset_index()
        # Extra roll-up across screens for the BACFIR contract.
        df_grouped2 = df[df['contract'] == 'BACFIR'].groupby(['lot', 'kpi', 'contract', 'month']) \
            .agg({'check_conform': 'sum', 'check_no_conform': 'sum',
                  'check_number': 'sum', '@timestamp': 'max'}) \
            .reset_index()
        df_grouped2['screen_name'] = 'BACFIR'
        # FIX: DataFrame.append is removed in pandas >= 2.0; use pd.concat.
        df_grouped = pd.concat([df_grouped, df_grouped2])

        df_lot4 = pd.DataFrame()
        try:
            # Handling lot 4 (KPI 302 spot checks).
            df_lot4 = es_helper.elastic_to_dataframe(
                es,
                index="biac_spot_lot4",
                query="kpi:302",
                scrollsize=1000,
                start=start_dt,
                end=end_dt,
                datecolumns=["@timestamp"])
        except elasticsearch.NotFoundError:
            # FIX: logger.warn is a deprecated alias of logger.warning.
            logger.warning('Index biac_spot_lot4 does not exist')

        if len(df_lot4) == 0:
            # No lot-4 data: seed one empty default record so the dashboard
            # still shows the series.
            # NOTE(review): datetime(..., tzinfo=local_timezone) with a pytz
            # zone gives the LMT offset; localize() would be the canonical
            # call — kept as-is to preserve existing timestamps.
            obj = {
                'lot': '4',
                'kpi': 302,
                'contract': 'DNBBA',
                'screen_name': 'DNBBA',
                'month': '2019-01',
                'conform': 0,
                'not_conform': 0,
                'check': 0,
                '@timestamp': datetime(2019, 1, 1, tzinfo=local_timezone),
            }
            df_lot4 = pd.DataFrame.from_dict({0: obj.values()},
                                             orient='index',
                                             columns=obj.keys())
        else:
            df_lot4['screen_name'] = 'BACDNB'
            df_lot4['month'] = df_lot4['@timestamp'].dt.strftime('%Y-%m')

        df_lot4['lot'] = df_lot4['lot'].astype(str)
        df_grouped_lot4 = df_lot4.groupby(['lot', 'kpi', 'contract', 'screen_name', 'month']) \
            .agg({'conform': 'sum', 'not_conform': 'sum', 'check': 'sum',
                  '@timestamp': 'max'}) \
            .reset_index()
        # Align the lot-4 column names with the main frame before merging.
        df_grouped_lot4 = df_grouped_lot4.rename(
            columns={
                "conform": "check_conform",
                "not_conform": "check_no_conform",
                "check": "check_number"
            })
        # FIX: DataFrame.append is removed in pandas >= 2.0; use pd.concat.
        df_grouped = pd.concat([df_grouped, df_grouped_lot4])

        df_grouped['_index'] = 'biac_month_2_kizeo'
        df_grouped['_id'] = df_grouped.apply(lambda row: row['lot'] + '_' + str(row['kpi']) + '_' + \
                                             row['contract'].lower() + '_' + row['screen_name'].lower() + '_' + \
                                             str(int(row['@timestamp'].timestamp())), axis=1)
        df_grouped = df_grouped.rename(columns={"@timestamp": "last_update"})
        # Ids must not contain spaces or slashes.
        df_grouped['_id'] = df_grouped['_id'].str.replace(' ', '_')
        df_grouped['_id'] = df_grouped['_id'].str.replace('/', '_')

        # Percentages as trimmed strings ("33.33", "100"); NaN (no checks)
        # defaults to fully conform.
        df_grouped['percentage_conform'] = round(
            100 * (df_grouped['check_conform'] / df_grouped['check_number']),
            2).fillna(100)
        df_grouped['percentage_conform'] = df_grouped[
            'percentage_conform'].apply(
                lambda x: ('%f' % x).rstrip('0').rstrip('.'))
        df_grouped['percentage_no_conform'] = round(
            100 *
            (df_grouped['check_no_conform'] / df_grouped['check_number']),
            2).fillna(0)
        df_grouped['percentage_no_conform'] = df_grouped[
            'percentage_no_conform'].apply(
                lambda x: ('%f' % x).rstrip('0').rstrip('.'))
        df_grouped['percentage_conform'] = df_grouped[
            'percentage_conform'].fillna(100)
        df_grouped['percentage_no_conform'] = df_grouped[
            'percentage_no_conform'].fillna(0)

        logger.info(df_grouped)
        es_helper.dataframe_to_elastic(es, df_grouped)
    except Exception as e:
        endtime = time.time()
        logger.error(e, exc_info=True)
        log_message("Process month Kizeo failed. Duration: %d Exception: %s."
                    % ((endtime - starttime), str(e)))

    endtime = time.time()
    try:
        # df_grouped is undefined when the body failed early; the fallback
        # message below covers that case.
        log_message("Process month Kizeo finished. Duration: %d Records: %d."
                    % ((endtime - starttime), df_grouped.shape[0]))
    except:
        log_message("Process month Kizeo finished. Duration: %d."
                    % ((endtime - starttime)))
    logger.info("<== " * 10)
def compute_kpi105_monthly(start, end):
    """
    Compute KPI 105 monthy records (Records used for the dashboard)

    Parameters
    ----------
    start
        Date of the first imported record
    end
        Date of the last imported record
    """
    logger.info("====> compute_kpi105_monthly")
    starttime = time.time()
    logger.info(start)
    logger.info(end)
    # Widen the window to whole months: first day of start's month through
    # the last day of end's month.
    start = mkFirstOfMonth(start)
    end = end.replace(day=calendar.monthrange(end.year, end.month)[1])
    logger.info(start)
    logger.info(end)
    # df_kpi105 = etp.genericIntervalSearch(es, 'biac_kpi105', query='*', start=start, end=end)
    df_kpi105 = es_helper.elastic_to_dataframe(es,
                                               'biac_kpi105',
                                               query='*',
                                               start=start,
                                               end=end)
    df_kpi105['month'] = pd.to_datetime(df_kpi105['@timestamp'],
                                        unit='ms').dt.strftime('%Y-%m')
    # Number of imported records per (lot, month, ronde_number).
    df_kpi105 = df_kpi105.groupby(['lot', 'month', 'ronde_number'
                                   ]).count()[['@timestamp'
                                               ]].rename(columns={
                                                   '@timestamp': 'count'
                                               }).reset_index()
    records_number = 0
    for lot in df_kpi105['lot'].unique():
        logger.info(">>>>> Working for lot:" + str(lot))
        df_lot = df_kpi105[df_kpi105['lot'] == lot]
        # Enumerate every month between the first and last month seen for
        # this lot, so months with zero rondes still get a record.
        min_date = datetime.strptime(min(df_lot['month']), '%Y-%m')
        max_date = datetime.strptime(max(df_lot['month']), '%Y-%m')
        months = []
        current = min_date
        while current <= max_date:
            months.append(current.strftime('%Y-%m'))
            current += relativedelta(months=1)
        df_months = pd.DataFrame(months)
        df_months['join'] = 1
        df_months.columns = ['month', 'join']
        # Lot 2 has three rondes per month; every other lot has one.
        rondes = [1]
        if int(lot) == 2:
            rondes = [1, 2, 3]
        df_rondes = pd.DataFrame(rondes)
        df_rondes['join'] = 1
        df_rondes.columns = ['ronde_number', 'join']
        # Cartesian product month x ronde via the constant 'join' column.
        df_default = df_months.merge(df_rondes,
                                     left_on='join',
                                     right_on='join')
        del df_default['join']
        df_default['number_of_days'] = df_default['month'].apply(
            lambda x: get_days_already_passed(x))
        df_default['_id'] = df_default['month'] + '_' + df_default[
            'ronde_number'].astype(str)
        # NOTE(review): df_lot is a slice of df_kpi105; assigning to it may
        # raise SettingWithCopyWarning — confirm whether a .copy() was
        # intended.
        df_lot['_id'] = df_lot['month'] + '_' + df_lot['ronde_number'].astype(
            str)
        del df_lot['month']
        del df_lot['ronde_number']
        # Outer merge so default rows without data keep count 0 below.
        df_merged = df_default.merge(df_lot,
                                     left_on='_id',
                                     right_on='_id',
                                     how="outer")
        df_merged['lot'] = df_merged['lot'].fillna(lot)
        df_merged['count'] = df_merged['count'].fillna(0)
        # Percentage of days (so far this month) with a completed ronde.
        df_merged['percent'] = round(
            100 * df_merged['count'] / df_merged['number_of_days'], 2)
        df_merged['_index'] = 'biac_month_kpi105'
        df_merged['_timestamp'] = pd.to_datetime(df_merged['month'],
                                                 format='%Y-%m')
        df_merged.columns = [
            'month', 'ronde_number', 'number_of_days', '_id', 'lot',
            'ronde_done', 'percent', '_index', '_timestamp'
        ]
        df_merged['ronde_done'] = df_merged['ronde_done'].astype(int)
        df_merged['_id'] = df_merged['_id'].apply(
            lambda x: 'lot' + str(int(lot)) + '_' + str(x))
        logger.info("Storing " * 20)
        # pte.pandas_to_elastic(es, df_merged)
        es_helper.dataframe_to_elastic(es, df_merged)
        records_number += len(df_merged)
    endtime = time.time()
    log_message(
        "Compute monthly KPI105 (process biac_import_kpi105.py) finished. Duration: %d Records: %d."
        % (endtime - starttime, records_number))
def messageReceived(destination, message, headers):
    """Import a monthly KPI501 / KPI305 Excel report received on the queue.

    The message body is the base64-encoded Excel file; headers carry the
    original file name whose trailing "MM-YYYY" token determines the
    reporting month (shifted one month back, since the report describes the
    previous month). Messages on a "501" destination are indexed into
    ``biac_kpi501`` (per-lot delete-then-insert); anything else goes to
    ``biac_kpi305``. The column layout varies by report vintage and is
    detected from the column count.
    """
    global es
    global goodmonth
    starttime = time.time()
    logger.info("==> " * 10)
    logger.info("Message Received %s" % destination)
    logger.info(headers)
    result = "failed"

    if "CamelSplitAttachmentId" in headers:
        headers["file"] = headers["CamelSplitAttachmentId"]
    if "file" in headers:
        logger.info("File:%s" % headers["file"])
        log_message("Import of file [%s] started." % headers["file"])

    file = headers["file"]
    # File name ends with "MM-YYYY.<ext>"; reorder to "MM-YYYY" and shift
    # back one month.
    filedate = file.split(".")[0][-7:]
    goodmonth = filedate.split("-")[1] + "-" + filedate.split("-")[0]
    logger.info("MONTH(BEF) :" + goodmonth)
    gm = datetime.strptime(goodmonth, "%m-%Y")
    gm = gm - relativedelta(months=1)
    goodmonth = datetime.strftime(gm, "%m-%Y")
    logger.info("MONTH(AFT) :" + goodmonth)

    # Decode the attachment and stage it on disk for pandas.
    xlsbytes = base64.b64decode(message)
    # FIX: 'with' guarantees the handle is closed even if the write raises.
    with open('./tmp/excel.xlsx', 'wb') as f:
        f.write(xlsbytes)
    orgfile = file
    file = './tmp/excel.xlsx'

    if "501" in destination:
        try:
            #dfdata = pd.read_excel(file, sheet_name='Sheet1')
            dfdatas = pd.ExcelFile(file)
            sheettoload = ""
            #for sheet in dfdatas.sheet_names:
            #    if "Lot" in sheet or "Sheet1" in sheet:
            #        sheettoload=sheet
            #        break
            # NOTE(review): the second sheet is loaded unconditionally —
            # confirm the exports always carry the data on sheet index 1.
            sheettoload = dfdatas.sheet_names[1]
            if sheettoload == "":
                logger.info("No worksheet to load...")
            else:
                logger.info("Loading :" + sheettoload)
                dfdata = pd.read_excel(file, sheet_name=sheettoload)
                # Column layout changed over time; detect it by column count.
                if dfdata.shape[1] == 38:
                    newcols = [
                        'Month', 'BACID', 'SRPresentation', 'SendDate',
                        'TypeOfReport', 'ReportNumber', 'ReportDate',
                        'Building', 'Material', 'ExtraData', 'Label',
                        'MonitorOKYN', 'x1', 'Label2', 'LinkPeriod2',
                        'SendDate2', 'Status', 'ReportDate2', 'CheckDateSend',
                        'CheckStatus', 'CheckReportDate', 'Month_BacID',
                        'CheckMonth', 'GlobalCheck', 'CountC', 'CountCR',
                        'CountNC', 'CountPositives', 'Count', 'Dept',
                        'SubDept', 'BACService', 'Company', 'CofelyResp',
                        'Lot', 'Organism', 'MonitorNOK', 'MonitorOK'
                    ]
                elif dfdata.shape[1] == 40:
                    newcols = [
                        'Month', 'BACID', 'SRPresentation', 'SendDate',
                        'TypeOfReport', 'ReportNumber', 'ReportDate',
                        'Building', 'Material', 'ExtraData', 'Label', 'Note',
                        'Supervisor', 'MonitorOKYN', 'x1', 'Label2',
                        'LinkPeriod2', 'SendDate2', 'Status', 'ReportDate2',
                        'CheckDateSend', 'CheckStatus', 'CheckReportDate',
                        'Month_BacID', 'CheckMonth', 'GlobalCheck', 'CountC',
                        'CountCR', 'CountNC', 'CountPositives', 'Count',
                        'Dept', 'SubDept', 'BACService', 'Company',
                        'CofelyResp', 'Lot', 'Organism', 'MonitorNOK',
                        'MonitorOK'
                    ]
                else:
                    # MARCH 2019
                    newcols = [
                        'Month', 'BACID', 'SRPresentation', 'SendDate',
                        'TypeOfReport', 'ReportNumber', 'ReportDate',
                        'Building', 'Material', 'ExtraData', 'Label', 'Note',
                        'Supervisor', 'MonitorOKYN', 'x1', 'Label2',
                        'LinkPeriod2', 'SendDate2', 'Status', 'ReportDate2',
                        'CheckDateSend', 'CheckStatus', 'CheckReportDate',
                        'Month_BacID', 'CheckMonth', 'GlobalCheck', 'CountC',
                        'CountCR', 'CountNC', 'CountPositives', 'Count',
                        'Dept', 'SubDept', 'BACService', 'Company',
                        'CofelyResp', 'Lot', 'Organism', 'Building2',
                        'DocName', 'MonitorNOK', 'MonitorOK'
                    ]
                dfdata.columns = newcols
                dfdata["Month"] = dfdata["Month"].apply(reorderMonth)
                if not dfdata.empty:
                    # The lot number comes from the original file name.
                    regex = r"Lot[0-4]"
                    matches = re.finditer(regex, orgfile, re.MULTILINE)
                    lot = "NA"
                    for matchNum, match in enumerate(matches, start=1):
                        lot = match.group()
                        break
                    logger.info("Lot:" + lot)
                    if "4" in str(lot):
                        dfdata["key"] = "Lot4 (BACDNB)"
                    elif "1" in str(lot):
                        dfdata["key"] = "Lot1 (BACHEA)"
                    else:
                        dfdata["key"] = dfdata.apply(computeReport, axis=1)
                    dfdata["FileLot"] = lot
                    # dfdata["_id"]=dfdata["Month_BacID"]
                    dfdata["_index"] = "biac_kpi501"
                    dfdata["SRPresentation"] = pd.to_datetime(
                        dfdata["SRPresentation"], dayfirst=True)
                    dfdata = dfdata.fillna("")
                    # logger.info(dfdata["FileLot"])
                    # Replace previously imported records for each month of
                    # this file's lot before re-inserting.
                    for month in dfdata["Month"].unique():
                        deletequery = {
                            "query": {
                                "bool": {
                                    "must": [{
                                        "query_string": {
                                            "query": "Month: " + month
                                        }
                                    }, {
                                        "query_string": {
                                            "query": "FileLot: " + lot
                                        }
                                    }]
                                }
                            }
                        }
                        logger.info("Deleting records")
                        logger.info(deletequery)
                        try:
                            resdelete = es.delete_by_query(
                                body=deletequery, index="biac_kpi501")
                            logger.info(resdelete)
                        except Exception as e3:
                            logger.error(e3)
                            logger.error("Unable to delete records.")
                        # Give Elasticsearch time to apply the deletes.
                        time.sleep(3)
                    #DELETE COLUMNS WITH MIXED CONTENTS
                    del dfdata["SRPresentation"]
                    del dfdata["ReportDate"]
                    es_helper.dataframe_to_elastic(es, dfdata)
                else:
                    logger.info("Empty Data")
                compute501()
                conn.send_message('/topic/BIAC_KPI501_IMPORTED', {})
                result = "finished"
        except Exception as e:
            endtime = time.time()
            logger.error(e, exc_info=e)
            log_message(
                "Import of file [%s] failed. Duration: %d Exception: %s." %
                (headers["file"], (endtime - starttime), str(e)))
    else:
        try:
            dfdata = pd.read_excel(file, sheet_name='Sheet1')
            # Column layout changed over time; detect it by column count.
            if dfdata.shape[1] == 36:
                newcols = [
                    'Month', 'BACID', 'SRPresentation', 'SendDate',
                    'TypeOfReport', 'ReportNumber', 'ReportDate', 'Building',
                    'Material', 'ExtraData', 'Label', 'MonitorOKYN', 'x1',
                    'Label2', 'LinkPeriod2', 'SendDate2', 'Status',
                    'ReportDate2', 'CheckDateSend', 'CheckStatus',
                    'CheckReportDate', 'Month_BacID', 'CheckMonth',
                    'GlobalCheck', 'CountC', 'CountCR', 'CountNC',
                    'CountPositives', 'Count', 'Dept', 'SubDept',
                    'BACService', 'Company', 'CofelyResp', 'Lot', 'Organism'
                ]
            elif dfdata.shape[1] == 42:
                # MARCH 2019
                newcols = [
                    'Month', 'BACID', 'SRPresentation', 'SendDate',
                    'TypeOfReport', 'ReportNumber', 'ReportDate', 'Building',
                    'Material', 'ExtraData', 'Label', 'Note', 'Supervisor',
                    'MonitorOKYN', 'x1', 'Label2', 'LinkPeriod2', 'SendDate2',
                    'Status', 'ReportDate2', 'CheckDateSend', 'CheckStatus',
                    'CheckReportDate', 'Month_BacID', 'CheckMonth',
                    'GlobalCheck', 'CountC', 'CountCR', 'CountNC',
                    'CountPositives', 'Count', 'Dept', 'SubDept',
                    'BACService', 'Company', 'CofelyResp', 'Lot', 'Organism',
                    'Building2', 'DocName', 'SupervisorNOK', 'SupervisorOK'
                ]
            else:
                newcols = [
                    'Month', 'BACID', 'SRPresentation', 'SendDate',
                    'TypeOfReport', 'ReportNumber', 'ReportDate', 'Building',
                    'Material', 'ExtraData', 'Label', 'Note', 'Supervisor',
                    'MonitorOKYN', 'x1', 'Label2', 'LinkPeriod2', 'SendDate2',
                    'Status', 'ReportDate2', 'CheckDateSend', 'CheckStatus',
                    'CheckReportDate', 'Month_BacID', 'CheckMonth',
                    'GlobalCheck', 'CountC', 'CountCR', 'CountNC',
                    'CountPositives', 'Count', 'Dept', 'SubDept',
                    'BACService', 'Company', 'CofelyResp', 'Lot', 'Organism'
                ]
            dfdata.columns = newcols
            dfdata["key"] = dfdata.apply(computeReport, axis=1)
            #dfdata["_id"]=dfdata["Month_BacID"]
            dfdata["_index"] = "biac_kpi305"
            dfdata["SRPresentation"] = pd.to_datetime(dfdata["SRPresentation"],
                                                      dayfirst=True)
            #DELETE COLUMNS WITH MIXED CONTENTS
            del dfdata["ReportDate"]
            del dfdata["SendDate"]
            dfdata = dfdata.fillna("")
            logger.info(dfdata)
            # Replace previously imported records for each month in the file.
            for month in dfdata["Month"].unique():
                deletequery = {
                    "query": {
                        "bool": {
                            "must": [{
                                "query_string": {
                                    "query": "Month: " + month
                                }
                            }]
                        }
                    }
                }
                logger.info("Deleting records")
                logger.info(deletequery)
                try:
                    resdelete = es.delete_by_query(body=deletequery,
                                                   index="biac_kpi305")
                    logger.info(resdelete)
                except Exception as e3:
                    logger.error(e3)
                    logger.error("Unable to delete records.")
                # Give Elasticsearch time to apply the deletes.
                time.sleep(3)
            logger.info("****=" * 30)
            dfdata = dfdata.fillna("")
            logger.info(dfdata)
            logger.info(dfdata.dtypes)
            logger.info("****=" * 30)
            es_helper.dataframe_to_elastic(es, dfdata)
            compute305()
            conn.send_message('/topic/BIAC_KPI305_IMPORTED', {})
            result = "finished"
        except Exception as e:
            endtime = time.time()
            logger.error(e, exc_info=e)
            log_message(
                "Import of file [%s] failed. Duration: %d Exception: %s." %
                (headers["file"], (endtime - starttime), str(e)))

    endtime = time.time()
    try:
        # FIX: the original referenced the undefined name 'df' here, so this
        # success message (with record count) could never be emitted and
        # always fell through to the bare-except fallback; the imported
        # frame is 'dfdata'.
        log_message("Import of file [%s] %s. Duration: %d Records: %d." %
                    (headers["file"], result,
                     (endtime - starttime), dfdata.shape[0]))
    except:
        log_message("Import of file [%s] %s. Duration: %d." %
                    (headers["file"], result, (endtime - starttime)))
    logger.info("<== " * 10)
def doTheWork(start):
    """Aggregate one day of raw two-index counter data into daily records.

    Raw records carry two counter values (index1/index2) per message.
    Records are grouped per meter, min/max/mean statistics are computed
    for each index, the two indices are unpivoted into one row per pin
    (derived from messSubType), daily derived values are added and the
    result is written to Elasticsearch.

    Parameters
    ----------
    start
        Day to process (any datetime; truncated to midnight).
    """
    #now = datetime.now()
    # Truncate to midnight of the requested day.
    start = datetime(start.year, start.month, start.day)
    df = retrieve_raw_data(start)
    try:
        df['index1'] = df['index1'].apply(lambda x: removeStr(x))
        df['index2'] = df['index2'].apply(lambda x: removeStr(x))
        # Duplicate each index so a single groupby can compute min / max /
        # mean / custom-min simultaneously on distinct columns.
        df['index1_min'] = df['index1']
        df['index1_min_sec'] = df['index1']
        df['index1_max'] = df['index1']
        df['index1_avg'] = df['index1']
        df['index2_min'] = df['index2']
        df['index2_min_sec'] = df['index2']
        df['index2_max'] = df['index2']
        df['index2_avg'] = df['index2']
        df_grouped = df.groupby(['id', 'messSubType', 'messType', 'client', 'area'])\
            .agg({'@timestamp': 'min', 'index1_min': 'min', 'index1_max': 'max', 'index1_avg': 'mean', 'index1_min_sec':getCustomMin,\
            'index2_min': 'min', 'index2_max': 'max', 'index2_avg': 'mean', 'index2_min_sec':getCustomMin}).reset_index()
        # messSubType encodes both pins: high nibble -> index1's pin, low
        # nibble -> index2's pin.
        df_grouped['pinindex1'] = df_grouped['messSubType'].apply(
            lambda x: int(int(x) / 16))
        df_grouped['pinindex2'] = df_grouped['messSubType'].apply(
            lambda x: int(int(x) % 16))
        # Unpivot: one frame per index, renamed to common value_* columns,
        # then stacked into a single frame (one row per pin).
        df1 = df_grouped[[
            'id', 'messSubType', 'messType', 'client', 'area', '@timestamp',
            'index1_min', 'index1_max', 'index1_avg', 'index1_min_sec',
            'pinindex1'
        ]]
        df1 = df1.rename(
            columns={
                'index1_min': 'value_min',
                'index1_max': 'value_max',
                'index1_avg': 'value_avg',
                'index1_min_sec': 'value_min_sec',
                'pinindex1': 'pin'
            })
        df2 = df_grouped[[
            'id', 'messSubType', 'messType', 'client', 'area', '@timestamp',
            'index2_min', 'index2_max', 'index2_avg', 'index2_min_sec',
            'pinindex2'
        ]]
        df2 = df2.rename(
            columns={
                'index2_min': 'value_min',
                'index2_max': 'value_max',
                'index2_avg': 'value_avg',
                'index2_min_sec': 'value_min_sec',
                'pinindex2': 'pin'
            })
        df_grouped = pd.concat([df1, df2], axis=0, ignore_index=True)
        logger.info(df_grouped)
        # Daily derived values. NOTE(review): the *24 / *1440 / *100 factors
        # presumably convert the mean to per-day consumption / minutes of
        # availability / percentage — confirm units with the data owner.
        df_grouped[
            'value_day'] = df_grouped['value_max'] - df_grouped['value_min']
        df_grouped['conso_day'] = df_grouped['value_avg'] * 24
        df_grouped['availability'] = df_grouped['value_avg'] * 1440
        df_grouped['availability_perc'] = df_grouped['value_avg'] * 100
        df_grouped['value_day_sec'] = df_grouped['value_max'] - df_grouped[
            'value_min_sec']
        df_grouped['@timestamp'] = df_grouped['@timestamp'].apply(
            lambda x: buildDate(x))
        df_grouped['@timestamp'] = pd.to_datetime(df_grouped['@timestamp'], \
            unit='ms', utc=True).dt.tz_convert(containertimezone)
        df_grouped['date'] = df_grouped['@timestamp'].apply(
            lambda x: getDate(x))
        df_grouped['month'] = df_grouped['date'].apply(lambda x: getMonth(x))
        df_grouped['year'] = df_grouped['date'].apply(lambda x: getYear(x))
        df_grouped['pinStr'] = df_grouped['pin'].apply(
            lambda x: 'Cpt' + str(x))
        # Document id: "<client>-<area>-Cpt<pin>-<meter id>-<date>".
        df_grouped['client_area_name'] = df_grouped[
            'client'] + '-' + df_grouped['area'] + '-' + df_grouped['pinStr']
        df_grouped['_id'] = df_grouped['client_area_name'] + '-' + df_grouped[
            'id'] + '-' + df_grouped['date']
        df_grouped['_index'] = df_grouped['date'].apply(lambda x: getIndex(x))
        logger.info(df_grouped)
        res = es_helper.dataframe_to_elastic(es, df_grouped)
        print("data inserted for day " + str(start))
        print("finished")
    except Exception as er:
        # Best effort: a failed day is logged and skipped.
        logger.error('Unable to compute data for ' + str(start))
        logger.error(er)
def doTheWork(start):
    """Compute the daily LUTOSA cogeneration records for one day.

    First builds a site-level summary object and indexes it into
    ``daily_cogen_lutosa``; then aggregates the raw single-value counter
    records per meter (min/max/mean/custom-min) into daily documents.
    The two steps are independently guarded so a failure in one does not
    prevent the other.

    Parameters
    ----------
    start
        Day to process (any datetime; truncated to midnight).
    """
    #now = datetime.now()
    # Truncate to midnight of the requested day.
    start = datetime(start.year, start.month, start.day)
    try:
        df = retrieve_raw_data(start)
        obj_to_es = create_obj(df, start)
        obj_to_es['@timestamp'] = containertimezone.localize(start)
        obj_to_es['site'] = 'LUTOSA'
        # One summary document per day, keyed on the day's epoch seconds.
        es.index(index='daily_cogen_lutosa',
                 doc_type='doc',
                 id=int(start.timestamp()),
                 body=obj_to_es)
        save_tags_to_computed(es, obj_to_es)
    except Exception as er:
        logger.error('Error During creating specifics data')
        error = traceback.format_exc()
        logger.error(error)
    # NOTE(review): if retrieve_raw_data failed above, 'df' is undefined
    # here and this second step logs a NameError — confirm this fallback is
    # acceptable.
    try:
        df['value'] = df['value'].apply(lambda x: removeStr(x))
        # Duplicate the value so a single groupby can compute min / max /
        # mean / custom-min simultaneously on distinct columns.
        df['value_min'] = df['value']
        df['value_min_sec'] = df['value']
        df['value_max'] = df['value']
        df['value_avg'] = df['value']
        df_grouped = df.groupby(
            ['client_area_name', 'area_name', 'client', 'area']).agg({
                '@timestamp': 'min',
                'value_min': 'min',
                'value_max': 'max',
                'value_avg': 'mean',
                'value_min_sec': getCustomMin
            }).reset_index()
        # Daily derived values. NOTE(review): the *24 / *1440 / *100 factors
        # presumably convert the mean to per-day consumption / minutes of
        # availability / percentage — confirm units with the data owner.
        df_grouped[
            'value_day'] = df_grouped['value_max'] - df_grouped['value_min']
        df_grouped['conso_day'] = df_grouped['value_avg'] * 24
        df_grouped['availability'] = df_grouped['value_avg'] * 1440
        df_grouped['availability_perc'] = df_grouped['value_avg'] * 100
        df_grouped['value_day_sec'] = df_grouped['value_max'] - df_grouped[
            'value_min_sec']
        df_grouped['date'] = df_grouped['@timestamp'].apply(
            lambda x: getDate(x))
        df_grouped['month'] = df_grouped['date'].apply(lambda x: getMonth(x))
        df_grouped['year'] = df_grouped['date'].apply(lambda x: getYear(x))
        # Document id: "<client_area_name>-<date>".
        df_grouped[
            '_id'] = df_grouped['client_area_name'] + '-' + df_grouped['date']
        df_grouped['_index'] = df_grouped['date'].apply(lambda x: getIndex(x))
        df_grouped['site'] = df_grouped.apply(lambda raw: getSite(raw), axis=1)
        es_helper.dataframe_to_elastic(es, df_grouped)
        print("data inserted for day " + str(start))
        print("finished")
    except Exception as er:
        # Best effort: a failed day is logged and skipped.
        logger.error('Unable to compute data for ' + str(start))
        error = traceback.format_exc()
        logger.error(error)