def updateBaseClassification(credentials):
    annotatedDF = getAnnotatedSentences(credentials)
    baseDF = getBaseClassification(credentials)
    # Keep only annotated sentences not already in the base classification
    existingIds = baseDF["sentencesId"]
    newAnnotatedDF = annotatedDF.loc[~annotatedDF["id"].isin(existingIds)]
    newAnnotatedDF = newAnnotatedDF[["id", "paragraph", "isLesson"]]
    newAnnotatedDF = newAnnotatedDF.rename(columns={"id": "sentencesId"})
    newAnnotatedDF["source"] = "annotation"
    # Assign fresh string ids continuing from the current maximum
    newIdStart = max(baseDF["id"].astype('int').tolist()) + 1
    newAnnotatedDF["id"] = range(newIdStart, newIdStart + len(newAnnotatedDF))
    newAnnotatedDF["id"] = newAnnotatedDF["id"].astype('str')
    # Drop Elasticsearch metadata columns if present
    if "_index" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_index"])
    if "_type" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_type"])
    if "_id" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_id"])
    if "_score" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_score"])
    # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent here
    baseDF = pd.concat([baseDF, newAnnotatedDF])
    deleteIndex(credentials, "tmp")
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(baseDF, "tmp")
    ep.to_es(baseDF, "tmp", doc_type="tmp")
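# A minimal invocation sketch for the helper above. Assumption: credentials
# is a plain dict with the three keys the helpers read; the values below are
# placeholders, not real endpoints.
credentials = {
    "username": "elastic",
    "password": "changeme",
    "ip_and_port": "localhost:9200",
}
updateBaseClassification(credentials)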
def test():
    # Information of the es cluster
    es_host = '192.168.90.10:9200'
    index = 'chejian'

    # create es_pandas instance
    ep = es_pandas(es_host)

    # Example data frame
    db = "check_infos"
    df = sqlToDf(
        """SELECT cj.id, riqi.data_at as riqi, hpzl.name as hpzl, hphm, syr,
               cllx.name as cllx, csys.name as csys, clyt.name as clyt, fdjxh,
               zzcmc.name as zzcmc, check_created_at,
               GROUP_CONCAT(infos.category separator '&') as category,
               GROUP_CONCAT(infos.name separator '&') as name
           FROM em_vehicle.cj_anshun_checks cj
           join (SELECT id, data_at FROM em_vehicle.cj_riqis) as riqi
           join (SELECT id, name FROM em_vehicle.cj_hpzls) as hpzl
           join (SELECT id, name FROM em_vehicle.cj_cllxes) as cllx
           join (SELECT id, name FROM em_vehicle.cj_csies) as csys
           join (SELECT id, name FROM em_vehicle.cj_clyts) as clyt
           join (SELECT id, name FROM em_vehicle.cj_zzcmcs) as zzcmc
           join (SELECT vehicle_check_id, category.category, name
                 FROM em_vehicle.cj_anshun_infos
                 join (SELECT id, category FROM em_vehicle.cj_codes) as category
                   on em_vehicle.cj_anshun_infos.category_id = category.id) as infos
             on riqi.id = cj.riqi_id and hpzl.id = cj.hpzl_id
            and cllx.id = cj.cllx_id and csys.id = cj.csys_id
            and clyt.id = cj.clyt_id and zzcmc.id = cj.zzcmc_id
            and cj.id = 4642
           group by cj.id;"""
    )

    # init template if you want
    doc_type = '_doc'
    df.loc[:, 'name'] = df['name'].apply(lambda x: len(x.split('&')))
    df.loc[:, 'category'] = df['category'].apply(lambda x: x.split('&'))
    print(df['name'])
    # ep.init_es_tmpl(df, doc_type)

    # Example of write data to es, use the template you create
    # ep.to_es(df, index, doc_type=doc_type, thread_count=2, chunk_size=10000)

    # set use_index=True if you want to use DataFrame index as records' _id
    # ep.to_es(df, index, doc_type=doc_type, use_index=True, thread_count=2, chunk_size=10000)

    # delete records from es
    # ep.to_es(df.iloc[5000:], index, doc_type=doc_type, _op_type='delete', thread_count=2, chunk_size=10000)

    # Update doc by doc _id
    # df.iloc[:1000, 1] = 'Bye'
    # df.iloc[:1000, 2] = pd.datetime.now()
    # ep.to_es(df.iloc[:1000, 1:], index, doc_type=doc_type, _op_type='update')

    # Example of read data from es
    # df = ep.to_pandas(index)
    # print(df.head())

    # return certain fields in es
    # heads = ['Num', 'Date']
    # df = ep.to_pandas(index, heads=heads)
    # print(df.head())

    # set certain columns dtype
    # dtype = {'Num': 'float', 'Alpha': object}
    # df = ep.to_pandas(index, dtype=dtype)
    # print(df.dtypes)

    # infer dtype from es template
    # df = ep.to_pandas(index, infer_dtype=True)
    # print(df.dtypes)

    # Example of write data to es with pandas.io.json
    # ep.to_es(df, index, doc_type=doc_type, use_pandas_json=True, thread_count=2, chunk_size=10000)
    print('write es doc with pandas.io.json finished')
def carrega_dados_desk():
    import pandas as pd
    import requests
    import json
    from es_pandas import es_pandas  # module for Elasticsearch integration

    # Elasticsearch connection details
    es_host = '192.168.2.120:9200'
    index = 'chamados'

    # Create es_pandas instance
    ep = es_pandas(es_host)

    # DESK MANAGER API AUTHENTICATION
    url = "https://api.desk.ms/Login/autenticar"
    pubkey = '"ef89a6460dbd71f2e37a999514d2543b99509d4f"'
    payload = " {\r\n \"PublicKey\" :" + pubkey + "\r\n}"
    headers = {
        'Authorization': '66e22b87364fa2946f2ce04dce1b8b59b669ab7f',
        'Content-Type': 'application/json'
    }
    token = requests.request("POST", url, headers=headers, data=payload)
    resp_token = json.loads(token.text)

    # FETCH THE REPORT FROM THE DESK MANAGER API
    url = "https://api.desk.ms/Relatorios/imprimir"
    paginador = '"0"'
    relatorio = "875"
    payload = ("{\r\n \"Chave\" :" + relatorio + ", \r\n \"APartirDe\" :" +
               paginador + ", \r\n \"Total\": \"\" \r\n}")
    headers = {'Authorization': resp_token, 'Content-Type': 'application/json'}
    resp = requests.request("POST", url, headers=headers, data=payload)
    resp_data = json.loads(resp.text)
    root = resp_data['root']
    df = pd.DataFrame(root)

    # Initialise the template
    doc_type = 'chamados'
    ep.init_es_tmpl(df, doc_type)

    # Delete records from Elasticsearch (rows 50000 onward)
    ep.to_es(df.iloc[50000:], index, doc_type=doc_type, _op_type='delete',
             thread_count=2, chunk_size=10000)

    # Load data into Elasticsearch
    ep.to_es(df, index, doc_type=doc_type, use_index=True)
    print("\n")
def saveTopics(credentials, topicsDF):
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    # Drop Elasticsearch metadata columns if present
    if "_index" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_index"])
    if "_type" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_type"])
    if "_id" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_id"])
    if "_score" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_score"])
    ep.init_es_tmpl(topicsDF, "topics")
    ep.to_es(topicsDF, "topics", doc_type="topics")
def saveIndex(credentials, df, indexName):
    # Drop Elasticsearch metadata columns if present
    if "_index" in df.columns:
        df = df.drop(columns=["_index"])
    if "_type" in df.columns:
        df = df.drop(columns=["_type"])
    if "_id" in df.columns:
        df = df.drop(columns=["_id"])
    if "_score" in df.columns:
        df = df.drop(columns=["_score"])
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(df, indexName)
    ep.to_es(df, indexName, doc_type=indexName)
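# The "_index"/"_type"/"_id"/"_score" cleanup above is repeated verbatim in
# several of these helpers. A sketch of factoring it out: dropEsMetaColumns
# is a hypothetical name, and the body only restates the existing pattern.
ES_META_COLUMNS = ["_index", "_type", "_id", "_score"]

def dropEsMetaColumns(df):
    # Drop any Elasticsearch metadata columns that are present
    return df.drop(columns=[c for c in ES_META_COLUMNS if c in df.columns])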
def saveBaseClassifications(credentials):
    """
    Description:
        Function to initially save the base classifications

    Returns:
        None
    """
    baseDF = pd.read_csv("data/base_classification_data.csv")
    baseDF = baseDF.rename(columns={"is_lesson": "isLesson"})
    baseDF["source"] = "base"
    baseDF["sentencesId"] = ""
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(baseDF, "base-classification")
    ep.to_es(baseDF, "base-classification", doc_type="base-classification")
def saveBaseSummaries(credentials):
    """
    Description:
        Function to initially save the base summaries

    Returns:
        None
    """
    baseDF = pd.read_csv(defaults.DATA_PATH + "train_data_lesson_title.csv")
    baseDF = baseDF.rename(
        columns={"human generated title": "annotationTitle"})
    baseDF = baseDF[baseDF["paragraph"] != '"']
    baseDF = baseDF.dropna()
    baseDF["source"] = "base"
    baseDF["sentencesId"] = ""
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(baseDF, "base-summaries")
    ep.to_es(baseDF, "base-summaries", doc_type="base-summaries")
def computeIds(credentials):
    sentencesDF = ef.getSentences(credentials)
    # Promote the Elasticsearch _id to a regular string "id" column
    sentencesDF["id"] = sentencesDF["_id"]
    sentencesDF["id"] = sentencesDF["id"].astype('str')
    ef.deleteIndex(credentials, "sentences")
    # Drop Elasticsearch metadata columns if present
    if "_index" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_index"])
    if "_type" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_type"])
    if "_id" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_id"])
    if "_score" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_score"])
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(sentencesDF, "sentences")
    ep.to_es(sentencesDF, "sentences", doc_type="sentences")
def updateSentences(credentials, updatedDF):
    sentencesDF = getSentences(credentials)
    # Align both frames on a string "id" index so update() matches rows
    sentencesDF["id"] = sentencesDF["id"].astype('str')
    sentencesDF = sentencesDF.set_index("id")
    updatedDF["id"] = updatedDF["id"].astype('str')
    updatedDF = updatedDF.set_index("id")
    sentencesDF.update(updatedDF)
    sentencesDF.reset_index(inplace=True)
    # Drop Elasticsearch metadata columns if present
    if "_index" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_index"])
    if "_type" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_type"])
    if "_id" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_id"])
    if "_score" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_score"])
    sentencesDF["id"] = sentencesDF["id"].astype('str')
    deleteIndex(credentials, "sentences")
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(sentencesDF, "sentences")
    ep.to_es(sentencesDF, "sentences", doc_type="sentences")
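# A sketch of calling updateSentences. Assumptions: updatedDF carries an "id"
# column matching ids already in the "sentences" index, and the other column
# names and values here are illustrative only.
import pandas as pd

credentials = {"username": "elastic", "password": "changeme",
               "ip_and_port": "localhost:9200"}  # placeholders
updatedDF = pd.DataFrame({
    "id": ["12", "13"],
    "isLesson": [True, False],
})
updateSentences(credentials, updatedDF)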
import time

import pandas as pd
from es_pandas import es_pandas

# Information of the es cluster
es_host = 'localhost:9200'
index = 'demo'

# create es_pandas instance
ep = es_pandas(es_host)

# Example data frame
df = pd.DataFrame({
    'Alpha': [chr(i) for i in range(97, 128)],
    'Num': [x for x in range(31)],
    'Date': pd.date_range(start='2019/01/01', end='2019/01/31')
})

# init template if you want
doc_type = 'demo'
ep.init_es_tmpl(df, doc_type, delete=True)

# Example of write data to es
ep.to_es(df, index, doc_type=doc_type, use_index=True)

# waiting for es data writing
time.sleep(5)
ep.delete_es(df.iloc[0:10, :], index)
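# A read-back check for the demo above, using the to_pandas read path shown
# in the commented examples earlier. Assumption: the deletes have become
# visible in the index by the time this runs.
df_back = ep.to_pandas(index)
print(len(df_back))   # expected: 21 of the original 31 rows after deleting 10
print(df_back.head())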
def main():
    print('entrou no main')  # "entered main"

    # database connection - assign the correct values
    engineorigem = sqlalchemy.create_engine(
        'mssql+pyodbc://sa:[email protected]/bi_integracao?driver=SQL Server'
    )
    # engineorigem = sqlalchemy.create_engine('mssql+pyodbc://sa:[email protected]/metrics?driver=SQL Server')

    # gets the disk partitions in order to get a list of NTFS drives
    drps = psutil.disk_partitions()
    drives = [dp.device for dp in drps if dp.fstype == 'NTFS']

    # initialises the data frame with the appropriate values
    df = pd.DataFrame(
        {
            'CPU_usada': int(get_cpu_usage_pct()),
            'CPU_frequencia': int(get_cpu_frequency()),
            'RAM_total': get_ram_total() // 1024 // 1024,
            'RAM_utilizada': int(round(get_ram_usage() / 1024 // 1024)),
            'RAM_utilizada%': round(get_ram_usage_pct()),
            # 'MEM_virtual': [psutil.virtual_memory()[2]],
            'UltimoBoot': datetime.fromtimestamp(
                psutil.boot_time()).strftime('%d-%m-%Y %H:%M:%S')
        },
        index=[0])
    print(df['CPU_usada'])

    # records the drive usage for each drive found
    for drive in drives:
        df['{}_Driver_uso'.format(drive.replace(":\\", ""))] = \
            psutil.disk_usage(drive)[3]

    # adds the current date and time stamp
    df['DataRegistro'] = datetime.now().strftime('%d-%m-%Y %H:%M:%S')
    # df['RAM_utilizada%'] = 100

    # if_exists="replace" if the table does not yet exist, then add
    # HistoryID (or ID) as the auto-incremented primary key
    df.to_sql(name='cpu_mem_disco', con=engineorigem, if_exists='append',
              index=False)

    # Elasticsearch connection details
    es_host = '192.168.2.120:9200'
    index = 'cpu_mem_disco'

    # Create es_pandas instance
    ep = es_pandas(es_host)

    # init template if you want
    doc_type = 'cpu_mem_disco'
    ep.init_es_tmpl(df, doc_type)

    # delete records from Elasticsearch
    # ep.to_es(df.iloc[50000:], index, doc_type=doc_type, _op_type='delete', thread_count=2, chunk_size=10000)

    # load data into Elasticsearch
    # ep.to_es(df, index, doc_type=doc_type, use_index=True)
    ep.to_es(df, index, doc_type=doc_type, thread_count=2, chunk_size=10000)
from langdetect import detect, detect_langs
from datetime import datetime
import os
import html
import glob
import time
from elasticsearch import Elasticsearch
from es_pandas import es_pandas  # needed for the es_pandas instance below

# es = Elasticsearch("https://*****:*****@search-raiders-lost-kek-i37ssqiqmlpnt4o2klkbj3wvnq.us-east-2.es.amazonaws.com")
# https://search-raiders-lost-kek-i37ssqiqmlpnt4o2klkbj3wvnq.us-east-2.es.amazonaws.com/
# create a client instance of the library
# https://search-turbo-raiders-544c36viav3v7tyefmlihyg66e.us-east-2.es.amazonaws.com/
# ep2 = es_pandas("https://*****:*****@search-raiders-lost-kek-i37ssqiqmlpnt4o2klkbj3wvnq.us-east-2.es.amazonaws.com")
ep2 = es_pandas(
    "https://*****:*****@search-turbo-raiders-544c36viav3v7tyefmlihyg66e.us-east-2.es.amazonaws.com"
)


def log_me(txt):
    # Print a timestamped log line; fall back gracefully on formatting errors
    try:
        print("{} {}".format(
            time.strftime(u"%Y-%b-%d %H:%M:%S", time.localtime()), txt))
    except Exception:
        print("{} Formatting issue with a log message.".format(
            time.strftime(u"%Y-%b-%d %H:%M:%S", time.localtime())))


approved_ent = [
    'PERSON', 'NORP', 'FAC', 'ORG', 'GPE', 'EVENT', 'WORK_OF_ART', 'LAW'
]
def saveTFIDF(credentials, dfTFIDF):
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(dfTFIDF, "tfidf")
    ep.to_es(dfTFIDF, "tfidf", doc_type="tfidf")
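# A minimal sketch of feeding saveTFIDF. Assumptions: one row per
# document/term pair; the column names and values here are illustrative,
# not taken from the rest of the pipeline.
import pandas as pd

credentials = {"username": "elastic", "password": "changeme",
               "ip_and_port": "localhost:9200"}  # placeholders
dfTFIDF = pd.DataFrame({
    "documentId": ["1", "1", "2"],
    "term": ["lesson", "risk", "schedule"],
    "tfidf": [0.42, 0.17, 0.33],
})
saveTFIDF(credentials, dfTFIDF)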
    except ValueError:
        pass

print(dataaggregate)

"""
dataaggregate = call_data("158.108.38.66", "cpu_user")
print(dataaggregate)
dataaggregate = dataaggregate.reset_index(drop=True)
dataaggregate_new = call_data("158.108.38.66", "cpu_system")
# dataaggregate.append(call_data("cpu_system"))
dataaggregate_new = dataaggregate_new.reset_index(drop=True)
dataaggregate = dataaggregate.append(dataaggregate_new)
dataaggregate = dataaggregate.reset_index(drop=True)
"""

# Remove the previous prediction documents before writing the new ones
es.delete_by_query(index="ganglia-metrics-prediction",
                   body={"query": {"match_all": {}}})
print("\n" + "Delete old data ", datetime.now())

es_host = "158.108.38.66:9200"
es_auth = 'elastic'
password = '******'
dataindex = "ganglia-metrics-prediction"
doc_type = "_doc"  # renamed from "type" to avoid shadowing the builtin
dtype = {"prediction": "float"}

ep = es_pandas(es_host, http_auth=("elastic", "1q2w3e4r"), dtype=dtype)
ep.to_es(dataaggregate, dataindex, doc_type)
print("\n" + "Success ", datetime.now())