Example #1
def updateBaseClassification(credentials):
    annotatedDF = getAnnotatedSentences(credentials)
    baseDF = getBaseClassification(credentials)
    existingIds = baseDF["sentencesId"]
    newAnnotatedDF = annotatedDF.loc[~annotatedDF["id"].isin(existingIds)]
    newAnnotatedDF = newAnnotatedDF[["id", "paragraph", "isLesson"]]
    newAnnotatedDF = newAnnotatedDF.rename(columns={"id": "sentencesId"})
    newAnnotatedDF["source"] = "annotation"
    newIdStart = max(baseDF["id"].astype('int').tolist()) + 1
    newAnnotatedDF["id"] = range(newIdStart, newIdStart + len(newAnnotatedDF))
    newAnnotatedDF["id"] = newAnnotatedDF["id"].astype('str')
    if "_index" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_index"])
    if "_type" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_type"])
    if "_id" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_id"])
    if "_score" in baseDF.columns:
        baseDF = baseDF.drop(columns=["_score"])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    baseDF = pd.concat([baseDF, newAnnotatedDF])
    deleteIndex(credentials, "tmp")
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(baseDF, "tmp")
    ep.to_es(baseDF, "tmp", doc_type="tmp")
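
The four metadata checks above (dropping "_index", "_type", "_id" and "_score") recur in most of the examples below. A small helper could consolidate them; this is a sketch, not part of es_pandas:

def dropEsMetaColumns(df):
    # Drop the Elasticsearch metadata columns that query results carry along
    metaColumns = ["_index", "_type", "_id", "_score"]
    return df.drop(columns=[c for c in metaColumns if c in df.columns])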
Example #2
def test():
    # Information of the es cluster
    es_host = '192.168.90.10:9200'
    index = 'chejian'
    # create es_pandas instance
    ep = es_pandas(es_host)

    # Example data frame
    db = "check_infos"
    df = sqlToDf("""
        SELECT cj.id, riqi.data_at as riqi, hpzl.name as hpzl, hphm, syr,
               cllx.name as cllx, csys.name as csys, clyt.name as clyt, fdjxh,
               zzcmc.name as zzcmc, check_created_at,
               GROUP_CONCAT(infos.category separator '&') as category,
               GROUP_CONCAT(infos.name separator '&') as name
        FROM em_vehicle.cj_anshun_checks cj
        join (SELECT id, data_at FROM em_vehicle.cj_riqis) as riqi
        join (SELECT id, name FROM em_vehicle.cj_hpzls) as hpzl
        join (SELECT id, name FROM em_vehicle.cj_cllxes) as cllx
        join (SELECT id, name FROM em_vehicle.cj_csies) as csys
        join (SELECT id, name FROM em_vehicle.cj_clyts) as clyt
        join (SELECT id, name FROM em_vehicle.cj_zzcmcs) as zzcmc
        join (SELECT vehicle_check_id, category.category, name
              FROM em_vehicle.cj_anshun_infos
              join (SELECT id, category FROM em_vehicle.cj_codes) as category
              on em_vehicle.cj_anshun_infos.category_id = category.id) as infos
        on riqi.id = cj.riqi_id and hpzl.id = cj.hpzl_id and cllx.id = cj.cllx_id
           and csys.id = cj.csys_id and clyt.id = cj.clyt_id
           and zzcmc.id = cj.zzcmc_id and cj.id = 4642
        group by cj.id;
    """)
    # # init template if you want
    doc_type = '_doc'
    df.loc[:, 'name'] = df['name'].apply(lambda x: len(x.split('&')))
    df.loc[:, 'category'] = df['category'].apply(lambda x: x.split('&'))
    print(df['name'])
    # ep.init_es_tmpl(df, doc_type)
    #
    # # Example of write data to es, use the template you create
    # ep.to_es(df, index, doc_type=doc_type, thread_count=2, chunk_size=10000)
    #
    # # set use_index=True if you want to use DataFrame index as records' _id
    # ep.to_es(df, index, doc_type=doc_type, use_index=True, thread_count=2, chunk_size=10000)
    #
    # # delete records from es
    # ep.to_es(df.iloc[5000:], index, doc_type=doc_type, _op_type='delete', thread_count=2, chunk_size=10000)
    #
    # # Update doc by doc _id
    # df.iloc[:1000, 1] = 'Bye'
    # df.iloc[:1000, 2] = pd.Timestamp.now()  # pd.datetime was removed; use pd.Timestamp
    # ep.to_es(df.iloc[:1000, 1:], index, doc_type=doc_type, _op_type='update')
    #
    # # Example of read data from es
    # df = ep.to_pandas(index)
    # print(df.head())
    #
    # # return certain fields in es
    # heads = ['Num', 'Date']
    # df = ep.to_pandas(index, heads=heads)
    # print(df.head())
    #
    # # set certain columns dtype
    # dtype = {'Num': 'float', 'Alpha': object}
    # df = ep.to_pandas(index, dtype=dtype)
    # print(df.dtypes)
    #
    # # infer dtype from es template
    # df = ep.to_pandas(index, infer_dtype=True)
    # print(df.dtypes)
    #
    # # Example of write data to es with pandas.io.json
    # ep.to_es(df, index, doc_type=doc_type, use_pandas_json=True, thread_count=2, chunk_size=10000)
    print('write es doc with pandas.io.json finished')
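
For reference, the read path from the commented block above as a minimal runnable sketch (it assumes the index has already been written, and infer_dtype needs the template to exist):

df_back = ep.to_pandas(index, infer_dtype=True)
print(df_back.dtypes)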
Example #3
def carrega_dados_desk():

    import pandas as pd
    import requests
    import json
    from es_pandas import es_pandas  # module for Elasticsearch integration

    # Elasticsearch connection info
    es_host = '192.168.2.120:9200'
    index = 'chamados'

    # Create an es_pandas instance
    ep = es_pandas(es_host)

    # DESK MANAGER API AUTHENTICATION
    url = "https://api.desk.ms/Login/autenticar"
    pubkey = '\"ef89a6460dbd71f2e37a999514d2543b99509d4f\"'
    payload = " {\r\n  \"PublicKey\" :" + pubkey + "\r\n}"
    headers = {
        'Authorization': '66e22b87364fa2946f2ce04dce1b8b59b669ab7f',
        'Content-Type': 'application/json'
    }
    token = requests.request("POST", url, headers=headers, data=payload)
    resp_token = json.loads(token.text)

    # FETCH THE REPORT FROM THE DESK MANAGER API
    url = "https://api.desk.ms/Relatorios/imprimir"
    paginador = '\"' + '0' + '\"'
    relatorio = "875"
    payload = "{\r\n  \"Chave\" :" + relatorio + ", \r\n  \"APartirDe\" :" + paginador + ", \r\n  \"Total\": \"\" \r\n}"
    headers = {'Authorization': resp_token, 'Content-Type': 'application/json'}
    resp = requests.request("POST", url, headers=headers, data=payload)
    resp_data = json.loads(resp.text)
    root = resp_data['root']
    df = pd.DataFrame(root)
    ############################

    # Initialize the template
    doc_type = 'chamados'
    ep.init_es_tmpl(df, doc_type)

    # clear old records out of Elasticsearch
    ep.to_es(df.iloc[50000:],
             index,
             doc_type=doc_type,
             _op_type='delete',
             thread_count=2,
             chunk_size=10000)
    # load the data into Elasticsearch
    ep.to_es(df, index, doc_type=doc_type, use_index=True)

    print("\n")
Example #4
def saveTopics(credentials, topicsDF):
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    if "_index" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_index"])
    if "_type" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_type"])
    if "_id" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_id"])
    if "_score" in topicsDF.columns:
        topicsDF = topicsDF.drop(columns=["_score"])
    ep.init_es_tmpl(topicsDF, "topics")
    ep.to_es(topicsDF, "topics", doc_type="topics")
Example #5
def saveIndex(credentials, df, indexName):
    if "_index" in df.columns:
        df = df.drop(columns=["_index"])
    if "_type" in df.columns:
        df = df.drop(columns=["_type"])
    if "_id" in df.columns:
        df = df.drop(columns=["_id"])
    if "_score" in df.columns:
        df = df.drop(columns=["_score"])
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(df, indexName)
    ep.to_es(df, indexName, doc_type=indexName)
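
saveIndex generalizes saveTopics from Example #4 to an arbitrary index name; a hypothetical call reproducing that example would be:

saveIndex(credentials, topicsDF, "topics")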
Example #6
def saveBaseClassifications(credentials):
    """
    Description: Function to initially save the base classifications
    Returns: None
    """
    baseDF = pd.read_csv("data/base_classification_data.csv")
    baseDF = baseDF.rename(columns={"is_lesson": "isLesson"})
    baseDF["source"] = "base"
    baseDF["sentencesId"] = ""
    baseDF["isLesson"] = baseDF["isLesson"]
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(baseDF, "base-classification")
    ep.to_es(baseDF, "base-classification", doc_type="base-classification")
Example #7
def saveBaseSummaries(credentials):
    """
    Description: Function to initially save the base summaries
    Returns: None
    """
    baseDF = pd.read_csv(defaults.DATA_PATH + "train_data_lesson_title.csv")
    baseDF = baseDF.rename(
        columns={"human generated title": "annotationTitle"})
    baseDF = baseDF[baseDF["paragraph"] != '"']
    baseDF = baseDF.dropna()
    baseDF["source"] = "base"
    baseDF["sentencesId"] = ""
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(baseDF, "base-summaries")
    ep.to_es(baseDF, "base-summaries", doc_type="base-summaries")
Example #8
def computeIds(credentials):
    sentencesDF = ef.getSentences(credentials)
    sentencesDF["id"] = sentencesDF["_id"]
    sentencesDF["id"] = sentencesDF["id"].astype('str')
    ef.deleteIndex(credentials, "sentences")
    if "_index" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_index"])
    if "_type" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_type"])
    if "_id" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_id"])
    if "_score" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_score"])
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(sentencesDF, "sentences")
    ep.to_es(sentencesDF, "sentences", doc_type="sentences")
Example #9
def updateSentences(credentials, updatedDF):
    sentencesDF = getSentences(credentials)
    sentencesDF["id"] = sentencesDF["id"].astype('str')
    sentencesDF = sentencesDF.set_index("id")
    updatedDF["id"] = updatedDF["id"].astype('str')
    updatedDF = updatedDF.set_index("id")
    sentencesDF.update(updatedDF)
    sentencesDF.reset_index(inplace=True)
    if "_index" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_index"])
    if "_type" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_type"])
    if "_id" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_id"])
    if "_score" in sentencesDF.columns:
        sentencesDF = sentencesDF.drop(columns=["_score"])
    sentencesDF["id"] = sentencesDF["id"].astype('str')
    deleteIndex(credentials, "sentences")
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(sentencesDF, "sentences")
    ep.to_es(sentencesDF, "sentences", doc_type="sentences")
Example #10
import time

import pandas as pd

from es_pandas import es_pandas

# Information of the es cluster
es_host = 'localhost:9200'
index = 'demo'

# create es_pandas instance
ep = es_pandas(es_host)

# Example data frame
df = pd.DataFrame({
    'Alpha': [chr(i) for i in range(97, 128)],
    'Num': [x for x in range(31)],
    'Date': pd.date_range(start='2019/01/01', end='2019/01/31')
})

# init template if you want
doc_type = 'demo'
ep.init_es_tmpl(df, doc_type, delete=True)

# Example of write data to es
ep.to_es(df, index, doc_type=doc_type, use_index=True)

# waiting for es data writing
time.sleep(5)
ep.delete_es(df.iloc[0:10, :], index)
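
A quick way to confirm that the write and the delete both landed (a sketch; to_pandas is the read API shown in Example #2's commented block):

time.sleep(5)  # give Elasticsearch time to refresh again
remaining = ep.to_pandas(index)
print(len(remaining))  # expect 21 rows: 31 written minus the 10 deleted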
Example #11
    def main():

        print('entered main')

        # database connection - assign the correct values
        engineorigem = sqlalchemy.create_engine(
            'mssql+pyodbc://sa:[email protected]/bi_integracao?driver=SQL Server'
        )
        #engineorigem = sqlalchemy.create_engine('mssql+pyodbc://sa:[email protected]/metrics?driver=SQL Server')

        # gets the disk partitions in order to get a list of NFTS drives
        drps = psutil.disk_partitions()
        drives = [dp.device for dp in drps if dp.fstype == 'NTFS']

        # initialises the data frame with the appropriate values
        df = pd.DataFrame(
            {
                'CPU_usada': int(get_cpu_usage_pct()),
                'CPU_frequencia': int(get_cpu_frequency()),
                'RAM_total': get_ram_total() // 1024 // 1024,
                'RAM_utilizada': int(round(get_ram_usage() / 1024 // 1024)),
                'RAM_utilizada%': round(get_ram_usage_pct()),
                #'MEM_virtual': [psutil.virtual_memory()[2]],
                'UltimoBoot': datetime.fromtimestamp(
                    psutil.boot_time()).strftime('%d-%m-%Y %H:%M:%S')
            },
            index=[0])

        print(df['CPU_usada'])

        # records the drive usage for each drive found
        for drive in drives:
            df['{}_Driver_uso'.format(drive.replace(
                ":\\", ""))] = psutil.disk_usage(drive)[3]

        # adds the current date and time stamp
        df['DataRegistro'] = datetime.now().strftime('%d-%m-%Y %H:%M:%S')
        #df['RAM_utilizada%'] = 100

        #if_exists="replace" if the table does not yet exist, then add HistoryID (or ID) as the auto-incremented primary key
        df.to_sql(name='cpu_mem_disco',
                  con=engineorigem,
                  if_exists='append',
                  index=False)

        # Elasticsearch connection info
        es_host = '192.168.2.120:9200'
        index = 'cpu_mem_disco'

        # Create an es_pandas instance
        ep = es_pandas(es_host)

        # init template if you want
        doc_type = 'cpu_mem_disco'
        ep.init_es_tmpl(df, doc_type)

        # clear old records out of Elasticsearch
        #ep.to_es(df.iloc[50000:], index, doc_type=doc_type, _op_type='delete', thread_count=2, chunk_size=10000)
        # load the data into Elasticsearch
        #ep.to_es(df, index, doc_type=doc_type, use_index=True)
        ep.to_es(df,
                 index,
                 doc_type=doc_type,
                 thread_count=2,
                 chunk_size=10000)
Example #12
from langdetect import detect, detect_langs
from datetime import datetime
import os
import html
import glob
import time

from elasticsearch import Elasticsearch
# es = Elasticsearch("https://*****:*****@search-raiders-lost-kek-i37ssqiqmlpnt4o2klkbj3wvnq.us-east-2.es.amazonaws.com")
# https://search-raiders-lost-kek-i37ssqiqmlpnt4o2klkbj3wvnq.us-east-2.es.amazonaws.com/

# create a client instance of the library
# https://search-turbo-raiders-544c36viav3v7tyefmlihyg66e.us-east-2.es.amazonaws.com/
# ep2 = es_pandas("https://*****:*****@search-raiders-lost-kek-i37ssqiqmlpnt4o2klkbj3wvnq.us-east-2.es.amazonaws.com")
ep2 = es_pandas(
    "https://*****:*****@search-turbo-raiders-544c36viav3v7tyefmlihyg66e.us-east-2.es.amazonaws.com"
)


def log_me(txt):
    try:
        print("{} {}".format(
            time.strftime(u"%Y-%b-%d %H:%M:%S", time.localtime()), txt))
    except Exception:
        print("{} Formatting issue with a log message.".format(
            time.strftime(u"%Y-%b-%d %H:%M:%S", time.localtime())))


approved_ent = [
    'PERSON', 'NORP', 'FAC', 'ORG', 'GPE', 'EVENT', 'WORK_OF_ART', 'LAW'
]
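
approved_ent matches spaCy's named-entity labels; a hypothetical downstream use, assuming a spaCy Doc named doc, would filter entities to those types:

ents = [(e.text, e.label_) for e in doc.ents if e.label_ in approved_ent]  # doc is hypothetical here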
Example #13
def saveTFIDF(credentials, dfTFIDF):
    ep = es_pandas('http://' + credentials["username"] + ':' +
                   credentials["password"] + '@' + credentials["ip_and_port"])
    ep.init_es_tmpl(dfTFIDF, "tfidf")
    ep.to_es(dfTFIDF, "tfidf", doc_type="tfidf")
Example #14
        except ValueError:
            pass

print(dataaggregate)
"""
dataaggregate = call_data("158.108.38.66" , "cpu_user")
print(dataaggregate)
dataaggregate = dataaggregate.reset_index(drop=True) 
dataaggregate_new = call_data("158.108.38.66" , "cpu_system")
#dataaggregate.append(call_data("cpu_system"))
dataaggregate_new = dataaggregate_new.reset_index(drop=True) 
dataaggregate = dataaggregate.append(dataaggregate_new)
dataaggregate = dataaggregate.reset_index(drop=True)
"""

es.delete_by_query(index="ganglia-metrics-prediction",
                   body={"query": {
                       "match_all": {}
                   }})
print("\n" + "Delete old data ", datetime.now())

es_host = "158.108.38.66:9200"
es_auth = 'elastic'
password = '******'
dataindex = "ganglia-metrics-prediction"
type = "_doc"
dtype = {"prediction": "float"}
ep = es_pandas(es_host, http_auth=("elastic", "1q2w3e4r"), dtype=dtype)
ep.to_es(dataaggregate, dataindex, doc_type)
print("\n" + "Success ", datetime.now())