def __init__(
    self,
    hbase_meta_path,
    pro_path,
    see_path,
    ehost='localhost',
    eport=8080,
    hhost='localhost',
    hport=9090,
):
    """Load the JSON configs, create the clients and derive column-family lookups.

    Args:
        hbase_meta_path: Path of the HBase metadata JSON file.
        pro_path: Directory that contains ``proConfig.json``; also handed to
            ``proSweeper``.
        see_path: Directory that contains ``seeConfig.json``.
        ehost: Host passed to the first ``handEl`` client and to ``proSweeper``.
        eport: Port passed to the first ``handEl`` client and to ``proSweeper``.
        hhost: Host passed to the second ``handEl`` client.
        hport: Port passed to the second ``handEl`` client.
    """
    # An argument-less handEl is used here only for its JSON loader.
    loader = handEl()
    self.jso = loader
    self.hmeta = loader.loadJson(hbase_meta_path, ex=True)
    see_config_file = os.path.join(see_path, "seeConfig.json")
    self.smeta = loader.loadJson(see_config_file, ex=True)
    pro_config_file = os.path.join(pro_path, "proConfig.json")
    self.pmeta = loader.loadJson(pro_config_file, ex=True)

    self.he = handEl(ehost, eport)
    self.hh = handEl(hhost, hport)
    self.ps = proSweeper(ehost, eport, None, pro_path)

    table_meta = self.hmeta['TABLE']
    self.tabref = table_meta['REF']

    column_meta = table_meta['COLUMN']
    raw_families = column_meta['RAW_FAM']
    # Family names, in the order the raw family keys are listed.
    self.fields = [column_meta['FAMILIES'][raw_cf] for raw_cf in raw_families]
    # Reverse lookup: family name -> raw family key.
    self.hbdict = {column_meta['FAMILIES'][raw_cf]: raw_cf for raw_cf in raw_families}
def main():
    """Analyze the first sample sentence and print every token with its POS tag.

    Tokens are printed on one line as ``token(TAG) ``; a token whose
    ``rightPOS`` contains ``E(Verbal endings)`` is printed with the tag ``E``
    and ends the current output line. Ctrl+C is caught and reported.
    """
    he = handEl(
        host=ELASTICSEARCH_HOST,
        port=ELASTICSEARCH_PORT,
        index=ELASTICSEARCH_INDEX,
    )

    # NOTE(review): presumably the product page the sample sentences come from - confirm.
    # https://search.shopping.naver.com/detail/lite.nhn?nvMid=19318896974&NaPm=ct%3Dkdy2mauo%7Cci%3D987c95070ec54eb8d3d777d1331f6c14d51d4e82%7Ctr%3Dslsl%7Csn%3D95694%7Chk%3Deaecc241a834a012c2f8a88267c0fb6804fd640d
    texts = [
        '오늘 노란색 옷을 입었는 데 빨간색 여성 상의를 보여줘',
        '어플로 찍은거라 실제 색감과 다르게 나왔으니 핏만 봐주세요',
        '사진에서는 라벤더컬러가 약간 더 보랏빛돌고 생기있어보이는데 실제로는 차분하고 아주 은은한 라벤더컬러라 린넨바지랑 같이 입기 좋을것 같아요',
        '소재가 엄청 까끌한건 아닌데 누워있으면 자잘하게 자국나는 그런 니트소재에요!',
        '두께가 얇아서 상의 속옷 반쯤 비침있는데 확 드러나는건 아니라 그냥 입고 다녀도 될듯해요',
    ]
    # Substitutions applied to the text before it is analyzed.
    user_dict = {'어플': '앱'}

    try:
        tinx = 0
        text = texts[tinx]
        for source_word, replacement in user_dict.items():
            if source_word in text:
                text = text.replace(source_word, replacement)

        for result in analyze(he, text):
            pos = result['rightPOS']
            is_verbal_ending = 'E(Verbal endings)' in pos
            token = result['token']
            if is_verbal_ending:
                # A verbal ending closes the current output line.
                tag, terminator = 'E', "\n"
            else:
                # Keep only the tag code that precedes the "(" description.
                tag, terminator = pos[:pos.index("(")], ""
            print("{0}({1}) ".format(token, tag), end=terminator)
    except KeyboardInterrupt:
        print("Detect ctrl+c, Bye!")
def __init__(self, host, port, cpath, CONF_PATH):
    """Create the client, load the optional table and the sweeper config.

    Args:
        host: Host passed to ``handEl``.
        port: Port passed to ``handEl``.
        cpath: Path of a JSON file loaded into ``self.table``; when falsy,
            ``self.table`` is set to ``None``.
        CONF_PATH: Directory containing ``proConfig.json``, whose ``SWEEPER``
            entry becomes ``self.config``.
    """
    self.he = handEl(host, port)
    self.cpath = cpath

    if cpath:
        self.table = self.loadJson(cpath)
    else:
        self.table = None

    config_file = os.path.join(CONF_PATH, 'proConfig.json')
    self.config = self.loadJson(config_file)['SWEEPER']

    self.dirty_indices = []
    self.clean_items = []

    # Both timestamps start at construction time.
    self.statime = datetime.now()
    self.endtime = datetime.now()
#%% from handEl import handEl from pprint import pprint as pp import numpy as np ELASTICSEARCH_HOST = "192.168.1.132" ELASTICSEARCH_PORT = "8080" ELASTICSEARCH_INDEX = "nori_analyzer" he = handEl(host=ELASTICSEARCH_HOST, port=ELASTICSEARCH_PORT, index=ELASTICSEARCH_INDEX) # 품사 태그 설명 # https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY/edit#gid=589544265 #%% def get_text_analyzed(text): global he options = ["rightPOS", "-f"] return he.analyze(text, options) def get_texts_split_by_E(results): texts = [[]] tags = [[]] for result in results: text = result['token'] tag = result['rightPOS'][:result['rightPOS'].index("(")]
from handEl import handEl
from pprint import pprint as pp

if __name__ == "__main__":
    # Demo: index two documents, fetch one, run two searches, tokenize a sentence.
    HOST = 'localhost'
    PORT = 8080
    INDEX = "test_index"
    DATA1 = {'alias': ['hello', '메롱']}
    DATA2 = {'alias': ['world', '메렁']}

    he = handEl(host=HOST, port=PORT)

    # NOTE(review): presumably selects the index to work against - confirm in handEl.
    he.indexing(INDEX)

    # Add documents
    for doc_id, body, flag in (('A0', DATA1, True), ("A1", DATA2, False)):
        he.prope(doc_id, body, flag)

    he.doc('A0')
    pp(he.result)

    for query in ("메롱 좀 하지마!", "그럼 메렁 해야지~"):
        he.search(query)
        pp(he.result)

    print(he.tokenize("i might love with you."))
def __init__(self, host, port, index="default"):
    """Remember the index name and create the underlying ``handEl`` client.

    Args:
        host: Host passed to ``handEl``.
        port: Port passed to ``handEl``.
        index: Index name passed to ``handEl`` and kept on the instance.
    """
    # No table is loaded at construction time.
    self.table = None
    self.index = index
    self.handel = handEl(host, port, index)
from handEl import handEl

# Module-level client for a local server.
# NOTE(review): the port is passed as the string '8080' - confirm handEl accepts str ports.
he = handEl('localhost', '8080')