import json
import logging
import Queue

import falcon

# Processor is the worker thread defined elsewhere in this project.


class EventListener:
    """
    A queue to store incoming messages; the queue is passed to a
    Processor thread for processing.
    """

    def __init__(self):
        self.queue = Queue.Queue()
        self.processor = Processor('catch_upload processor', self.queue)
        self.processor.start()

    def on_get(self, req, resp):
        resp.status = falcon.HTTP_200
        resp.body = '\n Congratulations! You GET /event successfully!\n\n'

    def on_post(self, req, resp):
        try:
            raw_json = req.stream.read()
            logging.debug('req: %s', raw_json)
        except Exception:
            raise falcon.HTTPBadRequest(
                'bad request',
                'Could not read from the request stream; please check that '
                'the request is correct.')
        try:
            result_json = json.loads(raw_json, encoding='utf-8')
            logging.debug('result json: %s', result_json)
            logging.info('start to run process....')
            self.queue.put(result_json)
        except ValueError:
            raise falcon.HTTPError(falcon.HTTP_400, 'malformed json')
        resp.status = falcon.HTTP_202
        resp.body = json.dumps(result_json, encoding='utf-8')
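# A minimal sketch of serving the EventListener above; the '/event'
# route matches the GET response text, but the host, port, and use of
# wsgiref here are assumptions for illustration only.
from wsgiref import simple_server

app = falcon.API()
app.add_route('/event', EventListener())

if __name__ == '__main__':
    httpd = simple_server.make_server('127.0.0.1', 8000, app)
    httpd.serve_forever()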
def main(args):
    print_log = False
    print args
    # '-t' switches the output from the transliterated text to the JSON
    # change log returned as the last element of process_text's result
    if args[1] == '-t':
        text = ' '.join(args[2:])
        show_json = True
    else:
        text = ' '.join(args[1:])
        show_json = False
    try:
        new_txt = Processor.process_text(
            text, [u'@', u'{', u'}'], [u'', u'{', u'}'], 1, print_log)
    except Exception:
        # the raw argv bytes may need decoding first (Python 2)
        try:
            text = text.decode('utf-8')
            new_txt = Processor.process_text(
                text, [u'@', u'{', u'}'], [u'', u'{', u'}'], 1, print_log)
        except Exception:
            return 0
    if show_json:
        print new_txt[-1]
    else:
        print new_txt[0]
    return 0
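# A minimal sketch of invoking main() as a command-line entry point;
# passing sys.argv directly is an assumption about the intended usage.
import sys

if __name__ == '__main__':
    sys.exit(main(sys.argv))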
def test_camera(index=0):
    processor = Processor()
    cap = cv2.VideoCapture(index)
    while True:
        ret, img = cap.read()
        if not ret:
            break
        # run recognition and draw the results onto the frame
        show = processor.RecogAndDraw(img)
        cv2.imshow('py_face_recognition', show)
        cv2.waitKey(1)
    # release the capture device once the stream ends
    cap.release()
    cv2.destroyAllWindows()
def test_buffer_limit(self):
    '''
    Adds more processes than the processor can handle, until the
    buffer overflows.
    '''
    b = Buffer(2)
    p = Processor('rtt', 40, b)
    p.add_process(123)
    p.add_process(1231231)
    p.add_process(87)
    # add_process is expected to return 0 once the buffer is full
    self.assertEqual(0, p.add_process(666))
def iterate_root_web(cls, temp_filename):
    tree = etree.parse(temp_filename)
    encoding = tree.docinfo.encoding
    root = tree.getroot()
    doctype = tree.docinfo.doctype
    standalone = tree.docinfo.standalone
    log_data = []
    # characters whose presence marks text that still needs transliteration
    markers = u'iIіѣѢъЪ'
    for child in root.iter():
        # both an element's text and its tail may carry transliterable text
        for attr in ('text', 'tail'):
            value = getattr(child, attr)
            if not value or not any(m in value for m in markers):
                continue
            try:
                new_text, changes, wrong_changes, _ = Processor.process_text(
                    value, 1,
                    META['old_new_delimiters'][META['current_delimiters_xml']],
                    0)
            except Exception:
                continue  # skip nodes the transliterator cannot handle
            setattr(child, attr, new_text)
            if changes:
                log_data.append(changes)
    new_text = etree.tostring(root, xml_declaration=True, encoding=encoding,
                              standalone=standalone, doctype=doctype)
    # process_text inserts <choice>/<reg>/<orig> markup as plain text, so
    # lxml escapes the angle brackets on serialization; restore the tags here
    for tag in ('<choice>', '</choice>', '<reg>', '</reg>',
                '<orig>', '</orig>'):
        escaped = tag.replace('<', '&lt;').replace('>', '&gt;')
        new_text = new_text.replace(escaped, tag)
    return new_text, u'\n'.join(log_data)
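# A minimal sketch of driving iterate_root_web over one file; XmlHandler
# is a placeholder name for the class that owns the method above, and
# the file names are illustrative.
import codecs

new_xml, change_log = XmlHandler.iterate_root_web('input.xml')
with open('output.xml', 'w') as ouf:
    ouf.write(new_xml)
with codecs.open('changes.log', 'w', 'utf-8') as log:
    log.write(change_log)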
def test_processing_flow(self):
    # create a process
    p = Processor('tt', 40)
    till_free = p.add_process(123)
    current_state = till_free
    # check that each call to process() decrements the till-free timer
    for i in range(till_free):
        self.assertEqual(p.process(), current_state - 1)
        current_state -= 1
    # check that at the end of the process the till-free timer is 0
    self.assertEqual(0, p.process())
def log(self):
    in_text = self.entered.get('1.0', 'end')
    new_text, changes, _, _ = Processor.process_text(
        in_text, 1,
        META['old_new_delimiters'][META['current_delimiters_text']])
    s = Show(self.top, changes)
def load_train(params):
    assert 'data_path' in params
    processor = Processor(**params)
    loader = Loader(processor, **params)
    print("Length of training set {}".format(len(loader.x_train)))
    print("Length of test set {}".format(len(loader.x_test)))
    return loader, processor
def ok(self):
    in_text = self.entered.get('1.0', 'end')
    new_text, changes, _, _ = Processor.process_text(
        in_text, 1,
        META['old_new_delimiters'][META['current_delimiters_text']])
    self.result.config(state='normal')
    self.result.delete("1.0", "end")
    self.result.insert("end", new_text)
    self.result.config(state='disabled')
def test_is_busy_after_proc_finish(self):
    p = Processor('test', 10)
    work_time = p.add_process(12313)
    for w in range(work_time - 1):
        p.process()
    self.assertEqual(p.is_busy(), False)
def __init__(self, config, model=None, retrieve=None):
    self.config = config
    self.processor = Processor(config)
    if model is None:
        bert = load_bert_from_ckpt(self.config.bert_dir,
                                   transformer_num=self.config.n_layers,
                                   trainable=True)
        self.model = get_mrc_model(bert, self.config)
    else:
        self.model = model
    if retrieve is not None:
        # keep only the first output of the retrieval model
        self.retrieve = keras.models.Model(retrieve.inputs,
                                           retrieve.outputs[0])
    else:
        self.retrieve = None
    # decoder that turns start/end probability vectors into answer spans
    start = keras.layers.Input(shape=(config.seq_maxlen,), dtype='float32')
    end = keras.layers.Input(shape=(config.seq_maxlen,), dtype='float32')
    decoded = Answer(config.ans_maxlen)([start, end])
    self.decoder = keras.models.Model([start, end], decoded)
def load(cls, text, root, delimiters):
    """ Loads a file and runs the transliteration on it. """
    text.config(state='normal')
    text.insert("end", u"В обработке...\n")  # "Processing..."
    text.config(state='disabled')
    name = os.path.basename(META['filename'])
    try:
        with codecs.open(META['filename'], 'r', 'utf-8') as f:
            data = f.read()  # read the whole file
        text.config(state='normal')
        text.delete("1.0", "end")
        text.insert("end", u"Файл загружен\n")  # "File loaded"
        text.config(state='disabled')
        Dialog.dialog(root, text)
        check_brackets = 1  # take brackets into account
        if META['flag'] == 1:
            # transliterate the text
            new_text, changes, wrong_changes, _ = Processor.process_text(
                data, 1, delimiters, check_brackets)
            SaveText.save_translit_text(text, new_text, changes)
        else:
            text.config(state='normal')
            text.delete("1.0", "end")
            # "You did not enter the names of the output files"
            text.insert("end", u"Вы не ввели имена выходных файлов\n")
            text.config(state='disabled')
        META['flag'] = 0
    except IOError as e:
        # "Problem reading file ...: I/O error. Choose another file."
        err = u"Проблема с чтением файла " + os.path.basename(META['filename']) +\
              u": I/O error({0}): {1}".format(e.errno, e.strerror) +\
              u"\nВыберите другой файл."
        Error.dialogError(err, root)
    except ValueError:
        # "Problem reading file ...: invalid data format. Choose another file."
        err = u"Проблема с чтением файла " + os.path.basename(META['filename']) +\
              u": Неверный формат данных." +\
              u"\nВыберите другой файл."
        Error.dialogError(err, root)
    except:
        # "Problem reading file ...: unknown error. Choose another file."
        err = u"Проблема с чтением файла " + os.path.basename(META['filename']) +\
              u": Неизвестная ошибка: " + str(sys.exc_info()[0]) +\
              u"\nВыберите другой файл."
        Error.dialogError(err, root)
        raise
def load_test(test_params, train_params=None, split='test',
              fit_processor=False):
    if train_params is None:
        processor = Processor(**test_params)
    else:
        _, processor = load_train(train_params)
    x, y, dl = load_x_y_with_processor(test_params, processor, split=split,
                                       fit_processor=fit_processor)
    gt = np.array([np.argmax(y, axis=-1)])
    return x, gt, processor, dl
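# A minimal sketch of the loading flow above; 'data_path' is the one
# parameter the code requires, and the paths here are placeholders for
# whatever Processor and Loader actually accept.
train_params = {'data_path': 'data/train'}
loader, processor = load_train(train_params)
x, gt, processor, dl = load_test({'data_path': 'data/test'},
                                 train_params=train_params)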
def res(self):
    in_text = self.entered.get('1.0', 'end')
    new_text, changes, _, _ = Processor.process_text(
        in_text, 1,
        META['old_new_delimiters'][META['current_delimiters_text']])
    res = self.out.get()
    if res != '':
        res_name = META['default_directory'] + res + '.txt'
    else:
        res_name = META['default_directory'] + 'result.txt'
    with codecs.open(res_name, 'w', 'utf-8') as ouf:
        ouf.write(new_text)
def test_adjective_sentiment1(self):
    processor = Processor()
    self.assertEqual(
        processor.textAdjectives("This was a great experience.")[1], 1)

def test_adjectives2(self):
    processor = Processor()
    self.assertTrue("interesting" in processor.textAdjectives(
        "I found this project to be very interesting.")[0])

def test_adjectives1(self):
    processor = Processor()
    self.assertTrue("cheerful" in processor.textAdjectives(
        "The host was cheerful.")[0])

def test_sentiment_empty(self):
    processor = Processor()
    self.assertEqual(round(processor.textSentiment("")), 0)

def test_sentiment_neutral(self):
    processor = Processor()
    self.assertEqual(
        round(processor.textSentiment("This event was okay.")), 0)
def test_keyphrases2(self):
    processor = Processor()
    self.assertTrue("great event" in processor.textKeyPhrases(
        "This was a great event."))

def test_avg_unsafe_count(self):
    processor = Processor()
    self.assertEqual(processor.runningAvg(5, 3, -10), 3)

def test_avg_first_element(self):
    processor = Processor()
    self.assertEqual(processor.runningAvg(2, 0, 0), 2)

def test_keyphrases3(self):
    processor = Processor()
    self.assertTrue("boring workshop" in processor.textKeyPhrases(
        "This was a boring workshop."))
class FlowController(object):

    def __init__(self, baseUrl, initParams, formatter=None):
        # initialize the query container with parameters and the initial request
        self.query = QueryContainer(baseUrl)
        self.params = initParams
        self.query.setParameters(self.params)
        self.query.setRequest()
        # the Formatter converts responses to Sumit's JSON format; build it
        # per instance rather than as a shared mutable default argument
        self.formatter = formatter if formatter is not None else Formatter()
        # initialize the processor from the first response
        self.query.getResponse()
        self.processor = Processor(self.query.response, 'xml',
                                   soupifyResponse=True)

    def initFlow(self, harvester, filePath=None, connector=None,
                 processFormat=None, limit=None):
        '''
        Runs the harvest loop; *limit* caps how many records are stored.
        '''
        self.harvest = harvester
        self.filePath = filePath
        self.connector = connector
        self.processFormat = processFormat
        self.responseCount = len(self.processor.recordList)
        if limit:
            self.limit = limit
        else:
            self.limit = self.processor.completeListSize
        if self.limit < self.responseCount:
            self.responseCount = self.limit
        print self.limit
        self.totalRecordCount = 0
        # create an empty file if filePath is specified
        if self.filePath:
            with open(self.filePath, 'w') as f:
                f.write('')
        while self.totalRecordCount < self.limit:
            # harvest responses from the current processor state
            self.harvest(self.filePath, self.connector, self.processFormat)
            self.updateParams()
            self.updateRequest()
            self.updateProcessor()
            self.responseCount = len(self.processor.recordList)
            print "updating pipeline for next HTTP call..."
            print "resumption token: %s" % self.params['resumptionToken']

    def harvest(self, filePath=None, connector=None, processFormat=None):
        '''
        Saves multiple documents to file.

        *connector* is a function that saves a record to a cloud server
        storage system, e.g. aws-s3, and should handle the HTTP calls to
        the server itself. For now it works for a key-value data store
        and takes two arguments: the key name and the content of the
        file to be saved.
        '''
        recordList = self.processor.processResponse(
            count=self.responseCount, processFormat=processFormat)
        for r in recordList:
            self.totalRecordCount += 1
            # serialize to JSON or XML before saving to file or to the
            # server specified by the connector function
            if processFormat == 'json':
                formattedRecord = self.formatter.formatRecord(r)
                keyName = formattedRecord['okrID']
                record = json.dumps(formattedRecord)
            elif processFormat == 'xml':
                keyName = r.find_all('identifier')[0].get_text()
                record = unicode(r)
            else:
                continue  # unsupported format; skip the record
            print keyName
            try:
                self.processor.save2file(filePath, record)
                print "Record %d saved to file." % self.totalRecordCount
            except Exception:
                pass
            try:
                connector(keyName, record)
            except Exception:
                pass

    def updateParams(self):
        '''
        Updates parameters with the processor's current resumptionToken.
        '''
        try:
            del self.params['metadataPrefix']
        except KeyError:
            pass
        try:
            self.params['resumptionToken'] = self.processor.resumptionToken
            self.query.setParameters(self.params)
        except AttributeError:
            print 'the processor does not have a resumptionToken'

    def updateRequest(self):
        self.query.setRequest()
        self.query.getResponse()

    def updateProcessor(self):
        self.processor = Processor(self.query.response, 'xml',
                                   soupifyResponse=True)
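# A minimal sketch of driving FlowController against an OAI-PMH style
# endpoint, which the resumptionToken/metadataPrefix parameters suggest;
# the URL and parameter values are illustrative placeholders.
params = {'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'}
fc = FlowController('http://example.org/oai2', params)
fc.initFlow(fc.harvest, filePath='records.xml', processFormat='xml',
            limit=50)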
def test_adjective_sentiment2(self):
    processor = Processor()
    self.assertEqual(
        processor.textAdjectives("This was a horrific project.")[1], -1)

def test_keyphrases1(self):
    processor = Processor()
    self.assertTrue("technical issues" in processor.textKeyPhrases(
        "The host had lots of technical issues."))

def test_sentiment_positive(self):
    processor = Processor()
    self.assertGreaterEqual(
        processor.textSentiment("This workshop was incredible."), 0.5)

def test_avg_decrease_avg(self):
    processor = Processor()
    self.assertEqual(processor.runningAvg(2, 5, 100), 502 / 101)
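# A sketch of a runningAvg implementation consistent with the three
# running-average tests above, inferred from the assertions rather than
# taken from the real Processor: a negative sample count returns the old
# average unchanged, and otherwise new_value is folded into an average
# built from count previous samples.
def runningAvg(self, new_value, old_avg, count):
    if count < 0:
        return old_avg  # guard against an unsafe sample count
    return (old_avg * count + new_value) / (count + 1)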
def test_sentiment_negative(self):
    processor = Processor()
    self.assertLessEqual(
        processor.textSentiment("This project was the worst."), -0.5)

def test_time_2(self):
    processor = Processor()
    start = datetime(2000, 12, 11, 11, 11, 00)
    current = datetime(2000, 12, 12, 11, 00, 47)
    self.assertEqual(processor.getIntervalTime(start, current, 150),
                     datetime(2000, 12, 12, 10, 58, 30))