def main():
    client = pymongo.MongoClient(IP)
    db = client[DB_NAME]
    col = db['entity']

    timer = Timer()
    timer.start()

    with bz2.open(DUMP_PATH) as f:
        for ln, line in enumerate(f):
            try:
                line = line.decode().strip()
                # each dump line is a JSON object followed by a trailing comma;
                # rstrip(',') also handles the last entity, which has no comma
                entity = json.loads(line.rstrip(','))
                col.insert_one(entity)
            except Exception as e:
                print(e)
            if (ln + 1) % 1000 == 0:
                print(f"{ln + 1} entities inserted...[{timer.diff():.2f} sec]")

    new_col = db['lang']
    docs = col.find({"id": {"$regex": "^Q"}})
    for idx, doc in enumerate(docs):
        cleaned_doc = {'id': doc['id']}
        wiki_langs = set(doc['sitelinks'].keys())
        for lang in LANGS + ['en']:
            if f"{lang}wiki" in wiki_langs:
                cleaned_doc[f"{lang}wiki"] = 1
            else:
                cleaned_doc[f"{lang}wiki"] = 0
        new_col.insert_one(cleaned_doc)
        if (idx + 1) % 1000 == 0:
            time_spent = f"[{timer.diff():.2f} sec]"
            print(f"{idx + 1} entities' languages indexed..." + time_spent)
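# The rstrip(',') above assumes a Wikidata-style .json.bz2 dump: one large
# JSON array with one entity object per line, each line but the last ending
# in a comma. A minimal, self-contained illustration of that assumption
# (hypothetical sample data, not a real dump line):
import json

raw = b'{"id": "Q42", "sitelinks": {"enwiki": {"title": "Douglas Adams"}}},\n'

line = raw.decode().strip()
entity = json.loads(line.rstrip(','))  # strip the array's trailing comma
assert entity['id'] == 'Q42'
print(sorted(entity['sitelinks'].keys()))  # -> ['enwiki']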
def test_remaining_time():
    t = Timer()
    t.start(timeout=1)
    time.sleep(0.1)
    t.stop()
    assert t.remaining_time == 0
def _send_message(self, message, success=None, error=None, *args, **kwargs):
    if message.startswith('msg'):
        try:
            how_long = int(message.split()[1])
            t = Timer(how_long, self.protocol.incomming_message,
                      self.self_buddy,
                      u"Here's your message %ds later" % how_long)
            t.start()
        except Exception:
            pass
def test_multiple_starts():
    t = Timer()
    t.start(timeout=2)
    assert t.running == True
    with pytest.raises(RuntimeError, match="Timer is already running"):
        t.start()
    assert t.running == True
    time.sleep(0.1)
    t.stop()
def test_invalid_start():
    t = Timer()
    with pytest.raises(ValueError, match="No timeout value stored, please provide one"):
        t.start()
    assert t.running == False
    assert t.remaining_time == 0
    # a zero timeout must be rejected
    with pytest.raises(ValueError, match="Invalid timeout value: 0"):
        t.start(timeout=0)
    assert t.running == False
    assert t.remaining_time == 0
def generate_sudoku(self, target=25):
    search = AStarSearch()
    base_sudoku = self.generate_full_sudoku()
    timer = Timer()
    if self.__kind == 'reverse':
        problem = ReverseSudokuGenerationProblem(Sudoku(), target, self.solver)
    else:
        problem = SudokuGenerationProblem(base_sudoku, target, self.solver)
    timer.start()
    node, cnt_explored = search.search(problem, h=lambda n: problem.value(n.state))
    time = timer.stop()
    return node.state, len(node.state), cnt_explored, time
class FieldProtector:
    PROTECTED = 'protected'
    NOT_PROTECTED = 'not_protected'
    BLINKING = 'blinking'

    def __init__(self, field: Field):
        self.field = field
        self._blink_animator = Animator(delay=1, max_states=2)
        self._protected_timer = Timer(delay=15)
        self._blink_timer = Timer(delay=6)
        self._state = self.NOT_PROTECTED

    def update(self):
        if self._state == self.PROTECTED:
            if self._protected_timer.tick():
                self._state = self.BLINKING
                self._blink_timer.start()
        elif self._state == self.BLINKING:
            if self._blink_timer.tick():
                self._change_base_border_type(CellType.BRICK)
                self._state = self.NOT_PROTECTED
            else:
                state = self._blink_animator()
                self._change_base_border_type(
                    CellType.BRICK if state else CellType.CONCRETE)

    @property
    def cells_around_base(self):
        return [(11, 25), (11, 24), (11, 23), (12, 23), (13, 23), (14, 23),
                (14, 24), (14, 25)]

    def _change_base_border_type(self, ct: CellType):
        for x, y in self.cells_around_base:
            self.field.map.set_cell_col_row(x, y, ct)

    def activate(self):
        self._state = self.PROTECTED
        self._blink_timer.stop()
        self._protected_timer.start()
        self._change_base_border_type(CellType.CONCRETE)
        # 1. protect the base with concrete
        # 2. start a 20-second timer
        # 3. when the timer expires, start the animator and a 10-second blink timer
        # 4. while the blink timer runs, toggle the shield between concrete and brick every second!
        ...
def test_start_after_stop():
    t = Timer()
    t.start(timeout=2)
    time.sleep(0.1)
    t.stop()
    t.start()
    assert t.running == True
    assert t.remaining_time > 0
    time.sleep(0.1)
    t.stop()
def test_multiple_stops():
    t = Timer()
    t.start(timeout=2)
    time.sleep(0.1)
    t.stop()
    status = t.running
    remaining = t.remaining_time
    assert status == False
    assert remaining > 0
    t.stop()
    assert t.running == status
    assert t.remaining_time == remaining
def load(self):
    self.loading_lock.clear()

    timer = Timer()
    self.logger.info("Loading model %s" % self.model_id)
    timer.start()

    try:
        # opt = DefaultOpt(self.user_opt['models'], 'src-test.txt', 'temp.txt')
        # should read model paths from json, not fixed
        opt = self.opt
        self.translator = build_translator(
            opt, report_score=False,
            out_file=codecs.open(os.devnull, "w", "utf-8"))
    except RuntimeError as e:
        raise ServerModelError("Runtime Error: %s" % str(e))

    timer.tick("model_loading")
    self.load_time = timer.tick()
    self.reset_unload_timer()
    self.loading_lock.set()
def do_POST(self):
    t = Timer()
    t.start()
    response = 200
    result = {}
    try:
        content_length = int(self.headers.getheader('content-length'))
        req = json.loads(self.rfile.read(content_length))
        print req
        req_type = req['type']
        result = None
        if req_type == 'catalog':
            result = json.dumps(self.server.catalog)
        elif req_type == 'execute':
            task = req['args']['task']
            result = json.dumps(BasicExecutor(self.server.cache, task).execute())
        elif req_type == 'lookup':
            uuid = req['args']['uuid']
            result = self.server.cache[uuid]
            if type(result) is pd.DataFrame:
                page_size = int(req['args']['page_size'])
                page_num = int(req['args']['page_num'])
                i = page_size * page_num
                j = i + page_size
                result = result[i:j]
            result = result.to_json()
    except:
        print traceback.format_exc()
        response = 500
        result = '{}'
    t.stop()
    self.send_response(response)
    self.send_header('Content-type', 'application/json')
    self.end_headers()
    self.wfile.write(result)
    print 'Run Time:', t.time()
def test_clearing():
    t = Timer()
    t.start(timeout=2)
    time.sleep(0.1)
    t.stop()
    t.clear()
    assert t.running == False
    assert t.remaining_time == 0
    with pytest.raises(ValueError, match="No timeout value stored, please provide one"):
        t.start()
    t.start(timeout=2)
    time.sleep(0.1)
    t.stop()
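# Taken together, the tests above pin down a small Timer contract:
#   - start() without a stored timeout raises
#     ValueError("No timeout value stored, please provide one")
#   - a non-positive timeout raises ValueError("Invalid timeout value: ...")
#   - starting a running timer raises RuntimeError("Timer is already running")
#   - stop() records the remaining time truncated to whole seconds
#     (0.9 s left reads as 0; 1.9 s left reads as 1), and repeated stops
#     leave the state untouched
#   - clear() forgets both the stored timeout and the remaining time
# The class below is a minimal sketch that satisfies those tests -- an
# illustration of the implied interface, not the project's actual Timer.
import time


class Timer:
    def __init__(self):
        self._timeout = None      # last timeout passed to start()
        self._started_at = None
        self.running = False
        self.remaining_time = 0   # whole seconds left at the last stop()

    def start(self, timeout=None):
        if self.running:
            raise RuntimeError("Timer is already running")
        if timeout is None:
            if self._timeout is None:
                raise ValueError("No timeout value stored, please provide one")
        else:
            if timeout <= 0:
                raise ValueError("Invalid timeout value: %s" % timeout)
            self._timeout = timeout
        self._started_at = time.monotonic()
        self.running = True

    def stop(self):
        if not self.running:
            return  # repeated stops leave state untouched
        elapsed = time.monotonic() - self._started_at
        self.remaining_time = int(max(self._timeout - elapsed, 0))
        self.running = False

    def clear(self):
        self.running = False
        self._timeout = None
        self.remaining_time = 0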
class OscarTimeoutSocket(common.socket):
    def tryconnect(self, ips, on_connect, on_fail, timeout=2.0):
        self._connectedonce = False
        info('tryconnect Y=%r, N=%r', on_connect, on_fail)
        self.ips = self.iptuples(ips)
        if not callable(on_connect) or not callable(on_fail):
            raise TypeError('on_connect and on_fail must be callables')
        self.on_connect = on_connect
        self.on_fail = on_fail
        self.timetowait = timeout
        self._tryagain(timeout)

    def tryaccept(self, addr, on_connect, on_fail, timeout=1.5):
        self._connectedonce = False
        info('tryaccept Y=%r, N=%r', on_connect, on_fail)
        self.ips = ()
        self.on_connect = on_connect
        self.on_fail = on_fail
        info('listening for a connection at %s:%d', *addr)
        self.bind(addr)
        self.listen(1)
        if timeout:
            info('timeout in %r secs', timeout)

            def dotimeout():
                info('TIMEOUT. calling %r', self.on_fail)
                self.on_fail()

            self.timeout = Timer(timeout, dotimeout)
            self.timeout.start()

    def _tryagain(self, timetowait):
        # Try the next IP.
        addr = self.ips.pop(0)
        if len(self.ips) > 0:
            timeoutfunc = partial(self._tryagain, timetowait)
        else:
            # This is the last one.
            timeoutfunc = self.on_fail
        self.timeout = Timer(timetowait, timeoutfunc)
        info('%r attempting conn: %s:%d', self, *addr)
        self.make_socket()
        self.connect(addr, error=timeoutfunc)
        info('timeout is %r seconds...', timetowait)
        if self.timeout is not None:
            self.timeout.start()

    def handle_expt(self):
        info('handle_expt in %r', self)
        self.handle_disconnect()

    def handle_error(self, e=None):
        info('handle_error in %r', self)
        import traceback
        traceback.print_exc()
        if not self._connectedonce:
            self.handle_disconnect()
        else:
            self.close()

    def handle_disconnect(self):
        self.cancel_timeout()
        self.close()
        if len(self.ips) > 0:
            info('got an error, trying next ip immediately: ' +
                 repr(self.ips[0]))
            self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
            self._tryagain(self.timetowait)
        elif not self._connectedonce:
            info('no more ips to attempt, calling on_fail (%r)', self.on_fail)
            self.on_fail()

    def handle_connect(self):
        info('connected!')
        self.cancel_timeout()
        #self.handle_disconnect = lambda: None
        self._connectedonce = True
        self.on_connect()
        self.on_fail = Sentinel

    def handle_accept(self):
        self.cancel_timeout()
        conn, address = self.accept()
        info('%r connection accepted (%r), canceling timeout and calling %r',
             self, address, self.on_connect)
        self._connectedonce = True
        self.on_connect(conn)

    def cancel_timeout(self):
        # Cancel any timeout.
        if hasattr(self, 'timeout') and self.timeout is not None:
            self.timeout.cancel()
        self.timeout = None

    def iptuples(self, ips):
        if not hasattr(ips, '__len__'):
            raise TypeError('ips must be (host, port) or [(host,port), (host,port)]')
        if not hasattr(ips[0], '__len__'):
            ips = tuple([ips])
        # ips is now a sequence of (host, port) tuples
        assert all(isinstance(a, basestring) and isinstance(p, int)
                   for a, p in ips)
        return ips

    def __repr__(self):
        try:
            pn = self.getpeername()
        except Exception:
            pn = None
        return '<TimeoutSocket peername=%r ips=%r at 0x%08x>' % (
            pn, getattr(self, 'ips', None), id(self))
class TimeoutSocket(common.socket):
    def tryconnect(self, ips, on_connect, on_fail, timeout=2.0):
        '''
        Setup for a new set of ips and start the connect routine

        @param ips:
        @param on_connect:
        @param on_fail:
        @param timeout:
        '''
        self.cancel_timeout()
        self.timetowait = timeout
        self.on_connect = on_connect
        self.on_fail = on_fail
        self._ips = iptuples(ips)
        self.attempts = 0
        self._accepting = False
        self.try_connect()

    def try_connect(self):
        'Do the connection routine.'
        addr = self._ips[self.attempts]
        log.warning('tryconnect: %r', (addr,))
        self.attempts += 1
        self.timeout = Timer(self.timetowait,
                             lambda s=self.socket: self.handle_timeout(s))
        self.make_socket()
        if self.timeout is not None:
            self.timeout.start()

        def succ(*a, **k):
            log.info("WIN")

        def fail(*a, **k):
            log.info("FAIL")

        self.connect(addr, success=succ, error=fail)

    def tryaccept(self, addr, on_connect, on_fail, timeout=1.5):
        self._accepting = True
        info('tryaccept Y=%r, N=%r', on_connect, on_fail)
        self.on_connect = on_connect
        self.on_fail = on_fail
        info('listening for a connection at %r', (addr,))
        self.make_socket()
        common.socket.bind(self, addr)
        self.listen(1)
        if timeout:
            info('timeout in %r secs', timeout)
            self.timeout = Timer(timeout,
                                 lambda s=self.socket: self.handle_timeout(s))
            self.timeout.start()

    def handle_timeout(self, socket):
        info('TIMEOUT %r', socket)
        if socket is self.socket:
            self.do_disconnect()
        elif socket is not None:
            socket.close()

    def handle_expt(self):
        info('handle_expt in %r', self)
        self.do_disconnect()

    def handle_error(self, e=None):
        info('handle_error in %r', self)
        import traceback
        traceback.print_exc()
        self.do_disconnect()

    def do_disconnect(self):
        '''
        toss away the current connection
        will try the next address immediately
        '''
        log.warning('do_disconnect')
        self.cancel_timeout()
        self.close()
        if not self._accepting and self.attempts < len(self._ips):
            self.try_connect()
        else:
            self.on_fail()

    def handle_connect(self):
        info('connected!')
        self.cancel_timeout()
        self.on_connect(self)

    def handle_accept(self):
        self.cancel_timeout()
        conn, address = self.accept()
        info('%r connection accepted (%r), canceling timeout and calling %r',
             self, address, self.on_connect)
        self.on_connect(conn)

    def cancel_timeout(self):
        # Cancel any timeout.
        if hasattr(self, 'timeout') and self.timeout is not None:
            info('cancelling timeout')
            self.timeout.cancel()
        else:
            log.warning('there was no timeout to cancel')
        self.timeout = None

    def __repr__(self):
        if hasattr(self, 'ips') and len(self.ips):
            return '<TimeoutSocket %s:%d>' % self.ips[0]
        else:
            pn = None
            try:
                pn = self.socket.getpeername()
            finally:
                return "<%s connected to %r>" % (self.__class__.__name__, pn)
def main():
    n_epoch = params.n_epoch
    save_weight_filename = params.save_weight_file
    do_validation_only = params.test_only
    gen_n_text_samples = params.gen_n_samples
    learning_rate = params.learning_rate

    training_t = Timer()
    validation_t = Timer()
    best_pp = None
    prev_loss = None
    prev_acc = None
    patience = MAX_PATIENCE

    if params.mode == 'C2W2C':
        def c2w2c_weighted_objective(fn):
            def weighted(y_true, y_pred, weights, mask=None):
                assert mask is None
                assert weights is not None
                score_array = fn(y_true, y_pred)
                # reduce score_array to same ndim as weight array
                ndim = K.ndim(score_array)
                weight_ndim = K.ndim(weights)
                score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))
                # apply sample weighting
                score_array *= weights
                word_scores = K.sum(score_array, axis=-1)
                return K.mean(word_scores)
            return weighted

        # by default Keras calculates only mean which is not correct because
        # word loss = sum(char losses), thus we need to monkey patch the
        # weighted_objective function to return correct loss for C2W2C model
        # ATTENTION: this might not work in later Keras versions, only tested with 1.0.5
        ket.weighted_objective = c2w2c_weighted_objective

    # ======== PREPARE MODELS AND DATA ========

    t_model, v_model, training_data, validation_data, gen_text = prepare_env(params)

    def validate_model(best):
        if gen_n_text_samples:
            print '\nGenerating %d text samples...' % gen_n_text_samples
            n_seed = 100
            start = max(0, np.random.randint(0, training_dataset.n_words - n_seed))
            seed = training_dataset.get_words()[start: start + n_seed]
            gen_text(seed=seed, how_many=gen_n_text_samples)

        print '\nValidating model...'
        validation_t.start()
        v_model.set_weights(t_model.get_weights())
        v_model.reset_states()
        n_v_samples, gen_v = validation_data[0]()
        loss, _ = v_model.evaluate_generator(gen_v, n_v_samples)
        pp = np.exp(loss)
        val_elapsed, val_tot = validation_t.lap()
        validation_info = '''Validation result:
  - Model loss: %f
  - Perplexity: %f %s
  - OOV rate: %f
  - Validation took: %s
  - Total validation: %s
''' % (loss, pp, delta_str(pp, best), validation_data[1], val_elapsed, val_tot)
        info(validation_info)
        return pp

    if do_validation_only:
        validate_model(None)
        sys.exit(0)

    print '\nTraining model...'
    for epoch in range(1, n_epoch + 1):
        print '=== Epoch %d ===' % epoch
        training_t.start()

        n_t_samples, gen_t = training_data[0]()

        t_model.reset_states()
        callbacks = []
        if save_weight_filename:
            callbacks += [ModelCheckpoint(save_weight_filename, monitor='loss',
                                          mode='min', save_best_only=True)]
        h = t_model.fit_generator(generator=gen_t,
                                  samples_per_epoch=n_t_samples,
                                  callbacks=callbacks,
                                  nb_epoch=1,
                                  verbose=1)
        fit_elapsed, fit_tot = training_t.lap()

        loss = h.history['loss'][0]
        acc = h.history['acc'][0]
        epoch_info = '''Epoch %d summary at %s:
  - Model loss: %f %s
  - Model accuracy: %f %s
  - Perplexity: %f
  - Training took: %s
  - Total training: %s''' % (epoch, strftime("%Y-%m-%d %H:%M:%S", localtime()),
                             loss, delta_str(loss, prev_loss), acc,
                             delta_str(acc, prev_acc), np.exp(loss),
                             fit_elapsed, fit_tot)
        print ''
        info(epoch_info)

        pp = validate_model(best_pp)
        if best_pp is not None and pp > best_pp:
            if patience <= 0 and learning_rate > MIN_LR:
                learning_rate /= 2.
                learning_rate = max(learning_rate, MIN_LR)
                info('Validation perplexity increased. Halving learning rate to %f...\n' % learning_rate)
                K.set_value(t_model.optimizer.lr, learning_rate)
                patience = 1
            else:
                patience -= 1
        else:
            best_pp = pp
            patience = MAX_PATIENCE

        prev_acc = acc
        prev_loss = loss

    print 'Training complete'
def generate_sudoku(self, target=25):
    '''
    Generates a sudoku by randomly removing values until the difficulty
    level specified by the target parameter is reached.
    Every 1000 backtracks it restores half of the removed values, chosen
    at random.

    returns (current_sudoku, len(current_sudoku), cnt_step, time)
    '''
    base_sudoku = self.generate_full_sudoku()
    current_sudoku = Sudoku(base_sudoku.get_dict())
    cache = []  # cache of removed values for backtracking
    cnt_backtrack = 0
    cnt_step = 0
    single_solution = True
    timer = Timer()
    timer.start()
    while True:
        cnt_step += 1
        #print '----------------------------'
        #print 'Cache size', len(cache)

        # Exit test
        if len(current_sudoku) == target and single_solution:
            break

        #print 'Current values count: ', len(current_sudoku)
        #print 'Single solution: ', single_solution
        #print 'Backtrack', cnt_backtrack

        # How many values to remove
        n = len(current_sudoku) / 20
        #print 'Trying to remove %d values' % n
        assert n != 0

        # Remove the numbers
        for i in range(n):
            key = random.choice(current_sudoku.filled_cell())
            cache.append(key)
            current_sudoku.clear_cell(key)

        #print 'Cache size', len(cache)

        # Check that the solution is unique
        (sols, b, t) = self.solver.solve(current_sudoku, max_sol=2)

        # If unique, continue
        if len(sols) == 1:
            single_solution = True
            #print "Successfully removed %d elements" % n
            continue
        # If more than one, backtrack
        else:
            #print "Backtrack, sols: %d" % len(sols)
            single_solution = False
            cnt_backtrack += 1
            # Restore the last n removed values
            #print 'Restored cache size', len(cache)
            for i in range(n):
                # Restore the last removed elements
                k = cache[-1]
                current_sudoku.set_cell(k, base_sudoku.cell(k))
                cache.pop(-1)
            if cnt_backtrack % 1000 == 0:
                #print 'Randomly restore half of the cache'
                for i in range(len(cache) / 2):
                    # Restore randomly chosen removed elements
                    idx = random.randint(0, len(cache) - 1)
                    k = cache[idx]
                    current_sudoku.set_cell(k, base_sudoku.cell(k))
                    cache.pop(idx)

    #print '----------------------------'
    #print 'Backtracks needed: ', cnt_backtrack
    time = timer.stop()
    return current_sudoku, len(current_sudoku), cnt_step, time
def run(self, inputs, is_split=False):
    """Translate `inputs` using this model

    Args:
        inputs (List[dict[str, str]]): [{"src": "..."}, {"src": ...}]

    Returns:
        result (list): translations
        times (dict): containing times
    """
    self.stop_unload_timer()
    timer = Timer()
    timer.start()

    self.logger.info("Running translation using %s" % self.model_id)

    if not self.loading_lock.is_set():
        self.logger.info(
            "Model #%s is being loaded by another thread, waiting"
            % self.model_id)
        if not self.loading_lock.wait(timeout=30):
            raise ServerModelError("Model %s loading timeout" % self.model_id)
    else:
        if not self.loaded:
            self.load()
            timer.tick(name="load")
        elif self.opt.cuda:
            self.to_gpu()
            timer.tick(name="to_gpu")

    texts = []
    head_spaces = []
    tail_spaces = []
    sentence_objs = []
    for i, inp in enumerate(inputs):
        src = inp['src']
        if src.strip() == "":
            head_spaces.append(src)
            texts.append("")
            tail_spaces.append("")
        else:
            whitespaces_before, whitespaces_after = "", ""
            match_before = re.search(r'^\s+', src)
            match_after = re.search(r'\s+$', src)
            if match_before is not None:
                whitespaces_before = match_before.group(0)
            if match_after is not None:
                whitespaces_after = match_after.group(0)
            head_spaces.append(whitespaces_before)
            tail_spaces.append(whitespaces_after)
            sent_obj = self.maybe_preprocess(src.strip(), is_split)
            sentence_objs.append(sent_obj)
            tok = self.maybe_tokenize(sent_obj.tokenized_list)
            texts.extend(tok)

    empty_indices = [i for i, x in enumerate(texts) if x == ""]
    texts_to_translate = [x for x in texts if x != ""]
    self.logger.debug(f'text after preprocess: {texts_to_translate}')

    scores = []
    predictions = []
    aligns = []  # no alignments are produced in this code path
    if len(texts_to_translate) > 0:
        try:
            scores, predictions = self.translator.translate(
                texts_to_translate, None, '', 1, 'sent', False, False)
        except (RuntimeError, Exception) as e:
            err = "Error: %s" % str(e)
            self.logger.error(err)
            self.logger.error("repr(text_to_translate): " + repr(texts_to_translate))
            self.logger.error("model: #%s" % self.model_id)
            self.logger.error("model opt: " + str(self.opt.__dict__))
            self.logger.error(traceback.format_exc())
            raise ServerModelError(err)

    timer.tick(name="translation")
    self.logger.info(
        """Using model [%s], input num [%d], translation time: [%f]""" % (
            self.model_id, len(texts), timer.times['translation']))
    self.reset_unload_timer()

    # NOTE: translator returns lists of `n_best` list
    def flatten_list(_list):
        return sum(_list, [])

    results = flatten_list(predictions)
    self.logger.debug(f'text after translate: {results}')
    scores = [score_tensor.item() for score_tensor in flatten_list(scores)]

    source_lines = [
        line for obj in sentence_objs for line in obj.get_sentence_list()
    ]
    final_result = [
        self.maybe_postprocess(target, source)
        for target, source in zip(results, source_lines)
    ]
    self.logger.debug(f'text after postprocess: {final_result}')

    final_result = self.__get_final_result(final_result, sentence_objs)
    final_result = self.postprocess_after_merge(final_result)

    # build back results with empty texts
    for i in empty_indices:
        j = i * self.opt.n_best
        results = results[:j] + [""] * self.opt.n_best + results[j:]
        aligns = aligns[:j] + [None] * self.opt.n_best + aligns[j:]
        scores = scores[:j] + [0] * self.opt.n_best + scores[j:]

    head_spaces = [h for h in head_spaces for i in range(self.opt.n_best)]
    tail_spaces = [h for h in tail_spaces for i in range(self.opt.n_best)]
    final_result = [
        "".join(items)
        for items in zip(head_spaces, final_result, tail_spaces)
    ]

    self.logger.info("Translation Results: %d", len(final_result))
    return final_result
optimizer = tester.OPTIMIZERS[optimizer_name]
solver = tester.SOLVER

mods, ties = dotparse.dot2graph(design_file)
design = Design(mods, ties)
cgra = adlparse(fabric_file)
mrrg = MRRG(cgra, contexts=contexts, add_tie_nodes=not args.ntiesnodes)

pnr = PNR(mrrg, design, solver,
          incremental=incremental,
          duplicate_const=duplicate_const,
          duplicate_all=duplicate_all)

full_timer.start()
result = pnr.optimize_design(
    optimizer,
    tester.init,
    tester.funcs,
    verbose=False,
    cutoff=cutoff,
    build_timer=build_timer,
    solve_timer=solve_timer,
    return_bounds=True,
    optimize_final=optimize_final,
    # attest_func=modeler.model_checker,
)
full_timer.stop()
class DigsbyConnect(TimeoutSocketOne):
    _SERVERTIMEOUT = 8

    def stale_connection(self):
        if getattr(self, '_triumphant', False):
            log.info('stale_connection was called but i already won! yayayay')
        else:
            log.info('%r had a stale connection. Calling do_fail (%r) with a connlost error',
                     self, self.do_fail)
            self.do_fail(DigsbyLoginError('connlost'))

    def succ(self):
        generator = self.do_login()
        self._timeouttimer = Timer(self._SERVERTIMEOUT, self.stale_connection)
        self._timeouttimer.start()
        self.run_sequence(generator)

    @lock
    def handle_error(self, e=None):
        if hasattr(self, '_timeouttimer'):
            self._timeouttimer.stop()
        TimeoutSocketOne.handle_error(self)

    @lock
    def handle_expt(self):
        if hasattr(self, '_timeouttimer'):
            self._timeouttimer.stop()
        TimeoutSocketOne.handle_expt(self)

    @lock
    def handle_close(self):
        if hasattr(self, '_timeouttimer'):
            self._timeouttimer.stop()
        TimeoutSocketOne.handle_close(self)

    def do_login(self):
        login_str = make_pstring(self.cid) + make_pstring(self.un) + make_pstring(self.password)
        codelen = yield (4, login_str)
        codelen = unpack('!I', codelen)[0]
        if codelen <= 0:
            raise DigsbyLoginError('client')
        code = yield (codelen, '')
        try:
            if code == 'success':
                cookielen = unpack('!I', (yield (4, '')))[0]
                cookie = yield (cookielen, '')
                log.debug('Got cookie: %r', cookie)

                serverslen = unpack('!I', (yield (4, '')))[0]
                servers = yield (serverslen, '')
                log.debug('Got servers: %r', servers)
                servers = servers.split(' ')

                self.cookie = cookie
                self.servers = servers
                self._triumphant = True
                return
            elif code == 'error':
                log.debug('Got error!')
                reasonlen = unpack('!I', (yield (4, '')))[0]
                reason = yield (reasonlen, '')
                log.debug('Got error reason: %r', reason)
                raise DigsbyLoginError(reason)
            else:
                log.debug('Unknown error occurred! blaming the client!')
                raise DigsbyLoginError('client')
        except DigsbyLoginError, e:
            if e.reason == 'server':
                log.debug('Got "upgrading digsby" error code. Sleeping.')
                import time
                time.sleep(POLL_SLEEP_TIME)
            raise e
        except Exception, e:
            print_exc()
            raise DigsbyLoginError('client')
class Executor(Process):
    def __init__(self, catalog, results, task):
        Process.__init__(self)
        self.catalog = catalog
        self.results = results
        self.task = task
        self.timer = Timer()

    def get_result(self, uuid):
        result = self.results[uuid]
        while result.complete == 0.0:
            time.sleep(0.0005)
            result = self.results[uuid]
        return result

    def wait(self, uuid):
        while self.results[uuid].complete == 0.0:
            time.sleep(0.0005)

    def run(self):
        self.timer.start()
        try:
            if isinstance(self.task, ClassifyTask):
                self.classify()
            elif isinstance(self.task, CorrelateTask):
                self.correlate()
            elif isinstance(self.task, DifferenceTask):
                self.difference()
            elif isinstance(self.task, FeatureSelectTask):
                self.feature_select()
            elif isinstance(self.task, FrequentItemsetsTask):
                self.frequent_itemsets()
            elif isinstance(self.task, IntersectTask):
                self.intersect()
            elif isinstance(self.task, LoadTask):
                self.load()
            elif isinstance(self.task, MergeTask):
                self.merge()
            elif isinstance(self.task, ProjectTask):
                self.project()
            elif isinstance(self.task, SelectTask):
                self.select()
            elif isinstance(self.task, UnionTask):
                self.union()
            else:
                raise NotImplementedError()
        except Exception as e:
            print str(e)
            result = ErrorResult(self.task, str(e))
            self.results[self.task.uuid] = result
        self.timer.stop()
        print 'task' + str(self.task.uuid) + ': ' + str(self.timer.time()) + 's'

    def classify(self):
        raise NotImplementedError()

    def correlate(self):
        raise NotImplementedError()

    def difference(self):
        raise NotImplementedError()

    def feature_select(self):
        raise NotImplementedError()

    def frequent_itemsets(self):
        raise NotImplementedError()

    def intersect(self):
        raise NotImplementedError()

    def load(self):
        raise NotImplementedError()

    def merge(self):
        raise NotImplementedError()

    def project(self):
        raise NotImplementedError()

    def select(self):
        raise NotImplementedError()

    def union(self):
        raise NotImplementedError()
class Vote:
    def __init__(self, bot):
        self._bot = bot
        self._current_room_users = self._sort_user_by_join_time(self._bot.users.all)
        self._has_voted = []  # user handles of the users who voted
        self._votes = []      # list of tuple(User, vote)
        self._user_to_vote = None
        self._vote_session = 0
        self._vote_type = ''
        self._vote_timer = None

    @property
    def is_active(self):
        """
        Check for active vote session.

        :return: True if active vote session.
        :rtype: bool
        """
        return self._has_active_session()

    @property
    def has_voted(self):
        """
        Returns a list of user handles that have already voted.

        :return: A list of user handles.
        :rtype: list
        """
        return self._has_voted

    @property
    def vote_user(self):
        """
        Return the User object of the user up for vote.

        :return: The User object of the user up for vote.
        :rtype: User
        """
        return self._user_to_vote

    @property
    def active_vote_type(self):
        """
        Return the type of vote session.

        :return: Type of vote session.
        :rtype: str
        """
        return self._vote_type

    def vote(self, user, vote):
        """
        Add a vote to the vote session.

        :param user: A User object.
        :type user: User
        :param vote: A yes/no vote.
        :type vote: str
        :return: True if the vote was accepted.
        :rtype: bool
        """
        if self._can_vote(user):
            if vote in YES or vote in NO:
                # there might not be a reason to store the user
                self._votes.append((user, vote))
                self._has_voted.append(user.handle)
                return True
        return False

    @staticmethod
    def can_start(user, seconds=300):
        """
        Helper method to check if a user is allowed to start the vote session.

        The intention was that this method should be called before start(),
        to check if the user wanting to start the vote session has been
        in the room long enough.

        :param user: The user to check.
        :type user: User
        :param seconds: The time the user must have been in the room.
        :type seconds: int
        :return: True if the user is allowed to start the session.
        :rtype: bool
        """
        now = datetime.now()
        dif = now - user.join_time
        if dif.seconds > seconds:
            return True
        return False

    def start(self, user_to_vote, session, vote_type):
        """
        Start the voting session.

        :param user_to_vote: The user up for voting.
        :type user_to_vote: User
        :param session: The session time in seconds.
        :type session: int
        :param vote_type: The type of vote.
        :type vote_type: str
        :return: True if the vote session was started.
        :rtype: bool
        """
        log.debug('%s, session=%s, vote_type=%s' % (user_to_vote, session, vote_type))
        if not self._has_active_session():
            self._user_to_vote = user_to_vote
            self._vote_session = session
            self._vote_type = vote_type
            # start the timer
            self._vote_timer = Timer()
            self._vote_timer.start(self._decide_vote, self._vote_session)
            return True
        return False

    def cancel(self):
        """
        Cancel the vote session.

        :return: True if canceled.
        :rtype: bool
        """
        if self._has_active_session():
            return self._vote_timer.cancel()
        return False

    def _sort_user_by_join_time(self, users):
        # only allow users who have been in the room
        # for more than 5 minutes (300 seconds) to vote
        sorted_users = {}
        now = datetime.now()
        for handle in users:
            dif = now - users[handle].join_time
            if dif.seconds > 300:
                # do not add the bot itself
                if handle != self._bot.users.client.handle:
                    sorted_users[handle] = users[handle]
        return sorted_users

    def _can_vote(self, user):
        # was the user among the room users
        # when the vote session was started
        if user.handle in self._current_room_users:
            # has the user already voted
            if user.handle not in self._has_voted:
                return True
        return False

    def _has_active_session(self):
        # is there an active vote session running
        if self._vote_timer is not None:
            if isinstance(self._vote_timer, Timer):
                if self._vote_timer.is_alive:
                    return True
        return False

    def _was_vote_yes(self):
        # the result of the votes, True if yes, False on tie or no
        # Thanks to notnola (https://github.com/notnola/pinychat)
        yes = 0
        no = 0
        for _, vote in self._votes:
            if vote in YES:
                yes += 1
            else:
                no += 1
        return yes > no

    def _calculate_vote_percentage(self):
        # calculate the voting percentage
        return len(self._has_voted) * 100 / len(self._current_room_users)

    def _decide_vote(self):
        # decide based on vote percentage and votes
        percentage = self._calculate_vote_percentage()
        # at least 1/3 of the room should have voted. maybe adjust this
        if percentage >= 33:
            if self._was_vote_yes():
                self._bot.responder('With %s voters (%s%%) the room has decided to %s %s.' %
                                    (len(self._has_voted), percentage,
                                     self._vote_type, self._user_to_vote.nick))
                self._vote_action()
            else:
                self._bot.responder('With %s voters (%s%%) the room has decided NOT to %s %s.' %
                                    (len(self._has_voted), percentage,
                                     self._vote_type, self._user_to_vote.nick))
        else:
            self._bot.responder('With %s voters (%s%%) there were not '
                                'enough votes to make a decision.' %
                                (len(self._has_voted), percentage))

    def _vote_action(self):
        # initiate the action of the vote, based on vote type
        user = self._bot.users.search(self._user_to_vote.handle)
        if user is None:
            user = self._bot.users.search_by_nick(self._user_to_vote.nick)
        log.debug('%s' % user)
        if user is not None:
            if self._vote_type == 'ban':
                self._bot.send_ban_msg(user.handle)
            elif self._vote_type == 'kick':
                self._bot.send_kick_msg(user.handle)
            elif self._vote_type == 'close':
                if user.is_broadcasting:
                    self._bot.send_close_user_msg(user.handle)
                    # prevent further user cam
                    user.can_broadcast = False
class ExperimentSystem(ABC):
    def __init__(self, iteration_num=1, use_test_params=True):
        # number of training iterations for the model
        self.iteration_num = iteration_num
        # loggers for coreference resolution, exported coreference clusters, and entity linking
        self.coref_logger, self.export_clusters_logger, self.entity_linking_logger = self.init_system_logging()
        # parameters for the coreference model and for entity linking
        self.coref_params, self.linking_params = self.init_params(use_test_params=use_test_params)
        # save path for coreference features
        self.coref_feat_map_save_path = Paths.CorefModels.get_feat_map_export_path(
            self._experiment_type(), self.iteration_num)
        # save path for the coreference model
        self.coref_model_save_path = Paths.CorefModels.get_model_export_path(
            self._experiment_type(), self.iteration_num)
        # save path for the entity-linking model
        self.linking_model_save_path = Paths.LinkingModels.get_model_export_path(
            self._experiment_type(), self.iteration_num)
        # a timer is created as soon as a subclass of this abstract class is instantiated
        self.timer = Timer()
        # training set of coreference states
        self.trn_coref_states = []
        # development set of coreference states
        self.dev_coref_states = []
        # test set of coreference states
        self.tst_coref_states = []
        # the defined character labels
        self.other_label = "#other#"
        self.general_label = "#general#"
        self.linking_labels = ['monica geller', 'judy geller', 'jack geller', 'lily buffay',
                               'rachel green', 'joey tribbiani', 'phoebe buffay', 'carol willick',
                               'ross geller', 'chandler bing', 'gunther', 'ben geller',
                               'barry farber', 'richard burke', 'kate miller', 'peter becker',
                               'emily waltham'] + [self.other_label, self.general_label]

    # initialize the parameter values for the character-identification system
    def init_params(self, use_test_params=True):
        if use_test_params:
            coref_params_path = Paths.Params.get_test_params_path(
                self._experiment_type(), SubsystemTypes.COREF)
            linking_params_path = Paths.Params.get_test_params_path(
                self._experiment_type(), SubsystemTypes.ENTITY_LINKING)
        else:
            coref_params_path = Paths.Params.get_params_path(
                self._experiment_type(), SubsystemTypes.COREF)
            linking_params_path = Paths.Params.get_params_path(
                self._experiment_type(), SubsystemTypes.ENTITY_LINKING)
        coref_params = load_json_from_path(coref_params_path)
        linking_params = load_json_from_path(linking_params_path)
        return coref_params, linking_params

    # initialize the loggers for the character-identification system
    def init_system_logging(self):
        init_log_package_for_run(self._experiment_type(), self.iteration_num)
        coref_logger = init_logger(
            "%s.%s" % (self.__class__.__name__, SubsystemTypes.COREF),
            Paths.Logs.get_log_path(self._experiment_type(), SubsystemTypes.COREF,
                                    self.iteration_num)
        )
        export_clusters_logger = init_logger(
            "%s.%s" % (self.__class__.__name__, SubsystemTypes.EXPORT_CLUSTERS),
            Paths.Logs.get_log_path(self._experiment_type(), SubsystemTypes.EXPORT_CLUSTERS,
                                    self.iteration_num)
        )
        entity_linking_logger = init_logger(
            "%s.%s" % (self.__class__.__name__, SubsystemTypes.ENTITY_LINKING),
            Paths.Logs.get_log_path(self._experiment_type(), SubsystemTypes.ENTITY_LINKING,
                                    self.iteration_num)
        )
        return coref_logger, export_clusters_logger, entity_linking_logger

    # set the number of training iterations
    def set_model_iteration(self, model_num):
        self.iteration_num = model_num

    # set the save path for coreference features
    def set_feat_map_save_path(self, save_path):
        self.coref_feat_map_save_path = save_path

    # set the save path for the coreference model
    def set_coref_model_save_path(self, save_path):
        self.coref_model_save_path = save_path

    # set the save path for the entity-linking model
    def set_linking_model_save_path(self, save_path):
        self.linking_model_save_path = save_path

    @abstractmethod
    def _experiment_type(self):
        pass

    @abstractmethod
    def _load_transcripts(self):
        pass

    # load the lexical resources for coreference resolution
    def _load_coref_resources(self):
        # load word vectors
        self.timer.start("load_w2v")
        w2v = load_word_vecs()
        self.coref_logger.info("Fasttext data loaded - %.2fs" % self.timer.end("load_w2v"))
        # load the gender/name lexicon
        self.timer.start("load_w2g")
        w2g = load_gender_data()
        self.coref_logger.info("Gender data loaded - %.2fs" % self.timer.end("load_w2g"))
        # load the animacy lexicons
        self.timer.start("load_animacy_dicts")
        ani = load_animate_data()
        ina = load_inanimate_data()
        self.coref_logger.info("Animacy data loaded - %.2fs" % self.timer.end("load_animacy_dicts"))
        return w2v, w2g, ani, ina

    # extract coreference features
    def _extract_coref_features(self, spks, poss, ners, deps, save_feats=True):
        # load the resources needed for feature extraction: word vectors,
        # the gender/name lexicon, and the animate/inanimate noun lexicons
        w2v, w2g, ani, ina = self._load_coref_resources()
        # initialize the mention feature extractor with the loaded resources
        feat_extractor = MentionFeatureExtractor(w2v, w2g, spks, poss, ners, deps, ani, ina)
        # extract coreference features for all mention pairs
        self.timer.start("feature_extraction")
        for s in sum([self.trn_coref_states, self.dev_coref_states, self.tst_coref_states], []):
            s.pfts = {m: dict() for m in s}
            for i, m in enumerate(s):
                m.id, (efts, mft) = i, feat_extractor.extract_mention(m)
                m.feat_map['efts'], m.feat_map['mft'] = efts, mft
                for a in s[:i]:
                    s.pfts[a][m] = feat_extractor.extract_pairwise(a, m)
        self.coref_logger.info("Feature extracted - %.2fs\n" % self.timer.end("feature_extraction"))
        # save the coreference feature extractor
        if save_feats:
            self.timer.start("dump_feature_extractor")
            with open(self.coref_feat_map_save_path, 'wb') as fout:
                pickle.dump(feat_extractor, fout, protocol=2)
            self.coref_logger.info("Feature extractor saved to %s - %.2fs" %
                                   (self.coref_feat_map_save_path,
                                    self.timer.end("dump_feature_extractor")))

    # get the dimensions of each coreference feature vector
    def _get_coref_feature_shapes(self):
        m1, m2 = self.trn_coref_states[0][1], self.trn_coref_states[0][2]
        efts, mft = m1.feat_map["efts"], m1.feat_map["mft"]
        eftdims = list(map(lambda x: x.shape, efts))
        mftdim, pftdim = len(mft), len(self.trn_coref_states[0].pfts[m1][m2])
        return eftdims, mftdim, pftdim

    @abstractmethod
    def run_coref(self):
        pass

    @abstractmethod
    def extract_learned_coref_features(self):
        pass

    @abstractmethod
    def run_entity_linking(self):
        pass

    # run the character-identification system
    def run(self):
        # extract coreference features, train the coreference model, and save it;
        # with seed_path="test", only extract features without training or saving the model
        self.run_coref(seed_path="")
class TimeoutSocketOne(common.socket):
    '''
    single socket timeout socket
    '''

    @lock
    def try_connect(self, address, succ, fail, time_to_wait, provide_init):
        provide_init(self)
        self.real_success = succ
        self.fail = fail
        self.dead = False
        self.data = None
        # make new socket
        self.make_socket()
        self.timeoutvalid = True
        self.timeout = Timer(time_to_wait, self.handle_timeout)
        self.timeout.start()
        print '*' * 40
        from util import funcinfo
        print funcinfo(self.connect)
        print '*' * 40
        self.connect(address, error=self.do_fail)
        # do connect with callback
        # success indicates that the socket started, but guarantees nothing
        # error indicates that there was a problem, should try to close + do fail

    def succ(self):
        info('succ')
        self.real_success()

    @lock
    def do_fail(self, *a, **k):
        info('do_fail')
        if self.timeout is not None:
            self.timeout.cancel()
            self.timeout = None
        self.timeoutvalid = False
        self.close()
        print a, k
        self.fail(*a, **k)

    @lock
    def handle_connect(self):
        info('CONNECT')
        if self.timeout is not None:
            self.timeout.cancel()
            self.timeout = None
        self.timeoutvalid = False
        self.succ()
        # cancel timeout
        # success

    @lock
    def handle_timeout(self):
        info('TIMEOUT')
        # mark as dead
        if self.timeoutvalid:
            if self.timeout is not None:
                self.timeout.cancel()
                self.timeout = None
            self.timeoutvalid = False
            self.close()
            self.dead = True
            self.fail()

    @lock
    def collect_incoming_data(self, data):
        self.data += data

    @lock
    def __error(self):
        olddead = self.dead
        self.dead = True
        if self.timeout is not None:
            self.timeout.cancel()
            self.timeout = None
        self.timeoutvalid = False
        # cancel timeout
        self.close()
        if not olddead:
            self.fail()

    def handle_error(self, e=None):
        info('ERROR: %r', e)
        import traceback
        traceback.print_exc()
        self.__error()

    def handle_expt(self):
        info('EXPT')
        self.__error()

    def handle_close(self):
        info('CLOSE')
        self.__error()
def main():
    timer = Timer()
    timer.start()
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    tf.set_random_seed(0)

    MAX_SENT_LENGTH = 20
    MAX_SENTS = 100
    EMBEDDING_DIM = 50
    POST_DIM = 10
    TEXT_DIM = 50
    VALIDATION_SPLIT = 0.2
    MIXTURES = 5
    Graph_DIM = 10
    TRAINING_EPOCHS = 50

    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_float('learning_rate', 0.0001, 'Initial learning rate.')
    flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.')
    flags.DEFINE_integer('hidden2', Graph_DIM, 'Number of units in hidden layer 2.')
    flags.DEFINE_integer('batch_size', 32, 'Size of a mini-batch')
    flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
    flags.DEFINE_float('lambda1', 1e-4, 'Parameter of energy.')
    flags.DEFINE_float('lambda2', 1e-9, 'lossSigma.')
    flags.DEFINE_float('lambda3', 0.01, 'GAE.')
    flags.DEFINE_string('model', 'gcn_ae', 'Model string.')
    model_str = FLAGS.model

    # variables to store evaluation results
    precision_list = []
    recall_list = []
    f1_list = []
    auc_list = []

    for t in range(10):
        with open('./data/instagram.pickle', 'rb') as handle:
            store_data = pickle.load(handle)
        labels = store_data['labels']
        df = store_data['df']
        data = store_data['data']
        postInfo = store_data['postInfo']
        timeInfo = store_data['timeInfo']
        embedding_matrix = store_data['embedding_matrix']
        word_index = store_data['word_index']

        num_session = data.shape[0]
        nb_validation_samples = int(VALIDATION_SPLIT * num_session)

        '''For Evaluation'''
        single_label = np.asarray(labels)
        labels = to_categorical(np.asarray(labels))
        print('Shape of data tensor:', data.shape)
        print('Shape of label tensor:', labels.shape)

        zeros = np.zeros(num_session)
        zeros = zeros.reshape((num_session, 1, 1))

        # FLAGS.learning_rate = lr

        '''Hierarchical Attention Network for text and other info'''
        placeholders = {
            'zero_input': tf.placeholder(tf.float32, shape=[None, 1, 1]),
            'review_input': tf.placeholder(tf.float32,
                                           shape=[None, MAX_SENTS, MAX_SENT_LENGTH + 1]),
            'post_input': tf.placeholder(tf.float32, shape=[None, 4]),
            'time_label': tf.placeholder(tf.float32, shape=[None, MAX_SENTS])
        }

        g = nx.Graph()
        edgelist = pd.read_csv('./data/source_target.csv')
        for i, elrow in edgelist.iterrows():
            g.add_edge(elrow[0].strip('\n'), elrow[1].strip('\n'))
        adj = nx.adjacency_matrix(g)

        user_attributes = pd.read_csv('./data/user_friend_follower.csv')
        user_attributes = user_attributes.set_index('user').T.to_dict('list')
        nodelist = list(g.nodes())
        features = []
        User_post = np.zeros((len(nodelist), num_session))  # 2218 number of posts
        for id, node in enumerate(nodelist):
            posts_ID = df.loc[df['owner_id'] == node].index.values.tolist()
            for p_id in posts_ID:
                User_post[id][p_id] = 1
            features.append(user_attributes[node])

        # only keep the posts that are in the training data
        User_post_train = User_post[:, :-nb_validation_samples]
        User_post_test = User_post[:, -nb_validation_samples:]
        features = sparse.csr_matrix(features)
        features = normalize(features, norm='max', axis=0)

        adj_orig = adj
        adj_orig = adj_orig - sparse.dia_matrix(
            (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
        adj_orig.eliminate_zeros()
        adj_norm = preprocess_graph(adj)
        adj_label = adj + sparse.eye(adj.shape[0])
        adj_label = sparse_to_tuple(adj_label)

        # Define placeholders
        placeholders.setdefault('features', tf.sparse_placeholder(tf.float32))
        placeholders.setdefault('adj', tf.sparse_placeholder(tf.float32))
        placeholders.setdefault('adj_orig', tf.sparse_placeholder(tf.float32))
        placeholders.setdefault('dropout', tf.placeholder_with_default(0., shape=()))
        placeholders.setdefault('user_post',
                                tf.placeholder(tf.int32, [len(nodelist), None]))
        d = {placeholders['dropout']: FLAGS.dropout}
        placeholders.update(d)

        num_nodes = adj.shape[0]
        features = sparse_to_tuple(features.tocoo())
        num_features = features[2][1]
        features_nonzero = features[1].shape[0]

        '''Graph AutoEncoder'''
        if model_str == 'gcn_ae':
            Graph_model = GCNModelAE(placeholders, num_features, features_nonzero)
        elif model_str == 'gcn_vae':
            Graph_model = GCNModelVAE(placeholders, num_features, num_nodes,
                                      features_nonzero)

        embedding_layer = Embedding(len(word_index) + 1,
                                    EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=MAX_SENT_LENGTH,
                                    trainable=True,
                                    mask_zero=True)

        all_input = Input(shape=(MAX_SENT_LENGTH + 1,))
        sentence_input = crop(1, 0, MAX_SENT_LENGTH)(all_input)  # slice
        time_input = crop(1, MAX_SENT_LENGTH, MAX_SENT_LENGTH + 1)(all_input)  # slice
        embedded_sequences = embedding_layer(sentence_input)
        # embedded_sequences = BatchNormalization()(embedded_sequences)
        l_lstm = Bidirectional(GRU(TEXT_DIM, return_sequences=True))(embedded_sequences)
        l_att = AttLayer(TEXT_DIM)(l_lstm)  # (?, 200)
        # time_embedding = Dense(TIME_DIM, activation='sigmoid')(time_input)
        merged_output = Concatenate()([l_att, time_input])  # text + time information
        sentEncoder = Model(all_input, merged_output)

        review_input = placeholders['review_input']
        review_encoder = TimeDistributed(sentEncoder)(review_input)
        l_lstm_sent = Bidirectional(GRU(TEXT_DIM, return_sequences=True))(review_encoder)
        fully_sent = Dense(1, use_bias=False)(l_lstm_sent)
        pred_time = Activation(activation='linear')(fully_sent)
        zero_input = placeholders['zero_input']
        shift_predtime = Concatenate(axis=1)([zero_input, pred_time])
        shift_predtime = crop(1, 0, MAX_SENTS)(shift_predtime)
        l_att_sent = AttLayer(TEXT_DIM)(l_lstm_sent)

        # embed the #likes, shares
        post_input = placeholders['post_input']
        fully_post = Dense(POST_DIM, use_bias=False)(post_input)
        # norm_fullypost = BatchNormalization()(fully_post)
        post_embedding = Activation(activation='relu')(fully_post)
        # merge the document-level vector with the additional embedded features such as #likes
        fully_review = concatenate([l_att_sent, post_embedding])

        pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

        with tf.name_scope('graph_cost'):
            preds_sub = Graph_model.reconstructions
            labels_sub = tf.reshape(
                tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                          validate_indices=False), [-1])
            if model_str == 'gcn_ae':
                opt = CostAE(preds=preds_sub, labels=labels_sub,
                             pos_weight=pos_weight, norm=norm)
            elif model_str == 'gcn_vae':
                opt = CostVAE(preds=preds_sub, labels=labels_sub,
                              model=Graph_model, num_nodes=num_nodes,
                              pos_weight=pos_weight, norm=norm)

        User_latent = Graph_model.z_mean  # (n_user, G_embeddim)
        Post_latent = fully_review        # (batch size, text_embed_dim + post_dim)
        max_indices = tf.argmax(placeholders['user_post'], axis=0)
        add_latent = tf.gather(User_latent, max_indices)
        # the representation of text + graph
        session_latent = tf.concat([Post_latent, add_latent], axis=1)

        '''DAGMM'''
        h1_size = 2 * TEXT_DIM + Graph_DIM + POST_DIM
        gmm = GMM(MIXTURES)
        est_net = EstimationNet([h1_size, MIXTURES], tf.nn.tanh)
        gamma = est_net.inference(session_latent, FLAGS.dropout)
        gmm.fit(session_latent, gamma)
        individual_energy = gmm.energy(session_latent)

        Time_label = placeholders['time_label']
        Time_label = tf.reshape(Time_label, [tf.shape(Time_label)[0], MAX_SENTS, 1])

        with tf.name_scope('loss'):
            GAE_error = opt.cost
            energy = tf.reduce_mean(individual_energy)
            lossSigma = gmm.cov_diag_loss()
            prediction_error = tf.losses.mean_squared_error(shift_predtime, Time_label)
            loss = (prediction_error + FLAGS.lambda1 * energy +
                    FLAGS.lambda2 * lossSigma + FLAGS.lambda3 * GAE_error)

        x_train = data[:-nb_validation_samples]
        time_train = timeInfo[:-nb_validation_samples]
        zeros_train = zeros[:-nb_validation_samples]
        y_train = labels[:-nb_validation_samples]
        post_train = postInfo[:-nb_validation_samples]
        x_val = data[-nb_validation_samples:]
        zeros_test = zeros[-nb_validation_samples:]
        time_test = timeInfo[-nb_validation_samples:]
        y_val = labels[-nb_validation_samples:]
        post_test = postInfo[-nb_validation_samples:]
        y_single = single_label[-nb_validation_samples:]

        print('Number of positive and negative posts in training and validation set')
        print(y_train.sum(axis=0))
        print(y_val.sum(axis=0))
        print("model fitting - Unsupervised cyberbullying detection")

        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
        train_step = optimizer.minimize(loss)
        GAEcorrect_prediction = tf.equal(
            tf.cast(tf.greater_equal(tf.sigmoid(preds_sub), 0.5), tf.int32),
            tf.cast(labels_sub, tf.int32))

        feed_dict_train = construct_feed_dict(zeros_train, x_train, post_train,
                                              time_train, FLAGS.dropout, adj_norm,
                                              adj_label, features, User_post_train,
                                              placeholders)
        feed_dict_train.update({placeholders['dropout']: FLAGS.dropout})

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        total_batch = int(num_session / FLAGS.batch_size)
        zero_batches = np.array_split(zeros_train, total_batch)
        x_batches = np.array_split(x_train, total_batch)
        p_batches = np.array_split(post_train, total_batch)
        t_batches = np.array_split(time_train, total_batch)
        UP_batches = np.array_split(User_post_train, total_batch, axis=1)

        for epoch in range(TRAINING_EPOCHS):
            ave_cost = 0
            ave_energy = 0
            ave_recon = 0
            ave_sigma = 0
            ave_GAE = 0
            for i in range(total_batch):
                batch_x = x_batches[i]
                batch_p = p_batches[i]
                batch_t = t_batches[i]
                batch_z = zero_batches[i]
                user_post = UP_batches[i]
                feed_dict = construct_feed_dict(batch_z, batch_x, batch_p, batch_t,
                                                FLAGS.dropout, adj_norm, adj_label,
                                                features, user_post, placeholders)
                feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                _, total_loss, loss_sigma, GAE_loss, Energy_error, recon_error = sess.run(
                    [train_step, loss, lossSigma, GAE_error, energy, prediction_error],
                    feed_dict)
                ave_cost += total_loss / total_batch
                ave_energy += Energy_error / total_batch
                ave_GAE += GAE_loss / total_batch
                ave_sigma += loss_sigma / total_batch
                ave_recon += recon_error / total_batch
            # if epoch % 10 == 0 or epoch == TRAINING_EPOCHS - 1:
            #     print("This is epoch %d, the total loss is %f, energy error is %f, "
            #           "GAE error is %f, sigma error is %f, prediction error is %f"
            #           % (epoch + 1, ave_cost, ave_energy, ave_GAE, ave_sigma, ave_recon))

        fix = gmm.fix_op()
        sess.run(fix, feed_dict=feed_dict_train)

        feed_dict_test = construct_feed_dict(zeros_test, x_val, post_test, time_test,
                                             FLAGS.dropout, adj_norm, adj_label,
                                             features, User_post_test, placeholders)
        pred_energy, representations = sess.run(
            [individual_energy, session_latent], feed_dict=feed_dict_test)
        bully_energy_threshold = np.percentile(pred_energy, 65)
        print('the bully energy threshold is : %f' % bully_energy_threshold)
        label_pred = np.where(pred_energy >= bully_energy_threshold, 1, 0)
        print(precision_recall_fscore_support(y_single, label_pred))
        print(accuracy_score(y_single, label_pred))
        print(roc_auc_score(y_single, label_pred))
        tf.reset_default_graph()
        K.clear_session()

        precision_list.append(precision_recall_fscore_support(y_single, label_pred)[0][1])
        recall_list.append(precision_recall_fscore_support(y_single, label_pred)[1][1])
        f1_list.append(precision_recall_fscore_support(y_single, label_pred)[2][1])
        auc_list.append(roc_auc_score(y_single, label_pred))

    print('>>> Evaluation metrics')
    print('>>> precision mean: {0:.4f}; precision std: {1:.4f}'.format(
        np.mean(precision_list), np.std(precision_list)))
    print('>>> recall mean: {0:.4f}; recall std: {1:.4f}'.format(
        np.mean(recall_list), np.std(recall_list)))
    print('>>> f1 mean: {0:.4f}; f1 std: {1:.4f}'.format(
        np.mean(f1_list), np.std(f1_list)))
    print('>>> auc mean: {0:.4f}; auc std: {1:.4f}'.format(
        np.mean(auc_list), np.std(auc_list)))
    timer.stop()
class ElectrumGui:
    def __init__(self, config, network):
        set_language(config.get('language'))
        self.network = network
        self.config = config
        self.windows = []
        self.efilter = OpenFileEventFilter(self.windows)
        self.app = QApplication(sys.argv)
        self.app.installEventFilter(self.efilter)
        self.timer = Timer()
        self.app.connect(self.app, QtCore.SIGNAL('new_window'), self.start_new_window)

    def new_window(self, config):
        self.app.emit(SIGNAL('new_window'), config)

    def load_wallet_file(self, path):
        self.app.emit(SIGNAL('new_window'), self.config, path)

    def start_new_window(self, config, path=None):
        if path is None:
            path = config.get_wallet_path()
        for w in self.windows:
            if w.config.get_wallet_path() == path:
                w.bring_to_top()
                break
        else:
            w = ElectrumWindow(config, self.network, self)
            w.connect_slots(self.timer)
            w.load_wallet_file(path)
            w.show()
            self.windows.append(w)

        url = config.get('url')
        if url:
            w.pay_to_URI(url)
        return w

    def main(self):
        self.timer.start()

        last_wallet = self.config.get('gui_last_wallet')
        if last_wallet is not None and self.config.get('wallet_path') is None:
            if os.path.exists(last_wallet):
                self.config.cmdline_options['default_wallet_path'] = last_wallet

        # main window
        self.current_window = self.main_window = self.start_new_window(self.config)

        # plugins interact with main window
        run_hook('init_qt', self)

        signal.signal(signal.SIGINT, lambda *args: self.app.quit())

        # main loop
        self.app.exec_()

        # clipboard persistence
        # see http://www.mail-archive.com/[email protected]/msg17328.html
        event = QtCore.QEvent(QtCore.QEvent.Clipboard)
        self.app.sendEvent(self.app.clipboard(), event)
class HPETrainBaseRun(TrainBaseRun):
    def setup(self):
        super().setup()
        self.img_size = self.options.hpe.img_size
        self.speed_diagnose = self.options.general.speed_diagnose
        self.model = self.make_model()
        self.heatmap_max = 1
        self.last_results = None
        self.timer = Timer()

    @abstractmethod
    def make_model(self):
        pass

    def iterate(self, data):
        if self.speed_diagnose:
            self.timer.start('preprocess')
        data = self.arrange_data(data)
        if self.speed_diagnose:
            self.timer.stop('preprocess')
            self.timer.start('setting input')
        self.model.set_input(data)
        if self.speed_diagnose:
            self.timer.stop('setting input')
            self.timer.start('optimize')
        self.model.optimize()
        if self.speed_diagnose:
            self.timer.stop('optimize')
            self.timer.print_elapsed_times()
        self.avg_dict.add(self.model.get_current_losses())
        # save the result for visualization
        self.last_results = self.model.get_detached_current_results()
        self.last_data = data

    def save_checkpoint(self, epoch):
        checkpoint = self.model.pack_as_checkpoint()
        self.logger.save_checkpoint(checkpoint, epoch)

    def end_epoch(self):
        pass

    @abstractmethod
    def arrange_data(self, data):
        """
        reshape the data for the model.
        """

    def _visualize_results_as_image(self, results, cur_iter):
        if results is None:
            return
        results = self._select_first_in_batch(results)
        img = results['img']
        joint_out, heatmap_out, heatmap_true, heatmap_reprojected = hpe_util.unpack_data(results)
        out_heatmap_img = convert_to_colormap(heatmap_out, 1.0)
        true_heatmap_img = convert_to_colormap(heatmap_true, 1.0)
        reprojected_heatmap_img = convert_to_colormap(heatmap_reprojected, 1.0)
        img = expand_channel(img)
        # horizontal stack
        stacked_img = torch.cat(
            (img, out_heatmap_img, reprojected_heatmap_img, true_heatmap_img), 3)
        self.visualizer.add_image('train sample', stacked_img, cur_iter)

    def _visualize_network_grad(self, epoch, current_iter):
        grads = self.model.get_grads()
        for tag, val in grads.items():
            self.visualizer.add_histogram(tag, val, epoch)