def MessageBoxA(self, uc, esp, log):
    eip, owner, text_ptr, title_ptr, type = struct.unpack("<IIIII", uc.mem_read(esp, 20))
    # Move the return address up the stack so the four stdcall arguments get cleaned up.
    uc.mem_write(esp + 16, struct.pack("<I", eip))
    text = get_string(text_ptr, uc)
    title = get_string(title_ptr, uc)
    print(f"\x1b[31mMessage Box ({title}): {text}\x1b[0m")
    return 1, esp + 16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--price_map_file', required=True)
    parser.add_argument('--ticker_filter_file')
    parser.add_argument('--ranking_dir', required=True)
    parser.add_argument('--k', required=True)
    parser.add_argument('--h', required=True)
    parser.add_argument('--real', action='store_true')
    parser.add_argument('--market_data_file')
    args = parser.parse_args()

    ticker_filter = set()
    if args.ticker_filter_file:
        with open(args.ticker_filter_file, 'r') as fp:
            ticker_filter = set(fp.read().splitlines())
        utils.printd('filtering %d tickers' % len(ticker_filter))

    k = int(args.k)
    h = int(args.h)
    assert h > 0
    holding_period = datetime.timedelta(days=h)

    dates = sorted([f[:f.find('.')] for f in os.listdir(args.ranking_dir)
                    if f.endswith('.csv')])
    utils.printd('simulating %d dates' % len(dates))

    utils.printd('loading price map...')
    price_map = utils.read_price_map(args.price_map_file)
    utils.printd('done, %d entries in the price map' % len(price_map))

    if len(ticker_filter) > 0:
        index = None
    elif k > 0:
        index = range(1, k+1)
    else:
        index = range(-1, k-1, -1)

    all_trans = []
    for date in dates:
        ranking_file = '%s/%s.csv' % (args.ranking_dir, date)
        if len(ticker_filter) > 0:
            tickers = utils.read_tickers_by_filter(ranking_file, k, ticker_filter)
        else:
            tickers = utils.read_tickers_by_index(ranking_file, index)
        trans = get_trans(price_map, tickers, date, holding_period)
        if len(trans) == 0:
            continue
        utils.printd('%s: %s' % (date, utils.get_string(trans, h, args.real)))
        all_trans.extend(trans)

    utils.printd(utils.get_string(all_trans, h, args.real))
    gains = utils.get_gains(all_trans, args.real)
    avg_gain = sum(gains)/len(gains)
    print(avg_gain/h*100)

    if args.market_data_file:
        mtrans = utils.simulate_market_trans(all_trans, args.market_data_file)
        utils.printd('market:')
        utils.printd(utils.get_string(mtrans, h, False))
        sharpe = utils.compute_sharpe_ratio(all_trans, mtrans, args.real)
        utils.printd('sharpe ratio: %.4f' % sharpe)
        print(sharpe)
def LoadLibraryA(self, uc, esp, log, mod_name_ptr):
    # TODO: does not actually load the library
    mod_name = get_string(mod_name_ptr, uc)
    log and print(
        f"LoadLibraryA: mod_name_ptr 0x{mod_name_ptr:02x}: {mod_name}")
    handle = self.base_addr + self.module_handle_offset
    self.module_handle_offset += 1
    self.module_handles[handle] = mod_name
    log and print(f"\tHandle: 0x{handle:02x}")
    return handle
def LoadLibraryA(self, uc, esp, log):
    # TODO: does not actually load the library
    eip, mod_name_ptr = struct.unpack("<II", uc.mem_read(esp, 8))
    mod_name = get_string(mod_name_ptr, uc)
    log and print(
        f"LoadLibraryA: 0x{eip:02x} mod_name_ptr 0x{mod_name_ptr:02x}: {mod_name}")
    # Move the return address up the stack so the single stdcall argument gets cleaned up.
    uc.mem_write(esp + 4, struct.pack("<I", eip))
    handle = self.base_addr + self.module_handle_offset
    self.module_handle_offset += 1
    self.module_handles[handle] = mod_name
    log and print(f"\tHandle: 0x{handle:02x}")
    return handle, esp + 4
def parse(self, response):
    rows = response.css('table tr')
    entries = 0
    if len(rows) <= 0:
        utils.Report.warning(
            'There are no rows to go through... Perhaps the syntax has changed?'
        )
    for cve in rows:
        fields = cve.css('td')
        try:
            date = serialize_date(
                utils.get_string(fields[0].css('::text').extract()))
        except ValueError:
            continue
        reference = utils.get_string(fields[2].css('a ::text').extract())
        url = utils.get_string(fields[2].xpath('a//@href').extract())
        description = utils.get_string(fields[4].css('::text').extract())
        if len(reference) > 0 and len(url) > 0:
            entries += 1
        else:
            print('Invalid CVE has been detected.')
            continue
        # Check if the leak has been published in a time window.
        if utils.start_date() <= date:
            leak = {
                'date': date,
                'reference': reference,
                'url': 'https://pivotal.io' + url,
                'description': description
            }
            if not utils.event_exists(leak['reference']):
                print('Adding new CVE event. {}'.format(leak['reference']))
                utils.send_event('pivotal', leak)
            else:
                print('CVE {} already registered. Skipping.'.format(
                    leak['reference']))
    if entries <= 0:
        print('There has been an issue parsing the page.')
        utils.Report.critical(
            'No entries could be detected! Please have a manual check instead.'
        )
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--price_map_file', required=True)
    parser.add_argument('--ticker_file', required=True)
    parser.add_argument('--ranking_dir', required=True)
    parser.add_argument('--k', required=True)
    parser.add_argument('--h', required=True)
    parser.add_argument('--real', action='store_true')
    args = parser.parse_args()

    with open(args.ticker_file, 'r') as fp:
        ticker_filter = set(fp.read().splitlines())
    utils.printd('simulating %d tickers' % len(ticker_filter))

    k = int(args.k)
    h = int(args.h)
    assert h > 0
    holding_period = datetime.timedelta(days=h)

    dates = sorted([f[:f.find('.')] for f in os.listdir(args.ranking_dir)
                    if f.endswith('.csv')])
    utils.printd('simulating %d dates' % len(dates))

    utils.printd('loading price map...')
    price_map = utils.read_price_map(args.price_map_file)
    utils.printd('done, %d entries in the price map' % len(price_map))

    all_trans, all_mtrans = [], []
    for date in dates:
        ranking_file = '%s/%s.csv' % (args.ranking_dir, date)
        tickers = utils.read_tickers_by_filter(ranking_file, k, ticker_filter)
        trans = get_trans(price_map, tickers, date, holding_period)
        if len(trans) == 0:
            utils.printd('no trans for %s' % date)
            continue
        mtickers = utils.read_tickers_by_filter(ranking_file, 0, ticker_filter)
        mtrans = get_trans(price_map, mtickers, date, holding_period)
        utils.printd('%s (ranking): %s' % (date, utils.get_string(trans, h, args.real)))
        utils.printd('%s (market): %s' % (date, utils.get_string(mtrans, h, args.real)))
        all_trans.extend(trans)
        all_mtrans.extend(mtrans)

    utils.printd('all (ranking): %s' % utils.get_string(all_trans, h, args.real))
    utils.printd('all (market): %s' % utils.get_string(all_mtrans, h, args.real))
    gains = utils.get_gains(all_trans, args.real)
    avg_gain = sum(gains)/len(gains)
    print(avg_gain/h*100)
    mgains = utils.get_gains(all_mtrans, args.real)
    avg_mgain = sum(mgains)/len(mgains)
    print(avg_mgain/h*100)
def GetModuleHandleA(self, uc, esp, log, module_name_ptr):
    module_name = get_string(module_name_ptr, uc)
    log and print(
        f"GetModuleHandleA: module_name_ptr 0x{module_name_ptr:02x}: {module_name}"
    )
    if not module_name_ptr:
        # NULL module name: map the sample itself and return its image base as the handle.
        pe = pefile.PE(self.sample)
        loaded = pe.get_memory_mapped_image(ImageBase=self.base_addr)
        handle = self.alloc(log, len(loaded), uc)
        uc.mem_write(handle, loaded)
        return handle
    handle = self.base_addr + self.module_handle_offset
    self.module_handle_offset += 1
    self.module_handles[handle] = module_name
    return handle
def __init__(self, **kwargs):
    super(splashButton, self).__init__(**kwargs)
    self.size_hint = (None, None)
    self.size = (288 * g.scale, 144 * g.scale)
    self.border = (0, 0, 0, 0)
    self.background_normal = g.IMG_SPLASH_BUTTON_NORMAL
    self.background_down = g.IMG_SPLASH_BUTTON_PRESSED
    self.pos = (g.screen_size[0] / 2 - self.size[0] / 2, -144 * g.scale)
    self.end_pos = (g.screen_size[0] / 2 - self.size[0] / 2, 0)
    self.bind(on_press=self.tap_screen)
    self.btn_text = Label(text=u.get_string("splash", "first"),
                          font_name=g.FONT_CB,
                          font_size=32 * g.scale,
                          color=g.COLOR_BROWN)
    self.btn_text.texture_update()
    self.btn_text.size = self.btn_text.texture_size[:]
    self.btn_text.pos = (g.screen_size[0] / 2 - self.btn_text.size[0] / 2,
                         25 * g.scale - self.pos[1])
    self.btn_text.end_pos = (g.screen_size[0] / 2 - self.btn_text.size[0] / 2,
                             25 * g.scale)
    self.add_widget(self.btn_text)
def get_nation(self):
    if hasattr(self, 'nation'):
        return self.nation
    title = self.get_soup().find('title')
    line = utils.get_string(title)
    name = line.split(' - ')[2]
    self.nation = name
    return self.nation
def GetProcAddress(self, uc, esp, log, module_handle, proc_name_ptr):
    proc_name = get_string(proc_name_ptr, uc)
    try:
        module_name = self.module_handles[module_handle]
    except KeyError:
        module_name = "?"
    log and print(
        f"GetProcAddress: module handle 0x{module_handle:02x}: {module_name}, proc_name_ptr 0x{proc_name_ptr:02x}: {proc_name}"
    )
    hook_addr = None
    for addr, name in self.hooks.items():
        if name == proc_name:
            hook_addr = addr
            log and print(
                f"\tRe-used previously added hook at 0x{hook_addr:02x}")
    if hook_addr is None:
        hook_addr = self.add_hook(uc, proc_name, module_name)
        log and print(f"\tAdded new hook at 0x{hook_addr:02x}")
    if proc_name in self.pending_breakpoints:
        print(
            f"\x1b[31mPending breakpoint attached for new dynamic import {proc_name} at 0x{hook_addr:02x}\x1b[0m"
        )
        self.breakpoints.add(hook_addr)
        self.pending_breakpoints.remove(proc_name)
    return hook_addr
def is_valid_wizard_sindarin(proc_dict, template_sindarin):
    purpose = proc_dict['purpose']
    proc_id = ut.get_process(template_sindarin)
    valid = True
    if proc_id != 'PROCESS' and \
            proc_id != template_sindarin.replace('-template.sin', ''):
        ut.fatal('The process does not have the same name as the file')
        valid = False
    if purpose == 'scan' and not ut.grep('#SETSCAN', template_sindarin):
        ut.fatal('Your purpose is scan but the sindarin has no #SETSCAN')
        valid = False
    elif (purpose == 'nlo' or purpose == 'nlo_combined') and \
            not is_nlo_calculation(template_sindarin):
        ut.fatal('Your purpose is nlo* but the sindarin has no nlo command')
        valid = False
    fks_mapping = ut.get_string("fks_mapping_type", template_sindarin)
    resonance_set_in_sindarin = fks_mapping == '"resonances"'
    fks_method = proc_dict.get('fks_method', 'default')
    resonance_not_set_in_json = fks_method != 'resonances'
    if resonance_set_in_sindarin and resonance_not_set_in_json:
        ut.fatal(
            'You set fks_mapping_type to resonances but have not set it in the run.json'
        )
        valid = False
    return valid
def GetModuleHandleA(self, uc, esp, log):
    """1 argument, we have to clean up"""
    eip, module_name_ptr = struct.unpack("<II", uc.mem_read(esp, 8))
    module_name = get_string(module_name_ptr, uc)
    log and print(
        f"GetModuleHandleA: 0x{eip:02x} module_name_ptr 0x{module_name_ptr:02x}: {module_name}")
    # Move the return address up the stack so the single stdcall argument gets cleaned up.
    uc.mem_write(esp + 4, struct.pack("<I", eip))
    if not module_name_ptr:
        pe = pefile.PE(self.sample)
        loaded = pe.get_memory_mapped_image(ImageBase=self.base_addr)
        handle = self.alloc(log, len(loaded), uc)
        uc.mem_write(handle, loaded)
        return handle, esp + 4
    handle = self.base_addr + self.module_handle_offset
    self.module_handle_offset += 1
    self.module_handles[handle] = module_name
    return handle, esp + 4
def __ext_tag(self, tag, stat_name):
    stats = tag
    stat_class = getattr(self, stat_name + 'Class')
    stat_tag = stats.find('td', stat_class)
    num_val = utils.get_string(stat_tag)
    try:
        return int(num_val)
    except:
        # shown on soccerwiki as "?"
        print '\t' + utils.colored('Stats not available yet', 'red')
        return 0
def get_players(self):
    div = self.get_soup().find('div', self.squadClass)
    if div is None:
        yield None
        return
    cats = [utils.get_string(x).strip('s') for x in div('th')]
    bodies = div('tbody')
    grps = [x('td', {'style': 'vertical-align: top;'}) for x in bodies]
    for grp, pos in zip(grps, cats)[:-1]:  # last one is coaches
        # print 'Getting players - ', pos
        for pl in grp:
            yield Player(pl, pos, self.name, self.lname)
def get_earliest(self, items):
    if len(items) == 0:
        logger.error('len(items) == 0')
        return None, None
    min_pub_time = 0
    min_pub_time_id = None
    title = None
    for item in items:
        pub_time_str = item['pub_time']
        pub_time = int(
            datetime.datetime.strptime(pub_time_str,
                                       "%Y-%m-%d %H:%M:%S").strftime("%s"))
        if not min_pub_time_id:
            min_pub_time = pub_time
            min_pub_time_id = item['id']
            title = utils.get_string(item, 'title')
        elif min_pub_time > pub_time:
            min_pub_time = pub_time
            min_pub_time_id = item['id']
            title = utils.get_string(item, 'title')
    return min_pub_time_id, title
def inference(config_file, image_file):
    """ Run text recognition network on an image file.
    """
    # Get config
    FLAGS = Flags(config_file).get()
    out_charset = load_charset(FLAGS.charset)
    num_classes = len(out_charset)
    net = get_network(FLAGS, out_charset)

    if FLAGS.use_rgb:
        num_channel = 3
        mode = cv2.IMREAD_COLOR
    else:
        num_channel = 1
        mode = cv2.IMREAD_GRAYSCALE

    # Input node
    image = tf.placeholder(tf.uint8,
                           shape=[None, None, num_channel],
                           name='input_node')

    # Network
    proc_image = net.preprocess_image(image, is_train=False)
    proc_image = tf.expand_dims(proc_image, axis=0)
    proc_image.set_shape(
        [None, FLAGS.resize_hw.height, FLAGS.resize_hw.width, num_channel])
    logits, sequence_length = net.get_logits(proc_image,
                                             is_train=False,
                                             label=None)
    prediction, log_prob = net.get_prediction(logits, sequence_length)
    prediction = tf.sparse_to_dense(sparse_indices=prediction.indices,
                                    sparse_values=prediction.values,
                                    output_shape=prediction.dense_shape,
                                    default_value=num_classes,
                                    name='output_node')

    # Restore
    restore_model = get_init_trained()
    sess = tf.Session()
    restore_model(sess, FLAGS.eval.model_path)

    # Run
    img = cv2.imread(image_file, mode)
    img = np.reshape(img, [img.shape[0], img.shape[1], num_channel])
    predicted = sess.run(prediction, feed_dict={image: img})
    string = get_string(predicted[0], out_charset)
    string = adjust_string(string, FLAGS.eval.lowercase,
                           FLAGS.eval.alphanumeric)
    print(string)

    return string
def print_mem(uc, base, num_elements, t="int", base_alias=""):
    if not base_alias:
        base_alias = f"0x{base:02x}"
    string = None
    if t == "str":
        string = get_string(base, uc)
        t = "byte"
        num_elements = len(string)
    types = {"byte": ("B", 1), "int": ("<I", 4)}
    fmt, size = types[t]
    for i in range(num_elements):
        item, = struct.unpack(fmt, uc.mem_read(base + i * size, size))
        print(f"{base_alias}+{i * size} = 0x{item:02x}")
    if string is not None:
        print(f"String @0x{base:02x}: {string}")
def __init__(self, btn_id, appearance=1, rpos=None, **kwargs):
    """Based on the btn_id, let's prepare the button's appearance and behavior."""
    super(titleButton, self).__init__(**kwargs)
    self.name = btn_id
    self.text = u.get_string("title", btn_id)
    self.border = (0, 0, 0, 0)
    self.background_normal = getattr(
        g, 'IMG_TITLE_BTN' + str(appearance) + '_NORMAL')
    self.background_down = getattr(
        g, 'IMG_TITLE_BTN' + str(appearance) + '_PRESS')
    # Set button behaviors
    self.bind(on_release=getattr(self, 'click_' + btn_id))
    self.font_name = g.FONT_CR
    self.color = g.COLOR_BROWN
    self.size_hint = (None, None)
    self.size = [280 * g.scale, 60 * g.scale]
    self.font_size = 28 * g.scale
    if rpos is not None:
        self.apply_relative_position(rpos)
def __init__(self, name, **kwargs):
    """The __init__ function runs once at application start, not every time the
    screen is presented. This means only static elements can be created here,
    alongside variable declarations and stubs."""
    super(titleScreen, self).__init__(**kwargs)

    # All screens must have a name so we can access it.
    self.name = name

    # Every temporary widget must be placed in a list so it can be removed in
    # the on_leave function. The buttons are not actually added to it (they are
    # handled directly by the on_called and on_leave functions), so the
    # temporary list is just a reference for a possible future addition.
    self.temporary_widgets = []

    with self.canvas:
        # Draw an opaque white background rectangle.
        Color(1, 1, 1, 1, mode='rgba')
        Rectangle(pos=(0, 0), size=g.screen_size)

    # Loading component images
    back_image = u.mImage(source=g.IMG_BACKGROUND, size=(433, 769), rpos=(0, 0))
    main_image = u.mImage(source=g.IMG_TITLE_SCENE, size=(433, 769), rpos=(0, 0))
    self.add_widget(back_image)
    self.add_widget(main_image)

    version_count_string = u.get_string(
        'title', 'teste'
    ) + ' ' + g.current_version_human + ' ' + g.current_resolution_human
    self.version_count = Label(text=version_count_string,
                               font_name=g.FONT_CB,
                               font_size=18 * g.scale,
                               color=g.COLOR_WHITE,
                               pos=(0, -115 * g.scale))
    self.add_widget(self.version_count)

    self.buttons = []
def get_physical_chan_string_attribute(devName, attrId):
    return utils.get_string(c_daqmx.DAQmxGetPhysicalChanAttribute,
                            ctypes.c_char_p(devName),
                            ctypes.c_int32(attrId))
def main(config_file):
    """ Train text recognition network
    """
    # Parse configs
    FLAGS = Flags(config_file).get()

    # Set directory, seed, logger
    model_dir = create_model_dir(FLAGS.model_dir)
    logger = get_logger(model_dir, 'train')
    best_model_dir = os.path.join(model_dir, 'best_models')
    set_seed(FLAGS.seed)

    # Print configs
    flag_strs = [
        '{}:\t{}'.format(name, value)
        for name, value in FLAGS._asdict().items()
    ]
    log_formatted(logger, '[+] Model configurations', *flag_strs)

    # Print system environments
    num_gpus = count_available_gpus()
    num_cpus = os.cpu_count()
    mem_size = virtual_memory().available // (1024**3)
    log_formatted(logger, '[+] System environments',
                  'The number of gpus : {}'.format(num_gpus),
                  'The number of cpus : {}'.format(num_cpus),
                  'Memory Size : {}G'.format(mem_size))

    # Get optimizer and network
    global_step = tf.train.get_or_create_global_step()
    optimizer, learning_rate = get_optimizer(FLAGS.train.optimizer,
                                             global_step)
    out_charset = load_charset(FLAGS.charset)
    net = get_network(FLAGS, out_charset)
    is_ctc = (net.loss_fn == 'ctc_loss')

    # Multi tower for multi-gpu training
    tower_grads = []
    tower_extra_update_ops = []
    tower_preds = []
    tower_gts = []
    tower_losses = []
    batch_size = FLAGS.train.batch_size
    tower_batch_size = batch_size // num_gpus

    val_tower_outputs = []
    eval_tower_outputs = []

    for gpu_indx in range(num_gpus):

        # Train tower
        print('[+] Build Train tower GPU:%d' % gpu_indx)
        input_device = '/gpu:%d' % gpu_indx

        tower_batch_size = tower_batch_size \
            if gpu_indx < num_gpus - 1 \
            else batch_size - tower_batch_size * (num_gpus - 1)

        train_loader = DatasetLodaer(
            dataset_paths=FLAGS.train.dataset_paths,
            dataset_portions=FLAGS.train.dataset_portions,
            batch_size=tower_batch_size,
            label_maxlen=FLAGS.label_maxlen,
            out_charset=out_charset,
            preprocess_image=net.preprocess_image,
            is_train=True,
            is_ctc=is_ctc,
            shuffle_and_repeat=True,
            concat_batch=True,
            input_device=input_device,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            worker_index=gpu_indx,
            use_rgb=FLAGS.use_rgb,
            seed=FLAGS.seed,
            name='train')

        tower_output = single_tower(net,
                                    gpu_indx,
                                    train_loader,
                                    out_charset,
                                    optimizer,
                                    name='train',
                                    is_train=True)
        tower_grads.append([x for x in tower_output.grads if x[0] is not None])
        tower_extra_update_ops.append(tower_output.extra_update_ops)
        tower_preds.append(tower_output.prediction)
        tower_gts.append(tower_output.text)
        tower_losses.append(tower_output.loss)

        # Print network structure
        if gpu_indx == 0:
            param_stats = tf.profiler.profile(tf.get_default_graph())
            logger.info('total_params: %d\n' % param_stats.total_parameters)

        # Valid tower
        print('[+] Build Valid tower GPU:%d' % gpu_indx)
        valid_loader = DatasetLodaer(dataset_paths=FLAGS.valid.dataset_paths,
                                     dataset_portions=None,
                                     batch_size=FLAGS.valid.batch_size //
                                     num_gpus,
                                     label_maxlen=FLAGS.label_maxlen,
                                     out_charset=out_charset,
                                     preprocess_image=net.preprocess_image,
                                     is_train=False,
                                     is_ctc=is_ctc,
                                     shuffle_and_repeat=False,
                                     concat_batch=False,
                                     input_device=input_device,
                                     num_cpus=num_cpus,
                                     num_gpus=num_gpus,
                                     worker_index=gpu_indx,
                                     use_rgb=FLAGS.use_rgb,
                                     seed=FLAGS.seed,
                                     name='valid')

        val_tower_output = single_tower(net,
                                        gpu_indx,
                                        valid_loader,
                                        out_charset,
                                        optimizer=None,
                                        name='valid',
                                        is_train=False)

        val_tower_outputs.append(
            (val_tower_output.loss, val_tower_output.prediction,
             val_tower_output.text, val_tower_output.filename,
             val_tower_output.dataset))

    # Aggregate gradients
    losses = tf.reduce_mean(tower_losses)
    grads = _average_gradients(tower_grads)

    with tf.control_dependencies(tower_extra_update_ops[-1]):
        if FLAGS.train.optimizer.grad_clip_norm is not None:
            grads, global_norm = _clip_gradients(
                grads, FLAGS.train.optimizer.grad_clip_norm)
            tf.summary.scalar('global_norm', global_norm)
        train_op = optimizer.apply_gradients(grads, global_step=global_step)

    # Define config, scaffold
    saver = tf.train.Saver()
    sess_config = get_session_config()
    scaffold = get_scaffold(saver, FLAGS.train.tune_from, 'train')
    restore_model = get_init_trained()

    # Define validation saver, summary writer
    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
    val_summary_op = tf.summary.merge(
        [s for s in summaries if 'valid' in s.name])
    val_summary_writer = {
        dataset_name: tf.summary.FileWriter(
            os.path.join(model_dir, 'valid', dataset_name))
        for dataset_name in valid_loader.dataset_names
    }
    val_summary_writer['total_valid'] = tf.summary.FileWriter(
        os.path.join(model_dir, 'valid', 'total_valid'))
    val_saver = tf.train.Saver(max_to_keep=len(valid_loader.dataset_names) + 1)
    best_val_err_rates = {}
    best_steps = {}

    # Training
    print('[+] Make Session...')

    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=model_dir,
            scaffold=scaffold,
            config=sess_config,
            save_checkpoint_steps=FLAGS.train.save_steps,
            save_checkpoint_secs=None,
            save_summaries_steps=FLAGS.train.summary_steps,
            save_summaries_secs=None,
    ) as sess:

        log_formatted(logger, 'Training started!')
        _step = 0
        train_t = 0
        start_t = time.time()

        while _step < FLAGS.train.max_num_steps \
                and not sess.should_stop():

            # Train step
            step_t = time.time()
            [step_loss, _, _step, preds, gts, lr] = sess.run([
                losses, train_op, global_step, tower_preds[0], tower_gts[0],
                learning_rate
            ])
            train_t += time.time() - step_t

            # Summary
            if _step % FLAGS.valid.steps == 0:

                # Train summary
                train_err = 0.

                for i, (p, g) in enumerate(zip(preds, gts)):
                    s = get_string(p, out_charset, is_ctc=is_ctc)
                    g = g.decode('utf8').replace(DELIMITER, '')
                    s = adjust_string(s, FLAGS.train.lowercase,
                                      FLAGS.train.alphanumeric)
                    g = adjust_string(g, FLAGS.train.lowercase,
                                      FLAGS.train.alphanumeric)
                    e = int(s != g)
                    train_err += e

                    if FLAGS.train.verbose and i < 5:
                        print('TRAIN :\t{}\t{}\t{}'.format(s, g, not bool(e)))

                train_err_rate = \
                    train_err / len(gts)

                # Valid summary
                val_cnts, val_errs, val_err_rates, _ = \
                    validate(sess, _step, val_tower_outputs, out_charset,
                             is_ctc, val_summary_op, val_summary_writer,
                             val_saver, best_val_err_rates, best_steps,
                             best_model_dir, FLAGS.valid.lowercase,
                             FLAGS.valid.alphanumeric)

                # Logging
                log_strings = ['', '-' * 28 + ' VALID_DETAIL ' + '-' * 28, '']

                for dataset in sorted(val_err_rates.keys()):
                    if dataset == 'total_valid':
                        continue
                    cnt = val_cnts[dataset]
                    err = val_errs[dataset]
                    err_rate = val_err_rates[dataset]
                    best_step = best_steps[dataset]
                    s = '%s : %.2f%%(%d/%d)\tBEST_STEP : %d' % \
                        (dataset, (1. - err_rate) * 100, cnt - err, cnt, best_step)
                    log_strings.append(s)

                elapsed_t = float(time.time() - start_t) / 60
                remain_t = (elapsed_t / (_step + 1)) * \
                    (FLAGS.train.max_num_steps - _step - 1)
                log_formatted(
                    logger, 'STEP : %d\tTRAIN_LOSS : %f' % (_step, step_loss),
                    'ELAPSED : %.2f min\tREMAIN : %.2f min\t'
                    'STEP_TIME: %.1f sec' %
                    (elapsed_t, remain_t, float(train_t) / (_step + 1)),
                    'TRAIN_SEQ_ERR : %f\tVALID_SEQ_ERR : %f' %
                    (train_err_rate, val_err_rates['total_valid']),
                    'BEST_STEP : %d\tBEST_VALID_SEQ_ERR : %f' %
                    (best_steps['total_valid'],
                     best_val_err_rates['total_valid']), *log_strings)

        log_formatted(logger, 'Training is completed!')
def MessageBoxA(self, uc, esp, log, owner, text_ptr, title_ptr, type):
    text = get_string(text_ptr, uc)
    title = get_string(title_ptr, uc)
    print(f"\x1b[31mMessage Box ({title}): {text}\x1b[0m")
    return 1
                source: source_batch,
                target: target_batch,
                lengths: lengths_batch,
                is_train: True
            })

        niter_global += 1
        if niter_global % 100 == 0:
            autoencoder.noise_radius = autoencoder.noise_radius * args.noise_anneal
            print(
                '[%d/%d][%d/%d] Loss_D: %.8f (Loss_D_real: %.8f Loss_D_fake: %.8f) Loss_G: %.8f'
                % (epoch, args.epochs, niter, len(train_data),
                   err_D_fake_val - err_D_real_val, err_D_real_val,
                   err_D_fake_val, G_loss_val))

        if niter_global % 300 == 0:
            source_batch, target_batch, lengths_batch = train_data[
                random.randint(0, len(train_data) - 1)]
            max_ind = sess.run(
                [max_indices], {
                    source: source_batch,
                    target: target_batch,
                    lengths: lengths_batch,
                    is_train: True
                })
            print('Evaluating generator: %s' % get_string(max_ind[0], corpus))
def init_scraping(source, source_location, driver, start, stop):
    company_list = []
    if source == "url":
        company_list = get_companies_from_url(source_location, driver, start, stop)
    else:
        company_list = get_companies_from_file(source_location)
    path = os.getcwd() + os.sep + "results" + os.sep + "{}.csv".format(
        str(os.path.basename(__file__)).replace(".py", ''))
    status = False
    if not os.path.exists(path):
        status = True
    else:
        company_csv_file = open(path, "r")
        csv_rd = csv.reader(company_csv_file)
        # if csv already has more than 60000 records, move it to some other name and create new one
        total_rows = sum(1 for row in csv_rd)
        company_csv_file.close()
        if total_rows >= 60000:
            filename = os.path.basename(path).replace(".csv", '')
            index = 0
            while os.path.exists("{filename}_{index}.csv".format(
                    filename=filename, index=index)):
                index += 1
            os.rename(
                "{}.csv".format(filename),
                "{filename}_{index}.csv".format(filename=filename,
                                                index=index))
            # Now allow to create a new file with the name
            status = True
    company_csv_file = open(path, "ab+")
    csv_wr = csv.writer(company_csv_file, quoting=csv.QUOTE_ALL)
    if status:
        csv_wr.writerow(header)
    for company in company_list:
        driver.get(
            "https://www.zaubacorp.com/company/SOME-COMPANY-NAME/{cin}".format(
                cin=company))
        while True:
            company_obj = init_extraction(driver.page_source)
            if hasattr(company_obj, "cin"):
                break
            time.sleep(1)
        try:
            company_obj.url = "https://www.zaubacorp.com/company/{company}/{cin}".format(
                company=str(getattr(company_obj, "company_name")).upper().replace(' ', '-'),
                cin=str(getattr(company_obj, "cin")).strip().upper())
            content = [
                get_string(getattr(company_obj, key)) for key in header
                if key != "cin"
            ]
            content.insert(0, str(getattr(company_obj, "cin")).strip().upper())
            if hasattr(company_obj, "cin"):
                csv_wr.writerow(content)
        except Exception as ae:
            logger.exception(msg=[
                key for key in dir(company_obj) if not key.startswith('_')
            ])
            logger.exception(msg="Company CIN: {}".format(
                getattr(company_obj, "cin")), exc_info=True)
            continue
        time.sleep(1)
    company_csv_file.close()
def init_extraction(page=None):
    if page is None:
        exit(1)
    company = Company()
    soup = bs4.BeautifulSoup(page, "lxml")
    info_list = soup.findAll("div", attrs={"class": "col-lg-12"})
    info_list += soup.findAll("div", attrs={"class": "col-12"})
    info_dict = {}
    for info in info_list:
        if info.h4 and info.h4.text == "Company Details":
            soup1 = bs4.BeautifulSoup(str(info.table), "lxml")
            ptag = soup1.findAll("p")
            info_dict = dict(
                map(
                    None,
                    *[
                        iter([
                            str(get_string(p.text)).lower().strip().replace(
                                ' ', '_') for p in ptag
                            if not str(get_string(
                                p.text)).startswith("Click here")
                        ])
                    ] * 2))
            if "llp_identification_number" in info_dict:
                info_dict["cin"] = info_dict.pop("llp_identification_number")
            elif "foreign_company_registration_number" in info_dict:
                info_dict["cin"] = info_dict.pop(
                    "foreign_company_registration_number")
            else:
                pass
            if "main_division_of_business_activity_to_be_carried_out_in_india" in info_dict:
                info_dict["company_category"] = info_dict.pop(
                    "main_division_of_business_activity_to_be_carried_out_in_india"
                )
            if "type_of_office" in info_dict:
                info_dict["company_category"] = info_dict.pop("type_of_office")
            if "description_of_main_division" in info_dict:
                info_dict["company_sub_category"] = info_dict.pop(
                    "description_of_main_division")
            if "country_of_incorporation" in info_dict:
                info_dict["roc"] = info_dict.pop("country_of_incorporation")
            for attrib in header:
                if attrib not in info_dict:
                    info_dict[attrib] = ''
        elif info.h4 and info.h4.text == "Contact Details":
            soup1 = bs4.BeautifulSoup(str(info.div), "lxml")
            ptag = soup1.findAll("p")
            contact = {}
            for lst in [
                    str(get_string(p.text)).lower().strip().split(':')
                    for p in ptag
            ]:
                if len(lst) > 2:
                    # It's a website
                    contact[str(lst[0]).strip().replace(
                        ' ', '_')] = "{0}:{1}".format(
                            str(lst[1]).strip(), str(lst[2]).strip())
                elif len(lst) > 1:
                    contact[str(lst[0]).strip().replace(' ', '_')] = str(lst[1]).strip() \
                        if not str(lst[1]).strip().startswith('click here') else ''
                else:
                    # It's the last part of an address
                    contact['tmp'] = lst[0]
            if 'tmp' in contact and 'address' in contact:
                contact['address'] = contact['tmp'] if not contact['address'] \
                    else "{0} {1}".format(contact['address'], contact['tmp'])
                del contact['tmp']
            info_dict.update(contact)
    for key, value in info_dict.iteritems():
        setattr(
            company, key,
            get_string(value).strip().replace('_', ' ')
            if value is not None and key != "email_id" else '')
    setattr(
        company, "email_id",
        get_string(info_dict['email_id']).strip()
        if 'email_id' in info_dict else '')
    return company
def get_name(self):
    if hasattr(self, 'lname'):
        return self.lname
    tag = self.get_soup().find('h1')
    self.lname = utils.get_string(tag.text)
    return self.lname
def get_hash_code(self, item):
    title = utils.get_string(item, 'title')
    hash_code = hashlib.md5(title.encode('utf8')).hexdigest()
    return hash_code
def get_dev_channels(get_chan, devName):
    devChannels = utils.get_string(get_chan, devName)
    return unflatten(devChannels)
def get_product_type(devName):
    devProductType = utils.get_string(c_system.DAQmxGetDevProductType, devName)
    return devProductType
def get_devices():
    csvDevNames = utils.get_string(c_system.DAQmxGetSysDevNames)
    return unflatten(csvDevNames)
def get_physical_chan_string_vtr_attribute(devName, attrId):
    value = utils.get_string(c_daqmx.DAQmxGetPhysicalChanAttribute,
                             ctypes.c_char_p(devName),
                             ctypes.c_int32(attrId))
    return unflatten(value)
def get_device_string_vtr_attribute(devName, attrId):
    value = utils.get_string(c_daqmx.DAQmxGetDeviceAttribute,
                             ctypes.c_char_p(devName),
                             ctypes.c_int32(attrId))
    return unflatten(value)
def get_device_string_attribute(devName, attrId):
    return utils.get_string(c_daqmx.DAQmxGetDeviceAttribute,
                            ctypes.c_char_p(devName),
                            ctypes.c_int32(attrId))