def parse_xml_data_files(msdn_data_dir):
    """
    Return dictionary holding function information.

    Arguments:
    msdn_data_dir -- path to the directory storing the XML data files
    """
    functions_map = {}

    # Parse main database file first
    xml_file = os.path.join(msdn_data_dir, MSDN_INFO_FILE)
    functions = xml_parser.parse(xml_file)
    for function in functions:
        functions_map[function.name] = function

    # Parse additional files
    data_files = get_data_files(msdn_data_dir)
    for file in data_files:
        xml_file = os.path.join(msdn_data_dir, file)
        additional_functions = xml_parser.parse(xml_file)

        # Merge functions or add new function
        for a_function in additional_functions:
            if a_function.name in functions_map:
                functions_map[a_function.name].merge(a_function)
            else:
                functions_map[a_function.name] = a_function

    return functions_map
def send_emails():
    m = Model()
    couchdb = m.get_database(C.couchdb_config['doc_db'])
    for i in m.read(C.couchdb_config['doc_db'], m.OUTSTANDING_EMAIL):
        xml = os.path.join(C.authorized_docs_folder,
                           "{}_1.xml".format(i.value['claveacceso']))
        rep_xml = parse(xml)
        dct = parse_voucher(rep_xml['comprobante'])
        dct['numeroAutorizacion'] = i.value["numeroAutorizacion"]
        dct['fechaAutorizacion'] = i.value["fechaAutorizacion"]
        pdf = os.path.join(C.authorized_docs_folder,
                           "{}_1.pdf".format(i.value['claveacceso']))
        generate(dct, 0, pdf)

        text = ''
        with open('mail.html', 'r') as ma:
            text = ma.read()
        subject = u"Documentos electronicos enviados por Fullcarga Ecuador"
        send_from = "*****@*****.**"
        send_to = i.value['email_recipient']
        files = [xml, pdf]
        send_mail(send_from, send_to, subject, text, files)

        # Once the email is sent, update the db with mail_sended = True
        m.write_mail_sended(C.couchdb_config['doc_db'], i.value['claveacceso'])
def send_emails():
    m = Model()
    couchdb = m.get_database(C.couchdb_config['doc_db'])
    for i in m.read(C.couchdb_config['doc_db'], m.AUTHORIZED):
        xml = os.path.join(C.authorized_docs_folder,
                           "{}_1.xml".format(i.value['claveacceso']))
        rep_xml = parse(xml)
        dct = parse_voucher(rep_xml['comprobante'])
        dct['numeroAutorizacion'] = i.value["numeroAutorizacion"]
        dct['fechaAutorizacion'] = i.value["fechaAutorizacion"]
        pdf = os.path.join(C.authorized_docs_folder,
                           "{}_1.pdf".format(i.value['claveacceso']))
        generate(dct, 0, pdf)

        text = "Documento electronico No. {}".format(i.value['claveacceso'])
        subject = "Documento electronico"
        send_from = "*****@*****.**"
        # send_to = ["*****@*****.**", "*****@*****.**"]
        # send_to = ["*****@*****.**"]
        send_to = ["*****@*****.**"]
        files = [xml, pdf]
        send_mail(send_from, send_to, subject, text, files)

        # Once the email is sent, update the db with mail_sended = True
        m.write_mail_sended(C.couchdb_config['doc_db'], i.value['claveacceso'])
        lm_obj = LM()
        lm_obj.write_authorization(i.value['claveacceso'],
                                   dct['numeroAutorizacion'],
                                   dct['fechaAutorizacion'])
def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))
    try:
        assert (FLAGS.mode == 'train')
    except:
        raise ValueError('mode must be "train" while now it is "%s"' % FLAGS.mode)
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)

    data_manager = BinaryDataManager(binary_file=FLAGS.data_path, single_pass=True)
    data_manager.load_data()

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_enc_steps', 'max_dec_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams', model_hp_dict.keys())(**model_hp_dict)
    model_settings = model_settings._replace(max_dec_steps=1)
    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # Launch extractive model
    cur_path = os.path.abspath('.')
    FLAGS.sentence_extract_config = os.path.abspath(FLAGS.sentence_extract_config)
    os.chdir(FLAGS.sentence_extract_root)
    sys.path.insert(0, 'run')
    sys.path.insert(0, 'util')
    import laucher
    import xml_parser
    laucher_params = xml_parser.parse(FLAGS.sentence_extract_config, flat=False)
    ext_solver = laucher.laucher(laucher_params)
    ext_solver.start()
    os.chdir(cur_path)

    # Launch abstractive model
    loaded_params = tf.global_variables()
    abs_model = SummarizationModel(model_settings, vocab, extra_info={})

    train_model(ext_solver=ext_solver, abs_model=abs_model, data_manager=data_manager)
def convert_session(input_file, output_dir):
    files = {}  # filepath -> file_handler
    try:
        for element in xml_parser.parse(input_file):  # gunzip automatically
            type_ = element.pop('type')
            sensor_name = element.pop('sensorName', None)
            filepath = get_csv_filepath(output_dir, type_, sensor_name)
            if filepath not in files:
                files[filepath] = open(filepath, 'w')
                csv.writer(files[filepath]).writerow(element.keys())
            csv.writer(files[filepath]).writerow(element.values())
    finally:
        for f in files.values():
            f.close()
def main(unused_argv):
    if len(unused_argv)!=1:
        raise Exception('Problem with flags: %s'%str(unused_argv))

    # start sentence extraction model
    ret_path=os.path.abspath('.')
    os.chdir(FLAGS.sentence_extract_root)
    sys.path.insert(0,'./run')
    sys.path.insert(0,'./util')
    import laucher
    import xml_parser
    laucher_params=xml_parser.parse(FLAGS.sentence_extract_config,flat=False)
    se_solver=laucher.laucher(laucher_params)
    se_solver.start()
    os.chdir(ret_path)
    loaded_params=tf.global_variables()

    try:
        assert(FLAGS.mode=='decode')
    except:
        raise ValueError('mode must be "decode" but now it is %s'%str(FLAGS.mode))
    FLAGS.log_root=os.path.join(FLAGS.log_root,FLAGS.exp_name)
    try:
        assert(os.path.exists(FLAGS.log_root))
    except:
        raise ValueError('Invalid log_root: %s'%str(FLAGS.log_root))
    FLAGS.batch_size=FLAGS.beam_size

    model_hp_list=['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
                   'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim',
                   'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps',
                   'coverage', 'cov_loss_wt', 'pointer_gen']
    model_hp_dict={}
    for key,value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key]=value
    model_settings=namedtuple('HParams',model_hp_dict.keys())(**model_hp_dict)
    model_settings=model_settings._replace(max_dec_steps=1)

    solver=RunTimeWrapper(hp=FLAGS, model_settings=model_settings)
    solver.start(loaded_params=loaded_params)
    if FLAGS.interactive_mode=='cmd':
        command_line_mode(solver,se_solver)
    elif FLAGS.interactive_mode=='cs':
        server_client_mode(solver,se_solver)
    else:
        raise ValueError('Unrecognized interactive mode: %s'%FLAGS.interactive_mode)
    solver.end()
    se_solver.end()
def main():
    instance = request_api.YahooHandler()
    instance.process_tokens()
    xml = instance.api_call("teams")
    data = ET.fromstring(xml)
    print xml_parser.parse(data, "name")
    print xml_parser.parse(data, "team_key")
    print xml_parser.parse(data, "team_id")
def map_drawer(scr, path_to_map, x=0, y=0,
               path_to_squares="images/squares.png", count_of_lines=16,
               count_of_column=30, size_of_plate=16, shift=5):
    y_position = y
    list_of_squares = []
    map = []
    parsed_xml = parse(path_to_map)
    squares = load(path_to_squares)

    # Build the list of square coordinates for the map
    for y_position_of_square in range(0, count_of_lines * size_of_plate, size_of_plate):
        for x_position_of_square in range(0, count_of_column * size_of_plate, size_of_plate):
            list_of_squares.append(
                (x_position_of_square, y_position_of_square))

    # Build the list of square numbers from the map
    for i in range(len(parsed_xml)):
        splitted_line = (parsed_xml[i].split(","))
        if i < len(parsed_xml) - 1:
            del splitted_line[len(splitted_line) - 1]
        map.append(splitted_line)

    scr.fill((0, 0, 0))

    # Draw the map using the list of square numbers
    for line in map:
        x_position = x
        for plate_num in line:
            try:
                plate = list_of_squares[int(plate_num) - shift]
                scr.blit(squares, (x_position, y_position),
                         (plate[0], plate[1], size_of_plate, size_of_plate))
            except Exception:
                no_plate = load("images/no_plate.png")
                scr.blit(no_plate, (x_position, y_position))
            x_position += size_of_plate
        y_position += size_of_plate
def EnOtroIdioma(name, num):
    lista_alojamientos = xml_parser.parse(num)  # By default, 1 is Spanish, 2 is English and 3 is French
    (nombre, direc, email, telefono, descripcion, web) = ('', '', '', '', '', '')
    Encontrado = False
    for alojamiento in lista_alojamientos:
        if (alojamiento['name'] == name):
            Encontrado = True
            direc = alojamiento['address'] + '. ' + alojamiento['zipcode'] + '. ' + \
                alojamiento['subAdministrativeArea'] + '. (' + alojamiento['latitude'] + ', ' + \
                alojamiento['longitude'] + '). ' + alojamiento['country'] + '.'
            nombre = alojamiento['name']
            email = alojamiento['email']
            telefono = alojamiento['phone']
            descripcion = alojamiento['body']
            web = alojamiento['web']
    if Encontrado == False:
        nombre = 'error'
    return (nombre, direc, email, telefono, descripcion, web)
def process(params: list) -> list:
    db = fetch_db()
    meta = fetch_meta(db)
    doc = params[0]
    target = params[1]
    count = 0
    exceptions = []
    documents = []
    print(datetime.now(), doc, 'Begin')
    for path, _, names in os.walk(doc):
        for name in names:
            if name.endswith('.zip'):
                kind = os.path.join(target, os.path.split(path)[-1])
                fzip = ZipFile(os.path.join(path, name))
                target_folder = os.path.join(kind, name[:-4])
                fzip.extractall(target_folder)
                for zipName in os.listdir(target_folder):
                    if 'TOC' not in zipName and '.xml' in zipName:
                        count += 1
                        if count % 1000 == 0:
                            print(datetime.now(), doc, count)
                        with open(os.path.join(target_folder, zipName), 'rb') as f:
                            try:
                                patent = parse(f.read())
                                documents.append(patent)
                            except Exception as e:
                                exceptions.append((zipName, e))
    print(datetime.now(), doc, count)
    print(datetime.now(), doc, 'Begin insert')
    try:
        result = meta.insert_many(documents, False)
        print(datetime.now(), doc, len(result.inserted_ids))
    except BulkWriteError as e:
        print(datetime.now(), doc, 'writeErrors', len(e.details['writeErrors']))
        exceptions.append((doc, 'writeErrors', len(e.details['writeErrors'])))
    print(datetime.now(), doc, 'End')
    exceptions.append((doc, count))
    return exceptions
        start += size
        if name in [" r", " s"]:  # Skip return address and base pointer
            current = arguments
            continue
        current.append({'name': name})
    return (local_variables, arguments)


wrapper = TextWrapper(break_long_words=False, width=70)
filename = os.path.join(os.path.split(sys.modules[__name__].__file__)[0],
                        "msdn.xml")
functions = parse(filename)

library = idc.GetInputFile().lower()
functions_map = {}
for lf in functions:
    functions_map[lf.name] = lf

assigned = 0
not_assigned = 0

for ea in Functions(0, 0xFFFFFFFF):
    function_object = idaapi.get_func(ea)
    if not function_object:
        continue
def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))
    try:
        assert (FLAGS.mode == 'decode')
    except:
        raise ValueError('mode must be "decode" but now it is %s' % str(FLAGS.mode))
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    try:
        assert (os.path.exists(FLAGS.log_root))
    except:
        raise ValueError('Invalid log_root: %s' % str(FLAGS.log_root))
    FLAGS.batch_size = FLAGS.beam_size

    data_manager = BinaryDataManager(binary_file=FLAGS.data_path, single_pass=True)
    data_manager.load_data()

    # Loading the external information first
    extra_info = {}
    if os.path.exists(FLAGS.external_config):
        external_params = xml_parser.parse(FLAGS.external_config, flat=False)
        if 'sent2vec_params' in external_params:
            sent2vec_params = external_params['sent2vec_params']
            convnet_params = sent2vec_params['convnet_params']
            convnet_model2load = sent2vec_params['model2load']
            gamma = 0.2 if not 'gamma' in sent2vec_params else sent2vec_params['gamma']
            my_convnet = convnet.convnet(convnet_params)
            my_convnet.train_validate_test_init()
            my_convnet.load_params(file2load=convnet_model2load)
            fixed_vars = tf.global_variables()
            fixed_vars.remove(my_convnet.embedding_matrix)
            extra_info['sent2vec'] = {'gamma': gamma, 'network': my_convnet}
            extra_info['fixed_vars'] = fixed_vars
        if 'key_phrases' in external_params:
            # TODO: phrase some parameters to import the results of key-phrase extracted or \
            # parameters for online key-phrase extraction
            extra_info['key_phrases'] = {}
            raise NotImplementedError('Key phrases part has not been implemented yet')

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams', model_hp_dict.keys())(**model_hp_dict)
    model_settings = model_settings._replace(max_dec_steps=1)

    for folder in [FLAGS.article_folder, FLAGS.refer_folder, FLAGS.output_folder]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    solver = RunTimeWrapper(hp=FLAGS, model_settings=model_settings, extra_info=extra_info)
    solver.start()

    result2write = ''
    for idx, (article, abstract) in enumerate(data_manager.text_abstract_pair):
        sys.stdout.write('Analyzing the documents %d/%d = %.1f%% \r' % (
            idx + 1, len(data_manager.text_abstract_pair),
            float(idx + 1) / float(len(data_manager.text_abstract_pair)) * 100))
        sys.stdout.flush()
        _, summary = solver.run(query=article)
        abstract = '\n'.join(abstract)

        # Reference and compare
        with open(FLAGS.article_folder + os.sep + '%04d_article.txt' % idx, 'w') as fopen:
            fopen.write(article)
        with open(FLAGS.refer_folder + os.sep + '%04d_reference.txt' % idx, 'w') as fopen:
            fopen.write(abstract)
        with open(FLAGS.output_folder + os.sep + '%04d_decode.txt' % idx, 'w') as fopen:
            fopen.write(summary)
        result2write += '\n\n===\n%s\n\n>>>refer:\n%s\n\n>>>output:\n%s\n' % (
            article, abstract, summary)
        if (idx + 1) % 100 == 0:
            with open('results.txt', 'w') as fopen:
                fopen.write(result2write)

    solver.end()
                    type=bool)
parser.add_argument('--autorships_dump',
                    help='autorships for papers dump file name')
parser.add_argument('--citations',
                    help='enable to extract citations for papers',
                    default=False,
                    type=bool)
parser.add_argument('--citations_dump',
                    help='citations for papers dump file name')
parser.add_argument('--citations_start',
                    help='citations start point',
                    type=int)
parser.add_argument('--citations_finish',
                    help='citations finish point',
                    type=int)
args = parser.parse_args()

print('Start scraping')
scrape(args.subject, args.startpage, args.finishpage, args.dump_name)
print('Scraping finished. Start parsing dump')
parse(args.dump_name, args.dataset_name)
print('Finished parsing')
if args.autorships:
    autorships(args.dataset_name, args.autorships_dump)
if args.citations:
    citations(args.dataset_name, args.citations_dump, args.citations_start,
              args.citations_finish)
path_xml = '../SemCor/semcor.data.xml'

# path of resources needed to build the mappings among various sense representations
path_inst2sk = '../SemCor/semcor.gold.key.txt'
path_bn2wn = '../resources/babelnet2wordnet.tsv'
path_bn2lex = '../resources/babelnet2lexnames.tsv'
path_bn2wnd = '../resources/babelnet2wndomains.tsv'

# collection of the dictionaries to realise those mappings
inst2sk_dict = get_dictionary(path_inst2sk, 0)
wn2bn_dict = get_dictionary(path_bn2wn, 1)
bn2lex_dict = get_dictionary(path_bn2lex, 0)
bn2wnDom_dict = get_bn2wnDomains(path_bn2wnd)

# parsing of the .xml file for retrieving fine-grained (BN) data
inputs, labels_BN, bnIds = parse(path_xml, inst2sk_dict, wn2bn_dict, bn2lex_dict, 'BN')
# parsing of the .xml file for retrieving coarse-grained (WND) data
_, labels_WND, wndIds = parse(path_xml, inst2sk_dict, wn2bn_dict, bn2wnDom_dict, 'WND')
# parsing of the .xml file for retrieving coarse-grained (LEX) data
_, labels_LEX, lexIds = parse(path_xml, inst2sk_dict, wn2bn_dict, bn2lex_dict, 'LN')

voc_words = get_vocabulary(inputs, 0.0)  # collect vocabulary of input lemmas
voc_senses_BN = get_vocab_senses(bnIds, 0.0)  # collect vocabulary of FG senses
voc_joint_BN = voc_words + voc_senses_BN  # create joint voc.
voc_senses_WND = get_vocab_senses(wndIds, 0.0)  # collect vocabulary of CG-WND senses
        welcome_msg_fg_color = 'red'
        welcome_msg_text += '\nПлохой результат'
    elif normalized_user_result <= 0.75:
        welcome_msg_fg_color = 'blue'
        welcome_msg_text += '\nХороший результат'
    else:
        welcome_msg_fg_color = 'green'
        welcome_msg_text += '\nОтличный результат'
    welcome_msg.config(text=welcome_msg_text,
                       fg=welcome_msg_fg_color,
                       font=resourses.constants.TEST_RESULT_FONT)


if __name__ == '__main__':
    path = 'resourses/questions.xml'
    parsed = xml_parser.parse(path)
    # parsed.popitem()

    root = Tk()
    root.minsize(width=600, height=500)
    # root.maxsize(width=600, height=500)
    root.title('Тестирование')

    main_frame = Frame(root)
    main_frame.config(bg="#FFF")
    main_frame.pack(expand=YES, fill=BOTH)

    # label = Label(f, text=resourses.constants.intro_label)
    # label.config(height=5, width=30)
    # label.config(font=resourses.constants.label_font)
    # label.pack()
def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    # Loading the external information first
    extra_info = {}
    if os.path.exists(FLAGS.external_config):
        external_params = xml_parser.parse(FLAGS.external_config, flat=False)
        if 'sent2vec_params' in external_params:
            sent2vec_params = external_params['sent2vec_params']
            convnet_params = sent2vec_params['convnet_params']
            convnet_model2load = sent2vec_params['model2load']
            gamma = 0.2 if not 'gamma' in sent2vec_params else sent2vec_params['gamma']
            my_convnet = convnet.convnet(convnet_params)
            my_convnet.train_validate_test_init()
            my_convnet.load_params(file2load=convnet_model2load)
            fixed_vars = tf.global_variables()
            fixed_vars.remove(my_convnet.embedding_matrix)
            extra_info['sent2vec'] = {'gamma': gamma, 'network': my_convnet}
            extra_info['fixed_vars'] = fixed_vars
        if 'key_phrases' in external_params:
            # TODO: phrase some parameters to import the results of key-phrase extracted or \
            # parameters for online key-phrase extraction
            extra_info['key_phrases'] = {}
            raise NotImplementedError('Key phrases part has not been implemented yet')

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % (FLAGS.log_root))

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # If in decode mode, set batch_size = beam_size
    # Reason: in decode mode, we decode one example at a time.
    # On each step, we have beam_size-many hypotheses in the beam, so we need to make a batch of these hypotheses.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.iteritems():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data
    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'train':
        print "creating model..."
        model = SummarizationModel(hps, vocab, extra_info)
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab, extra_info)
        run_eval(model, batcher, vocab)
    elif hps.mode == 'decode':
        decode_model_hps = hps  # This will be the hyperparameters for the decoder model
        # The model is configured with max_dec_steps=1 because we only ever run one step of the
        # decoder at a time (to do beam search). Note that the batcher is initialized with
        # max_dec_steps equal to e.g. 100 because the batches need to contain the full summaries
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab, extra_info)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        decoder.decode()  # decode indefinitely (unless single_pass=True, in which case decode the dataset exactly once)
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
                    type=int)
args = parser.parse_args()
# year = args.year
# subject = args.subject
subjects = [
    'AGRI', 'ARTS', 'BIOC', 'BUSI', 'CENG', 'CHEM', 'COMP', 'DECI', 'DENT',
    'EART', 'ECON', 'ENER', 'ENGI', 'ENVI', 'HEAL', 'IMMU', 'MATE', 'MATH',
    'MEDI', 'NEUR', 'NURS', 'PHAR', 'PHYS', 'PSYC', 'SOCI', 'VETE', 'MULT'
]
for subject in subjects:
    for year in range(2000, 2020):
        year_str = str(year)
        dumpName = subject + '_' + year_str + '_' + 'dump'
        datasetName = subject + '_' + year_str + '_' + 'dataset'
        print('Start scraping')
        scrape(dumpName, year_str, subject, args)
        print('Scraping finished. Start parsing dump')
        parse(dumpName, datasetName)
        print('Finished parsing')
        # if args.autorships:
        #     autorships(args.dataset_name, args.autorships_dump, args.apikey)
        # if args.citations:
        #     citations(args.dataset_name, args.citations_dump, args.citations_start, args.citations_finish)
        if name in [" r", " s"]:  # Skip return address and base pointer
            current = arguments
            continue
        current.append({'name': name})
    return (local_variables, arguments)


wrapper = TextWrapper(break_long_words=False, width=70)
filename = os.path.join(
    os.path.split(sys.modules[__name__].__file__)[0], "msdn.xml")
functions = parse(filename)

library = idc.GetInputFile().lower()
functions_map = {}
for lf in functions:
    functions_map[lf.name] = lf

assigned = 0
not_assigned = 0

for ea in Functions(0, 0xFFFFFFFF):
    function_object = idaapi.get_func(ea)
    if not function_object:
        continue
import numpy as np

if sys.version_info.major==2:
    import cPickle as pickle
else:
    import pickle

sys.path.insert(0, './util')
sys.path.insert(0, './model')
import convnet
import xml_parser

if len(sys.argv)<2:
    print('Usage: python rouge.py <config>')
    exit(0)

hyper_param=xml_parser.parse(sys.argv[1])

saved_file=hyper_param['saved_file']
refer_folder=hyper_param['refer_folder']
output_folder=hyper_param['output_folder']
refer_suffix=hyper_param['refer_suffix'] if 'refer_suffix' in hyper_param else 'reference'
output_suffix=hyper_param['output_suffix'] if 'output_suffix' in hyper_param else 'decode'
convnet_params=hyper_param['convnet_params']
model2load=hyper_param['model2load']
word2idx_file=hyper_param['word2idx_file']
unk_idx=hyper_param['unk_idx']

# Collect files
refer_name2file={}
output_name2file={}
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--image_dir',
                        type=str,
                        required=True,
                        help='The path of the folder that stores source images')
    parser.add_argument('-a', '--annotation_dir',
                        type=str,
                        required=True,
                        help='The path of the folder that stores annotations')
    parser.add_argument('-t', '--testset',
                        action='store_true',
                        help='Operate on test set. If unspecified then train set.')
    args = parser.parse_args()

    IMAGE_DIR = args.image_dir
    LABEL_DIR = args.annotation_dir
    CLASSIFICATION_DIR = './classification'
    LOCALIZATION_DIR = './localization'
    CLASSIFICATION_TARGET = ''
    LOCALIZATION_TARGET = ''
    CLASSIFICATION_TEXT = ''
    LOCALIZATION_TEXT = ''
    if args.testset:
        CLASSIFICATION_TARGET = os.path.join(CLASSIFICATION_DIR, 'test')
        LOCALIZATION_TARGET = os.path.join(LOCALIZATION_DIR, 'test')
        CLASSIFICATION_TEXT = os.path.join(CLASSIFICATION_DIR, 'test.txt')
        LOCALIZATION_TEXT = os.path.join(LOCALIZATION_DIR, 'test.txt')
    else:
        CLASSIFICATION_TARGET = os.path.join(CLASSIFICATION_DIR, 'train')
        LOCALIZATION_TARGET = os.path.join(LOCALIZATION_DIR, 'train')
        CLASSIFICATION_TEXT = os.path.join(CLASSIFICATION_DIR, 'train.txt')
        LOCALIZATION_TEXT = os.path.join(LOCALIZATION_DIR, 'train.txt')

    classification_text = open(CLASSIFICATION_TEXT, 'w')
    localization_text = open(LOCALIZATION_TEXT, 'w')

    for parent, dirnames, filenames in os.walk(LABEL_DIR):
        total = len(filenames)
        interval = total / 50
        count = 1
        bar = ''
        for xml_file in filenames:
            objects = xml_parser.parse(os.path.join(LABEL_DIR, xml_file))
            label_vector = extract_multi_label(objects)
            img_file = xml_file.split('.')[0] + '.jpg'
            write_multi_label(classification_text, img_file, label_vector)
            img = cv2.imread(os.path.join(IMAGE_DIR, img_file))
            for key, obj in objects.items():
                curr_img = img[obj['ymin']:obj['ymax'], obj['xmin']:obj['xmax']]
                output_img_name = xml_file.split('.')[0] + '_' + str(key) + '.jpg'
                curr_img = cv2.resize(curr_img, (256, 256))
                cv2.imwrite(os.path.join(LOCALIZATION_TARGET, output_img_name), curr_img)
                write_multi_label(localization_text, output_img_name, [label_map[obj['type']]])
            if not args.testset:
                curr_img = cv2.resize(img, (256, 256))
                cv2.imwrite(os.path.join(CLASSIFICATION_TARGET, img_file), curr_img)

            curr = int(float(count) / total * 100)
            remain = 50 - len(bar) - 1
            if count < total:
                sys.stdout.write(str(curr) + '% [' + bar + '>' + remain * ' ' + ']\r')
                sys.stdout.flush()
            else:
                sys.stdout.write(str(curr) + '% [' + bar + ']\r')
                sys.stdout.flush()
            count += 1
            if count % interval == 0:
                bar += '='
    print

    classification_text.close()
    localization_text.close()
def alojamientos(request):
    titulo = ("Lista de todos los alojamientos de Madrid: ")

    # Show the filter and update forms
    form_filtrar = FormFiltrar()
    form_act = FormActualizar()
    contenido = form_filtrar + form_act

    if request.method == "GET":
        try:
            lista_alojamientos = Alojamiento.objects.all()
            contenido += "<p>"
            for alojamiento in lista_alojamientos:
                # web = unicode.encode(alojamiento.web)
                contenido += "<li>" + str(alojamiento.nombre) + ": " + "<a href='/" + \
                    str(alojamiento.web) + "'>" + str(alojamiento.web) + "</a>"
            contenido += '</p>'
        except Alojamiento.DoesNotExist:
            contenido = ("No hay alojamientos disponibles, debe usted actualizar dichos alojamientos")

    # If the request filtered or updated the data
    elif request.method == "POST":
        contenido = form_filtrar + form_act
        # Filter the accommodations
        if request.POST.get("tipo") == "Filtrar":
            alojamientos, num = FiltrarAlojamientos(request)  # Filter accommodations
            contenido += ImprimirAloj(request, alojamientos, num)  # Print the accommodations
        # Save the accommodations
        elif request.POST.get("tipo") == "actualizar":
            lista_alojamientos = xml_parser.parse(1)  # By default, 1 is Spanish, 2 is English and 3 is French
            for alojamiento in lista_alojamientos:
                direc = alojamiento['address'] + '. ' + alojamiento['zipcode'] + '. ' + \
                    alojamiento['subAdministrativeArea'] + '. (' + alojamiento['latitude'] + ', ' + \
                    alojamiento['longitude'] + '). ' + alojamiento['country'] + '.'
                alojamiento_nuevo = Alojamiento(nombre=alojamiento['name'],
                                                email=alojamiento['email'],
                                                telefono=alojamiento['phone'],
                                                descripcion=alojamiento['body'],
                                                web=alojamiento['web'],
                                                direccion=direc,
                                                categoria=alojamiento['Categoria'],
                                                subcategoria=alojamiento['SubCategoria'])
                alojamiento_nuevo.save()

                # Save 5 images
                imagen1, imagen2, imagen3, imagen4, imagen5 = ('', '', '', '', '')
                try:
                    imagenes = alojamiento['imagenes']
                    imagen1 = imagenes.split(' , ')[0]
                    imagen2 = imagenes.split(' , ')[1]
                    imagen3 = imagenes.split(' , ')[2]
                    imagen4 = imagenes.split(' , ')[3]
                    imagen5 = imagenes.split(' , ')[4]
                except IndexError:
                    pass
                imagenes_nuevas = Imagenes(alojamiento=alojamiento['name'],
                                           url1=imagen1,
                                           url2=imagen2,
                                           url3=imagen3,
                                           url4=imagen4,
                                           url5=imagen5)
                imagenes_nuevas.save()
            return HttpResponseRedirect('/alojamientos')  # Refresh the accommodation list
        else:
            contenido = "ERROR"

    # Pass the navigation menu data as well as the chosen page style
    navegacion = Menu(False)
    try:
        usuarios = ConfiguracionUsuario.objects.all()
        adicional = PagUsers(usuarios)
    except ConfiguracionUsuario.DoesNotExist:
        adicional = ""
    try:
        user = ConfiguracionUsuario.objects.get(user=request.user)
        color = user.color
        letra = user.letra
    except ConfiguracionUsuario.DoesNotExist:
        color, letra = ("", "")

    rendered = Render(request, color, letra, titulo, navegacion, contenido, adicional)
    return HttpResponse(rendered)
import logging
from pprint import pprint
from random import randint

import db
import xml_parser

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)
_LOGGER = logging.getLogger(__name__)

data = xml_parser.parse()
for user in data:
    try:
        name = ' '.join([user['u_fname'], user['u_lname']])
        balance = randint(0, 9)
        user_uid = db.add_user(name, balance)
        card_uid = user['u_number']
        db.add_card(user_uid, card_uid)
    except KeyError as e:
        pprint(user)
        raise e
import numpy as np

sys.path.insert(0,'./model')
sys.path.insert(0,'./util')
import data_loader
import data_generator
import embedding_loader
import sentence_extractor
import xml_parser

if len(sys.argv)!=2:
    print('Usage: python sentence_extract_test.py <config>')
    exit(0)

hyper_params=xml_parser.parse(sys.argv[1],flat=False)

# Build word list or entity list
loader_params=hyper_params['loader']
data_loader_params=loader_params['data_loader']
src_folder_list2build_list=loader_params['src_folder_list2build_list']
dest_folder_list2build_list=loader_params['dest_folder_list2build_list'] if 'dest_folder_list2build_list' in loader_params else None
src_folder_list2parse=loader_params['src_folder_list2parse']
dest_folder_list2parse=loader_params['dest_folder_list2parse']
list_saved_format=loader_params['list_saved_format']

my_data_loader=data_loader.data_loader(data_loader_params)
# my_data_loader.build_lists(src_folder_list2build_list,dest_folder_list2build_list,list_saved_format)
my_data_loader.load_dict()
# my_data_loader.build_idx_files(src_folder_list2parse,dest_folder_list2parse)
def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))
    try:
        assert (FLAGS.mode == 'decode')
    except:
        raise ValueError('mode must be "decode" while now it is "%s"' % FLAGS.mode)
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    try:
        assert (os.path.exists(FLAGS.log_root))
    except:
        raise ValueError('invalid log_root: %s, file or folder does not exist' % str(FLAGS.log_root))
    FLAGS.batch_size = FLAGS.beam_size

    data_manager = BinaryDataManager(binary_file=FLAGS.data_path, single_pass=True)
    data_manager.load_data()

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams', model_hp_dict.keys())(**model_hp_dict)
    model_settings = model_settings._replace(max_dec_steps=1)

    for folder in [FLAGS.article_folder, FLAGS.refer_folder, FLAGS.output_folder]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    # Launch extractive model
    cur_path = os.path.abspath('.')
    FLAGS.sentence_extract_config = os.path.abspath(FLAGS.sentence_extract_config)
    os.chdir(FLAGS.sentence_extract_root)
    sys.path.insert(0, 'run')
    sys.path.insert(0, 'util')
    import laucher
    import xml_parser
    laucher_params = xml_parser.parse(FLAGS.sentence_extract_config, flat=False)
    ext_solver = laucher.laucher(laucher_params)
    ext_solver.start()
    os.chdir(cur_path)

    # Launch abstractive model
    loaded_params = tf.global_variables()
    abs_solver = RunTimeWrapper(hp=FLAGS, model_settings=model_settings)
    abs_solver.start(loaded_params=loaded_params)

    # Preparation
    for folder in [FLAGS.article_folder, FLAGS.refer_folder, FLAGS.output_folder]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    result2write = ''
    for idx, (article, abstract) in enumerate(data_manager.text_abstract_pair):
        sys.stdout.write('Analyzing the documents %d/%d = %.1f %%\r' % (
            idx + 1, len(data_manager.text_abstract_pair),
            float(idx + 1) / float(len(data_manager.text_abstract_pair)) * 100))
        sys.stdout.flush()
        try:
            article = article.decode('ascii', errors='ignore').encode('ascii')
            sentence_list = tokenize.sent_tokenize(article)
            tokenized_article = '\n'.join(sentence_list)
            with open('tmp.txt', 'w') as fopen:
                fopen.write(tokenized_article)
            selected_sentence = ext_solver.select('tmp.txt')
            extracted_content = selected_sentence.replace('\n', ' ').lower()
            _, summary = abs_solver.run(query=extracted_content)

            # Reference and compare
            with open(FLAGS.article_folder + os.sep + '%04d_article.txt' % idx, 'w') as fopen:
                fopen.write(article)
            with open(FLAGS.refer_folder + os.sep + '%04d_reference.txt' % idx, 'w') as fopen:
                fopen.write(abstract)
            with open(FLAGS.output_folder + os.sep + '%04d_decode.txt' % idx, 'w') as fopen:
                fopen.write(summary)
            result2write += '\n\n===\n%s\n\n>>>refer:\n%s\n\n>>>output:\n%s\n' % (
                article, abstract, summary)
            if (idx + 1) % 100 == 0:
                with open('results.txt', 'w') as fopen:
                    fopen.write(result2write)
        except:
            print('ERROR while loading document %d' % idx)
            traceback.print_exc()

    ext_solver.end()
    abs_solver.end()