Example #1
def parse_xml_data_files(msdn_data_dir):
    """ Return dictionary holding function information.

    Arguments:
    msdn_data_dir -- path to the directory storing the XML data files
    """
    functions_map = {}

    # Parse main database file first
    xml_file = os.path.join(msdn_data_dir, MSDN_INFO_FILE)
    functions = xml_parser.parse(xml_file)
    for function in functions:
        functions_map[function.name] = function

    # Parse additional files
    data_files = get_data_files(msdn_data_dir)
    for file in data_files:
        xml_file = os.path.join(msdn_data_dir, file)
        additional_functions = xml_parser.parse(xml_file)

        # Merge functions or add new function
        for a_function in additional_functions:
            if a_function.name in functions_map:
                functions_map[a_function.name].merge(a_function)
            else:
                functions_map[a_function.name] = a_function
    return functions_map
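The merge step above assumes that xml_parser.parse() yields objects exposing a name attribute and a merge() method. A minimal sketch of such a record is shown below; every field other than name is purely illustrative and not taken from the original project.

# Hypothetical record type matching what Example #1 expects from xml_parser.parse():
# an object with a .name attribute and a .merge() method that folds a duplicate
# entry into the existing one. Fields other than `name` are illustrative only.
class FunctionInfo(object):
    def __init__(self, name, description='', arguments=None):
        self.name = name
        self.description = description
        self.arguments = arguments or []

    def merge(self, other):
        # Keep existing data, fill gaps from the duplicate record.
        if not self.description:
            self.description = other.description
        for arg in other.arguments:
            if arg not in self.arguments:
                self.arguments.append(arg)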
Example #3
def send_emails():
    m = Model()
    couchdb = m.get_database(C.couchdb_config['doc_db'])

    for i in m.read(C.couchdb_config['doc_db'], m.OUTSTANDING_EMAIL):
        xml = os.path.join(C.authorized_docs_folder,
                           "{}_1.xml".format(i.value['claveacceso']))
        rep_xml = parse(xml)
        dct = parse_voucher(rep_xml['comprobante'])
        dct['numeroAutorizacion'] = i.value["numeroAutorizacion"]
        dct['fechaAutorizacion'] = i.value["fechaAutorizacion"]
        pdf = os.path.join(C.authorized_docs_folder,
                           "{}_1.pdf".format(i.value['claveacceso']))
        generate(dct, 0, pdf)
        text = ''
        with open('mail.html', 'r') as ma:
            text = ma.read()

        subject = u"Documentos electronicos enviados por Fullcarga Ecuador"
        send_from = "*****@*****.**"
        send_to = i.value['email_recipient']
        files = [xml, pdf]

        send_mail(send_from, send_to, subject, text, files)
        # Once the email is sent, update the db with mail_sended = True
        m.write_mail_sended(C.couchdb_config['doc_db'], i.value['claveacceso'])
Example #4
def send_emails():
    m = Model()
    couchdb = m.get_database(C.couchdb_config['doc_db'])

    for i in m.read(C.couchdb_config['doc_db'], m.AUTHORIZED):
        xml = os.path.join(C.authorized_docs_folder,
                           "{}_1.xml".format(i.value['claveacceso']))
        rep_xml = parse(xml)
        dct = parse_voucher(rep_xml['comprobante'])
        dct['numeroAutorizacion'] = i.value["numeroAutorizacion"]
        dct['fechaAutorizacion'] = i.value["fechaAutorizacion"]
        pdf = os.path.join(C.authorized_docs_folder,
                           "{}_1.pdf".format(i.value['claveacceso']))
        generate(dct, 0, pdf)
        text = "Documento electronico No. {}".format(i.value['claveacceso'])
        subject = "Documento electronico'"
        send_from = "*****@*****.**"
        #        send_to = ["*****@*****.**", "*****@*****.**"]
        #        send_to = ["*****@*****.**"]
        send_to = ["*****@*****.**"]
        files = [xml, pdf]

        send_mail(send_from, send_to, subject, text, files)
        # Once the email is sent, update the db with mail_sended = True
        m.write_mail_sended(C.couchdb_config['doc_db'], i.value['claveacceso'])
        lm_obj = LM()
        lm_obj.write_authorization(i.value['claveacceso'],
                                   dct['numeroAutorizacion'],
                                   dct['fechaAutorizacion'])
Example #5
def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))

    try:
        assert (FLAGS.mode == 'train')
    except:
        raise ValueError('mode must be "train" while now it is "%s"' %
                         FLAGS.mode)

    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    data_manager = BinaryDataManager(binary_file=FLAGS.data_path,
                                     single_pass=True)
    data_manager.load_data()

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_enc_steps', 'max_dec_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams',
                                model_hp_dict.keys())(**model_hp_dict)
    model_settings = model_settings._replace(max_dec_steps=1)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # Launch extractive model
    cur_path = os.path.abspath('.')
    FLAGS.sentence_extract_config = os.path.abspath(
        FLAGS.sentence_extract_config)
    os.chdir(FLAGS.sentence_extract_root)
    sys.path.insert(0, 'run')
    sys.path.insert(0, 'util')
    import laucher
    import xml_parser
    laucher_params = xml_parser.parse(FLAGS.sentence_extract_config,
                                      flat=False)
    ext_solver = laucher.laucher(laucher_params)
    ext_solver.start()
    os.chdir(cur_path)

    # Launch abstractive model
    loaded_params = tf.global_variables()
    abs_model = SummarizationModel(model_settings, vocab, extra_info={})
    train_model(ext_solver=ext_solver,
                abs_model=abs_model,
                data_manager=data_manager)
Example #6
def convert_session(input_file, output_dir):
    files = {}  # filepath -> file_handler
    try:
        for element in xml_parser.parse(input_file):  # gunzip automatically
            type_ = element.pop('type')
            sensor_name = element.pop('sensorName', None)
            filepath = get_csv_filepath(output_dir, type_, sensor_name)
            if filepath not in files:
                files[filepath] = open(filepath, 'w')
                csv.writer(files[filepath]).writerow(element.keys())
            csv.writer(files[filepath]).writerow(element.values())
    finally:
        for f in files.values():
            f.close()
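convert_session relies on a get_csv_filepath helper that is not shown in this snippet. The sketch below is only an assumption about its shape (one CSV per element type, plus the sensor name when present); the real project may differ.

import os

# Hypothetical sketch of the helper used above.
def get_csv_filepath(output_dir, type_, sensor_name=None):
    name = type_ if sensor_name is None else '{}_{}'.format(type_, sensor_name)
    return os.path.join(output_dir, name + '.csv')

# Usage sketch (paths are made up):
# convert_session('session.xml.gz', '/tmp/session_csv')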
Example #7
def main(unused_argv):
    if len(unused_argv)!=1:
        raise Exception('Problem with flags: %s'%str(unused_argv))

    # start sentence extraction model
    ret_path=os.path.abspath('.')
    os.chdir(FLAGS.sentence_extract_root)
    sys.path.insert(0,'./run')
    sys.path.insert(0,'./util')
    import laucher
    import xml_parser

    laucher_params=xml_parser.parse(FLAGS.sentence_extract_config,flat=False)
    se_solver=laucher.laucher(laucher_params)
    se_solver.start()
    os.chdir(ret_path)

    loaded_params=tf.global_variables()
    try:
        assert(FLAGS.mode=='decode')
    except:
        raise ValueError('mode must be "decode" but now it is %s'%str(FLAGS.mode))
    FLAGS.log_root=os.path.join(FLAGS.log_root,FLAGS.exp_name)
    try:
        assert(os.path.exists(FLAGS.log_root))
    except:
        raise ValueError('Invalid log_root: %s'%str(FLAGS.log_root))
    FLAGS.batch_size=FLAGS.beam_size

    model_hp_list=['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std', 'max_grad_norm',
        'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage', 'cov_loss_wt', 'pointer_gen']
    model_hp_dict={}
    for key,value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key]=value
    model_settings=namedtuple('HParams',model_hp_dict.keys())(**model_hp_dict)
    model_settings=model_settings._replace(max_dec_steps=1)

    solver=RunTimeWrapper(hp=FLAGS, model_settings=model_settings)
    solver.start(loaded_params=loaded_params)
    if FLAGS.interactive_mode=='cmd':
        command_line_mode(solver,se_solver)
    elif FLAGS.interactive_mode=='cs':
        server_client_mode(solver,se_solver)
    else:
        raise ValueError('Unrecognized interactive mode: %s'%FLAGS.interactive_mode)

    solver.end()
    se_solver.end()
Example #8
def main():
    instance = request_api.YahooHandler()
    instance.process_tokens()

    xml = instance.api_call("teams")
    data = ET.fromstring(xml)

    print xml_parser.parse(data, "name")
    print xml_parser.parse(data, "team_key")
    print xml_parser.parse(data, "team_id")
Example #9
def map_drawer(scr,
               path_to_map,
               x=0,
               y=0,
               path_to_squares="images/squares.png",
               count_of_lines=16,
               count_of_column=30,
               size_of_plate=16,
               shift=5):
    y_position = y

    list_of_squares = []
    map = []

    parsed_xml = parse(path_to_map)
    squares = load(path_to_squares)

    # Build the list of square coordinates for the map
    for y_position_of_square in range(0, count_of_lines * size_of_plate,
                                      size_of_plate):
        for x_position_of_square in range(0, count_of_column * size_of_plate,
                                          size_of_plate):
            list_of_squares.append(
                (x_position_of_square, y_position_of_square))

    # Build the list of square indices from the map file
    for i in range(len(parsed_xml)):
        splitted_line = (parsed_xml[i].split(","))
        if i < len(parsed_xml) - 1:
            del splitted_line[len(splitted_line) - 1]
        map.append(splitted_line)

    scr.fill((0, 0, 0))

    # Draw the map using the list of square indices
    for line in map:
        x_position = x
        for plate_num in line:
            try:
                plate = list_of_squares[int(plate_num) - shift]
                scr.blit(squares, (x_position, y_position),
                         (plate[0], plate[1], size_of_plate, size_of_plate))
            except Exception:
                no_plate = load("images/no_plate.png")
                scr.blit(no_plate, (x_position, y_position))
            x_position += size_of_plate
        y_position += size_of_plate
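map_drawer appears to target a pygame surface (scr.fill, scr.blit, and an image load call). Below is a minimal usage sketch under that assumption, with a made-up map path; the window size simply matches the default 30x16 grid of 16-pixel plates.

import pygame

pygame.init()
screen = pygame.display.set_mode((30 * 16, 16 * 16))
map_drawer(screen, 'maps/level1.xml')  # hypothetical map file
pygame.display.flip()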
Example #10
def EnOtroIdioma(name, num):
    lista_alojamientos = xml_parser.parse(num) # By default, 1 is Spanish, 2 is English and 3 is French
    (nombre, direc, email, telefono, descripcion, web) = ('', '', '', '', '', '')
    Encontrado = False
    for alojamiento in lista_alojamientos:
        if (alojamiento['name'] == name):
            Encontrado = True
            direc = alojamiento['address'] + '. ' + alojamiento['zipcode'] + '. ' + \
                    alojamiento['subAdministrativeArea'] + '. (' + alojamiento['latitude'] + ', ' + \
                    alojamiento['longitude'] + '). ' + alojamiento['country'] + '.'
            nombre = alojamiento['name']
            email = alojamiento['email']
            telefono = alojamiento['phone']
            descripcion = alojamiento['body']
            web = alojamiento['web']
    if not Encontrado:
        nombre = 'error'
    return (nombre, direc, email, telefono, descripcion, web)
Example #11
def process(params: list) -> list:
    db = fetch_db()
    meta = fetch_meta(db)
    doc = params[0]
    target = params[1]
    count = 0
    exceptions = []
    documents = []
    print(datetime.now(), doc, 'Begin')
    for path, _, names in os.walk(doc):
        for name in names:
            if name.endswith('.zip'):
                kind = os.path.join(target, os.path.split(path)[-1])
                fzip = ZipFile(os.path.join(path, name))
                target_folder = os.path.join(kind, name[:-4])
                fzip.extractall(target_folder)
                for zipName in os.listdir(target_folder):
                    if 'TOC' not in zipName and '.xml' in zipName:
                        count += 1
                        if count % 1000 == 0:
                            print(datetime.now(), doc, count)
                        with open(os.path.join(target_folder, zipName),
                                  'rb') as f:
                            try:
                                patent = parse(f.read())
                                documents.append(patent)
                            except Exception as e:
                                exceptions.append((zipName, e))
    print(datetime.now(), doc, count)
    print(datetime.now(), doc, 'Begin insert')
    try:
        result = meta.insert_many(documents, False)
        print(datetime.now(), doc, len(result.inserted_ids))
    except BulkWriteError as e:
        print(datetime.now(), doc, 'writeErrors',
              len(e.details['writeErrors']))
        exceptions.append((doc, 'writeErrors', len(e.details['writeErrors'])))
    print(datetime.now(), doc, 'End')
    exceptions.append((doc, count))
    return exceptions
Example #12
            
        start += size
        
        if name in [" r", " s"]:
            # Skip return address and base pointer
            current = arguments
            continue
        
        current.append({'name' : name})

    return (local_variables, arguments)

wrapper = TextWrapper(break_long_words=False, width=70)

filename = os.path.join(os.path.split(sys.modules[__name__].__file__)[0], "msdn.xml")
functions = parse(filename)

library = idc.GetInputFile().lower()

functions_map = { }

for lf in functions:
    functions_map[lf.name] = lf

assigned = 0
not_assigned = 0
    
for ea in Functions(0, 0xFFFFFFFF):
    function_object = idaapi.get_func(ea)
    if not function_object:
        continue
Example #13
def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))

    try:
        assert (FLAGS.mode == 'decode')
    except:
        raise ValueError('mode must be "decode" but now it is %s' %
                         str(FLAGS.mode))
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    try:
        assert (os.path.exists(FLAGS.log_root))
    except:
        raise ValueError('Invalid log_root: %s' % str(FLAGS.log_root))
    FLAGS.batch_size = FLAGS.beam_size

    data_manager = BinaryDataManager(binary_file=FLAGS.data_path,
                                     single_pass=True)
    data_manager.load_data()

    # Loading the external information first
    extra_info = {}
    if os.path.exists(FLAGS.external_config):
        external_params = xml_parser.parse(FLAGS.external_config, flat=False)

        if 'sent2vec_params' in external_params:
            sent2vec_params = external_params['sent2vec_params']
            convnet_params = sent2vec_params['convnet_params']
            convnet_model2load = sent2vec_params['model2load']

            gamma = 0.2 if not 'gamma' in sent2vec_params else sent2vec_params[
                'gamma']

            my_convnet = convnet.convnet(convnet_params)
            my_convnet.train_validate_test_init()
            my_convnet.load_params(file2load=convnet_model2load)

            fixed_vars = tf.global_variables()
            fixed_vars.remove(my_convnet.embedding_matrix)

            extra_info['sent2vec'] = {'gamma': gamma, 'network': my_convnet}
            extra_info['fixed_vars'] = fixed_vars

        if 'key_phrases' in external_params:
            # TODO: pass some parameters to import the results of key-phrase extraction, or \
            # parameters for online key-phrase extraction
            extra_info['key_phrases'] = {}
            raise NotImplementedError(
                'Key phrases part has not been implemented yet')

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams',
                                model_hp_dict.keys())(**model_hp_dict)
    model_settings = model_settings._replace(max_dec_steps=1)

    for folder in [
            FLAGS.article_folder, FLAGS.refer_folder, FLAGS.output_folder
    ]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    solver = RunTimeWrapper(hp=FLAGS,
                            model_settings=model_settings,
                            extra_info=extra_info)
    solver.start()
    result2write = ''
    for idx, (article, abstract) in enumerate(data_manager.text_abstract_pair):
        sys.stdout.write(
            'Analyzing the documents %d/%d = %.1f%% \r' %
            (idx + 1, len(data_manager.text_abstract_pair), float(idx + 1) /
             float(len(data_manager.text_abstract_pair)) * 100))
        sys.stdout.flush()
        _, summary = solver.run(query=article)
        abstract = '\n'.join(abstract)
        # Reference and compare
        with open(FLAGS.article_folder + os.sep + '%04d_article.txt' % idx,
                  'w') as fopen:
            fopen.write(article)
        with open(FLAGS.refer_folder + os.sep + '%04d_reference.txt' % idx,
                  'w') as fopen:
            fopen.write(abstract)
        with open(FLAGS.output_folder + os.sep + '%04d_decode.txt' % idx,
                  'w') as fopen:
            fopen.write(summary)
        result2write += '\n\n===\n%s\n\n>>>refer:\n%s\n\n>>>output:\n%s\n' % (
            article, abstract, summary)
        if (idx + 1) % 100 == 0:
            with open('results.txt', 'w') as fopen:
                fopen.write(result2write)
    solver.end()
Example #14
                        type=bool)
    parser.add_argument('--autorships_dump',
                        help='autorships for papers dump file name')
    parser.add_argument('--citations',
                        help='enable to extract citations for papers',
                        default=False,
                        type=bool)
    parser.add_argument('--citations_dump',
                        help='citations for papers dump file name')
    parser.add_argument('--citations_start',
                        help='citations start point',
                        type=int)
    parser.add_argument('--citations_finish',
                        help='citations finish point',
                        type=int)

    args = parser.parse_args()

    print('Start scraping')
    scrape(args.subject, args.startpage, args.finishpage, args.dump_name)
    print('Scraping finished. Start parsing dump')
    parse(args.dump_name, args.dataset_name)
    print('Finished parsing')

    if args.autorships:
        autorships(args.dataset_name, args.autorships_dump)

    if args.citations:
        citations(args.dataset_name, args.citations_dump, args.citations_start,
                  args.citations_finish)
Example #15
path_xml = '../SemCor/semcor.data.xml'  # path of resources needed to build the mappings among various sense representations.
path_inst2sk = '../SemCor/semcor.gold.key.txt'
path_bn2wn = '../resources/babelnet2wordnet.tsv'
path_bn2lex = '../resources/babelnet2lexnames.tsv'
path_bn2wnd = '../resources/babelnet2wndomains.tsv'

inst2sk_dict = get_dictionary(
    path_inst2sk,
    0)  # collection of the dictionaries to realise those mappings
wn2bn_dict = get_dictionary(path_bn2wn, 1)
bn2lex_dict = get_dictionary(path_bn2lex, 0)
bn2wnDom_dict = get_bn2wnDomains(path_bn2wnd)

inputs, labels_BN, bnIds = parse(
    path_xml, inst2sk_dict, wn2bn_dict, bn2lex_dict,
    'BN')  # parsing of the .xml file for retrieving fine-grained (BN) data
_, labels_WND, wndIds = parse(
    path_xml, inst2sk_dict, wn2bn_dict, bn2wnDom_dict,
    'WND')  # parsing of the .xml file for retrieving coarse-grained (WND) data
_, labels_LEX, lexIds = parse(
    path_xml, inst2sk_dict, wn2bn_dict, bn2lex_dict,
    'LN')  # parsing of the .xml file for retrieving coarse-grained (LEX) data

voc_words = get_vocabulary(inputs, 0.0)  # collect vocabulary of input lemmas

voc_senses_BN = get_vocab_senses(bnIds, 0.0)  # collect vocabulary of FG senses
voc_joint_BN = voc_words + voc_senses_BN  # create joint voc.

voc_senses_WND = get_vocab_senses(wndIds,
                                  0.0)  # collect vocabulary of CG-WND senses
Example #16
        welcome_msg_fg_color = 'red'
        welcome_msg_text += '\nПлохой результат'
    elif normalized_user_result <= 0.75:
        welcome_msg_fg_color = 'blue'
        welcome_msg_text += '\nХороший результат'
    else:
        welcome_msg_fg_color = 'green'
        welcome_msg_text += '\nОтличный результат'

    welcome_msg.config(text=welcome_msg_text, fg=welcome_msg_fg_color, font=resourses.constants.TEST_RESULT_FONT)


if __name__ == '__main__':

    path = 'resourses/questions.xml'
    parsed = xml_parser.parse(path)
    # parsed.popitem()

    root = Tk()
    root.minsize(width=600, height=500)
    # root.maxsize(width=600, height=500)
    root.title('Тестирование')

    main_frame = Frame(root)
    main_frame.config(bg="#FFF")
    main_frame.pack(expand=YES, fill=BOTH)

    # label = Label(f, text=resourses.constants.intro_label)
    # label.config(height=5, width=30)
    # label.config(font=resourses.constants.label_font)
    # label.pack()
Example #17
def main(unused_argv):
    if len(unused_argv
           ) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    # Loading the external information first
    extra_info = {}
    if os.path.exists(FLAGS.external_config):
        external_params = xml_parser.parse(FLAGS.external_config, flat=False)

        if 'sent2vec_params' in external_params:
            sent2vec_params = external_params['sent2vec_params']
            convnet_params = sent2vec_params['convnet_params']
            convnet_model2load = sent2vec_params['model2load']

            gamma = 0.2 if not 'gamma' in sent2vec_params else sent2vec_params[
                'gamma']

            my_convnet = convnet.convnet(convnet_params)
            my_convnet.train_validate_test_init()
            my_convnet.load_params(file2load=convnet_model2load)

            fixed_vars = tf.global_variables()
            fixed_vars.remove(my_convnet.embedding_matrix)

            extra_info['sent2vec'] = {'gamma': gamma, 'network': my_convnet}
            extra_info['fixed_vars'] = fixed_vars

        if 'key_phrases' in external_params:
            # TODO: pass some parameters to import the results of key-phrase extraction, or \
            # parameters for online key-phrase extraction
            extra_info['key_phrases'] = {}
            raise NotImplementedError(
                'Key phrases part has not been implemented yet')

    tf.logging.set_verbosity(
        tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # If in decode mode, set batch_size = beam_size
    # Reason: in decode mode, we decode one example at a time.
    # On each step, we have beam_size-many hypotheses in the beam, so we need to make a batch of these hypotheses.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.iteritems():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data
    batcher = Batcher(FLAGS.data_path,
                      vocab,
                      hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'train':
        print "creating model..."
        model = SummarizationModel(hps, vocab, extra_info)
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab, extra_info)
        run_eval(model, batcher, vocab)
    elif hps.mode == 'decode':
        decode_model_hps = hps  # This will be the hyperparameters for the decoder model
        decode_model_hps = hps._replace(
            max_dec_steps=1
        )  # The model is configured with max_dec_steps=1 because we only ever run one step of the decoder at a time (to do beam search). Note that the batcher is initialized with max_dec_steps equal to e.g. 100 because the batches need to contain the full summaries
        model = SummarizationModel(decode_model_hps, vocab, extra_info)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        decoder.decode(
        )  # decode indefinitely (unless single_pass=True, in which case decode the dataset exactly once)
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
Example #18
                        type=int)

    args = parser.parse_args()

    #year = args.year
    #subject = args.subject

    subjects = [
        'AGRI', 'ARTS', 'BIOC', 'BUSI', 'CENG', 'CHEM', 'COMP', 'DECI', 'DENT',
        'EART', 'ECON', 'ENER', 'ENGI', 'ENVI', 'HEAL', 'IMMU', 'MATE', 'MATH',
        'MEDI', 'NEUR', 'NURS', 'PHAR', 'PHYS', 'PSYC', 'SOCI', 'VETE', 'MULT'
    ]

    for subject in subjects:
        for year in range(2000, 2020):
            year_str = str(year)
            dumpName = subject + '_' + year_str + '_' + 'dump'
            datasetName = subject + '_' + year_str + '_' + 'dataset'

            print('Start scraping')
            scrape(dumpName, year_str, subject, args)
            print('Scraping finished. Start parsing dump')
            parse(dumpName, datasetName)
            print('Finished parsing')

    # if args.autorships:
    #     autorships(args.dataset_name, args.autorships_dump, args.apikey)

    # if args.citations:
    #     citations(args.dataset_name, args.citations_dump, args.citations_start, args.citations_finish)
Example #19
        if name in [" r", " s"]:
            # Skip return address and base pointer
            current = arguments
            continue

        current.append({'name': name})

    return (local_variables, arguments)


wrapper = TextWrapper(break_long_words=False, width=70)

filename = os.path.join(
    os.path.split(sys.modules[__name__].__file__)[0], "msdn.xml")
functions = parse(filename)

library = idc.GetInputFile().lower()

functions_map = {}

for lf in functions:
    functions_map[lf.name] = lf

assigned = 0
not_assigned = 0

for ea in Functions(0, 0xFFFFFFFF):
    function_object = idaapi.get_func(ea)
    if not function_object:
        continue
Example #20
import numpy as np
if sys.version_info.major==2:
    import cPickle as pickle
else:
    import pickle

sys.path.insert(0, './util')
sys.path.insert(0, './model')
import convnet
import xml_parser

if len(sys.argv)<2:
    print('Usage: python rouge.py <config>')
    exit(0)

hyper_param=xml_parser.parse(sys.argv[1])

saved_file=hyper_param['saved_file']
refer_folder=hyper_param['refer_folder']
output_folder=hyper_param['output_folder']
refer_suffix=hyper_param['refer_suffix'] if 'refer_suffix' in hyper_param else 'reference'
output_suffix=hyper_param['output_suffix'] if 'output_suffix' in hyper_param else 'decode'

convnet_params=hyper_param['convnet_params']
model2load=hyper_param['model2load']
word2idx_file=hyper_param['word2idx_file']
unk_idx=hyper_param['unk_idx']

# Collect files
refer_name2file={}
output_name2file={}
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--image_dir',
        type=str,
        required=True,
        help='The path of the folder that stores source images')
    parser.add_argument('-a',
                        '--annotation_dir',
                        type=str,
                        required=True,
                        help='The path of the folder that stores annotations')
    parser.add_argument(
        '-t',
        '--testset',
        action='store_true',
        help='Operate on test set. If unspecified then train set.')
    args = parser.parse_args()

    IMAGE_DIR = args.image_dir
    LABEL_DIR = args.annotation_dir
    CLASSIFICATION_DIR = './classification'
    LOCALIZATION_DIR = './localization'
    CLASSIFICATION_TARGET = ''
    LOCALIZATION_TARGET = ''
    CLASSIFICATION_TEXT = ''
    LOCALIZATION_TEXT = ''

    if args.testset:
        CLASSIFICATION_TARGET = os.path.join(CLASSIFICATION_DIR, 'test')
        LOCALIZATION_TARGET = os.path.join(LOCALIZATION_DIR, 'test')
        CLASSIFICATION_TEXT = os.path.join(CLASSIFICATION_DIR, 'test.txt')
        LOCALIZATION_TEXT = os.path.join(LOCALIZATION_DIR, 'test.txt')
    else:
        CLASSIFICATION_TARGET = os.path.join(CLASSIFICATION_DIR, 'train')
        LOCALIZATION_TARGET = os.path.join(LOCALIZATION_DIR, 'train')
        CLASSIFICATION_TEXT = os.path.join(CLASSIFICATION_DIR, 'train.txt')
        LOCALIZATION_TEXT = os.path.join(LOCALIZATION_DIR, 'train.txt')

    classification_text = open(CLASSIFICATION_TEXT, 'w')
    localization_text = open(LOCALIZATION_TEXT, 'w')
    for parent, dirnames, filenames in os.walk(LABEL_DIR):
        total = len(filenames)
        interval = total / 50
        count = 1
        bar = ''
        for xml_file in filenames:

            objects = xml_parser.parse(os.path.join(LABEL_DIR, xml_file))
            label_vector = extract_multi_label(objects)

            img_file = xml_file.split('.')[0] + '.jpg'
            write_multi_label(classification_text, img_file, label_vector)

            img = cv2.imread(os.path.join(IMAGE_DIR, img_file))

            for key, obj in objects.items():
                curr_img = img[obj['ymin']:obj['ymax'],
                               obj['xmin']:obj['xmax']]
                output_img_name = xml_file.split('.')[0] + '_' + str(
                    key) + '.jpg'

                curr_img = cv2.resize(curr_img, (256, 256))
                cv2.imwrite(os.path.join(LOCALIZATION_TARGET, output_img_name),
                            curr_img)

                write_multi_label(localization_text, output_img_name,
                                  [label_map[obj['type']]])

            if not args.testset:
                curr_img = cv2.resize(img, (256, 256))
                cv2.imwrite(os.path.join(CLASSIFICATION_TARGET, img_file),
                            curr_img)

            curr = int(float(count) / total * 100)
            remain = 50 - len(bar) - 1

            if count < total:
                sys.stdout.write(
                    str(curr) + '% [' + bar + '>' + remain * ' ' + ']\r')
                sys.stdout.flush()
            else:
                sys.stdout.write(str(curr) + '% [' + bar + ']\r')
                sys.stdout.flush()

            count += 1
            if count % interval == 0:
                bar += '='

    print
    classification_text.close()
    localization_text.close()
Example #22
def alojamientos(request):
    titulo = ("Lista de todos los alojamientos de Madrid: ")
    # Show the filter and update forms
    form_filtrar = FormFiltrar()
    form_act = FormActualizar()
    contenido = form_filtrar + form_act

    if request.method == "GET":
        try:
            lista_alojamientos = Alojamiento.objects.all()
            contenido += "<p>"
            for alojamiento in lista_alojamientos:
                #web = unicode.encode(alojamiento.web)
                contenido += "<li>" + str(alojamiento.nombre) + ": " + "<a href='/" + \
                            str(alojamiento.web) + "'>" + str(alojamiento.web) + "</a>"
            contenido += '</p>'
        except Alojamiento.DoesNotExist:
            contenido = ("No hay alojamientos disponibles, debe usted actualizar dichos alojamientos")

    # If a filter or an update was submitted
    elif request.method == "POST":
        contenido = form_filtrar + form_act
        # Filter the accommodations
        if request.POST.get("tipo") == "Filtrar":
            alojamientos, num = FiltrarAlojamientos(request)   # filter accommodations
            contenido += ImprimirAloj(request, alojamientos, num)  # print the accommodations
        # Save the accommodations
        elif request.POST.get("tipo") == "actualizar":
            lista_alojamientos = xml_parser.parse(1) # By default, 1 is Spanish, 2 is English and 3 is French
            for alojamiento in lista_alojamientos:
                direc = alojamiento['address'] + '. ' + alojamiento['zipcode'] + '. ' + \
                        alojamiento['subAdministrativeArea'] + '. (' + alojamiento['latitude'] + ', ' + \
                        alojamiento['longitude'] + '). ' + alojamiento['country'] + '.'

                alojamiento_nuevo = Alojamiento(nombre=alojamiento['name'], email=alojamiento['email'],
                                        telefono=alojamiento['phone'], descripcion=alojamiento['body'],
                                        web=alojamiento['web'], direccion=direc,
                                        categoria=alojamiento['Categoria'], subcategoria=alojamiento['SubCategoria'])
                alojamiento_nuevo.save()
                # Save 5 images
                imagen1, imagen2, imagen3, imagen4, imagen5 = ('', '', '', '', '')
                try:
                    imagenes = alojamiento['imagenes']
                    imagen1 = imagenes.split(' , ')[0]
                    imagen2 = imagenes.split(' , ')[1]
                    imagen3 = imagenes.split(' , ')[2]
                    imagen4 = imagenes.split(' , ')[3]
                    imagen5 = imagenes.split(' , ')[4]
                except IndexError:
                    pass

                imagenes_nuevas = Imagenes(alojamiento=alojamiento['name'], url1=imagen1,
                                    url2=imagen2, url3=imagen3, url4=imagen4, url5=imagen5)
                imagenes_nuevas.save()

            return HttpResponseRedirect('/alojamientos') # Refresh the accommodation list

    else:
        contenido = "ERROR"

    # Pass along the navigation menu data and the style of the chosen page
    navegacion = Menu(False)
    try:
        usuarios = ConfiguracionUsuario.objects.all()
        adicional = PagUsers(usuarios)
    except ConfiguracionUsuario.DoesNotExist:
        adicional = ""
    try:
        user = ConfiguracionUsuario.objects.get(user=request.user)
        color = user.color
        letra = user.letra
    except ConfiguracionUsuario.DoesNotExist:
        color, letra = ("", "")
    rendered = Render(request, color, letra, titulo, navegacion, contenido, adicional)
    return HttpResponse(rendered)
Example #23
import logging
from pprint import pprint
from random import randint

import db
import xml_parser

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
_LOGGER = logging.getLogger(__name__)

data = xml_parser.parse()

for user in data:
    try:
        name = ' '.join([user['u_fname'], user['u_lname']])
        balance = randint(0, 9)
        user_uid = db.add_user(name, balance)
        card_uid = user['u_number']
        db.add_card(user_uid, card_uid)
    except KeyError as e:
        pprint(user)
        raise e
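The loop above touches only three keys of each parsed record. A record shaped like the following would satisfy it; the keys come from the code, the values are invented.

# Illustrative record as consumed by the loop above (values are made up).
sample_user = {
    'u_fname': 'Ada',
    'u_lname': 'Lovelace',
    'u_number': '04A3B2C1',  # used as the card UID
}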
Example #24
import numpy as np

sys.path.insert(0,'./model')
sys.path.insert(0,'./util')

import data_loader
import data_generator
import embedding_loader
import sentence_extractor
import xml_parser

if len(sys.argv)!=2:
    print('Usage: python sentence_extract_test.py <config>')
    exit(0)

hyper_params=xml_parser.parse(sys.argv[1],flat=False)

# Build word list or entity list
loader_params=hyper_params['loader']
data_loader_params=loader_params['data_loader']
src_folder_list2build_list=loader_params['src_folder_list2build_list']
dest_folder_list2build_list=loader_params['dest_folder_list2build_list'] if 'dest_folder_list2build_list' in loader_params else None
src_folder_list2parse=loader_params['src_folder_list2parse']
dest_folder_list2parse=loader_params['dest_folder_list2parse']
list_saved_format=loader_params['list_saved_format']

my_data_loader=data_loader.data_loader(data_loader_params)
# my_data_loader.build_lists(src_folder_list2build_list,dest_folder_list2build_list,list_saved_format)
my_data_loader.load_dict()
# my_data_loader.build_idx_files(src_folder_list2parse,dest_folder_list2parse)
Example #25
def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))

    try:
        assert (FLAGS.mode == 'decode')
    except:
        raise ValueError('mode must be "decode" while now it is "%s"' %
                         FLAGS.mode)

    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    try:
        assert (os.path.exists(FLAGS.log_root))
    except:
        raise ValueError('invalid log_root: %s, file or folder does not exist' %
                         str(FLAGS.log_root))
    FLAGS.batch_size = FLAGS.beam_size

    data_manager = BinaryDataManager(binary_file=FLAGS.data_path,
                                     single_pass=True)
    data_manager.load_data()

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams',
                                model_hp_dict.keys())(**model_hp_dict)
    model_settings = model_settings._replace(max_dec_steps=1)

    for folder in [
            FLAGS.article_folder, FLAGS.refer_folder, FLAGS.output_folder
    ]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    # Launch extractive model
    cur_path = os.path.abspath('.')
    FLAGS.sentence_extract_config = os.path.abspath(
        FLAGS.sentence_extract_config)
    os.chdir(FLAGS.sentence_extract_root)
    sys.path.insert(0, 'run')
    sys.path.insert(0, 'util')
    import laucher
    import xml_parser
    laucher_params = xml_parser.parse(FLAGS.sentence_extract_config,
                                      flat=False)
    ext_solver = laucher.laucher(laucher_params)
    ext_solver.start()
    os.chdir(cur_path)

    # Launch abstractive model
    loaded_params = tf.global_variables()
    abs_solver = RunTimeWrapper(hp=FLAGS, model_settings=model_settings)
    abs_solver.start(loaded_params=loaded_params)

    # Preparation
    for folder in [
            FLAGS.article_folder, FLAGS.refer_folder, FLAGS.output_folder
    ]:
        if not os.path.exists(folder):
            os.makedirs(folder)

    result2write = ''
    for idx, (article, abstract) in enumerate(data_manager.text_abstract_pair):
        sys.stdout.write(
            'Analyzing the documents %d/%d = %.1f %%\r' %
            (idx + 1, len(data_manager.text_abstract_pair), float(idx + 1) /
             float(len(data_manager.text_abstract_pair)) * 100))
        sys.stdout.flush()
        try:
            article = article.decode('ascii', errors='ignore').encode('ascii')
            sentence_list = tokenize.sent_tokenize(article)
            tokenized_article = '\n'.join(sentence_list)
            with open('tmp.txt', 'w') as fopen:
                fopen.write(tokenized_article)
            selected_sentence = ext_solver.select('tmp.txt')
            extracted_content = selected_sentence.replace('\n', ' ').lower()
            _, summary = abs_solver.run(query=extracted_content)
            # Reference and compare
            with open(FLAGS.article_folder + os.sep + '%04d_article.txt' % idx,
                      'w') as fopen:
                fopen.write(article)
            with open(FLAGS.refer_folder + os.sep + '%04d_reference.txt' % idx,
                      'w') as fopen:
                fopen.write(abstract)
            with open(FLAGS.output_folder + os.sep + '%04d_decode.txt' % idx,
                      'w') as fopen:
                fopen.write(summary)
            result2write += '\n\n===\n%s\n\n>>>refer:\n%s\n\n>>>output:\n%s\n' % (
                article, abstract, summary)
            if (idx + 1) % 100 == 0:
                with open('results.txt', 'w') as fopen:
                    fopen.write(result2write)
        except:
            print('ERROR while loading document %d' % idx)
            traceback.print_exc()

    ext_solver.end()
    abs_solver.end()