def __init__(self, dict_, load=False):
    super().__init__(dict_, load)
    self.type = self.dict['type'] = 'circle'
    self.radius = get_from_dict(self.dict, 'radius', default=None, write_default=True)
    self.center_coord = get_from_dict(self.dict, 'center_coord', default=[0.0, 0.0], write_default=True)
    if isinstance(self.center_coord, list):
        # np.float was removed in recent NumPy; use an explicit dtype.
        self.center_coord = np.array(self.center_coord, dtype=np.float64)
    # By OpenCV convention, the origin is at the top-left corner of the picture.
    self.x0 = self.center_coord[0] - self.radius
    self.x1 = self.center_coord[0] + self.radius
    self.y0 = self.center_coord[1] - self.radius
    self.y1 = self.center_coord[1] + self.radius
    self.xy_range = (self.x0, self.y0, self.x1, self.y1)
    self.square_min_size = self.width = self.height = 2 * self.radius
    self.border_region_width = search_dict(
        self.dict, ['border_region_width'],
        default=0.03 * self.square_min_size, write_default=True)
    self.get_random_max = self.get_random_square = self.get_random_max_rectangle
    self.avoid_border = self.avoid_border_circle
    self.out_of_region = self.out_of_region_circle
    self.get_random_xy = self.get_random_xy_circle
    self.plot_arena = self.plot_arena_plt
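# A minimal, runnable sketch of the geometry the circle constructor derives
# from its config dict: the bounding box (x0, y0, x1, y1) and the square
# size. The dict keys mirror the constructor above; the class itself is not
# redefined here, so this only illustrates the derived quantities.
import numpy as np

arena_dict = {'type': 'circle', 'radius': 1.0, 'center_coord': [0.0, 0.0]}
cx, cy = np.array(arena_dict['center_coord'], dtype=np.float64)
r = arena_dict['radius']
xy_range = (cx - r, cy - r, cx + r, cy + r)
assert xy_range == (-1.0, -1.0, 1.0, 1.0)
square_min_size = 2 * r  # width == height == diameter for a circle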
def __init__(self, dict_, load=False, options=None):
    if options is not None:
        self.receive_options(options)
    self.dict = dict_
    self.epoch_now = 0
    self.epoch_num = self.dict['epoch_num']
    self.epoch_end = self.epoch_num - 1
    # save directory setting
    self.save_model_path = search_dict(
        self.dict, ['save_model_path', 'save_dir_model', 'save_path_model'],
        default='./SavedModels/', write_default=True,
        write_default_key='save_model_path')
    ensure_path(self.save_model_path)
    self.save_model = get_from_dict(self.dict, 'save_model', default=True, write_default=True)
    self.save_after_train = get_from_dict(self.dict, 'save_after_train', default=True, write_default=True)
    self.save_before_train = get_from_dict(self.dict, 'save_before_train', default=True, write_default=True)
    if self.save_model:
        # Default to one checkpoint every tenth of training, matching the
        # sibling Trainer below; a boolean is not a usable interval.
        self.save_interval = get_from_dict(
            self.dict, 'save_model_interval',
            default=int(self.epoch_num / 10), write_default=True)
    self.anal_path = search_dict(self.dict, ['anal_path'], default='./', write_default=True)
    ensure_path(self.anal_path)
def __init__(self, dict_, load=False, options=None):
    # `options` was referenced but missing from the original signature.
    if options is not None:
        self.receive_options(options)
    self.dict = dict_
    self.epoch_num = self.dict['epoch_num']
    self.batch_num = self.dict['batch_num']
    self.batch_size = self.dict['batch_size']
    if not hasattr(self, 'anal_path'):
        self.anal_path = self.dict.setdefault('anal_path', './anal/')
    self.epoch_index = 0
    self.epoch_end = self.epoch_num - 1
    # save directory setting
    self.save_path = search_dict(
        self.dict, ['save_path', 'save_model_path', 'save_dir_model'],
        default='./saved_models/', write_default=True,
        write_default_key='save_path')
    ensure_path(self.save_path)
    self.save = search_dict(self.dict, ['save', 'save_model'], default=True, write_default=True)
    self.save_after_train = get_from_dict(self.dict, 'save_after_train', default=True, write_default=True)
    self.save_before_train = get_from_dict(self.dict, 'save_before_train', default=True, write_default=True)
    self.anal_before_train = get_from_dict(self.dict, 'anal_before_train', default=True, write_default=True)
    if self.save:
        self.save_interval = search_dict(
            self.dict, ['save_interval', 'save_model_interval'],
            default=int(self.epoch_num / 10), write_default=True)
    self.test_performs = self.dict['test_performs'] = {}
    self.train_performs = self.dict['train_performs'] = {}
    self.anal_model = self.dict.setdefault('anal_model', True)
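# The helpers `get_from_dict`, `search_dict`, and `ensure_path` used by the
# two trainer constructors above are not defined in this file. The sketch
# below is an assumption reconstructed purely from their call sites, not the
# project's actual implementation.
import os

def get_from_dict(dict_, key, default=None, write_default=False):
    # With a list of keys (as in the RNN forward pass further down), return
    # one value per key; otherwise look up a single key, optionally writing
    # the default back into the dict on a miss.
    if isinstance(key, list):
        return [dict_[k] for k in key]
    if key in dict_:
        return dict_[key]
    if write_default:
        dict_[key] = default
    return default

def search_dict(dict_, keys, default=None, write_default=False,
                write_default_key=None):
    # Try each alias in order; on a miss, optionally store the default under
    # write_default_key (or the first alias).
    for key in keys:
        if key in dict_:
            return dict_[key]
    if write_default:
        dict_[write_default_key if write_default_key is not None else keys[0]] = default
    return default

def ensure_path(path):
    # Create the directory if it does not already exist.
    os.makedirs(path, exist_ok=True)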
def get_dados_pl(self, json_projeto, projeto, data_projeto, ong_name):
    """
    Returns a dictionary with all PL data

    Args
    -------
    json_projeto: dict -> json returned by the specific PL request
    projeto: str -> PL id
    data_projeto: str -> PL date
    ong_name: str -> ONG name

    Returns
    --------
    dict -> all PL data
    """
    crawl = CrawlSenado()
    url_pl = (constants.URL_WEB_SENADO +
              f"web/atividade/materias/-/materia/{projeto}")
    try:
        situacao_pl = (
            json_projeto['DetalheMateria']['Materia']['SituacaoAtual']
            ['Autuacoes']['Autuacao']['Situacao']['DescricaoSituacao'])
    except TypeError:
        # 'Autuacao' can be a list; take the first entry.
        situacoes = (json_projeto['DetalheMateria']['Materia']
                     ['SituacaoAtual']['Autuacoes'])
        situacao_pl = (
            situacoes['Autuacao'][0]['Situacao']['DescricaoSituacao'])
    dados_pl = {
        "ongName": ong_name,
        "ementa": utils.get_from_dict(self.campos_banco["ementa"], json_projeto),
        "tramitacao": crawl.crawl_tramitacao(
            json_projeto["DetalheMateria"]["Materia"]
            ["IdentificacaoMateria"]["CodigoMateria"]),
        # capitalize() already lowercases the remaining characters
        "situacao": situacao_pl.capitalize(),
        "sigla": utils.get_from_dict(self.campos_banco["sigla"], json_projeto),
        "numero": utils.get_from_dict(self.campos_banco["numero"], json_projeto).strip("0"),
        "ano": utils.get_from_dict(self.campos_banco["ano"], json_projeto),
        "data": data_projeto,
        "urlPL": url_pl,
        "casa": "Senado"
    }
    return dados_pl
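# Note that `utils.get_from_dict` in this scraper code takes (path, dict) —
# the reverse argument order of the trainer helper above — and walks a list
# of keys/indices into nested JSON (see the literal path used in
# get_dados_autor further down). A minimal sketch of how utils.py might
# define it; the implementation is an assumption:
from functools import reduce
import operator

def get_from_dict(path, data):
    # Follow each key or list index in `path` down into nested dicts/lists.
    return reduce(operator.getitem, path, data)

# e.g. get_from_dict(['DetalheMateria', 'Materia', 'Autoria', 'Autor', 0,
#                     'NomeAutor'], json_projeto)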
def __init__(self, dict_, load=False):
    super().__init__(dict_, load)
    self.width = self.dict['width']  # maximum rectangle
    self.height = self.dict['height']
    self.type_ = self.type = self.dict['type']
    self.center_coord = get_from_dict(self.dict, 'center_coord', default=[0.0, 0.0], write_default=True)
    # By OpenCV convention, the origin is at the top-left corner of the picture.
    self.x0 = self.center_coord[0] - self.width / 2
    self.x1 = self.center_coord[0] + self.width / 2
    self.y0 = self.center_coord[1] - self.height / 2
    self.y1 = self.center_coord[1] + self.height / 2
    self.xy_range = (self.x0, self.y0, self.x1, self.y1)
    self.square_min_size = min(self.width, self.height)
    self.get_random_max = self.get_random_square = self.get_random_max_rectangle
    # set self.edge_num
    self.edge_num = get_from_dict(self.dict, 'edge_num', default=None, write_default=True)
    if self.edge_num is None:
        if self.type in ['square', 'rectangle', 'square_max', 'rec_max']:
            self.edge_num = self.dict['edge_num'] = 4
            self.get_random_xy = self.get_random_xy_max
        elif isinstance(self.type, int):
            self.edge_num = self.dict['edge_num'] = self.type_
            self.get_random_xy = self.get_random_xy_polygon
        else:
            raise Exception('Arena_Polygon: Cannot calculate edge_num.')
    self.border_region_width = search_dict(
        self.dict, ['border_region_width'],
        default=0.03 * self.square_min_size, write_default=True)
    if self.type in ['rec_max', 'square_max']:
        vertices = np.array([[self.x0, self.y0], [self.x1, self.y0],
                             [self.x1, self.y1], [self.x0, self.y1]])
    else:
        self.rotate = get_from_dict(self.dict, 'rotate', default=0.0, write_default=True)
        vertices = get_polygon_regular(edge_num=self.edge_num,
                                       square_size=self.square_min_size,
                                       direct_offset=self.rotate,
                                       center_coord=self.center_coord)
    # standardize arena_type str.
    self.type = self.dict['type'] = 'polygon'
    edge_vecs = get_polygon_vecs(vertices)
    edge_norms, edge_norms_theta = get_polygon_norms(vertices, edge_vecs)
    set_dict_and_instance_variable(self, self.dict, locals(),
                                   keys=['vertices', 'edge_vecs', 'edge_norms', 'edge_norms_theta'])
    self.out_of_region = self.out_of_region_polygon
    self.avoid_border = self.avoid_border_polygon
    self.plot_arena = self.plot_arena_plt
def forward(self, x, step_num=None):  # x: [batch_size, C x H x W]
    if step_num is None:
        step_num = self.step_num
    act_list = []
    output_list = []
    x = x.view(x.size(0), -1)
    self.prep_input(x)  # [step_num, batch_size, N_num]
    s = None
    h = None
    for time in range(step_num):
        state = self.forward_N(s=s, h=h, i=self.get_input(time))
        s, u, h, o = get_from_dict(state, ['s', 'u', 'h', 'o'])
        act_list.append(u)     # [batch_size, N_num]
        output_list.append(o)  # [batch_size, output_num]
    output_list = [torch.unsqueeze(o, 1) for o in output_list]
    act_list = [torch.unsqueeze(a, 1) for a in act_list]
    output = torch.cat(output_list, dim=1)  # [batch_size, step_num, output_num]
    act = torch.cat(act_list, dim=1)        # [batch_size, step_num, N_num]
    return {
        'output': output,
        'act': act
    }
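# A minimal, self-contained sketch of the stacking idiom used at the end of
# forward(): per-step [batch, dim] tensors are collected in a list, given a
# new time axis with unsqueeze, and concatenated into [batch, step_num, dim].
import torch

batch_size, step_num, dim = 4, 3, 5
steps = [torch.randn(batch_size, dim) for _ in range(step_num)]
stacked = torch.cat([s.unsqueeze(1) for s in steps], dim=1)
assert stacked.shape == (batch_size, step_num, dim)
# torch.stack(steps, dim=1) would be an equivalent one-liner.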
def save_senado_project(self, projetos, keywords, ong):
    """
    Saves PLs from the Senate in the database

    Args
    -----------
    projetos: list of dicts -> all projects, each with 'id' and 'data' keys
    keywords: list of strings -> all keywords from all subjects
    ong: dict -> data from the ONG
    """
    for projeto in projetos:
        db_data = {}
        id_projeto = projeto['id']
        proj_req = utils.get_request(constants.URL_API_SENADO +
                                     f"materia/{id_projeto}").json()
        ementa = utils.get_from_dict(self.campos_banco["ementa"], proj_req)
        try:
            codigo_situacao_pl = (
                proj_req['DetalheMateria']['Materia']['SituacaoAtual']
                ['Autuacoes']['Autuacao']['Situacao']['CodigoSituacao'])
        except TypeError:
            # 'Autuacao' can be a list; take the first entry.
            situacoes = (proj_req['DetalheMateria']['Materia']
                         ['SituacaoAtual']['Autuacoes'])
            codigo_situacao_pl = (
                situacoes['Autuacao'][0]['Situacao']['CodigoSituacao'])
        situacao_arquivada = self.get_codigo_pl_arquivado()
        senador = Senador()
        if (utils.search_keyword(ementa, keywords)
                and situacao_arquivada != codigo_situacao_pl):
            json_autor = senador.get_dados_autor(proj_req, id_projeto)
            dados_pl = self.get_dados_pl(proj_req, id_projeto,
                                         projeto['data'], ong["Name"])
            dados_relator = senador.get_dados_relator(id_projeto)
            db_data.update(dados_pl)
            db_data.update(json_autor)
            db_data.update(dados_relator)
            el_data = db_data  # alias: the Elasticsearch document shares this dict
            utils.save_projeto_to_db(db_data)
            pl_datetime = datetime.strptime(el_data['data'], "%d/%m/%Y")
            el_data['data'] = datetime.strftime(pl_datetime, "%Y/%m/%d")
            el_data['tags_ementa'] = utils.get_tags_from_string(ementa)
            el_data['tags_tramitacao'] = utils.get_tags_from_string(
                dados_pl["tramitacao"])
            el_data['keywords'] = utils.get_ementa_keyword(keywords, ementa)
            del el_data['_id']  # drop the MongoDB id before indexing
            constants.es.index(index='projects', doc_type='project',
                               body=el_data)
def cal_perform(self, data):
    x, y = data['input'].to(self.device), data['output'].to(self.device)
    # x: [batch_size, step_num, input_num]
    # y: [batch_size, step_num, output_num]
    result = self.forward(x)
    output, act = get_from_dict(result, ['output', 'act'])
    return self.cal_perform_from_output(output, y, act)
def get_coord_from_uf(self, dict_projeto):
    """
    Returns the latitude/longitude of the PL's state (UF)

    Args
    -------
    dict_projeto: dict -> json returned by the specific PL request

    Returns
    --------
    dict -> coordinates in {"coord": {"lat": ..., "lon": ...}} form
    """
    states_coord = constants.states_coord
    pl_uf = utils.get_from_dict(self.campos_senador["estado"], dict_projeto)
    pl_coord = {
        "coord": {
            "lat": states_coord[pl_uf]["lat"],
            "lon": states_coord[pl_uf]["lon"]
        }
    }
    return pl_coord
def build_db_data(self, json_projeto, ong_name, pl_date, url_camara):
    """
    Returns the database fields for a PL

    Args
    -------
    json_projeto: dict -> json returned by the specific PL request
    ong_name: str -> ONG name
    pl_date: string -> PL date in "dd/mm/YYYY" format
    url_camara: string -> camara api url for the PL request

    Returns
    --------
    dict -> all PL data
    """
    crawl = CrawlCamara()
    db_data = {
        "ongName": ong_name,
        "ementa": utils.get_from_dict(self.campos_banco["ementa"], json_projeto),
        "tramitacao": utils.get_from_dict(self.campos_banco["tramitacao"], json_projeto),
        "apreciacao": crawl.crawl_apreciacao(json_projeto),
        "situacao": utils.get_from_dict(self.campos_banco["situacao"], json_projeto),
        "sigla": utils.get_from_dict(self.campos_banco["sigla"], json_projeto),
        "numero": utils.get_from_dict(self.campos_banco["numero"], json_projeto),
        "ano": utils.get_from_dict(self.campos_banco["ano"], json_projeto),
        "data": pl_date,
        "urlPL": url_camara,
        "regime": utils.get_from_dict(self.campos_banco["regime"], json_projeto),
        "apensados": crawl.crawl_apensados(json_projeto),
        "casa": "Câmara"
    }
    return db_data
def get_dados_autor(self, json_projeto, projeto):
    """
    Returns a dictionary with PL author data

    Args
    -------
    json_projeto: dict -> json returned by the specific PL request
    projeto: str -> PL id

    Returns
    --------
    dict -> all PL author data
    """
    json_autor = {"autor": {}}
    states_coord = constants.states_coord
    try:
        uf = utils.get_from_dict(self.campos_autor["estado"]["uf"], json_projeto)
        id_autor = utils.get_from_dict(self.campos_autor["id"], json_projeto)
        url_api_senador = constants.URL_API_SENADO + f"senador/{id_autor}"
        json_autor["autor"]["id"] = id_autor
        json_autor["autor"]["urlParlamentar"] = utils.get_from_dict(
            self.campos_autor["urlParlamentar"], json_projeto)
        json_autor["autor"]["urlApiParlamentar"] = url_api_senador
        json_autor["autor"]["nome"] = utils.get_from_dict(
            self.campos_autor["nome"], json_projeto)
        json_autor["autor"]["sexo"] = utils.get_from_dict(
            self.campos_autor["sexo"], json_projeto)
        json_autor["autor"]["estado"] = {
            "uf": uf,
            "coord": {
                "lat": states_coord[uf]["lat"],
                "lon": states_coord[uf]["lon"]
            }
        }
        json_autor["autor"]["siglaPartido"] = utils.get_from_dict(
            self.campos_autor["siglaPartido"], json_projeto)
    except KeyError:
        # Fall back to the first author when the detailed fields are missing.
        json_autor["autor"]["nome"] = utils.get_from_dict(
            ['DetalheMateria', 'Materia', 'Autoria', 'Autor', 0, 'NomeAutor'],
            json_projeto)
        json_autor["autor"]["urlParlamentar"] = None
        json_autor["autor"]["sexo"] = None
        json_autor["autor"]["estado"] = None
        json_autor["autor"]["siglaPartido"] = None
    return json_autor
def build_deputado_final(self, json_fields, url_deputado):
    """
    Returns the database fields for a reporter or deputy

    Args
    -------
    json_fields: dict -> database field names for the reporter or deputy
    url_deputado: tuple -> (camara api url for the deputy request, proposition id)

    Returns
    --------
    dict -> all deputy data
    """
    states_coord = constants.states_coord
    proposicao = url_deputado[1]
    if url_deputado[0]:
        req_deputado = utils.get_request(url_deputado[0])
        json_deputado = req_deputado.json()
        uf = utils.get_from_dict(self.campos_deputado["estado"]["uf"],
                                 json_deputado)
        id_parlamentar = utils.get_from_dict(self.campos_deputado["id"],
                                             json_deputado)
        dados_deputado = {
            json_fields["deputado"]: {
                "id": id_parlamentar,
                "nome": utils.get_from_dict(self.campos_deputado["nome"],
                                            json_deputado).lower().title(),
                json_fields["urlApiParlamentar"]: utils.get_from_dict(
                    self.campos_deputado["urlApiParlamentar"], json_deputado),
                json_fields["urlParlamentar"]: (constants.SITE_CAMARA +
                                                f"deputados/{id_parlamentar}"),
                "siglaPartido": utils.get_from_dict(
                    self.campos_deputado["siglaPartido"], json_deputado),
                "urlPartido": utils.get_from_dict(
                    self.campos_deputado["urlPartido"], json_deputado),
                "estado": {
                    "uf": uf,
                    "coord": {
                        "lat": states_coord[uf]["lat"],
                        "lon": states_coord[uf]["lon"]
                    }
                },
                "sexo": json_deputado["dados"]["sexo"]
            }
        }
    else:
        # No deputy URL: fall back to the PL's author list.
        url_autores_pl = (constants.URL_API_CAMARA +
                          f"proposicoes/{proposicao}/autores")
        dados_autores = utils.get_request(url_autores_pl).json()
        dados_deputado = {
            json_fields["deputado"]: {
                "id": None,
                "nome": dados_autores["dados"][0]["nome"],
                "siglaPartido": None,
                "estado": None,
                "sexo": None,
                json_fields["urlParlamentar"]: None
            }
        }
    return dados_deputado
def compute_features(sentences, dictionary, type_analysis):
    """
    Compute the features defined in the Features() class
    :param sentences: ([[Sentence]]) list of sentences
    :param dictionary: reference lexicon (De Mauro) used for the in-dictionary checks
    :param type_analysis: 1 to report per-document TTR distributions, 0 for a single global ratio
    :return: computed features (dictionary)
    """
    features_values = {'n_sentences': len(sentences), 'n_tokens': 0}
    features = Features()
    for sentence in sentences:
        features_values['n_tokens'] += len(sentence.tokens)
        features.max_sentence_trees_depth.append(max_depth(sentence.root))
        for token in sentence.tokens:
            # Features for each token in the sentence
            if dictionary:
                features.lexicon_in_dictionary(token, dictionary)  # lexicon in the De Mauro dictionary
            features.count_chars_and_tokens(token)  # characters per token and number of tokens
            features.count_forms_and_lemmas(token)  # features about forms and lemmas
            features.count_pos_and_dep(token)  # count uPOS, xPOS, dep
            features.count_lexical_words(token)  # count lexical (content) words
            features.verbal_features(token)  # verbal features
            features.count_roots(token)
            features.count_links(token)  # number of roots and links per file
            features.count_subjects(token)  # preverbal and postverbal subjects
            features.count_objects(token)  # preverbal and postverbal objects
            features.count_prepositional_chain_and_syntagms(
                token, sentence)  # prepositional chains and prepositional syntagms
            features.count_subordinate_propositions(
                token, sentence)  # subordinate propositions, pre/postverbal subordinates, subordinate chains
    # Compute type/token ratio on forms and lemmas
    if type_analysis == 1:
        if len(features.ttrs_form) > 0:
            features_values['ttr_form'] = dict_distribution(features.ttrs_form, 'ttr_form')
            features_values['ttr_lemma'] = dict_distribution(features.ttrs_lemma, 'ttr_lemma')
    if type_analysis == 0:
        features_values['ttr_form'] = ratio(len(features.types_form), float(features.n_tok))
        features_values['ttr_lemma'] = ratio(len(features.types_lemma), float(features.n_tok))
    features_values['tokens_per_sent'] = ratio(features_values['n_tokens'],
                                               float(features_values['n_sentences']))
    features_values['char_per_tok'] = ratio(features.n_char,
                                            float(features.n_tok_no_punct))  # mean chars per token
    if dictionary:
        features_values['in_dict'] = ratio(features.in_dict, float(features.n_tok_no_punct))
        features_values['in_dict_types'] = ratio(features.in_dict_types, float(len(features.types_lemma)))
        features_values['in_FO'] = ratio(features.n_FO, float(features.n_tok_no_punct))
        features_values['in_AD'] = ratio(features.n_AD, float(features.n_tok_no_punct))
        features_values['in_AU'] = ratio(features.n_AU, float(features.n_tok_no_punct))
        features_values['in_FO_types'] = ratio(features.n_FO_types, float(len(features.types_lemma)))
        features_values['in_AD_types'] = ratio(features.n_AD_types, float(len(features.types_lemma)))
        features_values['in_AU_types'] = ratio(features.n_AU_types, float(len(features.types_lemma)))
    features_values['upos_dist'] = dict_distribution(features.upos_total, 'upos_dist')  # coarse-grained
    features_values['xpos_dist'] = dict_distribution(features.xpos_total, 'xpos_dist')  # fine-grained
    features_values['lexical_density'] = ratio(features.lexical_words, features.n_tok_no_punct)
    features_values['verbs_mood_dist'] = dict_distribution(features.verbs_mood_total, 'verbs_mood_dist')
    features_values['verbs_tense_dist'] = dict_distribution(features.verbs_tense_total, 'verbs_tense_dist')
    features_values['verbs_gender_dist'] = dict_distribution(features.verbs_gender_total, 'verbs_gender_dist')
    features_values['verbs_form_dist'] = dict_distribution(features.verbs_form_total, 'verbs_form_dist')
    features_values['verbs_num_pers_dist'] = dict_distribution(features.verbs_num_pers_total, 'verbs_num_pers_dist')
    # syntactic features
    features_values['verbal_head_total'] = get_from_dict(features.upos_total, 'VERB')
    features_values['verbal_head_per_sent'] = ratio(
        get_from_dict(features.upos_total, 'VERB'),
        features_values['n_sentences'])  # for documents
    features_values['verbal_root_total'] = features.n_verbal_root
    features_values['verbal_root_perc'] = ratio(features.n_verbal_root, features.n_root)  # for documents
    features_values['avg_token_per_clause'] = ratio(features.n_tok, features.n_verb)
    features_values['avg_links_len'] = ratio(features.total_links_len, features.n_links)
    features_values['max_links_len'] = features.max_links_len
    features_values['avg_max_links_len'] = ratio(features.max_links_len,
                                                 features_values['n_sentences'])
    features_values['avg_max_depth'] = ratio(sum(features.max_sentence_trees_depth),
                                             len(features.max_sentence_trees_depth))  # documents
    features_values['dep_dist'] = dict_distribution(features.dep_total, 'dep_dist')
    features_values['dep_total'] = [('dep_total_' + x, y) for x, y in sorted(
        features.dep_total.items(), key=operator.itemgetter(1), reverse=True)]
    features_values['subj_pre'] = ratio(features.n_subj_pre,
                                        features.n_subj_pre + features.n_subj_post)
    features_values['subj_post'] = ratio(features.n_subj_post,
                                         features.n_subj_pre + features.n_subj_post)
    features_values['obj_pre'] = ratio(features.n_obj_pre,
                                       features.n_obj_pre + features.n_obj_post)
    features_values['obj_post'] = ratio(features.n_obj_post,
                                        features.n_obj_pre + features.n_obj_post)
    features_values['n_prepositional_chains'] = features.n_prepositional_chain
    features_values['avg_prepositional_chain_len'] = ratio(
        features.total_prepositional_chain_len, features.n_prepositional_chain)
    features_values['prepositional_chain_total'] = sorted(
        {'prep_total_' + str(i): features.prep_chains.count(i)
         for i in set(features.prep_chains)}.items(),
        key=operator.itemgetter(1), reverse=True)
    features_values['prepositional_chain_distribution'] = sorted(
        {'prep_dist_' + str(i): features.prep_chains.count(i) / float(features.n_prepositional_chain)
         for i in set(features.prep_chains)}.items(),
        key=operator.itemgetter(1), reverse=True)
    features_values['subordinate_chains_total'] = sorted(
        {'subordinate_total_' + str(i): features.subordinate_chains.count(i)
         for i in set(features.subordinate_chains)}.items(),
        key=operator.itemgetter(1), reverse=True)
    features_values['subordinate_chains_distribution'] = sorted(
        {'subordinate_dist_' + str(i): features.subordinate_chains.count(i) / float(features.n_subordinate_chain)
         for i in set(features.subordinate_chains)}.items(),
        key=operator.itemgetter(1), reverse=True)
    features_values['total_subordinate_proposition'] = features.n_subordinate_proposition
    features_values['total_subordinate_chain'] = features.n_subordinate_chain
    features_values['total_subordinate_chain_len'] = features.total_subordinate_chain_len
    features_values['avg_subordinate_chain_len'] = ratio(
        features.total_subordinate_chain_len, features.n_subordinate_chain)
    features_values['principal_proposition_dist'] = ratio(
        features.n_verb - features.n_subordinate_proposition, features.n_verb)
    features_values['subordinate_proposition_dist'] = ratio(
        features.n_subordinate_proposition, features.n_verb)
    features_values['subordinate_pre'] = ratio(features.n_subordinate_pre,
                                               features.n_subordinate_proposition)
    features_values['subordinate_post'] = ratio(features.n_subordinate_post,
                                                features.n_subordinate_proposition)
    features_values['verb_edges_dist'] = [
        ('verb_edges_dist_' + str(k), v)
        for k, v in dict_distribution(features.verb_edges_total, '')
    ]  # total verb arity
    features_values['avg_verb_edges'] = ratio(features.total_verb_edges,
                                              features.n_verb)  # mean verb arity
    return features_values
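# The helpers `ratio` and `dict_distribution` that compute_features relies on
# are not defined here. The sketch below is an assumption inferred from the
# call sites, not the project's implementation: `ratio` looks like a
# zero-safe division (several denominators can legitimately be zero), and
# `dict_distribution` appears to turn a {label: count} dict into a list of
# (name, relative frequency) tuples.
def ratio(numerator, denominator):
    # Zero-safe division; assumed behavior on a zero/empty denominator.
    return numerator / denominator if denominator else 0.0

def dict_distribution(counts, prefix):
    # Normalize {label: count} into [('<prefix>_<label>', freq), ...] sorted
    # by descending frequency; the exact key format is an assumption.
    total = float(sum(counts.values()))
    return [(prefix + '_' + str(k), v / total if total else 0.0)
            for k, v in sorted(counts.items(),
                               key=lambda kv: kv[1], reverse=True)]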