def __ners(seq):
    """Collapse token/tag pair sequences into unique space-joined strings.

    Each element of *seq* is doubly flattened into a flat list that is
    assumed to alternate word, tag, word, tag, ...  When every tag in an
    element agrees, the words are joined with spaces and collected.
    Returns the collected strings de-duplicated via the module's
    ``unique`` helper.
    """
    results = []
    for raw in seq:
        # Materialize so extended slicing is safe regardless of what
        # flatten() yields.
        tokens = list(flatten(flatten(raw)))
        words = tokens[0::2]   # even positions: the entity words
        tags = tokens[1::2]    # odd positions: presumably the NER tags
        # all() over an empty tags list is True, matching the original.
        if all(tag == tags[0] for tag in tags):
            results.append(" ".join(words))
    return unique(results)
def __ners(seq):
    """Join entity words into strings when their interleaved tags agree.

    Duplicate of the module's other ``__ners``: doubly flattens each
    element, splits it by index parity, keeps the even-position words
    whenever all odd-position values are equal, and de-duplicates the
    result with ``unique``.
    """
    out = []
    for item in seq:
        evens, odds = [], []
        for i, tok in enumerate(flatten(flatten(item))):
            (evens if i % 2 == 0 else odds).append(tok)
        # Vacuously true for an empty odds list — same as the original.
        if all(o == odds[0] for o in odds):
            out.append(" ".join(evens))
    return unique(out)
async def get(self, user: dict):
    """Return the current school term's projects, each with its members.

    Responds with an error dict when no current school term exists for
    the user's school ('escuela').
    """
    term = await self.fetch_current_school_term(user['escuela'])
    if not term:
        return {'error': 'No hay un ciclo académico registrado'}
    projects = flatten(await self.fetch_projects(term['id']), {})
    # Mutate each project dict in place; equivalent to assigning back
    # through projects[i].
    for project in projects:
        project['members'] = flatten(
            await self.fetch_members(project['id'], term['id']), {})
    return {'projects': projects}
def print_clusters(c, fids):
    """Print each cluster's documents and a per-cluster summary line.

    :param c: sequence of cluster ids, one per document; -1 marks noise
        and is skipped.
    :param fids: document identifiers, indexed by document position;
        each is a '_'-separated string of features.

    Fix: removed the unused locals ``k`` and ``cluster_size`` (computed
    via Counter/np.where but never read) and the non-idiomatic
    ``if not cid in clusters`` membership dance.
    """
    # Group document indices by cluster id, skipping noise (-1).
    clusters = {}
    for doc, cid in enumerate(c):
        if cid == -1:
            continue
        clusters.setdefault(cid, []).append(doc)
    # Dump every non-empty cluster's documents.
    for cid, docs in clusters.items():
        if len(docs) < 1:
            continue
        print("Cluster {}".format(cid))
        for doc in docs:
            print(fids[doc])
        print("-" * 40)
    # Unique feature tokens per cluster (ids split on '_').
    features = {
        cid: unique(flatten([fids[doc].split("_") for doc in docs]))
        for cid, docs in clusters.items()
    }
    # Summary: cluster id, document count, distinct feature count.
    for cid, docs in clusters.items():
        print("{}. {} | {}".format(cid, len(docs), len(features[cid])))
def get(self, title, ner_tag):
    """Look up the knowledge-base item best matching *title* for *ner_tag*.

    :param title: entity title, possibly with ASCII-transliterated
        German umlauts ('ue', 'ae', 'oe').
    :param ner_tag: NER tag keying into ``CHARACTERISTIC_PROPERTIES``.
    :raises ValueError: when *ner_tag* is unsupported.
    :returns: candidate item whose English label is most n-gram-similar
        to *title*, or None when no candidate survives filtering.
    """
    if ner_tag not in self.CHARACTERISTIC_PROPERTIES:
        raise ValueError('NER tag is not supported for entity lookup.')
    # Prepare title: restore German umlauts from transliterations.
    # BUG FIX: the original replaced 'ue' twice and never handled
    # 'oe' -> 'ö'.
    title = title.replace('ue', 'ü').replace('ae', 'ä').replace('oe', 'ö')
    # Get candidate items from every permutation-based title variant.
    candidate_ids = flatten(
        [self._search_items(x) for x in self._extend_title(title)], True)
    candidates = self._get_items(candidate_ids)
    # Remove candidates lacking all characteristic properties of the tag.
    present_properties = {
        item['id']: item['claims'].keys()
        for item in candidates
    }
    characteristic_properties = self.CHARACTERISTIC_PROPERTIES[ner_tag]
    candidates = [
        item for item in candidates
        if characteristic_properties.intersection(present_properties[
            item['id']])
    ]
    # Return the candidate whose English label best matches the title
    # (bigram similarity). Implicitly returns None when empty.
    if candidates:
        return max(candidates, key=lambda item: NGram.compare(
            title, item['labels']['en']['value'], N=2))
def _extend_title(title):
    """Generate candidate title strings from word permutations.

    Produces every permutation of the title's words at every length,
    longest permutations first, each joined back into a single string.
    """
    words = title.split(' ')
    # Lengths len(words) .. 1, i.e. longest candidates first.
    perms_by_length = [
        itertools.permutations(words, n)
        for n in range(len(words), 0, -1)
    ]
    return [' '.join(perm) for perm in flatten(perms_by_length)]
def print_clusters(c, fids):
    """Print cluster contents and a per-cluster summary.

    :param c: per-document cluster ids; -1 is noise and is skipped.
    :param fids: document identifiers ('_'-separated feature strings)
        indexed by document position.

    Fix: dropped the unused ``k``/``cluster_size`` computations and the
    ``if not cid in clusters`` anti-idiom.
    """
    # Bucket document indices by cluster id, ignoring noise.
    clusters = {}
    for doc_idx, cid in enumerate(c):
        if cid == -1:
            continue
        clusters.setdefault(cid, []).append(doc_idx)
    for cid, docs in clusters.items():
        if len(docs) < 1:
            continue
        print("Cluster {}".format(cid))
        for doc_idx in docs:
            print(fids[doc_idx])
        print("-" * 40)
    # Distinct feature tokens per cluster.
    features = {
        cid: unique(flatten([fids[doc_idx].split("_") for doc_idx in docs]))
        for cid, docs in clusters.items()
    }
    for cid, docs in clusters.items():
        print("{}. {} | {}".format(cid, len(docs), len(features[cid])))
def create_corpus(index, features):
    """Load — or build and cache — a gensim dictionary and TF-IDF corpus.

    :param index: name used to key the cached artifacts under /tmp.
    :param features: tokenized documents (a list of token lists).
    :returns: (dictionary, corpus_tfidf) tuple.

    Fix: the bare ``except:`` (which would also swallow SystemExit and
    KeyboardInterrupt) is narrowed to ``except Exception``; the dead
    ``dictionary = None`` / ``corpus_tfidf = None`` pre-initialization
    and the single-use ``sents`` temporary are removed.
    """
    dict_path = '/tmp/{}_dict.dict'.format(index)
    corpus_path_tfidf = '/tmp/{}_corpus_tfidf.mm'.format(index)
    # Fast path: reuse previously serialized artifacts.
    try:
        dictionary = corpora.Dictionary.load(dict_path)
        corpus_tfidf = corpora.MmCorpus.load(corpus_path_tfidf)
        return dictionary, corpus_tfidf
    except Exception:
        print("no corpus or dictionary found, creating...")
    # Keep only tokens that occur more than once across the collection.
    freqs = FreqDist(flatten(features))
    freq_sents = [[token for token in text if freqs[token] > 1]
                  for text in features]
    dictionary = corpora.Dictionary(freq_sents)
    dictionary.filter_extremes(no_below=1, no_above=0.8)
    dictionary.save(dict_path)
    # Bag-of-words corpus, skipping now-empty documents.
    corpus = [dictionary.doc2bow(text) for text in freq_sents if text]
    corpora.MmCorpus.serialize('/tmp/{}_corpus.mm'.format(index), corpus)
    tfidf = models.TfidfModel(corpus, normalize=True)
    corpus_tfidf = tfidf[corpus]
    corpus_tfidf.save(corpus_path_tfidf)
    return dictionary, corpus_tfidf
async def get(self, user: dict):
    """Return the requested student's grades, or a 400 when none exist."""
    student_id = int(self.request.match_info['student'])
    grades = flatten(await self.get_grades(user['escuela'], student_id), {})
    if grades:
        return json_response({'grades': grades})
    return json_response({'message': 'No se encontraron notas a mostrar'},
                         status=400)
async def get(self, user: dict):
    """Build the attendance summary for every student of the user's school.

    Returns the students (each annotated with per-schedule attendance
    percentages and running totals) together with the stringified
    schedules; ``{'students': []}`` when the school has no students.
    """
    students = await self.get_students(user['escuela'])
    if not students:
        return {'students': []}
    students = flatten(students, {})
    # All students share one school term; take it from the first row.
    schedules = flatten(
        await self.fetch_schedules(students[0]['ciclo_acad_id']), {})
    for schedule in schedules:
        schedule['str'] = schedule_to_str(schedule['dia_clase'],
                                          schedule['hora_comienzo'],
                                          schedule['hora_fin'])
    for student in students:
        student['attendance'] = []
        student['total_attendances'] = 0
        student['total_non_attendances'] = 0
        for schedule in schedules:
            row = await self.fetch_non_attendances(student['id'],
                                                   schedule['id'])
            # NULL-safe defaults: any missing figure counts as zero.
            missed_pct = row['inasistencias_porcentaje'] or 0
            student['total_non_attendances'] += row['inasistencias'] or 0
            student['total_attendances'] += row['total_asistencias'] or 0
            student['attendance'].append({
                'attendances': 100 - missed_pct,
                'non_attendances': missed_pct
            })
    return {'students': students, 'schedules': schedules}
async def map_student(student: dict) -> dict:
    """Return *student* extended with its grade values and final grade.

    NOTE(review): ``self`` and ``school_term`` are closure variables
    from the enclosing scope (not visible here).  Grade values are
    floats, with '-' standing in for missing (None) values.
    """
    raw = flatten(
        await self.fetch_grades(school_term['id'], student['id']), {})
    grades = [
        float(g['valor']) if g['valor'] is not None else '-'
        for g in raw
    ]
    final = await self.fetch_final_grade(school_term['id'], student['id'])
    grades.append('-' if final is None else final)
    return {**student, 'grades': grades}
def _extend_value(self, root_token):
    """Build the textual value anchored at *root_token*.

    A normalized NER value, when present, wins outright.  Otherwise the
    token is extended with its 'compound' dependents (and, for nouns,
    with 'of'-nmod tuples) and the words are joined in index order.
    """
    # Normalized NER short-circuits everything else.
    if root_token.normalized_ner:
        return root_token.normalized_ner
    # Collect compound-dependency dependents.
    extras = []
    for dep in root_token.dependencies(role='governor'):
        if dep.dep == 'compound':
            extras.append(dep.dependent)
    # Nouns additionally pull in 'of'-nmod tuples.
    if root_token.pos.startswith('NN'):
        extras.extend(flatten(self._extract_nmod_tuples(root_token, 'of')))
    # Join the words in their original sentence order.
    ordered = sorted([root_token] + extras, key=lambda tok: tok.index)
    return ' '.join(tok.word for tok in ordered)
def get_semantic_set(from_date, to_date=None):
    """Collect per-document semantic rows for a date range, keyed by index.

    :param from_date: start date; also the end date when *to_date* is
        falsy.
    :param to_date: optional inclusive end date.
    :returns: defaultdict mapping doc["index"] to rows of the form
        [id, title, len(keywords), *keywords, len(pos), *pos,
         len(ner), *ner, len(np), *np].

    Fix: removed the unused local ``tn = TextNormalizer()`` — it was
    instantiated but never referenced.
    """
    if not to_date:
        to_date = from_date
    docs = defaultdict(list)
    for doc in get_days(from_date, to_date):
        keywords = list(doc["keywords"])
        pos = flatten(doc["pos"])
        ner = __ners(doc["ner"])
        nps = doc["np"]
        # Each field is emitted as a length prefix followed by its items.
        row = [doc["id"], doc["title"]]
        row += [len(keywords)] + keywords
        row += [len(pos)] + pos
        row += [len(ner)] + ner
        row += [len(nps)] + nps
        docs[doc["index"]].append(row)
    return docs
async def _validate_role(name: str, value: str, pos: int, elems: list,
                         dbi: PoolConnectionHolder, user_role: int,
                         self_role: int):
    """Validate a role assignment; return an error message or None.

    :param name: field name used in the error message.
    :param value: role id being assigned (as a string).
    :param pos: field position (unused here; kept for the validator
        signature).
    :param elems: sibling form elements (unused here; kept for the
        validator signature).
    :param dbi: connection pool holder.
    :param user_role: role of the user being edited.
    :param self_role: role of the user performing the change.

    Fix: the manual ``error = True`` flag loop is replaced with the
    idiomatic ``any()``.
    """
    # Only admins (role 4) may touch another admin or grant role 4.
    if user_role == 4 and self_role != 4:
        return 'No tienes permisos suficientes para cambiar el rol de este usuario'
    if int(value) == 4 and self_role != 4:
        return 'No tienes permisos suficientes para asignar este rol'
    async with dbi.acquire() as connection:
        roles = await (await connection.prepare('''
            SELECT *
            FROM rol_usuario
        ''')).fetch()
        roles = flatten(roles, {}) if roles else []
        # The requested role must exist in rol_usuario.
        if not any(int(value) == role['id'] for role in roles):
            return '{}: {} no existe...'.format(name, value)
def get_semantic_set(from_date, to_date=None):
    """Build length-prefixed semantic rows per document, grouped by index.

    :param from_date: start date (used as end date too when *to_date*
        is falsy).
    :param to_date: optional inclusive end date.
    :returns: defaultdict of doc["index"] -> list of rows
        [id, title, len(kw), *kw, len(pos), *pos, len(ner), *ner,
         len(np), *np].

    Fix: dropped the unused ``tn = TextNormalizer()`` local.
    """
    if not to_date:
        to_date = from_date
    docs = defaultdict(list)
    for doc in get_days(from_date, to_date):
        keywords = list(doc["keywords"])
        pos = flatten(doc["pos"])
        ner = __ners(doc["ner"])
        nps = doc["np"]
        p = [doc["id"], doc["title"]]
        p += [len(keywords)] + keywords
        p += [len(pos)] + pos
        p += [len(ner)] + ner
        p += [len(nps)] + nps
        docs[doc["index"]].append(p)
    return docs
def __unfold(data):
    """Order *data* by index, then flatten the payloads into one list."""
    ordered = __sort_by_index(data)
    payloads = [payload for _index, payload in ordered]
    return flatten(payloads)
async def get_roles(self):
    """Fetch all roles flattened to plain dicts; empty when none exist."""
    fetched = await self.fetch_roles()
    return flatten(fetched or [], {})
async def get(self, user: dict):
    """Return a student's grade report (grades grouped by 'grupo').

    Resolves the target student ('my-own' vs explicit id), resolves the
    school term (from the URL or the current date), then fetches and
    groups the grades.  Responds 400 with a Spanish message on any
    missing entity.
    """
    # Permission validation: personal-report users may only view their
    # own report; others need class-grade permission.
    if not user['permissions']['ver_reportes_personales'] and self.request.match_info['student_id'] == 'my-own':
        raise HTTPUnauthorized
    elif user['permissions']['ver_reportes_personales'] and self.request.match_info['student_id'] != 'my-own':
        raise HTTPUnauthorized
    elif not user['permissions']['ver_notas_de_clase'] and not user['permissions']['ver_reportes_personales']:
        raise HTTPUnauthorized
    # Resolve the target student id.
    if self.request.match_info['student_id'] == 'my-own':
        student_id = user['id']
    else:
        student_id = int(self.request.match_info['student_id'])
    # Resolve the school term: explicit in the URL, or the current one.
    if 'school_term' in self.request.match_info:
        school_term_id = int(self.request.match_info['school_term'])
        school_term = await self.school_term_exists(school_term_id, user['escuela'])
        if not school_term:
            return json_response({'message': 'Ciclo académico no encontrado'}, status=400)
        else:
            # School term found
            school_term = {'id': school_term_id}
    else:
        school_term = await self.fetch_school_term(user['escuela'])
        if not school_term:
            return json_response({'message': 'No se encontró un ciclo académico para esta fecha'}, status=400)
    student = await self.fetch_student(student_id)
    if not student:
        return json_response({'message': 'No se encontró al estudiante'}, status=400)
    grades = await self.fetch_grades(school_term['id'], student['id'])
    if not grades:
        return json_response({'message': 'No hay estructura de notas registrada, no hay notas por ver...'}, status=400)
    # '-' stands in for a missing final grade.
    final_grade = await self.fetch_final_grade(school_term['id'], student['id']) or '-'
    result_data = flatten({
        'school_term': school_term,
        'student': student,
        'grades': grades,
        'final_grade': final_grade
    }, {})
    del school_term, student, grades
    grade_group = list()

    def find_grade(_grade: dict) -> Union[int, bool]:
        # Index of the existing sub-list sharing this grade's 'grupo',
        # or False when no such group has been started yet.
        for _i, _g in enumerate(grade_group):
            if isinstance(_g, list) and _g[0]['grupo'] == _grade['grupo']:
                return _i
        return False

    # Group grades: ungrouped grades stay flat, grouped ones are
    # collected into sub-lists keyed by 'grupo'.
    for grade in result_data['grades']:
        if grade['valor'] is None:
            grade['valor'] = '-'
        if grade['grupo'] is None:
            grade_group.append(grade)
        else:
            _g_i = find_grade(grade)
            if _g_i is False:
                grade_group.append([grade])
            else:
                grade_group[_g_i].append(grade)
    result_data['grades'] = grade_group
    return json_response(result_data)
async def get(self, user: dict):
    """Return a student's attendance report with per-schedule overall
    percentages and an overall average.

    Resolves the target student ('my-own' vs explicit id) and the
    school term (from the URL or the current date), fetches attendance
    rows per schedule, then computes percentage summaries.  Responds
    400 with a Spanish message on any missing entity.
    """
    # Validate permissions: personal-report users may only view their
    # own report; others need the student-list permission.
    if not user['permissions']['ver_reportes_personales'] and self.request.match_info['student_id'] == 'my-own':
        raise HTTPUnauthorized
    elif user['permissions']['ver_reportes_personales'] and self.request.match_info['student_id'] != 'my-own':
        raise HTTPUnauthorized
    elif not user['permissions']['ver_listado_alumnos'] and not user['permissions']['ver_reportes_personales']:
        raise HTTPUnauthorized
    # Resolve the target student id.
    if self.request.match_info['student_id'] == 'my-own':
        student_id = user['id']
    else:
        student_id = int(self.request.match_info['student_id'])
    # Resolve the school term: explicit in the URL, or the current one.
    if 'school_term_id' in self.request.match_info:
        school_term_id = int(self.request.match_info['school_term_id'])
        school_term = await self.school_term_exists(school_term_id, user['escuela'], self.request.app.db)
        if school_term:
            school_term = {'id': school_term_id}
            del school_term_id
        else:
            # School term not found
            return json_response({'message': 'Ciclo académico no encontrado'}, status=400)
    else:
        school_term = await self.fetch_school_term(user['escuela'], self.request.app.db)
        if not school_term:
            # No school term registered for this date
            return json_response({
                'message': 'No se encontró un ciclo académico para esta fecha'
            }, status=400)
    schedules = await self.fetch_schedules(school_term['id'], self.request.app.db)
    if not schedules:
        return json_response({'message': 'No hay horarios disponibles'}, status=400)
    # One attendance row list per schedule id.
    attendances = dict()
    for schedule in schedules:
        attendances[schedule['id']] = await self.fetch_attendance_for_schedule(student_id, schedule['id'], self.request.app.db)
    result_data = flatten({
        'school_term': school_term,
        'schedules': schedules,
        'attendances': attendances
    }, {'with_time': True, 'long': True})
    # overall: per-schedule 1/0 attendance marks, keyed 1..N by schedule
    # position, later collapsed into percentages.
    result_data['overall'] = {}
    total_amount, attended = 0, 0
    for _i, _s in enumerate(result_data['schedules']):
        _ni = _i + 1
        result_data['overall'][_ni] = list()
        for _, _s_wa in result_data['attendances'].items():
            if _s_wa:
                for _a in _s_wa:
                    if _a['horario_id'] == _s['id']:
                        total_amount += 1
                        if _a['asistio']:
                            attended += 1
                            result_data['overall'][_ni].append(1)
                        else:
                            result_data['overall'][_ni].append(0)
    # Collapse each schedule's 1/0 marks into a whole percentage.
    for _k, _overall in result_data['overall'].items():
        if _overall:
            result_data['overall'][_k] = int(round(sum(_overall) / len(_overall), 2) * 100)
        else:
            result_data['overall'][_k] = 0
    # Overall attendance average across all schedules.
    if attended != 0 and total_amount != 0:
        result_data['overall']['average'] = int(round(attended / total_amount, 2) * 100)
    else:
        result_data['overall']['average'] = 0
    return json_response(result_data, status=200)
def _print_and_store_per_validation_metrics(self, run_info, best_team, teams_population, programs_population):
    # Print and record per-validation metrics for the current generation:
    # best-team scores, global fitness/diversity, action and input
    # distributions, and team/program size averages.  Python 2 code
    # (print statements).  All metrics are both printed and appended to
    # the corresponding run_info.*_per_validation lists.
    print "\n\n>>>>> Generation: "+str(self.current_generation_)+", run: "+str(run_info.run_id)
    # Best-team scores for this validation point.
    run_info.train_score_per_validation.append(best_team.fitness_)
    run_info.test_score_per_validation.append(best_team.score_testset_)
    run_info.recall_per_validation.append(best_team.extra_metrics_['recall_per_action'])
    print("\n### Best Team Metrics: "+best_team.metrics()+"\n")
    # Only teams from previous generations enter the global averages.
    older_teams = [team for team in teams_population if team.generation != self.current_generation_]
    fitness_score_mean = round_value(numpy.mean([team.fitness_ for team in older_teams]))
    validation_score_mean = round_value(numpy.mean([team.score_testset_ for team in older_teams]))
    run_info.global_mean_validation_score_per_validation.append(validation_score_mean)
    print
    # Per-diversity-key: record the latest per-generation global value.
    for key in best_team.diversity_:
        run_info.global_diversity_per_validation[key].append(run_info.global_diversity_per_generation[key][-1])
        print str(key)+": "+str(best_team.diversity_[key])+" (global: "+str(run_info.global_diversity_per_generation[key][-1])+")"
    print "\n### Global Metrics:"
    run_info.global_mean_fitness_score_per_validation.append(fitness_score_mean)
    run_info.global_max_fitness_score_per_validation.append(round_value(max([team.fitness_ for team in older_teams])))
    print "\nfitness (global): "+str(fitness_score_mean)
    # Action distribution over the program population, as a dense array
    # indexed 0..total_actions-1 (missing actions count 0).
    actions_distribution = Counter([p.action for p in programs_population])
    print "\nactions distribution: "+str(actions_distribution)
    actions_distribution_array = []
    for action in range(Config.RESTRICTIONS['total_actions']):
        if action in actions_distribution:
            actions_distribution_array.append(actions_distribution[action])
        else:
            actions_distribution_array.append(0)
    run_info.actions_distribution_per_validation.append(actions_distribution_array)
    # Input distributions: per instruction (full counts) and per team.
    # NOTE(review): list(team.inputs_distribution()) iterates the
    # distribution's keys, so each input counts at most once per team —
    # presumably intentional, but confirm against inputs_distribution().
    inputs_distribution_per_instruction = Counter()
    inputs_distribution_per_team = Counter()
    for team in older_teams:
        inputs_distribution_per_instruction.update(team.inputs_distribution())
        inputs_distribution_per_team.update(list(team.inputs_distribution()))
    # Densify both distributions over 0..total_inputs-1.
    inputs_distribution_per_instruction_array = []
    inputs_distribution_per_team_array = []
    for value in range(Config.RESTRICTIONS['total_inputs']):
        if value in inputs_distribution_per_instruction:
            inputs_distribution_per_instruction_array.append(inputs_distribution_per_instruction[value])
        else:
            inputs_distribution_per_instruction_array.append(0)
        if value in inputs_distribution_per_team:
            inputs_distribution_per_team_array.append(inputs_distribution_per_team[value])
        else:
            inputs_distribution_per_team_array.append(0)
    print "inputs distribution (global, per instruction): "+str(inputs_distribution_per_instruction_array)
    print "inputs distribution (global, per team): "+str(inputs_distribution_per_team_array)
    run_info.inputs_distribution_per_instruction_per_validation.append(inputs_distribution_per_instruction_array)
    run_info.inputs_distribution_per_team_per_validation.append(inputs_distribution_per_team_array)
    print
    # Recent-history dumps (last 10 generations).
    print "Global Fitness (last 10 gen.): "+str(run_info.global_mean_fitness_per_generation[-10:])
    if len(Config.RESTRICTIONS['used_diversities']) > 0:
        print "Global Diversity (last 10 gen.):"
        for diversity in Config.RESTRICTIONS['used_diversities']:
            print "- "+str(diversity)+": "+str(run_info.global_diversity_per_generation[diversity][-10:])
    if len(Config.RESTRICTIONS['used_diversities']) > 1:
        print "Diversity Type (last 10 gen.): "+str(run_info.novelty_type_per_generation[-10:])
    # Mean team size and mean program size (with and without introns).
    avg_team_size = round_value(numpy.mean([len(team.programs) for team in older_teams]))
    avg_program_with_intros_size = round_value(numpy.mean(flatten([[len(program.instructions) for program in team.programs] for team in older_teams])))
    avg_program_without_intros_size = round_value(numpy.mean(flatten([[len(program.instructions_without_introns_) for program in team.programs] for team in older_teams])))
    run_info.mean_team_size_per_validation.append(avg_team_size)
    run_info.mean_program_size_with_introns_per_validation.append(avg_program_with_intros_size)
    run_info.mean_program_size_without_introns_per_validation.append(avg_program_without_intros_size)
    print "\nMean Team Sizes: "+str(run_info.mean_team_size_per_validation[-10:])
    print "Mean Program Sizes (with introns): "+str(run_info.mean_program_size_with_introns_per_validation[-10:])
    print "Mean Program Sizes (without introns): "+str(run_info.mean_program_size_without_introns_per_validation[-10:])
    print "\n<<<<< Generation: "+str(self.current_generation_)+", run: "+str(run_info.run_id)
def __unfold(data):
    """Flatten the payloads of *data* in index-sorted order."""
    parts = []
    for _idx, chunk in __sort_by_index(data):
        parts.append(chunk)
    return flatten(parts)