def update_kanji_pos(self):
    """Recompute kanji positions and frequencies and push them to the DB.

    Steps, in order:

    1. Refresh ``self.total_count`` from
       ``self.db.get_count_for_drama(JdsDatabase.get_merged_drama())``
       (used below as a mapping of character uid to count).
    2. Number the characters of ``compute_pos_dict('jlpt')`` (level
       indices walked from the highest down to 1) and of
       ``compute_pos_dict('jouyou')`` (level indices from 1 upward),
       starting at 1, into ``jlpt_pos`` / ``jouyou_pos``.
    3. Walk the kanji by descending count and set ``jdpt_pos``, ``freq``,
       ``cumul_freq``, the count and the ``jdpt`` level on the matching
       entries of ``self.chars``; then call ``self.db.push_kanji_pos``.
    4. From ``self.db.get_kanji_count_raw()``, compute per kanji the
       number of rows with a non-zero ``drama_uid`` divided by the drama
       count, and the summed ``episode_count`` divided by the episode
       count; then call ``self.db.push_drama_and_episode_count``.

    Raises:
        ZeroDivisionError: if the kanji counts sum to 0, or if there is
            no drama besides the merged one / no episode while kanji
            rows exist.
    """
    # Refresh the per-character counts.
    self.total_count = self.db.get_count_for_drama(
        JdsDatabase.get_merged_drama())
    counts = self.total_count

    # JLPT position: highest level index first, index 0 is skipped.
    jlpt_dict = self.compute_pos_dict('jlpt')
    position = 1
    for level in range(len(jlpt_dict) - 1, 0, -1):
        for char in jlpt_dict[level]:
            char.jlpt_pos = position
            position += 1

    # Jouyou position: level index 1 first, index 0 is skipped.
    jouyou_dict = self.compute_pos_dict('jouyou')
    position = 1
    for level in range(1, len(jouyou_dict)):
        for char in jouyou_dict[level]:
            char.jouyou_pos = position
            position += 1

    # Kanji uids, most frequent first. Sorted and filtered once, then
    # reused for both the total and the ranking pass.
    kanji_uids = [
        uid for uid in sorted(counts, key=counts.get, reverse=True)
        if is_kanji(self.chars[uid].value)
    ]

    # The denominator must come from the same counts as the numerators
    # below, otherwise the cumulative frequency does not end at 1.
    sum_all_count = sum(counts[uid] for uid in kanji_uids)

    # JDPT ranking: cur_level indexes jdpt_limits and moves toward 0 as
    # the cumulative frequency crosses each limit.
    jdpt_limits = [1, 0.99, 0.98, 0.95, 0.9, 0.75, 0.5]
    cur_level = 6
    cumul_freq = 0
    for position, uid in enumerate(kanji_uids, start=1):
        char = self.chars[uid]
        count = counts[uid]
        freq = count / sum_all_count
        cumul_freq += freq

        char.jdpt_pos = position
        char.freq = freq
        char.cumul_freq = cumul_freq
        char.set_count(count)

        # Never step below index 0: a negative index would silently wrap
        # around jdpt_limits when rounding pushes cumul_freq past 1.
        if cur_level > 0 and cumul_freq > jdpt_limits[cur_level]:
            cur_level -= 1
        # jdpt is left untouched when cumul_freq is not strictly below
        # the current limit.
        if cumul_freq < jdpt_limits[cur_level]:
            char.jdpt = cur_level - 1

    self.db.push_kanji_pos(self.chars)

    # Drama and episode frequency.
    char_drama_count = {}
    char_episode_count = {}
    for result in self.db.get_kanji_count_raw():
        kanji_uid = result['kanji_uid']
        # Register every kanji seen, including those that only occur in
        # skipped rows, so each one ends up with a frequency entry.
        char_drama_count.setdefault(kanji_uid, 0)
        char_episode_count.setdefault(kanji_uid, 0)

        drama_uid = result['drama_uid']
        episode_count = result['episode_count']
        # Rows with drama uid 0 are not counted (presumably the merged
        # drama, matching the "-1" below -- confirm against the schema).
        # Compared by value: identity on an int literal is unreliable.
        if drama_uid == 0:
            continue
        char_drama_count[kanji_uid] += 1
        char_episode_count[kanji_uid] += episode_count

    num_of_drama = len(
        self.db.get_all_dramas()) - 1  # -1 due to merge drama
    num_of_episodes = len(self.db.get_all_episodes_raw())

    char_drama_freq = {
        kanji_uid: count / num_of_drama
        for kanji_uid, count in char_drama_count.items()
    }
    char_episode_freq = {
        kanji_uid: count / num_of_episodes
        for kanji_uid, count in char_episode_count.items()
    }
    self.db.push_drama_and_episode_count(char_drama_freq,
                                         char_episode_freq)
if result['flag'] is not 1: continue row = dict() rows[result['kanji_uid']] = row row['kanji'] = chr(result['kanji_uid']) row['jouyou'] = result['jouyou'] row['jouyou_pos'] = result['jouyou_pos'] row['jdpt'] = result['jdpt'] row['jdpt_pos'] = result['jdpt_pos'] row['freq'] = result['freq'] row['cumul_freq'] = result['cumul_freq'] row['drama_freq'] = result['drama_freq'] row['episode_freq'] = result['episode_freq'] for result in kanji_count_results: if result['drama_uid'] is not JdsDatabase.get_merged_drama().uid: continue if result['kanji_uid'] not in rows: continue row = rows[result['kanji_uid']] row['count'] = result['count'] writer.writerows(rows.values()) print("{} ended in {:2.2f}".format(__file__, (time.perf_counter() - start_time))) if settings.enable_profiler: pr.disable() pr.print_stats(sort="cumulative")