def get_vocab_from_selector(selector_id):
    """Return the vocabulary list attached to the kanji selector *selector_id*.

    Returns None when no kanji row matches the given id.
    """
    db = Japanese_DB_handler()
    fmt = db.base_format
    row = db.get_item_by_id(fmt.kanjis, selector_id, fmt.kanjis.name)
    if not row:
        return None
    wanted_fields = www_config.get_vocab_format_including_id()
    return db.list_word_by_kanjis(row[0], *wanted_fields)
def get_selector_name_from_id(cls, selector_id):
    """Resolve *selector_id* to its 'name' field in the table behind cls.sub_url.

    Returns None (implicitly, as in the original) when no row matches.
    """
    db = Japanese_DB_handler()
    fmt = db.base_format
    table = fmt.get_table(cls.sub_url)
    name_field = fmt.get_field(cls.sub_url, 'name')
    row = db.get_item_by_id(table, selector_id, name_field)
    return row[0] if row else None
def parse(inputFile, outputDir):
    """Parse a space-delimited file of word/meaning[/prononciation] rows and
    write the entries that are new to the DB into intermediate CSV files,
    100 entries per file.

    Rows whose word already exists are logged and skipped; malformed rows
    (fewer than two columns) are collected into the potential-errors file.
    """
    # Getting configuration and DB.
    db_handler = Japanese_DB_handler()
    config_data = configuration.get_configuration()
    if not config_data:
        log.error("couldn't find get configuration data")
        return
    # Keep a backup copy of the raw input file.
    copyfile(inputFile, _generateFileName(config_data.input_files_bk, 'input'))
    f = db_handler.base_format
    existing_kanjis = db_handler.list(f.vocab, f.vocab.word)

    potentialErrors = []
    newEntriesList = []

    # Parsing input file. Japanese is forced as row[0], meaning as row[1];
    # prononciation (row[2]) is optional.
    with open(inputFile, 'r') as fin:
        for row in csv.reader(fin, delimiter=' '):
            if len(row) < 2:
                # Bug fix: short rows used to raise IndexError; flag them instead.
                log.error('malformed row : ' + str(row))
                potentialErrors.append(row)
                continue
            word = row[0]
            meaning = row[1]
            prononciation = row[2] if len(row) > 2 and row[2] else ''
            exemple = ''
            if word not in existing_kanjis:
                newEntriesList.append(['', '', word, prononciation, meaning, exemple])
            else:
                log.error('already exists : ' + word)

    # Write the new entries in chunks of 100 per intermediate file.
    nb_of_files = len(newEntriesList) // 100
    if len(newEntriesList) % 100 != 0:
        nb_of_files += 1
    outputDir += '/'
    for nb in range(1, nb_of_files + 1):
        fileName = _generateFileName(outputDir, "int", str(nb))
        with open(fileName, 'w') as fout:
            writer = csv.writer(fout, delimiter=' ')
            writer.writerow(['categorie', 'tag', 'word', 'prononciation',
                             'meaning', 'exemple'])
            for entry in newEntriesList[100 * (nb - 1):100 * nb]:
                writer.writerow(entry)

    # Dump rows flagged as potential errors for manual review.
    fileName = _generateFileName(outputDir, "int", '_pottentialErrors')
    with open(fileName, 'w') as fout:
        writer = csv.writer(fout, delimiter=' ')
        for error in potentialErrors:
            writer.writerow(error)
            log.error(error)
    return
def hello():
    """Render the landing page: one 'list_test' template section per stat list."""
    db = Japanese_DB_handler()
    stats = db.get_db_stat()
    sections = []
    for list_name, (most_used_key, number_key) in stat_list_names.items():
        sections.append(template('list_test',
                                 rows=stats[most_used_key],
                                 list_name=list_name,
                                 number=stats[number_key]))
    return ''.join(sections)
def categorie_page(categorie_id):
    """Render the 'categorie' template for *categorie_id*.

    A category that does not exist in the DB renders with name=None and an
    empty row set.
    """
    db = Japanese_DB_handler()
    fmt = db.base_format
    if db.check_categorie_existence(categorie_id):
        checked_cat_id = categorie_id
        wanted = (fmt.vocab.word, fmt.vocab.prononciation,
                  fmt.vocab.meaning, fmt.vocab.example)
        vocab_rows = db.list_word_by_categorie(categorie_id, *wanted)
    else:
        checked_cat_id = None
        vocab_rows = ()
    return template('categorie', name=checked_cat_id, rows=vocab_rows)
def list_cat_tag_from_csv_files(*input_file_list, log_info=False):
    """Collect the categories and tags used in the given CSV files and split
    each set into those already present in the DB and those that are new.

    Returns (existing_cat, new_cat, existing_tag, new_tag) as sets.
    """
    csv_all_cat = set()  # all cat found in csv
    csv_all_tag = set()  # all tag found in csv
    jpDB = Japanese_DB_handler()
    f = jpDB.base_format
    jpDB_cat = set(jpDB.select(f.categories, f.categories.name))
    jpDB_tag = set(jpDB.select(f.tags, f.tags.name))
    for input_file in input_file_list:
        found_cat, found_tag = _list_cat_tag_process_single_file(input_file)
        csv_all_cat.update(found_cat)
        csv_all_tag.update(found_tag)
    csv_existing_cat = csv_all_cat & jpDB_cat  # cat already existing in DB
    csv_existing_tag = csv_all_tag & jpDB_tag  # tag already existing in DB
    csv_new_cat = csv_all_cat - csv_existing_cat  # cat new to the DB
    csv_new_tag = csv_all_tag - csv_existing_tag  # tag new to the DB
    if log_info:
        _log_separator('categories')
        log.info('1 : existing categories : ')
        _log_set(csv_existing_cat)
        log.info(' ')
        # Bug fix: these sections list the NEW sets but were labelled 'existing'.
        log.info('2 : new categories : ')
        _log_set(csv_new_cat)
        log.info(' ')
        _log_separator('tags')
        log.info('1 : existing tags : ')
        _log_set(csv_existing_tag)
        log.info(' ')
        log.info('2 : new tags : ')
        _log_set(csv_new_tag)
        log.info(' ')
    return csv_existing_cat, csv_new_cat, csv_existing_tag, csv_new_tag
def words_page():
    """Build the full 'words' page: header, add-word form, vocabulary listing."""
    wanted_fields = www_config.get_vocab_format_including_id()
    vocab_list = Japanese_DB_handler().list_all_words(*wanted_fields)
    page_name = application_title + ", words"
    sections = [
        header_kioku(),
        create_add_word_page(),
        list_vocabulary(www_config.get_vocab_format_as_string(), vocab_list),
    ]
    return page_base_structure(page_name, main_css, ''.join(sections))
def add_word_status(method='GET'):
    """Add a single word from the GET query parameters and render the status page.

    Bug fix: removed the stray debug print and the block of commented-out
    request.forms code that duplicated the GET-parameter path below.
    """
    status = Japanese_DB_handler().add_single_word(
        request.GET.word,
        request.GET.prononciation,
        request.GET.meaning,
        request.GET.example,
        request.GET.categorie,
        request.GET.tag)
    name = application_title + ': add new word, status'
    css_file = main_css
    body = header_kioku()
    body += template('add_word_status', request.GET.word, status)
    data = page_base_structure(name, css_file, body)
    return data
def _add_vocab_fromCsv_dir(csv_file_list, add_categories, add_tags):
    """Load vocabulary rows from intermediate CSV files and insert them in the DB.

    Rows whose 'tag' field contains "ERROR" are collected separately instead of
    being inserted. Returns (status, error_entries).
    """
    cat_in_csv = set()
    tag_in_csv = set()
    vocab_entries = set()
    error_entries = []
    fields = ['categorie', 'tag', 'word', 'prononciation', 'meaning', 'example']
    # NOTE(review): the original called _get_delimiter() and then immediately
    # overwrote the result with a hard-coded space; the dead call was removed.
    # Confirm whether the configured delimiter should be honoured instead.
    delimiter = " "
    for file in csv_file_list:
        with open(file, 'r') as csv_file:
            reader = csv.DictReader(csv_file, fieldnames=fields, delimiter=delimiter)
            for row in reader:
                if row['tag'] == 'tag':
                    continue  # header row repeated in each intermediate file
                if "ERROR" in row['tag']:
                    error_entries.append(_format_row(row))
                else:
                    cat_in_csv.add(row['categorie'])
                    tag_in_csv.add(row['tag'])
                    vocab_entries.add(_format_row(row))
    jpDb = Japanese_DB_handler()
    if add_categories:
        jpDb.add_categories(*tuple(cat_in_csv), silent=True)
    if add_tags:
        jpDb.add_tags(*tuple(tag_in_csv), silent=True)
    status = jpDb.add_vocab(*tuple(vocab_entries))
    return status, error_entries
def get_word_data(word_id):
    """Fetch the full info record for the word with *word_id*."""
    return Japanese_DB_handler().get_word_info(word_id)
def get_selector_list_data():
    """Return (categories ordered by usage, incl. ids, and the total category count)."""
    db = Japanese_DB_handler()
    by_usage = db.list_categorie_by_usage(include_id=True)
    total = db.count(db.base_format.categories)
    return by_usage, total
import os
import sys
import csv
import logging

from japanese.Japanese_DB_handler import Japanese_DB_handler
import configuration as configuration

# Bug fix: 'logging' was used below without ever being imported (NameError).
logging.basicConfig()
log = logging.getLogger()
log.setLevel(logging.DEBUG)

# The output directory is taken from the command line and must already exist.
output_dir = sys.argv[1]
if not os.path.exists(output_dir):
    log.error('directory not found : ' + str(output_dir))
    sys.exit(1)

jpDB = Japanese_DB_handler()
f = jpDB.base_format
config_data = configuration.get_configuration()
if not config_data:
    log.error("couldn't find get configuration data")
    sys.exit(1)

cat_dir = {}
tag_dir = {}
cat_list = jpDB.list_categorie_by_usage()
tag_list = jpDB.list_tag_by_usage()
for cat, _ in cat_list:
    if not cat:
        continue
def categories_page():
    """Render every category, ordered by usage, via the 'full_list' template."""
    usage_rows = Japanese_DB_handler().list_categorie_by_usage()
    return template('full_list', rows=usage_rows, list_name='categories')
def update_name(orig_name, new_name):
    """Rename the category *orig_name* to *new_name*; returns the DB status."""
    db = Japanese_DB_handler()
    return db.edit_cat(orig_name, new_name)
def get_selector_list_data():
    """Return (core prononciations ordered by usage, incl. ids, and their total count)."""
    db = Japanese_DB_handler()
    usage_list = db.list_core_p_by_usage(include_id=True)
    count = db.count(db.base_format.core_prononciations)
    return usage_list, count
def stat_test():
    """Return the 'most used categories' stat rendered as a plain string.

    Bug fix: removed two stray debug print() calls left over from development.
    """
    jpDB = Japanese_DB_handler()
    stat_dict = jpDB.get_db_stat()
    return str(dict(stat_dict['most_used_categories']))
def _get_DB_format():
    """Lazily build and cache the module-level DB format object."""
    global db_format
    if db_format:
        return db_format
    db_format = Japanese_DB_handler().base_format
    return db_format
def parse(inputFile, outputDir):
    """Parse a space-delimited export of japanese/french rows and write the
    entries that are new to the DB into intermediate CSV files, 100 per file.

    Each japanese cell is one of: kana only; kanjis + kana prononciation;
    kanjis + prononciation + an example sentence. Suspiciously long kanji runs
    are flagged as potential errors and dumped to a separate review file.
    """
    # Getting configuration and DB.
    db_handler = Japanese_DB_handler()
    config_data = configuration.get_configuration()
    if not config_data:
        log.error("couldn't find get configuration data")
        return
    # Keep a backup copy of the raw input file.
    copyfile(inputFile, _generateFileName(config_data.input_files_bk, 'input'))
    f = db_handler.base_format
    existing_kanjis = db_handler.list(f.vocab, f.vocab.word)

    potentialErrors = []
    newEntriesList = []

    # Parsing input file: japanese is forced as row[0], french as row[1];
    # rows whose first char is not CJK are skipped.
    with open(inputFile, 'r') as fin:
        for row in csv.reader(fin, delimiter=' '):
            if not _is_cjk(row[0][0]):
                continue
            japanese = row[0]
            french = row[1]
            # 3 cases:
            # 1) just kana
            # 2) a bunch of kanji and kana prononciation
            # 3) case 2 + an example sentence
            if ' ' not in japanese:
                # 1) no kanjis
                word = japanese
                prononciation = ''
                exemple = ''
            else:
                potentialKanjis, afterKanjis = japanese.split(' ', 1)
                afterKanjis = _delTrailingSpaces(afterKanjis)
                # Re-attach する / な suffixes to the kanji part.
                if afterKanjis[:2] == 'する':
                    potentialKanjis += ' (する)'
                    afterKanjis = _delTrailingSpaces(afterKanjis[2:])
                if afterKanjis[:2] == 'な ':
                    potentialKanjis += ' (な)'
                    afterKanjis = _delTrailingSpaces(afterKanjis[1:])
                if len(potentialKanjis) > 7:
                    # x) Potential error: probably a full phrase, not a word.
                    # Bug fix: the original fell through with status=False and
                    # then logged 'already exists : ' + word with `word`
                    # undefined (stale or NameError). Skip the row instead.
                    log.error('potential error :' + potentialKanjis)
                    potentialErrors.append(row)
                    continue
                elif ' ' not in afterKanjis:
                    # 2) just kanjis and prononciation
                    word = potentialKanjis
                    prononciation = _delTrailingSpaces(afterKanjis)
                    exemple = ''
                else:
                    # 3) kanjis, prononciation and example
                    word = potentialKanjis
                    prononciation, exemple = afterKanjis.split(' ', 1)
                    prononciation = _delTrailingSpaces(prononciation)
                    exemple = _delTrailingSpaces(exemple)
            if word not in existing_kanjis:
                newEntriesList.append(
                    ['', '', word, prononciation, french, exemple])
            else:
                log.error('already exists : ' + word)

    # Write the new entries in chunks of 100 per intermediate file.
    nb_of_files = len(newEntriesList) // 100
    if len(newEntriesList) % 100 != 0:
        nb_of_files += 1
    outputDir += '/'
    for nb in range(1, nb_of_files + 1):
        fileName = _generateFileName(outputDir, "int", str(nb))
        with open(fileName, 'w') as fout:
            writer = csv.writer(fout, delimiter=' ')
            writer.writerow([
                'categorie', 'tag', 'word', 'prononciation', 'meaning',
                'exemple'
            ])
            for entry in newEntriesList[100 * (nb - 1):100 * nb]:
                writer.writerow(entry)

    # Dump rows flagged as potential errors for manual review.
    fileName = _generateFileName(outputDir, "int", '_pottentialErrors')
    with open(fileName, 'w') as fout:
        writer = csv.writer(fout, delimiter=' ')
        for error in potentialErrors:
            writer.writerow(error)
            log.error(error)
    return
def _jpdb():
    """Return the process-wide Japanese_DB_handler singleton, creating it lazily."""
    global _jpdb_object
    if _jpdb_object:
        return _jpdb_object
    _jpdb_object = Japanese_DB_handler()
    return _jpdb_object