# Assumes Django models SpeciesGroup, Species, Rank and the helpers
# get_data, get_data_path, get_languages are provided by the surrounding project.
def run():
    table = get_data(get_data_path('taxonomy.csv'))
    group_id = species_id = 1
    # Skip the header row; each data row is laid out as:
    #   row[1] = rank slug, row[3] = scientific name, row[4] = standard name,
    #   row[5] = order, row[6] = family, optionally followed by an
    #   English group name in parentheses.
    for index in range(1, len(table)):
        row = table[index]
        kwargs = {
            'id': group_id,
            'order': row[5],
            'family': row[6].split('(')[0].strip(),
            'genus': row[3].split(' ')[0],
        }
        if 'en' in get_languages() and '(' in row[6]:
            # The English group name sits between the parentheses.
            kwargs['name_en'] = row[6].split('(')[1][:-1].strip()
        group, created = SpeciesGroup.objects.get_or_create(**kwargs)
        if created:
            group_id += 1
        Species.objects.create(
            id=species_id,
            include=False,
            order=species_id,
            rank=Rank.objects.get(slug=row[1]),
            group=group,
            standard_name=row[4],
            scientific_name=row[3],
        )
        # A species row is created on every iteration, so the id must advance
        # unconditionally; tying it to `created` would reuse primary keys and
        # fail on the next create.
        species_id += 1


import os
import sqlite3


def init_settings(data_path):
    global _db_path
    global _db_conn
    db_dir = os.path.join(data_path, 'data')
    _db_path = os.path.join(db_dir, 'settings.sqlite')
    if not os.path.exists(db_dir):
        os.makedirs(db_dir)
    # Seed the per-language defaults only on first run, before the file exists.
    first_run = not os.path.exists(_db_path)
    _db_conn = sqlite3.connect(_db_path)
    if first_run:
        _db_conn.execute("""
            CREATE TABLE IF NOT EXISTS lang_settings(
                lang VARCHAR(25) UNIQUE NOT NULL,
                tab_width INTEGER(2) NOT NULL,
                use_spaces INTEGER(1) NOT NULL
            )
        """)
        for lang in get_languages():
            # Parameterized placeholder instead of string concatenation,
            # which was vulnerable to SQL injection via language names.
            _db_conn.execute(
                'INSERT INTO lang_settings(lang, tab_width, use_spaces) '
                'VALUES (?, 4, 1)',
                (lang,),
            )
        _db_conn.commit()


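# Usage sketch (an assumption, not part of the original module): once
# init_settings() has run, per-language settings can be read back through the
# module-global connection. get_lang_settings is a hypothetical helper and
# 'python' a hypothetical language key; real keys come from get_languages().
def get_lang_settings(lang):
    row = _db_conn.execute(
        'SELECT tab_width, use_spaces FROM lang_settings WHERE lang = ?',
        (lang,),
    ).fetchone()
    if row is None:
        return None
    return {'tab_width': row[0], 'use_spaces': bool(row[1])}

# init_settings('/path/to/app')
# get_lang_settings('python')  # -> {'tab_width': 4, 'use_spaces': True}

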
# web.py request handler; safeint, to_json and utils.get_languages come from
# the surrounding project.
def GET(self):
    i = web.input(q="", limit=5)
    i.limit = safeint(i.limit, 5)
    # Case-insensitive prefix match of the query against known language names,
    # e.g. ?q=fr&limit=3 returns at most three matches as JSON.
    languages = [
        lang for lang in utils.get_languages()
        if lang.name.lower().startswith(i.q.lower())
    ]
    return to_json(languages[:i.limit])


from heapq import nlargest

import pandas as pd

# utils.cosSim, utils.selected_films, utils.R_time_score and
# utils.get_languages come from the surrounding project.
def search_engine_3(data_frame, tfidf_vocabulary, query, vocabulary):
    se3 = data_frame
    languages = utils.get_languages(se3)
    # Ask the user for a language.
    print("Choose a language from: ")
    print(languages)
    l = input("Select language: ")
    print("You selected: " + l)
    if l not in languages:
        print("Sorry, your language is not in our dataset")
        return
    # Add a column with the new similarity ranking.
    se3['Similarity'] = se3.apply(utils.cosSim, axis=1,
                                  tfidf_vocabulary=tfidf_vocabulary,
                                  query=query, vocabulary=vocabulary)
    # Add a column flagging rows that match the selected language.
    se3['sel_lan'] = se3.apply(utils.selected_films, axis=1, language=l)
    # Build (similarity, film_id) tuples over the actual film ids instead of
    # assuming they run from 0 to 29999.
    list_tuples = []
    for i in se3['film_id']:
        list_tuples.append(
            (list(se3[se3['film_id'] == i]['Similarity'])[0], i))
    # Take the top 15 by similarity.
    largest_sim = nlargest(15, list_tuples)
    # Collect the top-15 rows; pd.concat replaces DataFrame.append, which was
    # removed in pandas 2.0.
    columns = ['title', 'intro', 'Wikipedia_link', 'Running time', 'Similarity']
    res_sim = pd.concat(
        [se3[se3['film_id'] == elem[1]][columns] for elem in largest_sim])
    # Add a column ranking by running time and sort by it.
    res_sim['rank_duration'] = res_sim.apply(utils.R_time_score, axis=1)
    res_sim.sort_values(by=['rank_duration'], inplace=True, ascending=True)
    print(res_sim[['title', 'intro', 'Wikipedia_link',
                   'rank_duration']].to_string())


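# Equivalent selection sketch (an assumption, not from the original code):
# pandas' built-in DataFrame.nlargest can pick the 15 highest-similarity rows
# directly, replacing the per-film-id loop and heap selection above.
def top_k_by_similarity(se3, k=15):
    columns = ['title', 'intro', 'Wikipedia_link', 'Running time', 'Similarity']
    # Sorts descending by 'Similarity' and keeps the first k rows.
    return se3.nlargest(k, 'Similarity')[columns]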