def do(self, search, page):
    """Search a term in the Whoosh index."""
    aborted_search = False
    results = []
    num_results = 0
    total_time = 0
    PER_PAGE = 100

    start_time = time.time()
    if search.has_invalid_search_term:
        aborted_search = True
        pagination = None
        glossary = None
        pages = 0  # keep ctx consistent when the search is aborted
    else:
        g = Glossary(search.source)
        g.search()
        glossary = g.get_results()

        raw_results = search.get_results()
        num_results = raw_results.scored_length()
        if len(raw_results) > 0:
            url = request.url
            o = urllib.parse.urlparse(url)
            url = '?' + o.query
            pagination = Pagination(PER_PAGE, len(raw_results), url, page)
            start = (pagination.page - 1) * PER_PAGE
            # Clamp the page window to the number of scored results
            end = min(start + PER_PAGE, num_results)

            for i in range(start, end):
                results.append(self.get_result(raw_results[i]))
            pages = pagination.pages
        else:
            pagination = None
            pages = 0

    total_time = time.time() - start_time
    ctx = {
        'source': search.source,
        'target': search.target,
        'project': search.project,
        'num_results': num_results,
        'time': "{:.2f}".format(total_time),
        'aborted_search': aborted_search,
        'glossary': glossary,
        'pages': pages,
        'results': results,
    }
    return ctx
def do(self, search):
    """Search a term in the Whoosh index."""
    aborted_search = False
    results = []
    num_results = 0
    total_time = 0
    PER_PAGE = 100

    g = Glossary(search.source)
    g.search()
    glossary = g.get_results()

    if search.has_invalid_search_term:
        aborted_search = True
        pagination = None
    else:
        start_time = time.time()
        raw_results = search.get_results()
        total_time = time.time() - start_time
        num_results = raw_results.scored_length()
        if len(raw_results) > 0:
            url = request.url.encode('utf-8')
            o = urlparse(url)
            url = '?' + o.query
            pagination = Pagination(PER_PAGE, len(raw_results), url)
            start = (pagination.page - 1) * PER_PAGE
            # Clamp the page window to the number of scored results
            end = min(start + PER_PAGE, num_results)

            for i in xrange(start, end):
                results.append(self.get_result(raw_results[i]))
        else:
            pagination = None

    ctx = {
        'source': search.source,
        'target': search.target,
        'project': search.project,
        'results': results,
        'num_results': num_results,
        'time': "{:.2f}".format(total_time),
        'aborted_search': aborted_search,
        'glossary': glossary,
        'pagination': pagination,
    }
    env = Environment(loader=FileSystemLoader('./'))
    template = env.get_template('templates/search_results.html')
    return template.render(ctx).encode('utf-8')
def do(self, search):
    """Search a term in the Whoosh index."""
    aborted_search = False
    results = []
    num_results = 0
    total_time = 0
    PER_PAGE = 100

    g = Glossary(search.source)
    g.search()
    glossary = g.get_results()

    if search.has_invalid_search_term:
        aborted_search = True
        pagination = None
    else:
        start_time = time.time()
        raw_results = search.get_results()
        total_time = time.time() - start_time
        num_results = raw_results.scored_length()
        if len(raw_results) > 0:
            url = request.url
            o = urllib.parse.urlparse(url)
            url = '?' + o.query
            pagination = Pagination(PER_PAGE, len(raw_results), url)
            start = (pagination.page - 1) * PER_PAGE
            # Clamp the page window to the number of scored results
            end = min(start + PER_PAGE, num_results)

            for i in range(start, end):
                results.append(self.get_result(raw_results[i]))
        else:
            pagination = None

    ctx = {
        'source': search.source,
        'target': search.target,
        'project': search.project,
        'results': results,
        'num_results': num_results,
        'time': "{:.2f}".format(total_time),
        'aborted_search': aborted_search,
        'glossary': glossary,
        'pagination': pagination,
    }
    env = Environment(loader=FileSystemLoader('./'))
    template = env.get_template('templates/search_results.html')
    return template.render(ctx).encode('utf-8')
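# The three `do` variants above all depend on a `Pagination` helper that is
# not included in this collection. The sketch below is a guess at its minimal
# interface (constructor arguments plus the `page` and `pages` attributes the
# variants use), written only to make the snippets above easier to follow;
# the real class presumably also builds page links from the query-string
# `url` it receives.
import math

class Pagination(object):
    def __init__(self, per_page, total_count, url, page=1):
        self.per_page = per_page
        self.total_count = total_count
        self.url = url  # query string used to build page links
        self.page = max(1, page)

    @property
    def pages(self):
        # Total number of pages needed to show every result
        return int(math.ceil(self.total_count / float(self.per_page)))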
def process_projects(src_directory, glossary_description, glossary_file):
    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms
    MAX_TERMS = 5000
    sorted_terms_by_tfxdf = sorted(metrics.tfxdf, key=metrics.tfxdf.get,
                                   reverse=True)

    # Developer report
    glossary_entries = OrderedDict()
    translations = Translations()
    selected_terms = sorted_terms_by_tfxdf[:MAX_TERMS]  # Sorted by frequency
    for term in selected_terms:
        glossary_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   glossary_entries, reference_sources)

    # User report
    glossary_entries = []
    selected_terms = sorted(sorted_terms_by_tfxdf[:MAX_TERMS])  # Sorted by term
    glossary = Glossary(glossary_description)
    for term in selected_terms:
        glossary_entry = GlossaryEntry(
            term,
            translations.create_for_word_sorted_by_frequency(
                corpus.documents, term, reference_sources)
        )
        glossary.entries.append(glossary_entry)

    glossary_entries = glossary.get_dict()
    process_template('templates/userglossary-html.mustache',
                     glossary_file + ".html", glossary_entries)
    process_template('templates/userglossary-csv.mustache',
                     glossary_file + ".csv", glossary_entries)
    generate_database(glossary, glossary_file)
def process_projects(src_directory, glossary_description, glossary_file):
    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms
    MAX_TERMS = 1000
    sorted_terms_by_tfxdf = sorted(metrics.tfxdf, key=metrics.tfxdf.get,
                                   reverse=True)

    # Developer report
    glossary_entries = OrderedDict()
    translations = Translations()
    selected_terms = sorted_terms_by_tfxdf[:MAX_TERMS]  # Sorted by frequency
    for term in selected_terms:
        glossary_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   glossary_entries, reference_sources)

    # User report
    glossary_entries = []
    selected_terms = sorted(sorted_terms_by_tfxdf[:MAX_TERMS])  # Sorted by term
    glossary = Glossary(glossary_description)
    for term in selected_terms:
        glossary_entry = GlossaryEntry(
            term,
            translations.create_for_word_sorted_by_frequency(
                corpus.documents, term, reference_sources)
        )
        glossary.entries.append(glossary_entry)

    glossary_entries = glossary.get_dict()
    process_template('templates/userglossary-html.mustache',
                     glossary_file + ".html", glossary_entries)
    process_template('templates/userglossary-csv.mustache',
                     glossary_file + ".csv", glossary_entries)
def process_projects():
    global glossary_file
    global glossary_description

    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms
    MAX_TERMS = 1000
    sorted_terms_by_tfxdf = sorted(metrics.tfxdf, key=metrics.tfxdf.get,
                                   reverse=True)

    # Developer report
    glossary_entries = OrderedDict()
    translations = Translations()
    selected_terms = sorted_terms_by_tfxdf[:MAX_TERMS]  # Sorted by frequency
    for term in selected_terms:
        glossary_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   glossary_entries, reference_sources)

    # User report
    glossary_entries = []
    selected_terms = sorted(sorted_terms_by_tfxdf[:MAX_TERMS])  # Sorted by term
    glossary = Glossary()
    glossary.description = glossary_description
    for term in selected_terms:
        glossary_entry = GlossaryEntry()
        glossary_entry.source_term = term
        glossary_entry.translations = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)
        glossary.entries.append(glossary_entry)

    user_glossary_serializer = UserGlossarySerializer()
    user_glossary_serializer.create(glossary_file, glossary.get_dict(),
                                    reference_sources)
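# All three `process_projects` variants rank terms by `metrics.tfxdf`, which
# is computed elsewhere. As an illustration only (an assumption based on the
# name; the real `Metrics` class may normalize or weight differently), a
# tf*df score can be as simple as overall term frequency multiplied by
# document frequency:
from collections import Counter

def tfxdf_scores(documents):
    """Hypothetical tf*df scoring over tokenized documents."""
    tf = Counter()   # how often each term occurs overall
    df = Counter()   # how many documents each term occurs in
    for terms in documents:
        tf.update(terms)
        df.update(set(terms))
    return {term: tf[term] * df[term] for term in tf}

# Example: tfxdf_scores([["file", "open"], ["file", "save"]]) gives "file"
# the highest score (tf=2, df=2), matching the intuition that terms common
# across many documents matter most for a glossary.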
def __init__(self):
    super().__init__()
    self._page_time_sleep = 1
    self._detail_page_time_sleep = 0.500
    self._start_page_number = 0
    self._page_limit = 20
    self._glossary = Glossary()
    self._data_output = []
    self._url = 'https://kinozal-tv.appspot.com/'
    self._request_header = {
        'user-agent': 'Mozilla/5.0 (X11; Linux i586; rv:31.0) '
                      'Gecko/20100101 Firefox/31.0',
        'Charset': 'utf-8',
    }
try:
    import psyco
except:
    print 'Warning: module "psyco" not found.'

t0 = time.time()
try:
    dicPath = sys.argv[1]
except:
    dicPath = 'quick_eng-persian-e0.3.txt'
try:
    wordsFilePath = sys.argv[2]
except:
    wordsFilePath = dicPath[:-4] + '-words.txt'

g = Glossary()
g.readTabfile(dicPath)
g.checkUnicode()
#g.faEdit()
#words = g.takeOutputWords()
#wordsFile = open(wordsFilePath, "w")
#print len(words),"words found. writing to file..."
#wordsFile.write( string.join(words,"\n") )
#del wordsFile
wordsFile = open(wordsFilePath, "r")
g2 = g.reverseDic(wordsFile, {'matchWord': True})
g2.writeTabfile()
print 'About', int(time.time() - t0), 'seconds elapsed.'
#!/usr/bin/python
import sys
sys.path.append("/usr/share/pyglossary/src")
from glossary import Glossary
import time

t0 = time.time()
dicPath = sys.argv[1]
g = Glossary()
g.read(dicPath)
words = g.takeOutputWords({"minLen": 4, "noEn": True})
wordsFile = open(dicPath[:-4] + "-words.tab.txt", "w")
print(len(words), "words found. writing to file...")
wordsFile.write("\t#\n".join(words) + "\tNothing\n")
wordsFile.close()
print("%f seconds elapsed." % (time.time() - t0))
def glossary_search_api():
    source = request.args.get('source')
    glossary = Glossary(source)
    glossary.search()
    return Response(glossary.get_json(), mimetype='application/json')
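# A minimal wiring sketch for the endpoint above. The route path and the
# `app` object are assumptions made for illustration, not taken from the
# original snippet; `request`, `Response`, `add_url_rule`, and the test
# client are standard Flask APIs.
from flask import Flask, Response, request

app = Flask(__name__)
app.add_url_rule('/glossary/search', view_func=glossary_search_api)

with app.test_client() as client:
    resp = client.get('/glossary/search?source=file')
    print(resp.mimetype)  # application/json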
try:
    import psyco
    print 'Using module "psyco" to reduce execution time.'
    psyco.bind(Glossary)
except:
    print 'Warning: module "psyco" not found.'

t0 = time.time()
try:
    dicPath = sys.argv[1]
except:
    dicPath = 'quick_eng-persian-e0.3.txt'
try:
    wordsFilePath = sys.argv[2]
except:
    wordsFilePath = dicPath[:-4] + '-words.txt'

g = Glossary()
g.readTabfile(dicPath)
g.checkUnicode()
#g.faEdit()
#words = g.takeOutputWords()
#wordsFile = open(wordsFilePath, "w")
#print len(words),"words found. writing to file..."
#wordsFile.write( string.join(words,"\n") )
#del wordsFile
wordsFile = open(wordsFilePath, "r")
g2 = g.reverseDic(wordsFile, {'matchWord': True})
g2.writeTabfile()
print 'About', int(time.time() - t0), 'seconds elapsed.'
#!/usr/bin/python
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary

g1 = Glossary()
g2 = Glossary()
g1.read(sys.argv[1])
g2.read(sys.argv[2])
gm = g1.merge(g2)
gm.writeTabfile()
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self._glossary = Glossary()
    self._delimiter = ';'
        options_vbox.pack_start(hbox, 0, 0)
        ##
        options_exp.add(options_vbox)
        self.vbox.pack_start(options_exp, 0, 0)
        ####
        button_close = self.add_button(gtk.STOCK_CLOSE, 0)
        button_replace_all = self.add_button('Replace All', 0)
        button_replace_all.set_image(
            gtk.image_new_from_stock(gtk.STOCK_FIND_AND_REPLACE,
                                     gtk.ICON_SIZE_BUTTON))
        button_replace = self.add_button('Replace', 0)
        button_replace.set_image(
            gtk.image_new_from_stock(gtk.STOCK_FIND_AND_REPLACE,
                                     gtk.ICON_SIZE_BUTTON))
        button_find = self.add_button(gtk.STOCK_FIND, 0)
        self.action_area.set_homogeneous(False)
        ####
        self.vbox.show_all()

    def onDeleteEvent(self, widget, event):
        self.hide()
        return True

## Warn when replacing in all entries, and show number of occurrences

if __name__ == '__main__':
    from glossary import Glossary
    glos = Glossary()
    DbEditorFindDialog(glos).run()
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary
import time

try:
    import psyco
    print 'Using module "psyco" to reduce execution time.'
    usePsyco = True
except:
    print 'Warning: module "psyco" not found.'
    usePsyco = False

t0 = time.time()
dicPath = sys.argv[1]
g = Glossary()
g.read(dicPath)
if usePsyco:
    psyco.bind(Glossary, 100)
words = g.takeOutputWords({'minLen': 4, 'noEn': True})
wordsFile = open(dicPath[:-4] + "-words.tab.txt", "w")
print len(words), "words found. writing to file..."
wordsFile.write('\t#\n'.join(words) + '\tNothing\n')
wordsFile.close()
print '%f seconds elapsed.' % (time.time() - t0)
#!/usr/bin/python
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary

g = Glossary()
g.read(sys.argv[1])
g.writeTabfile()