def short_uniq_filename_ext(source):
    '''Make a short unique filename, needed for filesystems with a maximum
    path length, and for utilities (like ffmpeg) that work reliably only
    with ASCII filenames.'''
    ufilename = source
    dirname, nameext = os.path.split(ufilename)
    sshortname = '-'.join([trans.trans(nameext[:4]),
                           hash4string(nameext),
                           trans.trans(nameext[-64:])])
    return os.path.join(dirname, sshortname)

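# Usage sketch for short_uniq_filename_ext (illustrative only: it assumes the
# pypi `trans` module and a project-local hash4string helper returning a short
# digest, so the exact output string is an assumption, not from the source):
#
#   >>> short_uniq_filename_ext(u'/tmp/Очень длинное имя файла.mp4')
#   '/tmp/Ochen-a1b2c3d4-...-fajla.mp4'   # 4-char prefix + hash + 64-char tail
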
def run(self):
    buf, key, rand, queue = self._args
    sel_a, sel_b, swp_a, swp_b = (options.select_alpha, options.select_beta,
                                  options.swap_alpha, options.swap_beta)
    while True:
        plain = trans.trans(buf, dict(zip(key, to_set)))
        do_break = True
        for word in words:
            if word in plain:
                break
        else:
            do_break = False
        if do_break:
            queue.put((key, plain))
            break
        moving_indices = from_indices
        sel_idx = int((len(from_indices) - 1) * rand.betavariate(sel_a, sel_b))
        select = from_indices[sel_idx]
        swp_idx = min(len(from_indices) - 1,
                      sel_idx + 1 + len(from_indices) * int(rand.betavariate(swp_a, swp_b)))
        swap = from_indices[swp_idx]
        if options.verbose:
            print 'Swap step:'
            print ''.join(key)
            print (' ' * select) + '^' + ('-' * (swap - select - 1)) + '^'
        temp = key[select]
        key[select] = key[swap]
        key[swap] = temp
        if swap == select:
            print "you're a tool"

def CheckCategory(category):
    categoryes = Category.objects.all()
    for categor in categoryes:
        if category.lower() == trans(categor.name, 'slug').lower():
            return categor.id
    # return None only after checking every category, not on the first mismatch
    return None

def set_yottos_category(self, shop_cat_id, y_cat_id):
    #~ if len(self.market_offers) == 0:
    #~     if self.market_offers_task.state != 'SUCCESS':
    #~         self.market_offers_task.wait()
    #~     self.market_offers = self.market_offers_task.result
    #self.market_offers = adload.get_offers_market_by_id(market_id)['offers']
    for x in self.market_offers:
        if str(x['categoryId']) == shop_cat_id:
            try:
                x['vendor'] = self.set_vender(x['vendor'])
            except:
                x['vendor'] = 100000
            if x['vendor'] is None:
                x['vendor'] = 100000
            title = x['title']
            title = title.replace(',', '')
            title = title.replace('.', '')
            title = title.replace(' ', '_')
            x['transTitle'] = trans(unicode(title))[0]
            x['categoryId'] = y_cat_id
            x['price'] = float(x['price'])
            try:
                self.rynok_db.Products.find_one({'id': x['id']})['id']
            except:
                self.set_market_count()
                self.set_category_count(int(y_cat_id))
            self.rynok_db.Products.update({'id': x['id']}, x, upsert=True)

def update_markets_in_campaign(campaign_id):
    markets_model = MarketsModel()
    markets_by_campaign = CampaignsModel.adload.get_shops_by_advertise(campaign_id)
    for market in markets_by_campaign:
        market['count'] = 0
        transformed_title = (unicode(market['transformedTitle'])
                             if 'transformedTitle' in market else False)
        if not transformed_title:
            title = market['title'] if 'title' in market else ''
            transformed_title = trans(unicode(title.replace("_", "-").replace(".", "-")))[0]
            transformed_title = re.compile(r'[^\w+]').sub('-', transformed_title)
            transformed_title = re.compile(r'[-]+').sub('-', transformed_title)
            market['transformedTitle'] = transformed_title.lower()
        if 'Categories' in market:
            categories = market['Categories']
            comparison = markets_model.get_comparison(market['id'])
            for category in categories[:]:
                yml_category_id = category['id']
                if yml_category_id.isdigit():
                    yml_category_id = int(yml_category_id)
                for comparison_item in comparison:
                    if yml_category_id == comparison_item['shop_cat_id']:
                        categories.remove(category)
        markets_model.save(market)
    CampaignsModel.adload.campaign_add('rynok', campaign_id)

def gen(self, *files):
    """
    Generates the site.

    `gen` is the most important command available. By default (when no
    files are specified) it processes the entire www folder contents and
    applies the available transformations to generate the output static
    pages in the cache directory.

    If files are given, only they will be generated and copied. Files can
    also contain wildcards, or point to directories.
    """
    self._info('Generating...', 'gen')
    if self._genlog not in self.tags.infof:
        self.tags.infof.append(self._genlog)
    if (len(files) == 0) or ('all' in files):
        file = None
        self._info('files: (All Files)', 'gen')
    else:
        self._info('files: {}'.format(', '.join(files)), 'gen')
    try:
        c = self.config['common'] if 'common' in self.config else None
        t = trans(self.dir, self.path, c, self.tags)
        try:
            t.run()
            self._info("Finished transfer. Log written to '{}'.".format(t.log.fname), 'gen')
        except Exception as e:
            self._exception('Error during transfer. {}.'.format(type(e).__name__))
    except Exception as e:
        self._exception('Unable to initialise transfer. {}.'.format(type(e).__name__))

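# Hypothetical invocations of `gen` (the `site` object name and the file
# arguments are assumptions for illustration, not from the source):
#
#   site.gen()                       # regenerate everything under www/
#   site.gen('index.html', 'blog/')  # regenerate only the given files/dirs
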
def slugify(text):
    """
    "Antyfona na Komunię" -> "antyfona-na-komunie"

    :param text:
    :return:
    """
    return trans(text).replace(' ', '-').lower()

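# Example taken from the docstring above (assumes the pypi `trans`
# transliteration function is in scope):
#
#   >>> slugify(u'Antyfona na Komunię')
#   u'antyfona-na-komunie'
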
def get_sanitized_filename(filename):
    """
    Sanitize the given file name by replacing all special characters with
    an underscore and keeping the extension if present.
    """
    # 1. discard the directory
    filename = os.path.basename(filename)
    # 2. transliterate all special characters to ascii
    filename = trans(filename).encode('ascii', 'ignore').decode('utf-8', 'ignore')
    # 3. split into non-empty extension components
    comp = [s for s in filename.split('.') if s]
    # 4. replace unwanted characters
    table = ''.join(c if c.isalnum() else '_' for c in map(chr, range(256)))
    comp = [s.translate(table) for s in comp]
    # 5. recompose filename
    filename = '.'.join(comp)
    if not filename:
        filename = 'file_to_analyze'
    return filename

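# Illustrative call (the exact transliteration of non-ASCII characters
# depends on the trans table in use, so the output below is an assumption):
#
#   >>> get_sanitized_filename(u'/tmp/годовой отчёт (v2).pdf')
#   'godovoj_otchet__v2_.pdf'
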
def get(self, request):
    authors = [data.author for data in Data.objects.distinct('author')]
    keys = [trans(author).lower().replace(" ", "") for author in authors]
    result = dict(zip(keys, authors))
    return Response(result)

def find_word(fil, letters):
    """Find the best matching words for the given consonant skeleton."""
    with open(fil, 'r') as f:
        for line in f:
            for word in line.split():
                word2 = trans(word)
                if remove_vowels(word2.lower()) == letters:
                    print(" ", word, end='')

def save_and_trans(text_dict):
    # Save translated results to the database. Before translating, check
    # whether a previously stored translation for the same content already
    # exists, to improve efficiency.
    mids = text_dict.keys()
    untranslated_mids = []
    untranslated_text_list = []
    bulk_create_action = []
    res = es_translation.mget(index=translation_index_name,
                              doc_type=translation_index_type,
                              body={'ids': mids})['docs']
    for r in res:
        mid = r['_id']
        # {u'_type': u'translation', u'_id': u'xxx', u'found': False or True, u'_index': u'record'}
        if r.has_key('found'):
            found = r['found']
            if found:
                translation = r['_source']['translation']
                text_dict[mid] = translation
            else:
                untranslated_mids.append(mid)
                untranslated_text_list.append(text_dict[mid])
        else:
            # no record exists in the ES index yet:
            # {u'_type': u'translation', u'_id': u'xxx', u'error': u'[record] missing', u'_index': u'record'}
            untranslated_mids.append(mid)
            untranslated_text_list.append(text_dict[mid])
    if untranslated_mids:
        count = 1.0
        while True:
            try:
                trans_result = trans(untranslated_text_list)
                count = 0
            except Exception, e:
                print e
            if count == 0:
                break
            else:
                print 'sleep start ...'
                time.sleep(count)
                count = count * 1.1
                print 'sleep over, next try ...'
        try:
            for i in range(len(untranslated_text_list)):
                mid = untranslated_mids[i]
                text = trans_result[i]
                text_dict[mid] = text
                create_action = {'index': {'_id': mid}}
                bulk_create_action.extend([create_action, {'translation': text}])
            save_result = es.bulk(bulk_create_action,
                                  index=translation_index_name,
                                  doc_type=translation_index_type)
            if save_result['errors']:
                print save_result
        except Exception, e:
            print e

def clear_local_char_in_column(df, col_name):
    values_list = df[col_name]
    clean_values = []
    for val in values_list:
        clean_values.append(t.trans(str(val)))
    df[col_name] = clean_values
    return df

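# Minimal usage sketch, assuming `t` is the imported trans module and a
# pandas DataFrame with a text column (names here are illustrative):
#
#   import pandas as pd
#   df = pd.DataFrame({'city': [u'Київ', u'Львів']})
#   df = clear_local_char_in_column(df, 'city')  # column becomes ASCII-only
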
def _suggest_search(self, query):
    # translates national characters into similar-sounding latin characters
    cleaned_query = trans(query)
    query_encoded = quote(cleaned_query)
    first_alphanum_char = self._query_first_alpha_num(cleaned_query)
    path = '/suggests/{0}/{1}.json'.format(first_alphanum_char, query_encoded)
    url = urljoin(SEARCH_BASE_URI, path)
    search_results = self._get(url=url, query=query_encoded)
    return search_results

def lecturer_img(dir_name, img_url):
    import shutil, os
    from trans import trans
    dir_name = trans(dir_name)
    try:
        os.mkdir("application/static/img/lecturers/" + dir_name)
    except FileExistsError:
        pass
    shutil.copy(img_url, "application/static/img/lecturers/" + dir_name + "/photo.jpg")
    new_url = "static/img/lecturers/" + dir_name + "/photo.jpg"
    return new_url

def test_eng_corpus(filename, number):
    corpus = read_data(filename)
    shuffle_len = len(corpus)
    x_len = list(range(shuffle_len))
    shuffle(x_len)
    x_len = x_len[:number]
    print('\n TEST will be done for {} randomly selected cases:\n'.format(number))
    corpus = [corpus[int(z)] for z in x_len]
    result = [count_eng(str(z)[2:-2], trans(str(z)[2:-2])) for z in corpus]
    acc = sum(result) / len(corpus)
    print('\n TEST English word detection performance: ', acc, '\n')

def test_trans_corpus(filename, number):
    corpus = read_data(filename)
    shuffle_len = len(corpus)
    x_len = list(range(shuffle_len))
    shuffle(x_len)
    x_len = x_len[:number]
    print('\n TEST will be done for {} randomly selected cases:\n'.format(number))
    corpus = [corpus[int(z)] for z in x_len]
    result = [eval_diff(decompose(trans(str(z).split(' ')[0][2:])),
                        decompose(str(z).split(' ')[1][:-2]))
              for z in corpus]
    acc = sum(result) / len(corpus)
    print('\n TEST Transliteration performance: ', acc, '\n')

def trans_bio_data(bio_data):
    count = 1.0
    while True:
        translated_bio_data = trans(bio_data)
        if len(translated_bio_data) == len(bio_data):
            break
        else:
            print 'sleep start ...'
            time.sleep(count)
            count = count * 1.1
            print 'sleep over ...'
    return translated_bio_data

def test_my_table_complex_register(self):
    trans.tables['my_complex'] = (
        {u'4 5': u'45'},
        {u'1': u'11', u'2': u'22', u'4': u'4', u'5': u'5', None: u'-'})
    self.assertEqual(trans(u'1 2 3 4 5 6 7 8 9', 'my_complex'),
                     u'11-22---45--------')

def test_my_table_complex(self):
    my_complex = (
        {u'4 5': u'45'},
        {u'1': u'11', u'2': u'22', u'4': u'4', u'5': u'5', None: u'-'})
    self.assertEqual(trans(u'1 2 3 4 5 6 7 8 9', my_complex),
                     u'11-22---45--------')

def test_my_table_complex():
    my_complex = (
        {u'4 5': u'45'},
        {u'1': u'11', u'2': u'22', u'4': u'4', u'5': u'5', None: u'-'})
    assert trans_module.trans(u'1 2 3 4 5 6 7 8 9', my_complex) == u'11-22---45--------'

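# How the two-part ("complex") table in these tests works: the first dict maps
# multi-character substrings (u'4 5' -> u'45'), the second maps single
# characters, and the None entry supplies the default for anything unmapped —
# which is why every space and unmapped digit becomes u'-' in the expected
# output u'11-22---45--------'.
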
def t(text):
    a = trans.trans(text)
    original_langcode = a[0]
    x = langcodes.Language.get(original_langcode)
    lang = x.language_name()
    if x.region_name() is not None:
        lang += " (" + x.region_name() + ")"
    if x.script_name() is not None:
        lang += " - " + x.script_name()
    translated = a[1]
    output = "Translated from: %s\n" % lang
    output += "`%s`" % translated
    return output

def entity(db, type, lang, num, name='', url='', date=datetime.datetime.min):
    '''Return the entity identified by key = (db|type|lang|num), creating one
    if none exists. If non-key fields (e.g. name / url) are empty, update them.'''
    num = __clean__(num)
    name = __clean__(name)
    url = __clean__(url)
    e = Entity.selectBy(db=db, type=type, lang=lang, num=num).getOne(None)
    if e:
        if not e.name and name:
            e.set(name=name)
        if not e.url and url:
            e.set(url=url)
    else:
        if type in ('movie', 'song', 'person'):
            tr = trans.trans(name, lang)
        else:
            tr = ''
        e = Entity(db=db, type=type, lang=lang, num=num, name=name,
                   tran=tr, url=url, date=date)
    return e

def searchterm_in_result(search_term, search_result):
    norm_search_term = sanitize_filename(search_term.lower())
    norm_result = sanitize_filename(search_result.lower())
    if norm_search_term in norm_result:
        return True
    # translates national characters into similar-sounding latin characters,
    # e.g. Физрук -> Fizruk
    search_term_alpha = trans(self.config['searchterm'])
    if search_term_alpha != search_term and search_term_alpha in norm_result:
        return True
    return False

def post(self, request: Request, order_id: str, email: str):
    documents = Document.objects.filter(order_id=order_id)
    order = Order.objects.get(id=order_id)
    documents_to_send = [
        document for document in documents
        if email in document.receivers_emails
    ]
    attachments_as_blobs = [
        self.bucket.blob(document.blob_name)
        for document in documents_to_send
    ]
    send_log = DocumentSendLog.objects.select_for_update().get(
        order_id=order_id, email=email
    )
    if not send_log.sent:
        send_mail(
            region=order.region,
            subject=f"Bestellbestätigung für #{order.id}",
            recipient_list=[email],
            template="mails/generic.html",
            context={
                "body": (
                    f"Ihre Unterlagen für die Bestellung #{order.id} finden "
                    f"Sie im Anhang dieser E-Mail"
                )
            },
            attachments=[
                (
                    trans(blob.name.split("/")[-1]),
                    blob.download_as_string(),
                    blob.content_type,
                )
                for blob in attachments_as_blobs
            ],
        )
        send_log.sent = True
        send_log.save(update_fields=["sent", "updated_at"])
    return Response()

def get(self, request, req):
    # Creating a dict like: {"lukaszpilatowski": "Łukasz Piłatowski"}
    keys = []
    values = list(Data.objects.values_list("author", flat=True).distinct())
    for a in values:
        keys.append(trans(a).lower().replace(" ", ""))
    dictionary = dict(zip(keys, values))
    # If the author doesn't exist
    if req not in dictionary.keys():
        content = 'Author does not exist'
        return Response(content, status=status.HTTP_404_NOT_FOUND)
    # Map the request key back to the author's name
    qreq = dictionary[req]
    # 10 most used words across all texts
    text = list(Data.objects.values_list("text", flat=True))
    word_list = [word for line in text for word in line.split()]
    counts = OrderedDict(Counter(word_list).most_common(10))
    # Creating stats for the author
    words = Data.objects.values_list("text", flat=True).filter(author=qreq)
    list_of_words = [x.split() for x in words]
    flat_list = [re.sub(r'[^\w\s]', '', item)
                 for sublist in list_of_words for item in sublist]
    word_counts = Counter(flat_list)
    y = OrderedDict((k, word_counts[k]) for k in counts.keys())
    return Response(y)

def test(s, l, h_m):
    random.seed(s)
    for i in range(h_m):
        msg = 'ABCDEFGHIJKLMNOPQRSTUWXYZ' * random.randint(4, l)
        msg = list(msg)
        random.shuffle(msg)
        msg = ''.join(msg)
        print(f'Test #{i + 1}: {msg[:50]}...')
        for key in range(1, int(len(msg) / 2)):
            enc = trans.trans(msg, key)
            dec = decryptTrans.trans(enc, key)
            if msg != dec:
                print(f'Mismatch with key: {key} and message: {msg}')
                print(f'Decrypted as: {dec}')
                sys.exit()
    print('All tests passed!')

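# Round-trip contract exercised by the test above (a sketch; trans/decryptTrans
# are assumed to implement a transposition cipher and its inverse with an
# integer key):
#
#   enc = trans.trans('HELLOWORLD', 3)
#   assert decryptTrans.trans(enc, 3) == 'HELLOWORLD'
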
def transportation():
    params = request.get_data()
    json_data = json.loads(params.decode('utf-8'))
    metaData = json_data["meta"]
    # print(metaData)
    rawMetaData = metaData.replace('#', '\n')
    filename = './transportation.txt'
    with open(filename, 'w') as file_object:
        file_object.write(rawMetaData)
    dataMax = np.loadtxt(filename, dtype=np.float)
    # print(dataMax)
    (width, length) = dataMax.shape
    # array of origin (supply) names
    product = ['A' + str(i) for i in range(1, width)]
    # print(product)
    # array of destination (demand) names
    sale = ['B' + str(i) for i in range(1, length)]
    # print(sale)
    s = [(product[i], dataMax[i, length - 1]) for i in range(width - 1)]
    d = [(sale[i], dataMax[width - 1, i]) for i in range(length - 1)]
    # c = dataMax[:width - 1, :length - 1]
    re = trans(tuple(product), tuple(sale), dataMax)
    info = getString(s, d)
    # print(s)
    info += re
    result = {
        'status': 20000,
        'message': 'What you see here is the simplex method',
        "info": info,
    }
    response = make_response(jsonify(result))
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Access-Control-Allow-Methods'] = 'OPTIONS,HEAD,GET,POST'
    response.headers['Access-Control-Allow-Headers'] = 'x-requested-with'
    response.status = "200"
    return response

def save_static_page(self):
    static_pages_model = StaticPagesModel
    static_page_translitedTitle = request.params.get('translitedTitle', None)
    static_page_title = request.params.get('title', 'Не заполнено')  # default: "Not filled in"
    static_page_content = request.params.get('content', '')
    new_page = {}
    if static_page_translitedTitle is not None:
        new_page = static_pages_model.get_by_translitedTitle(static_page_translitedTitle)
        if new_page is None:
            new_page = {}
    new_page['translitedTitle'] = re.compile(r'[^\w+]').sub('_', trans.trans(static_page_title)[0])
    new_page['title'] = static_page_title
    new_page['content'] = static_page_content
    static_pages_model.save(new_page)
    return redirect(url(controller='settings', action='edit_static_page',
                        page=new_page['translitedTitle']))

def test_trans():
    res = trans.trans('Hello', 'en', 'ko')
    print res
    assert res == u'여보세요'

def test_russian_diphthongs(self):
    self.assertEqual(trans(u'Юй Икари...'), u'Yuy Ikari...')

if DB.Products.find_one({'LotID': X.LotID_int}) is not None:
    continue
try:
    # if COUNT >= COUNT_TO_LOAD:
    #     break
    QUERY = "select * from LotByCategory where LotID=%s" % X.LotID_int
    CATEG = CURSOR_LOT.execute(QUERY).fetchone()
    ITEM = {}
    ITEM['Title'] = X.Title.decode('cp1251')
    try:
        t = unicode(ITEM['Title'])
    except:
        t = ITEM['Title']
        print t
    t = '_'.join(t.split())
    ITEM['Translited'] = (trans(t)[0].strip()
                          .replace("(", " ").replace(")", " ")
                          .replace(" ", "_").replace("-", "_")
                          .replace(",", "").replace(".", ""))
    ITEM['LotID'] = X.LotID_int
    MarketID = X.MarketID_int
    ITEM['ShopID'] = MarketID
    ITEM['ShopName'] = MARKET[MarketID]['Title']
    ITEM['ShopTransName'] = (trans(MARKET[MarketID]['Translited'])[0].strip()
                             .replace("(", " ").replace(")", " ")
                             .replace(" ", "_").replace("-", "_")
                             .replace(",", "").replace(".", ""))
    ITEM['AdCampaignID'] = X.SellerID_int
    ITEM['URLconf'] = 2
    ITEM['CategorID'] = CATEG.CategoryID
    ITEM['Weight'] = 0
    ITEM['StartDate'] = dt.now()
    ITEM['Descr'] = X.Description.decode('cp1251')
    ITEM['Price'] = X.Price.decode('cp1251')
    ITEM['PriceHistory'] = [{'date': dt.now()}, {'price': X.Price.decode('cp1251')}]
    ITEM['Currency'] = 'UHR'
    ITEM['CurrencyArray'] = [{'usd': '8.93'}, {'EUR': 'CBRF'}]

def translate(s):
    """Return the transliterated string."""
    s = s.replace(" ", "_")
    return trans(s)[0]

def _dictify(card):
    fields = ['type', 'types', 'name', 'names', 'cmc', 'manaCost', 'loyalty',
              'text', 'power', 'toughness', 'hand', 'life']
    key = trans(card['name']).lower()
    value = dict([(k, v) for k, v in card.items() if k in fields])
    return key, value

        id_set.add(id)
        break

    # generate a random gender
    sex = choice(["м", "ж"])
    # sequence number within the file
    sequence = i
    # generate a random name depending on the gender
    if sex == "м":
        first_name = choice(male_first_names)
        last_name = choice(all_last_names)
    else:
        first_name = choice(female_first_names)
        last_name = choice(all_last_names) + "a"
    # transliterate the name
    first_name_translit = trans(first_name)
    last_name_translit = trans(last_name)
    # generate a random country
    country = choice(list(country_city_names.keys()))
    # generate a random city depending on the country
    city = choice(country_city_names[country].split(","))
    # generate a random date
    date = str(randint(1, 31)) + '-' + str(randint(1, 12)) + '-' + str(randint(1996, 2020))
    # generate a random email
    email = ''.join([choice(list(first_name_translit + last_name_translit))
                     for x in range(randint(4, 12))]) + "@" + choice(email_domen_names)
    # transliterated handle: first letter of the first name plus the last name
    translit = first_name_translit[0] + "." + last_name_translit
    # generate a random integer value
    value_of_type = randint(1, 100)
    result_line = f"{result_pattern}".format(id=id, sequence=sequence,
                                             first_name=first_name, last_name=last_name,
                                             sex=sex, country=country, city=city,
                                             date=date, translit=translit, email=email,

def test_my_table_simple_register(self):
    trans.tables['my_simple'] = {u'1': u'2', u'2': u'3'}
    self.assertEqual(trans(u'1 2', 'my_simple'), u'2_3')

def slugify(val):
    return trans.trans(val, 'slug').lower()

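# Example, consistent with the 'slug' table tests elsewhere in this section
# (spaces and other non-word characters become underscores):
#
#   >>> slugify(u'Hello World')
#   u'hello_world'
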
threshold = 0.2
now = datetime.date.today()
delta_days = 7
doc_set = {}
s_review = sorted(rank_info.items(), key=lambda x: x[1], reverse=True)
for k in s_review:
    str_en = ""
    print("*** [{}] [{}] {} {} ***".format(
        k[0], k[1],
        reivew_db["comment"].find({"appid": k[0]}).count(),
        reivew_db["comment"].find({"appid": k[0], "usr_star": "1"}).count()))
    for r in reivew_db["comment"].find({"appid": k[0], "usr_star": "1"}):
        date = timestamp2date(trans(r["language"], r["usr_date"]))
        date_obj = datetime.datetime.strptime(date, '%Y-%m-%d').date()
        # if (now - date_obj).days <= delta_days:
        print("[{}] [{}] {} [{}]".format(r["language"], r["usr_star"], date, r["usr_comment"]))
        # compat.wait4input()
        if k[1] > threshold:
            # str_en += (removeTags(r["usr_comment"]) + " ")
            if r["usr_comment"] != '':
                if r["language"] not in doc_set:
                    doc_set[r["language"]] = []
                doc_set[r["language"]].append(removeTags(r["usr_comment"]))
    print("https://play.google.com/store/apps/details?id=" + k[0] + "\n")
    # print(str_en)
    # if str_en != '':
    #     doc_set_en.append(str_en)

all_reviews = reivew_db.comment.find({"appid": appid}).count()
print(">> Clustering: {}, [{}] All {} reviews, {} reviews hit keyword.".format(
    appid,
    str(round(float(related_reviews) / all_reviews, 3) * 100) + "%",
    all_reviews, related_reviews))
date = "nil"
for review in cluster[appid]:
    ll += 1
    if ll % 40 == 0:
        compat.wait4input()
    try:
        date = convertTimeStamp2Str(trans.trans(review['language'], review['usr_date']))
    except:
        date = "nil"
    # ll += 1
    # if ll % 20 == 0:
    #     t = input()
    keywords = ""
    for k in review["key"]:
        keywords += k
        keywords += " "
    print("\t[{}] [{}] [{}] [{}] {}".format(date, review["type"], review["usr_star"],
                                            keywords.strip(), review["usr_comment"]))

def main():
    for i in sys.argv[1:]:
        with open(i, 'r') as f:
            print trans.trans(f.read())

import struct
import os
from translit import translit_str
from trans import trans
from math import *
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(-5.0, 5.0, 0.01)
y = 2.0 * np.exp(-x / 1.1)
plt.plot(x, y)
plt.show()

s = translit_str('This is Проверка')
print(s)
s = trans('This is Проверка')
print(s)

flist = os.listdir()
print(flist)
for fl in os.scandir('.'):
    if fl.is_file():
        try:
            os.rename(fl.name, translit_str(fl.name))
        except:
            print('Not renamed ', fl.name)

def test_my_table_simple(self):
    my_simple = {u'1': u'2', u'2': u'3'}
    self.assertEqual(trans(u'1 2', my_simple), u'2_3')

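# Note on the expected value: characters missing from a simple table fall back
# to the default replacement (u'_'), which is why the space in u'1 2' becomes
# an underscore in u'2_3'.
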
def get_recommend_follows(task_detail):
    recommend_results = dict()
    # daily_interests_list = task_detail['daily_interests'].split(',')
    monitor_keywords_list = task_detail['monitor_keywords'].split(',')
    create_time = time.time()
    if S_TYPE == 'test':
        create_time = datetime2ts(S_DATE)
    index_name_list = get_flow_text_index_list(create_time)

    '''# The FB flow_text index has no daily_interests field
    ## follows matching the daily interests
    try:
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'terms': {'daily_interests': daily_interests_list}
                    }
                }
            },
            # 'sort': {'user_fansnum': {'order': 'desc'}},
            'size': DAILY_INTEREST_TOP_USER,
            '_source': ['uid']
        }
        es_results = es_flow_text.search(index=index_name_list, doc_type='text',
                                         body=query_body)['hits']['hits']
        daily_interest_uid_set = set()
        for result in es_results:
            daily_interest_uid_set.add(result['_source']['uid'])
        daily_interest_uid_list = list(daily_interest_uid_set)
        es_daily_interests_results = es_user_profile.mget(
            index=profile_index_name, doc_type=profile_index_type,
            body={'ids': daily_interest_uid_list})['docs']
        nick_name_dict = {}
        es_daily_interests_results = es_daily_interests_results[:max(NICK_NAME_TOP, len(es_daily_interests_results))]
        for result in es_daily_interests_results:
            if result['found'] == True:
                result = result['_source']
                nick_name_dict[result['uid']] = result['nick_name']
            else:
                continue
        recommend_results['daily_interests'] = nick_name_dict
    except Exception, e:
        print e
        print 'no users matching the daily interests were found'
        recommend_results['daily_interests'] = {}
    '''

    ## follows matching the monitor keywords
    nest_query_list = []
    # the text may contain English or traditional Chinese characters, so match all variants
    monitor_en_keywords_list = trans(monitor_keywords_list, target_language='en')
    for i in range(len(monitor_keywords_list)):
        monitor_keyword = monitor_keywords_list[i]
        monitor_traditional_keyword = simplified2traditional(monitor_keyword)
        if len(monitor_en_keywords_list) == len(monitor_keywords_list):  # make sure the translation didn't fail
            monitor_en_keyword = monitor_en_keywords_list[i]
            nest_query_list.append({'wildcard': {'keywords_string': '*' + monitor_en_keyword + '*'}})
        nest_query_list.append({'wildcard': {'keywords_string': '*' + monitor_keyword + '*'}})
        nest_query_list.append({'wildcard': {'keywords_string': '*' + monitor_traditional_keyword + '*'}})
    try:
        query_body_monitor = {
            'query': {
                'bool': {
                    # 'must': nest_query_list
                    'should': nest_query_list
                }
            },
            # 'sort': {'user_fansnum': {'order': 'desc'}},
            'size': MONITOR_TOP_USER,
            '_source': ['uid']
        }
        es_results = es_flow_text.search(index=index_name_list, doc_type='text',
                                         body=query_body_monitor)['hits']['hits']
        monitor_keywords_uid_set = set()
        for result in es_results:
            monitor_keywords_uid_set.add(result['_source']['uid'])
        monitor_keywords_uid_list = list(monitor_keywords_uid_set)
        es_monitor_keywords_results = es_user_profile.mget(
            index=profile_index_name, doc_type=profile_index_type,
            body={'ids': monitor_keywords_uid_list})['docs']
        nick_name_dict = {}
        es_monitor_keywords_results = es_monitor_keywords_results[:max(NICK_NAME_TOP, len(es_monitor_keywords_results))]
        for result in es_monitor_keywords_results:
            if result['found'] == True:
                result = result['_source']
                nick_name_dict[result['uid']] = result['username']
            else:
                continue
        recommend_results['monitor_keywords'] = nick_name_dict
    except Exception, e:
        print e
        print 'no users matching the monitor keywords were found'
        recommend_results['monitor_keywords'] = {}

def write_tweets(keyword, file):
    global total_polarity, total_subjectivity
    q = keyword + ' -filter:retweets'
    df = pd.DataFrame(columns=COLS)
    # page attribute in tweepy.Cursor and iteration
    for page in tweepy.Cursor(api.search, q, count=5, include_rts=False).pages(5):
        for status in page:
            new_entry = []
            status = status._json
            # preprocessing
            filtered_tweet = textpre.clean_tweets(status['text'])
            # if the tweet is not in English, translate it
            if status['lang'] != 'en':
                filtered_tweet = trans.trans(filtered_tweet)
            # pass the text to TextBlob for sentiment calculations
            blob = TextBlob(filtered_tweet)
            Sentiment = blob.sentiment
            # separate polarity and subjectivity into two variables
            polarity = Sentiment.polarity
            subjectivity = Sentiment.subjectivity
            # append the new entry
            new_entry += [status['id'], status['text'], filtered_tweet,
                          Sentiment, polarity, subjectivity, status['lang']]
            # accumulate total polarity
            total_polarity += polarity
            # accumulate total subjectivity
            total_subjectivity += subjectivity
            # get the location of the tweet if possible
            try:
                location = status['user']['location']
            except TypeError:
                location = ''
            new_entry.append(location)
            try:
                coordinates = [coord for loc in status['place']['bounding_box']['coordinates']
                               for coord in loc]
            except TypeError:
                coordinates = None
            new_entry.append(coordinates)
            single_tweet_df = pd.DataFrame([new_entry], columns=COLS)
            df = df.append(single_tweet_df, ignore_index=True)
    df["polarity"] = pd.to_numeric(df["polarity"], errors='coerce')
    df["subjectivity"] = pd.to_numeric(df["subjectivity"], errors='coerce')
    csvFile = open(file, 'a', encoding='utf-8')
    df.to_csv(csvFile, mode='a', columns=COLS, index=False, encoding="utf-8")
    no_of_rows = len(df)
    returnlist.append(str(no_of_rows))
    avgpol = total_polarity / no_of_rows
    returnlist.append(str(avgpol))
    avgsub = total_subjectivity / no_of_rows
    returnlist.append(str(avgsub))
    if avgpol > 0:
        returnlist.append('Positive')
    elif avgpol == 0:
        returnlist.append('Neutral')
    else:
        returnlist.append('Negative')

def getSubjectVal(self, subject):
    enSubject = str(trans.trans(unicode(subject))).upper()
    splitedSubject = enSubject.split()
    self.subjectVal = self.getSplitedTextValue(splitedSubject)

from trans import trans

if __name__ == '__main__':
    trans()

#coding: utf-8
import sys
import os.path
from subprocess import *
import trans
import visual_studio_srm

# TODO: make the browser configurable externally
browser = os.getenv('LOCALAPPDATA') + r"\Google\Chrome\Application\chrome.exe"

try:
    code_name = sys.argv[1]
    code_name = os.path.abspath(code_name)
    html_name = u"%s.html" % os.path.splitext(code_name)[0]
    visual_studio_srm.cmd_for_srm(code_name)
    out_html_name = trans.trans(html_name)
    # open the html in a browser
    Popen([browser, out_html_name])
except Exception, e:
    print e

def getnbuf(self):
    return trans(self.getobuf(), self.key)

def test_ansii(self):
    self.assertEqual(trans(u'qwerty'), u'qwerty')
    self.assertTrue(isinstance(trans(u'qwerty'), unicode if PY2 else str))

def _find_cards(self, card_name):
    names = trans(card_name).split("//")
    return [self.data[n.strip().lower()] for n in names
            if n.strip().lower() in self.data]

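# Hypothetical lookup (the card name is illustrative; split names use the
# "//" separator, and `self.data` is assumed to be keyed by transliterated,
# lowercased names as built by _dictify above):
#
#   finder._find_cards('Fire // Ice')   # -> [<Fire data>, <Ice data>]
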
def test_ansii_slug(self):
    self.assertEqual(trans(u'1 2 3 4 5 \n6 7 8 9', 'slug'), u'1_2_3_4_5__6_7_8_9')
    self.assertTrue(isinstance(trans(u'qwerty', 'slug'), unicode if PY2 else str))

def test_my_table_simple():
    my_simple = {u'1': u'2', u'2': u'3'}
    assert trans_module.trans(u'1 2', my_simple) == u'2_3'

def test_russian(self):
    self.assertEqual(trans(u'йцукен'), u'ycuken')
    self.assertEqual(trans(self.s), self.s_encoded)
    self.assertTrue(isinstance(trans(self.s), unicode if PY2 else str))

def test_russian_slug(self):
    self.assertEqual(trans(self.s, 'slug')[-42:-1],
                     u'_c__G__L__Oldi___Skazki_dedushki_vampira_')

def retranslate():
    for item in film.Entity.select():
        if item.type in ('song', 'movie', 'person'):
            item.tran = trans.trans(item.name)
        else:
            item.tran = ''