def ensure_person(person):
    """Return the database id for ``person``, inserting a row when missing.

    The ``people`` table is searched by name.  If no row is found, a new
    ``DBPerson`` is built from ``person`` and inserted, and its id is
    returned.  ``self`` is not a parameter here; it is resolved from the
    surrounding scope.
    """
    profile_args = (person.name, self.repo_id)
    profiler_start("Ensuring person %s for repository %d", profile_args)
    printdbg("DBContentHandler: ensure_person %s <%s>",
             (person.name, person.email))

    cursor = self.cursor
    name = to_utf8(person.name)
    email = person.email
    if email is not None:
        email = to_utf8(email).decode("utf-8")

    lookup = statement("SELECT id from people where name = ?",
                       self.db.place_holder)
    cursor.execute(lookup, (to_utf8(name).decode("utf-8"),))
    row = cursor.fetchone()

    if row:
        person_id = row[0]
    else:
        new_person = DBPerson(None, person)
        insert = statement(DBPerson.__insert__, self.db.place_holder)
        cursor.execute(
            insert,
            (new_person.id, to_utf8(new_person.name).decode("utf-8"), email))
        person_id = new_person.id

    profiler_stop("Ensuring person %s for repository %d", profile_args, True)
    return person_id
def save_word2vec_format(self, fname, fvocab=None, binary=False):
    """
    Store the input-hidden weight matrix in the same format used by the
    original C word2vec-tool, for compatibility.

    `fname` is the file the vectors are written to.

    `fvocab` is an optional file that receives one "word count" line per
    vocabulary entry.

    `binary` selects raw-byte rows instead of "%f"-formatted text rows.
    """
    # Most frequent words first.  Sorted once and shared by both writers.
    by_frequency = sorted(iteritems(self.vocab),
                          key=lambda item: -item[1].count)

    if fvocab is not None:
        logger.info("Storing vocabulary in %s", fvocab)
        with utils.smart_open(fvocab, 'wb') as vout:
            for word, vocab in by_frequency:
                vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count)))

    logger.info("storing %sx%s projection weights into %s",
                len(self.vocab), self.layer1_size, fname)
    assert (len(self.vocab), self.layer1_size) == self.syn0.shape
    with utils.smart_open(fname, 'wb') as fout:
        # header line: "<rows> <columns>"
        fout.write(utils.to_utf8("%s %s\n" % self.syn0.shape))
        for word, vocab in by_frequency:
            row = self.syn0[vocab.index]
            if binary:
                # tobytes() replaces the deprecated tostring() alias
                fout.write(utils.to_utf8(word) + b" " + row.tobytes())
            else:
                fout.write(utils.to_utf8(
                    "%s %s\n" % (word, ' '.join("%f" % val for val in row))))
def login_bind(self, account, platform_id, remark='0'):
    '''Send the player account to the zqb account center.

    Builds the bind URL from the UTF-8 encoded ``account`` and ``remark``
    and passes it to ``self.request_get`` with ``on_response`` as the
    callback.
    '''

    def on_response(user_data, res):
        # Status '200' is logged as success and '501' as a repeated bind.
        # Any other status, or a transport error, is logged as a failure.
        succeed = not res.error
        if succeed:
            ret = json.loads(res.body)
            status = str(ret['status'])
            if status == '200':
                self.log_info("bind account success:(%s)(%s)" %
                              (user_data, ret))
            elif status == '501':
                self.log_warning("repeat bind:(%s)(%s)" % (user_data, ret))
            else:
                succeed = False
        if not succeed:
            self.log_error("bind account error:(%s), %s, %s" %
                           (user_data, res.error, res.body))

    account = utils.to_utf8(account)
    remark = utils.to_utf8(remark)
    url = GameCenterMixin.get_bind_url(account, platform_id, remark)
    user_data = dict(account=account, platform_id=platform_id, remark=remark)
    self.request_get(url, {}, on_response, user_data)
def parse(cls, selector):
    """Serialize a nested selector into its string form.

    * a dict becomes ``key:(value)`` for every item, written back to back;
    * a list or tuple becomes its parsed elements joined by commas;
    * anything else is written as ``to_utf8(selector)``.

    Container checks use ``isinstance`` so that dict, list and tuple
    subclasses are serialized as containers too.
    """
    with contextlib.closing(StringIO()) as result:
        if isinstance(selector, dict):
            for k, v in selector.items():
                result.write('%s:(%s)' % (to_utf8(k), cls.parse(v)))
        elif isinstance(selector, (list, tuple)):
            result.write(','.join(map(cls.parse, selector)))
        else:
            result.write(to_utf8(selector))
        # read the buffer before closing() releases it
        return result.getvalue()
def __init__(self, id, uri, name, type):
    """Create a repository record.

    When ``id`` is None the current ``DBRepository.id_counter`` value is
    used and the counter is advanced; otherwise ``id`` is stored as given.
    ``uri``, ``name`` and ``type`` are stored after ``to_utf8``.
    """
    if id is not None:
        self.id = id
    else:
        # allocate the next id from the class-wide counter
        self.id = DBRepository.id_counter
        DBRepository.id_counter += 1

    self.uri = to_utf8(uri)
    self.name = to_utf8(name)
    self.type = to_utf8(type)
def __init__(self, id, commit):
    """Create a log record from ``commit``.

    When ``id`` is None the current ``DBLog.id_counter`` value is used and
    the counter is advanced; otherwise ``id`` is stored as given.
    ``committer`` and ``author`` start out as None.
    """
    if id is not None:
        self.id = id
    else:
        # allocate the next id from the class-wide counter
        self.id = DBLog.id_counter
        DBLog.id_counter += 1

    self.rev = to_utf8(commit.revision)
    self.committer = None
    self.author = None
    self.date = commit.date
    self.message = to_utf8(commit.message)
    self.composed_rev = commit.composed_rev
def get_verify_code_url(account, platformid, phone, code, remark='0'):
    """Build the signed URL used to submit a verification code.

    The path is the '/'-joined request parts.  The signature is the
    lower-case MD5 hex digest of the same parts concatenated without a
    separator, followed by ``GameCenterMixin.SECRET_KEY``.
    """
    utf8_account = utils.to_utf8(account)
    third_code = "zqb" + str(platformid)
    sys_param = GameCenterMixin.get_sys_param()
    quoted_remark = urllib.quote(utils.to_utf8(remark))

    parts = [
        utf8_account,
        third_code,
        phone,
        code,
        GameCenterMixin.APPID,
        quoted_remark,
        sys_param,
    ]
    path = '/'.join(parts)
    payload = ''.join(parts) + GameCenterMixin.SECRET_KEY
    signature = hashlib.md5(payload).hexdigest().lower()
    return GameCenterMixin.VERIFY_CODE_URL + path + '/' + signature
def save_cat2vec_format(self, fname):
    """
    Store cat vectors.

    The file starts with '#'-prefixed header lines (matrix shape, then the
    sg/hs/negative/cbow_mean settings), followed by one
    "cat_id<TAB>values" line per category.
    """
    logger.info("storing %sx%s projection weights into %s" %
                (self.cat_len, self.layer1_size, fname))
    assert (self.cat_len, self.layer1_size) == self.cats.shape
    with utils.smart_open(fname, 'wb') as fout:
        shape_header = "#cats_len: %d\n#size:%d\n" % self.cats.shape
        fout.write(utils.to_utf8(shape_header))
        settings_header = "#sg:%d\n#hs:%d\n#negative:%d\n#cbow_mean:%d\n" % (
            self.sg, self.hs, self.negative, self.cbow_mean)
        fout.write(utils.to_utf8(settings_header))
        for cat_id, cat_index in self.cat_no_hash.items():
            values = ' '.join("%f" % val for val in self.cats[cat_index])
            fout.write(utils.to_utf8("%s\t%s\n" % (cat_id, values)))
def write_headers(self, num_docs, num_terms, num_nnz):
    """Write the fixed header line followed by the matrix stats line.

    A negative `num_nnz` means the shape and density are not known yet.
    In that case a placeholder line of 50 spaces is written instead of
    the "docs terms nnz" line.
    """
    self.fout.write(MmWriter.HEADER_LINE)

    if num_nnz < 0:
        logger.info("saving sparse matrix to %s" % self.fname)
        # 48 digits must be enough for everybody
        stats_line = ' ' * 50 + '\n'
    else:
        logger.info(
            "saving sparse %sx%s matrix with %i non-zero entries to %s" %
            (num_docs, num_terms, num_nnz, self.fname))
        stats_line = '%s %s %s\n' % (num_docs, num_terms, num_nnz)
    self.fout.write(utils.to_utf8(stats_line))

    self.last_docno = -1
    self.headers_written = True
def __iter__(self):
    """Yield one token list per sentence, replaying a saved cache if any.

    Once a first pass has set ``self._save``, later iterations replay:

    * ``cache == 0``: the in-memory ``self.cache_list``;
    * ``cache == 1``: the gzip stream held in ``self.strf``;
    * ``cache == 2``: the file at ``self.path``.

    Otherwise every sentence of every document in ``self.corpus`` is
    tokenized with the module-level ``analyzer``, stored in the selected
    cache and yielded.

    Any ``Exception`` raised while iterating is printed and swallowed, so
    iteration ends quietly on error.
    """
    try:
        if self.cache == 0 and self._save:
            for sts in self.cache_list:
                yield sts
        elif self.cache == 1 and self._save:
            self.strf.seek(0, 0)
            greader = gzip.GzipFile(filename='test', mode='rb',
                                    fileobj=self.strf)
            with closing(greader):
                for line in greader:
                    yield line.split()
        elif self.cache == 2 and self._save:
            greader = gen_open(self.path, mode='rb')
            with closing(greader):
                for line in greader:
                    yield line.split()
        else:
            self._save = True
            for doc in self.corpus:
                for sentence in doc.to_sentences():
                    ts = analyzer.tokenStream("dummy",
                                              StringReader(str(sentence)))
                    termAtt = ts.addAttribute(CharTermAttribute.class_)
                    # the stream must be reset before tokens can be read
                    ts.reset()
                    buf = []
                    while ts.incrementToken():
                        buf.append(to_utf8(termAtt.toString()))
                    if self.cache == 0:
                        self.cache_list.append(buf)
                    elif self.cache == 1 or self.cache == 2:
                        self.file_writer.write(' '.join(buf) + '\n')
                    yield buf
            self.file_writer.close()
    except Exception as inst:
        # Best-effort diagnostics.  Single-argument print() calls give the
        # same output on Python 2 and Python 3.
        print('error in Disk125 %s' % (type(inst),))
        print(inst.args)
        print(self.fname)
        print(inst)
def modify(self, dn, mod_type=None, attrs=None):
    """ Modify a record

    `dn` is the record to change, `attrs` maps attribute names to lists
    of values, and `mod_type` optionally forces one modification type for
    every attribute.  Without `mod_type`, a changed non-empty value is
    REPLACEd and an existing attribute set to [''] is DELETEd.

    If `attrs` carries a new value for the RDN attribute, the record is
    renamed first.

    Returns an error message, or '' when no error was recorded.
    """
    if self.read_only:
        msg = 'Running in read-only mode, modification is disabled'
        logger.info(msg)
        return msg

    utf8_dn = to_utf8(dn)
    res = self.search(base=utf8_dn, scope=self.BASE)
    attrs = attrs or {}

    if res['exception']:
        return res['exception']

    if res['size'] == 0:
        return 'LDAPDelegate.modify: Cannot find dn "%s"' % dn

    cur_rec = res['results'][0]
    mod_list = []
    msg = ''

    for key, values in attrs.items():
        # a real list, so the comparisons with [''] below stay list-to-list
        values = [to_utf8(value) for value in values]

        if mod_type is None:
            if cur_rec.get(key, ['']) != values and values != ['']:
                mod_list.append((self.REPLACE, key, values))
            elif key in cur_rec and values == ['']:
                mod_list.append((self.DELETE, key, None))
        else:
            mod_list.append((mod_type, key, values))

    try:
        connection = self.connect()

        new_rdn = attrs.get(self.rdn_attr, [''])[0]
        if new_rdn and new_rdn != cur_rec.get(self.rdn_attr)[0]:
            # rename first, then point utf8_dn at the renamed record
            new_utf8_rdn = to_utf8('%s=%s' % (self.rdn_attr, new_rdn))
            connection.modrdn_s(utf8_dn, new_utf8_rdn)
            old_dn_exploded = self.explode_dn(utf8_dn)
            old_dn_exploded[0] = new_utf8_rdn
            utf8_dn = ','.join(old_dn_exploded)

        connection.modify_s(utf8_dn, mod_list)

    except ldap.INVALID_CREDENTIALS as e:
        e_name = e.__class__.__name__
        msg = '%s No permission to modify "%s"' % (e_name, dn)

    # previously the message was built and then dropped
    return msg
def insert(self, base, rdn, attrs=None):
    """ Insert a new record

    The new DN is "<rdn>,<base>".  `attrs` maps attribute names to value
    lists.  A string value is split on ';' and each piece is stripped.
    An attribute whose value list is [''] is left out.

    Returns an error message, or '' when no error was recorded.
    """
    if self.read_only:
        msg = 'Running in read-only mode, insertion is disabled'
        logger.info(msg)
        return msg

    msg = ''
    dn = to_utf8('%s,%s' % (rdn, base))
    attribute_list = []
    attrs = attrs or {}

    for attr_key, attr_val in attrs.items():
        if isinstance(attr_val, (str, unicode)):
            attr_val = [x.strip() for x in attr_val.split(';')]

        if attr_val != ['']:
            attr_val = [to_utf8(value) for value in attr_val]
            attribute_list.append((attr_key, attr_val))

    try:
        connection = self.connect()
        connection.add_s(dn, attribute_list)
    except ldap.INVALID_CREDENTIALS as e:
        e_name = e.__class__.__name__
        msg = '%s No permission to insert "%s"' % (e_name, dn)

    # previously the message was built and then dropped
    return msg
def create_dealer_index_xychart(title, labels, score, mark_value=None, format='{value|1}', fontAngle=0, Scale=100):
    """Build a bar chart of `score` per label and return ``makeChart2(PNG)``.

    The chart is 400 pixels wide and grows by 20 pixels per label.
    Labels are shortened with ``truncate_hanzi(label, 25)``.  The y axis
    runs from 0 to `Scale`.  `mark_value` is accepted but not used.
    """
    font = "simsun.ttc"
    new_labels = [truncate_hanzi(label, 25) for label in labels]
    colors = BASE_COLOR
    chart_height = 60 + 20 * len(new_labels)
    chart = XYChart(400, chart_height)

    title_box = chart.addTitle(utils.to_utf8(title), font, 12)
    title_box.setMargin2(20, 0, 10, 30)
    chart.setBackground(
        chart.linearGradientColor(0, 0, 0, chart.getHeight(),
                                  '0xFEFEFE', '0xFFFFFF'),
        '0X666666')

    title_height = 0
    chart.addLine(20, title_height, chart.getWidth() - 21, title_height,
                  '0xffffff')
    plot_height = chart_height - 30
    chart.setPlotArea(70, 50, 270, plot_height, -1, -1, Transparent,
                      '0xffffff')

    bars = chart.addBarLayer3(score, colors)
    bars.setAggregateLabelFormat(format)
    bars.setAggregateLabelStyle(font, 8)

    chart.xAxis().setLabels(new_labels)
    # hide axis lines and ticks; only the labels stay visible
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle(font, 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle(font, 9)
    chart.yAxis2().setLabelStyle(font, 9)
    chart.yAxis().setLinearScale(0, Scale)

    chart.packPlotArea(20, title_height + 15, chart.getWidth() - 30,
                       chart.getHeight() - 15)
    return chart.makeChart2(PNG)
def __insert_many(self):
    """Bulk-insert the queued actions and commits, then commit.

    Does nothing when both queues are empty.  Each queue is reset to an
    empty list once its rows have been handed to ``executemany``.
    """
    if not (self.actions or self.commits):
        return

    cursor = self.cursor
    repo_args = (self.repo_id,)

    if self.actions:
        action_rows = [
            (a.id, a.type, a.file_id, a.commit_id, a.branch_id)
            for a in self.actions
        ]
        profiler_start("Inserting actions for repository %d", repo_args)
        cursor.executemany(
            statement(DBAction.__insert__, self.db.place_holder),
            action_rows)
        self.actions = []
        profiler_stop("Inserting actions for repository %d", repo_args)

    if self.commits:
        commit_rows = [
            (c.id, c.rev, c.committer, c.author, c.date,
             to_utf8(c.message).decode("utf-8"), c.composed_rev,
             c.repository_id)
            for c in self.commits
        ]
        profiler_start("Inserting commits for repository %d", repo_args)
        cursor.executemany(
            statement(DBLog.__insert__, self.db.place_holder),
            commit_rows)
        self.commits = []
        profiler_stop("Inserting commits for repository %d", repo_args)

    profiler_start("Committing inserts for repository %d", repo_args)
    self.cnn.commit()
    profiler_stop("Committing inserts for repository %d", repo_args)
def search(self, base, scope, filter='(objectClass=*)', attrs=[], bind_dn='', bind_pwd=''):
    """ The main search engine

    Runs an LDAP search and returns a dict with three keys:

    * 'exception' - error text, '' when no error was recorded;
    * 'size'      - number of records in 'results';
    * 'results'   - the record dicts, each with an added 'dn' entry.

    List values in each record are converted in place with `from_utf8`.
    `attrs` is only passed through to `search_s` and never modified here.
    """
    result = {'exception': '', 'size': 0, 'results': []}
    filter = to_utf8(filter)

    # NOTE(review): the original wrapped everything below in a `try:` that
    # had no handler, which is not valid Python.  Errors from connect() or
    # the search now propagate to the caller.  Confirm whether they should
    # instead be stored in result['exception'].
    connection = self.connect(bind_dn=bind_dn, bind_pwd=bind_pwd)
    if connection is None:
        result['exception'] = 'Cannot connect to LDAP server'
        return result

    try:
        res = connection.search_s(base, scope, filter, attrs)
    except ldap.PARTIAL_RESULTS:
        res_type, res = connection.result(all=0)
    except ldap.REFERRAL as e:
        # retry once against the server named by the referral
        connection = self.handle_referral(e)
        try:
            res = connection.search_s(base, scope, filter, attrs)
        except ldap.PARTIAL_RESULTS:
            res_type, res = connection.result(all=0)

    for rec_dn, rec_dict in res:
        # When used against Active Directory, "rec_dict" may not be a
        # dictionary in some cases (instead, it can be a list), e.g.
        # (None, ['ldap://ForestDnsZones.PORTAL.LOCAL/DC=ForestDnsZones,DC=PORTAL,DC=LOCAL'])
        # This appears to be some sort of internal referral that cannot
        # be handled here, so such entries are skipped.
        try:
            items = rec_dict.items()
        except AttributeError:
            continue

        for key, value in items:
            if not isinstance(value, str):
                try:
                    for i in range(len(value)):
                        value[i] = from_utf8(value[i])
                except Exception:
                    # best effort: leave values that cannot be converted
                    pass

        rec_dict['dn'] = from_utf8(rec_dn)
        result['results'].append(rec_dict)
        result['size'] += 1

    return result
def __init__(self, id, name):
    """Create a tag record.

    When ``id`` is None the current ``DBTag.id_counter`` value is used
    and the counter is advanced; otherwise ``id`` is stored as given.
    """
    if id is not None:
        self.id = id
    else:
        # allocate the next id from the class-wide counter
        self.id = DBTag.id_counter
        DBTag.id_counter += 1

    self.name = to_utf8(name)
def write_headers(self, num_docs, num_terms, num_nnz):
    """Write the fixed header line followed by the matrix stats line.

    A negative `num_nnz` means the shape and density are not known yet.
    In that case a placeholder line of 50 spaces is written instead of
    the "docs terms nnz" line.
    """
    self.fout.write(MmWriter.HEADER_LINE)

    shape_unknown = num_nnz < 0
    if shape_unknown:
        logger.info("saving sparse matrix to %s" % self.fname)
        # 48 digits must be enough for everybody
        stats_line = ' ' * 50 + '\n'
    else:
        logger.info(
            "saving sparse %sx%s matrix with %i non-zero entries to %s" %
            (num_docs, num_terms, num_nnz, self.fname))
        stats_line = '%s %s %s\n' % (num_docs, num_terms, num_nnz)
    self.fout.write(utils.to_utf8(stats_line))

    self.last_docno = -1
    self.headers_written = True
def save_doc2vec_format(self, fname):
    """
    Store the sentence vectors as text.

    The first line is "<rows> <columns>".  It is followed by one
    "sent_<index> v1 v2 ..." line per sentence, in index order.
    """
    logger.info("storing %sx%s projection weights into %s" %
                (self.sents_len, self.layer1_size, fname))
    assert (self.sents_len, self.layer1_size) == self.sents.shape
    with utils.smart_open(fname, 'wb') as fout:
        fout.write(utils.to_utf8("%s %s\n" % self.sents.shape))
        for sent_no in xrange(self.sents_len):
            values = ' '.join("%f" % val for val in self.sents[sent_no])
            fout.write(utils.to_utf8("sent_%d %s\n" % (sent_no, values)))
def __getitem__(self, key):
    """Return the converted value for ``key``, caching it on first use.

    The key is passed through ``to_utf8`` first.  On a cache miss the raw
    value is read from ``self.db`` and run through ``self.converter``,
    and the result is stored in ``self.cache``.
    """
    utf8_key = to_utf8(key)
    cache = self.cache
    if utf8_key not in cache:
        converted = self.converter(self.db[utf8_key])
        cache[utf8_key] = converted
        return converted
    return cache[utf8_key]
def __init__(self, id, file_name):
    """Create a file record.

    When ``id`` is None the current ``DBFile.id_counter`` value is used
    and the counter is advanced; otherwise ``id`` is stored as given.
    ``repository_id`` starts out as None.
    """
    if id is not None:
        self.id = id
    else:
        # allocate the next id from the class-wide counter
        self.id = DBFile.id_counter
        DBFile.id_counter += 1

    self.file_name = to_utf8(file_name)
    self.repository_id = None
def __init__(self, id, person):
    """Create a person record from ``person``.

    When ``id`` is None the current ``DBPerson.id_counter`` value is used
    and the counter is advanced; otherwise ``id`` is stored as given.
    A falsy ``person.email`` (such as '') is stored as None.
    """
    if id is not None:
        self.id = id
    else:
        # allocate the next id from the class-wide counter
        self.id = DBPerson.id_counter
        DBPerson.id_counter += 1

    self.name = to_utf8(person.name)
    self.email = person.email or None
def _on_notify_url_found(result, ex):
    """Callback that saves the order notification after the URL lookup.

    When ``ex`` is truthy nothing is done.  Otherwise the notification is
    saved with ``on_created`` pre-bound to the UTF-8 encoded ``ext`` value
    ('' when it is empty).  ``self``, ``app``, ``params`` and
    ``on_created`` are resolved from the surrounding scope.
    """
    if ex:
        # lookup failed: nothing is saved
        return

    pay_notice_url = result['url']
    ext = utils.to_utf8(result['ext']) or ''
    created_callback = functools.partial(on_created, ext)
    self.save_order_notification(app, pay_notice_url, params,
                                 created_callback)
def calc_sign(self, params):
    """Return the MD5 hex signature for ``params``.

    All keys except "sign" are sorted.  Keys with a falsy value are
    skipped.  The rest are joined as "key=value" pairs with '&', the app
    key is appended, and the result is hashed.
    """
    signed_keys = sorted(k for k in params.keys() if k != "sign")
    pairs = [
        "%s=%s" % (key, to_utf8(params.get(key, '')))
        for key in signed_keys
        if params.get(key)
    ]
    sign_str = '&'.join(pairs)
    print("sign_str:%s" % sign_str)
    return hashlib.md5(sign_str + self._app['key']).hexdigest()
def __init__(self, id, commit_id, file_id, file_path):
    """Create a file-path record linking a commit and a file.

    When ``id`` is None the current ``DBFilePath.id_counter`` value is
    used and the counter is advanced; otherwise ``id`` is stored as given.
    """
    if id is not None:
        self.id = id
    else:
        # allocate the next id from the class-wide counter
        self.id = DBFilePath.id_counter
        DBFilePath.id_counter += 1

    self.commit_id = commit_id
    self.file_id = file_id
    self.file_path = to_utf8(file_path)
def save_word2vec_format(self, fname, fvocab=None, binary=False, total_vec=None):
    """
    Store the input-hidden weight matrix in the same format used by the
    original C word2vec-tool, for compatibility.

    `fname` is the file used to save the vectors in

    `fvocab` is an optional file used to save the vocabulary

    `binary` is an optional boolean indicating whether the data is to be
    saved in binary word2vec format (default: False)

    `total_vec` is an optional parameter to explicitly specify total no.
    of vectors (in case word vectors are appended with document vectors
    afterwards)
    """
    if total_vec is None:
        total_vec = len(self.vocab)
    vector_size = self.syn0.shape[1]

    # Most frequent words first.  Sorted once and shared by both writers.
    by_frequency = sorted(iteritems(self.vocab),
                          key=lambda item: -item[1].count)

    if fvocab is not None:
        logger.info("storing vocabulary in %s", fvocab)
        with utils.smart_open(fvocab, 'wb') as vout:
            for word, vocab in by_frequency:
                vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count)))

    logger.info("storing %sx%s projection weights into %s",
                total_vec, vector_size, fname)
    assert (len(self.vocab), vector_size) == self.syn0.shape
    with utils.smart_open(fname, 'wb') as fout:
        # the header reports total_vec, which may exceed len(self.vocab)
        fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size)))
        for word, vocab in by_frequency:
            row = self.syn0[vocab.index]
            if binary:
                # tobytes() replaces the deprecated tostring() alias
                fout.write(utils.to_utf8(word) + b" " + row.tobytes())
            else:
                fout.write(utils.to_utf8(
                    "%s %s\n" % (word, ' '.join("%f" % val for val in row))))
def create_simple_xychart(title, labels, data, mark_value=None, format='{value|1}', fontAngle=0, x=560, y=220, swapxy=False, Scale=100):
    """Build a single-series bar chart and return ``makeChart2(PNG)``.

    The chart is `x` by `y` pixels and the plot area grows by 50 pixels
    per label.  The axes are swapped when `swapxy` is true.  The y axis
    runs from 0 to `Scale`.  `mark_value` is accepted but not used.
    """
    font = "simsun.ttc"
    colors = BASE_COLOR
    chart = XYChart(x, y)
    chart.setBackground(
        chart.linearGradientColor(0, 0, 0, chart.getHeight(),
                                  '0xFEFEFE', '0xFFFFFF'),
        '0X666666')

    title_height = 0
    chart.addLine(20, title_height, chart.getWidth() - 21, title_height,
                  '0xffffff')
    plot_width = 30 + 50 * len(labels)
    chart.setPlotArea(70, 50, plot_width, 170, -1, -1, Transparent,
                      '0xffffff')
    if swapxy:
        chart.swapXY()

    title_box = chart.addTitle(utils.to_utf8(title), font, 12)
    title_box.setMargin2(20, 0, 10, 30)

    bars = chart.addBarLayer3(data, colors)
    bars.setBorderColor(Transparent, softLighting(Right))
    bars.setAggregateLabelFormat(format)
    # rotated labels use a slightly smaller font
    if fontAngle == 0:
        font_size = 8
    else:
        font_size = 7
    bars.setAggregateLabelStyle(font, font_size)
    bars.setBarWidth(x, 15)

    chart.xAxis().setLabels(labels)
    chart.yAxis().setLinearScale(0, Scale)
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle(font, 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle(font, 9)
    chart.yAxis2().setLabelStyle(font, 9)

    chart.packPlotArea(20, title_height + 40, chart.getWidth() - 30,
                       chart.getHeight() - 15)
    return chart.makeChart2(PNG)
def calc_sign(self, params):
    """Return the MD5 hex signature for ``params``.

    All keys except "sign" are sorted.  Keys whose value is missing or
    equal to "" are skipped.  The rest are joined as "key=value" pairs
    with '&', the app key is appended, and the UTF-8 encoding of the
    result is hashed.

    Only the canonical string and the signature are logged.  The app key
    is a shared secret and is kept out of the log.
    """
    keys = sorted(k for k in params.keys() if k != "sign")
    sign_str = '&'.join([
        "%s=%s" % (key, to_utf8(params.get(key, '')))
        for key in keys
        if params.get(key, "") != ""
    ])
    sign = hashlib.md5(
        (sign_str + self._app['key']).encode('utf-8')).hexdigest()
    self.log_info("sign_str:%s, sign:%s" % (sign_str, sign))
    return sign
def save_as_text(self, fname, sort_by_word=True):
    """
    Save this Dictionary to a text file, in format:
    `id[TAB]word_utf8[TAB]document frequency[NEWLINE]`. Sorted by word,
    or by decreasing word frequency.

    Note: text format should be used for corpus inspection. Use
    `save`/`load` to store in binary format (pickle) for improved
    performance.
    """
    logger.info("saving dictionary mapping to %s", fname)
    with utils.smart_open(fname, 'wb') as fout:
        if sort_by_word:
            # alphabetical; frequency defaults to 0 for ids without one
            rows = (
                (tokenid, token, self.dfs.get(tokenid, 0))
                for token, tokenid in sorted(iteritems(self.token2id))
            )
        else:
            # most frequent first
            rows = (
                (tokenid, self[tokenid], freq)
                for tokenid, freq in sorted(iteritems(self.dfs),
                                            key=lambda item: -item[1])
            )
        for row in rows:
            fout.write(utils.to_utf8("%i\t%s\t%i\n" % row))
def save_cat2vec_format(self, fname):
    """
    Store cat vectors.

    The file starts with '#'-prefixed header lines (matrix shape, then the
    sg/hs/negative/cbow_mean settings), followed by one
    "cat_id<TAB>values" line per category.
    """
    logger.info("storing %sx%s projection weights into %s" %
                (self.cat_len, self.layer1_size, fname))
    assert (self.cat_len, self.layer1_size) == self.cats.shape
    with utils.smart_open(fname, 'wb') as fout:
        emit = fout.write
        emit(utils.to_utf8("#cats_len: %d\n#size:%d\n" % self.cats.shape))
        emit(utils.to_utf8(
            "#sg:%d\n#hs:%d\n#negative:%d\n#cbow_mean:%d\n" %
            (self.sg, self.hs, self.negative, self.cbow_mean)))
        for cat_id, cat_index in self.cat_no_hash.items():
            row_text = ' '.join("%f" % val for val in self.cats[cat_index])
            emit(utils.to_utf8("%s\t%s\n" % (cat_id, row_text)))
def write_vector(self, docno, vector):
    """
    Write a single sparse vector to the file.

    Sparse vector is any iterable yielding (field id, field value) pairs.
    Entries whose absolute weight is at most 1e-12 are dropped.  The rest
    are written in increasing field id order as 1-based
    "doc term weight" lines.

    Returns `(largest written field id, number of written entries)`, or
    `(-1, 0)` when nothing was written.
    """
    assert self.headers_written, "must write Matrix Market file headers before writing data!"
    assert self.last_docno < docno, "documents %i and %i not in sequential order!" % (self.last_docno, docno)

    # ignore near-zero entries; sorting puts term ids in ascending order
    nonzero = sorted((i, w) for i, w in vector if abs(w) > 1e-12)
    for termid, weight in nonzero:
        # +1 because MM format starts counting from 1
        entry = "%i %i %s\n" % (docno + 1, termid + 1, weight)
        self.fout.write(utils.to_utf8(entry))
    self.last_docno = docno

    if not nonzero:
        return (-1, 0)
    return (nonzero[-1][0], len(nonzero))
def save_word2vec_format(self, fname, fvocab=None, binary=False):
    """
    Store the input-hidden weight matrix in the same format used by the
    original C word2vec-tool, for compatibility.

    `fname` is the file the vectors are written to.

    `fvocab` is an optional file that receives one "word count" line per
    vocabulary entry.

    `binary` selects raw-byte rows instead of "%f"-formatted text rows.
    """
    # Most frequent words first.  Sorted once and shared by both writers.
    by_frequency = sorted(iteritems(self.vocab),
                          key=lambda item: -item[1].count)

    if fvocab is not None:
        logger.info("Storing vocabulary in %s", fvocab)
        with utils.smart_open(fvocab, 'wb') as vout:
            for word, vocab in by_frequency:
                vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count)))

    logger.info("storing %sx%s projection weights into %s",
                len(self.vocab), self.layer1_size, fname)
    assert (len(self.vocab), self.layer1_size) == self.syn0.shape
    with utils.smart_open(fname, 'wb') as fout:
        # header line: "<rows> <columns>"
        fout.write(utils.to_utf8("%s %s\n" % self.syn0.shape))
        for word, vocab in by_frequency:
            row = self.syn0[vocab.index]
            if binary:
                # tobytes() replaces the deprecated tostring() alias
                fout.write(utils.to_utf8(word) + b" " + row.tobytes())
            else:
                fout.write(utils.to_utf8(
                    "%s %s\n" % (word, ' '.join("%f" % val for val in row))))
def __get_person(self, person):
    """Get the person_id given a person struct

    The id is looked up in ``self.people_cache`` first, keyed by the
    UTF-8 encoded name.  On a miss it is fetched from (or inserted into)
    the database and the cache is updated.  Returns None when ``person``
    is None.
    """

    def ensure_person(person):
        # Look the person up by name; insert a new row when missing.
        profile_args = (person.name, self.repo_id)
        profiler_start("Ensuring person %s for repository %d", profile_args)
        printdbg("DBContentHandler: ensure_person %s <%s>",
                 (person.name, person.email))

        cursor = self.cursor
        name = to_utf8(person.name)
        email = person.email
        if email is not None:
            email = to_utf8(email).decode("utf-8")

        lookup = statement("SELECT id from people where name = ?",
                           self.db.place_holder)
        cursor.execute(lookup, (to_utf8(name).decode("utf-8"), ))
        row = cursor.fetchone()

        if row:
            person_id = row[0]
        else:
            new_person = DBPerson(None, person)
            insert = statement(DBPerson.__insert__, self.db.place_holder)
            cursor.execute(
                insert,
                (new_person.id, to_utf8(new_person.name).decode("utf-8"),
                 email))
            person_id = new_person.id

        profiler_stop("Ensuring person %s for repository %d", profile_args,
                      True)
        return person_id

    if person is None:
        return None

    cache_key = to_utf8(person.name)
    if cache_key in self.people_cache:
        return self.people_cache[cache_key]

    person_id = ensure_person(person)
    self.people_cache[cache_key] = person_id
    return person_id
def __get_person(self, person):
    """Get the person_id given a person struct

    The id is looked up in ``self.people_cache`` first, keyed by the
    UTF-8 encoded name.  On a miss it is fetched from (or inserted into)
    the database and the cache is updated.  Returns None when ``person``
    is None.
    """

    def ensure_person(person):
        # Look the person up by name; insert a new row when missing.
        label = "Ensuring person %s for repository %d"
        profiler_start(label, (person.name, self.repo_id))
        printdbg("DBContentHandler: ensure_person %s <%s>",
                 (person.name, person.email))

        cursor = self.cursor
        name = to_utf8(person.name)
        email = person.email
        if email is not None:
            email = to_utf8(email).decode("utf-8")

        cursor.execute(
            statement("SELECT id from people where name = ?",
                      self.db.place_holder),
            (to_utf8(name).decode("utf-8"),))
        found = cursor.fetchone()

        if found:
            person_id = found[0]
        else:
            db_person = DBPerson(None, person)
            cursor.execute(
                statement(DBPerson.__insert__, self.db.place_holder),
                (db_person.id, to_utf8(db_person.name).decode("utf-8"),
                 email))
            person_id = db_person.id

        profiler_stop(label, (person.name, self.repo_id), True)
        return person_id

    if person is None:
        return None

    cache_key = to_utf8(person.name)
    if cache_key in self.people_cache:
        return self.people_cache[cache_key]

    person_id = ensure_person(person)
    self.people_cache[cache_key] = person_id
    return person_id
def save_word2vec_format(self, fname, binary=False):
    """
    Store the input-hidden weight matrix in the same format used by the
    original C word2vec-tool, for compatibility.

    `fname` is the file the vectors are written to.

    `binary` selects raw-byte rows instead of "%f"-formatted text rows.
    """
    logger.info("storing %sx%s projection weights into %s",
                len(self.vocab), self.layer1_size, fname)
    assert (len(self.vocab), self.layer1_size) == self.syn0.shape
    with open(fname, 'wb') as fout:
        # header line: "<rows> <columns>"
        fout.write("%s %s\n" % self.syn0.shape)
        # store in sorted order: most frequent words at the top
        for word, vocab in sorted(self.vocab.iteritems(),
                                  key=lambda item: -item[1].count):
            word = utils.to_utf8(word)  # always store in utf8
            row = self.syn0[vocab.index]
            if binary:
                # tobytes() replaces the deprecated tostring() alias
                fout.write("%s %s\n" % (word, row.tobytes()))
            else:
                fout.write("%s %s\n" %
                           (word, ' '.join("%f" % val for val in row)))
def write_vector(self, docno, vector):
    """
    Write a single sparse vector to the file.

    Sparse vector is any iterable yielding (field id, field value) pairs.
    Entries whose absolute weight is at most 1e-12 are dropped.  The rest
    are written in increasing field id order as 1-based
    "doc term weight" lines.

    Returns `(largest written field id, number of written entries)`, or
    `(-1, 0)` when nothing was written.
    """
    assert self.headers_written, "must write Matrix Market file headers before writing data!"
    assert self.last_docno < docno, "documents %i and %i not in sequential order!" % (
        self.last_docno, docno)

    # ignore near-zero entries; sorting puts term ids in ascending order
    kept = sorted((i, w) for i, w in vector if abs(w) > 1e-12)
    write = self.fout.write
    for termid, weight in kept:
        # +1 because MM format starts counting from 1
        write(utils.to_utf8("%i %i %s\n" % (docno + 1, termid + 1, weight)))
    self.last_docno = docno

    if kept:
        return (kept[-1][0], len(kept))
    return (-1, 0)
def create_simple_xychart(title,labels,data,mark_value=None,format='{value|1}',fontAngle=0,x=560,y=220,swapxy=False,Scale=100):
    """Build a single-series bar chart and return ``makeChart2(PNG)``.

    The chart is `x` by `y` pixels and the plot area grows by 50 pixels
    per label.  The axes are swapped when `swapxy` is true.  The y axis
    runs from 0 to `Scale`.  `mark_value` is accepted but not used.
    """
    font = "simsun.ttc"
    colors = BASE_COLOR
    chart = XYChart(x, y)
    background = chart.linearGradientColor(0, 0, 0, chart.getHeight(),
                                           '0xFEFEFE', '0xFFFFFF')
    chart.setBackground(background, '0X666666')

    title_height = 0
    chart.addLine(20, title_height, chart.getWidth() - 21, title_height,
                  '0xffffff')
    plot_width = 30 + 50 * len(labels)
    chart.setPlotArea(70, 50, plot_width, 170, -1, -1, Transparent,
                      '0xffffff')
    if swapxy:
        chart.swapXY()

    title_box = chart.addTitle(utils.to_utf8(title), font, 12)
    title_box.setMargin2(20, 0, 10, 30)

    bars = chart.addBarLayer3(data, colors)
    bars.setBorderColor(Transparent, softLighting(Right))
    bars.setAggregateLabelFormat(format)
    # rotated labels use a slightly smaller font
    font_size = 8 if fontAngle == 0 else 7
    bars.setAggregateLabelStyle(font, font_size)
    bars.setBarWidth(x, 15)

    chart.xAxis().setLabels(labels)
    chart.yAxis().setLinearScale(0, Scale)
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle(font, 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle(font, 9)
    chart.yAxis2().setLabelStyle(font, 9)

    chart.packPlotArea(20, title_height + 40, chart.getWidth() - 30,
                       chart.getHeight() - 15)
    return chart.makeChart2(PNG)
def create_dealer_index_xychart(title, labels, score, mark_value=None, format='{value|1}', fontAngle=0, Scale=100):
    """Build a bar chart of `score` per label and return ``makeChart2(PNG)``.

    The chart is 400 pixels wide and grows by 20 pixels per label.
    Labels are shortened with ``truncate_hanzi(label, 25)``.  The y axis
    runs from 0 to `Scale`.  `mark_value` is accepted but not used.
    """
    font = "simsun.ttc"
    short_labels = [truncate_hanzi(label, 25) for label in labels]
    colors = BASE_COLOR
    chart_height = 60 + 20 * len(short_labels)
    chart = XYChart(400, chart_height)

    title_box = chart.addTitle(utils.to_utf8(title), font, 12)
    title_box.setMargin2(20, 0, 10, 30)
    background = chart.linearGradientColor(0, 0, 0, chart.getHeight(),
                                           '0xFEFEFE', '0xFFFFFF')
    chart.setBackground(background, '0X666666')

    title_height = 0
    chart.addLine(20, title_height, chart.getWidth() - 21, title_height,
                  '0xffffff')
    plot_height = chart_height - 30
    chart.setPlotArea(70, 50, 270, plot_height, -1, -1, Transparent,
                      '0xffffff')

    bars = chart.addBarLayer3(score, colors)
    bars.setAggregateLabelFormat(format)
    bars.setAggregateLabelStyle(font, 8)

    chart.xAxis().setLabels(short_labels)
    # hide axis lines and ticks; only the labels stay visible
    chart.yAxis().setColors(Transparent)
    chart.yAxis2().setColors(Transparent)
    chart.xAxis().setTickColor(Transparent)
    chart.xAxis().setLabelStyle(font, 9, 0x0, fontAngle)
    chart.yAxis().setLabelStyle(font, 9)
    chart.yAxis2().setLabelStyle(font, 9)
    chart.yAxis().setLinearScale(0, Scale)

    chart.packPlotArea(20, title_height + 15, chart.getWidth() - 30,
                       chart.getHeight() - 15)
    return chart.makeChart2(PNG)
def bind_account(self, params, callback):
    """Start binding a phone number to an account.

    A phone value that is empty, not a string, or not all digits is
    logged and rejected.  Without a verification code the URL comes from
    ``apply_verify_code``; with one it comes from
    ``get_verify_code_url``.  The URL is then requested with
    ``self.on_bind_account`` as the response handler.
    """
    account = utils.to_utf8(params.get('account', ''))
    phone = params['phone']
    code = params.get('code', '')
    server_id = params.get('server_id', 0)
    user_id = params.get('user_id', 0)  # read but not used below
    player_id = params.get('player_id', '0')

    phone_ok = (phone and isinstance(phone, (str, unicode))
                and phone.isdigit())
    if not phone_ok:
        self.log_error("reject deal with bad phone number:%s" % phone)
        return

    platform_id = int(self._platform_info['distributor_id'])
    self.log_info("bind_account params:(%s)" % params)

    if code and code.strip():
        url = self.get_verify_code_url(account or player_id, platform_id,
                                       phone, code)
    else:
        url = self.apply_verify_code(account or player_id, platform_id,
                                     phone)

    user_data = {
        'server_id': server_id,
        'player_id': player_id,
        'account': account,
        'phone': phone,
        'code': code,
        'callback': callback,
    }
    self.request_get(url, {}, self.on_bind_account, user_data)
def to_utf8(self, str):
    """Return ``utils.to_utf8(str)``."""
    return utils.to_utf8(str)
def fake_headers(self, num_docs, num_terms, num_nnz):
    """Write the "docs terms nnz" stats just after the header line.

    Seeks to the offset right after ``MmWriter.HEADER_LINE`` and writes
    the stats there.

    Raises ValueError when the stats string is longer than 50 characters.
    """
    stats_line = '%i %i %i' % (num_docs, num_terms, num_nnz)
    if len(stats_line) > 50:
        raise ValueError('Invalid stats: matrix too large!')
    stats_offset = len(MmWriter.HEADER_LINE)
    self.fout.seek(stats_offset)
    self.fout.write(utils.to_utf8(stats_line))
def test_to_utf8(self):
    """Check to_utf8 on plain, unicode and already-encoded input."""
    cases = [
        ('abc', 'abc'),
        (u'abc', 'abc'),
        (u'\u4f60\u597d', '\xe4\xbd\xa0\xe5\xa5\xbd'),
        ('\xe4\xbd\xa0\xe5\xa5\xbd', '\xe4\xbd\xa0\xe5\xa5\xbd'),
    ]
    for raw, expected in cases:
        assert utils.to_utf8(raw) == expected
def test_to_utf8(self):
    """Check to_utf8 on plain, unicode and already-encoded input."""
    encoded_hello = '\xe4\xbd\xa0\xe5\xa5\xbd'
    cases = (
        ('abc', 'abc'),
        ('abc', u'abc'),
        (encoded_hello, u'\u4f60\u597d'),
        (encoded_hello, '\xe4\xbd\xa0\xe5\xa5\xbd'),
    )
    for expected, raw in cases:
        self.assertEqual(expected, utils.to_utf8(raw))
def __contains__(self, key):
    """Report whether ``key`` is in the cache or in the backing store.

    The key is passed through ``to_utf8`` first.  ``self.db_contains`` is
    only consulted on a cache miss.
    """
    utf8_key = to_utf8(key)
    if utf8_key in self.cache:
        return True
    return self.db_contains(utf8_key)
def create_history_now_future_xychart(title, labels, series_list, series_top, maxv=100):
    """Build the history / now / future bar chart, return ``makeChart2(PNG)``.

    `series_list` is modified in place.  Three summary series
    ('2012 Top3 Ave', '2012 YTD', '2011 Ave') are appended, and every
    series holding at most one value gets `future_score` appended to it.

    Values equal to -1 or above 100 are drawn as 0.  They are labelled
    "N/A", or " " for the summary series.  A horizontal mark is drawn at
    `point`.  `series_top` is accepted but not used.
    """
    top3, ytd, ave, future_score, point = get_ave_score(series_list)
    series_list.append(dict(name=u'2012 Top3 Ave', value=top3))
    series_list.append(dict(name=u'2012 YTD', value=ytd))
    series_list.append(dict(name=u'2011 Ave', value=ave))

    # 900 x 320 pixel chart with a 12 pt title
    c = XYChart(900, 320)
    title = c.addTitle(utils.to_utf8(title), "simsun.ttc", 12)
    title.setMargin2(20, 0, 10, 30)

    color_list = BASE_COLOR
    COLOR_BLUE = 0x0070C0
    COLOR_93 = 0x00B050
    COLOR_87 = 0xFFD600
    COLOR_TOP3_AVE = 0x595443
    COLOR_YTD = 0xFF0000
    COLOR_AVE = 0x5678A9
    # summary series always use their own color, checked in this order
    summary_colors = (
        (u'2012 Top3 Ave', COLOR_TOP3_AVE),
        (u'2012 YTD', COLOR_YTD),
        (u'2011 Ave', COLOR_AVE),
    )
    summary_names = (u'2012 Top3 Ave', u'2012 YTD', u'2011 Ave')

    # Plot area at (50, 90); 190 pixels of width per label plus 30, and
    # 200 pixels high.  Vertical gradient background with white border
    # and grid lines.
    chart_width = 30 + 190 * len(labels)
    c.setPlotArea(50, 90, chart_width, 200,
                  c.linearGradientColor(60, 40, 60, 280, 0xffffff, 0xd8e2ec),
                  -1, 0xffffff, 0xffffff)

    legendBox = c.addLegend(50, 30, 0, "simsun.ttc", 10)
    legendBox.setBackground(Transparent)
    legendBox.setHeight(30)

    # x axis labels, with ticks drawn between label positions
    c.xAxis().setLabels([utils.to_utf8(label) for label in labels])
    c.xAxis().setTickOffset(0.5)

    c.xAxis().setLabelStyle("simsun.ttc", 9)
    c.yAxis().setLabelStyle("simsun.ttc", 9)

    c.xAxis().setWidth(2)
    c.yAxis().setWidth(2)
    c.yAxis2().setWidth(1)

    c.yAxis().setTitle("得分/Score", "simsun.ttc", 9)
    c.yAxis().setLinearScale(0, maxv)

    layer = c.addBarLayer()
    layer.setBarGap(0.1)
    layer.setBarWidth(170, 18)

    for index, series in enumerate(series_list):
        values = series['value']
        if len(values) > 1:
            color = COLOR_BLUE
        else:
            # give the short series its projected value and color it by
            # the target it is measured against
            values.append(future_score)
            if point == 93:
                color = COLOR_93
            elif point == 87:
                color = COLOR_87
            else:
                color = COLOR_BLUE

        name = utils.to_utf8(series['name'])
        for summary_name, summary_color in summary_colors:
            if name == summary_name:
                color = summary_color

        # out-of-range values are drawn as empty bars
        write_list = [
            0 if (value == -1 or value > 100) else value
            for value in values
        ]
        layer.addDataSet(write_list, color, name)

        for i, v in enumerate(values):
            if v == -1 or v > 100:
                if name in summary_names:
                    layer.addCustomGroupLabel(index, i, " ")
                else:
                    layer.addCustomGroupLabel(index, i, "N/A")
            else:
                layer.setAggregateLabelFormat("{value|1}")
                layer.setAggregateLabelStyle('', 10, '0x0000', 0)

    yMark = c.yAxis().addMark(point, '0x800080', '%s' % point)
    yMark.setLineWidth(1)
    yMark.setAlignment(TopCenter)

    # Transparent bar border, soft lighting from the top.
    layer.setBorderColor(Transparent, softLighting(Top))

    return c.makeChart2(PNG)
def create_multi_xychart(title, labels, series_list, series_top, maxv=100):
    """Build a bar chart of scores per label, with optional "top" markers.

    :param title: chart title; passed through ``utils.to_utf8``.
    :param labels: x-axis labels; their count also sets the plot-area width.
    :param series_list: sequence of dicts with ``'name'`` and ``'value'``
        keys.  Only its first element is charted (see the note below).
    :param series_top: falsy to draw no markers; otherwise a dict with
        ``'name'`` and ``'value'`` keys that is drawn as a box-whisker mark
        layer and given its own legend key.
    :param maxv: upper bound of the linear y-axis scale.
    :return: whatever ``XYChart.makeChart2(PNG)`` returns for the chart.
    """
    font = "simsun.ttc"
    top_color = 0xFF6900

    # NOTE(review): only the first series is kept, although the loop further
    # down is written for several - confirm this restriction is intended.
    series_list = [series_list[0]]

    # 900 x 320 pixel canvas with a 12pt title.
    chart = XYChart(900, 320)
    heading = chart.addTitle(utils.to_utf8(title), font, 12)
    heading.setMargin2(20, 0, 5, 30)

    palette = BASE_COLOR

    # Plot area at (50, 90): 200 pixels high, 190 pixels wide per label plus
    # 30, white-to-pale-blue vertical gradient, white border and grid lines.
    plot_width = 30 + 190 * len(labels)
    background = chart.linearGradientColor(60, 40, 60, 280, 0xffffff, 0xd8e2ec)
    chart.setPlotArea(50, 90, plot_width, 200, background, -1, 0xffffff, 0xffffff)

    # Transparent legend box, 30 pixels high.
    legend = chart.addLegend(50, 16, 0, font, 10)
    legend.setBackground(Transparent)
    legend.setHeight(30)

    x_axis = chart.xAxis()
    y_axis = chart.yAxis()

    # Labels on the x axis, with ticks placed between label positions.
    encoded_labels = [utils.to_utf8(text) for text in labels]
    x_axis.setLabels(encoded_labels)
    x_axis.setTickOffset(0.5)

    # 9pt axis labels.
    x_axis.setLabelStyle(font, 9)
    y_axis.setLabelStyle(font, 9)

    # Axis line widths in pixels.
    x_axis.setWidth(2)
    y_axis.setWidth(2)
    chart.yAxis2().setWidth(1)

    # Y axis title and fixed scale from 0 to maxv.
    y_axis.setTitle("得分/Score", font, 9)
    y_axis.setLinearScale(0, maxv)

    # Bar layer; colours cycle through the palette by series position.
    bars = chart.addBarLayer()
    bars.setBarGap(0.2)
    bars.setBarWidth(150, 48)
    for position, entry in enumerate(series_list):
        bar_color = palette[position % len(palette)]
        bars.addDataSet(entry['value'], bar_color, utils.to_utf8(entry['name']))

    if series_top:
        # Legend key plus a mark layer showing the "top" value per label.
        legend.addKey2(2, utils.to_utf8(series_top['name']), top_color, 2)
        marks = chart.addBoxWhiskerLayer(None, None, None, None,
                                         series_top['value'], -1, top_color)
        marks.setLineWidth(2)
        marks.setDataGap(0.1)
        marks.setDataLabelStyle(font, 9)
        marks.setDataLabelFormat("{value|1}")

    # Transparent bar border with soft lighting from the top, and numeric
    # labels on the bars.
    bars.setBorderColor(Transparent, softLighting(Top))
    bars.setAggregateLabelFormat("{value|1}")

    # Output the chart.
    return chart.makeChart2(PNG)
model.save(s2v_model_name) else: model = Sentence2Vec.load(s2v_model_name) print "Input an article title (type EXIT to exit)" sys.stdout.write("Name: ") line = sys.stdin.readline() while line: line = utils.to_unicode(line.rstrip()) if line == "EXIT": break try: if model.sent_no_hash.has_key(line): sent_no = model.sent_no_hash[line] sent_vec = model.sents[sent_no] nsents = model.most_similar_sentence(sent_vec, 11) print "Similar articles similarity" print "-"*45 for nsent in nsents[1:]: print nsent[0], " "*(max(30 - len(utils.to_utf8(nsent[0])), 0)), nsent[1] print else: print "we couldn't find the specified category/article" print except Exception: print "something wrong is happened" print "Input a category name or an article title (type EXIT to exit)" sys.stdout.write("Name: ") line = sys.stdin.readline()