def msonify_xas(item):
    """Build a serializable XANES spectrum document for one item.

    Returns a dict holding the MSON-ified spectrum keyed next to a
    "mp_id,element" identifier string; on an empty or invalid spectrum
    the ``spectrum`` entry is None and an ``error`` message is added.
    """
    x_vals = py_.pluck(item['spectrum'], 0)  # energies (eV)
    y_vals = py_.pluck(item['spectrum'], 3)  # absorption (mu)
    structure = Structure.from_dict(item['structure'])
    absorbing_idx = item['absorbing_atom']
    absorption_specie = structure[absorbing_idx].species_string
    mid_and_el = ",".join([item["mp_id"], absorption_specie])
    # Flag the absorbing site so downstream consumers can identify it.
    site_flags = [i == absorbing_idx for i, _ in enumerate(structure.sites)]
    structure.add_site_property('absorbing_atom', site_flags)
    if not x_vals:
        return {
            "spectrum": None,
            "mid_and_el": mid_and_el,
            "error": "Empty spectrum",
        }
    try:
        spectrum = XANES(
            x=x_vals,
            y=y_vals,
            structure=structure,
            absorption_specie=absorption_specie,
            edge="K",
        ).as_dict()
    except ValueError as e:
        return {"spectrum": None, "mid_and_el": mid_and_el, "error": str(e)}
    return {"spectrum": spectrum, "mid_and_el": mid_and_el}
def get(self, *args):
    """Serve widgets as JSON, optionally filtered by property query params.

    Query-string keys matching known property categories act as filters:
    a widget matches when at least one of its property names for that
    category appears in the parameter values. Without any valid
    parameters, all widgets are returned unfiltered.
    """
    # Placeholder for supporting inheritance. Inheritance is currently
    # not implemented.
    url = self.request.uri
    # FIX: urlsplit/parse_qs live in urllib.parse, not on the urllib
    # package itself -- the original calls raised AttributeError.
    qs = urllib.parse.urlsplit(url)
    parsed = urllib.parse.parse_qs(qs.query)
    query = """
        SELECT w_id, w_name, p_id, p_category, p_name, p_value
        FROM {%1}.widget_property wp
        JOIN {%1}.widget w ON (wp.wp_widget_id = w.w_id)
        JOIN {%1}.property p ON (wp.wp_property_id = p.p_id);
    """
    query = query.replace('{%1}', self.schema)
    cur = self.db.cursor()
    cur.execute(query)
    # Check if the query parameters are valid.
    data = c.parsedata(cur)
    categories = py_.pluck(data, 'p_category')
    # list() so the dict key view is safe to hand to pydash.
    valid_params = py_.intersection(list(parsed.keys()), categories)
    # Create a hash with widget id as key.
    data_by_id = py_.group_by(data, 'w_id')
    # For each widget, create a new widget object to return.
    widgets = []
    for wid in data_by_id.keys():
        this = data_by_id[wid]
        by_cat = py_.group_by(this, 'p_category')
        uniq_cat = py_.uniq(py_.pluck(this, 'p_category'))
        widget = {
            'w_id': wid,
            'w_name': py_.uniq(py_.pluck(this, 'w_name'))[0],
            'match': True
        }
        # Create a new key, val pair for each property for the widget.
        for cat in uniq_cat:
            widget[cat] = py_.pluck(by_cat[cat], 'p_name')
        # Check if each property associated with the widget matches the
        # query parameters.
        for key in valid_params:
            if key in by_cat.keys():
                widget['match'] = widget['match'] and len(
                    py_.intersection(widget[key], parsed[key])) > 0
        widgets.append(widget)
    # If query parameters are not provided or invalid, return all
    # widgets without filtering.
    if len(valid_params) == 0:
        ret = widgets
    else:
        ret = py_.filter(widgets, {'match': True})
    self.write(json.dumps(ret, indent=4, separators=(',', ': ')))
def test():
    """Print subunit-check results for every unit in the 2009 template."""
    template_path = os.path.join(STATICRESOURCE_DIR,
                                 'standard_template2009.json')
    with codecs.open(template_path, 'r', 'utf-8-sig') as fh:
        template = json.loads(fh.read())
    for unit in _.pluck(template, 'unit'):
        print(check_has_subunit(template, u'东大茨线', unit))
def get_issue_content(self):
    """Return the body text of every issue in this repository."""
    issues_url = (
        f'https://api.github.com/repos/{self.owner}/{self.repoName}/issues'
    )
    response = PyRequest().get(issues_url)
    return py_.pluck(response, 'body')
def find_first(alist):
    """Walk predecessor links from alist[0] back to the head id.

    Stops when the predecessor is falsy or falls outside the ids
    present in `alist`; returns the last in-list id visited.
    """
    known_ids = _.pluck(alist, '_id')
    current = alist[0]['_id']
    head = None
    while current and current in known_ids:
        head = current
        current = find_prev(head)
    return head
def find_first(collection_edges, alist):
    """Follow edges backwards from alist[0] to find the chain's head id."""
    id_pool = _.pluck(alist, '_id')
    cursor = alist[0]['_id']
    head = None
    # Keep stepping to the predecessor while it is still one of ours.
    while cursor and cursor in id_pool:
        head = cursor
        cursor = find_prev(collection_edges, head)
    return head
def invalid_pks(target, pk):
    """Fetch values of primary key `pk` marked as invalid in target."""
    docs = target.query(criteria=mark_invalid({}),
                        properties={'_id': 0, pk: 1})
    # pluck == map(get): collect the pk value from every matching doc.
    return [py_.get(doc, pk) for doc in docs]
def test13():
    """Load FTU install records from an Excel sheet and attach them as
    ``devices`` onto matching feature documents in MongoDB.

    NOTE(review): ``ur''`` literals are Python 2 only syntax.
    """
    import re, datetime
    from pydash import py_ as _
    from pymongo import MongoClient
    from bson.objectid import ObjectId
    XLS_FILE = ur'G:\2014项目\配电网故障定位\普洱FTU导出数据\10kV线路柱上馈线终端FTU安装台账 (2).xls'
    book = xlrd.open_workbook(XLS_FILE)
    startrowidx = 1
    startcolidx = 1
    recs = []
    ids_map = {}
    for sheet in book.sheets():
        # Only the sheet literally named 'sheet3' is processed.
        if sheet.name.lower() == u'sheet3':
            ids_map['pzz'] = []
            for row in range(startrowidx, sheet.nrows):
                # Skip rows with an empty first cell.
                if sheet.cell_value(row, 0) == '':
                    continue
                rec = {}
                # Column 13 carries the feature ObjectId hex string.
                rec['_id'] = ObjectId(str(sheet.cell_value(row, 13)))
                rec[u'device_no'] = sheet.cell_value(row, 4)
                rec[u'rf_addr'] = sheet.cell_value(row, 5)
                rec[u'phase'] = {}
                rec[u'phase'][u'a'] = sheet.cell_value(row, 6)
                rec[u'phase'][u'b'] = sheet.cell_value(row, 7)
                rec[u'phase'][u'c'] = sheet.cell_value(row, 8)
                rec[u'sim'] = sheet.cell_value(row, 9)
                rec[u'status'] = sheet.cell_value(row, 10)
                rec[u'engineer'] = sheet.cell_value(row, 11)
                tmp = sheet.cell_value(row, 12)
                rec[u'installation_date'] = datetime.datetime.strptime(tmp, '%Y/%m/%d')
                # print(type(rec['installation_date']))
                rec[u'switch_alias'] = int(sheet.cell_value(row, 14))
                # rec['line_py'] = sheet.cell_value(row, 14)
                recs.append(rec)
    # print(json.dumps(recs, ensure_ascii=False, indent=4))
    # print(len(recs))
    ids = _.pluck(recs, '_id')
    # print(ids)
    client = MongoClient('localhost', 27017)
    kmgd = client['kmgd_pe']
    collection = kmgd['features']
    res = list(collection.find({"_id":{'$in':ids}}))
    # print(len(res))
    for item in res:
        _id = item['_id']
        one = _.find(recs, {'_id':_id})
        if one:
            # Drop the id before embedding the record under the feature.
            del one['_id']
            one[u'type'] = u'ftu'
            item[u'properties'][u'devices'] = [one, ]
        # print(item)
        collection.save(item)
def check_has_subunit(alist, line_name, unit):
    """Return the 'unitsub_'-prefixed child ids of `unit` that occur on
    `line_name` (i.e. whose occurrence probability is positive)."""
    node = _.find(alist, {'unit': unit})
    children = _.result(node, 'children')
    prefixed = ['unitsub_' + cid for cid in _.pluck(children, 'id')]
    return [sid for sid in prefixed if get_occur_p(line_name, sid) > 0]
def find_chain(alist, obj):
    """Follow successor links from `obj`, collecting the chain of objects.

    Starts at `obj` and repeatedly resolves the next node id via
    ``find_next``; stops when there is no successor or the successor is
    not present in `alist`. Returns the visited objects in order.
    """
    # FIX: removed dead local `ids = _.pluck(alist, '_id')` -- it was
    # computed but never used.
    chain = []
    while obj:
        chain.append(obj)
        nxt_id = find_next(obj['_id'])
        obj = _.find(alist, {'_id': nxt_id}) if nxt_id else None
    return chain
def test_add_field():
    """Read node names/codes from an Excel sheet, then append two fixed
    node ObjectIds to one named network document.

    NOTE(review): ``ur''`` literals are Python 2 only syntax.
    """
    XLS_FILE = ur'D:\2014项目\配电网故障定位\20151128\节点数据.xls'
    book = xlrd.open_workbook(XLS_FILE)
    startrowidx = 1
    namelist = []
    # Collect {name, code} pairs; `break` stops after the first sheet.
    for sheet in book.sheets():
        for row in range(startrowidx, sheet.nrows):
            name = sheet.cell_value(row, 0)
            code = sheet.cell_value(row, 1)
            namelist.append({'name':name, 'code':code})
        break
    names = _.pluck(namelist, 'name')
    print(len(names))
    client = MongoClient('localhost', 27017)
    db = client['kmgd']
    collection = db['features']
    l = list(collection.find({'properties.name':{'$in':names}}))
    # print(len(l))
    #
    # name_code_mapping = []
    # for i in l:
    #     code = _.result(_.find(namelist, {'name':i['properties']['name']}), 'code')
    #     # cl.append(code)
    #     i['properties']['code_name'] = code
    #     # o = {}
    #     # o['name'] = code
    #     # o['_id'] = str(i['_id'])
    #     # name_code_mapping.append(o)
    #     collection.save(i)
    #
    # print(json.dumps(name_code_mapping, ensure_ascii=True, indent=4))
    # ids = []
    # for i in l:
    #     ids.append(i['_id'])
    # collection = db['edges']
    # tids = set()
    # for id in ids:
    #     ll = list(collection.find({'$or':[{'properties.start':id},{'properties.end':id}]}))
    #     for ii in ll:
    #         if ii['properties']['start'] == id:
    #             tids.add(ii['properties']['end'])
    #         if ii['properties']['end'] == id:
    #             tids.add(ii['properties']['start'])
    # tids = list(tids)
    # ids.extend(tids)
    # ids = list(set(ids))
    linename = u'10kV州城Ⅴ回线'
    collection = db['network']
    zc = collection.find_one({'properties.name':linename})
    # Ensure both hard-coded node ids are attached to the line document.
    if not ObjectId('5657b187d8b95a18a48c4a62') in zc['properties']['nodes']:
        zc['properties']['nodes'].append(ObjectId('5657b187d8b95a18a48c4a62'))
    if not ObjectId('5656aa13d8b95a0a485fbaa7') in zc['properties']['nodes']:
        zc['properties']['nodes'].append(ObjectId('5656aa13d8b95a0a485fbaa7'))
    collection.save(zc)
def test_add_field():
    """Formatted duplicate of the snippet above: read node names/codes
    from Excel, then attach two fixed node ObjectIds to one network doc.

    NOTE(review): ``ur''`` literals are Python 2 only syntax.
    """
    XLS_FILE = ur'D:\2014项目\配电网故障定位\20151128\节点数据.xls'
    book = xlrd.open_workbook(XLS_FILE)
    startrowidx = 1
    namelist = []
    # Collect {name, code} pairs; `break` stops after the first sheet.
    for sheet in book.sheets():
        for row in range(startrowidx, sheet.nrows):
            name = sheet.cell_value(row, 0)
            code = sheet.cell_value(row, 1)
            namelist.append({'name': name, 'code': code})
        break
    names = _.pluck(namelist, 'name')
    print(len(names))
    client = MongoClient('localhost', 27017)
    db = client['kmgd']
    collection = db['features']
    l = list(collection.find({'properties.name': {'$in': names}}))
    # print(len(l))
    #
    # name_code_mapping = []
    # for i in l:
    #     code = _.result(_.find(namelist, {'name':i['properties']['name']}), 'code')
    #     # cl.append(code)
    #     i['properties']['code_name'] = code
    #     # o = {}
    #     # o['name'] = code
    #     # o['_id'] = str(i['_id'])
    #     # name_code_mapping.append(o)
    #     collection.save(i)
    #
    # print(json.dumps(name_code_mapping, ensure_ascii=True, indent=4))
    # ids = []
    # for i in l:
    #     ids.append(i['_id'])
    # collection = db['edges']
    # tids = set()
    # for id in ids:
    #     ll = list(collection.find({'$or':[{'properties.start':id},{'properties.end':id}]}))
    #     for ii in ll:
    #         if ii['properties']['start'] == id:
    #             tids.add(ii['properties']['end'])
    #         if ii['properties']['end'] == id:
    #             tids.add(ii['properties']['start'])
    # tids = list(tids)
    # ids.extend(tids)
    # ids = list(set(ids))
    linename = u'10kV州城Ⅴ回线'
    collection = db['network']
    zc = collection.find_one({'properties.name': linename})
    # Ensure both hard-coded node ids are attached to the line document.
    if not ObjectId('5657b187d8b95a18a48c4a62') in zc['properties']['nodes']:
        zc['properties']['nodes'].append(ObjectId('5657b187d8b95a18a48c4a62'))
    if not ObjectId('5656aa13d8b95a0a485fbaa7') in zc['properties']['nodes']:
        zc['properties']['nodes'].append(ObjectId('5656aa13d8b95a0a485fbaa7'))
    collection.save(zc)
def get_device_from_tower(session, aConfig, tower_id):
    """Resolve the device id(s) bound to a tower id or list of tower ids.

    Returns a single id for a str input, a list of ids for a list
    input, and None when nothing matches or the type is unsupported.
    """
    app = aConfig['gConfig']['wsgi']['application']
    db, collection = get_collection(aConfig, app, 'main',
                                    'collection_tower_device_bind')
    if isinstance(tower_id, str):
        bind = collection.find_one({'tower_id': add_mongo_id(tower_id)})
        return remove_mongo_id(bind['device_id']) if bind else None
    if isinstance(tower_id, list):
        cursor = collection.find(
            {'tower_id': {'$in': add_mongo_id(tower_id)}})
        device_ids = _.pluck(list(cursor), 'device_id')
        return remove_mongo_id(device_ids)
    return None
def update_targets(self, items):
    """Upsert phonon web docs and plot images keyed by MP id."""
    self.web_docs.ensure_index("mp-id", unique=True)
    self.images.ensure_index("mp-id", unique=True)
    # Build both payloads in a single pass over the items.
    web_docs, images = [], []
    for item in items:
        web_docs.append({"mp-id": item["mp_id"], "ph_bs": item["web_doc"]})
        images.append({"mp-id": item["mp_id"], "plot": item["image"]})
    self.web_docs.update(web_docs)
    self.images.update(images)
    mp_ids = [item.get("mp_id") for item in items]
    self.logger.info("Updated targets for {}".format(mp_ids))
def query3():
    """Print the name of every network doc that has edges; return all docs.

    has_key is a Python 2 dict method, kept for behavioural parity.
    """
    client = MongoClient('localhost', 27017)
    network = client['kmgd']['network']
    docs = remove_mongo_id(list(network.find({})))
    for props in _.pluck(docs, 'properties'):
        if props.has_key('edges') and len(props['edges']):
            print(enc(props['name']))
    return docs
def query3():
    """List kmgd network docs, printing names of those with non-empty edges.

    has_key is a Python 2 dict method, kept for behavioural parity.
    """
    conn = MongoClient('localhost', 27017)
    coll = conn['kmgd']['network']
    records = remove_mongo_id(list(coll.find({})))
    prop_list = _.pluck(records, 'properties')
    edge_names = [p['name'] for p in prop_list
                  if p.has_key('edges') and len(p['edges'])]
    for name in edge_names:
        print(enc(name))
    return records
def update_targets(self, items):
    """Write averaged-XAS docs to the first target store.

    Flattened item docs are stamped with the store's last-updated
    field, split on their truthy ``valid`` flag, and upserted: valid
    docs keyed by (mp_id, element), invalid ones as invalid markers
    keyed by mp_id.
    """
    target = self.targets[0]
    xas_averaged = target.collection
    # pydash property shorthand: partition on truthiness of doc['valid'].
    valids, invalids = py_.partition(
        mark_lu(py_.flatten(items), target.lu_field, self.dt_fetch),
        'valid')
    # Remove documents flagging now-valid data as invalid.
    xas_averaged.delete_many(
        mark_invalid({"mp_id": {
            "$in": py_.pluck(valids, 'mp_id')
        }}))
    for doc in valids:
        xas_averaged.update_one(py_.pick(doc, 'mp_id', 'element'),
                                {'$set': doc},
                                upsert=True)
    for doc in invalids:
        xas_averaged.update_one(mark_invalid(py_.pick(doc, 'mp_id')),
                                {'$set': doc},
                                upsert=True)
def update_targets(self, items):
    """Bulk-upsert averaged-XAS docs into the first target store.

    Ensures supporting indexes, stamps flattened docs with the store's
    last-updated field, partitions them on the truthy ``valid`` flag,
    clears stale invalid markers for now-valid mp_ids, then replaces
    docs through one ordered bulk operation.
    """
    xas_averaged = self.targets[0]
    xas_averaged.ensure_index([("valid", 1), ("mp_id", 1)])
    xas_averaged.ensure_index([("mp_id", 1), ("element", 1)])
    xas_averaged.ensure_index([("chemsys", 1), ("element", 1)])
    # pydash property shorthand: partition on truthiness of doc['valid'].
    valids, invalids = py_.partition(
        mark_lu(py_.flatten(items), xas_averaged.lu_field, self.dt_fetch),
        'valid')
    # Remove documents flagging now-valid data as invalid.
    xas_averaged.collection.delete_many(
        mark_invalid({"mp_id": {
            "$in": py_.pluck(valids, 'mp_id')
        }}))
    # NOTE(review): initialize_ordered_bulk_op was removed in PyMongo 4;
    # this assumes an older driver -- confirm the pinned version.
    bulk = xas_averaged.collection.initialize_ordered_bulk_op()
    for doc in valids:
        (bulk.find(py_.pick(doc, 'mp_id',
                            'element')).upsert().replace_one(doc))
    for doc in invalids:
        (bulk.find(mark_invalid(py_.pick(
            doc, 'mp_id'))).upsert().replace_one(doc))
    bulk.execute()
def repos_to_csv(repos_by_lang, page_num):
    """Flatten repo metadata, issue bodies and star counts into a CSV.

    Mutates `repos_by_lang` in place (each entry is replaced by a
    trimmed dict) and writes one CSV per page under ../data/.
    """
    repo_issue_content_list = []
    for index, repo in enumerate(repos_by_lang):
        # get repo with basic numerical data
        repos_by_lang[index] = py_.pick(repo, 'full_name', 'forks_count',
                                        'open_issues_count',
                                        'watchers_count')
        # separate full name to list ['owner', 'repository name']
        repo_name = repo['full_name']
        repo_owner_name_list = repo_name.split('/')
        # Only the first two issue bodies are kept per repo.
        issue_list = GetIssueContent(
            repo_owner_name_list[0],
            repo_owner_name_list[1]).get_issue_content()[0:2]
        # Sentinel separator so issues can be split apart downstream.
        clean_issue_list = '[[[[[Next]]]]]'.join(map(str, issue_list))
        repo_issue_content_list.append(clean_issue_list)
        # add star count and merge to existing dictionary
        star_count = {
            "star_count": GetStarCountsByRepo(repo['full_name']).get()
        }
        repos_by_lang[index] = py_.merge(repos_by_lang[index], star_count)
    # NOTE(review): 'comment_count' is never added by py_.pick above, so
    # that column will be all None -- confirm whether it is expected.
    pd_format_dic = {
        'full_name': py_.pluck(repos_by_lang, 'full_name'),
        'forks_count': py_.pluck(repos_by_lang, 'forks_count'),
        'open_issues_count': py_.pluck(repos_by_lang, 'open_issues_count'),
        'watchers_count': py_.pluck(repos_by_lang, 'watchers_count'),
        'comment_count': py_.pluck(repos_by_lang, 'comment_count'),
        'star_count': py_.pluck(repos_by_lang, 'star_count'),
        'issue_content': repo_issue_content_list
    }
    # print(pd_format_dic)
    df = pd.DataFrame.from_dict(pd_format_dic)
    file_name = Config().get_search_setting()['lang'].split(':')[1]
    df.to_csv(f'../data/{file_name}_github_{page_num}.csv')
    print(f'Saving {file_name}_github_{page_num} to csv finished!!')
# Script body: for every line key in linesmap.json, attach the ids of the
# features whose function_pos_code appears under that key to the matching
# polyline_dn network document, then save all updated documents.
ret = []
linesmap = {}
# NOTE(review): ur'' literal is Python 2 only syntax.
with codecs.open(ur'd:\linesmap.json', 'r', 'utf-8-sig') as f:
    linesmap = json.loads(f.read())
polyline_dn = mongo_find('kmgd', 'network',
                         {'properties.webgis_type':'polyline_dn'})
# towers = mongo_find('kmgd', 'features', {'properties.webgis_type':'point_tower'})
idx = 0
for k in linesmap.keys():
    codes = _.uniq(_.flatten(linesmap[k]))
    o = get_line_id(polyline_dn, k)
    if o:
        # l = mongo_find('kmgd', 'features', {'properties.line_func_code':k})
        # ids = _.pluck(l, '_id')
        ll = mongo_find('kmgd', 'features',
                        {'properties.function_pos_code':{'$in':codes}})
        if len(ll):
            lll = _.pluck(ll, '_id')
            o['properties']['nodes'] = lll
            # o = add_mongo_id(o)
            ret.append(o)
    idx += 1
    # if idx > 10:
    #     break
mongo_action('kmgd', 'network', 'save', ret)


def test7():
    """Container for line/point lookup helpers (never executed directly)."""
    def get_line_id(alist, code):
        # Match a network doc by its func_pos_code property.
        return _.find(alist, lambda x: x['properties'].has_key('func_pos_code') and x['properties']['func_pos_code'] == code)
        # return _.matches_property('properties.func_pos_code', code)(alist)

    def get_point_id(alist, code):
        # NOTE(review): checks 'function_pos_code' but compares
        # 'func_pos_code' -- likely a copy-paste slip; confirm intent.
        return _.find(alist, lambda x: x['properties'].has_key('function_pos_code') and x['properties']['func_pos_code'] == code)
def test_algorithm():
    """Build device-to-device chains for two feeders and export them to
    Excel (branch table + bus table) via xlwt.

    Uses Python 2 idioms (has_key); helpers below use mutable default
    args -- NOTE(review): shared-default pitfall, callers always pass
    fresh lists so it is benign here.
    """
    def find_next_by_node(features, collection_edges, alist=[], id=None):
        # Collect the next device-bearing node ids reachable from `id`,
        # recursing through non-device intermediate nodes.
        if isinstance(id, str):
            id = add_mongo_id(id)
        l = _.deep_pluck(list(collection_edges.find({'properties.start':id})),'properties.end')
        for i in l:
            obj = _.find(features, {'_id': i})
            if obj and obj.has_key('properties'):
                if obj['properties'].has_key('devices'):
                    alist.append(obj['_id'])
                else:
                    alist = find_next_by_node(features, collection_edges, alist, obj['_id'])
        return alist

    def find_chain(features, collection_edges, alist=[], id=None):
        # Depth-first expansion of branch records between device nodes.
        _ids = find_next_by_node(features, collection_edges, [], id)
        for _id in _ids:
            obj = _.find(features, {'_id':_id})
            if obj :
                from_index = _.find_index(features, {'_id':id})
                to_index = _.find_index(features, {'_id':_id})
                if obj.has_key('properties') and obj['properties'].has_key('devices'):
                    alist.append({
                        'lnbr_idx': len(alist) + 1,
                        'from_id': add_mongo_id(id),
                        'to_id': obj['_id'],
                        # 'from_idx': from_index,
                        # 'to_idx': to_index,
                    })
                    alist = find_chain(features, collection_edges, alist, obj['_id'])
        return alist

    def find_prev(collection_edges, id):
        # Predecessor node id along an edge ending at `id`, or None.
        ret = None
        one = collection_edges.find_one({'properties.end': id})
        if one:
            ret = one['properties']['start']
        return ret

    def find_next(collection_edges, id):
        # Successor node id along an edge starting at `id`, or None.
        ret = None
        one = collection_edges.find_one({'properties.start': id})
        if one:
            ret = one['properties']['end']
        return ret

    def find_first(collection_edges, alist):
        # Walk backwards from alist[0] until leaving the id set.
        ids = _.pluck(alist, '_id')
        id = alist[0]['_id']
        prev_id = None
        while id and id in ids:
            prev_id = id
            id = find_prev(collection_edges, prev_id)
        return prev_id

    def write_excel_lnbr(features_all, chains, filename):
        # One sheet per chain; rows are branch segments (from/to ids).
        wb = xlwt.Workbook()
        # print(dir(wb))
        for chain in chains:
            ws = wb.add_sheet(str(len(wb._Workbook__worksheets) + 1))
            columns = [
                '_001_LnBR',
                '_002_Bus_from',
                '_003_Bus_to',
                '_004_R',
                '_005_X',
                '_006_B_1_2',
                '_007_kVA',
                '_008_State',
            ]
            for col in columns:
                ws.write(0, columns.index(col), col)
            for i in chain:
                row = chain.index(i) + 1
                ws.write(row, 0, str(i['lnbr_idx']))
                from_obj = _.find(features_all, {'_id':i['from_id']})
                to_obj = _.find(features_all, {'_id':i['to_id']})
                from_name = from_obj['properties']['name']
                from_id = remove_mongo_id(from_obj['_id'])
                # from_idx = i['from_idx']
                to_name = to_obj['properties']['name']
                to_id = remove_mongo_id(to_obj['_id'])
                # to_idx = i['to_idx']
                # ws.write(row, 1, from_name)
                # ws.write(row, 2, to_name)
                ws.write(row, 1, from_id)
                ws.write(row, 2, to_id)
        wb.save(filename)

    def write_excel_bus(features_all, chains, filename):
        # One sheet per chain; rows are bus nodes (first row type 3,
        # others type 1; the terminal node is appended after the loop).
        wb = xlwt.Workbook()
        # print(dir(wb))
        for chain in chains:
            ws = wb.add_sheet(str(len(wb._Workbook__worksheets) + 1))
            columns = [
                '_001_No',
                '_002_Type',
                '_003_MW',
                '_004_Mvar',
                '_005_GS',
                '_006_Bs',
                '_007_Mag',
                '_008_Deg',
            ]
            for col in columns:
                ws.write(0, columns.index(col), col)
            for i in chain:
                row = chain.index(i) + 1
                obj = _.find(features_all, {'_id': i['from_id']})
                # name = obj['properties']['name']
                id = obj['_id']
                # from_idx = i['from_idx']
                # ws.write(row, 0, name)
                ws.write(row, 0, remove_mongo_id(id))
                # ws.write(row, 0, from_idx)
                if row == 1:
                    ws.write(row, 1, 3)
                else:
                    ws.write(row, 1, 1)
                if row == len(chain):
                    obj1 = _.find(features_all, {'_id': i['to_id']})
                    # name1 = obj1['properties']['name']
                    id1 = obj1['_id']
                    # to_idx = i['to_idx']
                    # ws.write(row+1, 0, name1)
                    ws.write(row + 1, 0, remove_mongo_id(id1))
                    # ws.write(row + 1, 0, to_idx)
                    ws.write(row+1, 1, 1)
        wb.save(filename)

    client = MongoClient('localhost', 27017)
    db = client['kmgd_pe']
    collection_network = db['network']
    collection_fea = db['features']
    collection_edges = db['edges']
    # line_ids = ['570ce0c1ca49c80858320619', '570ce0c1ca49c8085832061a']
    # Pingzhangzhai line (坪掌寨线)
    ids0 = collection_network.find_one({'_id':add_mongo_id('570ce0c1ca49c8085832061a')})['properties']['nodes']
    features_all = list(collection_fea.find({'_id':{'$in':ids0}}))
    # Sub-lines whose pinyin starts with pzzx but is not exactly pzzx.
    line_ids = _.pluck(list(collection_network.find({'$and':[{'properties.py': {'$regex': '^pzzx.*$'}}, {'properties.py': {'$not': re.compile('^pzzx$')}}]})), '_id')
    # print(line_ids)
    chains = []
    for i in line_ids:
        line = collection_network.find_one({'_id':i})
        if line and line['properties'].has_key('nodes'):
            features = list(collection_fea.find({'_id':{'$in':add_mongo_id(line['properties']['nodes'])}}))
            first_id = find_first(collection_edges, features)
            if first_id:
                first = _.find(features, {'_id': first_id})
                if first:
                    chain = find_chain(features, collection_edges, [], first_id)
                    print(first['properties']['name'])
                    print(len(chain))
                    chains.append(chain)
    write_excel_lnbr(features_all, chains, 'data_lnbr_pzz.xls')
    write_excel_bus(features_all, chains, 'data_bus_pzz.xls')
    # chains = []
    # line = collection_network.find_one({'_id': add_mongo_id('570ce0c1ca49c8085832061a')})
    # if line and line['properties'].has_key('nodes'):
    #     first_id = add_mongo_id('570ce0b7ca49c8085832018f')
    #     chain = find_chain(features_all, collection_edges, [], first_id)
    #     chains.append(chain)
    # write_excel_lnbr(features_all, chains, 'data_lnbr_pzz0.xls')
    # first = ['570ce0b7ca49c8085832018f', '570ce0c1ca49c8085832031b']
    # Jiufangyakou line (酒房丫口线)
    chains = []
    ids0 = collection_network.find_one({'_id': add_mongo_id('570ce0c1ca49c80858320619')})['properties']['nodes']
    features_all = list(collection_fea.find({'_id': {'$in': ids0}}))
    line_ids = _.pluck(list(collection_network.find(
        {'$and': [{'properties.py': {'$regex': '^jfykx.*$'}}, {'properties.py': {'$not': re.compile('^jfykx$')}}]})), '_id')
    # print(line_ids)
    for i in line_ids:
        line = collection_network.find_one({'_id': i})
        if line and line['properties'].has_key('nodes'):
            features = list(collection_fea.find({'_id': {'$in': add_mongo_id(line['properties']['nodes'])}}))
            first_id = find_first(collection_edges, features)
            if first_id:
                first = _.find(features, {'_id': first_id})
                if first:
                    chain = find_chain(features, collection_edges, [], first_id)
                    print(first['properties']['name'])
                    print(len(chain))
                    chains.append(chain)
    write_excel_lnbr(features_all, chains, 'data_lnbr_jfyk.xls')
    write_excel_bus(features_all, chains, 'data_bus_jfyk.xls')
def test_pzzx():  # Pingzhangzhai line (坪掌寨线)
    """Split the Pingzhangzhai feeder into named branch/main sub-lines.

    Each branch: find its features by pinyin regex, sort them along the
    edges, insert a new ``network`` doc, and remember the ids so the
    leftover features can be saved as the main line.
    """
    piny = get_pinyin_data()
    client = MongoClient('localhost', 27017)
    db = client['kmgd_pe']
    collection_fea = db['features']
    collection_network = db['network']
    collection_edges = db['edges']
    one = collection_network.find_one({'_id':add_mongo_id('570ce0c1ca49c8085832061a')})
    branches = []
    print(len(one['properties']['nodes']))
    l = list(collection_fea.find({'properties.py': {'$regex': '^.*sslzx.*$'}}))  # Songshanlin branch (松山林支线)
    # print(len(l))
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'松山林支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线松山林支线'
    o = {'properties': {
        'name': name,
        'py': piny.hanzi2pinyin_first_letter(
            name.replace('#', '').replace('II', u'二').replace('I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
        'voltage': '12',
        'webgis_type': 'polyline_dn',
        'nodes': _.pluck(l, '_id')
    }}
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {'$regex': '^.*mdszx.*$'}}))  # Mangdushan branch (忙肚山支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'忙肚山支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线忙肚山支线'
    o = {'properties': {
        'name': name,
        'py': piny.hanzi2pinyin_first_letter(
            name.replace('#', '').replace('II', u'二').replace('I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
        'voltage': '12',
        'webgis_type': 'polyline_dn',
        'nodes': _.pluck(l, '_id')
    }}
    # print(o)
    collection_network.insert(o)
    # NOTE(review): this repeats the '^.*mdszx.*$' regex but is labelled
    # Dahebian branch (大河边支线) -- looks like a copy-paste slip; confirm.
    l = list(collection_fea.find({'properties.py': {'$regex': '^.*mdszx.*$'}}))  # Dahebian branch (大河边支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'大河边支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线大河边支线'
    o = {'properties': {
        'name': name,
        'py': piny.hanzi2pinyin_first_letter(
            name.replace('#', '').replace('II', u'二').replace('I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
        'voltage': '12',
        'webgis_type': 'polyline_dn',
        'nodes': _.pluck(l, '_id')
    }}
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {'$regex': '^.*xdtzx.*$'}}))  # Xiadatian branch (下大田支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'下大田支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线下大田支线'
    o = {'properties': {
        'name': name,
        'py': piny.hanzi2pinyin_first_letter(
            name.replace('#', '').replace('II', u'二').replace('I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
        'voltage': '12',
        'webgis_type': 'polyline_dn',
        'nodes': _.pluck(l, '_id')
    }}
    # print(o)
    collection_network.insert(o)
    # Features with the line prefix that belong to no branch -> main line.
    l = list(collection_fea.find({'properties.py': {'$regex': '^pzzxN.*$'}}))
    ids = _.pluck(l, '_id')
    main_ids = _.difference(ids, branches)
    print('len(main_ids)=%d' % len(main_ids))
    l = list(collection_fea.find({'_id': {'$in': main_ids}}))  # Pingzhangzhai main (坪掌寨线)
    l = sortlist(collection_edges, l)
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'坪掌寨线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线主线'
    o = {'properties': {
        'name': name,
        'py': piny.hanzi2pinyin_first_letter(
            name.replace('#', '').replace('II', u'二').replace('I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
        'voltage': '12',
        'webgis_type': 'polyline_dn',
        'nodes': _.pluck(l, '_id')
    }}
    collection_network.insert(o)
def pluck(self, column):
    """Pluck `column` attribute values from :meth:`all` results and
    return as list.
    """
    records = self.all()
    return py_.pluck(records, column)
def test_jfykx():  # Jiufangyakou line (酒房丫口线)
    """Split the Jiufangyakou feeder into named branch/main sub-lines.

    Same pattern as test_pzzx: per branch, find features by pinyin
    regex, sort along edges, insert a network doc, collect the ids,
    then save the remainder as the main line.
    """
    piny = get_pinyin_data()
    client = MongoClient('localhost', 27017)
    db = client['kmgd_pe']
    collection_fea = db['features']
    collection_network = db['network']
    collection_edges = db['edges']
    one = collection_network.find_one(
        {'_id': add_mongo_id('570ce0c1ca49c80858320619')})
    # print(len(one['properties']['nodes']))
    branches = []
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*dhpzzx.*$'
    }}))  # Dahepingzhang branch (大河平掌支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'大河平掌支线%d, [%s]' % (len(l), s))
    name = u'酒房丫口线大河平掌支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*bszzx.*$'
    }}))  # Banshanzhai control branch (控制半山寨支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'控制半山寨支线%d, [%s]' % (len(l), s))
    name = u'酒房丫口线控制半山寨支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*mchzx.*$'
    }}))  # Macaohe control branch (控制马草河支线支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'控制马草河支线%d, [%s]' % (len(l), s))
    name = u'酒房丫口线控制马草河支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*dpzzx.*$'
    }}))  # Dapingzhang branch (大平掌支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'大平掌支线%d, [%s]' % (len(l), s))
    name = u'酒房丫口线大平掌支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*bjzx.*$'
    }}))  # Biji branch (碧鸡支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'碧鸡支线%d, [%s]' % (len(l), s))
    name = u'酒房丫口线碧鸡支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    # Features with the line prefix that belong to no branch -> main line.
    l = list(collection_fea.find({'properties.py': {'$regex': '^jfykxN.*$'}}))
    ids = _.pluck(l, '_id')
    main_ids = _.difference(ids, branches)
    print('len(main_ids)=%d' % len(main_ids))
    l = list(collection_fea.find({'_id': {'$in': main_ids}}))  # Jiufangyakou main (酒房丫口线)
    l = sortlist(collection_edges, l)
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'酒房丫口线%d, [%s]' % (len(l), s))
    name = u'酒房丫口线主线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    collection_network.insert(o)
def test_pzzx():  # Pingzhangzhai line (坪掌寨线)
    """Formatted duplicate of the earlier test_pzzx: split the
    Pingzhangzhai feeder into named branch/main sub-line network docs.
    """
    piny = get_pinyin_data()
    client = MongoClient('localhost', 27017)
    db = client['kmgd_pe']
    collection_fea = db['features']
    collection_network = db['network']
    collection_edges = db['edges']
    one = collection_network.find_one(
        {'_id': add_mongo_id('570ce0c1ca49c8085832061a')})
    branches = []
    print(len(one['properties']['nodes']))
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*sslzx.*$'
    }}))  # Songshanlin branch (松山林支线)
    # print(len(l))
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'松山林支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线松山林支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*mdszx.*$'
    }}))  # Mangdushan branch (忙肚山支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'忙肚山支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线忙肚山支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    # NOTE(review): repeats the '^.*mdszx.*$' regex but is labelled
    # Dahebian branch (大河边支线) -- likely a copy-paste slip; confirm.
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*mdszx.*$'
    }}))  # Dahebian branch (大河边支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'大河边支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线大河边支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    l = list(collection_fea.find({'properties.py': {
        '$regex': '^.*xdtzx.*$'
    }}))  # Xiadatian branch (下大田支线)
    l = sortlist(collection_edges, l)
    branches.extend(_.pluck(l, '_id'))
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'下大田支线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线下大田支线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    # print(o)
    collection_network.insert(o)
    # Features with the line prefix that belong to no branch -> main line.
    l = list(collection_fea.find({'properties.py': {'$regex': '^pzzxN.*$'}}))
    ids = _.pluck(l, '_id')
    main_ids = _.difference(ids, branches)
    print('len(main_ids)=%d' % len(main_ids))
    l = list(collection_fea.find({'_id': {'$in': main_ids}}))  # Pingzhangzhai main (坪掌寨线)
    l = sortlist(collection_edges, l)
    s = ','.join(_.deep_pluck(l, 'properties.name'))
    print(u'坪掌寨线%d, [%s]' % (len(l), s))
    name = u'坪掌寨线主线'
    o = {
        'properties': {
            'name': name,
            'py':
            piny.hanzi2pinyin_first_letter(
                name.replace('#', '').replace('II', u'二').replace(
                    'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一')),
            'voltage': '12',
            'webgis_type': 'polyline_dn',
            'nodes': _.pluck(l, '_id')
        }
    }
    collection_network.insert(o)
def test_jfykx():  # 酒房丫口线 (Jiufang-Yakou feeder)
    """Split the Jiufang-Yakou feeder into named branch lines plus a main line.

    Branches are selected by pinyin-first-letter key in ``properties.py``,
    ordered along the edge graph, and written to ``network`` as
    ``polyline_dn`` records; the ``jfykxN*`` poles not claimed by any branch
    form the main line.
    """
    piny = get_pinyin_data()
    client = MongoClient('localhost', 27017)
    db = client['kmgd_pe']
    collection_fea = db['features']
    collection_network = db['network']
    collection_edges = db['edges']

    # Fetched for parity with test_pzzx; only queried, result not printed.
    one = collection_network.find_one(
        {'_id': add_mongo_id('570ce0c1ca49c80858320619')})
    # print(len(one['properties']['nodes']))
    branches = []  # feature ids already claimed by some branch line

    def _py_key(name):
        # Pinyin first-letter key: '#' stripped, Roman numerals normalized to
        # the CJK numerals used throughout this data set.
        return piny.hanzi2pinyin_first_letter(
            name.replace('#', '').replace('II', u'二').replace(
                'I', u'一').replace(u'Ⅱ', u'二').replace(u'Ⅰ', u'一'))

    def _save_line(line_name, feats):
        # Persist one (branch or main) line document.
        # NOTE(review): Collection.insert is deprecated in modern pymongo
        # (insert_one); kept as-is to match the pymongo version in use.
        collection_network.insert({
            'properties': {
                'name': line_name,
                'py': _py_key(line_name),
                'voltage': '12',
                'webgis_type': 'polyline_dn',
                'nodes': _.pluck(feats, '_id'),
            }
        })

    def _add_branch(regex, label, line_name):
        # Collect, order and save one branch; remember its ids in `branches`.
        feats = list(collection_fea.find({'properties.py': {'$regex': regex}}))
        feats = sortlist(collection_edges, feats)
        branches.extend(_.pluck(feats, '_id'))
        joined = ','.join(_.deep_pluck(feats, 'properties.name'))
        print(u'%s%d, [%s]' % (label, len(feats), joined))
        _save_line(line_name, feats)

    _add_branch('^.*dhpzzx.*$', u'大河平掌支线', u'酒房丫口线大河平掌支线')
    _add_branch('^.*bszzx.*$', u'控制半山寨支线', u'酒房丫口线控制半山寨支线')
    _add_branch('^.*mchzx.*$', u'控制马草河支线', u'酒房丫口线控制马草河支线')
    _add_branch('^.*dpzzx.*$', u'大平掌支线', u'酒房丫口线大平掌支线')
    _add_branch('^.*bjzx.*$', u'碧鸡支线', u'酒房丫口线碧鸡支线')

    # Main line: every 'jfykxN*' pole that no branch claimed.
    feats = list(
        collection_fea.find({'properties.py': {'$regex': '^jfykxN.*$'}}))
    main_ids = _.difference(_.pluck(feats, '_id'), branches)
    print('len(main_ids)=%d' % len(main_ids))
    feats = sortlist(collection_edges,
                     list(collection_fea.find({'_id': {'$in': main_ids}})))
    joined = ','.join(_.deep_pluck(feats, 'properties.name'))
    print(u'酒房丫口线%d, [%s]' % (len(feats), joined))
    _save_line(u'酒房丫口线主线', feats)
def invalid_pks(target, pk):
    """Return the values of primary key ``pk`` for documents in *target*
    marked as invalid.

    The query projects only ``pk`` (suppressing ``_id``), so each document
    in the cursor carries at most that single field.
    """
    projection = {'_id': 0, pk: 1}
    invalid_docs = target.collection.find(mark_invalid({}), projection)
    return [doc.get(pk) for doc in invalid_docs]
def test_algorithm():
    """Walk the pole/edge graph of two feeders (坪掌寨线 / 酒房丫口线) and export
    the resulting bus/branch chains to Excel workbooks via xlwt.

    NOTE(review): this is Python-2-only code (``dict.has_key``). Several
    helpers declare a mutable default ``alist=[]``; every call site below
    passes a fresh list explicitly, so the shared-default hazard is not
    triggered here — but the defaults should still be replaced with ``None``.
    """

    def find_next_by_node(features, collection_edges, alist=[], id=None):
        # Collect ids of the nearest downstream features that carry a
        # 'devices' property, recursing through device-less intermediates.
        # `id` (shadows the builtin) is the node we expand from.
        if isinstance(id, str):
            id = add_mongo_id(id)
        l = _.deep_pluck(
            list(collection_edges.find({'properties.start': id})),
            'properties.end')
        for i in l:
            obj = _.find(features, {'_id': i})
            if obj and obj.has_key('properties'):
                if obj['properties'].has_key('devices'):
                    alist.append(obj['_id'])
                else:
                    # No devices on this node: keep walking downstream.
                    alist = find_next_by_node(features, collection_edges,
                                              alist, obj['_id'])
        return alist

    def find_chain(features, collection_edges, alist=[], id=None):
        # Depth-first build of the ordered branch records reachable from
        # `id`: each record is {'lnbr_idx', 'from_id', 'to_id'}.
        _ids = find_next_by_node(features, collection_edges, [], id)
        for _id in _ids:
            obj = _.find(features, {'_id': _id})
            if obj:
                # Computed but unused; kept for the commented *_idx fields.
                from_index = _.find_index(features, {'_id': id})
                to_index = _.find_index(features, {'_id': _id})
                if obj.has_key('properties') and obj['properties'].has_key(
                        'devices'):
                    alist.append({
                        'lnbr_idx': len(alist) + 1,
                        'from_id': add_mongo_id(id),
                        'to_id': obj['_id'],
                        # 'from_idx': from_index,
                        # 'to_idx': to_index,
                    })
                alist = find_chain(features, collection_edges, alist,
                                   obj['_id'])
        return alist

    def find_prev(collection_edges, id):
        # Upstream neighbour of `id` via the first matching edge, or None.
        ret = None
        one = collection_edges.find_one({'properties.end': id})
        if one:
            ret = one['properties']['start']
        return ret

    def find_next(collection_edges, id):
        # Downstream neighbour of `id` via the first matching edge, or None.
        ret = None
        one = collection_edges.find_one({'properties.start': id})
        if one:
            ret = one['properties']['end']
        return ret

    def find_first(collection_edges, alist):
        # Follow `prev` links from the first feature until leaving the set of
        # known ids; the last id still inside the set is the chain head.
        ids = _.pluck(alist, '_id')
        id = alist[0]['_id']
        prev_id = None
        while id and id in ids:
            prev_id = id
            id = find_prev(collection_edges, prev_id)
        return prev_id

    def write_excel_lnbr(features_all, chains, filename):
        # One sheet per chain; each row is a branch (from-bus, to-bus).
        wb = xlwt.Workbook()
        # print(dir(wb))
        for chain in chains:
            # Sheet named by its 1-based ordinal (uses xlwt's private
            # worksheet list to count existing sheets).
            ws = wb.add_sheet(str(len(wb._Workbook__worksheets) + 1))
            columns = [
                '_001_LnBR', '_002_Bus_from', '_003_Bus_to', '_004_R',
                '_005_X', '_006_B_1_2', '_007_kVA', '_008_State',
            ]
            for col in columns:
                ws.write(0, columns.index(col), col)
            for i in chain:
                # NOTE(review): chain.index(i) is O(n) per row and assumes
                # records are unique within a chain.
                row = chain.index(i) + 1
                ws.write(row, 0, str(i['lnbr_idx']))
                from_obj = _.find(features_all, {'_id': i['from_id']})
                to_obj = _.find(features_all, {'_id': i['to_id']})
                from_name = from_obj['properties']['name']
                from_id = remove_mongo_id(from_obj['_id'])
                # from_idx = i['from_idx']
                to_name = to_obj['properties']['name']
                to_id = remove_mongo_id(to_obj['_id'])
                # to_idx = i['to_idx']
                # ws.write(row, 1, from_name)
                # ws.write(row, 2, to_name)
                ws.write(row, 1, from_id)
                ws.write(row, 2, to_id)
        wb.save(filename)

    def write_excel_bus(features_all, chains, filename):
        # One sheet per chain; each row is a bus. The first bus gets type 3
        # (slack), all others type 1; the final to-bus is appended as an
        # extra row.
        wb = xlwt.Workbook()
        # print(dir(wb))
        for chain in chains:
            ws = wb.add_sheet(str(len(wb._Workbook__worksheets) + 1))
            columns = [
                '_001_No', '_002_Type', '_003_MW', '_004_Mvar', '_005_GS',
                '_006_Bs', '_007_Mag', '_008_Deg',
            ]
            for col in columns:
                ws.write(0, columns.index(col), col)
            for i in chain:
                row = chain.index(i) + 1
                obj = _.find(features_all, {'_id': i['from_id']})
                # name = obj['properties']['name']
                id = obj['_id']
                # from_idx = i['from_idx']
                # ws.write(row, 0, name)
                ws.write(row, 0, remove_mongo_id(id))
                # ws.write(row, 0, from_idx)
                if row == 1:
                    ws.write(row, 1, 3)
                else:
                    ws.write(row, 1, 1)
                if row == len(chain):
                    # Last record: also emit its to-bus as the closing row.
                    obj1 = _.find(features_all, {'_id': i['to_id']})
                    # name1 = obj1['properties']['name']
                    id1 = obj1['_id']
                    # to_idx = i['to_idx']
                    # ws.write(row+1, 0, name1)
                    ws.write(row + 1, 0, remove_mongo_id(id1))
                    # ws.write(row + 1, 0, to_idx)
                    ws.write(row + 1, 1, 1)
        wb.save(filename)

    client = MongoClient('localhost', 27017)
    db = client['kmgd_pe']
    collection_network = db['network']
    collection_fea = db['features']
    collection_edges = db['edges']
    # line_ids = ['570ce0c1ca49c80858320619', '570ce0c1ca49c8085832061a']
    # Pingzhangzhai feeder (坪掌寨线)
    ids0 = collection_network.find_one(
        {'_id': add_mongo_id('570ce0c1ca49c8085832061a')})['properties']['nodes']
    features_all = list(collection_fea.find({'_id': {'$in': ids0}}))
    # All pzzx* sub-lines except the bare 'pzzx' record itself.
    line_ids = _.pluck(
        list(
            collection_network.find({
                '$and': [{
                    'properties.py': {
                        '$regex': '^pzzx.*$'
                    }
                }, {
                    'properties.py': {
                        '$not': re.compile('^pzzx$')
                    }
                }]
            })), '_id')
    # print(line_ids)
    chains = []
    for i in line_ids:
        line = collection_network.find_one({'_id': i})
        if line and line['properties'].has_key('nodes'):
            features = list(
                collection_fea.find({
                    '_id': {
                        '$in': add_mongo_id(line['properties']['nodes'])
                    }
                }))
            # Locate the head of the line, then enumerate its chain.
            first_id = find_first(collection_edges, features)
            if first_id:
                first = _.find(features, {'_id': first_id})
                if first:
                    chain = find_chain(features, collection_edges, [],
                                       first_id)
                    print(first['properties']['name'])
                    print(len(chain))
                    chains.append(chain)
    write_excel_lnbr(features_all, chains, 'data_lnbr_pzz.xls')
    write_excel_bus(features_all, chains, 'data_bus_pzz.xls')
    # chains = []
    # line = collection_network.find_one({'_id': add_mongo_id('570ce0c1ca49c8085832061a')})
    # if line and line['properties'].has_key('nodes'):
    #     first_id = add_mongo_id('570ce0b7ca49c8085832018f')
    #     chain = find_chain(features_all, collection_edges, [], first_id)
    #     chains.append(chain)
    # write_excel_lnbr(features_all, chains, 'data_lnbr_pzz0.xls')
    # first = ['570ce0b7ca49c8085832018f', '570ce0c1ca49c8085832031b']
    # Jiufang-Yakou feeder (酒房丫口线) — same procedure as above.
    chains = []
    ids0 = collection_network.find_one(
        {'_id': add_mongo_id('570ce0c1ca49c80858320619')})['properties']['nodes']
    features_all = list(collection_fea.find({'_id': {'$in': ids0}}))
    line_ids = _.pluck(
        list(
            collection_network.find({
                '$and': [{
                    'properties.py': {
                        '$regex': '^jfykx.*$'
                    }
                }, {
                    'properties.py': {
                        '$not': re.compile('^jfykx$')
                    }
                }]
            })), '_id')
    # print(line_ids)
    for i in line_ids:
        line = collection_network.find_one({'_id': i})
        if line and line['properties'].has_key('nodes'):
            features = list(
                collection_fea.find({
                    '_id': {
                        '$in': add_mongo_id(line['properties']['nodes'])
                    }
                }))
            first_id = find_first(collection_edges, features)
            if first_id:
                first = _.find(features, {'_id': first_id})
                if first:
                    chain = find_chain(features, collection_edges, [],
                                       first_id)
                    print(first['properties']['name'])
                    print(len(chain))
                    chains.append(chain)
    write_excel_lnbr(features_all, chains, 'data_lnbr_jfyk.xls')
    write_excel_bus(features_all, chains, 'data_bus_jfyk.xls')
'https://www.googleapis.com/auth/drive'] credentials = ServiceAccountCredentials.from_json_keyfile_name('easy-deutsch.json', scope) gc = gspread.authorize(credentials) sheet = gc.open("Deutsch Wörter").worksheet('Expressions') tokenizer = Tokenizer(split_camel_case=True, token_classes=False, extra_info=False) data = py_(sheet.get_all_values()).filter(lambda r: r[0]).map(lambda r: py_.compact(r)).map( lambda r: [py_.capitalize(r[0], strict=False), *r[1:]] ).map( lambda r, i: dict(id=i, de=r[0], low=r[0].lower(), tokens=tokenizer.tokenize(r[0].lower()), rest=r[1:]) ).value() token_index = {} for tokens in py_.pluck(data, 'tokens'): for token in tokens: if len(token) <= 1: continue t = token.lower() if t not in token_index: token_index[t] = dict( key=t, ids=py_(data).filter(lambda d: t in d['tokens']).pluck('id').value() ) token_data = py_(token_index.values()).map( lambda d: dict(count=len(d['ids']), **d) ).order_by(['-count', 'key']).value() pprint.pprint(token_data)