class TestCalculate(unittest.TestCase):
    """Connectivity check against the ``cities`` collection of ``region``."""

    def setUp(self):
        """Open the ``cities`` collection before every test method."""
        region_db = MonDatabase(
            mongodb=MongoDB(conn_str='mongodb://*****:*****@139.196.189.191:3717/'),
            database_name='region',
        )
        self.mcollection = MonCollection(database=region_db,
                                         collection_name='cities')

    def test_connect_mongodb(self):
        """An unfiltered ``find()`` is expected to yield exactly two items."""
        fetched = list(self.mcollection.find())
        self.assertEqual(2, len(fetched))
class PopCensusDatabase:
    """Represents the population census database.

    Wraps the ``popcensus`` collection of the ``region`` database.
    """

    def __init__(self):
        # Connect to the popcensus collection.
        region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
        self.collection = MonCollection(database=region_db,
                                        collection_name='popcensus')

    def _sorted_distinct(self, field):
        """Return the sorted distinct values of *field* over an unfiltered find()."""
        cursor = self.collection.find()
        distinct_values = cursor.distinct(field)
        return sorted(distinct_values)

    @property
    def period(self):
        """Sorted distinct values of the ``year`` field."""
        return self._sorted_distinct('year')

    @property
    def variables(self):
        """Sorted distinct values of the ``variable`` field."""
        return self._sorted_distinct('variable')
def search_from_dbase(variables=None, query_dict=None, match='exact'):
    """Match *variables* against the ``storedvariable`` collection.

    Documents returned by ``find(query_dict)`` on the ``storedvariable``
    collection of the ``region`` database are turned into an
    ``origin -> variable`` lookup. The ``origin`` names are then handed to
    ``VariableMatcher`` as the search source.

    :param variables: passed through to the ``VariableMatcher`` search call
    :param query_dict: filter passed to ``find`` on the collection
    :param match: ``'exact'`` uses ``search_for_same_variable``; any other
        value uses ``search_for_similar_variable``
    :return: the matcher result with ``matched_variable`` replaced by the
        stored ``variable`` looked up from the matched ``origin`` (``None``
        when the lookup has no such key). In the non-exact case the
        originally matched name is kept in ``matched_middel_variable``.
    """
    region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
    stored = MonCollection(database=region_db, collection_name='storedvariable')

    # Build the origin -> variable lookup from the query result.
    origin_to_variable = {}
    for doc in stored.find(query_dict):
        origin_to_variable[doc['origin']] = doc['variable']

    is_exact = match == 'exact'
    if is_exact:
        pd_result = VariableMatcher.search_for_same_variable(
            variables=variables, source=origin_to_variable.keys())
    else:
        pd_result = VariableMatcher.search_for_similar_variable(
            variables=variables, source=origin_to_variable.keys())

    for row in pd_result.index:
        matched_origin = pd_result.loc[row, 'matched_variable']
        if not is_exact:
            # Column name (including its spelling) is part of the output
            # and is kept as-is.
            pd_result.loc[row, 'matched_middel_variable'] = matched_origin
        pd_result.loc[row, 'matched_variable'] = origin_to_variable.get(matched_origin)
    return pd_result
class AdminDatabase():
    """Connects to the administrative-division database.

    Wraps the ``admincode`` collection of the ``region`` database.
    """

    def __init__(self):
        # Connect to the admincode collection.
        mongo = MongoDB()
        mdb = MonDatabase(mongodb=mongo, database_name='region')
        self.collection = MonCollection(database=mdb, collection_name='admincode')

    def find(self, **conds):
        """Query the collection with keyword-style conditions.

        Two keywords are treated as options rather than field filters:

        :param projection: projection passed to the collection's ``find``;
            when missing or ``None`` a default projection of ``region``,
            ``version``, ``adminlevel``, ``acode``, ``_id`` and ``parent``
            is used
        :param sorts: sort specification passed to ``sort``; when missing or
            ``None`` results are sorted by ``year`` then ``acode``, ascending

        Every other keyword is a field filter: a ``list`` value becomes an
        ``$in`` condition, any other value is matched as given.

        :return: the result of ``collection.find(...).sort(...)``
        """
        # Always remove the option keywords from the filters. Previously an
        # explicit ``projection=None`` / ``sorts=None`` stayed in ``conds``
        # and leaked into the query as a bogus field condition.
        projection = conds.pop('projection', None)
        if projection is None:
            projection = {'region': 1, 'version': 1, 'adminlevel': 1,
                          'acode': 1, '_id': 1, 'parent': 1}

        sorts = conds.pop('sorts', None)
        if sorts is None:
            sorts = [('year', ASCENDING), ('acode', ASCENDING)]

        # Build the query condition from the remaining keywords.
        condition = {
            key: {'$in': value} if isinstance(value, list) else value
            for key, value in conds.items()
        }

        return self.collection.find(condition, projection).sort(sorts)

    @property
    def period(self):
        """Sorted distinct values of the ``year`` field."""
        return sorted(self.find().distinct('year'))

    def version(self, year=None):
        """Return the sorted distinct ``version`` values.

        :param year: when given, only documents whose ``year`` equals
            ``str(year)`` are considered; when ``None`` all documents are
        :return: sorted list of distinct versions
        """
        if year is None:
            return sorted(self.find().distinct('version'))
        return sorted(self.find(year=str(year)).distinct('version'))
def search_from_dbase(variables=None, query_dict=None, match='exact'):
    """Look up *variables* among the names stored in ``storedvariable``.

    The ``storedvariable`` collection of the ``region`` database is queried
    with *query_dict*. Each returned document contributes one
    ``origin -> variable`` entry to a lookup table, and the ``origin`` names
    serve as the search source for ``VariableMatcher``.

    :param variables: forwarded to the ``VariableMatcher`` search function
    :param query_dict: filter forwarded to the collection's ``find``
    :param match: ``'exact'`` selects ``search_for_same_variable``; anything
        else selects ``search_for_similar_variable``
    :return: the matcher result, with ``matched_variable`` rewritten to the
        stored ``variable`` for the matched ``origin`` (``None`` if the
        lookup has no entry). For non-exact matching the matched name is
        first copied into ``matched_middel_variable``.
    """
    storage = MonCollection(
        database=MonDatabase(mongodb=MongoDB(), database_name='region'),
        collection_name='storedvariable',
    )
    lookup = {doc['origin']: doc['variable'] for doc in storage.find(query_dict)}
    translate = lookup.get

    if match == 'exact':
        pd_result = VariableMatcher.search_for_same_variable(
            variables=variables, source=lookup.keys())
        for label in pd_result.index:
            current = pd_result.loc[label, 'matched_variable']
            pd_result.loc[label, 'matched_variable'] = translate(current)
        return pd_result

    pd_result = VariableMatcher.search_for_similar_variable(
        variables=variables, source=lookup.keys())
    for label in pd_result.index:
        # Keep the matched name before it is overwritten; the column name
        # is emitted exactly as spelled here.
        pd_result.loc[label, 'matched_middel_variable'] = pd_result.loc[
            label, 'matched_variable']
        current = pd_result.loc[label, 'matched_variable']
        pd_result.loc[label, 'matched_variable'] = translate(current)
    return pd_result
:param columns: 详见pandas.pivot_table()函数参数说明 :param dropna: 详见pandas.pivot_table()函数参数说明 :param fill_value: 详见pandas.pivot_table()函数参数说明 :return: 返回转换后的宽格式表格 :rtype: pandas.DataFrame """ result = pd.pivot_table(data=dataframe, values=values, index=index, columns=columns, dropna=dropna,fill_value=fill_value) return result if __name__ == '__main__': mcollection = MonCollection(database=MonDatabase(mongodb=MongoDB(), database_name='region'), collection_name='provincestat') cursor = mcollection.find({'variable':{'$in':['人均地区生产总值','私人控股企业法人单位数','城镇居民消费','城镇单位就业人员平均工资']}}, projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1,'year':1}) #cursor = mcollection.find({'year':'2010', 'variable':{'$in':['人均地区生产总值','私人控股企业法人单位数','城镇居民消费','城镇单位就业人员平均工资']}}, # projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1}) #cursor = mcollection.find({'variable':'人均地区生产总值','acode':'110000'}, # projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1,'year':1}) mongoconverter = MongoDBToPandasFormat(cursor) # Test first #result = mongoconverter(values='value', index=['year'], columns='variable',dropna=True) result = mongoconverter(values='value', index=['acode','year'], columns='variable',dropna=True) #result = mongoconverter(values='value', index=['acode','year'], columns='variable', # dropna=False, balanced=True) print(result) #result.to_excel('e:/backup/result.xlsx')
fill_value=fill_value) return result if __name__ == '__main__': mcollection = MonCollection(database=MonDatabase(mongodb=MongoDB(), database_name='region'), collection_name='provincestat') cursor = mcollection.find( { 'variable': { '$in': ['人均地区生产总值', '私人控股企业法人单位数', '城镇居民消费', '城镇单位就业人员平均工资'] } }, projection={ '_id': 0, 'variable': 1, 'value': 1, 'province': 1, 'acode': 1, 'year': 1 }) #cursor = mcollection.find({'year':'2010', 'variable':{'$in':['人均地区生产总值','私人控股企业法人单位数','城镇居民消费','城镇单位就业人员平均工资']}}, # projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1}) #cursor = mcollection.find({'variable':'人均地区生产总值','acode':'110000'}, # projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1,'year':1}) mongoconverter = MongoDBToPandasFormat(cursor) # Test first #result = mongoconverter(values='value', index=['year'], columns='variable',dropna=True) result = mongoconverter(values='value',