示例#1
0
class TestCalculate(unittest.TestCase):
    """Connectivity check against the ``cities`` collection of the ``region`` database."""

    def setUp(self):
        """Build the collection wrapper used by the tests."""
        self.mcollection = MonCollection(
            database=MonDatabase(
                mongodb=MongoDB(conn_str='mongodb://*****:*****@139.196.189.191:3717/'),
                database_name='region'),
            collection_name='cities')

    def test_connect_mongodb(self):
        """An unfiltered ``find()`` is expected to yield exactly two documents."""
        documents = list(self.mcollection.find())
        self.assertEqual(2, len(documents))
class PopCensusDatabase:
    """Accessor for the population-census data.

    Wraps the ``popcensus`` collection of the ``region`` database.
    """

    def __init__(self):
        # Connect to the popcensus collection; MongoDB() is built with no arguments.
        self.collection = MonCollection(
            database=MonDatabase(mongodb=MongoDB(), database_name='region'),
            collection_name='popcensus')

    @property
    def period(self):
        """Distinct ``year`` values found in the collection, sorted ascending."""
        years = list(self.collection.find().distinct('year'))
        years.sort()
        return years

    @property
    def variables(self):
        """Distinct ``variable`` values found in the collection, sorted ascending."""
        names = list(self.collection.find().distinct('variable'))
        names.sort()
        return names
    def search_from_dbase(variables=None, query_dict=None, match='exact'):
        """Match ``variables`` against the names stored in the ``storedvariable`` collection.

        The collection is queried with ``query_dict``; each returned record
        contributes an ``origin`` -> ``variable`` mapping. The ``origin`` names
        are handed to ``VariableMatcher`` as the search source, and the
        ``matched_variable`` entries of the matcher's result are then replaced
        by the mapped ``variable`` values.

        NOTE(review): there is no ``self``/``cls`` parameter; presumably this is
        a static method whose decorator is outside the visible source -- confirm.

        :param variables: forwarded unchanged to the matcher
        :param query_dict: filter passed to the collection's ``find``
        :param match: ``'exact'`` uses ``search_for_same_variable``; any other
            value uses ``search_for_similar_variable`` and additionally keeps
            the raw match in the ``matched_middel_variable`` column
        :return: the matcher's result table (indexed through ``.index`` and
            ``.loc``; presumably a pandas DataFrame -- confirm)
        """
        region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
        stored = MonCollection(database=region_db, collection_name='storedvariable')
        origin_to_variable = dict(
            (record['origin'], record['variable']) for record in stored.find(query_dict))

        exact = match == 'exact'
        if exact:
            search = VariableMatcher.search_for_same_variable
        else:
            search = VariableMatcher.search_for_similar_variable
        table = search(variables=variables, source=origin_to_variable.keys())

        for row in table.index:
            if not exact:
                # Preserve the name the similarity search actually matched.
                table.loc[row, 'matched_middel_variable'] = table.loc[row, 'matched_variable']
            # Unknown names map to None, exactly as dict.get does.
            table.loc[row, 'matched_variable'] = origin_to_variable.get(
                table.loc[row, 'matched_variable'])

        return table
示例#4
0
class AdminDatabase():
    """Query helper for the administrative-division data.

    Wraps the ``admincode`` collection of the ``region`` database.
    """

    def __init__(self):
        # Connect to the admincode collection.
        mongo = MongoDB()
        mdb = MonDatabase(mongodb=mongo, database_name='region')
        self.collection = MonCollection(database=mdb, collection_name='admincode')

    def find(self, **conds):
        """Query the collection using keyword conditions.

        Two keywords are reserved and never become part of the filter:

        * ``projection`` -- projection handed to the collection's ``find``;
          when omitted or ``None`` it defaults to ``region``, ``version``,
          ``adminlevel``, ``acode``, ``_id`` and ``parent``.
        * ``sorts`` -- sort specification handed to ``.sort()``; when omitted
          or ``None`` it defaults to ``year`` then ``acode``, both ascending.

        Every other keyword is a field condition: a ``list`` value is turned
        into ``{'$in': value}``, anything else is matched as given.

        :return: the result of ``collection.find(filter, projection).sort(sorts)``
        """
        # pop() with a default instead of get()/conditional pop(): an explicit
        # ``projection=None`` or ``sorts=None`` must select the default and
        # must not leak into the filter as a field condition.
        projection = conds.pop('projection', None)
        if projection is None:
            projection = {'region': 1, 'version': 1, 'adminlevel': 1,
                          'acode': 1, '_id': 1, 'parent': 1}

        sorts = conds.pop('sorts', None)
        if sorts is None:
            sorts = [('year', ASCENDING), ('acode', ASCENDING)]

        # Whatever is left in conds is a field condition.
        condition = {
            key: {'$in': value} if isinstance(value, list) else value
            for key, value in conds.items()
        }

        return self.collection.find(condition, projection).sort(sorts)

    @property
    def period(self):
        """Distinct ``year`` values of the collection, sorted ascending."""
        return sorted(self.find().distinct('year'))

    def version(self, year=None):
        """Return the sorted distinct ``version`` values.

        :param year: when given, only records whose ``year`` equals
            ``str(year)`` are considered; ``None`` means all records
        """
        cursor = self.find() if year is None else self.find(year=str(year))
        return sorted(cursor.distinct('version'))
示例#5
0
    def search_from_dbase(variables=None, query_dict=None, match='exact'):
        """Look up ``variables`` among the names kept in the ``storedvariable`` collection.

        Records returned for ``query_dict`` supply an ``origin`` -> ``variable``
        lookup. The ``origin`` names serve as the matcher's search source;
        afterwards each ``matched_variable`` entry of the result is replaced
        by its mapped ``variable`` value (``None`` when there is no mapping).

        NOTE(review): no ``self``/``cls`` parameter is declared; presumably a
        static method whose decorator lies outside the visible source -- confirm.

        :param variables: passed through to the ``VariableMatcher`` search
        :param query_dict: filter given to the collection's ``find``
        :param match: ``'exact'`` selects ``search_for_same_variable``; every
            other value selects ``search_for_similar_variable`` and also copies
            the raw match into the ``matched_middel_variable`` column
        :return: the matcher's result table after the replacement
        """
        storage = MonCollection(
            database=MonDatabase(mongodb=MongoDB(), database_name='region'),
            collection_name='storedvariable')

        lookup = {}
        for doc in storage.find(query_dict):
            origin = doc['origin']
            lookup[origin] = doc['variable']
        candidates = lookup.keys()

        exact = match == 'exact'
        if not exact:
            frame = VariableMatcher.search_for_similar_variable(
                variables=variables, source=candidates)
            for label in frame.index:
                # Keep the intermediate (similar) name before it is overwritten.
                frame.loc[label, 'matched_middel_variable'] = frame.loc[
                    label, 'matched_variable']
                frame.loc[label, 'matched_variable'] = lookup.get(
                    frame.loc[label, 'matched_variable'])
            return frame

        frame = VariableMatcher.search_for_same_variable(
            variables=variables, source=candidates)
        for label in frame.index:
            frame.loc[label, 'matched_variable'] = lookup.get(
                frame.loc[label, 'matched_variable'])
        return frame
        :param columns: 详见pandas.pivot_table()函数参数说明
        :param dropna: 详见pandas.pivot_table()函数参数说明
        :param fill_value: 详见pandas.pivot_table()函数参数说明
        :return: 返回转换后的宽格式表格
        :rtype: pandas.DataFrame
        """
        result = pd.pivot_table(data=dataframe, values=values, index=index, columns=columns,
                                dropna=dropna,fill_value=fill_value)

        return result


if __name__ == '__main__':
    # Manual demo: query the provincestat collection for a few variables and
    # reshape the cursor with MongoDBToPandasFormat.
    region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
    mcollection = MonCollection(database=region_db, collection_name='provincestat')

    wanted_variables = ['人均地区生产总值','私人控股企业法人单位数','城镇居民消费','城镇单位就业人员平均工资']
    wanted_fields = {'_id':0,'variable':1,'value':1,'province':1,'acode':1,'year':1}
    cursor = mcollection.find({'variable': {'$in': wanted_variables}},
                              projection=wanted_fields)
    # Alternative queries kept for manual experimentation:
    #cursor = mcollection.find({'year':'2010', 'variable':{'$in':['人均地区生产总值','私人控股企业法人单位数','城镇居民消费','城镇单位就业人员平均工资']}},
    #                          projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1})
    #cursor = mcollection.find({'variable':'人均地区生产总值','acode':'110000'},
    #                          projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1,'year':1})
    mongoconverter = MongoDBToPandasFormat(cursor)

    # Active call: index by (acode, year), one column per variable.
    # Other invocations that were tried:
    #result = mongoconverter(values='value', index=['year'], columns='variable',dropna=True)
    #result = mongoconverter(values='value', index=['acode','year'], columns='variable',
    #                        dropna=False, balanced=True)
    result = mongoconverter(values='value', index=['acode', 'year'],
                            columns='variable', dropna=True)
    print(result)
    #result.to_excel('e:/backup/result.xlsx')

                                fill_value=fill_value)

        return result


if __name__ == '__main__':
    mcollection = MonCollection(database=MonDatabase(mongodb=MongoDB(),
                                                     database_name='region'),
                                collection_name='provincestat')
    cursor = mcollection.find(
        {
            'variable': {
                '$in': ['人均地区生产总值', '私人控股企业法人单位数', '城镇居民消费', '城镇单位就业人员平均工资']
            }
        },
        projection={
            '_id': 0,
            'variable': 1,
            'value': 1,
            'province': 1,
            'acode': 1,
            'year': 1
        })
    #cursor = mcollection.find({'year':'2010', 'variable':{'$in':['人均地区生产总值','私人控股企业法人单位数','城镇居民消费','城镇单位就业人员平均工资']}},
    #                          projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1})
    #cursor = mcollection.find({'variable':'人均地区生产总值','acode':'110000'},
    #                          projection={'_id':0,'variable':1,'value':1,'province':1,'acode':1,'year':1})
    mongoconverter = MongoDBToPandasFormat(cursor)

    # Test first
    #result = mongoconverter(values='value', index=['year'], columns='variable',dropna=True)
    result = mongoconverter(values='value',