示例#1
0
class Layout:
    '''
    Layout converts regional data between data layouts.

    Attributes:
    self.ad: an AdminData instance, used to look up region names by administrative code
    self._data: the input data in stack layout; the methods read its 'year',
                'acode', 'variable' and 'value' columns
    self.year: the distinct years in the data (groupby sort order), a list of str
    self.acode: the distinct administrative codes in the data, a list
    self.region: the region names matching self.acode, a list
    self.variable: the distinct variables in the data, a list
    self.tags: dict with keys 'year', 'region' and 'variable' holding the lists above
    self.ndim: dict with the number of distinct years, variables and regions

    Methods:
    __init__(self,data=None): constructor; stores the data and derives tags and ndim.
    _type(self)->dict: helper that collects the distinct years, regions and
                       variables; takes no argument and returns a dict.
    stackToNormal(self): convert the data from stack layout to normal layout.

    Demo:
    Convert regional data from stack to normal
    ad = AdminCode()
    rdata = RegionalData()    # build a RegionalData instance
    mdata = rdata.query(region=ad[u'浙江',u'f'],variable=[u'财政支出'],year=2012)    # query the data
    lout = Layout(mdata)    # convert the layout
    print(lout.stackToNormal())

    The result looks like
           region   财政支出
    330100    杭州市  78628
    330200    宁波市  82844
    330300    温州市  38779
    330400    嘉兴市  26070
    '''
    # Constructor
    def __init__(self,data=None):
        '''Store the data and derive its structure.

        :param data: stack-layout data. Although the default is None, _type()
                     calls data.groupby(...), so a DataFrame-like object must
                     actually be supplied.
        '''
        self.ad = AdminData()
        self._data = data

        self.tags = self._type()
        self.ndim = {'year':len(self.tags['year']),'variable':len(self.tags['variable']),'region':len(self.tags['region'])}

    # Layout conversion
    def stackToNormal(self):
        '''Convert the stack-layout data to normal layout.

        The shape of the result depends on self.ndim:
        - one variable and one region: a pd.Series of values keyed by year (str)
        - one year: a DataFrame indexed by acode with a 'region' column followed
          by one column per variable
        - one variable: a DataFrame indexed by acode with a 'region' column
          followed by one column per year
        - otherwise: a pd.Panel keyed by year (str), one such DataFrame per year

        :return: pd.Series, pd.DataFrame or pd.Panel as described above
        '''
        single_variable = self.ndim['variable'] == 1

        # Time series: one region, one variable
        if single_variable and (self.ndim['region'] == 1):
            rows = self._first_group(['acode','variable'])
            # self.year is in sorted order, so put the rows in the same order
            # before pairing years with values (stable sort keeps ties as-is).
            rows = rows.sort_values('year', kind='mergesort')
            return pd.Series(dict(zip(self.year, rows['value'])))

        # Cross-section (region | variable)
        if self.ndim['year'] == 1:
            return self._wide_by(self._first_group('year'), 'variable')

        # Cross-section (region | year)
        if single_variable:
            return self._wide_by(self._data, 'year')

        # Panel data: one (region | variable) frame per year
        frames = {}
        for y, rows in self._data.groupby('year', sort=True):
            frames[str(y)] = self._wide_by(rows, 'variable')
        # NOTE(review): pd.Panel was removed in pandas 0.25, so this branch only
        # works on older pandas. Kept so the return type stays unchanged.
        return pd.Panel(frames)

    def _first_group(self, keys):
        '''Return the rows of the first group of self._data grouped by keys.'''
        for _, rows in self._data.groupby(keys, sort=True):
            return rows
        raise IndexError('no groups found in data')

    def _wide_by(self, frame, key):
        '''Spread the stack rows of frame into one column per distinct value of key.

        The result is indexed by acode (columns are outer-joined on it) and
        gets a leading 'region' column looked up through self.ad.
        '''
        wide = None
        # A scalar key is used on purpose: with a list of length one, pandas >= 2.0
        # yields 1-tuples as group names, which would end up as column labels.
        for name, part in frame.groupby(key, sort=True):
            column = pd.DataFrame({name:part['value'].values},index=part['acode'].values)
            if wide is None:
                wide = column
            else:
                wide = pd.merge(wide,column,left_index=True,right_index=True,how='outer')
        if wide is None:
            wide = pd.DataFrame()
        wide.insert(0, 'region', [self.ad.get_by_acode(item)[0]['region'] for item in wide.index])
        return wide

    # Helper that describes the structure of the data
    def _type(self)->dict:
        '''Collect the distinct years, regions and variables found in the data.

        Sets self.year, self.acode, self.region and self.variable as a side effect.

        :return: dict with keys 'year', 'region' and 'variable'
        '''
        # Scalar groupby keys keep the group names scalar on every pandas version
        # (a list of length one yields 1-tuples on pandas >= 2.0).
        self.year = [str(name) for name, _ in self._data.groupby('year', sort=True)]

        self.acode = [name for name, _ in self._data.groupby('acode', sort=True)]
        self.region = [self.ad.get_by_acode(item)[0]['region'] for item in self.acode]

        self.variable = [name for name, _ in self._data.groupby('variable', sort=True)]

        return {'year':self.year,'region':self.region,'variable':self.variable}
示例#2
0
class RegionFormat(Format):
    '''RegionFormat converts regional data between layouts.

    :param data: regional data in stack layout. The methods call
                 self._data.groupby(...) and read the 'year', 'acode', 'region',
                 'variable' and 'value' columns (plus 'scale' when present), so
                 a DataFrame-like object is expected. Presumably
                 Format.__init__ stores it as self._data -- not visible here.
    '''
    def __init__(self,data):
        Format.__init__(self,data)
        self.ad = AdminData()
        self.to_set_type()

    # Layout conversion entry point
    def transform(self,sourcetype='stack',targettype='normal',connect='outer'):
        '''Convert the data from sourcetype layout to targettype layout.

        Only stack -> normal is implemented; both names are matched as prefixes.

        :param str sourcetype: layout of the stored data
        :param str targettype: layout to produce
        :param str connect: join mode forwarded to stack_to_normal
        :return: the dict built by stack_to_normal, or None for any other conversion
        '''
        if (re.match('stack',sourcetype) is not None) and (re.match('normal',targettype) is not None):
            return self.stack_to_normal(connect=connect)
        return None

    def stack_to_normal(self,connect):
        '''Convert regional data from stack layout to normal layout.

        The result depends on the structure of the data:
        - one variable and one region (and at most one scale): a time series,
          {'tags': ..., 'data': pd.Series keyed by year}
        - one year: a cross-section, {'tags': ..., 'data': DataFrame indexed by
          acode}; with more than one scale the columns are named
          '<variable>_<scale>'
        - otherwise: panel data, {'data': ..., 'pdata': pd.Panel,
          'balanceddata': ...}, plus 'tags' when exactly one scale is present

        :param str connect: how the columns of a cross-section are joined,
                            'inner' or 'outer'. The per-year frames of panel
                            data are always outer-joined.
        :return: dict
        '''
        scale = self.ndim.get('scale')
        # True when the data distinguishes several scales (e.g. whole city vs. urban district)
        multi_scale = (scale is not None) and scale > 1
        keys = ['variable','scale'] if multi_scale else 'variable'

        # Time series
        if (not multi_scale) and (self.ndim['variable'] == 1) and (self.ndim['region'] == 1):
            tags = {'variable':self.variable[0],'region':self.ad.get_by_acode(self.acode[0])[0]['region']}
            rows = self._first_group(['acode','variable'])
            # self.year is in sorted order, so put the rows in the same order
            # before pairing years with values (stable sort keeps ties as-is).
            rows = rows.sort_values('year', kind='mergesort')
            result = pd.Series(dict(zip(self.year, rows['value'])))
            if scale is not None:
                tags['scale'] = self.scale[0]
            return {'tags':tags,'data':result}

        # Cross-section (region | variable)
        if self.ndim['year'] == 1:
            mdata = self._wide_frame(self._first_group('year'), keys, connect)
            tags = {'year':self.year[0]}
            if (not multi_scale) and (scale is not None):
                tags['scale'] = self.scale[0]
            return {'tags':tags,'data':mdata}

        # Panel data
        frames = {}
        for y, rows in self._data.groupby('year', sort=True):
            frames[str(y)] = self._wide_frame(rows, keys, 'outer')
        # NOTE(review): pd.Panel was removed in pandas 0.25, so this branch only
        # works on older pandas.
        result = pd.Panel(frames)
        mdata = self._panel_to_frame(result)
        # presumably dropna(axis=1) removes regions with missing values so the
        # remaining panel is balanced -- TODO confirm against the Panel docs
        mdata2 = self._panel_to_frame(result.dropna(axis=1))
        if (not multi_scale) and (scale is not None):
            tags = {'scale':self.scale}
            return {'tags':tags,'data':mdata,'pdata':result,'balanceddata':mdata2}
        return {'data':mdata,'pdata':result,'balanceddata':mdata2}

    def _first_group(self, keys):
        '''Return the rows of the first group of self._data grouped by keys.'''
        for _, rows in self._data.groupby(keys, sort=True):
            return rows
        raise IndexError('no groups found in data')

    @staticmethod
    def _wide_frame(frame, keys, how):
        '''Spread the stack rows of frame into one column per group of keys.

        The result is indexed by acode, its columns are joined with mode how,
        and a leading 'region' column is filled from the frame's own
        acode -> region pairs. Tuple group names (several keys) become
        '_'-joined column labels.
        '''
        region_dict = dict(zip(frame['acode'],frame['region']))
        wide = None
        for name, part in frame.groupby(keys, sort=True):
            label = '_'.join(name) if isinstance(name, tuple) else name
            column = pd.DataFrame({label:part['value'].values},index=part['acode'].values)
            if wide is None:
                wide = column
            else:
                wide = pd.merge(wide,column,left_index=True,right_index=True,how=how)
        if wide is None:
            wide = pd.DataFrame()
        wide.insert(0, 'region', [region_dict[code] for code in wide.index])
        return wide

    @staticmethod
    def _panel_to_frame(panel):
        '''Flatten a year-keyed panel into a DataFrame without dropping incomplete rows.'''
        flipped = panel.swapaxes('items','minor')
        flipped = flipped.swapaxes('major','minor')
        return flipped.to_frame(False)

    # Helper that describes the structure of the data
    def to_set_type(self):
        '''Derive the structural attributes of the regional data.

        Sets self.year, self.acode, self.number_of_region, self.variable and
        self.ndim, plus self.scale and self.ndim['scale'] when the data has a
        'scale' column.

        :return: None
        '''
        # Scalar groupby keys keep the group names scalar on every pandas version
        # (a list of length one yields 1-tuples on pandas >= 2.0).

        # Years
        self.year = [str(name) for name, _ in self._data.groupby('year', sort=True)]

        # Administrative codes
        g = self._data.groupby('acode', sort=True)
        self.acode = [name for name, _ in g]
        self.number_of_region = len(g.groups)

        # Variables
        self.variable = [name for name, _ in self._data.groupby('variable', sort=True)]

        # Dimensions
        self.ndim = {'year':len(self.year),'variable':len(self.variable),'region':self.number_of_region}

        # Regional scale, only when the data carries that column
        if 'scale' in self._data.columns:
            self.scale = [name for name, _ in self._data.groupby('scale', sort=True)]
            self.ndim['scale'] = len(self.scale)
示例#3
0
from werkzeug.datastructures import ImmutableMultiDict
import json

# Years 1990 through 2014.
year = list(range(1990, 2015))
print(year)

# Demo built on the CEIC data
# 1. Load the CEIC data: the distinct administrative codes, sorted
db = Database()
con = db.connect("regionDB", "CEIC")
ceic_region_code = con.find().distinct("acode")
ceic_region_code = sorted(ceic_region_code)
print(len(ceic_region_code))

# 2. Look each code up in the administrative-division database
#    (the first match returned for a code is used)
admin_data = AdminData()
regions = [
    admin_data.get_by_acode(acode=code)[0]
    for code in ceic_region_code
]

# 3. Build the regional administrative data as [acode, parent label, region name] rows
region_list = []
for region in regions:
    if region["adminlevel"] < 3:
        # Regions with adminlevel below 3 are listed directly under the country
        parent = u"中国"
    else:
        # Otherwise fetch the parent record and label the row with its name
        parent = admin_data.database.collection.find_one({"_id": region["parent"]})
        parent = "中国" + "/" + parent["region"] + "属下"
    # The first element of each row is the administrative code
    region_list.append([region["acode"], parent, region["region"]])
print(region_list)
# json.dump(region_list,fp=open('e:/gitwork/application/testweb/region_ceic.txt', 'w'))

# 4. Query variables