Exemplo n.º 1
0
        if command=='3':
            data_name=input('请输入数据的文件名(包含路径),缺省为 .\\data\\data.xlsx. 请输入:')
            if not data_name:
                data_name='.\\data\\data.xlsx'
            try:
                data=rpt.read_data(data_name)
                print('已成功导入data.')
            except Exception as e:
                print(e)
                print('data导入失败, 请检查')
                continue
            code_name=input('请输入code的文件名(包含路径),缺省为 .\\data\\code.xlsx. 请输入:')
            if not code_name:
                code_name='.\\data\\code.xlsx'
            try:
                code=rpt.read_code(code_name)
                print('已成功导入code.')
            except Exception as e:
                print(e)
                print('code导入失败, 请检查')
                continue
            cross_qlist=list(sorted(code,key=lambda c: int(re.findall('\d+',c)[0])))
            print('-'*20+'题目数:{}个,样本数:{}个'.format(len(code),len(data))+'-'*20)
            print('题目编码情况如下......\n')
            for k in cross_qlist:
                print('{key}:  {c}'.format(key=k,c=code[k]['content']))
                time.sleep(0.1)
            break

        if command=='4':
            try: 
Exemplo n.º 2
0
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import report as rpt
# Re-import the local `report` module so edits to it are picked up in a
# running session.  `reload` is a builtin only on Python 2; Python 3.4+
# provides it in importlib.
try:
    from importlib import reload
except ImportError:
    pass  # Python 2: the builtin reload() is used
reload(rpt)



# Data import
code=rpt.read_code('.\\data\\code.xlsx')
# pandas.read_excel takes no `encoding` keyword (current pandas raises
# TypeError when it is passed), so the workbook is read without it.
data0=pd.read_excel('.\\data\\data.xlsx')

# Data cleaning
# Keep rows where Q5 is 1 or missing.  Per the original note this removes
# respondents who bought the product themselves but are not its user.
data=data0[(data0['Q5']==1)|(data0['Q5'].isnull())]
# Keep only rows whose u'来源详情' column equals u'直接访问'.
data=data[data[u'来源详情']==u'直接访问']

# Disabled exploratory snippet, kept as a bare string literal (never executed).
# NOTE(review): if revived, the applymap() result on its second line is
# discarded because it is not assigned back.
'''
Q12=data[code['Q12']['qlist']]
Q12.applymap(lambda x:int(x==1))
Q12=Q12.sum()
Q12.rename(index=code['Q12']['code'],inplace=True)
Q12.sort_values(inplace=True)
'''

# Build the summary chart report for the filtered sample.
filename=u'小鲜4真实使用用户1_334'
rpt.summary_chart(data,code,filename=filename)
Exemplo n.º 3
0
"""

# 聚类分析
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
#import seaborn as sns; sns.set()

import report as rpt
# `imp` is deprecated since Python 3.4 and removed in Python 3.12, so take
# reload() from importlib and keep `imp` only as a fallback for interpreters
# whose importlib lacks it.
try:
    from importlib import reload
except ImportError:
    from imp import reload

# Re-import the local `report` module so edits to it are picked up in a
# running session.
reload(rpt)

# Data import
code = rpt.read_code('code.xlsx')
data = pd.read_excel('data.xlsx')

# Derived column Q21a: for each row, the largest value across the columns
# listed for Q21.  max(axis=1) gives the row-wise maximum directly, without
# transposing the frame twice.
data['Q21a'] = data[code['Q21']['qlist']].max(axis=1)
# Register the derived column in the code book so it is handled like any
# other question.
code['Q21a'] = {
    'content': '拍照频率',  # "photo-taking frequency"
    'qtype': '单选题',  # "single-choice question"
    'qlist': ['Q21a'],
    'code': {
        1: '几乎不使用',  # "almost never use"
        2: '偶尔用',  # "use occasionally"
        3: '经常用'  # "use often"
    }
}

from sklearn.cluster import KMeans