# Menu branch for command '3': interactively load the data and code workbooks.
#
# Flow, as visible in this fragment:
#   1. Prompt for the data file path; an empty answer falls back to
#      '.\\data\\data.xlsx'. Load it with rpt.read_data(). On any exception,
#      print the exception and a failure message, then `continue`.
#   2. Prompt for the code file path; an empty answer falls back to
#      '.\\data\\code.xlsx'. Load it with rpt.read_code(), with the same
#      print-and-`continue` handling on failure.
#   3. Build cross_qlist by sorting the items yielded by iterating `code`,
#      ordered by the integer value of the first run of digits in each item.
#   4. Print a banner with len(code) and len(data), then one line per entry
#      of cross_qlist showing code[k]['content'], with a 0.1 s time.sleep(),
#      and finally `break`.
#
# The `continue`/`break` statements belong to an enclosing loop whose header
# is outside this fragment, and the trailing `if command=='4': try:` opens a
# branch whose body is also outside this fragment.
# `rpt`, `re`, `time` and `command` are expected to be bound by code not shown
# here.
#
# NOTE(review): presumably `code` is a dict keyed by question id (it is
# iterated and then indexed as code[k]['content']) - confirm against
# rpt.read_code.
# NOTE(review): '\d+' is a non-raw string literal containing an unrecognized
# escape sequence; recent Python versions emit a warning for it. It should be
# written r'\d+'.
# NOTE(review): re.findall(...)[0] raises IndexError for any key that contains
# no digit.
if command=='3': data_name=input('请输入数据的文件名(包含路径),缺省为 .\\data\\data.xlsx. 请输入:') if not data_name: data_name='.\\data\\data.xlsx' try: data=rpt.read_data(data_name) print('已成功导入data.') except Exception as e: print(e) print('data导入失败, 请检查') continue code_name=input('请输入code的文件名(包含路径),缺省为 .\\data\\code.xlsx. 请输入:') if not code_name: code_name='.\\data\\code.xlsx' try: code=rpt.read_code(code_name) print('已成功导入code.') except Exception as e: print(e) print('code导入失败, 请检查') continue cross_qlist=list(sorted(code,key=lambda c: int(re.findall('\d+',c)[0]))) print('-'*20+'题目数:{}个,样本数:{}个'.format(len(code),len(data))+'-'*20) print('题目编码情况如下......\n') for k in cross_qlist: print('{key}: {c}'.format(key=k,c=code[k]['content'])) time.sleep(0.1) break if command=='4': try:
# Report script: load the question coding and the raw answers, filter the
# respondents, and pass the result to rpt.summary_chart().
from importlib import reload  # reload() is not a builtin on Python 3

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import report as rpt

# Re-execute the local report module (importlib.reload) before using it.
reload(rpt)

# Data import
code = rpt.read_code('.\\data\\code.xlsx')
# No `encoding=` argument here: current pandas rejects it for read_excel with
# a TypeError.
data0 = pd.read_excel('.\\data\\data.xlsx')

# Data cleaning
# Keep rows where Q5 equals 1 or Q5 is missing. Original author's note: remove
# respondents who bought the product themselves but are not the one using it.
q5_is_one = data0['Q5'] == 1
q5_is_missing = data0['Q5'].isnull()
data = data0[q5_is_one | q5_is_missing]
# Keep only rows whose '来源详情' column equals '直接访问'.
data = data[data[u'来源详情'] == u'直接访问']

# Disabled Q12 tally, kept for reference (it was an inert string literal in
# the original script):
# Q12=data[code['Q12']['qlist']]
# Q12.applymap(lambda x:int(x==1))
# Q12=Q12.sum()
# Q12.rename(index=code['Q12']['code'],inplace=True)
# Q12.sort_values(inplace=True)

filename = u'小鲜4真实使用用户1_334'
rpt.summary_chart(data, code, filename=filename)
""" # 聚类分析 import numpy as np import pandas as pd import matplotlib.pyplot as plt from scipy import stats #import seaborn as sns; sns.set() import report as rpt from imp import reload reload(rpt) # 数据d导入 code = rpt.read_code('code.xlsx') data = pd.read_excel('data.xlsx') data['Q21a'] = data[code['Q21']['qlist']].T.max().T code['Q21a'] = { 'content': '拍照频率', 'qtype': '单选题', 'qlist': ['Q21a'], 'code': { 1: '几乎不使用', 2: '偶尔用', 3: '经常用' } } from sklearn.cluster import KMeans