def ptt_zyclass_xktype(self):
    """Print two summary tables built from ``self.df_class_type_count_bk``.

    Table 1 ('xk type count for benke'): one row per ``zyclass`` with the
    ``xk0`` column, one column per key of ``self.field_dict``, a row-wise
    ``xk_sum`` and a trailing ``total`` row.

    Table 2 ('all zy count'): every column of the source frame plus a
    trailing ``total`` row.

    In both tables the ``zyclass`` code is replaced by
    ``self.zyclass_name_bk[int(code[0:2])]``.  Nothing is returned; the
    tables are rendered with ``ptt.make_page`` and printed.
    """
    xk_fields = list(self.field_dict.keys())
    count_fields = xk_fields + ['xk0']

    # ---- table 1: per-class counts by xk type ----
    dt = self.df_class_type_count_bk[['zyclass', 'xk0'] + xk_fields]
    dt = dt.astype({fs: int for fs in count_fields})
    dt['zyclass'] = dt.zyclass.apply(lambda x: self.zyclass_name_bk[int(x[0:2])])
    dt.loc[:, 'xk_sum'] = sum(dt[fs] for fs in ['xk0'] + xk_fields)

    # Column sums turned into a one-row frame (double unstack == transpose);
    # the summed 'zyclass' strings are meaningless, so overwrite with a label.
    dsum = pd.DataFrame(dt.sum())
    dsum.loc['zyclass'] = 'total'
    dsum = dsum.unstack().unstack()

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    # and is what the second table below already uses.
    dt = pd.concat([dt, dsum])

    align_dict = {fs: 'r' for fs in count_fields}
    align_dict.update({'zyclass': 'l', 'xk_sum': 'r'})
    print(ptt.make_page(dt, title='xk type count for benke', align=align_dict))

    # ---- table 2: every column of the source frame plus a total row ----
    dtt = pd.DataFrame(self.df_class_type_count_bk.sum()).unstack().unstack()
    dtt.zyclass = dtt.zyclass.apply(lambda x: 'total')
    dt2 = pd.concat([self.df_class_type_count_bk, dtt])
    dt2.zyclass = dt2.zyclass.apply(
        lambda x: self.zyclass_name_bk[int(x[0:2])] if x != 'total' else x)
    dt2 = dt2.astype({fs: int for fs in dt2.columns.values if fs != 'zy' + 'class'})
    print(ptt.make_page(
        dt2,
        title='all zy count',
        align={fs: 'l' if fs == 'zyclass' else 'r' for fs in dt2.columns}))
def find_zhuanye(self, lowpos=0, highpos=1000000, xxfilterlist=('', ), zyfilterlist=('', )):
    """Print the rows of ``self.dflq`` matching name filters and a WC range.

    Rows are kept when ``YXMC`` passes the filter built from
    ``xxfilterlist``, ``ZYMC`` passes the filter built from
    ``zyfilterlist`` and ``lowpos <= WC <= highpos``.  The survivors are
    grouped by (``YXDH``, ``ZYDH``), reduced with ``max`` over ``WC``,
    ``YXMC`` and ``ZYMC``, sorted by ``WC`` and printed through
    ``ptt.make_page``.

    Returns an empty ``DataFrame`` when ``self.dflq`` is None; otherwise
    returns None (the result is only printed).
    """
    records = self.dflq
    if records is None:
        return pd.DataFrame()

    school_ok = select_filter(xxfilterlist, '')
    major_ok = select_filter(zyfilterlist, '')

    keep = (
        records.YXMC.apply(school_ok)
        & records.ZYMC.apply(major_ok)
        & (records.WC >= lowpos)
        & (records.WC <= highpos)
    )
    grouped = records[keep].groupby(['YXDH', 'ZYDH'])[['WC', 'YXMC', 'ZYMC']].max()

    if len(grouped) == 0:
        print('no record found in pos {}--{} for xx={} zy={}'.format(
            lowpos, highpos, xxfilterlist, zyfilterlist))
    else:
        column_align = {'YXMC': 'l', 'ZYMC': 'l', 'WC': 'r'}
        page = ptt.make_page(grouped.sort_values('WC'),
                             ''.join(zyfilterlist),
                             align=column_align)
        print(page)
    return
def find_tdinfo_from_yxname(self, xxsubstr=('医学', ), kl='wk', cc='bk'):
    """Print the td tables filtered by the ``xx`` (name) column.

    A predicate is built with ``closed_filter(xxsubstr)``; if that call
    returns ``False`` the method returns immediately.  For ``cc == 'bk'``
    the tables ``td16bk1``, ``td16bk2``, ``td17bk`` and ``td18bk`` are
    shown, otherwise ``td16zk``, ``td17zk`` and ``td18zk``.  Each table
    is reduced to the columns ``xx``, ``wkpos`` and ``lkpos`` and sorted
    by ``lkpos`` when ``kl == 'lk'``, by ``wkpos`` for any other value.

    Nothing is returned; each table is printed via ``ptt.make_page``.
    """
    name_matches = closed_filter(xxsubstr)
    if name_matches is False:
        return

    sort_key = 'lkpos' if kl == 'lk' else 'wkpos'

    def pick(table):
        # Rows whose name passes the predicate, trimmed and ordered by position.
        hits = table[table.xx.apply(name_matches)]
        return hits[['xx', 'wkpos', 'lkpos']].sort_values(by=sort_key)

    if cc == 'bk':
        # (banner, attribute holding the table, title, extra make_page kwargs)
        bk_reports = (
            ('2016p1---', 'td16bk1', '2016p1', {}),
            ('2016p2---', 'td16bk2', '2016p2', {}),
            ('2017bk---', 'td17bk', '2017bk', {'align': {'xx': 'l'}}),
            ('2018bk---', 'td18bk', '2018bk', {'align': {'xx': 'l'}}),
        )
        for banner, attr, title, extra in bk_reports:
            print(banner)
            # Tables are looked up lazily so a missing one fails only after
            # the earlier reports have been printed.
            print(ptt.make_page(pick(getattr(self, attr)), title, **extra))
    else:
        zk_reports = (
            ('td16zk', '2016 zk'),
            ('td17zk', '2017 zk'),
            ('td18zk', '2018 zk'),
        )
        for attr, title in zk_reports:
            print(ptt.make_page(pick(getattr(self, attr)),
                                title=title,
                                align={'xx': 'l'}))
    return
def find_wc_from_score(self, score=500, scope=0, year=18, kl='wk'):
    """Print the rows of a score table whose ``fd`` value is near ``score``.

    The table is chosen from ``year`` and ``kl`` (case-insensitive):

    * year 18: ``wk``/``lk`` -> ``self.fd2018pt``, ``ys`` -> ``self.fd2018ystk_zhf``
    * year 19: ``wk``/``lk`` -> ``self.fd2019pt``, ``ys`` -> ``self.fd2019ystk_zhf``

    Rows with ``score - scope <= fd <= score + scope`` are printed through
    ``ptt.make_page``.  If no table matches (or the selected table is
    None) a usage hint is printed instead.  Always returns None.
    """
    kind = kl.lower()
    # Exact membership: the previous substring test (`in 'wk, lk'`) also
    # accepted '', 'k', ',' and ' ' as if they were valid categories.
    if kind in ('wk', 'lk'):
        family = 'pt'
    elif kind == 'ys':
        family = 'ystk_zhf'
    else:
        family = None

    table_attr = {
        (18, 'pt'): 'fd2018pt',
        (18, 'ystk_zhf'): 'fd2018ystk_zhf',
        (19, 'pt'): 'fd2019pt',
        (19, 'ystk_zhf'): 'fd2019ystk_zhf',
    }.get((year, family))

    df = getattr(self, table_attr) if table_attr is not None else None
    if df is None:
        print('no fd data found for kl={} year={}!'.format(kl, year))
        # The hint lists the values this method actually accepts.
        print('year in [{}] kl in [{}]'.format('18,19', 'wk,lk,ys'))
        return

    # Inclusive on both ends, same as `score - scope <= x <= score + scope`.
    fdv = df[df.fd.between(score - scope, score + scope)]
    if len(fdv) > 0:
        print(ptt.make_page(
            fdv,
            title=str('score on {} with scope {}'.format(score, scope))))
    else:
        print('not founded data for score={}!'.format(score))
def find_tdinfo_from_wc(self, low, high, selecter=(''), filter=('', ), kl='wk', cc='bk', align=None):
    """Print a side-by-side 2016-2018 table for entries in a position range.

    Each yearly td table is narrowed with ``self.get_df_from_pos`` (range
    ``low``..``high`` on the position column, plus ``selecter``/``filter``),
    its ``<kl>jh``/``<kl>pos`` columns get a year suffix (``16``, ``16p2``,
    ``17``, ``18``) and the tables are outer-merged on ``xxh``, the first
    four characters of ``xx``.  Only the 2018 table keeps its ``xx`` column,
    and rows whose ``xx`` is missing after the merge are dropped, so the
    output lists entries present in the 2018 selection.  Missing numbers
    are shown as ``-1``.

    Parameters
    ----------
    low, high : position bounds forwarded to ``get_df_from_pos``.
    selecter, filter : forwarded unchanged to ``get_df_from_pos``.
    kl : ``'lk'`` selects the ``lk*`` columns; any other value selects the
        ``wk*`` columns (same fallback as the column renaming).
    cc : ``'bk'`` uses ``td16bk1``/``td16bk2``/``td17bk``/``td18bk``;
        anything else uses ``td16zk``/``td17zk``/``td18zk``.
    align : optional column-alignment mapping passed to ``ptt.make_page``;
        merged over the default ``{'xx': 'l'}``.

    Returns None; the merged table is printed.
    """
    # One prefix drives both the position filter and the renaming, so an
    # unexpected `kl` can no longer filter on lk columns while renaming wk.
    subject = 'lk' if kl == 'lk' else 'wk'
    posfield = subject + 'pos'

    # Previously `align` was accepted but never reached make_page.
    page_align = {'xx': 'l'}
    page_align.update(align or {})

    def fetch(table):
        # Apply the common position/name selection to one yearly table.
        return self.get_df_from_pos(table,
                                    lowpos=low,
                                    highpos=high,
                                    posfield=posfield,
                                    selecter=selecter,
                                    filter=filter,
                                    kl=kl)

    def tag(frame, suffix):
        # Year-suffix the plan/position columns and add the merge key.
        renamed = frame.rename(columns={
            subject + 'jh': subject + 'jh' + suffix,
            subject + 'pos': subject + 'pos' + suffix,
        })
        renamed.loc[:, 'xxh'] = renamed.xx.apply(lambda x: x[0:4])
        return renamed

    if cc == 'bk':
        df16 = fetch(self.td16bk1)
        df16p2 = fetch(self.td16bk2)
        df17 = fetch(self.td17bk)
        df18 = fetch(self.td18bk)
    else:
        df16 = fetch(self.td16zk)
        # No second 2016 batch on this branch: keep an empty frame with
        # the same columns so the later steps stay uniform.
        df16p2 = df16.head(0)
        df17 = fetch(self.td17zk)
        df18 = fetch(self.td18zk)

    # Only the 2018 frame keeps the full name column.
    df16 = tag(df16, '16').drop(labels=['xx'], axis=1)
    df16p2 = tag(df16p2, '16p2').drop(labels=['xx'], axis=1)
    df17 = tag(df17, '17').drop(labels=['xx'], axis=1)
    df18 = tag(df18, '18')

    dfmerge = pd.merge(df18, df17, on='xxh', how='outer')
    dfmerge = pd.merge(dfmerge, df16, on='xxh', how='outer')
    if df16p2['xxh'].count() > 0:
        dfmerge = pd.merge(dfmerge, df16p2, on='xxh', how='outer')

    # '-1' marks values absent in a given year; the numeric columns are
    # then cast back to int for display.
    dfmerge = dfmerge.fillna('-1')
    int_columns = {
        k: int for k in dfmerge.columns if ('jh' in k) or ('pos' in k)
    }
    dfmerge = dfmerge.astype(dtype=int_columns)
    dfmerge = dfmerge.drop(labels='xxh', axis=1)
    dfmerge = dfmerge.query('xx != "-1"')
    print(ptt.make_page(dfmerge, title='data 16-18', align=page_align))
    return
def xk_stats(xkfile=data_path_dell+'xk/xk_type_zycount.csv',
             plot_pie=False,
             ptt_zycount=False,
             ):
    """Summarise the xk-type counts stored in ``xkfile``.

    The CSV is expected to hold a ``zyclass`` column, an ``xk0`` column and
    columns named ``<type>_<subjects>`` with type in ``xk1``, ``xk2``,
    ``xk3``, ``xk21``, ``xk31`` and one letter per subject.

    For every 3-subject combination from ``cb(xk_subject, 3)`` the function
    prints how many entries are reachable: ``xk0`` plus every ``xk1/xk2/xk3``
    column whose subjects are all contained in the combination, plus every
    ``xk21/xk31`` column sharing at least one subject with it.

    Parameters
    ----------
    xkfile : path of the CSV to read.
    plot_pie : when true, draw three pies (all rows, rows with
        ``zyclass < '50'``, and the difference) with matplotlib.
    ptt_zycount : when true, print the per-class tables via ``ptt.make_page``.

    Returns
    -------
    (zy_xk_series, zy_xk_series_bk) : column sums over all rows and over
        the rows with ``zyclass < '50'``.
    """
    xk_name = ['xk0', 'xk1', 'xk2', 'xk3', 'xk21', 'xk31']
    xk_label = ['0', '1', '2', '3', '1/2', '1/3']
    xk_subject = ['d', 'h', 'l', 's', 'w', 'z']
    all_required_types = {'xk1', 'xk2', 'xk3'}
    any_required_types = {'xk21', 'xk31'}
    xk_sub_cb = cb(xk_subject, 3)

    # read zy class type data
    dzy = pd.read_csv(xkfile, dtype={'zyclass': str})
    dzy = dzy.fillna(0)
    zyfield = list(dzy.columns.values)
    zyfield.remove('zyclass')
    zy_xk_series = dzy[zyfield].sum()

    for xs in xk_sub_cb:
        chosen = set(xs)
        zynum = zy_xk_series['xk0']
        for t in zy_xk_series.index:
            xktype, sep, xksubs = t.partition('_')
            if not sep:
                continue
            if xktype in all_required_types:
                # Every required subject must be among the chosen ones.  A
                # substring test on the joined letters would miss pairs that
                # are not adjacent, e.g. 'dl' against 'dhl'.
                if set(xksubs) <= chosen:
                    print(''.join(xs), t, zy_xk_series[t])
                    zynum += zy_xk_series[t]
            elif xktype in any_required_types:
                # One shared subject is enough for the "1 of N" types.
                if chosen & set(xksubs):
                    print(xs, t, zy_xk_series[t])
                    zynum += zy_xk_series[t]
        print('km-{} zycount={}'.format(xs, zynum))

    dzyp = dzy[dzy.zyclass < '50']  # benke
    zy_xk_series_bk = dzyp[zyfield].sum()

    field_dict = {
        name: [x for x in dzy.columns if name + '_' in x]
        for name in ('xk1', 'xk2', 'xk3', 'xk21', 'xk31')
    }
    # Work on copies so the per-type totals are not written into a slice.
    dzy = dzy.copy()
    dzyp = dzyp.copy()
    for fs, members in field_dict.items():
        dzy.loc[:, fs] = sum(dzy[fd] for fd in members)
        dzyp.loc[:, fs] = sum(dzyp[fd] for fd in members)

    # count for xk_type
    xk_count = dzy[xk_name].sum(axis=0)
    xk_count_bk = dzyp[xk_name].sum()
    xk_count_zk = [x - y for x, y in zip(xk_count, xk_count_bk)]

    if plot_pie:
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams.update({'font.size': 16})
        plt.figure(u'选科专业统计')
        plt.subplot(131)
        plt.pie(xk_count, labels=xk_label, autopct='%1.2f%%')
        plt.title(u'全部选科专业')
        plt.subplot(132)
        plt.pie(xk_count_bk, labels=xk_label, autopct='%1.2f%%')
        plt.title(u'本科选科专业')
        plt.subplot(133)
        plt.pie(xk_count_zk, labels=xk_label, autopct='%1.2f%%')
        plt.title(u'专科选科专业')

    if ptt_zycount:
        print(ptt.make_page(dzy[['zyclass']+xk_name], title='all zy count'))
        print(ptt.make_page(dzyp[['zyclass']+xk_name], title='benke zy count'))

    return zy_xk_series, zy_xk_series_bk
def print_xk(self):
    """Print the ``zyclass`` + xk-type columns of both count tables.

    ``self.df_class_type_count`` is printed first under the title
    'all zy count', then ``self.df_class_type_count_bk`` under
    'benke zy count'.  The columns shown are ``zyclass`` followed by
    ``self.xk_type_name``.
    """
    reports = (
        ('df_class_type_count', 'all zy count'),
        ('df_class_type_count_bk', 'benke zy count'),
    )
    for attr, heading in reports:
        frame = getattr(self, attr)
        shown = ['zyclass'] + self.xk_type_name
        print(ptt.make_page(frame[shown], title=heading))