def main():
    """Draw a heatmap of the ``ident`` category, ``bid`` rows by ``lid`` columns.

    The table named by ``args.table`` is loaded, every row is classified with
    ``categorize`` into an ``ident`` column, and the first row of each
    ``(bid, lid)`` pair is pivoted into a grid. Missing cells become ``0``.
    """
    args = get_args(__file__)

    # NOTE(review): five labels are handed to legend() together with four
    # colours -- confirm how legend() pairs them.
    legend_labels = [
        'N/A',
        'Id. Física',
        'Id. Historiográfica',
        'Desconocido',
        'Perdido',
    ]
    palette = sns.xkcd_palette(
        ['light grey', 'medium green', 'denim blue', 'pale red'])

    df = pd.DataFrame(read_table(args.table))
    df['ident'] = df.apply(categorize, axis=1)

    # pivot() needs unique (index, column) pairs, so keep the first of each.
    unique_pairs = df.drop_duplicates(['bid', 'lid'], keep='first')
    grid = unique_pairs.pivot(index='bid', columns='lid', values='ident')
    grid = grid.fillna(0)

    figure, axes = plt.subplots()
    sns.heatmap(
        grid,
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(palette),
        cbar=False,
    )
    set_axis(axes, grid, as_letters(set(df.year.values)), 'Libros')
    legend(figure, axes, legend_labels, palette)
    plotting(plt, args)
def plot(args):
    """Draw a heatmap of the table, coloured by ``args.color_by``.

    The configurer registered in ``configs`` under ``args.color_by`` turns the
    loaded table into the heatmap grid plus the legend labels and colours.
    When ``args.annotated`` is set and the colouring variable is ``'bid'`` or
    ``'year'``, one text label per grid row is written at x = 15.

    Args:
        args: parsed command-line arguments; ``table``, ``color_by`` and
            ``annotated`` are read here, and the whole object is passed on
            to ``plotting``.
    """
    df = pd.DataFrame(read_table(args.table))
    configurer = configs[args.color_by]
    data, labels, colors = configurer(df, args.color_by)

    f, ax = plt.subplots()
    heatmap_ax = sns.heatmap(
        data,
        ax=ax,
        linewidth=0.5,
        cmap=ListedColormap(colors),
        cbar=False,
    )
    set_axis(ax, data, as_letters(set(df.year.values)), ylabel='Posición')
    legend(f, ax, labels, colors)

    if args.annotated and args.color_by in ('bid', 'year'):
        # NOTE(review): the label is looked up with the grid's index value
        # against df's own row index, not against a bid-indexed frame --
        # confirm that this is the intended lookup.
        for bid in data.index:
            heatmap_ax.text(15, bid, df.loc[bid, 'short'], fontsize=8)

    plotting(plt, args)
def main():
    """Draw a ``pos`` x ``lid`` heatmap that singles out the book with bid 25.

    A ``track`` column is derived from ``bid``: rows whose ``bid`` equals 25
    get the sentinel value 1000, every other row keeps its ``bid``. The grid
    is painted with a two-colour map.
    """
    args = get_args(__file__)
    df = pd.DataFrame(read_table(args.table))

    tracked_bid = 25
    # Sentinel for the tracked book; presumably larger than any real bid so
    # that it lands on the last ('bright red') colour -- TODO confirm.
    sentinel = 1000
    df['track'] = df.apply(
        lambda row: sentinel if row['bid'] == tracked_bid else row['bid'],
        axis=1,
    )

    grid = df.pivot(index='pos', columns='lid', values='track')
    palette = sns.xkcd_palette(['light blue', 'bright red'])

    figure, axes = plt.subplots()
    sns.heatmap(
        grid,
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(palette),
        cbar=False,
    )
    set_axis(axes, grid, as_letters(set(df.year.values)), 'Posición')
    plotting(plt, args)
def main():
    """Command-line entry point: run the selected detector(s) on a table.

    Expects exactly three command-line arguments: the data file name, the
    output directory name and the tool id.

    Tool ids:
        '1' -- pattern discovery followed by the non-conforming-pattern check
        '2' -- ``RandDMVD.find_disguised_values``
        '3' -- ``OD.detect_outliers``
        '4' -- all of the above, in that order

    The process exits with status 1 on a wrong argument count, when the
    output directory cannot be created, when the table cannot be read, or on
    an unknown tool id.
    """
    if len(sys.argv) != 4:
        print("Wrong number of arguments .. entered (", len(sys.argv), ")")
        print("Usage (", sys.argv[0], "): <data file name>",
              " <output directory name> <Tool ID>")
        sys.exit(1)

    table_name, out_directory, tool_id = sys.argv[1:4]

    # Create the output directory if it does not exist yet.
    if not os.path.isdir(out_directory):
        try:
            os.makedirs(out_directory)
        except OSError:
            print("Error creating directory!")
            sys.exit(1)

    # Load the input table.
    try:
        T = common.read_table(table_name)
    except OSError:
        print("Error reading csv!")
        sys.exit(1)

    if tool_id not in ('1', '2', '3', '4'):
        print("Unkown option ..", tool_id)
        sys.exit(1)

    sus_dis_values = []
    # Only tools '1' and '4' discover patterns. Start from an empty result so
    # the JSON writer below always receives a defined value (previously tools
    # '2' and '3' raised NameError here).
    # NOTE(review): assumes print_output_data_json accepts an empty list for
    # "no patterns" -- confirm against its implementation.
    ptrns = []

    if tool_id in ('1', '4'):
        sus_dis_values, ptrns = patterns.find_all_patterns(T, sus_dis_values)
        sus_dis_values = DV_Detector.check_non_conforming_patterns(
            T, sus_dis_values)
    if tool_id in ('2', '4'):
        sus_dis_values = RandDMVD.find_disguised_values(T, sus_dis_values)
    if tool_id in ('3', '4'):
        sus_dis_values = OD.detect_outliers(T, sus_dis_values)

    common.print_output_data(out_directory, table_name, sus_dis_values)
    common.print_output_data_json(out_directory, table_name, sus_dis_values,
                                  ptrns)
def main():
    """Draw a heatmap of ``bid`` values with ``pos`` rows and ``lid`` columns.

    The colour map is a light-to-navy palette with one shade per grid row.
    """
    args = get_args(__file__)
    df = pd.DataFrame(read_table(args.table))

    grid = df.pivot(index='pos', columns='lid', values='bid')
    shades = sns.light_palette('navy', len(grid))

    figure, axes = plt.subplots()
    sns.heatmap(
        grid,
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(shades),
        cbar=False,
    )
    set_axis(axes, grid, as_letters(set(df.year.values)), 'Posición')
    plotting(plt, args)
def main():
    """Draw one bar chart of ``height`` per ``lid`` column, indexed by ``pos``.

    Each subplot loses its own title and y label and gets its legend anchored
    at the upper right. On the last subplot only every fifth x tick label is
    left visible.
    """
    args = get_args(__file__)

    # TODO fix axis
    table = pd.DataFrame(read_table(args.table))
    df = table.pivot(index='pos', columns='lid')
    df = df.fillna(float('NaN'))

    axes_list = df.height.plot(
        kind='bar',
        subplots=True,
        title='Altura de libro por inventario y posición',
        grid=True,
    )
    for axes in axes_list:
        axes.set_title('')
        axes.set_ylabel('')
        axes.legend(loc='upper right', bbox_to_anchor=(1.1, 1))

    # 1-based tick positions whose label stays visible: 1, 6, 11, ...
    shown_positions = set(range(1, len(df), 5))
    tick_labels = axes_list[-1].xaxis.get_ticklabels()
    for position, tick_label in enumerate(tick_labels, start=1):
        tick_label.set_visible(position in shown_positions)

    plotting(plt, args)
def main():
    """Draw a heatmap of the ``lang`` column, with one colour per language code.

    ``categorical_by`` builds the grid from the ``lang`` column and the list
    of category names; legend labels and colours are listed in the same order
    as those names.
    """
    args = get_args(__file__)

    category_names = ['NA', 'LAT', 'ROM', 'FRAN']
    legend_labels = ['N/A', 'Latín', 'Romance', 'Francés']
    palette = sns.xkcd_palette(
        ['light grey', 'pale red', 'medium green', 'denim blue'])

    df = pd.DataFrame(read_table(args.table))
    grid = categorical_by(df, 'lang', category_names)

    figure, axes = plt.subplots()
    sns.heatmap(
        grid,
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(palette),
        cbar=False,
    )
    set_axis(axes, grid, as_letters(set(df.year.values)), 'Libros')
    legend(figure, axes, legend_labels, palette)
    plotting(plt, args)
def main():
    """Draw a heatmap of the ``topic`` column, with one colour per topic code.

    ``categorical_by`` builds the grid from the ``topic`` column and the list
    of category names; legend labels and colours are listed in the same order
    as those names.
    """
    args = get_args(__file__)

    names = ['NA', 'REL', 'CRONIC', 'ANTI']
    labels = ['N/A', 'Religioso', 'Crónicas y Leyes', 'Historia Antigua']
    colornames = ['light grey', 'pale red', 'medium green', 'denim blue']

    df = pd.DataFrame(read_table(args.table))
    data = categorical_by(df, 'topic', names)
    colors = sns.xkcd_palette(colornames)

    f, ax = plt.subplots()
    sns.heatmap(
        data,
        ax=ax,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(colors),
        cbar=False,
    )
    # Pass the distinct years, as every other plot does; the raw column
    # holds one entry per table row.
    set_axis(ax, data, as_letters(set(df.year.values)), ylabel='Posición')
    legend(f, ax, labels, colors)
    plotting(plt, args)
def main():
    """Draw a heatmap of ``height`` values with ``pos`` rows and ``lid`` columns.

    The palette has as many light-to-green shades as the integer part of the
    largest height (never fewer than zero). Missing cells are filled with 0.
    """
    args = get_args(__file__)
    df = pd.DataFrame(read_table(args.table))

    # Same value as len(range(k)): k when positive, otherwise 0.
    shade_count = max(int(df['height'].max()), 0)

    grid = df.pivot(index='pos', columns='lid', values='height')
    grid = grid.fillna(0)
    shades = sns.light_palette('green', shade_count)

    figure, axes = plt.subplots()
    sns.heatmap(
        grid,
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(shades),
        cbar=False,
    )
    set_axis(axes, grid, as_letters(set(df.year.values)), 'Tamaño')
    plotting(plt, args)
def main():
    """Draw a heatmap of ``year`` values with ``bid`` rows and ``lid`` columns.

    Only the first row of each ``(bid, lid)`` pair is kept before pivoting,
    and missing cells are filled with ``False``. The palette is light-to-red
    with one shade per grid row.
    """
    args = get_args(__file__)
    df = pd.DataFrame(read_table(args.table))

    # pivot() needs unique (index, column) pairs, so keep the first of each.
    unique_pairs = df.drop_duplicates(['bid', 'lid'], keep='first')
    grid = unique_pairs.pivot(index='bid', columns='lid', values='year')
    grid = grid.fillna(False)

    shades = sns.light_palette('red', len(grid))

    figure, axes = plt.subplots()
    sns.heatmap(
        grid,
        ax=axes,
        square=True,
        linewidth=0.5,
        cmap=ListedColormap(shades),
        cbar=False,
    )
    set_axis(axes, grid, as_letters(set(df.year.values)), 'Libros')
    plotting(plt, args)
# -*- coding: utf-8 -*-
'''
Description:
    Convert A into A'.
    A' is A with the deleted rows removed and the remaining rows sorted
    in lexicographic order.

Input/output:
    J(A) -> J(A')
'''

from common import read_table, write_table

# input A
a = read_table()  # A

# Drop the deleted rows (those starting with '*') and sort what is left
# in lexicographic order.
a1 = sorted(row for row in a if not row.startswith('*'))

# output A1
write_table(a1)
# f の定義域に pid が含まれていなければ,f に pid |--> oid を追加 if pid not in f: f[pid] = oid return f def count_if(seq, condition): ''' contition を満たす seq の要素の数 ''' return sum(1 for x in seq if condition(x)) def to_map(r): return r # input R,T,A,F' r =read_table() # R t = read_table() # T a = read_table() # A f1 = read_table() # F' r_dict = dict(map(lambda x : x.split(','), r)) f = make_pid2oid(t, a) # F n = len(f1) # |F'| # 再識別で当たった数 suc = count_if(map(lambda x : x.split(','), f1), lambda pid_oid : f[pid_oid[0]] == pid_oid[1]) # 出力 s = [ ('1' if suc >= int(r_dict[str(n)]) else '0'), # 安全でないかどうか
#!/usr/bin/env python3
# T,A' -> F'
import re
import random

from common import read_table, write_table

## input T,A'
t = read_table()
a1 = read_table()

cids = set()     # every cid seen in T
pids = set()     # every pid seen in A'
cid_stocks = {}  # cid -> list of stockids, in the order they appear in T
item_pids = {}   # stock code -> list of pids, in the order they appear in A'
pid_trnum = {}   # pid -> number of rows of A' carrying that pid

# A stockid written as "{x;y;z}" stands for several ';'-separated codes.
# Compiled once instead of being looked up on every row.
_STOCK_GROUP_RE = re.compile(r"\{(.*)\}")

for tr in t:
    cid, date, stockid, price, num = tr.split(',')
    cids.add(cid)
    # Append in place; rebuilding the list on every row is quadratic.
    cid_stocks.setdefault(cid, []).append(stockid)

for a_tr in a1:
    pid, date, stockid, price, num = a_tr.split(',')
    pids.add(pid)
    pid_trnum[pid] = pid_trnum.get(pid, 0) + 1
    m = _STOCK_GROUP_RE.match(stockid)
    sclist = m.group(1).split(";") if m else [stockid]
    for sc in sclist:
        item_pids.setdefault(sc, []).append(pid)
def main():
    """Scatter-plot book positions in one inventory against another.

    Command-line options added on top of the shared parser:
        --first / --second: the two ``lid`` values to compare (3 and 4 by
            default).
        --annotated: label every point with its ``short`` field.
        --iterations: iteration limit handed to ``adjust_text``.
        --color-by: variable used for the point colour; without it, points
            are coloured by whether either position is falsy (0 after the
            fill below).
    """
    parser = get_parser()
    parser.add_argument('--first', default=3, type=int)
    parser.add_argument('--second', default=4, type=int)
    parser.add_argument('--annotated', action='store_true')
    parser.add_argument('--iterations', default=10, type=int)
    parser.add_argument('--color-by')
    args = get_args(__file__, parser)

    columns = [args.first, args.second]
    df = pd.DataFrame(read_table(args.table))
    in_first = df.lid == args.first
    in_second = df.lid == args.second

    # One row per bid, one column per inventory, holding the position.
    # reindex(columns=...) pins the column order; it replaces
    # DataFrame.reindex_axis, which newer pandas releases no longer provide.
    data = df[in_first | in_second]\
        .pivot(index='bid', columns='lid', values='pos')\
        .sort_values(by=args.first)\
        .fillna(0)\
        .reindex(columns=columns)

    # Reindex by position
    meta = Metadata(index='pos', dfs=[df[in_first], df[in_second]])

    palette_name = None
    title = 'Orden/Orden inventarios {} y {}'\
        .format(to_letter(args.first), to_letter(args.second))

    if not args.color_by:
        # Colour based on whether they are in both inventories or missing
        data['color'] = data.apply(
            lambda row: any(not row[c] for c in columns), 1)
    else:
        variable_name = variable_names.get(args.color_by, args.color_by)
        title += ' variable "{}"'.format(variable_name)
        data['color'] = data.apply(
            lambda row: meta.get_field(args.color_by,
                                       *[row[c] for c in columns]), 1)

    # Group numerical values into bins/categories (10 for height, 5 for area)
    color_sorter = None
    if args.color_by in ['area', 'height']:
        palette_name = 'YlOrRd'  # yellow to red
        bins = 10 if args.color_by == 'height' else 5
        data['color'] = pd.cut(data['color'], bins, precision=0)

        def color_sorter(e):
            # Sort intervals such as "(10, 20]" by their left edge.
            return float(str(e).strip('(').strip(']').split(', ', 1)[0])

    # Ensure repeatable colours by fixing the category -> colour map
    # ourselves, before lmplot assigns them differently on each run.
    values = sorted(data['color'].unique(), key=color_sorter)
    colors = sns.color_palette(palette=palette_name, n_colors=len(values))
    palette = dict(zip(values, colors))

    # Use str as column names, otherwise lmplot goes wild
    columns = list(map(str, columns))
    data.columns = columns + ['color']

    # x / y are passed by keyword: newer seaborn rejects positional x, y,
    # and older versions accept the keywords as well.
    # NOTE(review): seaborn >= 0.9 renamed `size` to `height`; `size` is kept
    # for the seaborn version this script targets -- confirm.
    p = sns.lmplot(x=columns[0], y=columns[1], data=data, hue='color',
                   palette=palette, legend=False, legend_out=True,
                   fit_reg=False, size=7, aspect=1.3)

    # Set top title and space for it
    plt.suptitle(title)
    p.fig.subplots_adjust(top=0.92)
    p.set(ylim=(0, None), xlim=(0, None))

    # Set legend outside graph at center right
    if args.color_by:
        p.fig.subplots_adjust(right=0.85)
        variable_name = variable_names.get(args.color_by, args.color_by)
        plt.legend(bbox_to_anchor=(1.18, 0.7), borderaxespad=0.,
                   title=variable_name)

    if args.annotated:
        texts = [
            p.ax.text(
                first,
                second,
                meta.get_field('short', first, second),
                fontsize=8,
            )
            for first, second, color in data.values
        ]
        adjust_text(texts, force_points=1.5, lim=args.iterations,
                    arrowprops=dict(arrowstyle="-", color='r', alpha=0.8))

    plotting(plt, args)