示例#1
0
 def view_data(pdf):
     """Print a quick duplicate/sample report for the TR-move columns of *pdf*.

     The frame is first narrowed to the columns whose label matches the regex
     ``moves_learnt_by_tr_|name``. The following values are then printed, one
     per ``print`` call, in this order:

     1. the remaining column labels,
     2. the number of distinct index labels in the narrowed frame,
     3. the number of distinct index labels left after ``drop_duplicates()``,
     4. the number of distinct index labels among rows flagged by
        ``duplicated()``,
     5. item 4 as a percentage of item 2 (``0.0`` for an empty frame),
     6. a random sample of up to 7 rows that contain no missing values.

     Args:
         pdf: A pandas DataFrame. It is not modified; the name is rebound to
             the filtered copy inside the function.

     Returns:
         None. All output goes to stdout.
     """
     field = 'moves_learnt_by_tr_'
     pdf = pdf.filter(regex=f'{field}|name', axis=1)

     # Counts are taken over distinct index labels (via value_counts), so they
     # equal plain row counts only when the index has no repeated labels.
     total_rows = len(pdf.index.value_counts())
     unique_rows = len(pdf.drop_duplicates().index.value_counts())
     dup_rows = len(pdf[pdf.duplicated()].index.value_counts())

     # Guard the ratio: an empty frame would otherwise raise ZeroDivisionError.
     percent_duped = (dup_rows / total_rows) * 100 if total_rows else 0.0

     # DataFrame.sample(n) raises ValueError when n exceeds the number of rows
     # available, so cap the sample at however many complete rows exist.
     complete_rows = pdf.dropna()
     sample_size = min(7, len(complete_rows))

     ps = Enumerable([
         lambda: pdf.columns,
         lambda: total_rows,
         lambda: unique_rows,
         lambda: dup_rows,
         lambda: percent_duped,
         lambda: complete_rows.sample(sample_size),
     ])
     # NOTE(review): `u` is not imported in the visible part of this file;
     # confirm the helper module providing `foreach` is in scope at call time.
     u.foreach(lambda f: print(f()), ps)
import re
import toml
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import functools as ft
from py_linq import Enumerable

# Resolve the directory holding this script so config.toml is located next to
# it, independent of the current working directory.
this_dir = os.path.split(os.path.realpath(__file__))[0]
config = toml.load(os.path.join(this_dir, 'config.toml'))
# NOTE(review): `u` is not imported in the visible part of this file, and
# set_full_paths is defined elsewhere; presumably it rewrites the paths in
# `config` relative to `this_dir` -- confirm against the helper module.
u.set_full_paths(config, this_dir)
csv_loc = config['file_locations']['raw_taco_bell']

# Load the raw export, then normalise the headers in two passes:
# lower-case first, then strip every run of whitespace.
df: pd.DataFrame = pd.read_csv(csv_loc)  # type: ignore
df.columns = df.columns.str.lower()
df.columns = df.columns.str.replace('\\s+', '', regex=True)

# Remove the three scraper/link columns that are not kept in the output.
df.drop(
    columns=['web-scraper-order', 'web-scraper-start-url', 'category-href'],
    inplace=True,
)

# Raise the display limits to one more than the frame's dimensions so the
# frame printed below is shown in full.
pd.set_option('display.max_rows', len(df.index) + 1)
pd.set_option('display.max_columns', len(df.columns) + 1)

# Print the cleaned column labels followed by the whole frame.
pdf = df
ps = Enumerable([lambda: pdf.columns, lambda: pdf])
u.foreach(lambda thunk: print(thunk()), ps)

# Persist the cleaned data without writing the index as a column.
df.to_csv(config['file_locations']['clean_taco_bell'], index=False)