def armed_or_not(self, img_out=False, csv_out=False, path=None):
    """Count shootings per race, separately for armed and unarmed victims.

    For each of the labels ``'armed'`` and ``'unarmed'`` the per-race
    counts are converted to pandas and optionally exported.

    Args:
        img_out: When true, write a donut chart to ``{path}/{label}.png``.
        csv_out: When true, save the counts to ``{path}/{label}.csv``
            through ``self._save_csv``.
        path: Output directory prefix interpolated into the file names.
    """
    # Discard rows missing either the armed flag or the race name.
    selected = self.shoots_df.select(c.ARMED, 'race_name')
    complete = selected.na.drop(subset=[c.ARMED])
    complete = complete.na.drop(subset=['race_name'])

    # NOTE(review): udf_convert_armed is defined elsewhere; presumably it
    # maps the raw column to the 'armed' / 'unarmed' labels filtered on
    # below -- confirm.
    relabelled = complete.withColumn(c.ARMED, udf_convert_armed(c.ARMED))
    counts = relabelled.groupby('race_name', c.ARMED).count()

    for label in ('armed', 'unarmed'):
        subset = counts.filter(col(c.ARMED) == label)
        subset_pd = self_toPandas(subset)

        if csv_out:
            self._save_csv(subset_pd, f'{path}/{label}.csv')

        if not img_out:
            continue

        pie = go.Pie(
            labels=subset_pd['race_name'],
            values=subset_pd['count'],
            hole=.3,
            textinfo='label+percent',
            insidetextorientation='radial',
        )
        fig = go.Figure(data=[pie])
        fig.update_layout(title_text=f'USA: {label}', title_x=0.5)
        fig.write_image(f'{path}/{label}.png')
def crimes_per_state(self, img_out=False, csv_out=False, path=None):
    """Count records per state and optionally export the result.

    Args:
        img_out: When true, write a horizontal bar chart to
            ``{path}/crimes_per_states.png``.
        csv_out: When true, save the counts to
            ``{path}/crimes_per_states.csv`` through ``self._save_csv``.
        path: Output directory prefix interpolated into the file names.
    """
    per_state = self.shoots_df.select(c.STATE).groupby(c.STATE).count()
    # Sorted by count (ascending is the Spark default for sort()).
    per_state = self_toPandas(per_state.sort(col('count')))

    if csv_out:
        self._save_csv(per_state, f'{path}/crimes_per_states.csv')

    if img_out:
        totals = per_state['count']
        bars = go.Bar(
            y=per_state[c.STATE],
            x=totals,
            orientation='h',
            text=totals,
            textposition='outside',
            marker_color=totals,
        )
        fig = go.Figure(bars)

        # Same black frame on both axes.
        frame = dict(showline=True, linewidth=2, linecolor='black',
                     mirror=True)
        fig.update_xaxes(**frame)
        fig.update_yaxes(**frame)

        fig.update_layout(
            title_text='Fatal Killing - All States',
            yaxis_title='States',
            xaxis_title='Total number of victims',
            title_x=0.5,
            height=1000,
        )
        fig.write_image(f'{path}/crimes_per_states.png')
def kills_per_year(self, img_out=False, csv_out=False, path=None):
    """Count records per year and optionally export the result.

    Args:
        img_out: When true, write a line chart to
            ``{path}/kills_year_sp.png``.
        csv_out: When true, save the counts to
            ``{path}/kills_year_sp.csv`` through ``self._save_csv``.
        path: Output directory prefix interpolated into the file names.
    """
    per_year = self.shoots_df.select('year').groupby('year').count()
    # Ordered by year so the line is drawn chronologically.
    per_year = self_toPandas(per_year.sort(col('year')))

    if csv_out:
        self._save_csv(per_year, f'{path}/kills_year_sp.csv')

    if img_out:
        line = go.Scatter(
            x=per_year['year'],
            y=per_year['count'],
            mode='lines+markers',
            marker_color='red',
        )
        fig = go.Figure(data=line)

        # Same black frame on both axes.
        frame = dict(showline=True, linewidth=2, linecolor='black',
                     mirror=True)
        fig.update_xaxes(**frame)
        fig.update_yaxes(**frame)

        fig.update_layout(
            title_text='Deaths - All Years',
            xaxis_title='Years',
            yaxis_title='Total number of kills',
            title_x=0.5,
        )
        fig.write_image(f'{path}/kills_year_sp.png')
def flee(self, img_out=False, csv_out=False, path=None):
    """Count records per value of the flee column and optionally export.

    Args:
        img_out: When true, write a donut chart to ``{path}/flee.png``.
        csv_out: When true, save the counts to ``{path}/flee.csv``
            through ``self._save_csv``.
        path: Output directory prefix interpolated into the file names.
    """
    flee_counts = self.shoots_df.select(c.FLEE).groupby(c.FLEE).count()
    flee_counts = self_toPandas(flee_counts)

    if csv_out:
        self._save_csv(flee_counts, f'{path}/flee.csv')

    if img_out:
        fig = go.Figure(data=[
            go.Pie(
                # Index with the same constant used for select/groupby so
                # the labels keep working if c.FLEE is not literally 'flee'.
                labels=flee_counts[c.FLEE],
                values=flee_counts['count'],
                hole=.3,
                textinfo='label+percent',
                insidetextorientation='radial',
            )
        ])
        fig.update_layout(title_text='Victim Flee?', title_x=0.5)
        fig.write_image(f'{path}/flee.png')
def monthly(self, img_out=False, csv_out=False, path=None):
    """Count records per converted date value and optionally export.

    Args:
        img_out: When true, write a two-panel figure (series plus box
            plot of the counts) to ``{path}/monthlysp.png``.
        csv_out: When true, save the counts to ``{path}/monthlysp.csv``
            through ``self._save_csv``.
        path: Output directory prefix interpolated into the file names.

    Returns:
        The pandas frame of per-date counts, sorted by the date column.
    """
    # NOTE(review): udf_convert_date_to_string is defined elsewhere;
    # presumably it reduces each date to a month-level string -- confirm.
    dates = self.shoots_df.select(c.DATE)
    dates = dates.withColumn(c.DATE, udf_convert_date_to_string(c.DATE))
    counted = dates.groupby(c.DATE).count().sort(col(c.DATE))

    monthly_df = self_toPandas(counted)
    x_labels = list(map(str, monthly_df[c.DATE]))

    if csv_out:
        self._save_csv(monthly_df, f'{path}/monthlysp.csv')

    if img_out:
        counts = monthly_df['count']
        fig = make_subplots(
            rows=2,
            cols=1,
            subplot_titles=('Monthly series',
                            'Distribution of monthly count'),
        )

        series = go.Scatter(x=x_labels, y=counts, name='Monthly Deaths',
                            mode='lines')
        fig.add_trace(series, row=1, col=1)

        box = go.Box(y=counts, name='Count', marker_color='indianred',
                     boxmean='sd')
        fig.add_trace(box, row=2, col=1)

        # Same black frame on every axis of both panels.
        frame = dict(showline=True, linewidth=2, linecolor='black',
                     mirror=True)
        fig.update_xaxes(title_text='Year', row=1, col=1, **frame)
        fig.update_xaxes(title_text=' ', row=2, col=1, **frame)
        for panel_row in (1, 2):
            fig.update_yaxes(title_text='Number of Victims',
                             row=panel_row, col=1, **frame)

        fig.update_layout(
            title_text='Fatal Killing Monthly Count 2015 - 2020',
            title_x=0.5,
            showlegend=False,
            height=1000,
        )
        fig.write_image(f'{path}/monthlysp.png')

    return monthly_df
def races(self, img_out=False, csv_out=False, path=None):
    """Count records per year, month and race and optionally export.

    A ``'monthly'`` column of the form ``'<year>-<month_num>'`` is added
    to the pandas frame before any export.

    Args:
        img_out: When true, write a 3x2 grid of per-race traces to
            ``{path}/affrace.png`` and a per-race box plot of the counts
            to ``{path}/racetoll.png``.
        csv_out: When true, save the frame to ``{path}/racetoll.csv``
            through ``self._save_csv``.
        path: Output directory prefix interpolated into the file names.
    """
    grouped = self.shoots_df.groupby(['year', 'month_num', 'race_name'])
    shoot_race = self_toPandas(grouped.count())

    year_text = shoot_race['year'].astype(str)
    month_text = shoot_race['month_num'].astype(str)
    shoot_race['monthly'] = year_text + '-' + month_text

    if csv_out:
        self._save_csv(shoot_race, f'{path}/racetoll.csv')

    if not img_out:
        return

    race_names = ('Black', 'White', 'Hispanic', 'Asian', 'Native American',
                  'Others')

    # Figure 1: one panel per race, filled row by row on a 3x2 grid.
    panel_colors = ('brown', 'deepskyblue', 'green', 'red', 'orange',
                    'violet')
    fig = make_subplots(rows=3, cols=2, subplot_titles=race_names)
    for position, (race, color) in enumerate(zip(race_names, panel_colors)):
        grid_row, grid_col = divmod(position, 2)
        # self.plot_month_race is defined elsewhere; it supplies the trace.
        trace = self.plot_month_race(shoot_race, race, color)
        fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)
    fig.update_layout(title_text='Deaths - All Race', title_x=0.5)
    fig.write_image(f'{path}/affrace.png')

    # Figure 2: distribution of the counts per race. This palette differs
    # from the grid above for 'White' (grey instead of deepskyblue).
    box_colors = ('brown', 'grey', 'green', 'red', 'orange', 'violet')
    fig = go.Figure()
    for race, color in zip(race_names, box_colors):
        is_race = shoot_race['race_name'] == race
        fig.add_trace(go.Box(
            y=shoot_race.loc[is_race]['count'],
            name=race,
            marker_color=color,
            boxmean=True,
        ))

    # Same black frame on both axes.
    frame = dict(showline=True, linewidth=2, linecolor='black', mirror=True)
    fig.update_xaxes(**frame)
    fig.update_yaxes(**frame)

    fig.update_layout(
        title='Death Toll - All Races',
        title_x=0.5,
        xaxis={'title': 'Race'},
        yaxis={'title': 'Number of Victims'},
    )
    fig.write_image(f'{path}/racetoll.png')