Пример #1
0
def plot_time_distribution_by_bib_numbers(df):
    '''
    Show a scatter plot of the running time of participants against their BIB number.

    Dashed vertical lines and '10 km' / '21 km' / '42 km' text labels are drawn at
    hard-coded positions (presumably the BIB number ranges of each race - confirm
    against the dataset). The y ticks are relabelled using
    study_utils.convert_seconds_to_time, then the figure is displayed.

    Parameters
        - df: DataFrame containing data to use to generate the graph
              (must provide the 'number' and 'time' columns)
    '''

    bib_column, time_column = 'number', 'time'
    ax = df.plot(kind='scatter',
                 x=bib_column,
                 y=time_column,
                 xlim=(-1000, 18000))
    ax.set(xlabel=bib_column.capitalize(), ylabel=time_column.capitalize())

    # Dashed vertical separators at fixed BIB numbers
    for boundary in (-200, 2000, 8800, 9800, 17000):
        ax.axvline(boundary, color='b', linestyle='--')

    # Race labels: each text is placed at (x_position, 28000), annotating point (0, 0)
    for text, x_position in (('10 km', 12500), ('21 km', 4500), ('42 km', 150)):
        ax.annotate(text, (0, 0), (x_position, 28000), color='b')

    # Relabel the y ticks with the output of convert_seconds_to_time
    tick_positions = ax.get_yticks()
    tick_labels = [
        study_utils.convert_seconds_to_time(position)
        for position in tick_positions
    ]
    plt.yticks(tick_positions, tick_labels)

    plt.title('Running time according to BIB number of participants')
    plt.show()
Пример #2
0
def plot_time_difference_distribution(data):
    '''
    Display a histogram showing the time difference between team members and the best time of the team.

    The mean difference is marked by a dashed red vertical line, and the mean and
    maximum differences are summarised in a text box in the upper-right corner.

    Parameters
        - data: DataFrame containing the data relative to a given running
                (must provide the 'time difference team' column; values are presumably
                seconds, as they are passed to study_utils.convert_seconds_to_time)
    '''

    time_differences = data['time difference team']
    ax = time_differences.hist(bins=30, figsize=(10, 6))

    # Computing of the mean and max shown in the statistics box
    mean = np.mean(time_differences)
    max_time_diff = np.max(time_differences)

    # Display of the mean and title.
    # ymin/ymax of axvline are fractions of the axes height, so 0..1 spans the whole plot.
    ax.axvline(mean, 0, 1, color='r', linestyle='--')
    ax.set_title(
        'Distribution of runners according to difference of time with the best runner in team'
    )
    ax.set_xlabel('Difference of time')
    ax.set_ylabel('Number of runners')

    # Relabel the x ticks on this axes (rather than through pyplot's current axes)
    tick_positions = ax.get_xticks()
    ax.set_xticks(tick_positions)
    ax.set_xticklabels([
        study_utils.convert_seconds_to_time(position)
        for position in tick_positions
    ])

    # Calculation and display of the time difference statistics
    time_stats = '\n'.join([
        'Mean difference of time: ' +
        study_utils.convert_seconds_to_time(mean),
        'Maximum difference of time: ' +
        study_utils.convert_seconds_to_time(max_time_diff),
    ])
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(.95,
            .95,
            time_stats,
            fontsize=11,
            transform=ax.transAxes,
            va='top',
            ha='right',
            bbox=props,
            multialignment='left')
Пример #3
0
def plot_time_distribution(ax, running, name):
    '''
    This function fills a subplot with the stacked time distribution of the 3 types of runnings (10 km, 21 km, 42 km).

    A text box with the number of runners per distance and in total is added in the
    upper-left corner, and the x ticks are relabelled using
    study_utils.convert_seconds_to_time.

    Parameters
        - ax: subplot to use for the histogram (Matplotlib axes)
        - running: data for a given set of participants, indexed like a DataFrame
                   (must provide the 'distance (km)' and 'time' columns)
        - name: name of the category (shown in the subplot title)
    '''

    # Creation of histogram: one list of times per distance, stacked together
    distances = (10, 21, 42)
    times_by_distance = [
        running[running['distance (km)'] == distance]['time'].tolist()
        for distance in distances
    ]
    ax.hist(times_by_distance,
            bins=30,
            stacked=True,
            rwidth=1.0,
            label=[str(distance) + ' km' for distance in distances])
    ax.legend()
    ax.set_ylabel('Number of Runners')
    ax.set_title('Time distribution (' + str(name) + ')')
    ax.xaxis.set_label_coords(1.15, -0.025)

    # Creation of texts
    stats_lines = [
        str(distance) + ' km: ' + str(len(times)) + ' runners'
        for distance, times in zip(distances, times_by_distance)
    ]
    # The total counts every row of `running`, whatever its distance
    stats_lines.append('Total: ' + str(len(running['time'])) + ' runners')
    stats_str = '\n'.join(stats_lines)

    # Relabel the ticks of `ax` itself: pyplot's current axes may be another subplot
    tick_positions = ax.get_xticks()
    ax.set_xticks(tick_positions)
    ax.set_xticklabels([
        study_utils.convert_seconds_to_time(position)
        for position in tick_positions
    ],
                       rotation=90)

    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.annotate(stats_str,
                xy=(0, 1),
                xytext=(12, -12),
                va='top',
                xycoords='axes fraction',
                textcoords='offset points',
                bbox=props)
Пример #4
0
def plot_scatter_difference_time_number(fig,
                                        data,
                                        distance,
                                        subplot_idx,
                                        annotation=None,
                                        time_mini=1000):
    '''
    This function plots the difference time between team members who have finished late compared to the best time of the team.

    Parameters
        - fig: Figure on which subplots are displayed
        - data: DataFrame containing the data relative to a given running
                (uses the 'team', 'distance (km)', 'time difference team' and 'sex' columns)
        - distance: number of kilometers of the considered running (10/21/42)
        - subplot_idx: Index of the subplot in the figure (311, 312 and 313 get specific
                       legend and axis label handling)
        - annotation: Annotation to add in the graph, as a sequence
                      (subplot index, text, xy, xytext); it is only drawn when its first
                      item equals subplot_idx (by default, no annotation)
        - time_mini: Minimal time to consider (how much runners are late compare to the first)
                    (1000 by default)
    '''

    # Select runners in teams and with the selected distance
    in_team_on_distance = (data['team'].notnull()
                           & (data['distance (km)'] == distance))
    race_team = data[in_team_on_distance]

    # Remove of times which are lower than minimal time considered
    race_team = race_team[race_team['time difference team'] > time_mini]

    # Remove of teams with only one remaining runner: keep the rows whose team appears
    # more than once. The explicit copy lets us add a column without writing into a
    # slice of the caller's DataFrame.
    race_team = race_team[race_team.duplicated(subset='team',
                                               keep=False)].copy()

    # Map team name with team number
    team_label_encode = preprocessing.LabelEncoder()
    race_team['team_code'] = team_label_encode.fit_transform(race_team['team'])

    # Computation of runners in pair
    number_runner_in_pair = race_team.apply(study_utils.compute_pair_runner,
                                            args=(race_team, 60),
                                            axis=1)
    counter_pair = Counter(number_runner_in_pair)

    # Plotting the results
    plot = fig.add_subplot(subplot_idx)
    sns.swarmplot(x='team_code',
                  y='time difference team',
                  hue='sex',
                  data=race_team,
                  ax=plot)
    plot.set_title('Distance: ' + str(distance) + ' km')
    plot.set_xlabel('')
    plot.set_ylabel('')
    plot.legend(loc='upper left')

    # Add annotation if any given for this subplot
    if (annotation is not None and len(annotation) != 0
            and subplot_idx == annotation[0]):
        plot.annotate(annotation[1],
                      annotation[2],
                      annotation[3],
                      arrowprops=dict(facecolor='red', shrink=0.05))

    # Manage of legends: only the first subplot keeps its legend
    if subplot_idx != 311:
        plot.legend_.remove()
    if subplot_idx == 312:
        plot.set_ylabel('Difference of time with the best runner in the team')
    if subplot_idx == 313:
        plot.set_xlabel('Team number')

    # Relabel the ticks of this subplot itself rather than pyplot's current axes,
    # which is only the same object when `fig` is pyplot's current figure
    tick_positions = plot.get_yticks()
    plot.set_yticks(tick_positions)
    plot.set_yticklabels([
        study_utils.convert_seconds_to_time(position)
        for position in tick_positions
    ])
    display_legend(counter_pair, plot)
def plot_time_difference_distribution(
        df,
        title='Time difference with the best runner in team',
        time_difference_column_name='time difference team'):
    '''
    This function displays the distribution of the time difference between the performance of team members and the best performance within their team.

    The mean difference is marked by a dashed vertical line, and the mean and maximum
    differences are written in an annotation. The figure is rendered with
    plotly.offline.iplot before being returned.

    Parameters
        - df: DataFrame containing information on runners (it is not modified, a copy is used)
        - title: Title of the graph (by default, 'Time difference with the best runner in team')
        - time_difference_column_name: Name of column containing time differences (by default, 'time difference team')

    Return
        - figure: object returned by study_utils.create_plotly_legends_and_layout
                  (presumably a Plotly figure)
    '''

    to_clock = study_utils.convert_seconds_to_time
    column = time_difference_column_name

    # Statistics shown in the annotation, and position of the mean marker on the date axis
    mean_seconds = np.mean(df[column])
    mean_marker = datetime.datetime.strptime(to_clock(mean_seconds),
                                             '%H:%M:%S')
    max_seconds = np.max(df[column])
    summary_lines = [
        'Mean difference of time: ' + str(to_clock(mean_seconds)),
        'Maximum difference of time: ' + str(to_clock(max_seconds)),
    ]

    # Work on a copy whose time differences are converted to datetimes
    plot_df = df.copy()
    clock_strings = [to_clock(seconds) for seconds in plot_df[column]]
    plot_df[column] = pd.to_datetime(clock_strings, format='%H:%M:%S')
    timestamps = plot_df[column]

    # Bin size of 5 * 60000: presumably five minutes in milliseconds - confirm
    bins = {
        'start': np.min(timestamps),
        'end': np.max(timestamps),
        'size': 5 * 60000
    }
    traces = [go.Histogram(x=timestamps, xbins=bins)]

    # Statistics box positioned in paper coordinates
    stats_annotation = Annotation(y=1,
                                  x=1,
                                  text='<br>'.join(summary_lines),
                                  align='right',
                                  xref='paper',
                                  yref='paper',
                                  showarrow=False)

    # Dashed vertical line over the full plot height at the mean difference
    mean_line = {
        'type': 'line',
        'yref': 'paper',
        'x0': mean_marker,
        'y0': 0,
        'x1': mean_marker,
        'y1': 1,
        'line': {
            'color': '#f44242',
            'width': 2,
            'dash': 'dash'
        }
    }

    figure = study_utils.create_plotly_legends_and_layout(
        traces,
        title=title,
        x_name='Performance gap',
        x_type='date',
        x_format='%H:%M:%S',
        y_name='Number of runners',
        barmode='group',
        bargap=0.1,
        annotations=[stats_annotation],
        shapes=[mean_line])
    plotly.offline.iplot(figure)
    return figure