Example #1
def __init__(self, project_info, token):
    """
    Input
      - project_info: project information data
      - token: tracker access token
    """
    super(PivotalTrackerAnalyzer, self).__init__()
    self.project_info = project_info
    self.client = TrackerApi(token)
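A minimal instantiation sketch (hypothetical, not part of the original example; it assumes the conf/tokens.json layout used in Example #3 below):

import json

with open('conf/tokens.json', 'r') as f_in:
    tokens = json.load(f_in)
project_info = []  # placeholder; normally the course-project dataset
analyzer = PivotalTrackerAnalyzer(project_info, tokens['pivotal_tracker']['token'])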
Example #2
def __init__(self, tokens, proj_info):
  super(TestAnalyzer, self).__init__()
  self.tokens = tokens
  self.project_info = proj_info
  self.pt_client = TrackerApi(tokens['pivotal_tracker']['token'])
  with open('cache/log_info.json', 'r') as f_in:
    self.build_info = json.load(f_in)
  self.out_header = 'test_analysis'
  if self.out_header not in os.listdir('results'):
    os.mkdir('results/{}'.format(self.out_header))
  self.out_header = 'results/{}'.format(self.out_header)
Example #3
def __init__(self, project_info):
    """
    Tokens are read from the conf/tokens.json file.

    Input
      - project_info: dataset containing all course projects
    """
    super(ProcessSegmentAnalyzer, self).__init__()
    self.project_info = project_info
    with open('conf/tokens.json', 'r') as f_in:
        self.token = json.load(f_in)
    self.gt_analyzer = GithubAnalyzer(self.token['github']['token'],
                                      self.project_info)
    self.gt_client = GithubApi(self.token['github']['token'])
    self.pt_client = TrackerApi(self.token['pivotal_tracker']['token'])
    self.out_header = 'process_segment'
    if self.out_header not in os.listdir('results'):
        os.mkdir('results/{}'.format(self.out_header))
    self.out_header = 'results/{}'.format(self.out_header)
Example #4
class PivotalTrackerAnalyzer(object):
    """
    Generate Pivotal Tracker statistics and visualizations.
    """
    def __init__(self, project_info, token):
        """
        Input
          - project_info: project information data
          - token: tracker access token
        """
        super(PivotalTrackerAnalyzer, self).__init__()
        self.project_info = project_info
        self.client = TrackerApi(token)

    def stories(self, proj):
        """
        Get all stories for a project.
        """
        return self.client.get_stories(proj['tracker'])

    def story_assign(self, reload=False):
        '''
        Generate the task assignment within a team. Ownership is defined by the
        story's "owned by" relation. If reload=True, use the cached file;
        otherwise generate and cache the result.

        Output
          - dictPnts: dictionary from a tracker user name to the fraction of points
          - dictNum: dictionary from a tracker user name to the fraction of stories
        '''
        if reload:
            with open('cache/student_points_num.json', 'r') as f_in:
                cache = json.load(f_in)
            return cache['points'], cache['number']
        dictPnts, dictNum = {}, {}
        for project in tqdm(self.project_info, desc='Project'):
            dictStu2Pnts, dictStu2Num = self._story_assign(
                project['tracker'], len(project['students']))
            dictPnts.update(dictStu2Pnts)
            dictNum.update(dictStu2Num)
        with open('cache/student_points_num.json', 'w') as f_out:
            json.dump({'points': dictPnts, 'number': dictNum}, f_out)
        return dictPnts, dictNum

    def story_assign_plot(self, reload=False):
        '''
        Plot
          - histogram of story assignment; both the fraction of points and the fraction of stories are plotted
        '''
        dictPnts, dictNums = self.story_assign(reload)
        lstPnts = [v for _, v in dictPnts.items()]
        lstNums = [v for _, v in dictNums.items()]

        fig, ax = plt.subplots()
        sns.distplot(lstPnts, label='Percentage of Points')
        sns.distplot(lstNums, label='Percentage of Total Stories')
        plt.legend()
        plt.savefig('results/story_assign.png')
        plt.close(fig)

    def _story_assign(self, project_id, num_stu):
        '''Process a single project.'''
        dictStu2Pnts, dictStu2Num = defaultdict(int), defaultdict(int)
        total_pnts, total_num = 0, 0
        for story in self.client.get_stories(project_id):
            pnts = story['estimate'] if 'estimate' in story else 1
            total_pnts += pnts
            total_num += 1
            owners = self.client.get_story_owners(project_id, story['id'])
            # owners = story['owner_ids']
            for owner in owners:
                dictStu2Num[owner['name']] += 1
                dictStu2Pnts[owner['name']] += pnts
            time.sleep(0.1)  # Prevent too frequent requests
        for i in range(num_stu - len(dictStu2Num)):
            dictStu2Num[project_id + str(i) + '#'] = 0
            dictStu2Pnts[project_id + str(i) + '#'] = 0
        return {k: float(v)/float(total_pnts) for k, v in dictStu2Pnts.items()},\
               {k: float(v)/float(total_num) for k, v in dictStu2Num.items()}

    def iteration_points(self, reload=False):
        '''
        Generate the task assignment within a team, broken down by iteration.
        Ownership is defined by the story's "owned by" relation. If reload=True,
        use the cached file; otherwise generate and cache the result.

        Output
          - dictPnts: dictionary from a tracker user name to a list of per-iteration fractions of points
          - dictNum: dictionary from a tracker user name to a list of per-iteration fractions of stories
        '''
        if reload:
            with open('cache/student_iter_pnts.json', 'r') as f_in:
                cache = json.load(f_in)
            return cache['points'], cache['number']
        dictPnts, dictNum = {}, {}
        for project in tqdm(self.project_info, desc='Project'):
            dictStu2Pnts, dictStu2Num = self._iteration_points(
                project['tracker'], len(project['students']))
            dictPnts.update(dictStu2Pnts)
            dictNum.update(dictStu2Num)
        with open('cache/student_iter_pnts.json', 'w') as f_out:
            json.dump({'points': dictPnts, 'number': dictNum}, f_out)
        return dictPnts, dictNum

    def _iteration_points(self, project_id, num_stu):
        """Process a single project."""
        dictStu2Pnts = defaultdict(lambda: np.zeros((4, )))
        dictStu2Num = defaultdict(lambda: np.zeros((4, )))
        total_pnts, total_num = np.ones((4, )), np.ones((4, ))
        with open('conf/iterations.json', 'r') as f_in:
            timestamps = json.load(f_in)
        timestamps = [
            datetime.datetime.strptime(s, '%Y-%m-%d') for s in timestamps
        ]
        for story in self.client.get_stories(project_id):
            # t = datetime.datetime.fromtimestamp(int(story['updated_at'])/1e3)
            t = datetime.datetime.strptime(story['updated_at'],
                                           '%Y-%m-%dT%H:%M:%SZ')
            ind = np.searchsorted(timestamps, t)
            pnts = story['estimate'] if 'estimate' in story else 1

            if ind in [1, 2, 3, 4]:
                ind = ind - 1
                total_pnts[ind] += pnts
                total_num[ind] += 1
                owners = self.client.get_story_owners(project_id, story['id'])
                for owner in owners:
                    dictStu2Num[owner['name']][ind] += 1
                    dictStu2Pnts[owner['name']][ind] += pnts
            time.sleep(0.1)  # Prevent too frequent requests
        for i in range(num_stu - len(dictStu2Num)):
            dictStu2Num[project_id + str(i) + '#'] = np.zeros((4, ))
            dictStu2Pnts[project_id + str(i) + '#'] = np.zeros((4, ))
        return {k: (v/total_pnts).tolist() for k, v in dictStu2Pnts.items()},\
               {k: (v/total_num).tolist() for k, v in dictStu2Num.items()}
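Continuing the instantiation sketch after Example #1, a hedged usage sketch for the full analyzer (hypothetical driver code; it assumes the cache/ and results/ directories exist and that each project_info entry carries the 'tracker' and 'students' keys the methods above expect):

# First run hits the API and writes cache/student_points_num.json;
# pass reload=True afterwards to reuse the cached result.
points, numbers = analyzer.story_assign()
analyzer.story_assign_plot(reload=True)   # writes results/story_assign.png

# Per-iteration variant: each user maps to a list of four fractions.
iter_points, iter_numbers = analyzer.iteration_points()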
Example #5
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from pivotal_tracker_api.tracker_api import TrackerApi

api = TrackerApi('02dd7e25c04dc4e72699947eaf9b25d7')

transition_time = [[], [], []]  # buckets for estimates 1, 2, and 3
for story in api.get_stories(1544059):
    if 'estimate' in story:
        estimate = story['estimate']
    else:
        estimate = 1
    started, finished, accepted = None, None, None
    for transition in api.get_story_transitions(1544059, story['id']):
        if transition['state'] == 'started':
            started = datetime.strptime(transition['occurred_at'][:19],
                                        '%Y-%m-%dT%H:%M:%S')
        elif transition['state'] == 'finished':
            finished = datetime.strptime(transition['occurred_at'][:19],
                                         '%Y-%m-%dT%H:%M:%S')
        elif transition['state'] == 'accepted':
            accepted = datetime.strptime(transition['occurred_at'][:19],
                                         '%Y-%m-%dT%H:%M:%S')
    if started and finished and accepted:
        transition_time[estimate - 1].append(
            (finished - started, accepted - finished))

fig, ax = plt.subplots()
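The snippet ends immediately after the figure is created. A possible completion (an assumption, not part of the original) would histogram the started-to-finished durations for each estimate bucket:

# Hypothetical completion: plot development time (hours) per estimate.
for estimate, spans in enumerate(transition_time, start=1):
    dev_hours = [dev.total_seconds() / 3600.0 for dev, _ in spans]
    if dev_hours:
        ax.hist(dev_hours, bins=20, alpha=0.5,
                label='Estimate {}'.format(estimate))
ax.set_xlabel('Development time (hours)')
ax.set_ylabel('Stories')
ax.legend()
plt.savefig('transition_time.png')  # output path is an assumption
plt.close(fig)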
Example #6
def _load_connection(self):
    self.client = TrackerApi(self.tokens['pivotal_tracker']['token'])
Example #7
class MetricTracker(BasicMetric):
    """All metrics concerning Pivotal Tracker."""
    def __init__(self, proj, token, **args):
        super(MetricTracker, self).__init__(proj, token)

        self.out_header = 'metric_tracker'
        if self.out_header not in os.listdir(self.ROOT_PATH + '/results'):
            os.mkdir('{}/results/{}'.format(self.ROOT_PATH, self.out_header))
        self.out_header = '{}/results/{}'.format(self.ROOT_PATH,
                                                 self.out_header)

    def _load_connection(self):
        self.client = TrackerApi(self.tokens['pivotal_tracker']['token'])

    def resample(self):
        stories = self.client.get_stories(self.proj['tracker'])
        with open('{}/conf/iterations.json'.format(self.ROOT_PATH),
                  'r') as f_in:
            iterations = json.load(f_in)
        iterations = [
            time.mktime(time.strptime(x, '%Y-%m-%d')) for x in iterations
        ]
        state_count = defaultdict(int)
        velocity_count = defaultdict(float)
        pnts_time = []
        owner_info = []
        for story in stories:
            # Count number of different states
            state_count[story['current_state']] += 1
            if story['current_state'] not in ['accepted', 'delivered']:
                continue

            stime = time.mktime(
                time.strptime(story['created_at'], '%Y-%m-%dT%H:%M:%SZ'))
            transitions = self.client.get_story_transitions(
                self.proj['tracker'], story['id'])
            trans_owner_time = []
            for trans in transitions:
                occurred_at = time.mktime(
                    time.strptime(trans['occurred_at'], '%Y-%m-%dT%H:%M:%SZ'))
                if trans['state'] == 'started':
                    stime = occurred_at
                trans_owner_time.append(
                    (trans['state'], trans['performed_by_id'], occurred_at))

            # Calculate velocities
            etime = time.mktime(
                time.strptime(story['updated_at'], '%Y-%m-%dT%H:%M:%SZ'))
            niter = bisect.bisect(iterations, etime)
            story_points = story['estimate'] if 'estimate' in story else 1
            velocity_count[niter] += story_points

            # Time versus points
            pnts_time.append((story_points, etime - stime))

            # Ownership
            for tot in trans_owner_time:
                niter = bisect.bisect(iterations, tot[2])
                owner_info.append((tot[0], tot[1], niter))
        self.data = {
            'velocity': velocity_count,
            'state': state_count,
            'pnts_time': pnts_time,
            'ownership': owner_info
        }

        return self.data, datetime.datetime.now()

    def graph(self):
        velocity_count, state_count = self.data['velocity'], self.data['state']
        pnts_time, owner_info = self.data['pnts_time'], self.data['ownership']

        fig, ax = plt.subplots()
        ind = np.arange(len(state_count))
        width = 0.5
        labels = list(state_count)
        ax.bar(ind, [state_count[v] for v in labels], width)
        ax.set_xticks(ind + width / 2)
        ax.set_xticklabels(labels)
        plt.savefig('{}/state_{}.png'.format(self.out_header, self.proj['ID']))
        plt.close(fig)

        fig, ax = plt.subplots()
        ind = np.arange(4)
        ax.plot(ind, [velocity_count[i + 1] for i in range(4)])
        ax.set_xticks(ind)
        ax.set_xticklabels(['Iter {}'.format(i + 1) for i in range(4)])
        plt.savefig('{}/velocity_{}.png'.format(self.out_header,
                                                self.proj['ID']))
        plt.close(fig)

        if len(pnts_time) > 0:
            fig, ax = plt.subplots()
            plotdata = pd.DataFrame({
                'points': [item[0] for item in pnts_time],
                'time': [item[1] for item in pnts_time]
            })
            sns.pointplot(x='points', y='time', data=plotdata)
            plt.savefig('{}/time_pnts_{}.png'.format(self.out_header,
                                                     self.proj['ID']))
            plt.close(fig)

        if len(owner_info) > 0:
            fig, ax = plt.subplots()
            plotdata = pd.DataFrame({
                'owner': [item[1] for item in owner_info],
                'iteration': [item[2] for item in owner_info]
            })
            sns.countplot(x='iteration', hue='owner', data=plotdata)
            plt.savefig('{}/ownership_{}.png'.format(self.out_header,
                                                     self.proj['ID']))
            plt.close(fig)

    def metrics(self, **args):
        stories = self.client.get_stories(self.proj['tracker'])
        with open('{}/conf/iterations.json'.format(self.ROOT_PATH),
                  'r') as f_in:
            iterations = json.load(f_in)
        iterations = [
            time.mktime(time.strptime(x, '%Y-%m-%d')) for x in iterations
        ]
        iteration_data = defaultdict(lambda: defaultdict(list))
        for story in stories:
            # Count number of different states
            if story['current_state'] not in ['accepted', 'delivered']:
                continue

            stime = time.mktime(
                time.strptime(story['created_at'], '%Y-%m-%dT%H:%M:%SZ'))
            transitions = self.client.get_story_transitions(
                self.proj['tracker'], story['id'])
            state_time_owner = {}
            for trans in transitions:
                occurred_at = time.mktime(
                    time.strptime(trans['occurred_at'], '%Y-%m-%dT%H:%M:%SZ'))
                performed_by = trans['performed_by_id']
                state_time_owner[trans['state']] = (occurred_at, performed_by)

            # Calculate velocities
            if 'started' in state_time_owner:
                stime = state_time_owner['started'][0]
            etime = time.mktime(
                time.strptime(story['updated_at'], '%Y-%m-%dT%H:%M:%SZ'))
            if 'finished' in state_time_owner:
                etime = state_time_owner['finished'][0]
            dtime = etime
            if 'delivered' in state_time_owner:
                dtime = state_time_owner['delivered'][0]
            atime = dtime
            if 'accepted' in state_time_owner:
                atime = state_time_owner['accepted'][0]

            niter = bisect.bisect(iterations, etime)
            story_points = story.get('estimate') or 1  # missing/zero estimate counts as 1 point

            iteration_data[niter]['velocity'].append(story_points)
            iteration_data[niter]['time'].append((story_points, etime - stime))
            iteration_data[niter]['review'].append(dtime - etime)
            iteration_data[niter]['customer'].append(atime - dtime)
        result = defaultdict(lambda: [None for _ in self.metric_name()])
        for k, v in iteration_data.items():
            result[k] = self._extract(v)
        return result

    def metric_name(self):
        return ['Velocity', 'Review Time', 'Correlation']

    def _extract(self, pt_info):
        import math
        from scipy.stats import pearsonr
        velocity = np.sum(pt_info['velocity'])
        avg_review_time = np.average([
            np.log(x + 1) for x in pt_info['review']
        ]) if len(pt_info['review']) > 0 else None
        avg_customer_time = np.average(
            pt_info['customer']) if len(pt_info['customer']) > 0 else None
        r = pearsonr([x[1] for x in pt_info['time']],
                     [x[0] for x in pt_info['time']])[0]
        r = r if not math.isnan(r) else None
        return [velocity, avg_review_time, r]
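A hedged driver sketch for MetricTracker (hypothetical; it assumes BasicMetric stores the token dict as self.tokens and defines self.ROOT_PATH, and that proj is one project_info entry with 'tracker' and 'ID' keys):

# tokens and proj are assumed to come from conf/tokens.json and the
# project_info dataset, as in the other examples.
metric = MetricTracker(proj, tokens)
metric._load_connection()             # attach the Pivotal Tracker client
data, fetched_at = metric.resample()  # pull stories, states, and transitions
metric.graph()                        # state/velocity/time/ownership plots
per_iteration = metric.metrics()      # {iteration: [velocity, review time, correlation]}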
Example #8
class ProcessSegmentAnalyzer(object):
    """
    Segments git commits and correlates them with user stories.
    """
    def __init__(self, project_info):
        """
        Tokens are read from the conf/tokens.json file.

        Input
          - project_info: dataset containing all course projects
        """
        super(ProcessSegmentAnalyzer, self).__init__()
        self.project_info = project_info
        with open('conf/tokens.json', 'r') as f_in:
            self.token = json.load(f_in)
        self.gt_analyzer = GithubAnalyzer(self.token['github']['token'],
                                          self.project_info)
        self.gt_client = GithubApi(self.token['github']['token'])
        self.pt_client = TrackerApi(self.token['pivotal_tracker']['token'])
        self.out_header = 'process_segment'
        if self.out_header not in os.listdir('results'):
            os.mkdir('results/{}'.format(self.out_header))
        self.out_header = 'results/{}'.format(self.out_header)

    def correlation(self, proj):
        """
        Generate segmentation for a single project.

        Input
          - proj: a data point in project_info
        """
        pass

    def time_sequence(self, proj):
        """
        Extract time information and file information from commits.

        Input
          - proj: the project
        Output
          - time_sequence: a list of datetime objects
          - file_sequence: a list of file indexes
        """
        commits = self.gt_client.get_commits(proj['repo']['owner'],
                                             proj['repo']['repo'])

        file_indexer = {}
        time_sequence, file_sequence = [], []
        for cmit in commits:
            # tmp_time = datetime.datetime.strptime(commit['commit']['author']['date'], '%Y-%m-%dT%H:%M:%SZ')

            tmp_file_vec = []
            commit = self.gt_analyzer.get_commit(cmit['sha'])
            if not commit:
                print('Commit not found: {}'.format(cmit['sha']))
                continue
            if 'merge' in commit['commit']['message'].lower():
                continue
            for f in commit['files']:
                if not f['filename'] in file_indexer:
                    file_indexer[f['filename']] = len(file_indexer)
                tmp_file_vec.append(file_indexer[f['filename']])
            file_sequence.append(tmp_file_vec)
            time_sequence.append(
                datetime.datetime.strptime(cmit['commit']['author']['date'],
                                           '%Y-%m-%dT%H:%M:%SZ'))
        return time_sequence, file_sequence

    def story_time(self, proj):
        """
        Extract time information and story information from Pivotal Tracker.

        Input
          - proj: the project
        Output
          - times: a list of (created_at, updated_at) datetime pairs
          - info: a list of the corresponding story objects
        """
        times, info = [], []
        for story in self.pt_client.get_stories(proj['tracker']):
            s = datetime.datetime.strptime(story['created_at'],
                                           '%Y-%m-%dT%H:%M:%SZ')
            e = datetime.datetime.strptime(story['updated_at'],
                                           '%Y-%m-%dT%H:%M:%SZ')
            times.append((s, e))
            info.append(story)
        return times, info

    def story_time_overlaps(self):
        """
        Plot
          - a counting plot of 'active' user stories over time
        """
        import time
        if 'story_time' not in os.listdir(self.out_header):
            os.mkdir('{}/story_time'.format(self.out_header))
        for proj in self.project_info[1:2]:  # note: only processes the second project
            times, info = self.story_time(proj)
            time_to_val = {}
            for s_t, e_t in times:
                time_to_val[s_t] = 1
                time_to_val[e_t] = -1
            time_seq, count_seq = [], []
            counter = 0
            for t in sorted(time_to_val.keys()):
                time_seq.append(t)
                counter += time_to_val[t]
                count_seq.append(counter)

            fig, ax = plt.subplots()
            plt.plot([time.mktime(t.timetuple()) for t in time_seq], count_seq)
            plt.savefig('{}/story_time/{}_{}'.format(
                self.out_header, proj['ID'], proj['project'].replace(" ", "")))
            plt.close(fig)

    def git_commit_overlaps(self):
        """
        Plot
          - a scatter plot between time and files edited for a given project
        """
        import time
        if 'commit_time' not in os.listdir(self.out_header):
            os.mkdir('{}/commit_time'.format(self.out_header))
        for proj in self.project_info[1:2]:  # note: only processes the second project
            times, files = self.time_sequence(proj)
            sorted_time = sorted(times)
            t_seq, f_seq = [], []
            for i in range(len(times)):
                for f in files[i]:
                    # t_seq.append(sorted_time.index(times[i]))
                    t_seq.append(time.mktime(times[i].timetuple()))
                    f_seq.append(f)
            plotdata = pd.DataFrame({'time': t_seq, 'file': f_seq})

            fig, ax = plt.subplots()
            sns.jointplot(x='time', y='file', data=plotdata)
            plt.savefig('{}/commit_time/{}_{}.png'.format(
                self.out_header, proj['ID'], proj['project'].replace(" ", "")))
            plt.close(fig)
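A short hypothetical driver for the analyzer above (assumes conf/tokens.json, the results/ directory, and the cached GithubAnalyzer data all exist):

analyzer = ProcessSegmentAnalyzer(project_info)  # project_info: course dataset, as above
analyzer.story_time_overlaps()    # plots into results/process_segment/story_time/
analyzer.git_commit_overlaps()    # plots into results/process_segment/commit_time/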
Example #9
class TestAnalyzer(object):
  """
    Analysis based on test information extracted from TravisCI build history.
    Assumes certain cache files exist.
  """
  def __init__(self, tokens, proj_info):
    super(TestAnalyzer, self).__init__()
    self.tokens = tokens
    self.project_info = proj_info
    self.pt_client = TrackerApi(tokens['pivotal_tracker']['token'])
    with open('cache/log_info.json', 'r') as f_in:
      self.build_info = json.load(f_in)
    self.out_header = 'test_analysis'
    if self.out_header not in os.listdir('results'):
      os.mkdir('results/{}'.format(self.out_header))
    self.out_header = 'results/{}'.format(self.out_header)

  def cucumber_scenarios(self, proj):
    """
      Cucumber scenario analysis for a given project.
      It correlates commits/builds with user stories.

      Input
        - proj: one project from project info dataset.
    """
    bd_history = self.build_info[proj['ID']]
    features, scenarios = [], []
    total_features, total_scenarios = set(), set()
    for bd in bd_history:
      tmp_feature, tmp_scenarios = set(), set()
      for scenario in bd['cucumber']['scenarios']:
        feature = self._extract_feature(scenario['feature'])
        scenario = self._extract_scenario(scenario['scenario'])
        if feature and scenario:
          tmp_feature.add(feature)
          tmp_scenarios.add(scenario)
          total_features.add(feature)
          total_scenarios.add(scenario)
      features.append(tmp_feature)
      scenarios.append(tmp_scenarios)

    stories = self.pt_client.get_stories(proj['tracker'])
    story_str = [self._get_story_str(story) for story in stories]
    from sklearn.feature_extraction.text import CountVectorizer

    vectorizer = CountVectorizer()
    total_features = list(total_features)
    word_corps = total_features + story_str
    flags = ['cucumber']*len(total_features) + ['stories']*len(story_str)

    plotdata = pd.DataFrame({'length': [len(txt) for txt in word_corps],
                             'flag': flags})
    fig, ax = plt.subplots()
    sns.boxplot(x='flag', y='length', data=plotdata)
    plt.savefig('{}/{}_boxplot.png'.format(self.out_header, proj['ID']))
    plt.close(fig)

    word_vecs = vectorizer.fit_transform(word_corps)

    from sklearn.feature_extraction.text import TfidfTransformer
    transformer = TfidfTransformer(smooth_idf=False)
    tfidf = transformer.fit_transform(word_vecs)

    cucumber_vec_txt = list(zip(tfidf[:len(total_features)], total_features))
    story_vec_txt = list(zip(tfidf[len(total_features):], stories))
    best_match = {}
    for vec_txt in cucumber_vec_txt:
      story = self._nearest_neighbor(vec_txt, story_vec_txt)
      best_match[vec_txt[1]] = story[1]

    with open('cache/cucumber_story_match/{}_match_results.json'.format(proj['ID']), 'w') as f_out:
      json.dump(best_match, f_out, sort_keys=True, indent=4, separators=(',', ': '))

    # from sklearn.manifold import TSNE
    # model = TSNE(n_components=2, random_state=0)
    # plot_points = model.fit_transform(tfidf.toarray())

    # fig, ax = plt.subplots()
    # plt.scatter([x[0] for x in plot_points], [x[1] for x in plot_points], c=['g' if x == 'cucumber' else 'b' for x in flags], alpha=0.5)
    # plt.savefig('{}/{}_scatter.png'.format(self.out_header, proj['ID']))
    # plt.close(fig)

  def lifecycle(self, proj):
    """
      Plot the first appearance of a test case within the lifecycle of the
      corresponding user story. Assumes a cache file exists.

      Input
        - proj: a given project
    """
    bd_history = self.build_info[proj['ID']]
    features, scenarios = [], []
    # total_features, total_scenarios = set(), set()
    for bd in bd_history:
      tmp_feature, tmp_scenarios = set(), set()
      for scenario in bd['cucumber']['scenarios']:
        feature = self._extract_feature(scenario['feature'])
        scenario = self._extract_scenario(scenario['scenario'])
        if feature and scenario:
          tmp_feature.add(feature)
          tmp_scenarios.add(scenario)
          # total_features.add(feature)
          # total_scenarios.add(scenario)
      features.append(tmp_feature)
      scenarios.append(tmp_scenarios)

    with open('cache/cucumber_story_match/{}_match_results.json'.format(proj['ID']), 'r') as f_in:
      story_match = json.load(f_in)

    owner, repo = proj['repo']['owner'], proj['repo']['repo']
    key = '{}|{}'.format(owner, repo)
    with open('cache/builds.json', 'r') as f_in:
      build_cache = json.load(f_in)
    builds = build_cache[key]

    prev_feature = set()
    lifecycle_info = []
    for feature, bd in zip(features, builds):
      for f in feature - prev_feature:
        lifecycle_info.append(self._compare(f, bd, story_match[f], proj['tracker']))
      prev_feature.update(feature)

    time_span_data = []
    for t_test, t_story in lifecycle_info:
      if 'created' in t_story and 'updated' in t_story:
        t_0 = time.mktime(time.strptime(t_test, '%Y-%m-%dT%H:%M:%SZ'))
        t_1 = time.mktime(time.strptime(t_story['created'], '%Y-%m-%dT%H:%M:%SZ'))
        t_2 = time.mktime(time.strptime(t_story['updated'], '%Y-%m-%dT%H:%M:%SZ'))
        time_span_data.append(float(t_0-t_1)/float(t_2-t_1+0.1))

    time_span_data = list(filter(lambda x: abs(x) < 1.5, time_span_data))
    fig, ax = plt.subplots()
    plt.hist(time_span_data, 20)
    plt.savefig('{}/{}_span.png'.format(self.out_header, proj['ID']))
    plt.close(fig)

  def _compare(self, feature_str, bd, story, proj_id):
    story_transitions = self.pt_client.get_story_transitions(proj_id, story['id'])
    transition_time = {item['state']:item['occurred_at'] for item in story_transitions}
    transition_time['created'] = story['created_at']
    transition_time['updated'] = story['updated_at']
    return bd['started_at'], transition_time

  def _nearest_neighbor(self, pnt, candidate_list):
    return min(candidate_list, key=lambda x: self._distance(x, pnt))

  def _distance(self, pnt_1, pnt_2):
    return np.linalg.norm(pnt_1[0].toarray()-pnt_2[0].toarray())

  def _get_story_str(self, story):
    """
      Extract a string description out of a user story.
    """
    str_output = story['name']
    if 'description' in story:
      str_output += '\n'
      str_output += story['description']
    return str_output

  def _extract_feature(self, log_str):
    """
      Extract the feature from a log line.
      All cucumber feature lines are in the format of
      "Feature: Create an admin account"
    """
    # Currently returns the raw line unchanged; the prefix stripping below is disabled.
    return log_str
    # if not log_str:
    #   return False
    # return log_str[9:]

  def _extract_scenario(self, log_str):
    """
      Extract the scenario from a log line
      All cucumber scenario lines are in the format of
      "  Scenario: An admin cancels editing their information # features/admins/edit.feature:15"
    """
    log_str = log_str.replace('\t', '')
    tmp_lst = log_str.split('#')
    index = 0 if 'Scenario' in tmp_lst[0] else 1
    tmp_lst = tmp_lst[index].split(' ')
    if 'Scenario:' not in tmp_lst:
      return False
    return ' '.join(tmp_lst[tmp_lst.index('Scenario:')+1:])
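A hypothetical end-to-end run of the analyzer above (assumes conf/tokens.json, cache/log_info.json, cache/builds.json, and the cache/cucumber_story_match/ directory all exist):

import json

with open('conf/tokens.json', 'r') as f_in:
  tokens = json.load(f_in)
analyzer = TestAnalyzer(tokens, project_info)  # project_info: course dataset
for proj in analyzer.project_info:
  analyzer.cucumber_scenarios(proj)  # writes cache/cucumber_story_match/<ID>_match_results.json
  analyzer.lifecycle(proj)           # reads the match file and plots <ID>_span.png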