示例#1
0
  def api_activity_histogram(self,app_id,exp_uid,task):
    """
    Description: returns the data to plot all API activity (for all algorithms) in a histogram with respect to time for any task in {getQuery,processAnswer,predict}

    Every entry returned by the app_id+':APP-CALL' log for the given exp_uid and task is
    placed at the number of seconds between the experiment's 'start_date' and the entry's
    'timestamp'. When no entry matches, an empty set of axes is rendered instead of failing.

    Expected input:
      (string) app_id : prefix of the ':APP-CALL' log that is queried
      (string) exp_uid : experiment whose calls and 'start_date' are looked up
      (string) task :  must be in {'getQuery','processAnswer','predict'}

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':APP-CALL',{'exp_uid':exp_uid,'task':task})

    start_date_str,didSucceed,message = self.db.get('experiments_admin',exp_uid,'start_date')
    start_date = utils.str2datetime(start_date_str)
    numerical_timestamps = [ ( utils.str2datetime(item['timestamp'])-start_date).total_seconds() for item in list_of_log_dict]
    has_data = len(numerical_timestamps) > 0

    import matplotlib.pyplot as plt
    import mpld3
    # NOTE(review): matplotlib removed the `axisbg` keyword in 2.2 in favour of `facecolor`;
    # left unchanged because the matplotlib version this runs against is not visible here.
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#FFFFFF'),figsize=(12,1.5))
    try:
      if has_data:
        # number of bins grows with the square root of the number of calls
        ax.hist(numerical_timestamps,int(1+4*numpy.sqrt(len(numerical_timestamps))),alpha=0.5,color='black')
      ax.set_frame_on(False)
      ax.get_xaxis().set_ticks([])
      ax.get_yaxis().set_ticks([])
      ax.get_yaxis().set_visible(False)
      if has_data:
        # max() raises on an empty list, so the x-range is only set when there is data
        ax.set_xlim(0, max(numerical_timestamps))
      plot_dict = mpld3.fig_to_dict(fig)
    finally:
      # pyplot keeps a reference to every figure until it is closed explicitly
      plt.close(fig)

    return plot_dict
示例#2
0
    def api_activity_histogram(self, app, butler):
        """
        Description: returns the data to plot, as a histogram over time, when the queries of an experiment were generated

        Every query returned by butler.queries for app.exp_uid is placed at the number of
        seconds between the experiment's 'start_date' and the query's
        'timestamp_query_generated'. No task filter is applied. When there are no queries,
        an empty set of axes is rendered instead of failing.

        Expected input:
          app : object providing exp_uid
          butler : object providing queries.get(pattern=...) and admin.get(uid=...)

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        queries = butler.queries.get(pattern={'exp_uid': app.exp_uid})
        start_date = utils.str2datetime(
            butler.admin.get(uid=app.exp_uid)['start_date'])
        numerical_timestamps = [
            (utils.str2datetime(item['timestamp_query_generated']) -
             start_date).total_seconds() for item in queries
        ]
        has_data = len(numerical_timestamps) > 0

        # NOTE(review): matplotlib removed the `axisbg` keyword in 2.2 in favour of
        # `facecolor`; left unchanged because the matplotlib version in use is not visible here.
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#FFFFFF'),
                               figsize=(12, 1.5))
        try:
            if has_data:
                # bins grow with the square root of the sample size, capped at 300
                ax.hist(numerical_timestamps,
                        min(int(1 + 4 * numpy.sqrt(len(numerical_timestamps))), 300),
                        alpha=0.5,
                        color='black')
            ax.set_frame_on(False)
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])
            ax.get_yaxis().set_visible(False)
            if has_data:
                # max() raises on an empty list, so the x-range is only set when there is data
                ax.set_xlim(0, max(numerical_timestamps))
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # close this figure even when rendering fails, so pyplot does not keep it alive
            plt.close(fig)
        return plot_dict
示例#3
0
  def api_activity_histogram(self, app, butler):
    """
    Description: returns the data to plot, as a histogram over time, when the queries of an experiment were generated

    Each query found for app.exp_uid contributes the number of seconds between the
    experiment's 'start_date' and its own 'timestamp_query_generated'. No task filter is
    applied. With no queries at all the axes are left empty rather than raising.

    Expected input:
      app : object providing exp_uid
      butler : object providing queries.get(pattern=...) and admin.get(uid=...)

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    queries = butler.queries.get(pattern={'exp_uid':app.exp_uid})
    start_date = utils.str2datetime(butler.admin.get(uid=app.exp_uid)['start_date'])
    numerical_timestamps = [(utils.str2datetime(item['timestamp_query_generated'])-start_date).total_seconds()
                                for item in queries]

    # NOTE(review): matplotlib removed the `axisbg` keyword in 2.2 in favour of `facecolor`;
    # left unchanged because the matplotlib version in use is not visible here.
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#FFFFFF'),figsize=(12,1.5))
    try:
      if numerical_timestamps:
        # bins grow with the square root of the sample size, capped at 300
        number_of_bins = min(int(1+4*numpy.sqrt(len(numerical_timestamps))),300)
        ax.hist(numerical_timestamps,number_of_bins,alpha=0.5,color='black')
      ax.set_frame_on(False)
      ax.get_xaxis().set_ticks([])
      ax.get_yaxis().set_ticks([])
      ax.get_yaxis().set_visible(False)
      if numerical_timestamps:
        # max() raises on an empty list, so the x-range is only set when there is data
        ax.set_xlim(0, max(numerical_timestamps))
      plot_dict = mpld3.fig_to_dict(fig)
    finally:
      # close this figure even when rendering fails, so pyplot does not keep it alive
      plt.close(fig)
    return plot_dict
示例#4
0
    def most_current_embedding(self, app_id, exp_uid, alg_label):
        """
        Description: Returns embedding in the form of a list of dictionaries, which is convenient for downstream applications

        The embedding is the 'X2' field of the ALG-EVALUATION log entry with the latest
        'timestamp' for the algorithm carrying alg_label. When nothing has been logged yet
        (or the embedding is empty) an empty data list with all bounds at 0. is returned.

        Expected input:
          (string) app_id : prefix of the ':experiments' collection and ':ALG-EVALUATION' log
          (string) exp_uid : experiment whose 'alg_list' is searched
          (string) alg_label : must be a valid alg_label contained in alg_list list of dicts

        Expected output (in dict):
          plot_type : 'scatter2d_noaxis'
          (float) x_min : minimum x-value to display in viewing box
          (float) x_max : maximum x-value to display in viewing box
          (float) y_min : minimum y-value to display in viewing box
          (float) y_max : maximum y-value to display in viewing box
          (list of dicts with fields) data :
            (int) index : index of target
            (float) x : x-value of target
            (float) y : y-value of target
        """
        alg_list, didSucceed, message = self.db.get(app_id + ':experiments',
                                                    exp_uid, 'alg_list')

        # no early exit: if several entries share the label, the last one is used
        for algorithm in alg_list:
            if algorithm['alg_label'] == alg_label:
                alg_uid = algorithm['alg_uid']

        list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
            app_id + ':ALG-EVALUATION', {'alg_uid': alg_uid})

        if list_of_log_dict:
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))
            embedding = list_of_log_dict[-1]['X2']
        else:
            # nothing evaluated yet: report an empty embedding instead of an IndexError
            embedding = []

        data = []
        # builtin float: the numpy.float alias was removed from NumPy (1.24)
        x_min = y_min = float('inf')
        x_max = y_max = -float('inf')
        for idx, target in enumerate(embedding):
            data.append({'index': idx, 'x': target[0], 'y': target[1]})

            x_min = min(x_min, target[0])
            x_max = max(x_max, target[0])
            y_min = min(y_min, target[1])
            y_max = max(y_max, target[1])

        if not data:
            # keep the viewing box finite when there is nothing to show
            x_min = x_max = y_min = y_max = 0.

        return_dict = {}
        return_dict['x_min'] = x_min
        return_dict['x_max'] = x_max
        return_dict['y_min'] = y_min
        return_dict['y_max'] = y_max
        return_dict['data'] = data
        return_dict['plot_type'] = 'scatter2d_noaxis'

        return return_dict
示例#5
0
    def test_error_multiline_plot(self, app_id, exp_uid):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        One line is produced per entry of the experiment's 'alg_list', from that algorithm's
        ALG-EVALUATION log entries ordered by 'timestamp'.

        Expected input:
          (string) app_id : prefix of the ':experiments' collection and ':ALG-EVALUATION' log
          (string) exp_uid : experiment whose 'alg_list' is read

        Expected output (in dict):
          plot_type 'multi_line_plot'
          (string) x_label : 'Number of answered triplets'
          (float) x_min : smallest 'num_reported_answers' over all algorithms (inf if no data)
          (float) x_max : largest 'num_reported_answers' over all algorithms (-inf if no data)
          (string) y_label : 'Error on hold-out set'
          (float) y_min : smallest strictly positive error over all algorithms (inf if none)
          (float) y_max : largest error over all algorithms (-inf if no data)
          (list of dicts with fields) data :
            (list of strings) t : timestamp strings with their last three characters removed
            (list) x : 'num_reported_answers' of each log entry
            (list of floats) y : 'error' of each log entry
            (string) legend_label : alg_label
        """

        # get list of algorithms associated with project
        alg_list, didSucceed, message = self.db.get(app_id + ':experiments',
                                                    exp_uid, 'alg_list')

        # builtin float: the numpy.float alias was removed from NumPy (1.24)
        x_min = y_min = float('inf')
        x_max = y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_uid = algorithm['alg_uid']
            alg_label = algorithm['alg_label']

            list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
                app_id + ':ALG-EVALUATION', {'alg_uid': alg_uid})
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))

            x = [item['num_reported_answers'] for item in list_of_log_dict]
            y = [float(item['error']) for item in list_of_log_dict]
            t = [str(item['timestamp'])[:-3] for item in list_of_log_dict]

            # zero (and negative) errors are deliberately left out of the lower bound
            positive_errors = [value for value in y if value > 0.]
            if positive_errors:
                y_min = min(y_min, min(positive_errors))

            try:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_max = max(y_max, max(y))
            except (TypeError, ValueError):
                # best effort: an empty log (ValueError) or values that cannot be ordered
                # (TypeError) leave the remaining bounds as they are
                pass

            list_of_alg_dicts.append({
                'legend_label': alg_label,
                'x': x,
                'y': y,
                't': t,
            })

        return_dict = {}
        return_dict['data'] = list_of_alg_dicts
        return_dict['plot_type'] = 'multi_line_plot'
        return_dict['x_label'] = 'Number of answered triplets'
        return_dict['x_min'] = x_min
        return_dict['x_max'] = x_max
        return_dict['y_label'] = 'Error on hold-out set'
        return_dict['y_min'] = y_min
        return_dict['y_max'] = y_max

        return return_dict
示例#6
0
  def compute_duration_detailed_stacked_area_plot(self,app_id,exp_uid,task,alg_label,detailedDB=False):
    """
    Description: Returns stacked area plot for a particular algorithm and task where the durations
    are broken down into compute, dbGet, dbSet, admin and enqueued

    The ALG-DURATION log entries of the algorithm are ordered by 'timestamp' and thinned out
    before plotting: up to MAX_SAMPLES_PER_PLOT evenly spaced entries are kept, together with
    up to MAX_SAMPLES_PER_PLOT entries having the largest app_duration + duration_enqueued.

    Expected input:
      (string) app_id : prefix of the ':experiments' collection and ':ALG-DURATION' log
      (string) exp_uid : experiment whose 'alg_list' is searched
      (string) task :  must be in {'getQuery','processAnswer','predict'}
      (string) alg_label : must be a valid alg_label contained in alg_list list of dicts
      (bool) detailedDB : accepted but not used by this method

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """

    alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

    # no early exit: if several entries share the label, the last one is used
    for algorithm in alg_list:
      if algorithm['alg_label'] == alg_label:
        alg_uid = algorithm['alg_uid']

    list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-DURATION',{'alg_uid':alg_uid,'task':task})
    list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

    # total time per call; only used to pick which entries are plotted
    y = numpy.array([ item.get('app_duration',0.) + item.get('duration_enqueued',0.) for item in list_of_log_dict ])
    num_items = len(list_of_log_dict)
    multiplier = min(num_items,MAX_SAMPLES_PER_PLOT)
    # floor division: plain `/` yields floats on Python 3, which cannot index a list
    incr_inds = [ k*num_items//multiplier for k in range(multiplier) ]
    max_inds = [ int(i) for i in numpy.argsort(-y)[0:multiplier] ]
    final_inds = sorted(set(incr_inds + max_inds))

    x = []
    enqueued = []
    admin = []
    dbGet = []
    dbSet = []
    compute = []

    max_y_value = 0.
    for idx in final_inds:
      item = list_of_log_dict[idx]
      x.append(idx+1)

      _alg_duration = item.get('duration',0.)
      _alg_duration_dbGet = item.get('duration_dbGet',0.)
      _alg_duration_dbSet = item.get('duration_dbSet',0.)
      _duration_enqueued = item.get('duration_enqueued',0.)
      _app_duration = item.get('app_duration',0.)

      if (_app_duration+_duration_enqueued) > max_y_value:
        max_y_value = _app_duration + _duration_enqueued

      enqueued.append(_duration_enqueued)
      admin.append(_app_duration-_alg_duration)
      dbSet.append(_alg_duration_dbSet)
      dbGet.append(_alg_duration_dbGet)
      compute.append( _alg_duration - _alg_duration_dbSet - _alg_duration_dbGet )

    if x:
      min_x = min(x)
      max_x = max(x)
    else:
      min_x = 0.
      max_x = 0.

    import matplotlib.pyplot as plt
    import mpld3
    # NOTE(review): matplotlib removed the `axisbg` keyword in 2.2 in favour of `facecolor`;
    # left unchanged because the matplotlib version this runs against is not visible here.
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    try:
      stack_coll = ax.stackplot(x,compute,dbGet,dbSet,admin,enqueued, alpha=.5)
      ax.set_xlabel('API Call')
      ax.set_ylabel('Duration (s)')
      ax.set_xlim([min_x,max_x])
      ax.set_ylim([0.,max_y_value])
      ax.grid(color='white', linestyle='solid')
      ax.set_title(alg_label+' - '+task, size=14)
      # one rectangle per layer, in that layer's colour, stands in for it in the legend
      proxy_rects = [plt.Rectangle((0, 0), 1, 1, alpha=.5,fc=pc.get_facecolor()[0]) for pc in stack_coll]
      legend = ax.legend(proxy_rects, ['compute','dbGet','dbSet','admin','enqueued'],loc=2,ncol=3,mode="expand")
      for label in legend.get_texts():
        label.set_fontsize('small')
      plot_dict = mpld3.fig_to_dict(fig)
    finally:
      # pyplot keeps a reference to every figure until it is closed explicitly
      plt.close(fig)

    return plot_dict
示例#7
0
    def compute_duration_detailed_stacked_area_plot(self, app_id, exp_uid, task, alg_label, detailedDB=False):
        """
        Description: Builds the data of a stacked area plot showing, for every logged call of
        one task by one algorithm, how the time splits into compute, db:get, db:set, admin
        and enqueued.

        Expected input:
          (string) app_id : prefix of the ':experiments' collection and ':ALG-DURATION' log
          (string) exp_uid : experiment whose 'alg_list' is searched
          (string) task :  must be in {'getQuery','processAnswer','predict'}
          (string) alg_label : must be a valid alg_label contained in alg_list list of dicts
          (bool) detailedDB : accepted but not used by this method

        Expected output (in dict):
          plot_type 'stacked_area_plot'
          (string) x_label : 'API Call'
          (number) x_min : smallest entry of x (0. without log entries)
          (number) x_max : largest entry of x (0. without log entries)
          (string) y_label : 'Duration (s)'
          (float) y_min : smallest app_duration + duration_enqueued (0. without log entries)
          (float) y_max : largest app_duration + duration_enqueued, never below 0.
          (list of strings) t : list of timestamp strings
          (list of ints) x : 1, 2, ... one per log entry in timestamp order
          (list of dicts with fields) data :
            (list of floats) y : list of durations
            (string) legend_label : one of 'compute','db:get','db:set','admin','enqueued'
        """
        alg_list, didSucceed, message = self.db.get(app_id + ":experiments", exp_uid, "alg_list")

        # The scan never stops early, so the last entry with a matching label is used.
        for entry in alg_list:
            if entry["alg_label"] == alg_label:
                alg_id = entry["alg_id"]
                alg_uid = entry["alg_uid"]

        records, didSucceed, message = self.ell.get_logs_with_filter(
            app_id + ":ALG-DURATION", {"alg_uid": alg_uid, "task": task}
        )
        records = sorted(records, key=lambda record: utils.str2datetime(record["timestamp"]))

        call_numbers = []
        stamps = []
        compute_layer, get_layer, set_layer, admin_layer, queue_layer = [], [], [], [], []
        largest_total = 0.0
        smallest_total = float("inf")

        for position, record in enumerate(records, 1):
            call_numbers.append(position)
            stamps.append(str(record.get("timestamp", "")))

            alg_time = record.get("duration", 0.0)
            get_time = record.get("duration_dbGet", 0.0)
            set_time = record.get("duration_dbSet", 0.0)
            queue_time = record.get("duration_enqueued", 0.0)
            app_time = record.get("app_duration", 0.0)

            # height of the whole stack for this call
            total = app_time + queue_time
            largest_total = max(largest_total, total)
            smallest_total = min(smallest_total, total)

            queue_layer.append(queue_time)
            admin_layer.append(app_time - alg_time)
            set_layer.append(set_time)
            get_layer.append(get_time)
            compute_layer.append(alg_time - set_time - get_time)

        # bottom-to-top order of the stacked layers
        layers = [
            {"legend_label": label, "y": values}
            for label, values in (
                ("compute", compute_layer),
                ("db:get", get_layer),
                ("db:set", set_layer),
                ("admin", admin_layer),
                ("enqueued", queue_layer),
            )
        ]

        summary = {
            "x": call_numbers,
            "t": stamps,
            "data": layers,
            "plot_type": "stacked_area_plot",
            "x_label": "API Call",
            "y_label": "Duration (s)",
        }
        if call_numbers:
            summary["x_min"] = min(call_numbers)
            summary["x_max"] = max(call_numbers)
            summary["y_min"] = smallest_total
            summary["y_max"] = largest_total
        else:
            # nothing logged: every bound collapses to zero
            summary["x_min"] = 0.0
            summary["x_max"] = 0.0
            summary["y_min"] = 0.0
            summary["y_max"] = 0.0

        return summary
示例#8
0
  def compute_duration_detailed_stacked_area_plot(self,app_id,exp_uid,task,alg_label,detailedDB=False):
    """
    Description: Collects, for one algorithm and one task, the per-call durations of the
    ALG-DURATION log and returns them as the layers of a stacked area plot
    (compute, db:get, db:set, admin, enqueued)

    Expected input:
      (string) app_id : prefix of the ':experiments' collection and ':ALG-DURATION' log
      (string) exp_uid : experiment whose 'alg_list' is searched
      (string) task :  must be in {'getQuery','reportAnswer','predict'}
      (string) alg_label : must be a valid alg_label contained in alg_list list of dicts
      (bool) detailedDB : accepted but not used by this method

    Expected output (in dict):
      plot_type 'stacked_area_plot'
      (string) x_label : 'API Call'
      (number) x_min : smallest entry of x (0. without log entries)
      (number) x_max : largest entry of x (0. without log entries)
      (string) y_label : 'Duration (s)'
      (float) y_min : smallest app_duration + duration_enqueued (0. without log entries)
      (float) y_max : largest app_duration + duration_enqueued, never below 0.
      (list of strings) t : list of timestamp strings
      (list of ints) x : 1, 2, ... one per log entry in timestamp order
      (list of dicts with fields) data :
        (list of floats) y : list of durations
        (string) legend_label : one of 'compute','db:get','db:set','admin','enqueued'
    """
    alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

    # every entry is inspected, so a later entry with the same label overrides an earlier one
    for candidate in alg_list:
      if candidate['alg_label'] == alg_label:
        alg_id = candidate['alg_id']
        alg_uid = candidate['alg_uid']

    log_entries,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-DURATION',{'alg_uid':alg_uid,'task':task})
    log_entries = sorted(log_entries, key=lambda entry: utils.str2datetime(entry['timestamp']) )

    # layers in the order they are stacked, bottom first
    layer_order = ['compute','db:get','db:set','admin','enqueued']
    layer_values = dict((label, []) for label in layer_order)

    x = []
    t = []
    tallest = 0.
    shortest = float('inf')
    count = 0
    for entry in log_entries:
      count += 1
      x.append(count)
      t.append(str(entry.get('timestamp','')))

      in_alg = entry.get('duration',0.)
      in_db_get = entry.get('duration_dbGet',0.)
      in_db_set = entry.get('duration_dbSet',0.)
      in_queue = entry.get('duration_enqueued',0.)
      in_app = entry.get('app_duration',0.)

      # full height of the stack for this call
      stack_height = in_app + in_queue
      if stack_height > tallest:
        tallest = stack_height
      if stack_height < shortest:
        shortest = stack_height

      layer_values['enqueued'].append(in_queue)
      layer_values['admin'].append(in_app-in_alg)
      layer_values['db:set'].append(in_db_set)
      layer_values['db:get'].append(in_db_get)
      layer_values['compute'].append( in_alg - in_db_set - in_db_get )

    return_dict = {}
    return_dict['x'] = x
    return_dict['t'] = t
    return_dict['data'] = [ {'legend_label':label, 'y':layer_values[label]} for label in layer_order ]
    return_dict['plot_type'] = 'stacked_area_plot'
    return_dict['x_label'] = 'API Call'
    return_dict['y_label'] = 'Duration (s)'

    if len(x) == 0:
      # no log entries: all four bounds are reported as zero
      for bound in ('x_min','x_max','y_min','y_max'):
        return_dict[bound] = 0.
    else:
      return_dict['x_min'] = min(x)
      return_dict['x_max'] = max(x)
      return_dict['y_min'] = shortest
      return_dict['y_max'] = tallest

    return return_dict
示例#9
0
  def compute_duration_multiline_plot(self,app_id,exp_uid,task):
    """
    Description: Returns multiline plot where there is a one-to-one mapping lines to
    algorithms and each line indicates the durations to complete the task (wrt to the api call)

    For every algorithm the ALG-DURATION log entries are ordered by 'timestamp' and the plotted
    value is app_duration + duration_enqueued. Each line is thinned out to up to
    MAX_SAMPLES_PER_PLOT evenly spaced calls plus up to MAX_SAMPLES_PER_PLOT calls with the
    largest durations. Axis limits are only set when at least one call was logged.

    Expected input:
      (string) app_id : prefix of the ':experiments' collection and ':ALG-DURATION' log
      (string) exp_uid : experiment whose 'alg_list' is read
      (string) task :  must be in {'getQuery','processAnswer','predict'}

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

    # builtin float: the numpy.float alias was removed from NumPy (1.24)
    x_min = y_min = float('inf')
    x_max = y_max = -float('inf')
    list_of_alg_dicts = []

    for algorithm in alg_list:
      alg_uid = algorithm['alg_uid']
      alg_label = algorithm['alg_label']

      list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-DURATION',{'alg_uid':alg_uid,'task':task})
      list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

      durations = numpy.array([ item.get('app_duration',0.) + item.get('duration_enqueued',0.) for item in list_of_log_dict ])
      num_items = len(list_of_log_dict)
      multiplier = min(num_items,MAX_SAMPLES_PER_PLOT)
      # floor division: plain `/` yields floats on Python 3, which are not valid indices
      incr_inds = [ k*num_items//multiplier for k in range(multiplier) ]
      max_inds = [ int(i) for i in numpy.argsort(-durations)[0:multiplier] ]
      final_inds = sorted(set(incr_inds + max_inds))

      # calls are numbered from 1 in timestamp order
      x = [ idx+1 for idx in final_inds ]
      y = [ float(durations[idx]) for idx in final_inds ]

      if x:
        x_min = min(x_min,min(x))
        x_max = max(x_max,max(x))
        y_min = min(y_min,min(y))
        y_max = max(y_max,max(y))

      list_of_alg_dicts.append({'legend_label':alg_label, 'x':x, 'y':y})

    import matplotlib.pyplot as plt
    import mpld3
    # NOTE(review): matplotlib removed the `axisbg` keyword in 2.2 in favour of `facecolor`;
    # left unchanged because the matplotlib version this runs against is not visible here.
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    try:
      for alg_dict in list_of_alg_dicts:
        ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
      ax.set_xlabel('API Call')
      ax.set_ylabel('Duration (s)')
      if x_min <= x_max:
        # the bounds are still +/-inf when nothing was logged; leave the default limits then
        ax.set_xlim([x_min,x_max])
        ax.set_ylim([y_min,y_max])
      ax.grid(color='white', linestyle='solid')
      ax.set_title(task, size=14)
      legend = ax.legend(loc=2,ncol=3,mode="expand")
      for label in legend.get_texts():
        label.set_fontsize('small')
      plot_dict = mpld3.fig_to_dict(fig)
    finally:
      # pyplot keeps a reference to every figure until it is closed explicitly
      plt.close(fig)

    return plot_dict
示例#10
0
    def compute_duration_detailed_stacked_area_plot(self,
                                                    app_id,
                                                    exp_uid,
                                                    task,
                                                    alg_label,
                                                    detailedDB=False):
        """
        Description: Returns the series of a stacked area plot for one algorithm and one task,
        with one layer each for compute, db:get, db:set, admin and enqueued time per logged call

        Expected input:
          (string) app_id : prefix of the ':experiments' collection and ':ALG-DURATION' log
          (string) exp_uid : experiment whose 'alg_list' is searched
          (string) task :  must be in {'getQuery','processAnswer','predict'}
          (string) alg_label : must be a valid alg_label contained in alg_list list of dicts
          (bool) detailedDB : accepted but not used by this method

        Expected output (in dict):
          plot_type 'stacked_area_plot'
          (string) x_label : 'API Call'
          (number) x_min : smallest entry of x (0. without log entries)
          (number) x_max : largest entry of x (0. without log entries)
          (string) y_label : 'Duration (s)'
          (float) y_min : smallest app_duration + duration_enqueued (0. without log entries)
          (float) y_max : largest app_duration + duration_enqueued, never below 0.
          (list of strings) t : list of timestamp strings
          (list of ints) x : 1, 2, ... one per log entry in timestamp order
          (list of dicts with fields) data :
            (list of floats) y : list of durations
            (string) legend_label : one of 'compute','db:get','db:set','admin','enqueued'
        """
        alg_list, didSucceed, message = self.db.get(app_id + ':experiments',
                                                    exp_uid, 'alg_list')

        # all entries are visited; the ids of the last one with this label are kept
        for described in alg_list:
            if described['alg_label'] == alg_label:
                alg_id, alg_uid = described['alg_id'], described['alg_uid']

        duration_logs, didSucceed, message = self.ell.get_logs_with_filter(
            app_id + ':ALG-DURATION', {
                'alg_uid': alg_uid,
                'task': task
            })
        duration_logs = sorted(
            duration_logs,
            key=lambda logged: utils.str2datetime(logged['timestamp']))

        x = []
        t = []
        waiting = []
        overhead = []
        reading = []
        writing = []
        working = []

        peak = 0.
        trough = float('inf')
        for offset, logged in enumerate(duration_logs):
            x.append(offset + 1)
            t.append(str(logged.get('timestamp', '')))

            spent_in_alg = logged.get('duration', 0.)
            spent_reading = logged.get('duration_dbGet', 0.)
            spent_writing = logged.get('duration_dbSet', 0.)
            spent_waiting = logged.get('duration_enqueued', 0.)
            spent_in_app = logged.get('app_duration', 0.)

            # total height of the stack for this call
            whole_call = spent_in_app + spent_waiting
            peak = max(peak, whole_call)
            trough = min(trough, whole_call)

            waiting.append(spent_waiting)
            overhead.append(spent_in_app - spent_in_alg)
            writing.append(spent_writing)
            reading.append(spent_reading)
            working.append(spent_in_alg - spent_writing - spent_reading)

        # bottom-to-top order of the layers
        labelled_series = [('compute', working), ('db:get', reading),
                           ('db:set', writing), ('admin', overhead),
                           ('enqueued', waiting)]
        list_of_dicts = []
        for legend_label, series in labelled_series:
            list_of_dicts.append({'legend_label': legend_label, 'y': series})

        # without any logged call all four bounds are reported as zero
        if x:
            bounds = (min(x), max(x), trough, peak)
        else:
            bounds = (0., 0., 0., 0.)

        return_dict = dict(x=x,
                           t=t,
                           data=list_of_dicts,
                           plot_type='stacked_area_plot',
                           x_label='API Call',
                           y_label='Duration (s)')
        return_dict.update(x_min=bounds[0],
                           x_max=bounds[1],
                           y_min=bounds[2],
                           y_max=bounds[3])

        return return_dict
示例#11
0
    def test_error_multiline_plot(self, app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        The hold-out set is every query stored for the 'test_alg_label' of the first entry of
        the experiment's 'alg_list'. For each ALG-EVALUATION log entry of an algorithm, a
        hold-out query with 'q' = (i, j, k) counts as correct when, in the logged embedding 'X',
        point i is closer to point k than point j is. The error is 1 - correct / number of
        hold-out queries, or 0.5 when there are no hold-out queries. Axis limits are only set
        when at least one evaluation was logged.

        Expected input:
          app : object providing app_id and exp_uid
          butler : object providing experiment.get, queries.get and ell.get_logs_with_filter

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        # get list of algorithms associated with project
        args = butler.experiment.get(key='args')
        test_alg_label = args['alg_list'][0]['test_alg_label']

        test_S = butler.queries.get(pattern={
            'exp_uid': app.exp_uid,
            'alg_label': test_alg_label
        })
        # builtin float: the numpy.float alias was removed from NumPy (1.24)
        x_min = y_min = float('inf')
        x_max = y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in args['alg_list']:
            alg_label = algorithm['alg_label']
            list_of_log_dict, didSucceed, message = butler.ell.get_logs_with_filter(
                app.app_id + ':ALG-EVALUATION', {
                    'exp_uid': app.exp_uid,
                    'alg_label': alg_label
                })
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))
            x = []
            y = []
            for item in list_of_log_dict:
                num_reported_answers = item['num_reported_answers']
                Xd = item['X']
                err = 0.5
                if len(test_S) > 0:
                    # compute error rate
                    number_correct = 0.
                    for query in test_S:
                        if 'q' in query:
                            i, j, k = query['q']
                            # equals |Xd[j]-Xd[k]|^2 - |Xd[i]-Xd[k]|^2
                            score = numpy.dot(Xd[j], Xd[j]) - 2 * numpy.dot(
                                Xd[j], Xd[k]) + 2 * numpy.dot(
                                    Xd[i], Xd[k]) - numpy.dot(Xd[i], Xd[i])
                            if score > 0:
                                number_correct += 1.0

                    # NOTE(review): queries without a 'q' field still count in the
                    # denominator, i.e. they are scored as wrong - confirm this is intended
                    accuracy = number_correct / len(test_S)
                    err = 1.0 - accuracy
                x.append(num_reported_answers)
                y.append(err)
            alg_dict = {'legend_label': alg_label, 'x': x, 'y': y}
            try:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))
            except (TypeError, ValueError):
                # best effort: an empty log (ValueError) or values that cannot be ordered
                # (TypeError) leave the remaining bounds as they are
                pass
            list_of_alg_dicts.append(alg_dict)

        import matplotlib.pyplot as plt
        import mpld3
        # NOTE(review): matplotlib removed the `axisbg` keyword in 2.2 in favour of
        # `facecolor`; left unchanged because the matplotlib version in use is not visible here.
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        try:
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],
                        alg_dict['y'],
                        label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered triplets')
            ax.set_ylabel('Error on hold-out set')
            if x_min <= x_max and y_min <= y_max:
                # the bounds are still +/-inf when nothing was logged; keep default limits then
                ax.set_xlim([x_min, x_max])
                ax.set_ylim([y_min, y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Triplet Test Error', size=14)
            legend = ax.legend(loc=2, ncol=3, mode="expand")
            for label in legend.get_texts():
                label.set_fontsize('small')
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # close this figure even when rendering fails, so pyplot does not keep it alive
            plt.close(fig)
        return plot_dict
示例#12
0
    def test_error_multiline_plot(self,app_id,exp_uid):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          (string) app_id : used to build the '<app_id>:experiments' and '<app_id>:ALG-EVALUATION' names that are queried
          (string) exp_uid : experiment whose 'alg_list' is read

        Expected output (in dict):
          (dict) MPLD3 plot dictionary, as produced by mpld3.fig_to_dict
        """

        # get list of algorithms associated with project
        alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_uid = algorithm['alg_uid']
            alg_label = algorithm['alg_label']

            list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-EVALUATION',{'alg_uid':alg_uid})
            # draw the evaluations in chronological order
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

            x = [item['num_reported_answers'] for item in list_of_log_dict]
            y = [item['error'] for item in list_of_log_dict]

            try:
                x_min = min(x_min,min(x))
                x_max = max(x_max,max(x))
                y_min = min(y_min,min(y))
                y_max = max(y_max,max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass

            list_of_alg_dicts.append({'legend_label':alg_label,'x':x,'y':y})

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots()
        try:
            # colour the axes patch directly; the 'axisbg' keyword is gone from current matplotlib
            ax.patch.set_facecolor('#EEEEEE')
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered triplets')
            ax.set_ylabel('Error on hold-out set')
            # the bounds are still +/-inf when nothing was logged; only apply finite limits
            if x_min <= x_max:
                ax.set_xlim([x_min,x_max])
            if y_min <= y_max:
                ax.set_ylim([y_min,y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Triplet Test Error', size=14)
            legend = ax.legend(loc=2,ncol=3,mode="expand")
            # NOTE(review): some matplotlib versions may return None when no line is labelled - guard
            if legend is not None:
                for label in legend.get_texts():
                    label.set_fontsize('small')
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # release the figure so pyplot does not keep one alive per call
            plt.close(fig)

        return plot_dict
示例#13
0
    def compute_duration_multiline_plot(self, app_id, exp_uid, task):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the durations to complete the task (wrt to the api call)

        Expected input:
          (string) app_id : used to build the '<app_id>:experiments' and '<app_id>:ALG-DURATION' names that are queried
          (string) exp_uid : experiment whose 'alg_list' is read
          (string) task :  must be in {'getQuery','processAnswer','predict'}

        Expected output (in dict):
          plot_type 'multi_line_plot'
          (string) x_label : 'API Call'
          (float) x_min : smallest x over all algorithms (1 as soon as any call was logged, +inf otherwise)
          (float) x_max : largest number of logged calls for any algorithm (-inf if nothing was logged)
          (string) y_label : 'Duration (s)'
          (float) y_min : smallest duration achieved by any algorithm (+inf if nothing was logged)
          (float) y_max : maximum duration value achieved by any algorithm (-inf if nothing was logged)
          (list of dicts with fields) data :
            (list of strings) t : list of timestamp strings (last three characters dropped)
            (list of ints) x : integers ranging from 1 to the number of elements in y (or t)
            (list of floats) y : list of durations ('app_duration' + 'duration_enqueued', missing fields count as 0.)
            (string) legend_label : alg_label
        """
        alg_list, didSucceed, message = self.db.get(app_id + ':experiments',
                                                    exp_uid, 'alg_list')

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_uid = algorithm['alg_uid']
            alg_label = algorithm['alg_label']

            list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
                app_id + ':ALG-DURATION', {
                    'alg_uid': alg_uid,
                    'task': task
                })
            # number the calls in chronological order
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))

            x = list(range(1, len(list_of_log_dict) + 1))
            y = [
                item.get('app_duration', 0.) + item.get('duration_enqueued', 0.)
                for item in list_of_log_dict
            ]
            # presumably trims microseconds to milliseconds - confirm the timestamp format
            t = [str(item['timestamp'])[:-3] for item in list_of_log_dict]

            try:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass

            list_of_alg_dicts.append({
                'legend_label': alg_label,
                'x': x,
                'y': y,
                't': t,
            })

        return {
            'data': list_of_alg_dicts,
            'plot_type': 'multi_line_plot',
            'x_label': 'API Call',
            'x_min': x_min,
            'x_max': x_max,
            'y_label': 'Duration (s)',
            'y_min': y_min,
            'y_max': y_max,
        }
示例#14
0
    def test_error_multiline_plot(self, app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          app : object providing app_id and exp_uid
          butler : object providing experiment.get, queries.get and ell.get_logs_with_filter

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        # get list of algorithms associated with project
        args = butler.experiment.get(key='args')
        # the hold-out queries are the ones issued under the first algorithm's 'test_alg_label'
        test_alg_label = args['alg_list'][0]['test_alg_label']

        test_S = butler.queries.get(pattern={'exp_uid':app.exp_uid, 'alg_label':test_alg_label})
        # only queries carrying a 'q' triplet can be scored; collect them once for all log entries
        triplets = [query['q'] for query in test_S if 'q' in query]

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in args['alg_list']:
            alg_label = algorithm['alg_label']
            list_of_log_dict,didSucceed,message = butler.ell.get_logs_with_filter(app.app_id+':ALG-EVALUATION',{'exp_uid':app.exp_uid, 'alg_label':alg_label})
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )
            x = []
            y = []
            for item in list_of_log_dict:
                Xd = item['X']
                # 0.5 is reported when there is no hold-out set to evaluate on
                err = 0.5
                if len(test_S)>0:
                    # compute error rate
                    number_correct = 0.
                    for i, j, k in triplets:
                        # equals ||Xd[j]-Xd[k]||^2 - ||Xd[i]-Xd[k]||^2, i.e. positive when i is closer to k than j
                        score =  numpy.dot(Xd[j],Xd[j]) -2*numpy.dot(Xd[j],Xd[k]) + 2*numpy.dot(Xd[i],Xd[k]) - numpy.dot(Xd[i],Xd[i])
                        if score > 0:
                            number_correct += 1.0

                    # the denominator counts every test query, including those without a 'q' field
                    err = 1.0 - number_correct/len(test_S)
                x.append(item['num_reported_answers'])
                y.append(err)

            try:
                x_min = min(x_min,min(x))
                x_max = max(x_max,max(x))
                y_min = min(y_min,min(y))
                y_max = max(y_max,max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass
            list_of_alg_dicts.append({'legend_label':alg_label, 'x':x,'y':y})

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots()
        try:
            # colour the axes patch directly; the 'axisbg' keyword is gone from current matplotlib
            ax.patch.set_facecolor('#EEEEEE')
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered triplets')
            ax.set_ylabel('Error on hold-out set')
            # the bounds are still +/-inf when nothing was logged; only apply finite limits
            if x_min <= x_max:
                ax.set_xlim([x_min,x_max])
            if y_min <= y_max:
                ax.set_ylim([y_min,y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Triplet Test Error', size=14)
            legend = ax.legend(loc=2,ncol=3,mode="expand")
            # NOTE(review): some matplotlib versions may return None when no line is labelled - guard
            if legend is not None:
                for label in legend.get_texts():
                    label.set_fontsize('small')
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # release the figure even if plotting fails
            plt.close(fig)
        return plot_dict
示例#15
0
    def test_error_multiline_plot(self,app_id,exp_uid):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to 
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          None

        Expected output (in dict):
          plot_type 'multi_line_plot'
          (string) x_label : 'Number of answered triplets'
          (float) x_min : 1
          (float) x_max : maximum number of reported answers for any algorithm
          (string) y_label : 'Error on hold-out set'
          (float) y_min : 0.
          (float) y_max : maximum duration value achieved by any algorithm
          (list of dicts with fields) data : 
            (list of strings) t : list of timestamp strings
            (list of floats) x : integers ranging from 1 to maximum number of elements in y (or t)
            (list of floats) y : list of durations
            (string) legend_label : alg_label
        """

        # get list of algorithms associated with project
        alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

        x_min = numpy.float('inf')
        x_max = -numpy.float('inf')
        y_min = numpy.float('inf')
        y_max = -numpy.float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_id = algorithm['alg_id']
            alg_uid = algorithm['alg_uid']
            alg_label = algorithm['alg_label']

            list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-EVALUATION',{'alg_uid':alg_uid})
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

            x = []
            y = []
            t = []
            for item in list_of_log_dict:
                x.append(item['num_reported_answers'])
                _y = float(item['error'])
                y.append(_y)
                t.append(str(item['timestamp'])[:-3])

                if _y >0.:
                    y_min = min(y_min,_y)
        
            alg_dict = {}
            alg_dict['legend_label'] = alg_label
            alg_dict['x'] = x
            alg_dict['y'] = y
            alg_dict['t'] = t
            try:
                x_min = min(x_min,min(x))
                x_max = max(x_max,max(x))
                y_max = max(y_max,max(y))
            except:
                pass

            list_of_alg_dicts.append(alg_dict)

        return_dict = {}
        return_dict['data'] = list_of_alg_dicts
        return_dict['plot_type'] = 'multi_line_plot'
        return_dict['x_label'] = 'Number of answered triplets'
        return_dict['x_min'] = x_min
        return_dict['x_max'] = x_max
        return_dict['y_label'] = 'Error on hold-out set'
        return_dict['y_min'] = y_min
        return_dict['y_max'] = y_max

        return return_dict
示例#16
0
    def most_current_embedding(self,app_id,exp_uid,alg_label):
        """
        Description: Returns embedding in the form of a list of dictionaries, which is convenient for downstream applications

        Expected input:
          (string) app_id : used to build the '<app_id>:experiments' and '<app_id>:ALG-EVALUATION' names that are queried
          (string) exp_uid : experiment whose 'alg_list' is read
          (string) alg_label : must be a valid alg_label contained in alg_list list of dicts

        Expected output (in dict):
          plot_type : 'scatter2d_noaxis'
          (float) x_min : minimum x-value to display in viewing box
          (float) x_max : maximum x-value to display in viewing box
          (float) y_min : minimum y-value to display in viewing box
          (float) y_max : maximum y-value to display in viewing box
          (list of dicts with fields) data :
            (int) index : index of target
            (float) x : x-value of target
            (float) y : y-value of target

        Raises:
          ValueError : alg_label does not appear in the experiment's alg_list
          IndexError : the algorithm has no ALG-EVALUATION log entry yet
        """

        alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

        matching_uids = [algorithm['alg_uid'] for algorithm in alg_list
                         if algorithm['alg_label'] == alg_label]
        if not matching_uids:
            # previously this surfaced as an UnboundLocalError on alg_uid further down
            raise ValueError('alg_label {!r} not found in alg_list of experiment {!r}'.format(alg_label, exp_uid))
        # if several entries share the label the last one is used
        alg_uid = matching_uids[-1]

        list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-EVALUATION',{'alg_uid':alg_uid})
        if not list_of_log_dict:
            # same exception type as the former list_of_log_dict[-1], but with an explanation
            raise IndexError('no ALG-EVALUATION logs available for alg_label {!r}'.format(alg_label))
        list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

        # the most recent evaluation carries the embedding to display
        # NOTE(review): 'X2' is presumably the 2-d embedding, only the first two coordinates are read
        embedding = list_of_log_dict[-1]['X2']

        data = []
        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        for idx,target in enumerate(embedding):
            x_value = target[0]
            y_value = target[1]

            x_min = min(x_min,x_value)
            x_max = max(x_max,x_value)
            y_min = min(y_min,y_value)
            y_max = max(y_max,y_value)

            data.append({'index':idx,'x':x_value,'y':y_value})

        return_dict = {}
        return_dict['x_min'] = x_min
        return_dict['x_max'] = x_max
        return_dict['y_min'] = y_min
        return_dict['y_max'] = y_max
        return_dict['data'] = data
        return_dict['plot_type'] = 'scatter2d_noaxis'

        return return_dict
示例#17
0
    def test_error_multiline_plot(self,app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          app : object providing app_id and exp_uid
          butler : object providing experiment.get, db.get_docs_with_filter and targets.get_targetset

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        args = butler.experiment.get(key='args')
        alg_list = args['alg_list']
        # the hold-out queries are the ones issued under the first algorithm's 'test_alg_label'
        test_alg_label = alg_list[0]['test_alg_label']

        test_queries, didSucceed, message = butler.db.get_docs_with_filter(app.app_id+':queries',{'exp_uid':app.exp_uid, 'alg_label':test_alg_label})

        # (target_index, target_label) pairs; queries without a 'target_index' are ignored
        test_S = [(query['target_index'], query['target_label'])
                            for query in test_queries
                            if 'target_index' in query]

        targets = butler.targets.get_targetset(app.exp_uid)
        targets = sorted(targets,key=lambda x: x['target_id'])
        # build feature vectors with a trailing constant 1. (presumably the bias term) on a copy,
        # so the 'features' lists inside the target dicts are not grown as a side effect
        target_features = [numpy.array(list(target['meta']['features']) + [1.])
                           for target in targets]

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_label = algorithm['alg_label']
            list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app.app_id+':ALG-EVALUATION',{'exp_uid':app.exp_uid, 'alg_label':alg_label})
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )
            x = []
            y = []
            for item in list_of_log_dict:
                weights = numpy.array(item['weights'])

                if test_S:
                    num_wrong = 0
                    for target_index, target_label in test_S:
                        estimated_label = numpy.sign(numpy.dot(target_features[target_index], weights))
                        # a negative product means the predicted and recorded labels disagree
                        if estimated_label*target_label < 0.:
                            num_wrong += 1
                    err = num_wrong/float(len(test_S))
                else:
                    # no hold-out queries: report 0.5 instead of dividing by zero
                    err = 0.5
                x.append(item['num_reported_answers'])
                y.append(err)

            # order the points by number of reported answers; the permutation is kept separate so
            # x keeps its real values and y is reordered consistently with it
            order = sorted(range(len(x)), key=lambda idx: x[idx])
            x, y = [x[idx] for idx in order], [y[idx] for idx in order]

            try:
                x_min = min(x_min,min(x))
                x_max = max(x_max,max(x))
                y_min = min(y_min,min(y))
                y_max = max(y_max,max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass

            list_of_alg_dicts.append({'legend_label':alg_label,'x':x,'y':y})

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots()
        try:
            # colour the axes patch directly; the 'axisbg' keyword is gone from current matplotlib
            ax.patch.set_facecolor('#EEEEEE')
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered queries')
            ax.set_ylabel('Error on hold-out set')
            # the bounds are still +/-inf when nothing was logged; only apply finite limits
            if x_min <= x_max:
                ax.set_xlim([x_min,x_max])
            if y_min <= y_max:
                ax.set_ylim([y_min,y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Test Error', size=14)
            legend = ax.legend(loc=2,ncol=3,mode="expand")
            # NOTE(review): some matplotlib versions may return None when no line is labelled - guard
            if legend is not None:
                for label in legend.get_texts():
                    label.set_fontsize('small')
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # release the figure even if plotting fails
            plt.close(fig)

        return plot_dict
示例#18
0
    def compute_duration_multiline_plot(self, app_id, exp_uid, task):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the durations to complete the task (wrt to the api call)

        Expected input:
          (string) app_id : used to build the "<app_id>:experiments" and "<app_id>:ALG-DURATION" names that are queried
          (string) exp_uid : experiment whose "alg_list" is read
          (string) task :  must be in {'getQuery','processAnswer','predict'}

        Expected output (in dict):
          plot_type 'multi_line_plot'
          (string) x_label : 'API Call'
          (float) x_min : smallest x over all algorithms (1 as soon as any call was logged, +inf otherwise)
          (float) x_max : largest number of logged calls for any algorithm (-inf if nothing was logged)
          (string) y_label : 'Duration (s)'
          (float) y_min : smallest duration achieved by any algorithm (+inf if nothing was logged)
          (float) y_max : maximum duration value achieved by any algorithm (-inf if nothing was logged)
          (list of dicts with fields) data :
            (list of strings) t : list of timestamp strings (last three characters dropped)
            (list of ints) x : integers ranging from 1 to the number of elements in y (or t)
            (list of floats) y : list of durations ("app_duration" + "duration_enqueued", missing fields count as 0.0)
            (string) legend_label : alg_label
        """
        alg_list, didSucceed, message = self.db.get(app_id + ":experiments", exp_uid, "alg_list")

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        infinity = float("inf")
        x_min, x_max = infinity, -infinity
        y_min, y_max = infinity, -infinity
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_uid = algorithm["alg_uid"]
            alg_label = algorithm["alg_label"]

            list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
                app_id + ":ALG-DURATION", {"alg_uid": alg_uid, "task": task}
            )
            # number the calls in chronological order
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item["timestamp"]))

            x = []
            y = []
            t = []
            for call_number, item in enumerate(list_of_log_dict, 1):
                x.append(call_number)
                y.append(item.get("app_duration", 0.0) + item.get("duration_enqueued", 0.0))
                # presumably trims microseconds to milliseconds - confirm the timestamp format
                t.append(str(item["timestamp"])[:-3])

            try:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass

            list_of_alg_dicts.append({"legend_label": alg_label, "x": x, "y": y, "t": t})

        return_dict = {}
        return_dict["data"] = list_of_alg_dicts
        return_dict["plot_type"] = "multi_line_plot"
        return_dict["x_label"] = "API Call"
        return_dict["x_min"] = x_min
        return_dict["x_max"] = x_max
        return_dict["y_label"] = "Duration (s)"
        return_dict["y_min"] = y_min
        return_dict["y_max"] = y_max

        return return_dict
示例#19
0
    def test_error_multiline_plot(self, app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          app : object providing app_id and exp_uid
          butler : object providing experiment.get, db.get_docs_with_filter and targets.get_targetset

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        args = butler.experiment.get(key='args')
        alg_list = args['alg_list']
        # the hold-out queries are the ones issued under the first algorithm's 'test_alg_label'
        test_alg_label = alg_list[0]['test_alg_label']

        test_queries, didSucceed, message = butler.db.get_docs_with_filter(
            app.app_id + ':queries', {
                'exp_uid': app.exp_uid,
                'alg_label': test_alg_label
            })

        # (target_index, target_label) pairs; queries without a 'target_index' are ignored
        test_S = [(query['target_index'], query['target_label'])
                  for query in test_queries if 'target_index' in query]
        num_test = len(test_S)

        targets = butler.targets.get_targetset(app.exp_uid)
        targets = sorted(targets, key=lambda x: x['target_id'])
        # feature vectors get a trailing constant 1. (presumably the bias term); work on a copy
        # so the 'features' lists inside the target dicts are not grown as a side effect
        target_features = []
        for target in targets:
            target_vec = list(target['meta']['features'])
            target_vec.append(1.)
            target_features.append(numpy.array(target_vec))

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_label = algorithm['alg_label']
            list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
                app.app_id + ':ALG-EVALUATION', {
                    'exp_uid': app.exp_uid,
                    'alg_label': alg_label
                })
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))
            points = []
            for item in list_of_log_dict:
                weights = numpy.array(item['weights'])

                # no hold-out queries: report 0.5 instead of dividing by zero
                err = 0.5
                if num_test > 0:
                    disagreements = 0
                    for target_index, target_label in test_S:
                        estimated_label = numpy.sign(
                            numpy.dot(target_features[target_index], weights))
                        # a negative product means the labels do not agree
                        if estimated_label * target_label < 0.:
                            disagreements += 1
                    err = disagreements / float(num_test)
                points.append((item['num_reported_answers'], err))

            # order the points by number of reported answers, keeping each error with its own x
            # (the sort is stable, so ties stay in chronological order)
            points.sort(key=lambda point: point[0])
            x = [point[0] for point in points]
            y = [point[1] for point in points]

            try:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass

            list_of_alg_dicts.append({
                'legend_label': alg_label,
                'x': x,
                'y': y,
            })

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots()
        try:
            # colour the axes patch directly; the 'axisbg' keyword is gone from current matplotlib
            ax.patch.set_facecolor('#EEEEEE')
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],
                        alg_dict['y'],
                        label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered queries')
            ax.set_ylabel('Error on hold-out set')
            # the bounds are still +/-inf when nothing was logged; only apply finite limits
            if x_min <= x_max:
                ax.set_xlim([x_min, x_max])
            if y_min <= y_max:
                ax.set_ylim([y_min, y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Test Error', size=14)
            legend = ax.legend(loc=2, ncol=3, mode="expand")
            # NOTE(review): some matplotlib versions may return None when no line is labelled - guard
            if legend is not None:
                for label in legend.get_texts():
                    label.set_fontsize('small')
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # release the figure even if plotting fails
            plt.close(fig)

        return plot_dict
示例#20
0
    def test_error_multiline_plot(self,app_id,exp_uid):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        The hold-out triplets are obtained by calling predict (predict_id 'get_queries') on the app
        returned by utils.get_app(app_id), for the 'test_alg_label' of the last algorithm in alg_list.

        Expected input:
          (string) app_id : used to look up the app and to build the '<app_id>:experiments' and '<app_id>:ALG-EVALUATION' names
          (string) exp_uid : experiment whose 'alg_list' is read

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """

        # get list of algorithms associated with project
        alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

        # the last entry's 'test_alg_label' is the one that is used
        test_alg_label = alg_list[-1]['test_alg_label']

        predict_args_json = json.dumps({'predict_id':'get_queries','params':{'alg_label':test_alg_label}})
        next_app = utils.get_app(app_id)
        args_out_json,didSucceed,message = next_app.predict(exp_uid, predict_args_json, self.db, self.ell)
        test_S = json.loads(args_out_json)['args']['queries']

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_uid = algorithm['alg_uid']
            alg_label = algorithm['alg_label']

            list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-EVALUATION',{'alg_uid':alg_uid})
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

            x = []
            y = []
            for item in list_of_log_dict:
                Xd = item['Xd']

                # 0.5 is reported when there is no hold-out set to evaluate on
                err = 0.5
                if len(test_S)>0:
                    # compute error rate
                    number_correct = 0.
                    for i,j,k in test_S:
                        # equals ||Xd[j]-Xd[k]||^2 - ||Xd[i]-Xd[k]||^2, i.e. positive when i is closer to k than j
                        score =  numpy.dot(Xd[j],Xd[j]) -2*numpy.dot(Xd[j],Xd[k]) + 2*numpy.dot(Xd[i],Xd[k]) - numpy.dot(Xd[i],Xd[i])
                        if score > 0:
                            number_correct += 1.0

                    err = 1.0 - number_correct/len(test_S)

                x.append(item['num_reported_answers'])
                y.append(err)

            try:
                x_min = min(x_min,min(x))
                x_max = max(x_max,max(x))
                y_min = min(y_min,min(y))
                y_max = max(y_max,max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass

            list_of_alg_dicts.append({'legend_label':alg_label,'x':x,'y':y})

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots()
        try:
            # colour the axes patch directly; the 'axisbg' keyword is gone from current matplotlib
            ax.patch.set_facecolor('#EEEEEE')
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered triplets')
            ax.set_ylabel('Error on hold-out set')
            # the bounds are still +/-inf when nothing was logged; only apply finite limits
            if x_min <= x_max:
                ax.set_xlim([x_min,x_max])
            if y_min <= y_max:
                ax.set_ylim([y_min,y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Triplet Test Error', size=14)
            legend = ax.legend(loc=2,ncol=3,mode="expand")
            # NOTE(review): some matplotlib versions may return None when no line is labelled - guard
            if legend is not None:
                for label in legend.get_texts():
                    label.set_fontsize('small')
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # release the figure so pyplot does not keep one alive per call
            plt.close(fig)

        return plot_dict
示例#21
0
    def test_error_multiline_plot(self, app_id, exp_uid):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          (string) app_id : used to build the '<app_id>:experiments' and '<app_id>:ALG-EVALUATION' names that are queried
          (string) exp_uid : experiment whose 'alg_list' is read

        Expected output (in dict):
          (dict) MPLD3 plot dictionary, as produced by mpld3.fig_to_dict
        """

        # get list of algorithms associated with project
        alg_list, didSucceed, message = self.db.get(app_id + ':experiments',
                                                    exp_uid, 'alg_list')

        # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
        infinity = float('inf')
        x_min, x_max = infinity, -infinity
        y_min, y_max = infinity, -infinity
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_uid = algorithm['alg_uid']
            alg_label = algorithm['alg_label']

            list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
                app_id + ':ALG-EVALUATION', {'alg_uid': alg_uid})
            # draw the evaluations in chronological order
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))

            x = []
            y = []
            for item in list_of_log_dict:
                x.append(item['num_reported_answers'])
                y.append(item['error'])

            try:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))
            except (ValueError, TypeError):
                # best effort: an algorithm without (comparable) points leaves the bounds as they are
                pass

            list_of_alg_dicts.append({
                'legend_label': alg_label,
                'x': x,
                'y': y,
            })

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots()
        try:
            # colour the axes patch directly; the 'axisbg' keyword is gone from current matplotlib
            ax.patch.set_facecolor('#EEEEEE')
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],
                        alg_dict['y'],
                        label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered triplets')
            ax.set_ylabel('Error on hold-out set')
            # the bounds are still +/-inf when nothing was logged; only apply finite limits
            if x_min <= x_max:
                ax.set_xlim([x_min, x_max])
            if y_min <= y_max:
                ax.set_ylim([y_min, y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Triplet Test Error', size=14)
            legend = ax.legend(loc=2, ncol=3, mode="expand")
            # NOTE(review): some matplotlib versions may return None when no line is labelled - guard
            if legend is not None:
                for label in legend.get_texts():
                    label.set_fontsize('small')
            plot_dict = mpld3.fig_to_dict(fig)
        finally:
            # release the figure so pyplot does not keep one alive per call
            plt.close(fig)

        return plot_dict
示例#22
0
  def compute_duration_multiline_plot(self,app_id,exp_uid,task):
    """
    Description: Returns multiline plot where there is a one-to-one mapping lines to
    algorithms and each line indicates the durations to complete the task (wrt to the api call)

    Expected input:
      (string) app_id : used to build the '<app_id>:experiments' and '<app_id>:ALG-DURATION' names that are queried
      (string) exp_uid : experiment whose 'alg_list' is read
      (string) task :  must be in {'getQuery','reportAnswer','predict'}

    Expected output (in dict):
      plot_type 'multi_line_plot'
      (string) x_label : 'API Call'
      (float) x_min : smallest x over all algorithms (1 as soon as any call was logged, +inf otherwise)
      (float) x_max : largest number of logged calls for any algorithm (-inf if nothing was logged)
      (string) y_label : 'Duration (s)'
      (float) y_min : smallest duration achieved by any algorithm (+inf if nothing was logged)
      (float) y_max : maximum duration value achieved by any algorithm (-inf if nothing was logged)
      (list of dicts with fields) data :
        (list of strings) t : list of timestamp strings (last three characters dropped)
        (list of ints) x : integers ranging from 1 to the number of elements in y (or t)
        (list of floats) y : list of durations ('app_duration' + 'duration_enqueued', missing fields count as 0.)
        (string) legend_label : alg_label
    """
    alg_list,didSucceed,message = self.db.get(app_id+':experiments',exp_uid,'alg_list')

    # builtin float: the numpy.float alias no longer exists in NumPy >= 1.24
    x_min = float('inf')
    x_max = -float('inf')
    y_min = float('inf')
    y_max = -float('inf')
    list_of_alg_dicts = []

    for algorithm in alg_list:
      alg_uid = algorithm['alg_uid']
      alg_label = algorithm['alg_label']

      list_of_log_dict,didSucceed,message = self.ell.get_logs_with_filter(app_id+':ALG-DURATION',{'alg_uid':alg_uid,'task':task})
      # number the calls in chronological order
      list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

      x = list(range(1,len(list_of_log_dict)+1))
      y = [ item.get('app_duration',0.) + item.get('duration_enqueued',0.) for item in list_of_log_dict ]
      # presumably trims microseconds to milliseconds - confirm the timestamp format
      t = [ str(item['timestamp'])[:-3] for item in list_of_log_dict ]

      try:
        x_min = min(x_min,min(x))
        x_max = max(x_max,max(x))
        y_min = min(y_min,min(y))
        y_max = max(y_max,max(y))
      except (ValueError, TypeError):
        # best effort: an algorithm without (comparable) points leaves the bounds as they are
        pass

      list_of_alg_dicts.append({'legend_label':alg_label,'x':x,'y':y,'t':t})

    return_dict = {}
    return_dict['data'] = list_of_alg_dicts
    return_dict['plot_type'] = 'multi_line_plot'
    return_dict['x_label'] = 'API Call'
    return_dict['x_min'] = x_min
    return_dict['x_max'] = x_max
    return_dict['y_label'] = 'Duration (s)'
    return_dict['y_min'] = y_min
    return_dict['y_max'] = y_max

    return return_dict