Example #1
def evaluation(input_folder, output_file):

    analyzed_files = (Path(input_folder)).listdir('*analyzed.csv')
    REL = len(analyzed_files)
    REC = 0
    RR = 0
    ARI = 0
    BRI = 0
    for f_path in analyzed_files:
        topic_id = f_path.basename()[3:-13]

        topic_row = get_topic_info(topic_id)

        detection_time = dt_parser(str(topic_row[1]))
        RR_temp = 0

        for i, wind_time, count, eta, trend_analysis in pandas.read_csv(
                f_path, sep=',', header=None).itertuples():
            if trend_analysis == 1:
                REC += 1
                current_time = dt_parser(wind_time)
                if current_time.strftime(
                        CORPUS_DATE_FORMAT) == detection_time.strftime(
                            CORPUS_DATE_FORMAT):
                    RR_temp = 1
                elif current_time < detection_time:
                    BRI += 1
                else:
                    ARI += 1
        RR += RR_temp
    NR = REL - RR
    if not Path(output_file).exists():
        header = 'k,folder,rel,rec,rr,nr,bri,ari,recobrado,precision\n'
        with open(output_file, 'x') as out:
            out.write(header)

    with open(output_file, 'a') as out:
        csv_output = f'{Path(input_folder).basename()},{input_folder},{REL},{REC},{RR},{NR},{BRI},{ARI},{RR/float(REL):1.3f},{RR/float(RR + BRI):1.3f}\n'
        out.write(csv_output)
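
A minimal sketch (not part of the original code) of the two ratios written to the last CSV columns above: "recobrado" is the fraction of relevant topics recovered (RR/REL) and the precision column is RR/(RR + BRI); the function name and sample numbers below are illustrative only.

def detection_ratios(REL, RR, BRI):
    """Recall over relevant topics and precision, as in csv_output above."""
    recall = RR / float(REL)           # 'recobrado' column
    precision = RR / float(RR + BRI)   # detections at the reference time vs. those plus early flags
    return recall, precision

# e.g. 8 of 10 topics recovered, 3 windows flagged before the reference detection time
print(detection_ratios(REL=10, RR=8, BRI=3))  # (0.8, 0.7272...)
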
Example #2
def analyze(generator, model):
    """
    This function acts on CSV data for a single counter.
    It loops over the items generated by the first argument.
    Each item is expected to be a tuple of:
        [interval_start_time] [interval_duration_in_sec] [interval_count]
    Each count is used to update the model, and the model result is added to the return list.
    """

    logger = logging.getLogger("analyze")
    if logger.handlers == []:
        fmtr = logging.Formatter(
            '%(asctime)s %(name)s:%(lineno)s - %(levelname)s - %(message)s')
        hndlr = logging.StreamHandler()
        hndlr.setFormatter(fmtr)
        logger.addHandler(hndlr)

    output_data = []
    for line in generator:
        try:
            time_interval_start = dt_parser(line[0])
        except ValueError:
            print(line[0])
            sys.exit()
        time_interval_duration = line[1]
        count = float(line[2])

        model.update(count=count, interval_start_time=time_interval_start)
        # result = float(model.get_result())

        trend, result = model.get_result()
        result = float(result)

        # trim digits in outputs
        if count > 0:
            trimmed_count = round(count, -int(floor(log10(count))) + 1)
        else:
            trimmed_count = 0
        if result > 0:
            trimmed_result = round(result, -int(floor(log10(result))) + 1)
        else:
            trimmed_result = 0

        # output_data.append( (str(time_interval_start), count, trimmed_result) )
        output_data.append(
            (str(time_interval_start), count, trimmed_result, trend))

        logger.debug("{0} {1:>8} {2}".format(time_interval_start,
                                             trimmed_count, trimmed_result))

    return output_data
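
A hedged driver sketch for analyze(): the rows and the model class below are invented for illustration, and it assumes the surrounding module imports logging, sys, floor/log10 and aliases dateutil.parser.parse as dt_parser, since analyze() relies on all of them.

from dateutil.parser import parse as dt_parser   # assumed alias used throughout these examples

class ConstantModel:
    """Toy model: always reports 'no trend' (0) and echoes the last count."""
    def __init__(self):
        self.last = 0.0
    def update(self, count, interval_start_time):
        self.last = count
    def get_result(self):
        return 0, self.last   # (trend, result)

# each tuple: (interval_start_time, interval_duration_in_sec, interval_count)
rows = [
    ("2015-06-01 00:00:00", "3600", "12"),
    ("2015-06-01 01:00:00", "3600", "47"),
]
print(analyze(iter(rows), ConstantModel()))
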
Example #3
def check_date_range(args, img_time):
    """Check image time versus included date range

    :param args: (object) argparse object.
    :param img_time: date-time string
    :return: boolean
    """
    # Convert image datetime to unix time
    timestamp = dt_parser(img_time)
    time_delta = timestamp - datetime.datetime(1970, 1, 1)
    unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
    # Does the image date-time fall outside or inside the included range
    if unix_time < args.start_date or unix_time > args.end_date:
        return False
    else:
        return True
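
A possible call sketch for check_date_range(), assuming dt_parser is dateutil.parser.parse; the Namespace stands in for parsed argparse arguments, with start_date/end_date already given in Unix time.

import argparse

args = argparse.Namespace(
    start_date=1433116800,   # 2015-06-01 00:00:00 UTC
    end_date=1435708799,     # 2015-06-30 23:59:59 UTC
)
print(check_date_range(args, "2015-06-15 12:00:00"))  # True
print(check_date_range(args, "2015-07-15 12:00:00"))  # False
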
Example #4
def check_date_range(args, img_time):
    """Check image time versus included date range

    :param args: (object) argparse object.
    :param img_time: date-time string
    :return: boolean
    """
    # Convert image datetime to unix time
    timestamp = dt_parser(img_time)
    time_delta = timestamp - datetime.datetime(1970, 1, 1)
    unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
    # Does the image date-time fall outside or inside the included range
    if unix_time < args.start_date or unix_time > args.end_date:
        return False
    else:
        return True
Example #5
def _check_date_range(start_date, end_date, img_time):
    """Check image time versus included date range.

    Args:
        start_date: Start date in Unix time
        end_date:   End date in Unix time
        img_time:   Image datetime

    :param start_date: int
    :param end_date: int
    :param img_time: str
    :return: bool
    """

    # Convert image datetime to unix time
    timestamp = dt_parser(img_time)
    time_delta = timestamp - datetime.datetime(1970, 1, 1)
    unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
    # Does the image date-time fall outside or inside the included range
    if unix_time < start_date or unix_time > end_date:
        return False
    else:
        return True
Example #6
def _check_date_range(start_date, end_date, img_time):
    """Check image time versus included date range.

    Args:
        start_date: Start date in Unix time
        end_date:   End date in Unix time
        img_time:   Image datetime

    :param start_date: int
    :param end_date: int
    :param img_time: str
    :return: bool
    """

    # Convert image datetime to unix time
    timestamp = dt_parser(img_time.replace("-", ""))
    time_delta = timestamp - datetime.datetime(1970, 1, 1)
    unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
    # Does the image date-time fall outside or inside the included range
    if unix_time < start_date or unix_time > end_date:
        return False
    else:
        return True
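
For reference, the manual epoch arithmetic these helpers use is equivalent to timedelta.total_seconds() truncated to whole seconds (for naive timestamps with no sub-second part); a quick self-check, again assuming dt_parser is dateutil.parser.parse:

import datetime
from dateutil.parser import parse as dt_parser  # assumed alias

timestamp = dt_parser("2015-06-15 12:34:56")
time_delta = timestamp - datetime.datetime(1970, 1, 1)
unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
assert unix_time == int(time_delta.total_seconds())
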
Example #7
def process_results(args):
    """
  Get results from individual files.
  Parse the results and recompile for SQLite.
  
  Args:
    args: (object) argparse object.
  Returns:
    
  Raises:
    
  """
    # Add a header to each output file
    # Metadata table
    metadata_fields = ["image_id", "run_id"]
    metadata_fields.extend(args.valid_meta.keys())
    # args.metadata_file.write('#' + '\t'.join(map(str, metadata_fields)) + '\n')

    # Feature data table
    feature_fields = [
        "area",
        "hull-area",
        "solidity",
        "perimeter",
        "width",
        "height",
        "longest_axis",
        "center-of-mass-x",
        "center-of-mass-y",
        "hull_vertices",
        "in_bounds",
        "ellipse_center_x",
        "ellipse_center_y",
        "ellipse_major_axis",
        "ellipse_minor_axis",
        "ellipse_angle",
        "ellipse_eccentricity",
    ]
    opt_feature_fields = [
        "y-position",
        "height_above_bound",
        "height_below_bound",
        "above_bound_area",
        "percent_above_bound_area",
        "below_bound_area",
        "percent_below_bound_area",
    ]

    # args.features_file.write('#' + '\t'.join(map(str, feature_fields + opt_feature_fields)) + '\n')

    # Signal channel data table
    signal_fields = ["bin-number", "channel_name", "values"]

    # bin-number	blue	green	red	lightness	green-magenta	blue-yellow	hue	saturation	value

    # Initialize the database with the schema template if create is true
    if args.create:
        # Create SQL structure based on accepted metadata and features
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `runinfo` (`run_id` INTEGER PRIMARY KEY, `datetime` INTEGER NOT NULL, `command` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `metadata` (`image_id` INTEGER PRIMARY KEY, `run_id` INTEGER NOT NULL, `"
            + "` TEXT NOT NULL, `".join(map(str, metadata_fields[2:]))
            + "` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `features` (`image_id` INTEGER PRIMARY KEY, `"
            + "` TEXT NOT NULL, `".join(map(str, feature_fields + opt_feature_fields))
            + "` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `analysis_images` (`image_id` INTEGER NOT NULL, `type` TEXT NOT NULL, `image_path` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `signal` (`image_id` INTEGER NOT NULL, `"
            + "` TEXT NOT NULL, `".join(map(str, signal_fields))
            + "` TEXT NOT NULL);"
        )

    # Walk through the image processing job directory and process data from each file
    for (dirpath, dirnames, filenames) in os.walk(args.jobdir):
        for filename in filenames:
            meta = {}
            images = {}
            features = []
            feature_data = {}
            signal = []
            signal_data = {}
            boundary = []
            boundary_data = {}
            # Open results file
            with open(dirpath + "/" + filename) as results:
                # For each line in the file
                for row in results:
                    # Remove the newline character
                    row = row.rstrip("\n")
                    # Split the line by tab characters
                    cols = row.split("\t")
                    # If the data is of class meta, store in the metadata dictionary
                    if cols[0] == "META":
                        meta[cols[1]] = cols[2]
                    # If the data is of class image, store in the image dictionary
                    elif cols[0] == "IMAGE":
                        images[cols[1]] = cols[2]
                    # If the data is of class shapes, store in the shapes dictionary
                    elif cols[0] == "HEADER_SHAPES":
                        features = cols
                    elif cols[0] == "SHAPES_DATA":
                        for i, datum in enumerate(cols):
                            if i > 0:
                                feature_data[features[i]] = datum
                    # If the data is of class histogram/signal, store in the signal dictionary
                    elif cols[0] == "HEADER_HISTOGRAM":
                        signal = cols
                    elif cols[0] == "HISTOGRAM_DATA":
                        for i, datum in enumerate(cols):
                            if i > 0:
                                signal_data[signal[i]] = datum
                    # If the data is of class boundary (horizontal rule), store in the boundary dictionary
                    elif "HEADER_BOUNDARY" in cols[0]:
                        boundary = cols
                        # Temporary hack
                        boundary_data["y-position"] = cols[0].replace("HEADER_BOUNDARY", "")
                    elif cols[0] == "BOUNDARY_DATA":
                        for i, datum in enumerate(cols):
                            if i > 0:
                                boundary_data[boundary[i]] = datum

            # Check to see if the image failed, if not continue
            # Convert image datetime to unix time
            timestamp = dt_parser(meta["timestamp"])
            time_delta = timestamp - datetime.datetime(1970, 1, 1)
            unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds

            # Print the image metadata to the aggregate output file
            args.image_id += 1
            meta["image_id"] = args.image_id
            meta["run_id"] = args.run_id
            meta["unixtime"] = unix_time

            meta_table = []
            for field in metadata_fields:
                meta_table.append(meta[field])

            if len(feature_data) != 0:
                args.metadata_file.write("|".join(map(str, meta_table)) + "\n")

                # Print the image feature data to the aggregate output file
                feature_data["image_id"] = args.image_id

                # Boundary data is optional, if it's not there we need to add in placeholder data
                if len(boundary_data) == 0:
                    for field in opt_feature_fields:
                        boundary_data[field] = 0
                feature_data.update(boundary_data)

                feature_table = [args.image_id]
                for field in feature_fields + opt_feature_fields:
                    feature_table.append(feature_data[field])

                args.features_file.write("|".join(map(str, feature_table)) + "\n")

                # Print the analysis image data to the aggregate output file
                for img_type in images:
                    args.analysis_images_file.write(
                        "|".join(map(str, (args.image_id, img_type, images[img_type]))) + "\n"
                    )

                # Print the image signal data to the aggregate output file
                for key in signal_data.keys():
                    if key != "bin-number":
                        signal_data[key] = signal_data[key].replace("[", "")
                        signal_data[key] = signal_data[key].replace("]", "")
                        signal_table = [args.image_id, signal_data["bin-number"], key, signal_data[key]]
                        args.signal_file.write("|".join(map(str, signal_table)) + "\n")
            else:
                args.fail_log.write("|".join(map(str, meta_table)) + "\n")
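
A sketch of the tab-separated results-file layout this walker expects, reconstructed from the branches above; every value and the file name are made up for illustration, and the META keys must match args.valid_meta.

sample_rows = [
    "META\ttimestamp\t2015-06-15 12:00:00",
    "META\tcamera\tSV1",                         # any key expected in args.valid_meta
    "IMAGE\tmask\t/tmp/mask.png",
    "HEADER_SHAPES\tarea\tperimeter",
    "SHAPES_DATA\t1024\t212",
    "HEADER_HISTOGRAM\tbin-number\tblue",
    "HISTOGRAM_DATA\t256\t[0, 3, 7]",
    "HEADER_BOUNDARY300\ty-position\theight_above_bound",
    "BOUNDARY_DATA\t300\t42",
]
with open("sample_result.txt", "w") as fh:       # hypothetical file name
    fh.write("\n".join(sample_rows) + "\n")
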
Example #8
def plot(input_generator,config):            
    """
    input_generator is a generator of tuples with the following structure:
        (time_interval_start, count, eta)
    """
    
    logger = logging.getLogger("plot") 
    if logger.handlers == []:
        fmtr = logging.Formatter('%(asctime)s %(name)s:%(lineno)s - %(levelname)s - %(message)s') 
        hndlr = logging.StreamHandler()
        hndlr.setFormatter(fmtr)
        logger.addHandler(hndlr) 
    
    # if this throws a configparser.NoSectionError,
    # then let it propagate uncaught, since nothing will work
    plot_config = config['plot'] 
  
    # get parameters and set defaults
    logscale_eta = plot_config.getboolean('logscale_eta',fallback=False)
    use_x_var = plot_config.getboolean('use_x_var',fallback=True)
    do_plot_parameters = plot_config.getboolean('do_plot_parameters',fallback=False)
    start_tm = dt_parser( plot_config.get("start_time","1900-01-01") )
    stop_tm = dt_parser( plot_config.get("stop_time","2050-01-01") )
    rebin_factor = plot_config.getint("rebin_factor",fallback=1)

    rebin_config = dict(config.items("rebin"))
    plot_config["x_unit"] = "{0:d} {1:s}" .format( int(rebin_config["n_binning_unit"]) * rebin_factor, rebin_config["binning_unit"])

    """
    # only useful if we revive 'counter_name' parameter
    if 'counter_name' in rebin_config:
        if plot_config["plot_title"] == "":
            plot_config["plot_title"] = rebin_config["counter_name"]
        if plot_config["plot_file_name"] == "":
            plot_config["plot_file_name"] = rebin_config["counter_name"]
    """

    # TODO: should just put this in a dataframe
    data = [(dt_parser(tup[0]),float(tup[1]),float(tup[2])) for tup in input_generator if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm ]
    
    if rebin_factor <= 1:
        tbs = [tup[0] for tup in data]
        cts = [tup[1] for tup in data]
        eta = [tup[2] for tup in data]
    # do a hacky rebin, just for plotting 
    else:
        tbs = []
        cts = []
        eta = []
        tbs_tmp = None
        cts_tmp = 0
        eta_tmp = 0
        counter = 0
        for tbs_i,cts_i,eta_i in data:
            tbs_tmp = tbs_i
            cts_tmp += cts_i
            eta_tmp += eta_i
            counter += 1
            if counter == rebin_factor:
                counter = 0
                tbs.append(tbs_tmp)
                cts.append(cts_tmp)
                eta.append(eta_tmp/float(rebin_factor))
                tbs_tmp = None
                cts_tmp = 0
                eta_tmp = 0

    if cts == []:
        sys.stderr.write("'cts' list is empty\n") 
        return -1
    max_cts = max(cts)
    min_cts = min(cts)
   
    # build the plotting surface
    fig,(ax1,ax2) = plt.subplots(2,sharex=True) 
   
    # plot the data
    if use_x_var:
        ax1.plot(tbs,cts,'k-') 
    else:
        ax1.plot(cts,'k-') 
        ax1.set_xlim(0,len(cts))
   
    plotter="plot"
    if logscale_eta:
        plotter="semilogy"
    if use_x_var:
        getattr(ax2,plotter)(tbs,eta,'r')
    else:
        getattr(ax2,plotter)(eta,'r')
        ax2.set_xlim(0,len(eta))

    # adjust spacing
    ax1.set_ylim(min_cts*0.9,max_cts*1.7)
    min_eta = 0
    if min(eta) > 0:
        min_eta = min(eta) * 0.9
    ax2.set_ylim(min_eta, max(eta)*1.1)

    # remove the horizontal space between plots
    plt.subplots_adjust(hspace=0)
    # modify ticklabels
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        tl.set_fontsize(10)
    for tl in ax2.get_yticklabels():
        tl.set_color('r')
        tl.set_fontsize(10)
   
    # y labels
    y_label = plot_config.get('y_label','counts')
    ax1.set_ylabel(y_label,color='k',fontsize=12)
    ax2.set_ylabel("eta",color='r',fontsize=12)

    ax1.yaxis.set_major_locator(plticker.MaxNLocator(4))
    ax2.yaxis.set_major_locator(plticker.MaxNLocator(5))

    # x date formatting
    if use_x_var:
        day_locator = mdates.DayLocator()
        hour_locator = mdates.HourLocator()
        day_formatter = mdates.DateFormatter('%Y-%m-%d')
        ax2.xaxis.set_major_formatter( day_formatter ) 
        ax2.xaxis.set_major_locator( day_locator ) 
        ax2.xaxis.set_minor_locator( hour_locator ) 
        fig.autofmt_xdate()
    ax2.set_xlabel("time ({} bins)".format(plot_config["x_unit"].rstrip('s')))

    ax1.grid(True)
    ax2.grid(True)
 
    # build text box for parameter display
    if do_plot_parameters:
        props = dict(boxstyle='round',facecolor='white', alpha=0.5)
        model_name = config['analyze']['model_name']
        model_pars = ""
        for k,v in list(config[model_name + '_model'].items()):
            model_pars += "{}: {}\n".format(k,v) 
        text_str = "model: {}\n{}".format(model_name,str(model_pars))
        ax1.text(0.05,0.95,
                text_str,
                bbox=props,
                verticalalignment='top',
                fontsize=8,
                transform=ax1.transAxes
                )
    
    plt.suptitle("{}".format( plot_config.get("plot_title","SET A PLOT TITLE")))
    
    # write the image 
    try:
        os.makedirs(plot_config.get("plot_dir",".")) 
    except OSError:
        pass

    plot_file_name = "{}/{}.{}".format(
            plot_config.get("plot_dir",".").rstrip('/'), 
            plot_config.get("plot_file_name","plot"),
            plot_config.get("plot_file_extension","png")
            )
    plt.savefig(plot_file_name) 
    plt.close()
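
A minimal config sketch that satisfies the lookups in plot() above, assuming a standard configparser.ConfigParser; section and option names are taken from the code, every value is a placeholder.

import configparser

config = configparser.ConfigParser()
config.read_string("""
[rebin]
binning_unit = hours
n_binning_unit = 1

[analyze]
model_name = poisson

[poisson_model]
alpha = 0.99

[plot]
logscale_eta = true
use_x_var = true
do_plot_parameters = false
start_time = 2015-06-01
stop_time = 2015-06-30
rebin_factor = 2
y_label = counts
plot_title = example counter
plot_dir = ./plots
plot_file_name = example
plot_file_extension = png
""")
# plot(row_generator, config)   # row_generator yields (time_interval_start, count, eta) tuples
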
Example #9
def rebin(input_generator,
        start_time = str(datetime.datetime(1970,1,1)),
        stop_time = str(datetime.datetime(2020,1,1)),
        binning_unit = 'hours',
        n_binning_unit = 1,
        **kwargs
        ):
    """
    This function must be passed the following positional argument:
        input_generator
    Optional keyword arguments are:
        binning_unit
        n_binning_unit
        stop_time
        start_time

    The 'input_generator' object must yield tuples like:
        [interval start time], [interval duration in sec], [interval count]

    The function returns a list of tuples like:
        [new interval start time], [new interval duration in sec], [new interval count]
    """
    
    logger = logging.getLogger("rebin")
    
    start_time = dt_parser(start_time)  
    stop_time = dt_parser(stop_time)  

    # these are just for keeping track of what range of date/times we observe in the data
    max_stop_time = datetime.datetime(1970,1,1)
    min_start_time = datetime.datetime(2020,1,1)

    input_data = []

    # put the data into a list of (TimeBucket, count) tuples
    for line in input_generator:
        
        try:
            this_start_time = dt_parser(line[0])
        except ValueError:
            continue
        dt = datetime.timedelta(seconds=int(float(line[1])))
        this_stop_time = this_start_time + dt
       
        if this_stop_time > stop_time:
            continue
        if this_start_time < start_time:
            continue
        time_bucket = TimeBucket(this_start_time, this_stop_time)  
        
        count = line[2]
        input_data.append((time_bucket, count)) 
        
        if this_stop_time > max_stop_time:
            max_stop_time = this_stop_time
        if this_start_time < min_start_time:
            min_start_time = this_start_time

    input_data_sorted = sorted(input_data)

    # make a grid with appropriate bin size
    grid_start_time = datetime_truncate.truncate(min_start_time,binning_unit.rstrip('s'))
    grid_stop_time = datetime_truncate.truncate(max_stop_time,binning_unit.rstrip('s'))
    grid_dt = datetime.timedelta(**{binning_unit:int(n_binning_unit)})

    tb_stop_time = grid_start_time + grid_dt
    tb = TimeBucket(grid_start_time,tb_stop_time)

    # make list of TimeBuckets for bins
    grid = []
    while tb.stop_time <= grid_stop_time:
        #logger.debug("{}".format(tb))
        grid.append(tb)
        tb_start_time = tb.stop_time
        tb_stop_time = tb_start_time + grid_dt
        tb = TimeBucket(tb_start_time,tb_stop_time) 
    grid.append(tb)


    # add data to a dictionary with keys mapped to the grid indices
    output_data = collections.defaultdict(float)
    for input_tb,input_count in input_data_sorted:
        logger.debug("input. TB: {}, count: {}".format(input_tb,input_count))
       
        for grid_tb in grid:
            if input_tb in grid_tb:
                idx = grid.index(grid_tb) 
                output_data[idx] += float(input_count)
                break
            elif input_tb.intersects(grid_tb):
                # assign partial count of input_tb to grid_tb
                idx_lower = grid.index(grid_tb) 
                frac_lower = input_tb.get_fraction_overlapped_by(grid_tb)  
                output_data[idx_lower] += (float(input_count) * frac_lower)
                
                try:
                    idx = idx_lower + 1
                    frac = input_tb.get_fraction_overlapped_by(grid[idx])  
                    while frac > 0:
                        output_data[idx] += (frac * float(input_count))
                        idx += 1
                        frac = input_tb.get_fraction_overlapped_by(grid[idx])   
                except IndexError:
                    pass
                
                break
            else:
                pass

    # put data back into a sorted list of tuples
    sorted_output_data = []

    # use these to strip off leading and trailing zero-count entries
    prev_count = 0
    last_non_zero_ct_idx = -1

    # the grid is already time ordered, and the output_data are indexed
    for idx,dt in enumerate(grid):
        if idx in output_data:
            count = output_data[idx]
            last_non_zero_ct_idx = idx
        else:
            count = 0
        if count != 0 or prev_count != 0:

            if count > 0:
                trimmed_count = int(count)
                #trimmed_count = round(count, -int(floor(log10(count)))+1) 
            else:
                trimmed_count = 0
            sorted_output_data.append((str(dt.start_time),dt.size().total_seconds(),trimmed_count)) 
        
        prev_count = count
    sorted_output_data = sorted_output_data[:last_non_zero_ct_idx+1]
    
    # return the data structure
    return sorted_output_data
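
rebin() leans on a TimeBucket class and datetime_truncate from elsewhere in this project; the sketch below is only a guess at the interval interface it assumes (ordering, containment, intersection, fractional overlap), not the real implementation.

class TimeBucket:
    """Hypothetical half-open time interval [start_time, stop_time)."""

    def __init__(self, start_time, stop_time):
        self.start_time = start_time
        self.stop_time = stop_time

    def size(self):
        return self.stop_time - self.start_time

    def __lt__(self, other):
        return (self.start_time, self.stop_time) < (other.start_time, other.stop_time)

    def __contains__(self, other):
        # "other in self": other lies entirely inside this bucket
        return self.start_time <= other.start_time and other.stop_time <= self.stop_time

    def intersects(self, other):
        return other.start_time < self.stop_time and self.start_time < other.stop_time

    def get_fraction_overlapped_by(self, other):
        # fraction of *this* bucket's duration covered by 'other'
        overlap = min(self.stop_time, other.stop_time) - max(self.start_time, other.start_time)
        return max(overlap.total_seconds(), 0.0) / self.size().total_seconds()
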
Example #10
def process_results(args):
    """
  Get results from individual files.
  Parse the results and recompile for SQLite.
  
  Args:
    args: (object) argparse object.
  Returns:
    
  Raises:
    
  """
    # Add a header to each output file
    # Metadata table
    metadata_fields = ['image_id', 'run_id']
    metadata_fields.extend(args.valid_meta.keys())
    #args.metadata_file.write('#' + '\t'.join(map(str, metadata_fields)) + '\n')

    # Feature data table
    feature_fields = [
        'area', 'hull-area', 'solidity', 'perimeter', 'width', 'height',
        'longest_axis', 'center-of-mass-x', 'center-of-mass-y',
        'hull_vertices', 'in_bounds', 'ellipse_center_x', 'ellipse_center_y',
        'ellipse_major_axis', 'ellipse_minor_axis', 'ellipse_angle',
        'ellipse_eccentricity'
    ]
    opt_feature_fields = [
        'y-position', 'height_above_bound', 'height_below_bound',
        'above_bound_area', 'percent_above_bound_area', 'below_bound_area',
        'percent_below_bound_area'
    ]

    #args.features_file.write('#' + '\t'.join(map(str, feature_fields + opt_feature_fields)) + '\n')

    # Signal channel data table
    signal_fields = ['bin-number', 'channel_name', 'values']

    #bin-number	blue	green	red	lightness	green-magenta	blue-yellow	hue	saturation	value

    # Initialize the database with the schema template if create is true
    if args.create:
        # Create SQL structure based on accepted metadata and features
        args.sq.execute(
            'CREATE TABLE IF NOT EXISTS `runinfo` (`run_id` INTEGER PRIMARY KEY, `datetime` INTEGER NOT NULL, `command` TEXT NOT NULL);'
        )
        args.sq.execute(
            'CREATE TABLE IF NOT EXISTS `metadata` (`image_id` INTEGER PRIMARY KEY, `run_id` INTEGER NOT NULL, `'
            + '` TEXT NOT NULL, `'.join(map(str, metadata_fields[2:])) +
            '` TEXT NOT NULL);')
        args.sq.execute(
            'CREATE TABLE IF NOT EXISTS `features` (`image_id` INTEGER PRIMARY KEY, `'
            + '` TEXT NOT NULL, `'.join(
                map(str, feature_fields + opt_feature_fields)) +
            '` TEXT NOT NULL);')
        args.sq.execute(
            'CREATE TABLE IF NOT EXISTS `analysis_images` (`image_id` INTEGER NOT NULL, `type` TEXT NOT NULL, `image_path` TEXT NOT NULL);'
        )
        args.sq.execute(
            'CREATE TABLE IF NOT EXISTS `signal` (`image_id` INTEGER NOT NULL, `'
            + '` TEXT NOT NULL, `'.join(map(str, signal_fields)) +
            '` TEXT NOT NULL);')

    # Walk through the image processing job directory and process data from each file
    for (dirpath, dirnames, filenames) in os.walk(args.jobdir):
        for filename in filenames:
            meta = {}
            images = {}
            features = []
            feature_data = {}
            signal = []
            signal_data = {}
            boundary = []
            boundary_data = {}
            # Open results file
            with open(dirpath + '/' + filename) as results:
                # For each line in the file
                for row in results:
                    # Remove the newline character
                    row = row.rstrip('\n')
                    # Split the line by tab characters
                    cols = row.split('\t')
                    # If the data is of class meta, store in the metadata dictionary
                    if cols[0] == 'META':
                        meta[cols[1]] = cols[2]
                    # If the data is of class image, store in the image dictionary
                    elif cols[0] == 'IMAGE':
                        images[cols[1]] = cols[2]
                    # If the data is of class shapes, store in the shapes dictionary
                    elif cols[0] == 'HEADER_SHAPES':
                        features = cols
                    elif cols[0] == 'SHAPES_DATA':
                        for i, datum in enumerate(cols):
                            if i > 0:
                                feature_data[features[i]] = datum
                    # If the data is of class histogram/signal, store in the signal dictionary
                    elif cols[0] == 'HEADER_HISTOGRAM':
                        signal = cols
                    elif cols[0] == 'HISTOGRAM_DATA':
                        for i, datum in enumerate(cols):
                            if i > 0:
                                signal_data[signal[i]] = datum
                    # If the data is of class boundary (horizontal rule), store in the boundary dictionary
                    elif 'HEADER_BOUNDARY' in cols[0]:
                        boundary = cols
                        # Temporary hack
                        boundary_data['y-position'] = cols[0].replace(
                            'HEADER_BOUNDARY', '')
                    elif cols[0] == 'BOUNDARY_DATA':
                        for i, datum in enumerate(cols):
                            if i > 0:
                                boundary_data[boundary[i]] = datum

            # Check to see if the image failed, if not continue
            # Convert image datetime to unix time
            timestamp = dt_parser(meta['timestamp'])
            time_delta = timestamp - datetime.datetime(1970, 1, 1)
            unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds

            # Print the image metadata to the aggregate output file
            args.image_id += 1
            meta['image_id'] = args.image_id
            meta['run_id'] = args.run_id
            meta['unixtime'] = unix_time

            meta_table = []
            for field in metadata_fields:
                meta_table.append(meta[field])

            if (len(feature_data) != 0):
                args.metadata_file.write('|'.join(map(str, meta_table)) + '\n')

                # Print the image feature data to the aggregate output file
                feature_data['image_id'] = args.image_id

                # Boundary data is optional, if it's not there we need to add in placeholder data
                if len(boundary_data) == 0:
                    for field in opt_feature_fields:
                        boundary_data[field] = 0
                feature_data.update(boundary_data)

                feature_table = [args.image_id]
                for field in feature_fields + opt_feature_fields:
                    feature_table.append(feature_data[field])

                args.features_file.write('|'.join(map(str, feature_table)) +
                                         '\n')

                # Print the analysis image data to the aggregate output file
                for img_type in images:
                    args.analysis_images_file.write('|'.join(
                        map(str, (args.image_id, img_type,
                                  images[img_type]))) + '\n')

                # Print the image signal data to the aggregate output file
                for key in signal_data.keys():
                    if key != 'bin-number':
                        signal_data[key] = signal_data[key].replace('[', '')
                        signal_data[key] = signal_data[key].replace(']', '')
                        signal_table = [
                            args.image_id, signal_data['bin-number'], key,
                            signal_data[key]
                        ]
                        args.signal_file.write(
                            '|'.join(map(str, signal_table)) + '\n')
            else:
                args.fail_log.write('|'.join(map(str, meta_table)) + '\n')
Example #11
def query_tweets(tweet_ids, 
        groupings, 
        endpoint, 
        engagement_types,
        max_tweet_ids = 25,
        date_range = (None,None)
        ):
    """ 
    Return engagements for the specified Tweets, groupings,
    engagement types, and endpoint.
    Providing start/end times enables historical mode.
    
    There are two iterations to manage:
    - splitting the tweet IDs into acceptably small chunks 
    - splitting the date range into acceptably small chunks
    """ 

    if ( date_range[0] is None and date_range[1] is not None ) or \
    ( date_range[0] is not None and date_range[1] is None ):
        raise DateRangeException("Must specify both or neither of the 'date_range' tuple elements")

    MAX_DATE_RANGE_IN_DAYS = 27
    def yield_date_range(start_date, end_date):
        """ yield datetime objects in MAX_DATE_RANGE_IN_DAYS intervals """
        for n in range(0, int((end_date - start_date).days), MAX_DATE_RANGE_IN_DAYS):
            yield start_date + datetime.timedelta(n)


    results = {}

    # must create a physical list to run 'len' on it
    tweet_ids = list(tweet_ids)
    # split tweet ID list into chunks of size 'max_tweet_ids' 
    for tweet_ids_chunk in [tweet_ids[i:i+max_tweet_ids] for i in range(0, len(tweet_ids), max_tweet_ids)]:  
       
        results_for_these_ids = {}
        post_data = {
            'tweet_ids' : tweet_ids_chunk,
            'engagement_types' : engagement_types,
            'groupings' : groupings
            } 
        
        if date_range == (None,None):
            results_for_these_ids = make_request(post_data,endpoint)
        else: 
            # this is historical mode
            start_time = dt_parser(date_range[0])
            end_time = dt_parser(date_range[1])
        
            # standard timed query
            if (end_time - start_time).days <= MAX_DATE_RANGE_IN_DAYS:
                start_time = dt_parser(date_range[0])
                end_time = dt_parser(date_range[1])
                post_data['start'] = start_time.strftime('%Y-%m-%d')
                post_data['end'] = end_time.strftime('%Y-%m-%d')
                results_for_these_ids = make_request(post_data,endpoint)
            
            # extended timed query
            else:
                # iterate over all chunks of dates
                for this_start_time in yield_date_range(start_time, end_time):
                    this_end_time = this_start_time + datetime.timedelta(MAX_DATE_RANGE_IN_DAYS)
                    if this_end_time > end_time:
                        this_end_time = end_time

                    post_data['start'] = this_start_time.strftime('%Y-%m-%d')
                    post_data['end'] = this_end_time.strftime('%Y-%m-%d')
                    
                    results_for_these_ids_and_dates = make_request(post_data,endpoint)
                    combine_results(results_for_these_ids,results_for_these_ids_and_dates,groupings)
            
        combine_results(results,results_for_these_ids,groupings)
    return results
Example #12
def query_tweets(tweet_ids,
                 groupings,
                 endpoint,
                 engagement_types,
                 max_tweet_ids=25,
                 date_range=(None, None)):
    """ 
    Return engagements for the specified Tweets, groupings,
    engagement types, and endpoint.
    Providing start/end times enables historical mode.
    
    There are two iterations to manage:
    - splitting the tweet IDs into acceptably small chunks 
    - splitting the date range into acceptably small chunks
    """

    if ( date_range[0] is None and date_range[1] is not None ) or \
    ( date_range[0] is not None and date_range[1] is None ):
        raise DateRangeException(
            "Must specify both or neither of the 'date_range' tuple elements")

    #if datetime.datetime.strptime(date_range[1],'%Y-%m-%d') > datetime.datetime.now():
    #    raise DateRangeException("Tweet was posted less than 27 days ago. Use 'totals' endpoint.")

    MAX_DATE_RANGE_IN_DAYS = 27

    def yield_date_range(start_date, end_date):
        """ yield datetime objects in MAX_DATE_RANGE_IN_DAYS intervals """
        for n in range(0, int((end_date - start_date).days),
                       MAX_DATE_RANGE_IN_DAYS):
            yield start_date + datetime.timedelta(n)

    def chunks(iterable, size=1):
        """ 
        yield list representations of
        'size'-length, consecutive slices of 'iterable' 
        """
        iterator = iter(iterable)
        for first in iterator:
            yield list(
                itertools.chain([first], itertools.islice(iterator, size - 1)))

    results = {}

    # split tweet ID list into chunks of size 'max_tweet_ids'

    for tweet_ids_chunk in chunks(tweet_ids, max_tweet_ids):
        results_for_these_ids = {}
        post_data = {
            'tweet_ids': tweet_ids_chunk,
            'engagement_types': engagement_types,
            'groupings': groupings
        }

        if date_range == (None, None):
            # this is '28hr' or 'totals' mode
            results_for_these_ids = make_request(post_data, endpoint)
        else:
            # this is historical mode
            start_time = dt_parser(date_range[0])
            end_time = dt_parser(date_range[1])

            # standard timed query (only one call required)
            if (end_time - start_time).days <= MAX_DATE_RANGE_IN_DAYS:
                start_time = dt_parser(date_range[0])
                end_time = dt_parser(date_range[1])
                post_data['start'] = start_time.strftime('%Y-%m-%d')
                post_data['end'] = end_time.strftime('%Y-%m-%d')
                results_for_these_ids = make_request(post_data, endpoint)

            # extended timed query (multiple calls required)
            else:
                # iterate over all chunks of dates
                for this_start_time in yield_date_range(start_time, end_time):
                    this_end_time = this_start_time + datetime.timedelta(
                        MAX_DATE_RANGE_IN_DAYS)
                    if this_end_time > end_time:
                        this_end_time = end_time

                    post_data['start'] = this_start_time.strftime('%Y-%m-%d')
                    post_data['end'] = this_end_time.strftime('%Y-%m-%d')

                    results_for_these_ids_and_dates = make_request(
                        post_data, endpoint)
                    combine_results(results_for_these_ids,
                                    results_for_these_ids_and_dates, groupings)
        combine_results(results, results_for_these_ids, groupings)
    return results
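
The chunks() helper above slices any iterable lazily without materializing it; a quick stand-alone check of its behaviour (input values are arbitrary):

import itertools

def chunks(iterable, size=1):
    iterator = iter(iterable)
    for first in iterator:
        yield list(itertools.chain([first], itertools.islice(iterator, size - 1)))

print(list(chunks(range(7), size=3)))  # [[0, 1, 2], [3, 4, 5], [6]]
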
Example #13
def query_tweets(tweet_ids,
        groupings, 
        endpoint, 
        engagement_types,
        max_tweet_ids = 25,
        date_range = (None,None)
        ):
    """ 
    Return engagements for the specified Tweets, groupings,
    engagement types, and endpoint.
    Providing start/end times enables historical mode.
    
    There are two iterations to manage:
    - splitting the tweet IDs into acceptably small chunks 
    - splitting the date range into acceptably small chunks
    """ 

    MAX_DATE_RANGE_IN_DAYS = 27
    def yield_date_range(start_date, end_date):
        for n in range(0, int((end_date - start_date).days), MAX_DATE_RANGE_IN_DAYS):
            yield start_date + datetime.timedelta(n)


    results = {}

    # must create a physical list to run 'len' on it
    tweet_ids = list(tweet_ids)
    # split tweet ID list into chunks of size 'max_tweet_ids' 
    for tweet_ids_chunk in [tweet_ids[i:i+max_tweet_ids] for i in range(0, len(tweet_ids), max_tweet_ids)]:  
       
        results_for_these_ids = {}
        post_data = {
            'tweet_ids' : tweet_ids_chunk,
            'engagement_types' : engagement_types,
            'groupings' : groupings
            } 
        
        if date_range == (None,None):
            results_for_these_ids = make_request(post_data,endpoint)
        else: 
            start_time = dt_parser(date_range[0])
            end_time = dt_parser(date_range[1])
        
            # standard timed query
            if (end_time - start_time).days <= MAX_DATE_RANGE_IN_DAYS:
                start_time = dt_parser(date_range[0])
                end_time = dt_parser(date_range[1])
                post_data['start'] = start_time.strftime('%Y-%m-%d')
                post_data['end'] = end_time.strftime('%Y-%m-%d')
                results_for_these_ids = make_request(post_data,endpoint)
            
            # extended timed query
            else:
                # iterate over all chunks of dates
                for this_start_time in yield_date_range(start_time, end_time):
                    this_end_time = this_start_time + datetime.timedelta(MAX_DATE_RANGE_IN_DAYS)
                    if this_end_time > end_time:
                        this_end_time = end_time

                    post_data['start'] = this_start_time.strftime('%Y-%m-%d')
                    post_data['end'] = this_end_time.strftime('%Y-%m-%d')
                    
                    results_for_these_ids_and_dates = make_request(post_data,endpoint)
                    combine_results(results_for_these_ids,results_for_these_ids_and_dates,groupings)
            
        combine_results(results,results_for_these_ids,groupings)
    return results
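
A small stand-alone check (dates are arbitrary) of how the 27-day windowing above walks a long date range; in the query loop each window's end is clamped to end_time before being formatted into post_data.

import datetime

MAX_DATE_RANGE_IN_DAYS = 27

def yield_date_range(start_date, end_date):
    for n in range(0, int((end_date - start_date).days), MAX_DATE_RANGE_IN_DAYS):
        yield start_date + datetime.timedelta(n)

starts = list(yield_date_range(datetime.datetime(2015, 6, 1), datetime.datetime(2015, 8, 10)))
print([d.strftime('%Y-%m-%d') for d in starts])  # ['2015-06-01', '2015-06-28', '2015-07-25']
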
Example #14
def multi_plot(input_generators, config, styles=[]):
    """
    input_generators is a list; each element is a generator of tuples with the following structure:
        (time_interval_start, count, eta, trend)
    """

    # if this throws a configparser.NoSectionError,
    # then let it propagate uncaught, since nothing will work
    plot_config = config['plot']

    # get parameters and set defaults
    logscale_eta = plot_config.getboolean('logscale_eta', fallback=False)
    use_x_var = plot_config.getboolean('use_x_var', fallback=True)
    do_plot_parameters = plot_config.getboolean('do_plot_parameters',
                                                fallback=False)
    legend = plot_config.getboolean('legend', fallback=False)
    start_tm = dt_parser(plot_config.get("start_time", "1900-01-01"))
    stop_tm = dt_parser(plot_config.get("stop_time", "2050-01-01"))
    rebin_factor = plot_config.getint("rebin_factor", fallback=1)

    rebin_config = dict(config.items("rebin"))
    plot_config["x_unit"] = "{0:d} {1:s}".format(
        int(rebin_config["n_binning_unit"]) * rebin_factor,
        rebin_config["binning_unit"])
    """
    # only useful if we revive 'counter_name' parameter
    if 'counter_name' in rebin_config:
        if plot_config["plot_title"] == "":
            plot_config["plot_title"] = rebin_config["counter_name"]
        if plot_config["plot_file_name"] == "":
            plot_config["plot_file_name"] = rebin_config["counter_name"]
    """

    # build the plotting surface

    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
    max_cts = 0
    min_cts = sys.maxsize
    max_eta = 0
    min_eta = sys.maxsize
    used_colors = []

    for index, input_generator in enumerate(input_generators):

        data = [
            (dt_parser(tup[0]), float(tup[1]), float(tup[2]), float(tup[3]))
            for tup in input_generator
            if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm
        ]

        if rebin_factor <= 1:
            tbs = [tup[0] for tup in data]
            cts = [tup[1] for tup in data]
            eta = [tup[2] for tup in data]
            # trends:
            #  -1 = 'decreasing'
            #   0 = 'no trend'
            #   1 = 'increasing'
            trends = [
                random.uniform(0.5, 1.0) if tup[3] == 1 else 0 for tup in data
            ]
        else:
            tbs = []
            cts = []
            eta = []
            trends = []
            tbs_tmp = None
            cts_tmp = 0
            eta_tmp = 0
            trend_tmp = 0
            counter = 0
            for tbs_i, cts_i, eta_i, trend_i in data:
                tbs_tmp = tbs_i
                cts_tmp += cts_i
                eta_tmp += eta_i
                trend_tmp += 1 if trend_i == 1 else 0

                counter += 1
                if counter == rebin_factor:
                    counter = 0
                    tbs.append(tbs_tmp)
                    cts.append(cts_tmp)
                    eta.append(eta_tmp / float(rebin_factor))
                    trends.append(trend_tmp)
                    tbs_tmp = None
                    cts_tmp = 0
                    eta_tmp = 0
                    trend_tmp = 0

        if cts == []:
            sys.stderr.write("'cts' list is empty\n")
            continue
            # return -1
        max_cts = max(max_cts, max(cts))
        min_cts = min(min_cts, min(cts))

        ## PLOTTING PART

        color = styles[index][1]

        # plot the counts
        if use_x_var:
            ax1.plot(tbs, cts, color, alpha=0.7)
        else:
            ax1.plot(cts, color, alpha=0.7)
            ax1.set_xlim(0, len(cts))

        # plot the etas
        plotter = "plot"
        if logscale_eta:
            plotter = "semilogy"
        if use_x_var:
            getattr(ax2, plotter)(tbs, eta, color, alpha=0.7)
        else:
            getattr(ax2, plotter)(eta, color, alpha=0.7)
            ax2.set_xlim(0, len(eta))

        max_eta = max(max_eta, max(eta))
        min_eta = min(min_eta, min(eta))

        # plot the trends
        if use_x_var:
            ax3.plot(tbs,
                     trends,
                     color + '.',
                     alpha=0.7,
                     label=styles[index][0])
        elif trends != []:
            ax3.plot(trends, color + '.', alpha=0.7, label=styles[index][0])
            ax3.set_xlim(0, len(trends))

        for i, t in enumerate(trends):
            if t > 0:
                ax1.axvline(x=tbs[i],
                            color=color,
                            linewidth=0.5,
                            linestyle='--',
                            alpha=0.7)
                ax2.axvline(x=tbs[i],
                            color=color,
                            linewidth=0.5,
                            linestyle='--',
                            alpha=0.7)
                ax3.axvline(x=tbs[i],
                            color=color,
                            linewidth=0.5,
                            linestyle='--',
                            alpha=0.7)

    # adjust spacing
    ax1.set_ylim(min_cts * 0.9, max_cts * 1.7)
    # floor the eta axis at zero unless every observed eta is positive
    if min_eta == sys.maxsize or min_eta < 0:
        min_eta = 0
    else:
        min_eta = min_eta * 0.9
    ax2.set_ylim(min_eta, max_eta * 1.1)

    # remove the horizontal space between plots
    plt.subplots_adjust(hspace=0)

    # modify ticklabels
    plt.xticks(rotation=30)
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        tl.set_fontsize(10)
    for tl in ax2.get_yticklabels():
        tl.set_color('r')
        tl.set_fontsize(10)
    for tl in ax3.get_yticklabels():
        tl.set_color('b')
        tl.set_fontsize(10)
    for tl in ax3.get_xticklabels():
        tl.set_color('k')
        tl.set_fontsize(7)

    # y labels
    y_label = plot_config.get('y_label', 'counts')
    ax1.set_ylabel(y_label, color='k', fontsize=12)
    ax2.set_ylabel("eta", color='r', fontsize=12)
    ax3.set_ylabel("trend", color='b', fontsize=12)

    ax1.yaxis.set_major_locator(plticker.MaxNLocator(4))
    ax2.yaxis.set_major_locator(plticker.MaxNLocator(5))

    # x date formatting
    # if use_x_var:
    #     day_locator = mdates.DayLocator()
    #     hour_locator = mdates.HourLocator()
    #     day_formatter = mdates.DateFormatter('%Y-%m-%d')
    #     ax2.xaxis.set_major_formatter( day_formatter )
    #     ax2.xaxis.set_major_locator( day_locator )
    #     ax2.xaxis.set_minor_locator( hour_locator )
    #     fig.autofmt_xdate()
    # ax2.set_xlabel("time ({} bins)".format(plot_config["x_unit"].rstrip('s')))

    ax1.grid(True)
    ax2.grid(True)
    ax3.grid(True)

    # build text box for parameter display
    if do_plot_parameters:
        props = dict(boxstyle='round', facecolor='white', alpha=0.5)
        model_name = config['analyze']['model_name']
        model_pars = ""
        for k, v in config[model_name + '_model'].items():
            model_pars += "{}: {}\n".format(k, v)
        text_str = "model: {}\n{}".format(model_name, str(model_pars))
        text_str += 'topics_count: ' + str(len(input_generators))
        text_str += '\nrebin_factor: ' + str(rebin_factor)
        ax1.text(0.05,
                 0.95,
                 text_str,
                 bbox=props,
                 verticalalignment='top',
                 fontsize=8,
                 transform=ax1.transAxes)
    if legend:
        ax3.legend(fontsize=5.8,
                   fancybox=True,
                   loc='lower left',
                   title='Topics')

    plt.suptitle(u"{}".format(plot_config.get("plot_title",
                                              "SET A PLOT TITLE")))

    ## write the image

    plot_file_name = u"{}/{}.{}".format(
        plot_config.get("plot_dir", ".").rstrip('/'),
        plot_config.get("plot_file_name", "output"),
        plot_config.get("plot_file_extension", "png"))
    # print("Saved at: " + plot_file_name)
    plt.savefig(plot_file_name)
    plt.close()
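
multi_plot() indexes styles in lockstep with input_generators (styles[index][0] is the legend label, styles[index][1] the matplotlib color); a sketch of the expected shape, with the labels, colors and generator names invented:

styles = [
    ("topic: elections", "b"),
    ("topic: weather", "g"),
]
# multi_plot([elections_rows, weather_rows], config, styles=styles)
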
Example #15
def plot(input_generator, config):
    """
    input_generator is a generator of tuples with the following structure:
        (time_interval_start, count, eta)
    """
    use_x_var = True
    if "use_x_var" in config:
        use_x_var = bool(config["use_x_var"])
    if "y_label" not in config:
        config["y_label"] = "counts"
    if "start_time" in config and "stop_time" in config:
        start_tm = dt_parser(config["start_time"])
        stop_tm = dt_parser(config["stop_time"])
        data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]))
                for tup in input_generator
                if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm
                ]
    else:
        data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]))
                for tup in input_generator]

    if "rebin_factor" not in config or int(config["rebin_factor"]) == 1:
        tbs = [tup[0] for tup in data]
        cts = [tup[1] for tup in data]
        eta = [tup[2] for tup in data]
    # do a hacky rebin, just for plotting
    else:
        tbs = []
        cts = []
        eta = []
        tbs_tmp = None
        cts_tmp = 0
        eta_tmp = 0
        counter = 0
        for tbs_i, cts_i, eta_i in data:
            tbs_tmp = tbs_i
            cts_tmp += cts_i
            eta_tmp += eta_i
            counter += 1
            if counter == int(config["rebin_factor"]):
                counter = 0
                tbs.append(tbs_tmp)
                cts.append(cts_tmp)
                eta.append(eta_tmp / float(config["rebin_factor"]))
                tbs_tmp = None
                cts_tmp = 0
                eta_tmp = 0

    if cts == []:
        print("'cts' list is empty")
        return -1
    max_cts = max(cts)
    min_cts = min(cts)

    # build the plotting surface
    fig, (ax1, ax2) = plt.subplots(2, sharex=True)

    # plot the data
    if use_x_var:
        ax1.plot(tbs, cts, 'k-')
    else:
        ax1.plot(cts, 'k-')
        ax1.set_xlim(0, len(cts))

    plotter = "plot"
    if config["logscale_eta"]:
        plotter = "semilogy"
    if use_x_var:
        getattr(ax2, plotter)(tbs, eta, 'r')
    else:
        getattr(ax2, plotter)(eta, 'r')
        ax2.set_xlim(0, len(eta))

    # adjust spacing
    ax1.set_ylim(min_cts * 0.9, max_cts * 1.7)
    min_eta = 0
    if min(eta) > 0:
        min_eta = min(eta) * 0.9
    ax2.set_ylim(min_eta, max(eta) * 1.1)

    # remove the horizontal space between plots
    plt.subplots_adjust(hspace=0)
    # modify ticklabels
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        tl.set_fontsize(10)
    for tl in ax2.get_yticklabels():
        tl.set_color('r')
        tl.set_fontsize(10)

    # y labels
    ax1.set_ylabel(config["y_label"], color='k', fontsize=12)
    ax2.set_ylabel("eta", color='r', fontsize=12)

    ax1.yaxis.set_major_locator(plticker.MaxNLocator(4))
    ax2.yaxis.set_major_locator(plticker.MaxNLocator(5))

    # x date formatting
    if use_x_var:
        formatter = mdates.DateFormatter('%Y-%m-%d')
        ax2.xaxis.set_major_formatter(formatter)
        fig.autofmt_xdate()
    ax2.set_xlabel("time ({} bins)".format(config["x_unit"].rstrip('s')))

    ax1.grid(True)
    ax2.grid(True)

    plt.suptitle(u"{}".format(config["plot_title"]))

    try:
        os.makedirs(config["plot_dir"])
    except OSError:
        pass
    plot_file_name = u"{}/{}.{}".format(config["plot_dir"].rstrip('/'),
                                        config["plot_file_name"],
                                        config["plot_file_extension"])
    plt.savefig(plot_file_name)
    plt.close()
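
This variant reads a plain dict rather than a configparser object; a minimal config sketch covering every key the function touches, with placeholder values:

config = {
    "use_x_var": True,
    "y_label": "counts",
    "start_time": "2015-06-01",   # optional, only used together with stop_time
    "stop_time": "2015-06-30",
    "rebin_factor": 1,
    "logscale_eta": False,
    "x_unit": "1 hours",
    "plot_title": "example counter",
    "plot_dir": "./plots",
    "plot_file_name": "example",
    "plot_file_extension": "png",
}
# plot(row_generator, config)   # row_generator yields (time_interval_start, count, eta) tuples
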
Example #16
def process_results(args):
  """
  Get results from individual files.
  Parse the results and recompile for SQLite.
  
  Args:
    args: (object) argparse object.
  Returns:
    
  Raises:
    
  """
  # Add a header to each output file
  # Metadata table
  metadata_fields = ['image_id', 'run_id']
  metadata_fields.extend(args.valid_meta.keys())
  #args.metadata_file.write('#' + '\t'.join(map(str, metadata_fields)) + '\n')
  
  # Feature data table
  feature_fields = ['area', 'hull-area', 'solidity', 'perimeter', 'width', 'height',
                  'longest_axis', 'center-of-mass-x', 'center-of-mass-y', 'hull_vertices',
                  'in_bounds']
  opt_feature_fields = ['y-position', 'height_above_bound', 'height_below_bound',
                        'above_bound_area', 'percent_above_bound_area', 'below_bound_area',
                        'percent_below_bound_area']
  
  #args.features_file.write('#' + '\t'.join(map(str, feature_fields + opt_feature_fields)) + '\n')
  
  # Signal channel data table
  signal_fields = ['bin-number', 'channel_name', 'values']
  
  #bin-number	blue	green	red	lightness	green-magenta	blue-yellow	hue	saturation	value
  
  # Initialize the database with the schema template if create is true
  if args.create:
    # Create SQL structure based on accepted metadata and features
    args.sq.execute('CREATE TABLE IF NOT EXISTS `runinfo` (`run_id` INTEGER PRIMARY KEY, `datetime` INTEGER NOT NULL, `command` TEXT NOT NULL);')
    args.sq.execute('CREATE TABLE IF NOT EXISTS `metadata` (`image_id` INTEGER PRIMARY KEY, `run_id` INTEGER NOT NULL, `' + '` TEXT NOT NULL, `'.join(map(str, metadata_fields[2:])) + '` TEXT NOT NULL);')
    args.sq.execute('CREATE TABLE IF NOT EXISTS `features` (`image_id` INTEGER PRIMARY KEY, `' + '` TEXT NOT NULL, `'.join(map(str, feature_fields + opt_feature_fields)) + '` TEXT NOT NULL);')
    args.sq.execute('CREATE TABLE IF NOT EXISTS `analysis_images` (`image_id` INTEGER NOT NULL, `type` TEXT NOT NULL, `image_path` TEXT NOT NULL);')
    args.sq.execute('CREATE TABLE IF NOT EXISTS `signal` (`image_id` INTEGER NOT NULL, `' + '` TEXT NOT NULL, `'.join(map(str, signal_fields)) + '` TEXT NOT NULL);')
  
  # Walk through the image processing job directory and process data from each file
  for (dirpath, dirnames, filenames) in os.walk(args.jobdir):
    for filename in filenames:
      meta = {}
      images = {}
      features = []
      feature_data = {}
      signal = []
      signal_data = {}
      boundary = []
      boundary_data = {}
      # Open results file
      with open(dirpath + '/' + filename) as results:
        # For each line in the file
        for row in results:
          # Remove the newline character
          row = row.rstrip('\n')
          # Split the line by tab characters
          cols = row.split('\t')
          # If the data is of class meta, store in the metadata dictionary
          if cols[0] == 'META':
            meta[cols[1]] = cols[2]
          # If the data is of class image, store in the image dictionary
          elif cols[0] == 'IMAGE':
            images[cols[1]] = cols[2]
          # If the data is of class shapes, store in the shapes dictionary
          elif cols[0] == 'HEADER_SHAPES':
            features = cols
          elif cols[0] == 'SHAPES_DATA':
            for i, datum in enumerate(cols):
              if i > 0:
                feature_data[features[i]] = datum
          # If the data is of class histogram/signal, store in the signal dictionary
          elif cols[0] == 'HEADER_HISTOGRAM':
            signal = cols
          elif cols[0] == 'HISTOGRAM_DATA':
            for i, datum in enumerate(cols):
              if i > 0:
                signal_data[signal[i]] = datum
          # If the data is of class boundary (horizontal rule), store in the boundary dictionary
          elif 'HEADER_BOUNDARY' in cols[0]:
            boundary = cols
            # Temporary hack
            boundary_data['y-position'] = cols[0].replace('HEADER_BOUNDARY', '')
          elif cols[0] == 'BOUNDARY_DATA':
            for i, datum in enumerate(cols):
              if i > 0:
                boundary_data[boundary[i]] = datum
      
      # Check to see if the image failed, if not continue
      if (len(feature_data) != 0):
        # Convert image datetime to unix time
        timestamp = dt_parser(meta['timestamp'])
        time_delta = timestamp - datetime.datetime(1970,1,1)
        unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
        
        # Print the image metadata to the aggregate output file
        args.image_id += 1
        meta['image_id'] = args.image_id
        meta['run_id'] = args.run_id
        meta['unixtime'] = unix_time
        
        meta_table = []
        for field in metadata_fields:
          meta_table.append(meta[field])
        
        args.metadata_file.write('|'.join(map(str, meta_table)) + '\n')
        
        # Print the image feature data to the aggregate output file
        feature_data['image_id'] = args.image_id
        
        # Boundary data is optional, if it's not there we need to add in placeholder data
        if len(boundary_data) == 0:
          for field in opt_feature_fields:
            boundary_data[field] = 0
        feature_data.update(boundary_data)
        
        feature_table = [args.image_id]
        for field in feature_fields + opt_feature_fields:
          feature_table.append(feature_data[field])
        
        args.features_file.write('|'.join(map(str, feature_table)) + '\n')
        
        # Print the analysis image data to the aggregate output file
        for img_type in images:
          args.analysis_images_file.write('|'.join(map(str, (args.image_id, img_type, images[img_type]))) + '\n')
        
        # Print the image signal data to the aggregate output file
        for key in signal_data.keys():
          if key != 'bin-number':
            signal_data[key] = signal_data[key].replace('[','')
            signal_data[key] = signal_data[key].replace(']','')
            signal_table = [args.image_id, signal_data['bin-number'], key, signal_data[key]]
            args.signal_file.write('|'.join(map(str, signal_table)) + '\n')
Example #17
def plot(input_generator, config):
    """
    input_generator is a generator of tuples with the following structure:
        (time_interval_start, count, eta)
    """
    use_x_var = True
    if "use_x_var" in config:
        use_x_var = bool(config["use_x_var"])  
    if "y_label" not in config:
        config["y_label"] = "counts"
    if "start_time" in config and "stop_time" in config:
        start_tm = dt_parser(config["start_time"])
        stop_tm = dt_parser(config["stop_time"])
        data = [(dt_parser(tup[0]),float(tup[1]),float(tup[2])) for tup in input_generator if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm ]
    else:
        data = [(dt_parser(tup[0]),float(tup[1]),float(tup[2])) for tup in input_generator] 
    
    if "rebin_factor" not in config or int(config["rebin_factor"]) == 1:
        tbs = [tup[0] for tup in data]
        cts = [tup[1] for tup in data]
        eta = [tup[2] for tup in data]
    # do a hacky rebin, just for plotting
    else:
        tbs = []
        cts = []
        eta = []
        tbs_tmp = None
        cts_tmp = 0
        eta_tmp = 0
        counter = 0
        for tbs_i,cts_i,eta_i in data:
            tbs_tmp = tbs_i
            cts_tmp += cts_i
            eta_tmp += eta_i
            counter += 1
            if counter == int(config["rebin_factor"]):
                counter = 0
                tbs.append(tbs_tmp)
                cts.append(cts_tmp)
                eta.append(eta_tmp/float(config["rebin_factor"]))
                tbs_tmp = None
                cts_tmp = 0
                eta_tmp = 0

    if cts == []:
        print("'cts' list is empty") 
        return -1
    max_cts = max(cts)
    min_cts = min(cts)
   
    # build the plot
    fig = plt.figure()
    plt.title(u"{}".format(config["plot_title"]))
    
    ax1 = fig.add_subplot(111)
    if use_x_var:
        ax1.plot(tbs,cts,'k-') 
    else:
        ax1.plot(cts,'k-') 
        ax1.set_xlim(0,len(cts))
    
    ## fancify
    ax1.set_ylim(min_cts*0.9,max_cts*1.7)
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        ax1.set_ylabel(config["y_label"],color='k',fontsize=12)
        tl.set_fontsize(10)
    plt.locator_params(axis = 'y', nbins = 4)
    if use_x_var:
        formatter = mdates.DateFormatter('%Y-%m-%d')
        ax1.xaxis.set_major_formatter( formatter ) 
        fig.autofmt_xdate()
    ax1.set_xlabel("time ({} bins)".format(config["x_unit"].rstrip('s')))

    if config['plot_eta']:
        ax2 = ax1.twinx()
        plotter="plot"
        if config["logscale_eta"]:
            plotter="semilogy"
        if use_x_var:
            getattr(ax2,plotter)(tbs,eta,'r')
        else:
            getattr(ax2,plotter)(eta,'r')
            ax2.set_xlim(0,len(eta))
        min_eta = 0
        if min(eta) > 0:
            min_eta = min(eta) * 0.9
        ax2.set_ylim(min_eta, max(eta)*1.1)
        ax2.set_ylabel("eta",color='r',fontsize=12)
        for tl in ax2.get_yticklabels():
            tl.set_color('r')
            tl.set_fontsize(10)

    if not config["plot_eta"]:
        config["plot_file_name"] += "_no_eta"
    try:
        os.makedirs(config["plot_dir"]) 
    except OSError:
        pass
    plot_file_name = u"{}/{}.{}".format(config["plot_dir"].rstrip('/'), config["plot_file_name"],config["plot_file_extension"])
    plt.savefig(plot_file_name)
    plt.close()