def load_from_stats(self):
    '''loads stats data and converts to timeseries without saving

    Reads the STATS csv named by ``self.stats_filename`` and, for every unique
    timestamp in it, computes the oil, gas and total (oil + gas) volume
    distributions, each divided by the sample volume of the images found at
    that timestamp, together with the d50 of each distribution.

    Results are stored on the instance:
        vd_total, vd_gas, vd_oil : arrays with one row per unique timestamp
        d50_total, d50_oil, d50_gas : arrays with one value per unique timestamp
        cos : result of scog.cos_check on the total distribution, per timestamp
        u : the unique timestamps, localised to UTC
        dias : the diameters returned by scpp.vd_from_stats
    '''
    stats = pd.read_csv(self.stats_filename, parse_dates=['timestamp'])

    u = pd.to_datetime(stats['timestamp'].unique())
    sample_volume = scpp.get_sample_volume(
        self.settings.PostProcess.pix_size,
        path_length=self.settings.PostProcess.path_length)

    # only the number of size bins is needed here, to size the output arrays
    dias, _ = scpp.get_size_bins()
    vd_oil = np.zeros((len(u), len(dias)))
    vd_gas = np.zeros_like(vd_oil)
    vd_total = np.zeros_like(vd_oil)
    d50_gas = np.zeros(len(u))
    d50_oil = np.zeros_like(d50_gas)
    d50_total = np.zeros_like(d50_gas)
    self.cos = np.zeros_like(d50_total)

    # @todo record the number of particles per image, and sum according to index later

    for i, s in enumerate(tqdm(u)):
        substats = stats[stats['timestamp'] == s]

        # sample volume scales with the number of images at this timestamp
        nims = scpp.count_images_in_stats(substats)
        sv = sample_volume * nims

        oil = scog.extract_oil(substats)
        dias, vd_oil_ = scpp.vd_from_stats(oil, self.settings.PostProcess)
        vd_oil_ /= sv
        vd_oil[i, :] = vd_oil_
        # previously never computed, which left self.d50_oil as all zeros
        d50_oil[i] = scpp.d50_from_vd(vd_oil_, dias)

        gas = scog.extract_gas(substats)
        dias, vd_gas_ = scpp.vd_from_stats(gas, self.settings.PostProcess)
        vd_gas_ /= sv
        vd_gas[i, :] = vd_gas_
        d50_gas[i] = scpp.d50_from_vd(vd_gas_, dias)

        # total is defined as oil + gas (anything classified as neither is excluded)
        vd_total_ = vd_oil_ + vd_gas_
        d50_total[i] = scpp.d50_from_vd(vd_total_, dias)
        vd_total[i, :] = vd_total_

        self.cos[i] = scog.cos_check(dias, vd_total[i, :])

    self.vd_total = vd_total
    self.vd_gas = vd_gas
    self.vd_oil = vd_oil
    self.d50_total = d50_total
    self.d50_oil = d50_oil
    self.d50_gas = d50_gas
    self.u = u.tz_localize('UTC')
    self.dias = dias
def convert_to_pj_format(stats_csv_file, config_file):
    '''Convert a STATS csv file into "PJ" time-series csv files.

    Two files are written next to the input, named by replacing '-STATS.csv'
    with '-PJ.csv' (total = oil + gas) and '-PJ-GAS.csv' (gas only). They are
    intended to be read by the old matlab SummaryPlot exe, so the header line
    written by DataLogger is stripped from both files at the end.

    Args:
        stats_csv_file (str) : path of the -STATS.csv file to convert
        config_file (str) : path of the config file given to PySilcamSettings
    '''
    settings = PySilcamSettings(config_file)
    logger.info('Loading stats....')
    frame = pd.read_csv(stats_csv_file)

    total_path = stats_csv_file.replace('-STATS.csv', '-PJ.csv')
    gas_path = total_path.replace('-PJ.csv', '-PJ-GAS.csv')

    total_log = DataLogger(total_path, ogdataheader())
    gas_log = DataLogger(gas_path, ogdataheader())

    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    unique_times = frame['timestamp'].unique()
    volume_per_image = sc_pp.get_sample_volume(
        settings.PostProcess.pix_size,
        path_length=settings.PostProcess.path_length)

    logger.info('Analysing time-series')
    for stamp in tqdm(unique_times):
        at_stamp = frame[frame['timestamp'] == stamp]
        # sample volume for this timestamp scales with its image count
        sv = volume_per_image * sc_pp.count_images_in_stats(at_stamp)

        oil = extract_oil(at_stamp)
        dias, vd_oil = sc_pp.vd_from_stats(oil, settings.PostProcess)
        vd_oil /= sv

        gas = extract_gas(at_stamp)
        dias, vd_gas = sc_pp.vd_from_stats(gas, settings.PostProcess)
        vd_gas /= sv
        d50_gas = sc_pp.d50_from_vd(vd_gas, dias)

        vd_total = vd_oil + vd_gas
        d50_total = sc_pp.d50_from_vd(vd_total, dias)

        total_log.append_data(
            cat_data_pj(stamp, vd_total, d50_total, len(oil) + len(gas)))
        gas_log.append_data(cat_data_pj(stamp, vd_gas, d50_gas, len(gas)))

    logger.info(' OK.')

    logger.info('Deleting header!')
    # rewrite each output file without its first (header) line
    for path in (total_path, gas_path):
        with open(path, 'r') as fin:
            lines = fin.read().splitlines(True)
        with open(path, 'w') as fout:
            fout.writelines(lines[1:])

    logger.info('Conversion complete.')
def cat_data(timestamp, stats, settings):
    '''Flatten the results for one timestamp into a single list.

    Possibly a redundant function....

    The returned list holds, in order: year, month, day, hour, minute,
    seconds (including the microsecond fraction), every value of the volume
    distribution computed from stats, the d50 of that distribution, and the
    number of rows in stats.
    '''
    dias, vd = sc_pp.vd_from_stats(stats, settings.PostProcess)
    d50 = sc_pp.d50_from_vd(vd, dias)

    fractional_seconds = timestamp.second + timestamp.microsecond / 1e6
    row = [timestamp.year, timestamp.month, timestamp.day,
           timestamp.hour, timestamp.minute, fractional_seconds]
    row.extend(vd)
    row.extend([d50, len(stats)])
    return row
def load_from_timeseries(self):
    '''uses timeseries xls sheets assuming they are available

    Loads the '-TIMESERIESgas.xlsx' and '-TIMESERIESoil.xlsx' files that sit
    alongside ``self.stats_filename`` and stores the data on the instance.
    The first 52 columns of each sheet are taken as the volume distribution,
    and the headers of those columns (converted to float) as the diameters.

    Sets self.dias, self.vd_oil, self.vd_gas, self.vd_total (oil + gas),
    self.u (from the oil sheet's 'Time' column), self.d50_gas and
    self.d50_oil (the 'D50' columns) and self.d50_total (recomputed from
    vd_total).
    '''
    timeseriesgas_file = self.stats_filename.replace(
        '-STATS.csv', '-TIMESERIESgas.xlsx')
    timeseriesoil_file = self.stats_filename.replace(
        '-STATS.csv', '-TIMESERIESoil.xlsx')

    gas = pd.read_excel(timeseriesgas_file, parse_dates=['Time'])
    oil = pd.read_excel(timeseriesoil_file, parse_dates=['Time'])

    self.dias = np.array(oil.columns[0:52], dtype=float)
    # DataFrame.as_matrix was removed in pandas 1.0; selecting the columns and
    # taking .values gives the same ndarray on old and new pandas
    self.vd_oil = oil[oil.columns[0:52]].values
    self.vd_gas = gas[gas.columns[0:52]].values
    self.vd_total = self.vd_oil + self.vd_gas
    self.u = pd.to_datetime(oil['Time'].values)
    self.d50_gas = gas['D50']
    self.d50_oil = oil['D50']

    # the sheets hold no total d50, so derive it from the summed distribution
    self.d50_total = np.zeros_like(self.d50_oil)
    for i, vd in enumerate(self.vd_total):
        self.d50_total[i] = scpp.d50_from_vd(vd, self.dias)
def _write_timeseries_xlsx(vd_rows, dias, d50, timestamps, filename):
    '''write one volume-distribution time-series (one row per timestamp) to an xlsx file'''
    # reshape instead of np.squeeze: squeeze would also drop the time axis when
    # there is only a single timestamp, and the DataFrame construction then fails
    data = np.asarray(vd_rows).reshape(len(timestamps), -1)
    time_series = pd.DataFrame(data=data, columns=dias)
    time_series['D50'] = d50
    time_series['Time'] = timestamps
    time_series.to_excel(filename)


def _write_average_xlsx(stats, settings, sv, timestamp, filename):
    '''write the volume distribution and d50 of all rows in stats (divided by sv) to an xlsx file'''
    dias, vd = sc_pp.vd_from_stats(stats, settings.PostProcess)
    vd /= sv
    d50 = sc_pp.d50_from_vd(vd, dias)
    dfa = pd.DataFrame(data=[vd], columns=dias)
    dfa['d50'] = d50
    dfa['Time'] = timestamp
    dfa.to_excel(filename)


def export_timeseries(configfile, statsfile):
    '''Export time-series and whole-file averages from a STATS csv file.

    Written next to statsfile (with '-STATS.csv' removed from the name):
        -TIMESERIES.xlsx, -TIMESERIESoil.xlsx, -TIMESERIESgas.xlsx :
            volume distribution and D50 for each unique timestamp
        -d50_TimeSeries.png :
            per-timestamp and windowed d50 for oil and gas, plus the windowed
            ratio of gas volume to oil volume (labelled GOR)
        -AVERAGE.xlsx, -AVERAGEoil.xlsx, -AVERAGEgas.xlsx :
            volume distribution and d50 of the whole file

    Args:
        configfile (str) : path of the config file given to PySilcamSettings
        statsfile (str) : path of the -STATS.csv file
    '''
    settings = PySilcamSettings(configfile)

    print('Loading STATS data: ', statsfile)
    stats = pd.read_csv(statsfile)
    stats['timestamp'] = pd.to_datetime(stats['timestamp'])
    stats.sort_values(by='timestamp', inplace=True)

    print('Extracting oil and gas')
    stats_oil = scog.extract_oil(stats)
    stats_gas = scog.extract_gas(stats)

    print('Calculating timeseries')
    u = pd.to_datetime(stats['timestamp']).unique()

    sample_volume = sc_pp.get_sample_volume(
        settings.PostProcess.pix_size,
        path_length=settings.PostProcess.path_length)

    # half-width of the averaging window; built numerically so that any
    # window_size works (a '00:00:<seconds>' string is fragile for >= 60 s or
    # for values that str() renders in exponent notation)
    td = pd.to_timedelta(settings.PostProcess.window_size / 2., unit='s')

    vdts_all = []
    vdts_oil = []
    vdts_gas = []
    d50_all = []
    d50_oil = []
    d50_gas = []
    timestamp = []
    d50_av_oil = []
    d50_av_gas = []
    gor = []
    for s in tqdm(u):
        dt = pd.to_datetime(s)
        timestamp.append(dt)

        # --- values for this exact timestamp ---
        substats = stats[stats['timestamp'] == s]
        dias, vd_all = sc_pp.vd_from_stats(substats, settings.PostProcess)
        dias, vd_oil = sc_pp.vd_from_stats(
            stats_oil[stats_oil['timestamp'] == s], settings.PostProcess)
        dias, vd_gas = sc_pp.vd_from_stats(
            stats_gas[stats_gas['timestamp'] == s], settings.PostProcess)

        nims = sc_pp.count_images_in_stats(substats)
        sv = sample_volume * nims
        vd_all /= sv
        vd_oil /= sv
        vd_gas /= sv

        d50_all.append(sc_pp.d50_from_vd(vd_all, dias))
        d50_oil.append(sc_pp.d50_from_vd(vd_oil, dias))
        d50_gas.append(sc_pp.d50_from_vd(vd_gas, dias))
        vdts_all.append(vd_all)
        vdts_oil.append(vd_oil)
        vdts_gas.append(vd_gas)

        # --- values averaged over the window (dt - td, dt + td) ---
        stats_av = stats[(stats['timestamp'] < (dt + td)) &
                         (stats['timestamp'] > (dt - td))]
        stats_av_oil = scog.extract_oil(stats_av)
        stats_av_gas = scog.extract_gas(stats_av)
        d50_av_oil.append(sc_pp.d50_from_stats(stats_av_oil, settings.PostProcess))
        d50_av_gas.append(sc_pp.d50_from_stats(stats_av_gas, settings.PostProcess))

        dias, vdts_av_oil = sc_pp.vd_from_stats(stats_av_oil, settings.PostProcess)
        dias, vdts_av_gas = sc_pp.vd_from_stats(stats_av_gas, settings.PostProcess)
        nims = sc_pp.count_images_in_stats(stats_av)
        sv = sample_volume * nims
        vdts_av_oil /= sv
        vdts_av_gas /= sv
        gor.append(np.sum(vdts_av_gas) / np.sum(vdts_av_oil))

    outpath, outfile = os.path.split(statsfile)
    outfile = outfile.replace('-STATS.csv', '')
    outfile = os.path.join(outpath, outfile)

    _write_timeseries_xlsx(vdts_all, dias, d50_all, timestamp,
                           outfile + '-TIMESERIES' + '' + '.xlsx')
    _write_timeseries_xlsx(vdts_oil, dias, d50_oil, timestamp,
                           outfile + '-TIMESERIES' + 'oil' + '.xlsx')
    _write_timeseries_xlsx(vdts_gas, dias, d50_gas, timestamp,
                           outfile + '-TIMESERIES' + 'gas' + '.xlsx')

    plt.figure(figsize=(20, 10))

    # each series is only drawn if it is not entirely NaN; the NaN-only plot
    # calls exist to create the legend handles regardless
    if not np.min(np.isnan(d50_oil)):
        plt.plot(timestamp, d50_oil, 'ro')
    if not np.min(np.isnan(d50_av_oil)):
        plt.plot(timestamp, d50_av_oil, 'r-')
    lns1 = plt.plot(np.nan, np.nan, 'r-', label='OIL')

    if not np.min(np.isnan(d50_gas)):
        plt.plot(timestamp, d50_gas, 'bo')
    if not np.min(np.isnan(d50_av_gas)):
        plt.plot(timestamp, d50_av_gas, 'b-')
    lns2 = plt.plot(np.nan, np.nan, 'b-', label='GAS')

    plt.ylabel('d50 [um]')
    plt.ylim(0, max(plt.gca().get_ylim()))

    ax = plt.gca().twinx()
    plt.sca(ax)
    plt.ylabel('GOR')
    if not np.min(np.isnan(gor)):
        plt.plot(timestamp, gor, 'k')
    lns3 = plt.plot(np.nan, np.nan, 'k', label='GOR')
    plt.ylim(0, max(plt.gca().get_ylim()))

    lns = lns1 + lns2 + lns3
    labs = [l.get_label() for l in lns]
    plt.legend(lns, labs)

    plt.savefig(outfile + '-d50_TimeSeries.png', dpi=600, bbox_inches='tight')
    plt.close()
    print('Export figure made. ')

    print('Exporting averages... ')
    # the sample volume and the reported time are taken from the full stats
    # and reused for the oil and gas averages
    nims = sc_pp.count_images_in_stats(stats)
    sv = sample_volume * nims
    first_time = np.min(pd.to_datetime(stats['timestamp']))
    average_base = statsfile.replace('-STATS.csv', '')

    _write_average_xlsx(stats, settings, sv, first_time,
                        average_base + '-AVERAGE' + '' + '.xlsx')
    _write_average_xlsx(stats_oil, settings, sv, first_time,
                        average_base + '-AVERAGE' + 'oil' + '.xlsx')
    _write_average_xlsx(stats_gas, settings, sv, first_time,
                        average_base + '-AVERAGE' + 'gas' + '.xlsx')

    print('Export done: ', outfile)
def generate_report(report_name, PIX_SIZE = 28.758169934640524, PATH_LENGTH=40, d50 = 400, TotalVolumeConcentration = 800, MinD = 108, config_file=''):
    '''Create a report of the expected response of the silcam to the provided experimental setup

    The report is a multi-page pdf containing: the initial (theoretical)
    volume distribution, statistical summaries from 40 randomly simulated
    images, one synthetic image, and the result of analysing that image.

    Args:
        report_name (str)                   : The path and filename of a pdf to be created
        PIX_SIZE (float)                    : pixel size of the setup [um]
        PATH_LENGTH (float)                 : the path length of the setup [mm]. Path length is the gap between housings
        d50 (float)                         : the expected d50 of the oil (50th percentile of the cumulative sum of the volume distribution)
        TotalVolumeConcentration (float)    : the expected concentration of oil in the sample volume [uL/L]
        MinD (float)                        : minimum resolvable diameter of the setup [um]. this would usually scale with the pixel size.
                                              synthesized particles smaller than this are also removed for speed purposes
        config_file (str)                   : passed on to test_analysis when analysing the synthetic image
    '''
    plt.close('all')

    # image dimensions (fixed always for GC2450 camera)
    imx = 2448
    imy = 2048

    # the context manager guarantees the pdf is closed even if a step below raises
    with PdfPages(report_name) as pp:
        # get diameters and limits of size bins
        diams, bin_limits_um = scpp.get_size_bins()

        # initial volume distribution, close to Oystein's MPB paper in 2013
        vd = weibull(diams, n=d50)
        vd = vd/np.sum(vd)*TotalVolumeConcentration  # scale the distribution according to concentration

        DropletVolume = ((4/3)*np.pi*((diams*1e-6)/2)**3)  # the volume of each droplet in m3
        nd = vd/(DropletVolume*1e9)  # the number distribution in each bin
        nd[diams < MinD] = 0  # remove small particles for speed purposes

        # calculate the sample volume of the SilCam specified
        sv = scpp.get_sample_volume(PIX_SIZE, path_length=PATH_LENGTH, imx=imx, imy=imy)

        nd = nd*sv  # scale the number distribution by the sample volume
        nc = int(sum(nd))  # calculate the total number concentration

        vd2 = scpp.vd_from_nd(nd, diams, sv)  # convert the number distribution to volume distribution in uL/L/bin
        vc_initial = sum(vd2)  # obtain the resulting concentration, now having removed small particles

        d50_theory = scpp.d50_from_vd(vd2, diams)  # calculate the d50 in um

        plt.plot(diams, vd2, 'k', label='Initial')
        plt.plot(diams, vd, 'r:', label='Theoretical')
        plt.vlines(d50_theory, 0, max(vd2), linestyle='--')
        plt.xscale('log')
        plt.xlabel('ECD [um]')
        # was a second xlabel call, which overwrote the x label and left y unlabelled
        plt.ylabel('Volume distribution [uL/L/bin]')
        plt.legend()
        plt.title('Initial conditions:' +
                  '\n\n' + str(nc) + ' particles per image volume' +
                  '\n' + str(int(vc_initial)) + ' initial volume concentration [uL/L]' +
                  '\n' + str(int(d50_theory)) + ' initial d50 [um]',
                  horizontalalignment='left', loc='left')
        pp.savefig(bbox_inches='tight')

        nims = 40  # the number of images to simulate
        # preallocate variables
        log_vd = np.zeros((nims, len(diams)))
        cvd = np.zeros(nims)
        cd50 = np.zeros(nims)
        for i in range(nims):
            # randomly select a droplet radius from the input distribution
            rad = np.random.choice(diams/2, size=nc, p=nd/sum(nd)) / PIX_SIZE  # radius is in pixels
            log_ecd = rad*2*PIX_SIZE  # log this size as a diameter in um

            necd, edges = np.histogram(log_ecd, bin_limits_um)  # count particles into number distribution
            log_vd[i, :] = scpp.vd_from_nd(necd, diams)  # convert to volume distribution

            # mean over the images simulated so far, including this one; the
            # previous slice 0:I excluded the current image and was empty
            # (giving NaN) for the first image
            running_mean = np.mean(log_vd[0:i + 1, :], axis=0)
            cvd[i] = np.sum(running_mean)  # cumulative volume concentration over image number
            cd50[i] = scpp.d50_from_vd(running_mean, diams)  # cumulative d50 over image number

        f, a = plt.subplots(1, 3, figsize=(16, 4))

        plt.sca(a[0])
        plt.plot(diams, vd2, 'k')
        plt.plot(diams, log_vd[0, :]/sv, alpha=0.5, label='1 image')
        plt.plot(diams, np.mean(log_vd[0:4, :], axis=0)/sv, alpha=0.5, label='4 images')
        plt.plot(diams, np.mean(log_vd, axis=0)/sv, alpha=0.5, label=(str(nims) + ' images'))
        plt.xscale('log')
        plt.vlines(d50_theory, 0, max(vd2), linestyle='--')
        plt.xlabel('ECD [um]')
        plt.ylabel('Volume distribution [uL/L/bin]')
        plt.legend()
        plt.title('Statistical summaries')

        plt.sca(a[1])
        plt.plot(cvd/sv, 'k')
        plt.hlines(vc_initial, 0, nims, linestyle='--')
        plt.xlabel('Image number')
        plt.ylabel('Volume concentration [uL/L]')

        plt.sca(a[2])
        plt.plot(cd50, 'k')
        plt.hlines(d50_theory, 0, nims, linestyle='--')
        plt.xlabel('Image number')
        plt.ylabel('d50 [um]')

        pp.savefig(bbox_inches='tight')

        # synthesize an image, returning the segmented image and the inputted volume distribution
        img, log_vd = synthesize(diams, bin_limits_um, nd, imx, imy, PIX_SIZE)

        plt.figure(figsize=(10, 10))
        plt.imshow(img, vmin=0, vmax=255, extent=[0, imx*PIX_SIZE/1000, 0, imy*PIX_SIZE/1000])
        plt.xlabel('[mm]')
        plt.ylabel('[mm]')
        plt.title('Synthetic image')
        pp.savefig(bbox_inches='tight')

        # fallback values so the last page can still be drawn if the analysis fails
        vd = np.zeros_like(log_vd)
        imbw = np.zeros_like(img[:, :, 0])
        stat_extract_time = pd.Timedelta(seconds=0)
        # @todo this should be handled properly as part of testing
        try:
            diams, vd, imbw, stat_extract_time = test_analysis(img, PIX_SIZE, PATH_LENGTH, config_file=config_file)
        except Exception as e:
            # best effort: report the failure and carry on with the fallback
            # values (a bare except would also swallow KeyboardInterrupt)
            print('Analysis failed')
            print(e)

        f, a = plt.subplots(1, 2, figsize=(20, 8))

        plt.sca(a[0])
        plt.plot(diams, vd2, 'r:', label='Initial')
        plt.plot(diams, log_vd/sv, 'k', label='Statistical Best')
        plt.plot(diams, vd, 'g', alpha=0.5, label='PySilCam')
        plt.xscale('log')
        plt.xlabel('ECD [um]')
        plt.ylabel('Volume distribution [uL/L]')
        plt.legend()
        plt.title('Single image assessment:' +
                  '\n\n' + 'Statextract took ' + str(stat_extract_time.seconds) + ' seconds',
                  horizontalalignment='left', loc='left')

        plt.sca(a[1])
        plt.imshow(imbw, vmin=0, vmax=1, extent=[0, imx*PIX_SIZE/1000, 0, imy*PIX_SIZE/1000], cmap='gray')
        plt.xlabel('[mm]')
        plt.ylabel('[mm]')
        plt.title('imbw')

        pp.savefig(bbox_inches='tight')
def update_plot(self, save=False):
    '''Redraw the averaged size distribution for the current window.

    The window is centred on self.mid_time and is self.av_window wide. The
    volume distributions of all timestamps inside it are averaged and plotted,
    a text summary is written, and the two window markers on the constant
    axis are moved. If no timestamps fall inside the window the distribution
    axis is cleared and the markers are drawn dashed at the requested limits.

    Args:
        save (bool) : if True, ask for a filename and write the averaged
                      distributions to an xlsx workbook
    '''
    half_window = self.av_window / 2
    start_time = self.mid_time - half_window
    end_time = self.mid_time + half_window
    u = pd.to_datetime(self.u)
    timeind = np.argwhere((u >= start_time) & (u < end_time))

    psd_nims = len(timeind)
    if psd_nims < 1:
        # nothing inside the window: clear the plot and show the requested limits
        plt.sca(self.axispsd)
        plt.cla()

        plt.sca(self.axistext)
        summary = ''.join([
            '\n Num images: {:0.0f}'.format(psd_nims),
            '\n Start: ' + str(start_time),
            '\n End: ' + str(end_time),
            '\n Window [sec.] {:0.3f}:'.format(
                (end_time - start_time).total_seconds()),
        ])
        plt.title(summary, verticalalignment='top',
                  horizontalalignment='right', loc='right')

        plt.sca(self.axisconstant)
        self.line1.remove()
        self.line2.remove()
        self.line1 = plt.vlines(start_time, 1, 12000, 'r', linestyle='--')
        self.line2 = plt.vlines(end_time, 1, 12000, 'r', linestyle='--')
        self.canvas.draw()
        return

    # timeind comes from argwhere, so the selections carry an extra axis
    # which is removed again with [0]
    psd_start = min(u[timeind])
    psd_end = max(u[timeind])
    first_stamp = pd.to_datetime(psd_start[0])
    last_stamp = pd.to_datetime(psd_end[0])

    psd_total = np.mean(self.vd_total[timeind, :], axis=0)[0]
    psd_oil = np.mean(self.vd_oil[timeind, :], axis=0)[0]
    psd_gas = np.mean(self.vd_gas[timeind, :], axis=0)[0]

    psd_vc_total = np.sum(psd_total)
    psd_vc_oil = np.sum(psd_oil)
    psd_vc_gas = np.sum(psd_gas)

    psd_d50_total = scpp.d50_from_vd(psd_total, self.dias)
    psd_d50_oil = scpp.d50_from_vd(psd_oil, self.dias)
    psd_d50_gas = scpp.d50_from_vd(psd_gas, self.dias)

    # gas volume as a percentage of oil + gas volume
    gas_sum = sum(psd_gas)
    psd_gor = gas_sum / (sum(psd_oil) + gas_sum) * 100

    plt.sca(self.axispsd)
    plt.cla()
    plt.plot(self.dias, psd_total, 'k', linewidth=5, label='Total')
    plt.plot(self.dias, psd_oil, color=[0.7, 0.4, 0], label='Oil')
    plt.plot(self.dias, psd_gas, 'b', label='Gas')
    plt.xlabel('ECD [um]')
    plt.ylabel('VD [uL/L]')
    plt.xscale('log')
    plt.xlim(10, 12000)

    plt.sca(self.axistext)
    summary = ''.join([
        'GOR: {:0.01f}'.format(psd_gor),
        '\n d50 total [um]: {:0.0f}'.format(psd_d50_total),
        '\n d50 oil [um]: {:0.0f}'.format(psd_d50_oil),
        '\n d50 gas [um]: {:0.0f}'.format(psd_d50_gas),
        '\n VC total [uL/L]: {:0.0f}'.format(psd_vc_total),
        '\n VC oil [uL/L]: {:0.0f}'.format(psd_vc_oil),
        '\n VC gas [uL/L]: {:0.0f}'.format(psd_vc_gas),
        '\n Num images: {:0.0f}'.format(psd_nims),
        '\n Start: ' + str(first_stamp),
        '\n End: ' + str(last_stamp),
        '\n Window [sec.] {:0.3f}:'.format(
            pd.to_timedelta(psd_end[0] - psd_start[0]).total_seconds()),
    ])
    plt.title(summary, verticalalignment='top',
              horizontalalignment='right', loc='right')

    plt.sca(self.axisconstant)
    self.line1.remove()
    self.line2.remove()
    self.line1 = plt.vlines(first_stamp, 1, 12000, 'r')
    self.line2 = plt.vlines(last_stamp, 1, 12000, 'r')
    self.canvas.draw()

    if not save:
        return

    timestring = first_stamp.strftime('D%Y%m%dT%H%M%S')
    suggested = self.stats_filename.replace('-STATS.csv', '-PSD-' + timestring)
    chosen = QFileDialog.getSaveFileName(self, "Select file to Save",
                                         suggested, ".xlsx")
    # second element is the selected filter; empty means nothing was chosen
    if chosen[1] == '':
        return
    outputname = chosen[0] + chosen[1]

    wb = Workbook()
    ws = wb.active

    # fixed cells of the sheet: labels and scalar results
    fixed_cells = [
        ('A1', 'Start:'),
        ('B1', min(u)),
        ('A2', 'Weighted average:'),
        ('B2', 'NOT IMPLEMENTED'),
        ('A3', 'End:'),
        ('B3', max(u)),
        ('A5', 'Number of images:'),
        ('B5', psd_nims),
        ('D5', 'd50(microns):'),
        ('E5', psd_d50_total),
        ('A6', 'Number of particles:'),
        ('B6', 'NOT IMPLEMENTED'),
        ('D6', 'peak || modal size class (microns):'),
        ('E6', 'NOT IMPLEMENTED'),
        ('D13', 'd50(microns):'),
        ('E13', psd_d50_oil),
        ('D14', 'peak || modal size class (microns):'),
        ('E14', 'NOT IMPLEMENTED'),
        ('D21', 'd50(microns):'),
        ('E21', psd_d50_gas),
        ('D22', 'peak || modal size class (microns):'),
        ('E22', 'NOT IMPLEMENTED'),
        ('A8', 'Bin mid-sizes (microns):'),
        ('A9', 'Vol. Conc. / bin (uL/L):'),
        ('A16', 'Vol. Conc. / bin (uL/L):'),
        ('A24', 'Vol. Conc. / bin (uL/L):'),
        ('A12', 'OIL Info'),
        ('A20', 'GAS Info'),
    ]
    for address, value in fixed_cells:
        ws[address] = value

    # one column per size bin, starting at column B
    for c, dia in enumerate(self.dias):
        column = c + 2
        ws.cell(row=8, column=column, value=dia)
        ws.cell(row=9, column=column, value=psd_total[c])
        ws.cell(row=16, column=column, value=psd_oil[c])
        ws.cell(row=24, column=column, value=psd_gas[c])

    wb.save(outputname)
    print('Saved:', outputname)