def make_temp_filenames(self, temp_dir=None):
    tth_filen = make_filename(self.img_filename, 'tth.cake.npy', temp_dir=temp_dir)
    azi_filen = make_filename(self.img_filename, 'azi.cake.npy', temp_dir=temp_dir)
    int_filen = make_filename(self.img_filename, 'int.cake.npy', temp_dir=temp_dir)
    return tth_filen, azi_filen, int_filen
def make_temp_filenames(self, temp_dir=None):
    # guard against the None default: os.path.exists(None) raises TypeError
    if temp_dir is not None and not os.path.exists(temp_dir):
        os.makedirs(temp_dir)
    bgsub_filen = make_filename(self.fname, 'bgsub.chi', temp_dir=temp_dir)
    bg_filen = make_filename(self.fname, 'bg.chi', temp_dir=temp_dir)
    return bgsub_filen, bg_filen
def make_temp_filenames(self, temp_dir=None):
    # guard against the None default: os.path.exists(None) raises TypeError
    if temp_dir is not None and not os.path.exists(temp_dir):
        os.makedirs(temp_dir)
    tth_filen = make_filename(self.img_filename, 'tth.cake.npy', temp_dir=temp_dir)
    azi_filen = make_filename(self.img_filename, 'azi.cake.npy', temp_dir=temp_dir)
    int_filen = make_filename(self.img_filename, 'int.cake.npy', temp_dir=temp_dir)
    return tth_filen, azi_filen, int_filen
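
# A minimal sketch (an assumption, not the source implementation) of the
# make_filename(fname, ext, temp_dir=...) helper that the make_temp_filenames
# methods above rely on: swap the original extension for the new one and, when
# temp_dir is given, relocate the result there.
import os

def make_filename_sketch(fname, ext, temp_dir=None):
    root = os.path.splitext(os.path.basename(fname))[0]
    dirname = temp_dir if temp_dir is not None else os.path.dirname(fname)
    return os.path.join(dirname, root + '.' + ext)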
def download(media_url, folder, audio=True, video=True, subtitles=False, **kwargs):
    """
    Download the streams for the media url.

    :param str media_url: the url.
    :param str folder: the local folder for the output.
    :param bool audio: download audio.
    :param bool video: download video.
    :param bool subtitles: download subtitles.
    :param kwargs: additional video properties (see `streams`).
    :return: a `Download` instance.
    """
    streams_dict = streams(media_url, **kwargs)
    md = metadata(media_url)
    fn = utils.make_filename(md['title'])
    if not video:
        if not streams_dict[0]:
            print('Sorry, audio only is not available for this stream.')
            return None
        streams_dict[1] = None
    if not audio:
        streams_dict[0] = None
    filepath = os.path.join(folder, fn)
    if subtitles:
        subs = _subtitles(media_url, lang=subtitles)
        if subs:
            with open(f'{filepath}.srt', 'w') as f:
                f.write(subs)
    d = Download(streams_dict, filepath, utils.merge_files)
    return d
def download(self, site, id):
    vid = self.channels[site]["videos"][id]
    fullid = site + "_" + vid["id"]
    filename = fullid + "_" + vid["title"][:25]
    filename = make_filename(filename)
    file = FileRef(creator=self, path=filename)
    downloader = read_instructions(site)["downloader"]
    DLclass = downloaders.__dict__[downloader]
    outputfile = DLclass().download(url=vid["url"], path=file.relative())
    newvid = Video(id=fullid, title=vid["title"],
                   thumbnail=URIRef(vid["thumbnail"]), creators=[self],
                   remote={site: id},
                   file=FileRef(creator=self, path=outputfile))
    self.videos[fullid] = newvid
    newvid.save()
def output_data(centroids, times, num_nonzeros, video_src):
    """Writes selected data into a text file.

    Saves centroid coordinates, traveled distance and elapsed time to file.
    output_data is called in postprocessing.

    Parameters
    ----------
    centroids : list
        List of centroids of the tracked object, sampled at `times`.
    times : list
        List of sampling times. Sampling every step-th frame.
    num_nonzeros : list
        List of total numbers of nonzero pixel values in the binary
        foreground mask for each time step.

    References
    ----------
    getcoords.find_scale, getcoords.projective_transform, set_review_flags
    """
    # Initialize accumulators
    total_dist = 0
    dists = np.empty(len(centroids))

    scale = np.load("src\\scaling_factor.npy")
    M = np.load("src\\projection_matrix.npy")

    fname_out = utils.make_filename(video_src, ".txt", parent="res")
    try:
        os.mkdir("res")
    except FileExistsError:
        pass

    with open(fname_out, "w") as f:
        f.write("No.,cX,cY,time,dist\n")
        for i, cent in enumerate(centroids):
            # Skip point if we have lost track
            if np.any(cent[:2] == 0) or \
                    (i > 0 and np.any(centroids[i - 1][:2] == 0)):
                dists[i] = np.nan  # mark the skipped point explicitly
                continue
            time = times[i]
            dist, cX, cY, cZ = compute_distance(centroids, cent, i, M, scale)
            f.write("{:d},{:0.2f},{:0.2f},{:0.2f},{:0.2f}\n".format(
                i, cX, cY, time, dist))
            total_dist += dist if dist is not np.nan else 0
            dists[i] = dist

        f.write("Total dist in mm: {:0.4f}\n".format(total_dist))
        f.write("Total time in sec: {:0.4f}\n".format(times[-1]))
        f = flag_file_review(f, dists, num_nonzeros, times)
def run_mencoder(self, accepted=False):
    if self.mplayer_preview_pid:
        call(['kill', str(self.mplayer_preview_pid)])
        self.mplayer_preview_pid = 0
    parameters = self.get_params_from_gui()
    self.schedule_timer.stop()
    channel_text = parameters.get('channel_text')
    append_suffix = self.append_suffix.isChecked()
    play_while_recording = self.play_while_recording.isChecked()
    filename = utils.make_filename(str(self.outputfile.text()), channel_text,
                                   append_suffix=append_suffix)
    self.filename = filename
    if not accepted and os.path.exists(filename):
        dialog = FileExistsDialog(self)
        dialog.show()
    else:
        self.stopButton.setEnabled(True)
        self.runButton.setEnabled(False)
        pre_command = str(self.pre_command.text())
        if pre_command:
            cmds = [c for c in re.split(r"\s+", pre_command) if c]
            try:
                call(cmds)
            except OSError:
                self.error_dialog.showMessage("execution of %s failed" % (pre_command,))
        cmd = utils.generate_command(parameters)
        env = os.environ.copy()
        if parameters.get('setenvvars'):
            for key, val in parameters.get('envvars').items():
                env[key] = val
        try:
            self.mencoder_instance = Popen(cmd, env=env)
            self.mencoder_pid = self.mencoder_instance.pid
        except OSError:
            self.error_dialog.showMessage("execution of %s failed" % " ".join(cmd))
        if self.mencoder_pid:
            self.status_label.setText(
                self.tr('Recording... %1').arg(
                    utils.secs_to_str(self.time_running)))
            self.checker_timer.start(1000)
            self.scheduleButton.setEnabled(False)
            self.cancel_sheduleButton.setEnabled(False)
            if play_while_recording:
                self.preview_file_timer.start(2000)
        else:
            self.stopButton.setEnabled(False)
            self.runButton.setEnabled(True)
def save_xls(self):
    """Export jlist to an Excel file."""
    if not self.model.jcpds_exist():
        return
    temp_dir = get_temp_dir(self.model.get_base_ptn_filename())
    filen_xls_t = make_filename(self.model.get_base_ptn_filename(),
                                'jlist.xls', temp_dir=temp_dir)
    filen_xls = dialog_savefile(self.widget, filen_xls_t)
    if str(filen_xls) == '':
        return
    xls_jlist(filen_xls, self.model.jcpds_lst,
              self.widget.doubleSpinBox_Pressure.value(),
              self.widget.doubleSpinBox_Temperature.value())
def save_to_xls(self):
    temp_dir = get_temp_dir(self.model.get_base_ptn_filename())
    filen_xls = make_filename(self.model.get_base_ptn_filename(),
                              'peakfit.xls', temp_dir=temp_dir)
    reply = QtWidgets.QMessageBox.question(
        self.widget, 'Question',
        'Do you want to save in default filename, %s ?' % filen_xls,
        QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
        QtWidgets.QMessageBox.Yes)
    if reply == QtWidgets.QMessageBox.No:
        filen_xls = QtWidgets.QFileDialog.getSaveFileName(
            self.widget, "Save an Excel File", filen_xls, "(*.xls)")
    else:
        if os.path.exists(filen_xls):
            reply = QtWidgets.QMessageBox.question(
                self.widget, 'Question', 'The file already exists. Overwrite?',
                QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
                QtWidgets.QMessageBox.No)
            if reply == QtWidgets.QMessageBox.No:
                return
    self.model.save_peak_fit_results_to_xls(filen_xls)
def _goto_dpp_next_file(self, move):
    filelist_chi = get_sorted_filelist(
        self.model.chi_path,
        sorted_by_name=self.widget.radioButton_SortbyNme.isChecked(),
        search_ext='*.chi')
    filelist_dpp = get_sorted_filelist(
        self.model.chi_path,
        sorted_by_name=self.widget.radioButton_SortbyNme.isChecked(),
        search_ext='*.dpp')
    idx_chi = find_from_filelist(
        filelist_chi, os.path.split(self.model.base_ptn.fname)[1])
    dpp_filen = make_filename(self.model.base_ptn.fname, 'dpp')
    idx_dpp = find_from_filelist(filelist_dpp, dpp_filen)
    if idx_chi == -1:
        QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                      "Cannot find current chi file")
        return
    # for radioButton_NavDPP
    if idx_dpp == -1:
        QtWidgets.QMessageBox.warning(
            self.widget, "Warning",
            "Cannot find current dpp file.\n" +
            "Manually save one for current chi file first.")
        return
    step = self.widget.spinBox_FileStep.value()
    if move == 'next':
        idx_chi_new = idx_chi + step
    elif move == 'previous':
        idx_chi_new = idx_chi - step
    elif move == 'last':
        idx_chi_new = len(filelist_chi) - 1
        if idx_chi == idx_chi_new:
            QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                          "It is already the last file.")
            return
    elif move == 'first':
        idx_chi_new = 0
        if idx_chi == idx_chi_new:
            QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                          "It is already the first file.")
            return
    if idx_chi_new > len(filelist_chi) - 1:
        idx_chi_new = len(filelist_chi) - 1
        if idx_chi == idx_chi_new:
            QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                          "It is already the last file.")
            return
    if idx_chi_new < 0:
        idx_chi_new = 0
        if idx_chi == idx_chi_new:
            QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                          "It is already the first file.")
            return
    if self.widget.checkBox_SaveDPPMove.isChecked():
        self.session_ctrl.save_dpp(quiet=True)
    else:
        reply = QtWidgets.QMessageBox.question(
            self.widget, 'Message',
            'Do you want to save this to dpp before you move to the next?',
            QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
            QtWidgets.QMessageBox.Yes)
        if reply == QtWidgets.QMessageBox.Yes:
            self.session_ctrl.save_dpp()
    new_filename_chi = filelist_chi[idx_chi_new]
    new_filename_dpp = make_filename(new_filename_chi, 'dpp')
    idx = find_from_filelist(filelist_dpp, new_filename_dpp)
    if idx == -1:  # no pre-existing dpp
        # check the checkbox for autogenerate
        if self.widget.checkBox_AutoGenDPP.isChecked():
            self.base_ptn_ctrl._load_a_new_pattern(new_filename_chi)
            self.session_ctrl.save_dpp(quiet=True)
            self.model.clear_section_list()
            self.plot_ctrl.update()
        else:
            QtWidgets.QMessageBox.warning(
                self.widget, "Warning",
                "Cannot find pre-existing dpp.\n" +
                "Consider Create with Move function.")
            return
    else:  # pre-existing dpp
        # ask whether to overwrite or not
        if self.widget.checkBox_AutoGenDPP.isChecked() and \
                (not self.widget.checkBox_AutogenMissing.isChecked()):
            reply = QtWidgets.QMessageBox.question(
                self.widget, 'Message',
                "The next pattern already has a dpp.\n" +
                "If you want to overwrite the existing one based" +
                " on the dpp of the last pattern, choose YES.\n" +
                "If you want to keep and open the existing dpp, choose NO.",
                QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
                QtWidgets.QMessageBox.No)
            if reply == QtWidgets.QMessageBox.Yes:
                self.base_ptn_ctrl._load_a_new_pattern(new_filename_chi)
                self.session_ctrl.save_dpp(quiet=True)
                self.model.clear_section_list()
                self.plot_ctrl.update()
            else:  # load the existing dpp
                success = self.session_ctrl._load_dpp(new_filename_dpp)
                if success:
                    if self.model.exist_in_waterfall(self.model.base_ptn.fname):
                        self.widget.pushButton_AddBasePtn.setChecked(True)
                    else:
                        self.widget.pushButton_AddBasePtn.setChecked(False)
                    if self.widget.checkBox_ShowCake.isChecked():
                        self.session_ctrl._load_cake_format_file()
                    self.plot_ctrl.update()
                else:
                    QtWidgets.QMessageBox.warning(
                        self.widget, "Warning",
                        "DPP loading was not successful.")
                    return
        else:  # simply open the next existing one
            success = self.session_ctrl._load_dpp(new_filename_dpp)
            if success:
                if self.model.exist_in_waterfall(self.model.base_ptn.fname):
                    self.widget.pushButton_AddBasePtn.setChecked(True)
                else:
                    self.widget.pushButton_AddBasePtn.setChecked(False)
                if self.widget.checkBox_ShowCake.isChecked():
                    self.session_ctrl._load_cake_format_file()
                self.plot_ctrl.update()
            else:
                QtWidgets.QMessageBox.warning(
                    self.widget, "Warning",
                    "DPP loading was not successful.")
                return
    self.jcpdstable_ctrl.update()
    self.peakfit_table_ctrl.update_sections()
    self.peakfit_table_ctrl.update_peak_parameters()
    self.peakfit_table_ctrl.update_baseline_constraints()
    self.peakfit_table_ctrl.update_peak_constraints()
def get_connecting_nodes(diff_start_end_elr_dat, route_name=None, update=False, verbose=False):
    """
    Get data of connecting points for different ELRs.

    :param diff_start_end_elr_dat: data frame where StartELR != EndELR
    :type diff_start_end_elr_dat: pandas.DataFrame
    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of connecting points for different ELRs
    :rtype: pandas.DataFrame

    **Test**::

        from mssqlserver.metex import view_metex_schedule8_incident_locations
        from models.prototype.furlong import get_connecting_nodes

        update = False
        verbose = True

        route_name = None
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name,
                                                update, verbose)
        print(connecting_nodes)

        route_name = 'Anglia'
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name,
                                                update, verbose)
        print(connecting_nodes)
    """
    filename = "connections-between-different-ELRs"
    pickle_filename = make_filename(filename, route_name)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle, verbose=verbose)

    else:
        try:
            pickle_filename_temp = make_filename(filename)
            path_to_pickle_temp = cdd_geodata(pickle_filename_temp)
            if os.path.isfile(path_to_pickle_temp) and not update:
                connecting_nodes_all = load_pickle(path_to_pickle_temp)
                connecting_nodes = get_subset(connecting_nodes_all, route_name)
            else:
                diff_elr_mileages = diff_start_end_elr_dat.drop_duplicates()

                em = ELRMileages()
                if verbose:
                    print("Searching for connecting ELRs ... ", end="")
                mileage_file_dir = cdd_railway_codes("line data\\elrs-and-mileages\\mileages")

                # noinspection PyTypeChecker
                conn_mileages = diff_elr_mileages.apply(
                    lambda x: em.get_conn_mileages(x.StartELR, x.EndELR, update,
                                                   pickle_mileage_file=True,
                                                   data_dir=mileage_file_dir), axis=1)
                if verbose:
                    print("\nFinished.")

                conn_mileages_data = pd.DataFrame(conn_mileages.to_list(),
                                                  index=diff_elr_mileages.index,
                                                  columns=['StartELR_EndMileage', 'ConnELR',
                                                           'ConnELR_StartMileage',
                                                           'ConnELR_EndMileage',
                                                           'EndELR_StartMileage'])

                connecting_nodes = diff_elr_mileages.join(conn_mileages_data)
                connecting_nodes.set_index(['StartELR', 'StartMileage',
                                            'EndELR', 'EndMileage'], inplace=True)

            save_pickle(connecting_nodes, path_to_pickle, verbose=verbose)

            return connecting_nodes

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
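
# A minimal sketch (an assumption, not the source implementation) of the
# make_filename(base_name, *qualifiers) variant used by the furlong functions in
# this section: join the base name with every qualifier that is actually set
# (route name, weather category, shift yards, ...) and append a pickle suffix.
def make_filename_pickle_sketch(base_name, *qualifiers, sep="-", save_as=".pickle"):
    # keep only the non-None qualifiers, stringified, in call order
    parts = [base_name] + [str(q) for q in qualifiers if q is not None]
    return sep.join(parts) + save_as

# e.g. make_filename_pickle_sketch("connections-between-different-ELRs", "Anglia")
#      -> "connections-between-different-ELRs-Anglia.pickle"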
def get_adjusted_mileages_same_start_end_elrs(route_name, weather_category,
                                              shift_yards_same_elr, update=False,
                                              verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR == EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR == EndELR
    :type shift_yards_same_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR == EndELR
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_same_start_end_elrs

        route_name = None
        weather_category = None
        shift_yards_same_elr = 220
        update = True
        verbose = True

        adj_mileages = get_adjusted_mileages_same_start_end_elrs(
            route_name, weather_category, shift_yards_same_elr, update, verbose)
        print(adj_mileages)
    """
    filename = "adjusted-mileages-same-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_same_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        adj_mileages = load_pickle(path_to_pickle)
        return adj_mileages

    else:
        try:
            # Get data of incident locations where the 'StartELR' and 'EndELR' are THE SAME
            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, start_and_end_elr='same', verbose=verbose)

            # Get furlong information as reference
            ref_furlongs = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            # Calculate adjusted furlong locations for each incident (for vegetation conditions)
            # noinspection PyTypeChecker
            adjusted_mileages = incident_locations.apply(
                lambda x: adjust_incident_mileages(
                    ref_furlongs, x.StartELR, x.StartMileage_num, x.EndMileage_num,
                    shift_yards_same_elr), axis=1)

            # Get adjusted mileage data
            adj_mileages = pd.DataFrame(list(adjusted_mileages),
                                        index=incident_locations.index,
                                        columns=['StartMileage_Adj',
                                                 'EndMileage_Adj',
                                                 'StartMileage_num_Adj',
                                                 'EndMileage_num_Adj',
                                                 'Section_Length_Adj',  # yards
                                                 'Critical_FurlongIDs'])

            save_pickle(adj_mileages, path_to_pickle, verbose=verbose)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
def get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category,
                                              shift_yards_diff_elr, update=False,
                                              verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR != EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR != EndELR
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR != EndELR
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_diff_start_end_elrs

        route_name = None
        weather_category = None
        shift_yards_diff_elr = 220
        update = True
        verbose = True

        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, update, verbose)
        print(adj_mileages)
    """
    filename = "adjusted-mileages-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle)

    else:
        try:
            # Get data for which the 'StartELR' and 'EndELR' are DIFFERENT
            incident_locations_diff_start_end_elr = \
                metex.view_metex_schedule8_incident_locations(
                    route_name, weather_category, start_and_end_elr='diff', verbose=verbose)

            # Get connecting points for different (ELRs, mileages)
            connecting_nodes = get_connecting_nodes(incident_locations_diff_start_end_elr,
                                                    route_name, update=False, verbose=False)

            # Find End Mileage and Start Mileage of StartELR and EndELR, respectively
            locations_conn = incident_locations_diff_start_end_elr.join(
                connecting_nodes.set_index(['StanoxSection'], append=True),
                on=list(connecting_nodes.index.names) + ['StanoxSection'],
                rsuffix='_conn').dropna()
            locations_conn.drop(columns=[x for x in locations_conn.columns if '_conn' in x],
                                inplace=True)

            # Remove the data records where connecting nodes are unknown
            locations_conn = locations_conn[
                ~((locations_conn.StartELR_EndMileage == '') |
                  (locations_conn.EndELR_StartMileage == ''))]

            # Convert str mileages to num
            num_conn_colnames = ['StartELR_EndMileage_num', 'EndELR_StartMileage_num',
                                 'ConnELR_StartMileage_num', 'ConnELR_EndMileage_num']
            str_conn_colnames = ['StartELR_EndMileage', 'EndELR_StartMileage',
                                 'ConnELR_StartMileage', 'ConnELR_EndMileage']
            locations_conn[num_conn_colnames] = locations_conn[str_conn_colnames].applymap(
                nr_mileage_str_to_num)

            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            adjusted_conn_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.ConnELR, x.ConnELR_StartMileage_num,
                    x.ConnELR_EndMileage_num, 0)
                if x.ConnELR != '' else tuple(['', '', np.nan, np.nan, 0.0, []]), axis=1)

            adjusted_conn_mileages = pd.DataFrame(adjusted_conn_elr_mileages.tolist(),
                                                  index=locations_conn.index,
                                                  columns=['Conn_StartMileage_Adj',
                                                           'ConnELR_EndMileage_Adj',
                                                           'Conn_StartMileage_num_Adj',
                                                           'ConnELR_EndMileage_num_Adj',
                                                           'ConnELR_Length_Adj',  # yards
                                                           'ConnELR_Critical_FurlongIDs'])

            # Processing Start locations
            adjusted_start_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.StartELR, x.StartMileage_num,
                    x.StartELR_EndMileage_num, shift_yards_diff_elr), axis=1)

            # Create a dataframe of adjusted mileage data of the Start ELRs
            adjusted_start_mileages = pd.DataFrame(adjusted_start_elr_mileages.tolist(),
                                                   index=locations_conn.index,
                                                   columns=['StartMileage_Adj',
                                                            'StartELR_EndMileage_Adj',
                                                            'StartMileage_num_Adj',
                                                            'StartELR_EndMileage_num_Adj',
                                                            'StartELR_Length_Adj',  # yards
                                                            'StartELR_Critical_FurlongIDs'])

            # Processing End locations
            adjusted_end_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.EndELR, x.EndELR_StartMileage_num,
                    x.EndMileage_num, shift_yards_diff_elr), axis=1)

            # Create a dataframe of adjusted mileage data of the End ELRs
            adjusted_end_mileages = pd.DataFrame(adjusted_end_elr_mileages.tolist(),
                                                 index=locations_conn.index,
                                                 columns=['EndELR_StartMileage_Adj',
                                                          'EndMileage_Adj',
                                                          'EndELR_StartMileage_num_Adj',
                                                          'EndMileage_num_Adj',
                                                          'EndELR_Length_Adj',  # yards
                                                          'EndELR_Critical_FurlongIDs'])

            # Combine 'adjusted_start_mileages' and 'adjusted_end_mileages'
            adj_mileages = adjusted_start_mileages.join(adjusted_conn_mileages).join(
                adjusted_end_mileages)
            adj_mileages.dropna(subset=['StartMileage_num_Adj', 'EndMileage_num_Adj'],
                                inplace=True)

            adj_mileages['Section_Length_Adj'] = list(zip(
                adj_mileages.StartELR_Length_Adj, adj_mileages.ConnELR_Length_Adj,
                adj_mileages.EndELR_Length_Adj))
            adj_mileages['Critical_FurlongIDs'] = \
                adj_mileages.StartELR_Critical_FurlongIDs + \
                adj_mileages.EndELR_Critical_FurlongIDs + \
                adj_mileages.ConnELR_Critical_FurlongIDs
            adj_mileages.Critical_FurlongIDs = adj_mileages.Critical_FurlongIDs.map(
                lambda x: list(set(x)))

            save_pickle(adj_mileages, path_to_pickle, verbose=update)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
def get_incident_location_furlongs(route_name=None, weather_category=None,
                                   shift_yards_same_elr=220, shift_yards_diff_elr=220,
                                   update=False, verbose=False):
    """
    Get data of furlongs for incident locations.

    :param route_name: name of a Route; if ``None`` (default), all available Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default),
        all available weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for
        adjustment, given that ``StartELR == EndELR``, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that ``StartELR != EndELR``, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of furlongs for incident locations
    :rtype: pandas.DataFrame or None

    **Test**::

        >>> from coordinator.furlong import get_incident_location_furlongs

        >>> il_furlongs = get_incident_location_furlongs(update=True, verbose=True)
        >>> il_furlongs.tail()

        >>> il_furlongs = get_incident_location_furlongs(route_name='Anglia',
        ...                                              update=True, verbose=True)
        >>> il_furlongs.tail()
    """
    filename = "incident-location-furlongs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_same_elr, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        incident_location_furlongs = load_pickle(path_to_pickle)

    else:
        try:
            use_col_names = ['Section_Length_Adj', 'Critical_FurlongIDs']

            adjusted_mileages_same_start_end_elrs = get_adjusted_mileages_same_start_end_elrs(
                route_name, weather_category, shift_yards_same_elr, verbose=verbose)
            ilf_same = adjusted_mileages_same_start_end_elrs[use_col_names]

            adjusted_mileages_diff_start_end_elrs = get_adjusted_mileages_diff_start_end_elrs(
                route_name, weather_category, shift_yards_diff_elr, verbose=verbose)
            ilf_diff = adjusted_mileages_diff_start_end_elrs[use_col_names]

            furlongs_dat = pd.concat([ilf_same, ilf_diff])

            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, verbose=verbose)

            # Merge the above data sets
            incident_location_furlongs = incident_locations.join(furlongs_dat, how='right')
            incident_location_furlongs.drop(['StartMileage_num', 'EndMileage_num'],
                                            axis=1, inplace=True)
            incident_location_furlongs.index = range(len(incident_location_furlongs))

            save_pickle(incident_location_furlongs, path_to_pickle, verbose=verbose)

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
            incident_location_furlongs = None

    return incident_location_furlongs
def make_filename(self, extension, original=False):
    """
    :param extension: extension without a dot
    """
    return make_filename(self.base_ptn.fname, extension, original=original)
def get_furlongs_diff_start_end_elrs(route_name=None, weather_category=None,
                                     shift_yards_diff_elr=220, update=False, verbose=False):
    """
    Get furlongs data for incident locations each identified by different start and end
    ELRs, i.e. StartELR != EndELR.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default), all weather categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: furlongs data of incident locations each identified by different start and
        end ELRs
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_furlongs_diff_start_end_elrs

        route_name = None
        weather_category = None
        shift_yards_diff_elr = 220
        update = True
        verbose = True

        furlongs_diff_start_end_elr = get_furlongs_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, update, verbose)
        print(furlongs_diff_start_end_elr)
    """
    filename = "furlongs-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_diff_start_end_elr = load_pickle(path_to_pickle)
        return furlongs_diff_start_end_elr

    else:
        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, verbose=verbose)

        try:
            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            # Form a list containing all the furlong IDs
            furlong_ids = list(set(itertools.chain(*adj_mileages.Critical_FurlongIDs)))

            # Select critical (i.e. incident) furlongs
            furlongs_diff_start_end_elr = nr_furlong_data.loc[furlong_ids]

            # Save 'furlongs_diff_start_end_elr'
            save_pickle(furlongs_diff_start_end_elr, path_to_pickle, verbose=verbose)

            return furlongs_diff_start_end_elr

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
def perform_ucfit(self):
    # get jcpds data in df. use display to choose data points
    if self.model.section_lst == []:
        QtWidgets.QMessageBox.warning(
            self.widget, "Warning",
            "No peak fitting result exists for this file.")
        return
    if self.phase is None:
        QtWidgets.QMessageBox.warning(
            self.widget, "Warning",
            "No phase has been chosen for fitting")
        return
    data_by_phase_df = self._get_all_peakfit_results_df()
    data_to_fit_df = data_by_phase_df[self.phase].loc[
        data_by_phase_df[self.phase]['display'] == True]
    # number of data points check
    n_data_points = len(data_to_fit_df.index)
    if n_data_points < 2:
        QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                      "You need at least 2 data points.")
        return
    # perform ucfit
    text_output = self.phase + '\n\n'
    text_output += 'Fitted unit cell parameters \n'
    text_output += 'Crystal system = ' + \
        self.widget.comboBox_Symmetry.currentText() + '\n'
    wavelength = self.model.get_base_ptn_wavelength()
    if self.widget.comboBox_Symmetry.currentText() == 'cubic':
        a, s_a, v, s_v, res_lin, res_nlin = fit_cubic_cell(
            data_to_fit_df, wavelength, verbose=False)
        cell_params = [a, a, a]
        text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
        text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
    elif self.widget.comboBox_Symmetry.currentText() == 'tetragonal':
        if n_data_points < 3:
            QtWidgets.QMessageBox.warning(
                self.widget, "Warning",
                "You need at least 3 data points for tetragonal.")
            return
        a, s_a, c, s_c, v, s_v, res_lin, res_nlin = \
            fit_tetragonal_cell(data_to_fit_df, wavelength, verbose=False)
        cell_params = [a, a, c]
        text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
        text_output += "c = {0:.5f} +/- {1:.5f} \n".format(c, s_c)
        text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
    elif self.widget.comboBox_Symmetry.currentText() == 'hexagonal':
        if n_data_points < 3:
            QtWidgets.QMessageBox.warning(
                self.widget, "Warning",
                "You need at least 3 data points for hexagonal.")
            return
        a, s_a, c, s_c, v, s_v, res_lin, res_nlin = \
            fit_hexagonal_cell(data_to_fit_df, wavelength, verbose=False)
        cell_params = [a, a, c]
        text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
        text_output += "c = {0:.5f} +/- {1:.5f} \n".format(c, s_c)
        text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
    elif self.widget.comboBox_Symmetry.currentText() == 'orthorhombic':
        if n_data_points < 4:
            QtWidgets.QMessageBox.warning(
                self.widget, "Warning",
                "You need at least 4 data points for orthorhombic.")
            return
        a, s_a, b, s_b, c, s_c, v, s_v, res_lin, res_nlin = \
            fit_orthorhombic_cell(data_to_fit_df, wavelength, verbose=False)
        cell_params = [a, b, c]
        text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
        text_output += "b = {0:.5f} +/- {1:.5f} \n".format(b, s_b)
        text_output += "c = {0:.5f} +/- {1:.5f} \n".format(c, s_c)
        text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
    # output results
    output_df = make_output_table(res_lin, res_nlin, data_to_fit_df)
    text_output += 'Output table\n'
    text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                              'twoth residue']].to_string()
    text_output += '\n\nHat: influence for the fit result. \n'
    text_output += '    1 ~ large influence, 0 ~ no influence.\n'
    text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp', 'hat']].to_string()
    text_output += '\n\nRstudent: how much the parameter would change' + \
        ' if deleted.\n'
    text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                              'Rstudent']].to_string()
    text_output += '\n\ndfFits: deletion diagnostic giving the change in\n'
    text_output += '    the predicted value twotheta\n'
    text_output += '    upon deletion of the data point as a multiple of\n'
    text_output += '    the standard deviation for 1/d-spacing^2.\n'
    text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                              'dfFits']].to_string()
    text_output += '\n\ndfBetas: normalized residual\n'
    text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                              'dfBetas']].to_string()
    text_output += '\n\nNon-linear fit statistics \n'
    text_output += lmfit.fit_report(res_nlin)
    self.widget.plainTextEdit_UCFitOutput.setPlainText(text_output)
    # save jcpds and output file automatically.
    # ask for filename; at the moment, simply overwrite
    temp_dir = get_temp_dir(self.model.get_base_ptn_filename())
    ext = "ucfit.jcpds"
    filen_t = make_filename(self.template_jcpds.file, ext, temp_dir=temp_dir)
    filen_j = dialog_savefile(self.widget, filen_t)
    if str(filen_j) == '':
        return
    self._write_to_jcpds(filen_j, cell_params)
    # write to a text file
    ext = "ucfit.output"
    filen_t = make_filename(self.template_jcpds.file, ext, temp_dir=temp_dir)
    filen_o = dialog_savefile(self.widget, filen_t)
    if str(filen_o) == '':
        return
    with open(filen_o, "w") as f:
        f.write(text_output)
def train(self, train_data, dev_data):
    """Train function for the neural network

    This method trains the relation classification neural network. All
    parameters are contained in the configuration file. Multiple metrics are
    computed within this function, including a classification report
    (per-class precision, recall, and f1), hits@3, hits@10, and mean
    reciprocal rank. If the command line parameter to log the metrics is
    passed, all metrics, including the loss for training and development
    sets, are logged using mlflow.

    Args:
        train_data: training data composed of (embeddings, labels, indices)
        dev_data: development data composed of (embeddings, labels, indices)
    """
    # Epoch level metrics
    epoch_loss = []
    epoch_train_metrics = []
    epoch_train_f1_metrics = []
    epoch_train_hits3 = []
    epoch_train_hits10 = []
    epoch_train_mrr = []
    epoch_dev_metrics = []
    epoch_dev_f1_metrics = []
    epoch_dev_report_metrics = []
    epoch_dev_hits3 = []
    epoch_dev_hits10 = []
    epoch_dev_mrr = []
    classes = list(range(281))  # 281 relation classes

    batches = utils.split_into_batches(train_data, int(self.config["batch_size"]))
    step = 0  # step counter for mlflow logging

    # Train model for a number of epochs
    for epoch in range(int(self.config["epochs"])):
        start_time = time.time()

        # Batch level metrics
        train_loss = 0
        train_metrics = []
        train_f1_metrics = []
        train_hits3 = []
        train_hits10 = []
        train_mrr = []
        dev_loss = 0
        dev_metrics = []
        dev_f1_metrics = []
        dev_hits3 = []
        dev_hits10 = []
        dev_mrr = []

        # Run training in batches
        for i, batch in enumerate(batches):
            step += 1
            # Reset gradients accumulated from the previous batch
            self.optimizer.zero_grad()
            embs, rels, idxs = (
                torch.tensor(batch[0]),
                torch.tensor(batch[1], dtype=torch.long),
                torch.tensor(batch[2]),
            )
            embs, rels = embs.to(self.device), rels.to(self.device)
            outputs = self.model(embs)
            loss = F.cross_entropy(outputs, rels)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()

            # Compute train metrics
            num_corrects = (outputs.argmax(1) == rels).sum().item()
            train_acc = num_corrects / len(rels)
            train_metrics.append(train_acc)
            train_f1 = f1_score(
                rels.tolist(),
                outputs.argmax(1).tolist(),
                average="macro",
            )
            train_f1_metrics.append(train_f1)
            train_ranks = self.get_ranks(outputs, rels.tolist())
            _train_hits3 = metrics.hits_at_k(train_ranks, k=3)
            _train_hits10 = metrics.hits_at_k(train_ranks, k=10)
            train_hits3.append(_train_hits3)
            train_hits10.append(_train_hits10)
            _train_mrr = metrics.mrr(train_ranks)
            train_mrr.append(_train_mrr)

            # Compute development metrics every 500 batches
            if i % 500 == 0:
                with torch.no_grad():
                    dev_embs, dev_rels, dev_idxs = dev_data
                    dev_embs, dev_rels, dev_idxs = (
                        torch.tensor(dev_embs),
                        torch.tensor(dev_rels, dtype=torch.long),
                        torch.tensor(dev_idxs),
                    )
                    dev_embs, dev_rels = (
                        dev_embs.to(self.device),
                        dev_rels.to(self.device),
                    )
                    dev_outputs = self.model(dev_embs)
                    _loss = F.cross_entropy(dev_outputs, dev_rels)
                    dev_loss += _loss.item()
                    num_corrects = (dev_outputs.argmax(1) == dev_rels).sum().item()
                    dev_acc = num_corrects / len(dev_rels)
                    dev_metrics.append(dev_acc)
                    dev_f1 = f1_score(
                        dev_rels.tolist(),
                        dev_outputs.argmax(1).tolist(),
                        average="macro",
                    )
                    dev_f1_metrics.append(dev_f1)
                    dev_report = metrics.get_classification_report(
                        dev_rels.tolist(), dev_outputs.argmax(1).tolist())
                    epoch_dev_report_metrics.append(dev_report)
                    dev_ranks = self.get_ranks(dev_outputs, dev_rels.tolist())
                    _dev_hits3 = metrics.hits_at_k(dev_ranks, k=3)
                    _dev_hits10 = metrics.hits_at_k(dev_ranks, k=10)
                    dev_hits3.append(_dev_hits3)
                    dev_hits10.append(_dev_hits10)
                    _dev_mrr = metrics.mrr(dev_ranks)
                    dev_mrr.append(_dev_mrr)

                # Log metrics
                if self.params.log_metrics:
                    mlflow.log_metric("Train loss", train_loss / len(train_metrics),
                                      step=step)
                    mlflow.log_metric("Train acc", train_acc, step=step)
                    mlflow.log_metric("Train hitsat3", _train_hits3, step=step)
                    mlflow.log_metric("Train hitsat10", _train_hits10, step=step)
                    mlflow.log_metric("Train mrr", _train_mrr, step=step)
                    mlflow.log_metric("Dev loss", dev_loss / len(dev_metrics), step=step)
                    mlflow.log_metric("Dev acc", dev_acc, step=step)
                    mlflow.log_metric("Dev f1", dev_f1, step=step)
                    mlflow.log_metric("Dev hitsat3", _dev_hits3, step=step)
                    mlflow.log_metric("Dev hitsat10", _dev_hits10, step=step)
                    mlflow.log_metric("Dev mrr", _dev_mrr, step=step)

        secs = int(time.time() - start_time)
        mins = secs // 60
        secs = secs % 60

        self.scheduler.step()

        epoch_loss.append(train_loss / len(train_metrics))
        epoch_train_metrics.append(sum(train_metrics) / len(train_metrics))
        epoch_train_f1_metrics.append(sum(train_f1_metrics) / len(train_f1_metrics))
        epoch_train_hits3.append(sum(train_hits3) / len(train_hits3))
        epoch_train_hits10.append(sum(train_hits10) / len(train_hits10))
        epoch_train_mrr.append(sum(train_mrr) / len(train_mrr))
        epoch_dev_metrics.append(sum(dev_metrics) / len(dev_metrics))
        epoch_dev_f1_metrics.append(sum(dev_f1_metrics) / len(dev_f1_metrics))
        epoch_dev_hits3.append(sum(dev_hits3) / len(dev_hits3))
        epoch_dev_hits10.append(sum(dev_hits10) / len(dev_hits10))
        epoch_dev_mrr.append(sum(dev_mrr) / len(dev_mrr))

        # Print out results
        if self.params.print_output:
            print("Epoch: %d" % (epoch + 1),
                  " | time in %d minutes, %d seconds" % (mins, secs))
            print(f"\tEpoch avg Loss: {sum(epoch_loss)/len(epoch_loss):.4f} (train)")
            print(f"\tEpoch avg Acc: {sum(epoch_train_metrics)/len(epoch_train_metrics):.4f} (train)")
            print(f"\tEpoch avg F1: {sum(epoch_train_f1_metrics)/len(epoch_train_f1_metrics):.4f} (train)")
            print(f"\tEpoch avg hitsat3: {sum(epoch_train_hits3)/len(epoch_train_hits3):.4f} (train)")
            print(f"\tEpoch avg hitsat10: {sum(epoch_train_hits10)/len(epoch_train_hits10):.4f} (train)")
            print(f"\tEpoch avg mrr: {sum(epoch_train_mrr)/len(epoch_train_mrr):.4f} (train)")
            print(f"\tEpoch avg Acc: {sum(epoch_dev_metrics)/len(epoch_dev_metrics):.4f} (dev)")
            print(f"\tEpoch avg F1: {sum(epoch_dev_f1_metrics)/len(epoch_dev_f1_metrics):.4f} (dev)")
            print(f"\tEpoch avg hitsat3: {sum(epoch_dev_hits3)/len(epoch_dev_hits3):.4f} (dev)")
            print(f"\tEpoch avg hitsat10: {sum(epoch_dev_hits10)/len(epoch_dev_hits10):.4f} (dev)")
            print(f"\tEpoch avg mrr: {sum(epoch_dev_mrr)/len(epoch_dev_mrr):.4f} (dev)")
            print(f"\tLast Acc: {epoch_dev_metrics[-1]:.4f} (dev)")
            print(f"\tLast F1: {epoch_dev_f1_metrics[-1]:.4f} (dev)")
            print(f"\tEpoch last hitsat3: {epoch_dev_hits3[-1]:.4f} (dev)")
            print(f"\tEpoch last hitsat10: {epoch_dev_hits10[-1]:.4f} (dev)")
            print(f"\tEpoch last mrr: {epoch_dev_mrr[-1]:.4f} (dev)")

        # Log metrics
        if self.params.log_metrics:
            mlflow.log_metric("Epoch Loss", sum(epoch_loss) / len(epoch_loss),
                              step=epoch + 1)
            mlflow.log_metric("Epoch Avg Acc train",
                              sum(epoch_train_metrics) / len(epoch_train_metrics),
                              step=epoch + 1)
            mlflow.log_metric("Epoch Avg F1 train",
                              sum(epoch_train_f1_metrics) / len(epoch_train_f1_metrics),
                              step=epoch + 1)
            mlflow.log_metric("Epoch Hits at3 train", epoch_train_hits3[-1],
                              step=epoch + 1)
            mlflow.log_metric("Epoch Hits at10 train", epoch_train_hits10[-1],
                              step=epoch + 1)
            mlflow.log_metric("Epoch MRR train", epoch_train_mrr[-1], step=epoch + 1)
            mlflow.log_metric("Epoch Avg Acc dev",
                              sum(epoch_dev_metrics) / len(epoch_dev_metrics),
                              step=epoch + 1)
            mlflow.log_metric("Epoch Avg F1 dev",
                              sum(epoch_dev_f1_metrics) / len(epoch_dev_f1_metrics),
                              step=epoch + 1)
            mlflow.log_metric("Epoch Avg MRR dev",
                              sum(epoch_dev_mrr) / len(epoch_dev_mrr),
                              step=epoch + 1)
            mlflow.log_metric("Epoch Acc dev", epoch_dev_metrics[-1], step=epoch + 1)
            mlflow.log_metric("Epoch F1 dev", epoch_dev_f1_metrics[-1], step=epoch + 1)
            mlflow.log_metric("Epoch Hits at3 dev", epoch_dev_hits3[-1], step=epoch + 1)
            mlflow.log_metric("Epoch Hits at10 dev", epoch_dev_hits10[-1], step=epoch + 1)
            mlflow.log_metric("Epoch MRR dev", epoch_dev_mrr[-1], step=epoch + 1)

    # Print final report
    if self.params.print_output:
        print(epoch_dev_report_metrics[-1])

    # Save model
    if not os.path.isdir("saved_models"):
        os.mkdir("saved_models")
    if not self.model_path:
        self.model_path = "saved_models/" + utils.make_filename(self.config)
    else:
        # Append a timestamp so an existing model file is not overwritten
        self.model_path = self.model_path.split(".pt")[0] + "_" + \
            time.strftime("%Y%m%d-%H%M%S") + ".pt"
    self.save_model()

    # Save classification report
    report_path = self.model_path.split(".pt")[0] + ".dev.report.tsv"
    report = metrics.get_classification_report(
        dev_rels.tolist(), dev_outputs.argmax(1).tolist(), output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    report_df.to_csv(report_path, sep="\t", index=False)
def make_temp_filenames(self, temp_dir=None):
    bgsub_filen = make_filename(self.fname, 'bgsub.chi', temp_dir=temp_dir)
    bg_filen = make_filename(self.fname, 'bg.chi', temp_dir=temp_dir)
    return bgsub_filen, bg_filen
def test_make_filename_init(self, fname):
    res = utils.make_filename(fname, init_fname="init")
    assert res == "parent\\init.ext"
def test_make_filename(self, fname):
    res = utils.make_filename(fname, ".new_ext", parent="new_parent")
    assert res == "new_parent\\file.new_ext"
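
# A sketch of a make_filename(fname, ext='', parent=None, init_fname=None)
# utility that satisfies the two tests above. It assumes the fname fixture is a
# Windows-style path like "parent\\file.ext"; the real utils implementation may
# differ, but the tests pin down this observable behavior.
import ntpath  # Windows path semantics regardless of host OS

def make_filename_test_sketch(fname, ext="", parent=None, init_fname=None):
    dirname, basename = ntpath.split(fname)
    stem, old_ext = ntpath.splitext(basename)
    if init_fname is not None:
        stem = init_fname     # replace the file stem entirely
        ext = ext or old_ext  # keep the original extension by default
    if parent is not None:
        dirname = parent      # redirect into another directory
    return ntpath.join(dirname, stem + ext)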
from train import model_fn_builder, FLAGS
from models.rnn_lstm import create_rnn_lstm_model, LSTMConfig
from models.cnn import CNNConfig, create_cnn_model
from models.cnn_keras import CNNKerasConfig, create_cnnKeras_model
from models.contextualized_cnn import create_contextualized_cnn_model, ContextualizedCNNConfig
from models.fully_connected import create_fully_connected_model, FullyConnectedConfig

DATA_BERT_DIRECTORY = FLAGS.data_bert_directory
BERT_CONFIG_FILE = "%s/bert_config.json" % DATA_BERT_DIRECTORY
bert_config = BertConfig.from_json_file(BERT_CONFIG_FILE)

INIT_CHECKPOINT = FLAGS.output_dir
if FLAGS.init_checkpoint is not None:
    INIT_CHECKPOINT = '%s/%s' % (FLAGS.output_dir, FLAGS.init_checkpoint)

DEV_FILENAME = make_filename('dev', 1., FLAGS.features_dir, FLAGS.fine_tune,
                             FLAGS.n_examples)
print('DEV_FILENAME %s' % DEV_FILENAME)

RawResult = collections.namedtuple("RawResult",
                                   ["unique_id", "start_logits", "end_logits"])


def load_and_save_config(filename):
    with tf.gfile.GFile(filename, 'r') as json_data:
        parsed = json.load(json_data)
        parsed['max_seq_length'] = FLAGS.max_seq_length
        parsed['bert_config'] = bert_config
        create_model = None
        config_class = None
        if parsed['model'] == 'lstm':
tf.flags.DEFINE_bool("do_lower_case", True, "Bert embeddings are lower cased.")

DATA_BERT_DIRECTORY = FLAGS.data_bert_directory
BERT_CONFIG_FILE = "%s/bert_config.json" % DATA_BERT_DIRECTORY
OUTPUT_DIR = FLAGS.output_dir + "/" + datetime.now().isoformat()

INIT_CHECKPOINT = None
if FLAGS.fine_tune:
    INIT_CHECKPOINT = '%s/bert_model.ckpt' % DATA_BERT_DIRECTORY
elif FLAGS.init_checkpoint is not None:
    INIT_CHECKPOINT = '%s/%s' % (OUTPUT_DIR, FLAGS.init_checkpoint)

N_TRAIN_EXAMPLES = FLAGS.n_examples
TRAIN_FILE_NAME = make_filename('train', (1.0 - FLAGS.eval_percent),
                                FLAGS.features_dir, FLAGS.fine_tune,
                                N_TRAIN_EXAMPLES)
EVAL_FILE_NAME = make_filename('eval', FLAGS.eval_percent, FLAGS.features_dir,
                               FLAGS.fine_tune, N_TRAIN_EXAMPLES)

bert_config = BertConfig.from_json_file(BERT_CONFIG_FILE)
N_TOTAL_SQUAD_EXAMPLES = 130319


def load_and_save_config(filename):
    with tf.gfile.GFile(filename, 'r') as json_data:
        parsed = json.load(json_data)
        parsed['max_seq_length'] = FLAGS.max_seq_length
        parsed['bert_config'] = bert_config.to_dict()
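
# A minimal sketch (an assumption; the real format lives in the project's own
# utils) of the make_filename(set_name, percent, features_dir, fine_tune,
# n_examples) builder behind TRAIN_FILE_NAME / EVAL_FILE_NAME above: it encodes
# the split, the share of examples, the fine-tune flag, and the example cap
# into a single TFRecord path.
def make_filename_features_sketch(set_name, percent, features_dir,
                                  fine_tune=False, n_examples=None):
    mode = "fine_tuned" if fine_tune else "frozen"
    n_part = "all" if n_examples is None else str(n_examples)
    return "%s/%s_%0.2f_%s_%s.tf_record" % (
        features_dir, set_name, percent, mode, n_part)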
def get_furlongs_data(route_name=None, weather_category=None, shift_yards_same_elr=220,
                      shift_yards_diff_elr=220, update=False, verbose=False) -> pd.DataFrame:
    """
    Get furlongs data.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category, defaults to ``None``
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR == EndELR, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for
        adjustment, given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of furlongs for incident locations
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_furlongs_data

        weather_category = None
        shift_yards_same_elr = 220
        shift_yards_diff_elr = 220
        update = True
        verbose = True

        route_name = None
        furlongs_data = get_furlongs_data(route_name, weather_category,
                                          shift_yards_same_elr, shift_yards_diff_elr,
                                          update, verbose)
        print(furlongs_data)

        route_name = 'Anglia'
        furlongs_data = get_furlongs_data(route_name, weather_category,
                                          shift_yards_same_elr, shift_yards_diff_elr,
                                          update, verbose)
        print(furlongs_data)
    """
    filename = "furlongs"
    pickle_filename = make_filename(filename, route_name, weather_category,
                                    shift_yards_same_elr, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_data = load_pickle(path_to_pickle)

    else:
        try:
            # Data of incident furlongs: both start and end identified by the same ELR
            furlongs_data_same_elr = get_furlongs_same_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_same_elr=shift_yards_same_elr, verbose=verbose)

            # Data of incident furlongs: start and end identified by different ELRs
            furlongs_data_diff_elr = get_furlongs_diff_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_diff_elr=shift_yards_diff_elr, verbose=verbose)

            # Merge the above two data sets
            furlongs_data = furlongs_data_same_elr.append(furlongs_data_diff_elr)
            furlongs_data.drop_duplicates(['AssetNumber', 'StructuredPlantNumber'],
                                          inplace=True)
            furlongs_data.sort_index(inplace=True)

            save_pickle(furlongs_data, path_to_pickle, verbose=verbose)

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
            furlongs_data = None

    return furlongs_data