def get_pokestops(request):
    # return HttpResponse("")
    args = {
        "csrftoken": "KrC5dILDzSfyGYTdOuk0P7ZNanrVRNsv",
        "SACSID": """~AJKiYcG_GzhmrIH-wHeFC1QE2U540HyYy8n7WK1uwi_6jTgLzPS5UFWrfvxe1XiSfbSEoN1L6jY8BWxX7zdPM_1FVyv4jwbwqfd1FS-HcL8IhD769flzqNpWX5EmbdnWFje81MNPOW2NYRCygRd8PHBXjKMVT_O0cM5ZXy8Jg5Tm75WYJ8tKEBeIRq8UeeriC-avEbgUlRnIqw868OUY2K6NTu6V15Lz-5GSlGwGWxkPzwaXfQr6iA1oLU-tjitfp5RsRFNZM3Ai84dH-tpgxcMAZEFM5co1N69T-Mr2-jxCgzBfD9x_lH4PUty-oyjAR90Z7sS8CzSo"""
    }
    coordinates = get_coords()
    total = len(coordinates)
    count = 0
    for c in coordinates:
        count += 1
        args['latitude'] = c[0]
        args['longitude'] = c[1]
        pokestop = Pokestop(args)
        pokestops = pokestop.entities()
        for p in pokestops:
            q = Poke()
            q.guid = p.get('guid')
            q.latitude = p.get('latitude')
            q.longitude = p.get('longitude')
            q.image = p.get('image')
            q.name = p.get('name')
            q.save()
        print_progress(count, total)
        time.sleep(1)
    return HttpResponse('success')
def save_pokestops():
    settings.configure()
    args = {
        "csrftoken": "3DX2hXU05R1u9SJNJetgemM3GYHE9V3I",
        "SACSID": """~AJKiYcG3G0qMy0FrvJ1ZetpfHv3FhKc_gb1bw909JXFmrzhIZR6xMW1dlxB4xyG0_rX_ZKCa2Zsrv9N0Mx_c5_uRp0TzRodIH8IxwjO49nJr07XBPypIrwUCmphMvdJhzVPdVePsGIEWRETQTjP2qingSLN0l3acas4Fw7HFDFFJSBGVOymIfEOQmbwvKbB0tLMn4ZHIPGPqNBeRHaoMstBuiyRjUsmZp5YSWcdo6sOUiCWehR2Ab0nvFKE6S_uxjvYCAV_avud7_F-kKUbNEiudZAaPwtKFUEMFDhdev5Gmyw7NnR7ektCZfmigZ3TVsc5szPVFDD3CuGwI_r2y_PZ8jB67DuOfww"""
    }
    # db_test_server = MySQLdb.connect(user="******", passwd="India123", db='hifitestdb',
    #                                  host='hifidbinstance.cvc882wdgyyf.us-east-1.rds.amazonaws.com')
    db_test_server = MySQLdb.connect(user="******", passwd="'", db="hifi")
    cur = db_test_server.cursor()
    coordinates = get_coords()
    total = len(coordinates)
    count = 0
    for c in coordinates:
        count += 1
        args['latitude'] = c[0]
        args['longitude'] = c[1]
        pokestop = Pokestop(args)
        pokestops = pokestop.entities()
        for p in pokestops:
            # Use parameterized placeholders so MySQLdb escapes the values.
            cur.execute(
                """INSERT INTO `pokemongo_pokestop`
                   (`distance`, `name`, `bearing`, `latitude`, `longitude`, `image`, `guid`, `compass`)
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s)""",
                (str(p.get('distance')), str(p.get('name')), str(p.get('bearing')),
                 str(p.get('latitude')), str(p.get('longitude')), str(p.get('image')),
                 str(p.get('guid')), str(p.get('compass'))))
        print_progress(count, total)
        time.sleep(2)
    db_test_server.commit()
    return True
def dataset_to_tfrecords(self, data, output_path):
    # Number of images. Used when printing the progress.
    num_images = len(data['image'])

    # TODO: aSk: Handle case when images do not fit batch
    batch_size = self.config.tfr_images_per_record
    iters = int(num_images / batch_size)

    for iter_no in range(iters):
        idx_start = iter_no * batch_size
        idx_end = idx_start + batch_size
        output_path_mod = os.path.join(output_path, 'record_' + str(iter_no) + '.tfr')
        self.create_tfrecord(data, idx_start, idx_end, output_path_mod, iter_no)

        # Print the percentage-progress.
        utils.print_progress(count=idx_start, total=num_images)
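# The snippets in this collection all call a print_progress helper, but its
# implementation is not shown here and its signature varies between projects
# (count/total keywords, an extra percentages flag, or a logger argument).
# A minimal, hypothetical sketch for the count/total style used above,
# assuming plain stdout output:
import sys

def print_progress(count, total):
    """Print an in-place percentage progress indicator to stdout."""
    pct_complete = float(count) / total
    msg = "\r- Progress: {0:.1%}".format(pct_complete)
    sys.stdout.write(msg)
    sys.stdout.flush()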
def add_missing_edge_geometries(graph, edge_dicts):
    edge_count = len(edge_dicts)
    for idx, edge_d in enumerate(edge_dicts):
        if ('geometry' not in edge_d):
            node_from = edge_d['uvkey'][0]
            node_to = edge_d['uvkey'][1]
            # interpolate missing geometry as straight line between nodes
            edge_geom = get_edge_geom_from_node_pair(graph, node_from, node_to)
            # set geometry attribute of the edge
            nx.set_edge_attributes(graph, {edge_d['uvkey']: {'geometry': edge_geom}})
            # set length attribute from the interpolated geometry
            nx.set_edge_attributes(graph, {edge_d['uvkey']: {'length': round(edge_geom.length, 3)}})
        utils.print_progress(idx + 1, edge_count, percentages=True)
    print('\nEdge geometries & lengths set.')
def dataset_to_tfrecords(self, image_paths, labels, output_path):
    num_images = len(image_paths)
    batch_size = self.config.tfr_images_per_record
    iters = int(num_images / batch_size)

    print('\nnum_images: {}'.format(num_images))
    print('batch_size: {}'.format(batch_size))

    idx_start = 0
    idx_end = 0
    for iter_no in range(iters):
        idx_start = iter_no * batch_size
        idx_end = idx_start + batch_size
        print('\nidx:[{}-{}]'.format(idx_start, idx_end))
        output_path_mod = os.path.join(output_path, 'record_' + str(iter_no) + '.tfr')
        self.create_tfrecord(image_paths, labels, idx_start, idx_end, output_path_mod, iter_no)

        # Print the percentage-progress.
        utils.print_progress(count=idx_start, total=num_images)

    # For images < batch_size and
    # for images which do not fit the last batch
    idx_start = iters * batch_size
    idx_end = idx_start + (num_images % batch_size)
    print('\nidx:[{}-{}]'.format(idx_start, idx_end))
    if (num_images % batch_size):
        output_path_mod = os.path.join(output_path, 'record_' + str(iters) + '.tfr')
        self.create_tfrecord(image_paths, labels, idx_start, idx_end, output_path_mod)
        # Print the percentage-progress.
        # utils.print_progress(count=idx_start, total=num_images)

    print('\n')
def populate():
    i = 0
    total_items = len(products['products'])
    print_progress(i, total_items)
    for product in products['products']:
        osigu_product = product['osigu']
        osigu_product['full_name'] += ' - QA TESTING'
        elasticSearchClient.post(index='product', doc_id=osigu_product['id'], body=osigu_product)
        provider_products = providerHandler.create_product(osigu_product)
        insurer_products = insurerHandler.create_product(osigu_product)
        if 'associate_provider' not in product:
            osiguHandler.associate_provider_product(osigu_product['id'], provider_products)
        if 'associate_insurer' not in product:
            osiguHandler.associate_insurer_product(osigu_product['id'], insurer_products)
        if 'diagnoses' in product:
            osiguHandler.create_product_diagnosis_association(osigu_product['id'], product['diagnoses'])
            if 'insurer_product_diagnoses' not in product:
                insurerHandler.create_insurer_product_diagnoses(insurer_products, product['diagnoses'])
        if 'pbm' in product:
            pbmHandler.build_product_categories_statement(product)
            if 'substitute_osigu_product_id' in product['pbm']:
                pbmHandler.build_substitute_products_statement(product)
        i += 1
        print_progress(i, total_items)
    pbmHandler.populate()
def test_distr_centers_with_DT(districts_gdf):
    datetime = times.get_next_weekday_datetime(8, 30, skipdays=7)
    test_latLon = {'lat': 60.23122, 'lon': 24.83998}
    distr_valids = {}
    districts_gdf = districts_gdf.copy()
    for idx, distr in districts_gdf.iterrows():
        utils.print_progress(idx, len(districts_gdf), percentages=False)
        try:
            itins = DT_routing.get_route_itineraries(test_latLon, distr['distr_latLon'], '1.6666', datetime,
                                                     itins_count=3, max_walk_distance=6000)
        except Exception:
            itins = []
        valid = 'yes' if (len(itins) > 0) else 'no'
        distr_valids[distr['id_distr']] = valid
    districts_gdf['DT_valid'] = [distr_valids[id_distr] for id_distr in districts_gdf['id_distr']]
    return districts_gdf
print('Unprocessed:', len(axyinds))

#%% get axyinds to reprocess
axyinds = commutes_utils.get_axyinds_to_reprocess(grid, reprocessed)
axyinds = [axyind for axyind in axyinds if axyind not in reprocessed]
print('Processed:', len(reprocessed), 'axyinds')
print('Unprocessed:', len(axyinds), 'axyinds')

#%% take subset of axyinds to process
#%% axyinds = axyinds[:10]
print('Start processing:', len(axyinds), 'axyinds')

#%% one by one
start_time = time.time()
all_home_walks_dfs = []
for idx, axyind in enumerate(axyinds):
    utils.print_progress(idx, len(axyinds), False)
    print('\nStart processing:', axyind)
    all_home_walks_dfs.append(get_home_walk_gdf(axyind))
    reprocessed.append(axyind)

# print time stats
time_elapsed = round(time.time() - start_time)
avg_origin_time = round(time_elapsed / len(axyinds))
print('--- %s min --- %s' % (round(time_elapsed / 60, 1), 'processed: ' + str(len(axyinds)) + ' origins'))
print('Average origin processing time:', avg_origin_time, 's')

#%% export to GDF for debugging
# all_home_walks_df = pd.concat(all_home_walks_dfs, ignore_index=True)
# all_home_walks_gdf = gpd.GeoDataFrame(all_home_walks_df, geometry='DT_geom', crs=from_epsg(4326))
# all_home_walks_gdf.drop(columns=['DT_dest_Point']).to_file('outputs/YKR_commutes_output/test.gpkg', layer='dt_paths', driver='GPKG')
# all_home_walks_gdf = gpd.GeoDataFrame(all_home_walks_df, geometry='DT_dest_Point', crs=from_epsg(4326))
                     driver="GPKG")
print('exported', filt_edges_filename, 'to data/networks.gpkg')

# get edge gdf from graph
edge_gdf = nw.get_edge_gdf(graph, by_nodes=True, attrs=['geometry', 'length', 'osmid'])
# add osmid as string to edge gdfs
edge_gdf['osmid_str'] = [nw.osmid_to_string(osmid) for osmid in edge_gdf['osmid']]

#%% 3.3 Find matching (unwalkable) edges from the graph
edges_to_rm = []
edges_to_rm_gdfs = []
for idx, filt_edge in filt_edge_gdf.iterrows():
    utils.print_progress(idx, len(filt_edge_gdf), percentages=True)
    edges_found = edge_gdf.loc[edge_gdf['osmid_str'].str.contains(filt_edge['osmid_str'])].copy()
    if (len(edges_found) > 0):
        edges_found['filter_match'] = [
            geom_utils.lines_overlap(filt_edge['geometry'], geom, min_intersect=0.5)
            for geom in edges_found['geometry']
        ]
        edges_match = edges_found.loc[edges_found['filter_match'] == True].copy()
        edges_to_rm_gdfs.append(edges_match)
        rm_edges = list(edges_match['uvkey'])
        edges_to_rm += rm_edges

all_edges_to_rm_gdf = gpd.GeoDataFrame(pd.concat(edges_to_rm_gdfs,
def preprocess(self, properties, datasets, logger, kind=PreprocessKind.train.value):
    """
    Checks if the input and the rating files exist and loads them from the output folder. Otherwise, it takes the
    ratings, movies and tags datasets, converts them to dataframes and loads the GloVe file. It iterates over the
    ratings dataframe, keeping from every row the movie id, user id and the rating. It uses the functions
    preprocess_rating, preprocess_text and text_to_glove to create a vector corresponding to a movie's features
    and user id. The user's id is added at the first position of that vector. Every vector is added to a list of
    vectors called input_data. Finally, the rating of every user for a particular movie is added to a list called
    ratings, and both this list and the input_data list are saved to the output folder.

    Args:
        properties (dict): properties loaded from the yaml file; used to get the output folder
        datasets (dict): contains the dataframes of all the movielens csvs
        logger (Logger): the logger to print messages
        kind (str): if set to train, ratings.csv is used for the input vectors; otherwise the generated
            test_recommendation.csv is used
    """
    output_folder = properties["output_folder"]
    input_data_pickle_filename = self.input_data_pickle + "_{}_{}".format(properties["dataset"],
                                                                          properties["classification"])
    ratings_pickle_filename = self.ratings_pickle + "_{}_{}".format(properties["dataset"],
                                                                    properties["classification"])
    test_dataset_pickle_filename = self.test_dataset_pickle + "_{}_{}".format(properties["dataset"],
                                                                              properties["classification"])

    if self.check_pickle_files_exist(properties=properties):
        logger.info("Content-based input data already exist and will be loaded from pickle file")
        input_filename = input_data_pickle_filename if kind == PreprocessKind.train.value else \
            test_dataset_pickle_filename
        self.input_data = utils.load_from_pickle(output_folder, input_filename)
        self.ratings = utils.load_from_pickle(output_folder, ratings_pickle_filename)
        logger.info("Loaded inputs of shape {}".format(self.input_data.shape))
        logger.info("Loaded ratings of shape {}".format(self.ratings.shape))
    else:
        ratings_df = datasets["ratings"] if kind == PreprocessKind.train.value else datasets["test_recommendation"]
        movies_df = datasets["movies"]
        tags_df = datasets["tags"]
        glove_df = utils.load_glove_file(properties=properties, logger=logger)
        users_dict_dummy = self.__create_dummy_variables(ratings=ratings_df)
        logger.info("Generating input vectors")
        self.input_data = []
        self.ratings = []
        for index, row in ratings_df.iterrows():
            user_id, movie_id, rating, _ = row
            movie_id = int(movie_id)
            user_id = int(user_id)
            logger.debug("Preprocessing userid {} and movieid {} with rating {}".format(user_id, movie_id, rating))
            # preprocess
            rating = self._preprocess_rating(properties, rating)
            logger.debug("Preprocessed rating: {}".format(rating))
            movie_text = self._preprocess_text(movies_df, tags_df, movie_id, user_id, logger)
            logger.debug("Preprocessed text: {}".format(" ".join(movie_text)))
            movie_vector = self._text_to_glove(properties, glove_df, movie_text)
            if movie_vector.size == 0:
                continue
            movie_vector = np.concatenate((users_dict_dummy[user_id], movie_vector), axis=1)
            self.input_data.append(movie_vector)
            self.ratings.append(rating)
            utils.print_progress(self.ratings, logger=logger)

        self.ratings = np.asarray(self.ratings)
        self.input_data = np.concatenate(self.input_data)
        logger.info("Produced a feature matrix of shape {}".format(self.input_data.shape))
        # standardization
        logger.info("Standardize input vectors")
        self.input_data = preprocessing.scale(self.input_data)
        logger.info("Save input vectors to file")
        input_filename = input_data_pickle_filename if kind == PreprocessKind.train.value else \
            test_dataset_pickle_filename
        utils.write_to_pickle(obj=self.input_data, directory=output_folder, filename=input_filename)
        utils.write_to_pickle(obj=self.ratings, directory=output_folder, filename=ratings_pickle_filename)
def preprocess(self, properties, datasets, logger, kind=PreprocessKind.train.value):
    """
    Initially checks if the ratings list exists in the output folder and, if so, loads it. Otherwise, it takes the
    users' ratings from the ratings dataset and the names of the movies from the movies dataset, and creates a
    list with the movie ids. Then, within a for loop, it iterates over the ratings dataframe and for each user
    keeps track of the ratings they gave to every movie. If a user did not rate a movie, a zero is put at the
    corresponding position of the vector. After finishing this process for every user, it returns the users'
    vectors as a list called user_ratings and writes it to the output folder as a pickle file.

    Args:
        properties (dict): dictionary with the loaded properties from the yaml file
        datasets (dict): the datasets' dictionary which was created by the read_csv function
    """
    output_folder = properties["output_folder"]
    users_ratings_pickle_filename = self.users_ratings_pickle + "_{}".format(properties["dataset"])
    users_ids_pickle_filename = self.users_ids_pickle + "_{}".format(properties["dataset"])
    movie_ids_pickle_filename = self.movie_ids_pickle + "_{}".format(properties["dataset"])
    test_dataset_pickle_filename = self.test_dataset_pickle + "_{}".format(properties["dataset"])

    if utils.check_file_exists(output_folder, users_ratings_pickle_filename):
        logger.info("Collaborative input vectors already exist and will be loaded from pickle file")
        input_filename = users_ratings_pickle_filename if kind == PreprocessKind.train.value else \
            test_dataset_pickle_filename
        self.users_ratings = utils.load_from_pickle(output_folder, input_filename)
        self.user_ids = utils.load_from_pickle(output_folder, users_ids_pickle_filename)
        self.movie_ids = utils.load_from_pickle(output_folder, movie_ids_pickle_filename)
        logger.info("Loaded user ratings of shape {}".format(self.users_ratings.shape))
    else:
        os.makedirs(output_folder, exist_ok=True)
        ratings_df = datasets["ratings"] if kind == PreprocessKind.train.value else datasets["test_recommendation"]
        movies_df = datasets["movies"]
        self.users_ratings = []
        self.user_ids = []
        self.movie_ids = movies_df["movieId"].values.tolist()
        logger.info("Generating input vectors")
        for _, row in ratings_df.iterrows():
            user_id = row["userId"]
            if user_id not in self.user_ids:
                self.user_ids.append(user_id)
                user_ratings = ratings_df[ratings_df["userId"] == user_id]
                user_vector = []
                for movie_id in self.movie_ids:
                    rating_row = user_ratings[user_ratings["movieId"] == movie_id]
                    if not rating_row.empty:
                        rating_row = rating_row["rating"].values.tolist()
                        user_vector.append(rating_row[0])
                    else:
                        user_vector.append(0.0)
                user_vector = np.array(user_vector)
                self.users_ratings.append(user_vector)
                utils.print_progress(self.users_ratings, logger=logger)

        logger.info("Writing input vectors into pickle file")
        self.users_ratings = np.array(self.users_ratings)
        self.user_ids = np.asarray(self.user_ids)
        self.movie_ids = np.asarray(self.movie_ids)
        input_filename = users_ratings_pickle_filename if kind == PreprocessKind.train.value else \
            test_dataset_pickle_filename
        utils.write_to_pickle(self.users_ratings, output_folder, input_filename)
        utils.write_to_pickle(self.user_ids, output_folder, users_ids_pickle_filename)
        utils.write_to_pickle(self.movie_ids, output_folder, movie_ids_pickle_filename)
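# The docstring above describes building one zero-filled ratings vector per user.
# As a hedged illustration (not part of the original project), such a matrix is
# commonly consumed by user-user similarity computations; the helper below is a
# hypothetical sketch with illustrative names only.
import numpy as np

def user_cosine_similarity(users_ratings):
    # users_ratings: array of shape (n_users, n_movies), as produced above
    norms = np.linalg.norm(users_ratings, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # guard against all-zero rating vectors
    normalized = users_ratings / norms
    # result[i, j] is the cosine similarity between users i and j
    return normalized @ normalized.T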
def get_home_work_walks(axyind=None, work_rows=None, districts=None, datetime=None, walk_speed=None, subset=True,
                        logging=True, graph=None, edge_gdf=None, node_gdf=None):
    stats_path = 'outputs/YKR_commutes_output/home_workplaces_stats/'
    geom_home = work_rows['geom_home'].iloc[0]
    home_latLon = work_rows['home_latLon'].iloc[0]
    # adjust origin if necessary to work with DT routing requests
    valid_home_latLon = get_valid_latLon_for_DT(home_latLon, distance=45, datetime=datetime, graph=graph,
                                                edge_gdf=edge_gdf, node_gdf=node_gdf)
    if (valid_home_latLon == None):
        return None
    destinations = get_work_destinations_gdf(geom_home, districts, axyind=axyind, work_rows=work_rows,
                                             logging=logging)
    if (destinations == None):
        return None
    work_destinations = destinations['destinations']
    home_work_stats = destinations['home_work_stats']
    # filter rows of work_destinations for testing
    work_destinations = work_destinations[:14] if subset == True else work_destinations
    # print('work_destinations', work_destinations)
    # filter out destination if it's the same as origin
    work_destinations = work_destinations[work_destinations.apply(
        lambda x: str(x['id_destination']) != str(axyind), axis=1)]
    total_origin_workers_flow = work_destinations['yht'].sum()
    if (logging == True):
        print('Routing to', len(work_destinations.index), 'destinations:')
    # get routes to all workplaces of the route
    home_walks_all = []
    for idx, destination in work_destinations.iterrows():
        utils.print_progress(idx, destinations['total_dests_count'], percentages=False)
        # execute routing request to Digitransit API
        try:
            itins = DT_routing.get_route_itineraries(valid_home_latLon, destination['to_latLon'], walk_speed,
                                                     datetime, itins_count=3, max_walk_distance=2500)
        except Exception:
            print('Error in DT routing request between:', axyind, 'and', destination['id_destination'])
            itins = []
        # if no itineraries were found, try adjusting the origin & destination by snapping them to the network
        if (len(itins) == 0):
            print('no itineraries got -> try adjusting destination')
            adj_destination = get_valid_latLon_for_DT(destination['to_latLon'], datetime=datetime, graph=graph,
                                                      edge_gdf=edge_gdf, node_gdf=node_gdf)
            time.sleep(0.3)
            try:
                itins = DT_routing.get_route_itineraries(valid_home_latLon, adj_destination, walk_speed, datetime,
                                                         itins_count=3, max_walk_distance=2500)
                print('found', len(itins), 'with adjusted origin & destination locations')
            except Exception:
                print('error in DT routing with adjusted origin & destination')
                itins = []
        od_itins_count = len(itins)
        od_workers_flow = destination['yht']
        if (od_itins_count > 0):
            # calculate utilization of the itineraries for identifying the probability of using the itinerary
            # from the origin, based on the number of commuters and the number of alternative itineraries to the
            # destination. If only one itinerary is found for an origin-destination pair (commute flow),
            # utilization equals the number of commutes between the OD pair.
            utilization = round(od_workers_flow / od_itins_count, 6)
            od_walk_dicts = DT_routing.parse_itin_attributes(itins, axyind, destination['id_destination'],
                                                             utilization=utilization)
            home_walks_all += od_walk_dicts
        else:
            print('No DT itineraries got between:', axyind, 'and', destination['id_destination'])
            error_df = pd.DataFrame([{
                'axyind': axyind,
                'destination_type': destination['destination_type'],
                'destination_id': destination['id_destination'],
                'destination_yht': destination['yht']
            }])
            error_df.to_csv('outputs/YKR_commutes_output/home_stops_errors/axyind_' + str(axyind) + '_to_' +
                            str(destination['id_destination']) + '.csv')

    # print(home_walks_all)
    # collect walks to stops/destinations to GDF
    if (len(home_walks_all) == 0):
        return None
    home_walks_all_df = pd.DataFrame(home_walks_all)
    home_walks_all_df['uniq_id'] = home_walks_all_df.apply(lambda row: DT_utils.get_walk_uniq_id(row), axis=1)
    # group similar walks and calculate relative utilization rates of them
    home_walks_g = DT_utils.group_home_walks(home_walks_all_df)
    # check that no commute data was lost in the analysis (flows match)
    total_utilization_sum = round(home_walks_g['utilization'].sum())
    total_probs = round(home_walks_g['prob'].sum())
    works_misings_routing = total_origin_workers_flow - total_utilization_sum
    if (works_misings_routing != 0 or total_probs != 100):
        print('Error: utilization sum of walks does not match the total flow of commuters')
        error_df = pd.DataFrame([{
            'axyind': axyind,
            'total_origin_workers_flow': total_origin_workers_flow,
            'total_utilization_sum': total_utilization_sum,
            'total_probs': total_probs
        }])
        error_df.to_csv('outputs/YKR_commutes_output/home_stops_errors/axyind_' + str(axyind) +
                        '_no_flow_match.csv')
    home_work_stats['works_misings_routing'] = works_misings_routing
    home_work_stats['works_misings_routing_rat'] = round((works_misings_routing / total_origin_workers_flow) * 100, 1)
    home_work_stats['total_probs'] = total_probs
    home_work_stats.to_csv(stats_path + 'axyind_' + str(axyind) + '.csv')
    return home_walks_g
def create_tfrecord(self, image_paths, labels, idx_start, idx_end, output_path):
    # Open a TFRecordWriter for the output-file.
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for i in range(idx_start, idx_end):
            utils.print_progress(count=i, total=(idx_end - idx_start))

            image_path = image_paths[i]
            label = labels[i]

            # TODO: Do center cropping
            # img = cv2.imread(image_paths[i])
            # img = cv2.resize(img, (224, 224))

            # Load images
            img = Image.open(image_path)

            # TODO:
            # Center crop and resize image. size: The requested size in pixels, as a 2-tuple: (width, height)
            img = ImageOps.fit(img, (self.config.tfr_image_width, self.config.tfr_image_height),
                               Image.LANCZOS, 0, (0.5, 0.5))
            # img = img.resize(size=(self.config.tfr_image_width, self.config.tfr_image_height))
            img = np.array(img)

            if output_path is not None:
                img_path_name = os.path.join(os.path.dirname(output_path), os.path.basename(image_path))
                utils_image.save_image(img, img_path_name)

            ## Color constancy
            # img = utils_image.color_constancy(img, power=6, gamma=None)
            # if output_path is not None:
            #     img_path_name = os.path.join(os.path.dirname(output_path), os.path.basename(image_path))
            #     img_path_name = img_path_name.split('.')[0] + '_ilu.' + img_path_name.split('.')[1]
            #     # utils_image.save_image(img, img_path_name)
            #     img_save = Image.fromarray(img.astype('uint8'))
            #     img_save.save(img_path_name)

            # Convert the image to raw bytes.
            img_bytes = img.tostring()

            data = {
                'image': self.wrap_bytes(img_bytes),
                'label': self.wrap_int64(label)
            }

            # Wrap the data as TensorFlow Features.
            feature = tf.train.Features(feature=data)

            # Wrap again as a TensorFlow Example.
            example = tf.train.Example(features=feature)

            # Serialize the data.
            serialized = example.SerializeToString()

            # Write the serialized data to the TFRecords file.
            writer.write(serialized)
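# Hedged companion sketch (not part of the original code): one way the records
# written by create_tfrecord above could be read back with the TF1-style API.
# The feature keys ('image', 'label') mirror the writer; the record filename,
# image size and channel count are assumptions for illustration only.
import tensorflow as tf

def parse_tfrecord(serialized, image_height=224, image_width=224, channels=3):
    features = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized=serialized, features=features)
    # The writer stored raw uint8 bytes, so decode and reshape accordingly.
    image = tf.decode_raw(parsed['image'], tf.uint8)
    image = tf.reshape(image, [image_height, image_width, channels])
    return image, parsed['label']

# Example usage (assumed filename):
# dataset = tf.data.TFRecordDataset(['record_0.tfr']).map(parse_tfrecord)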
        raise Exception('task type %s not allowed' % task_type)
    output_file.close()


if __name__ == '__main__':
    args = arg_parser.parse_args()
    config_file = args.config
    if not os.path.exists(config_file):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), config_file)
    config_parser.read(config_file)
    sessions = config_parser.sections()

    if 'STATUS' in sessions:
        is_train = config_parser['STATUS'].get('status', 'train') == 'train'
        if is_train:
            print_progress('Start Training')
        else:
            print_progress('Start Decoding')
    else:
        is_train = True

    if 'IO' in sessions:
        print_progress("Start config IO")
        IO_session = config_parser['IO']
        for key, val in IO_session.items():
            print(key, '=', val)
        file_type = IO_session.get('type', None)
        if file_type:
            if file_type == 'csv':
                if is_train:
                    # training
def train(self, model, train_data, train_size, num_steps, num_epochs, min_loss=0.3):
    # Set a graph-level seed so that the random sequences produced by all ops
    # are reproducible across sessions.
    tf.set_random_seed(1234)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        total_loss = []
        timings = []
        steps_per_epoch = int(train_size / self.batch_size)
        num_epoch = 1
        for step in range(1, num_steps):
            beg_t = timeit.default_timer()
            # X, L = train_data.next()
            X, L = train_data, [1, 2, 3, 5]
            seq_len = np.max(L)
            print('X:', X)
            print('L:', L)
            print('seq_len:', seq_len)
            # For the anomaly detection problem we reconstruct the input data, so
            # targets and inputs are identical.
            feed_dict = {
                model.inputs: X,
                model.targets: X,
                model.lengths: L,
                model.dropout: self.dropout,
                model.batch_size: self.batch_size,
                model.max_seq_len: seq_len
            }
            fetches = [model.loss, model.decoder_outputs, model.train_optimizer]
            step_loss, _, _ = sess.run(fetches, feed_dict)
            total_loss.append(step_loss)
            timings.append(timeit.default_timer() - beg_t)
            print('total_loss:', total_loss)

            if step % steps_per_epoch == 0:
                num_epoch += 1
            if step % 200 == 0 or step == 1:
                print_progress(int(step / 200), num_epoch, np.mean(total_loss), np.mean(step_loss), np.sum(timings))
                timings = []
            if step == 1:
                _ = tf.train.export_meta_graph(filename=self.path_to_graph + '.meta')
            if np.mean(total_loss) < min_loss or num_epoch > num_epochs:
                model.saver.save(sess, self.path_to_graph, global_step=step)
                print("Training is finished.")
                break
def train(self, model, train_data, train_size, num_steps, num_epochs, min_loss=0.3):
    """ Trains a given model architecture with given train data. """
    # global_step = tf.Variable(0, name='global_step', trainable=False)
    tf.set_random_seed(1234)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('./logs/1/train', sess.graph)
        try:
            saver.restore(sess, tf.train.latest_checkpoint(self.checkpoints))
            # print("Model restore finished, current global step: %d" % global_step.eval())
        except ValueError:
            print("Restoring failed.")
            sess.run(tf.global_variables_initializer())
        total_loss = []
        timings = []
        steps_per_epoch = int(train_size / self.batch_size)
        num_epoch = 1
        # last_step = global_step.eval(sess)
        # print("Last step: ", global_step.eval(sess))
        print("Start training.")
        for step in range(1, num_steps):
            beg_t = timeit.default_timer()
            X, L = train_data.__next__()
            seq_len = np.max(L)
            merge = tf.summary.merge_all()
            # For the anomaly detection problem we reconstruct the input data, so
            # targets and inputs are identical.
            feed_dict = {
                model.inputs: X,
                model.targets: X,
                model.lengths: L,
                model.dropout: self.dropout,
                model.batch_size: self.batch_size,
                model.max_seq_len: seq_len
            }
            fetches = [merge, model.loss, model.decoder_outputs, model.train_optimizer]
            summary, step_loss, _, _ = sess.run(fetches, feed_dict)
            train_writer.add_summary(summary, step)
            total_loss.append(step_loss)
            timings.append(timeit.default_timer() - beg_t)

            if step % steps_per_epoch == 0:
                num_epoch += 1
                model.saver.save(sess, self.path_to_graph, global_step=step)
                print("Save checkpoints.")
            if step % 20 == 0 or step == 1:
                print_progress(step, num_epoch, np.mean(total_loss), np.mean(step_loss), np.sum(timings))
                timings = []
            if step == 1:
                _ = tf.train.export_meta_graph(filename=self.path_to_graph + '.meta')
            if np.mean(total_loss) < min_loss or num_epoch > num_epochs:
                model.saver.save(sess, self.path_to_graph, global_step=step)
                print("Training is finished.")
                break