class MultipleBalancedTreeStructuredTraitFactory(object):

    def __init__(self, simconfig):
        self.simconfig = simconfig
        self.prng = RandomState()

    def initialize_traits(self):
        self.r = int(self.simconfig.branching_factor)
        self.h = int(self.simconfig.depth_factor)
        self.n = self.simconfig.num_trees
        (trees, roots) = utils.generate_forest_balanced_trees(self.r, self.h, self.n)
        #log.debug("num traits: %s roots: %s", len(trees.nodes()), pp.pformat(self.roots))
        self.trait_set = MultipleTreeStructuredTraitSet(trees, roots, self.prng, self.simconfig)
        return self.trait_set

    # def initialize_population(self, pop_graph):
    #     """
    #     Initializes a population with traits.
    #     """
    #     mt = self.simconfig.maxtraits
    #     for nodename in pop_graph.nodes():
    #         # get a random number of initial trait chains
    #         agent_traits = set()
    #         init_trait_num = self.prng.random_integers(1, mt)
    #         #log.debug("init trait num: %s", init_trait_num)
    #         for i in range(0, init_trait_num):
    #             path = self.trait_set.get_random_trait_path()
    #             agent_traits.update(path)
    #         #log.debug("agent traits: %s", agent_traits)
    #         #log.debug("traits: %s", pp.pformat(agent_traits))
    #         pop_graph.node[nodename]['traits'] = agent_traits

    def initialize_population(self, pop_graph):
        """
        Initializes a population with traits, biased toward the roots.
        """
        mt = self.simconfig.maxtraits
        for nodename in pop_graph.nodes():
            # get a random number of initial trait chains
            agent_traits = set()
            init_trait_num = self.prng.random_integers(1, mt)
            #log.debug("init trait num: %s", init_trait_num)
            for i in range(0, init_trait_num):
                path = self.trait_set.get_random_trait_path_rootbiased()
                agent_traits.update(path)
            #log.debug("agent traits: %s", agent_traits)
            #log.debug("traits: %s", pp.pformat(agent_traits))
            pop_graph.node[nodename]['traits'] = agent_traits
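A side note on reproducibility: the factory above constructs `RandomState()` with no seed, so every run initializes populations differently. A minimal sketch of a seeded variant, assuming a hypothetical `seed` attribute on `simconfig` that is not present in the original:

from numpy.random import RandomState

class SeededTraitFactory(MultipleBalancedTreeStructuredTraitFactory):
    """Hypothetical variant that seeds the PRNG for reproducible runs."""

    def __init__(self, simconfig):
        super(SeededTraitFactory, self).__init__(simconfig)
        # getattr with a None default preserves unseeded behavior
        # when the config does not define a seed
        self.prng = RandomState(getattr(simconfig, 'seed', None))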
def load(self):
    rng = RandomState(np.uint64(hash("RandomDataset")))
    input_shape = [500, 3, 600, 1]
    y = rng.random_integers(0, 1, size=input_shape[0])
    y = OneHotFormatter(2).format(y)
    topo_view = rng.rand(*input_shape)
    super(RandomDataset, self).__init__(topo_view=topo_view, y=y,
                                        axes=('b', 'c', 0, 1))
from numpy.random import RandomState


def get_sample_key(N, k, seed):
    """
    Get a pseudorandom key for sampling from data.

    :param N: Number of items in population.
    :param k: Number of samples to draw.
    :param seed: The seed for shuffling (best left unchanged to compare results).
    :return: Key for sampling.
    """
    random = RandomState(seed)
    # note: random_integers is inclusive of `high`, so this draws from 0..N
    return random.random_integers(low=0, high=N, size=k)
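Worth noting for all the snippets in this section: `random_integers(low, high)` samples inclusively of both bounds and has been deprecated in NumPy (since 1.11) in favor of `randint(low, high + 1)`, which excludes the high end. A minimal migration sketch, assuming NumPy >= 1.17 for the `default_rng` API:

import numpy as np
from numpy.random import RandomState

rs = RandomState(42)
legacy = rs.random_integers(0, 10, size=5)   # deprecated; draws from 0..10 inclusive

rs = RandomState(42)
modern = rs.randint(0, 10 + 1, size=5)       # same underlying stream, same values

rng = np.random.default_rng(42)              # preferred Generator API
newest = rng.integers(0, 10, size=5, endpoint=True)  # same range, different stream

assert (legacy == modern).all()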
import numpy as np
from numpy.random import RandomState
from sklearn.metrics import accuracy_score, roc_auc_score


def get_bootstrap(true, pred, n_bootstraps=2000, seed=None):
    """
    :param true: true labels stored as a numpy array
    :param pred: predicted scores stored as a numpy array
    :param n_bootstraps: number of bootstrap resamples
    :param seed: random seed for result reproducibility
    :return: original accuracy, original ROC AUC, the sorted bootstrap
             accuracies, and the lower/upper bounds of the 95% CI
    """
    # get accuracy and ROC AUC on the original sample
    original_acc = accuracy_score(true, pred)
    original_roc = roc_auc_score(true, pred)

    # generate random numbers from a seed for reproducibility
    rs = RandomState(seed)

    # start bootstrapping, initialize the bootstrap accuracies
    btstrp_accs = []
    for i in range(n_bootstraps):
        # bootstrap by sampling with replacement on the prediction indices
        indices = rs.random_integers(0, len(pred) - 1, len(pred))
        # we need at least one positive and one negative sample for metrics
        # such as ROC AUC to be defined: reject one-class resamples
        if len(np.unique(true[indices])) >= 2:
            btstrp_accs.append(accuracy_score(true[indices], pred[indices]))

    # obtain the 95% CI from the results
    sorted_accuracies = np.array(btstrp_accs)
    sorted_accuracies.sort()

    # get upper and lower bounds
    conf_low = sorted_accuracies[int(0.025 * len(sorted_accuracies))]
    conf_up = sorted_accuracies[int(0.975 * len(sorted_accuracies))]
    return original_acc, original_roc, sorted_accuracies, conf_low, conf_up
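A quick usage sketch with toy labels and predictions (purely illustrative values), assuming the function and its scikit-learn imports are available:

import numpy as np

true = np.array([0, 1, 1, 0, 1, 0, 1, 1])
pred = np.array([0, 1, 0, 0, 1, 1, 1, 1])

acc, roc, accs, ci_low, ci_up = get_bootstrap(true, pred, n_bootstraps=1000, seed=42)
print("accuracy = %.3f, 95%% CI = [%.3f, %.3f]" % (acc, ci_low, ci_up))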
def make_classification_data(num_examples=100, train_test_ratio=0.5,
                             num_features=10, use_feature_hashing=False,
                             feature_bins=4, num_labels=2,
                             empty_labels=False, string_label_list=None,
                             feature_prefix='f', id_type='string',
                             class_weights=None, non_negative=False,
                             one_string_feature=False, num_string_values=4,
                             random_state=1234567890):

    # use sklearn's make_classification to generate the data for us
    num_numeric_features = (num_features - 1 if one_string_feature
                            else num_features)
    X, y = make_classification(n_samples=num_examples,
                               n_features=num_numeric_features,
                               n_informative=num_numeric_features,
                               n_redundant=0, n_classes=num_labels,
                               weights=class_weights,
                               random_state=random_state)

    if string_label_list:
        assert len(string_label_list) == num_labels
        label_to_string = np.vectorize(lambda n: string_label_list[n])
        y = label_to_string(y)

    # if we were told to only generate non-negative features, then
    # we can simply take the absolute values of the generated features
    if non_negative:
        X = abs(X)

    # since we want to use SKLL's FeatureSet class, we need to
    # create a list of IDs; we create IDs that either can also
    # be numbers or pure strings
    if id_type == 'string':
        ids = ['EXAMPLE_{}'.format(n) for n in range(1, num_examples + 1)]
    elif id_type == 'integer_string':
        ids = ['{}'.format(n) for n in range(1, num_examples + 1)]
    elif id_type == 'float':
        ids = [float(n) for n in range(1, num_examples + 1)]
    elif id_type == 'integer':
        ids = list(range(1, num_examples + 1))

    # create a string feature that has four possible values
    # 'a', 'b', 'c' and 'd' and add it to X at the end
    if one_string_feature:
        prng = RandomState(random_state)
        random_indices = prng.random_integers(0, num_string_values - 1,
                                              num_examples)
        possible_values = [chr(x) for x in range(97, 97 + num_string_values)]
        string_feature_values = [possible_values[i] for i in random_indices]
        string_feature_column = np.array(string_feature_values,
                                         dtype=object).reshape(num_examples, 1)
        X = np.append(X, string_feature_column, 1)

    # create a list of dictionaries as the features
    feature_names = ['{}{:02d}'.format(feature_prefix, n)
                     for n in range(1, num_features + 1)]
    features = [dict(zip(feature_names, row)) for row in X]

    # split everything into training and testing portions
    num_train_examples = int(round(train_test_ratio * num_examples))
    train_features, test_features = (features[:num_train_examples],
                                     features[num_train_examples:])
    train_y, test_y = y[:num_train_examples], y[num_train_examples:]
    train_ids, test_ids = ids[:num_train_examples], ids[num_train_examples:]

    # were we told to generate empty labels?
    train_labels = None if empty_labels else train_y
    test_labels = None if empty_labels else test_y

    # create a FeatureHasher if we are asked to use feature hashing
    # with the specified number of feature bins
    vectorizer = (FeatureHasher(n_features=feature_bins)
                  if use_feature_hashing else None)
    train_fs = FeatureSet('classification_train', train_ids,
                          labels=train_labels, features=train_features,
                          vectorizer=vectorizer)
    if train_test_ratio < 1.0:
        test_fs = FeatureSet('classification_test', test_ids,
                             labels=test_labels, features=test_features,
                             vectorizer=vectorizer)
    else:
        test_fs = None

    return (train_fs, test_fs)
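A usage sketch for the helper above, assuming the SKLL test utilities are importable; the argument values are illustrative only:

# a 3-class problem with string labels and hashed features
train_fs, test_fs = make_classification_data(num_examples=200,
                                             num_labels=3,
                                             string_label_list=['low', 'mid', 'high'],
                                             use_feature_hashing=True,
                                             feature_bins=8)
print(len(train_fs.ids), len(test_fs.ids))  # 100 and 100 with the default 0.5 split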
    gamma = xpy / xpx
    nobs = lhs.shape[0]
    stat = nobs * (gamma - 1.0)
    return stat


if __name__ == '__main__':
    trends = ('nc', 'c', 'ct', 'ctt')
    T = array((20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 120, 140,
               160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800,
               900, 1000, 1200, 1400, 2000))
    T = T[::-1]
    m = T.shape[0]
    percentiles = list(arange(0.5, 100.0, 0.5))
    rng = RandomState(0)
    seeds = rng.random_integers(0, 2 ** 31 - 2, size=EX_NUM)

    parallel, p_func, n_jobs = parallel_func(wrapper, n_jobs=NUM_JOBS,
                                             verbose=2)
    parallel.pre_dispatch = NUM_JOBS
    for tr in trends:
        results = zeros((len(percentiles), len(T), EX_NUM)) * nan
        filename = 'adf_z_' + tr + '.npz'

        for i in range(EX_NUM):
            print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
            # non-parallel version:
            # out = lmap(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
            now = datetime.datetime.now()
            out = parallel(p_func(t, tr, EX_SIZE, seed=seeds[i]) for t in T)
def load_and_filter_data(csv_file, label_column, id_column, length_column,
                         second_human_score_column, candidate_column,
                         requested_feature_names, reserved_column_names,
                         given_trim_min, given_trim_max, flag_column_dict,
                         subgroups, exclude_zero_scores=True,
                         exclude_zero_sd=False, feature_subset_specs=None,
                         feature_subset=None, feature_prefix=None,
                         use_fake_labels=False):
    """
    Load the data from `csv_file` and filter it to remove rows that have
    zero/non-numeric values for `label_column`. If feature names are
    specified, check whether any features that are specifically requested in
    `feature_names` are missing from the data. If no feature names are
    specified, generate them based on column names and subset information,
    if available. The function then excludes non-numeric values for any
    feature. It also generates fake labels between 1 and 10 if
    `use_fake_labels` is set to True. Finally, it renames the id and label
    columns and splits the data into a data frame with the feature values
    and score label, and a data frame with the other available metadata.
    """
    logger = logging.getLogger(__name__)

    # read the CSV file into a data frame, making sure to read in the
    # `id_column`, `candidate_column` and subgroups (if any) as strings
    # so that we do not lose information, e.g., initial zeros
    string_columns = [id_column, candidate_column] + subgroups
    converter_dict = dict([(column, str) for column in string_columns if column])

    # read in the CSV file
    df = pd.read_csv(csv_file, converters=converter_dict)

    # make sure that the columns specified in the config file actually exist
    columns_to_check = [id_column, label_column]
    if length_column:
        columns_to_check.append(length_column)
    if second_human_score_column:
        columns_to_check.append(second_human_score_column)

    missing_columns = set(columns_to_check).difference(df.columns)
    if missing_columns:
        raise KeyError("Columns {} from the config file "
                       "do not exist in the data.".format(missing_columns))

    # it is possible for the `id_column` and `candidate_column` to be
    # set to the same column name in the CSV file, e.g., if there is
    # only one response per candidate. If this happens, we need to
    # create a duplicate column for candidates or ids for the downstream
    # processing to work as usual.
    if id_column == candidate_column:
        # if the name for both columns is `candidate`, we need to
        # create a separate id_column name
        if id_column == 'candidate':
            df['spkitemid'] = df['candidate'].copy()
            id_column = 'spkitemid'
        # else we create a separate `candidate` column
        else:
            df['candidate'] = df[id_column].copy()
            candidate_column = 'candidate'

    df = rename_default_columns(df, requested_feature_names, id_column,
                                label_column, second_human_score_column,
                                length_column, None, candidate_column)

    # check that the id_column contains unique values
    if df['spkitemid'].size != df['spkitemid'].unique().size:
        raise ValueError("The data contains duplicate response IDs in "
                         "'{}'. Please make sure all response IDs are "
                         "unique and re-run the tool.".format(id_column))

    # generate feature names if no feature .json file was provided
    if len(requested_feature_names) == 0:
        feature_names = generate_feature_names(df, reserved_column_names,
                                               feature_subset_specs=feature_subset_specs,
                                               feature_subset=feature_subset,
                                               feature_prefix=feature_prefix)
    else:
        feature_names = requested_feature_names

    # make sure that feature names do not contain reserved column names
    illegal_feature_names = set(feature_names).intersection(reserved_column_names)
    if illegal_feature_names:
        raise ValueError("The following reserved column names "
                         "cannot be used as feature names: '{}'. "
                         "Please rename these columns and "
                         "re-run the experiment.".format(', '.join(illegal_feature_names)))

    # check to make sure that the subgroup columns are all present
    df = check_subgroups(df, subgroups)

    # filter out the responses based on flag columns
    (df_responses_with_requested_flags,
     df_responses_with_excluded_flags) = filter_on_flag_columns(df, flag_column_dict)

    # filter out the rows that have non-numeric or zero labels
    # unless we are going to generate fake labels in the first place
    if not use_fake_labels:
        (df_filtered,
         df_excluded) = filter_on_column(df_responses_with_requested_flags,
                                         'sc1', 'spkitemid',
                                         exclude_zeros=exclude_zero_scores)

        # make sure that the remaining data frame is not empty
        if len(df_filtered) == 0:
            raise ValueError("No responses remaining after filtering out "
                             "non-numeric human scores. No further analysis "
                             "can be run.")

        trim_min = given_trim_min if given_trim_min else df_filtered['sc1'].min()
        trim_max = given_trim_max if given_trim_max else df_filtered['sc1'].max()
    else:
        df_filtered = df_responses_with_requested_flags.copy()
        trim_min = given_trim_min if given_trim_min else 1
        trim_max = given_trim_max if given_trim_max else 10
        logger.info("Generating labels randomly "
                    "from [{}, {}]".format(trim_min, trim_max))
        randgen = RandomState(seed=1234567890)
        df_filtered[label_column] = randgen.random_integers(trim_min, trim_max,
                                                            size=len(df_filtered))

    # make sure there are no missing features in the data
    missing_features = set(feature_names).difference(df_filtered.columns)
    if not missing_features:
        # make sure all features selected for model building are numeric
        # and also replace any non-numeric feature values in already
        # excluded data with NaNs for consistency
        for feat in feature_names:
            df_excluded[feat] = pd.to_numeric(df_excluded[feat],
                                              errors='coerce').astype(float)
            newdf, newdf_excluded = filter_on_column(df_filtered, feat,
                                                     'spkitemid',
                                                     exclude_zeros=False,
                                                     exclude_zero_sd=exclude_zero_sd)
            del df_filtered
            df_filtered = newdf
            df_excluded = pd.merge(df_excluded, newdf_excluded, how='outer')

        # make sure that the remaining data frame is not empty
        if len(df_filtered) == 0:
            raise ValueError("No responses remaining after filtering "
                             "out non-numeric feature values. No further "
                             "analysis can be run.")

        # raise a warning if we excluded features that were
        # specified in the .json file because sd == 0
        omitted_features = set(requested_feature_names).difference(df_filtered.columns)
        if omitted_features:
            logger.warning("The following requested features "
                           "were excluded because their standard "
                           "deviation on the training set was 0: {}.\n"
                           "Please edit the feature file to exclude "
                           "these features and re-run the "
                           "tool.".format(', '.join(omitted_features)))

        # update the feature names
        feature_names = [feature for feature in feature_names
                         if feature in df_filtered]
    else:
        raise KeyError("{} does not contain "
                       "columns for all features specified in "
                       "the feature file. Please check for "
                       "capitalization and other spelling "
                       "errors and make sure the feature "
                       "names do not contain hyphens. "
                       "The data does not have columns "
                       "for the following features: "
                       "{}".format(csv_file, ', '.join(missing_features)))

    # check the values of the length column; we do this after filtering to
    # make sure we have removed responses that have not been processed
    # correctly; otherwise rename the length column to ##ORIGINAL_NAME##
    if (length_column and
            (len(df_filtered[df_filtered['length'].isnull()]) != 0 or
             df_filtered['length'].std() <= 0)):
        logger.warning("The {} column either has missing values or a standard"
                       " deviation <= 0. No length-based analysis will be"
                       " provided. The column will be renamed as ##{}## and"
                       " saved in *train_other_columns.csv.".format(length_column,
                                                                    length_column))
        df_filtered.rename(columns={'length': '##{}##'.format(length_column)},
                           inplace=True)

    # create separate data frames for the features and sc1, for all other
    # information, and for the responses excluded during filtering
    not_other_columns = set()
    feature_columns = ['spkitemid', 'sc1'] + feature_names
    df_filtered_features = df_filtered[feature_columns]
    not_other_columns.update(feature_columns)

    metadata_columns = ['spkitemid'] + subgroups
    if candidate_column:
        metadata_columns.append('candidate')
    df_filtered_metadata = df_filtered[metadata_columns]
    not_other_columns.update(metadata_columns)

    df_filtered_length = pd.DataFrame()
    length_columns = ['spkitemid', 'length']
    if length_column and 'length' in df_filtered:
        df_filtered_length = df_filtered[length_columns]
        not_other_columns.update(length_columns)

    df_filtered_human_scores = pd.DataFrame()
    human_score_columns = ['spkitemid', 'sc1', 'sc2']
    if second_human_score_column and 'sc2' in df_filtered:
        df_filtered_human_scores = df_filtered[human_score_columns].copy()
        not_other_columns.update(['sc2'])
        # filter out any rows with non-numeric values,
        # as well as zeros, if we were asked to
        df_filtered_human_scores['sc2'] = pd.to_numeric(df_filtered_human_scores['sc2'],
                                                        errors='coerce').astype(float)
        if exclude_zero_scores:
            df_filtered_human_scores['sc2'] = df_filtered_human_scores['sc2'].replace(0, nan)

    # now extract all other columns and add 'spkitemid'
    other_columns = ['spkitemid'] + [column for column in df_filtered.columns
                                     if column not in not_other_columns]
    df_filtered_other_columns = df_filtered[other_columns]

    return (df_filtered_features, df_filtered_metadata,
            df_filtered_other_columns, df_excluded, df_filtered_length,
            df_filtered_human_scores, df_responses_with_excluded_flags,
            trim_min, trim_max, feature_names)
def trainer(train, dev,             # training and development tuples
            dim=1000,               # embedding dimensionality
            dim_im=4096,            # image dimensionality
            dim_s=4800,             # sentence dimensionality
            margin=0.2,             # margin for pairwise ranking
            ncon=50,                # number of contrastive terms
            max_epochs=15,
            lrate=0.01,             # not needed with Adam
            dispFreq=10,
            optimizer='adam',
            batch_size=100,
            valid_batch_size=100,
            saveto='./db/pretrained-model/ssg/models/cocorank1000_combine.npz',
            validFreq=500,
            saveFreq=500,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim'] = dim
    model_options['dim_im'] = dim_im
    model_options['dim_s'] = dim_s
    model_options['margin'] = margin
    model_options['ncon'] = ncon
    model_options['max_epochs'] = max_epochs
    model_options['lrate'] = lrate
    model_options['dispFreq'] = dispFreq
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['valid_batch_size'] = valid_batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    model_options = validate_options(model_options)
    print(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print("Reloading options")
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print('Building model')
    params = init_params(model_options)

    # reload parameters
    if reload_ and os.path.exists(saveto):
        print("Reloading model")
        params = load_params(saveto, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building encoder')
    inps_e, lim, ls = build_encoder(tparams, model_options)

    print('Building functions')
    f_cost = theano.function(inps, -cost, profile=False)
    f_emb = theano.function(inps_e, [lim, ls], profile=False)

    # gradient computation
    print('Computing gradients')
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print('Optimization')

    uidx = 0
    estop = False
    start = 1234
    seed = 1234
    inds = numpy.arange(len(train[0]))
    numbatches = len(inds) // batch_size    # integer division so range() below gets an int
    curr = 0
    counter = 0
    target = None
    history_errs = []

    # Main loop
    for eidx in range(max_epochs):
        tic = time.time()
        prng = RandomState(seed - eidx - 1)
        prng.shuffle(inds)

        for minibatch in range(numbatches):
            uidx += 1
            conprng_im = RandomState(seed + uidx + 1)
            conprng_s = RandomState(2 * seed + uidx + 1)

            im = train[1][inds[minibatch::numbatches]]
            s = train[2][inds[minibatch::numbatches]]

            cinds_im = conprng_im.random_integers(low=0,
                                                  high=len(train[0]) - 1,
                                                  size=ncon * len(im))
            cinds_s = conprng_s.random_integers(low=0,
                                                high=len(train[0]) - 1,
                                                size=ncon * len(s))
            cim = train[1][cinds_im]
            cs = train[2][cinds_s]

            ud_start = time.time()
            cost = f_grad_shared(im, s, cim, cs)
            f_update(lrate)
            ud_duration = time.time() - ud_start

            if numpy.mod(uidx, dispFreq) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost,
                      'UD ', ud_duration)

            if numpy.mod(uidx, validFreq) == 0:
                print('Computing ranks...')
                lim, ls = f_emb(dev[1], dev[2])
                (r1, r5, r10, medr) = i2t(lim, ls)
                print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print('Saving...', end=' ')
                    params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                    print('Done')
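One property worth calling out in the loop above: the contrastive indices are drawn from fresh `RandomState(seed + uidx + 1)` instances keyed on the update counter, so the negatives for any given update can be re-derived without replaying training history. A minimal sketch of that property (the function name is illustrative, not from the original):

from numpy.random import RandomState

def negatives_for_update(seed, uidx, pool_size, ncon, batch_len):
    # the same (seed, uidx) pair always yields the same contrastive indices
    prng = RandomState(seed + uidx + 1)
    return prng.random_integers(low=0, high=pool_size - 1, size=ncon * batch_len)

a = negatives_for_update(1234, 7, 10000, 50, 100)
b = negatives_for_update(1234, 7, 10000, 50, 100)
assert (a == b).all()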
print("w1:", sess.run(w1)) print("w2:", sess.run(w2)) print("\n") # 训练模型。 STEPS = 50000 for i in range(STEPS): start = (i * batch_size) % 128 end = (i * batch_size) % 128 + batch_size xx = X[start:end] yy = Y[start:end] sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]}) if i % 1000 == 0: start_index = rdm.random_integers(low=0, high=120) print('start_index ==', start_index) total_cross_entropy = sess.run( cross_entropy, feed_dict={ x: X[start_index:start_index + batch_size], y_: Y[start_index:start_index + batch_size] }) print( "After %d training step(s), cross entropy on all data is %g" % (i, total_cross_entropy)) # 输出训练后的参数取值。 print("\n") print("w1:", sess.run(w1)) print("w2:", sess.run(w2))
def make_classification_data(num_examples=100, train_test_ratio=0.5,
                             num_features=10, use_feature_hashing=False,
                             feature_bins=4, num_labels=2,
                             empty_labels=False, feature_prefix='f',
                             class_weights=None, non_negative=False,
                             one_string_feature=False, num_string_values=4,
                             random_state=1234567890):

    # use sklearn's make_classification to generate the data for us
    num_numeric_features = (num_features - 1 if one_string_feature
                            else num_features)
    X, y = make_classification(n_samples=num_examples,
                               n_features=num_numeric_features,
                               n_informative=num_numeric_features,
                               n_redundant=0, n_classes=num_labels,
                               weights=class_weights,
                               random_state=random_state)

    # if we were told to only generate non-negative features, then
    # we can simply take the absolute values of the generated features
    if non_negative:
        X = abs(X)

    # since we want to use SKLL's FeatureSet class, we need to
    # create a list of IDs
    ids = ['EXAMPLE_{}'.format(n) for n in range(1, num_examples + 1)]

    # create a string feature that has four possible values
    # 'a', 'b', 'c' and 'd' and add it to X at the end
    if one_string_feature:
        prng = RandomState(random_state)
        random_indices = prng.random_integers(0, num_string_values - 1,
                                              num_examples)
        possible_values = [chr(x) for x in range(97, 97 + num_string_values)]
        string_feature_values = [possible_values[i] for i in random_indices]
        string_feature_column = np.array(string_feature_values,
                                         dtype=object).reshape(num_examples, 1)
        X = np.append(X, string_feature_column, 1)

    # create a list of dictionaries as the features
    feature_names = ['{}{:02d}'.format(feature_prefix, n)
                     for n in range(1, num_features + 1)]
    features = [dict(zip(feature_names, row)) for row in X]

    # split everything into training and testing portions
    num_train_examples = int(round(train_test_ratio * num_examples))
    train_features, test_features = (features[:num_train_examples],
                                     features[num_train_examples:])
    train_y, test_y = y[:num_train_examples], y[num_train_examples:]
    train_ids, test_ids = ids[:num_train_examples], ids[num_train_examples:]

    # were we told to generate empty labels?
    train_labels = None if empty_labels else train_y
    test_labels = None if empty_labels else test_y

    # create a FeatureHasher if we are asked to use feature hashing
    # with the specified number of feature bins
    vectorizer = (FeatureHasher(n_features=feature_bins)
                  if use_feature_hashing else None)
    train_fs = FeatureSet('classification_train', train_ids,
                          labels=train_labels, features=train_features,
                          vectorizer=vectorizer)
    if train_test_ratio < 1.0:
        test_fs = FeatureSet('classification_test', test_ids,
                             labels=test_labels, features=test_features,
                             vectorizer=vectorizer)
    else:
        test_fs = None

    return (train_fs, test_fs)
def trainer(train, dev,             # training and development tuples
            dim=1000,               # embedding dimensionality
            dim_im=4096,            # image dimensionality
            dim_s=4800,             # sentence dimensionality
            margin=0.2,             # margin for pairwise ranking
            ncon=50,                # number of contrastive terms
            max_epochs=15,
            lrate=0.01,             # not needed with Adam
            dispFreq=10,
            optimizer='adam',
            batch_size=100,
            valid_batch_size=100,
            saveto='/ais/gobi3/u/rkiros/ssg/models/cocorank1000_combine.npz',
            validFreq=500,
            saveFreq=500,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim'] = dim
    model_options['dim_im'] = dim_im
    model_options['dim_s'] = dim_s
    model_options['margin'] = margin
    model_options['ncon'] = ncon
    model_options['max_epochs'] = max_epochs
    model_options['lrate'] = lrate
    model_options['dispFreq'] = dispFreq
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['valid_batch_size'] = valid_batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    model_options = validate_options(model_options)
    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print "Reloading options"
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print 'Building model'
    params = init_params(model_options)

    # reload parameters
    if reload_ and os.path.exists(saveto):
        print "Reloading model"
        params = load_params(saveto, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print 'Building encoder'
    inps_e, lim, ls = build_encoder(tparams, model_options)

    print 'Building functions'
    f_cost = theano.function(inps, -cost, profile=False)
    f_emb = theano.function(inps_e, [lim, ls], profile=False)

    # gradient computation
    print 'Computing gradients'
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    uidx = 0
    estop = False
    start = 1234
    seed = 1234
    inds = numpy.arange(len(train[0]))
    numbatches = len(inds) / batch_size
    curr = 0
    counter = 0
    target = None
    history_errs = []

    # Main loop
    for eidx in range(max_epochs):
        tic = time.time()
        prng = RandomState(seed - eidx - 1)
        prng.shuffle(inds)

        for minibatch in range(numbatches):
            uidx += 1
            conprng_im = RandomState(seed + uidx + 1)
            conprng_s = RandomState(2 * seed + uidx + 1)

            im = train[1][inds[minibatch::numbatches]]
            s = train[2][inds[minibatch::numbatches]]

            cinds_im = conprng_im.random_integers(low=0,
                                                  high=len(train[0]) - 1,
                                                  size=ncon * len(im))
            cinds_s = conprng_s.random_integers(low=0,
                                                high=len(train[0]) - 1,
                                                size=ncon * len(s))
            cim = train[1][cinds_im]
            cs = train[2][cinds_s]

            ud_start = time.time()
            cost = f_grad_shared(im, s, cim, cs)
            f_update(lrate)
            ud_duration = time.time() - ud_start

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud_duration

            if numpy.mod(uidx, validFreq) == 0:
                print 'Computing ranks...'
                lim, ls = f_emb(dev[1], dev[2])
                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                    print 'Done'
class MobilityGenerator(object): """ Generates intermodal mobility for SUMO starting from a synthetic population. """ _conf = None _profiling = None _random_generator = None _sumo_network = None _sumo_parkings = collections.defaultdict(list) _parking_cache = dict() _parking_position = dict() _taz_weights = dict() _buildings_by_taz = dict() _edges_by_taz = dict() _blacklisted_edges = set() _all_trips = collections.defaultdict(dict) def __init__(self, conf, profiling=False): """ Initialize the synthetic population. :param conf: distionary with the configurations :param profiling=False: enable cProfile """ self._conf = conf self._profiling = profiling self._random_generator = RandomState(seed=self._conf['seed']) logging.info('Starting TraCI with file %s.', conf['sumocfg']) sumocfg = '{}/{}'.format(BASE_DIR, conf['sumocfg']) traci.start(['sumo', '-c', sumocfg]) logging.info('Loading SUMO net file %s%s', BASE_DIR, conf['SUMOnetFile']) self._sumo_network = sumolib.net.readNet( '{}/{}'.format(BASE_DIR, conf['SUMOnetFile'])) logging.info('Loading SUMO parking lots from file %s%s', BASE_DIR, conf['SUMOadditionals']['parkings']) self._load_parkings('{}/{}'.format(BASE_DIR, conf['SUMOadditionals']['parkings'])) logging.info('Loading TAZ weights from %s%s', BASE_DIR, conf['population']['tazWeights']) self._load_weights_from_csv( '{}/{}'.format(BASE_DIR, conf['population']['tazWeights'])) logging.info('Loading buildings weights from %s%s', BASE_DIR, conf['population']['buildingsWeight']) self._load_buildings_weight_from_csv_dir( '{}/{}'.format(BASE_DIR, conf['population']['buildingsWeight'])) logging.info('Loading edges in each TAZ from %s%s', BASE_DIR, conf['population']['tazDefinition']) self._load_edges_from_taz( '{}/{}'.format(BASE_DIR, conf['population']['tazDefinition'])) logging.info('Computing the number of entities for each mobility slice..') self._compute_entities_per_slice() def mobility_generation(self): """ Generate the mobility for the synthetic population. """ logging.info('Generating trips for each mobility slice..') self._compute_trips_per_slice() def save_mobility(self): """ Save the generated trips to files. """ logging.info('Saving trips files..') self._saving_trips_to_files() @staticmethod def close_traci(): """ Artefact to close TraCI properly. """ logging.info('Closing TraCI.') traci.close() ## ---------------------------------------------------------------------------------------- ## ## Loaders ## ## ---------------------------------------------------------------------------------------- ## def _load_parkings(self, filename): """ Load parkings ids from XML file. """ xml_tree = xml.etree.ElementTree.parse(filename).getroot() for child in xml_tree: if (child.tag == 'parkingArea' and child.attrib['id'] in self._conf['intermodalOptions']['parkingAreaWhitelist']): edge = child.attrib['lane'].split('_')[0] position = float(child.attrib['startPos']) + 2.5 self._sumo_parkings[edge].append(child.attrib['id']) self._parking_position[child.attrib['id']] = position def _load_weights_from_csv(self, filename): """ Load the TAZ weight from a CSV file. """ with open(filename, 'r') as csvfile: weightreader = csv.reader(csvfile) header = None for row in weightreader: if not header: header = row else: self._taz_weights[int(row[0])] = { header[0]: int(row[0]), header[1]: row[1], header[2]: int(row[2]), header[3]: float(row[3]), 'weight': (int(row[2])/float(row[3])), } def _load_buildings_weight_from_csv_dir(self, directory): """ Load the buildings weight from multiple CSV files. 
""" allfiles = [os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))] for filename in sorted(allfiles): logging.debug('Loding %s', filename) with open(filename, 'r') as csvfile: weightreader = csv.reader(csvfile) header = None taz = None buildings = [] for row in weightreader: if not header: header = row else: taz = row[0] buildings.append((float(row[3]), # weight row[4], # generic edge row[5])) # pedestrian edge if len(buildings) < 10: logging.debug('Dropping %s, only %d buildings found.', filename, len(buildings)) continue weighted_buildings = [] cum_sum = 0.0 for weight, g_edge, p_edge in sorted(buildings): cum_sum += weight weighted_buildings.append((cum_sum, g_edge, p_edge, weight)) self._buildings_by_taz[taz] = weighted_buildings def _load_edges_from_taz(self, filename): """ Load edges from the TAZ file. """ xml_tree = xml.etree.ElementTree.parse(filename).getroot() for child in xml_tree: if child.tag == 'taz': self._edges_by_taz[child.attrib['id']] = child.attrib['edges'].split(' ') ## ---------------------------------------------------------------------------------------- ## ## Mobility Generation ## ## ---------------------------------------------------------------------------------------- ## def _compute_entities_per_slice(self): """ Compute the absolute number of entities that are going to be created for each moblitiy slice, given a population. """ logging.info('Population: %d', self._conf['population']['entities']) for m_slice in self._conf['distribution'].keys(): self._conf['distribution'][m_slice]['tot'] = int( self._conf['population']['entities'] * self._conf['distribution'][m_slice]['perc']) logging.info('\t %s: %d', m_slice, self._conf['distribution'][m_slice]['tot']) def _compute_trips_per_slice(self): """ Compute the trips for the synthetic population for each mobility slice. """ total = 0 for name, m_slice in self._conf['distribution'].items(): logging.info('[%s] Computing %d trips from %s to %s ... 
', name, m_slice['tot'], m_slice['loc_origin'], m_slice['loc_primary']) ## Activity chains preparation activity_chains = [] activity_chains_weights = [] for _weight, _chain, _modes in m_slice['activityChains']: activity_chains.append((_chain, _modes)) activity_chains_weights.append(_weight) activity_index = [i for i in range(len(activity_chains))] if self._profiling: _pr = cProfile.Profile() _pr.enable() for entity_id in tqdm(range(m_slice['tot'])): ## Select the activity chain _index = self._random_generator.choice( activity_index, p=activity_chains_weights) _chain, _modes = activity_chains[_index] logging.debug('Chain: %s', '{}'.format(_chain)) logging.debug('Modes: %s', '{}'.format(_modes)) _person_trip = None # (Intermodal) trip _final_chain = None _stages = None _error_counter = 0 while not _person_trip: try: _final_chain, _stages = self._generate_trip_traci( self._conf['taz'][m_slice['loc_origin']], self._conf['taz'][m_slice['loc_primary']], _chain, _modes) ## Generating departure time _depart = numpy.round(_final_chain[1]['start'], decimals=2) if _depart not in self._all_trips[name].keys(): self._all_trips[name][_depart] = [] ## fix the last stop with 1.0 duration if _stages[-1].stageType == tc.STAGE_WAITING: _stages[-1] = _stages[-1]._replace(travelTime=1.0) _stages[-1] = _stages[-1]._replace(cost=1.0) ## fix the last ride with cost = 1.0 on order to fix the last stop _pos = len(_stages) - 1 while _pos >= 0: if _stages[_pos].stageType == tc.STAGE_DRIVING: if not _stages[_pos].destStop: _stages[_pos] = _stages[_pos]._replace(travelTime=1.0) _stages[_pos] = _stages[_pos]._replace(cost=1.0) break _pos -= 1 _person_trip = { 'id': '{}_{}'.format(name, entity_id), 'depart': _depart, # 'from': _from, # 'to': _to, # 'type': v_type, # 'mode': modes, # 'withParking': with_parking, # 'PLid': parking_id, 'stages': _stages, } complete_trip = self._generate_sumo_trip_from_activitygen(_person_trip) _person_trip['string'] = complete_trip except Error: _person_trip = None _error_counter += 1 if _error_counter % 10 == 0: logging.error( '_generate_trip_traci from %s to %s generated %d errors' ' and counting..', self._conf['taz'][m_slice['loc_origin']], self._conf['taz'][m_slice['loc_primary']], _error_counter) # Trip creation self._all_trips[name][_depart].append(_person_trip) total += 1 if self._profiling: _pr.disable() _s = io.StringIO() _ps = pstats.Stats(_pr, stream=_s).sort_stats('cumulative') _ps.print_stats(10) print(_s.getvalue()) input("Press any key to continue..") logging.info('Generated %d trips.', total) ## ---- PARKING AREAS: location and selection ---- ## def _check_parkings_cache(self, edge): """ Check among the previously computed results of _find_closest_parking """ if edge in self._parking_cache.keys(): return self._parking_cache[edge] return None def _find_closest_parking(self, edge): """ Given and edge, find the closest parking area. 
""" distance = sys.float_info.max ret = self._check_parkings_cache(edge) if ret: return ret p_id = None for p_edge, parkings in self._sumo_parkings.items(): for parking in parkings: if parking in self._conf['intermodalOptions']['parkingAreaWhitelist']: p_id = parking break if p_id: try: route = traci.simulation.findIntermodalRoute( p_edge, edge, pType="pedestrian") except traci.exceptions.TraCIException: route = None if route: cost = self._cost_from_route(route) if distance > cost: distance = cost ret = p_id, p_edge, route if ret: self._parking_cache[edge] = ret return ret logging.fatal('Edge %s is not reachable from any parking lot.', edge) self._blacklisted_edges.add(edge) return None, None, None ## ---- Functions for _compute_trips_per_slice: _generate_trip_traci ---- ## def _generate_trip_traci(self, from_area, to_area, activity_chain, modes): """ Returns the trip for the given activity chain. """ trip = None person_stages = self._generate_person_stages(from_area, to_area, activity_chain, modes[0]) solutions = [] for mode in modes: _person_steps = [] _new_start_time = None _mode, _ptype, _vtype = self._get_mode_parameters(mode) for pos, stage in person_stages.items(): # findIntermodalRoute(self, fromEdge, toEdge, modes='', depart=-1.0, # routingMode=0, speed=-1.0, walkFactor=-1.0, # departPos=0.0, arrivalPos=-1073741824, departPosLat=0.0, # pType='', vType='', destStop='') if not _new_start_time: _new_start_time = stage['start'] route = None ## If the vtype is among the one that require parking, and we are not going home, # look for a parking and build the additional walk back and forth. if (stage['activity'] != 'Home' and _vtype in self._conf['intermodalOptions']['vehicleAllowedParking']): ## find parking p_id, p_edge, _last_mile = self._find_closest_parking(stage['to']) if _last_mile: route = traci.simulation.findIntermodalRoute( stage['from'], p_edge, depart=_new_start_time, walkFactor=.9, modes=_mode, pType=_ptype, vType=_vtype) if (self._is_valid_route(mode, route) and route[-1].stageType == tc.STAGE_DRIVING): route[-1] = route[-1]._replace(destStop=p_id) route[-1] = route[-1]._replace(arrivalPos=self._parking_position[p_id]) route.extend(_last_mile) else: route = None if route: ## build the waiting to destination (if required) if stage['duration']: wait = self._generate_waiting_stage(stage) route.append(wait) ## build the walk back to the parking walk_back = traci.simulation.findIntermodalRoute( stage['to'], p_edge, walkFactor=.9, pType="pedestrian") walk_back = walk_back[0]._replace(arrivalPos=self._parking_position[p_id]) route.append(walk_back) ## update the next stage to make it start from the parking if pos + 1 in person_stages: person_stages[pos+1]['from'] = p_edge else: ## PUBLIC, ON-DEMAND, trip to HOME, and NO-PARKING required vehicles. route = traci.simulation.findIntermodalRoute( stage['from'], stage['to'], depart=_new_start_time, walkFactor=.9, modes=_mode, pType=_ptype, vType=_vtype) if not self._is_valid_route(mode, route): route = None ## Add stop if route and stage['duration']: route.append(self._generate_waiting_stage(stage)) if not route: raise TripGenerationError( 'Route not found between {} and {}.'.format(stage['from'], stage['to'])) ## Add the stage to the full planned trip. for step in route: _new_start_time += step.travelTime _person_steps.append(step) ## Cost computation. 
solutions.append((self._cost_from_route(_person_steps), _person_steps)) for position, thingy in enumerate(_person_steps): if (thingy.stageType == tc.STAGE_DRIVING and thingy.line == '' and ## Not PUBLIC TRANSPORT thingy.edges[0] == thingy.edges[-1]): pprint.pprint(person_stages) pprint.pprint(_person_steps) print(position, thingy) sys.exit() ## Compose the final person trip. if solutions: ## TODO: pick and chose a winner among the different modes, # for the moment there is only one. trip = (person_stages, solutions[0][1]) # print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STEPS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") # for pos, step in enumerate(solutions[0][1]): # print(pos, step) # print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") else: raise TripGenerationError( 'No solution foud for chain {} and modes {}.'.format(person_stages, modes)) return trip @staticmethod def _generate_waiting_stage(stage): """ Builds a STAGE_WAITING type of stage compatible with findIntermodalRoute. """ wait = Stage( stageType=tc.STAGE_WAITING, description=stage['activity'], edges='{}_0'.format(stage['to']), travelTime=stage['duration'], cost=stage['duration']) return wait def _generate_person_stages(self, from_area, to_area, activity_chain, mode): """ Returns the trip for the given activity chain. """ ## Mode split: _mode, _ptype, _vtype = self._get_mode_parameters(mode) # Define a generic Home and Primary activity location. # The locations must be reachable in some ways. route = None from_edge = None to_edge = None while not route: ## Origin and Destination Selection from_edge, to_edge = self._select_pair(from_area, to_area) from_allowed = (self._sumo_network.getEdge(from_edge).allows('pedestrian') and self._sumo_network.getEdge(from_edge).allows('passenger')) to_allowed = (self._sumo_network.getEdge(to_edge).allows('pedestrian') and self._sumo_network.getEdge(to_edge).allows('passenger')) if self._valid_pair(from_edge, to_edge) and from_allowed and to_allowed: try: route = traci.simulation.findIntermodalRoute( from_edge, to_edge, modes=_mode, pType=_ptype, vType=_vtype) if not self._is_valid_route(mode, route): route = None except traci.exceptions.TraCIException: logging.debug('_generate_person_stages: findRoute FAILED.') route = None else: logging.debug('_generate_person_stages: unusable pair of edges.') ## Generate perliminary stages for a person person_stages = dict() for pos, activity in enumerate(activity_chain): if activity not in self._conf['activities']: raise Exception('Activity {} is not define in the config file.'.format(activity)) _start, _duration = self._get_timing_from_activity(activity) if pos == 0: if activity != 'Home': raise Exception("Every activity chain MUST start with 'Home'," " '{}' given.".format(activity)) ## Beginning person_stages[pos] = { 'activity': activity, 'from': from_edge, 'to': None, 'start': _start, 'duration': _duration, } elif 'P-' in activity: ## This is a primary activity person_stages[pos] = { 'activity': activity, 'from': None, 'to': to_edge, 'start': _start, 'duration': _duration, } elif 'S-' in activity: ## This is a secondary activity person_stages[pos] = { 'activity': activity, 'from': None, 'to': None, 'start': _start, 'duration': _duration, } elif activity == 'Home': ## End of the activity chain. person_stages[pos] = { 'activity': activity, 'from': None, 'to': from_edge, 'start': _start, 'duration': _duration, } if len(person_stages) <= 2: raise Exception("Invalid activity chain. (Minimal: H -> P-? 
-> H", activity_chain) ## Define secondary activity location for pos, stage in person_stages.items(): if 'S-' in stage['activity']: ## look for what is coming before _prec = None _pos = pos - 1 while not _prec and _pos in person_stages: if 'Home' in person_stages[_pos]['activity']: _prec = 'H' elif 'P-' in person_stages[_pos]['activity']: _prec = 'P' _pos -= 1 ## look for what is coming next _succ = None _pos = pos + 1 while not _succ and _pos in person_stages: if 'Home' in person_stages[_pos]['activity']: _succ = 'H' elif 'P-' in person_stages[_pos]['activity']: _succ = 'P' _pos += 1 destination = None if _prec == 'H' and _succ == 'H': destination = self._random_location_circle(center=from_edge, other=to_edge) elif _prec == 'P' and _succ == 'P': destination = self._random_location_circle(center=to_edge, other=from_edge) elif _prec != _succ: destination = self._random_location_ellipse(from_edge, to_edge) else: raise Exception("WTF", _prec, _succ) stage['to'] = destination ## Remove the initial 'Home' stage and update the from of the second stage. person_stages[1]['from'] = person_stages[0]['from'] is_start_to_fix = True if person_stages[0]['start']: is_start_to_fix = False person_stages[1]['start'] = person_stages[0]['start'] del person_stages[0] ## Fixing the 'from' field with a forward chain pos = 2 while pos in person_stages: person_stages[pos]['from'] = person_stages[pos-1]['to'] pos += 1 ## IF NECESSARY, compute the real starting time for the activity chain. # Find the first 'start' defined. if is_start_to_fix: pos = 1 while pos in person_stages: if person_stages[pos]['start']: break pos += 1 start = person_stages[pos]['start'] while pos in person_stages: ett = 500.0 try: ett = traci.simulation.findRoute( person_stages[pos]['from'], person_stages[pos]['to']).travelTime except traci.exceptions.TraCIException: pass if pos-1 in person_stages: if person_stages[pos-1]['duration']: ett += person_stages[pos-1]['duration'] start -= ett pos -= 1 person_stages[1]['start'] = start return person_stages def _random_location_circle(self, center, other): """ Return a random edge in within a radius (*) from the given center. (*) Uses the ellipses defined by the foci center and other, and the major axe of 1.30 * distance between the foci. """ length = None try: length = traci.simulation.findRoute(center, other).length except traci.exceptions.TraCIException: raise TripGenerationError('No route between {} and {}'.format(center, other)) major_axe = length * 1.3 minor_axe = numpy.sqrt(numpy.square(major_axe) - numpy.square(length)) radius = minor_axe / 2.0 edges = self._get_all_reachable_edges(center, radius) if not edges: raise TripGenerationError('No edges from {} with range {}.'.format(center, length)) ret = self._random_generator.choice(edges) allowed = (self._sumo_network.getEdge(ret).allows('pedestrian') and self._sumo_network.getEdge(ret).allows('passenger')) while edges and (ret == center or ret == other) and not allowed: edges.remove(ret) ret = self._random_generator.choice(edges) allowed = (self._sumo_network.getEdge(ret).allows('pedestrian') and self._sumo_network.getEdge(ret).allows('passenger')) if not edges: raise TripGenerationError( 'No valid edges from {} with range {}.'.format(center, length)) return ret def _random_location_ellipse(self, focus1, focus2): """ Return a random edge in within the ellipse defined by the foci, and the major axe of 1.30 * distance between the foci. 
""" length = None try: length = traci.simulation.findRoute(focus1, focus2).length logging.debug('%s --> %s : %.2f', focus1, focus2, length) except traci.exceptions.TraCIException: raise TripGenerationError('No route between {} and {}'.format(focus1, focus2)) major_axe = length * 1.3 edges = self._get_all_reachable_edges(focus1, length) while edges: edge = self._random_generator.choice(edges) edges.remove(edge) if edge == focus1 or edge == focus2: continue allowed = (self._sumo_network.getEdge(edge).allows('pedestrian') and self._sumo_network.getEdge(edge).allows('passenger')) if not allowed: continue try: first = traci.simulation.findRoute(focus1, edge).length second = traci.simulation.findRoute(edge, focus2).length if first + second <= major_axe: logging.debug('%s --> %s : %.2f', focus1, edge, first) logging.debug('%s --> %s : %.2f', edge, focus2, second) return edge except traci.exceptions.TraCIException: pass raise TripGenerationError( "No location available for _random_location_ellipse [{}, {}]".format(focus1, focus2)) def _get_all_reachable_edges(self, origin, distance): """ Returns all the edges reachable from the origin within the given radius. """ logging.debug('Computing all reachable edges from %s in a %.2f radius.', origin, distance) ### "BFS" with distance _edges_already_done = set() _nodes_already_done = set() _edges_to_evaluate = [(origin, 0.0)] _reachable_edges = set() while _edges_to_evaluate: _edge, _distance = _edges_to_evaluate.pop(0) _edges_already_done.add(_edge) # print(_edge, _distance, _edges_to_evaluate) #retrieve node from _from_node = self._sumo_network.getEdge(_edge).getFromNode() if _from_node.getID() not in _nodes_already_done: _nodes_already_done.add(_from_node.getID()) # if node from distance is smaller than the target, # add all the incoming edge to the queue if _distance < distance: _reachable_edges.add(_edge) #add all the incoming edges for _inc_edge in _from_node.getIncoming(): if (_inc_edge.allows('passenger') and _inc_edge.getID() not in _edges_already_done and _inc_edge.getID() not in _edges_to_evaluate): # print(_inc_edge.getID()) _edges_to_evaluate.append((_inc_edge.getID(), _distance + _inc_edge.getLength())) #retrieve node to _to_node = self._sumo_network.getEdge(_edge).getToNode() if _to_node.getID() not in _nodes_already_done: _nodes_already_done.add(_to_node.getID()) # if node to distance is smaller than the target, # add all the incoming edge to the queue if _distance < distance: _reachable_edges.add(_edge) #add all the outgoing edges for _out_edge in _to_node.getOutgoing(): if (_out_edge.allows('passenger') and _out_edge.getID() not in _edges_already_done and _out_edge.getID() not in _edges_to_evaluate): # print(_out_edge.getID()) _edges_to_evaluate.append((_out_edge.getID(), _distance + _out_edge.getLength())) # with open('test.edges.txt', 'w') as out: # for edge in _reachable_edges: # out.write('edge:{}\n'.format(edge)) # input("Check the edges!") return list(_reachable_edges) def _get_timing_from_activity(self, activity): """ Compute start and duration from the activity defined in the config file. 
""" start = None if self._conf['activities'][activity]['start']: start = self._random_generator.normal( loc=self._conf['activities'][activity]['start']['m'], scale=self._conf['activities'][activity]['start']['s']) if start < 0: return self._get_timing_from_activity(activity) duration = None if self._conf['activities'][activity]['duration']: duration = self._random_generator.normal( loc=self._conf['activities'][activity]['duration']['m'], scale=self._conf['activities'][activity]['duration']['s']) if duration <= 0: return self._get_timing_from_activity(activity) return start, duration ## ---- PAIR SELECTION: origin - destination - mode ---- ## def _select_pair(self, from_area, to_area, pedestrian=False): """ Randomly select one pair, chosing between buildings and TAZ. """ from_taz = str(self._select_taz_from_weighted_area(from_area)) to_taz = str(self._select_taz_from_weighted_area(to_area)) if from_taz in self._buildings_by_taz.keys() and to_taz in self._buildings_by_taz.keys(): return self._select_pair_from_taz_wbuildings( self._buildings_by_taz[from_taz][:], self._buildings_by_taz[to_taz][:], pedestrian) return self._select_pair_from_taz( self._edges_by_taz[from_taz][:], self._edges_by_taz[to_taz][:]) def _select_taz_from_weighted_area(self, area): """ Select a TAZ from an area using its weight. """ selection = self._random_generator.uniform(0, 1) total_weight = sum([self._taz_weights[taz]['weight'] for taz in area]) cumulative = 0.0 for taz in area: cumulative += self._taz_weights[taz]['weight'] / total_weight if selection <= cumulative: return taz return None # this is matematically impossible, # if this happens, there is a mistake in the weights. def _valid_pair(self, from_edge, to_edge): """ This is just to avoid a HUGE while condition. sumolib.net.edge.is_fringe() """ from_edge_sumo = self._sumo_network.getEdge(from_edge) to_edge_sumo = self._sumo_network.getEdge(to_edge) if from_edge_sumo.is_fringe(from_edge_sumo.getOutgoing()): return False if to_edge_sumo.is_fringe(to_edge_sumo.getIncoming()): return False if from_edge == to_edge: return False if to_edge in self._blacklisted_edges: return False if not to_edge_sumo.allows('pedestrian'): return False return True def _select_pair_from_taz(self, from_taz, to_taz): """ Randomly select one pair from a TAZ. Important: from_taz and to_taz MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. """ from_edge = from_taz.pop( self._random_generator.random_integers(0, len(from_taz) - 1)) to_edge = to_taz.pop( self._random_generator.random_integers(0, len(to_taz) - 1)) _to = False while not self._valid_pair(from_edge, to_edge) and from_taz and to_taz: if not self._sumo_network.getEdge(to_edge).allows('pedestrian') or _to: to_edge = to_taz.pop( self._random_generator.random_integers(0, len(to_taz) - 1)) _to = False else: from_edge = from_taz.pop( self._random_generator.random_integers(0, len(from_taz) - 1)) _to = True return from_edge, to_edge def _select_pair_from_taz_wbuildings(self, from_buildings, to_buildings, pedestrian): """ Randomly select one pair from a TAZ. Important: from_buildings and to_buildings MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. 
""" from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = True while not self._valid_pair(from_edge, to_edge) and from_buildings and to_buildings: if not self._sumo_network.getEdge(to_edge).allows('pedestrian') or _to: to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = False else: from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] _to = True return from_edge, to_edge @staticmethod def _get_weighted_edge(edges, double, pedestrian): """ Return an edge and its position using the cumulative sum of the weigths in the area. """ pos = -1 ret = None for cum_sum, g_edge, p_edge, _ in edges: if ret and cum_sum > double: return ret, pos if pedestrian and p_edge: ret = p_edge elif not pedestrian and g_edge: ret = g_edge elif g_edge: ret = g_edge else: ret = p_edge pos += 1 return edges[-1][1], len(edges) - 1 ## ---- INTERMODAL: modes and route validity ---- ## @staticmethod def _get_mode_parameters(mode): """ Return the correst TraCI parameters for the requested mode. Parameters: _mode, _ptype, _vtype """ if mode == 'public': return 'public', '', '' elif mode == 'bicycle': return 'bicycle', '', 'bicycle' elif mode == 'walk': return '', 'pedestrian', '' return '', '', mode # 'car', '', mode (but car is not really necessary, # cause it creates unusable alternatives) def _is_valid_route(self, mode, route): """ Handle findIntermodalRoute results. """ if route is None: # traci failed return False _mode, _ptype, _vtype = self._get_mode_parameters(mode) if _mode is None: # only for findRoute if len(route.edges) >= 2: return True elif _mode == 'public': for stage in route: if stage.line: return True elif mode == 'car': for stage in route: if stage.stageType == tc.STAGE_DRIVING and len(stage.edges) >= 2: return True else: for stage in route: if len(stage.edges) >= 2: return True return False @staticmethod def _cost_from_route(route): """ Compute the route cost. """ cost = 0.0 for stage in route: cost += stage.cost return cost ## ---------------------------------------------------------------------------------------- ## ## Saving trips to files ## ## ---------------------------------------------------------------------------------------- ## ROUTES_TPL = """<?xml version="1.0" encoding="UTF-8"?> <!-- SUMO Activity-Based Mobility Generator Copyright (c) 2019 Lara CODECA - EURECOM This program and the accompanying materials are made available under the terms of the Eclipse Public License 2.0 which is available at http://www.eclipse.org/legal/epl-2.0. 
-->

<routes xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:noNamespaceSchemaLocation="http://sumo.dlr.de/xsd/routes_file.xsd">
{trips}
</routes>"""

    VEHICLE = """
    <vehicle id="{id}" type="{v_type}" depart="{depart}" departLane="best" arrivalPos="{arrival}">{route}{stop}
    </vehicle>"""

    ROUTE = """
    <route edges="{edges}"/>"""

    STOP_PARKING_TRIGGERED = """
    <stop parkingArea="{id}" triggered="true" expected="{person}"/>"""

    STOP_EDGE_TRIGGERED = """
    <stop lane="{lane}" parking="true" triggered="true" expected="{person}"/>"""

    ONDEMAND_TRIGGERED = """
    <stop lane="{lane}" parking="true" duration="1.0"/>"""

    FINAL_STOP = """
    <stop lane="{lane}" duration="1.0"/>"""

    PERSON = """
    <person id="{id}" type="pedestrian" depart="{depart}">{stages}
    </person>"""

    WAIT = """
    <stop lane="{lane}" duration="{duration}" actType="{action}"/>"""

    WALK = """
    <walk edges="{edges}"/>"""

    WALK_W_ARRIVAL = """
    <walk edges="{edges}" arrivalPos="{arrival}"/>"""

    WALK_BUS = """
    <walk edges="{edges}" busStop="{busStop}"/>"""

    RIDE_BUS = """
    <ride busStop="{busStop}" lines="{lines}" intended="{intended}" depart="{depart}"/>"""

    RIDE_TRIGGERED = """
    <ride from="{from_edge}" to="{to_edge}" lines="{vehicle_id}"/>"""

    VEHICLE_TRIGGERED = """
    <vehicle id="{id}" type="{v_type}" depart="triggered" departLane="best" arrivalPos="{arrival}">{route}{stops}
    </vehicle>"""

    def _get_stopping_lane(self, edge):
        """ Returns the vehicle-friendly stopping lane closest to the sidewalk. """
        for lane in self._sumo_network.getEdge(edge).getLanes():
            if lane.allows('passenger'):
                return lane.getID()
        raise TripGenerationError("'passenger' cannot stop on edge {}".format(edge))

    def _generate_sumo_trip_from_activitygen(self, person):
        """ Generate the XML string for the SUMO route file from a person-trip. """
        complete_trip = ''
        triggered = ''
        _triggered_counter = 0  ## to be used with on-demand vehicles
        _tr_id = '{}_tr'.format(person['id'])
        _triggered_vtype = ''
        _triggered_route = []
        _triggered_stops = ''
        stages = ''
        for stage in person['stages']:
            if stage.stageType == tc.STAGE_WAITING:
                stages += self.WAIT.format(
                    lane=stage.edges, duration=stage.travelTime, action=stage.description)
            elif stage.stageType == tc.STAGE_WALKING:
                if stage.destStop:
                    stages += self.WALK_BUS.format(
                        edges=' '.join(stage.edges), busStop=stage.destStop)
                else:
                    if stage.arrivalPos:
                        stages += self.WALK_W_ARRIVAL.format(
                            edges=' '.join(stage.edges), arrival=stage.arrivalPos)
                    else:
                        stages += self.WALK.format(edges=' '.join(stage.edges))
            elif stage.stageType == tc.STAGE_DRIVING:
                if stage.line != stage.intended:
                    # intended is the transport id, so it must be different
                    stages += self.RIDE_BUS.format(
                        busStop=stage.destStop, lines=stage.line,
                        intended=stage.intended, depart=stage.depart)
                else:
                    # triggered vehicle (line = intended)
                    _ride_id = None
                    if stage.intended == 'on-demand':
                        ## generate a new vehicle
                        _triggered_counter += 1  ## I don't want to start from 0
                        _ride_id = '{}_{}_od'.format(person['id'], _triggered_counter)
                        _route = self.ROUTE.format(edges=' '.join(stage.edges))
                        _vtype = stage.vType
                        _stop = ''
                        if stage.travelTime == 1.0:
                            _stop = self.FINAL_STOP.format(
                                lane=self._get_stopping_lane(stage.edges[-1]))
                        else:
                            _stop = self.ONDEMAND_TRIGGERED.format(
                                lane=self._get_stopping_lane(stage.edges[-1]))
                        triggered += self.VEHICLE_TRIGGERED.format(
                            id=_ride_id, v_type=_vtype, route=_route, stops=_stop,
                            arrival='random')
                    else:
                        ## add to the existing one
                        _ride_id = _tr_id
                        if _triggered_route:
                            ## check for contiguity
                            if _triggered_route[-1] != stage.edges[0]:
                                raise TripGenerationError(
                                    'Triggered vehicle has a broken route.')
                            else:
                                ## remove the duplicated edge
                                _triggered_route.extend(stage.edges[1:])
                        else:
                            ## nothing to be "fixed"
                            _triggered_route.extend(stage.edges)
                        _triggered_vtype = stage.vType
                        _stop = ''
                        if stage.travelTime == 1.0:
                            ## final stop
                            _stop = self.FINAL_STOP.format(
                                lane=self._get_stopping_lane(stage.edges[-1]))
                        else:
                            if stage.destStop:
                                ## parking
                                _stop = self.STOP_PARKING_TRIGGERED.format(
                                    id=stage.destStop, person=person['id'])
                            else:
                                ## side edge
                                _stop = self.STOP_EDGE_TRIGGERED.format(
                                    lane=self._get_stopping_lane(stage.edges[-1]),
                                    person=person['id'])
                        _triggered_stops += _stop
                    stages += self.RIDE_TRIGGERED.format(
                        from_edge=stage.edges[0], to_edge=stage.edges[-1],
                        vehicle_id=_ride_id)

        ## fixing the personal triggered vehicles
        if _triggered_route:
            _route = self.ROUTE.format(edges=' '.join(_triggered_route))
            triggered += self.VEHICLE_TRIGGERED.format(
                id=_tr_id, v_type=_triggered_vtype, route=_route,
                stops=_triggered_stops, arrival='random')

        ## result
        complete_trip += triggered
        complete_trip += self.PERSON.format(
            id=person['id'], depart=person['depart'], stages=stages)
        return complete_trip

    def _saving_trips_to_files(self):
        """ Save all the trips to files, divided by slice. """
        for name, dict_trips in self._all_trips.items():
            filename = '{}/{}{}.rou.xml'.format(
                BASE_DIR, self._conf['outputPrefix'], name)
            with open(filename, 'w') as tripfile:
                all_trips = ''
                for time in sorted(dict_trips.keys()):
                    for person in dict_trips[time]:
                        all_trips += person['string']
                tripfile.write(self.ROUTES_TPL.format(trips=all_trips))
            logging.info('Saved %s', filename)
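# Aside on the pair-selection code above: RandomState.random_integers(low, high)
# includes *both* bounds and has been deprecated since NumPy 1.11. A minimal
# sketch of the equivalent, non-deprecated call for index sampling; the names
# below (_rng, _edges) are illustrative, not part of the generator above:
from numpy.random import RandomState

_rng = RandomState(42)  # hypothetical seed, for illustration only
_edges = ['edge_a', 'edge_b', 'edge_c']

# deprecated, inclusive upper bound:
#   _index = _rng.random_integers(0, len(_edges) - 1)
# equivalent with randint, whose upper bound is exclusive:
_index = _rng.randint(0, len(_edges))
_edge = _edges[_index]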
############################
# SET UP TASK ENVIRONMENTS #
############################

# Define environments
environments = dict()

# ENVIRONMENT 1
rng = RandomState(123)

features = np.zeros((3, 21 * 10))
features[1, rng.randint(0, 21 * 10, 100)] = 1
features[0, 35:39] = 1
features[0, 45:49] = 1
features[0, 55:59] = 1

# Reward
features[2, rng.random_integers(0, 209, 15)] = 1

testMDP1 = HexGridMDP(features, (21, 10))

testAgent1 = Agent('Predator_1', [1, 0, 0, 0, 0], position=200,
                   solver_kwargs={'discount': 0.9, 'tol': 1e-4})
testAgent2 = Agent('Prey_1', [0, 0, 1, 0, 0], position=198,
                   solver_kwargs={'discount': 0.9, 'tol': 1e-4})

testEnvironment1 = HexEnvironment(testMDP1, [testAgent1, testAgent2])
environments['env_0'] = testEnvironment1

# ENVIRONMENT 2
rng = RandomState(123)

features = np.zeros((3, 21 * 10))
features[1, rng.randint(0, 21 * 10, 100)] = 1
features[0, 10:20] = 1
features[0, 20:30] = 1
features[0, 30:40] = 1
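# Aside (not part of the task setup above): random_integers samples *with*
# replacement, so the 15 reward indices drawn above may collide and fewer than
# 15 distinct reward cells can end up set. A minimal sketch, assuming distinct
# cells were intended, using RandomState.choice without replacement:
import numpy as np
from numpy.random import RandomState

_rng = RandomState(123)
_features = np.zeros((3, 21 * 10))
# exactly 15 distinct reward cells out of the 210 grid positions:
_features[2, _rng.choice(21 * 10, size=15, replace=False)] = 1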
dview.execute('import numpy as np')
dview['MAX_MEMORY_SIZE'] = MAX_MEMORY_SIZE
dview['wrapper'] = wrapper
dview['adf_simulation'] = adf_simulation
lview = rc.load_balanced_view()

trends = ('nc', 'c', 'ct', 'ctt')
T = array((20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100,
           120, 140, 160, 180, 200, 250, 300, 350, 400, 450,
           500, 600, 700, 800, 900, 1000, 1200, 1400, 2000))
T = T[::-1]
m = T.shape[0]
percentiles = list(arange(0.5, 100.0, 0.5))
rng = RandomState(0)
seeds = rng.random_integers(0, 2 ** 31 - 2, size=EX_NUM)

for tr in trends:
    results = zeros((len(percentiles), len(T), EX_NUM)) * nan
    filename = 'adf_z_' + tr + '.npz'

    for i in range(EX_NUM):
        print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
        # Non-parallel version:
        # out = lmap(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
        now = datetime.datetime.now()
        out = lview.map_sync(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
        # Prevent unnecessary results from accumulating
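# Aside on the seeding scheme above: drawing per-experiment seeds with
# random_integers gives distinct but not provably independent streams. A
# minimal sketch, assuming NumPy >= 1.17 is available, of the SeedSequence
# alternative; EX_NUM_SKETCH is a hypothetical stand-in for EX_NUM above:
import numpy as np

EX_NUM_SKETCH = 4
root = np.random.SeedSequence(0)
children = root.spawn(EX_NUM_SKETCH)          # statistically independent children
streams = [np.random.default_rng(c) for c in children]
draws = [g.integers(0, 2 ** 31 - 1) for g in streams]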
def trainer(train, valid, test, n_chars=33, img_w=128, max_len=27,
            feature_maps=100, filter_hs=[2, 3, 4], max_epochs=20, gamma=10,
            ncon=50, lrate=0.0002, batch_size=100, dispFreq=10, validFreq=100,
            saveto='example.npz'):
    """ train, valid, test : datasets
        n_chars : vocabulary size
        img_w : character embedding dimension
        max_len : the maximum length of a sentence
        feature_maps : the number of feature maps we used
        filter_hs : the filter window sizes we used
        max_epochs : the maximum number of epochs to run
        gamma : hyper-parameter used in ranking
        ncon : the number of negative samples we used for each positive sample
        lrate : learning rate
        batch_size : batch size during training
        dispFreq : display the training progress on stdout every N updates
        validFreq : compute the validation rank scores after this number of updates
        saveto : where to save the result
    """
    img_h = max_len + 2 * (filter_hs[-1] - 1)

    model_options = {}
    model_options['n_chars'] = n_chars
    model_options['img_w'] = img_w
    model_options['img_h'] = img_h
    model_options['feature_maps'] = feature_maps
    model_options['filter_hs'] = filter_hs
    model_options['max_epochs'] = max_epochs
    model_options['gamma'] = gamma
    model_options['ncon'] = ncon
    model_options['lrate'] = lrate
    model_options['batch_size'] = batch_size
    model_options['dispFreq'] = dispFreq
    model_options['validFreq'] = validFreq
    model_options['saveto'] = saveto
    logger.info('Model options {}'.format(model_options))

    logger.info('Building model...')

    filter_w = img_w
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    model_options['filter_shapes'] = filter_shapes
    model_options['pool_sizes'] = pool_sizes

    params = init_params(model_options)
    tparams = init_tparams(params)

    use_noise, inps, cost = build_model(tparams, model_options)

    logger.info('Building encoder...')
    inps_e, feat_x, feat_y = build_encoder(tparams, model_options)

    logger.info('Building functions...')
    f_emb = theano.function(inps_e, [feat_x, feat_y], name='f_emb')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, inps, lr)

    logger.info('Training model...')

    uidx = 0
    seed = 1234
    curr = 0
    history_errs = []

    valid_x = prepare_data(valid[0], max_len, n_chars, filter_hs[-1])
    valid_y = prepare_data(valid[1], max_len, n_chars, filter_hs[-1])
    test_x = prepare_data(test[0], max_len, n_chars, filter_hs[-1])
    test_y = prepare_data(test[1], max_len, n_chars, filter_hs[-1])

    zero_vec_tensor = tensor.vector()
    zero_vec = np.zeros(img_w).astype(theano.config.floatX)
    set_zero = theano.function(
        [zero_vec_tensor],
        updates=[(tparams['Wemb'],
                  tensor.set_subtensor(tparams['Wemb'][n_chars - 1, :],
                                       zero_vec_tensor))])

    # Main loop
    for eidx in range(max_epochs):
        prng = RandomState(seed - eidx - 1)
        trainA = train[0]
        trainB = train[1]
        num_samples = len(trainA)

        inds = np.arange(num_samples)
        prng.shuffle(inds)
        # integer division, so range() below works in Python 3
        numbatches = len(inds) // batch_size

        for minibatch in range(numbatches):
            use_noise.set_value(0.)
            uidx += 1
            conprng = RandomState(seed + uidx + 1)

            x = [trainA[seq] for seq in inds[minibatch::numbatches]]
            y = [trainB[seq] for seq in inds[minibatch::numbatches]]
            cinds = conprng.random_integers(low=0, high=num_samples - 1,
                                            size=ncon * len(x))
            cy = [trainB[seq] for seq in cinds]

            x = prepare_data(x, max_len, n_chars, filter_hs[-1])
            y = prepare_data(y, max_len, n_chars, filter_hs[-1])
            cy = prepare_data(cy, max_len, n_chars, filter_hs[-1])

            cost = f_grad_shared(x, y, cy)
            f_update(lrate)
            # reset the special token's embedding to zero; it must not be updated
            set_zero(zero_vec)

            if np.mod(uidx, dispFreq) == 0:
                logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))

            if np.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                logger.info('Computing ranks...')
                feats_x, feats_y = f_emb(valid_x, valid_y)
                (r1, r3, r10, medr, meanr, h_meanr) = rank(feats_x, feats_y)
                history_errs.append([r1, r3, r10, medr, meanr, h_meanr])
                logger.info('Valid Rank: {}, {}, {}, {}, {}, {}'.format(
                    r1, r3, r10, medr, meanr, h_meanr))

                currscore = r1 + r3 + r10
                if currscore > curr:
                    curr = currscore
                    logger.info('Saving...')
                    params = unzip(tparams)
                    np.savez(saveto, history_errs=history_errs, **params)
                    logger.info('Done...')

    use_noise.set_value(0.)
    zipp(params, tparams)
    logger.info('Final results...')

    feats_x, feats_y = f_emb(valid_x, valid_y)
    (r1, r3, r10, medr, meanr, h_meanr) = rank(feats_x, feats_y)
    logger.info('Valid Rank: {}, {}, {}, {}, {}, {}'.format(
        r1, r3, r10, medr, meanr, h_meanr))

    feats_x, feats_y = f_emb(test_x, test_y)
    (r1, r3, r10, medr, meanr, h_meanr) = rank(feats_x, feats_y)
    logger.info('Test Rank: {}, {}, {}, {}, {}, {}'.format(
        r1, r3, r10, medr, meanr, h_meanr))

    # np.savez("./cnn_feats.npz", feats_x=feats_x, feats_y=feats_y)

    return (r1, r3, r10, medr, meanr, h_meanr)
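# Aside on the negative sampling in the training loop above: re-seeding a fresh
# RandomState from (seed, uidx) makes the contrastive indices reproducible per
# update. A minimal standalone sketch of the same pattern; the function name
# and arguments are illustrative, not taken from the trainer:
import numpy as np
from numpy.random import RandomState

def sample_negatives(seed, uidx, num_samples, ncon, batch_len):
    """Reproducible negative indices for one update, drawn with replacement."""
    conprng = RandomState(seed + uidx + 1)
    # randint's upper bound is exclusive, matching random_integers(0, n - 1)
    return conprng.randint(0, num_samples, size=ncon * batch_len)

# Sampling twice with the same (seed, uidx) yields identical negatives:
assert np.array_equal(sample_negatives(1234, 7, 1000, 50, 100),
                      sample_negatives(1234, 7, 1000, 50, 100))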