def test_not_destructive(self):
    # Checks that manipulating a cloned graph leaves the original unchanged.
    r1, r2, r5 = MyVariable(1), MyVariable(2), MyVariable(5)
    inner_node = MyOp.make_node(r1, r2)
    node = MyOp.make_node(inner_node.outputs[0], r5)

    _, cloned_outputs = clone([r1, r2, r5], node.outputs, False)
    new_node = cloned_outputs[0].owner
    # Rewire the clone only; the original node must not see this change.
    new_node.inputs = MyVariable(7), MyVariable(8)

    cloned_repr = self.str(inputs(new_node.outputs), new_node.outputs)
    assert cloned_repr == ["MyOp(R7, R8)"]

    original_repr = self.str(inputs(node.outputs), node.outputs)
    assert original_repr == ["MyOp(MyOp(R1, R2), R5)"]
def _get_variables(self):
    """Collect variables, updates and auxiliary variables.

    In addition collects all :class:`.Scan` ops and recurses in the
    respective inner Theano graphs.

    Results are stored on the instance: ``self.variables`` (a list) and
    ``self.updates``.  When at least one output is not a shared variable,
    ``self.scans`` and ``self._scan_graphs`` are set as well.

    """
    updates = OrderedDict()

    shared_outputs = [o for o in self.outputs if is_shared_variable(o)]
    usual_outputs = [o for o in self.outputs if not is_shared_variable(o)]
    variables = shared_outputs

    if usual_outputs:
        # Sort apply nodes topologically, get variables and remove
        # duplicates
        inputs = graph.inputs(self.outputs)
        sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)
        self.scans = list(unique([node.op for node in sorted_apply_nodes
                                  if isinstance(node.op, Scan)],
                                 key=id))
        self._scan_graphs = [ComputationGraph(scan.outputs)
                             for scan in self.scans]

        # ``seen.add`` returns None, so the condition both tests and
        # records membership while preserving first-occurrence order.
        seen = set()
        main_vars = (
            [var for var in list(chain(
                *[apply_node.inputs for apply_node in sorted_apply_nodes]))
             if not (var in seen or seen.add(var))] +
            [var for var in self.outputs if var not in seen])

        # While preserving order add auxiliary variables, and collect
        # updates
        seen = set()
        # Intermediate variables could be auxiliary
        seen_avs = set(main_vars)
        variables = []
        for var in main_vars:
            variables.append(var)
            for annotation in getattr(var.tag, 'annotations', []):
                if annotation not in seen:
                    seen.add(annotation)
                    new_avs = [
                        av for av in annotation.auxiliary_variables
                        if not (av in seen_avs or seen_avs.add(av))]
                    variables.extend(new_avs)
                    updates = dict_union(updates, annotation.updates)

    # If shared_variables is assigned default_update (cloned), we cannot
    # eval() it to get the real numpy array value, hence, try to trace back
    # original shared variable
    def shared_variable_filter(var):
        if is_shared_variable(var) and hasattr(var, 'default_update'):
            # Use the same tolerant lookup as the main loop above: a
            # variable without annotations is simply returned unchanged.
            for annotation in getattr(var.tag, 'annotations', []):
                if hasattr(annotation, var.name) and \
                        is_shared_variable(getattr(annotation, var.name)):
                    return getattr(annotation, var.name)
        return var

    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # which would be exhausted after the first traversal.
    self.variables = [shared_variable_filter(var) for var in variables]
    self.updates = updates
def check_parameter(name, value):
    """Check, convert and extract the inputs of a parameter value.

    Theano expressions are returned unchanged; lists, arrays and scalars
    are wrapped into a Theano shared variable named ``name``.

    Parameters
    ----------
    name : str
        Name given to the shared variable created for non-Theano values.
    value : theano expression, ndarray, list or scalar
        The parameter value.

    Returns
    -------
    value : theano expression
        The (possibly wrapped) parameter expression.
    parameters : set
        Shared variables found among the graph inputs of ``value``.
    constants : set
        Tensor constants found among the graph inputs of ``value``.
    observeds : set
        Remaining named tensor variables among the graph inputs.

    Raises
    ------
    ValueError
        If a non-shared, non-constant input tensor variable has no name.
    """
    parameters = set()
    constants = set()
    observeds = set()

    if isinstance(value, SharedVariable):
        parameters.add(value)

    elif isinstance(value, T.TensorConstant):
        constants.add(value)

    elif isinstance(value, T.TensorVariable):
        for var in graph.inputs([value]):
            if isinstance(var, SharedVariable):
                parameters.add(var)
            elif isinstance(var, T.TensorConstant):
                constants.add(var)
            elif isinstance(var, T.TensorVariable):
                if not var.name:
                    raise ValueError("Observed variables must be named.")
                observeds.add(var)

    else:
        # Lists used to fall through to ``float(value)`` and fail with a
        # TypeError; convert them to an array first.
        if isinstance(value, list):
            value = np.array(value)

        if isinstance(value, np.ndarray):
            value = theano.shared(value, name=name)
        else:
            value = theano.shared(float(value), name=name)

        parameters.add(value)

    return value, parameters, constants, observeds
def _get_variables(self):
    """Collect variables, updates and auxiliary variables.

    In addition collects all :class:`.Scan` ops and recurses in the
    respective inner Theano graphs.

    """
    updates = OrderedDict()

    shared_outputs = []
    usual_outputs = []
    for output in self.outputs:
        if is_shared_variable(output):
            shared_outputs.append(output)
        else:
            usual_outputs.append(output)
    variables = shared_outputs

    if usual_outputs:
        # Topologically sorted apply nodes between the graph inputs and
        # the non-shared outputs.
        graph_inputs = graph.inputs(self.outputs)
        self.sorted_apply_nodes = graph.io_toposort(graph_inputs,
                                                    usual_outputs)
        scan_nodes = [node for node in self.sorted_apply_nodes
                      if isinstance(node.op, Scan)]
        self.scans = list(unique([node.op for node in scan_nodes]))
        self.sorted_scan_nodes = scan_nodes
        self._scan_graphs = [ComputationGraph(scan.outputs)
                             for scan in self.scans]

        # Inputs of every apply node in first-occurrence order, followed
        # by any output that did not already show up as an input.
        visited = set()
        main_vars = []
        for apply_node in self.sorted_apply_nodes:
            for var in apply_node.inputs:
                if var not in visited:
                    visited.add(var)
                    main_vars.append(var)
        main_vars.extend(var for var in self.outputs if var not in visited)

        # While preserving order add auxiliary variables, and collect
        # updates
        seen_annotations = set()
        # Intermediate variables could be auxiliary
        seen_avs = set(main_vars)
        variables = []
        for var in main_vars:
            variables.append(var)
            for annotation in getattr(var.tag, 'annotations', []):
                if annotation in seen_annotations:
                    continue
                seen_annotations.add(annotation)
                for av in annotation.auxiliary_variables:
                    if av not in seen_avs:
                        seen_avs.add(av)
                        variables.append(av)
                updates = dict_union(updates, annotation.updates)

    self.variables = variables
    self.updates = updates
def predict():
    """
    An example of how to load a trained model and use it to predict labels.

    Loads the pickled classifier from ``best_model_linear.pkl``, compiles a
    Theano function from the graph input named ``'x'`` to the classifier's
    ``y_pred``, runs it on randomly generated data, then plots a histogram
    of the predictions and prints them.
    """
    # load the saved model
    # NOTE(review): unpickling executes code stored in the file; only load
    # model files from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with open('best_model_linear.pkl', 'rb') as model_file:
        classifier = pickle.load(model_file)

    y_pred = classifier.y_pred
    # find the input to theano graph
    inputs = graph.inputs([y_pred])
    # select only x
    inputs = [item for item in inputs if item.name == 'x']
    # compile a predictor function
    predict_model = theano.function(
        inputs=inputs,
        outputs=y_pred)

    # Two stacked batches of 1000 random rows with 500 columns each.
    X_test = np.random.rand(1000, 500) * .75 + .25
    X_test = np.append(X_test, np.random.rand(1000, 500) * .75, axis=0)
    # NOTE(review): y_test is generated but not used below.
    y_test = np.random.rand(1000,) * .3
    y_test = np.append(y_test, np.random.rand(1000,) * .3 + .7, axis=0)

    predicted_values = predict_model(X_test)
    print ("Predicted values for the first 10 examples in test set:")
    plt.hist(predicted_values)
    print (predicted_values)
def elemwise_logp(model, var):
    """Return a callable evaluating the summed factors that depend on `var`.

    Only the entries of ``model.factors`` whose graph inputs contain `var`
    are summed.  The returned function takes a mapping and passes it as
    keyword arguments to the compiled function over ``model.vars``.
    """
    relevant = [factor for factor in model.factors
                if var in inputs([factor])]
    compiled = function(model.vars, builtin_sum(relevant))

    def fn(x):
        return compiled(**x)

    return fn
def check_parameter(name, value):
    """Check, convert and extract inputs of a parameter value.

    This function wraps scalar or lists into a Theano shared variable, then
    acting as a parameter. Theano expressions are left unchanged.

    Parameters
    ----------
    * `name` [string]:
        The parameter name.

    * `value` [theano expression, list or scalar]:
        The parameter value.

    Returns
    -------
    * `value` [theano expression]:
        The parameter expression.

    * `parameters` [set of theano shared variables]:
        Set of base shared variables on which `value` depends.

    * `constants` [set of theano constants]:
        Set of base constants on which `value` depends.

    * `observeds` [set of theano tensor variables]:
        Set of base unset variables on which `value` depends.

    Raises
    ------
    * `ValueError`:
        If an unset tensor variable on which `value` depends has no name.
    """
    parameters = set()
    constants = set()
    observeds = set()

    if isinstance(value, SharedVariable):
        parameters.add(value)

    elif isinstance(value, T.TensorConstant):
        constants.add(value)

    elif isinstance(value, T.TensorVariable):
        inputs = graph.inputs([value])

        for var in inputs:
            if isinstance(var, SharedVariable):
                parameters.add(var)
            elif isinstance(var, T.TensorConstant):
                constants.add(var)
            elif isinstance(var, T.TensorVariable):
                if not var.name:
                    raise ValueError("Observed variables must be named.")
                observeds.add(var)

    else:
        if isinstance(value, list):
            # np.array copies the list's contents; the np.ndarray
            # constructor would instead interpret the list as a shape and
            # return an uninitialised array.
            value = np.array(value)

        if isinstance(value, np.ndarray):
            value = theano.shared(value, name=name)
        else:
            value = theano.shared(float(value), name=name)

        parameters.add(value)

    return value, parameters, constants, observeds
def test_inputs():
    # inputs() on a two-level graph yields the leaf variables in the
    # order asserted below.
    r1 = MyVariable(1)
    r2 = MyVariable(2)
    r3 = MyVariable(3)

    o1 = MyOp(r1, r2)
    o1.name = "o1"
    o2 = MyOp(r3, o1)
    o2.name = "o2"

    found = list(inputs([o2], blockers=None))
    assert found == [r3, r1, r2]
def inputvars(a):
    """
    Get the tensor-variable inputs of a theano variable.

    Parameters
    ----------
        a : theano variable

    Returns
    -------
        r : list of tensor variables that are inputs
    """
    tensor_inputs = []
    for candidate in inputs(makeiter(a)):
        # Keep only graph inputs that are tensor variables.
        if isinstance(candidate, t.TensorVariable):
            tensor_inputs.append(candidate)
    return tensor_inputs
def inputvars(a):
    """
    Get the tensor-variable inputs of a theano variable.

    Parameters
    ----------
        a : theano variable

    Returns
    -------
        r : list of tensor variables that are inputs
    """
    tensor_inputs = []
    for candidate in inputs(makeiter(a)):
        # Keep only graph inputs that are tensor variables.
        if isinstance(candidate, tt.TensorVariable):
            tensor_inputs.append(candidate)
    return tensor_inputs
def _get_variables(self):
    """Collect variables, updates and auxiliary variables.

    In addition collects all :class:`.Scan` ops and recurses in the
    respective inner Theano graphs.

    """
    updates = OrderedDict()

    shared_outputs = []
    usual_outputs = []
    for output in self.outputs:
        if is_shared_variable(output):
            shared_outputs.append(output)
        else:
            usual_outputs.append(output)
    variables = shared_outputs

    if usual_outputs:
        # Topologically sorted apply nodes between the graph inputs and
        # the non-shared outputs.
        graph_inputs = graph.inputs(self.outputs)
        self.sorted_apply_nodes = graph.io_toposort(graph_inputs,
                                                    usual_outputs)
        scan_nodes = [node for node in self.sorted_apply_nodes
                      if isinstance(node.op, Scan)]
        self.scans = list(unique([node.op for node in scan_nodes]))
        self.sorted_scan_nodes = scan_nodes
        self._scan_graphs = [ComputationGraph(scan.outputs)
                             for scan in self.scans]

        # Inputs of every apply node in first-occurrence order, followed
        # by any output that did not already show up as an input.
        visited = set()
        main_vars = []
        for apply_node in self.sorted_apply_nodes:
            for var in apply_node.inputs:
                if var not in visited:
                    visited.add(var)
                    main_vars.append(var)
        main_vars.extend(var for var in self.outputs if var not in visited)

        # While preserving order add auxiliary variables, and collect
        # updates
        seen_annotations = set()
        # Intermediate variables could be auxiliary
        seen_avs = set(main_vars)
        variables = []
        for var in main_vars:
            variables.append(var)
            for annotation in getattr(var.tag, 'annotations', []):
                if annotation in seen_annotations:
                    continue
                seen_annotations.add(annotation)
                for av in annotation.auxiliary_variables:
                    if av not in seen_avs:
                        seen_avs.add(av)
                        variables.append(av)
                updates = dict_union(updates, annotation.updates)

    self.variables = variables
    self.updates = updates
def predict(X_test, filename='trained_model.pkl'):
    """Load a pickled regressor and return its predictions for ``X_test``.

    Parameters
    ----------
    X_test : array-like with an ``astype`` method
        Input data; it is cast to ``numpy.float32`` before prediction.
    filename : str
        Path of the pickled model.  The loaded object must expose
        ``linearLayer.y_pred``.

    Returns
    -------
    The output of the compiled Theano function mapping the graph input
    named ``'x'`` to ``y_pred``.
    """
    # load the saved model
    # NOTE(review): unpickling executes code stored in the file; only load
    # model files from a trusted source.
    # The ``with`` block closes the stream; no explicit close() is needed.
    with open(filename, 'rb') as in_strm:
        regressor = pickle.load(in_strm)

    y_pred = regressor.linearLayer.y_pred
    # find the input to theano graph
    inputs = graph.inputs([y_pred])
    # select only x
    inputs = [item for item in inputs if item.name == 'x']
    # compile a predictor function
    predict_model = theano.function(inputs=inputs, outputs=y_pred)

    X_test = X_test.astype(numpy.float32)
    predicted_values = predict_model(X_test)
    return predicted_values
def predict(X_test, filename='best_model_actual_data.pkl'):
    """Load a pickled classifier and return its predictions for ``X_test``.

    Parameters
    ----------
    X_test : array-like with an ``astype`` method
        Input data; it is cast to ``numpy.float32`` before prediction.
    filename : str
        Path of the pickled model.  The loaded object must expose
        ``y_pred``.

    Returns
    -------
    The output of the compiled Theano function mapping the graph input
    named ``'x'`` to ``y_pred``.
    """
    # load the saved model
    # NOTE(review): unpickling executes code stored in the file; only load
    # model files from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with open(filename, 'rb') as model_file:
        classifier = pickle.load(model_file)

    y_pred = classifier.y_pred
    # find the input to theano graph
    inputs = graph.inputs([y_pred])
    # select only x
    inputs = [item for item in inputs if item.name == 'x']
    # compile a predictor function
    predict_model = theano.function(
        inputs=inputs,
        outputs=y_pred)

    predicted_values = predict_model(X_test.astype(numpy.float32))
    return predicted_values
def _get_variables(self):
    """Collect variables, updates and auxiliary variables.

    Results are stored on the instance as ``self.variables`` (a list
    without duplicates coming from the outputs) and ``self.updates``.
    """
    updates = OrderedDict()

    shared_outputs = [o for o in self.outputs if is_shared_variable(o)]
    usual_outputs = [o for o in self.outputs if not is_shared_variable(o)]
    variables = shared_outputs

    if usual_outputs:
        # Sort apply nodes topologically, get variables and remove
        # duplicates
        inputs = graph.inputs(self.outputs)
        sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)

        # ``seen.add`` returns None, so the condition both tests and
        # records membership while preserving first-occurrence order.
        seen = set()
        main_vars = (
            [var for var in list(chain(
                *[apply_node.inputs for apply_node in sorted_apply_nodes]))
             if not (var in seen or seen.add(var))] +
            # An output that also feeds another apply node has already
            # been collected above; appending it again would list it
            # twice in ``self.variables``.
            [var for var in self.outputs if var not in seen])

        # While preserving order add auxiliary variables, and collect
        # updates
        seen = set()
        # Intermediate variables could be auxiliary
        seen_avs = set(main_vars)
        variables = []
        for var in main_vars:
            variables.append(var)
            for annotation in getattr(var.tag, 'annotations', []):
                if annotation not in seen:
                    seen.add(annotation)
                    new_avs = [
                        av for av in annotation.auxiliary_variables
                        if not (av in seen_avs or seen_avs.add(av))]
                    variables.extend(new_avs)
                    updates = dict_union(updates, annotation.updates)

    self.variables = variables
    self.updates = updates
def _get_inputs(self, var, func):
    """Get all inputs to a function, doing some accounting for deterministics

    Specifically, if a deterministic is an input, theano.gof.graph.inputs
    will return only the inputs *to the deterministic*.  However, if we pass
    in the deterministic as a blocker, it will skip those nodes.
    """
    deterministics = self.get_deterministics(var)
    upstream = self._inputs(var, func)
    parents = self._inputs(var, func, blockers=deterministics)
    if parents == upstream:
        # Blocking on the deterministics changed nothing.
        return parents

    # For each deterministic, record the inputs found when it alone acts
    # as a blocker, but only if that hides some upstream variable.
    det_map = {}
    for det in deterministics:
        blocked_inputs = set(inputs([func], blockers=[det]))
        if upstream - blocked_inputs:
            det_map[det] = blocked_inputs

    # Add a deterministic when its input set is contained in every
    # recorded input set.
    for det, blocked_inputs in det_map.items():
        if all(blocked_inputs.issubset(other) for other in det_map.values()):
            parents.add(det)
    return parents
def is_same_graph_with_merge(var1, var2, givens=None):
    """
    Merge-based implementation of `theano.gof.graph.is_same_graph`.

    See help on `theano.gof.graph.is_same_graph` for additional documentation.

    """
    from theano.gof.opt import MergeOptimizer

    givens = {} if givens is None else givens

    # Copy variables since the MergeOptimizer will modify them.
    var1_copy, var2_copy, givens = copy.deepcopy([var1, var2, givens])
    outputs = [var1_copy, var2_copy]

    # Create FunctionGraph.
    graph_inputs = list(inputs(outputs))
    # The clone isn't needed as we did a deepcopy and cloning would
    # break the mapping in givens.
    fgraph = theano.gof.fg.FunctionGraph(graph_inputs, outputs, clone=False)

    # Perform Variable substitution.
    for to_replace, replace_by in givens.items():
        fgraph.replace(to_replace, replace_by)

    # Perform merge optimization.
    MergeOptimizer().optimize(fgraph)

    # When two variables perform the same computations, they will have the
    # same owner in the optimized graph.
    # We need to be careful with the special case where the owner is None,
    # which happens when the graph is made of a single Variable.
    # We also need to make sure we replace a Variable if it is present in
    # `givens`.
    first, second = [givens.get(v, v) for v in outputs]
    o1, o2 = first.owner, second.owner
    if o1 is None and o2 is None:
        # Comparing two single-Variable graphs: they are equal if they are
        # the same Variable.
        return first == second
    return o1 is o2
def predict(X_test, sa_model='sa_trained_model.pkl',
            tox_model='tox_trained_model.pkl'):
    """Run both saved models on ``X_test``.

    Parameters
    ----------
    X_test : array-like with an ``astype`` method
        Input data; it is cast to ``np.float32`` before prediction.
    sa_model : str
        Path of the pickled regressor exposing ``linearLayer.y_pred``.
    tox_model : str
        Path of the model loaded with ``joblib.load``; it must provide
        ``predict_proba``.

    Returns
    -------
    predicted_values : ndarray
        Regressor output reshaped to ``(len(predicted_values), 1)``.
    proba : array
        Column 1 of ``predict_proba(X_test)`` from the second model.
    """
    # load the saved model
    # NOTE(review): unpickling executes code stored in the file; only load
    # model files from a trusted source.
    # The ``with`` block closes the stream; no explicit close() is needed.
    with open(sa_model, 'rb') as in_strm:
        regressor = pickle.load(in_strm)

    y_pred = regressor.linearLayer.y_pred
    # find the input to theano graph
    inputs = graph.inputs([y_pred])
    # select only x
    inputs = [item for item in inputs if item.name == 'x']
    # compile a predictor function
    predict_model = theano.function(inputs=inputs, outputs=y_pred)

    X_test = X_test.astype(np.float32)
    predicted_values = predict_model(X_test)
    predicted_values = np.asarray(predicted_values)
    # Turn the predictions into a single column.
    predicted_values = np.reshape(predicted_values,
                                  (len(predicted_values), 1))

    xtree = joblib.load(tox_model)
    proba = xtree.predict_proba(X_test)[:, 1]
    print('Prediction done!')
    return predicted_values, proba
def elemwise_logp(model, var):
    """Add up the entries of ``model.factors`` whose graph inputs contain `var`."""
    terms = []
    for factor in model.factors:
        if var in inputs([factor]):
            terms.append(factor)
    return add(*terms)
def test_inputs(self):
    # The inputs of a single node's outputs are exactly that node's inputs.
    r1 = MyVariable(1)
    r2 = MyVariable(2)
    node = MyOp.make_node(r1, r2)
    found = inputs(node.outputs)
    assert found == [r1, r2]
def _get_variables(self):
    """Collect variables, updates and auxiliary variables.

    In addition collects all :class:`.Scan` ops and recurses in the
    respective inner Theano graphs.

    Results are stored on the instance: ``self.variables`` (a list) and
    ``self.updates``.  When at least one output is not a trainable
    variable, ``self.scans`` and ``self._scan_graphs`` are set as well.

    """
    updates = OrderedDict()

    shared_outputs = [o for o in self.outputs if is_trainable_variable(o)]
    usual_outputs = [o for o in self.outputs
                     if not is_trainable_variable(o)]
    variables = shared_outputs

    if usual_outputs:
        # Sort apply nodes topologically, get variables and remove
        # duplicates
        inputs = graph.inputs(self.outputs)
        sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)
        self.scans = list(_unique([node.op for node in sorted_apply_nodes
                                   if isinstance(node.op, Scan)],
                                  key=id))
        self._scan_graphs = [ComputationGraph(scan.outputs)
                             for scan in self.scans]

        # ``seen.add`` returns None, so the condition both tests and
        # records membership while preserving first-occurrence order.
        seen = set()
        main_vars = (
            [var for var in list(chain(
                *[apply_node.inputs for apply_node in sorted_apply_nodes]))
             if not (var in seen or seen.add(var))] +
            [var for var in self.outputs if var not in seen])

        # While preserving order add auxiliary variables, and collect
        # updates
        # Intermediate variables could be auxiliary
        seen_avs = set(main_vars)
        variables = []
        for var in main_vars:
            variables.append(var)
            # updates: keep only entries whose key passes is_variable().
            # ``items()`` works on both Python 2 and Python 3, unlike
            # ``iteritems()``.
            var_updates = getattr(var.tag, 'updates', OrderedDict())
            var_updates = OrderedDict(
                [(key, value) for key, value in var_updates.items()
                 if is_variable(key)])
            updates = dict_union(updates, var_updates)
            # auxiliary_variables
            for aux in getattr(var.tag, 'auxiliary_variables', []):
                if aux not in seen_avs:
                    seen_avs.add(aux)
                    variables.append(aux)

    # If trainable_variables is assigned default_update (cloned), we cannot
    # eval() it to get the real numpy array value, hence, try to trace back
    # original shared variable
    def shared_variable_filter(var):
        if is_trainable_variable(var) and hasattr(var, 'default_update'):
            for created in _CREATED_VARIABLE.values():
                if created.name == var.name and created.ndim == var.ndim:
                    return created
        return var

    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # which would be exhausted after the first traversal.
    self.variables = [shared_variable_filter(var) for var in variables]
    self.updates = updates
def test_inputs_deep(self):
    # inputs() must walk through the intermediate node down to the leaves.
    r1 = MyVariable(1)
    r2 = MyVariable(2)
    r5 = MyVariable(5)
    inner = MyOp.make_node(r1, r2)
    outer = MyOp.make_node(inner.outputs[0], r5)
    i = inputs(outer.outputs)
    assert i == [r1, r2, r5], i
def elemwise_logp(model, var):
    """Compile the sum of elementwise log-probability terms involving `var`.

    For each entry of ``model.basic_RVs`` whose ``logpt`` graph has `var`
    among its inputs, its ``logp_elemwiset`` is collected; the terms are
    combined with ``add`` and passed to ``model.fn``.
    """
    terms = []
    for rv in model.basic_RVs:
        if var in inputs([rv.logpt]):
            terms.append(rv.logp_elemwiset)
    return model.fn(add(*terms))
def _inputs(self, var, func, blockers=None):
    """Get inputs to a function that are also named PyMC3 variables"""
    named_inputs = set()
    for candidate in inputs([func], blockers=blockers):
        # Keep entries of self.var_list, excluding `var` itself.
        if candidate in self.var_list and candidate != var:
            named_inputs.add(candidate)
    return named_inputs