from collections import OrderedDict
from functools import partial

from sklearn.metrics import (accuracy_score, auc, average_precision_score,
                             f1_score, log_loss, precision_recall_curve,
                             precision_score, recall_score, roc_curve)

# bootstrap_metric, plot_roc_curve and plot_precision_recall_curve are
# project-local helpers defined elsewhere in this project.


def binary_report(predictions, col_true='CLASS'):
    print('Binary classification results:')

    y_true = (predictions[col_true] == 'QSO')
    y_pred_proba = predictions['QSO_PHOTO']
    y_pred_binary = (predictions['CLASS_PHOTO'] == 'QSO')

    n_pos = y_pred_binary.sum()
    n_all = len(y_pred_binary)
    print('Predicted positives: {}/{} ({:.2f}%)'.format(
        n_pos, n_all, n_pos / n_all * 100))

    logloss, logloss_err = bootstrap_metric(log_loss, y_true, y_pred_proba)
    print('Logloss = {:.4f} ({:.4f})'.format(logloss, logloss_err))

    binary_metrics = OrderedDict([
        ('Accuracy', partial(bootstrap_metric, accuracy_score)),
        ('F1', partial(bootstrap_metric, f1_score)),
        ('Precision', partial(bootstrap_metric, precision_score)),
        ('Recall', partial(bootstrap_metric, recall_score)),
    ])
    for metric_name, metric_func in binary_metrics.items():
        score, score_err = metric_func(y_true, y_pred_binary)
        print('{} = {:.4f} ({:.4f})'.format(metric_name, score, score_err))

    # ROC AUC
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
    roc_auc = auc(fpr, tpr)
    print('ROC AUC = {:.4f}'.format(roc_auc))
    plot_roc_curve(fpr, tpr, roc_auc)

    # Precision-recall curve
    average_precision = average_precision_score(y_true, y_pred_proba)
    precision = precision_score(y_true, y_pred_binary)
    recall = recall_score(y_true, y_pred_binary)
    precisions, recalls, thresholds = precision_recall_curve(
        y_true, y_pred_proba)
    plot_precision_recall_curve(precisions, recalls, average_precision,
                                precision, recall)
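# A minimal sketch of the bootstrap_metric helper assumed above. It is not
# defined in this snippet; from its call sites it appears to take a metric
# function plus (y_true, y_pred) and return the score together with a
# bootstrap estimate of its uncertainty. The name, signature and number of
# resamples here are illustrative assumptions, not the project's actual
# implementation.
import numpy as np


def bootstrap_metric(metric_func, y_true, y_pred, n_resamples=100, seed=0):
    """Return (score, score_err), with score_err the bootstrap std. dev."""
    rng = np.random.RandomState(seed)
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    scores = []
    for _ in range(n_resamples):
        # Resample (y_true, y_pred) pairs with replacement and re-score.
        idx = rng.randint(0, len(y_true), size=len(y_true))
        scores.append(metric_func(y_true[idx], y_pred[idx]))
    # Note: assumes each resample contains both classes, which metrics such
    # as log_loss require.
    return metric_func(y_true, y_pred), float(np.std(scores))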
def convert_to_layer_nodes(root):
    """
    At each level in the SPN rooted in the 'root' node, model all the nodes
    as a single layer-node.

    Args:
        root (Node): The root of the SPN graph.

    Returns:
        root (Node): The root of the SPN graph, with each layer modelled as a
            single layer-node.
    """
    parents = defaultdict(list)
    depths = defaultdict(list)
    node_to_depth = OrderedDict()
    node_to_depth[root] = 1

    def get_parents(node):
        # Add to the parents dict
        if node.is_op:
            for i in node.inputs:
                if (i and  # Input not empty
                        not (i.is_param or i.is_var)):
                    parents[i.node].append(node)
                    node_to_depth[i.node] = node_to_depth[node] + 1

    def permute_inputs(input_values, input_sizes):
        # For a given list of inputs and their corresponding sizes, create a
        # nested list of (input, index) pairs.
        # E.g.: input_values = [(A, [2, 5]), (B, None)]
        #       input_sizes = [2, 3]
        #       inputs = [[('A', 2), ('A', 5)],
        #                 [('B', 0), ('B', 1), ('B', 2)]]
        inputs = [list(product([inp.node], inp.indices)) if inp and inp.indices
                  else list(product([inp.node], list(range(inp_size))))
                  for inp, inp_size in zip(input_values, input_sizes)]

        # For a given nested list of (input, index) pairs, permute over the
        # inputs.
        # E.g.: permuted_inputs = [('A', 2), ('B', 0),
        #                          ('A', 2), ('B', 1),
        #                          ('A', 2), ('B', 2),
        #                          ('A', 5), ('B', 0),
        #                          ('A', 5), ('B', 1),
        #                          ('A', 5), ('B', 2)]
        permuted_inputs = list(product(*inputs))
        return list(chain(*permuted_inputs))

    # Create a parents dictionary of the SPN graph
    traverse_graph(root, fun=get_parents, skip_params=True)

    # Create a depth dictionary of the SPN graph
    for key, value in node_to_depth.items():
        depths[value].append(key)
    spn_depth = len(depths)

    # Iterate through each depth of the SPN, starting from the deepest layer,
    # moving up to the root node
    for depth in range(spn_depth, 1, -1):
        if isinstance(depths[depth][0], (Sum, ParallelSums)):  # A Sums layer
            # Create a default SumsLayer node
            with tf.name_scope("Layer%s" % depth):
                sums_layer = SumsLayer(name="SumsLayer-%s.%s" % (depth, 1))
            # Counter keeping track of the number of sums modelled in the
            # layer node
            layer_num_sums = 0
            # List storing the sum-input-size of each sum modelled in the
            # layer node
            num_or_size_sums = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # TODO: To be replaced with node.num_sums once an AbstractSums
                # class is introduced
                # Number of sums modelled by the current node
                node_num_sums = (1 if isinstance(node, Sum) else node.num_sums)
                # Add the input values of the current node to the SumsLayer node
                sums_layer.add_values(*node.values * node_num_sums)
                # Add the sum-input-size of each sum modelled in the current
                # node to the list
                num_or_size_sums += [sum(node.get_input_sizes()[2:])] * node_num_sums

                # Visit each parent of the current node
                for parent in parents[node]:
                    try:
                        # 'values' in case the parent is an Op node
                        values = list(parent.values)
                    except AttributeError:
                        # 'inputs' in case the parent is a Concat node
                        values = list(parent.inputs)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check whether it has indices
                            if value.indices is not None:
                                # If so, just add the num-sums of the layer-op
                                # as an offset
                                indices = (np.asarray(value.indices) +
                                           layer_num_sums).tolist()
                            else:
                                # If not, create the index list accordingly
                                indices = list(range(layer_num_sums,
                                                     layer_num_sums + node_num_sums))
                            # Replace the previous (node) input value in the
                            # current parent node with the new layer-node value
                            values[i] = (sums_layer, indices)
                            # Once the child node is found, stop searching
                            break
                    # Reset the values of the current parent node to include
                    # the new child (layer-node)
                    try:
                        # set 'values' in case the parent is an Op node
                        parent.set_values(*values)
                    except AttributeError:
                        # set 'inputs' in case the parent is a Concat node
                        parent.set_inputs(*values)
                # Increment the num-sums counter of the layer-node
                layer_num_sums += node_num_sums
                # Disconnect the current node from the graph
                node.disconnect_inputs()

            # After all nodes at this depth are modelled into a layer-node,
            # set the num-sums parameter accordingly
            sums_layer.set_sum_sizes(num_or_size_sums)
        elif isinstance(depths[depth][0], (Product, PermuteProducts)):
            # A Products layer
            with tf.name_scope("Layer%s" % depth):
                prods_layer = ProductsLayer(name="ProductsLayer-%s.%s" % (depth, 1))
            # Counter keeping track of the number of products modelled in the
            # layer node
            layer_num_prods = 0
            # List storing the product-input-size of each product modelled in
            # the layer node
            num_or_size_prods = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # Get the input values and sizes of the product node
                input_values = list(node.values)
                input_sizes = list(node.get_input_sizes())
                if isinstance(node, PermuteProducts):
                    # Permute over the input values to model permuted products
                    input_values = permute_inputs(input_values, input_sizes)
                    node_num_prods = node.num_prods
                    prod_input_size = len(input_values) // node_num_prods
                elif isinstance(node, Product):
                    node_num_prods = 1
                    prod_input_size = int(sum(input_sizes))

                # Add the input values of the current node to the ProductsLayer node
                prods_layer.add_values(*input_values)
                # Add the product-input-size of each product modelled in the
                # current node to the list
                num_or_size_prods += [prod_input_size] * node_num_prods

                # Visit each parent of the current node
                for parent in parents[node]:
                    values = list(parent.values)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check whether it has indices
                            if value.indices is not None:
                                # If so, just add the num-prods of the layer-op
                                # as an offset
                                indices = value.indices + layer_num_prods
                            else:
                                # If not, create the index list accordingly
                                indices = list(range(layer_num_prods,
                                                     layer_num_prods + node_num_prods))
                            # Replace the previous (node) input value in the
                            # current parent node with the new layer-node value
                            values[i] = (prods_layer, indices)
                    # Reset the values of the current parent node to include
                    # the new child (layer-node)
                    parent.set_values(*values)
                # Increment the num-prods counter of the layer node
                layer_num_prods += node_num_prods
                # Disconnect the current node from the graph
                node.disconnect_inputs()

            # After all nodes at this depth are modelled into a layer-node,
            # set the num-prods parameter accordingly
            prods_layer.set_prod_sizes(num_or_size_prods)
        elif isinstance(depths[depth][0], (SumsLayer, ProductsLayer, Concat)):
            # Already a layer node or a Concat node: nothing to convert
            pass
        else:
            raise StructureError("Unknown node-type: {}".format(depths[depth][0]))

    return root
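# Standalone illustration of the permute_inputs() helper above: itertools'
# product permutes over the per-input (node, index) lists, and chain flattens
# each permutation into a single sequence of (node, index) pairs. The 'A'/'B'
# strings stand in for SPN nodes and are illustrative only.
from itertools import chain, product

inputs = [[('A', 2), ('A', 5)],
          [('B', 0), ('B', 1), ('B', 2)]]
permuted = list(chain(*product(*inputs)))
# permuted == [('A', 2), ('B', 0), ('A', 2), ('B', 1), ('A', 2), ('B', 2),
#              ('A', 5), ('B', 0), ('A', 5), ('B', 1), ('A', 5), ('B', 2)]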
class _ProcedureWorker(Worker):

    def __init__(self, cl_environment, compile_flags, cl_function,
                 kernel_data, double_precision, use_local_reduction):
        super().__init__(cl_environment)
        self._cl_function = cl_function
        self._kernel_data = OrderedDict(sorted(kernel_data.items()))
        self._double_precision = double_precision
        self._use_local_reduction = use_local_reduction

        self._mot_float_dtype = np.float32
        if double_precision:
            self._mot_float_dtype = np.float64

        for data in self._kernel_data.values():
            data.set_mot_float_dtype(self._mot_float_dtype)

        self._kernel = self._build_kernel(self._get_kernel_source(), compile_flags)

        self._workgroup_size = self._kernel.run_procedure.get_work_group_info(
            cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
            self._cl_environment.device)
        if not self._use_local_reduction:
            self._workgroup_size = 1

        self._kernel_inputs = {name: data.get_kernel_inputs(self._cl_context, self._workgroup_size)
                               for name, data in self._kernel_data.items()}

    def calculate(self, range_start, range_end):
        nmr_problems = range_end - range_start

        func = self._kernel.run_procedure
        func.set_scalar_arg_dtypes(self.get_scalar_arg_dtypes())

        kernel_inputs_list = []
        for inputs in [self._kernel_inputs[name] for name in self._kernel_data]:
            kernel_inputs_list.extend(inputs)

        func(self._cl_queue,
             (int(nmr_problems * self._workgroup_size),),
             (int(self._workgroup_size),),
             *kernel_inputs_list,
             global_offset=(int(range_start * self._workgroup_size),))

        for name, data in self._kernel_data.items():
            data.enqueue_readouts(self._cl_queue, self._kernel_inputs[name],
                                  range_start, range_end)

    def _build_kernel(self, kernel_source, compile_flags=()):
        """Convenience function for building the kernel for this worker.

        Args:
            kernel_source (str): the kernel source to use for building the kernel

        Returns:
            cl.Program: a compiled CL kernel
        """
        from mot import configuration
        if configuration.should_ignore_kernel_compile_warnings():
            warnings.simplefilter("ignore")
        return cl.Program(self._cl_context, kernel_source).build(' '.join(compile_flags))

    def _get_kernel_source(self):
        assignment = ''
        if self._cl_function.get_return_type() != 'void':
            assignment = '__results[gid] = '

        variable_inits = []
        function_call_inputs = []
        post_function_callbacks = []
        for parameter in self._cl_function.get_parameters():
            data = self._kernel_data[parameter.name]
            call_args = (parameter.name, '_' + parameter.name, 'gid',
                         parameter.data_type.address_space)

            variable_inits.append(data.initialize_variable(*call_args))
            function_call_inputs.append(data.get_function_call_input(*call_args))
            post_function_callbacks.append(data.post_function_callback(*call_args))

        kernel_source = ''
        kernel_source += get_float_type_def(self._double_precision)
        kernel_source += '\n'.join(data.get_type_definitions()
                                   for data in self._kernel_data.values())
        kernel_source += self._cl_function.get_cl_code()
        kernel_source += '''
            __kernel void run_procedure(''' + ",\n".join(self._get_kernel_arguments()) + '''){
                ulong gid = (ulong)(get_global_id(0) / get_local_size(0));

                ''' + '\n'.join(variable_inits) + '''

                ''' + assignment + ' ' + self._cl_function.get_cl_function_name() + '(' + \
                      ', '.join(function_call_inputs) + ''');

                ''' + '\n'.join(post_function_callbacks) + '''
            }
        '''
        return kernel_source

    def _get_kernel_arguments(self):
        """Get the list of kernel arguments for loading the kernel data elements
        into the kernel.

        This will use the sorted keys for looping through the kernel input items.

        Returns:
            list of str: the list of parameter definitions
        """
        declarations = []
        for name, data in self._kernel_data.items():
            declarations.extend(data.get_kernel_parameters('_' + name))
        return declarations

    def get_scalar_arg_dtypes(self):
        """Get the location and types of the input scalars.

        Returns:
            list: for every kernel input element either None if the data is a
                buffer, or the numpy data type if it is a scalar.
        """
        dtypes = []
        for name, data in self._kernel_data.items():
            dtypes.extend(data.get_scalar_arg_dtypes())
        return dtypes
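# Plain-Python illustration of how calculate() above maps a problem range
# onto the OpenCL NDRange: each problem gets one work-group of
# `workgroup_size` work-items, and inside the kernel
# gid = get_global_id(0) / get_local_size(0) recovers the problem index.
# The numbers below are illustrative only.
workgroup_size = 64
range_start, range_end = 100, 108              # 8 problems in this batch
global_size = (range_end - range_start) * workgroup_size
global_offset = range_start * workgroup_size

for global_id in range(global_offset, global_offset + global_size):
    gid = global_id // workgroup_size          # problem index seen by the kernel
    assert range_start <= gid < range_end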
    def get_entitiesdata(self, datatype, since, sf):
        entities = []
        result = []
        # We need to use UTC as the salesforce API requires it
        end = datetime.now(pytz.UTC)

        if since is None:
            created_date_stmt = ""
            while True:
                query = "SELECT Id, CreatedDate FROM {} {} ORDER BY CreatedDate".format(
                    datatype, created_date_stmt)
                records = sf.query(query)["records"]
                temp_result = [x['Id'] for x in records]
                result.extend(temp_result)
                if records:
                    created_date_stmt = "WHERE CreatedDate > {}".format(
                        records[-1]['CreatedDate'])
                if len(temp_result) < 2000:  # salesforce returns at most 2000 rows per query
                    break
        else:
            start = iso8601.parse_date(since)
            logging.info("Since datetime presented: %s", start)
            logging.info("End -30 days delta: %s", (end - timedelta(days=30)))
            if start < (end - timedelta(days=30)):
                # Salesforce replicates only the last 30 days
                logging.warning(
                    "Salesforce replicates only last 30 days but since is set to {}"
                    .format(start))
                start = datetime.now(pytz.UTC) - timedelta(days=30) + timedelta(seconds=60)
                logging.warning("Changed since to {}".format(start))
            if getattr(sf, datatype):
                if end > (start + timedelta(seconds=60)):
                    result = getattr(sf, datatype).updated(start, end)["ids"]
                    deleted = getattr(sf, datatype).deleted(start, end)["deletedRecords"]
                    for e in deleted:
                        c = OrderedDict({"_id": e["id"]})
                        c.update({"_updated": "%s" % e["deletedDate"]})
                        c.update({"_deleted": True})
                        entities.append(c)

        if result:
            for e in result:
                c = getattr(sf, datatype).get(e)
                c.update({"_id": e})
                c.update({"_updated": "%s" % c["LastModifiedDate"]})
                for prop, value in c.items():
                    schema = [item for item in self._entities[datatype]
                              if item["name"] == prop]
                    if (value and len(schema) > 0 and "type" in schema[0]
                            and schema[0]["type"] == "datetime"):
                        c[prop] = to_transit_datetime(parse(value))
                entities.append(c)

        return entities
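# Standalone sketch of the 30-day clamp applied above: the Salesforce
# getUpdated/getDeleted replication APIs only cover roughly the last 30 days,
# so an older `since` value is moved forward before querying. The helper name
# and the 60-second margin mirror the code above, but this is an illustration,
# not part of the connector.
from datetime import datetime, timedelta

import pytz


def clamp_since(since, now=None, margin_seconds=60):
    """Return `since`, or the earliest replicated timestamp if `since` is older.

    `since` must be a timezone-aware datetime.
    """
    now = now or datetime.now(pytz.UTC)
    earliest = now - timedelta(days=30) + timedelta(seconds=margin_seconds)
    return max(since, earliest)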