Exemplo n.º 1
def binary_report(predictions, col_true='CLASS'):
    """Print binary (QSO vs. rest) classification metrics and plot ROC / PR curves.

    Args:
        predictions: column-indexable table of results. It must provide the
            true labels under ``col_true``, the predicted labels under
            ``'CLASS_PHOTO'`` and the predicted QSO probability under
            ``'QSO_PHOTO'``. (Presumably a pandas DataFrame -- confirm
            against callers.)
        col_true (str): name of the column holding the ground-truth labels.
    """
    print('Binary classification results:')

    # Everything is reduced to a "is it a QSO?" boolean problem.
    y_true = (predictions[col_true] == 'QSO')
    y_pred_proba = predictions['QSO_PHOTO']
    y_pred_binary = (predictions['CLASS_PHOTO'] == 'QSO')

    n_pos = y_pred_binary.sum()
    n_all = len(y_pred_binary)
    print('Predicted positives: {}/{} ({:.2f}%)'.format(
        n_pos, n_all, n_pos / n_all * 100))

    # Log loss is the only bootstrapped metric that works on probabilities.
    logloss, logloss_err = bootstrap_metric(log_loss, y_true, y_pred_proba)
    print('Logloss = {:.4f} ({:.4f})'.format(logloss, logloss_err))

    # Metrics computed on the hard (binary) predictions; each entry yields a
    # (score, error) pair when called with (y_true, y_pred).
    binary_metrics = OrderedDict([
        ('Accuracy', partial(bootstrap_metric, accuracy_score)),
        ('F1', partial(bootstrap_metric, f1_score)),
        ('Precision', partial(bootstrap_metric, precision_score)),
        ('Recall', partial(bootstrap_metric, recall_score)),
    ])
    for metric_name, metric_func in binary_metrics.items():
        score, score_err = metric_func(y_true, y_pred_binary)
        print('{} = {:.4f} ({:.4f})'.format(metric_name, score, score_err))

    # ROC AUC
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
    roc_auc = auc(fpr, tpr)
    print('ROC AUC = {:.4f}'.format(roc_auc))
    plot_roc_curve(fpr, tpr, roc_auc)

    # Precision - recall curve; the single (precision, recall) point of the
    # hard predictions is passed along with the full curve.
    average_precision = average_precision_score(y_true, y_pred_proba)
    precision = precision_score(y_true, y_pred_binary)
    recall = recall_score(y_true, y_pred_binary)
    precisions, recalls, _ = precision_recall_curve(y_true, y_pred_proba)
    plot_precision_recall_curve(precisions, recalls, average_precision,
                                precision, recall)
Exemplo n.º 2
def convert_to_layer_nodes(root):
    """
    At each level in the SPN rooted in the 'root' node, model all the nodes
    as a single layer-node.

    NOTE(review): the reviewed copy of this function had lost several lines.
    Every restored statement that could not be derived from the surrounding
    code alone is marked with a NOTE(review) comment and must be confirmed.

    Args:
        root (Node): The root of the SPN graph.

    Returns:
        root (Node): The root of the SPN graph, with each layer modelled as a
                     single layer-node.
    """

    parents = defaultdict(list)
    depths = defaultdict(list)
    node_to_depth = OrderedDict()
    node_to_depth[root] = 1

    def get_parents(node):
        # Add to Parents dict
        if node.is_op:
            for i in node.inputs:
                if (i and  # Input not empty
                        not (i.is_param or i.is_var)):
                    # Restored: 'parents' is read below as node -> list of
                    # parent nodes and was otherwise never filled.
                    parents[i.node].append(node)
                    node_to_depth[i.node] = node_to_depth[node] + 1

    def permute_inputs(input_values, input_sizes):
        # For a given list of inputs and their corresponding sizes, create a
        # nested-list of (input, index) pairs.
        # E.g: input_values = [(A, [2, 5]), (B, None)]
        #      input_sizes = [2, 3]
        #      inputs = [[('A', 2), ('A', 5)],
        #                [('B', 0), ('B', 1), ('B', 2)]]
        inputs = [
            list(product([inp.node], inp.indices)) if inp and inp.indices else
            list(product([inp.node], range(inp_size)))
            for inp, inp_size in zip(input_values, input_sizes)
        ]

        # For a given nested-list of (input, index) pairs, permute over the inputs
        # E.g: permuted_inputs = [('A', 2), ('B', 0),
        #                         ('A', 2), ('B', 1),
        #                         ('A', 2), ('B', 2),
        #                         ('A', 5), ('B', 0),
        #                         ('A', 5), ('B', 1),
        #                         ('A', 5), ('B', 2)]
        return list(chain.from_iterable(product(*inputs)))

    # Create a parents dictionary of the SPN graph
    traverse_graph(root, fun=get_parents, skip_params=True)

    # Create a depth dictionary of the SPN graph
    for key, value in node_to_depth.items():
        # Restored: 'depths' is read below as depth -> list of nodes.
        depths[value].append(key)
    spn_depth = len(depths)

    # Iterate through each depth of the SPN, starting from the deepest layer,
    # moving up to the root node
    for depth in range(spn_depth, 1, -1):
        if isinstance(depths[depth][0], (Sum, ParallelSums)):  # A Sums Layer
            # Create a default SumsLayer node
            with tf.name_scope("Layer%s" % depth):
                sums_layer = SumsLayer(name="SumsLayer-%s.%s" % (depth, 1))
            # Initialize a counter for keeping track of number of sums
            # modelled in the layer node
            layer_num_sums = 0
            # Initialize an empty list for storing sum-input-sizes of sums
            # modelled in the layer node
            num_or_size_sums = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # TODO: To be replaced with node.num_sums once AbstractSums
                # class is introduced
                # No. of sums modelled by the current node
                node_num_sums = (1 if isinstance(node, Sum) else node.num_sums)
                # Add Input values of the current node to the SumsLayer node
                sums_layer.add_values(*node.values * node_num_sums)
                # Add sum-input-size, of each sum modelled in the current node,
                # to the list
                num_or_size_sums += [sum(node.get_input_sizes()[2:])
                                     ] * node_num_sums
                # Visit each parent of the current node
                for parent in parents[node]:
                    try:
                        # 'Values' in case parent is an Op node
                        values = list(parent.values)
                    except AttributeError:
                        # 'Inputs' in case parent is a Concat node
                        values = list(parent.inputs)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check if it has indices
                            if value.indices is not None:
                                # If so, then just add the num-sums of the
                                # layer-op as offset. Converted back to a
                                # plain list so both branches yield the same
                                # type (NOTE(review): '.tolist()' restored --
                                # confirm).
                                indices = (np.asarray(value.indices) +
                                           layer_num_sums).tolist()
                            else:
                                # If not, then create a list accordingly
                                indices = list(range(
                                    layer_num_sums,
                                    (layer_num_sums + node_num_sums)))
                            # Replace previous (node) Input value in the
                            # current parent node, with the new layer-node value
                            values[i] = (sums_layer, indices)
                            break  # Once child-node found, don't have to search further
                    # Reset values of the current parent node, by including
                    # the new child (Layer-node)
                    # NOTE(review): setter names restored to mirror the
                    # 'values' / 'inputs' getters used above -- confirm.
                    try:
                        # set 'values' in case parent is an Op node
                        parent.set_values(*values)
                    except AttributeError:
                        # set 'inputs' in case parent is a Concat node
                        parent.set_inputs(*values)
                # Increment num-sums-counter of the layer-node
                layer_num_sums += node_num_sums
                # Disconnect
                # NOTE(review): restored call -- confirm the method name.
                node.disconnect_inputs()

            # After all nodes at a certain depth are modelled into a Layer-node,
            # set num-sums parameter accordingly
            # NOTE(review): restored call -- confirm the method name.
            sums_layer.set_sum_sizes(num_or_size_sums)
        elif isinstance(depths[depth][0],
                        (Product, PermuteProducts)):  # A Products Layer
            with tf.name_scope("Layer%s" % depth):
                prods_layer = ProductsLayer(name="ProductsLayer-%s.%s" %
                                            (depth, 1))
            # Initialize a counter for keeping track of number of prods
            # modelled in the layer node
            layer_num_prods = 0
            # Initialize an empty list for storing prod-input-sizes of prods
            # modelled in the layer node
            num_or_size_prods = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # Get input values and sizes of the product node
                input_values = list(node.values)
                input_sizes = list(node.get_input_sizes())
                if isinstance(node, PermuteProducts):
                    # Permute over input-values to model permuted products
                    input_values = permute_inputs(input_values, input_sizes)
                    node_num_prods = node.num_prods
                    prod_input_size = len(input_values) // node_num_prods
                elif isinstance(node, Product):
                    node_num_prods = 1
                    prod_input_size = int(sum(input_sizes))

                # Add Input values of the current node to the ProductsLayer node
                # (restored by analogy with the sums branch)
                prods_layer.add_values(*input_values)
                # Add prod-input-size, of each product modelled in the current
                # node, to the list
                num_or_size_prods += [prod_input_size] * node_num_prods
                # Visit each parent of the current node
                for parent in parents[node]:
                    values = list(parent.values)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check if it has indices
                            if value.indices is not None:
                                # If so, then just add the num-prods of the
                                # layer-op as offset. Element-wise, exactly as
                                # in the sums branch: adding an int to a plain
                                # list of indices would raise TypeError.
                                indices = (np.asarray(value.indices) +
                                           layer_num_prods).tolist()
                            else:
                                # If not, then create a list accordingly
                                indices = list(range(
                                    layer_num_prods,
                                    (layer_num_prods + node_num_prods)))
                            # Replace previous (node) Input value in the
                            # current parent node, with the new layer-node value
                            values[i] = (prods_layer, indices)
                    # Reset values of the current parent node, by including
                    # the new child (Layer-node)
                    # NOTE(review): restored call -- confirm the method name.
                    parent.set_values(*values)
                # Increment num-prods-counter of the layer node
                layer_num_prods += node_num_prods
                # Disconnect
                # NOTE(review): restored call -- confirm the method name.
                node.disconnect_inputs()

            # After all nodes at a certain depth are modelled into a Layer-node,
            # set num-prods parameter accordingly
            # NOTE(review): restored call -- confirm the method name.
            prods_layer.set_prod_sizes(num_or_size_prods)

        elif isinstance(depths[depth][0],
                        (SumsLayer, ProductsLayer, Concat)):  # A Concat node
            # Already a layer-node (or a Concat): nothing to convert.
            # NOTE(review): 'pass' + 'else' restored -- raising "Unknown
            # node-type" for these known types would make no sense.
            pass
        else:
            raise StructureError("Unknown node-type: {}".format(
                depths[depth][0]))

    return root
Exemplo n.º 3
class _ProcedureWorker(Worker):
    """Worker that wraps a CL function in a ``run_procedure`` kernel and runs it.

    NOTE(review): the reviewed copy of this class had lost several lines.
    Every restored statement that could not be derived from the surrounding
    code alone is marked with a NOTE(review) comment and must be confirmed.
    """

    def __init__(self, cl_environment, compile_flags, cl_function, kernel_data,
                 double_precision, use_local_reduction):
        """Build the kernel and prepare the kernel inputs.

        Args:
            cl_environment: the CL environment this worker operates on
            compile_flags (iterable of str): flags handed to the CL compiler
            cl_function: the CL function that the generated kernel calls
            kernel_data (dict): maps parameter names to kernel data objects;
                stored sorted by name so the argument order is deterministic
            double_precision (bool): selects float64 (True) or float32 (False)
                as the float dtype
            use_local_reduction (bool): when False the workgroup size is
                forced to 1
        """
        # NOTE(review): restored -- self._cl_context and self._cl_queue are
        # read in this class but never assigned, so the base class has to
        # initialise them.
        super().__init__(cl_environment)

        self._cl_function = cl_function
        self._kernel_data = OrderedDict(sorted(kernel_data.items()))
        self._double_precision = double_precision
        self._use_local_reduction = use_local_reduction

        self._mot_float_dtype = np.float32
        if double_precision:
            self._mot_float_dtype = np.float64

        for data in self._kernel_data.values():
            # NOTE(review): restored loop body -- confirm the method name.
            data.set_mot_float_dtype(self._mot_float_dtype)

        # compile_flags is forwarded here; it is not used anywhere else.
        self._kernel = self._build_kernel(self._get_kernel_source(),
                                          compile_flags)

        # NOTE(review): both arguments restored -- confirm which work-group
        # info is queried and where the device comes from.
        self._workgroup_size = self._kernel.run_procedure.get_work_group_info(
            cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
            self._cl_environment.device)
        if not self._use_local_reduction:
            self._workgroup_size = 1

        self._kernel_inputs = {
            # NOTE(review): second argument restored -- confirm.
            name: data.get_kernel_inputs(self._cl_context,
                                         self._workgroup_size)
            for name, data in self._kernel_data.items()
        }

    def calculate(self, range_start, range_end):
        """Run the kernel for the problems in ``[range_start, range_end)``.

        Args:
            range_start (int): index of the first problem to process
            range_end (int): index one past the last problem to process
        """
        nmr_problems = range_end - range_start

        func = self._kernel.run_procedure
        # NOTE(review): restored -- get_scalar_arg_dtypes() is otherwise
        # unused in this class; confirm it is applied here.
        func.set_scalar_arg_dtypes(self.get_scalar_arg_dtypes())

        # Flatten the per-parameter inputs in the (sorted) kernel data order,
        # the same order used for the kernel argument declarations.
        kernel_inputs_list = []
        for name in self._kernel_data:
            kernel_inputs_list.extend(self._kernel_inputs[name])

        # One workgroup per problem: global size and offset are both scaled
        # by the workgroup size.
        func(self._cl_queue, (int(nmr_problems * self._workgroup_size), ),
             (int(self._workgroup_size), ),
             *kernel_inputs_list,
             global_offset=(int(range_start * self._workgroup_size), ))

        for name, data in self._kernel_data.items():
            data.enqueue_readouts(self._cl_queue, self._kernel_inputs[name],
                                  range_start, range_end)

    def _build_kernel(self, kernel_source, compile_flags=()):
        """Convenience function for building the kernel for this worker.

        Args:
            kernel_source (str): the kernel source to use for building the kernel
            compile_flags (iterable of str): compiler flags, joined with spaces

        Returns:
            cl.Program: a compiled CL kernel
        """
        from mot import configuration
        if configuration.should_ignore_kernel_compile_warnings():
            # NOTE(review): restored statement -- confirm the exact filter.
            import warnings
            warnings.simplefilter("ignore")
        return cl.Program(self._cl_context,
                          kernel_source).build(' '.join(compile_flags))

    def _get_kernel_source(self):
        """Generate the CL source of the ``run_procedure`` kernel.

        Returns:
            str: type definitions, the CL function code and a kernel that
                calls the function once per problem (``gid``)
        """
        assignment = ''
        if self._cl_function.get_return_type() != 'void':
            # Non-void functions store their return value per problem.
            assignment = '__results[gid] = '

        variable_inits = []
        function_call_inputs = []
        post_function_callbacks = []
        for parameter in self._cl_function.get_parameters():
            data = self._kernel_data[parameter.name]
            # NOTE(review): the fourth tuple element and the three appends
            # below are restored -- confirm against the kernel data API.
            call_args = (parameter.name, '_' + parameter.name, 'gid',
                         parameter.address_space)

            variable_inits.append(data.initialize_variable(*call_args))
            function_call_inputs.append(
                data.get_function_call_input(*call_args))
            post_function_callbacks.append(
                data.post_function_callback(*call_args))

        kernel_source = ''
        kernel_source += get_float_type_def(self._double_precision)
        kernel_source += '\n'.join(data.get_type_definitions()
                                   for data in self._kernel_data.values())
        kernel_source += self._cl_function.get_cl_code()
        kernel_source += '''
            __kernel void run_procedure(''' + ",\n".join(self._get_kernel_arguments()) + '''){
                ulong gid = (ulong)(get_global_id(0) / get_local_size(0));
                ''' + '\n'.join(variable_inits) + '''     
                ''' + assignment + ' ' + self._cl_function.get_cl_function_name() + '(' + \
                         ', '.join(function_call_inputs) + ''');
                ''' + '\n'.join(post_function_callbacks) + '''
            }
        '''
        return kernel_source

    def _get_kernel_arguments(self):
        """Get the list of kernel arguments for loading the kernel data elements into the kernel.

        This will use the sorted keys for looping through the kernel input items.

        Returns:
            list of str: the list of parameter definitions
        """
        declarations = []
        for name, data in self._kernel_data.items():
            declarations.extend(data.get_kernel_parameters('_' + name))
        return declarations

    def get_scalar_arg_dtypes(self):
        """Get the location and types of the input scalars.

        Returns:
            list: for every kernel input element either None if the data is a buffer or the numpy data type
                if it is a scalar.
        """
        dtypes = []
        for data in self._kernel_data.values():
            # NOTE(review): restored loop body, by analogy with
            # _get_kernel_arguments -- confirm the method name.
            dtypes.extend(data.get_scalar_arg_dtypes())
        return dtypes
Exemplo n.º 4
    def get_entitiesdata(self, datatype, since, sf):
        """Fetch the entities of one Salesforce object type.

        NOTE(review): the reviewed copy of this method had lost several
        lines; restored statements that could not be derived from the
        surrounding code alone are marked below and must be confirmed.

        Args:
            datatype (str): name of the object type; used in the SOQL query,
                as attribute name on ``sf`` and as key into ``self._entities``
            since (str or None): ISO 8601 timestamp to fetch changes from;
                ``None`` requests a full fetch of all records
            sf: the Salesforce client; must provide ``query`` and, per object
                type, ``updated``, ``deleted`` and ``get`` (presumably a
                simple_salesforce client -- confirm)

        Returns:
            list: the fetched entities, each with ``_id`` and ``_updated``
                set; deleted records additionally carry ``_deleted``
        """
        entities = []
        # Initialised up front: on the incremental path nothing may be
        # fetched, and 'result' must still be defined further down.
        result = []
        end = datetime.now(
            pytz.UTC)  # we need to use UTC as salesforce API requires this

        if since is None:
            # Full fetch: page through all ids ordered by creation date,
            # continuing after the last CreatedDate seen.
            created_date_stmt = ""
            while True:
                query = "SELECT Id, CreatedDate FROM {} {} ORDER BY CreatedDate".format(
                    datatype, created_date_stmt)
                records = sf.query(query)["records"]
                temp_result = [x['Id'] for x in records]
                result.extend(temp_result)
                if records:
                    created_date_stmt = "WHERE CreatedDate > {}".format(
                        records[-1]["CreatedDate"])
                if len(temp_result) < 2000:  # salesforce limit 2000 rows
                    break
        else:
            start = iso8601.parse_date(since)
            logging.info("Since datetime presented: %s", start)
            logging.info("End -30 days delta: %s", (end - timedelta(days=30)))
            if start < (end - timedelta(days=30)
                        ):  # salesforce replicates only last 30 days
                logging.warning(
                    "Salesforce replicates only last 30 days but since is set to {}"
                    .format(since))
                start = datetime.now(
                    pytz.UTC) - timedelta(days=30) + timedelta(seconds=60)
                logging.warning("Changed since to {}".format(start))

            sf_type = getattr(sf, datatype)
            if sf_type:
                if end > (start + timedelta(seconds=60)):
                    result = sf_type.updated(start, end)["ids"]
                    # NOTE(review): call and response key restored; the
                    # 'id' / 'deletedDate' fields read below suggest the
                    # deleted-records listing -- confirm.
                    deleted = sf_type.deleted(start, end)["deletedRecords"]
                    for e in deleted:
                        c = OrderedDict({"_id": e["id"]})
                        c.update({"_updated": "%s" % e["deletedDate"]})
                        c.update({"_deleted": True})

                        entities.append(c)

        if result:
            # Index the schema once (first entry per field name wins) instead
            # of rescanning it for every field of every record.
            schema_by_name = {}
            for item in self._entities[datatype]:
                schema_by_name.setdefault(item["name"], item)

            for e in result:
                c = getattr(sf, datatype).get(e)
                c.update({"_id": e})
                c.update({"_updated": "%s" % c["LastModifiedDate"]})

                for field_name, value in c.items():
                    field_schema = schema_by_name.get(field_name)
                    if (value and field_schema is not None
                            and "type" in field_schema
                            and field_schema["type"] == "datetime"):
                        c[field_name] = to_transit_datetime(parse(value))

                entities.append(c)

        return entities