Пример #1
0
 def predict(self, domains, probability=False):
     """Classify domain names as benign or malicious.

     The domains are converted with ``utils.str2ascii``, run through
     ``self.model``, and the predictions are returned in the same order in
     which the domains were supplied.

     :param domains: Domains to classify.
     :type domains: cudf.Series
     :param probability: When ``True``, return the sigmoid of the model's
         first output column for each domain; otherwise return the index of
         the highest-scoring output column (the learned label).
     :type probability: bool
     :return: Predictions aligned with the input order of ``domains``.
     :rtype: cudf.Series

     Examples
     --------
     >>> dd.predict(['nvidia.com', 'dgadomain'], probability=True)
     0    0.010
     1    0.924
     Name: preds, dtype: float64

     (Values shown are illustrative.)
     """
     df = cudf.DataFrame({"domain": domains})
     domains_len = df["domain"].count()
     temp_df = utils.str2ascii(df, domains_len)
     # Adopt the (sorted) index produced by str2ascii so the predictions can
     # be restored to the caller's input order by sort_index() below.
     df.index = temp_df.index
     df["domain"] = temp_df["domain"]
     temp_df = temp_df.drop("domain", axis=1)
     model_input, seq_lengths = self._create_variables(temp_df)
     # Release the intermediate frame before running the model.
     del temp_df
     # Inference only: no autograd graph is needed for the forward pass.
     with torch.no_grad():
         model_result = self.model(model_input, seq_lengths)
         if probability:
             preds = torch.sigmoid(model_result[:, 0])
         else:
             # Index of the max along dim 1, i.e. the predicted class.
             preds = model_result.max(1)[1]
     df["preds"] = preds.view(-1).tolist()
     return df.sort_index()["preds"]
Пример #2
0
    def predict(self, domains, probability=False, truncate=100):
        """Classify domain names as benign or malicious.

        The model is switched to evaluation mode, each domain is truncated,
        converted with ``utils.str2ascii`` and run through ``self.model``.
        Predictions are returned in the same order in which the domains were
        supplied.

        :param domains: Domains to classify.
        :type domains: cudf.Series
        :param probability: When ``True``, return the sigmoid of the model's
            first output column for each domain; otherwise return the index
            of the highest-scoring output column (the learned label).
        :type probability: bool
        :param truncate: Truncate each domain to this many characters.
        :type truncate: int
        :return: Predictions aligned with the input order of ``domains``.
        :rtype: cudf.Series

        Examples
        --------
        >>> dd.predict(['nvidia.com', 'dgadomain'], probability=True)
        0    0.010
        1    0.924
        Name: preds, dtype: float64

        (Values shown are illustrative.)
        """
        log.debug("Initiating model inference ...")
        self.model.eval()
        df = cudf.DataFrame({"domain": domains})
        log.debug('Truncate domains to width: {}'.format(truncate))
        # Replace everything from position `truncate` onwards with ''.
        df['domain'] = df['domain'].str.slice_replace(truncate, repl='')
        temp_df = utils.str2ascii(df, 'domain')
        # Adopt the (sorted) index produced by str2ascii so the predictions
        # can be restored to the caller's input order by sort_index() below.
        df.index = temp_df.index
        df["domain"] = temp_df["domain"]
        temp_df = temp_df.drop("domain", axis=1)
        model_input, seq_lengths = self._create_variables(temp_df)
        # Release the intermediate frame before running the model.
        del temp_df
        # Inference only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            model_result = self.model(model_input, seq_lengths)
            if probability:
                preds = torch.sigmoid(model_result[:, 0])
            else:
                # Index of the max along dim 1, i.e. the predicted class.
                preds = model_result.max(1)[1]
        df["preds"] = preds.view(-1).tolist()
        return df.sort_index()["preds"]
Пример #3
0
 def __preprocess(self, df, truncate):
     """Truncate the ``domain`` column and pass the frame to ``utils.str2ascii``.

     :param df: Frame holding a ``domain`` string column; the column is
         overwritten in place with its truncated values.
     :param truncate: Position from which the rest of each domain is dropped.
     :return: The result of ``utils.str2ascii(df, 'domain')``.
     """
     shortened = df['domain'].str.slice_replace(truncate, repl='')
     df['domain'] = shortened
     return utils.str2ascii(df, 'domain')
Пример #4
0
 def __preprocess(self, df):
     """Pass the frame and its row count to ``utils.str2ascii``.

     :param df: Frame to convert.
     :return: The result of ``utils.str2ascii`` for ``df``.
     """
     row_count = df.shape[0]
     return utils.str2ascii(df, row_count)
Пример #5
0
def test_str2ascii():
    """Check that ``utils.str2ascii`` on the test input yields the expected frame."""
    converted = utils.str2ascii(test_input_df, 'domain')
    matches_expected = converted.equals(expected_output_df)
    assert matches_expected
Пример #6
0
 def __preprocess(self, df):
     """Pass the frame's ``domain`` column name to ``utils.str2ascii``.

     :param df: Frame holding a ``domain`` column.
     :return: The result of ``utils.str2ascii(df, 'domain')``.
     """
     converted = utils.str2ascii(df, 'domain')
     return converted