Example #1
    def __init__(self, is_target=False, aim=ENCODER_AIM.BALANCE):
        super().__init__(is_target)
        self.name = 'Text Transformer Encoder'
        self._tokenizer = None
        self._model = None
        self._pad_id = None
        self._max_len = None
        self._max_ele = None
        self._model_type = None
        self.desired_error = 0.01
        self.max_training_time = 7200
        self._head = None
        # Possible: speed, balance, accuracy
        self.aim = aim

        if self.aim == ENCODER_AIM.SPEED:
            # note: this configuration uses more memory, takes very long to train,
            # and prints odd debugging output; consider investigating before relying
            # on it (changing the pretrained model does not seem to help)
            self._classifier_model_class = AlbertForSequenceClassification
            self._embeddings_model_class = AlbertModel
            self._tokenizer_class = AlbertTokenizer
            self._pretrained_model_name = 'albert-base-v2'
        elif self.aim == ENCODER_AIM.BALANCE:
            self._classifier_model_class = DistilBertForSequenceClassification
            self._embeddings_model_class = DistilBertModel
            self._tokenizer_class = DistilBertTokenizer
            self._pretrained_model_name = 'distilbert-base-uncased'
        elif self.aim == ENCODER_AIM.ACCURACY:
            self._classifier_model_class = DistilBertForSequenceClassification
            self._embeddings_model_class = DistilBertModel
            self._tokenizer_class = DistilBertTokenizer
            self._pretrained_model_name = 'distilbert-base-uncased'

        self.device, _ = get_devices()
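Every snippet on this page calls get_devices() from lightwood.helpers.device, which returns a (device, available_devices) pair. The helper itself is not shown on this page; the sketch below illustrates the contract, assuming plain CUDA detection via torch rather than the library's exact implementation:

import torch

def get_devices():
    # sketch: prefer CUDA when available and report how many GPUs can be used,
    # otherwise fall back to a single CPU "device"
    if torch.cuda.is_available():
        return torch.device('cuda'), torch.cuda.device_count()
    return torch.device('cpu'), 1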
Example #2
    def __init__(self, in_features, out_features, bias=True):
        """

        :param in_features:  as name suggests
        :param out_features: this essentially the number of neurons
        :param bias: if you want a specific bias
        """

        super(PLinear, self).__init__()

        self.in_features = in_features
        self.out_features = out_features

        # these are the matrices that we will optimize for
        self.sigma = Parameter(torch.Tensor(out_features, in_features))
        self.mean = Parameter(torch.Tensor(out_features, in_features))

        # there can be various ways to sample, given various distributions,
        # we will stick with discrete normal as it is way faster
        self.w_sampler = self.w_discrete_normal

        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

        # make sure the graph knows these two need to be optimized
        # (note: the attribute is requires_grad; Parameters default to True anyway)
        self.sigma.requires_grad = True
        self.mean.requires_grad = True

        self.device, _ = get_devices()
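The w_discrete_normal sampler assigned above is defined elsewhere in the class. As a hedged sketch of the idea it names, weights can be drawn from N(mean, sigma^2) via the reparameterization trick so that gradients still flow into self.mean and self.sigma (illustrative only, not the library's exact sampler):

    def w_discrete_normal(self):
        # sketch: W = mean + scale * eps with eps ~ N(0, 1); softplus keeps the
        # scale positive while remaining differentiable
        eps = torch.randn_like(self.sigma)
        return self.mean + torch.nn.functional.softplus(self.sigma) * eps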
Example #3
    def __init__(self, input_size, output_size, nr_outputs):
        super(SelfAware, self).__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.nr_outputs = nr_outputs

        awareness_layers = []
        awareness_net_shape = [
            self.input_size + self.output_size,
            max(int((self.input_size + self.output_size) * 1.5), 300),
            self.nr_outputs
        ]

        for ind in range(len(awareness_net_shape) - 1):
            rectifier = torch.nn.SELU
            awareness_layers.append(
                torch.nn.Linear(awareness_net_shape[ind],
                                awareness_net_shape[ind + 1]))
            if ind < len(awareness_net_shape) - 2:
                awareness_layers.append(rectifier())

        self.net = torch.nn.Sequential(*awareness_layers)

        for layer in self.net:
            if hasattr(layer, 'weight'):
                torch.nn.init.normal_(layer.weight,
                                      mean=0.,
                                      std=1 / math.sqrt(layer.out_features))
            if hasattr(layer, 'bias'):
                torch.nn.init.normal_(layer.bias, mean=0., std=0.1)

        self.device, self.available_devices = get_devices()
        self.to(self.device, self.available_devices)
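SelfAware's forward pass is not shown here; conceptually the awareness network scores the main model's predictions given the original input. A hypothetical usage, assuming the awareness input is the concatenation of input features and predicted outputs:

sa = SelfAware(input_size=10, output_size=3, nr_outputs=3)
x = torch.rand(32, 10, device=sa.device)       # batch of model inputs
y_hat = torch.rand(32, 3, device=sa.device)    # batch of model predictions
scores = sa.net(torch.cat([x, y_hat], dim=1))  # shape: (32, 3)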
Example #4
    def to(self, device=None, available_devices=None):
        if device is None or available_devices is None:
            device, available_devices = get_devices()

        self.net = self.net.to(device)
        if self.selfaware:
            self.awareness_net = self.awareness_net.to(device)

        # recompute the device count from the resolved device
        # (note: this overrides any value passed in or fetched above)
        available_devices = 1
        if 'cuda' in str(device):
            available_devices = torch.cuda.device_count()

        if available_devices > 1:
            self._foward_net = torch.nn.DataParallel(self.net)
            if self.selfaware:
                self._foward_awareness_net = torch.nn.DataParallel(self.awareness_net)
        else:
            self._foward_net = self.net
            if self.selfaware:
                self._foward_awareness_net = self.awareness_net

        self.device = device
        self.available_devices = available_devices

        return self
Example #5
    def __init__(self, config):
        super(InferSent, self).__init__()
        self.bsize = config['bsize']
        self.word_emb_dim = config['word_emb_dim']
        self.enc_lstm_dim = config['enc_lstm_dim']
        self.pool_type = config['pool_type']
        self.dpout_model = config['dpout_model']
        self.version = 1 if 'version' not in config else config['version']

        # note: with num_layers=1, LSTM dropout has no effect (it only applies
        # between stacked layers), so PyTorch will warn if dpout_model > 0
        self.enc_lstm = nn.LSTM(self.word_emb_dim,
                                self.enc_lstm_dim,
                                1,
                                bidirectional=True,
                                dropout=self.dpout_model)

        self.device, _ = get_devices()

        self.enc_lstm = self.enc_lstm.to(self.device)

        assert self.version in [1, 2]
        if self.version == 1:
            self.bos = '<s>'
            self.eos = '</s>'
            self.max_pad = True
            self.moses_tok = False
        elif self.version == 2:
            self.bos = '<p>'
            self.eos = '</p>'
            self.max_pad = False
            self.moses_tok = True
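A typical configuration dict for this constructor, matching the keys read above (values mirror the upstream InferSent README and are illustrative):

params_model = {'bsize': 64, 'word_emb_dim': 300, 'enc_lstm_dim': 2048,
                'pool_type': 'max', 'dpout_model': 0.0, 'version': 2}
model = InferSent(params_model)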
Example #6
    def learn(self, from_data, test_data=None):
        """
        Train and save a model (you can use this to retrain model from data).

        :param from_data: DataFrame or DataSource
            The data to learn from

        :param test_data: DataFrame or DataSource
            The data to test accuracy and learn_error from
        """
        device, _available_devices = get_devices()
        log.info(f'Computing device used: {device}')
        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [col for col in from_data if col not in self._output_columns]
            self.config = {
                'input_features': [{'name': col, 'type': self._type_map(from_data, col)} for col in self._input_columns],
                'output_features': [{'name': col, 'type': self._type_map(from_data, col)} for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            log.info('Automatically generated a configuration')
            log.info(self.config)
        else:
            self._output_columns = [col['name'] for col in self.config['output_features']]
            self._input_columns = [col['name'] for col in self.config['input_features']]

        if isinstance(from_data, pandas.DataFrame):
            train_ds = DataSource(from_data, self.config)
        elif isinstance(from_data, DataSource):
            train_ds = from_data
        else:
            raise TypeError(':from_data: must be either DataFrame or DataSource')

        nr_subsets = 3 if len(train_ds) > 100 else 1

        if test_data is None:
            test_ds = train_ds.subset(0.1)
        elif isinstance(test_data, pandas.DataFrame):
            test_ds = train_ds.make_child(test_data)
        elif isinstance(test_data, DataSource):
            test_ds = test_data
        else:
            raise TypeError(':test_data: must be either DataFrame or DataSource')

        train_ds.create_subsets(nr_subsets)
        test_ds.create_subsets(nr_subsets)

        train_ds.train()
        test_ds.train()

        mixer_class = self.config['mixer']['class']
        mixer_kwargs = self.config['mixer']['kwargs']
        self._mixer = mixer_class(**mixer_kwargs)
        self._mixer.fit(train_ds=train_ds, test_ds=test_ds)
        self.train_accuracy = self._mixer.calculate_accuracy(test_ds)

        return self
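A minimal, hypothetical call against this legacy API, assuming lightwood's Predictor wrapper (see the import block in Example #17) and a toy DataFrame with target column 'y':

import pandas
from lightwood.api.predictor import Predictor

df = pandas.DataFrame({'x': list(range(100)), 'y': [i * 2 for i in range(100)]})
predictor = Predictor(output=['y'])
predictor.learn(from_data=df)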
Example #7
    def convert_to_device(self, device_str=None):
        if hasattr(self._mixer, 'to') and callable(self._mixer.to):
            if device_str is not None:
                device = torch.device(device_str)
                available_devices = 1
                if device_str == 'cuda':
                    available_devices = torch.cuda.device_count()
            else:
                device, available_devices = get_devices()

            self._mixer.to(device, available_devices)
Example #8
    def __init__(self, model):
        """ Img2Vec
        :param model: name of the model to use
        """
        super(Img2Vec, self).__init__()

        self.device, _ = get_devices()
        self.model_name = model

        self.model = self._get_model()
        self.model = self.model.to(self.device)
Example #9
    def convert_to_device(self, device_str=None):
        if device_str is not None:
            device = torch.device(device_str)
            available_devices = 1
            if device_str == 'cuda':
                available_devices = torch.cuda.device_count()
        else:
            device, available_devices = get_devices()

        self._mixer.to(device, available_devices)
        for e in self._mixer.encoders:
            if hasattr(self._mixer.encoders[e], 'to'):
                self._mixer.encoders[e].to(device, available_devices)
Example #10
File: rnn.py  Project: paxcema/lightwood
    def __init__(self,
                 encoded_vector_size=4,
                 train_iters=75000,
                 stop_on_error=0.8,
                 learning_rate=0.01,
                 is_target=False):
        self._stop_on_error = stop_on_error
        self._learning_rate = learning_rate
        self._encoded_vector_size = encoded_vector_size
        self._train_iters = train_iters
        self._pytorch_wrapper = torch.FloatTensor
        self._encoder = None
        self._prepared = False

        self.device, _ = get_devices()
Example #11
    def __init__(self, model='resnet-18', layer='default', layer_output_size=512):
        """ Img2Vec
        :param cuda: If set to True, will run forward pass on GPU
        :param model: String name of requested model
        :param layer: String or Int depending on model.  See more docs: https://github.com/christiansafka/img2vec.git
        :param layer_output_size: Int depicting the output size of the requested layer
        """
        self.device, _ = get_devices()
        self.layer_output_size = layer_output_size
        self.model_name = model

        self.model, self.extraction_layer = self._get_model_and_layer(model, layer)
        self.model = self.model.to(self.device)

        # transforms.Scale is deprecated (and later removed) in torchvision;
        # Resize is the drop-in replacement
        self.scaler = transforms.Resize((224, 224))
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])
        self.to_tensor = transforms.ToTensor()
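The embedding path itself is not shown; the three transforms above are typically composed in this order when encoding a PIL image, with the vector read off self.extraction_layer via a forward hook. A hypothetical sketch:

import torch
from PIL import Image

i2v = Img2Vec()                               # defaults to resnet-18
img = Image.open('photo.jpg').convert('RGB')  # hypothetical input file
tensor = i2v.normalize(i2v.to_tensor(i2v.scaler(img))).unsqueeze(0).to(i2v.device)
with torch.no_grad():
    i2v.model(tensor)  # the hook on extraction_layer captures the embedding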
Example #12
    def __init__(self, enabled=True):
        self.major = 0  # GPU major version
        torch_version = [int(i) for i in torch.__version__.split('.')[:-1]]

        # AMP needs torch >= 1.6; compare (major, minor) lexicographically,
        # otherwise e.g. torch 2.0 would be wrongly rejected for having minor < 6
        if not enabled or not torch.cuda.is_available() or torch_version < [1, 6]:
            self._enabled = False
        else:
            device, _ = get_devices()
            if device.type == 'cuda':
                # tensor cores only exist from 7 onwards
                # if this is not the case, then AMP is unnecessary overhead
                self.major, _ = torch.cuda.get_device_capability(device)
                self._enabled = enabled if self.major > 6 else False
            else:
                self._enabled = False  # gpu is available but cpu is forced

        self.prev = self._enabled  # necessary reference to exit
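Only __init__ is shown, but the class is used as a context manager around forward passes, enabling mixed precision only when the checks above pass (later lightwood versions name this class LightwoodAutocast). A hedged usage sketch, with model/inputs/criterion/targets as placeholders:

with LightwoodAutocast():
    # runs under torch.cuda.amp.autocast when _enabled is True,
    # and as ordinary full-precision code otherwise
    output = model(inputs)
    loss = criterion(output, targets)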
Example #13
File: rnn.py  Project: mindsdb/lightwood
    def __init__(self, encoded_vector_size=128, train_iters=100, stop_on_error=0.01, learning_rate=0.01,
                 is_target=False, ts_n_dims=1, encoder_class=EncoderRNNNumerical):
        super().__init__(is_target)
        self.device, _ = get_devices()
        self.encoder_class = encoder_class
        self._stop_on_error = stop_on_error
        self._learning_rate = learning_rate
        self._encoded_vector_size = encoded_vector_size
        self._transformer_hidden_size = None
        self._epochs = train_iters  # training epochs
        self._pytorch_wrapper = torch.FloatTensor
        self._prepared = False
        self._is_setup = False
        self._max_ts_length = 0
        self._sos = 0.0  # start of sequence for decoding
        self._eos = 0.0  # end of input sequence -- padding value for batches
        self._n_dims = ts_n_dims
        self._normalizer = None
        self._target_ar_normalizers = []
Example #14
    def predict(self, when_data=None, when=None):
        """
        Predict given when conditions.

        :param when_data: pandas.DataFrame
        :param when: dict

        :return: pandas.DataFrame
        """
        device, _available_devices = get_devices()
        log.info(f'Computing device used: {device}')
        if when is not None:
            when_dict = {key: [when[key]] for key in when}
            when_data = pandas.DataFrame(when_dict)

        when_data_ds = DataSource(when_data, self.config, prepare_encoders=False)

        when_data_ds.eval()

        kwargs = {'include_extra_data': self.config.get('include_extra_data', False)}

        return self._mixer.predict(when_data_ds, **kwargs)
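Both calling conventions from the signature, as a hypothetical usage (the column name 'x' is illustrative):

# single row via a dict of conditions
out = predictor.predict(when={'x': 1})

# batch via a DataFrame
out = predictor.predict(when_data=pandas.DataFrame({'x': [1, 2, 3]}))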
Example #15
    def __init__(self, dynamic_parameters, input_size=None, output_size=None,
                 nr_outputs=None, shape=None, selfaware=False, size_parameters={},
                 pretrained_net=None, deterministic=False):
        self.input_size = input_size
        self.output_size = output_size
        self.nr_outputs = nr_outputs

        self.selfaware = selfaware
        # How many devices we can train this network on
        self.available_devices = 1
        self.max_variance = None

        self.device, _ = get_devices()

        if deterministic:
            # fix the seed (and set the CUDA options below) so that pytorch's
            # random number generation is identical across runs on the same dataset
            torch.manual_seed(66)

            if 'cuda' in str(self.device):
                torch.backends.cudnn.deterministic = True
                torch.backends.cudnn.benchmark = False
                self.available_devices = torch.cuda.device_count()
            else:
                self.available_devices = 1

        self.dynamic_parameters = dynamic_parameters

        """
        Here we define the basic building blocks of our model,
        in forward we define how we put it all together along with an input
        """
        super(DefaultNet, self).__init__()

        if shape is None and pretrained_net is None:
            shape = [self.input_size, max([self.input_size * 2, self.output_size * 2, 400]), self.output_size]

        if pretrained_net is None:
            logging.info(f'Building network of shape: {shape}')
            rectifier = torch.nn.SELU  #alternative: torch.nn.ReLU

            layers = []
            for ind in range(len(shape) - 1):
                linear_function = PLinear if CONFIG.USE_PROBABILISTIC_LINEAR else torch.nn.Linear
                layers.append(linear_function(shape[ind],shape[ind+1]))
                if ind < len(shape) - 2:
                    layers.append(rectifier())

            self.net = torch.nn.Sequential(*layers)
        else:
            self.net = pretrained_net
            for layer in self.net:
                if isinstance(layer, torch.nn.Linear):
                    if self.input_size is None:
                        self.input_size = layer.in_features
                    self.output_size = layer.out_features

        if self.selfaware:
            awareness_net_shape = [(self.input_size + self.output_size), max([int((self.input_size + self.output_size) * 1.5), 300]), self.nr_outputs]
            awareness_layers = []

            for ind in range(len(awareness_net_shape) - 1):
                rectifier = torch.nn.SELU  #alternative: torch.nn.ReLU
                awareness_layers.append(torch.nn.Linear(awareness_net_shape[ind], awareness_net_shape[ind + 1]))
                if ind < len(awareness_net_shape) - 2:
                    awareness_layers.append(rectifier())

            self.awareness_net = torch.nn.Sequential(*awareness_layers)

        if deterministic and pretrained_net is None:
            # when determinism is enabled, initialize weights from a fixed distribution;
            # a local function lets us apply this to both the awareness layers and
            # the internal layers of the net
            def reset_layer_params(layer):
                if isinstance(layer, torch.nn.Linear):
                    torch.nn.init.normal_(layer.weight, mean=0., std=1 / math.sqrt(layer.out_features))
                    torch.nn.init.normal_(layer.bias, mean=0., std=0.1)

                elif isinstance(layer, PLinear):
                    torch.nn.init.normal_(layer.mean, mean=0., std=1 / math.sqrt(layer.out_features))
                    torch.nn.init.normal_(layer.bias, mean=0., std=0.1)

            if self.selfaware:
                for layer in self.awareness_net:
                    reset_layer_params(layer)

            for layer in self.net:
                reset_layer_params(layer)

        self.net = self.net.to(self.device)
        if self.available_devices > 1:
            self._foward_net = torch.nn.DataParallel(self.net)
        else:
            self._foward_net = self.net

        if self.selfaware:
            self.awareness_net = self.awareness_net.to(self.device)
            if self.available_devices > 1:
                self._foward_awareness_net = torch.nn.DataParallel(self.awareness_net)
            else:
                self._foward_awareness_net = self.awareness_net
Example #16
    def __init__(self,
                 dynamic_parameters,
                 input_size=None,
                 output_size=None,
                 nr_outputs=None,
                 shape=None,
                 dropout=None,
                 pretrained_net=None):
        self.input_size = input_size
        self.output_size = output_size
        self.nr_outputs = nr_outputs
        self.max_variance = None
        # How many devices we can train this network on
        self.available_devices = 1

        self.device, _ = get_devices()
        self.dynamic_parameters = dynamic_parameters
        """
        Here we define the basic building blocks of our model,
        in forward we define how we put it all together along with an input
        """
        super(DefaultNet, self).__init__()

        if shape is None and pretrained_net is None:
            shape = [
                self.input_size,
                max([self.input_size * 2, self.output_size * 2, 400]),
                self.output_size
            ]

        if pretrained_net is None:
            log.info(f'Building network of shape: {shape}')
            rectifier = torch.nn.SELU  #alternative: torch.nn.ReLU

            layers = []
            for ind in range(len(shape) - 1):
                # apply dropout between layers, but never before the first layer
                if (dropout is not None) and ind > 0:
                    layers.append(torch.nn.Dropout(p=dropout))
                linear_function = PLinear if CONFIG.USE_PROBABILISTIC_LINEAR else torch.nn.Linear
                layers.append(linear_function(shape[ind], shape[ind + 1]))
                if ind < len(shape) - 2:
                    layers.append(rectifier())

            self.net = torch.nn.Sequential(*layers)
        else:
            self.net = pretrained_net
            for layer in self.net:
                if isinstance(layer, torch.nn.Linear):
                    if self.input_size is None:
                        self.input_size = layer.in_features
                    self.output_size = layer.out_features

        self.net = self.net.to(self.device)

        if 'cuda' in str(self.device):
            self.available_devices = torch.cuda.device_count()
        else:
            self.available_devices = 1

        if self.available_devices > 1:
            self._foward_net = torch.nn.DataParallel(self.net)
        else:
            self._foward_net = self.net
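When shape is omitted, the hidden layer defaults to max(input_size * 2, output_size * 2, 400) units, so input_size=50 with output_size=5 yields the shape [50, 400, 5]. A hypothetical construction (empty dynamic_parameters purely for illustration):

net = DefaultNet(dynamic_parameters={}, input_size=50, output_size=5, nr_outputs=5)
print(net.net)  # Sequential: Linear(50, 400) -> SELU -> Linear(400, 5)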
Example #17
import torchvision
import numpy as np
import torch
import sys

if sys.version_info < (3, 6):
    sys.exit('Sorry, Lightwood does not support Python < 3.6')

from lightwood.__about__ import __package_name__ as name, __version__
from lightwood.api.predictor import Predictor
import lightwood.model_building
import lightwood.constants.lightwood as CONST
from lightwood.helpers.device import get_devices

COLUMN_DATA_TYPES = CONST.COLUMN_DATA_TYPES

# fix random seed for reproducibility
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
if 'cuda' in str(get_devices()[0]):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False