Example #1
import configparser
import csv
import datetime
import socket
from collections import abc
from pathlib import Path

import numpy as np
import pandas as pd
from devo.sender import Sender  # Devo SDK sender (assumed import path)

# ListProcessor and DictProcessor are project-local helpers used by
# load_multi; they are assumed to be defined elsewhere in the module.


class Writer:
    def __init__(self,
                 profile='default',
                 key=None,
                 crt=None,
                 chain=None,
                 relay=None,
                 port=443,
                 credential_path=None,
                 **kwargs):

        self.profile = profile
        self.key = key
        self.crt = crt
        self.chain = chain
        self.relay = relay
        self.port = port

        if credential_path is None:
            self.credential_path = Path.home() / '.devo_credentials'
        else:
            self.credential_path = Path(credential_path).expanduser().resolve()

        if not all([key, crt, chain, relay]):
            self._read_profile()

        if not all([self.key, self.crt, self.chain, self.relay]):
            raise Exception(
                'Credentials and relay must be specified or in ~/.devo_credentials'
            )

        config_dict = kwargs
        config_dict.update(
            dict(address=self.relay,
                 port=self.port,
                 key=self.key,
                 cert=self.crt,
                 chain=self.chain))

        self.sender = Sender(config_dict)

    def _read_profile(self):

        config = configparser.ConfigParser()
        config.read(self.credential_path)

        if self.profile in config:
            profile_config = config[self.profile]

            self.key = profile_config.get('key')
            self.crt = profile_config.get('crt')
            self.chain = profile_config.get('chain')
            self.relay = profile_config.get('relay')
            self.port = int(profile_config.get('port', 443))

    def load_file(self,
                  file_path,
                  tag,
                  historical=True,
                  ts_index=None,
                  ts_name=None,
                  delimiter=',',
                  header=False,
                  columns=None,
                  linq_func=print):

        with open(file_path, 'r', newline='') as f:
            data = csv.reader(f, delimiter=delimiter)
            first = next(data)

            if historical:
                chunk_size = 50
                num_cols = len(first) - 1
            else:
                chunk_size = 1
                num_cols = len(first)

            if header:
                if columns is None:
                    columns = first
                if ts_name is not None:
                    ts_index = columns.index(ts_name)
            else:
                f.seek(0)

            if linq_func is not None:
                linq = self._build_linq(tag, num_cols, columns)
                linq_output = linq_func(linq)
            else:
                linq_output = None

            self._load(data, tag, historical, ts_index, chunk_size)

        return linq_output

    def load(self,
             data,
             tag,
             historical=True,
             ts_index=None,
             ts_name=None,
             columns=None,
             linq_func=print):

        data = iter(data)
        first = next(data)

        if historical:
            chunk_size = 50
            num_cols = len(first) - 1
        else:
            chunk_size = 1
            num_cols = len(first)

        if isinstance(first,
                      (abc.Sequence, np.ndarray,
                       pd.core.series.Series)) and not isinstance(first, str):
            data = self._process_seq(data, first)
        elif isinstance(first, abc.Mapping):
            if columns:
                names = columns[:]
            else:
                names = sorted(first)

            if historical and columns:
                names.append(ts_name)
                ts_index = num_cols
            elif historical:
                names.remove(ts_name)
                columns = names[:]
                names.append(ts_name)
                ts_index = num_cols
            else:
                columns = names
            data = self._process_mapping(data, first, names)
        else:
            raise Exception(
                f'data of type {type(first)} is not supported for loading')

        if linq_func is not None:
            linq = self._build_linq(tag, num_cols, columns)
            linq_output = linq_func(linq)
        else:
            linq_output = None

        self._load(data, tag, historical, ts_index, chunk_size)

        return linq_output

    def load_df(self, df, tag, ts_index=None, ts_name=None, linq_func=print):
        if (ts_index is None) and (ts_name is None):
            raise Exception('must specify ts_index or ts_name')

        data = df.values.tolist()

        if ts_index is None:
            ts_index = df.columns.get_loc(ts_name)

        return self.load(data,
                         tag,
                         historical=True,
                         ts_index=ts_index,
                         linq_func=linq_func)

    def _load(self, data, tag, historical, ts_index=None, chunk_size=50):
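        # Rows are buffered into one bulk payload and flushed to the relay
        # every `chunk_size` rows (50 for historical loads, 1 for real time).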

        message_header_base = self._make_message_header(tag, historical)
        counter = 0
        bulk_msg = ''

        if not historical:
            message_header = message_header_base

        for row in data:

            if historical:
                ts = row.pop(ts_index)
                ts = self._to_ts_string(ts)
                message_header = message_header_base.format(ts)

            bulk_msg += self._make_msg(message_header, row)
            counter += 1

            if counter == chunk_size:
                self.sender.send_raw(bulk_msg.encode())
                counter = 0
                bulk_msg = ''

        if bulk_msg:
            self.sender.send_raw(bulk_msg.encode())

    @staticmethod
    def _make_message_header(tag, historical):
        hostname = socket.gethostname()
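        # '<14>' is the syslog PRI value (facility 1 "user", severity 6
        # "info"). Historical headers keep a '{0}' placeholder that _load
        # later fills with the event timestamp; the '(usd)' tag prefix
        # appears to mark events that carry their own date.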

        if historical:
            tag = f'(usd){tag}'
            prefix = '<14>{0}'
        else:
            prefix = '<14>Jan  1 00:00:00'

        return f'{prefix} {hostname} {tag}: '

    @staticmethod
    def _make_msg(header, row):
        """
        Takes row (without timestamp)

        Concats column values in row
        calculates string indices of
        where columns start and begin

        :param row: list with column values as strings
        :return: string in form ofL indices<>cols
        """

        lengths = [len(s) for s in row]
        lengths.insert(0, 0)

        indices = np.cumsum(lengths)
        indices = ','.join(str(i) for i in indices)

        row_concated = ''.join(row)

        msg = indices + '<>' + row_concated
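        # e.g. _make_msg(header, ['a', 'bb', 'ccc']) yields
        # header + '0,1,3,6<>abbccc\n': the cumulative character offsets,
        # a '<>' separator, then the concatenated column values. The LINQ
        # from _build_linq uses these offsets to slice each column back out.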

        return header + msg + '\n'

    @staticmethod
    def _process_seq(data, first):
        yield [str(c) for c in first]
        for row in data:
            yield [str(c) for c in row]

    @staticmethod
    def _process_mapping(data, first, names):
        yield [str(first[c]) for c in names]
        for row in data:
            yield [str(row[c]) for c in names]

    @staticmethod
    def _build_linq(tag, num_cols=None, columns=None):

        if columns is None:
            columns = ['col_{0}'.format(i) for i in range(num_cols)]

        col_extract = '''
        select substring(payload,
        int(split(indices, ",", {i})),
        int(split(indices, ",", {i}+1)) - int(split(indices, ",", {i}))
        ) as `{col_name}`
        '''

        linq = '''
        from {tag}

        select split(message, "<>", 0) as indices
        select subs(message, re("[0-9,]*<>"), template("")) as payload

        '''.format(tag=tag)

        for i, col_name in enumerate(columns):
            linq += col_extract.format(i=i, col_name=col_name)

        return linq

    def load_multi(self,
                   data,
                   tag_name=None,
                   historical=True,
                   ts_name=None,
                   default_schema=None,
                   schemas=None,
                   linq_func=None):

        data = iter(data)
        first = next(data)

        chunk_size = 50 if historical else 1

        if isinstance(first,
                      (abc.Sequence, np.ndarray,
                       pd.core.series.Series)) and not isinstance(first, str):
            self.processor = ListProcessor(historical, linq_func)
        elif isinstance(first, abc.Mapping):
            self.processor = DictProcessor(schemas, default_schema, historical,
                                           tag_name, ts_name, linq_func)
        else:
            raise Exception(
                f'data of type {type(first)} is not supported for loading')

        data = self.processor.process_data(data, first)
        self._load_multi(data, historical, chunk_size)

    def _load_multi(self, data, historical, chunk_size=50):

        counter = 0
        bulk_msg = ''

        for header, row in data:
            if historical:
                ts, tag = header
                ts = self._to_ts_string(ts)
                message_header = self._make_message_header(
                    tag, historical).format(ts)
            else:
                tag = header[0]
                message_header = self._make_message_header(tag, historical)

            bulk_msg += self._make_msg(message_header, row)
            counter += 1

            if counter == chunk_size:
                self.sender.send_raw(bulk_msg.encode())
                counter = 0
                bulk_msg = ''

        if bulk_msg:
            self.sender.send_raw(bulk_msg.encode())

    @staticmethod
    def _to_ts_string(ts):
        if isinstance(ts, (int, float, np.integer)):
            ts = pd.to_datetime(ts, unit='s')
        elif isinstance(ts, str):
            ts = pd.to_datetime(ts)
        elif isinstance(ts, (pd.Timestamp, datetime.datetime)):
            ts = ts.replace(tzinfo=None)

        return str(ts)
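
A minimal usage sketch for Writer (the profile name, relay host, tag, and
file path below are hypothetical; it assumes a profile in
~/.devo_credentials in the INI format read by _read_profile):

# ~/.devo_credentials
# [default]
# key = /path/to/client.key
# crt = /path/to/client.crt
# chain = /path/to/chain.crt
# relay = relay.example.com
# port = 443

writer = Writer(profile='default')

# Historical CSV load: column 0 holds the event timestamp, and the
# printed LINQ query reconstructs the columns server-side.
writer.load_file('events.csv', tag='my.app.events', ts_index=0)

# Real-time load of in-memory rows (rows are flushed one at a time).
writer.load([['a', '1'], ['b', '2']], tag='my.app.events', historical=False)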
Example #2
import configparser
import csv
import datetime
import json
import socket
from collections import abc
from pathlib import Path

import numpy as np
import pandas as pd
from devo.sender import Sender  # Devo SDK sender (assumed import path)

# _Encoder is a project-local json.JSONEncoder subclass (assumed to be
# defined elsewhere) that serializes numpy/pandas scalar types.


class JSONWriter:
    def __init__(self,
                 profile='default',
                 key=None,
                 crt=None,
                 chain=None,
                 relay=None,
                 port=443,
                 credential_path=None,
                 **kwargs):

        self.profile = profile
        self.key = key
        self.crt = crt
        self.chain = chain
        self.relay = relay
        self.port = port

        if credential_path is None:
            self.credential_path = Path.home() / '.devo_credentials'
        else:
            self.credential_path = Path(credential_path).expanduser().resolve()

        if not all([key, crt, chain, relay]):
            self._read_profile()

        if not all([self.key, self.crt, self.chain, self.relay]):
            raise Exception(
                'Credentials and relay must be specified or in ~/.devo_credentials'
            )

        config_dict = kwargs
        config_dict.update(
            dict(address=self.relay,
                 port=self.port,
                 key=self.key,
                 cert=self.crt,
                 chain=self.chain))

        self.sender = Sender(config_dict)

    def _read_profile(self):

        config = configparser.ConfigParser()
        config.read(self.credential_path)

        if self.profile in config:
            profile_config = config[self.profile]

            self.key = profile_config.get('key')
            self.crt = profile_config.get('crt')
            self.chain = profile_config.get('chain')
            self.relay = profile_config.get('relay')
            self.port = int(profile_config.get('port', 443))

    def load_file(self,
                  file_path,
                  tag,
                  historical=True,
                  ts_index=None,
                  ts_name=None,
                  delimiter=',',
                  header=False,
                  columns=None):

        with open(file_path, 'r', newline='') as f:
            data = csv.reader(f, delimiter=delimiter)

            if header:
                if columns is None:
                    columns = next(data)
                else:
                    next(data)

            self.load(data,
                      tag,
                      historical,
                      ts_index=ts_index,
                      ts_name=ts_name,
                      columns=columns)

    def load(self,
             data,
             tag,
             historical=True,
             ts_index=None,
             ts_name=None,
             columns=None):

        data = iter(data)
        first = next(data)

        if historical and (ts_index is None) and (ts_name is None):
            raise Exception('must specify ts_index or ts_name')

        if historical:
            chunk_size = 50
        else:
            chunk_size = 1

        if isinstance(first,
                      (abc.Sequence, np.ndarray,
                       pd.core.series.Series)) and not isinstance(first, str):
            if (columns is None) and (historical):
                columns = [f'col{i}' for i in range(1, ts_index + 1)] + [
                    'ts'
                ] + [f'col{i}' for i in range(ts_index + 1,
                                              len(first) + 1)]
                ts_name = 'ts'
            elif historical:
                if ts_name is None:
                    ts_name = columns[ts_index]
            elif columns is None:
                columns = [f'col{i}' for i in range(1, len(first) + 1)]
            data = self._process_seq(data, first, columns)
        elif isinstance(first, abc.Mapping):
            data = self._process_mapping(data, first)
        else:
            raise Exception(
                f'data of type {type(first)} is not supported for loading')

        self._load(data, tag, historical, ts_name, chunk_size)

    def load_df(self, df, tag, ts_index=None, ts_name=None):
        if (ts_index is None) and (ts_name is None):
            raise Exception("must specify ts_index or ts_name")

        data = df.to_dict(orient="records")

        if ts_name is None:
            ts_name = df.columns[ts_index]

        self.load(data, tag=tag, historical=True, ts_name=ts_name)

    def _load(self, data, tag, historical, ts_name=None, chunk_size=50):

        message_header_base = self._make_message_header(tag, historical)
        counter = 0
        bulk_msg = ''

        if not historical:
            message_header = message_header_base

        for row in data:

            if historical:
                ts = row.pop(ts_name)
                ts = self._to_ts_string(ts)
                message_header = message_header_base.format(ts)

            bulk_msg += message_header + json.dumps(row, cls=_Encoder) + '\n'
            counter += 1

            if counter == chunk_size:
                self.sender.send_raw(bulk_msg.encode())
                counter = 0
                bulk_msg = ''

        if bulk_msg:
            self.sender.send_raw(bulk_msg.encode())

    @staticmethod
    def _make_message_header(tag, historical):
        hostname = socket.gethostname()

        if historical:
            tag = f'(usd){tag}'
            prefix = '<14>{0}'
        else:
            prefix = '<14>Jan  1 00:00:00'

        return f'{prefix} {hostname} {tag}: '

    @staticmethod
    def _process_seq(data, first, columns):
        yield dict(zip(columns, first))
        for row in data:
            yield dict(zip(columns, row))

    @staticmethod
    def _process_mapping(data, first):
        yield first.copy()
        for row in data:
            yield row.copy()

    def load_multi(self, data, tag_name=None, historical=True, ts_name=None):
        if tag_name is None:
            raise Exception('must specify tag_name')
        if historical and ts_name is None:
            raise Exception('must specify ts_name for historical loads')

        data = iter(data)
        first = next(data)

        if not isinstance(first, abc.Mapping):
            raise Exception(
                f'data of type {type(first)} is not supported for multi loading. data must be a dict'
            )

        chunk_size = 50 if historical else 1

        data = self._process_mapping(data, first)

        counter = 0
        bulk_msg = ''

        for row in data:
            tag = row.pop(tag_name)
            if historical:
                ts = row.pop(ts_name)
                ts = self._to_ts_string(ts)
                message_header = self._make_message_header(
                    tag, historical).format(ts)
            else:
                message_header = self._make_message_header(tag, historical)

            bulk_msg += message_header + json.dumps(row, cls=_Encoder) + '\n'
            counter += 1

            if counter == chunk_size:
                self.sender.send_raw(bulk_msg.encode())
                counter = 0
                bulk_msg = ''

        if bulk_msg:
            self.sender.send_raw(bulk_msg.encode())

    def load_df_multi(self,
                      df,
                      tag_name=None,
                      tag_index=None,
                      ts_index=None,
                      ts_name=None):
        if (ts_index is None) and (ts_name is None):
            raise Exception("must specify ts_index or ts_name")
        if (tag_index is None) and (tag_name is None):
            raise Exception("must specify tag_index or tag_name")

        data = df.to_dict(orient="records")

        if ts_name is None:
            ts_name = df.columns[ts_index]
        if tag_name is None:
            tag_name = df.columns[tag_index]

        self.load_multi(data,
                        tag_name=tag_name,
                        historical=True,
                        ts_name=ts_name)

    @staticmethod
    def _to_ts_string(ts):
        if isinstance(ts, (int, float, np.integer)):
            ts = pd.to_datetime(ts, unit='s')
        elif isinstance(ts, str):
            ts = pd.to_datetime(ts)
        elif isinstance(ts, (pd.Timestamp, datetime.datetime)):
            ts = ts.replace(tzinfo=None)

        return str(ts)
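
A minimal usage sketch for JSONWriter (hypothetical tags and column names;
it assumes the same ~/.devo_credentials profile as above):

import pandas as pd

jwriter = JSONWriter(profile='default')

df = pd.DataFrame({
    'ts': pd.date_range('2021-01-01', periods=3, freq='h'),
    'user': ['a', 'b', 'c'],
    'value': [1, 2, 3],
})

# Each row is sent as one JSON object; the 'ts' column is popped out of
# the payload and written into the syslog-style header instead.
jwriter.load_df(df, tag='my.app.json', ts_name='ts')

# Multi-tag load: a 'tag' column routes each row to its own table.
df['tag'] = ['my.app.a', 'my.app.b', 'my.app.a']
jwriter.load_df_multi(df, tag_name='tag', ts_name='ts')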