Exemplo n.º 1
0
    def test_invalid_input_path(self):
        """A string input is treated as a file path; a nonexistent one raises IOError."""

        collection = demo_pb2.Collection()

        # 'message' is not a file on disk, so opening it must fail
        with pytest.raises(IOError):
            read_protobuf('message', collection)
Exemplo n.º 2
0
    def test_invalid_input(self):
        """A non-str/bytes/dict input (here an int) must raise TypeError."""

        collection = demo_pb2.Collection()

        with pytest.raises(TypeError):
            read_protobuf(123, collection)
Exemplo n.º 3
0
def read_sensorburst(media):
    """Download and read sensorburst records.

    Requires:
    - read-protobuf: `pip install read-protobuf`
    - sensorburst_pb2: Download from https://api.weathercitizen.org/static/sensorburst_pb2.py
        - Once downloaded, put this file in the directory as your analysis

    Parameters
    ----------
    media : str, dict, list of str, list of dict
        Media record(s) or media record object id(s) in the media or geomedia collections.

    Returns
    -------
    pd.DataFrame
        Returns pandas dataframe of records

    Raises
    ------
    ImportError
        If `read_protobuf` or the `sensorburst_pb2` definition is unavailable.
    """

    import pandas as pd

    try:
        from read_protobuf import read_protobuf
    except ImportError:
        raise ImportError(
            "Reading sensorburst requires `read_protobuf` module. Install using `pip install read-protobuf`."
        )

    # import sensorburst definition: prefer the bundled podpac copy, fall
    # back to a local sensorburst_pb2.py in the working directory
    try:
        from podpac.datalib import weathercitizen_sensorburst_pb2 as sensorburst_pb2
    except ImportError:
        try:
            import sensorburst_pb2
        except ImportError:
            raise ImportError(
                "Processing WeatherCitizen protobuf requires `sensorburst_pb2.py` in the current working directory. Download from https://api.weathercitizen.org/static/sensorburst_pb2.py."
            )

    # normalize a single record/id to a one-element list
    if isinstance(media, (str, dict)):
        media = [media]

    # get pb content
    pbs = [get_file(m) for m in media]

    # initialize protobuf object used as the decode target
    Burst = sensorburst_pb2.Burst()

    # Decode every blob to a DataFrame and stack them row-wise.
    # BUG FIX: the original called DataFrame.append in a loop, which is
    # quadratic and was removed in pandas 2.0; pd.concat replaces it with
    # the same row-stacking semantics (sort=False preserved).
    frames = [read_protobuf(pb, Burst) for pb in pbs]
    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames, sort=False)
Exemplo n.º 4
0
def read_chunks3(f, chunk_size=4096):
    """Lazy function (generator) to read a file piece by piece.
    Default chunk size: 4096.

    Parses a stream of length-delimited protobuf records: each record is
    prefixed with a varint32 byte length. Yields the result of
    `read_protobuf(msg_buf, netcap.TCP())` for each record.

    NOTE(review): a record spanning more than two chunks, or a varint
    length prefix split across a chunk boundary, is not handled here —
    confirm chunk_size always exceeds the largest record.
    """
    data = f.read(chunk_size)
    if not data:
        return
    n = 0
    while n < len(data):
        # decode the varint32 length prefix at offset n
        msg_len, new_pos = _DecodeVarint32(data, n)
        if (new_pos + msg_len) > len(data):
            # record straddles the chunk boundary
            n = new_pos
            # read first part
            msg_buf = data[n:n + msg_len]
            # refill buffer
            data = f.read(chunk_size)
            if not data:
                return
            n = 0
            # read remaining bytes (new_pos reused as the remaining count)
            new_pos = msg_len - len(msg_buf)
            msg_buf += data[n:n + msg_len - len(msg_buf)]
            n += new_pos
        else:
            # record fits entirely inside the current buffer
            n = new_pos
            msg_buf = data[n:n + msg_len]
            n += msg_len
        # refill buffer if necessary (exactly consumed the current chunk)
        if not n < len(data):
            data = f.read(chunk_size)
            if not data:
                return
            n = 0
        yield read_protobuf(msg_buf, netcap.TCP())
Exemplo n.º 5
0
    def test_invalid_pb(self):
        """A payload of the wrong message type must raise ValueError."""

        message_ok = write_demo()
        collection = demo_pb2.Collection()

        # serialize a Record, which is not a Collection
        rec = demo_pb2.Record()
        rec.int = 1234
        rec.float = 43.685
        message_bad = rec.SerializeToString()

        # mixed input still parses into a DataFrame
        df = read_protobuf([message_ok, message_bad], collection)
        assert isinstance(df, pd.DataFrame)

        # a lone wrong-type payload is rejected
        with pytest.raises(ValueError):
            df = read_protobuf([message_bad], collection)
Exemplo n.º 6
0
    def test_read_bytes(self):
        """Serialized bytes input decodes into a DataFrame."""

        message = write_demo()
        collection = demo_pb2.Collection()

        df = read_protobuf(message, collection)
        assert isinstance(df, pd.DataFrame)
Exemplo n.º 7
0
    def test_read_file(self):
        """A file path input decodes into a DataFrame."""

        pb_path = write_demo_file()
        collection = demo_pb2.Collection()

        df = read_protobuf(pb_path, collection)
        assert isinstance(df, pd.DataFrame)
Exemplo n.º 8
0
    def test_prefix(self):
        """With prefix_nested=True, nested fields get dotted column names."""

        message = write_demo()
        collection = demo_pb2.Collection()

        df = read_protobuf(message, collection, prefix_nested=True)

        assert 'nested.data' in df.columns
Exemplo n.º 9
0
    def test_flatten(self):
        """With flatten=False, the repeated field stays as a single column."""

        message = write_demo()
        collection = demo_pb2.Collection()

        df = read_protobuf(message, collection, flatten=False)

        # only the un-flattened 'records' column should exist
        assert len(df.columns) == 1 and 'records' in df.columns
Exemplo n.º 10
0
    def test_defaults(self):
        """Default options flatten nested records into plain columns."""

        message = write_demo()
        collection = demo_pb2.Collection()

        df = read_protobuf(message, collection)

        assert 'data' in df.columns
Exemplo n.º 11
0
    def test_multiple_input_files(self):
        """A list of file paths decodes into a single DataFrame."""

        first = write_demo_file()
        second = write_demo_file('demo2.pb')

        collection = demo_pb2.Collection()

        df = read_protobuf([first, second], collection)
        assert isinstance(df, pd.DataFrame)
Exemplo n.º 12
0
    def test_multiple_input_types(self):
        """A mixed list of serialized bytes and file paths is accepted."""

        message = write_demo(n=29)
        file_path = write_demo_file('demo2.pb')

        collection = demo_pb2.Collection()

        df = read_protobuf([message, file_path], collection)
        assert isinstance(df, pd.DataFrame)
Exemplo n.º 13
0
    def test_multiple_input_bytes(self):
        """A list of serialized byte payloads decodes into one DataFrame."""

        first = write_demo(n=29)
        second = write_demo(n=5)

        collection = demo_pb2.Collection()

        df = read_protobuf([first, second], collection)
        assert isinstance(df, pd.DataFrame)
 def get_list_of_sequences(self):
     """Decode every discovered protobuf file into DataFrames, grouped by key.

     Returns a dict mapping each key from the protofile listing to a list
     of DataFrames (one per file). NOTE(review): when no files are found a
     warning is logged and an empty *list* is returned instead of a dict —
     confirm callers handle both types.
     """
     protofiles = self.__get_list_of_protofiles()
     if not protofiles:
         self.logger.warning("No files found in directory \n%s ",
                             self.path_to_directory)
         return list()
     return {
         key: [read_protobuf(pb, UserSequence()) for pb in pb_files]
         for key, pb_files in protofiles.items()
     }
Exemplo n.º 15
0
    def read_chunks(self, f, NCType, dataframe, chunk_size=4096):
        """Lazy function (generator) to read a file piece by piece.
        Default chunk size: 4096.

        Parses a stream of varint32 length-delimited protobuf records and
        yields `read_protobuf(bytes(msg_buf), NCType, dataframe,
        index=self.count)` for each record after the first. The first
        record is consumed as a header (`self.gotHeader` flag) and not
        yielded. NOTE(review): `self.count` is never advanced here —
        confirm it is updated elsewhere.

        NOTE(review): a record spanning more than two chunks, or a varint
        prefix split across a chunk boundary, is not handled — confirm
        chunk_size always exceeds the largest record.
        """
        
        data = f.read(chunk_size)
        if not data:
            return
        n = 0
        
        while n < len(data):
            # decode the varint32 length prefix at offset n
            msg_len, new_pos = _DecodeVarint32(data, n)
            if (new_pos + msg_len) > len(data):
                # record straddles the chunk boundary
                n = new_pos
                # read first part
                msg_buf = data[n:n+msg_len]
                
                # refill buffer
                data = f.read(chunk_size)
                if not data:
                    return
                n = 0
                # read remaining bytes (new_pos reused as remaining count)
                new_pos = msg_len-len(msg_buf)
                msg_buf += data[n:n+msg_len-len(msg_buf)]
                n += new_pos
            else:
                # record fits entirely inside the current buffer
                n = new_pos
                msg_buf = data[n:n+msg_len]
                n += msg_len

            # refill buffer if necessary
            if not n < len(data):
                data = f.read(chunk_size)
                if not data:
                    return
                n = 0

            # skip the first record: it is the stream header
            if not self.gotHeader:
                self.gotHeader = True
            else:
                yield read_protobuf(bytes(msg_buf), NCType, dataframe, index=self.count)
 def get_single_sequence(self, filename):
     """Decode one protobuf sequence file from the directory into a DataFrame."""
     full_path = str(Path(self.path_to_directory).joinpath(filename))
     return read_protobuf(full_path, UserSequence())
Exemplo n.º 17
0
 def check_pd_type():
     """Decode sample recording .pb files into a pandas DataFrame.

     Returns
     -------
     pd.DataFrame
         Records decoded from the listed protobuf files via read-protobuf.
     """
     # read_protobuf needs an instantiated protobuf message to decode into
     record = recordings_pb2.Recording()
     # BUG FIX: the original passed the undefined name `Record` (NameError
     # at runtime); pass the instantiated message `record` instead. The
     # unused `fields` introspection local was removed.
     df = read_protobuf(['10732.pb', '10740.pb', '10742.pb'], record)
     return df