Exemplos de merge em Python, exemplos de HinetPy.win32.merge em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_win32.py Projeto: zlinahot/HinetPy

    def test_merge_with_deep_level_directory(self):
        """Merge into an output file nested several directories deep.

        The merged file must exist and be identical to the reference file.
        """
        pattern = os.path.join(path, "20170101000?0101VM.cnt")
        inputs = sorted(glob.glob(pattern))
        output = "test_merge/with/deep/level/directory/output.cnt"
        expected = os.path.join(path, "0101_201701010000_3.cnt")

        win32.merge(inputs, output)

        assert os.path.exists(output)
        assert filecmp.cmp(output, expected)
        # Remove the whole directory tree that holds the output file.
        shutil.rmtree("test_merge")

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_win32.py Projeto: zlinahot/HinetPy

    def test_merge_with_wildcard(self):
        """Merge using a wildcard pattern string instead of a file list.

        The merged file must exist and be identical to the reference file.
        """
        pattern = os.path.join(path, "20170101000?0101VM.cnt")
        output = "test_merge_with_wildcard.cnt"
        expected = os.path.join(path, "0101_201701010000_3.cnt")

        # The pattern itself is handed to merge(); no glob expansion here.
        win32.merge(pattern, output)

        assert os.path.exists(output)
        assert filecmp.cmp(output, expected)
        os.unlink(output)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_win32.py Projeto: zlinahot/HinetPy

    def test_merge_without_sort(self):
        """Merge an already-sorted file list without passing ``force_sort``.

        The merged file must exist and be identical to the reference file.
        """
        pattern = os.path.join(path, "20170101000?0101VM.cnt")
        inputs = sorted(glob.glob(pattern))
        output = "test_merge_without_sort.cnt"
        expected = os.path.join(path, "0101_201701010000_3.cnt")

        win32.merge(inputs, output)

        assert os.path.exists(output)
        assert filecmp.cmp(output, expected)
        os.unlink(output)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: test_win32.py Projeto: seisman/HinetScripts

    def test_merge_with_wildcard(self):
        """Merge using a wildcard pattern string instead of a file list.

        The merged file must exist and be identical to the reference file.
        """
        pattern = os.path.join(path, "20170101000?0101VM.cnt")
        output = "test_merge_with_wildcard.cnt"
        expected = os.path.join(path, "0101_201701010000_3.cnt")

        # The pattern itself is handed to merge(); no glob expansion here.
        win32.merge(pattern, output)

        assert os.path.exists(output)
        assert filecmp.cmp(output, expected)
        os.unlink(output)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_win32.py Projeto: seisman/HinetScripts

    def test_merge_with_deep_level_directory(self):
        """Merge into an output file nested several directories deep.

        The merged file must exist and be identical to the reference file.
        """
        pattern = os.path.join(path, "20170101000?0101VM.cnt")
        inputs = sorted(glob.glob(pattern))
        output = "test_merge/with/deep/level/directory/output.cnt"
        expected = os.path.join(path, "0101_201701010000_3.cnt")

        win32.merge(inputs, output)

        assert os.path.exists(output)
        assert filecmp.cmp(output, expected)
        # Remove the whole directory tree that holds the output file.
        shutil.rmtree("test_merge")

Exemplo n.º 6

0

Exibir arquivo

Arquivo: test_win32.py Projeto: seisman/HinetScripts

    def test_merge_without_sort(self):
        """Merge an already-sorted file list without passing ``force_sort``.

        The merged file must exist and be identical to the reference file.
        """
        pattern = os.path.join(path, "20170101000?0101VM.cnt")
        inputs = sorted(glob.glob(pattern))
        output = "test_merge_without_sort.cnt"
        expected = os.path.join(path, "0101_201701010000_3.cnt")

        win32.merge(inputs, output)

        assert os.path.exists(output)
        assert filecmp.cmp(output, expected)
        os.unlink(output)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: test_win32.py Projeto: zlinahot/HinetPy

    def test_merge_with_sort(self):
        """Merge a reverse-ordered file list with ``force_sort=True``.

        The merged file must exist and be identical to the reference file.
        """
        # glob.glob() returns matches in arbitrary order, so reversing its raw
        # result does not guarantee an unsorted list. Sorting in descending
        # order makes the input deterministically out of order.
        datas = sorted(
            glob.glob(os.path.join(path, "20170101000?0101VM.cnt")),
            reverse=True,
        )
        final_to_check = os.path.join(path, "0101_201701010000_3.cnt")
        total_data = "test_merge_with_sort.cnt"

        win32.merge(datas, total_data, force_sort=True)
        assert os.path.exists(total_data)
        assert filecmp.cmp(total_data, final_to_check)
        os.unlink(total_data)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_win32.py Projeto: seisman/HinetScripts

    def test_merge_with_sort(self):
        """Merge a reverse-ordered file list with ``force_sort=True``.

        The merged file must exist and be identical to the reference file.
        """
        # glob.glob() returns matches in arbitrary order, so reversing its raw
        # result does not guarantee an unsorted list. Sorting in descending
        # order makes the input deterministically out of order.
        datas = sorted(
            glob.glob(os.path.join(path, "20170101000?0101VM.cnt")),
            reverse=True,
        )
        final_to_check = os.path.join(path, "0101_201701010000_3.cnt")
        total_data = "test_merge_with_sort.cnt"

        win32.merge(datas, total_data, force_sort=True)
        assert os.path.exists(total_data)
        assert filecmp.cmp(total_data, final_to_check)
        os.unlink(total_data)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_win32.py Projeto: zlinahot/HinetPy

 def test_merge_not_a_valid_wildcard(self):
     """Expect ``FileNotFoundError`` when merging from a bogus input path."""
     # NOTE(review): presumably no file matches this name -- confirm.
     bogus_input = os.path.join(path, "not-a-valid-wildcard.cnt")
     output = "test_merge_not_a_valid_wildcard.cnt"

     with pytest.raises(FileNotFoundError):
         win32.merge(bogus_input, output)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: client.py Projeto: wangyf/HinetPy

    def get_waveform(self,
                     code,
                     starttime,
                     span,
                     max_span=None,
                     data=None,
                     ctable=None,
                     outdir=None,
                     threads=3):
        """
        Get waveform from Hi-net server.

        Parameters
        ----------
        code: str
            Network code. See :meth:`~HinetPy.client.Client.info` for details.
        starttime: :py:class:`datetime.datetime` or str
            Starttime of data request.
        span: int
            Time span in minutes.
        max_span: int
            Maximum time span for sub-requests. Defaults to be determined
            automatically. Values outside [1, 60] are ignored and the
            automatically determined value is used instead.
        data: str
            Filename of downloaded win32 data.
            Default format: CODE_YYYYmmddHHMM_SPAN.cnt
        ctable: str
            Filename of downloaded channel table file.
            Default format: CODE_YYYYmmdd.ch
        outdir: str
            Save win32 and channel table data to specified directory.
            Default is current directory. Only applied to ``data`` and
            ``ctable`` when they do not already contain a directory part.
        threads: int
            How many threads used to speedup data downloading.

        Returns
        -------
        data: str
            Filename of downloaded win32 data.
        ctable: str
            Filename of downloaded channel table file.

        ``(None, None)`` is returned if any sub-request in a batch fails
        to obtain a data id.

        Raises
        ------
        TypeError
            If ``span`` is not an integer.
        ValueError
            If ``span`` is out of the allowed range, or if the requested
            time window is not available for the network.

        Examples
        --------
        Request 6 minutes data since 2010-01-01T05:35 (GMT+0900) from Hi-net.

        >>> client.get_waveform('0101', '201001010535', 6)
        ('0101_201001010535_6.cnt', '0101_20100101.ch')

        Several other string formats of ``starttime`` are also supported:

        >>> client.get_waveform('0101', '2010-01-01 05:35', 6)
        >>> client.get_waveform('0101', '2010-01-01T05:35', 6)

        ``starttime`` can be given as :py:class:`datetime.datetime`:

        >>> from datetime import datetime
        >>> starttime = datetime(2010, 1, 1, 5, 35)
        >>> client.get_waveform('0101', starttime, 6)
        ('0101_201001010535_6.cnt', '0101_20100101.ch')

        Request full-day data of 2010-01-01T00:00 (GMT+0900) of F-net:

        >>> client.get_waveform('0103', starttime, 1440, max_span=25)
        ('0103_201001010000_1440.cnt', '0103_20100101.ch')

        Notes
        -----
        **TimeZone**

        All times in HinetPy are in JST (GMT+0900).

        **max_span**

        Hi-net set three limitations of each data request:

        1. Record_Length <= 60 min
        2. Number_of_channels * Record_Length <= 12000 min
        3. Only the latest 150 requested data are kept

        For example, Hi-net network has about 24000 channels. According to
        limitation 2, the record length should be no more than 5 minutes
        in each data request. HinetPy "break" the limitation by splitting
        a long data request into several short sub-requests.

        **Workflow**

        1. do several checks
        2. split a long request into several short sub-requests
        3. loop over all sub-requests and return data id to download
        4. download all data based on data id
        5. extract all zip files and merge into one win32 format data
        6. cleanup
        """
        # 1. check span:
        #    max limits is determined by the max number of data points
        #    allowed in code s4win2sacm.c
        if not isinstance(span, int):
            raise TypeError("span must be integer.")
        if not 1 <= span <= (2**31 - 1) / 6000:
            raise ValueError("Span is NOT in the allowed range [1, 357913]")

        # 2. check starttime and endtime
        time0 = NETWORK[code].starttime
        # time1 = UTCTime + JST(GMT+0900) - 2 hour delay
        time1 = datetime.utcnow() + timedelta(hours=9) + timedelta(hours=-2)
        if not isinstance(starttime, datetime):
            starttime = _string2datetime(starttime)
        endtime = starttime + timedelta(minutes=span)
        if not time0 <= starttime < endtime <= time1:
            msg = ("Data not available in the time period. "
                   "Call Client.info('{}') for help.".format(code))
            raise ValueError(msg)

        # 3. set max_span
        if self._code != code:  # update default max_span
            self._code = code
            self._max_span = self._get_allowed_span(code)
        if not (max_span and 1 <= max_span <= 60):
            max_span = self._max_span

        # 4. prepare jobs
        jobs = prepare_jobs(starttime, span, max_span)

        cnts = []
        ch_euc = set()
        logger.info("%s ~%s", starttime.strftime("%Y-%m-%d %H:%M"), span)

        # 5. request and download, at most `batch_size` sub-requests at a
        #    time, to break the limitation of 150 kept requests
        batch_size = 100
        count = len(jobs)
        width = len(str(count))
        for j in range(0, count, batch_size):
            # Only the jobs of the current batch are requested, checked and
            # downloaded in this pass. Working on the full job list here
            # would inspect jobs that have not been requested yet and
            # download earlier batches again.
            batch = jobs[j:j + batch_size]

            # 5.1. request <=100 data
            for i, job in enumerate(batch, start=j + 1):
                logger.info("[%s/%d] => %s ~%d",
                            str(i).zfill(width), count,
                            job.starttime.strftime("%Y-%m-%d %H:%M"),
                            job.span)
                job.id = self._request_waveform(code, job.starttime, job.span)

            # 5.2. check ids
            ids = [job.id for job in batch]
            if not any(ids):
                logger.error("No data requested succesuflly. Skipped.")
                return None, None
            # check if all ids are not None
            if not all(ids):
                logger.error("Fail to request some data. Skipped.")
                return None, None

            # 5.3. parallel downloading
            with ThreadPool(min(threads, len(batch))) as p:
                rvalue = p.map(self._download_waveform, batch)
            for value in rvalue:
                cnts.extend(value[0])
                ch_euc.add(value[1])

        # post processes
        # 1. always sort cnts by name/time to avoid use -s option of catwin32
        cnts = sorted(cnts)
        #    always use the first ctable
        first_ctable = sorted(ch_euc)[0]

        # 2. merge all cnt files
        if not data:
            data = "{}_{}_{:d}.cnt".format(code,
                                           starttime.strftime("%Y%m%d%H%M"),
                                           span)
        # outdir only applies when data carries no directory part of its own
        if outdir and not os.path.dirname(data):
            data = os.path.join(outdir, data)
        merge(cnts, data)

        # 3. rename channeltable file
        if not ctable:
            ctable = "{}_{}.ch".format(code, starttime.strftime("%Y%m%d"))

        ctable_dir = os.path.dirname(ctable)
        if not ctable_dir and outdir:
            ctable_dir = outdir
            ctable = os.path.join(outdir, ctable)
        if ctable_dir and not os.path.exists(ctable_dir):
            os.makedirs(ctable_dir, exist_ok=True)
        os.rename(first_ctable, ctable)

        # 4. cleanup
        for cnt in cnts:
            os.remove(cnt)

        return data, ctable