def test_merge_with_deep_level_directory(self):
    """Merge into an output path several directories deep.

    The merged file must exist and be byte-identical to the reference
    file ``0101_201701010000_3.cnt``.
    """
    datas = sorted(glob.glob(os.path.join(path, "20170101000?0101VM.cnt")))
    final_to_check = os.path.join(path, "0101_201701010000_3.cnt")
    total_data = "test_merge/with/deep/level/directory/output.cnt"

    try:
        win32.merge(datas, total_data)

        assert os.path.exists(total_data)
        assert filecmp.cmp(total_data, final_to_check)
    finally:
        # Always remove the generated tree, even when an assertion fails,
        # so a failing run does not leave artifacts for later runs.
        # ignore_errors avoids masking the real failure when merge()
        # never created the directory.
        shutil.rmtree("test_merge", ignore_errors=True)
def test_merge_with_wildcard(self):
    """Merge using a wildcard pattern string instead of a list of files.

    The merged file must exist and be byte-identical to the reference
    file ``0101_201701010000_3.cnt``.
    """
    final_to_check = os.path.join(path, "0101_201701010000_3.cnt")
    total_data = "test_merge_with_wildcard.cnt"
    # A single pattern string is handed to merge() unexpanded
    datas = os.path.join(path, "20170101000?0101VM.cnt")

    try:
        win32.merge(datas, total_data)

        assert os.path.exists(total_data)
        assert filecmp.cmp(total_data, final_to_check)
    finally:
        # Clean up even when an assertion fails; the existence guard keeps
        # the cleanup from masking the real error if nothing was written.
        if os.path.exists(total_data):
            os.unlink(total_data)
def test_merge_without_sort(self):
    """Merge an already sorted list of files without requesting a sort.

    The merged file must exist and be byte-identical to the reference
    file ``0101_201701010000_3.cnt``.
    """
    # Inputs are sorted here, so merge() is called without force_sort
    datas = sorted(glob.glob(os.path.join(path, "20170101000?0101VM.cnt")))
    final_to_check = os.path.join(path, "0101_201701010000_3.cnt")
    total_data = "test_merge_without_sort.cnt"

    try:
        win32.merge(datas, total_data)

        assert os.path.exists(total_data)
        assert filecmp.cmp(total_data, final_to_check)
    finally:
        # Clean up even when an assertion fails; the existence guard keeps
        # the cleanup from masking the real error if nothing was written.
        if os.path.exists(total_data):
            os.unlink(total_data)
def test_merge_with_sort(self):
    """Merge a deliberately mis-ordered list of files with ``force_sort=True``.

    The merged file must exist and be byte-identical to the reference
    file ``0101_201701010000_3.cnt``.
    """
    # glob.glob() returns matches in arbitrary order, so reversing its raw
    # result does not guarantee an unsorted input. Sorting in descending
    # order makes the input reliably out of order on every platform.
    datas = sorted(
        glob.glob(os.path.join(path, "20170101000?0101VM.cnt")),
        reverse=True,
    )
    final_to_check = os.path.join(path, "0101_201701010000_3.cnt")
    total_data = "test_merge_with_sort.cnt"

    try:
        win32.merge(datas, total_data, force_sort=True)

        assert os.path.exists(total_data)
        assert filecmp.cmp(total_data, final_to_check)
    finally:
        # Clean up even when an assertion fails; the existence guard keeps
        # the cleanup from masking the real error if nothing was written.
        if os.path.exists(total_data):
            os.unlink(total_data)
def test_merge_not_a_valid_wildcard(self):
    """Expect ``FileNotFoundError`` when merging ``not-a-valid-wildcard.cnt``."""
    output_name = "test_merge_not_a_valid_wildcard.cnt"
    input_pattern = os.path.join(path, "not-a-valid-wildcard.cnt")

    with pytest.raises(FileNotFoundError):
        win32.merge(input_pattern, output_name)
def get_waveform(self, code, starttime, span, max_span=None, data=None,
                 ctable=None, outdir=None, threads=3):
    '''
    Get waveform from Hi-net server.

    Parameters
    ----------
    code: str
        Network code. See :meth:`~HinetPy.client.Client.info` for details.
    starttime: :py:class:`datetime.datetime` or str
        Starttime of data request.
    span: int
        Time span in minutes.
    max_span: int
        Maximum time span for sub-requests. Values outside ``[1, 60]``
        are ignored. Defaults to be determined automatically.
    data: str
        Filename of downloaded win32 data.
        Default format: CODE_YYYYmmddHHMM_SPAN.cnt
    ctable: str
        Filename of downloaded channel table file.
        Default format: CODE_YYYYmmdd.ch
    outdir: str
        Save win32 and channel table data to specified directory.
        Only applied to filenames that carry no directory part of their
        own. Default is current directory.
    threads: int
        How many threads used to speedup data downloading.

    Returns
    -------
    data: str
        Filename of downloaded win32 data.
    ctable: str
        Filename of downloaded channel table file.

        ``(None, None)`` is returned when one or more sub-requests of a
        batch could not be requested.

    Raises
    ------
    TypeError
        If ``span`` is not an integer.
    ValueError
        If ``span`` is out of the allowed range, or the requested time
        period is not available for the network.

    Examples
    --------
    Request 6 minutes data since 2010-01-01T05:35 (GMT+0900) from Hi-net.

    >>> client.get_waveform('0101', '201001010535', 6)
    ('0101_201001010535_6.cnt', '0101_20100101.ch')

    Several other string formats of ``starttime`` are also supported:

    >>> client.get_waveform('0101', '2010-01-01 05:35', 6)
    >>> client.get_waveform('0101', '2010-01-01T05:35', 6)

    ``starttime`` can be given as :py:class:`datetime.datetime`:

    >>> from datetime import datetime
    >>> starttime = datetime(2010, 1, 1, 5, 35)
    >>> client.get_waveform('0101', starttime, 6)
    ('0101_201001010535_6.cnt', '0101_20100101.ch')

    Request full-day data of 2010-01-01T00:00 (GMT+0900) of F-net:

    >>> client.get_waveform('0103', '201001010000', 1440, max_span=25)
    ('0103_201001010000_1440.cnt', '0103_20100101.ch')

    Notes
    -----
    **TimeZone**

    All times in HinetPy are in JST (GMT+0900).

    **max_span**

    Hi-net set three limitations of each data request:

    1. Record_Length <= 60 min
    2. Number_of_channels * Record_Length <= 12000 min
    3. Only the latest 150 requested data are kept

    For example, Hi-net network has about 24000 channels.
    According to limitation 2, the record length should be no more than
    5 minutes in each data request. HinetPy "breaks" the limitation by
    splitting a long data request into several short sub-requests.

    **Workflow**

    1. do several checks
    2. split a long request into several short sub-requests
    3. loop over all sub-requests and return data id to download
    4. download all data based on data id
    5. extract all zip files and merge into one win32 format data
    6. cleanup
    '''
    # 1. check span:
    #    max limits is determined by the max number of data points
    #    allowed in code s4win2sacm.c
    if not isinstance(span, int):
        raise TypeError("span must be integer.")
    if not 1 <= span <= (2**31 - 1) / 6000:
        raise ValueError("Span is NOT in the allowed range [1, 357913]")

    # 2. check starttime and endtime
    time0 = NETWORK[code].starttime
    # time1 = UTCTime + JST(GMT+0900) - 2 hour delay
    time1 = datetime.utcnow() + timedelta(hours=9) + timedelta(hours=-2)
    if not isinstance(starttime, datetime):
        starttime = _string2datetime(starttime)
    endtime = starttime + timedelta(minutes=span)
    if not time0 <= starttime < endtime <= time1:
        msg = "Data not available in the time period. " + \
              "Call Client.info('{}') for help.".format(code)
        raise ValueError(msg)

    # 3. set max_span
    if self._code != code:  # update default max_span
        self._code = code
        self._max_span = self._get_allowed_span(code)
    if not (max_span and 1 <= max_span <= 60):
        max_span = self._max_span

    # 4. prepare jobs
    jobs = prepare_jobs(starttime, span, max_span)

    cnts = []
    ch_euc = set()
    logger.info("%s ~%s", starttime.strftime("%Y-%m-%d %H:%M"), span)

    # 5. request and download
    count = len(jobs)
    width = len(str(count))
    batch_size = 100  # to break the limitation of 150
    for begin in range(0, count, batch_size):
        # Only the jobs of the current batch are checked and downloaded.
        # Using the whole job list here would re-download earlier batches
        # and inspect jobs that have not been requested yet.
        batch = jobs[begin:begin + batch_size]

        # 5.1. request <=100 data
        for number, job in enumerate(batch, start=begin + 1):
            logger.info("[%s/%d] => %s ~%d",
                        str(number).zfill(width), count,
                        job.starttime.strftime("%Y-%m-%d %H:%M"),
                        job.span)
            job.id = self._request_waveform(code, job.starttime, job.span)

        # 5.2. check ids
        ids = [job.id for job in batch]
        if not any(ids):
            logger.error("No data requested succesuflly. Skipped.")
            return None, None
        # check if all ids are not None
        if not all(ids):
            logger.error("Fail to request some data. Skipped.")
            return None, None

        # 5.3. parallel downloading
        with ThreadPool(min(threads, len(batch))) as pool:
            for value in pool.map(self._download_waveform, batch):
                cnts.extend(value[0])
                ch_euc.add(value[1])

    def _resolve_output(filename):
        """Return ``(path, dirname)``; an explicit directory beats outdir."""
        dirname = os.path.dirname(filename)
        if dirname:
            return filename, dirname
        if outdir:
            return os.path.join(outdir, filename), outdir
        return filename, None

    # post processes
    # 1. always sort cnts by name/time to avoid use -s option of catwin32
    cnts = sorted(cnts)
    # always use the first ctable
    ch_euc = sorted(ch_euc)[0]

    # 2. merge all cnt files
    if not data:
        data = "{}_{}_{:d}.cnt".format(code,
                                       starttime.strftime("%Y%m%d%H%M"),
                                       span)
    data, _ = _resolve_output(data)
    merge(cnts, data)

    # 3. rename channeltable file
    if not ctable:
        ctable = "{}_{}.ch".format(code, starttime.strftime("%Y%m%d"))
    ctable, dirname = _resolve_output(ctable)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname, exist_ok=True)
    os.rename(ch_euc, ctable)

    # 4. cleanup
    for cnt in cnts:
        os.remove(cnt)

    return data, ctable