def _splitext(p: AnyStr_, sep: AnyStr_, altsep: AnyStr_, extsep: AnyStr_) -> Tuple_[AnyStr_, AnyStr_]:
    """Separate a pathname into ``(root, ext)``.

    The extension starts at the last occurrence of ``extsep`` in the final
    path component and runs to the end of ``p``. Dots that lead the final
    component do not start an extension, so a name made only of leading
    dots plus one suffix (e.g. ``.bashrc``) has an empty extension.

    Args:
        p: The pathname to split.
        sep: The primary path separator.
        altsep: An alternative path separator; ignored when falsy.
        extsep: The extension separator.

    Returns:
        ``(root, ext)`` such that ``root + ext == p``; ``ext`` may be empty.
    """
    # NOTE: This must work for text and bytes alike, hence the one-element
    # slices (indexing bytes would give an int) and ``p[:0]`` as the
    # type-preserving empty value.
    last_sep = p.rfind(sep)
    if altsep:
        last_sep = max(last_sep, p.rfind(altsep))

    last_dot = p.rfind(extsep)
    if last_dot > last_sep:
        # Only split if something other than a dot precedes the last dot
        # within the final path component.
        for pos in range(last_sep + 1, last_dot):
            if p[pos:pos + 1] != extsep:
                return p[:last_dot], p[last_dot:]

    return p, p[:0]
def deserialize(self, s: t.AnyStr) -> Deck:
    """Parse a textual deck list into a ``Deck``.

    Every line of the form
    ``[<sideboard indicator> ]<qty> [<EXPANSION>] <card name>`` adds
    ``qty`` copies of the printing returned by ``self._get_printing`` to
    the sideboard (when the indicator is present) or to the maindeck.
    Lines that do not match are skipped silently.

    Args:
        s: The serialized deck, one entry per line.

    Returns:
        A ``Deck`` built from the collected maindeck and sideboard.
    """
    maindeck = Multiset()
    sideboard = Multiset()

    # Raw string so that ``\s``, ``\d`` and ``\[`` reach the regex engine
    # without relying on Python passing through invalid string escapes
    # (which emits a warning on modern interpreters).
    # NOTE(review): the sideboard indicator is interpolated unescaped, so
    # any regex metacharacters in it are interpreted as regex; confirm
    # whether that is intended before wrapping it in ``re.escape``.
    pattern = re.compile(
        r'({}\s+)?(\d+) \[([A-Z0-9]*)\] (.*?)\s*$'.format(
            self._sideboard_indicator.rstrip()
        )
    )

    for ln in s.split('\n'):
        m = pattern.match(ln)
        if not m:
            continue
        is_sideboard, qty, expansion, name = m.groups()
        target = sideboard if is_sideboard else maindeck
        target.add(
            # Single slashes in the listed name are doubled before lookup.
            self._get_printing(name.replace('/', '//'), expansion),
            int(qty),
        )

    return Deck(
        maindeck,
        sideboard,
    )
def _detect_encoding_by_bom(
        sample: typing.AnyStr,
        default: typing.Optional[str] = None) -> typing.Optional[str]:
    """
    Guess the Unicode encoding of `sample` from its first four items.

    A BOM (Byte Order Mark), as defined in the `codecs` module, is looked
    for first. Without a BOM, the position and number of null items in the
    prefix are used instead. `default` is returned when nothing matches.
    """
    # JSON always starts with two ASCII characters, so counting the nulls
    # and looking at where they sit is enough to tell the encodings apart.
    head = sample[:4]

    # UTF-32 is tested before UTF-16 because the UTF-32-LE BOM begins with
    # the UTF-16-LE BOM.
    if head in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'  # BOM included
    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if head[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'  # BOM included

    nulls = head.count(_null)
    if nulls == 2:
        if head[::2] == _null2:  # 1st and 3rd are null
            return 'utf-16-be'
        if head[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Otherwise: not two valid UTF-16 ascii-range characters
    elif nulls == 3:
        if head[:3] == _null3:
            return 'utf-32-be'
        if head[1:] == _null3:
            return 'utf-32-le'
        # Otherwise: not a valid UTF-32 ascii-range character

    # No nulls at all, or an unrecognised null layout.
    return default
def __encode_fields(
    self,
    fields: typing.Dict[str, str],
    request_method: typing.AnyStr,
):
    """Build the request string for ``fields``.

    The fields are rendered as ``{key:"value",key:"value"}`` in iteration
    order. For a ``GET`` request (compared case-insensitively) the rendered
    string is additionally passed through ``base_64_encode``.
    """
    method = request_method.upper()

    # Values are wrapped in double quotes as-is; no escaping is applied.
    pairs = [name + ':"' + value + '"' for name, value in fields.items()]
    payload = "{" + ','.join(pairs) + "}"

    if method == 'GET':
        return base_64_encode(payload)
    return payload
def iupac_replace(sequence: typing.AnyStr):
    """Expand IUPAC ambiguity codes in ``sequence`` and compile the result.

    Each ambiguity letter (``M``, ``R``, ``W``, ``S``, ``Y``, ``K``, ``V``,
    ``H``, ``D``, ``B``, ``X``, ``N``) is replaced by a bracketed character
    class of the bases it stands for; every other character is kept. The
    expanded text is printed when the module-level ``debug`` flag is truthy.

    Returns:
        Whatever the module-level ``compile`` returns for the expanded text
        (presumably ``re.compile`` imported by name - confirm at file top).
    """
    # The replacement texts contain only A/C/G/T and brackets, none of which
    # is itself an ambiguity code, so a single translation pass is enough.
    expansions = str.maketrans({
        'M': '[AC]',
        'R': '[AG]',
        'W': '[AT]',
        'S': '[CG]',
        'Y': '[CT]',
        'K': '[GT]',
        'V': '[ACG]',
        'H': '[ACT]',
        'D': '[AGT]',
        'B': '[CGT]',
        'X': '[ACGT]',
        'N': '[ACGT]',
    })
    sequence = sequence.translate(expansions)
    if debug:
        print(sequence)
    return compile(sequence)
def should_descend(self, path: ty.AnyStr) -> bool:
    """Decide whether the directory at ``path`` may contain matches.

    ``path`` is split on ``self._sep`` and compared label by label against
    the compiled pattern labels in ``self._pat`` (``None`` marks a
    recursive label).

    Args:
        path: Directory path, with labels separated by ``self._sep``.

    Returns:
        ``True`` if scanning should continue below ``path``, ``False``
        otherwise.
    """
    # A pattern without any labels can never match anything below the
    # root, and indexing into it below would raise ``IndexError``.
    if not self._pat:
        return False

    last_idx = len(self._pat) - 1
    for idx, label in enumerate(path.split(self._sep)):
        pattern = self._pat[idx]

        # Always descend into any directory below a recursive pattern as we
        # cannot predict what we will later do a tail match on
        if pattern is None:
            return True

        # Do not descend further if we reached the last label of the pattern
        # (unless the final pattern label is a recursive match, see above)
        #
        # This is independent of whether this *directory* will be included
        # or not.
        if idx == last_idx:
            return False

        # Match the current pattern to decide whether to keep looking or not
        if not pattern.match(label):
            return False

    # The given path matched part of this pattern, so we should include this
    # directory to go further
    return True
def _translate_player_input(player_input: t.AnyStr) -> t.AnyStr:
    """
    Map a raw key press to a ``PlayerCommands`` value, ignoring case.

    Recognised keys are ``w``, ``s``, ``a``, ``d`` (movement), ``b``
    (backpack) and ``q`` (quit); anything else maps to
    ``PlayerCommands.UNKNOWN``.

    >>> _translate_player_input('w')
    'player_up'
    >>> _translate_player_input('s')
    'player_down'
    >>> _translate_player_input('a')
    'player_left'
    >>> _translate_player_input('d')
    'player_right'
    >>> _translate_player_input('q')
    'player_quit'
    >>> _translate_player_input('W')
    'player_up'
    >>> _translate_player_input('foo')
    'unknown'
    """
    key = player_input.lower()
    bindings = (
        ('w', PlayerCommands.PLAYER_UP),
        ('s', PlayerCommands.PLAYER_DOWN),
        ('a', PlayerCommands.PLAYER_LEFT),
        ('d', PlayerCommands.PLAYER_RIGHT),
        ('b', PlayerCommands.SHOW_BACKPACK),
        ('q', PlayerCommands.PLAYER_QUIT),
    )
    for bound_key, command in bindings:
        if key == bound_key:
            return command
    return PlayerCommands.UNKNOWN
def viz_predictions(
    predictions_path: typing.AnyStr,
    dataframe_path: typing.AnyStr,
    test_config_path: typing.AnyStr,
):
    """Displays a looping visualization of the GHI predictions saved by the evaluation script.

    This visualization requires OpenCV3+ ('cv2'), and will loop while refreshing a local window until the program
    is killed, or 'q' is pressed. The arrow keys allow the user to change which day is being shown.

    Args:
        predictions_path: path to a text file in which every line holds comma-separated float GHI values;
            the file must contain ``len(target_datetimes) * len(stations)`` lines.
        dataframe_path: path to a pickled dataframe whose index holds timestamps and which provides the
            columns ``<station>_GHI`` and ``<station>_CLEARSKY_GHI`` for every configured station.
        test_config_path: path to a ``.json`` file providing the keys ``stations``, ``target_datetimes``,
            ``start_bound``, ``end_bound`` and ``target_time_offsets``.

    Raises:
        AssertionError: if a path is not an existing file, or if the loaded data fails one of the
            consistency checks below.
    """
    # --- load the test configuration
    assert os.path.isfile(test_config_path) and test_config_path.endswith(
        ".json"), "invalid test config"
    with open(test_config_path, "r") as fd:
        test_config = json.load(fd)
    stations = test_config["stations"]
    target_datetimes = test_config["target_datetimes"]
    start_bound = datetime.datetime.fromisoformat(test_config["start_bound"])
    end_bound = datetime.datetime.fromisoformat(test_config["end_bound"])
    horiz_deltas = [
        pd.Timedelta(d).to_pytimedelta()
        for d in test_config["target_time_offsets"]
    ]
    # --- load the predictions; one text line per (station, target datetime) pair
    assert os.path.isfile(
        predictions_path), f"invalid preds file path: {predictions_path}"
    with open(predictions_path, "r") as fd:
        predictions = fd.readlines()
    assert len(predictions) == len(target_datetimes) * len(stations), \
        "predicted ghi sequence count mistmatch wrt target datetimes x station count"
    assert len(predictions) % len(stations) == 0
    predictions = np.asarray(
        [float(ghi) for p in predictions for ghi in p.split(",")])
    # reshaped to (station, target datetime, values-per-line)
    # NOTE(review): this assumes the file lists all lines of one station before the next - confirm with the writer
    predictions = predictions.reshape(
        (len(stations), len(target_datetimes), -1))
    pred_horiz = predictions.shape[-1]
    target_datetimes = pd.DatetimeIndex(
        [datetime.datetime.fromisoformat(t) for t in target_datetimes])
    # --- load the dataframe and crop it to [start_bound, end_bound)
    assert os.path.isfile(
        dataframe_path), f"invalid dataframe path: {dataframe_path}"
    # NOTE(review): unpickling can execute arbitrary code; only use with dataframe files from a trusted source
    dataframe = pd.read_pickle(dataframe_path)
    dataframe = dataframe[dataframe.index >= start_bound]
    dataframe = dataframe[dataframe.index < end_bound]
    assert dataframe.index.get_loc(
        start_bound) == 0, "invalid start bound (should land at first index)"
    assert len(dataframe.index.intersection(target_datetimes)) == len(target_datetimes), \
        "bad dataframe target datetimes overlap, index values are missing"
    # we will display 24-hour slices with some overlap (configured via hard-coded param below)
    time_window, time_overlap, time_sample = \
        datetime.timedelta(hours=24), datetime.timedelta(hours=3), datetime.timedelta(minutes=15)
    assert len(dataframe.asfreq("15min").index) == len(dataframe.index), \
        "invalid dataframe index padding (should have an entry every 15 mins)"
    # number of samples in one displayed slice: the window plus the overlap on both sides, bounds included
    sample_count = ((time_window + 2 * time_overlap) // time_sample) + 1
    day_count = int(math.ceil((end_bound - start_bound) / time_window))
    # all buffers start as NaN; entries without data are simply never overwritten
    clearsky_ghi_data = np.full((day_count, len(stations), sample_count),
                                fill_value=float("nan"),
                                dtype=np.float32)
    station_ghi_data = np.full((day_count, len(stations), sample_count),
                               fill_value=float("nan"),
                               dtype=np.float32)
    pred_ghi_data = np.full(
        (day_count, len(stations), pred_horiz, sample_count),
        fill_value=float("nan"),
        dtype=np.float32)
    # NOTE(review): newer pandas releases replace the 'closed' keyword of date_range with 'inclusive' - confirm
    # against the pandas version this project pins
    days_range = pd.date_range(start_bound, end_bound, freq=time_window, closed="left")
    for day_idx, day_start in enumerate(
            tqdm.tqdm(days_range, desc="preparing daytime GHI intervals")):
        window_start, window_end = day_start - time_overlap, day_start + time_window + time_overlap
        # positions of the window bounds counted in samples from 'start_bound' (may fall outside the dataframe)
        sample_start, sample_end = (
            window_start - start_bound) // time_sample, (
                window_end - start_bound) // time_sample
        for sample_iter_idx, sample_idx in enumerate(
                range(sample_start, sample_end + 1)):
            # the overlap can reach before the first or past the last dataframe row; leave those samples as NaN
            if sample_idx < 0 or sample_idx >= len(dataframe.index):
                continue
            sample_row = dataframe.iloc[sample_idx]
            sample_time = window_start + sample_iter_idx * time_sample
            # predictions only exist at the target datetimes; 'None' marks samples without any
            target_iter_idx = target_datetimes.get_loc(
                sample_time) if sample_time in target_datetimes else None
            for station_idx, station_name in enumerate(stations):
                clearsky_ghi_data[day_idx, station_idx, sample_iter_idx] = sample_row[
                    station_name + "_CLEARSKY_GHI"]
                station_ghi_data[day_idx, station_idx, sample_iter_idx] = sample_row[station_name + "_GHI"]
                if target_iter_idx is not None:
                    pred_ghi_data[day_idx, station_idx, :, sample_iter_idx] = predictions[
                        station_idx, target_iter_idx]
    # --- render one image per day up front so that the display loop below only swaps images
    displays = []
    for day_idx, day_start in enumerate(
            tqdm.tqdm(days_range, desc="preparing plots")):
        displays.append(
            draw_daily_ghi(
                clearsky_ghi=clearsky_ghi_data[day_idx],
                station_ghi=station_ghi_data[day_idx],
                pred_ghi=pred_ghi_data[day_idx],
                stations=stations,
                horiz_deltas=horiz_deltas,
                window_start=(day_start - time_overlap),
                window_end=(day_start + time_window + time_overlap),
                sample_step=time_sample,
            ))
    display = np.stack(displays)
    # --- interactive loop: refresh the window and poll the keyboard every 100 ms
    day_idx = 0
    while True:
        cv.imshow("ghi", display[day_idx])
        ret = cv.waitKey(100)
        if ret == ord('q') or ret == 27:  # q or ESC
            break
        elif ret == 81 or ret == 84:  # UNIX: left or down arrow
            day_idx = max(day_idx - 1, 0)
        elif ret == 82 or ret == 83:  # UNIX: right or up arrow
            day_idx = min(day_idx + 1, len(displays) - 1)
def __init__(self, pat: ty.AnyStr, *, period_special: bool = True):
    """
    Compile the glob pattern *pat* into one regular expression per path label

    Arguments
    ---------
    pat
        The glob pattern to use for matching
    period_special
        Whether a leading period in file/directory names is treated
        specially: if true, pattern labels that do not themselves start
        with a period are compiled so that they refuse names starting with
        a period (hence ``*``, ``?`` and ``[…]`` will not match it) – this
        is the traditional behaviour, but many modern shells allow one to
        disable it
    """
    self.period_special = period_special  # type: bool

    # Literals used below, passed through `utils.maybe_fsencode` together
    # with *pat* (presumably so that they end up with the same string type
    # as *pat* – see that helper)
    self._sep = utils.maybe_fsencode(os.path.sep, pat)  # type: ty.AnyStr
    dblstar = utils.maybe_fsencode("**", pat)  # type: ty.AnyStr
    dot = utils.maybe_fsencode(".", pat)  # type: ty.AnyStr
    # Negative look-ahead rejecting a period at the current position
    pat_ndot = utils.maybe_fsencode(r"(?![.])", pat)  # type: ty.AnyStr

    # Normalize path separator
    if os.path.altsep:
        pat = pat.replace(utils.maybe_fsencode(os.path.altsep, pat), self._sep)

    # Sanity checks for stuff that will definitely NOT EVER match
    # (there is another one in the loop below)
    assert not os.path.isabs(
        pat), "Absolute matching patterns will never match"

    # Note the extra final slash for its effect of only matching directories
    #
    # (TBH, I find it hard to see how that is useful, but everybody does it
    # and it keeps things consistent overall – something to only match files
    # would be nice however.)
    self._dir_only = pat.endswith(self._sep)  # type: bool

    # One entry per remaining pattern label: a compiled regular expression,
    # or `None` for a recursive (double-star) label
    self._pat = []  # type: ty.List[ty.Optional[re_pattern_t]]
    for label in pat.split(self._sep):
        # Skip over useless path components
        if len(label) < 1 or label == dot:
            continue

        assert label != dot + dot, 'Matching patterns containing ".." will never match'

        if label == dblstar:
            self._pat.append(None)
        elif dblstar in label:
            raise NotImplementedError(
                "Using double-star (**) and other characters in the same glob "
                "path label ({0}) is not currently supported – please do file "
                "an issue if you need this!".format(os.fsdecode(label)))
        else:
            #re_expr: ty.AnyStr
            if not isinstance(label, bytes):
                re_expr = fnmatch.translate(label)
            else:
                # `fnmatch.translate` is only called with text here, so
                # bytes labels take a round-trip through latin-1, which
                # maps every byte value to exactly one code point
                re_expr = fnmatch.translate(
                    label.decode("latin-1")).encode("latin-1")

            # Labels that explicitly start with a period stay matchable
            if period_special and not label.startswith(dot):
                re_expr = pat_ndot + re_expr
            self._pat.append(re.compile(re_expr))
def _fixed_smithsonian_id(id: typing.AnyStr) -> typing.AnyStr:
    """Return ``id`` without a leading ``http://n2t.net/`` resolver prefix.

    Identifiers that do not start with the prefix are returned unchanged.
    """
    n2t_resolver = "http://n2t.net/"
    return id.removeprefix(n2t_resolver)
def _is_path(path: typing.AnyStr) -> bool:
    """Tell whether ``path`` is a ``pathlib.Path`` to an existing csv file.

    Anything that is not a ``pathlib.Path`` instance (plain strings
    included) yields ``False``. The suffix comparison is case-insensitive.
    """
    if not isinstance(path, pathlib.Path):
        return False
    if not path.exists():
        return False
    if not path.is_file():
        return False
    return path.suffix.lower() == ".csv"
def charCountBiggerEqualThanX(text: typing.AnyStr, cha: typing.AnyStr):
    """Return how many non-overlapping times ``cha`` occurs in ``text``.

    The count is also printed when the module-level ``debug`` flag is
    truthy. No threshold comparison is made here, despite the name.
    """
    occurrences = text.count(cha)
    if debug:
        print(occurrences)
    return occurrences
def __call__(self, data: typing.AnyStr, **metadata) -> CVIEWSWAPCASE:
    """Render ``data`` with the case of every cased character swapped.

    Returns:
        A pair of the view description ``"case-swapped text"`` and the
        swapped data passed through ``contentviews.format_text``.
        ``metadata`` is accepted but not used.
    """
    swapped = data.swapcase()
    formatted = contentviews.format_text(swapped)
    return "case-swapped text", formatted
def args_line2dict(argv: typing.AnyStr, output_dict: typing.Dict):
    """Store a ``key=value`` argument into ``output_dict`` in place.

    The split happens at the first ``=`` only, so the value may itself
    contain ``=``. An argument without any ``=`` is ignored. Always
    returns ``None``.
    """
    key, separator, value = argv.partition('=')
    if not separator:
        # No '=' present: nothing to record.
        return
    output_dict[key] = value
def write(self, s: typing.AnyStr):
    """Write ``s`` to ``self.fd``, encoding text when the stream is binary.

    Args:
        s: Data to write. A ``str`` is encoded with the default encoding
            of ``str.encode`` (UTF-8) when ``self.fd`` was opened in a
            binary mode; otherwise it is handed over unchanged.
    """
    # Whether the stream wants bytes is signalled by 'b' in its mode, not
    # by 'w': a text stream opened with "w" must receive ``str``, and a
    # binary stream opened with e.g. "ab" still needs the encoding step.
    if isinstance(s, str) and 'b' in self.fd.mode:
        s = s.encode()
    self.fd.write(s)
def _encode(value: t.AnyStr, encoding: str = "utf-8") -> bytes:
    """Return ``value`` as bytes.

    Text is encoded with ``encoding``; anything that is not a ``str`` is
    returned untouched.
    """
    return value.encode(encoding) if isinstance(value, str) else value
def _fixed_sesar_id(id: typing.AnyStr) -> typing.AnyStr:
    """Return ``id`` with every lowercase ``igsn`` upper-cased to ``IGSN``.

    The replacement applies to all occurrences, wherever they appear in
    the identifier.
    """
    lowercase_scheme, canonical_scheme = "igsn", "IGSN"
    return id.replace(lowercase_scheme, canonical_scheme)