Example #1
def _splitext(p: AnyStr_, sep: AnyStr_, altsep: AnyStr_,
              extsep: AnyStr_) -> Tuple_[AnyStr_, AnyStr_]:
    """Split the extension from a pathname.

    Extension is everything from the last dot to the end, ignoring
    leading dots.  Returns "(root, ext)"; ext may be empty."""
    # NOTE: This code must work for text and bytes strings.

    sepIndex = p.rfind(sep)
    if altsep:
        altsepIndex = p.rfind(altsep)
        sepIndex = max(sepIndex, altsepIndex)

    dotIndex = p.rfind(extsep)
    if dotIndex > sepIndex:
        # skip all leading dots
        filenameIndex = sepIndex + 1
        while filenameIndex < dotIndex:
            if p[filenameIndex:filenameIndex+1] != extsep:
                return p[:dotIndex], p[dotIndex:]
            filenameIndex += 1

    return p, p[:0]
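A minimal usage sketch (assuming the aliases `AnyStr_` and `Tuple_` refer to `typing.AnyStr` and `typing.Tuple`); the same function handles text and bytes paths alike, and an empty `altsep` disables the alternate-separator branch:

# Leading dots are filename characters, not extension separators.
print(_splitext('/tmp/archive.tar.gz', '/', '', '.'))  # ('/tmp/archive.tar', '.gz')
print(_splitext(b'.bashrc', b'/', b'', b'.'))          # (b'.bashrc', b'')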
Example #2
    def deserialize(self, s: t.AnyStr) -> Deck:
        maindeck = Multiset()
        sideboard = Multiset()
        # Raw string so the regex escapes \s and \d reach the re engine intact.
        pattern = re.compile(r'({}\s+)?(\d+) \[([A-Z0-9]*)\] (.*?)\s*$'.format(
            self._sideboard_indicator.rstrip()))
        for ln in s.split('\n'):
            m = pattern.match(ln)
            if m:
                is_sideboard, qty, expansion, name = m.groups()
                (sideboard if is_sideboard else maindeck).add(
                    self._get_printing(name.replace('/', '//'), expansion),
                    int(qty),
                )

        return Deck(
            maindeck,
            sideboard,
        )
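A standalone sketch of the line format the regex accepts, with a hypothetical sideboard indicator "SB:" (the real value comes from self._sideboard_indicator, and Deck, Multiset and _get_printing belong to the host project):

import re

pattern = re.compile(r'(SB:\s+)?(\d+) \[([A-Z0-9]*)\] (.*?)\s*$')
print(pattern.match('4 [MM2] Tarmogoyf').groups())
# (None, '4', 'MM2', 'Tarmogoyf')
print(pattern.match('SB: 2 [LEA] Swords to Plowshares').groups())
# ('SB: ', '2', 'LEA', 'Swords to Plowshares')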
Example #3
def _detect_encoding_by_bom(
        sample: typing.AnyStr,
        default: typing.Optional[str] = None) -> typing.Optional[str]:
    """
    Detects the encoding of a `sample` string, among various Unicode
    variants, by looking at the BOM (Byte Order Mark) as defined in
    the `codecs` module.
    """

    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.

    sample = sample[:4]

    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'  # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'  # BOM included

    nullcount = sample.count(_null)

    if nullcount == 0:
        return default

    if nullcount == 2:
        if sample[::2] == _null2:  # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters

    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character

    return default
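The module-level `_null*` constants are not part of the snippet; a plausible reconstruction plus a quick check (the definitions below are an assumption, and the function effectively requires a bytes sample since it compares against the `codecs` BOM constants):

import codecs

_null = b'\x00'    # assumed definitions, not taken from the source
_null2 = _null * 2
_null3 = _null * 3

print(_detect_encoding_by_bom('"a"'.encode('utf-16-le')))  # 'utf-16-le'
print(_detect_encoding_by_bom(codecs.BOM_UTF8 + b'{}'))    # 'utf-8-sig'
print(_detect_encoding_by_bom(b'{"a": 1}', 'utf-8'))       # 'utf-8' (default)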
Example #4
    def __encode_fields(
        self,
        fields: typing.Dict[str, str],
        request_method: typing.AnyStr,
    ):
        """ create request string """

        sep = ','
        encoded_string = []
        request_method = request_method.upper()

        for k, v in fields.items():
            encoded_string.append(k + ':"' + v + '"')

        encoded_string = "{" + sep.join(encoded_string) + "}"

        if 'GET' == request_method:
            encoded_string = base_64_encode(encoded_string)

        return encoded_string
Example #5
def iupac_replace(sequence: typing.AnyStr):
    # Expand IUPAC ambiguity codes into regex character classes; assumes
    # ``from re import compile`` and a module-level ``debug`` flag.
    iupac_regex = {
        'M': '[AC]',
        'R': '[AG]',
        'W': '[AT]',
        'S': '[CG]',
        'Y': '[CT]',
        'K': '[GT]',
        'V': '[ACG]',
        'H': '[ACT]',
        'D': '[AGT]',
        'B': '[CGT]',
        'X': '[ACGT]',
        'N': '[ACGT]'
    }
    for i, j in iupac_regex.items():
        sequence = sequence.replace(i, j)
    if debug:
        print(sequence)
    return compile(sequence)
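A short usage sketch: the compiled pattern matches any concrete DNA string covered by the degenerate sequence (`debug` is defined here because the function reads it as a global):

debug = False  # module-level flag assumed by iupac_replace
pattern = iupac_replace('GRYT')  # expands to 'G[AG][CT]T'
print(bool(pattern.match('GACT')))  # True
print(bool(pattern.match('GCCT')))  # False: 'C' is not in [AG]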
Example #6
    def should_descend(self, path: ty.AnyStr) -> bool:
        for idx, label in enumerate(path.split(self._sep)):
            # Always descend into any directory below a recursive pattern as we
            # cannot predict what we will later do a tail match on
            if self._pat[idx] is None:
                return True

            # Do not descend further if we reached the last label of the pattern
            # (unless the final pattern label is a recursive match, see above)
            #
            # This is independent of whether this *directory* will be included
            # or not.
            if idx == (len(self._pat) - 1):
                return False

            # Match the current pattern to decide whether to keep looking or not
            if not self._pat[idx].match(label):
                return False

        # The given path matched part of this pattern, so we should include this
        # directory to go further
        return True
Example #7
def _translate_player_input(player_input: t.AnyStr) -> t.AnyStr:
    """
    >>> _translate_player_input('w')
    'player_up'
    >>> _translate_player_input('s')
    'player_down'
    >>> _translate_player_input('a')
    'player_left'
    >>> _translate_player_input('d')
    'player_right'
    >>> _translate_player_input('q')
    'player_quit'
    >>> _translate_player_input('W')
    'player_up'
    >>> _translate_player_input('foo')
    'unknown'
    """
    player_input = player_input.lower()

    # Single dispatch table instead of a chain of one-element membership tests
    commands = {
        'w': PlayerCommands.PLAYER_UP,
        's': PlayerCommands.PLAYER_DOWN,
        'a': PlayerCommands.PLAYER_LEFT,
        'd': PlayerCommands.PLAYER_RIGHT,
        'b': PlayerCommands.SHOW_BACKPACK,
        'q': PlayerCommands.PLAYER_QUIT,
    }
    return commands.get(player_input, PlayerCommands.UNKNOWN)
Example #8
def viz_predictions(
    predictions_path: typing.AnyStr,
    dataframe_path: typing.AnyStr,
    test_config_path: typing.AnyStr,
):
    """Displays a looping visualization of the GHI predictions saved by the evaluation script.
    This visualization requires OpenCV3+ ('cv2'), and will loop while refreshing a local window until the program
    is killed, or 'q' is pressed. The arrow keys allow the user to change which day is being shown.
    """
    assert os.path.isfile(test_config_path) and test_config_path.endswith(
        ".json"), "invalid test config"
    with open(test_config_path, "r") as fd:
        test_config = json.load(fd)
    stations = test_config["stations"]
    target_datetimes = test_config["target_datetimes"]
    start_bound = datetime.datetime.fromisoformat(test_config["start_bound"])
    end_bound = datetime.datetime.fromisoformat(test_config["end_bound"])
    horiz_deltas = [
        pd.Timedelta(d).to_pytimedelta()
        for d in test_config["target_time_offsets"]
    ]
    assert os.path.isfile(
        predictions_path), f"invalid preds file path: {predictions_path}"
    with open(predictions_path, "r") as fd:
        predictions = fd.readlines()
    assert len(predictions) == len(target_datetimes) * len(stations), \
        "predicted ghi sequence count mistmatch wrt target datetimes x station count"
    assert len(predictions) % len(stations) == 0
    predictions = np.asarray(
        [float(ghi) for p in predictions for ghi in p.split(",")])
    predictions = predictions.reshape(
        (len(stations), len(target_datetimes), -1))
    pred_horiz = predictions.shape[-1]
    target_datetimes = pd.DatetimeIndex(
        [datetime.datetime.fromisoformat(t) for t in target_datetimes])
    assert os.path.isfile(
        dataframe_path), f"invalid dataframe path: {dataframe_path}"
    dataframe = pd.read_pickle(dataframe_path)
    dataframe = dataframe[dataframe.index >= start_bound]
    dataframe = dataframe[dataframe.index < end_bound]
    assert dataframe.index.get_loc(
        start_bound) == 0, "invalid start bound (should land at first index)"
    assert len(dataframe.index.intersection(target_datetimes)) == len(target_datetimes), \
        "bad dataframe target datetimes overlap, index values are missing"
    # we will display 24-hour slices with some overlap (configured via hard-coded param below)
    time_window, time_overlap, time_sample = \
        datetime.timedelta(hours=24), datetime.timedelta(hours=3), datetime.timedelta(minutes=15)
    assert len(dataframe.asfreq("15min").index) == len(dataframe.index), \
        "invalid dataframe index padding (should have an entry every 15 mins)"
    sample_count = ((time_window + 2 * time_overlap) // time_sample) + 1
    day_count = int(math.ceil((end_bound - start_bound) / time_window))
    clearsky_ghi_data = np.full((day_count, len(stations), sample_count),
                                fill_value=float("nan"),
                                dtype=np.float32)
    station_ghi_data = np.full((day_count, len(stations), sample_count),
                               fill_value=float("nan"),
                               dtype=np.float32)
    pred_ghi_data = np.full(
        (day_count, len(stations), pred_horiz, sample_count),
        fill_value=float("nan"),
        dtype=np.float32)
    days_range = pd.date_range(start_bound,
                               end_bound,
                               freq=time_window,
                               closed="left")  # note: newer pandas spells this inclusive="left"
    for day_idx, day_start in enumerate(
            tqdm.tqdm(days_range, desc="preparing daytime GHI intervals")):
        window_start, window_end = day_start - time_overlap, day_start + time_window + time_overlap
        sample_start, sample_end = (
            window_start - start_bound) // time_sample, (
                window_end - start_bound) // time_sample
        for sample_iter_idx, sample_idx in enumerate(
                range(sample_start, sample_end + 1)):
            if sample_idx < 0 or sample_idx >= len(dataframe.index):
                continue
            sample_row = dataframe.iloc[sample_idx]
            sample_time = window_start + sample_iter_idx * time_sample
            target_iter_idx = target_datetimes.get_loc(
                sample_time) if sample_time in target_datetimes else None
            for station_idx, station_name in enumerate(stations):
                clearsky_ghi_data[day_idx, station_idx,
                                  sample_iter_idx] = sample_row[
                                      station_name + "_CLEARSKY_GHI"]
                station_ghi_data[day_idx, station_idx,
                                 sample_iter_idx] = sample_row[station_name +
                                                               "_GHI"]
                if target_iter_idx is not None:
                    pred_ghi_data[day_idx, station_idx, :,
                                  sample_iter_idx] = predictions[
                                      station_idx, target_iter_idx]
    displays = []
    for day_idx, day_start in enumerate(
            tqdm.tqdm(days_range, desc="preparing plots")):
        displays.append(
            draw_daily_ghi(
                clearsky_ghi=clearsky_ghi_data[day_idx],
                station_ghi=station_ghi_data[day_idx],
                pred_ghi=pred_ghi_data[day_idx],
                stations=stations,
                horiz_deltas=horiz_deltas,
                window_start=(day_start - time_overlap),
                window_end=(day_start + time_window + time_overlap),
                sample_step=time_sample,
            ))
    display = np.stack(displays)
    day_idx = 0
    while True:
        cv.imshow("ghi", display[day_idx])
        ret = cv.waitKey(100)
        if ret == ord('q') or ret == 27:  # q or ESC
            break
        elif ret == 81 or ret == 84:  # UNIX: left or down arrow
            day_idx = max(day_idx - 1, 0)
        elif ret == 82 or ret == 83:  # UNIX: right or up arrow
            day_idx = min(day_idx + 1, len(displays) - 1)
Example #9
    def __init__(self, pat: ty.AnyStr, *, period_special: bool = True):
        """
		Arguments
		---------
		pat
			The glob pattern to use for matching
		period_special
			Whether a leading period in file/directory names should be matchable by
			``*``, ``?`` and ``[…]`` – traditionally they are not, but many modern
			shells allow one to disable this behaviour
		"""
        self.period_special = period_special  # type: bool

        self._sep = utils.maybe_fsencode(os.path.sep, pat)  # type: ty.AnyStr
        dblstar = utils.maybe_fsencode("**", pat)  # type: ty.AnyStr
        dot = utils.maybe_fsencode(".", pat)  # type: ty.AnyStr
        pat_ndot = utils.maybe_fsencode(r"(?![.])", pat)  # type: ty.AnyStr

        # Normalize path separator
        if os.path.altsep:
            pat = pat.replace(utils.maybe_fsencode(os.path.altsep, pat),
                              self._sep)

        # Sanity checks for stuff that will definitely NOT EVER match
        # (there is another one in the loop below)
        assert not os.path.isabs(
            pat), "Absolute matching patterns will never match"

        # Note the extra final slash for its effect of only matching directories
        #
        # (TBH, I find it hard to see how that is useful, but everybody does it
        #  and it keeps things consistent overall – something to only match files
        #  would be nice however.)
        self._dir_only = pat.endswith(self._sep)  # type: bool

        self._pat = []  # type: ty.List[ty.Optional[re_pattern_t]]
        for label in pat.split(self._sep):
            # Skip over useless path components
            if len(label) < 1 or label == dot:
                continue

            assert label != dot + dot, 'Matching patterns containing ".." will never match'

            if label == dblstar:
                self._pat.append(None)
            elif dblstar in label:
                raise NotImplementedError(
                    "Using double-star (**) and other characters in the same glob "
                    "path label ({0}) is not currently supported – please do file "
                    "an issue if you need this!".format(os.fsdecode(label)))
            else:
                #re_expr: ty.AnyStr
                if not isinstance(label, bytes):
                    re_expr = fnmatch.translate(label)
                else:
                    re_expr = fnmatch.translate(
                        label.decode("latin-1")).encode("latin-1")

                if period_special and not label.startswith(dot):
                    re_expr = pat_ndot + re_expr
                self._pat.append(re.compile(re_expr))
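For reference, `fnmatch.translate` is what turns each glob label into a regular expression here; its exact output varies slightly across CPython versions:

import fnmatch

# The translated pattern is fully anchored, which is why prepending the
# (?![.]) lookahead above is enough to veto a leading dot.
print(fnmatch.translate('*.txt'))  # e.g. '(?s:.*\\.txt)\\Z'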
Example #10
def _fixed_smithsonian_id(id: typing.AnyStr) -> typing.AnyStr:
    # str.removeprefix() requires Python 3.9+
    id_no_n2t = id.removeprefix("http://n2t.net/")
    return id_no_n2t
Example #11
def _is_path(path: typing.AnyStr) -> bool:
    """Check if the csv file is a valid path"""
    return (isinstance(path, pathlib.Path) and path.exists()
            and path.is_file() and path.suffix.lower() == ".csv")
Example #12
def charCountBiggerEqualThanX(text: typing.AnyStr, cha: typing.AnyStr):
    """Return the number of occurrences of ``cha`` in ``text``."""
    res = text.count(cha)
    if debug:
        print(res)
    return res
Example #13
    def __call__(self, data: typing.AnyStr, **metadata) -> CVIEWSWAPCASE:
        return "case-swapped text", contentviews.format_text(data.swapcase())
Example #14
def args_line2dict(argv: typing.AnyStr, output_dict: typing.Dict):
    r = argv.split('=', maxsplit=1)
    if len(r) != 2:
        return
    output_dict[r[0]] = r[1]
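A quick usage sketch: each `key=value` token lands in the target dict, and tokens without an '=' are silently ignored:

opts = {}
for arg in ['mode=fast', 'level=3', 'bogus']:
    args_line2dict(arg, opts)
print(opts)  # {'mode': 'fast', 'level': '3'}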
Example #15
    def write(self, s: typing.AnyStr):
        # Encode text input up front; keying off 'w' in the mode string
        # assumes the wrapped fd was opened for writing in binary mode.
        if isinstance(s, str) and 'w' in self.fd.mode:
            s = s.encode()

        self.fd.write(s)
Example #16
def _encode(value: t.AnyStr, encoding: str = "utf-8") -> bytes:
    if isinstance(value, str):
        return value.encode(encoding)
    return value
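Usage is symmetric over both members of `AnyStr`:

print(_encode('héllo'))         # b'h\xc3\xa9llo' (str is encoded)
print(_encode(b'already-raw'))  # b'already-raw' (bytes pass through)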
Example #17
def _fixed_sesar_id(id: typing.AnyStr) -> typing.AnyStr:
    fixed_id = id.replace("igsn", "IGSN")
    return fixed_id