def doc(elt):
    """Show the `show_doc` output for `elt` in the preview window.

    The markdown returned by `show_doc(elt, disp=False)` is converted with
    `md2html`. When `IN_COLAB` is true the HTML is rendered through the
    `html` cell magic; otherwise it is handed to `page.page`, and if that
    call raises, the markdown is displayed inline instead.
    """
    md = show_doc(elt, disp=False)
    output = md2html(md)
    if IN_COLAB:
        get_ipython().run_cell_magic('html', '', output)
        return
    try:
        page.page({'text/html': output})
    # Catch `Exception`, not everything: a bare `except:` would also swallow
    # KeyboardInterrupt and SystemExit.
    except Exception:
        display(Markdown(md))
add_docs, #Func with kwargs Path.ls #Monkey-patched ] ################### from local.notebook.showdoc import doc_link, show_doc ###################### # add_doc_links(text) = add doc link to the text where a func/class appeared # very convenient to add doc links when writing docs def add_doc_links(text): "Search for doc links for any item between backticks in `text`." pat = re.compile("\[`([^`]*)`\](?:\([^)]*\))|`([^`]*)`") def _replace_link(m): return doc_link(m.group(1) or m.group(2)) return re.sub(pat, _replace_link, text) # This function not only add links to backstick keywords, it also update the links that are already in the text. tst = add_doc_links('This is an example of `Pipeline`') assert tst == "This is an example of [`Pipeline`](/data.pipeline.html#Pipeline)" tst = add_doc_links('Here we alread add a link in [`Tensor`](fake)') assert tst == "Here we alread add a link in [`Tensor`](https://pytorch.org/docs/stable/tensors.html#torch-tensor)" show_doc(Pipeline)
from local.test import * from local.imports import * from local.notebook.showdoc import show_doc from local.core import * from local.core import _listify #################### #### run this block to rewrite class L from scratch # class L(GetAttr, metaclass=NewChkMeta): # "Behaves like a list of `items` but can also index with list of indices or masks" # _xtra = [o for o in dir(list) if not o.startswith('_')] #################### show_doc(L) @patch def L_doc(cls: L): """ why need `L`? - although with `_listify`, we can make everything a list - but don't we always wish for more features to work on a list of things? - why don't we add more flexibilities and functionalities beyond 'list' What new features `NewChkMeta` offer us with `L`? - create a new L object from values `items` - if `items` is instance of `L`, return the instance What new features `GetAttr` offer to `L`? - inherit from `GetAttr`, `L` can borrow others' methods - it actually can borrow all methods from `list` with `_xtra`
# `to_float(b)` = map the tensors in `b` to float32, leaving int64/int32/int16 tensors alone
def to_float(b):
    """Cast the tensors in `b` with `x.float()`, except wide integer tensors.

    A leaf whose dtype is `torch.int64`, `torch.int32` or `torch.int16` is
    returned unchanged; any other leaf (e.g. int8 or another float width) is
    converted with `x.float()`.
    The per-leaf function is dispatched through `apply`, which is defined
    elsewhere and is presumably what walks nested containers (confirm there).
    """
    untouched_dtypes = (torch.int64, torch.int32, torch.int16)

    def _cast(x):
        if x.dtype in untouched_dtypes:
            return x
        return x.float()

    return apply(_cast, b)

# torch.int8, 16, 32, 64
b = [tensor(1, 2), tensor(1.3394858, 3.59483)]
apply(lambda x: x.dtype, b)
b = to_half(b)
apply(lambda x: x.dtype, b)
b = [tensor(1, 2), tensor(3.3, 4.1)]
apply(lambda x: x.dtype, b)
b = to_float(b)
apply(lambda x: x.dtype, b)
show_doc(torch.float)
# %%
#export
def test(a, b, cmp, cname=None):
    """Assert that `cmp(a, b)` is truthy.

    On failure the assertion message shows `cname` (or `cmp.__name__` when
    `cname` is None) followed by `a` and `b` on separate lines.
    """
    label = cmp.__name__ if cname is None else cname
    assert cmp(a, b), f"{label}:\n{a}\n{b}"

# %%
test([1, 2], [1, 2], operator.eq)
test_fail(lambda: test([1, 2], [1], operator.eq))
test([1, 2], [1], operator.ne)
test_fail(lambda: test([1, 2], [1, 2], operator.ne))

# %%
show_doc(all_equal)

# %%
test(['abc'], ['abc'], all_equal)

# %%
show_doc(equals)

# %%
test([['abc'], ['a']], [['abc'], ['a']], equals)

# %%
#export
def nequals(a, b):
    "Return the negation of `equals(a, b)`."
    are_equal = equals(a, b)
    return not are_equal
one_batch="Grab first batch of `dl`") # %% tfm = Transform(torch.neg, decodes=torch.neg) dummy_tfm = Transform(noop, assoc=Item) start = range(50) tl = TfmdList(start, dummy_tfm) tdl = TfmdDL(tl, tfm, is_tuple=False, bs=4) test_eq(start, tdl.dataset) test_eq(len(tdl), (len(tl) - 1) // 4 + 1) test_eq(tdl.batch_size, 4) # %% markdown # ### Methods # %% show_doc(TfmdDL.one_batch) # %% b = tdl.one_batch() test_eq([0, -1, -2, -3], b) # %% show_doc(TfmdDL.decode) # %% test_eq(tdl.decode(b), [[0, 1, 2, 3]]) # %% show_doc(TfmdDL.show_batch) # %% test_stdout(tdl.show_batch(), """(tensor(0),) (tensor(1),) (tensor(2),) (tensor(3),)""") # %% markdown
purpose: 1. to create an image of bw or color cross, we need a tensor first; 2. by change values in the tensor to add pattern or color 3. permutate to change the dim position of channels is essential in creating images Note: this is just for example usage I think """ if bw: im = torch.zeros(5, 5) im[2, :] = 1. im[:, 2] = 1. else: im = torch.zeros(3, 5, 5) im[0, 2, :] = 1. im[1, :, 2] = 1. return im make_cross_image() plt.imshow(make_cross_image(), cmap="Greys") make_cross_image(False) make_cross_image(False).shape make_cross_image(False).permute(1, 2, 0).shape plt.imshow(make_cross_image(False).permute(1, 2, 0)) ########################################################################### # what does permute actually do to a tensor show_doc(make_cross_image(False).permute) make_cross_image(False)[0, :, :] make_cross_image(False).permute(1, 2, 0)[:, :, 0]
###################
def to_detach(b, cpu=True):
    """Detach the tensors in `b`, optionally moving them to the CPU.

    Each leaf handed over by `apply` is processed as follows:
    - a non-`Tensor` value is returned unchanged;
    - a `Tensor` is detached with `x.detach()` and, when `cpu` is true,
      moved with `.cpu()`.
    `apply` is defined elsewhere; it is presumably what recurses through
    nested containers (confirm there).
    """
    def _detach_leaf(x, cpu=True):
        if isinstance(x, Tensor):
            detached = x.detach()  # drop the link to the autograd graph
            return detached.cpu() if cpu else detached
        return x

    return apply(_detach_leaf, b, cpu=cpu)

b = tensor(1, 2)
to_detach(b, cpu=True)
b = [torch.Tensor([1, 2]), tensor(3, 4), tensor(5, 7)]
to_detach(b, cpu=True)
t = tensor(1, 2, 3)
show_doc(t.detach)
how does `__getattr__` work? 0. when calling `a_instance.b_method()`, it runs below 1. make sure `_xtra` is not None 2. allow `a.k` to return `a.default.k` 3. if `k` is not in _xtra, raise AttributeError """ assert self._xtra, "Inherited from `GetAttr` but no `_xtra` attrs listed" if k in self._xtra: return getattr(self.default, k) raise AttributeError(k) def __dir__(self): return custom_dir(self, self._xtra) show_doc(GetAttr) ############################################ # how _C borrow str.lower to use class _C(GetAttr): default, _xtra = 'Hi', ['lower'] t = _C() t.default t._xtra t.__getattr__('lower') t.__getattr__('lower')() t.lower() test_fail(lambda: t.upper(), contains='upper')
from local.test import * from local.imports import * from local.notebook.showdoc import show_doc from local.core import * ################### # `uniqueify(x, sort=False, bidir=False, start=None)` # > = return a unique list # > `x` = a list of values, duplicated, and not sorted # > `sort = True` = sort the unique list # > `bidir=True` = also return a dict where the unique list are the keys # > `start=None` = if not None, then add `start` on to the unique list show_doc(uniqueify) def uniqueify(x, sort=False, bidir=False, start=None): """ why uniquefy(...): 1. of course, we want to be able to get unique values from a long list with duplicated values 2. also we want the freedom to add extra values to the front of the unique list 3. also we want the flexibility for the list to be sorted 4. even better, we want the flexibility to output the unique list with index as well. how to achieve it? 1. `(OrderedDict.fromkeys(x).keys())` achieve step 1 2. L.__add__ achieves step 2 3. L.sort() -> step 3 4. enumerate(res) -> step 4 Note: 1. x has a list-like, but values have to be numeric, Path won't do here.
from local.test import *
from local.imports import *
from local.notebook.showdoc import show_doc
from local.core import *

######################
# `replicate(item,match)`
# > repeat `item` `len(match)` times inside a tuple
# > `item` = single value, list or tuple
show_doc(replicate)

def replicate(item, match):
    """Return a tuple holding `item` once per element of `match`.

    Only `len(match)` is used; the contents of `match` are ignored.
    The tuple repeats the same `item` object; no copy of `item` is made.
    """
    return (item, ) * len(match)

# The examples previously passed an unbound name `t`; `match` is the list
# defined here to drive the number of repetitions.
match = [1, 1]
item = [1, 2]
test_eq(replicate(item, match), ([1, 2], [1, 2]))
test_eq(replicate(1, match), (1, 1))
test_eq(replicate((1, 2), match), ((1, 2), (1, 2)))
from local.imports import *
from local.notebook.showdoc import show_doc
from local.core import test_fail, test, equals

###################
def test_eq(a,b):
    """`test` that `a==b`.

    Shorthand for calling `test` with `equals` as the comparison and '==' as
    the name reported when the check fails.
    """
    comparator, label = equals, '=='
    test(a, b, comparator, label)

test_eq([1,2],[1,2])
test_eq([1,2],map(int,[1,2]))
test_eq(torch.tensor(1),1)
test_eq(1,torch.tensor(1))
test_eq(torch.tensor([1,2]),torch.tensor([1,2]))
test_eq(array([1,2]),array([1,2]))
test_eq([array([1,2]),3],[array([1,2]),3])
show_doc(test)
from local.imports import * from local.test import * from local.notebook.showdoc import show_doc from local.core import * show_doc(mk_class) # check latest source and test def mk_class(nm, *fld_names, sup=None, doc=None, funcs=None, **flds): """ "Create a class using `get_class` and add to the caller's module" why mk_class(...) - `get_class(...)` can create a new class with specified attrs for us - but what if we want to add this class onto the caller's module - we use `mk_class` to create and add onto module altogether how to use mk_class(...) - the same as get_class(...) - but does not return anything Note: - get_class(...) returns a new class - new class is added onto local.core - for some reason, `t.__repr__` won't work - so, just use `wrap_class` or `get_class`, not `mk_class` """ def mk_class(nm, *fld_names, sup=None, doc=None,
from local.test import *
from local.imports import *
from local.notebook.showdoc import show_doc
from local.core import *

show_doc(L._xtra)

# `_xtra` can also be attached from outside the class body: here it is
# assigned as a read-only property that returns the names in `dir(list)`
# that do not start with an underscore.
L._xtra = property(fget=lambda self: [name for name in dir(list) if not name.startswith('_')])

# example
t = L(1,2,3)
t
t._xtra
return x + a def decodes(self, x, a=1): return x - a addt = _AddTfm() addt.__dict__ addt.assoc addt.encodes(3) addt(3) addt.decodes(4, a=1) addt.show(4) addt.filt = 1 addt(3, filt=1) # still not sure about the usage of `filt` addt(3, filt=0) # but maybe this is how we should use `filt`: 1 or 0 ###How to make sure of `mask` show_doc(Transform.__init__) addt = _AddTfm(is_tuple=True) start = (1, 2, 3) t = addt(start) t # only apply to the first item addt.decode(t) tfm = _AddTfm(is_tuple=True, mask=(True, True, True)) # apply to all three items start = (1, 2, 3) t = tfm(start) t tfm.decode(t)
- if no physical folder created in LOCAL_PATH, then return Config path - `shutil.move(path, path_bak)` move content from one dir to another """ fname = url.split('/')[-1] local_path = URLs.LOCAL_PATH / ('models' if c_key == ConfigKey.Model else 'data') / fname if local_path.exists(): return local_path return get_path(c_key) / fname # Create two folder paths in LOCAL_PATH path, path_bak = URLs.LOCAL_PATH / 'data', URLs.LOCAL_PATH / 'data1' URLs.LOCAL_PATH.ls() # if these two folder paths have actual physical folders available, delete them if path.exists(): shutil.move(path, path_bak) # usually no physical folder 'models' or 'data' created in LOCAL_PATH, so use Config folder url = URLs.MNIST_TINY test_eq(_url2path(URLs.MNIST_TINY), get_path(ConfigKey.Archive) / 'mnist_tiny.tgz') # how to quickly change the suffix of a url or filename test_eq(_url2path(URLs.MNIST_TINY.replace('tgz', 'tar')), get_path(ConfigKey.Archive) / 'mnist_tiny.tar') test_eq(_url2path(URLs.MNIST_TINY, c_key=ConfigKey.Model), get_path(ConfigKey.Model) / 'mnist_tiny.tgz') if path_bak.exists(): shutil.move(path_bak, path) show_doc(shutil.move) os.makedirs('data', exist_ok=True) download_url(f"{URLs.MNIST_TINY}.tgz", 'data/mnist_tiny.tgz') test_eq(_url2path(URLs.MNIST_TINY), Path.cwd() / 'data' / 'mnist_tiny.tgz')
from local.test import *
from local.imports import *
from local.notebook.showdoc import show_doc
from local.core import L

def custom_dir(c, add: List):
    """Return the attribute names of `c` plus the extra names in `add`.

    The result is the concatenation, in this order, of `dir(type(c))`, the
    keys of `c.__dict__`, and `add`. Duplicates are not removed.
    """
    type_names = dir(type(c))
    own_names = list(c.__dict__)
    return type_names + own_names + add

show_doc(custom_dir)
custom_dir(int, list())[-2:]
#%% #export class _Sig(): def __getattr__(self, k): def _inner(*args, **kwargs): return Func(k, *args, **kwargs) return _inner Sig = _Sig() #%% show_doc(Sig, name="Sig") #%% [markdown] # `Sig` is just sugar-syntax to create a `Func` object more easily with the syntax `Sig.name(*args, **kwargs)`. #%% f = Sig.sqrt() test_eq(f(math), math.sqrt) test_eq(f(torch), torch.sqrt) #%% #export class SelfFunc(): "Search for `name` attribute and call it with `args` and `kwargs` on any object it's passed."
2. we would like to be able to compare equality on all types including `Tensor, ndarray, string, list, tuple, Generator, Iterator` 3. as output, True for equal, False for not steps: 1. if `a` is a tensor with dim >= 1, `cmp = torch.equal` 2. if `a` is a `ndarray`, `cmp = np.array_equal` 3. if `a` is a `str`, `cmp = operator.eq` 4. if `a` is any of `(list, tuple, Generator, Iterator)`, `cmp = all_equal` 5. else `cmp = operator.eq` """ cmp = (torch.equal if isinstance(a, Tensor ) and a.dim() else np.array_equal if isinstance(a, ndarray ) else operator.eq if isinstance(a, str ) else all_equal if isinstance(a, (list,tuple,Generator,Iterator)) else operator.eq) return cmp(a,b) # a, b must be iterable show_doc(itertools.zip_longest) # expect a, b to be iterable test_fail(lambda: all_equal(1, 2), msg='should fail', contains='iteration') # what are iterables equals(range(3), range(3)) equals([1],[1]) a = [1,2,3] b = [1,2,3] all_equal(a, b) a = [1,2,3] b = [1,1,2,3] all_equal(a, b)
x = L(([1, 2, 3], [2, 3, 4], [5, 6, 7]))
item_find(x=x, idx=2)
x = ((1, 2, 3), (2, 3, 4), (5, 6, 7))
item_find(x=x, idx=0)
x = {'a': {"m": 5, "n": 7, "p": 9}, 'b': {"m": 15, "n": 17, "p": 19}}
item_find(x=x, idx=0)
item_find(x=x, idx=1)
item_find(x=x, idx='b')

###############################################################################
def find_device(b):
    """Return the `.device` of the item that `item_find(b)` selects.

    `item_find` is defined elsewhere; with its default index it presumably
    descends to the first leaf of a nested structure (confirm there).
    """
    leaf = item_find(b)
    return leaf.device

t1, (t2, t3) = to_device([3, [tensor(3), tensor(2)]])
test_eq(find_device(t2), defaults.device)
test_eq(find_device([t2, t2]), defaults.device)
test_eq(find_device({'a': t2, 'b': t2}), defaults.device)
test_eq(find_device({'a': [[t2], [t2]], 'b': t2}), defaults.device)
show_doc(to_device)
# `show_at(1)` is expected to print '-1' for the pipeline built before this point
test_stdout(lambda: pipe.show_at(1), '-1')
test_eq(pipe.assoc, Item)

# %%
# Check opposite order: `floattfm` first, then `negtfm`
pipe = Pipeline([floattfm(), negtfm()])
pipe.setup()
t = pipe(2)
test_eq(t, -2.0)
# `show` is on `tfloat` so needs to decode negtfm first
# NOTE(review): `tfloat` is not defined in this cell; presumably the type produced by `floattfm` - confirm
test_stdout(lambda: pipe.show_at(1), '1')
test_eq(pipe.assoc, Item)

# %% markdown
# ### Methods

# %%
show_doc(Pipeline.__call__)

# %%
show_doc(Pipeline.decode)

# %%
show_doc(Pipeline.delete)

# %%
show_doc(Pipeline.remove)

# %%
show_doc(Pipeline.add)

# %%
show_doc(Pipeline.show_at)

# %%
show_doc(Pipeline.decode_at)

# %%
3.2 after array function done working, convert output back to tensor 4. this way, tensor in, array function process, and tensor out Note: the way _inner or wrapper function only convert the positional arguments from tensor to array, not the named arguments """ def _inner(*args, **kwargs): nargs = [ to_np(arg) if isinstance(arg, Tensor) else arg for arg in args ] return tensor(f(*nargs, **kwargs)) functools.update_wrapper(_inner, f) return _inner # This decorator is particularly useful for using numpy functions as fastai metrics, for instance: from sklearn.metrics import f1_score @np_func def f1(inp, targ): return f1_score(targ, inp) # see only positional args a1, a2 = array([0, 1, 1]), array([1, 0, 1]) t = f1(tensor(a1), tensor(a2)) t # tensor in and tensor out f1_score(a1, a2) # array in show_doc(f1_score)
from local.test import *
from local.imports import *
from local.notebook.showdoc import show_doc
from local.core import tensor

show_doc(property)

# A quick way to add a property to an existing class: assign a `property` object to it.
Tensor.ndim = property(lambda x: x.dim())
# We add an `ndim` property to `Tensor` with the same semantics as
# [numpy ndim](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.ndim.html),
# which allows tensors to be used in matplotlib and other places that assume
# this property exists.
tensor(0).ndim
tensor(5).ndim
tensor(1, 2, 3).shape
tensor(1, 2, 3).ndim
tensor([[1, 4, 4], [2, 5, 5]]).shape
tensor([[1], [4]]).ndim

######### learn the property usage
# Each property below is True when `x.dim()` equals the stated number of dimensions.
Tensor.scalar = property(lambda x: x.dim() == 0)
Tensor.vector = property(lambda x: x.dim() == 1)
Tensor.matrix = property(lambda x: x.dim() == 2)
Tensor.image = property(lambda x: x.dim() == 3)
Tensor.video = property(lambda x: x.dim() == 4)
tensor(0).scalar
tensor(1, 2, 3).scalar
@patch
def setup(cls: TfmdList):
    """Run the `setup` of this object's `tfm`, if it has one.

    `cls.tfm.setup` is looked up with `noop` as the fallback and is called
    with this `TfmdList` as its only argument. Nothing is returned.

    NOTE(review): what `setup` then does inside `Pipeline` / `Transform`
    (ordering the transforms, marking them as set up) is defined elsewhere
    and is not visible here.
    """
    run_setup = getattr(cls.tfm, 'setup', noop)
    run_setup(cls)

show_doc(TfmdList.setup)
tfm = Pipeline(tfms=[operator.neg, float])
make_tfm(tfm)._tfms
t = TfmdList([1, 2, 3], tfm, do_setup=True)
tfm = [operator.neg, float]
make_tfm(tfm)._tfms
t = TfmdList([1, 2, 3], tfm, do_setup=True)
tfm = float
make_tfm(tfm)
t = TfmdList([1, 2, 3], tfm, do_setup=True)
from local.test import *
from local.imports import *
from local.notebook.showdoc import show_doc
from local.core import *

show_doc(setify)

def setify(o):
    """Return `o` as a `set`.

    An object that is already a `set` is returned as is (the same object);
    anything else is first wrapped in `L` and the result is turned into a set.
    """
    if isinstance(o, set):
        return o
    return set(L(o))

setify(None)
setify('abc')
setify([1,2,2])
setify(range(0,3))
setify({1,2})
from local.notebook.showdoc import show_doc
from local.data.pipeline import *

# what is needed to create a `TfmdList` instance?
@patch
def __init__(cls:TfmdList, items, tfm, do_setup=True):
    """Store `items` and the transform, then optionally run `setup`.

    - `items` is wrapped in `L` and kept as `cls.items`.
    - `tfm` is passed through `make_tfm`; the result is stored as both
      `cls.default` and `cls.tfm`.
    - when `do_setup` is true, `cls.setup()` is called right away instead of
      being left as a separate step.
    """
    cls.items = L(items)
    made_tfm = make_tfm(tfm)
    cls.default = made_tfm
    cls.tfm = made_tfm
    if do_setup:
        cls.setup()

show_doc(TfmdList.__init__)
items = [1,2,3]
tfm = str
tl = TfmdList(items, tfm)
tl.tfm.__class__
tl.items
from local.test import * from local.imports import * from local.notebook.showdoc import show_doc from local.core import * # check the official source show_doc(PrePostInitMeta, title_level=3) class PrePostInitMeta(type): """ "A metaclass that calls optional `__pre_init__` and `__post_init__` methods" Why need `PrePostInitMeta`? - not only run `__init__`, but also - automatically run `__pre_init__`, `__post_init__` How to use `PrePostInitMeta`? - create a subclass to `PrePostInitMeta` - you can add `__pre_init__` and `__post_init__` to `__init__` - program will run them in the order of `__pre_init__`, `__init__` and `__post_init__` - if any of them is missing, a `_pass()` method will run instead How to create `PrePostInitMeta`? - how to lay out the logic flow? - use pdb break at the first line of `__new__` - basically `__new__` run before running `t=_T()` - to prepare all the additional methods of `x` or `_T` """ def __new__(cls, name, bases, dct): # pdb break here to run the hidden codes
3.3 again loop through the remaining funcs `inp`, and do the previous step, until `inp` is empty 4. return res """ end = L(getattr(f, 'toward_end', False) for f in fs) inp, res = L(fs)[~end] + L(fs)[end], [] while len(inp) > 0: for i, o in enumerate(inp): if _is_first(o, inp): res.append(inp.pop(i)) break else: raise Exception("Impossible to sort") return res show_doc(sort_by_run) class Tst(): pass class Tst1(): run_before = [Tst] class Tst2(): run_before = Tst run_after = Tst1
from local.test import *
from local.imports import *
from local.notebook.showdoc import show_doc
from local.core import *

show_doc(coll_repr)

def coll_repr(c, max=10):
    """Return a short string representation of the collection `c`.

    The string has the form '(#N) [a,b,c...]': N is `len(c)`, followed by
    the `str` of at most `max` leading items joined with ',', with '...'
    appended when `c` holds more than `max` items.
    """
    count = len(c)
    # islice stops after `max` items, so a long collection is never fully stringified
    shown = ','.join(itertools.islice(map(str, c), max))
    ellipsis = '...' if count > max else ''
    return f'(#{count}) [{shown}{ellipsis}]'

# to experiment with itertools.islice
# (`c` and `max` only exist inside `coll_repr`, so concrete sample values are bound here)
sample, limit = list(range(20)), 10
g = itertools.islice(map(str, sample), limit)
next(g)
# simple examples
test_eq(L([tensor(0),tensor(1)]),[tensor(0),tensor(1)])
# without `use_list`, the tensor is kept whole as the single item of the `L`
test_eq(L(tensor([0.,1.1]))[0],tensor([0.,1.1]))
test_eq(L(tensor([0.,1.1]), use_list=True), [0.,1.1]) # `use_list=True` to unwrap arrays/tensors

# %% markdown
# If `match` is not `None` then the created list has the same length as `match`, either by:
#
# - If `len(items)==1` then `items` is replicated,
# - Otherwise an error is raised if `match` and `items` are not already the same size.

# %%
test_eq(L(1,match=[1,2,3]),[1,1,1])
test_eq(L([1,2],match=[2,3]),[1,2])
test_fail(lambda: L([1,2],match=[1,2,3]))

# %% markdown
# ### Methods

# %%
show_doc(L.__getitem__)

# %%
t = L(range(12))
test_eq(t[1,2], [1,2]) # implicit tuple
test_eq(t[[1,2]], [1,2]) # list
test_eq(t[:3], [0,1,2]) # slice
test_eq(t[[False]*11 + [True]], [11]) # mask
test_eq(t[tensor(3)], 3) # tensor used as an index

# %%
show_doc(L.__setitem__)

# %%
t[4,6] = 0 # one value assigned to both indices
test_eq(t[4,6], [0,0])
t[4,6] = [1,2] # one value per index
test_eq(t[4,6], [1,2])

# %%