Example #1
def matrix_by_vector():
  # define a matrix
  #  2  1  3
  #  3  1  4
  #  5  7 12
  # cf. https://numpy.org/doc/stable/reference/generated/numpy.vstack.html
  log('column-major\n', np.column_stack([[2, 3, 5], [1, 1, 7], [3, 4, 12]]))
  log('row-major\n', np.vstack([[2, 1, 3], [3, 1, 4], [5, 7, 12]]))
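
The function name suggests a matrix-by-vector product, which the snippet itself never performs; a minimal sketch of that step, assuming a hypothetical vector [1, 2, 3] that is not part of the original:

def matrix_by_vector_product():
  # row-major assembly of the same matrix, then the product A @ v
  A = np.vstack([[2, 1, 3], [3, 1, 4], [5, 7, 12]])
  v = np.array([1, 2, 3])
  log('A @ v =', A @ v)  # -> [13 17 55]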
Example #2
def jax():
    '''infer with JAX'''

    import jax
    import jax.numpy as jnp

    sys.path.insert(0, '../math')
    import jax_adabelief as jb
    sys.path.remove('../math')

    def predict(params, input):
        previous_velocity, action = input[1], input[4]
        inc = jax.lax.cond(action, lambda: params[0], lambda: -params[0])
        velocity = previous_velocity + inc
        return velocity

    batched_predict = jax.vmap(predict, in_axes=(None, 0))

    def loss(params, xs, ys):
        pred = batched_predict(params, xs)
        return jnp.mean((pred - ys)**2)

    _, inputs, outputs = load_inputs()
    xs = jnp.array(inputs)
    ys = jnp.array([o[1] for o in outputs])

    def plot_loss():
        # compile the loss ahead of time (jit → lower → compile), then reuse it in the loop
        lossˉ = jax.jit(loss).lower(jnp.array([0.1]), xs, ys).compile()
        a = [[' ' for x in range(111)] for y in range(5)]
        pxs, pys = [], []
        for p in range(9):
            pxs.append(p / 10)
            pys.append(lossˉ(jnp.array([p / 10]), xs, ys))
        map = plot(a, pxs, pys)
        print('\n'.join(''.join(y) for y in a))

    plot_loss()

    m = jnp.zeros(1)
    s = jnp.zeros(1)
    rkey = jax.random.PRNGKey(1)
    params = jax.random.normal(rkey, (1, ))

    def optimise(epoch, m, s, params, xs, ys):
        lossʹ, grads = jax.value_and_grad(loss)(params, xs, ys)
        m, s, params = jb.adabeliefʹ(epoch, grads, m, s, params)
        return m, s, params, lossʹ

    # AOT-compile one optimisation step; the arguments fix the traced shapes and dtypes
    optimiseˉ = jax.jit(optimise).lower(1, m, s, params, xs, ys).compile()
    for epoch in range(314):
        m, s, params, lossʹ = optimiseˉ(epoch, m, s, params, xs, ys)
        log(epoch, params, np.format_float_positional(lossʹ))
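
A hedged continuation, not in the original: once the loop converges, the fitted per-step increment can be sanity-checked against a single training sample through the same predict:

    velocity = predict(params, xs[0])
    log('predicted velocity', velocity, 'expected', ys[0])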
Example #3
def plotio(inputs, outputs, xs):
    wh = shutil.get_terminal_size((111, 11))
    wh = (wh.columns - 1, min(3, wh.lines - 3))
    param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:squarederror'}
    for rounds in range(1, 9):
        best = xgb.train(param,
                         xgb.DMatrix(inputs, label=outputs),
                         num_boost_round=rounds)
        ys = list(best.predict(xgb.DMatrix([[x]]))[0] for x in xs)
        log(rounds, xs, ys)
        a = [[' ' for x in range(wh[0])] for u in range(wh[1])]
        map = plot(a, xs, ys)
        for *_, ax, ay in map.zip([i[0] for i in inputs],
                                  [o[0] for o in outputs]):
            a[ay][ax] = '\033[34m*\033[0m'
        print('\n'.join(''.join(y) for y in a))

    gv = xgb.to_graphviz(best)
    open('plotio.pdf', 'wb').write(gv.pipe())
Example #4
def elm():
    '''infer with ELM'''

    sys.path.insert(0, '../elm')
    import elm
    sys.path.remove('../elm')

    # infer: elm (stateⱼ₋₁, actionⱼ) = stateⱼ
    _, inputs, outputs = load_inputs()
    weights, bias, β = elm.train(31, inputs, outputs)
    # hidden neurons .. MSE
    #    3 .. 0.27
    #   31 .. 0.22
    #  314 .. 0.22
    # 1234 .. 0.22

    predictions = []
    for input in inputs:
        predictions.append(elm.infer(weights, bias, β, input))

    mse = np.square(np.subtract(outputs, predictions)).mean()
    log(floorʹ(mse))
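
The elm module is imported from ../elm and not shown above. A minimal sketch of the extreme-learning-machine train/infer pair the example appears to rely on (random, untrained hidden layer; readout β solved by least squares via the pseudo-inverse); the tanh activation, the fixed seed and the exact signatures are assumptions:

def train(hidden, inputs, outputs):
    rng = np.random.default_rng(1)
    X, Y = np.asarray(inputs, float), np.asarray(outputs, float)
    weights = rng.normal(size=(X.shape[1], hidden))  # random input→hidden map
    bias = rng.normal(size=hidden)
    H = np.tanh(X @ weights + bias)                  # hidden activations
    β = np.linalg.pinv(H) @ Y                        # least-squares readout
    return weights, bias, β

def infer(weights, bias, β, input):
    return np.tanh(np.asarray(input, float) @ weights + bias) @ β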
Example #5
def mnist():
    from keras.datasets import mnist

    log('loading MNIST…')
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    assert os.path.exists(os.path.expanduser('~/.keras/datasets/mnist.npz'))

    assert x_train.shape == (60000, 28, 28)
    assert x_train.shape[1] * x_train.shape[2] == 784
    assert y_train.shape == (60000, )

    assert x_test.shape == (10000, 28, 28)
    assert x_test.shape[1] * x_test.shape[2] == 784
    assert y_test.shape == (10000, )

    # https://stackoverflow.com/a/61573347/257568
    # “it seems that X needs to be 2-dimensional, and Y needs to be 1-dimensional”
    # https://github.com/dmlc/xgboost/issues/2000#issuecomment-283033116
    # “xgboost (like sklearn) expects X as 2D data (n_samples, n_features)”
    x_train = x_train.reshape(x_train.shape[0], -1)
    assert x_train.shape == (60000, 784)
    y_train = y_train.reshape(y_train.shape[0], -1)
    assert y_train.shape == (60000, 1)
    dtrain = xgb.DMatrix(x_train, label=y_train)

    x_test = x_test.reshape(x_test.shape[0], -1)
    assert x_test.shape == (10000, 784)
    y_test = y_test.reshape(y_test.shape[0], -1)
    assert y_test.shape == (10000, 1)
    dtest = xgb.DMatrix(x_test, label=y_test)

    param = {'max_depth': 2, 'eta': 1, 'objective': 'reg:squarederror'}
    log('training…')
    best = xgb.train(param,
                     dtrain,
                     num_boost_round=31,
                     evals=[(dtrain, 'train'), (dtest, 'test')],
                     verbose_eval=True)

    log('testing…')
    ypred = best.predict(dtest)
    loss = 0
    for pred, test in zip(ypred, y_test):
        loss += (pred - test[0])**2
    mse = loss / len(ypred)
    # cf. error rates at http://yann.lecun.com/exdb/mnist/
    log('MSE', floorʹ(mse), 'RMSE', floorʹ(sqrt(mse)))
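
The objective above is squared-error regression on the raw digit label, whereas the table at yann.lecun.com reports classification error rates. A hedged, rough way to get a comparable number, not in the original, is to round the regression output to the nearest digit:

    digits = np.rint(ypred).clip(0, 9)
    error_rate = np.mean(digits != y_test[:, 0])
    log('error rate ~', floorʹ(error_rate))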
Example #6
def commoncrawl_s3(path):
  # https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2021-10/warc.paths.gz

  import http.client
  conn = None
  pause = 3
  host = 'commoncrawl.s3.amazonaws.com'
  for attempt in range(314):
    try:
      # lazily (re)open the connection; it is reset to None after network errors below
      conn or (conn := http.client.HTTPSConnection(host))
      conn.request('GET', path)  # headers={'User-Agent': 'curl/7.79.1'}
      resp = conn.getresponse()
      # Might get the 503 Slow Down from Amazon S3
      # https://aws.amazon.com/premiumsupport/knowledge-center/s3-resolve-503-slowdown-throttling/
      # “You can send 3,500 PUT/COPY/POST/DELETE and 5,500 GET/HEAD requests per second per partitioned prefix”
      if resp.status == 503:
        bytes = resp.read()
        try:
          why = re.sub(r'[^\w<> =/!\.]', '?', bytes.decode())
          if 234 < len(why):
            why = why[:234]
        except Exception:
          why = ''
        log(f"503, retrying in {pause}; “{why}”")
        time.sleep(pause)
        pause = floorʹ(pause + random.uniform(0.1, pause))
        continue
      if resp.status != 200:
        raise Exception(f"Unexpected status: {resp.status}")
      return resp
    except (ImproperConnectionState, socket.gaierror) as ex:
      log(f"{ex}, retrying in {pause}")
      time.sleep(pause)
      conn = None
      continue
  raise Exception(f"Out of attempts with {host}")
Example #7
if '--xgboost' in sys.argv:  # infer with xgboost
    import xgboost as xgb
    _, inputs, outputs = load_inputs()
    velocity = [o[1] for o in outputs]
    param = {'max_depth': 3}
    dtrain = xgb.DMatrix(inputs, label=velocity)
    best = xgb.train(param,
                     dtrain,
                     evals=[(dtrain, 'train')],
                     num_boost_round=314)
    for count, (input, expected) in enumerate(zip(inputs, outputs)):
        prediction = best.predict(xgb.DMatrix([input]))[0]
        log(f"prediction {prediction} expected {expected[1]}")
        if 32 < count:
            break

    gv = xgb.to_graphviz(best)
    open('velocity.pdf', 'wb').write(gv.pipe())

if '--tf' in sys.argv:  # Inference with TF
    from tensorflow import keras
    from tensorflow.keras import layers

    tf_inputs = keras.Input(shape=(5, ), name="state-and-action")
    x = layers.Dense(314, activation="relu", name="dense_1")(tf_inputs)
    x = layers.Dense(314, activation="relu", name="dense_2")(x)
    tf_outputs = layers.Dense(4, activation="softmax",
                              name="state-prediction")(x)
Example #8
if __name__ == '__main__':
    import shutil

    from llog import floorᵃ, log, plot

    wh = shutil.get_terminal_size((111, 11))
    wh = (wh.columns - 1, min(7, wh.lines - 3))
    a = [[' ' for x in range(wh[0])] for u in range(wh[1])]
    wofs = 0

    inputs = [[1], [2]]
    outputs = [[1], [2]]
    while 7 < wh[0] - wofs:
        weights, bias, β = train(2, inputs, outputs)
        if wofs == 0:
            log('id (1) =', infer(weights, bias, β, [1]))
            log('id (2) =', infer(weights, bias, β, [2]))

        xs = np.linspace(-1, 7, 44)
        ys = list(infer(weights, bias, β, [x])[0] for x in xs)
        map = plot(a, xs, ys, wofs)
        for *_, ax, ay in map.zip([i[0] for i in inputs],
                                  [o[0] for o in outputs]):
            a[ay][ax] = '\033[34m*\033[0m'
        wofs += map.width // 2 + 3

    print('\n'.join(''.join(y) for y in a))

    inputs = [[1], [2], [3]]
    outputs = [[3], [2], [1]]
    weights, bias, β = train(3, inputs, outputs)
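
A hedged continuation, the excerpt stops here: check the new, decreasing fit on its three training points, mirroring the id-function check above:

    for x in (1, 2, 3):
        log(f"f ({x}) =", infer(weights, bias, β, [x]))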
Example #9
def accuracy(params, inputs, targets):
    predicted_class = jnp.argmax(batched_predict(params, inputs), axis=1)
    return jnp.mean(predicted_class == targets)


def one_hot(x, k):
    # NB: Reverse of one_hot is argmax, https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
    # 2022-03: For comprehension is more readable but takes a lot of time to trace and compile
    #return jnp.array([[k == x for k in range(k)] for x in x], dtype=jnp.float32)
    # 2022-03: This arcane trick compiles much faster, but there is still a type mismatch at runtime
    return jnp.array(x[:, None] == jnp.arange(k), dtype=jnp.float32)
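
# Hedged usage example, not in the original: integer labels become rows of
# the k×k identity.
assert (one_hot(jnp.array([0, 2, 1]), 3) == jnp.eye(3)[[0, 2, 1]]).all()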


if __name__ == '__main__':
    from keras.datasets import mnist

    log('allocating parameters…')
    layer_sizes = [784, 512, 512, 10]
    rkey = random.PRNGKey(1)
    params = init_network_params(layer_sizes, rkey)
    m, s = init_ms(layer_sizes), init_ms(layer_sizes)

    log('loading MNIST…')
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    assert os.path.exists(os.path.expanduser('~/.keras/datasets/mnist.npz'))

    assert x_train.shape == (60000, 28, 28)
    assert x_train.shape[1] * x_train.shape[2] == layer_sizes[0]
    assert y_train.shape == (60000, )

    assert x_test.shape == (10000, 28, 28)
    assert x_test.shape[1] * x_test.shape[2] == layer_sizes[0]
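
A hedged continuation sketch, the excerpt stops here: flatten the images to 784-vectors and one-hot encode the labels with the one_hot above; the /255 scaling is an assumption, not shown in the original:

    x_train = jnp.array(x_train.reshape(x_train.shape[0], -1) / 255,
                        dtype=jnp.float32)
    y_train = one_hot(y_train, layer_sizes[-1])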
Example #10
import io
import random
import re
import socket
import subprocess
import time
import zlib
from http.client import ImproperConnectionState

import lib
import requests
from llog import floorʹ, log
from llog.state import State

if __name__ == '__main__':
    log('started..')
    state = State('common-crawl.mdb')
    log('hostname..')
    hostname = socket.gethostname()
    log(f"hostname: {hostname}")
    # if hostname == 'MSI':
    #   subprocess.call(['wsl', '.', 'to-us-east-1.sh'])
    #   exit(0)

    with state.begin() as st:
        if 'warc.paths.gz' in st:
            paths_hdr, paths_gz = st['warc.paths.gz']
        else:
            resp = lib.commoncrawl_s3(
                '/crawl-data/CC-MAIN-2021-10/warc.paths.gz')
            #open('warc.paths.gz', 'wb').write(bytes)
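
A hedged sketch of how the else branch might continue, caching the header and the gzipped body under the key the other branch reads back; the exact State API beyond what is shown above is an assumption:

            paths_gz = resp.read()
            paths_hdr = dict(resp.getheaders())
            st['warc.paths.gz'] = paths_hdr, paths_gz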