Example no. 1
    def births(self, deltat):
        # First consider only females
        females = self.data[self.data.DC1117EW_C_SEX == 2].copy()

        # Now map the appropriate fertility rate to each female
        # NOTE: there might be a more efficient way of generating this array
        rates = females.join(
            self.fertility,
            on=["NewEthpop_ETH", "DC1117EW_C_SEX",
                "DC1117EW_C_AGE"])["Rate"].values
        # Then randomly determine if a birth occurred (neworder callback)
        h = neworder.hazard(rates * deltat)

        # The babies are clones of their mothers, with a new PID, reset age and randomised gender (keeping location and ethnicity)
        newborns = females[h == 1].copy()
        newborns.PID = range(self.counter, self.counter + len(newborns))
        newborns.Age = neworder.ustream(
            len(newborns))  # born within the last 12 months
        newborns.DC1117EW_C_AGE = 1  # this is 0-1 in census category
        # NOTE: do not convert to a pd.Series here, as a Series carries its own index, which conflicts with the main table's
        newborns.DC1117EW_C_SEX = neworder.hazard(0.5, len(newborns)) + 1

        # Finally append newborns to main population and adjust counter
        self.data = self.data.append(newborns)
        self.counter = self.counter + len(newborns)
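The neworder.hazard calls above turn a per-person rate (or a single probability plus a count) into a 0/1 Monte Carlo draw. A minimal numpy sketch of that semantics, purely illustrative and not the library's actual implementation (which draws from its own seeded streams), might look like:

import numpy as np

def hazard_sketch(p, n=None, rng=np.random.default_rng()):
    # element i is 1 with probability p[i]; a scalar p with a count n draws n times
    p = np.full(n, p) if n is not None else np.asarray(p, dtype=float)
    return (rng.random(len(p)) < p).astype(int)

# hazard_sketch(rates * deltat) mirrors neworder.hazard(rates * deltat)
# hazard_sketch(0.5, len(newborns)) mirrors neworder.hazard(0.5, len(newborns))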
Example no. 2
  def migrations(self, deltat):

    # internal in-migration:
    # - assign the rates to the incumbent population appropriately by age, sex and ethnicity
    # - randomly sample this population, clone and append
    in_rates = self.data.join(self.in_migration, on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"].values
    # in-migration should really be sampled from the whole population outside the LAD; instead we approximate by scaling up the LAD population
    # NOTE this is wrong for a number of reasons, esp. that it cannot sample category combinations that don't already exist in the LAD
    h_in = neworder.hazard(in_rates * deltat)
    
    incoming = self.data[h_in == 1].copy()

    # Assign new ids to the incomers, then append them to the main population and adjust the counter
    incoming.PID = range(self.counter, self.counter + len(incoming))
    incoming.Area = incoming.LAD
    # assign a new random fractional age based on census age category
    incoming.Age = incoming.DC1117EW_C_AGE - neworder.ustream(len(incoming)).tolist()
    self.data = self.data.append(incoming, sort=False)
    self.counter = self.counter + len(incoming)

    # internal emigration
    out_rates = self.data.join(self.out_migration, on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"]
    h_out = neworder.hazard(out_rates.values * deltat)
    # remove outgoing migrants
    self.data = self.data[h_out!=1]

    intl_in_rates = self.data.join(self.immigration, on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"]
    h_intl_in = neworder.hazard(intl_in_rates.values * deltat)

    intl_incoming = self.data[h_intl_in == 1].copy()
    intl_incoming.PID = range(self.counter, self.counter + len(intl_incoming))
    intl_incoming.Area = "INTL" #self.lad
    # assign a new random fractional age based on census age category
    intl_incoming.Age = intl_incoming.DC1117EW_C_AGE - neworder.ustream(len(intl_incoming)).tolist()
    self.data = self.data.append(intl_incoming)
    self.counter = self.counter + len(intl_incoming)

    # international emigration
    intl_out_rates = self.data.join(self.emigration, on=["LAD", "NewEthpop_ETH", "DC1117EW_C_SEX", "DC1117EW_C_AGE"])["Rate"]
    h_intl_out = neworder.hazard(intl_out_rates.values * deltat)
    # remove outgoing migrants
    self.data = self.data[h_intl_out!=1]

    # record net migration
    self.in_out = (h_in.sum(), h_out.sum(), h_intl_in.sum(), h_intl_out.sum())
Example no. 3
def test():
    t = test_.Test()

    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True

    # test ustream/sequence
    t.check(not neworder.indep())

    u = neworder.ustream(1000)
    v = neworder.broadcast(u, 0)
    # u == v for all processes

    t.check(np.array_equal(u, v))

    return not t.any_failed
Example no. 4
    def __init__(self, inputdata, asfr, asmr, asir, asor, ascr, asxr):

        # guard for no input data (if more MPI processes than input files)
        if not len(inputdata):
            raise ValueError("proc {}/{}: no input data".format(
                neworder.rank(), neworder.size()))

        self.lads = [file.split("_")[2] for file in inputdata]

        self.data = pd.DataFrame()
        for file in inputdata:
            data = pd.read_csv(file)
            data["LAD"] = file.split("_")[2]
            self.data = self.data.append(data)

        neworder.log("Preprocessing transition data for %s" %
                     ", ".join(self.lads))
        self.fertility = ethpop.create_multi(pd.read_csv(asfr), self.lads)
        self.mortality = ethpop.create_multi(pd.read_csv(asmr), self.lads)
        self.in_migration = ethpop.local_rates_from_national_rate(
            ethpop.create_multi(pd.read_csv(asir), self.lads), self.data)
        self.out_migration = ethpop.create_multi(pd.read_csv(asor), self.lads)
        self.immigration = ethpop.local_rates_from_absolute(
            ethpop.create_multi(pd.read_csv(ascr), self.lads), self.data)
        self.emigration = ethpop.local_rates_from_absolute(
            ethpop.create_multi(pd.read_csv(asxr), self.lads), self.data)

        # Force flat rates for testing purposes
        self.in_migration.Rate = 0.05
        self.out_migration.Rate = 0.05
        # The actual rates cause exponential growth
        self.immigration.Rate = 0.01
        self.emigration.Rate = 0.005

        # use this to identify people (uniquely only within this table)
        self.counter = len(self.data)

        # Reformatting of input data is required to match Ethpop categories
        # actual age is randomised within the bounds of the census age category
        self.data["Age"] = self.data.DC1117EW_C_AGE - neworder.ustream(
            len(self.data))
        self.data = ethpop.from_census_eth(self.data)
Example no. 5
    def __init__(self, inputdata, asfr, asmr, asir, asor, ascr, asxr):

        self.lad = inputdata.split("_")[1]

        self.data = pd.read_csv(inputdata)

        self.fertility = ethpop.create(pd.read_csv(asfr), self.lad)
        self.mortality = ethpop.create(pd.read_csv(asmr), self.lad)
        # assume the in-migration rates are based on the national population and need to be rescaled...
        base_pop = len(self.data)
        # deal with census-merged LADs
        if self.lad == "E09000001" or self.lad == "E09000033":
            base_pop = 219340 + 7397
        elif self.lad == "E06000052" or self.lad == "E06000053":
            raise NotImplementedError("Cornwall CM LAD adj")
        self.in_migration = ethpop.local_rate_from_national_rate(
            ethpop.create(pd.read_csv(asir), self.lad), base_pop)
        # assume the out-migration rates don't require adjustment
        self.out_migration = ethpop.create(pd.read_csv(asor), self.lad)
        self.immigration = ethpop.local_rate_rescale_from_absolute(
            ethpop.create(pd.read_csv(ascr), self.lad), base_pop)
        self.emigration = ethpop.local_rate_rescale_from_absolute(
            ethpop.create(pd.read_csv(asxr), self.lad), base_pop)

        # Force flat rates for testing purposes
        #self.in_migration.Rate = 0.05
        #self.out_migration.Rate = 0.05
        self.immigration.Rate = 0.01
        self.emigration.Rate = 0.005

        # use this to identify people (uniquely only within this table)
        self.counter = len(self.data)

        # Reformatting of input data is required to match Ethpop categories
        # actual age is randomised within the bounds of the census age category
        self.data["Age"] = self.data.DC1117EW_C_AGE - neworder.ustream(
            len(self.data))
        self.data = ethpop.from_census_eth(self.data)
Example no. 6
def test():
    t = test_.Test()

    x = -1e10
    t.check(no.distant_past() < x)
    t.check(no.far_future() > x)
    x = 1e10
    t.check(no.distant_past() < x)
    t.check(no.far_future() > x)

    # dreams never end
    t.check(no.never() != no.never())
    t.check(not no.never() == x)
    t.check(no.never() != x)
    t.check(not x < no.never())
    t.check(not x >= no.never())
    # no nay never:
    t.check(not no.isnever(x))
    # no nay never no more:
    t.check(no.isnever(no.never()))

    #t.check(False)
    s = no.ustream(10000)
    t.check(isinstance(s, np.ndarray))

    t.check(len(s) == 10000)

    t.check(abs(np.mean(s) - 0.5) < 0.02)

    f = no.lazy_eval("2 + 2")
    t.check(f() == 4)

    # # TODO this overlaps/duplicates tests in op.py - reorganise

    # # test thinning algorithm for non-homogeneous Poisson process
    # h = np.array([0.014] * 10)
    # #l = no.stopping(h)
    # l = no.first_arrival(h, 1.0, 10000)
    # t.check(abs(np.mean(l) * 0.014 - 1.0) < 0.03)
    # # varying timestep should make no difference
    # l = no.first_arrival(h, 0.1, 10000)
    # t.check(abs(np.mean(l) * 0.014 - 1.0) < 0.03)

    # # test a certain(ish) hazard rate
    # h = np.array([0.99, 0.99, 0.01])
    # l = no.first_arrival(h, 1.0, 10000)
    # no.log("TODO NHPP appears broken: %f" % np.mean(l))

    # # test a zero(ish) hazard rate
    # h = np.array([1e-30, 1e-30, 1e-30, .9999])
    # l = no.first_arrival(h, 1.0, 10000)
    # no.log("TODO NHPP appears broken: %f" % np.mean(l))

    # # this also tests a zero hazard rate
    # h = np.array([i/3000 for i in range(100)])
    # #no.log(h)
    # le = no.first_arrival(h, 1.0, 10000)
    # no.log(sum(le)/len(le))

    # # y
    # h = np.array([0.999, 0.1])
    # le = no.first_arrival(h, 1.0, 1000)
    # no.log(sum(le)/len(le))

    sometime = no.isnever(np.full(10, 1.0))
    t.check(np.all(~sometime))
    never = no.isnever(np.full(10, no.never()))
    no.log(never)
    t.check(np.all(never))

    # DataFrame ops

    # load the test DataFrame used by the modification tests below
    df = pd.read_csv("../../tests/df.csv")

    # modify df passing directly
    no.directmod(df, "DC2101EW_C_ETHPUK11")
    t.check(
        np.array_equal(df["DC2101EW_C_ETHPUK11"].values,
                       np.zeros(len(df)) + 3))

    df = pd.read_csv("../../tests/df.csv")
    cats = np.array(range(4))
    transitions = np.identity(len(cats)) * 0 + 0.25
    #no.log(transitions)
    no.transition(cats, transitions, df, "DC2101EW_C_ETHPUK11")
    # it's possible this could fail depending on random draw
    t.check(
        np.array_equal(np.sort(df["DC2101EW_C_ETHPUK11"].unique()),
                       np.array(range(4))))

    # df2 = df.copy()
    # df3 = no.append(df,df2)
    # t.check(len(df3) == len(df) + len(df2))

    return not t.any_failed
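The no.transition(cats, transitions, df, "DC2101EW_C_ETHPUK11") call above applies a transition matrix row-wise to a categorical column; with every entry at 0.25 each row sums to 1 and all four categories are reachable, so the uniqueness check can only fail on an unlucky draw. A rough numpy sketch of that row-wise categorical sampling, illustrative only and assuming this is roughly what the library does:

import numpy as np

def transition_sketch(states, cats, T, rng=np.random.default_rng()):
    # one Markov step: for each current state, sample the next category from the matching row of T
    cats = np.asarray(cats)                  # must be sorted and contain every value in states
    idx = np.searchsorted(cats, states)      # row of T for each individual
    cum = np.cumsum(T, axis=1)               # per-row cumulative probabilities (rows sum to 1)
    u = rng.random(len(states))
    return cats[(u[:, None] < cum[idx]).argmax(axis=1)]

# e.g. starting everyone in category 0, one step with the uniform 0.25 matrix above
# spreads the population roughly evenly over categories 0..3:
# states = transition_sketch(np.zeros(1000, dtype=int), cats, transitions)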
Example no. 7
def nstream(n):
    """ Return a vector of n normally distributed pseudorandom variates (mean zero unity variance) """
    return scipy.stats.norm.ppf(neworder.ustream(n))
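nstream is just inverse-transform sampling: uniform draws from neworder.ustream pushed through the normal quantile function scipy.stats.norm.ppf. A quick sanity check on the output moments (a sketch; assumes a working neworder runtime providing ustream):

import numpy as np

z = nstream(100000)
assert abs(np.mean(z)) < 0.02        # zero mean, within sampling error
assert abs(np.std(z) - 1.0) < 0.02   # unit variance, within sampling error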
Example no. 8
def test():
    t = test_.Test()

    if neworder.size() == 1:
        neworder.log("Skipping MPI tests")
        return True

    t.check(send_recv(True))
    t.check(send_recv(10))
    t.check(send_recv(10.01))
    t.check(send_recv("abcdef"))
    t.check(send_recv([1, 2, 3]))
    t.check(send_recv({"a": "fghdfkgh"}))

    x = np.array([1, 4, 9, 16])
    if neworder.rank() == 0:
        neworder.send(x, 1)
    if neworder.rank() == 1:
        y = neworder.receive(0)
        neworder.log("MPI: 0 sent {}={} 1 recd {}={}".format(
            type(x), x, type(y), y))
        t.check(np.array_equal(x, y))

    df = pd.read_csv("../../tests/ssm_E09000001_MSOA11_ppp_2011.csv")
    if neworder.rank() == 0:
        neworder.log("sending (as csv) df len %d rows from 0" % len(df))
        neworder.send_csv(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive_csv(0)
        neworder.log("got (as csv) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))

    if neworder.rank() == 0:
        neworder.log("sending (pickle) df len %d rows from 0" % len(df))
        neworder.send(df, 1)
    if neworder.rank() == 1:
        dfrec = neworder.receive(0)
        neworder.log("got (pickle) df len %d rows from 0" % len(dfrec))
        t.check(dfrec.equals(df))

    # TODO how to test?
    neworder.log("process %d syncing..." % neworder.rank())
    neworder.sync()
    neworder.log("process %d synced" % neworder.rank())

    i = "rank " + str(neworder.rank())
    root = 0
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (i, root))
    i = neworder.broadcast(i, root)
    neworder.log("%d got broadcast: '%s' from %d" % (neworder.rank(), i, root))

    t.check(i == "rank 0")

    # a0 will be different for each proc
    a0 = np.random.rand(2, 2)
    if root == neworder.rank():
        neworder.log("broadcasting '%s' from %d" % (str(a0), root))
    a1 = neworder.broadcast(a0, root)
    # a1 will equal a0 on rank 0 only
    neworder.log("%d got broadcast: '%s' from %d" %
                 (neworder.rank(), str(a1), root))
    if neworder.rank() == 0:
        t.check(np.array_equal(a0, a1))
    else:
        t.check(not np.array_equal(a0, a1))

    # test ustream/sequence
    t.check(neworder.indep())
    if root == neworder.rank():
        u0 = neworder.ustream(1000)
        u1 = np.zeros(1000)
    else:
        u0 = np.zeros(1000)
        u1 = neworder.ustream(1000)
    # broadcast u1 from 1
    u1 = neworder.broadcast(u1, 1)
    # proc 0 should have 2 different random arrays
    # proc 1 should have zeros and a random array
    t.check(not np.array_equal(u0, u1))

    # check independent streams
    u = neworder.ustream(1000)
    v = neworder.broadcast(u, root)

    # u == v on broadcasting process only
    t.check(np.array_equal(u, v) == (neworder.rank() == root))

    # test gather
    x = (neworder.rank() + 1)**2 / 8
    a = neworder.gather(x, 0)
    if neworder.rank() == 0:
        t.check(np.array_equal(a, [0.125, 0.5]))
    else:
        t.check(len(a) == 0)
    #neworder.log(a)

    # test scatter
    if neworder.rank() == 0:
        a = (np.array(range(neworder.size())) + 1)**2 / 8
    else:
        a = np.zeros(neworder.size())
    neworder.log(a)
    x = neworder.scatter(a, 0)
    t.check(x == (neworder.rank() + 1)**2 / 8)

    # test allgather
    a = np.zeros(neworder.size()) - 1
    a[neworder.rank()] = (neworder.rank() + 1)**2 / 8
    a = neworder.allgather(a)
    t.check(np.array_equal(a, np.array([0.125, 0.5])))

    # this should probably fail (gather not implemented for int)
    x = neworder.rank() + 100
    a = neworder.gather(x, 0)
    #neworder.log(type(x))
    #neworder.log(type(a))

    return not t.any_failed
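For comparison, the gather/scatter pattern exercised above corresponds to the standard MPI collectives. A minimal mpi4py sketch of the same shape (mpi4py is a separate library, shown here only to illustrate the collective semantics, not part of neworder):

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

x = (rank + 1)**2 / 8
gathered = comm.gather(x, root=0)    # list of all ranks' values on rank 0, None elsewhere
chunks = [(i + 1)**2 / 8 for i in range(size)] if rank == 0 else None
y = comm.scatter(chunks, root=0)     # each rank gets back its own element
assert y == x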