Example #1
def ref_with_vcf_dicts_strategy_factory(draw):
    '''
    Generate vcf records for randomish locations along a randomishly generated
    reference sequence. Each vcf record generator will have a randomish sized
    "chunk" of the reference to use

    Returns (reference sequence(str), iterable(vcf dicts))
    '''
    seq = draw(st.text(alphabet='ACGT', min_size=10, max_size=20))
    size = len(seq)
    # This gets you a list of numbers that are randomish and increasing
    ranges = draw(
        rolling_sum(1, 3, int(size / 2)).map(
            lambda xs: ifilter(lambda x: x < size, xs)))  #.filter(_not(bool)))
    # Stream lets you re-use a generator without draining it.
    # Pairs will hold start/stop values for each part of sequence
    pairs = Stream() << partition(2, ranges)
    # POSs will contain the start position of each vcf row
    POSs = Stream() << imap(operator.itemgetter(0), pairs)
    # VCF files start at index 1; python starts at 0
    pairs_offset_1 = imap(lambda x: (x[0] - 1, x[1] - 1), pairs)
    #grab the pieces of the reference to build our elts from
    chunks = map(lambda x: seq[x[0]:x[1]], pairs_offset_1)
    #random chromosome name
    chrom = draw(st.text(string.ascii_letters))
    # Draw a new record for each of the Positions we have made
    vcfs = map(compose(draw, partial(vcf_dict_strategy_factory, chrom)), POSs,
               chunks)
    #TODO: ranges must be non-empty. Assuming vcfs for now.
    # vcfs can be a generator
    #assume(len(vcfs) > 0)
    return (seq, vcfs)
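A minimal standalone sketch of the pairing trick used above, with hard-coded cut points instead of values drawn from a Hypothesis strategy (the project-specific rolling_sum helper is not shown here):

from toolz.itertoolz import partition

cut_points = [1, 3, 6, 8, 11, 13]           # increasing positions along a reference sequence
pairs = list(partition(2, cut_points))      # [(1, 3), (6, 8), (11, 13)] -> (start, stop) chunks
starts = [start for start, _stop in pairs]  # [1, 6, 11] -> the POS of each vcf row
print(pairs, starts)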
Example #3
def agentToDict(agent):
    return dict(partition(2, repr(agent).replace(":","").split(" ")))
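A hedged sketch of the same key/value pairing; the repr format below is only an assumption, since the real agent class is not shown in the source:

from toolz.itertoolz import partition

# Hypothetical agent repr of the form "key: value key: value ..."
fake_repr = "name: bob energy: 7 state: idle"
tokens = fake_repr.replace(":", "").split(" ")  # ['name', 'bob', 'energy', '7', 'state', 'idle']
print(dict(partition(2, tokens)))               # {'name': 'bob', 'energy': '7', 'state': 'idle'}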
Example #4
import numpy as np  # np is used below; flow and df are assumed to be defined earlier in the original script
from sklearn.preprocessing import MinMaxScaler, StandardScaler
scaler = MinMaxScaler(feature_range=(0, 1))
#scaler=StandardScaler()
#fit train data
scaler_flow_train = scaler.fit(flow)
print('Min: %f, Max: %f' %
      (scaler_flow_train.data_min_, scaler_flow_train.data_max_))
#scale train data
normalized_flow = scaler_flow_train.transform(flow)
normalized_flow
#from array to list
normalized_flow = normalized_flow.tolist()
len(normalized_flow)
from toolz.itertoolz import sliding_window, partition
#for every day of the train set store the flow observations
day_flow = list(partition(240, normalized_flow))
day_flow
len(day_flow)
#from list to multidimensional array
day_flow = np.asarray(day_flow)
day_flow
from tslearn.utils import to_time_series, to_time_series_dataset
#create univariate series for normalized flow_observation
first_time_series = to_time_series(day_flow)
print(first_time_series.shape)

#treatment of density variable
density = df.loc[:, 'Density']
#normalization/standardization of train data
density = np.array(density)
density = density.reshape((len(density), 1))
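As a quick sanity check of the grouping step above (a sketch with placeholder values, assuming 240 observations per day, i.e. one sample every 6 minutes): partition(240, ...) turns the flat list of scaled values into one tuple per day, and np.asarray then yields a (days, samples per day, 1 feature) array.

import numpy as np
from toolz.itertoolz import partition

flow_values = [[0.5]] * 480                       # two days of placeholder scaled samples
day_flow = np.asarray(list(partition(240, flow_values)))
print(day_flow.shape)                             # (2, 240, 1)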
Example #5
def test_partition():
    assert list(partition(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)]
    assert list(partition(3, range(7))) == [(0, 1, 2), (3, 4, 5)]
    assert list(partition(3, range(4), pad=-1)) == [(0, 1, 2),
                                                    (3, -1, -1)]
    assert list(partition(2, [])) == []
Example #6
# assumed: numpy (np), flow, and df are defined earlier in the original script
flow = flow.reshape((len(flow), 1))
from sklearn.preprocessing import MinMaxScaler, StandardScaler
scaler = MinMaxScaler(feature_range=(0, 1))
#scaler=StandardScaler()
#fit train data
scaler_flow_train = scaler.fit(flow)
#print('Min: %f, Max: %f' % (scaler_flow_train.data_min_, scaler_flow_train.data_max_))
#scale train data
normalized_flow = scaler_flow_train.transform(flow)
normalized_flow
#from array to list
normalized_flow = normalized_flow.tolist()
len(normalized_flow)
from toolz.itertoolz import sliding_window, partition
#for every day of the train set store the flow observations
day_flow = list(partition(48, normalized_flow))
day_flow
len(day_flow)
#from list to multidimensional array
day_flow = np.asarray(day_flow)
day_flow
from tslearn.utils import to_time_series, to_time_series_dataset
#create univariate series for normalized flow_observation
first_time_series = to_time_series(day_flow)
print(first_time_series.shape)

#treatment of speed variable
speed = df.loc[:, 'Speed']
#normalization/standardization of train data
speed = np.array(speed)
speed = speed.reshape((len(speed), 1))
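The same grouping at 30-minute resolution: 48 observations make up one day, so partition(48, ...) yields one tuple per day. A tiny check under that assumption:

from toolz.itertoolz import partition

half_hours_per_day = 24 * 2                 # 48 observations per day at 30-minute resolution
week = [[0.0]] * (7 * half_hours_per_day)   # a week of placeholder scaled values
print(len(list(partition(half_hours_per_day, week))))   # 7 -> one tuple per day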
Example #7
def dets_filter(*args, event_codes=None, operator='&', or_bykik=True):
    """
    Return valid l3t/pyami filter strings in a useful format.

    The function takes in arbitrary dets whose prefixes are the ami names,
    along with lows and highs. Event codes are handled as a special case, since
    you always want high vs low.

    .. note::
        By default this will treat bykik as an l3t pass! This is so you don't
        lose your off shots when the l3t trigger is in veto mode. You can
        disable this with ``or_bykik=False``, but this will remain the default
        behavior for backwards compatibility and to prevent someone from losing
        shots that they wanted in the data.

    Parameters
    ----------
    *args: (`AmiDet`, ``float``, ``float``) n times
        A sequence of (detector, low, high), which create filters that make
        sure the detector is between low and high. You can omit the first
        `AmiDet` as a shorthand for the current monitor, assuming a monitor has
        been set with `Daq.set_monitor` or `set_monitor_det`.

    event_codes: ``list``, optional
        A list of event codes to include in the filter. l3pass will happen when
        the event code is present.

    operator: ``str``, optional
        The operator for combining the detector ranges and event codes. This
        can either be ``|`` to ``or`` the conditions together, so l3pass will
        happen if any filter passes, or it can be left at the default ``&`` to
        ``and`` the conditions together, so l3pass will only happen if all
        filters pass.

    or_bykik: ``bool``, optional
        True by default, appends an ``or`` condition that marks l3t pass when
        we see the bykik event code. This makes sure the off shots make it into
        the data if we're in l3t veto mode.

    Returns
    -------
    filter_string: ``str``
        A valid filter string for `AmiDet` or for ``pyami.set_l3t``
    """
    filter_strings = []
    if len(args) % 3 == 2:
        # One arg missing, add the monitor det as first arg
        if monitor_det is None:
            raise RuntimeError('Did not receive args in a multiple of 3, but '
                               'monitor_det is not set. Aborting.')
        else:
            args = [monitor_det] + list(args)
    for det, lower, upper in partition(3, args):
        if isinstance(det, str):
            ami_name = det
        elif isinstance(det, AmiDet):
            ami_name = det.prefix
        else:
            raise TypeError('Must use AmiDet or string for filtering!')
        filter_strings.append(basic_filter(ami_name, lower, upper))
    if event_codes is not None:
        for code in event_codes:
            filter_strings.append(evr_filter(code))
    if len(filter_strings) == 0:
        return None
    else:
        base = concat_filter_strings(filter_strings, operator=operator)
        if or_bykik:
            bykik = evr_filter(162)
            return concat_filter_strings([base, bykik], operator='|')
        else:
            return base
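A hedged usage sketch; the detector name, thresholds, and event code below are made up, and a configured monitor/DAQ session is assumed:

# l3t pass when the hypothetical ami channel reads between 0.1 and 5.0
# and event code 140 is present; bykik shots still pass by default.
filter_string = dets_filter('XPP:AMI:DET01', 0.1, 5.0,
                            event_codes=[140],
                            operator='&')
print(filter_string)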
Example #8
import numpy as np
import pandas as pd

#########TRAIN DATA##########
#import train data
df = pd.read_csv("/Users/nronzoni/Downloads/Multivariate-Time-series-clustering-main/I35W_NB 30min 2013/S61.csv")
df
#treatment of the first variable
first_train = df.loc[:, 'Flow']
first_train = np.array(first_train)
first_train = first_train.reshape((len(first_train), 1))
#from array to list
first_train = first_train.tolist()
len(first_train)
from toolz.itertoolz import sliding_window, partition
#for every day of the train set store the flow observations
days_first = list(partition(48, first_train))
days_first
len(days_first)
#from list to multidimensional array
days_first = np.asarray(days_first)
days_first
from tslearn.utils import to_time_series, to_time_series_dataset
#create univariate series for normalized flow_observation 
first_time_series = to_time_series(days_first)
print(first_time_series.shape)
#normalize time series
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, TimeSeriesScalerMinMax
first_time_series = TimeSeriesScalerMinMax(value_range=(0.0, 1.0)).fit_transform(first_time_series)
#first_time_series = TimeSeriesScalerMeanVariance(mu=0.0, std=1.0).fit_transform(first_time_series)
print(first_time_series.shape)
Example #9
def main(mytimer: func.TimerRequest,
         document: func.Out[func.Document]) -> None:
    utc_timestamp = (datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat())

    get_vars()

    # Read the secrets from the environment
    client_id = os.environ.get("SWISSCOM_CLIENT_ID")
    client_secret = os.environ.get("SWISSCOM_CLIENT_SECRET")

    # See https://requests-oauthlib.readthedocs.io/en/latest/oauth2_workflow.html#backend-application-flow.
    client = BackendApplicationClient(client_id=client_id)
    oauth = OAuth2Session(client=client)

    # Fetch an access token.
    oauth.fetch_token(token_url=TOKEN_URL,
                      client_id=client_id,
                      client_secret=client_secret)

    # Use the access token to query an endpoint.
    resp = oauth.get(
        "https://api.swisscom.com/layer/heatmaps/demo/grids/municipalities/261",
        headers={"scs-version": "2"},
    )

    if not resp.ok:
        logging.error("Failed to reach Swisscom API")
        return

    tiles = resp.json()["tiles"]

    logging.info("Loaded %d tiles from Swisscom", len(tiles))

    tile_density = {}

    tile_ids = (tile["tileId"] for tile in tiles)
    for chunk in partition(100, tile_ids):
        resp = oauth.get(
            "https://api.swisscom.com/layer/heatmaps/demo/heatmaps/dwell-density/daily/2020-03-28",  # TODO this should load data for the previous day instead
            params={"tiles": chunk},
            headers={"scs-version": "2"},
        )

        if not resp.ok:
            logging.error("Failed to reach Swisscom API: %s", resp.json())
            continue

        tile_density.update(
            (tile["tileId"], tile["score"]) for tile in resp.json()["tiles"])

    logging.info("Loaded densitiy for %d tiles from Swisscom",
                 len(tile_density))

    documents = func.DocumentList()
    for tile in tiles:
        tile_id = tile["tileId"]
        if tile_id not in tile_density:
            continue

        density = tile_density[tile_id]
        location = {
            "type": "Point",
            "coordinates": [tile["ll"]["x"], tile["ll"]["y"]]
        }

        documents.append(
            func.Document.from_dict({
                "id": str(tile_id),
                "tileId": tile_id,
                "density": density,
                "location": location
            }))

    document.set(documents)

    logging.info("Finished outputting data")
Example #10
def rowTuples(rows):
    result = list(partitionby(is_header_row, rows))
    return partition(2, result[1:])  # Skip Chapter 0
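A hedged sketch of the partitionby/partition pattern above, with made-up rows and a hypothetical is_header_row: partitionby splits the rows into alternating header/body groups, and partition(2, ...) then pairs each header group with the body that follows it (the source skips result[0] as "Chapter 0"; in the made-up rows below that first group is a preamble).

from toolz.itertoolz import partition, partitionby

rows = ['preamble', '# Chapter 1', 'body 1a', 'body 1b', '# Chapter 2', 'body 2a']

def is_header_row(row):
    return row.startswith('#')

groups = list(partitionby(is_header_row, rows))
# [('preamble',), ('# Chapter 1',), ('body 1a', 'body 1b'), ('# Chapter 2',), ('body 2a',)]
print(list(partition(2, groups[1:])))
# [(('# Chapter 1',), ('body 1a', 'body 1b')), (('# Chapter 2',), ('body 2a',))]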