# Assumed context for this snippet: Python 2 (ifilter/imap), hypothesis
# strategies as `st`, fn.py's Stream, and toolz; rolling_sum and
# vcf_dict_strategy_factory are local helpers defined elsewhere.
import operator
import string
from functools import partial
from itertools import ifilter, imap

import hypothesis.strategies as st
from fn import Stream
from toolz import compose, partition


def ref_with_vcf_dicts_strategy_factory(draw):
    '''
    Generate vcf records for randomish locations along a randomishly generated
    reference sequence. Each vcf record generator will have a randomish sized
    "chunk" of the reference to use.

    Returns (reference sequence(str), iterable(vcf dicts))
    '''
    seq = draw(st.text(alphabet='ACGT', min_size=10, max_size=20))
    size = len(seq)
    # This gets you a list of numbers that are randomish and increasing
    ranges = draw(
        rolling_sum(1, 3, int(size / 2)).map(
            lambda xs: ifilter(lambda x: x < size, xs)))  # .filter(_not(bool))
    # Stream lets you re-use a generator without draining it.
    # Pairs will hold start/stop values for each part of sequence
    pairs = Stream() << partition(2, ranges)
    # POSs will contain the start position of each vcf row
    POSs = Stream() << imap(operator.itemgetter(0), pairs)
    # VCF files start at index 1; python starts at 0
    pairs_offset_1 = imap(lambda x: (x[0] - 1, x[1] - 1), pairs)
    # grab the pieces of the reference to build our elts from
    chunks = map(lambda x: seq[x[0]:x[1]], pairs_offset_1)
    # random chromosome name
    chrom = draw(st.text(string.ascii_letters))
    # Draw a new record for each of the Positions we have made
    vcfs = map(compose(draw, partial(vcf_dict_strategy_factory, chrom)),
               POSs, chunks)
    # TODO: ranges must be non-empty. Assuming vcfs for now.
    # vcfs can be a generator
    # assume(len(vcfs) > 0)
    return (seq, vcfs)
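# Hypothetical usage sketch for the factory above, assuming it is intended to
# be wrapped as a Hypothesis composite strategy (st.composite is what supplies
# the `draw` argument):
ref_with_vcf_dicts = st.composite(ref_with_vcf_dicts_strategy_factory)

# from hypothesis import given
# @given(ref_with_vcf_dicts())
# def test_vcf_roundtrip(ref_and_vcfs):
#     seq, vcfs = ref_and_vcfs
#     ...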
def agentToDict(agent):
    # Parse the agent's repr (presumably space-separated "key: value" tokens)
    # into a dict: strip colons, split on spaces, pair consecutive tokens.
    return dict(partition(2, repr(agent).replace(":", "").split(" ")))
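# Worked example of the parsing above, with a hypothetical agent whose repr is
# space-separated "key: value" tokens; note the values remain strings, and any
# value containing a space would break the pairing.
from toolz import partition

class FakeAgent:
    def __repr__(self):
        return "x: 1 y: 2"

print(agentToDict(FakeAgent()))  # {'x': '1', 'y': '2'}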
from sklearn.preprocessing import MinMaxScaler, StandardScaler
scaler = MinMaxScaler(feature_range=(0, 1))
#scaler=StandardScaler()
#fit train data
scaler_flow_train = scaler.fit(flow)
print('Min: %f, Max: %f' % (scaler_flow_train.data_min_, scaler_flow_train.data_max_))
#scale train data
normalized_flow = scaler_flow_train.transform(flow)
normalized_flow
#from array to list
normalized_flow = normalized_flow.tolist()
len(normalized_flow)
from toolz.itertoolz import sliding_window, partition
#for every day of the train set store the flow observations
day_flow = list(partition(240, normalized_flow))
day_flow
len(day_flow)
#from list to multidimensional array
day_flow = np.asarray(day_flow)
day_flow
from tslearn.utils import to_time_series, to_time_series_dataset
#create univariate series for normalized flow observations
first_time_series = to_time_series(day_flow)
print(first_time_series.shape)
#treatment of density variable
density = df.loc[:, 'Density']
#normalization/standardization of train data
density = np.array(density)
density = density.reshape((len(density), 1))
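# Minimal sketch of the fit/transform split used above, on toy data (assuming
# only scikit-learn): fit learns data_min_/data_max_, transform applies them.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

toy = np.array([10.0, 20.0, 40.0]).reshape((3, 1))  # scalers expect a 2-D column
toy_scaler = MinMaxScaler(feature_range=(0, 1)).fit(toy)
print(toy_scaler.transform(toy).ravel())  # [0.    0.333... 1.  ]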
def test_partition():
    assert list(partition(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)]
    assert list(partition(3, range(7))) == [(0, 1, 2), (3, 4, 5)]
    assert list(partition(3, range(4), pad=-1)) == [(0, 1, 2), (3, -1, -1)]
    assert list(partition(2, [])) == []
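# partition is lazy (it returns an iterator), which is why the test wraps each
# call in list(...); a minimal check, assuming toolz is installed:
from toolz import partition

chunks = partition(2, [1, 2, 3, 4])
print(next(chunks))  # (1, 2)
print(next(chunks))  # (3, 4)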
flow = flow.reshape((len(flow), 1))
from sklearn.preprocessing import MinMaxScaler, StandardScaler
scaler = MinMaxScaler(feature_range=(0, 1))
#scaler=StandardScaler()
#fit train data
scaler_flow_train = scaler.fit(flow)
#print('Min: %f, Max: %f' % (scaler_flow_train.data_min_, scaler_flow_train.data_max_))
#scale train data
normalized_flow = scaler_flow_train.transform(flow)
normalized_flow
#from array to list
normalized_flow = normalized_flow.tolist()
len(normalized_flow)
from toolz.itertoolz import sliding_window, partition
#for every day of the train set store the flow observations
day_flow = list(partition(48, normalized_flow))
day_flow
len(day_flow)
#from list to multidimensional array
day_flow = np.asarray(day_flow)
day_flow
from tslearn.utils import to_time_series, to_time_series_dataset
#create univariate series for normalized flow observations
first_time_series = to_time_series(day_flow)
print(first_time_series.shape)
#treatment of speed variable
speed = df.loc[:, 'Speed']
#normalization/standardization of train data
speed = np.array(speed)
speed = speed.reshape((len(speed), 1))
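# Minimal sketch of the day-chunking step above with toy numbers: partition
# groups a flat list of observations into fixed-size per-day tuples, dropping
# any trailing partial day.
from toolz.itertoolz import partition

obs = list(range(7))            # 7 observations
print(list(partition(3, obs)))  # [(0, 1, 2), (3, 4, 5)] -- 6 is dropped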
def dets_filter(*args, event_codes=None, operator='&', or_bykik=True):
    """
    Return valid l3t/pyami filter strings in a useful format.

    The function takes in arbitrary dets whose prefixes are the ami names,
    along with lows and highs. Event codes are handled as a special case,
    since you always want high vs low.

    .. note::
        By default this will treat bykik as an l3t pass! This is so you
        don't lose your off shots when the l3t trigger is in veto mode. You
        can disable this with ``or_bykik=False``, but this will remain the
        default behavior for backwards compatibility and to prevent someone
        from losing shots that they wanted in the data.

    Parameters
    ----------
    *args: (`AmiDet`, ``float``, ``float``) n times
        A sequence of (detector, low, high), which create filters that make
        sure the detector is between low and high. You can omit the first
        `AmiDet` as a shorthand for the current monitor, assuming a monitor
        has been set with `Daq.set_monitor` or `set_monitor_det`.

    event_codes: ``list``, optional
        A list of event codes to include in the filter. l3pass will be when
        the event code is present.

    operator: ``str``, optional
        The operator for combining the detector ranges and event codes. This
        can either be ``|`` to ``or`` the conditions together, so l3pass will
        happen if any filter passes, or it can be left at the default ``&``
        to ``and`` the conditions together, so l3pass will only happen if all
        filters pass.

    or_bykik: ``bool``, optional
        True by default, appends an ``or`` condition that marks l3t pass when
        we see the bykik event code. This makes sure the off shots make it
        into the data if we're in l3t veto mode.

    Returns
    -------
    filter_string: ``str``
        A valid filter string for `AmiDet` or for ``pyami.set_l3t``
    """
    filter_strings = []
    if len(args) % 3 == 2:
        # One arg missing, add the monitor det as first arg
        if monitor_det is None:
            raise RuntimeError('Did not receive args in a multiple of 3, but '
                               'monitor_det is not set. Aborting.')
        else:
            args = [monitor_det] + list(args)
    for det, lower, upper in partition(3, args):
        if isinstance(det, str):
            ami_name = det
        elif isinstance(det, AmiDet):
            ami_name = det.prefix
        else:
            raise TypeError('Must use AmiDet or string for filtering!')
        filter_strings.append(basic_filter(ami_name, lower, upper))
    if event_codes is not None:
        for code in event_codes:
            filter_strings.append(evr_filter(code))
    if len(filter_strings) == 0:
        return None
    else:
        base = concat_filter_strings(filter_strings, operator=operator)
        if or_bykik:
            bykik = evr_filter(162)
            return concat_filter_strings([base, bykik], operator='|')
        else:
            return base
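# Hypothetical call sketch for dets_filter above, assuming `det` is an AmiDet
# and 141 is a valid event code at the facility; the result l3-passes when the
# detector reads between 0.1 and 0.9 OR code 141 fires (and, by default, also
# on the bykik code 162):
# filter_string = dets_filter(det, 0.1, 0.9, event_codes=[141], operator='|')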
import numpy as np
import pandas as pd  # needed for pd.read_csv below

#########TRAIN DATA##########
#import train data
df = pd.read_csv("/Users/nronzoni/Downloads/Multivariate-Time-series-clustering-main/I35W_NB 30min 2013/S61.csv")
df
#treatment of the first variable
first_train = df.loc[:, 'Flow']
first_train = np.array(first_train)
first_train = first_train.reshape((len(first_train), 1))
#from array to list
first_train = first_train.tolist()
len(first_train)
from toolz.itertoolz import sliding_window, partition
#for every day of the train set store the flow observations
days_first = list(partition(48, first_train))
days_first
len(days_first)
#from list to multidimensional array
days_first = np.asarray(days_first)
days_first
from tslearn.utils import to_time_series, to_time_series_dataset
#create univariate series for normalized flow observations
first_time_series = to_time_series(days_first)
print(first_time_series.shape)
#normalize time series
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, TimeSeriesScalerMinMax
first_time_series = TimeSeriesScalerMinMax(value_range=(0.0, 1.0)).fit_transform(first_time_series)
#first_time_series = TimeSeriesScalerMeanVariance(mu=0.0, std=1.0).fit_transform(first_time_series)
print(first_time_series.shape)
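# Minimal sketch of the tslearn conversion on toy data (assuming only
# tslearn): to_time_series_dataset stacks equal-length series into a 3-D
# array of shape (n_series, series_length, n_features).
from tslearn.utils import to_time_series_dataset

toy_days = [[1, 2, 3], [4, 5, 6]]
print(to_time_series_dataset(toy_days).shape)  # (2, 3, 1)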
import datetime
import logging
import os

import azure.functions as func
from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session
from toolz import partition_all

# get_vars() and TOKEN_URL are assumed to be defined elsewhere in this module.


def main(mytimer: func.TimerRequest, document: func.Out[func.Document]) -> None:
    utc_timestamp = (datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat())
    get_vars()

    # Read the secrets from the environment
    client_id = os.environ.get("SWISSCOM_CLIENT_ID")
    client_secret = os.environ.get("SWISSCOM_CLIENT_SECRET")

    # See https://requests-oauthlib.readthedocs.io/en/latest/oauth2_workflow.html#backend-application-flow.
    client = BackendApplicationClient(client_id=client_id)
    oauth = OAuth2Session(client=client)

    # Fetch an access token.
    oauth.fetch_token(token_url=TOKEN_URL,
                      client_id=client_id,
                      client_secret=client_secret)

    # Use the access token to query an endpoint.
    resp = oauth.get(
        "https://api.swisscom.com/layer/heatmaps/demo/grids/municipalities/261",
        headers={"scs-version": "2"},
    )
    if not resp.ok:
        logging.error("Failed to reach Swisscom API")
        return

    tiles = resp.json()["tiles"]
    logging.info("Loaded %d tiles from Swisscom", len(tiles))

    tile_density = {}
    tile_ids = (tile["tileId"] for tile in tiles)
    # partition_all keeps the trailing incomplete chunk; the original plain
    # partition(100, ...) would silently drop the last len(tiles) % 100 tiles.
    for chunk in partition_all(100, tile_ids):
        resp = oauth.get(
            "https://api.swisscom.com/layer/heatmaps/demo/heatmaps/dwell-density/daily/2020-03-28",
            # TODO this should load data for the previous day instead
            params={"tiles": chunk},
            headers={"scs-version": "2"},
        )
        if not resp.ok:
            logging.error("Failed to reach Swisscom API: %s", resp.json())
            continue
        tile_density.update(
            (tile["tileId"], tile["score"]) for tile in resp.json()["tiles"])
    logging.info("Loaded density for %d tiles from Swisscom", len(tile_density))

    documents = func.DocumentList()
    for tile in tiles:
        tile_id = tile["tileId"]
        if tile_id not in tile_density:
            continue
        density = tile_density[tile_id]
        location = {
            "type": "Point",
            "coordinates": [tile["ll"]["x"], tile["ll"]["y"]]
        }
        documents.append(
            func.Document.from_dict({
                "id": str(tile_id),
                "tileId": tile_id,
                "density": density,
                "location": location
            }))
    document.set(documents)
    logging.info("Finished outputting data")
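# Why partition_all above: plain partition drops a trailing incomplete chunk,
# which here would mean silently skipping the last batch of tiles. A minimal
# illustration:
from toolz import partition, partition_all

ids = list(range(5))
print(list(partition(2, ids)))      # [(0, 1), (2, 3)] -- 4 is lost
print(list(partition_all(2, ids)))  # [(0, 1), (2, 3), (4,)]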
def rowTuples(rows):
    result = list(partitionby(is_header_row, rows))
    return partition(2, result[1:])  # Skip Chapter 0
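# Sketch of the pairing above with toy rows and a hypothetical is_header_row:
# partitionby yields alternating runs [preamble, header, body, header, body,
# ...], and partition(2, ...) then zips each header run with its body run.
from toolz import partition, partitionby

rows = ["Chapter 0", "H1", "a", "b", "H2", "c"]
is_header_row = lambda r: r.startswith("H")
groups = list(partitionby(is_header_row, rows))
# [('Chapter 0',), ('H1',), ('a', 'b'), ('H2',), ('c',)]
print(list(partition(2, groups[1:])))
# [(('H1',), ('a', 'b')), (('H2',), ('c',))]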