def spc_national_map_previews(pkg):
    return F.filter(
        F.first,
        [(F.first(
            F.filter(
                _is_cesium_view,
                toolkit.get_action('resource_view_list')(
                    {'user': toolkit.c.user}, {'id': res['id']}))), res)
         for res in pkg['resources']])
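# A minimal sketch of the pattern above, assuming only funcy: pair every item
# with the first candidate that satisfies a predicate, then keep the pairs
# where a match was found. The helper and its arguments are made-up stand-ins
# for the CKAN resources/views used in spc_national_map_previews.
import funcy as F

def first_matching_pairs(items, candidates_for, pred):
    # F.first returns None when nothing matches, and F.filter(F.first, ...)
    # drops exactly those (None, item) pairs.
    return F.filter(F.first,
                    [(F.first(F.filter(pred, candidates_for(item))), item)
                     for item in items])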
def _load_call_info(self, dirname):
    path = os.path.join(self._path, dirname)
    files = os.listdir(path)

    arg_files = sorted(filter(r'^a', files))
    args = tuple(map(self._read_data,
                     (os.path.join(path, f) for f in arg_files)))

    kwarg_files = filter(r'^k', files)
    kwarg_files = {filename[1:]: os.path.join(path, filename)
                   for filename in kwarg_files}
    kwargs = walk_values(self._read_data, kwarg_files)

    return args, kwargs
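# A minimal sketch of the funcy idioms used in _load_call_info, with made-up
# file names: a regex string acts as the predicate for filter(), and
# walk_values() applies a function to every value of a dict.
from funcy import filter, walk_values

_names = ['a0', 'a1', 'k_mode', 'other']
_arg_files = sorted(filter(r'^a', _names))          # ['a0', 'a1']
_kwarg_paths = {name[1:]: '/tmp/' + name            # hypothetical paths
                for name in filter(r'^k', _names)}  # {'_mode': '/tmp/k_mode'}
_lengths = walk_values(len, _kwarg_paths)           # {'_mode': 11}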
def get_minimum_metric_set_keys_from_mapping_file(name):
    """
    Return the list of metric keys that should be collected for the container.

    These metrics are defined in mapping files for each image name.
    """
    path = os.path.join('mappings', name + '.yml')
    mappings = yaml.safe_load(util.get_asset_file_contents(path))
    # get_in expects a key path (a sequence), so wrap 'optional' in a list.
    is_mandatory_metric = lambda x: not funcy.get_in(x, ['optional'], False)
    return list(
        map(lambda x: x['key'], funcy.filter(is_mandatory_metric, mappings)))
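# A minimal sketch of the mapping-file filtering above, using a made-up
# mappings list (the real structure comes from the per-image YAML files):
# every entry carries a 'key' and may carry an 'optional' flag.
import funcy

_example_mappings = [
    {'key': 'cpu.usage'},
    {'key': 'cache.hits', 'optional': True},
]
_mandatory = list(
    map(lambda x: x['key'],
        funcy.filter(lambda x: not funcy.get_in(x, ['optional'], False),
                     _example_mappings)))
# _mandatory == ['cpu.usage']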
def _map_gdl_to_publication(data_dict, obj):
    dataset = {
        "id": str(uuid.uuid3(uuid.NAMESPACE_DNS, str(data_dict['id']))),
        "type": "publications",
        "title": data_dict['title'],
        "creator": [a['name'] for a in data_dict['authors']],
        # "subject": data_dict,
        "notes": data_dict['description'],
        "publisher": data_dict.get('relatedOrganisation'),
        # "contributor": [a['name'] for a in data_dict['authors']],
        "date": data_dict.get('created'),
        "metadata_modified": data_dict.get('created'),
        # "publication_type": data_dict,
        # "format": data_dict,
        "identifier": data_dict['identifier'],
        "source": data_dict.get('source'),
        # "language": data_dict,
        # "relation": data_dict,
        # "spatial": data_dict,
        # "rights": data_dict,
        "license_id": 'notspecified',
        "member_countries": 'other',  # relatedCountry, optional
        "harvest_source": 'GDL'
    }

    thematic_area = data_dict.get('thematicArea', {}).get('area')
    if thematic_area:
        dataset["thematic_area_string"] = thematic_area_mapping.get(
            thematic_area)

    related_country = data_dict.get('relatedCountry')
    if related_country:
        schema = sh.scheming_get_dataset_schema('publications')
        choices = sh.scheming_field_by_name(schema['dataset_fields'],
                                            'member_countries')['choices']
        member_country = F.first(
            F.filter(
                F.compose(F.rpartial(contains, related_country),
                          itemgetter('label')), choices))
        if member_country:
            dataset['member_countries'] = member_country['value']
            spatial = get_extent_for_country(member_country['label'])
            if spatial:
                dataset['spatial'] = spatial['value']

    if data_dict['file']:
        res_url = _gl_url(obj.source.url, 'download') + '?id=' + str(
            data_dict['id'])
        res = {'name': data_dict['file'], 'url': res_url}
        res['format'] = splitext(res['name'])[1].lstrip('.')
        dataset['resources'] = [res]

    return dataset
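# A minimal sketch of the member-country lookup above, with a made-up choices
# list: F.rpartial(contains, country) builds "country in label", and composing
# it with itemgetter('label') turns that into a predicate over choice dicts.
import funcy as F
from operator import contains, itemgetter

_choices = [
    {'value': 'FJ', 'label': 'Fiji'},
    {'value': 'WS', 'label': 'Samoa'},
]
_match = F.first(
    F.filter(F.compose(F.rpartial(contains, 'Samoa'), itemgetter('label')),
             _choices))
# _match == {'value': 'WS', 'label': 'Samoa'}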
def get_onus(ip):
    mark, ports = get_pon_ports(ip)[:-1]
    if mark == 'fail':
        return ('fail', None, ip)
    try:
        child = telnet(ip)
        gpo = partial(get_port_onus, child)
        rslt = lmap(gpo, ports)
        child.sendline('exit')
        child.close()
    except (pexpect.EOF, pexpect.TIMEOUT):
        return ('fail', None, ip)
    rslt1 = filter(lambda x: bool(x[1]), rslt)
    return ('success', rslt1, ip)
def get_infs(ip):
    def _inf(record):
        name = re_find(r'interface\s+(X?Gigabit\S+)', record)
        desc = re_find(r'description\s+(\S+ *\S*)', record)
        group = re_find(r'link-aggregation\s+(group\s+\d+)', record)
        return dict(name=name, desc=desc, group=group)

    try:
        child = telnet(ip)
        rslt = do_some(child, 'disp cu interface')
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT):
        return ('fail', None, ip)
    rslt1 = filter(r'X?GigabitEthernet', rslt.split('#'))
    rslt2 = map(_inf, rslt1)
    return ('success', rslt2, ip)
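# A minimal sketch of the funcy helpers used in get_infs, on a made-up config
# dump: re_find() returns the first captured group (or None), and a regex
# string works directly as the filter() predicate.
from funcy import re_find, filter, map

_dump = "interface XGigabitEthernet0/1\n description uplink\n#\ninterface Vlanif10\n#"
_records = list(filter(r'X?GigabitEthernet', _dump.split('#')))
_names = list(map(lambda r: re_find(r'interface\s+(X?Gigabit\S+)', r), _records))
# _names == ['XGigabitEthernet0/1']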
def get_data(self):
    all_transactions = filter(
        lambda t: t["type"] in ("request-changes", "accept"),
        cat(pluck("transactions", self.raw_data)),
    )
    accept_transactions, reject_transactions = split(
        lambda t: t["type"] == "accept", all_transactions)

    most_accepting_author, most_accepting_count = Counter(
        count_by(itemgetter("authorPHID"),
                 accept_transactions)).most_common(1)[0]
    most_rejecting_author, most_rejecting_count = Counter(
        count_by(itemgetter("authorPHID"),
                 reject_transactions)).most_common(1)[0]

    return (
        {
            "author": self.users_mapping[most_accepting_author],
            "count": most_accepting_count,
        },
        {
            "author": self.users_mapping[most_rejecting_author],
            "count": most_rejecting_count,
        },
    )
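# A minimal sketch of the funcy calls above with made-up transactions:
# split() partitions a sequence by a predicate in one pass, and count_by()
# tallies items by a key function.
from collections import Counter
from operator import itemgetter
from funcy import split, count_by

_txns = [
    {'type': 'accept', 'authorPHID': 'u1'},
    {'type': 'accept', 'authorPHID': 'u1'},
    {'type': 'request-changes', 'authorPHID': 'u2'},
]
_accepts, _rejects = split(lambda t: t['type'] == 'accept', _txns)
_top_acceptor = Counter(
    count_by(itemgetter('authorPHID'), _accepts)).most_common(1)[0]
# _top_acceptor == ('u1', 2)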
def tfilter(f, *seq):
    return tuple(F.filter(f, *seq)) if seq \
        else lambda *xs: tuple(F.filter(f, *xs))
def get_extent_for_country(country):
    spatial = F.first(
        F.filter(F.compose(F.partial(eq, country), itemgetter('text')),
                 get_eez_options()))
    return spatial
def scan_etl_events(etl_id,
                    selected_columns: Optional[Iterable[str]] = None) -> None:
    """
    Scan for all events belonging to a specific ETL.

    If a list of columns is provided, then the output is limited to those
    columns. But note that the target (schema.table) and the event are always
    present.
    """
    ddb = DynamoDBStorage.factory()
    table = ddb.get_table(create_if_not_exists=False)
    available_columns = [
        "target", "step", "event", "timestamp", "elapsed", "rowcount"
    ]
    if selected_columns is None:
        selected_columns = available_columns
    # We will always select "target" and "event" to have a meaningful output.
    columns = list(
        fy.filter(
            frozenset(selected_columns).union(["target", "event"]),
            available_columns))
    keys = [
        "extra.rowcount" if column == "rowcount" else column
        for column in columns
    ]

    # We need to scan here since the events are stored by "target" and not by "etl_id".
    # TODO Try to find all the "known" relations and query on them with a filter on the etl_id.
    client = boto3.client("dynamodb")
    paginator = client.get_paginator("scan")
    response_iterator = paginator.paginate(
        TableName=table.name,
        ConsistentRead=False,
        ExpressionAttributeNames={"#timestamp": "timestamp"},
        ExpressionAttributeValues={
            ":etl_id": {"S": etl_id},
            ":marker": {"S": _DUMMY_TARGET},
            ":start_event": {"S": STEP_START},
        },
        FilterExpression="etl_id = :etl_id and target <> :marker and event <> :start_event",
        ProjectionExpression="target, step, event, #timestamp, elapsed, extra.rowcount",
        ReturnConsumedCapacity="TOTAL",
        # PaginationConfig={
        #     "PageSize": 100
        # }
    )
    logger.info("Scanning events table '%s' for elapsed times", table.name)
    consumed_capacity = 0.0
    scanned_count = 0
    rows: List[List[str]] = []
    deserialize = TypeDeserializer().deserialize

    for response in response_iterator:
        consumed_capacity += response["ConsumedCapacity"]["CapacityUnits"]
        scanned_count += response["ScannedCount"]
        # We need to turn something like "'event': {'S': 'finish'}" into "'event': 'finish'".
        deserialized = [{
            key: deserialize(value)
            for key, value in item.items()
        } for item in response["Items"]]
        # Lookup "elapsed" or "extra.rowcount" (the latter as ["extra", "rowcount"]).
        items = [{key: fy.get_in(item, key.split(".")) for key in keys}
                 for item in deserialized]
        # Scope down to selected keys and format the columns.
        rows.extend([_format_output_column(key, item[key]) for key in keys]
                    for item in items)

    logger.info("Scan result: scanned count = %d, consumed capacity = %f",
                scanned_count, consumed_capacity)
    if "timestamp" in keys:
        rows.sort(key=itemgetter(keys.index("timestamp")))
    else:
        rows.sort(key=itemgetter(keys.index("target")))
    print(etl.text.format_lines(rows, header_row=columns))
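# A minimal sketch of the two conversions in the scan loop above, on a made-up
# DynamoDB item: TypeDeserializer turns {'S': ...}/{'N': ...} wrappers into
# plain Python values, and fy.get_in() follows the "extra.rowcount" path.
import funcy as fy
from boto3.dynamodb.types import TypeDeserializer

_deserialize = TypeDeserializer().deserialize
_raw = {'event': {'S': 'finish'}, 'extra': {'M': {'rowcount': {'N': '42'}}}}
_item = {key: _deserialize(value) for key, value in _raw.items()}
_rowcount = fy.get_in(_item, 'extra.rowcount'.split('.'))
# _item['event'] == 'finish'; _rowcount == Decimal('42')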
def nfilter(pred, nodes):
    return NodeSet.from_seq(filter(pred, nodes.as_node_set()))
def select_in_execution_order(
    relations: Sequence[RelationDescription],
    selector: TableSelector,
    include_dependents=False,
    include_immediate_views=False,
    continue_from: Optional[str] = None,
) -> List[RelationDescription]:
    """
    Return list of relations that were selected, optionally adding dependents or skipping forward.

    The values supported for skipping forward are:
    - '*' to start from the beginning
    - ':transformations' to only run transformations of selected relations
    - a specific relation to continue from that one in the original execution order
    - a specific schema to include all relations in that source schema as well as
      any originally selected transformation

    Note that these operate on the list of relations selected by the selector patterns.
    The option of '*' exists so we can have a default value in our pipeline definitions.
    The last option of specifying a schema is most useful with a source schema when you
    want to restart the load step followed by all transformations.

    No error is raised when the selector does not select any relations.
    An error is raised when the "continue from" condition does not resolve to a list of relations.
    """
    logger.info("Pondering execution order of %d relation(s)", len(relations))
    execution_order = order_by_dependencies(relations)

    selected = find_matches(execution_order, selector)
    if not selected:
        logger.warning("Found no relations matching: %s", selector)
        return []

    if include_dependents:
        dependents = find_dependents(execution_order, selected)
        combined = frozenset(selected).union(dependents)
        selected = [
            relation for relation in execution_order if relation in combined
        ]
    elif include_immediate_views:
        immediate_views = find_immediate_dependencies(execution_order, selector)
        combined = frozenset(selected).union(immediate_views)
        selected = [
            relation for relation in execution_order if relation in combined
        ]

    if continue_from is None or continue_from == "*":
        return selected

    transformations = [
        relation for relation in selected if relation.is_transformation
    ]
    if continue_from in (":transformations", ":transformation"):
        if transformations:
            logger.info(
                "Continuing with %d transformation(s) in selected relations",
                len(transformations))
            return transformations
        raise InvalidArgumentError("found no transformations to continue from")

    logger.info("Trying to fast forward to '%s' within %d relation(s)",
                continue_from, len(selected))
    starting_from_match = list(
        fy.dropwhile(lambda relation: relation.identifier != continue_from,
                     selected))
    if starting_from_match:
        logger.info(
            "Continuing with %d relation(s) after skipping %d",
            len(starting_from_match),
            len(selected) - len(starting_from_match),
        )
        return starting_from_match

    single_schema = frozenset(
        fy.filter(lambda relation: relation.source_name == continue_from,
                  selected))
    if single_schema.intersection(transformations):
        raise InvalidArgumentError(
            f"schema '{continue_from}' contains transformations")
    if single_schema:
        combined = single_schema.union(transformations)
        logger.info(
            "Continuing with %d relation(s) in '%s' and %d transformation(s)",
            len(single_schema),
            continue_from,
            len(combined) - len(single_schema),
        )
        return [
            relation for relation in execution_order if relation in combined
        ]

    raise InvalidArgumentError("found no matching relations to continue from")
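# A minimal sketch of the "fast forward" step above, using plain strings in
# place of RelationDescription objects: fy.dropwhile() skips everything before
# the first identifier that matches continue_from.
import funcy as fy

_order = ['schema.a', 'schema.b', 'schema.c']
_rest = list(fy.dropwhile(lambda identifier: identifier != 'schema.b', _order))
# _rest == ['schema.b', 'schema.c']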
def filter(f, *seq):
    return F.filter(f, *seq) if seq \
        else lambda *xs: F.filter(f, *xs)
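# A minimal usage sketch of the two wrappers above (assuming both tfilter and
# this filter are in scope): called with a sequence they filter immediately,
# called with only the predicate they return a reusable filter.
_evens = tfilter(lambda x: x % 2 == 0, [1, 2, 3, 4])    # (2, 4)
_keep_even = filter(lambda x: x % 2 == 0)               # curried form
_also_evens = tuple(_keep_even([1, 2, 3, 4]))           # (2, 4)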
def sync(self):
    key_properties = self.catalog.get('key_properties')
    table = self.TABLE

    singer.write_schema(self.catalog.get('stream'),
                        self.catalog.get('schema'),
                        key_properties=key_properties)

    start = self.get_start_date(table)
    end = start
    interval = timedelta(hours=1)

    LOGGER.info('Syncing outbound activities.')

    while end < datetime.now(pytz.utc):
        self.login()
        start = end
        end = start + interval
        LOGGER.info("Fetching activities from {} to {}".format(start, end))

        _filter = self.make_filter(start, end)
        field_selector = get_field_selector(self.catalog.get('schema'))

        hasMore = True

        while hasMore:
            try:
                results = self.client.service.readRecentOutboundActivities(
                    _filter)
            except suds.WebFault as e:
                if '116' in e.fault.faultstring:
                    hasMore = False
                    break
                else:
                    raise

            result_dicts = [
                suds.sudsobject.asdict(result) for result in results
            ]
            parsed_results = [
                field_selector(result) for result in result_dicts
            ]

            for result in parsed_results:
                ids = [
                    'createdDate', 'activityType', 'contactId', 'listId',
                    'segmentId', 'keywordId', 'messageId'
                ]
                result['id'] = hashlib.md5('|'.join(
                    filter(identity,
                           project(result,
                                   ids).values())).encode('utf-8')).hexdigest()

            singer.write_records(table, parsed_results)
            LOGGER.info('... {} results'.format(len(results)))
            _filter.readDirection = 'NEXT'

            if len(results) == 0:
                hasMore = False

        self.state = incorporate(self.state, table, 'createdDate',
                                 start.replace(microsecond=0).isoformat())
        save_state(self.state)

    LOGGER.info('Done syncing outbound activities.')
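# A minimal sketch of the id construction above with a made-up activity dict:
# project() keeps only the id-relevant keys, filter(identity, ...) drops empty
# values, and the joined string is hashed.
import hashlib
from funcy import filter, identity, project

_activity = {'createdDate': '2020-01-01', 'contactId': '7', 'listId': None}
_ids = ['createdDate', 'activityType', 'contactId', 'listId']
_digest = hashlib.md5('|'.join(
    filter(identity,
           project(_activity, _ids).values())).encode('utf-8')).hexdigest()
# _digest is the md5 hex digest of '2020-01-01|7'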
def filter_activities(self, pred):
    return filter(pred, deepcopy(self.activities))