def issues(self):
    if self.tag:
        url = self.base_url + "/api/0/projects?tags=" + self.tag
        response = self.session.get(url)
        if not bool(response):
            raise IOError('Failed to talk to %r %r' % (url, response))

        all_repos = [r['name'] for r in response.json()['projects']]
    else:
        all_repos = [self.repo]

    repos = filter(self.filter_repos, all_repos)

    issues = []
    for repo in repos:
        issues.extend(self.get_issues(repo, ('issues', 'issues')))
        issues.extend(self.get_issues(repo, ('pull-requests', 'requests')))

    log.debug(" Found %i issues.", len(issues))
    issues = list(filter(self.include, issues))
    log.debug(" Pruned down to %i issues.", len(issues))

    for repo, issue in issues:
        # Stuff this value into the upstream dict for:
        # https://pagure.com/ralphbean/bugwarrior/issues/159
        issue['repo'] = repo

        issue_obj = self.get_issue_for_record(issue)
        extra = {
            'project': repo,
            'type': 'pull_request' if 'branch' in issue else 'issue',
            'annotations': self.annotations(issue, issue_obj)
        }
        issue_obj.update_extra(extra)
        yield issue_obj
def gen_svm_nodearray(xi, feature_max=None, isKernel=None):
    if isinstance(xi, dict):
        index_range = xi.keys()
    elif isinstance(xi, (list, tuple)):
        if not isKernel:
            xi = [0] + xi  # idx should start from 1
        index_range = range(len(xi))
    else:
        raise TypeError('xi should be a dictionary, list or tuple')

    if feature_max:
        assert(isinstance(feature_max, int))
        index_range = list(filter(lambda j: j <= feature_max, index_range))
    if not isKernel:
        index_range = list(filter(lambda j: xi[j] != 0, index_range))

    index_range = sorted(index_range)
    ret = (svm_node * (len(index_range) + 1))()
    ret[-1].index = -1
    for idx, j in enumerate(index_range):
        ret[idx].index = j
        ret[idx].value = xi[j]
    max_idx = 0
    if index_range:
        max_idx = index_range[-1]
    return ret, max_idx
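# A minimal usage sketch for the helper above. It assumes `svm_node` (the
# ctypes struct from the LIBSVM Python bindings) is defined in the same
# module, as the function requires; the inputs below are illustrative only.
dense_nodes, max_idx = gen_svm_nodearray([0.5, 0.0, 1.2])
print(max_idx)  # 3: indices start at 1 and zero-valued features are dropped

sparse_nodes, max_idx = gen_svm_nodearray({1: 0.5, 3: 1.2}, feature_max=2)
print(max_idx)  # 1: index 3 was discarded because it exceeds feature_max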
def test_element(client):
    sensors = Sensor.where(client, include=[Element])
    found_sensors = list(filter(lambda s: s.element(use_included=True) is not None, sensors))
    assert len(found_sensors) > 0
    found_sensor = found_sensors[0]
    found_element = found_sensor.element(use_included=True)
    assert found_element is not None

    lost_sensors = list(filter(lambda s: s.element(use_included=True) is None, sensors))
    assert len(lost_sensors) > 0
    lost_sensor = lost_sensors[0]

    # switch to include based relationship
    to_one(Element, type=RelationType.INCLUDE, reverse=to_many)(Sensor)

    # exercise the include based to_one relationship
    assert found_sensor.element() is not None
    assert lost_sensor.element() is None

    found_sensor = Sensor.find(client, found_sensor.id, include=[Element])
    assert found_sensor.element(use_included=True) == found_element

    # try non included
    found_sensor = Sensor.find(client, found_sensor.id)
    with pytest.raises(AttributeError):
        found_sensor.element(use_included=True)
def fetch(self, reference=None, start=None, end=None,
          filters=None, incl_left=True, incl_right=True):
    file_obj = pysam.TabixFile(native_str(self._file_path),
                               parser=self._parser)

    with contextlib.closing(file_obj) as tb_file:
        if reference is not None:
            reference = native_str(reference)

        records = self._fetch(tb_file, reference=reference,
                              start=start, end=end)

        # Filter records on additional filters.
        if filters is not None:
            def _filter(records, name, value):
                for r in records:
                    if hasattr(r, name) and getattr(r, name) == value:
                        yield r

            for name, value in filters.items():
                records = _filter(records, name, value)

        # Filter inclusive/exclusive if needed.
        if not incl_left:
            records = filter(lambda r: r.start > start, records)

        if not incl_right:
            records = filter(lambda r: r.end < end, records)

        # Yield records.
        for record in records:
            yield record
def strict_deps_for_target(self, target, predicate=None):
    """Get the dependencies of `target` filtered by `predicate`, accounting for 'strict_deps'.

    If 'strict_deps' is on, instead of using the transitive closure of dependencies, targets will
    only be able to see their immediate dependencies declared in the BUILD file. The 'strict_deps'
    setting is obtained from the result of `get_compile_settings()`.

    NB: This includes the current target in the result.
    """
    if self._native_build_settings.get_strict_deps_value_for_target(target):
        strict_deps = target.strict_dependencies(DependencyContext())
        if predicate:
            filtered_deps = list(filter(predicate, strict_deps))
        else:
            filtered_deps = strict_deps
        deps = [target] + filtered_deps
    else:
        deps = self.context.build_graph.transitive_subgraph_of_addresses(
            [target.address], predicate=predicate)

    # Filter out the beginning target depending on whether it matches the predicate.
    # TODO: There should be a cleaner way to do this.
    deps = filter(predicate, deps)

    return deps
def process_pdfs(corpus_path, ignore=['.json', '.log', '.err', '.pickle', '.npz']):
    from topicexplorer.lib import pdf
    if os.path.isfile(corpus_path):
        print("PDF file detected, extracting plaintext to",
              corpus_path.replace('.pdf', '.txt'))
        pdf.main(corpus_path)
        corpus_path = corpus_path.replace('.pdf', '.txt')
    elif os.path.isdir(corpus_path):
        print("PDF files detected, extracting plaintext to", corpus_path + '-txt')

        if corpus_path.endswith('/'):
            corpus_path = corpus_path[:-1]

        # TODO: Add processing of collections
        contents = listdir_nohidden(corpus_path)
        contents = [os.path.join(corpus_path, obj) for obj in contents
                    if not any([obj.endswith(suffix) for suffix in ignore])]
        count_dirs = len(list(filter(os.path.isdir, contents)))
        count_files = len(list(filter(os.path.isfile, contents)))

        if count_files > 0 and count_dirs == 0:
            # process all files
            pdf.main(corpus_path, corpus_path + '-txt')
        elif count_dirs > 0 and count_files == 0:
            # process each subdirectory
            for directory in contents:
                pdf.main(directory,
                         directory.replace(corpus_path, corpus_path + '-txt'))
        else:
            raise IOError("Invalid Path: empty directory")
        corpus_path += '-txt'
    return corpus_path
def assert_depmaps_equal(self, expected, received):
    jvm_deps = {target.name: ''.join(sorted({t.name for t in deps}))
                for target, deps in received.items()}
    for target, deps in sorted(expected.items()):
        got = jvm_deps.get(target, ())
        self.assertEqual(set(deps), set(got),
                         '{}\n expected {}\n got {}\n \n{}'
                         .format(target, deps, got,
                                 '\n'.join('{}: {}'.format(key, val)
                                           for key, val in sorted(jvm_deps.items()))))

    self.assertEqual(len(list(filter(expected.get, expected))),
                     len(list(filter(jvm_deps.get, jvm_deps))))
def wait4tasks(self, tasks, tout=60, verbose=False):
    running = list(filter(self.is_task_running, tasks[:]))
    count = 0
    while running:
        if verbose:
            # format the message instead of printing the raw tuple
            print('Tasks still running after %s seconds' % count)
            print(running)
        time.sleep(1)
        count += 1
        running = list(filter(self.is_task_running, running))
        if count > tout:
            # report the configured timeout rather than a hard-coded 60 seconds
            raise HPOneViewTimeout('Waited %s seconds for task to complete'
                                   ', aborting' % tout)
def oldfilter(*args):
    """
    filter(function or None, sequence) -> list, tuple, or string

    Return those items of sequence for which function(item) is true.
    If function is None, return the items that are true.  If sequence
    is a tuple or string, return the same type, else return a list.
    """
    mytype = type(args[1])
    if isinstance(args[1], basestring):
        return mytype().join(builtins.filter(*args))
    elif isinstance(args[1], (tuple, list)):
        return mytype(builtins.filter(*args))
    else:
        # Fall back to list.  Is this the right thing to do?
        return list(builtins.filter(*args))
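# A brief usage sketch: `oldfilter` mirrors Python 2 by returning the same
# container type as its input (assuming `basestring` and `builtins` are
# aliased as in the surrounding compatibility module).
print(oldfilter(str.isdigit, 'a1b2c3'))        # '123'   (string in, string out)
print(oldfilter(None, (0, 1, 2, '')))          # (1, 2)  (tuple in, tuple out)
print(oldfilter(lambda x: x > 1, [0, 1, 2]))   # [2]     (list in, list out)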
def map_register_name(self, reg):
    if not self._register_names:
        # Fetch register names ...
        result = self._gdb.sync_cmd(["-data-list-register-names"], "done")
        self._register_names = dict(
            filter(lambda x: x[0] != "",
                   zip(result["register-names"], range(0, 10000))))
    return self._register_names[reg]
def execute(self):
    # Only executes the tests from the package specified by the target roots, so
    # we don't run the tests for _all_ dependencies of said package.
    targets = filter(self.is_local_src, self.context.target_roots)
    for target in targets:
        self.ensure_workspace(target)
        self._go_test(target)
def _filter_cookies(self, cookies, hashes):
    filter_lambda = lambda c: c.startswith('uid') or c.startswith('pass')
    replace_lambda = lambda c: self._replace_sensitive_data(c, hashes)

    cookies = list(filter(filter_lambda, cookies))
    cookies = list(map(replace_lambda, cookies))
    return cookies
def test_lfilter(self):
    func = lambda x: x
    lst = list(builtins.range(10))
    results = lfilter(lambda x: x, lst),
    lengths = 9,
    expecteds = list(builtins.filter(func, lst)),
    self.check_results(results, expecteds, lengths)
def neutral_ratios_iter(oxidations, stoichs=False, threshold=5):
    """
    Iterator for charge-neutral stoichiometries

    Given a list of oxidation states of arbitrary length, yield ratios in which
    these form a charge-neutral compound. Stoichiometries may be provided as a
    set of legal stoichiometries per site (e.g. a known family of compounds);
    otherwise all unique ratios are tried up to a threshold coefficient.

    Args:
        oxidations : list of integers
        stoichs : stoichiometric ratios for each site (if provided)
        threshold : single threshold to go up to if stoichs are not provided

    Yields:
        tuple: ratio that gives neutrality
    """
    if not stoichs:
        stoichs = [list(range(1, threshold + 1))] * len(oxidations)

    # First filter: remove combinations which have a common denominator
    # greater than 1 (i.e. Use simplest form of each set of ratios)
    # Second filter: return only charge-neutral combinations
    return filter(
        lambda x: _isneutral(oxidations, x) and _gcd_recursive(*x) == 1,
        # Generator: enumerate all combinations of stoichiometry
        itertools.product(*stoichs)
    )
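# A small usage sketch, assuming `_isneutral`, `_gcd_recursive` and `itertools`
# are available in the same module as the function above. For oxidation states
# +2 and -1, the only coprime charge-neutral ratio within the default threshold
# is 1:2.
for ratio in neutral_ratios_iter([2, -1]):
    print(ratio)  # (1, 2)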
def get_rst(self):
    env = self.state.document.settings.env
    entries = []
    all_docnames = env.found_docs.copy()
    found = set([env.docname])  # don't include myself
    for entry in self.content:
        if not entry:
            continue
        patname = docname_join(env.docname, entry)
        docnames = sorted(patfilter(all_docnames, patname))
        for docname in docnames:
            if not docname in found:
                found.add(docname)
                entries.append(self.entry_class.create(env, docname))
    expr = self.options.get('filter')
    if expr:
        def func(e):
            return eval(expr, dict(e=e))
        entries = list(filter(func, entries))
    orderby = self.options.get('orderby')
    if orderby:
        def func(a, b):
            va = getattr(a, orderby, '')
            vb = getattr(b, orderby, '')
            return cmp(va, vb)
        entries = sorted(entries, func)
    headers = self.get_headers()
    rows = []
    for e in entries:
        rows.append(self.format_entry(e))
    return rstgen.table(headers, rows)
def poly2mask(polygons, im_size): """Converts polygons to a sparse binary mask. >>> from sima.ROI import poly2mask >>> poly1 = [[0,0], [0,1], [1,1], [1,0]] >>> poly2 = [[0,1], [0,2], [2,2], [2,1]] >>> mask = poly2mask([poly1, poly2], (3, 3)) >>> mask[0].todense() matrix([[ True, False, False], [ True, True, False], [False, False, False]]) Parameters ---------- polygons : sequence of coordinates or sequence of Polygons A sequence of polygons where each is either a sequence of (x,y) or (x,y,z) coordinate pairs, an Nx2 or Nx3 numpy array, or a Polygon object. im_size : tuple Final size of the resulting mask Output ------ mask A list of sparse binary masks of the points contained within the polygons, one mask per plane """ if len(im_size) == 2: im_size = (1,) + im_size polygons = _reformat_polygons(polygons) mask = np.zeros(im_size, dtype=bool) for poly in polygons: # assuming all points in the polygon share a z-coordinate z = int(np.array(poly.exterior.coords)[0][2]) if z > im_size[0]: warn('Polygon with zero-coordinate {} '.format(z) + 'cropped using im_size = {}'.format(im_size)) continue x_min, y_min, x_max, y_max = poly.bounds # Shift all points by 0.5 to move coordinates to corner of pixel shifted_poly = Polygon(np.array(poly.exterior.coords)[:, :2] - 0.5) points = [Point(x, y) for x, y in product(np.arange(int(x_min), np.ceil(x_max)), np.arange(int(y_min), np.ceil(y_max)))] points_in_poly = list(filter(shifted_poly.contains, points)) for point in points_in_poly: xx, yy = point.xy x = int(xx[0]) y = int(yy[0]) if 0 <= y < im_size[1] and 0 <= x < im_size[2]: mask[z, y, x] = True masks = [] for z_coord in np.arange(mask.shape[0]): masks.append(lil_matrix(mask[z_coord, :, :])) return masks
def images_js(request):
    response = {}
    status = 403
    if request.is_ajax() and request.method == 'POST':
        status = 200
        response['imageCategories'] = serializer.serialize(
            ImageCategory.objects.filter(category_owner=request.user))
        response['images'] = []
        user_images = UserImage.objects.filter(owner=request.user)
        for user_image in user_images:
            image = user_image.image
            if image.image:
                field_obj = {
                    'id': image.id,
                    'title': user_image.title,
                    'image': image.image.url,
                    'file_type': image.file_type,
                    'added': mktime(image.added.timetuple()) * 1000,
                    'checksum': image.checksum,
                    'cats': list(
                        map(
                            int,
                            list(filter(bool, user_image.image_cat.split(',')))
                        )
                    )
                }
                if image.thumbnail:
                    field_obj['thumbnail'] = image.thumbnail.url
                    field_obj['height'] = image.height
                    field_obj['width'] = image.width
                response['images'].append(field_obj)
    return JsonResponse(
        response,
        status=status
    )
def gen_feature_nodearray(xi, feature_max=None): if feature_max: assert(isinstance(feature_max, int)) xi_shift = 0 # ensure correct indices of xi if scipy and isinstance(xi, tuple) and len(xi) == 2\ and isinstance(xi[0], scipy.ndarray) and isinstance(xi[1], scipy.ndarray): # for a sparse vector index_range = xi[0] + 1 # index starts from 1 if feature_max: index_range = index_range[scipy.where(index_range <= feature_max)] elif scipy and isinstance(xi, scipy.ndarray): xi_shift = 1 index_range = xi.nonzero()[0] + 1 # index starts from 1 if feature_max: index_range = index_range[scipy.where(index_range <= feature_max)] elif isinstance(xi, (dict, list, tuple)): if isinstance(xi, dict): index_range = xi.keys() elif isinstance(xi, (list, tuple)): xi_shift = 1 index_range = range(1, len(xi) + 1) index_range = list(filter(lambda j: xi[j - xi_shift] != 0, index_range)) if feature_max: index_range = list(filter(lambda j: j <= feature_max, index_range)) index_range = sorted(index_range) else: raise TypeError('xi should be a dictionary, list, tuple, 1-d numpy array, or tuple of (index, data)') ret = (feature_node * (len(index_range) + 2))() ret[-1].index = -1 # for bias term ret[-2].index = -1 if scipy and isinstance(xi, tuple) and len(xi) == 2\ and isinstance(xi[0], scipy.ndarray) and isinstance(xi[1], scipy.ndarray): # for a sparse vector for idx, j in enumerate(index_range): ret[idx].index = j ret[idx].value = (xi[1])[idx] else: for idx, j in enumerate(index_range): ret[idx].index = j ret[idx].value = xi[j - xi_shift] max_idx = 0 if len(index_range) > 0: max_idx = index_range[-1] return ret, max_idx
def load(cls, path, label=None, fmt='pkl', reassign_label=False):
    """Initialize an ROIList from either a saved pickle file or an
    Imagej ROI zip file.

    Parameters
    ----------
    path : string
        Path to either a pickled ROIList, an ImageJ ROI zip file, or the
        path to the directory containing the 'IC filter' .mat files for
        inscopix/mosaic data.
    label : str, optional
        The label for selecting the ROIList if multiple ROILists
        have been saved in the same file. By default, the most
        recently saved ROIList will be selected.
    fmt : {'pkl', 'ImageJ', 'inscopix'}
        The file format being imported.
    reassign_label: boolean
        If true, assign ascending integer strings as labels

    Returns
    -------
    sima.ROI.ROIList
        Returns an ROIList loaded from the passed in path.
    """
    if fmt == 'pkl':
        with open(path, 'rb') as f:
            roi_sets = pickle.load(f)
        if label is None:
            label = sima.misc.most_recent_key(roi_sets)
        try:
            rois = roi_sets[label]
        except KeyError:
            raise Exception('No ROIs were saved with the given label.')
        roi_list = cls(**rois)
    elif fmt == 'ImageJ':
        roi_list = cls(rois=sima.misc.imagej.read_imagej_roi_zip(path))
    elif fmt == 'inscopix':
        dirnames = next(os.walk(path))[1]
        # this naming convention for ROI masks is used in Mosaic 1.0.0b
        files = [glob.glob(os.path.join(path, dirname, '*IC filter*.mat'))
                 for dirname in dirnames]
        # filter() returns an iterator in Python 3, so materialize it before indexing
        files = list(filter(lambda f: len(f) > 0, files))[0]
        rois = []
        for filename in files:
            label = re.findall(r'\d+', filename)[-1]
            data = scipy.io.loadmat(filename)
            # this is the ROI mask index in Mosaic 1.0.0b
            mask = data['Object'][0][0][11]
            rois.append(ROI(mask=mask, id=label, im_shape=mask.shape))
        roi_list = cls(rois=rois)
    else:
        raise ValueError('Unrecognized file format.')
    if reassign_label:
        for idx, roi in zip(it.count(), roi_list):
            roi.label = str(idx)
    return roi_list
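# A minimal usage sketch, assuming the SIMA package layout (sima.ROI.ROIList);
# the paths below are placeholders.
from sima.ROI import ROIList

pkl_rois = ROIList.load('/path/to/rois.pkl', fmt='pkl')
imagej_rois = ROIList.load('/path/to/RoiSet.zip', fmt='ImageJ')
print(len(pkl_rois), len(imagej_rois))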
def _read_side_inputs(self, tags_and_types): """Generator reading side inputs in the order prescribed by tags_and_types. Args: tags_and_types: List of tuples (tag, type). Each side input has a string tag that is specified in the worker instruction. The type is actually a boolean which is True for singleton input (read just first value) and False for collection input (read all values). Yields: With each iteration it yields the result of reading an entire side source either in singleton or collection mode according to the tags_and_types argument. """ # Only call this on the old path where side_input_maps was not # provided directly. assert self.side_input_maps is None # We will read the side inputs in the order prescribed by the # tags_and_types argument because this is exactly the order needed to # replace the ArgumentPlaceholder objects in the args/kwargs of the DoFn # getting the side inputs. # # Note that for each tag there could be several read operations in the # specification. This can happen for instance if the source has been # sharded into several files. for i, (side_tag, view_class, view_options) in enumerate(tags_and_types): sources = [] # Using the side_tag in the lambda below will trigger a pylint warning. # However in this case it is fine because the lambda is used right away # while the variable has the value assigned by the current iteration of # the for loop. # pylint: disable=cell-var-from-loop for si in filter( lambda o: o.tag == side_tag, self.spec.side_inputs): if not isinstance(si, operation_specs.WorkerSideInputSource): raise NotImplementedError('Unknown side input type: %r' % si) sources.append(si.source) # The tracking of time spend reading and bytes read from side inputs is # behind an experiment flag to test its performance impact. si_counter = opcounters.SideInputReadCounter( self.counter_factory, self.state_sampler, declaring_step=self.name_context.step_name, # Inputs are 1-indexed, so we add 1 to i in the side input id input_index=i + 1) iterator_fn = sideinputs.get_iterator_fn_for_sources( sources, read_counter=si_counter) # Backwards compatibility for pre BEAM-733 SDKs. if isinstance(view_options, tuple): if view_class == pvalue.AsSingleton: has_default, default = view_options view_options = {'default': default} if has_default else {} else: view_options = {} yield apache_sideinputs.SideInputMap( view_class, view_options, sideinputs.EmulatedIterable(iterator_fn))
def _filterOfferedNodes(self, offers):
    if not self.nodeFilter:
        return offers
    executorInfoOrNone = [self.executors.get(socket.gethostbyname(offer.hostname))
                          for offer in offers]
    executorInfos = [_f for _f in executorInfoOrNone if _f]
    executorsToConsider = list(filter(self.nodeFilter[0], executorInfos))
    ipsToConsider = {ex.nodeAddress for ex in executorsToConsider}
    return [offer for offer in offers
            if socket.gethostbyname(offer.hostname) in ipsToConsider]
def getLayer(self, name):
    """find a layer by name"""
    try:
        labellayer = next(filter(lambda l: l.name == name, self.layerstack))
    except StopIteration:
        return None
    else:
        return labellayer
def _CombineParenthesis(self):
    for i in range(len(self.stack) - 2):
        if (self.stack[i] == "(" and self.stack[i + 2] == ")" and
                isinstance(self.stack[i + 1], Expression)):
            self.stack[i] = None
            self.stack[i + 2] = None

    self.stack = list(filter(None, self.stack))
def targets(self, predicate=None):
    """Returns all the targets in the graph in no particular order.

    :API: public

    :param predicate: A target predicate that will be used to filter the targets returned.
    """
    return list(filter(predicate, self._target_by_address.values()))
def num_cores(self):
    """The number of virtual cores of this VM."""
    valid_vmcss = list(filter(self.is_valid_vmcs, self.vmcss))
    # Count only unique VPIDs if the hypervisor uses them.
    uniq_vpids = set([v.VPID for v in valid_vmcss])
    if len(uniq_vpids) != 1:
        return len(uniq_vpids)
    else:
        return len(valid_vmcss)
def __init__(self, base_fd, pathspec=None, progress_callback=None, full_pathspec=None): super(RegistryFile, self).__init__( base_fd, pathspec=pathspec, full_pathspec=full_pathspec, progress_callback=progress_callback) if base_fd is None: self.pathspec.Append(pathspec) elif base_fd.IsDirectory(): self.pathspec.last.path = utils.JoinPath(self.pathspec.last.path, pathspec.path) else: raise IOError("Registry handler can not be stacked on another handler.") path_components = list(filter(None, self.pathspec.last.path.split("/"))) try: # The first component MUST be a hive self.hive_name = path_components[0] self.hive = KeyHandle(getattr(_winreg, self.hive_name)) except AttributeError: raise IOError("Unknown hive name %s" % self.hive_name) except IndexError: # A hive is not specified, we just list all the hives. return # Normalize the path casing if needed self.key_name = "/".join(path_components[1:]) self.local_path = CanonicalPathToLocalPath(self.key_name) try: # Maybe its a value key_name, value_name = os.path.split(self.local_path) with OpenKey(self.hive, key_name) as key: self.value, self.value_type = QueryValueEx(key, value_name) # We are a value and therefore not a directory. self.is_directory = False except exceptions.WindowsError: try: # Try to get the default value for this key with OpenKey(self.hive, self.local_path) as key: # Check for default value. try: self.value, self.value_type = QueryValueEx(key, "") except exceptions.WindowsError: # Empty default value self.value = "" self.value_type = _winreg.REG_NONE except exceptions.WindowsError: raise IOError("Unable to open key %s" % self.key_name)
def issues(self): issues = {} if self.query: issues.update(self.get_query(self.query)) if self.config.get('include_user_repos', True, asbool): all_repos = self.client.get_repos(self.username) assert(type(all_repos) == list) repos = filter(self.filter_repos, all_repos) for repo in repos: issues.update( self.get_owned_repo_issues( self.username + "/" + repo['name']) ) if self.config.get('include_user_issues', True, asbool): issues.update( filter(self.filter_issues, self.get_directly_assigned_issues().items()) ) log.debug(" Found %i issues.", len(issues)) issues = list(filter(self.include, issues.values())) log.debug(" Pruned down to %i issues.", len(issues)) for tag, issue in issues: # Stuff this value into the upstream dict for: # https://github.com/ralphbean/bugwarrior/issues/159 issue['repo'] = tag issue_obj = self.get_issue_for_record(issue) tagParts = tag.split('/') projectName = tagParts[1] if self.project_owner_prefix: projectName = tagParts[0]+"."+projectName extra = { 'project': projectName, 'type': 'pull_request' if 'pull_request' in issue else 'issue', 'annotations': self.annotations(tag, issue, issue_obj), 'namespace': self.username, } issue_obj.update_extra(extra) yield issue_obj
def _filter_cookies(self, cookies):
    def filter_lambda(c):
        return c.startswith('phpbb2mysql_4_sid') or c.startswith('phpbb2mysql_4_data')

    def replace_lambda(c):
        return self._replace_sensitive_data(c)

    cookies = list(filter(filter_lambda, cookies))
    cookies = list(map(replace_lambda, cookies))
    return cookies
def meta(dset):
    pos = dset.get_info('pos')
    result = []
    for p in pos:
        result.append(len(list(builtins.filter(lambda x: x != ',', p))))
    return result
def _calculate_python_sources(self, targets):
    """Generate a set of source files from the given targets."""
    python_eval_targets = filter(self.is_non_synthetic_python_target, targets)
    sources = set()
    for target in python_eval_targets:
        sources.update(
            source for source in target.sources_relative_to_buildroot()
            if os.path.splitext(source)[1] == self._PYTHON_SOURCE_EXTENSION
        )
    return list(sources)
def owned_control_elements(self):
    return list(
        map(first, filter(second, iteritems(self._nested_control_elements))))
def getRasterLayers(self):
    return list(filter(self.isRaster, LayerRegistry.layers))
def getVectorLayers(self):
    return list(filter(self.isVector, LayerRegistry.layers))
def _parse_output(self):
    """Internal function for parsing MEKA output."""
    if self.output_ is None:
        self._results = None
        self._statistics = None
        return None

    predictions_split_head = '==== PREDICTIONS'
    predictions_split_foot = '|==========='

    if self._label_count is None:
        # map() returns an iterator in Python 3, so materialize it before indexing
        self._label_count = list(map(
            lambda y: int(y.split(')')[1].strip()),
            [x for x in self.output_.split('\n') if 'Number of labels' in x]))[0]

    if self._instance_count is None:
        # likewise, filter() must be materialized before indexing
        self._instance_count = int(float(
            list(filter(lambda x: '==== PREDICTIONS (N=' in x,
                        self.output_.split('\n')))[0]
            .split('(')[1].split('=')[1].split(')')[0]))

    predictions = self.output_.split(predictions_split_head)[1].split(
        predictions_split_foot)[0].split('\n')[1:-1]
    predictions = [y.split(']')[0]
                   for y in [x.split('] [')[1] for x in predictions]]
    predictions = [[a for a in [f.strip() for f in z.split(',')] if len(a) > 0]
                   for z in predictions]
    predictions = [[int(a) for a in z] for z in predictions]

    assert self._verbosity == 5

    self._results = sparse.lil_matrix(
        (self._instance_count, self._label_count), dtype='int')
    for row in range(self._instance_count):
        for label in predictions[row]:
            self._results[row, label] = 1

    statistics = [x for x in self.output_.split('== Evaluation Info')[1].split('\n')
                  if len(x) > 0 and '==' not in x]
    statistics = [y for y in [z.strip() for z in statistics] if ' ' in y]
    array_data = [z for z in statistics if '[' in z]
    non_array_data = [z for z in statistics if '[' not in z]

    self._statistics = {}
    for row in non_array_data:
        r = row.strip().split(' ')
        r = [z for z in r if len(z) > 0]
        r = [z.strip() for z in r]
        if len(r) < 2:
            continue
        try:
            test_value = float(r[1])
        except ValueError:
            test_value = r[1]
        r[1] = test_value
        self._statistics[r[0]] = r[1]

    for row in array_data:
        r = row.strip().split('[')
        r = [z.strip() for z in r]
        r[1] = r[1].replace(', ', ' ').replace(',', '.').replace(']', '').split(' ')
        r[1] = [x for x in r[1] if len(x) > 0]
        self._statistics[r[0]] = r[1]
def findSequencesOnDisk(cls, pattern, include_hidden=False, strictPadding=False, pad_style=PAD_STYLE_DEFAULT, allow_subframes=False): """ Yield the sequences found in the given directory. Examples:: FileSequence.findSequencesOnDisk('/path/to/files') The `pattern` can also specify glob-like shell wildcards including the following: * ``?`` - 1 wildcard character * ``*`` - 1 or more wildcard character * ``{foo,bar}`` - either 'foo' or 'bar' Exact frame ranges are not considered, and padding characters are converted to wildcards (``#`` or ``@``) Examples:: FileSequence.findSequencesOnDisk('/path/to/files/image_stereo_{left,right}.#.jpg') FileSequence.findSequencesOnDisk('/path/to/files/imag?_*_{left,right}.@@@.jpg', strictPadding=True) Args: pattern (str): directory to scan, or pattern to filter in directory include_hidden (bool): if true, show .hidden files as well strictPadding (bool): if True, ignore files with padding length different from pattern pad_style (`.PAD_STYLE_DEFAULT` or `.PAD_STYLE_HASH1` or `.PAD_STYLE_HASH4`): padding style allow_subframes (bool): if True, handle subframe filenames Returns: list: """ # reserve some functions we're going to need quick access to _not_hidden = lambda f: not f.startswith('.') _match_pattern = None _filter_padding = None _join = os.path.join seq = None dirpath = pattern # Support the pattern defining a filter for the files # in the existing directory if not os.path.isdir(pattern): dirpath, filepat = os.path.split(pattern) if not os.path.isdir(dirpath): return [] # Start building a regex for filtering files seq = cls(filepat, pad_style=pad_style, allow_subframes=allow_subframes) patt = r'\A' patt += cls._globCharsToRegex(seq.basename()) if seq.padding(): patt += '(' if seq.framePadding(): patt += r'\d+' if seq.subframePadding(): patt += r'\.\d+' patt += ')' if seq.extension(): patt += cls._globCharsToRegex(seq.extension()) # Convert braces groups into regex capture groups matches = re.finditer(r'{(.*?)(?:,(.*?))*}', patt) for match in reversed(list(matches)): i, j = match.span() regex = '(?:%s)' % '|'.join( [m.strip() for m in match.groups()]) patt = "".join((patt[0:i], regex, patt[j:])) patt += r'\Z' try: _match_pattern = re.compile(patt).match except re.error: msg = 'Invalid file pattern: {!r}'.format(filepat) raise FileSeqException(msg) if seq.padding() and strictPadding: get_frame = lambda f: _match_pattern(f).group(1) _filter_padding = functools.partial( cls._filterByPaddingNum, zfill=seq.zfill(), decimal_places=seq.decimalPlaces(), get_frame=get_frame) # Get just the immediate files under the dir. # Avoids testing the os.listdir() for files as # a second step. 
ret = next(os.walk(dirpath), None) files = ret[-1] if ret else [] # collapse some generators to get us the files that match our regex if not include_hidden: files = filter(_not_hidden, files) # Filter by files that match the provided file pattern if _match_pattern: files = filter(_match_pattern, files) # Filter by files that match the frame padding in the file pattern if _filter_padding: # returns a generator files = _filter_padding(files) # Ensure our dirpath ends with a path separator, so # that we can control which sep is used during the # os.path.join sep = utils._getPathSep(dirpath) if not dirpath.endswith(sep): dirpath += sep files = [_join(dirpath, f) for f in files] seqs = list( cls.yield_sequences_in_list(files, pad_style=pad_style, allow_subframes=allow_subframes)) if _filter_padding and seq: frame_pad = cls.conformPadding(seq.framePadding(), pad_style=pad_style) subframe_pad = cls.conformPadding(seq.subframePadding(), pad_style=pad_style) # strict padding should preserve the original padding # characters in the found sequences. for s in seqs: s.setFramePadding(frame_pad) s.setSubframePadding(subframe_pad) return seqs
def Filter(self, objects):
    """Returns a list of objects that pass the filter."""
    return list(filter(self.Matches, objects))
def entity_examples(self):
    return list(
        filter(lambda e: "entities" in e,
               self.entity_examples_only + self.common_examples))
def components(self):
    u""" Tuple of all components that have been registered for this control surface. """
    return tuple(filter(lambda comp: not comp.is_private, self._components))
def call_listeners(self, listeners):
    with self.component_guard():
        for listener in filter(liveobj_valid, listeners):
            listener()
def user_agent(): """ Return a string representing the user agent. """ data = { "installer": { "name": "pip", "version": pip.__version__ }, "python": platform.python_version(), "implementation": { "name": platform.python_implementation(), }, } if data["implementation"]["name"] == 'CPython': data["implementation"]["version"] = platform.python_version() elif data["implementation"]["name"] == 'PyPy': if sys.pypy_version_info.releaselevel == 'final': pypy_version_info = sys.pypy_version_info[:3] else: pypy_version_info = sys.pypy_version_info data["implementation"]["version"] = ".".join( [str(x) for x in pypy_version_info]) elif data["implementation"]["name"] == 'Jython': # Complete Guess data["implementation"]["version"] = platform.python_version() elif data["implementation"]["name"] == 'IronPython': # Complete Guess data["implementation"]["version"] = platform.python_version() if sys.platform.startswith("linux"): from pip._vendor import distro distro_infos = dict( list( filter( lambda x: x[1], list( zip(["name", "version", "id"], distro.linux_distribution())), ))) libc = dict( list( filter( lambda x: x[1], list(zip(["lib", "version"], libc_ver())), ))) if libc: distro_infos["libc"] = libc if distro_infos: data["distro"] = distro_infos if sys.platform.startswith("darwin") and platform.mac_ver()[0]: data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]} if platform.system(): data.setdefault("system", {})["name"] = platform.system() if platform.release(): data.setdefault("system", {})["release"] = platform.release() if platform.machine(): data["cpu"] = platform.machine() # Python 2.6 doesn't have ssl.OPENSSL_VERSION. if HAS_TLS and sys.version_info[:2] > (2, 6): data["openssl_version"] = ssl.OPENSSL_VERSION return "{data[installer][name]}/{data[installer][version]} {json}".format( data=data, json=json.dumps(data, separators=(",", ":"), sort_keys=True), )
def filter(function, iterable):
    return list(builtins.filter(function, iterable))
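# A quick usage sketch: this wrapper restores the Python 2 behaviour of
# returning a list rather than a lazy iterator (assuming `builtins` is the
# imported standard-library module, as in the shim above).
evens = filter(lambda n: n % 2 == 0, range(10))
print(evens)     # [0, 2, 4, 6, 8] -- already a list, safe to index or reuse
print(evens[0])  # 0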
def metric_classes(self):
    return filter(lambda m: isinstance(m, definitions.MetricClass), self._metrics)
def __init__(self, dataset, capDivCont, sampleSize, version): ''' Constructor. You have to determine these parameters via envoriment IDs @param dataset : 'iqiyi' or 'movielens' @param capDivCont : Storage size / total content number @param sampleSize : int. Randomly select this size of CONTENTS. `None` means using the whole set (iqiyi = 233045, movielens = 26744 contents) @param version: int. Version code. See filterVersion ''' fakeSeed = random.randrange(sys.maxsize) try: with open( os.path.dirname(__file__) + '/.%s_%s_%s_%s_%s.tmp' % (dataset, capDivCont, sampleSize, fakeSeed, version), 'rb') as f: self.requests, self.sampleSize, _version = pickle.load(f) if _version != VERSION: logger.info("Old cache found, will not use") raise FileNotFoundError logger.info('Loading from cache') except: logger.info('Input cache not found, loading from raw input') self.requests = inputDataset(dataset) self.requests = filter(filterVersion(version), self.requests) self.requests = list( self.requests ) # self.requests will be used twice, so can't be an iterator def unique(sequence): last = None for item in sequence: if item != last: last = item yield item contents = list( unique(sorted(map(lambda r: r.content, self.requests)))) logger.info('%d contents in total' % (len(contents))) if sampleSize is not None: if sampleSize > len(contents): logger.warning('sampling size larger than total size') else: contents = [ contents[i] for i in sorted( random.sample(range(len(contents)), sampleSize)) ] for req in self.requests: pos = bisect.bisect_left(contents, req.content) req.content = pos if pos < len( contents) and contents[pos] == req.content else None self.requests = list( filter(lambda r: r.content is not None, self.requests)) self.sampleSize = len( contents ) # Don't use parameter `sampleSize`, which can be None with open( os.path.dirname(__file__) + '/.%s_%s_%s_%s_%s.tmp' % (dataset, capDivCont, sampleSize, fakeSeed, version), 'wb') as f: pickle.dump((self.requests, self.sampleSize, VERSION), f) logger.info('Cached input') self.requestsIter = None self.state = None self.storeSize = int(capDivCont * self.sampleSize) self.done = True if self.storeSize == 0: logging.warning("Storage size = 0. Please increase capacity.")
def build_features(self, variable_types=None, verbose=False): """Automatically builds feature definitions for target entity using Deep Feature Synthesis algorithm Args: variable_types (list[Variable] or str, optional): Types of variables to return. If None, default to Numeric, Categorical, Ordinal, and Boolean. If given as the string 'all', use all available variable types. verbose (bool, optional): If True, print progress. Returns: list[BaseFeature]: Returns a list of features for target entity, sorted by feature depth (shallow first). """ all_features = {} for e in self.es.entities: if e not in self.ignore_entities: all_features[e.id] = {} # add seed features, if any, for dfs to build on top of if self.seed_features is not None: for f in self.seed_features: self._handle_new_feature(all_features=all_features, new_feature=f) self.where_clauses = defaultdict(set) self._run_dfs(self.es[self.target_entity_id], [], all_features, max_depth=self.max_depth) new_features = list(all_features[self.target_entity_id].values()) if variable_types is None: variable_types = [Numeric, Discrete, Boolean] elif variable_types == 'all': variable_types = None else: msg = "variable_types must be a list, or 'all'" assert isinstance(variable_types, list), msg if variable_types is not None: new_features = [ f for f in new_features if any( issubclass(f.variable_type, vt) for vt in variable_types) ] def check_secondary_index(f): secondary_time_index = self.es[ self.target_entity_id].secondary_time_index for s_time_index, exclude in secondary_time_index.items(): if isinstance(f, IdentityFeature) and f.variable.id in exclude: return False elif isinstance(f, (BinaryFeature, Compare)): if (not check_secondary_index(f.left) or not check_secondary_index(f.right)): return False if isinstance(f, TimeSince) and not check_secondary_index( f.base_features[0]): return False return True def filt(f): # remove identity features of the ID field of the target entity if (isinstance(f, IdentityFeature) and f.entity.id == self.target_entity_id and f.variable.id == self.es[self.target_entity_id].index): return False if (isinstance( f, (IdentityFeature, BinaryFeature, Compare, TimeSince)) and not check_secondary_index(f)): return False return True new_features = list(filter(filt, new_features)) # sanity check for duplicate features l = [f.hash() for f in new_features] assert len(set([f for f in l if l.count(f) > 1])) == 0, \ 'Multiple features with same name' + \ str(set([f for f in l if l.count(f) > 1])) new_features.sort(key=lambda f: f.get_depth()) new_features = self._filter_features(new_features) if self.max_features > 0: new_features = new_features[:self.max_features] if verbose: print("Built {} features".format(len(new_features))) verbose = None return new_features
def workerScript(jobStore, config, jobName, jobStoreID, redirectOutputToLogFile=True): """ Worker process script, runs a job. :param str jobName: The "job name" (a user friendly name) of the job to be run :param str jobStoreLocator: Specifies the job store to use :param str jobStoreID: The job store ID of the job to be run :param bool redirectOutputToLogFile: Redirect standard out and standard error to a log file """ logging.basicConfig() setLogLevel(config.logLevel) ########################################## #Create the worker killer, if requested ########################################## logFileByteReportLimit = config.maxLogFileSize if config.badWorker > 0 and random.random() < config.badWorker: def badWorker(): #This will randomly kill the worker process at a random time time.sleep(config.badWorkerFailInterval * random.random()) os.kill(os.getpid(), signal.SIGKILL) #signal.SIGINT) #TODO: FIX OCCASIONAL DEADLOCK WITH SIGINT (tested on single machine) t = Thread(target=badWorker) # Ideally this would be a daemon thread but that causes an intermittent (but benign) # exception similar to the one described here: # http://stackoverflow.com/questions/20596918/python-exception-in-thread-thread-1-most-likely-raised-during-interpreter-shutd # Our exception is: # Exception in thread Thread-1 (most likely raised during interpreter shutdown): # <type 'exceptions.AttributeError'>: 'NoneType' object has no attribute 'kill' # This attribute error is caused by the call os.kill() and apparently unavoidable with a # daemon t.start() ########################################## #Load the environment for the jobGraph ########################################## #First load the environment for the jobGraph. with jobStore.readSharedFileStream("environment.pickle") as fileHandle: environment = safeUnpickleFromStream(fileHandle) for i in environment: if i not in ("TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE"): os.environ[i] = environment[i] # sys.path is used by __import__ to find modules if "PYTHONPATH" in environment: for e in environment["PYTHONPATH"].split(':'): if e != '': sys.path.append(e) toilWorkflowDir = Toil.getWorkflowDir(config.workflowID, config.workDir) ########################################## #Setup the temporary directories. ########################################## # Dir to put all this worker's temp files in. localWorkerTempDir = tempfile.mkdtemp(dir=toilWorkflowDir) os.chmod(localWorkerTempDir, 0o755) ########################################## #Setup the logging ########################################## #This is mildly tricky because we don't just want to #redirect stdout and stderr for this Python process; we want to redirect it #for this process and all children. Consequently, we can't just replace #sys.stdout and sys.stderr; we need to mess with the underlying OS-level #file descriptors. See <http://stackoverflow.com/a/11632982/402891> #When we start, standard input is file descriptor 0, standard output is #file descriptor 1, and standard error is file descriptor 2. #What file do we want to point FDs 1 and 2 to? tempWorkerLogPath = os.path.join(localWorkerTempDir, "worker_log.txt") if redirectOutputToLogFile: # Save the original stdout and stderr (by opening new file descriptors # to the same files) origStdOut = os.dup(1) origStdErr = os.dup(2) # Open the file to send stdout/stderr to. 
logFh = os.open(tempWorkerLogPath, os.O_WRONLY | os.O_CREAT | os.O_APPEND) # Replace standard output with a descriptor for the log file os.dup2(logFh, 1) # Replace standard error with a descriptor for the log file os.dup2(logFh, 2) # Since we only opened the file once, all the descriptors duped from # the original will share offset information, and won't clobber each # others' writes. See <http://stackoverflow.com/a/5284108/402891>. This # shouldn't matter, since O_APPEND seeks to the end of the file before # every write, but maybe there's something odd going on... # Close the descriptor we used to open the file os.close(logFh) debugging = logging.getLogger().isEnabledFor(logging.DEBUG) ########################################## #Worker log file trapped from here on in ########################################## workerFailed = False statsDict = MagicExpando() statsDict.jobs = [] statsDict.workers.logsToMaster = [] blockFn = lambda: True listOfJobs = [jobName] try: #Put a message at the top of the log, just to make sure it's working. logger.info("---TOIL WORKER OUTPUT LOG---") sys.stdout.flush() logProcessContext(config) ########################################## #Load the jobGraph ########################################## jobGraph = jobStore.load(jobStoreID) listOfJobs[0] = str(jobGraph) logger.debug("Parsed job wrapper") ########################################## #Cleanup from any earlier invocation of the jobGraph ########################################## if jobGraph.command == None: logger.debug("Wrapper has no user job to run.") # Cleanup jobs already finished f = lambda jobs: [ z for z in [[y for y in x if jobStore.exists(y.jobStoreID)] for x in jobs] if len(z) > 0 ] jobGraph.stack = f(jobGraph.stack) jobGraph.services = f(jobGraph.services) logger.debug( "Cleaned up any references to completed successor jobs") #This cleans the old log file which may #have been left if the job is being retried after a job failure. oldLogFile = jobGraph.logJobStoreFileID if oldLogFile != None: jobGraph.logJobStoreFileID = None jobStore.update(jobGraph) #Update first, before deleting any files jobStore.deleteFile(oldLogFile) ########################################## # If a checkpoint exists, restart from the checkpoint ########################################## # The job is a checkpoint, and is being restarted after previously completing if jobGraph.checkpoint != None: logger.debug("Job is a checkpoint") # If the checkpoint still has extant jobs in its # (flattened) stack and services, its subtree didn't # complete properly. We handle the restart of the # checkpoint here, removing its previous subtree. if len([i for l in jobGraph.stack for i in l]) > 0 or len(jobGraph.services) > 0: logger.debug("Checkpoint has failed.") # Reduce the retry count assert jobGraph.remainingRetryCount >= 0 jobGraph.remainingRetryCount = max( 0, jobGraph.remainingRetryCount - 1) jobGraph.restartCheckpoint(jobStore) # Otherwise, the job and successors are done, and we can cleanup stuff we couldn't clean # because of the job being a checkpoint else: logger.debug( "The checkpoint jobs seems to have completed okay, removing any checkpoint files to delete." 
) #Delete any remnant files list( map( jobStore.deleteFile, list( filter(jobStore.fileExists, jobGraph.checkpointFilesToDelete)))) ########################################## #Setup the stats, if requested ########################################## if config.stats: startClock = getTotalCpuTime() startTime = time.time() while True: ########################################## #Run the jobGraph, if there is one ########################################## if jobGraph.command is not None: assert jobGraph.command.startswith("_toil ") logger.debug("Got a command to run: %s" % jobGraph.command) #Load the job job = Job._loadJob(jobGraph.command, jobStore) # If it is a checkpoint job, save the command if job.checkpoint: jobGraph.checkpoint = jobGraph.command # Create a fileStore object for the job fileStore = FileStore.createFileStore( jobStore, jobGraph, localWorkerTempDir, blockFn, caching=not config.disableCaching) with job._executor(jobGraph=jobGraph, stats=statsDict if config.stats else None, fileStore=fileStore): with fileStore.open(job): # Get the next block function and list that will contain any messages blockFn = fileStore._blockFn job._runner(jobGraph=jobGraph, jobStore=jobStore, fileStore=fileStore) # Accumulate messages from this job & any subsequent chained jobs statsDict.workers.logsToMaster += fileStore.loggingMessages else: #The command may be none, in which case #the jobGraph is either a shell ready to be deleted or has #been scheduled after a failure to cleanup logger.debug("No user job to run, so finishing") break if FileStore._terminateEvent.isSet(): raise RuntimeError("The termination flag is set") ########################################## #Establish if we can run another jobGraph within the worker ########################################## successorJobGraph = nextChainableJobGraph(jobGraph, jobStore) if successorJobGraph is None or config.disableChaining: # Can't chain any more jobs. break ########################################## #We have a single successor job that is not a checkpoint job. #We transplant the successor jobGraph command and stack #into the current jobGraph object so that it can be run #as if it were a command that were part of the current jobGraph. #We can then delete the successor jobGraph in the jobStore, as it is #wholly incorporated into the current jobGraph. 
########################################## # add the successor to the list of jobs run listOfJobs.append(str(successorJobGraph)) #Clone the jobGraph and its stack jobGraph = copy.deepcopy(jobGraph) #Remove the successor jobGraph jobGraph.stack.pop() #Transplant the command and stack to the current jobGraph jobGraph.command = successorJobGraph.command jobGraph.stack += successorJobGraph.stack # include some attributes for better identification of chained jobs in # logging output jobGraph.unitName = successorJobGraph.unitName jobGraph.jobName = successorJobGraph.jobName assert jobGraph.memory >= successorJobGraph.memory assert jobGraph.cores >= successorJobGraph.cores #Build a fileStore to update the job fileStore = FileStore.createFileStore( jobStore, jobGraph, localWorkerTempDir, blockFn, caching=not config.disableCaching) #Update blockFn blockFn = fileStore._blockFn #Add successorJobGraph to those to be deleted fileStore.jobsToDelete.add(successorJobGraph.jobStoreID) #This will update the job once the previous job is done fileStore._updateJobWhenDone() #Clone the jobGraph and its stack again, so that updates to it do #not interfere with this update jobGraph = copy.deepcopy(jobGraph) logger.debug("Starting the next job") ########################################## #Finish up the stats ########################################## if config.stats: totalCPUTime, totalMemoryUsage = getTotalCpuTimeAndMemoryUsage() statsDict.workers.time = str(time.time() - startTime) statsDict.workers.clock = str(totalCPUTime - startClock) statsDict.workers.memory = str(totalMemoryUsage) # log the worker log path here so that if the file is truncated the path can still be found if redirectOutputToLogFile: logger.info( "Worker log can be found at %s. Set --cleanWorkDir to retain this log", localWorkerTempDir) logger.info( "Finished running the chain of jobs on this node, we ran for a total of %f seconds", time.time() - startTime) ########################################## #Trapping where worker goes wrong ########################################## except: #Case that something goes wrong in worker traceback.print_exc() logger.error("Exiting the worker because of a failed job on host %s", socket.gethostname()) FileStore._terminateEvent.set() ########################################## #Wait for the asynchronous chain of writes/updates to finish ########################################## blockFn() ########################################## #All the asynchronous worker/update threads must be finished now, #so safe to test if they completed okay ########################################## if FileStore._terminateEvent.isSet(): jobGraph = jobStore.load(jobStoreID) jobGraph.setupJobAfterFailure(config) workerFailed = True ########################################## #Cleanup ########################################## # Close the worker logging # Flush at the Python level sys.stdout.flush() sys.stderr.flush() if redirectOutputToLogFile: # Flush at the OS level os.fsync(1) os.fsync(2) # Close redirected stdout and replace with the original standard output. os.dup2(origStdOut, 1) # Close redirected stderr and replace with the original standard error. os.dup2(origStdErr, 2) # sys.stdout and sys.stderr don't need to be modified at all. We don't # need to call redirectLoggerStreamHandlers since they still log to # sys.stderr # Close our extra handles to the original standard output and standard # error streams, so we don't leak file handles. 
os.close(origStdOut) os.close(origStdErr) # Now our file handles are in exactly the state they were in before. #Copy back the log file to the global dir, if needed if workerFailed and redirectOutputToLogFile: jobGraph.logJobStoreFileID = jobStore.getEmptyFileStoreID( jobGraph.jobStoreID) jobGraph.chainedJobs = listOfJobs with jobStore.updateFileStream(jobGraph.logJobStoreFileID) as w: with open(tempWorkerLogPath, "r") as f: if os.path.getsize( tempWorkerLogPath) > logFileByteReportLimit != 0: if logFileByteReportLimit > 0: f.seek(-logFileByteReportLimit, 2) # seek to last tooBig bytes of file elif logFileByteReportLimit < 0: f.seek(logFileByteReportLimit, 0) # seek to first tooBig bytes of file w.write( f.read().encode('utf-8')) # TODO load file using a buffer jobStore.update(jobGraph) elif debugging and redirectOutputToLogFile: # write log messages with open(tempWorkerLogPath, 'r') as logFile: if os.path.getsize( tempWorkerLogPath) > logFileByteReportLimit != 0: if logFileByteReportLimit > 0: logFile.seek(-logFileByteReportLimit, 2) # seek to last tooBig bytes of file elif logFileByteReportLimit < 0: logFile.seek(logFileByteReportLimit, 0) # seek to first tooBig bytes of file logMessages = logFile.read().splitlines() statsDict.logs.names = listOfJobs statsDict.logs.messages = logMessages if (debugging or config.stats or statsDict.workers.logsToMaster ) and not workerFailed: # We have stats/logging to report back jobStore.writeStatsAndLogging(json.dumps(statsDict, ensure_ascii=True)) #Remove the temp dir cleanUp = config.cleanWorkDir if cleanUp == 'always' or (cleanUp == 'onSuccess' and not workerFailed) or (cleanUp == 'onError' and workerFailed): shutil.rmtree(localWorkerTempDir) #This must happen after the log file is done with, else there is no place to put the log if (not workerFailed) and jobGraph.command == None and len( jobGraph.stack) == 0 and len(jobGraph.services) == 0: # We can now safely get rid of the jobGraph jobStore.delete(jobGraph.jobStoreID)
d = dict()
for index, i in list(LANGS.items()):
    kw = parse(a[i])
    if index == 0:
        d.update(kw)
    else:
        for k, v in list(kw.items()):
            d[k + settings.SITE.languages[index].suffix] = v

def not_empty(x):
    return x

#~ print d
if 'name' in d:
    # if there's at least one non-empty value
    if list(filter(not_empty, list(d.values()))):
        COMPANY_TYPES.append(d)

def objects():
    #~ yield companyType('Firma','Firma')
    #~ yield companyType('asbl','asbl')
    #~ yield companyType('A.S.B.L.','A.S.B.L.')
    #~ yield companyType('sprl','sprl')
    #~ yield companyType('GmbH','GmbH')
    #~ yield companyType('AG','AG')
    #~ yield companyType('S.A.','S.A.')
    #~ yield companyType('S.C.','S.C.')
    #~ yield companyType('V.o.G.','V.o.G.')
    #~ yield companyType('G.o.E.','G.o.E.')
def _n_filter(filters, tuples):
    for f in filters:
        tuples = list(filter(f, tuples))
    return tuples
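# A short usage sketch for the helper above (the data is illustrative only):
# each predicate in `filters` is applied in turn, narrowing the tuple list.
rows = [(1, 'a'), (2, 'b'), (3, 'a'), (4, 'b')]
kept = _n_filter([lambda t: t[0] > 1, lambda t: t[1] == 'b'], rows)
print(kept)  # [(2, 'b'), (4, 'b')]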
from builtins import filter

import os
from glob import glob
from setuptools import setup
import io
import os.path
from ast import parse

name = 'sporco'

# Get version number from sporco/__init__.py
# See http://stackoverflow.com/questions/2058802
with open(os.path.join(name, '__init__.py')) as f:
    version = parse(
        next(filter(lambda line: line.startswith('__version__'), f))).body[0].value.s

packages = ['sporco', 'sporco.admm']

docdirbase = 'share/doc/%s-%s' % (name, version)

data = [(docdirbase, glob("*.txt"))]
dd = os.path.join(docdirbase, 'examples')
pp = os.path.join('examples')
data.append(
    (dd, glob(os.path.join(pp, "*/*.py")) +
     glob(os.path.join(pp, "*/*.ipynb"))))

longdesc = \
"""
def __call__(self, message):
    self._messages = list(
        filter(
            lambda m: m[:NUM_SET_PROPERTY_HEADER_BYTES] != message[:NUM_SET_PROPERTY_HEADER_BYTES],
            self._messages))
    self._messages.append(message)
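# An illustrative sketch of the dedup-by-header behaviour above, using an
# assumed header length of 3 bytes (the real NUM_SET_PROPERTY_HEADER_BYTES
# depends on the surrounding module).
NUM_SET_PROPERTY_HEADER_BYTES = 3
queue = [(0xF0, 0x01, 0x10, 0xAA), (0xF0, 0x02, 0x20, 0xBB)]
new_message = (0xF0, 0x01, 0x10, 0xCC)  # same 3-byte header as the first entry
queue = [m for m in queue
         if m[:NUM_SET_PROPERTY_HEADER_BYTES] != new_message[:NUM_SET_PROPERTY_HEADER_BYTES]]
queue.append(new_message)
print(queue)  # the stale (..., 0xAA) message has been replaced by (..., 0xCC)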
def io_regular_files(self):
    return filter(lambda m: isinstance(m, definitions.IoRegularFile), self._io_files)
def intent_examples(self):
    return list(
        filter(lambda e: "intent" in e,
               self.intent_examples_only + self.common_examples))
def filter(*args, **kwargs):
    return list(builtins.filter(*args, **kwargs))
def metric_instances(self):
    return filter(lambda m: isinstance(m, definitions.MetricInstance), self._metrics)
def test_no_available_cell(self, sim_engine, function_under_test): sim_engine = sim_engine( diff_config = { 'exec_numSlotframesPerRun': 1000, 'exec_numMotes' : 2, 'app_pkPeriod' : 0, 'sf_class' : 'MSF', 'conn_class' : 'Linear' } ) # for quick access root = sim_engine.motes[0] hop_1 = sim_engine.motes[1] asn_at_end_of_simulation = ( sim_engine.settings.tsch_slotframeLength * sim_engine.settings.exec_numSlotframesPerRun ) # wait for hop_1 to get ready. u.run_until_mote_is_ready_for_app(sim_engine, hop_1) assert sim_engine.getAsn() < asn_at_end_of_simulation # fill up the hop_1's schedule channel_offset = 0 cell_options = [d.CELLOPTION_TX] used_slots = hop_1.tsch.get_busy_slots(hop_1.sf.SLOTFRAME_HANDLE_NEGOTIATED_CELLS) for _slot in range(sim_engine.settings.tsch_slotframeLength): if _slot in used_slots: continue else: hop_1.tsch.addCell( slotOffset = _slot, channelOffset = channel_offset, neighbor = root.get_mac_addr(), cellOptions = cell_options, slotframe_handle = hop_1.sf.SLOTFRAME_HANDLE_NEGOTIATED_CELLS ) assert ( len(hop_1.tsch.get_busy_slots(hop_1.sf.SLOTFRAME_HANDLE_NEGOTIATED_CELLS)) == sim_engine.settings.tsch_slotframeLength ) # trigger scheduling adaptation root_mac_addr = root.get_mac_addr() hop_1.sf.retry_count[root_mac_addr] = -1 # put dummy stats so that scheduling adaptation can be triggered hop_1.sf.tx_cell_utilization = 100 if function_under_test == 'adapt_to_traffic': hop_1.sf._adapt_to_traffic(root_mac_addr, hop_1.sf.TX_CELL_OPT) elif function_under_test == 'relocate': for relocating_cell in filter(lambda cell: cell.options == [d.CELLOPTION_TX], hop_1.tsch.get_cells(root.get_mac_addr(), hop_1.sf.SLOTFRAME_HANDLE_NEGOTIATED_CELLS)): hop_1.sf._request_relocating_cells( neighbor = root_mac_addr, cell_options = [d.CELLOPTION_TX], num_relocating_cells = 1, cell_list = [relocating_cell] ) break else: # not implemented assert False # make sure the log is written into the file SimEngine.SimLog.SimLog().flush() # MSF should output a "schedule-full" error in the log file logs = u.read_log_file( filter = [SimLog.LOG_MSF_ERROR_SCHEDULE_FULL['type']], after_asn = sim_engine.getAsn() - 1 ) assert len(logs) == 1 assert logs[0]['_mote_id'] == hop_1.id
print(aa(1, 2))
print(aa(3, 4))

kbs = lambda a, su=10: a + su
print(kbs(5))
print(kbs(5, 6))

sbs = lambda a, *tu, **di: print(a, tu, di)
sbs(1, 2, 3, m=4, n=5)

li = [lambda a, b: a + b, lambda a, b: a * b]
print(li[0](3, 4))
print(li[1](3, 4))

# Using lambda inside another function
print(list(filter(lambda a: a < 5, range(10))))  # using lambda with the built-in filter
print(list(filter(lambda a: a % 2, range(10))))

print('-- function decorators ------')

def make2(fn):
    return lambda: '안녕' + fn()

def make1(fn):
    return lambda: '반가워' + fn()

def hello():
    return '홍길동'
def filter(function, iterable):
    '''Replacement for the built-in :func:`filter() <python:filter>` function.'''
    return builtins.filter(function, iterable)
def current_date(self):
    """
    :return: the digit characters of the current filename, joined into a single string
    """
    return ''.join(filter(str.isdigit, str(self.current_filename)))
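# A tiny illustration of the digit-extraction idiom used above, with a
# made-up filename:
print(''.join(filter(str.isdigit, 'report_2021-06-01.csv')))  # '20210601'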
def io_directories(self):
    return filter(lambda m: isinstance(m, definitions.IoDirectory), self._io_files)
def yield_sequences_in_list(cls, paths, using=None, pad_style=PAD_STYLE_DEFAULT, allow_subframes=False): """ Yield the discrete sequences within paths. This does not try to determine if the files actually exist on disk, it assumes you already know that. A template :obj:`FileSequence` object can also be provided via the ``using`` parameter. Given this template, the dirname, basename, and extension values will be used to extract the frame value from the paths instead of parsing each path from scratch. Examples: The ``using`` field can supply a template for extracting the frame component from the paths:: paths = [ '/dir/file_001.0001.ext', '/dir/file_002.0001.ext', '/dir/file_003.0001.ext', ] template = FileSequence('/dir/file_#.0001.ext') seqs = FileSequence.yield_sequences_in_list(paths, using) # [<FileSequence: '/dir/file_1-3@@@.0001.ext'>] Args: paths (list[str]): a list of paths using (:obj:`FileSequence`): Optional sequence to use as template pad_style (`.PAD_STYLE_DEFAULT` or `.PAD_STYLE_HASH1` or `.PAD_STYLE_HASH4`): padding style allow_subframes (bool): if True, handle subframe filenames Yields: :obj:`FileSequence`: """ seqs = {} if allow_subframes: _check = cls.DISK_SUB_RE.match else: _check = cls.DISK_RE.match using_template = isinstance(using, FileSequence) if using_template: dirname, basename, ext = using.dirname(), using.basename( ), using.extension() head = len(dirname + basename) tail = -len(ext) frames = set() for path in filter(None, map(utils.asString, paths)): frame = path[head:tail] try: int(frame) except ValueError: if not allow_subframes: continue try: decimal.Decimal(frame) except decimal.DecimalException: continue _, _, subframe = frame.partition(".") key = (dirname, basename, ext, len(subframe)) seqs.setdefault(key, frames).add(frame) else: for match in filter(None, map(_check, map(utils.asString, paths))): dirname, basename, frame, ext = match.groups() if not basename and not ext: continue if frame: _, _, subframe = frame.partition(".") key = (dirname, basename, ext, len(subframe)) else: key = (dirname, basename, ext, 0) seqs.setdefault(key, set()) if frame: seqs[key].add(frame) def start_new_seq(): seq = cls.__new__(cls) seq._dir = dirname or '' seq._base = basename or '' seq._ext = ext or '' return seq def finish_new_seq(seq): if seq._subframe_pad: seq._pad = '.'.join([seq._frame_pad, seq._subframe_pad]) else: seq._pad = seq._frame_pad seq.__init__(utils.asString(seq), pad_style=pad_style, allow_subframes=allow_subframes) def get_frame_width(frame_str): frame_num, _, _ = frame_str.partition(".") return len(frame_num) def get_frame_minwidth(frame_str): # find the smallest padding width for a frame string frame_num, _, _ = frame_str.partition(".") size = len(frame_num) num = int(frame_num) num_size = len(str(num)) if size == num_size: return 1 return size def frames_to_seq(frames, pad_length, decimal_places): seq = start_new_seq() seq._frameSet = FrameSet(sorted( decimal.Decimal(f) for f in frames)) seq._frame_pad = cls.getPaddingChars(pad_length, pad_style=pad_style) if decimal_places: seq._subframe_pad = cls.getPaddingChars(decimal_places, pad_style=pad_style) else: seq._subframe_pad = '' finish_new_seq(seq) return seq for (dirname, basename, ext, decimal_places), frames in iteritems(seqs): # Short-circuit logic if we do not have multiple frames, since we # only need to build and return a single simple sequence if not frames: seq = start_new_seq() seq._frameSet = None seq._frame_pad = '' seq._subframe_pad = '' finish_new_seq(seq) yield seq continue # If we have 
multiple frames, then we need to check them for different # padding and possibly yield more than one sequence. # sort the frame list by their string padding width sorted_frames = sorted(((get_frame_width(f), f) for f in frames), key=operator.itemgetter(0)) current_frames = [] current_width = None for width, frame in sorted_frames: # initialize on first item if current_width is None: current_width = width if width != current_width and get_frame_minwidth( frame) > current_width: # We have a new padding length. # Commit the current sequence, and then start a new one. yield frames_to_seq(current_frames, current_width, decimal_places) # Start tracking the next group of frames using the new length current_frames = [frame] current_width = width continue current_frames.append(frame) # Commit the remaining frames as a sequence if current_frames: yield frames_to_seq(current_frames, current_width, decimal_places)