def check(vals1, vals2, thresh=3):
    """Compare two sequences of coordinate entries and report differences.

    Every entry of both sequences is unpacked as ``(*atoms, final)``: a
    leading group of atom descriptors followed by one numeric value.  The
    sequences are walked in lockstep (``zip``), so surplus entries of the
    longer one are ignored.

    * If the atom descriptors of a pair differ, the pair is counted as a
      mismatch and nothing is printed for it.
    * Otherwise the difference ``final2 - final1`` is printed when its
      magnitude exceeds ``10**-thresh``; with ``thresh=None`` every matching
      pair is printed.

    :param vals1: reference entries
    :param vals2: entries compared against ``vals1``
    :param thresh: number of decimals defining both the tolerance and the
        printed precision; ``None`` disables the tolerance filter
    :return: number of pairs whose atom descriptors differ
    """
    # With thresh=None there is no tolerance to derive a precision from; the
    # original code then built the invalid spec '{:> 5.Nonef}' and raised
    # ValueError.  Fall back to four decimals in that case.
    precision = 4 if thresh is None else thresh

    mismatch = 0
    for (*atoms1, final1), (*atoms2, final2) in zip(vals1, vals2):
        if atoms1 != atoms2:
            mismatch += 1
            continue

        delta = final2 - final1
        if thresh is None or abs(delta) > 10**-thresh:
            # The template consumes two fields per atom descriptor, so each
            # descriptor has to flatten to exactly two values
            # (presumably index and element symbol -- verify against caller).
            label = '-'.join(['{:>3} {:<2}'] * len(atoms1))
            print(label.format(*collapse(atoms1)) + f' = {delta:> 5.{precision}f}')
    return mismatch
def coverage_summary_per_gene(self, target_csv_path=None):
    """Aggregate the per-interval coverage summary into one row per gene.

    The interval table returned by ``self.coverage_summary()`` is grouped by
    its ``'Gene'`` column.  For every gene the method collects the distinct
    non-``PASS`` interval statuses, the distinct associated conditions, the
    number of distinct variants and the length-weighted mean coverage.
    Minimum and maximum interval coverage are only filled in for genes that
    have more than one interval; they are left empty (NaN) otherwise.

    :param target_csv_path: optional path; when truthy the resulting table
        is also written there as CSV without the index
    :return: ``pandas.DataFrame`` with the columns ``Sample ID``, ``Gene``,
        ``Variant Count``, ``Coverage Average``, ``Associated Conditions``,
        ``Min Coverage``, ``Max Coverage`` and ``Filters``
    """
    coverage_summary = self.coverage_summary()

    gene_coverage_records = []
    for gene, gene_intervals in coverage_summary.groupby('Gene'):
        # entries like "LOW_COVERAGE;READS"
        statuses = ';'.join(gene_intervals['Interval Status'])
        # Sorted so the joined text does not depend on set iteration order.
        unique_statuses = ', '.join(sorted(set(statuses.split(';')) - {'PASS'}))

        # entries like "Alz, Park"
        conditions = ', '.join(gene_intervals['Associated Conditions'])
        unique_conditions = ', '.join(sorted(set(conditions.split(', ')) - {''}))

        weighted_mean = np.average(gene_intervals['Interval Mean Coverage'],
                                   weights=gene_intervals['Interval Length'])

        # entries like "rs1, rs2"
        variants_str = ', '.join(collapse(gene_intervals['Interval Variants']))
        # Drop the empty string: an interval without variants must not be
        # counted as one variant.
        variants = set(variants_str.split(', ')) - {''}
        variant_count = len(variants)

        # The join is cautionary. It will always be a single sample:
        unique_sample = ', '.join(gene_intervals['Sample ID'].unique())

        gene_record = {
            'Gene': gene,
            'Sample ID': unique_sample,
            'Filters': unique_statuses or None,
            'Variant Count': variant_count,
            'Coverage Average': round(weighted_mean, 2),
            'Associated Conditions': unique_conditions,
        }
        if len(gene_intervals) > 1:
            gene_record.update({
                'Min Coverage': gene_intervals['Interval Mean Coverage'].min(),
                'Max Coverage': gene_intervals['Interval Mean Coverage'].max(),
            })
        gene_coverage_records.append(gene_record)

    col_order = [
        'Sample ID',
        'Gene',
        'Variant Count',
        'Coverage Average',
        'Associated Conditions',
        'Min Coverage',
        'Max Coverage',
        'Filters',
    ]
    # Passing `columns=` (instead of indexing afterwards) keeps the layout
    # stable even when there are no records at all or when no gene produced
    # the optional Min/Max columns; indexing raised KeyError in both cases.
    gene_coverage_table = pd.DataFrame(gene_coverage_records, columns=col_order)

    if target_csv_path:
        gene_coverage_table.to_csv(target_csv_path, index=False)
    return gene_coverage_table
def print(self, bonds=True, angles=False, dihedrals=False):
    """Return a text listing of the stored coordinate values.

    Only bonds are rendered: one line per entry of ``self.bond_vals``, with
    the atom descriptors joined by ``-`` followed by the value printed with
    four decimals.  The ``angles`` and ``dihedrals`` switches merely walk
    their collections and contribute nothing to the output.

    :return: the assembled text (empty string when nothing was rendered)
    """
    rendered = []

    if bonds:
        for *atoms, final in self.bond_vals:
            template = '-'.join(['{:>3} {:<2}'] * len(atoms))
            template += f' = {final:> 5.4f}\n'
            rendered.append(template.format(*collapse(atoms)))

    if angles:
        # Not rendered; the collections are only iterated.
        for _ in self.angle_vals + self.linear_vals:
            pass

    if dihedrals:
        # Not rendered; the collection is only iterated.
        for _ in self.dihedral_vals:
            pass

    return ''.join(rendered)
def run_importer(self):
    """
    First confirms that all import parameters are set, then runs ASL2BIDS using multi-threading.

    Steps performed:
      1. reset the bookkeeping of a previous run and disable the widgets;
      2. change the working directory to the platform-specific DCM2NIIX folder;
      3. fetch the import parameters and the DICOM directories, bailing out
         (widgets re-enabled, working directory restored) when either is missing;
      4. split the directories over at most four ``Importer_Worker`` instances,
         wire up their signals and start them on ``self.threadpool``.
    """
    # Set (or reset if this is another run) the essential variables
    self.n_import_workers = 0
    self.import_parms = None
    self.import_summaries.clear()
    self.failed_runs.clear()
    self.import_workers.clear()

    # Disable the run button to prevent accidental re-runs
    self.set_widgets_on_or_off(state=False)

    # Ensure the dcm2niix path is visible
    chdir(Path(self.config["ProjectDir"]) / "External" / "DCM2NIIX" / f"DCM2NIIX_{system()}")

    # Get the import parameters
    self.import_parms = self.get_import_parms()
    if self.import_parms is None:
        # Reset widgets back to normal and change the directory back
        self.set_widgets_on_or_off(state=True)
        chdir(self.config["ScriptsDir"])
        return

    # Get the dicom directories
    subject_dirs: List[Tuple[Path]] = get_dicom_directories(config=self.import_parms)

    if not subject_dirs:
        # Nothing to convert. Without this guard NTHREADS would be 0 and
        # divide(0, ...) raises, leaving the widgets disabled and the working
        # directory changed. Undo the setup exactly like the branch above.
        self.set_widgets_on_or_off(state=True)
        chdir(self.config["ScriptsDir"])
        return

    # Set the progressbar
    self.progbar_import.setValue(0)
    self.progbar_import.setMaximum(len(list(collapse(subject_dirs))))

    if self.config["DeveloperMode"]:
        print("Detected the following dicom directories:")
        pprint(subject_dirs)
        print('\n')

    # Never use more than four workers (set to 1 when troubleshooting)
    NTHREADS = min([len(subject_dirs), 4])
    for idx, subjects_subset in enumerate(divide(NTHREADS, subject_dirs)):
        dicom_dirs = flatten(subjects_subset)
        worker = Importer_Worker(
            dcm_dirs=dicom_dirs,  # The list of dicom directories
            config=self.import_parms,  # The import parameters
            use_legacy_mode=self.chk_uselegacy.isChecked(),  # Whether to use legacy mode or not
            name=f"Converter_{str(idx).zfill(3)}"
        )

        self.signal_stop_import.connect(worker.slot_stop_import)
        worker.signals.signal_send_summaries.connect(self.slot_is_ready_postprocessing)
        worker.signals.signal_send_errors.connect(self.slot_update_failed_runs_log)
        worker.signals.signal_confirm_terminate.connect(self.slot_cleanup_postterminate)
        worker.signals.signal_update_progressbar.connect(self.slot_update_progressbar)
        self.import_workers.append(worker)
        self.n_import_workers += 1

    # Launch them
    for worker in self.import_workers:
        self.threadpool.start(worker)

    # Change the cursor
    self.btn_terminate_importer.setEnabled(True)
    QApplication.setOverrideCursor(Qt.WaitCursor)
def out_vars(self, with_aug=True, flattened=True):
    """Collect the output variables of every component.

    :param with_aug: forwarded unchanged to each component's ``get_out_vars``
    :param flattened: when true the per-component results are flattened into
        a single list; otherwise one entry per component is returned
    :return: list of output variables
    """
    per_component = []
    for component in self.components:
        per_component.append(component.get_out_vars(with_aug))

    if not flattened:
        return per_component
    return list(collapse(per_component))
def get_file_str(blocks):
    """Flatten *blocks* and concatenate everything that is not an ``int``.

    Items whose exact type is ``int`` are discarded; the remaining items are
    joined without a separator, so they are expected to be strings.
    """
    pieces = [item for item in collapse(blocks) if type(item) != int]
    return "".join(pieces)
def from_lines(cls, lines):
    """Build an instance from whitespace-separated integer text lines.

    The instance is created as ``cls(n, data, flags)`` where ``n`` is the
    number of lines, ``data`` holds every integer of every line in reading
    order and ``flags`` is a list of ``n * n`` ``False`` values.
    """
    size = len(lines)
    tokens = [token for line in lines for token in line.split()]
    numbers = [int(token) for token in tokens]
    flags = [False] * (size * size)
    return cls(size, numbers, flags)
def find_possible_moves(self):
    """Return one flat list with the moves produced by every move type.

    Each object in ``self.move_types`` is asked for its moves via
    ``make_moves(self)``; the results are flattened together.
    """
    moves_per_type = (move_type.make_moves(self) for move_type in self.move_types)
    flat_moves = collapse(moves_per_type)
    return list(flat_moves)
def load_vocabulary(word_vector_name, text_list, use_position_label=False, begin_tokens=None,
                    end_tokens=None) -> Vocabulary:
    """Create a ``Vocabulary`` backed by the requested word embedding.

    :param word_vector_name: one of ``"glove"``, ``"fasttext"`` or
        ``"glove_300d"``; any other value raises ``KeyError``
    :param text_list: (nested) collection of words; it is flattened lazily
        before being handed to ``Vocabulary``
    :param use_position_label: forwarded to ``Vocabulary``
    :param begin_tokens: forwarded to ``Vocabulary``
    :param end_tokens: forwarded to ``Vocabulary``
    """
    embedding_classes = {
        "glove": GloveWordEmbedding,
        "fasttext": FastTextWordEmbedding,
        "glove_300d": Glove300dWordEmbedding,
    }
    flat_words = more_itertools.collapse(text_list)
    embedding = embedding_classes[word_vector_name]()
    return Vocabulary(
        embedding,
        flat_words,
        use_position_label=use_position_label,
        begin_tokens=begin_tokens,
        end_tokens=end_tokens,
    )
def dict_to_args(config):
    """Render a mapping as a single command-line string.

    Every key becomes ``--key`` followed by its value; values that are
    themselves iterable (other than strings) are flattened into several
    space-separated tokens.  All tokens are converted with ``str``.
    """
    option_pairs = [["--" + key, value] for key, value in config.items()]
    tokens = (str(token) for token in list(collapse(option_pairs)))
    return " ".join(tokens)
for text in texts: for token in text: frequency1[token] += 1 ## remove words with frequency less than 5. #下面的列表是三维的,第一维度表示多篇文章,第二维表示文章里的多个句子,第三维表示一句话里面的所有词 x_train_texts = [[[token for token in text if frequency1[token] > 5] for text in texts] for texts in x_train_texts] x_test_texts = [[[token for token in text if frequency1[token] > 5] for text in texts] for texts in x_test_texts] x_val_texts = [[[token for token in text if frequency1[token] > 5] for text in texts] for texts in x_val_texts] texts = list( more_itertools.collapse(x_train_texts[:] + x_test_texts[:] + x_val_texts[:], levels=1)) #把列表里的所有元素都列出来变成一个扁平列表,levels表示去掉几层[] word2vec = Word2Vec(texts, size=200, min_count=5) word2vec.save("dictonary_yelp") ## convert 3D text list to 3D list of index #这里每个单词存的不是词向量,而是词语所在的训练好的word2vec词向量字典里面的编号 x_train_vec = [[[word2vec.wv.vocab[token].index for token in text] for text in texts] for texts in x_train_texts] x_test_vec = [[[word2vec.wv.vocab[token].index for token in text] for text in texts] for texts in x_test_texts] x_val_vec = [[[word2vec.wv.vocab[token].index for token in text] for text in texts] for texts in x_val_texts]
if len(fmus) < 2: raise ValueError('Please select multiple FMUs') # create tmp directory with TemporaryDirectory() as dir: # unpack fmus to tmp directory binaries = [] for fmu in fmus: with ZipFile(fmu, 'r') as z: z.extractall(join(dir, basename(fmu))) # add all subdirectories of "binaries" in the zip files binaries.append(list(filter(lambda f: not isdir(f), listdir(join(dir, basename(fmu), "binaries"))))) # check if all fmus provide different binaries (binary/*) if len(set(collapse(binaries))) != len(list(collapse(binaries))): raise ValueError(f'The provided binary folders are not unique: {list(zip(fmus, binaries))}') # check if their xmls are sufficiently similar if len(set([get_fmu_key(join(dir, basename(fmu))) for fmu in fmus])) > 1: raise ValueError('The FMUs appear to be generated from different model files.' + str([get_fmu_key(join(dir, basename(fmu))) for fmu in fmus])) # copy content of binary folder from secondary fmus to the folder of the primary copy_binaries(dir, fmus, binaries) # change the working directory into the primary fmu folder which we want to package chdir(join(dir, basename(fmus[0]))) # zip the primary folder, delete tempfolder with ZipFile(asksaveasfilename(filetypes=['Functional\u00A0Mockup\u00A0Unit {*.fmu}']), 'w') as zipObj: # Iterate over all the files in directory
def get_summarization_method_name_vocabulary_set():
    """Return the set of distinct tokens yielded by ``read_method_name_token()``.

    The (possibly nested) token collection is flattened before the set is
    built.
    """
    flat_tokens = more_itertools.collapse(read_method_name_token())
    return set(flat_tokens)
def get_summarization_java_code_vocabulary_set():
    """Return the set of distinct tokens yielded by ``read_source_code_token()``.

    The (possibly nested) token collection is flattened before the set is
    built.
    """
    flat_tokens = more_itertools.collapse(read_source_code_token())
    return set(flat_tokens)
def flatten(*args: object) -> list:
    """Flatten the given inputs into one list.

    Every positional argument may be a single value or an arbitrarily nested
    iterable; strings and bytes are treated as single values and are not
    split into characters.

    The parameters were annotated with ``any``, which is the builtin
    function rather than a type; ``object`` expresses "anything" without
    needing an extra import.

    :return: a new list with all leaf values in their original order
    """
    return list(more_itertools.collapse(args))
frequency1[token] += 1 ## remove words with frequency less than 5. print('remove words with frequency less than 5...') x_train_texts = [[[token for token in text if frequency1[token] > 5] for text in texts] for texts in x_train_texts] x_test_texts = [[[token for token in text if frequency1[token] > 5] for text in texts] for texts in x_test_texts] x_val_texts = [[[token for token in text if frequency1[token] > 5] for text in texts] for texts in x_val_texts] texts = list( more_itertools.collapse(x_train_texts[:] + x_test_texts[:] + x_val_texts[:], levels=1)) ## train word2vec model on all the words print('train word2vec model...') word2vec = Word2Vec(texts, size=200, min_count=5) word2vec.save("dictonary_yelp") ## convert 3D text list to 3D list of index print('convert 3D text list to 3D list of index...') x_train_vec = [[[word2vec.wv.vocab[token].index for token in text] for text in texts] for texts in x_train_texts] x_test_vec = [[[word2vec.wv.vocab[token].index for token in text] for text in texts] for texts in x_test_texts]
def args(*args: Number) -> Sequence[Number]:
    """Return the arguments as a tuple, adding conjugates of non-real numbers.

    Every value that is ``Complex`` but not ``Real`` is immediately followed
    by its conjugate; all other values are passed through unchanged.
    """
    def _with_conjugates(values):
        for value in values:
            if isinstance(value, Complex) and not isinstance(value, Real):
                yield (value, value.conjugate())
            else:
                yield value

    return tuple(collapse(_with_conjugates(args)))
def eval(self, context):
    """Evaluate this node and return the resulting string.

    When ``self.var`` is ``None`` the stored ``self.value`` is returned as
    is.  Otherwise ``self.var`` is flattened, every contained node is
    evaluated with ``context`` and the results are substituted, in order,
    for the underscores in ``self.value``.

    The node itself is left untouched.  Previously ``self.var`` was
    overwritten with the evaluated results, so a second call tried to call
    ``.eval`` on those results instead of on the original nodes.

    :param context: passed unchanged to the ``eval`` of every child node
    :return: the formatted string
    """
    if self.var is None:
        return self.value

    children = list(more_itertools.collapse(self.var))
    evaluated = [child.eval(context) for child in children]
    # Each "_" in the value is a positional placeholder.
    return self.value.replace("_", "{}").format(*evaluated)
def get_common_error_vocabulary_set():
    """Return the union of the code-token and modify-action-token vocabularies.

    Both token sources are flattened before being turned into sets.
    """
    code_tokens = set(more_itertools.collapse(read_filter_without_include_ac_token()))
    action_tokens = set(more_itertools.collapse(read_modify_action_token()))
    return code_tokens.union(action_tokens)
def __init__(self, variables: List[Union[List, str]], callback: Optional[Callable[[Figure], None]] = None, **kwargs):
    """
    Provides an iterable of variables and plot parameters like ('induction1', {'color':'green', 'style': '--'}).
    It contains logic to automatically match up the variables and provided kwargs to allow for a simple syntax,

    e.g. when called with [['a','b'],['c','d']] and style = [['.', None],'--'] it will detect the grouping
    and apply the dotted style to 'a' and the dashed style to 'c' and 'd'.

    After construction ``self.vars`` holds the flattened variable names and ``self.kwargs`` holds, at the same
    index, a dict with the non-None plot parameters of that variable.

    :param variables: nested list of strings. Each string represents a variable of the FMU or a measurement that
     should be plotted by the environment.
    :param callback: if provided, it is executed after the plot is finished.
     Will get the generated figure as parameter to allow further modifications.
    :param kwargs: those arguments are merged (see omg.util.flatten_together) with the variables and
     then provided to the pd.DataFrame.plot(·) function
    :raises ValueError: if both 'c' and 'color' are passed as keyword arguments
    """
    self.vars = list(collapse(variables))
    self._callback = callback

    # set colors None if not provided
    colorkey = ({'c', 'color'} & set(kwargs.keys()))
    if not colorkey:
        kwargs['c'] = None
        colorkey = 'c'
    elif len(colorkey) > 1:
        raise ValueError(
            f'Multiple color parameters provided "{colorkey}"')
    else:
        colorkey = colorkey.pop()

    # expand every keyword argument alongside the (nested) variables
    # presumably yields one entry per flattened variable; it is indexed that way below -- verify against omg.util
    args = dict()
    for k, v in dict(kwargs).items():
        args[k] = flatten_together(variables, v)

    # apply to a group only if all color values are none inside that group
    # NOTE(review): colorkey is always a non-empty string here ('c' or 'color'), so this test is always true
    if colorkey:
        # if all elements in the variables are lists and they are all of equal length
        lengths = set([isinstance(l, list) and len(l) for l in variables])
        if len(lengths) == 1:
            # set contains either the length of all lists or False if all values were non-list values
            length = lengths.pop()
            if length:
                # same position inside each group gets the same default color 'C<position + 1>'
                for groups in range(len(variables)):
                    for i in range(length):
                        if args[colorkey][length * groups + i] is None:
                            args[colorkey][length * groups + i] = 'C' + str(i + 1)
            else:
                # all elements are single values
                for i, c in enumerate(args[colorkey]):
                    if c is None:
                        args[colorkey][i] = 'C' + str(i + 1)

    # merge parameters to the variables for indexing access
    self.kwargs = []
    for i, _ in enumerate(self.vars):
        args_ = dict()
        for k, arg in args.items():
            v = arg[i]
            # parameters left at None are omitted for this variable
            if v is not None:
                args_[k] = v
        self.kwargs.append(args_)
def _write_report_lines_from_hooks(
    self, lines: List[Union[str, List[str]]]
) -> None:
    """Write the report lines collected from hooks, last entry first.

    ``lines`` holds one entry per hook result; an entry is either a single
    string or a list of strings.  The entries are emitted in reverse order
    through ``self.write_line`` while the strings inside one entry keep
    their own order.

    The caller's list is no longer reversed in place; it is only iterated
    backwards.
    """
    for line in collapse(reversed(lines)):
        self.write_line(line)
'https://en.wikipedia.org/wiki/Technology', 'https://en.wikipedia.org/wiki/Electronics', 'https://en.wikipedia.org/wiki/Media_(communication)', ] dataset = [] topic_list = {} for url in url_list: text = BeautifulSoup(requests.get(url).text, 'html.parser').get_text() topic = list(Counter(preprocess( text)))[0:list(preprocess(text)).index(tldextract.extract(url).domain)] if wordnet.synsets(topic[0]): for h in list( collapse([ s.lemma_names() for s in wordnet.synsets(topic[0])[0].hyponyms() ])): topic_list[h] = url_list.index(url) for t in topic: topic_list[t] = url_list.index(url) print('I have learned about', ' '.join(topic)) dataset.append(array(text)) print('Dataset complete. Ready for use.') print('Keyword list:', list(topic_list.keys())) def response(user_input): print('Gathering data...') dataset_sims = {} nlp = spacy.load('en_core_web_sm')
def print_file(list_):
    """Print the concatenation of every string found in the flattened *list_*.

    Items whose exact type is not ``str`` are ignored.
    """
    flat_items = list(collapse(list_))
    text = "".join(item for item in flat_items if type(item) == str)
    print(text)
def num_values(layer, value=0):
    """Count how many leaf elements of the (nested) *layer* equal *value*."""
    matches = 0
    for pixel in collapse(layer):
        matches = matches + (pixel == value)
    return matches
def in_vars(self):
    """Return one flat list with the input variables of every component."""
    per_component = []
    for component in self.components:
        per_component.append(component.get_in_vars())
    return list(collapse(per_component))
def python_exceptions_handled(code_text: str) -> Iterable[str]:
    """Yield every exception handled in the given code.

    All ``ast.ExceptHandler`` nodes of the code are looked up and the
    exceptions reported for each handler are yielded one by one as strings.
    """
    handlers = python_ast_objects_of_type(code_text, ast.ExceptHandler)
    handled_per_handler = [
        python_ast_exception_handler_exceptions_handled(handler) for handler in handlers
    ]
    for exception_name in more_itertools.collapse(handled_per_handler, base_type=str):
        yield exception_name
def __init__(self, params, inChannel=None, outChannel=None):
    """
    Initialise the ODE model from a dictionary of parameters.

    Every required entry of ``params`` is copied onto the instance under the
    same name; a missing entry raises ``KeyError``.  In addition the
    compartment names (one per compartment and age bin) and 30 random RGB
    colors are prepared.

    Parameters
    ----------
    params : dictionary of parameters
        Must contain all keys listed in the body below.
    inChannel, outChannel : optional
        Stored unchanged on the instance.
    """
    self.inChannel = inChannel
    self.outChannel = outChannel

    # Copied in this exact order; 'bins' is the number of age bins.
    required_keys = (
        'tl', 'te', 'k0', 'kt', 'mu', 'sigma',
        'gamma1', 'gamma2', 'gamma3',
        'N', 'beta', 'beta2', 'f',
        'lockdownLeakiness', 'contactHome', 'contactTotal',
        'bins', 'Nbar', 'adultBins',
        'testingFraction1', 'testingFraction2', 'testingFraction3',
        'mortality', 'totalOut',
    )
    for key in required_keys:
        setattr(self, key, params[key])

    # One name per compartment and age bin: S1..Sn, E1..En, ...
    compartments = ['S', 'E', 'A', 'I', 'Xs', 'Xe', 'Xa', 'Xi', 'P', 'R']
    self.names = [
        compartment + str(age_bin)
        for compartment in compartments
        for age_bin in range(1, self.bins + 1)
    ]

    # Draw all red, then all green, then all blue components.
    reds = [random.random() for _ in range(30)]
    greens = [random.random() for _ in range(30)]
    blues = [random.random() for _ in range(30)]
    self.colors = list(zip(reds, greens, blues))
def handler_5(items):
    """Return a flat list with every leaf element of the nested *items*."""
    return [leaf for leaf in more_itertools.collapse(items)]
def just_one(obj):
    """Return the first leaf element of *obj*.

    *obj* may be a single value or a nested iterable; it is flattened and
    the first resulting item is returned.
    """
    # If we have an iterable, sometimes we only want one item
    flattened = collapse(obj)
    return first(flattened)
def _dump_notes(self, circle_free: bool = False) -> Iterator[str]:
    """Yield the text lines that render this section's notes and events.

    The section is processed as four bars.  For each bar a timing bar is
    rendered first (one symbol per subdivision, with stop and BPM markers
    in front of the subdivision they belong to), then the notes of the bar
    are laid out on one or more frames.  Frames of consecutive bars are
    merged when they do not clash, and the dumped frames are yielded with
    an empty string between two frames.

    :param circle_free: when true, the symbol of a ``LongNoteEnd`` is
        replaced through ``NOTE_TO_CIRCLE_FREE_SYMBOL`` if it has an entry
    """
    # Split notes and events into bars
    notes_by_bar: Dict[int, List[AnyNote]] = defaultdict(list)
    for note in self.notes:
        time_in_section = note.time % BeatsTime(4)
        bar_index = int(time_in_section)
        notes_by_bar[bar_index].append(note)
    events_by_bar: Dict[int, List[Union[BPMEvent, StopEvent]]] = defaultdict(list)
    for event in self.events:
        time_in_section = event.time % BeatsTime(4)
        bar_index = int(time_in_section)
        events_by_bar[bar_index].append(event)

    # Pre-render timing bars
    bars: Dict[int, List[str]] = defaultdict(list)
    chosen_symbols: Dict[BeatsTime, str] = {}
    # A single iterator for the whole section: every new note time takes the next unused symbol
    symbols_iterator = iter(NOTE_SYMBOLS)
    for bar_index in range(4):
        notes = notes_by_bar.get(bar_index, [])
        events = events_by_bar.get(bar_index, [])
        # Number of subdivisions needed so that every note/event time falls on one of them
        bar_length = lcm(
            *(
                [note.time.denominator for note in notes]
                + [event.time.denominator for event in events]
            )
        )
        # Bars that would have fewer than 3 subdivisions are drawn with 4
        if bar_length < 3:
            bar_length = 4

        bar_dict: Dict[int, BarEvent] = defaultdict(BarEvent)
        for note in notes:
            time_in_section = note.time % BeatsTime(4)
            time_in_bar = note.time % Fraction(1)
            # Index of the subdivision the note falls on
            time_index = time_in_bar.numerator * (
                bar_length // time_in_bar.denominator
            )
            # Only the first note on a subdivision picks a symbol
            if time_index not in bar_dict:
                symbol = next(symbols_iterator)
                chosen_symbols[time_in_section] = symbol
                bar_dict[time_index].note = symbol

        for event in events:
            time_in_bar = event.time % Fraction(1)
            time_index = time_in_bar.numerator * (
                bar_length // time_in_bar.denominator
            )
            if isinstance(event, StopEvent):
                bar_dict[time_index].stops.append(event)
            elif isinstance(event, BPMEvent):
                bar_dict[time_index].bpms.append(event)

        bar = []
        for i in range(bar_length):
            bar_event = bar_dict.get(i, BarEvent())
            # Stops are written as "[<duration * 1000 truncated to int>]"
            for stop in bar_event.stops:
                bar.append(f"[{int(stop.duration * 1000)}]")

            # BPM changes are written as "(<BPM>)"
            for bpm in bar_event.bpms:
                bar.append(f"({bpm.BPM})")

            bar.append(bar_event.note or EMPTY_BEAT_SYMBOL)

        bars[bar_index] = bar

    # Create frame by bar
    frames_by_bar: Dict[int, List[Frame]] = defaultdict(list)
    for bar_index in range(4):
        bar = bars.get(bar_index, [])
        frame = Frame()
        frame.bars[bar_index] = bar
        for note in notes_by_bar[bar_index]:
            time_in_section = note.time % BeatsTime(4)
            symbol = chosen_symbols[time_in_section]
            if isinstance(note, TapNote):
                # Position already used: close the current frame and start a new one
                if note.position in frame.positions:
                    frames_by_bar[bar_index].append(frame)
                    frame = Frame()
                frame.positions[note.position] = symbol
            elif isinstance(note, LongNote):
                needed_positions = set(note.positions_covered())
                if needed_positions & frame.positions.keys():
                    frames_by_bar[bar_index].append(frame)
                    frame = Frame()
                direction = note.tail_direction()
                arrow = DIRECTION_TO_ARROW[direction]
                line = DIRECTION_TO_LINE[direction]
                # First covered position gets the note symbol, the last one the arrow,
                # everything in between the line
                for is_first, is_last, pos in mark_ends(note.positions_covered()):
                    if is_first:
                        frame.positions[pos] = symbol
                    elif is_last:
                        frame.positions[pos] = arrow
                    else:
                        frame.positions[pos] = line
            elif isinstance(note, LongNoteEnd):
                if note.position in frame.positions:
                    frames_by_bar[bar_index].append(frame)
                    frame = Frame()
                if circle_free and symbol in NOTE_TO_CIRCLE_FREE_SYMBOL:
                    symbol = NOTE_TO_CIRCLE_FREE_SYMBOL[symbol]
                frame.positions[note.position] = symbol
        # The last (possibly only) frame of the bar is always kept
        frames_by_bar[bar_index].append(frame)

    # Merge bar-specific frames if possible
    final_frames: List[Frame] = []
    for bar_index in range(4):
        frames = frames_by_bar[bar_index]
        # Merge if :
        # - No split in current bar (only one frame)
        # - There is a previous frame
        # - The previous frame is not a split frame (it holds a bar)
        # - The previous and current bars are all in the same 4-bar group
        # - The note positions in the previous frame do not clash with the current frame
        if (
            len(frames) == 1
            and final_frames
            and final_frames[-1].bars
            and max(final_frames[-1].bars.keys()) // 4
            == min(frames[0].bars.keys()) // 4
            and (
                not (final_frames[-1].positions.keys() & frames[0].positions.keys())
            )
        ):
            final_frames[-1].bars.update(frames[0].bars)
            final_frames[-1].positions.update(frames[0].positions)
        else:
            final_frames.extend(frames)

    # An empty string is inserted between two dumped frames before flattening
    dumped_frames = map(lambda f: f.dump(), final_frames)
    yield from collapse(intersperse("", dumped_frames))
def _write_report_lines_from_hooks(self, lines):
    """Write the report lines collected from hooks, last entry first.

    ``lines`` is a list whose entries are single strings or nested lists of
    strings.  The entries are emitted in reverse order through
    ``self.write_line`` while the strings inside one entry keep their own
    order.

    The caller's list is no longer reversed in place; it is only iterated
    backwards.
    """
    for line in collapse(reversed(lines)):
        self.write_line(line)
def to_image(self, text: str, color: Color = Colors.YELLOW) -> Image.Image:
    """Render *text* into an RGB image.

    The text is rendered to a bitmap and surrounded by a border of 10; set
    pixels are painted with the first three components of ``color.rgb`` and
    unset pixels are black.

    :param text: the text to render
    :param color: color used for set pixels
    :return: an ``'RGB'`` image with the bitmap's width and height
    """
    bitmap = self.render_text(text).add_border(10)
    foreground = color.rgb[:3]
    background = (0, 0, 0)
    channel_values = bytearray(
        collapse(foreground if pixel else background for pixel in bitmap.pixels)
    )
    size = (bitmap.width, bitmap.height)
    return Image.frombytes('RGB', size, bytes(channel_values))
def create_word_set(df: pd.DataFrame):
    """Return the set of all words in both tokenized question columns.

    The columns ``'tokenized_question1'`` and ``'tokenized_question2'`` are
    flattened and their distinct elements are combined into one set.
    """
    words = set()
    for column in ('tokenized_question1', 'tokenized_question2'):
        words.update(more_itertools.collapse(df[column]))
    return words