def consolidate_payloads(payload_list: List[dict]):
    """
    Collapse runs of identical, back-to-back payloads in a sorted list.

    Two payloads belong to the same run when every field except
    ``validity`` matches and the ``validity.to`` date of the earlier one
    is exactly one day before the ``validity.from`` date of the later
    one. Assumes the input contains no overlapping timespans.
    """

    def _same_fields(a, b):
        # Compare every key except the validity window itself.
        keys = (a.keys() | b.keys()) - {"validity"}
        return all(a.get(key) == b.get(key) for key in keys)

    def _breaks_run(a, b):
        # True when a new group must start between a and b.
        end_of_a = datetime.strptime(a["validity"]["to"], "%Y-%m-%d")
        start_of_b = datetime.strptime(b["validity"]["from"], "%Y-%m-%d")
        adjacent = end_of_a + timedelta(days=1) == start_of_b
        return not (_same_fields(a, b) and adjacent)

    def _collapse(run):
        # Keep the first payload, stretching its validity over the run.
        head, tail = first(run), last(run)
        head["validity"]["to"] = tail["validity"]["to"]
        return head

    if len(payload_list) < 2:
        return payload_list
    return [_collapse(run) for run in split_when(payload_list, _breaks_run)]
def groups(gap: timedelta = timedelta(
        hours=3)) -> Iterable[Res[Sequence[Entry]]]:
    """Yield entries grouped into sessions separated by more than *gap*.

    Errors from ``entries()`` are yielded first, unchanged; the valid
    entries are then split into groups wherever the time between two
    consecutive entries exceeds *gap*.
    """
    # Import hoisted to the top of the function; the previous
    # ``import more_itertools`` was unused and both imports only ran
    # after the first consumer advanced past the error items.
    from more_itertools import split_when

    vit, eit = split_errors(entries(), ET=Exception)
    yield from eit
    yield from split_when(vit, lambda a, b: (b.dt - a.dt) > gap)
def extract_negative(
        self, journey: Journey
) -> Generator[Union[ExampleIndices, None], None, None]:
    """Yield negative training examples extracted from a journey.

    Journeys containing any conversion yield a single bare ``None``
    (no negatives extracted); otherwise each yielded item is a sorted
    list of unique (relative_date, action) pairs covering a window of
    up to ``self.lookback_window_days`` days of sessions.
    """
    if any(session.conversion for session in journey):
        yield  # don't extract negative examples from journeys with conversions
    else:
        # extract all subsequences of length up to lookback_window_days from regressors
        # Group sessions into per-day segments (a new segment starts when
        # the date changes between consecutive sessions).
        daily_segments = list(
            more_itertools.split_when(journey,
                                      lambda s1, s2: s1.date != s2.date))
        # [first, last) is a sliding window over daily_segments.
        first, last = 0, 1
        while first < len(daily_segments):

            def date_action(session: Session) -> Tuple[int, int]:
                # Offset each session's date so the window's newest day maps
                # to lookback_window_days - 1. NOTE: closes over ``last``
                # late-bound, so it always reflects the current window.
                date = session.date - daily_segments[
                    last - 1][0].date + self.lookback_window_days - 1
                return date, session.action

            chained_daily_segments = itertools.chain.from_iterable(
                daily_segments[first:last])
            yield sorted(
                set(
                    date_action(session)
                    for session in chained_daily_segments))
            if last < len(daily_segments):
                # Grow the window, then shrink from the left until it spans
                # no more than lookback_window_days.
                last += 1
                while daily_segments[first][
                        0].date + self.lookback_window_days <= daily_segments[
                            last - 1][0].date:
                    first += 1
            else:
                # Window already reaches the end: emit shrinking suffixes.
                first += 1
def from_amazon_uri(uri) -> Job:
    """Create a Job from an Amazon Transcribe ``TranscriptFileUri``.

    Downloads the transcript JSON, then builds speakers/markers either
    from speaker labels (when present) or by splitting the item stream
    at punctuation.

    :param uri: pre-signed URL of the transcript JSON.
    :raises requests.HTTPError: if the download fails.
    """
    response = requests.get(uri)
    response.raise_for_status()
    transcription = response.json()
    name = transcription['jobName']
    base_text = transcription['results']['transcripts'][0]['transcript']
    if 'speaker_labels' in transcription['results']:
        labels = transcription['results']['speaker_labels']
        speakers = [add_speaker(x) for x in range(labels['speakers'])]
        markers = [
            add_marker(x, has_speakers=True) for x in labels['segments']
        ]
    else:
        segments = transcription['results']['items']
        speakers = []
        # BUG FIX: split_when calls its predicate with the (previous,
        # next) item pair; the old one-argument lambda raised TypeError
        # when the result was iterated. Splitting after each punctuation
        # item groups roughly one sentence per marker.
        items_segments = more_itertools.split_when(
            segments,
            lambda prev, _nxt: prev['type'] == 'punctuation',
        )
        markers = [add_marker(x) for x in items_segments]
    return Job(
        base_text=base_text,
        name=name,
        transcription=transcription,
        speakers=speakers,
        markers=markers,
    )
def get_indices_contiguous_elems_verifying(iterable, pred):
    """Return runs of contiguous indices whose elements satisfy *pred*.

    Each run is a list of consecutive indices ``i`` such that
    ``pred(iterable[i])`` is true; a new run starts whenever the
    matching indices stop being consecutive.

    Implemented with the standard library only (the previous version
    needed ``more_itertools.split_when`` plus a name-bound lambda, PEP8
    E731); indices are produced in increasing order, so testing
    ``y != x + 1`` is sufficient to detect a gap.
    """
    groups = []
    for i, elem in enumerate(iterable):
        if not pred(elem):
            continue
        if groups and i == groups[-1][-1] + 1:
            groups[-1].append(i)  # extends the current contiguous run
        else:
            groups.append([i])  # gap (or first match): start a new run
    return groups
def _get_ranked_players(
        self,
        previous_rounds: t.Sequence[CompletedRound[P]],
) -> t.Tuple[t.Sequence[t.Tuple[P, int]], t.Mapping[P, int]]:
    """Rank all players after the given completed rounds.

    Returns ``(ranked, buys_map)`` where ``ranked`` is a list of
    ``(player, tier_index)`` pairs (tier 0 is best; equal tie-breaker
    tuples share a tier) and ``buys_map`` counts byes per player.
    """
    match_wins_map = defaultdict(int)
    game_wins_map = defaultdict(int)
    buys_map = defaultdict(int)
    # First pass: tally match wins, game wins and byes.
    for _round in previous_rounds:
        for result in _round.results:
            if len(result.results) == 1:
                # A single-entry result is a bye for that player.
                buys_map[result.results.__iter__().__next__()] += 1
            else:
                for player, wins in result.results.items():
                    game_wins_map[player] += wins
                winners = result.winners
                if len(winners) == 1:
                    match_wins_map[winners.__iter__().__next__()] += 1
    opponent_match_wins_map = defaultdict(int)
    # NOTE(review): despite the name, this accumulates opponents' raw game
    # wins, not a percentage — confirm before renaming.
    opponent_game_win_percentage = defaultdict(int)
    # Second pass: opponent-strength tie-breakers. permutations() without
    # an explicit r yields full-length tuples, so the (player, opponent)
    # unpacking assumes exactly two players per non-bye match.
    for _round in previous_rounds:
        for result in _round.results:
            if len(result.results) == 1:
                continue  # byes contribute nothing to opponent stats
            for player, opponent in itertools.permutations(
                    result.results.keys()):
                opponent_match_wins_map[player] += match_wins_map[opponent]
                opponent_game_win_percentage[player] += game_wins_map[
                    opponent]
    # Sort by tie-breaker tuple: match wins, opponents' match wins, game
    # wins, opponents' game wins, fewest byes, then highest seed.
    ranked_players = sorted(
        [(player, (
            match_wins_map[player],
            opponent_match_wins_map[player],
            game_wins_map[player],
            opponent_game_win_percentage[player],
            -buys_map[player],
            -self._seed_map.get(player, 0),
        )) for player in self._players],
        key=lambda p: p[1],
        reverse=True,
    )
    result = []
    # Players with identical tie-breaker tuples share a tier index.
    for idx, chunk in enumerate(
            more_itertools.split_when(
                ranked_players,
                lambda a, b: a[1] != b[1],
            )):
        for p, _ in chunk:
            result.append((p, idx))
    return result, buys_map
def distinct_arrangements(self, style='calculated'):
    """Count distinct adapter arrangements (AoC 2020 day 10, part 2).

    Splits the sorted joltage sequence into runs separated by 3-jolt
    gaps; only runs longer than 2 admit alternative arrangements, and
    the answer is the product of each such run's arrangement count.

    :param style: ``'calculated'`` derives counts via
        ``self._arrangements``; ``'hardcoded'`` uses the known values
        for run lengths 3-5.
    :raises ValueError: if *style* is not a supported mode (previously a
        bare ``Exception`` with no message; ValueError is still caught
        by any existing ``except Exception`` handlers).
    """
    runs = more_itertools.split_when(self.joltage,
                                     lambda x, y: y - x == 3)
    long_runs = [run for run in runs if len(run) > 2]
    run_lengths = [len(run) for run in long_runs]
    if style == 'calculated':
        length_to_arrangement = {
            length: self._arrangements(length)
            for length in set(run_lengths)
        }
    elif style == 'hardcoded':
        length_to_arrangement = {3: 2, 4: 4, 5: 7}
    else:
        raise ValueError(f"unsupported style: {style!r}")
    return math.prod(length_to_arrangement[length]
                     for length in run_lengths)
def Frame_ID_Extrator_In_Conditions(stim_dic,
                                    stim_ID_Lists,
                                    head_extend=2,
                                    tail_extend=2):
    '''
    Return the frame IDs in stim_ID_Lists, grouped into one list per condition.

    Parameters
    ----------
    stim_dic : (Dic)
        Stim_Frame_Align dictionary.
    stim_ID_Lists : (list)
        List of stim IDs you want to get.
    head_extend : (int), optional
        Frames added (positive) or cut (negative) at each condition head.
        The default is 2.
    tail_extend : (int), optional
        Same as head_extend, applied at the tail. The default is 2.

    Returns
    -------
    adjusted_frame_lists : (list)
        Frame id lists, one per condition, after head/tail adjustment.
    '''
    raw_ids = Frame_ID_Extractor(stim_dic, stim_ID_Lists)
    # Break the flat frame-id train wherever consecutive ids are not adjacent.
    runs = list(mit.split_when(raw_ids, lambda x, y: (y - x) != 1))
    same_length_runs = lt.Element_Same_length(runs)
    adjusted_frame_lists = []
    for frames in same_length_runs:
        # Head adjustment: positive extends before the first frame,
        # negative trims from the front.
        if head_extend > 0:
            frames = list(range(frames[0] - head_extend,
                                frames[0])) + frames
        elif head_extend < 0:
            frames = frames[abs(head_extend):]
        # Tail adjustment: positive extends past the last frame,
        # negative trims from the back.
        if tail_extend > 0:
            frames.extend(
                range(frames[-1] + 1, frames[-1] + tail_extend + 1))
        elif tail_extend < 0:
            frames = frames[:tail_extend]
        adjusted_frame_lists.append(frames)
    return adjusted_frame_lists
def _rank_player_set(
        self, previous_round: CompletedRound[P],
        players: t.AbstractSet[P]) -> t.Sequence[t.Collection[P]]:
    """Rank *players* into tiers using only matches played among them.

    Ties are broken recursively: a tied group that is a strict subset of
    *players* is re-ranked on its head-to-head results alone; a group
    covering the whole set stays a single tier.
    """
    player_match_wins_map = defaultdict(int)
    player_game_wins_map = defaultdict(int)
    player_relative_score_map = defaultdict(int)
    for result in previous_round.results:
        # Only count matches fought entirely inside the candidate set.
        if not result.results.keys() <= players:
            continue
        winners = result.winners
        if len(winners) == 1:
            player_match_wins_map[winners.__iter__().__next__()] += 1
        total_wins = sum(result.results.values())
        for player, wins in result.results.items():
            player_game_wins_map[player] += wins
            # Own wins minus opponents' wins (wins*2 - total).
            player_relative_score_map[player] += wins * 2 - total_wins
    # Sort by (match wins, relative score, game wins), best first.
    ranked_players = sorted(
        [(player, (
            player_match_wins_map[player],
            player_relative_score_map[player],
            player_game_wins_map[player],
        )) for player in players],
        key=lambda p: p[1],
        reverse=True,
    )
    result = []
    for player_group in more_itertools.split_when(
            ranked_players, lambda a, b: a[1] != b[1]):
        if len(player_group) > 1:
            players_set = frozenset(p[0] for p in player_group)
            if players_set >= players:
                # Everyone tied: no finer split possible, keep one tier.
                result.append([p for p, _ in player_group])
            else:
                # Strict subset tied: recurse on their mutual results.
                result.extend(
                    self._rank_player_set(previous_round, players_set))
        else:
            result.append([player_group[0][0]])
    return result
def get_round(
        self, previous_rounds: t.Sequence[CompletedRound[P]] = ()
) -> t.Optional[Round[P]]:
    """Build the next round's pairings, or None when all rounds are done.

    Round 1 orders players by seed (random within equal seeds); later
    rounds pair by standings, shuffling within each tier. With an odd
    player count, one player with the fewest byes so far gets a bye.
    """
    if len(previous_rounds) >= self._rounds:
        return None
    if not previous_rounds:
        buys_map = defaultdict(int)
        # NOTE(review): interleave() is called with a single iterable and
        # the result is later passed to len() and indexed — this only
        # works if interleave returns a sequence here; confirm which
        # interleave is in scope.
        ranked_players = interleave(
            sorted(self._players,
                   key=lambda p: (self._seed_map.get(p, 0),
                                  random.random())),
        )
    else:
        _ranked_players, buys_map = self._get_ranked_players(
            previous_rounds)
        ranked_players = []
        # Walk standings worst-to-best, shuffling within each tier so
        # equal players are paired in random order.
        for players in more_itertools.split_when(
                reversed(_ranked_players), lambda a, b: a[1] != b[1]):
            random.shuffle(players)
            for p, _ in players:
                ranked_players.append(p)
    matches = []
    if len(ranked_players) & 1:
        # Odd count: grant a bye to a lowest-ranked player among those
        # with the fewest byes so far.
        min_buys = min(buys_map[player] for player in self._players)
        for player in reversed(ranked_players):
            if buys_map[player] == min_buys:
                matches.append(
                    ScheduledMatch(
                        frozenset((ranked_players.pop(
                            ranked_players.index(player)), ))))
                break
    # Pair off adjacent players two at a time.
    for idx in range(0, len(ranked_players), 2):
        matches.append(
            ScheduledMatch(frozenset(ranked_players[idx:idx + 2])))
    return Round(frozenset(matches))
def from_json(cls, json_file) -> Job:
    """Create a Job object from an Amazon Transcribe JSON payload.

    When speaker labels are present their segments are used directly;
    otherwise the item stream is split into segments after each
    pronunciation item.
    """
    results = json_file['results']
    if "speaker_labels" in results:
        labels = json_file["results"]["speaker_labels"]
        segments = labels["segments"]
    else:
        # BUG FIX: split_when calls its predicate with the (previous,
        # next) item pair; the old one-argument lambda raised TypeError.
        segment_content = more_itertools.split_when(
            json_file['results']['items'],
            lambda prev, _nxt: prev['type'] == "pronunciation")
        segments = []
        for segment in segment_content:
            # BUG FIX: the loop variable is ``segment``; the old code
            # referenced an undefined name ``item`` (NameError).
            # NOTE(review): punctuation items carry no start/end times —
            # a group consisting only of punctuation would KeyError here;
            # confirm against real transcript data.
            segments.append({
                'start_time': float(segment[0]["start_time"]),
                'end_time': float(segment[-1]["end_time"]),
                'speaker': None,
            })
    # NOTE(review): this builds ``segments`` but never constructs or
    # returns a Job despite the annotation — the return statement
    # appears to be missing; confirm intended behavior upstream.
def get_ranked_players(
        self, previous_rounds: t.Sequence[CompletedRound[P]]
) -> t.Sequence[t.Collection[P]]:
    """Group players into tiers by match wins, seed as tie-breaker.

    Returns tiers best-first; players with equal (wins, seed) keys share
    a tier.
    """
    match_wins_map = defaultdict(int)
    for _round in previous_rounds:
        for result in _round.results:
            # next(iter(...)) replaces the unidiomatic
            # __iter__().__next__() chain; behavior is identical.
            match_wins_map[next(iter(result.winners))] += 1
    ranked_players = sorted(
        [(player, (
            match_wins_map[player],
            -self._seed_map.get(player, 0),
        )) for player in self._players],
        key=lambda p: p[1],
        reverse=True,
    )
    return [[p for p, _ in tier]
            for tier in more_itertools.split_when(
                ranked_players,
                lambda a, b: a[1] != b[1],
            )]
def get_ranked_players(
        self, previous_rounds: t.Sequence[CompletedRound[P]]
) -> t.Sequence[t.Collection[P]]:
    """Rank players into tiers, sub-splitting ties by seed.

    The base tiers come from ``_rank_player_set`` on the first completed
    round; each tier is then split so players with different seeds land
    in different tiers (higher seed first).

    :raises ResultException: if no round has been completed yet.
    """
    try:
        # next(iter(...)) replaces the unidiomatic __iter__().__next__().
        previous_round = next(iter(previous_rounds))
    except StopIteration:
        raise ResultException('tournament not complete')
    return list(
        itertools.chain(*([
            sub_tier for sub_tier in more_itertools.split_when(
                sorted(
                    tier,
                    key=lambda p: -self._seed_map.get(p, 0),
                ),
                lambda a, b: self._seed_map.get(a, 0) != self._seed_map.
                get(b, 0),
            )
        ] for tier in self._rank_player_set(
            previous_round,
            self._players,
        ))))
def tag_sentences(sentence_list: List, tagged_titles_list: List):
    """Align BIO-tagged title tokens onto tokenized sentences.

    For each (sentence, tagged-title) pair, groups the tagged tokens
    into entity spans (split_BIO decides span boundaries), locates each
    span's first token inside the sentence by substring match, assigns
    tags positionally, and returns one concatenated DataFrame with
    columns word / tokenized_as / tag ("O" where no tag matched).
    """
    final = []
    for sent, tagged in zip(sentence_list, tagged_titles_list):
        # Each group is one contiguous BIO span of tagged tokens.
        grouped = list(split_when(tagged, split_BIO))
        found = {}
        # ``last`` forces matches to progress left-to-right so repeated
        # words don't re-match earlier positions.
        last = -1
        for w in grouped:
            for i, word in enumerate(sent):
                # Substring containment match on the span's first token.
                # NOTE(review): ``in`` matching can hit partial words —
                # confirm tokens are normalized enough for this.
                if w[0][0] in word[0] and i > last:
                    if len(w) > 1:
                        # Multi-token span: tag consecutive positions.
                        for ii, z in enumerate(w):
                            found[i + ii] = z[1]
                    else:
                        found[i] = w[0][1]
                    last = i
                    break
        indexed = pd.DataFrame.from_dict(found,
                                         orient="index",
                                         columns=["tag"])
        # Append a sentence-final period row, join tags by position, and
        # default untagged rows to "O".
        final.append(
            pd.DataFrame(sent + [(".", "PUNCT")],
                         columns=["word", "tokenized_as"
                                  ]).join(indexed).fillna("O"))
    return pd.concat(final)
def get_ranked_players(
        self, previous_rounds: t.Sequence[CompletedRound[P]]
) -> t.Sequence[t.Collection[P]]:
    """Return the standings grouped into tiers of equal ranking keys."""
    standings, _buys = self._get_ranked_players(previous_rounds)
    tiers = more_itertools.split_when(
        standings, lambda left, right: left[1] != right[1])
    return [[player for player, _ in tier] for tier in tiers]
def Spike_Train_Generator(all_tif_name,
                          cell_information,
                          Base_F_type='most_unactive',
                          stim_train=None,
                          ignore_ISI_frame=1,
                          unactive_prop=0.1,
                          LP_Para=False,
                          HP_Para=False,
                          filter_method=False):
    """
    Generate dF/F spike trains from graphs. Multiple base-F find methods provided.
    Filter here indicates a 2D spatial filter; no time-course filter is applied.

    Parameters
    ----------
    all_tif_name : (list)
        List of all tif graph file names.
    cell_information : (list)
        Skimage generated cell information lists.
    Base_F_type : ('global','most_unactive','before_ISI','begining_ISI','all_ISI','nearest_0','all_0'), optional
        Base F find method. Description as below:
            'global' : Use all frame average.
            'most_unactive': Use the least active frames of every cell.
            'before_ISI': Use the ISI right before each stim onset as base.
            'begining_ISI': Use the ISI before the first stim onset as base.
            'all_ISI': Use average of all ISIs as base. Each ISI is cut
                based on ignore_ISI_frame.
            'nearest_0': Use the nearest stim id 0 block as base.
            'all_0': Use average of all id 0 frames as base.
        The default is 'most_unactive'.
    stim_train : (list), optional
        Stim id train. Required when the base type uses stim information.
        The default is None.
    ignore_ISI_frame : (int), optional
        For modes 'before_ISI'/'all_ISI'/'begining_ISI': how many leading
        ISI frames are ignored. The default is 1.
    unactive_prop : (float), optional
        For mode 'most_unactive': proportion of least-active frames used.
        The default is 0.1.

    Returns
    -------
    F_value_Dictionary : (Dictionary)
        Original F value train of every cell.
    dF_F_trains : (Dictionary)
        dF/F spike train of every cell. Only the train; submaps are
        processed later.

    Raises
    ------
    IOError
        If a stim-based mode is selected without stim_train, or the mode
        name is unknown.
    """
    # Initialization
    Cell_Num = len(cell_information)
    Frame_Num = len(all_tif_name)
    F_value_Dictionary = {}
    height, width = np.shape(cv2.imread(all_tif_name[0], -1))
    # u2 = uint16: raw 16-bit tif frames stacked as (H, W, frame).
    all_graph_matrix = np.zeros(shape=(height, width, Frame_Num),
                                dtype='u2')
    # Step 1: read in all graphs, applying the 2D spatial filter if asked.
    for i in range(Frame_Num):
        current_graph = cv2.imread(all_tif_name[i], -1)
        if filter_method != False:  # Meaning we need filter here.
            current_graph = My_Filter.Filter_2D(current_graph, LP_Para,
                                                HP_Para, filter_method)
        all_graph_matrix[:, :, i] = current_graph
    # Step 2: per cell, average the pixels inside the cell mask on every
    # frame to get the raw F train.
    for i in range(Cell_Num):  # cycle cell
        cell_location = cell_information[i].coords
        cell_area = len(cell_location)
        current_cell_train = all_graph_matrix[cell_location[:, 0],
                                              cell_location[:,
                                                            1], :].astype(
                                                                'f8')
        current_cell_F_train = np.sum(current_cell_train,
                                      axis=0) / cell_area
        F_value_Dictionary[i] = current_cell_F_train
    del all_graph_matrix  # free the frame stack before dF/F computation
    # Step 3: compute dF/F = (F - base_F) / base_F with the chosen base.
    dF_F_trains = {}
    all_keys = list(F_value_Dictionary.keys())
    if Base_F_type == 'global':
        # Base is the mean of the whole train.
        for i in range(len(all_keys)):
            current_cell_F_train = F_value_Dictionary[all_keys[i]]
            base_F = current_cell_F_train.mean()
            current_spike_train = np.nan_to_num(
                (current_cell_F_train - base_F) / base_F)
            dF_F_trains[all_keys[i]] = current_spike_train
    elif Base_F_type == 'most_unactive':
        for i in range(len(all_keys)):
            current_cell_F_train = F_value_Dictionary[all_keys[i]]
            # Base is avr. of the most unactive (dimmest) frames.
            sorted_list = sorted(current_cell_F_train)  # Use this to get mean.
            unactive_frame_num = round(len(sorted_list) * unactive_prop)
            sorted_list = sorted_list[:unactive_frame_num]
            base_F = np.mean(sorted_list)
            current_spike_train = np.nan_to_num(
                (current_cell_F_train - base_F) / base_F)
            dF_F_trains[all_keys[i]] = current_spike_train
    elif Base_F_type == 'before_ISI':  # Use ISI before each stim onset as base.
        if stim_train == None:
            raise IOError('Please input stim train!')
        stim_train = np.asarray(stim_train)
        all_keys = list(F_value_Dictionary.keys())
        # Split the stim train into repeats: a new chunk starts where the
        # id drops (x - y > 0), i.e. at each return to ISI/low id.
        cutted_stim_train = list(
            mit.split_when(stim_train, lambda x, y: (x - y) > 0))
        for i in range(len(all_keys)):
            current_cell_train = F_value_Dictionary[all_keys[i]]
            frame_counter = 0
            current_cell_dF_train = []
            for j in range(len(cutted_stim_train)):
                current_stim_train = np.asarray(cutted_stim_train[j])
                current_F_train = np.asarray(
                    current_cell_train[frame_counter:(
                        frame_counter + len(current_stim_train))])
                # ISI frames are marked -1 in the stim train.
                null_id = np.where(current_stim_train == -1)[0]
                if len(null_id) > 1:
                    # Drop the first ignore_ISI_frame ISI frames (decay tail).
                    null_id = null_id[ignore_ISI_frame:]
                else:
                    warnings.warn("ISI frame less than 2, use all ISIs",
                                  UserWarning)
                current_base = current_F_train[null_id].mean()
                current_dF_train = np.nan_to_num(
                    (current_F_train - current_base) / current_base)
                current_cell_dF_train.extend(current_dF_train)
                # Then add frame counter at last.
                frame_counter = frame_counter + len(cutted_stim_train[j])
            dF_F_trains[all_keys[i]] = np.asarray(current_cell_dF_train)
    elif Base_F_type == 'begining_ISI':  # Use first ISI as global base.
        if stim_train == None:
            raise IOError('Please input stim train!')
        # First frame with a positive stim id marks the end of the
        # leading ISI.
        first_stim_id = np.where(np.asarray(stim_train) > 0)[0][0]
        all_keys = list(F_value_Dictionary.keys())
        for i in range(len(all_keys)):
            current_F_series = F_value_Dictionary[all_keys[i]]
            base_F_series = current_F_series[
                ignore_ISI_frame:first_stim_id]
            base_F = base_F_series.mean()
            current_spike_train = np.nan_to_num(
                (current_F_series - base_F) / base_F)
            dF_F_trains[all_keys[i]] = current_spike_train
    elif Base_F_type == 'all_ISI':
        if stim_train == None:
            raise IOError('Please input stim train!')
        stim_train = np.asarray(stim_train)
        all_ISI_frame_loc = np.where(stim_train == -1)[0]
        # Group ISI frame indices into contiguous ISI blocks.
        cutted_ISI_frame_loc = list(
            mit.split_when(all_ISI_frame_loc, lambda x, y: (y - x) > 1))
        used_ISI_id = []
        for i in range(len(cutted_ISI_frame_loc)):
            # Skip the first ignore_ISI_frame frames of every ISI block.
            used_ISI_id.extend(cutted_ISI_frame_loc[i][ignore_ISI_frame:])
        all_keys = list(F_value_Dictionary.keys())
        for i in range(len(all_keys)):
            current_cell_F_train = F_value_Dictionary[all_keys[i]]
            current_base_F = current_cell_F_train[used_ISI_id]
            base_F = current_base_F.mean()
            current_dF_train = np.nan_to_num(
                (current_cell_F_train - base_F) / base_F)
            dF_F_trains[all_keys[i]] = current_dF_train
    elif Base_F_type == 'nearest_0':
        stim_train = np.asarray(stim_train)
        blank_location = np.where(stim_train == 0)[0]
        # Group blank (id 0) frame indices into contiguous blank blocks.
        cutted_blank_location = list(
            mit.split_when(blank_location, lambda x, y: (y - x) > 1))
        all_blank_start_frame = []  # This is the start frame of every blank.
        for i in range(len(cutted_blank_location)):
            all_blank_start_frame.append(cutted_blank_location[i][0])
        #%% Get base_F_of every blank.
        all_keys = list(F_value_Dictionary.keys())
        for i in range(len(all_keys)):
            current_key = all_keys[i]
            current_cell_F_train = F_value_Dictionary[current_key]
            # First, get base F of every blank.
            all_blank_base_F = []  # base F of every blank.
            for j in range(len(cutted_blank_location)):
                all_blank_base_F.append(
                    current_cell_F_train[cutted_blank_location[j]].mean())
            # Then, generate dF train frame by frame, each frame using the
            # base of its nearest blank block.
            current_dF_train = []
            for j in range(len(current_cell_F_train)):
                current_F = current_cell_F_train[j]
                _, current_base_loc = List_Tools.Find_Nearest(
                    all_blank_start_frame, j)
                current_base = all_blank_base_F[current_base_loc]
                current_dF_F = np.nan_to_num(
                    (current_F - current_base) / current_base)
                current_dF_train.append(current_dF_F)
            dF_F_trains[all_keys[i]] = np.asarray(current_dF_train)
    elif Base_F_type == 'all_0':
        stim_train = np.asarray(stim_train)
        all_blank_frame_id = np.where(stim_train == 0)[0]
        all_keys = list(F_value_Dictionary.keys())
        for i in range(len(all_keys)):
            current_cell_F_train = F_value_Dictionary[all_keys[i]]
            # Base is the average over every blank (id 0) frame.
            current_base = current_cell_F_train[all_blank_frame_id].mean()
            current_dF_train = np.nan_to_num(
                (current_cell_F_train - current_base) / current_base)
            dF_F_trains[all_keys[i]] = current_dF_train
    else:
        raise IOError('Not finished functions.')
    return F_value_Dictionary, dF_F_trains
def Stim_Frame_Align(
        stim_folder,
        stim_thres=2,
        frame_thres=1,
        jmp_step=3000,
        head_extend=1,
        tail_extend=0,
):
    """
    Get stim belongings of every frame.

    Parameters
    ----------
    stim_folder : (str)
        Stimulus data folder. '.smr' file and '.txt' file shall be in the
        same folder.
    stim_thres : (number), optional
        Threshold voltage used to binarize the square (stim) wave.
        The default is 2.
    frame_thres : (number), optional
        Threshold voltage used to binarize the triangle (frame) wave.
        The default is 1.
    jmp_step : (int), optional
        How many points are skipped after a frame is found (debounce).
        Usually 10000 points = 1s. The default is 3000.
    head_extend : (int), optional
        Number of frames regarded as stim-on before the stim. Positive
        extends frame-on, negative cuts.
    tail_extend : (int), optional
        Number of frames regarded as stim-on after the stim. Positive
        extends frame-on, negative cuts.

    Returns
    -------
    Frame_Stim_Sequence : (list)
        List of frame belongings. Can be reused if the ISI base changes.
    Frame_Stim_Dictionary : (Dictionary)
        Stim-id -> frame-id mapping; also stores the raw sequence under
        'Original_Stim_Train'. Can be used directly.
    """
    # Step 1: read in the recorded stim/frame channels and the stim list.
    smr_name = os_tools.Get_File_Name(stim_folder, file_type='.smr')[0]
    frame_train = os_tools.Spike2_Reader(smr_name,
                                         physical_channel=3)['Channel_Data']
    stim_train = os_tools.Spike2_Reader(smr_name,
                                        physical_channel=0)['Channel_Data']
    txt_name = os_tools.Last_Saved_name(stim_folder, file_type='.txt')
    # Step 2: square wave series processing — binarize, then split at
    # each falling edge (1 -> 0 transition) so every chunk is one stim
    # pulse plus the ISI that follows it.
    binary_stim_train = (stim_train > stim_thres).astype('i4')
    cutted_stim_list = list(
        mit.split_when(binary_stim_train, lambda x, y: (x - y) == -1))
    # If recording stopped at a high voltage level, the last chunk is all
    # ones; zero it so it maps to -1 (no stim) below.
    last_part_set = np.unique(cutted_stim_list[-1])
    if len(last_part_set) == 1:  # Which means stop at high voltage
        last_part = np.array(cutted_stim_list[-1])
        last_part[:] = 0
        cutted_stim_list[-1] = list(last_part)
    # Combine stimulus lists: chunk i becomes stim ordinal i+1 where the
    # wave was high, and -1 (ISI) where it was low.
    final_stim_list = []
    for i in range(len(cutted_stim_list)):
        current_list = np.dot(cutted_stim_list[i], i + 1) - 1
        final_stim_list.extend(current_list)
    del cutted_stim_list, stim_train, binary_stim_train
    # Square wave process done; final_stim_list is the stim-time relation.
    # Step 3: triangle wave list processing — find each frame's falling
    # edge, then debounce with jmp_step.
    binary_frame_train = (frame_train > frame_thres).astype('i4').ravel()
    dislocation_binary_frame_train = np.append(binary_frame_train[1:], 0)
    frame_time_finder = binary_frame_train - dislocation_binary_frame_train
    stop_point = np.where(frame_time_finder == -1)[
        0]  # Not filtered yet, mis calculation are many.
    # Wash stop points: keep only edges more than jmp_step apart.
    all_graph_time = [stop_point[0]]  # Use first stop as first graph.
    last_frame_time = all_graph_time[0]  # First stop
    for i in range(1, len(stop_point)):  # Frame 0 ignored.
        current_time = stop_point[i]
        if (current_time - last_frame_time) > jmp_step:
            all_graph_time.append(current_time)
            last_frame_time = current_time
    all_graph_time = all_graph_time[:-2]  # Last 2 frames may not be saved.
    # Triangle wave process done; all_graph_time lists every frame's time.
    # Step 4: original frame-stim relation — look up each frame time in
    # the stim-time series.
    frame_belongings = []
    for i in range(len(all_graph_time)):
        current_graph_time = all_graph_time[i]
        frame_belongings.append(final_stim_list[current_graph_time]
                                [0])  # Frame belong before adjust
    # Step 5: adjust the frame-stim relation — split into constant runs,
    # then extend/cut each run's head and tail. Extending a stim run
    # means shrinking the neighboring ISI run by the same amount.
    cutted_frame_list = list(
        mit.split_when(frame_belongings, lambda x, y: x != y))
    adjusted_frame_list = []
    import My_Wheels.List_Operation_Kit as List_Ops
    # Process head first (the leading run only loses from its tail).
    adjusted_frame_list.append(
        List_Ops.List_extend(cutted_frame_list[0], 0, -head_extend))
    # Then process middle runs.
    for i in range(1, len(cutted_frame_list) -
                   1):  # First and last runs are handled separately.
        if (i % 2) != 0:  # odd id means stim on.
            adjusted_frame_list.append(
                List_Ops.List_extend(cutted_frame_list[i], head_extend,
                                     tail_extend))
        else:  # even id means ISI.
            adjusted_frame_list.append(
                List_Ops.List_extend(cutted_frame_list[i], -tail_extend,
                                     -head_extend))
    # Process last part then.
    adjusted_frame_list.append(
        List_Ops.List_extend(cutted_frame_list[-1], -tail_extend, 0))
    # After adjustment, recombine the runs into one flat list.
    frame_stim_list = []
    for i in range(len(adjusted_frame_list) -
                   1):  # Ignore last ISI, this might be harmful.
        frame_stim_list.extend(adjusted_frame_list[i])
    # Till now, frame_stim_list is adjusted frame stim relations.
    # Step 6: combine frame ordinals with real stim ids from the txt file.
    with open(txt_name, 'r') as file:
        data = file.read()
    del file
    stim_sequence = data.split()
    stim_sequence = [int(x) for x in stim_sequence]
    Frame_Stim_Sequence = []
    for i in range(len(frame_stim_list)):
        current_id = frame_stim_list[i]
        if current_id != -1:
            # Ordinal n maps to the n-th stim id in the txt sequence.
            Frame_Stim_Sequence.append(stim_sequence[current_id - 1])
        else:
            Frame_Stim_Sequence.append(-1)
    Frame_Stim_Dictionary = List_Ops.List_To_Dic(Frame_Stim_Sequence)
    Frame_Stim_Dictionary['Original_Stim_Train'] = Frame_Stim_Sequence
    return Frame_Stim_Sequence, Frame_Stim_Dictionary
@property
def startpos(self):
    # Reversed view: starts where the wrapped segment ends.
    # NOTE(review): these two properties appear to belong to a
    # Reversed-segment class whose header is outside this chunk.
    return self.segment.endpos

@property
def endpos(self):
    # Reversed view: ends where the wrapped segment starts.
    return self.segment.startpos

# since we don't have code to properly reverse arcs, convert them to line segments.
for line in pattern.lines:
    if line.command in ("G02", "G03"):
        # Rewrite arc moves (G02/G03) as linear moves, keeping arguments.
        line.raw = "G01" + line.raw[3:]

# Split the G-code into parts: a new part begins where a non-rapid move
# is followed by a rapid (G0) move. The first part is the preamble.
parts = split_when(pattern.lines, lambda a, b: not_g0(a) and is_g0(b))
preamble = next(parts)
segments = []
for segment in parts:
    seg = Segment(segment)
    if seg.startpos and seg.endpos:
        # Keep each segment in both directions so a later optimizer can
        # choose the cheaper orientation.
        segments.append(seg)
        segments.append(ReversedSegment(seg))

def distance(a, b):
    # Euclidean distance between two (x, y) points.
    return math.sqrt((a[0] - b[0])**2 + (a[1] - b[1])**2)
def build(self) -> Job:
    """Populate this builder from an Amazon Transcribe job.

    Fetches the transcription job named ``self.key``, downloads its
    transcript JSON, and fills base_text / transcription / speakers /
    markers / alternatives on ``self``.
    """
    job = transcribe.get_transcription_job(TranscriptionJobName=self.key)
    uri = job['TranscriptionJob']['Transcript']['TranscriptFileUri']
    response = requests.get(uri)
    response.raise_for_status()
    transcription = response.json()
    markers = []
    segments = transcription['results']['items']
    if 'speaker_labels' in transcription['results']:
        # Speaker-diarized transcript: one marker per labeled segment.
        labels = transcription['results']['speaker_labels']
        speakers = [add_speaker(x) for x in range(labels['speakers'])]
        for segment in labels['segments']:
            start_time = segment['start_time']
            end_time = segment['end_time']
            speaker = [
                x for x in speakers
                if x.base_name == segment['speaker_label']
            ][0]
            content = text_in_range(
                segments,
                start_time=start_time,
                end_time=end_time,
            )
            marker = Marker(
                start_time=timedelta(seconds=float(start_time)),
                end_time=timedelta(seconds=float(end_time)),
                content=content,
                speaker=speaker)
            markers.append(marker)
    else:
        speakers = []
        # No diarization: split the item stream into sentences after each
        # sentence-ending punctuation token.
        items_segments = more_itertools.split_when(
            segments,
            lambda x, y: x['alternatives'][0]['content'] in
            ['.', '?', '!'],
        )
        for index, item in enumerate(items_segments):
            start_time = timedelta(seconds=float(item[0]['start_time']))
            # [-1] is the punctuation token (no timestamps), so the last
            # timed word is [-2].
            end_time = timedelta(seconds=float(item[-2]['end_time']))
            content = ''
            for word_block in item:
                # Punctuation attaches directly; words get a leading space.
                if word_block['type'] == 'punctuation':
                    content += word_block['alternatives'][0]['content']
                else:
                    content += " " + word_block['alternatives'][0][
                        'content']
            marker = Marker(start_time=start_time,
                            end_time=end_time,
                            content=content)
            markers.append(marker)
    # add alternatives
    alternatives = []
    for item in segments:
        if item['type'] == 'pronunciation':
            for alt in item['alternatives']:
                alternatives.append(
                    Alternative(
                        start_time=item['start_time'],
                        content=alt['content'],
                        confidence=alt['confidence'],
                        tag='orignal',
                        _type='pronunciation',
                    ))
    self.base_text = transcription['results']['transcripts'][0][
        'transcript']
    self.transcription = transcription
    self.speakers = speakers
    self.markers = markers
    self.alternatives = alternatives
def groups(gap=timedelta(hours=3)):
    """Split the entry stream into sessions separated by more than *gap*."""
    from more_itertools import split_when

    def starts_new_group(prev, cur):
        # A gap longer than *gap* between consecutive entries ends a group.
        return (cur.dt - prev.dt) > gap

    yield from split_when(entries(), starts_new_group)
def from_json(cls, transcription) -> Job:
    """Create a Job object from an Amazon Transcribe JSON payload.

    Builds markers either from speaker-diarized segments (when
    ``speaker_labels`` is present) or by splitting items into sentences
    at sentence-ending punctuation, plus per-word alternatives.
    """
    markers = []
    segments = transcription['results']['items']
    if 'speaker_labels' in transcription['results']:
        # Speaker-diarized transcript: one marker per labeled segment.
        labels = transcription['results']['speaker_labels']
        speakers = [add_speaker(x) for x in range(labels['speakers'])]
        for segment in labels['segments']:
            start_time = segment['start_time']
            end_time = segment['end_time']
            speaker = [
                x for x in speakers
                if x.base_name == segment['speaker_label']
            ][0]
            content = text_in_range(
                segments,
                start_time=start_time,
                end_time=end_time,
            )
            marker = Marker(
                start_time=timedelta(seconds=float(start_time)),
                end_time=timedelta(seconds=float(end_time)),
                content=content,
                speaker=speaker)
            markers.append(marker)
    else:
        speakers = []
        # No diarization: split the item stream into sentences after each
        # sentence-ending punctuation token.
        items_segments = more_itertools.split_when(
            segments,
            lambda x, y: x['alternatives'][0]['content'] in
            ['.', '?', '!'],
        )
        for index, item in enumerate(items_segments):
            start_time = timedelta(seconds=float(item[0]['start_time']))
            # [-1] is the punctuation token (no timestamps), so the last
            # timed word is [-2].
            end_time = timedelta(seconds=float(item[-2]['end_time']))
            content = ''
            for word_block in item:
                # Punctuation attaches directly; words get a leading space.
                if word_block['type'] == 'punctuation':
                    content += word_block['alternatives'][0]['content']
                else:
                    content += " " + word_block['alternatives'][0][
                        'content']
            marker = Marker(start_time=start_time,
                            end_time=end_time,
                            content=content)
            markers.append(marker)
    # add alternatives
    alternatives = []
    for item in segments:
        if item['type'] == 'pronunciation':
            for alt in item['alternatives']:
                alternatives.append(
                    Alternative(
                        start_time=item['start_time'],
                        content=alt['content'],
                        confidence=alt['confidence'],
                        tag='orignal',
                        _type='pronunciation',
                    ))
    return cls(
        base_text=transcription['results']['transcripts'][0]['transcript'],
        key=transcription['jobName'],
        transcription=transcription,
        speakers=speakers,
        markers=markers,
        alternatives=alternatives,
    )