def get_remote_segment(base_dir, node, channel, quality, hour, missing_segment, logger, timeout=TIMEOUT): """Get a segment from a node. Fetches channel/quality/hour/missing_segment from node and puts it in base_dir/channel/quality/hour/missing_segment. If the segment already exists locally, this does not attempt to fetch it.""" path = os.path.join(base_dir, channel, quality, hour, missing_segment) # check to see if file was created since we listed the local segments to # avoid unnecessarily copying if os.path.exists(path): logging.debug('Skipping existing segment {}'.format(path)) return dir_name = os.path.dirname(path) date, duration, _ = os.path.basename(path).split('-', 2) temp_name = "-".join([date, duration, "temp", str(uuid.uuid4())]) temp_path = os.path.join(dir_name, "{}.ts".format(temp_name)) common.ensure_directory(temp_path) hash = hashlib.sha256() try: logging.debug('Fetching segment {} from {}'.format(path, node)) uri = '{}/segments/{}/{}/{}/{}'.format(node, channel, quality, hour, missing_segment) resp = requests.get(uri, stream=True, timeout=timeout, metric_name='get_remote_segment') resp.raise_for_status() with open(temp_path, 'w') as f: for chunk in resp.iter_content(8192): f.write(chunk) hash.update(chunk) filename_hash = common.parse_segment_path(missing_segment).hash if filename_hash != hash.digest(): logger.warn( 'Hash of segment {} does not match. Discarding segment'.format( missing_segment)) hash_mismatches.labels(remote=node, channel=channel, quality=quality, hour=hour).inc() os.remove(temp_path) return #try to get rid of the temp file if an exception is raised. except Exception: if os.path.exists(temp_path): os.remove(temp_path) raise logging.debug('Saving completed segment {} as {}'.format(temp_path, path)) common.rename(temp_path, path) segments_backfilled.labels(remote=node, channel=channel, quality=quality, hour=hour).inc() logger.info('Segment {}/{}/{} backfilled'.format(quality, hour, missing_segment))
def backfill(self): """Backfill from remote node. Backfill from node/channel/qualities to base_dir/channel/qualities for each hour in hours. """ for channel, quality in itertools.product(self.channels, self.qualities): for hour in list_hours(self.node, channel, quality, self.start): # since backfilling can take a long time, recheck whether this # hour is after the start if self.start is not None: if not isinstance(self.start, datetime.datetime): start_hour = datetime.datetime.utcnow( ) - datetime.timedelta(hours=self.start) else: start_hour = self.start if datetime.datetime.strptime(hour, HOUR_FMT) < start_hour: break self.logger.info('Backfilling {}/{}'.format(quality, hour)) local_segments = set( list_local_segments(self.base_dir, channel, quality, hour)) remote_segments = set( list_remote_segments(self.node, channel, quality, hour)) missing_segments = list(remote_segments - local_segments) # randomise the order of the segments to reduce the chance that # multiple workers request the same segment at the same time random.shuffle(missing_segments) pool = gevent.pool.Pool(self.download_concurrency) workers = [] for missing_segment in missing_segments: if self.stopping.is_set(): return path = os.path.join(channel, quality, hour, missing_segment) # test to see if file is a segment and get the segments start time try: segment = common.parse_segment_path(path) except ValueError as e: self.logger.warning('File {} invaid: {}'.format( path, e)) continue # Ignore temp segments as they may go away by the time we fetch them. if segment.type == "temp": self.logger.debug( 'Skipping {} as it is a temp segment'.format(path)) continue # to avoid getting in the downloader's way ignore segments # less than recent_cutoff old if datetime.datetime.utcnow( ) - segment.start < datetime.timedelta( seconds=self.recent_cutoff): self.logger.debug( 'Skipping {} as too recent'.format(path)) continue # start segment as soon as a pool slot opens up, then track it in workers workers.append( pool.spawn(get_remote_segment, self.base_dir, self.node, channel, quality, hour, missing_segment, self.logger)) # verify that all the workers succeeded. if any failed, raise the exception from # one of them arbitrarily. for worker in workers: worker.get() # re-raise error, if any self.logger.info('{} segments in {}/{} backfilled'.format( len(workers), quality, hour)) hours_backfilled.labels(remote=self.node, channel=channel, quality=quality).inc()
def run(self): """Loop over available hours for each quality, checking segment coverage.""" self.logger.info('Starting') while not self.stopping.is_set(): for quality in self.qualities: if self.stopping.is_set(): break path = os.path.join(self.base_dir, self.channel, quality) try: hours = [ name for name in os.listdir(path) if not name.startswith('.') ] except OSError as e: if e.errno == errno.ENOENT: self.logger.warning('{} does not exist'.format(path)) continue hours.sort() previous_hour_segments = None all_hour_holes = {} all_hour_partials = {} for hour in hours: if self.stopping.is_set(): break self.logger.info('Checking {}/{}'.format(quality, hour)) # based on common.segments.best_segments_by_start # but more complicated to capture more detailed metrics hour_path = os.path.join(self.base_dir, self.channel, quality, hour) try: segment_names = [ name for name in os.listdir(hour_path) if not name.startswith('.') ] except OSError as e: if e.errno == errno.ENOENT: self.logger.warning( 'Hour {} was deleted between finding it and processing it, ignoring' .format(hour)) continue segment_names.sort() parsed = [] bad_segment_count = 0 for name in segment_names: try: parsed.append( common.parse_segment_path( os.path.join(hour_path, name))) except ValueError: self.logger.warning( "Failed to parse segment: {!r}".format( os.path.join(hour_path, name)), exc_info=True) bad_segment_count += 1 full_segment_count = 0 partial_segment_count = 0 full_segment_duration = datetime.timedelta() partial_segment_duration = datetime.timedelta() full_overlaps = 0 full_overlap_duration = datetime.timedelta() partial_overlaps = 0 partial_overlap_duration = datetime.timedelta() best_segments = [] holes = [] editable_holes = [] previous = None previous_editable = None coverage = datetime.timedelta() editable_coverage = datetime.timedelta() only_partials = [] # loop over all start times # first select the best segment for a start time # then update coverage for start_time, segments in itertools.groupby( parsed, key=lambda segment: segment.start): full_segments = [] partial_segments = [] for segment in segments: if segment.type == 'full': full_segments.append(segment) full_segment_count += 1 full_segment_duration += segment.duration elif segment.type == 'partial': partial_segments.append(segment) partial_segment_count += 1 partial_segment_duration += segment.duration if full_segments: full_segments.sort( key=lambda segment: (segment.duration)) best_segment = full_segments[-1] for segment in full_segments[:-1]: full_overlaps += 1 full_overlap_duration += segment.duration for segment in partial_segments: partial_overlaps += 1 partial_overlap_duration += segment.duration elif partial_segments: partial_segments.sort(key=lambda segment: os.stat( segment.path).st_size) best_segment = partial_segments[-1] only_partials.append( (best_segment.start, best_segment.start + best_segment.duration)) for segment in partial_segments[:-1]: partial_overlaps += 1 partial_overlap_duration += segment.duration else: # ignore any start times with only temporary segments continue self.logger.debug(best_segment.path.split('/')[-1]) best_segments.append(best_segment) # now update coverage, overlaps and holes if previous is None: coverage += best_segment.duration editable_coverage += best_segment.duration previous_editable = best_segment else: previous_end = previous.start + previous.duration if segment.start < previous_end: if segment.type == 'full': full_overlaps += 1 full_overlap_duration += previous_end - segment.start else: partial_overlaps += 1 partial_overlap_duration += previous_end - segment.start coverage += segment.start - previous_end + segment.duration else: coverage += segment.duration editable_coverage += segment.duration if segment.start > previous_end: holes.append((previous_end, segment.start)) previous_editable_end = previous_editable.start + previous_editable.duration if segment.start > previous_editable_end: editable_holes.append( (previous_editable_end, segment.start)) previous_editable = best_segment previous = best_segment if best_segments: start = best_segments[0].start end = best_segments[-1].start + best_segments[ -1].duration hole_duration = end - start - coverage editable_hole_duration = end - start - editable_coverage hour_start = datetime.datetime.strptime(hour, HOUR_FMT) hour_end = hour_start + datetime.timedelta(hours=1) # handle the case when there is a hole between the last segment of the previous hour and the first of this if previous_hour_segments: last_segment = previous_hour_segments[-1] if best_segments[ 0].start > last_segment.start + last_segment.duration: holes.append((hour_start, start)) hole_duration += start - hour_start editable_holes.append((hour_start, start)) editable_hole_duration += start - hour_start # handle the case when there is a hole between the last segment and the end of the hour if not the last hour if hour != hours[-1] and end < hour_end: holes.append((end, hour_end)) hole_duration += hour_end - end editable_holes.append((end, hour_end)) editable_hole_duration += hour_end - end # update the large number of Prometheus guages segment_count_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='full').set(full_segment_count) segment_count_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='partial').set(partial_segment_count) segment_count_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='bad').set(bad_segment_count) segment_duration_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='full').set(full_segment_duration.total_seconds()) segment_duration_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='partial').set( partial_segment_duration.total_seconds()) raw_coverage_gauge.labels(channel=self.channel, quality=quality, hour=hour).set( coverage.total_seconds()) editable_coverage_gauge.labels( channel=self.channel, quality=quality, hour=hour).set(editable_coverage.total_seconds()) raw_holes_gauge.labels(channel=self.channel, quality=quality, hour=hour).set(len(holes)) editable_holes_gauge.labels(channel=self.channel, quality=quality, hour=hour).set( len(editable_holes)) overlap_count_gauge.labels(channel=self.channel, quality=quality, hour=hour, type='full').set(full_overlaps) overlap_count_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='partial').set(partial_overlaps) overlap_duration_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='full').set(full_overlap_duration.total_seconds()) overlap_duration_gauge.labels( channel=self.channel, quality=quality, hour=hour, type='partial').set( partial_overlap_duration.total_seconds()) # log the same information if best_segments: self.logger.info( '{}/{}: Start: {} End: {} ({} s)'.format( quality, hour, start, end, (end - start).total_seconds())) self.logger.info( '{}/{}: {} full segments totalling {} s'.format( quality, hour, full_segment_count, full_segment_duration.total_seconds())) self.logger.info('{}/{}: {} bad segments'.format( quality, hour, bad_segment_count)) self.logger.info( '{}/{}: {} overlapping full segments totalling {} s' .format(quality, hour, full_overlaps, full_overlap_duration.total_seconds())) self.logger.info( '{}/{}: {} partial segments totalling {} s'.format( quality, hour, partial_segment_count, partial_segment_duration.total_seconds())) self.logger.info( '{}/{}: {} overlapping partial segments totalling {} s' .format(quality, hour, partial_overlaps, partial_overlap_duration.total_seconds())) self.logger.info( '{}/{}: raw coverage {} s, editable coverage {} s ' .format(quality, hour, coverage.total_seconds(), editable_coverage.total_seconds())) self.logger.info( '{}/{}: {} holes totalling {} s '.format( quality, hour, len(holes), hole_duration.total_seconds())) self.logger.info( '{}/{}: {} editable holes totalling {} s '.format( quality, hour, len(editable_holes), editable_hole_duration.total_seconds())) self.logger.info('Checking {}/{} complete'.format( quality, hour)) # add holes for the start and end hours for the # coverage map. do this after updating gauges and # logging as these aren't likely real holes, just the # start and end of the stream. if previous_hour_segments is None: holes.append((hour_start, start)) if hour == hours[-1]: holes.append((end, hour_end)) all_hour_holes[hour] = holes all_hour_partials[hour] = only_partials previous_hour_segments = best_segments else: self.logger.info('{}/{} is empty'.format( quality, hour)) self.create_coverage_map(quality, all_hour_holes, all_hour_partials) self.stopping.wait(common.jitter(self.CHECK_INTERVAL))