def parse_metadata(metadatapath):
    """Parse a metadata file and return the resulting App object.

    Rejects files whose extension is not in the configured list of
    accepted formats, then dispatches to the format-specific parser.
    """
    _, ext = common.get_extension(metadatapath)
    accepted = common.config['accepted_formats']
    if ext not in accepted:
        raise MetaDataException('"%s" is not an accepted format, convert to: %s' % (
            metadatapath, ', '.join(accepted)))

    app = App()
    app.metadatapath = metadatapath
    # The app id is the file name without its extension.
    app.id, _ = common.get_extension(os.path.basename(metadatapath))

    # Format-specific parser dispatch.
    parsers = {
        'txt': parse_txt_metadata,
        'json': parse_json_metadata,
        'xml': parse_xml_metadata,
        'yaml': parse_yaml_metadata,
    }
    with open(metadatapath, 'r') as mf:
        parse = parsers.get(ext)
        if parse is None:
            raise MetaDataException('Unknown metadata format: %s' % metadatapath)
        parse(mf, app)

    post_metadata_parse(app)
    return app
def is_forbidden(self, request):
    """Returns whether this request is permitted by checking URL extension and regex
    XXX head request for mime?
    """
    url = common.to_unicode(request.url().toString().toUtf8().data()).encode('utf-8')
    # Blocked outright when the extension is on the ban list.
    if common.get_extension(url) in self.banned_extensions:
        return True
    # Otherwise the URL must match the allowed pattern.
    if re.match(self.allowed_regex, url) is None:
        return True
    return False
def is_forbidden(self, request):
    """Returns whether this request is permitted by checking URL extension and regex
    XXX head request for mime?
    """
    url = common.to_unicode(request.url().url())
    # Banned extension -> forbidden.
    if common.get_extension(url) in self.banned_extensions:
        return True
    # Not matching the allow-list regex -> forbidden.
    if re.match(self.allowed_regex, url) is None:
        return True
    return False
def get_default_app_info(metadatapath=None):
    """Build a default App object, deriving the app id from the file name
    when a metadata path is given."""
    appid = None
    if metadatapath is not None:
        # File name minus extension is the app id.
        appid, _ = common.get_extension(os.path.basename(metadatapath))

    app = App()
    app.metadatapath = metadatapath
    if appid is not None:
        app.id = appid
    return app
def save_as(self, url, filename=None, save_dir="images"):
    """Download url and save into disk.
    """
    if not url:
        return None
    payload = self.get(url, num_redirects=0)
    if not payload:
        return None
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # Default file name is derived from the URL hash and its extension.
    name = filename
    if not name:
        name = "%s.%s" % (hashlib.md5(url).hexdigest(), common.get_extension(url))
    save_path = os.path.join(save_dir, name)
    open(save_path, "wb").write(payload)
    return save_path
def save_as(self, url, filename=None, save_dir='images'):
    """Download url and save into disk.

    url: resource to download; falsy URLs are ignored.
    filename: output file name; defaults to an md5-of-url based name.
    save_dir: directory the file is written to (created on demand).

    Returns the saved file path, or None when nothing was downloaded.
    """
    if not url:
        return None
    _bytes = self.get(url, num_redirects=0)
    if not _bytes:
        return None
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(
        save_dir,
        filename or '%s.%s' % (hashlib.md5(url).hexdigest(),
                               common.get_extension(url)))
    # Use a context manager so the handle is closed deterministically;
    # the original leaked the file object returned by open().
    with open(save_path, 'wb') as outfile:
        outfile.write(_bytes)
    return save_path
def main():
    """Rewrite metadata files into a canonical (or requested) format."""
    global config, options

    # Parse command line...
    parser = ArgumentParser(usage="%(prog)s [options] [APPID [APPID ...]]")
    common.setup_global_opts(parser)
    parser.add_argument("-l", "--list", action="store_true", default=False,
                        help="List files that would be reformatted")
    parser.add_argument("-t", "--to", default=None,
                        help="Rewrite to a specific format")
    parser.add_argument("appid", nargs='*', help="app-id in the form APPID")
    options = parser.parse_args()

    config = common.read_config(options)

    # Get all apps...
    allapps = metadata.read_metadata(xref=True)
    apps = common.read_app_args(options.appid, allapps, False)

    if options.list and options.to is not None:
        parser.error("Cannot use --list and --to at the same time")

    supported = ['txt', 'yaml']
    if options.to is not None and options.to not in supported:
        parser.error("Must give a valid format to --to")

    for appid, app in apps.iteritems():
        base, ext = common.get_extension(app.metadatapath)
        # Without an explicit target, only rewrite files already in a
        # supported format.
        if not options.to and ext not in supported:
            logging.info("Ignoring %s file at '%s'" % (ext, app.metadatapath))
            continue

        to_ext = ext if options.to is None else options.to

        if options.list:
            # Only report files whose current content is not canonical.
            if not proper_format(app):
                print(app.metadatapath)
            continue

        with open(base + '.' + to_ext, 'w') as f:
            metadata.write_metadata(to_ext, f, app)

        # Converting formats replaces the old file.
        if ext != to_ext:
            os.remove(app.metadatapath)

    logging.debug("Finished.")
def valid(link):
    """Check if should crawl this link
    """
    # skip media files
    if common.get_extension(link) in common.MEDIA_EXTENSIONS:
        return False
    # only proper HTTP links
    if not link.lower().startswith('http'):
        return False
    # only crawl within website
    if not common.same_domain(domain, link):
        return False
    # must pass the allow regex and not the ban regex
    if not self.allowed_urls.match(link) or self.banned_urls.match(link):
        return False
    # must not be blocked by robots.txt
    if self.robots and not self.robots.can_fetch(settings.user_agent, link):
        return False
    # allowed to recrawl
    if self.crawl_existing or (D.cache and link not in D.cache):
        return True
    return False
def save_as(self, url, filename=None, save_dir='images'):
    """Download url and save to disk

    url: the webpage to download
    filename: Output file to save to. If not set then will save to file based on URL

    Returns the saved file path, or None when the download produced no data.
    """
    _bytes = self.get(url, num_redirects=0)
    if not _bytes:
        return None
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(
        save_dir,
        filename or '%s.%s' % (hashlib.md5(url).hexdigest(),
                               common.get_extension(url)))
    # Close the handle deterministically instead of leaking the object
    # returned by open().
    with open(save_path, 'wb') as outfile:
        outfile.write(_bytes)
    return save_path
def demux(self):
    """Run every demux/extract action that was previously enabled on this object."""
    if self._write_chapters:
        with open(self._chapters_output_path, "w") as output_file:
            output_file.write(chapters.format_ogm_chapters(self.chapters))

    if self._make_keyframes:
        SCXviD.make_keyframes(self._path, self._keyframes_output_path)

    # Collect the arguments for a single ffmpeg invocation.
    demux_args = {}
    if self._demux_audio:
        demux_args['audio_stream'] = self._audio_stream.id
        demux_args['audio_path'] = self._audio_output_path
        demux_args['audio_rate'] = self._audio_sample_rate
    if self._demux_subs:
        demux_args['script_stream'] = self._script_stream.id
        demux_args['script_path'] = self._script_output_path

    if self._make_timecodes:
        def use_ffmpeg_timecodes():
            # Fall back to ffmpeg-based timecode extraction.
            demux_args['video_stream'] = self._mi.video[0].id
            demux_args['timecodes_path'] = self._timecodes_output_path

        if get_extension(self._path).lower() == '.mkv':
            # Prefer mkvextract for Matroska; fall back when it is missing.
            try:
                MkvToolnix.extract_timecodes(
                    self._path,
                    stream_idx=self._mi.video[0].id,
                    output_path=self._timecodes_output_path)
            except OSError as e:
                if e.errno == 2:  # executable not found
                    use_ffmpeg_timecodes()
                else:
                    raise
        else:
            use_ffmpeg_timecodes()

    if demux_args:
        FFmpeg.demux_file(self._path, **demux_args)
def save_as(self, url, filename=None, save_dir='images', override=False):
    """Download url and save to disk if does not already exist

    url: the webpage to download
    filename: output file to save to if not set then will save to file based on URL
    override: whether to download if output file already exists

    Returns the output path, or None when a needed download produced no data.
    """
    save_path = os.path.join(
        save_dir,
        filename or '%s.%s' % (hashlib.md5(url).hexdigest(),
                               common.get_extension(url)))
    if not os.path.exists(save_path) or override:
        # need to download
        _bytes = self.get(url, num_redirects=0, write_cache=False)
        if not _bytes:
            return None
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # Close the handle deterministically instead of leaking the object
        # returned by open().
        with open(save_path, 'wb') as outfile:
            outfile.write(_bytes)
    return save_path
def demux(self):
    """Execute all demux/extract operations enabled on this demuxer."""
    if self._write_chapters:
        with open(self._chapters_output_path, "w") as output_file:
            output_file.write(chapters.format_ogm_chapters(self.chapters))

    if self._make_keyframes:
        SCXviD.make_keyframes(self._path, self._keyframes_output_path)

    # Accumulate keyword arguments for one combined ffmpeg call.
    ffargs = {}
    if self._demux_audio:
        ffargs['audio_stream'] = self._audio_stream.id
        ffargs['audio_path'] = self._audio_output_path
        ffargs['audio_rate'] = self._audio_sample_rate
    if self._demux_subs:
        ffargs['script_stream'] = self._script_stream.id
        ffargs['script_path'] = self._script_output_path

    if self._make_timecodes:
        def set_ffmpeg_timecodes():
            # Route timecode extraction through ffmpeg instead.
            ffargs['video_stream'] = self._mi.video[0].id
            ffargs['timecodes_path'] = self._timecodes_output_path

        if get_extension(self._path).lower() != '.mkv':
            set_ffmpeg_timecodes()
        else:
            # mkvextract preferred for Matroska files.
            try:
                MkvToolnix.extract_timecodes(self._path,
                                             stream_idx=self._mi.video[0].id,
                                             output_path=self._timecodes_output_path)
            except OSError as e:
                if e.errno == 2:  # tool not installed
                    set_ffmpeg_timecodes()
                else:
                    raise

    if ffargs:
        FFmpeg.demux_file(self._path, **ffargs)
def for_paravis(self):
    """Check if file object can be viewed in an editor."""
    # Only existing, non-reference, valid files with a MED-family
    # extension qualify.
    viewable_extensions = ("med", "rmed", "mmed")
    return (self.valid
            and not self.is_reference
            and self.exists
            and get_extension(self.filename) in viewable_extensions)
def save_as(self, url, filename=None, save_dir='images'):
    """Download url and save to disk

    url: the webpage to download
    filename: Output file to save to. If not set then will save to file based on URL
    """
    content = self.get(url, num_redirects=0)
    if not content:
        return None
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # Default name comes from the URL hash plus its extension.
    name = filename
    if not name:
        name = '%s.%s' % (hashlib.md5(url).hexdigest(), common.get_extension(url))
    save_path = os.path.join(save_dir, name)
    open(save_path, 'wb').write(content)
    return save_path
def __init__(self, path):
    """Create a demuxer for *path*; probes media info unless it is a wav."""
    super(Demuxer, self).__init__()
    self._path = path
    # Plain wav files need no probing; everything else is inspected.
    self._is_wav = get_extension(self._path) == '.wav'
    self._mi = None if self._is_wav else FFmpeg.get_media_info(self._path)
    # Every demux action is opt-in and starts disabled.
    self._demux_audio = False
    self._demux_subs = False
    self._make_timecodes = False
    self._make_keyframes = False
    self._write_chapters = False
def run(args):
    """Top-level sushi pipeline: validate arguments, demux inputs, compute
    and apply subtitle shifts, and write the output script.

    Fix: the destination-timecodes existence check previously reported the
    label 'Source timecodes' in its error message.
    """
    ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none'
    write_plot = plot_enabled and args.plot_path

    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')

    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps):
        raise SushiError('Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    if (args.src_keyframes and not args.dst_keyframes) or (args.dst_keyframes and not args.src_keyframes):
        raise SushiError('Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx, output_path=src_audio_path, sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx, output_path=dst_audio_path, sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source, '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx, output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selection destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError("Source and destination script file types don't match ({0} vs {1})"
                             .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension)

    # selecting chapters
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:
        def select_keyframes(file_arg, demuxer):
            auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError("Cannot make keyframes for {0} because it doesn't have any video!"
                                         .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError('Fps, timecodes or video files must be provided if keyframes are used')

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(args.src_fps) if args.src_fps else Timecodes.from_file(src_timecodes_file)
            src_keytimes = [src_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(src_keyframes_file)]
            dst_timecodes = Timecodes.cfr(args.dst_fps) if args.dst_fps else Timecodes.from_file(dst_timecodes_file)
            dst_keytimes = [dst_timecodes.get_frame_time(f) for f in keyframes.parse_keyframes(dst_keyframes_file)]

        script = AssScript.from_file(src_script_path) if script_extension == '.ass' else SrtScript.from_file(src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)

        search_groups = prepare_search_groups(script.events,
                                              source_duration=src_stream.duration_seconds,
                                              chapter_times=chapter_times,
                                              max_ts_duration=args.max_ts_duration,
                                              max_ts_distance=args.max_ts_distance)

        calculate_shifts(src_stream, dst_stream, search_groups,
                         normal_window=args.window,
                         max_window=args.max_window,
                         rewind_thresh=args.rewind_thresh if args.grouping else 0)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked], args.smooth_radius)
                groups = split_broken_groups(groups)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked], args.smooth_radius)
                groups = detect_groups(events)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(u'Group (start: {0}, end: {1}, lines: {2}), '
                             u'shifts (start: {3}, end: {4}, average: {5})'
                             .format(format_time(g[0].start), format_time(g[-1].end), len(g),
                                     start_shift, end_shift, avg_shift))

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(g, chapter_times, args.max_ts_duration, args.max_ts_distance,
                                             src_keytimes, dst_keytimes, src_timecodes, dst_timecodes,
                                             args.max_kf_distance, args.kf_mode)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times, args.max_ts_duration, args.max_ts_distance,
                                         src_keytimes, dst_keytimes, src_timecodes, dst_timecodes,
                                         args.max_kf_distance, args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([x.shift + (x._start_shift + x._end_shift) / 2.0 for x in events], label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)
    finally:
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()
def run(args):
    """Top-level sushi pipeline (variant with min_group_size / write_avs):
    validate arguments, demux inputs, compute and apply shifts, write output.

    Fix: the destination-timecodes existence check previously reported the
    label 'Source timecodes' in its error message.
    """
    ignore_chapters = args.chapters_file is not None and args.chapters_file.lower() == 'none'
    write_plot = plot_enabled and args.plot_path

    if write_plot:
        plt.clf()
        plt.ylabel('Shift, seconds')
        plt.xlabel('Event index')

    # first part should do all possible validation and should NOT take significant amount of time
    check_file_exists(args.source, 'Source')
    check_file_exists(args.destination, 'Destination')
    check_file_exists(args.src_timecodes, 'Source timecodes')
    check_file_exists(args.dst_timecodes, 'Destination timecodes')
    check_file_exists(args.script_file, 'Script')

    if not ignore_chapters:
        check_file_exists(args.chapters_file, 'Chapters')

    if args.src_keyframes not in ('auto', 'make'):
        check_file_exists(args.src_keyframes, 'Source keyframes')
    if args.dst_keyframes not in ('auto', 'make'):
        check_file_exists(args.dst_keyframes, 'Destination keyframes')

    if (args.src_timecodes and args.src_fps) or (args.dst_timecodes and args.dst_fps):
        raise SushiError('Both fps and timecodes file cannot be specified at the same time')

    src_demuxer = Demuxer(args.source)
    dst_demuxer = Demuxer(args.destination)

    if src_demuxer.is_wav and not args.script_file:
        raise SushiError("Script file isn't specified")

    if (args.src_keyframes and not args.dst_keyframes) or (args.dst_keyframes and not args.src_keyframes):
        raise SushiError('Either none or both of src and dst keyframes should be provided')

    create_directory_if_not_exists(args.temp_dir)

    # selecting source audio
    if src_demuxer.is_wav:
        src_audio_path = args.source
    else:
        src_audio_path = format_full_path(args.temp_dir, args.source, '.sushi.wav')
        src_demuxer.set_audio(stream_idx=args.src_audio_idx, output_path=src_audio_path, sample_rate=args.sample_rate)

    # selecting destination audio
    if dst_demuxer.is_wav:
        dst_audio_path = args.destination
    else:
        dst_audio_path = format_full_path(args.temp_dir, args.destination, '.sushi.wav')
        dst_demuxer.set_audio(stream_idx=args.dst_audio_idx, output_path=dst_audio_path, sample_rate=args.sample_rate)

    # selecting source subtitles
    if args.script_file:
        src_script_path = args.script_file
    else:
        stype = src_demuxer.get_subs_type(args.src_script_idx)
        src_script_path = format_full_path(args.temp_dir, args.source, '.sushi' + stype)
        src_demuxer.set_script(stream_idx=args.src_script_idx, output_path=src_script_path)

    script_extension = get_extension(src_script_path)
    if script_extension not in ('.ass', '.srt'):
        raise SushiError('Unknown script type')

    # selection destination subtitles
    if args.output_script:
        dst_script_path = args.output_script
        dst_script_extension = get_extension(args.output_script)
        if dst_script_extension != script_extension:
            raise SushiError(
                "Source and destination script file types don't match ({0} vs {1})"
                .format(script_extension, dst_script_extension))
    else:
        dst_script_path = format_full_path(args.temp_dir, args.destination, '.sushi' + script_extension)

    # selecting chapters
    if args.grouping and not ignore_chapters:
        if args.chapters_file:
            if get_extension(args.chapters_file) == '.xml':
                chapter_times = chapters.get_xml_start_times(args.chapters_file)
            else:
                chapter_times = chapters.get_ogm_start_times(args.chapters_file)
        elif not src_demuxer.is_wav:
            chapter_times = src_demuxer.chapters
            output_path = format_full_path(args.temp_dir, src_demuxer.path, ".sushi.chapters.txt")
            src_demuxer.set_chapters(output_path)
        else:
            chapter_times = []
    else:
        chapter_times = []

    # selecting keyframes and timecodes
    if args.src_keyframes:
        def select_keyframes(file_arg, demuxer):
            auto_file = format_full_path(args.temp_dir, demuxer.path, '.sushi.keyframes.txt')
            if file_arg in ('auto', 'make'):
                if file_arg == 'make' or not os.path.exists(auto_file):
                    if not demuxer.has_video:
                        raise SushiError(
                            "Cannot make keyframes for {0} because it doesn't have any video!"
                            .format(demuxer.path))
                    demuxer.set_keyframes(output_path=auto_file)
                return auto_file
            else:
                return file_arg

        def select_timecodes(external_file, fps_arg, demuxer):
            if external_file:
                return external_file
            elif fps_arg:
                return None
            elif demuxer.has_video:
                path = format_full_path(args.temp_dir, demuxer.path, '.sushi.timecodes.txt')
                demuxer.set_timecodes(output_path=path)
                return path
            else:
                raise SushiError('Fps, timecodes or video files must be provided if keyframes are used')

        src_keyframes_file = select_keyframes(args.src_keyframes, src_demuxer)
        dst_keyframes_file = select_keyframes(args.dst_keyframes, dst_demuxer)
        src_timecodes_file = select_timecodes(args.src_timecodes, args.src_fps, src_demuxer)
        dst_timecodes_file = select_timecodes(args.dst_timecodes, args.dst_fps, dst_demuxer)

    # after this point nothing should fail so it's safe to start slow operations
    # like running the actual demuxing
    src_demuxer.demux()
    dst_demuxer.demux()

    try:
        if args.src_keyframes:
            src_timecodes = Timecodes.cfr(args.src_fps) if args.src_fps else Timecodes.from_file(src_timecodes_file)
            src_keytimes = [src_timecodes.get_frame_time(f) for f in parse_keyframes(src_keyframes_file)]
            dst_timecodes = Timecodes.cfr(args.dst_fps) if args.dst_fps else Timecodes.from_file(dst_timecodes_file)
            dst_keytimes = [dst_timecodes.get_frame_time(f) for f in parse_keyframes(dst_keyframes_file)]

        script = AssScript.from_file(src_script_path) if script_extension == '.ass' else SrtScript.from_file(src_script_path)
        script.sort_by_time()

        src_stream = WavStream(src_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)
        dst_stream = WavStream(dst_audio_path, sample_rate=args.sample_rate, sample_type=args.sample_type)

        calculate_shifts(src_stream, dst_stream, script.events,
                         chapter_times=chapter_times,
                         window=args.window,
                         max_window=args.max_window,
                         rewind_thresh=args.rewind_thresh if args.grouping else 0,
                         max_ts_duration=args.max_ts_duration,
                         max_ts_distance=args.max_ts_distance)

        events = script.events

        if write_plot:
            plt.plot([x.shift for x in events], label='From audio')

        if args.grouping:
            if not ignore_chapters and chapter_times:
                groups = groups_from_chapters(events, chapter_times)
                for g in groups:
                    fix_near_borders(g)
                    smooth_events([x for x in g if not x.linked], args.smooth_radius)
                groups = split_broken_groups(groups, args.min_group_size)
            else:
                fix_near_borders(events)
                smooth_events([x for x in events if not x.linked], args.smooth_radius)
                groups = detect_groups(events, args.min_group_size)

            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            for g in groups:
                start_shift = g[0].shift
                end_shift = g[-1].shift
                avg_shift = average_shifts(g)
                logging.info(u'Group (start: {0}, end: {1}, lines: {2}), '
                             u'shifts (start: {3}, end: {4}, average: {5})'.format(
                                 format_time(g[0].start), format_time(g[-1].end), len(g),
                                 start_shift, end_shift, avg_shift))

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                for g in groups:
                    snap_groups_to_keyframes(
                        g, chapter_times, args.max_ts_duration, args.max_ts_distance,
                        src_keytimes, dst_keytimes, src_timecodes, dst_timecodes,
                        args.max_kf_distance, args.kf_mode)

            if args.write_avs:
                write_shift_avs(dst_script_path + '.avs', groups, src_audio_path, dst_audio_path)
        else:
            fix_near_borders(events)
            if write_plot:
                plt.plot([x.shift for x in events], label='Borders fixed')

            if args.src_keyframes:
                for e in (x for x in events if x.linked):
                    e.resolve_link()
                snap_groups_to_keyframes(events, chapter_times, args.max_ts_duration,
                                         args.max_ts_distance, src_keytimes, dst_keytimes,
                                         src_timecodes, dst_timecodes, args.max_kf_distance,
                                         args.kf_mode)

        for event in events:
            event.apply_shift()

        script.save_to_file(dst_script_path)

        if write_plot:
            plt.plot([x.shift + (x._start_shift + x._end_shift) / 2.0 for x in events],
                     label='After correction')
            plt.legend(fontsize=5, frameon=False, fancybox=False)
            plt.savefig(args.plot_path, dpi=300)
    finally:
        if args.cleanup:
            src_demuxer.cleanup()
            dst_demuxer.cleanup()
def scan_source(build_dir, root_dir, build):
    """Scan the source tree for known non-free blobs and binaries.

    Returns the number of fatal problems found.

    Fixes: the log message typo 'usual supect' -> 'usual suspect', and the
    Python-2-only file() builtin replaced with open() (same behavior);
    iteritems() replaced with items() (identical iteration results).
    """
    count = 0

    # Common known non-free blobs (always lower case):
    usual_suspects = {
        exp: re.compile(r'.*' + exp, re.IGNORECASE)
        for exp in [
            r'flurryagent',
            r'paypal.*mpl',
            r'google.*analytics',
            r'admob.*sdk.*android',
            r'google.*ad.*view',
            r'google.*admob',
            r'google.*play.*services',
            r'crittercism',
            r'heyzap',
            r'jpct.*ae',
            r'youtube.*android.*player.*api',
            r'bugsense',
            r'crashlytics',
            r'ouya.*sdk',
            r'libspen23',
        ]
    }

    def suspects_found(s):
        # Yield the names of every suspect pattern matching s.
        for n, r in usual_suspects.items():
            if r.match(s):
                yield n

    gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')

    allowed_repos = [re.compile(r'^https?://' + re.escape(repo) + r'/*') for repo in [
        'repo1.maven.org/maven2',  # mavenCentral()
        'jcenter.bintray.com',  # jcenter()
        'jitpack.io',
        'repo.maven.apache.org/maven2',
        'oss.sonatype.org/content/repositories/snapshots',
        'oss.sonatype.org/content/repositories/releases',
        'oss.sonatype.org/content/groups/public',
        'clojars.org/repo',  # Clojure free software libs
        's3.amazonaws.com/repo.commonsware.com',  # CommonsWare
        'plugins.gradle.org/m2',  # Gradle plugin repo
        ]
    ]

    scanignore = common.getpaths_map(build_dir, build.scanignore)
    scandelete = common.getpaths_map(build_dir, build.scandelete)

    # Track which configured paths actually matched something so unused
    # entries can be reported as errors at the end.
    scanignore_worked = set()
    scandelete_worked = set()

    def toignore(fd):
        for k, paths in scanignore.items():
            for p in paths:
                if fd.startswith(p):
                    scanignore_worked.add(k)
                    return True
        return False

    def todelete(fd):
        for k, paths in scandelete.items():
            for p in paths:
                if fd.startswith(p):
                    scandelete_worked.add(k)
                    return True
        return False

    def ignoreproblem(what, fd, fp):
        logging.info('Ignoring %s at %s' % (what, fd))
        return 0

    def removeproblem(what, fd, fp):
        logging.info('Removing %s at %s' % (what, fd))
        os.remove(fp)
        return 0

    def warnproblem(what, fd):
        if toignore(fd):
            return
        logging.warn('Found %s at %s' % (what, fd))

    def handleproblem(what, fd, fp):
        # Returns 1 for a fatal problem, 0 when ignored or deleted.
        if toignore(fd):
            return ignoreproblem(what, fd, fp)
        if todelete(fd):
            return removeproblem(what, fd, fp)
        logging.error('Found %s at %s' % (what, fd))
        return 1

    def is_executable(path):
        return os.path.exists(path) and os.access(path, os.X_OK)

    textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})

    def is_binary(path):
        d = None
        with open(path, 'rb') as f:
            d = f.read(1024)
        return bool(d.translate(None, textchars))

    # False positives patterns for files that are binary and executable.
    safe_paths = [re.compile(r) for r in [
        r".*/drawable[^/]*/.*\.png$",  # png drawables
        r".*/mipmap[^/]*/.*\.png$",  # png mipmaps
        ]
    ]

    def safe_path(path):
        for sp in safe_paths:
            if sp.match(path):
                return True
        return False

    gradle_compile_commands = get_gradle_compile_commands(build)

    def is_used_by_gradle(line):
        return any(command.match(line) for command in gradle_compile_commands)

    # Iterate through all files in the source code
    for r, d, f in os.walk(build_dir, topdown=True):

        # It's topdown, so checking the basename is enough
        for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
            if ignoredir in d:
                d.remove(ignoredir)

        for curfile in f:

            if curfile in ['.DS_Store']:
                continue

            # Path (relative) to the file
            fp = os.path.join(r, curfile)

            if os.path.islink(fp):
                continue

            fd = fp[len(build_dir) + 1:]
            _, ext = common.get_extension(fd)

            if ext == 'so':
                count += handleproblem('shared library', fd, fp)
            elif ext == 'a':
                count += handleproblem('static library', fd, fp)
            elif ext == 'class':
                count += handleproblem('Java compiled class', fd, fp)
            elif ext == 'apk':
                removeproblem('APK file', fd, fp)
            elif ext == 'jar':
                for name in suspects_found(curfile):
                    count += handleproblem('usual suspect \'%s\'' % name, fd, fp)
                warnproblem('JAR file', fd)
            elif ext == 'java':
                if not os.path.isfile(fp):
                    continue
                for line in open(fp):
                    if 'DexClassLoader' in line:
                        count += handleproblem('DexClassLoader', fd, fp)
                        break
            elif ext == 'gradle':
                if not os.path.isfile(fp):
                    continue
                with open(fp, 'r') as f:
                    lines = f.readlines()
                for i, line in enumerate(lines):
                    if is_used_by_gradle(line):
                        for name in suspects_found(line):
                            count += handleproblem('usual suspect \'%s\' at line %d' % (name, i + 1), fd, fp)
                noncomment_lines = [l for l in lines if not common.gradle_comment.match(l)]
                joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
                for m in gradle_mavenrepo.finditer(joined):
                    url = m.group(2)
                    if not any(r.match(url) for r in allowed_repos):
                        count += handleproblem('unknown maven repo \'%s\'' % url, fd, fp)
            elif ext in ['', 'bin', 'out', 'exe']:
                if is_binary(fp):
                    count += handleproblem('binary', fd, fp)
            elif is_executable(fp):
                if is_binary(fp) and not safe_path(fd):
                    warnproblem('possible binary', fd)

    for p in scanignore:
        if p not in scanignore_worked:
            logging.error('Unused scanignore path: %s' % p)
            count += 1
    for p in scandelete:
        if p not in scandelete_worked:
            logging.error('Unused scandelete path: %s' % p)
            count += 1

    return count