def GetFilesInDirectory(
    directory: pathlib.Path,
    exclude_patterns: typing.List[str]) -> typing.List[pathlib.Path]:
  """Recursively list all files in a directory.

  Returns relative paths of all files in a directory which do not match the
  exclude patterns. The list of exclude patterns supports UNIX style globbing.

  Args:
    directory: The path to the directory.
    exclude_patterns: A list of patterns to exclude.

  Returns:
    A list of paths.
  """
  exclude_patterns = set(exclude_patterns + ALWAYS_EXCLUDE_PATTERNS)
  files = []
  for path in sorted(fs.lsfiles(directory, recursive=True)):
    for pattern in exclude_patterns:
      if fnmatch.fnmatch(path, pattern):
        logging.info('- %s', path)
        break
    else:
      logging.info('+ %s', path)
      files.append(pathlib.Path(path))
  return files
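# Hedged usage sketch, not part of the original module: shows how
# GetFilesInDirectory might be called. It assumes ALWAYS_EXCLUDE_PATTERNS is
# the module-level list of default glob patterns referenced above, and that
# pathlib and typing are already imported by this module. The corpus directory
# and the extra exclude patterns below are hypothetical.
def _ExampleListCorpusFiles() -> typing.List[pathlib.Path]:
  corpus_dir = pathlib.Path('corpus')  # Hypothetical directory.
  # Exclude editor backups and git metadata in addition to the defaults.
  return GetFilesInDirectory(corpus_dir, ['*~', '.git/*'])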
def ReadClassificationsToTable(output_dir: pathlib.Path) -> pd.DataFrame:
  """Tabulate the classified results in output_dir, one row per testbed."""
  rows = []
  counters = {}
  for f in fs.lsfiles(output_dir, recursive=True, abspaths=True):
    path = pathlib.Path(f)
    result_class, testbed_num, opt = path.parts[-4:-1]
    t = testbed_num + opt
    if t not in counters:
      counters[t] = collections.defaultdict(int)
    counters[t][result_class] += 1
  for t, result_classes in counters.items():
    rows.append([
        t,
        result_classes['bc'],
        result_classes['bto'],
        result_classes['abf'],
        result_classes['arc'],
        result_classes['awo'],
        sum(result_classes.values()),
    ])
  rows = sorted(rows, key=lambda x: (int(x[0][:-1]), x[0][-1]))
  rows.append([
      'Total',
      len(fs.lsfiles(output_dir / 'bc', recursive=True)),
      len(fs.lsfiles(output_dir / 'bto', recursive=True)),
      len(fs.lsfiles(output_dir / 'abf', recursive=True)),
      len(fs.lsfiles(output_dir / 'arc', recursive=True)),
      len(fs.lsfiles(output_dir / 'awo', recursive=True)),
      len(fs.lsfiles(output_dir / 'pass', recursive=True)),
  ])
  df = pd.DataFrame(
      rows, columns=['Testbed', 'bc', 'bto', 'abf', 'arc', 'awo', 'pass'])
  df['Total'] = df.sum(axis=1)
  return df
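# Hedged usage sketch, not part of the original module: renders the per-testbed
# classification counts produced by ReadClassificationsToTable. The expected
# layout <output_dir>/<result_class>/<testbed_num>/<opt>/... is inferred from
# the path.parts[-4:-1] unpacking above; the directory name is hypothetical.
def _ExamplePrintClassificationTable() -> None:
  output_dir = pathlib.Path('difftest_classifications')  # Hypothetical path.
  df = ReadClassificationsToTable(output_dir)
  print(df.to_string(index=False))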
def test_lsfiles_recursive():
  assert fs.lsfiles("labm8/data/test/testdir", recursive=True) == [
      "a",
      "b",
      "c/e",
      "c/f/f/i",
      "c/f/h",
      "c/g",
      "d",
  ]
def main(argv):
  """Main entry point."""
  if len(argv) > 1:
    unknown_args = ', '.join(argv[1:])
    raise app.UsageError(f'Unknown arguments "{unknown_args}"')

  logging.info('Initializing datastore.')
  config = pathlib.Path(FLAGS.datastore)
  ds = datastore.DataStore.FromFile(config)

  output_dir = pathlib.Path(FLAGS.output_directory)
  # Make directories to write the classifications to. We use the same shorthand
  # classification names as in Table 2 of the paper:
  #
  #   http://chriscummins.cc/pub/2018-issta.pdf
  (output_dir / 'bc').mkdir(parents=True, exist_ok=True)
  (output_dir / 'bto').mkdir(exist_ok=True)
  (output_dir / 'abf').mkdir(exist_ok=True)
  (output_dir / 'arc').mkdir(exist_ok=True)
  (output_dir / 'awo').mkdir(exist_ok=True)
  (output_dir / 'pass').mkdir(exist_ok=True)

  result_dirs = [
      pathlib.Path(x) for x in FLAGS.input_directories
      if pathlib.Path(x).is_dir()
  ]
  results_paths = labtypes.flatten(
      [pathlib.Path(f) for f in fs.lsfiles(d, recursive=True, abspaths=True)]
      for d in result_dirs)
  logging.info('Importing %d results into datastore ...', len(results_paths))
  with ds.Session(commit=True) as s:
    for path in progressbar.ProgressBar()(results_paths):
      # Instantiating a result from file has the side effect of adding the
      # result object to the datastore's session.
      result.Result.FromFile(s, path)

  with ds.Session() as s:
    testcases = s.query(testcase.Testcase)
    logging.info('Difftesting the results of %d testcases ...',
                 testcases.count())
    for t in progressbar.ProgressBar(max_value=testcases.count())(testcases):
      DifftestTestcase(s, t, output_dir)

  df = ReadClassificationsToTable(output_dir)
  print()
  print('Table of results. For each testbed, this shows the number of results')
  print('of each class, using the same shorthand as in Table 2 of the paper:')
  print('http://chriscummins.cc/pub/2018-issta.pdf')
  print()
  print(df.to_string(index=False))
  print()
  print('Individual classified programs are written to: '
        f"'{output_dir}/<class>/<device>/'")
def main(argv):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError(
        "Unknown arguments: '{}'.".format(' '.join(argv[1:])))

  if not FLAGS.export_path:
    raise app.UsageError('--export_path must be a directory')
  export_path = pathlib.Path(FLAGS.export_path)
  if export_path.is_file():
    raise app.UsageError('--export_path must be a directory')

  # Make a directory for each outcome class.
  for key in fish_pb2.CompilerCrashDiscriminatorTrainingExample.Outcome.keys():
    (export_path / key.lower()).mkdir(parents=True, exist_ok=True)

  logging.info('Connecting to MySQL database')
  credentials = GetMySqlCredentials()
  cnx = MySQLdb.connect(database='dsmith_04_opencl', host='cc1',
                        user=credentials[0], password=credentials[1])
  cursor = cnx.cursor()

  logging.info('Determining last export ID')
  ids = sorted([
      int(pathlib.Path(f).stem)
      for f in fs.lsfiles(export_path, recursive=True, abspaths=True)
  ])
  last_export_id = ids[-1] if ids else 0
  logging.info('Exporting results from ID %s', last_export_id)

  ExportOpenCLResults(cursor, last_export_id, export_path)

  cursor.close()
  cnx.close()

  logging.info('Exported training set of %s files to %s',
               humanize.intcomma(len(list(export_path.iterdir()))),
               export_path)
def test_lsfiles_single_file(self):
  self._test(["a"], fs.lsfiles("tests/data/testdir/a"))
def test_lsfiles_bad_path(self):
  with self.assertRaises(OSError):
    fs.lsfiles("/not/a/real/path/bro")
def test_lsfiles_recursive(self):
  self._test(["a", "b", "c/e", "c/f/f/i", "c/f/h", "c/g", "d"],
             fs.lsfiles("tests/data/testdir", recursive=True))
def test_lsfiles(self):
  self._test(["a", "b", "d"], fs.lsfiles("tests/data/testdir"))
def test_lsfiles_single_file():
  assert fs.lsfiles("labm8/data/test/testdir/a") == ["a"]
def test_lsfiles_bad_path():
  with pytest.raises(OSError):
    fs.lsfiles("/not/a/real/path/bro")
def test_lsfiles():
  assert fs.lsfiles("labm8/data/test/testdir") == ["a", "b", "d"]
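# Hedged reference sketch, not labm8's implementation: a minimal stand-in for
# fs.lsfiles() that is consistent with the tests above. It returns the sorted
# paths of regular files relative to `root`, descends into subdirectories only
# when recursive=True, returns the basename when given a single file, and
# raises OSError when the path does not exist.
import os
import typing


def lsfiles_sketch(root: str, recursive: bool = False) -> typing.List[str]:
  if not os.path.exists(root):
    raise OSError(f'path does not exist: {root}')
  if os.path.isfile(root):
    # Mirrors the single-file behaviour exercised by test_lsfiles_single_file.
    return [os.path.basename(root)]
  paths = []
  for dirpath, dirnames, filenames in os.walk(root):
    for name in filenames:
      paths.append(os.path.relpath(os.path.join(dirpath, name), root))
    if not recursive:
      # Stop os.walk() from descending into subdirectories.
      dirnames.clear()
  return sorted(paths)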