Exemplo n.º 1
0
 def test_get_filelist_10000_files_in_2_layers_of_subfolders_99_files_each(
         self):
     """List a directory tree with 10000 files spread across 2 layers of subfolders.

     Layout created inside a fresh temporary directory: ten subdirectories,
     each holding ten files plus ten sub-subdirectories of 99 files each,
     i.e. 10 * (10 + 10 * 99) = 10000 files.

     Expected output: list with 10000 strings, equal to the filepaths of
     the temporary files created.
     """
     files_created = []

     def create_files(directory, count):
         """Create ``count`` empty files in ``directory`` and record their paths."""
         for _ in range(count):
             # delete=False keeps the file on disk once the handle is closed
             file_handler = tempfile.NamedTemporaryFile(dir=directory,
                                                        delete=False)
             # Close immediately: otherwise thousands of descriptors stay
             # open and the files cannot be removed where open files are locked
             file_handler.close()
             files_created.append(file_handler.name)

     # mkdtemp is available on both Python 2.x and 3.x, so a single code
     # path with an explicit cleanup replaces the per-version branches
     indir = tempfile.mkdtemp()
     try:
         # Create ten temporary subdirectories inside the temporary directory
         for _ in range(10):
             subdir = tempfile.mkdtemp(dir=indir)
             # Ten temporary files directly inside each subdirectory
             create_files(subdir, 10)
             # Ten temporary subsubdirectories with 99 files each
             for _ in range(10):
                 subsubdir = tempfile.mkdtemp(dir=subdir)
                 create_files(subsubdir, 99)
         filelist = fs.get_filelist(indir)
     finally:
         # Directories made by mkdtemp must be deleted manually; doing it in
         # a finally clause removes the whole tree even if the listing fails
         shutil.rmtree(indir, ignore_errors=True)
     # Typecast both lists to sets to make an unordered comparison
     self.assertEqual(set(files_created), set(filelist))
Exemplo n.º 2
0
 def test_get_filelist_1_file_in_folder(self):
     """List the files of a directory with only one file inside it.

     Expected output: list with one string, equal to the filepath of the
     temporary file created.
     """
     files_created = []
     # mkdtemp is available on both Python 2.x and 3.x, so a single code
     # path with an explicit cleanup replaces the per-version branches
     indir = tempfile.mkdtemp()
     try:
         # delete=False keeps the file on disk once the handle is closed
         file1 = tempfile.NamedTemporaryFile(dir=indir, delete=False)
         # Close the handle right away so no descriptor is leaked and the
         # file can be removed where open files are locked
         file1.close()
         files_created.append(file1.name)
         filelist = fs.get_filelist(indir)
     finally:
         # Directories made by mkdtemp must be deleted manually; doing it in
         # a finally clause cleans up even if the listing fails
         shutil.rmtree(indir, ignore_errors=True)
     self.assertEqual(files_created, filelist)
Exemplo n.º 3
0
 def test_get_filelist_empty_folder(self):
     """List the files of an empty directory.

     It is necessary to make a temporary directory inside the folder
     obtained via tempfile.gettempdir(), otherwise files that are already
     in the temp folder (created by other programs in the system) will
     pollute the test and fill the list.

     Expected output: empty list []
     """
     # mkdtemp is available on both Python 2.x and 3.x, so a single code
     # path with an explicit cleanup replaces the per-version branches
     indir = tempfile.mkdtemp()
     try:
         filelist = fs.get_filelist(indir)
     finally:
         # Directories made by mkdtemp must be deleted manually; doing it in
         # a finally clause cleans up even if the listing fails
         shutil.rmtree(indir, ignore_errors=True)
     # Empty lists evaluate to false
     self.assertFalse(filelist)
Exemplo n.º 4
0
 def test_get_filelist_1000_files_in_folder(self):
     """List the files of a directory with 1000 files inside it.

     Expected output: list with 1000 strings, equal to the filepaths of the
     temporary files created.
     """
     files_created = []
     # mkdtemp is available on both Python 2.x and 3.x, so a single code
     # path with an explicit cleanup replaces the per-version branches
     indir = tempfile.mkdtemp()
     try:
         for _ in range(1000):
             # delete=False keeps the file on disk once the handle is closed
             file_handler = tempfile.NamedTemporaryFile(dir=indir,
                                                        delete=False)
             # Close immediately: otherwise a thousand descriptors stay open
             # and the files cannot be removed where open files are locked
             file_handler.close()
             files_created.append(file_handler.name)
         filelist = fs.get_filelist(indir)
     finally:
         # Directories made by mkdtemp must be deleted manually; doing it in
         # a finally clause cleans up even if the listing fails
         shutil.rmtree(indir, ignore_errors=True)
     # Typecast both lists to sets to make an unordered comparison
     self.assertEqual(set(files_created), set(filelist))
Exemplo n.º 5
0
def _extract_to_outdir(infile, outdir, disable_no_ext_prompt):
    """Extract the text of ``infile`` and write it to a new file in ``outdir``.

    Nothing is written when ``txt.get_text`` returns a falsy value. An
    ``OSError`` raised while writing is printed and otherwise ignored, so a
    single failing file does not abort the processing of a whole folder.

    Args:
        infile: path of the file to extract text from.
        outdir: directory that receives the output file.
        disable_no_ext_prompt: forwarded unchanged to ``txt.get_text``.
    """
    text = txt.get_text(infile, disable_no_ext_prompt=disable_no_ext_prompt)
    # Skip files for which no text was obtained
    if not text:
        return
    outfile = fs.compose_unique_filepath(infile, outdir)
    try:
        fs.write_str_to_file(text, outfile)
    except OSError as e:
        print(e)


def main():
    """Run the command-line tool on a single input file or on a folder.

    The CLI arguments select the input path, the output location and
    whether the no-extension prompt is disabled. A file is processed on
    its own; for a folder, every path returned by ``fs.get_filelist`` is
    processed in turn.

    Raises:
        SystemExit: if the input is neither an existing file nor an
            existing directory.
    """
    # Local import: sys.exit replaces the site-provided ``exit`` helper,
    # which is intended for interactive use and may be absent (python -S)
    import sys

    # Get CLI arguments
    args = cli.parse_arguments()

    # If input is a file
    if os.path.isfile(args.input):
        infile = args.input
        indir = os.path.dirname(infile)
        outdir = fs.set_outdir(args.output, indir)
        _extract_to_outdir(infile, outdir, args.noprompt)

    # If input is a folder
    elif os.path.isdir(args.input):
        indir = args.input
        outdir = fs.set_outdir(args.output, indir, input_isdir=True)
        for infile in fs.get_filelist(indir):
            _extract_to_outdir(infile, outdir, args.noprompt)

    else:
        sys.exit("Error: input must be an existing file or directory")