Exemplo n.º 1
0
def get_files(path, dt_from, dt_to):
    """
    Find the files below a directory whose last-modified time lies between
    two timestamps.

    The GNU ``find``-based helper is tried first. If it raises
    :exc:`NotImplementedError` or :exc:`RuntimeError`, a warning is logged
    and the pure-Python helper is used instead.

    Parameters
    ----------
    path : str
        The directory in which to search for files
    dt_from : datetime.datetime
        The start of the modification-time window
    dt_to : datetime.datetime
        The end of the modification-time window

    Returns
    -------
    files : :obj:`list` of :obj:`str`
        The files whose modification times fall inside the requested window
        (sorted by modification time)
    """
    _logger.info(f'Starting new file-finding in {path}')
    try:
        # fast path: the GNU find helper is restricted to known extensions
        return _gnu_find_files(path, dt_from, dt_to, _ext.keys())
    except (NotImplementedError, RuntimeError) as err:
        fallback_notice = (f'GNU find returned error: {err}\n'
                           f'Falling back to pure Python implementation')
        _logger.warning(fallback_notice)
        # slow path: pure-Python search (takes no extension filter)
        return _find_files(path, dt_from, dt_to)
Exemplo n.º 2
0
    def test_gnu_find(self, fix_mountain_time):
        """
        Check that the GNU find helper reports the expected number of files
        in the ``Titan`` folder for a three-hour window on 2018-11-13.
        """
        # NOTE(review): ``fix_mountain_time`` is a fixture defined outside
        # this view; presumably it pins the timezone -- confirm
        search_root = os.path.join(os.environ["mmfnexus_path"], "Titan")
        window_start = datetime.fromisoformat("2018-11-13T13:00:00.000")
        window_end = datetime.fromisoformat("2018-11-13T16:00:00.000")

        found = gnu_find_files_by_mtime(search_root,
                                        dt_from=window_start,
                                        dt_to=window_end,
                                        extensions=_ext.keys())

        assert len(found) == 37
Exemplo n.º 3
0
 def test_gnu_find_stderr(self):
     """
     Check that searching a bad path raises :exc:`RuntimeError` and that
     the error message mentions the offending path.
     """
     bad_path = '...............'
     with pytest.raises(RuntimeError) as exc_info:
         # bad path should cause output to stderr, which should raise error;
         # the return value is irrelevant, so it is not bound to a name
         gnu_find_files_by_mtime(
             bad_path,
             dt_from=datetime.fromisoformat("2019-11-06T15:00:00.000"),
             dt_to=datetime.fromisoformat("2019-11-06T18:00:00.000"),
             extensions=_ext.keys())
     assert bad_path in str(exc_info.value)
Exemplo n.º 4
0
    def test_gnu_find_not_implemented(self, monkeypatch):
        """
        Check that the GNU find helper raises :exc:`NotImplementedError`
        when ``sys.platform`` reports ``win32``.
        """
        monkeypatch.setattr(sys, 'platform', 'win32')

        # build the arguments up front so that only the call under test sits
        # inside the ``pytest.raises`` block
        search_root = os.path.join(os.environ["mmfnexus_path"], "643Titan")
        window_start = datetime.fromisoformat("2019-11-06T15:00:00.000")
        window_end = datetime.fromisoformat("2019-11-06T18:00:00.000")

        with pytest.raises(NotImplementedError):
            # the return value is irrelevant, so it is not bound to a name
            gnu_find_files_by_mtime(search_root,
                                    dt_from=window_start,
                                    dt_to=window_end,
                                    extensions=_ext.keys())
Exemplo n.º 5
0
    def test_gnu_find_not_on_path(self, monkeypatch):
        """
        Check that the GNU find helper raises :exc:`RuntimeError` with the
        expected message when ``PATH`` is reduced to ``'.'``.
        """
        monkeypatch.setenv('PATH', '.')

        # build the arguments up front so that only the call under test sits
        # inside the ``pytest.raises`` block
        search_root = os.path.join(os.environ["mmfnexus_path"], "643Titan")
        window_start = datetime.fromisoformat("2019-11-06T15:00:00.000")
        window_end = datetime.fromisoformat("2019-11-06T18:00:00.000")

        with pytest.raises(RuntimeError) as exc_info:
            # the return value is irrelevant, so it is not bound to a name
            gnu_find_files_by_mtime(search_root,
                                    dt_from=window_start,
                                    dt_to=window_end,
                                    extensions=_ext.keys())
        assert str(exc_info.value) == \
            'find command was not found on the system PATH'
Exemplo n.º 6
0
    def test_gnu_and_pure_find_together(self):
        """
        Check that the GNU find helper and the pure-Python helper report the
        same files for the same path and date range.
        """
        search_root = os.path.join(os.environ["mmfnexus_path"], "JEOL3010")
        window = {
            'dt_from': datetime.fromisoformat("2019-07-24T11:00:00.000"),
            'dt_to': datetime.fromisoformat("2019-07-24T16:00:00.000"),
        }

        from_gnu = gnu_find_files_by_mtime(search_root,
                                           extensions=_ext.keys(),
                                           **window)
        from_python = find_files_by_mtime(search_root, **window)

        # put both results in plain sorted (path) order so the comparison
        # does not depend on the order in which each helper returned them
        from_gnu = sorted(from_gnu)
        from_python = sorted(from_python)

        assert len(from_gnu) == 55
        assert len(from_python) == 55
        assert from_gnu == from_python
Exemplo n.º 7
0
def build_acq_activities(instrument, dt_from, dt_to, sample_id,
                         generate_previews):
    """
    Build an XML string representation of each AcquisitionActivity for a
    single microscopy session. This includes setup parameters and metadata
    associated with each dataset obtained during a microscopy session. Unique
    AcquisitionActivities are delimited via clustering of file collection
    time to detect "long" breaks during a session.

    Parameters
    ----------
    instrument : :py:class:`~nexusLIMS.instruments.Instrument`
        One of the NexusLIMS instruments contained in the
        :py:attr:`~nexusLIMS.instruments.instrument_db` database.
        Its ``filestore_path`` is joined onto the ``mmfnexus_path``
        environment variable to get the directory searched for files.
    dt_from : datetime.datetime
        The starting timestamp that will be used to determine which files go
        in this record
    dt_to : datetime.datetime
        The ending timestamp used to determine the last point in time for
        which files should be associated with this record
    sample_id : str
        An identifier for the sample from which data was collected
    generate_previews : bool
        Whether or not to create the preview thumbnail images

    Returns
    -------
    acq_activities : str
        A string representing the XML output for each AcquisitionActivity
        associated with a given reservation/experiment on a microscope.

    activities : :obj:`list` of :obj:`~nexusLIMS.schemas.activity.AcquisitionActivity`
        The list of :py:class:`~nexusLIMS.schemas.activity.AcquisitionActivity`
        objects generated for the record

    Raises
    ------
    FileNotFoundError
        If no files with an extension known to ``_ext`` were found in the
        given time range
    """
    # keep the HyperSpy DigitalMicrograph reader's logger at WARNING level
    _logging.getLogger('hyperspy.io_plugins.digital_micrograph').setLevel(
        _logging.WARNING)

    start_timer = _timer()
    nexus_root = _os.environ['mmfnexus_path']
    path = _os.path.abspath(
        _os.path.join(nexus_root, instrument.filestore_path))
    # find the files to be included
    files = get_files(path, dt_from, dt_to)

    # remove all files but those supported by nexusLIMS.extractors
    files = [
        f for f in files if _os.path.splitext(f)[1].strip('.') in _ext
    ]

    end_timer = _timer()
    _logger.info(f'Found {len(files)} files in'
                 f' {end_timer - start_timer:.2f} seconds')

    # nothing to build a record from, so signal that to the caller
    if not files:
        raise FileNotFoundError('No files found in this time range')

    # get the timestamp boundaries of acquisition activities
    aa_bounds = cluster_filelist_mtimes(files)

    # add the last file's modification time to the boundaries list so that
    # every file has an upper bound to be compared against in the loop below
    aa_bounds.append(_os.path.getmtime(files[-1]))

    activities = [None] * len(aa_bounds)

    n_files = len(files)
    aa_idx = 0
    for i, f in enumerate(files):
        mtime = _os.path.getmtime(f)

        # a file belongs to the first activity whose upper boundary is not
        # before the file's mtime; aa_idx never moves backwards, so this
        # relies on ``files`` being ordered by modification time
        while mtime > aa_bounds[aa_idx]:
            aa_idx += 1

        # the first file to land in an activity creates it and sets its start
        if activities[aa_idx] is None:
            activities[aa_idx] = _AcqAc(
                start=_datetime.fromtimestamp(mtime))
        activity = activities[aa_idx]

        # add this file to the AA
        _logger.info(
            f'Adding file {i}/{n_files} '
            f'{f.replace(nexus_root, "").strip("/")} '
            f'to activity {aa_idx}')
        activity.add_file(f, generate_previews)
        # assume this file is the last one in the activity; the value is
        # overwritten by every later file that falls in the same activity
        activity.end = _datetime.fromtimestamp(mtime)

    _logger.info('Finished detecting activities')
    # collect the per-activity XML and join once at the end rather than
    # growing a string with repeated concatenation
    xml_parts = []
    for idx, activity in enumerate(activities):
        _logger.info(f'Activity {idx}: storing setup parameters')
        activity.store_setup_params()
        _logger.info(f'Activity {idx}: storing unique metadata values')
        activity.store_unique_metadata()

        xml_parts.append(activity.as_xml(idx,
                                         sample_id,
                                         indent_level=1,
                                         print_xml=False))

    return ''.join(xml_parts), activities