def parse_isatab(username, public, path,
                 additional_raw_data_file_extension=None, isa_archive=None,
                 pre_isa_archive=None, file_base_path=None):
    """Parse an ISA-Tab file and create or update the dataset it belongs to.

    Runs an IsaTabParser on ``path`` and hands the UUID of the resulting
    investigation to create_dataset. Any exception raised while doing so is
    logged together with its traceback and swallowed, so callers receive
    None instead of an exception.

    Use like this: parse_isatab(username, is_public, folder_name,
    additional_raw_data_file_extension, isa_archive=<path>,
    pre_isa_archive=<path>, file_base_path=<path>)

    Parameters:
    username: username of the person the dataset will belong to
    public: boolean that determines if the dataset is public or not
    path: absolute path of the ISA-Tab file to parse
    additional_raw_data_file_extension: an optional argument that will append
        a suffix to items in Raw Data File as need be
    isa_archive: if you're passing a directory, a zipped version of the
        directory for storage and legacy purposes
    pre_isa_archive: optional copy of files that were converted to ISA-Tab
    file_base_path: if your file locations are relative paths, this is the
        base

    Returns:
    the dataset UUID returned by create_dataset, or None if something went
    wrong
    """
    logger.info("logging from parse_isatab")
    p = IsaTabParser()
    p.additional_raw_data_file_extension = additional_raw_data_file_extension
    p.file_base_path = file_base_path

    try:
        investigation = p.run(path, isa_archive=isa_archive,
                              preisa_archive=pre_isa_archive)
        return create_dataset(investigation.uuid, username, public=public)
    except Exception:
        # Report the failure without breaking the caller. logger.exception
        # attaches the active traceback to the log record; the previous
        # traceback.print_* calls return None, so "None" was what got logged
        # while the real traceback was written to stdout.
        logger.exception("Failed to parse ISA-Tab file '%s'", path)
    return None
Пример #2
0
def parse_isatab(username,
                 public,
                 path,
                 additional_raw_data_file_extension=None,
                 isa_archive=None,
                 pre_isa_archive=None,
                 file_base_path=None):
    """Parse an ISA-Tab file and create or update the dataset it belongs to.

    The file at ``path`` is run through an IsaTabParser; the UUID of the
    investigation it produces is then passed to create_dataset. If either
    step raises, the error is logged with its traceback and None is
    returned instead of propagating the exception.

    Use like this: parse_isatab(username, is_public, folder_name,
    additional_raw_data_file_extension, isa_archive=<path>,
    pre_isa_archive=<path>, file_base_path=<path>)

    Parameters:
    username: username of the person the dataset will belong to
    public: boolean that determines if the dataset is public or not
    path: absolute path of the ISA-Tab file to parse
    additional_raw_data_file_extension: an optional argument that will append
        a suffix to items in Raw Data File as need be
    isa_archive: if you're passing a directory, a zipped version of the
        directory for storage and legacy purposes
    pre_isa_archive: optional copy of files that were converted to ISA-Tab
    file_base_path: if your file locations are relative paths, this is the
        base

    Returns:
    the dataset UUID returned by create_dataset, or None if something went
    wrong
    """
    logger.info("logging from parse_isatab")
    p = IsaTabParser()
    p.additional_raw_data_file_extension = additional_raw_data_file_extension
    p.file_base_path = file_base_path

    try:
        investigation = p.run(path,
                              isa_archive=isa_archive,
                              preisa_archive=pre_isa_archive)
        data_uuid = create_dataset(investigation.uuid, username, public=public)
    except Exception:
        # Log the message and traceback without breaking things for the
        # caller. The earlier traceback.print_tb/print_exception calls wrote
        # to stdout and returned None, so the log only ever contained "None".
        logger.exception("Failed to parse ISA-Tab file '%s'", path)
        return None
    return data_uuid
Пример #3
0
def parse_isatab(username, public, path,
                 additional_raw_data_file_extension=None, isa_archive=None,
                 pre_isa_archive=None, file_base_path=None):
    """Parse an ISA-Tab file and create or update the dataset it belongs to.

    Before parsing, the function checks whether ``username`` already owns a
    dataset named "<identifier>: <title>" (as reported by the parser for
    ``path``). If so, the checksum of that dataset's stored ISA archive is
    compared with the checksum of ``path``; when they are equal the file is
    skipped and the existing dataset's UUID is returned.

    Use like this: parse_isatab(username, is_public, folder_name,
    additional_raw_data_file_extension, isa_archive=<path>,
    pre_isa_archive=<path>, file_base_path=<path>)

    Parameters:
    username: username of the person the dataset will belong to
    public: boolean that determines if the dataset is public or not
    path: absolute path of the ISA-Tab file to parse
    additional_raw_data_file_extension: an optional argument that will append a
    suffix to items in Raw Data File as need be
    isa_archive: if you're passing a directory, a zipped version of the
    directory for storage and legacy purposes
    pre_isa_archive: optional copy of files that were converted to ISA-Tab
    file_base_path: if your file locations are relative paths, this is the base

    Returns:
    a tuple (dataset UUID, base name of path, skipped). ``skipped`` is True
    when an identical file was already imported and parsing was skipped. The
    UUID is None if parsing or dataset creation failed.
    """
    p = IsaTabParser()
    p.additional_raw_data_file_extension = additional_raw_data_file_extension
    p.file_base_path = file_base_path
    file_name = os.path.basename(path)

    # Get the study title and investigation id and see if anything is in the
    # database and if so compare the checksum.

    # 1. First check whether the user exists. Any lookup failure is treated
    # as "no such user", which only disables the duplicate check below.
    try:
        user = User.objects.get(username__exact=username)
    except Exception:
        user = None

    # 2. If user exists we need to quickly get the dataset title to see if its
    # already in the DB
    if user:
        # The checksum and the investigation it was computed for are recorded
        # together so the UUID returned on a match always belongs to the
        # dataset whose archive actually matched.
        existing_checksum = None
        existing_investigation = None

        (identifier, title) = p.get_dataset_name(path)
        if identifier is None or title is None:
            datasets = []
        else:
            dataset_title = "%s: %s" % (identifier, title)
            datasets = DataSet.objects.filter(name=dataset_title)

        # go through the datasets owned by this user
        for ds in datasets:
            if not ds.get_owner() == user:
                continue
            # 3. Finally we need to get the checksum so that we can
            # compare that to our given file.
            candidate = ds.get_investigation()
            try:
                file_store_item = FileStoreItem.objects.get(
                    uuid=candidate.isarchive_file)
            except (FileStoreItem.DoesNotExist,
                    FileStoreItem.MultipleObjectsReturned) as e:
                # objects.get() raises instead of returning None; without
                # the stored archive there is nothing to compare against.
                logger.error(
                    "Could not look up the stored isatab archive. "
                    "Error: '%s'", e)
                continue
            try:
                logger.info("Get file: %s",
                            file_store_item.get_absolute_path())
                archive_checksum = calculate_checksum(
                    file_store_item.get_file_object())
            except IOError as e:
                logger.error(
                    "Original isatab archive wasn't found. "
                    "Error: '%s'", e)
                continue
            existing_checksum = archive_checksum
            existing_investigation = candidate

        # 4. Finally if we got a checksum for an existing file, we calculate
        # the checksum for the new file and compare them
        if existing_checksum:
            new_checksum = None
            try:
                with open(path, 'rb') as f:
                    new_checksum = calculate_checksum(f)
            except IOError as e:
                # Cannot compare; fall through to the regular parse below,
                # which reports its own failure.
                logger.error(
                    "Could not read '%s' to calculate its checksum. "
                    "Error: '%s'", path, e)
            if existing_checksum == new_checksum:
                # Checksums are identical so we can skip this file.
                logger.info("The checksum of both files is the same: %s",
                            existing_checksum)
                links = existing_investigation.investigationlink_set.all()
                return (links[0].data_set.uuid, file_name, True)

    try:
        investigation = p.run(path, isa_archive=isa_archive,
                              preisa_archive=pre_isa_archive)
        data_uuid = create_dataset(investigation.uuid, username, public=public)
    except Exception:
        # Log the error without breaking things for the caller.
        # logger.exception attaches the active traceback to the record; the
        # previous traceback.print_* calls return None, so only "None" was
        # logged while the traceback itself went to stdout.
        logger.exception("Failed to parse ISA-Tab file '%s'", path)
        return None, file_name, False
    return (data_uuid, file_name, False)