def parse_isatab(username, public, path,
                 additional_raw_data_file_extension=None, isa_archive=None,
                 pre_isa_archive=None, file_base_path=None):
    """Parse an ISA-Tab file and attach the investigation to a dataset.

    Runs IsaTabParser on ``path`` to create the database entries, then
    calls create_dataset() so that the resulting investigation belongs to
    a dataset (created or updated) for ``username``.

    Use like this:
        parse_isatab(username, is_public, folder_name,
                     additional_raw_data_file_extension,
                     isa_archive=<path>, pre_isa_archive=<path>,
                     file_base_path=<path>)

    Parameters:
    username: username of the person the dataset will belong to
    public: boolean that determines if the dataset is public or not
    path: absolute path of the ISA-Tab file to parse
    additional_raw_data_file_extension: an optional argument that will
        append a suffix to items in Raw Data File as need be
    isa_archive: if you're passing a directory, a zipped version of the
        directory for storage and legacy purposes
    pre_isa_archive: optional copy of files that were converted to ISA-Tab
    file_base_path: if your file locations are relative paths, this is the
        base

    Returns:
    the dataset UUID returned by create_dataset(), or None if parsing or
    dataset creation raised an exception (the traceback is logged).
    """
    logger.info("logging from parse_isatab")
    p = IsaTabParser()
    p.additional_raw_data_file_extension = additional_raw_data_file_extension
    p.file_base_path = file_base_path
    try:
        investigation = p.run(path, isa_archive=isa_archive,
                              preisa_archive=pre_isa_archive)
        return create_dataset(investigation.uuid, username, public=public)
    except Exception:
        # Best-effort by design: report the failure and return None instead
        # of propagating. SystemExit/KeyboardInterrupt are not swallowed.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # traceback.print_*() write to a stream and return None, which would
        # put the text "None" in the log; format_*() return the actual lines.
        logger.error("*** print_tb:")
        logger.error("".join(traceback.format_tb(exc_traceback)))
        logger.error("*** print_exception:")
        logger.error("".join(
            traceback.format_exception(exc_type, exc_value, exc_traceback)))
        return None
def parse_isatab(username, public, path,
                 additional_raw_data_file_extension=None, isa_archive=None,
                 pre_isa_archive=None, file_base_path=None):
    """Parse an ISA-Tab file and attach the investigation to a dataset.

    Runs IsaTabParser on ``path`` to create the database entries, then
    calls create_dataset() so that the resulting investigation belongs to
    a dataset (created or updated) for ``username``.

    Use like this:
        parse_isatab(username, is_public, folder_name,
                     additional_raw_data_file_extension,
                     isa_archive=<path>, pre_isa_archive=<path>,
                     file_base_path=<path>)

    Parameters:
    username: username of the person the dataset will belong to
    public: boolean that determines if the dataset is public or not
    path: absolute path of the ISA-Tab file to parse
    additional_raw_data_file_extension: an optional argument that will
        append a suffix to items in Raw Data File as need be
    isa_archive: if you're passing a directory, a zipped version of the
        directory for storage and legacy purposes
    pre_isa_archive: optional copy of files that were converted to ISA-Tab
    file_base_path: if your file locations are relative paths, this is the
        base

    Returns:
    the dataset UUID returned by create_dataset(), or None if parsing or
    dataset creation raised an exception (the traceback is logged).
    """
    logger.info("logging from parse_isatab")
    p = IsaTabParser()
    p.additional_raw_data_file_extension = additional_raw_data_file_extension
    p.file_base_path = file_base_path
    try:
        investigation = p.run(path, isa_archive=isa_archive,
                              preisa_archive=pre_isa_archive)
        data_uuid = create_dataset(investigation.uuid, username,
                                   public=public)
    except Exception:
        # Best-effort by design: report the failure and return None instead
        # of propagating. SystemExit/KeyboardInterrupt are not swallowed.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # traceback.print_*() write to a stream and return None, which would
        # put the text "None" in the log; format_*() return the actual lines.
        logger.error("*** print_tb:")
        logger.error("".join(traceback.format_tb(exc_traceback)))
        logger.error("*** print_exception:")
        logger.error("".join(
            traceback.format_exception(exc_type, exc_value, exc_traceback)))
        return None
    return data_uuid
def parse_isatab(username, public, path,
                 additional_raw_data_file_extension=None, isa_archive=None,
                 pre_isa_archive=None, file_base_path=None):
    """Parse an ISA-Tab file and attach the investigation to a dataset.

    Before parsing, checks whether ``username`` already owns a dataset
    named "<identifier>: <title>" whose stored ISA archive has the same
    checksum as the file at ``path``; if so the import is skipped.
    Otherwise runs IsaTabParser on ``path`` and calls create_dataset() so
    that the resulting investigation belongs to a dataset (created or
    updated) for ``username``.

    Use like this:
        parse_isatab(username, is_public, folder_name,
                     additional_raw_data_file_extension,
                     isa_archive=<path>, pre_isa_archive=<path>,
                     file_base_path=<path>)

    Parameters:
    username: username of the person the dataset will belong to
    public: boolean that determines if the dataset is public or not
    path: absolute path of the ISA-Tab file to parse
    additional_raw_data_file_extension: an optional argument that will
        append a suffix to items in Raw Data File as need be
    isa_archive: if you're passing a directory, a zipped version of the
        directory for storage and legacy purposes
    pre_isa_archive: optional copy of files that were converted to ISA-Tab
    file_base_path: if your file locations are relative paths, this is the
        base

    Returns:
    a tuple (dataset_uuid, file_name, skipped) where
        dataset_uuid: UUID of the dataset, or None if parsing or dataset
            creation raised an exception (the traceback is logged)
        file_name: os.path.basename(path)
        skipped: True if an existing dataset with an identical archive
            checksum was found and nothing was imported, False otherwise
    """
    p = IsaTabParser()
    p.additional_raw_data_file_extension = additional_raw_data_file_extension
    p.file_base_path = file_base_path

    # Get the study title and investigation id and see if anything is in the
    # database and if so compare the checksum

    # 1. First check whether the user exists
    try:
        user = User.objects.get(username__exact=username)
    except Exception:
        # Any lookup failure just disables the duplicate check below.
        user = None

    # 2. If user exists we need to quickly get the dataset title to see if
    # it's already in the DB
    if user:
        checksum = None
        # Investigation whose stored archive produced `checksum`; kept
        # separately so the UUID returned on a match belongs to the same
        # dataset the checksum came from.
        existing_investigation = None
        (identifier, title) = p.get_dataset_name(path)
        if identifier is None or title is None:
            datasets = []
        else:
            dataset_title = "%s: %s" % (identifier, title)
            datasets = DataSet.objects.filter(name=dataset_title)

        # go through datasets looking for ones with the correct owner
        for ds in datasets:
            if ds.get_owner() != user:
                continue
            # 3. Get the checksum of the stored archive so that we can
            # compare that to our given file.
            investigation = ds.get_investigation()
            try:
                file_store_item = FileStoreItem.objects.get(
                    uuid=investigation.isarchive_file)
            except FileStoreItem.DoesNotExist:
                # objects.get() raises instead of returning None; a missing
                # record simply means there is nothing to compare against.
                logger.error(
                    "No FileStoreItem found for isatab archive '%s'",
                    investigation.isarchive_file)
                continue
            if file_store_item:
                try:
                    logger.info("Get file: %s",
                                file_store_item.get_absolute_path())
                    checksum = calculate_checksum(
                        file_store_item.get_file_object())
                except IOError as e:
                    logger.error(
                        "Original isatab archive wasn't found. "
                        "Error: '%s'", e)
                else:
                    existing_investigation = investigation

        # 4. Finally if we got a checksum for an existing file, we calculate
        # the checksum for the new file and compare them
        if checksum:
            # TODO: error handling
            with open(path, 'rb') as f:
                new_checksum = calculate_checksum(f)
            if checksum == new_checksum:
                # Checksums are identical so we can skip this file.
                logger.info("The checksum of both files is the same: %s",
                            checksum)
                links = existing_investigation.investigationlink_set.all()
                return (links[0].data_set.uuid,
                        os.path.basename(path),
                        True)

    try:
        investigation = p.run(path, isa_archive=isa_archive,
                              preisa_archive=pre_isa_archive)
        data_uuid = create_dataset(investigation.uuid, username,
                                   public=public)
        return (data_uuid, os.path.basename(path), False)
    except Exception:
        # Best-effort by design: report the failure and return a None UUID
        # instead of propagating. SystemExit/KeyboardInterrupt are not
        # swallowed.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # traceback.print_*() write to a stream and return None, which would
        # put the text "None" in the log; format_*() return the actual lines.
        logger.error("*** print_tb:")
        logger.error("".join(traceback.format_tb(exc_traceback)))
        logger.error("*** print_exception:")
        logger.error("".join(
            traceback.format_exception(exc_type, exc_value, exc_traceback)))
    return None, os.path.basename(path), False