def _check_update(self, doc_name, definition):
    """Keep the stored definition hash for *doc_name* in sync with the
    definition file on disk, loading the definition on first sight.
    """
    from helpers import md5sum
    from helpers.document import load_definition, definition_lookup

    definition_path = definition_lookup(definition)
    current_hash = md5sum(definition_path)

    if self.has_document(doc_name):
        row = self.load_document(doc_name)
        if current_hash != row.property('info', 'definition_hash'):
            # NOTE(review): on a hash mismatch the definition is NOT
            # reloaded (a load_definition call here was disabled in the
            # original) — only the stored hash is refreshed. Confirm
            # this is intentional.
            # load_definition(self, definition_path)
            row.update_property('info', 'definition_hash', current_hash)
    else:
        # First time we see this document: load it, then record its hash.
        load_definition(self, definition_path)
        row = self.load_document(doc_name)
        row.update_property('info', 'definition_hash', current_hash)
def __call__(self, *args, **kwargs):
    """Compare the md5 recorded in the sidecar ``.md5`` file against the
    actual checksum of ``self.fn``, printing an ``rm`` suggestion when
    the sidecar is missing or the checksums disagree.
    """
    base, _ext = os.path.splitext(self.fn)
    sum_file = base + '.md5'

    # No sidecar checksum file at all — flag the data file for removal.
    if not os.path.exists(sum_file):
        print(f'# no sum file: {sum_file}')
        print(f'rm {self.fn}')
        return

    with open(sum_file, 'r') as handle:
        md5_in_sum_file = handle.read().strip()

    md5_in_bam_file = md5sum(self.fn)
    if md5_in_bam_file != md5_in_sum_file:
        print(f'# bam md5: {md5_in_bam_file} sum file: {md5_in_sum_file}')
        print(f'rm {self.fn}')
def create_files_info(self, file_paths, file_sizes=None,
                      create_md5=False, rel_file_base=None):
    """Build the per-file info dicts for the info section of the meta data.

    file_paths can also be a dict whose key is the file path and whose
    value is the file size. Each entry carries the file's 'length' and
    its 'path' split into non-empty components; an 'md5sum' key is added
    when *create_md5* is true.
    """
    if not file_sizes:
        file_sizes = determine_file_sizes(file_paths)

    def _entry_for(path):
        # Path relative to the common base, split on the OS separator,
        # dropping empty/whitespace-only segments.
        rel_name = get_file_name(path, rel_file_base)
        entry = {
            'length': file_sizes.get(path),
            'path': [part for part in rel_name.split(os.sep) if part.strip()],
        }
        if create_md5:
            entry['md5sum'] = md5sum(path)
        return entry

    return [_entry_for(path) for path in file_paths]
def create_info_dict(self, file_paths, pieces=None, file_sizes=None,
                     piece_size=None, total_size=None,
                     private=False, create_md5=False, file_name=None,
                     rel_file_base=None):
    """Create the 'info' part of the meta data.

    Missing sizes/piece parameters are derived from *file_paths*. For a
    single file the top-level 'length' (and optional 'md5sum') is set;
    for multiple files a 'files' list is built via create_files_info.
    The dict is validated before being returned.

    Returns:
        The populated info dict.

    Raises:
        Whatever validate_info_data raises for malformed info data.
    """
    # Fill in any data the caller did not supply.
    if not file_sizes:
        file_sizes = determine_file_sizes(file_paths)
    if not total_size:
        # Fixed: was file_sizes.itervalues(), which does not exist in
        # Python 3 (this file already relies on py3 f-strings).
        total_size = sum(file_sizes.values())
    if not piece_size:
        piece_size = determine_piece_size(total_size)

    info_data = {
        'piece length': piece_size,
        'pieces': ''.join(pieces),
        'private': 1 if private else 0,
    }

    # A file name is optional at this point.
    if file_name:
        info_data['name'] = file_name

    # The common prefix of all files is the base any info paths are
    # relative to.
    # NOTE(review): this unconditionally overwrites the rel_file_base
    # parameter (preserved from the original) — confirm whether a
    # caller-supplied value should take precedence.
    rel_file_base = os.path.commonprefix(file_paths)
    log.debug('rel file base: %s', rel_file_base)

    if len(file_paths) == 1:
        # 'length' is only appropriate for a single file.
        info_data['length'] = total_size
        # Optional per-file md5, if requested.
        if create_md5:
            info_data['md5sum'] = md5sum(file_paths[0])
        if not info_data.get('name'):
            # We'll go ahead and put a name.
            info_data['name'] = get_file_name(file_paths[0], rel_file_base)
    else:
        # Multiple files: describe each one individually.
        info_data['files'] = self.create_files_info(file_paths, file_sizes,
                                                    create_md5, rel_file_base)
        if not info_data.get('name'):
            # Guess a name from what the paths have in common.
            name = get_common_name(file_paths)
            if name:
                info_data['name'] = name

    # Fail loudly on malformed meta data. The original wrapped this in a
    # try/except that only re-raised (and used the py2-only
    # `except Exception, ex:` syntax, a SyntaxError under py3).
    validate_info_data(info_data)

    # Fixed: the original built and validated info_data but never
    # returned it.
    return info_data