def handle(self, *args, **options):
    """See :meth:`django.core.management.base.BaseCommand.handle`.

    This method migrates existing data files into scale: it creates a
    TRANSFERRED :class:`Ingest` record for every matching file under the
    workspace path, then (unless ``--no-commit`` was given) ingests each
    record by creating its :class:`SourceFile` and firing the ingest
    trigger handler.
    """
    logger.info(u'Command starting: migratedata')

    workspace, workspace_path, local_path, data_types = None, None, None, []
    if options['workspace'] is not None and options['workspace_path'] is not None:
        workspace, workspace_path = options['workspace'], options['workspace_path']
        # The workspace option may be either a workspace name or a numeric id.
        tmp = Workspace.objects.filter(name=workspace)
        if tmp.count() > 0:
            workspace = tmp.first()
        else:
            workspace = Workspace.objects.get(id=int(workspace))
    else:
        logger.error('Must specify workspace and workspace-path.')
        return False

    if options['data_type'] is not None:
        data_types.extend(options['data_type'])

    mnt_dirs = None
    if options['local_path'] is not None:
        local_path = options['local_path']
    else:
        # No local copy supplied: mount/download the workspace into a temp dir.
        mnt_dirs = "/tmp", tempfile.mkdtemp()
        workspace.setup_download_dir(*mnt_dirs)
        local_path = os.path.join(mnt_dirs[1], workspace_path)

    logger.info("Ingesting files from %s/%s", workspace.name, workspace_path)
    filenames = self.generate_file_list(local_path, options['include'], options['exclude'])
    logger.info("Found %d files", len(filenames))

    # Prepare for ingest ala strike: one TRANSFERRED ingest record per file.
    ingest_records = {}
    for filename in filenames:
        logger.info("Generating ingest record for %s", filename)
        ingest = Ingest()
        ingest.file_name = os.path.basename(filename)
        ingest.file_path = os.path.join(workspace_path, os.path.relpath(filename, local_path))
        # NOTE(review): transfer_started is a naive UTC datetime while
        # transfer_ended uses timezone.now(); if USE_TZ is enabled these are
        # mixed naive/aware values on the same model -- confirm.
        ingest.transfer_started = datetime.utcfromtimestamp(os.path.getatime(filename))
        ingest.file_size = ingest.bytes_transferred = os.path.getsize(filename)
        ingest.transfer_ended = timezone.now()
        ingest.media_type = get_media_type(filename)
        ingest.workspace = workspace
        for data_type in data_types:
            ingest.add_data_type_tag(data_type)
        ingest.status = 'TRANSFERRED'
        if options['no_commit']:
            # Dry run: log the record that would have been saved.
            s = IngestDetailsSerializer()
            logger.info(s.to_representation(ingest))
        else:
            ingest.save()
            ingest_records[filename] = ingest.id
    logger.info("Ingests records created")

    # Start ingest tasks for all the files.
    if not options['no_commit']:
        logger.info("Starting ingest tasks")
        for filename in filenames:
            ingest = Ingest.objects.get(id=ingest_records[filename])
            logger.info("Processing ingest %s", ingest.file_name)
            # Each ingest is committed atomically so a failure leaves no
            # half-created source file behind.
            with transaction.atomic():
                ingest.ingest_started = timezone.now()
                sf = ingest.source_file = SourceFile.create()
                sf.update_uuid(ingest.file_name)
                for tag in ingest.get_data_type_tags():
                    sf.add_data_type_tag(tag)
                sf.media_type = ingest.media_type
                sf.file_name = ingest.file_name
                sf.file_size = ingest.file_size
                sf.file_path = ingest.file_path
                sf.workspace = workspace
                sf.is_deleted = False
                sf.deleted = None
                # Save once to get a primary key, then derive countries and
                # save again with that data.
                sf.save()
                sf.set_countries()
                sf.save()
                ingest.status = 'INGESTED'
                ingest.ingest_ended = timezone.now()
                ingest.source_file = sf
                ingest.save()
                IngestTriggerHandler().process_ingested_source_file(ingest.source_file, ingest.ingest_ended)
        logger.info("Ingests processed, monitor the queue for triggered jobs.")

    if mnt_dirs is not None:
        workspace.cleanup_download_dir(*mnt_dirs)
    logger.info(u'Command completed: migratedata')
def handle(self, *args, **options):
    """See :meth:`django.core.management.base.BaseCommand.handle`.

    This method migrates existing data files into scale: it creates a
    TRANSFERRED :class:`Ingest` record for every matching file under the
    workspace path, then (unless ``--no-commit`` was given) ingests each
    record by creating its :class:`SourceFile` and firing the ingest
    trigger handler.
    """
    logger.info(u'Command starting: migratedata')

    workspace, workspace_path, local_path, data_types = None, None, None, []
    if options['workspace'] is not None and options['workspace_path'] is not None:
        workspace, workspace_path = options['workspace'], options['workspace_path']
        # The workspace option may be either a workspace name or a numeric id.
        tmp = Workspace.objects.filter(name=workspace)
        if tmp.count() > 0:
            workspace = tmp.first()
        else:
            workspace = Workspace.objects.get(id=int(workspace))
    else:
        logger.error('Must specify workspace and workspace-path.')
        return False

    if options['data_type'] is not None:
        data_types.extend(options['data_type'])

    mnt_dirs = None
    if options['local_path'] is not None:
        local_path = options['local_path']
    else:
        # No local copy supplied: mount/download the workspace into a temp dir.
        mnt_dirs = "/tmp", tempfile.mkdtemp()
        workspace.setup_download_dir(*mnt_dirs)
        local_path = os.path.join(mnt_dirs[1], workspace_path)

    logger.info("Ingesting files from %s/%s", workspace.name, workspace_path)
    filenames = self.generate_file_list(local_path, options['include'], options['exclude'])
    logger.info("Found %d files", len(filenames))

    # Prepare for ingest ala strike: one TRANSFERRED ingest record per file.
    ingest_records = {}
    for filename in filenames:
        logger.info("Generating ingest record for %s", filename)
        ingest = Ingest()
        ingest.file_name = os.path.basename(filename)
        ingest.transfer_path = filename
        ingest.file_path = os.path.join(workspace_path, os.path.relpath(filename, local_path))
        # NOTE(review): timestamps here are naive UTC (datetime.utcnow /
        # utcfromtimestamp); confirm this matches the project's USE_TZ setting.
        ingest.transfer_started = datetime.utcfromtimestamp(os.path.getatime(filename))
        ingest.file_size = ingest.bytes_transferred = os.path.getsize(filename)
        ingest.transfer_ended = datetime.utcnow()
        ingest.media_type = get_media_type(filename)
        ingest.workspace = workspace
        for data_type in data_types:
            ingest.add_data_type_tag(data_type)
        ingest.status = 'TRANSFERRED'
        if options['no_commit']:
            # Dry run: log the record that would have been saved.
            s = IngestDetailsSerializer()
            logger.info(s.to_representation(ingest))
        else:
            ingest.save()
            ingest_records[filename] = ingest.id
    logger.info("Ingests records created")

    # Start ingest tasks for all the files.
    if not options['no_commit']:
        logger.info("Starting ingest tasks")
        for filename in filenames:
            ingest = Ingest.objects.get(id=ingest_records[filename])
            logger.info("Processing ingest %s", ingest.file_name)
            # Each ingest is committed atomically so a failure leaves no
            # half-created source file behind.
            with transaction.atomic():
                ingest.ingest_started = datetime.utcnow()
                sf = ingest.source_file = SourceFile()
                sf.update_uuid(ingest.file_name)
                for tag in ingest.get_data_type_tags():
                    sf.add_data_type_tag(tag)
                sf.media_type = ingest.media_type
                sf.file_name = ingest.file_name
                sf.file_size = ingest.file_size
                sf.file_path = ingest.file_path
                sf.workspace = workspace
                sf.is_deleted = False
                sf.deleted = None
                # Save once to get a primary key, then derive countries and
                # save again with that data.
                sf.save()
                sf.set_countries()
                sf.save()
                ingest.status = 'INGESTED'
                ingest.ingest_ended = datetime.utcnow()
                ingest.source_file = sf
                ingest.save()
                IngestTriggerHandler().process_ingested_source_file(ingest.source_file, ingest.ingest_ended)
        logger.info("Ingests processed, monitor the queue for triggered jobs.")

    if mnt_dirs is not None:
        workspace.cleanup_download_dir(*mnt_dirs)
    logger.info(u'Command completed: migratedata')