Example #1
    def process_ids(self, ids, _index, _type, data_source_batch_name):
        # Process batch ids
        # Materialize the keys so the batch can be serialized to JSON below
        batch_ids = list(ids.keys())
        self.load_config.log(LOG_LEVEL_INFO, 'Processing batch: ',
                             data_source_batch_name, ',', len(batch_ids), 'docs')

        data_source_batches = self.load_data_source_batches()
        data_source_batches[data_source_batch_name] = 0
        self.save_data_source_batches(data_source_batches)

        failed_ids = self.process_ids_method(
            ids, _index, _type, data_source_batch_name)

        processed_indices = list(batch_ids)

        # Save batch info
        batch_info = {
            'batch_ids': batch_ids,
            'processed_indices': processed_indices,
            'failed_ids': failed_ids
        }

        # print 'Batch ids', len(ids)
        # print 'Saving batch info', batch_info

        data_source_directory = self.load_config.data_source_directory()
        file_utils.save_file(data_source_directory,
                             data_source_batch_name + '.json', batch_info)
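
These examples call a file_utils module that is not shown on this page. As a point of reference only, here is a minimal sketch consistent with the call sites save_file(directory, file_name, data) and load_file(directory, file_name); the JSON serialization and directory creation are assumptions, not the library's confirmed behavior:

import json
import os


def save_file(directory, file_name, data):
    # Sketch: write `data` as JSON to directory/file_name,
    # creating the directory first if it does not exist.
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(os.path.join(directory, file_name), 'w') as f:
        json.dump(data, f)


def load_file(directory, file_name):
    # Sketch: read JSON back from directory/file_name; None if missing.
    path = os.path.join(directory, file_name)
    if not os.path.exists(path):
        return None
    with open(path) as f:
        return json.load(f)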
Example #2
    def get_progress(self):
        data_source_directory = self.load_config.data_source_directory()
        stats_file_name = self.data_source_stats_file_name()

        if self.data_source_stats is None:
            self.data_source_stats = file_utils.load_file(
                data_source_directory, stats_file_name)

        row_count = 0
        unique_ids = 0
        # if 'row_count' in self.data_source_stats:
        #     row_count = self.data_source_stats['row_count']
        # Guard against a missing stats file (load_file may return None)
        if self.data_source_stats and 'unique_ids' in self.data_source_stats:
            unique_ids = self.data_source_stats['unique_ids']

        docs_loaded = self.get_loaded_doc_count()

        if unique_ids > 0:
            self.load_config.log(LOG_LEVEL_INFO, 'docs loaded', docs_loaded,
                                 'unique_ids', unique_ids)
            progress = (docs_loaded / float(unique_ids)) * 100
            self.data_source_stats['progress'] = progress
            file_utils.save_file(data_source_directory, stats_file_name,
                                 self.data_source_stats)
            return progress

        return -1
Example #3
    def save_language_data(self, language, language_data):
        """
        TODO Refactoring!
        """
        # Resource directory: "values" or "values-<language>"
        values_dir = os.path.join(self.directory, "values")
        if language != self.default_lang:
            values_dir += "-" + language

        file_utils.save_file(language_data, values_dir, self.files_regex)
Example #4
    def save_stats(self):
        self.load_config.log(LOG_LEVEL_INFO, 'Saving stats...')

        stats = {
            'total_rows': self.total_rows,
            'total_ids': self.total_ids
        }

        self.load_config.log(LOG_LEVEL_INFO, stats)

        data_source_directory = self.load_config.data_source_directory()
        file_utils.save_file(data_source_directory, 'stats.json', stats)
Example #5
def upload(files, target):

    if len(files) == 0:
        return common_error("No sent files")

    filenames = []

    for file in files:

        # Looks if same filename was previously uploaded
        if not FileUtils.file_exist(target, file.filename):
            destination = FileUtils.save_file(target, file)

            if not destination:
                return common_error("Error uploading file...")

            if not FileUtils.is_valid_file(destination):
                return common_error("File format is not supported...")

            logging.info("Uploaded filename: %s" % destination)
        else:
            destination = FileUtils.get_destination(target, file.filename)
            logging.info("Previously uploaded filename: %s" % destination)

        SessionHelper.add_uploaded_file_to_session(file.filename)
        filenames.append(file.filename)

    return json.dumps(filenames)
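
The upload examples go through a FileUtils wrapper whose implementation is also not shown here. A hypothetical sketch matching these call sites, assuming the files are werkzeug FileStorage objects as in a Flask app, could look like the following (note that save_file is called as (target, file) in this example but as (file, target) in the later ones, so the argument order is per-project):

import os
from werkzeug.utils import secure_filename


class FileUtils(object):

    @staticmethod
    def save_file(target, file):
        # Sketch: store the uploaded FileStorage under the target directory
        # and return the destination path, or None on failure.
        try:
            if not os.path.exists(target):
                os.makedirs(target)
            destination = os.path.join(target, secure_filename(file.filename))
            file.save(destination)
            return destination
        except (OSError, IOError):
            return None

    @staticmethod
    def get_destination(target, filename):
        return os.path.join(target, secure_filename(filename))

    @staticmethod
    def file_exist(target, filename):
        return os.path.exists(FileUtils.get_destination(target, filename))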
Example #6
    def count_rows(self):
        data_source_directory = self.load_config.data_source_directory()
        stats_file_name = self.data_source_stats_file_name()
        self.data_source_stats = file_utils.load_file(data_source_directory,
                                                      stats_file_name)
        if self.data_source_stats is None or len(self.data_source_stats) == 0:
            self.count = 0
            self.data_source_batch = {}
            self.data_source.process_rows(0, self.count_row)
            self.load_config.log(LOG_LEVEL_INFO, 'Total rows:', self.count)
            self.load_config.log(LOG_LEVEL_INFO, 'Total ids:',
                                 len(self.data_source_batch))

            self.data_source_stats = {
                'row_count': self.count,
                'unique_ids': len(self.data_source_batch)
            }
            file_utils.save_file(data_source_directory, stats_file_name,
                                 self.data_source_stats)
Example #7
    def save_batch_info(self, start_index, row_count, unique_ids,
                        data_source_batch_name):
        data_source_directory = self.load_config.data_source_directory()

        self.load_config.log(
            LOG_LEVEL_DEBUG,
            'Finished processing batches, saving batch data...')
        row_count = row_count - start_index
        # batch_data = self.get_data_source_batch_summary(data_source_batch_name)
        batch_data = {}
        batch_data['start_index'] = start_index
        batch_data['row_count'] = row_count
        batch_data['unique_ids'] = unique_ids

        if not self.load_config.test_mode:
            file_utils.save_file(data_source_directory,
                                 data_source_batch_name + '.json', batch_data)
            self.load_config.log(LOG_LEVEL_INFO, 'Saved batch data',
                                 data_source_batch_name)
Example #8
    def save_summary(self, ids_to_load):
        data_loader_batch_name = file_utils.batch_file_name_with_prefix(
            DATA_LOADER_BATCH_PREFIX)

        # Find skipped ids
        for _id in ids_to_load:
            if (_id not in self.updated_ids
                    and _id not in self.indexed_ids
                    and _id not in self.failed_docs):
                doc = self.data_loader_batch[_id]
                self.add_to_failed_docs(_id, doc, 'Skipped')

        # Save failed docs
        if len(self.failed_docs) > 0:
            file_utils.save_file(self.failed_docs_directory,
                                 data_loader_batch_name + '.json',
                                 self.failed_docs)

        # Save batch summary
        summary = {
            'indexed_ids': list(self.indexed_ids.keys()),
            'updated_ids': list(self.updated_ids.keys()),
        }

        file_utils.save_file(self.loaded_docs_directory,
                             data_loader_batch_name + '.json', summary)

        # Print summary
        self.load_config.log(
            LOG_LEVEL_INFO,
            '---------------------------------------------------------------------------------------------'
        )
        self.load_config.log(LOG_LEVEL_INFO, self.load_config.server,
                             self.load_config.server_username, self.index,
                             self.type, ' Updated docs:',
                             len(self.updated_ids) + len(self.indexed_ids),
                             ', Failed docs:', len(self.failed_docs))
        self.load_config.log(
            LOG_LEVEL_INFO,
            '---------------------------------------------------------------------------------------------'
        )
Example #9
def upload(file, target):
    if not file.filename:
        return common_error("No sent file")

    if not FileUtils.is_valid_file(file.filename):
        return common_error("File extension is not supported...")

    destination = FileUtils.save_file(file, target)

    if not destination:
        return common_error("Error uploading file...")

    logging.debug("Uploaded filename: %s" % destination)
    session['uploaded_filename'] = file.filename

    return json.dumps(dict(filename=file.filename))
Example #10
def upload(file, target):
    if not file.filename:
        return common_error("No sent file")

    destination = FileUtils.save_file(file, target)

    if not destination:
        return common_error("Error uploading file...")

    if not FileUtils.is_valid_file(destination):
        return common_error("File extension is not supported...")

    logging.debug("Uploaded filename: %s" % destination)
    session['uploaded_filename'] = file.filename

    return json.dumps(dict(filename=file.filename))
Example #11
def upload(files, target):

    if len(files) == 0:
        return common_error("No sent files")

    filenames = []

    for file in files:

        destination = FileUtils.save_file(file, target)

        if not destination:
            return common_error("Error uploading file...")

        if not FileUtils.is_valid_file(destination):
            return common_error("File extension is not supported...")

        logging.info("Uploaded filename: %s" % destination)
        SessionHelper.add_uploaded_file_to_session(file.filename)
        filenames.append(file.filename)

    return json.dumps(filenames)
Example #12
    def save_data_source_batches(self, data_source_batches):
        data_source_directory = self.load_config.data_source_directory()
        file_utils.save_file(data_source_directory,
                             DATA_SOURCE_BATCHES_FILE, data_source_batches)
Example #13
    def save_language_data(self, language, language_data):
        """
        TODO Refactoring!
        """
        # Per-language resource directory, e.g. "en.lproj"
        lang_dir = os.path.join(self.directory, "{}.lproj".format(language))
        file_utils.save_file(language_data, lang_dir, self.files_regex)
Example #14
    def save_summary(self):
        data_source_directory = self.load_config.data_source_directory()
        data_source_summary = self.get_combined_data_source_summary()
        file_utils.save_file(data_source_directory, 'summary.json',
                             data_source_summary)