Example #1
def dataload_finalize(args, api, configs):
    """
    Analyzes the results of the previous steps and summarizes them in the output.

    Args:
        args: arguments captured from CLI
        api: object to perform the API calls
        configs: shared configuration variables used across the script
    """

    logger.info("Checkign results")
    print("\nDATALOAD RESULTS")

    success_result = configs["success_handler_filename"]
    fail_result = configs["fail_handler_filename"]
    retry_result = configs["csv_retry_writer"].get_filename()

    print("\t[{}] Total processed users\n".format(configs['total_records']))

    print("\t[{}] Import success. Number of new records inserted in database".
          format(count_lines_in_file(success_result)))
    print("\t[{}] Import failures".format(count_lines_in_file(fail_result)))

    result_files = [success_result, fail_result]

    # If retry file is not empty, add it to the result list and print the info,
    # otherwise, remove the file.
    retry_line_number = count_lines_in_file(retry_result)
    if retry_line_number > 0:
        print("\t[{}] Import retries\n".format(retry_line_number))
        result_files.append(retry_result)
    else:
        print("\n")
        delete_file(retry_result, logger)

    # If delta migration is enabled, get the update log files.
    if args.delta_migration:
        # Append the update log files to the result list.
        update_success_result = configs["update_success_handler_filename"]
        update_fail_result = configs["update_fail_handler_filename"]
        result_files.extend((update_success_result, update_fail_result))

        print("\t[{}] Update success. Existing users that were updated".format(
            count_lines_in_file(update_success_result)))
        print("\t[{}] Update failures\n".format(
            count_lines_in_file(update_fail_result)))

    result = api.call('entity.count',
                      type_name=args.type_name,
                      timeout=args.timeout)
    print("\t[{}] Total number of records in Entity Type [{}] after execution".
          format(result["total_count"], args.type_name))

    print("\nPlease check detailed results in the files below:")
    for file in result_files:
        print("\t{}".format(file))
Example #2
def main():
    # Remove stale log files from previous runs.
    for file in ['logs/chat.txt', 'logs/session.txt']:
        delete_file(file)

    # Check the training state before initializing, then train only if needed.
    needs_training = not is_already_trained()
    bot = initialize_bot()
    if needs_training:
        train_bot(bot)

    run_bot(bot)
Example #3
    def delete(self, key):
        try:
            file = FileModel.query.filter_by(id=key).first()
            path = f"{current_app.config.get('UPLOAD_FOLDER')}/{file.file_id}"
            delete_file(path)

            db.session.delete(file)
            db.session.commit()

            return True
        except Exception:
            traceback.print_exc()
            return {'msg': "Error deleting file"}, 500
Example #4
    def delete(self, key):
        try:
            model = TrainModel.query.filter_by(model_id=key).first()

            utils.delete_model_files(key)
            utils.delete_file(
                f"{current_app.config.get('PRE_PROCESSING_RAW')}/{key}.csv")
            db.session.delete(model)
            db.session.commit()

            return self.get()

        except Exception:
            traceback.print_exc()
            return {'msg': "Error deleting train model"}, 500
Example #5
    def delete(self):
        try:
            payload = request.get_json()

            if 'path' not in payload:
                return {'msg': 'path is required to delete pre-processing data'}, 500

            path = payload['path']
            utils.delete_file(path)

            return {'msg': 'Deleted successfully'}

        except Exception:
            traceback.print_exc()
            return {"msg": "Error on DELETE Train"}, 500
Example #6
    def delete(self, key):
        try:
            datasource = DatasourceModel.query.filter_by(id=key).first()
            file = FileModel.query.filter_by(id=datasource.file_id).first()

            path = f"{current_app.config.get('UPLOAD_FOLDER')}/{file.file_id}"
            utils.delete_file(path)

            db.session.delete(datasource)
            db.session.commit()

            db.session.delete(file)
            db.session.commit()

            return self.get()

        except Exception:
            traceback.print_exc()
            return {'msg': "Error deleting datasource"}, 500
Example #7
def generate_results(midi_path, parallels_path, chords_path):
    """Generates results of the analysed midi file.

    Parameters
    ----------
    midi_path, parallels_path, chords_path : str
        The paths to the associated directories.

    Returns
    -------
    bool
        True if the analysis completed successfully, False otherwise.
    """

    try:
        df = fp.midi_to_df(midi_path)
        chords = fp.PreProcessor(4).get_progression(df)
        chord_progression = fp.ChordProgression(chords)
    except Exception:
        delete_file(midi_path)
        return False

    if not generate_parallels_result(chord_progression, parallels_path):
        delete_file(midi_path)
        return False

    if not generate_pitch_class_set(chord_progression, chords_path):
        delete_file(midi_path)
        return False

    return True
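
A short usage sketch for generate_results; the paths below are placeholders,
and the generate_parallels_result / generate_pitch_class_set helpers come from
the surrounding project:

# Hypothetical driver: on failure the uploaded MIDI file has already been
# deleted by generate_results itself.
midi_path = "uploads/example.mid"
parallels_path = "results/parallels"
chords_path = "results/chords"

if generate_results(midi_path, parallels_path, chords_path):
    print("Analysis complete, results written to {} and {}".format(
        parallels_path, chords_path))
else:
    print("Analysis failed, the uploaded MIDI file was removed.")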
Example #8
def dataload_update(args, api, configs):
    """
    Creates threads to update records if any of them were marked
    as duplicates during import

    Args:
        args: arguments captured from CLI
        api: object to perform the API calls
        configs: shared configuration variables used across the script
    """
    print("\n\nStarting the update process for the duplicated records\n")
    if args.dry_run:
        logger.debug("Dry run. Dataload update was skipped.")
        print("\tDry run mode detected. Skipping dataload update.")
        return

    if not args.delta_migration:
        return

    logger.info("Checking if there are any duplicate records to update")
    print("\tChecking if there are any duplicate records to update\n")
    data_file = configs['csv_tmp_writer'].get_filename()
    record_update_count = count_lines_in_file(data_file)
    plurals = configs['plurals']

    # Check if there is any record to be updated. If none, delete the temporary
    # file and proceed to finalize
    if record_update_count < 1:
        print("\tNo records found to be updated\n")
        logger.info("No records found to be updated")
        delete_file(data_file, logger)
        return
    print("\t{} duplicate records were found and will be updated\n".format(
        record_update_count))

    with ThreadPoolExecutor(max_workers=args.workers) as executor:
        logger.info(
            "Loading data from TEMP file into the '{}' entity type.".format(
                args.type_name))

        # Calculate the minimum processing time per record for each worker so
        # that the combined request rate stays within the configured rate limit
        min_time = 0
        if args.rate_limit > 0:
            min_time = round(args.workers / args.rate_limit, 2)

        logger.debug("Minimum processing time per worker: {}".format(min_time))

        print("\tValidating UTF-8 encoding and checking for Byte Order Mark\n")
        # Create a CSV reader that reads the temporary CSV file and returns
        # one complete record at a time.
        reader = CsvReader(data_file)

        # TQDM Progress Bar.
        pbar = tqdm(total=record_update_count, unit="rec")
        pbar.set_description("Updating Records.")

        # Iterate over the rows in the CSV and dispatch update_record()
        # calls to the worker threads.
        futures = []
        for row in reader:
            logger.debug(row)
            record_info = {
                'record': row[2],
                'batch_id': row[0],
                'line': row[1]
            }

            kwargs = {
                'api': api,
                'args': args,
                'record_info': record_info,
                'min_time': min_time,
                'pbar': pbar,
                'plurals': plurals
            }
            futures.append(executor.submit(update_record, **kwargs))

        # Iterate over the future results to raise any uncaught exceptions.
        # Note that this means uncaught exceptions will not be raised until
        # AFTER all workers are dispatched.
        logger.info("Waiting for workers to finish")
        for future in futures:
            future.result()

        pbar.close()
        logger.info("Update finished!")

        # Delete the temporary file.
        delete_file(data_file, logger)
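
The min_time value computed above is the per-record budget that keeps the
aggregate request rate within args.rate_limit: with 4 workers and a rate limit
of 10 records per second, min_time = round(4 / 10, 2) = 0.4 seconds per record.
update_record itself is not shown in this example; a minimal sketch of how such
a worker could honour min_time (hypothetical, the real implementation also
handles retries and writes the result files):

import time


def update_record(api, args, record_info, min_time, pbar, plurals):
    """Hypothetical worker: update one record, then pad out to min_time."""
    start = time.monotonic()
    try:
        # Assumed API call; the real script builds the update request from
        # record_info (batch_id, line, record) and uses 'plurals' when shaping
        # the attribute payload.
        api.call('entity.update',
                 type_name=args.type_name,
                 attributes=record_info['record'],
                 timeout=args.timeout)
    finally:
        pbar.update(1)
        # Sleep for whatever remains of the per-record budget so that all
        # workers combined stay at or below the configured rate limit.
        elapsed = time.monotonic() - start
        if min_time > elapsed:
            time.sleep(min_time - elapsed)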