Пример #1
0
    def generateCSV_test(self):
        '''
        Check that main.generateCSV returns a dataframe with one row per
        profile key, a leading 'True Flags' column holding the truth
        values, and one further column per qc test.
        Writing the dataframe to csv is assumed to be handled correctly
        by Pandas and is not verified here.
        '''

        true_flags = [True, False, False]
        # one list of per-profile flags for each qc test, ordered like qc_names
        qc_flags = [
            [False, False, False],
            [True, True, True],
        ]
        qc_names = ['x', 'y']
        profile_keys = [1000, 1001, 1002]

        observed = main.generateCSV(
            true_flags, qc_flags, qc_names, profile_keys, 'test')

        # expected layout: each row is [truth, x, y] for one profile key
        expected_rows = [
            [True, False, True],
            [False, False, True],
            [False, False, True],
        ]
        expected = pandas.DataFrame(
            expected_rows,
            index=profile_keys,
            columns=['True Flags', 'x', 'y'])

        assert_frame_equal(observed, expected, check_names=True)
Пример #2
0
    def generateCSV_test(self):
        '''
        Check that main.generateCSV returns a dataframe with one row per
        profile key, a leading 'True Flags' column holding the truth
        values, and one further column per qc test.
        Writing the dataframe to csv is assumed to be handled correctly
        by Pandas and is not verified here.
        '''

        true_flags = [True, False, False]
        # one list of per-profile flags for each qc test, ordered like qc_names
        flags_x = [False, False, False]
        flags_y = [True, True, True]
        qc_flags = [flags_x, flags_y]
        qc_names = ['x', 'y']
        profile_keys = [1000, 1001, 1002]

        observed = main.generateCSV(
            true_flags, qc_flags, qc_names, profile_keys, 'test')

        # expected layout: each row is [truth, x, y] for one profile key
        expected_rows = [
            [True, False, True],
            [False, False, True],
            [False, False, True],
        ]
        expected = pandas.DataFrame(
            expected_rows, index=profile_keys, columns=['True Flags', 'x', 'y'])

        assert_frame_equal(observed, expected, check_names=True)
Пример #3
0
    testNames.sort()
    print('{} quality control checks have been found'.format(len(testNames)))
    testNames = main.checkQCTestRequirements(testNames)
    print('{} quality control checks are able to be run:'.format(
        len(testNames)))
    for testName in testNames:
        print('  {}'.format(testName))

    # Identify data files and create a profile list.
    filenames = main.readInput('datafiles.json')
    profiles = main.extractProfiles(filenames)
    data.ds.profiles = profiles
    print('\n{} file(s) will be read containing {} profiles'.format(
        len(filenames), len(profiles)))

    # Parallel processing.
    print('\nPlease wait while QC is performed\n')
    processFile.parallel = main.parallel_function(processFile, sys.argv[2])
    parallel_result = processFile.parallel(filenames)

    # Recombine results
    truth, results, profileIDs = main.combineArrays(parallel_result)

    # Print summary statistics and write output file.
    main.printSummary(truth, results, testNames)
    main.generateCSV(truth, results, testNames, profileIDs, sys.argv[1])
else:
    print 'Please add command line arguments to name your output file and set parallelization:'
    print 'python AutoQC myFile 4'
    print 'will result in output written to results-myFile.csv, and will run the calculation parallelized across 4 cores.'
Пример #4
0
  # Identify and import tests
  testNames = main.importQC('qctests')
  testNames.sort()
  print('{} quality control checks have been found'.format(len(testNames)))
  testNames = main.checkQCTestRequirements(testNames)
  print('{} quality control checks are able to be run:'.format(len(testNames)))
  for testName in testNames:
    print('  {}'.format(testName))

  # Identify data files and create a profile list.
  filenames = main.readInput('datafiles.json')
  profiles  = main.extractProfiles(filenames)
  data.ds.profiles = profiles
  print('\n{} file(s) will be read containing {} profiles'.format(len(filenames), len(profiles)))

  # Parallel processing.
  print('\nPlease wait while QC is performed\n')
  processFile.parallel = main.parallel_function(processFile, sys.argv[2])
  parallel_result = processFile.parallel(filenames)

  # Recombine results
  truth, results, profileIDs = main.combineArrays(parallel_result)

  # Print summary statistics and write output file.
  main.printSummary(truth, results, testNames)
  main.generateCSV(truth, results, testNames, profileIDs, sys.argv[1])
else:
  print 'Please add command line arguments to name your output file and set parallelization:'
  print 'python AutoQC myFile 4'
  print 'will result in output written to results-myFile.csv, and will run the calculation parallelized across 4 cores.'