	def run(self):
		"""Run combine_data.combine_data for every month (1-12) of self.year.

		Progress messages are printed before and after each month and at the
		start and end of the whole run.
		"""
		# Single-argument print(...) produces identical output on Python 2 and 3.
		print("STARTING PROCESS " + str(self.year))
		for month in range(1, 13):
			print("***Starting to clean %d %d" % (self.year, month))
			# NOTE: the clean_text(self.year, month) step is disabled here; only
			# the combine step runs.
			combine_data.combine_data(self.year, month)
			print("***Done with %d %d" % (self.year, month))
		print("EXITING PROCESS " + str(self.year))
def collapse_data(data_dir, incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):
    """
    collapse_data reads in files from a series of results_files and collapses
    across all of them so they can be plotted together

    Inputs:     data_dir        directory that contains the RESULTS tree
                incl_excl_list  )
                n_b0s_list      ) one value from each of these lists is
                sep_av_list     ) combined (itertools.product) to select
                transform_list  ) the results files that are read
                roi_list        )
    Output:     (data_allorders, results_allorders_dir)
                Both values describe the LAST combination that was processed
                (the accumulator is reset for every combination). Both are
                None if no combination was processed at all, and
                data_allorders is None if no results file matched.
    """
    import os
    import numpy as np
    from glob import glob
    import itertools as it
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('  Collapsing data: B0 orders by ec volume')

    # Defined up front so the return below never hits an unbound name
    data_allorders = None
    results_allorders_dir = None

    # Find all the results files in all the b0_order folders
    for incl_excl, n_b0s, sep_av, transform, roi_name in it.product(
            incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        b0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_{}'.format(n_b0s))
        results_file = '{}_FA_MD_vol.txt'.format(roi_name)

        # Name the results dir that this is going into. It does not depend on
        # the b0 order, so it is set even when there are no orders to loop over.
        results_allorders_dir = os.path.join(b0s_dir, 'ALL_ORDERS', sep_av, transform)

        # int() replaces np.int, which was only an alias of the builtin and
        # has been removed from NumPy
        for b0_order in get_b0_orders(int(n_b0s)):
            glob_string = os.path.join(b0s_dir, 'B0_ORDER_{}'.format(b0_order),
                                       sep_av, transform, results_file)

            # Extra values handed to combine_data together with each file's data
            extra_fields = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_path in glob(glob_string):
                data = np.genfromtxt(results_path, dtype=None, delimiter=' ', names=True)
                data_allorders = combine_data(data_allorders, data, extra_fields)

    return data_allorders, results_allorders_dir
def one_round_of_simulation(cwd,simultaneous_worker,wait_minute):
    # Waits for one round of worker output files in `cwd`, then returns
    # whatever combine_data(simultaneous_worker) produces.
    #
    #   cwd                 directory that holds the per-worker pickle files
    #   simultaneous_worker number of workers; files are indexed 0..n-1
    #   wait_minute         time limit for the polling loop, in minutes
    #
    # NOTE(review): this function looks incomplete as it stands -- see the
    # notes below on `finished_generating_data`, `succeed` and the
    # over-indented pickle.load lines. Confirm against the original source.

    #the first step we do is to generate the sh files
    # NOTE(review): no code for this step is present here.

    #the second step is to check whether the final data is generated
    start_time = int(time.time())

    # NOTE(review): `finished_generating_data` is not assigned before this
    # loop, and the loop body contains no sleep, so it polls continuously.
    while not finished_generating_data:
        for index in range(simultaneous_worker):
            # Each worker is expected to write X_training<index>.p and
            # Y_training<index>.p into cwd.
            x_file_name = os.path.join(cwd, "X_training" + str(index) + ".p")
            y_file_name = os.path.join(cwd, "Y_training" + str(index) + ".p")

                #try to open those files
                #as sometimes the files can still not be generated
                # NOTE(review): these lines are indented one level deeper than
                # the statements above without an enclosing block; presumably
                # a try/except that recorded success was lost -- confirm.
                # The opened file objects are never closed explicitly.
                pickle.load(open(x_file_name, "rb"))
                pickle.load(open(y_file_name, "rb"))




        # NOTE(review): `succeed` is not assigned anywhere in this function;
        # the nested sum suggests a 2-D container -- confirm. This branch only
        # prints; it does not end the loop.
        if sum(sum(succeed))==simultaneous_worker:
            print('finished this round of simulation in normal end')
            print(' ')

        print('for this round of simulation, the time has elapsed this much',int(time.time())-start_time,'seconds')

        # Time limit reached: run `qdel -u zh296` through the shell and stop polling.
        if int(time.time())-start_time>60*wait_minute:
            print('stop this round of simulation because time constraint')
            subprocess.call('qdel -u zh296', shell=True)
            finished_generating_data = True

    #the third step is to increase the data
    # append new data to the total pickle file
    total_good_data_so_far = combine_data(simultaneous_worker)

    return total_good_data_so_far
def Q_ec_vol_n6(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, colors, shapes):
    """
    Q_ec_vol_n6 asks the question:
        "How does the volume that you register to affect the measurement
        when you use all the data"

    It reads in all the necessary files from a series of results_files,
    collapses across all of them and plots them together. Only the B0S_6
    results are used.

    Inputs:     data_dir        directory that contains the RESULTS tree
                incl_excl_list  )
                sep_av_list     ) every combination of one value from each
                transform_list  ) of these lists is collapsed and plotted
                roi_list        )
                colors, shapes  passed through unchanged to plot_data
    Output:     None (plot_data is called once per combination)
    """
    import os
    from glob import glob
    import itertools as it
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('  Question: How does the choice of eddy correct volume affect the measurements?')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        b0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_6')
        results_file = '{}_FA_MD_vol.txt'.format(roi_name)

        # A plain 6 replaces np.int(6); np.int was only an alias of the
        # builtin int and has been removed from NumPy
        for b0_order in get_b0_orders(6):
            glob_string = os.path.join(b0s_dir, 'B0_ORDER_{}'.format(b0_order),
                                       sep_av, transform, results_file)

            # Extra values handed to combine_data together with each file's data
            extra_fields = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_path in glob(glob_string):
                data = read_in_data(results_path)
                data_allorders = combine_data(data_allorders, data, extra_fields)

        # Name the results dir that this is going into:
        results_allorders_dir = os.path.join(b0s_dir, 'ALL_ORDERS', sep_av, transform)

        # Now plot the data
        plot_data(data_allorders, results_allorders_dir, roi_name, colors, shapes)
if __name__ == '__main__':
    # Script entry point: runs LAX_ROUND rounds of split / combine steps.
    # NOTE(review): several lines appear to be missing from this block (see
    # the notes below); confirm against the original source before relying
    # on it.
    time_start = time.time()
    for i in range(LAX_ROUND):
        time_start_round = time.time()
        if i == 0:
            # Round 0 splits the data found in __ORIGIN_FOLDER__.
            print('round:', i, 'is processing......')
            splite_data.splite_data(__ORIGIN_FOLDER__, __SPLITE_FOLDER__)
            print('Compelte splite data in round:', i)
            # NOTE(review): no call precedes this "flix" message here.
            print('Compelte flix data in round:', i)
            print('Deleting splite data in round:', i)
                # NOTE(review): this string is indented as if it were the
                # argument of a call that is no longer present (presumably a
                # shell invocation) -- confirm.
                "cd /home/youyizhe/TrafficDataProcesser/test_script/splite/;rm *.txt"
            combine_data.combine_data(__FLIXED_FOLDER__, __COMBINE_FOLDER__)
            print('Compelte combine data in round:', i)
            print('Deleting flixed data in round:', i)
                # NOTE(review): same as above -- orphaned call argument.
                "cd /home/youyizhe/TrafficDataProcesser/test_script/flixed/;rm *.txt"
            # NOTE(review): from here on the split reads __COMBINE_FOLDER__
            # instead of __ORIGIN_FOLDER__; this looks like the body of an
            # `else:` branch for later rounds whose header was lost -- confirm.
            print('round:', i, 'is processing......')
            splite_data.splite_data(__COMBINE_FOLDER__, __SPLITE_FOLDER__)
            print('Compelte splite data in round:', i)
            print('Compelte flix data in round:', i)
            print('Deleting splite data in round:', i)
                # NOTE(review): orphaned call argument, as above.
                "cd /home/youyizhe/TrafficDataProcesser/test_script/splite/;rm *.txt"

import docx
from combine_data import combine_data

# combine_data() hands back the full list of test records plus the two
# failure buckets.
combined_data, temperature_fail, unknown_fail = combine_data()

# Overall count first, then the per-category failure counts the other
# tallies are derived from.
total_tests = len(combined_data)
test_fail_temp = len(temperature_fail)
test_fail_unknown = len(unknown_fail)
tests_failed = test_fail_temp + test_fail_unknown
test_passed = total_tests - tests_failed

# New report document with its title as a level-0 heading.
document = docx.Document()
document.add_heading("Acme Test report", 0)

# The first field of the first and of the last record bound the reported
# range (presumably date strings -- the values are concatenated as text).
first_record = combined_data[0]
last_record = combined_data[-1]
test_ran_dates_str = (
    "Tests Summary: Tests ran from: " + first_record[0] + " to " + last_record[0]
)
document.add_heading(test_ran_dates_str, level=1)

# Paragraph that introduces the total number of tests.
p = document.add_paragraph("Total number of tests: ")

import docx
from combine_data import combine_data

# combine_data() hands back the full list of test records plus the two
# failure buckets.
combined_data, temperature_fail, unknown_fail = combine_data()

# Overall count first, then the per-category failure counts the other
# tallies are derived from.
total_tests = len(combined_data)
test_fail_temp = len(temperature_fail)
test_fail_unknown = len(unknown_fail)
tests_failed = test_fail_temp + test_fail_unknown
test_passed = total_tests - tests_failed

# New report document with its title as a level-0 heading.
document = docx.Document()
document.add_heading("Acme Test report", 0)

# The first field of the first and of the last record bound the reported
# range (presumably date strings -- the values are concatenated as text).
first_record = combined_data[0]
last_record = combined_data[-1]
test_ran_dates_str = (
    "Tests Summary: Tests ran from: " + first_record[0] + " to " + last_record[0]
)
document.add_heading(test_ran_dates_str, level=1)

# Paragraph that introduces the total, followed by the number itself in bold.
p = document.add_paragraph("Total number of tests: ")
total_run = p.add_run(str(total_tests))
total_run.bold = True
# Collects this instance's partial result files from S3, combines them with
# combine_data, uploads the combined file and deletes the partial objects.
# NOTE(review): several statements in this fragment are cut off (unclosed
# calls, an `except` without its `try`, a comprehension without its opening
# bracket); confirm against the original source before relying on it.
for file_extension in file_extensions:
        # Selects the keys of all bucket objects that start with
        # "results/<instance_id>" and end with ".<file_extension>".
        # NOTE(review): the statement that stores these keys (presumably
        # into `files`) is not present here.
        obj.key for obj in s3.Bucket(manager_data['s3_bucket']).objects.all()
        if obj.key.startswith(f"results/{instance_id}")
        and obj.key.endswith(f".{file_extension}")

logging.info(f"Combining {len(files)} Partial Data Files")
for file in files:
    # NOTE(review): call is cut off after the object key argument.
    response = s3.meta.client.download_file(manager_data['s3_bucket'], file,

# The combined file name is built from instance_id and output_file.
fileout = f"results/{instance_id}_{output_file}"

combine_data(files, fileout)

logging.info(f"Uploading combined data file '{fileout}' to S3 bucket")
# NOTE(review): call is cut off after the bucket argument.
response = s3.meta.client.upload_file(fileout, manager_data['s3_bucket'],

# Remove each partial object from the bucket.
for file in files:
        response = s3.meta.client.delete_object(
            Bucket=manager_data['s3_bucket'], Key=file)
    # NOTE(review): no matching `try:` and no handler body are present.
    except FileNotFoundError:

# NOTE(review): the filter condition of this comprehension is cut off.
log_files = [
    obj.key for obj in s3.Bucket(manager_data['s3_bucket']).objects.all() if
def Q_n_b0s(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list,
            ec_b0_list, colors, shapes):
    """
    Q_n_b0s asks the question:
        "How does the number of B0s you include change your measurement?"

    It reads in all the necessary files from a series of results_files,
    collapses across all of them and plots them together. This is done
    twice: once per value in ec_b0_list (keeping only the b0 orders whose
    first two items equal that value), and once across all b0 orders.

    Inputs:     data_dir        directory that contains the RESULTS tree
                incl_excl_list  )
                sep_av_list     ) every combination of one value from each
                transform_list  ) of these lists is collapsed and plotted
                roi_list        )
                ec_b0_list      values compared against order[:2]
                colors, shapes  passed through unchanged to plot_data
    Output:     None (plot_data is called once per combination)
    """
    import os
    from glob import glob
    import itertools as it
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('  Question: How does the number of B0s change your measurement')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name, ec_b0 in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list):

        # Start off with an empty data array
        data_allorders_allb0s = None

        results_file = '{}_FA_MD_vol.txt'.format(roi_name)

        for n_b0s in range(1, 7):

            # n_b0s is already an int, so no np.int conversion is needed
            # (np.int has been removed from NumPy)
            b0_orders = [order for order in get_b0_orders(n_b0s)
                         if order[:2] == ec_b0]

            for b0_order in b0_orders:

                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                           'B0S_{}'.format(n_b0s),
                                           'B0_ORDER_{}'.format(b0_order),
                                           sep_av, transform, results_file)

                # Extra values handed to combine_data with each file's data
                extra_fields = {'b0_order': b0_order, 'n_b0s': n_b0s}

                # Read in every file and combine them
                for results_path in glob(glob_string):
                    data = read_in_data(results_path)
                    data_allorders_allb0s = combine_data(
                        data_allorders_allb0s, data, extra_fields)

        # Name the results dir that this is going into. The ec_b0 component
        # keeps the plots for different ec_b0 values in separate directories.
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS',
                                                    incl_excl, 'ALL_B0S',
                                                    'B0_{}'.format(ec_b0),
                                                    sep_av, transform)

        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir,
                  roi_name, colors, shapes)

    # Now do the same thing, but with REALLY all the B0s

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders_allb0s = None

        results_file = '{}_FA_MD_vol.txt'.format(roi_name)

        for n_b0s in range(1, 7):

            for b0_order in get_b0_orders(n_b0s):

                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                           'B0S_{}'.format(n_b0s),
                                           'B0_ORDER_{}'.format(b0_order),
                                           sep_av, transform, results_file)

                # Extra values handed to combine_data with each file's data
                extra_fields = {'b0_order': b0_order, 'n_b0s': n_b0s}

                # Read in every file and combine them
                for results_path in glob(glob_string):
                    data = read_in_data(results_path)
                    data_allorders_allb0s = combine_data(
                        data_allorders_allb0s, data, extra_fields)

        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS',
                                                    incl_excl, 'ALL_B0S',
                                                    'ALL_B0S', sep_av,
                                                    transform)

        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir,
                  roi_name, colors, shapes)
def collapse_data(data_dir, incl_excl_list, n_b0s_list, sep_av_list,
                  transform_list, roi_list):
    """
    collapse_data reads in files from a series of results_files and collapses
    across all of them so they can be plotted together

    Inputs:     data_dir        directory that contains the RESULTS tree
                incl_excl_list  )
                n_b0s_list      ) one value from each of these lists is
                sep_av_list     ) combined (itertools.product) to select
                transform_list  ) the results files that are read
                roi_list        )
    Output:     (data_allorders, results_allorders_dir)
                Both values describe the LAST combination that was processed
                (the accumulator is reset for every combination). Both are
                None if no combination was processed at all, and
                data_allorders is None if no results file matched.
    """
    import os
    import numpy as np
    from glob import glob
    import itertools as it
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('  Collapsing data: B0 orders by ec volume')

    # Defined up front so the return below never hits an unbound name
    data_allorders = None
    results_allorders_dir = None

    # Find all the results files in all the b0_order folders
    for incl_excl, n_b0s, sep_av, transform, roi_name in it.product(
            incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        b0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl,
                               'B0S_{}'.format(n_b0s))
        results_file = '{}_FA_MD_vol.txt'.format(roi_name)

        # Name the results dir that this is going into. It does not depend on
        # the b0 order, so it is set even when there are no orders to loop over.
        results_allorders_dir = os.path.join(b0s_dir, 'ALL_ORDERS', sep_av,
                                             transform)

        # int() replaces np.int, which was only an alias of the builtin and
        # has been removed from NumPy
        for b0_order in get_b0_orders(int(n_b0s)):
            glob_string = os.path.join(b0s_dir,
                                       'B0_ORDER_{}'.format(b0_order), sep_av,
                                       transform, results_file)

            # Extra values handed to combine_data together with each file's data
            extra_fields = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_path in glob(glob_string):
                data = np.genfromtxt(results_path,
                                     dtype=None,
                                     delimiter=' ',
                                     names=True)
                data_allorders = combine_data(data_allorders, data,
                                              extra_fields)

    return data_allorders, results_allorders_dir
def Q_ec_vol_n6(data_dir, incl_excl_list, sep_av_list, transform_list,
                roi_list, colors, shapes):
    """
    Q_ec_vol_n6 asks the question:
        "How does the volume that you register to affect the measurement
        when you use all the data"

    It reads in all the necessary files from a series of results_files,
    collapses across all of them and plots them together. Only the B0S_6
    results are used.

    Inputs:     data_dir        directory that contains the RESULTS tree
                incl_excl_list  )
                sep_av_list     ) every combination of one value from each
                transform_list  ) of these lists is collapsed and plotted
                roi_list        )
                colors, shapes  passed through unchanged to plot_data
    Output:     None (plot_data is called once per combination)
    """
    import os
    from glob import glob
    import itertools as it
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('  Question: How does the choice of eddy correct volume affect the measurements?')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        b0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_6')
        results_file = '{}_FA_MD_vol.txt'.format(roi_name)

        # A plain 6 replaces np.int(6); np.int was only an alias of the
        # builtin int and has been removed from NumPy
        for b0_order in get_b0_orders(6):
            glob_string = os.path.join(b0s_dir,
                                       'B0_ORDER_{}'.format(b0_order), sep_av,
                                       transform, results_file)

            # Extra values handed to combine_data together with each file's data
            extra_fields = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_path in glob(glob_string):
                data = read_in_data(results_path)
                data_allorders = combine_data(data_allorders, data,
                                              extra_fields)

        # Name the results dir that this is going into:
        results_allorders_dir = os.path.join(b0s_dir, 'ALL_ORDERS', sep_av,
                                             transform)

        # Now plot the data
        plot_data(data_allorders, results_allorders_dir, roi_name, colors,
                  shapes)
def Q_n_b0s(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list, colors, shapes):
    """
    Q_n_b0s asks the question:
        "How does the number of B0s you include change your measurement?"

    It reads in all the necessary files from a series of results_files,
    collapses across all of them and plots them together. This is done
    twice: once per value in ec_b0_list (keeping only the b0 orders whose
    first two items equal that value), and once across all b0 orders.

    Inputs:     data_dir        directory that contains the RESULTS tree
                incl_excl_list  )
                sep_av_list     ) every combination of one value from each
                transform_list  ) of these lists is collapsed and plotted
                roi_list        )
                ec_b0_list      values compared against order[:2]
                colors, shapes  passed through unchanged to plot_data
    Output:     None (plot_data is called once per combination)
    """
    import os
    from glob import glob
    import itertools as it
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data

    def _collect(incl_excl, sep_av, transform, roi_name, keep_order):
        # Combine every matching results file for 1 to 6 B0s, skipping the
        # b0 orders for which keep_order(order) is false.
        collected = None
        results_file = '{}_FA_MD_vol.txt'.format(roi_name)
        for n_b0s in range(1, 7):
            # n_b0s is already an int, so no np.int conversion is needed
            # (np.int has been removed from NumPy)
            for b0_order in get_b0_orders(n_b0s):
                if not keep_order(b0_order):
                    continue
                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_{}'.format(n_b0s),
                                           'B0_ORDER_{}'.format(b0_order), sep_av, transform, results_file)

                # Extra values handed to combine_data with each file's data
                extra_fields = {'b0_order': b0_order, 'n_b0s': n_b0s}

                # Read in every file and combine them
                for results_path in glob(glob_string):
                    data = read_in_data(results_path)
                    collected = combine_data(collected, data, extra_fields)
        return collected

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('  Question: How does the number of B0s change your measurement')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name, ec_b0 in it.product(incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list):

        # Only the orders whose first two items equal ec_b0 are kept. The
        # lambda is used immediately, so it always sees the current ec_b0.
        data_allorders_allb0s = _collect(incl_excl, sep_av, transform, roi_name,
                                         lambda order: order[:2] == ec_b0)

        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'ALL_B0S',
                                                    'B0_{}'.format(ec_b0), sep_av, transform)
        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir, roi_name, colors, shapes)

    # Now do the same thing, but with REALLY all the B0s
    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(incl_excl_list, sep_av_list, transform_list, roi_list):

        data_allorders_allb0s = _collect(incl_excl, sep_av, transform, roi_name,
                                         lambda order: True)

        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'ALL_B0S',
                                                    'ALL_B0S', sep_av, transform)
        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir, roi_name, colors, shapes)