Пример #1
0
def run_consumer():
    """Receive work results over ZeroMQ and write each one to a CSV file.

    A DEALER socket with identity "aaa" is connected to
    tcp://cluster1:77774 and polled with a one second receive timeout.
    Every received JSON message is written to ``out-<n>.csv`` (``;`` as
    delimiter), where ``n`` is the running number of the message, starting
    at 1.  For each entry of the message's "data" list that has results the
    file gets the entry's "origSpec" (double quotes removed), the header
    rows and the output rows produced by ``monica_io``; every entry is
    followed by an empty row.

    Command line arguments of the form ``key=value`` only override keys
    that already exist in ``config``; ``config`` starts out empty here, so
    no argument currently has an effect.

    NOTE(review): nothing in this function sets ``leave`` to True, so the
    loop only ends when the process is interrupted.
    """
    config = {}
    for arg in sys.argv[1:]:
        # Split at the first "=" only: values may contain "=", and an
        # argument without "=" is ignored instead of raising ValueError.
        key, sep, value = arg.partition("=")
        if sep and key in config:
            config[key] = value

    print("consumer config: %s" % (config,))

    received_env_count = 1
    context = zmq.Context()
    socket = context.socket(zmq.DEALER)
    socket.setsockopt(zmq.IDENTITY, "aaa")
    socket.connect("tcp://cluster1:77774")

    socket.RCVTIMEO = 1000
    leave = False

    while not leave:

        try:
            result = socket.recv_json(encoding="latin-1")
        except Exception:
            # Receive timeout or unusable message: keep polling.
            # KeyboardInterrupt/SystemExit are not subclasses of Exception,
            # so the otherwise endless loop can still be stopped.
            continue

        # The doubled spaces reproduce the output of the former
        # comma-separated print statement.
        print("received work result  %s  customId:  %s"
              % (received_env_count, result.get("customId", "")))

        with open("out-" + str(received_env_count) + ".csv", 'wb') as out_file:
            writer = csv.writer(out_file, delimiter=";")

            for data_ in result.get("data", []):
                results = data_.get("results", [])
                orig_spec = data_.get("origSpec", "")
                output_ids = data_.get("outputIds", [])

                if results:
                    writer.writerow([orig_spec.replace("\"", "")])
                    for row in monica_io.write_output_header_rows(
                            output_ids,
                            include_header_row=True,
                            include_units_row=True,
                            include_time_agg=False):
                        writer.writerow(row)

                    for row in monica_io.write_output(output_ids, results):
                        writer.writerow(row)

                # Blank row after every data section, with or without results.
                writer.writerow([])

        received_env_count += 1

    print("exiting run_consumer()")
Пример #2
0
    def process_message(msg):
        """Handle one worker message; return True when the consumer should stop.

        A message whose "type" is "finish" only requests the stop.  Every
        other message is treated as a work result and written to
        ``config["out"] + <n> + ".csv"``, where ``n`` counts the results
        handled so far (the first file gets number 1).  ``config`` is read
        from the enclosing scope.  After a result has been written the
        function returns True only if ``config["leave_after_finished_run"]``
        equals True.
        """
        # The counter lives on the function object so that it survives
        # between calls.  It has to be created exactly once: testing for any
        # other attribute name resets it on every call, and every result
        # would then overwrite the same "<out>1.csv" file.
        if not hasattr(process_message, "received_env_count"):
            process_message.received_env_count = 0

        if msg["type"] == "finish":
            print("c: received finish message")
            return True

        # The doubled spaces reproduce the output of the former
        # comma-separated print statement.
        print("c: received work result  %s  customId:  %s"
              % (process_message.received_env_count,
                 str(msg.get("customId", ""))))

        process_message.received_env_count += 1

        out_path = (config["out"] +
                    str(process_message.received_env_count) + ".csv")
        with open(out_path, 'wb') as out_file:
            writer = csv.writer(out_file, delimiter=",")

            for data_ in msg.get("data", []):
                results = data_.get("results", [])
                orig_spec = data_.get("origSpec", "")
                output_ids = data_.get("outputIds", [])

                if results:
                    writer.writerow([orig_spec.replace("\"", "")])
                    for row in monica_io.write_output_header_rows(
                            output_ids,
                            include_header_row=True,
                            include_units_row=True,
                            include_time_agg=False):
                        writer.writerow(row)

                    for row in monica_io.write_output(output_ids, results):
                        writer.writerow(row)

                # Blank row after every data section, with or without results.
                writer.writerow([])

        # "== True" is kept on purpose: other truthy settings (for example a
        # non-empty string) must not stop the consumer.
        leave = False
        if config["leave_after_finished_run"] == True:
            leave = True
        return leave
Пример #3
0
    def process_message(msg):
        """Process a single message and tell the caller whether to leave.

        "finish" messages (``msg["type"] == "finish"``) return True without
        writing anything.  All other messages are work results: the n-th
        result is written to ``config["out"] + str(n) + ".csv"`` (n starts
        at 1) as comma separated rows.  ``config`` comes from the enclosing
        scope; after a result the return value is True only when
        ``config["leave_after_finished_run"]`` equals True.
        """
        # Initialise the per-function counter once.  The guard must test the
        # attribute that is actually assigned, otherwise the counter is reset
        # to 0 on each call and all results end up in the same file.
        if not hasattr(process_message, "received_env_count"):
            process_message.received_env_count = 0

        leave = False

        if msg["type"] == "finish":
            print("c: received finish message")
            leave = True

        else:
            # The doubled spaces reproduce the output of the former
            # comma-separated print statement.
            print("c: received work result  %s  customId:  %s"
                  % (process_message.received_env_count,
                     str(msg.get("customId", ""))))

            process_message.received_env_count += 1
            file_number = str(process_message.received_env_count)

            with open(config["out"] + file_number + ".csv", 'wb') as csv_file:
                writer = csv.writer(csv_file, delimiter=",")

                for section in msg.get("data", []):
                    results = section.get("results", [])
                    orig_spec = section.get("origSpec", "")
                    output_ids = section.get("outputIds", [])

                    if results:
                        writer.writerow([orig_spec.replace("\"", "")])
                        for row in monica_io.write_output_header_rows(
                                output_ids,
                                include_header_row=True,
                                include_units_row=True,
                                include_time_agg=False):
                            writer.writerow(row)

                        for row in monica_io.write_output(output_ids, results):
                            writer.writerow(row)

                    # Every section is terminated by an empty row.
                    writer.writerow([])

            # Comparison with True is intentional: only True (or 1) stops the
            # consumer, other truthy values do not.
            if config["leave_after_finished_run"] == True:
                leave = True

        return leave
def main():
    """Collect work results from the workers until a "finish" message arrives.

    A PULL socket is connected to tcp://localhost:7777 when ``LOCAL_RUN`` is
    set, otherwise to tcp://cluster2:7777, and polled with a one second
    receive timeout.

    With ``write_normal_output_files`` False (the hard-coded setting) the
    "|"-separated customId of each result is parsed, ``create_output`` turns
    the result into rows and the rows are buffered per
    (crop_id, production_situation, period, grcp, climate_resolution,
    soil_resolution).  ``write_data`` is called for a key as soon as its
    buffer holds at least ``start_writing_lines_threshold`` rows, and for
    every non-empty buffer whenever a receive attempt fails.

    With ``write_normal_output_files`` True every result is written to its
    own ``out/out-<name>.csv`` instead, the name being built from the
    customId parts.

    NOTE(review): rows that are still buffered when the "finish" message
    arrives are not passed to ``write_data`` here - confirm whether a final
    flush is needed.
    """
    data = defaultdict(list)

    i = 1
    context = zmq.Context()
    socket = context.socket(zmq.PULL)
    if LOCAL_RUN:
        socket.connect("tcp://localhost:7777")
    else:
        socket.connect("tcp://cluster2:7777")
    socket.RCVTIMEO = 1000
    leave = False
    write_normal_output_files = False
    start_writing_lines_threshold = 270

    while not leave:

        try:
            result = socket.recv_json(encoding="latin-1")
        except Exception:
            # No usable message (typically the receive timeout): hand every
            # non-empty buffer to write_data.  KeyboardInterrupt/SystemExit
            # are not subclasses of Exception and therefore still stop the
            # consumer.
            for key in list(data.keys()):
                if len(data[key]) > 0:
                    (crop_id, production_situation, period, grcp,
                     climate_resolution, soil_resolution) = key
                    write_data(crop_id, production_situation, period, grcp,
                               climate_resolution, soil_resolution, data)
            continue

        if result["type"] == "finish":
            print("received finish message")
            leave = True
            continue

        # The doubled spaces reproduce the output of the former
        # comma-separated print statement.
        print("received work result  %s  customId:  %s"
              % (i, result.get("customId", "")))

        ci_parts = result["customId"].split("|")
        crop_id = ci_parts[0]
        # Parts 2 and 4 look like "(row/col)": strip the first and last
        # character, then split at "/".
        cl_row_, cl_col_ = ci_parts[2][1:-1].split("/")
        s_row_, s_col_ = ci_parts[4][1:-1].split("/")
        period = ci_parts[5]
        grcp = ci_parts[6]
        production_situation = ci_parts[8]

        if not write_normal_output_files:
            climate_resolution = int(ci_parts[1])
            cl_row, cl_col = (int(cl_row_), int(cl_col_))
            soil_resolution = int(ci_parts[3])
            s_row, s_col = (int(s_row_), int(s_col_))
            gcm = ci_parts[7]

            key = (crop_id, production_situation, period, grcp,
                   climate_resolution, soil_resolution)
            res = create_output(climate_resolution, cl_row, cl_col,
                                soil_resolution, s_row, s_col, crop_id, period,
                                gcm, result)
            data[key].extend(res)

            if len(data[key]) >= start_writing_lines_threshold:
                write_data(crop_id, production_situation, period, grcp,
                           climate_resolution, soil_resolution, data)

        else:
            # Resolutions stay strings here: they only go into the file name.
            climate_resolution = ci_parts[1]
            soil_resolution = ci_parts[3]
            file_name = "".join([
                crop_id, "_", production_situation, "_P", period,
                "_GRP", grcp, "_cr", climate_resolution,
                "c", cl_col_, "r", cl_row_,
                "xsr", soil_resolution, "c", s_col_, "r", s_row_,
            ])

            with open("out/out-" + file_name + ".csv", 'wb') as out_file:
                writer = csv.writer(out_file, delimiter=",")

                for data_ in result.get("data", []):
                    results = data_.get("results", [])
                    orig_spec = data_.get("origSpec", "")
                    output_ids = data_.get("outputIds", [])

                    if results:
                        writer.writerow([orig_spec.replace("\"", "")])
                        for row in monica_io.write_output_header_rows(
                                output_ids,
                                include_header_row=True,
                                include_units_row=True,
                                include_time_agg=False):
                            writer.writerow(row)

                        for row in monica_io.write_output(output_ids, results):
                            writer.writerow(row)

                    # Blank row after every data section.
                    writer.writerow([])

        i += 1
Пример #5
0
def collector():
    "collect data from workers"
    # Pulls JSON results from a ZeroMQ PULL socket until a message with
    # type "finish" arrives.  In the code path that is reachable with the
    # hard-coded flags below, one line per result is appended to
    # out/general_data.csv and nothing else is written.

    # Per-region buffers.  They are only filled in the part of the loop that
    # is currently unreachable (see the NOTE(review) further down).
    year_data = defaultdict(list)
    crop_data = defaultdict(list)
    soc_data = defaultdict(list)
    pheno_data = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

    # (Re)create the general data file with its header row; the loop below
    # reopens it in append mode for every result.
    with open("out/general_data.csv", 'wb') as _:
        csv_writer = csv.writer(_, delimiter=",")
        csv_writer.writerow(["row_col", "clay", "tavg", "precip", "slope"])

    i = 0
    context = zmq.Context()
    socket = context.socket(zmq.PULL)
    #socket = context.socket(zmq.DEALER)
    #context.setsockopt(zmq.IDENTITY, "ts_sustag_nrw")
    if LOCAL_RUN:
        socket.connect("tcp://localhost:7777")
    else:
        socket.connect("tcp://cluster1:7777")
    # receive timeout in milliseconds; a timed-out recv raises and lands in
    # the except branch below
    socket.RCVTIMEO = 1000
    leave = False
    write_normal_output_files = False
    start_writing_lines_threshold = 1000
    while not leave:

        try:
            result = socket.recv_json()
        except:
            # NOTE(review): the bare except also catches KeyboardInterrupt.
            # NOTE(review): `suffix` is only assigned in the unreachable code
            # below; this call would raise NameError if crop_data ever held
            # rows.  As written, crop_data stays empty, so the loop body does
            # not run.
            for region_id in crop_data.keys():
                if len(crop_data[region_id]) > 0:
                    write_data(region_id, year_data, crop_data, pheno_data,
                               soc_data, suffix)
            continue

        if result["type"] == "finish":
            print "received finish message"
            leave = True

        elif not write_normal_output_files:
            # The last printed value is the length of the first buffered
            # year_data list, or 0 when nothing is buffered.
            print "received work result ", i, " customId: ", result.get(
                "customId", ""), " len(year_data): ", len(
                    (year_data.values()[:1] or [[]])[0])

            # Returns False only for the string "false" (case-insensitive),
            # True for every other string.
            def True_False_string(str_in):
                out = True
                if str_in.lower() == "false":
                    out = False
                return out

            # customId is a "|"-separated string; part 3 has the form
            # "<char>row/col<char>" (first and last character are stripped).
            custom_id = result["customId"]
            ci_parts = custom_id.split("|")
            rotation = ci_parts[0]
            prod_level = ci_parts[1]
            row_, col_ = ci_parts[3][1:-1].split("/")
            row, col = (int(row_), int(col_))
            slope = float(ci_parts[15])

            # Append one line: row followed by the zero-padded (3 digits)
            # column, the first value of the first three result series of
            # the first data section, and the slope.
            with open("out/general_data.csv", 'ab') as _:
                csv_writer = csv.writer(_, delimiter=",")
                csv_writer.writerow([
                    "{}{:03d}".format(row,
                                      col), result["data"][0]["results"][0][0],
                    result["data"][0]["results"][1][0],
                    result["data"][0]["results"][2][0], slope
                ])

            # NOTE(review): this unconditional continue makes the rest of
            # this branch (down to `i = i + 1`) unreachable, so `i` is never
            # incremented and no year/crop/SOC data is collected.  Confirm
            # whether this is a deliberate short-circuit.
            continue

            custom_id = result["customId"]
            ci_parts = custom_id.split("|")
            rotation = ci_parts[0]
            prod_level = ci_parts[1]
            row_, col_ = ci_parts[3][1:-1].split("/")
            row, col = (int(row_), int(col_))
            region_id = ci_parts[4]
            use_secondary_yields = True_False_string(ci_parts[6])
            start_recording_out = int(ci_parts[7])
            if SOC_STUDY:
                start_recording_out = 2045  #for SOC study, crop out is used only to check setup correctness
            residue_humus_balance = True_False_string(ci_parts[8])
            suffix = ci_parts[9]
            KA5_txt = ci_parts[10]
            soil_type = ci_parts[11]
            p_id = ci_parts[12]
            orgNkreis = ci_parts[13]
            unique_id = ci_parts[14]

            # one SOC row per result: identifying columns first, SOC values
            # are appended in the loop below
            soc_res = [[]]
            row_col = "{}{:03d}".format(row, col)
            soc_res[0].append(str(row_col))
            soc_res[0].append(str(rotation))
            soc_res[0].append(str((p_id)))
            soc_res[0].append(str((KA5_txt)))
            soc_res[0].append(str((soil_type)))
            soc_res[0].append(str((orgNkreis)))
            soc_res[0].append(str((unique_id)))

            # dispatch every data section on its origSpec (the quotes are
            # part of the compared value)
            for data in result.get("data", []):
                results = data.get("results", [])
                orig_spec = data.get("origSpec", "")
                output_ids = data.get("outputIds", [])
                if len(results) > 0:
                    if orig_spec == '"yearly"':
                        if SOC_STUDY:
                            continue
                        else:
                            res = create_year_output(output_ids, row, col,
                                                     rotation, prod_level,
                                                     results,
                                                     start_recording_out,
                                                     KA5_txt, soil_type)
                            year_data[region_id].extend(res)
                    elif orig_spec == '"crop"':
                        res = create_crop_output(output_ids, row, col,
                                                 rotation, prod_level, results,
                                                 use_secondary_yields,
                                                 start_recording_out,
                                                 residue_humus_balance)
                        crop_data[region_id].extend(res)
                    elif re.search('from', orig_spec):
                        #only SOC is recorded with "from" "to"
                        soc_res[0].append(results[0][0] * 100)
                    #if re.search('anthesis', orig_spec) or re.search('maturity', orig_spec) or re.search('Harvest', orig_spec):
                    #    update_pheno_output(output_ids, row, col, rotation, prod_level, results, pheno_data, region_id)
            soc_data[region_id].extend(soc_res)

            # write out every region whose crop buffer exceeds the threshold
            for region_id in crop_data.keys():
                if len(crop_data[region_id]) > start_writing_lines_threshold:
                    write_data(region_id, year_data, crop_data, pheno_data,
                               soc_data, suffix)

            i = i + 1

        elif write_normal_output_files:
            # Not taken with the hard-coded flag above: writes every result
            # to its own out/out-<i>.csv file.
            print "received work result ", i, " customId: ", result.get(
                "customId", "")

            with open("out/out-" + str(i) + ".csv", 'wb') as _:
                writer = csv.writer(_, delimiter=",")

                for data in result.get("data", []):
                    results = data.get("results", [])
                    orig_spec = data.get("origSpec", "")
                    output_ids = data.get("outputIds", [])

                    if len(results) > 0:
                        writer.writerow([orig_spec])
                        for row in monica_io.write_output_header_rows(
                                output_ids,
                                include_header_row=True,
                                include_units_row=True,
                                include_time_agg=False):
                            writer.writerow(row)

                        for row in monica_io.write_output(output_ids, results):
                            writer.writerow(row)

                    # blank row after every data section
                    writer.writerow([])

            i = i + 1
Пример #6
0
def main():
    """Consume work results from the workers until a "finish" message arrives.

    The port (default 7777) can be overridden on the command line with
    ``port=<int>``.  A PULL socket is connected to localhost when
    ``LOCAL_RUN`` is set, otherwise to ``cluster<RUN_ON_CLUSTER>``, and
    polled with a one second receive timeout.

    With ``write_normal_output_files`` False (the hard-coded setting) the
    '"crop"' sections of every result are converted by
    ``create_crop_output`` and buffered per region id (customId part 3).
    ``write_data`` is called for a region when its buffer holds more than
    ``start_writing_lines_threshold`` rows, and for every non-empty buffer
    whenever a receive attempt fails.

    With ``write_normal_output_files`` True every result is written to
    ``out/out-<i>.csv`` instead.
    """
    config = {
        "port": 7777,
    }
    for arg in sys.argv[1:]:
        # Split at the first "=" only; arguments without "=" are ignored
        # instead of raising ValueError.
        key, sep, value = arg.partition("=")
        if sep and key in config:
            config[key] = int(value)

    crop_data = defaultdict(list)

    i = 0
    context = zmq.Context()
    socket = context.socket(zmq.PULL)
    if LOCAL_RUN:
        socket.connect("tcp://localhost:" + str(config["port"]))
    else:
        socket.connect("tcp://cluster" + str(RUN_ON_CLUSTER) + ":" +
                       str(config["port"]))

    socket.RCVTIMEO = 1000
    leave = False
    write_normal_output_files = False
    start_writing_lines_threshold = 1000

    while not leave:

        try:
            result = socket.recv_json()
        except Exception:
            # No usable message (typically the receive timeout): write out
            # whatever is buffered.  KeyboardInterrupt/SystemExit are not
            # subclasses of Exception and still stop the consumer.
            for buffered_region in list(crop_data):
                if len(crop_data[buffered_region]) > 0:
                    write_data(buffered_region, crop_data)
            continue

        if result["type"] == "finish":
            print("received finish message")
            leave = True

        elif not write_normal_output_files:
            # Length of the first buffered region's row list, 0 if none.
            first_buffer_len = len(next(iter(crop_data.values()), []))
            # The doubled spaces reproduce the output of the former
            # comma-separated print statement.
            print("received work result  %s  customId:  %s  len(crop_data):  %s"
                  % (i, result.get("customId", ""), first_buffer_len))

            ci_parts = result["customId"].split("|")
            rotation = ci_parts[0]
            # Part 2 looks like "(row/col)": strip the first and last
            # character, then split at "/".
            row, col = map(int, ci_parts[2][1:-1].split("/"))
            region_id = ci_parts[3]

            for data in result.get("data", []):
                results = data.get("results", [])
                # The quotes are part of the compared origSpec value.
                if results and data.get("origSpec", "") == '"crop"':
                    res = create_crop_output(data.get("outputIds", []), row,
                                             col, rotation, results)
                    crop_data[region_id].extend(res)

            for buffered_region in list(crop_data):
                if len(crop_data[buffered_region]) > start_writing_lines_threshold:
                    write_data(buffered_region, crop_data)

            i += 1

        else:
            print("received work result  %s  customId:  %s"
                  % (i, result.get("customId", "")))

            with open("out/out-" + str(i) + ".csv", 'wb') as out_file:
                writer = csv.writer(out_file, delimiter=",")

                for data in result.get("data", []):
                    results = data.get("results", [])
                    orig_spec = data.get("origSpec", "")
                    output_ids = data.get("outputIds", [])

                    if results:
                        writer.writerow([orig_spec])
                        for out_row in monica_io.write_output_header_rows(
                                output_ids,
                                include_header_row=True,
                                include_units_row=True,
                                include_time_agg=False):
                            writer.writerow(out_row)

                        for out_row in monica_io.write_output(output_ids,
                                                              results):
                            writer.writerow(out_row)

                    # Blank row after every data section.
                    writer.writerow([])

            i += 1
def run_consumer(server = {"server": None, "port": None, "nd-port": None}, path_to_output_dir=None):
    """Collect calibration results from the workers and write them to CSV.

    Parameters:
        server: dict with the keys "server", "port" and "nd-port"; a falsy
            value selects the default ("localhost", "77773", "5555").  The
            dict is only read, never modified.
        path_to_output_dir: if given, replaces the "local-path-to-output-dir"
            entry of the user's PATHS record before the directory is created.

    ``key=value`` command line arguments override the matching ``config``
    entries ("user", "port", "no-data-port", "server").

    The list of expected stations is first received from a PULL socket
    connected to localhost on the "no-data-port" (this receive blocks until
    a message arrives).  Results are then read from a second PULL socket with
    a one second receive timeout.  Messages of type "jobs-per-cell",
    "no-data" or "target-grid-metadata" are ignored.

    With ``write_calibration_output_files`` True (the hard-coded setting)
    each result contributes one row per data section (station id, origSpec,
    first value of the first result series); the loop ends once every
    expected station has reported, and the rows are written to
    ``out/calib_out.csv``.  Otherwise each result is written to its own
    ``out/out-<customId>.csv`` file.
    """
    config = {
        "user": "******",
        "port": server["port"] or "77773",
        "no-data-port": server["nd-port"] or "5555",
        "server": server["server"] or "localhost"
    }
    for arg in sys.argv[1:]:
        # Split at the first "=" only; arguments without "=" are ignored
        # instead of raising ValueError.
        key, sep, value = arg.partition("=")
        if sep and key in config:
            config[key] = value

    paths = PATHS[config["user"]]
    if path_to_output_dir:
        paths["local-path-to-output-dir"] = path_to_output_dir

    try:
        os.makedirs(paths["local-path-to-output-dir"])
    except Exception:
        # Best effort: the directory may already exist.  Exception (rather
        # than a bare except) keeps KeyboardInterrupt/SystemExit working.
        pass

    print("consumer config: %s" % (config,))

    received_env_count = 1
    context = zmq.Context()
    socket = context.socket(zmq.PULL)

    # connect producer and consumer directly
    prod_cons_socket = context.socket(zmq.PULL)
    prod_cons_socket.connect("tcp://localhost:" + config["no-data-port"])
    expected_stats = prod_cons_socket.recv_json(encoding="latin-1")

    if LOCAL_RUN:
        socket.connect("tcp://localhost:" + config["port"])
    else:
        socket.connect("tcp://" + config["server"] + ":" + config["port"])
    socket.RCVTIMEO = 1000
    leave = False
    write_calibration_output_files = True

    ignored_types = ["jobs-per-cell", "no-data", "target-grid-metadata"]

    # rows of the calibration output file, header first
    to_write = [["Station", "Date", "Stage"]]

    while not leave:

        try:
            result = socket.recv_json(encoding="latin-1")
        except Exception:
            # Receive timeout or unusable message: keep polling.
            continue

        if result.get("type", "") in ignored_types:
            print("ignoring %s" % (result.get("type", ""),))
            continue

        # The doubled spaces reproduce the output of the former
        # comma-separated print statement.
        print("received work result  %s  customId:  %s"
              % (received_env_count, result.get("customId", "")))

        custom_id = result["customId"]

        if write_calibration_output_files:
            station = custom_id["station_id"]

            # Tick off the station and leave once all of them have reported.
            # A repeated or unexpected station must not abort the run with
            # ValueError, because the collected rows are only written at the
            # very end.
            if station in expected_stats:
                expected_stats.remove(station)
            print("missing stations: " + str(expected_stats))
            if len(expected_stats) == 0:
                leave = True

            for data_ in result.get("data", []):
                results = data_.get("results", [])
                # Sections without any value are skipped; indexing them
                # would raise IndexError and lose every collected row.
                if not results or not results[0]:
                    continue
                to_write.append([
                    str(station),
                    data_.get("origSpec", ""),
                    str(results[0][0]),
                ])

        else:  # normal files
            file_name = "out/out-" + custom_id.replace("|", "_") + ".csv"
            with open(file_name, 'wb') as out_file:
                writer = csv.writer(out_file, delimiter=",")

                for data_ in result.get("data", []):
                    results = data_.get("results", [])
                    orig_spec = data_.get("origSpec", "")
                    output_ids = data_.get("outputIds", [])

                    if results:
                        writer.writerow([orig_spec.replace("\"", "")])
                        for row in monica_io.write_output_header_rows(
                                output_ids,
                                include_header_row=True,
                                include_units_row=True,
                                include_time_agg=False):
                            writer.writerow(row)

                        for row in monica_io.write_output(output_ids, results):
                            writer.writerow(row)

                    # Blank row after every data section.
                    writer.writerow([])

        received_env_count += 1

    if write_calibration_output_files:
        print("writing out file for pheno cal...")
        with open("out/calib_out.csv", "wb") as out_file:
            csv.writer(out_file).writerows(to_write)

    print("exiting run_consumer()")
Пример #8
0
def main():
    "collect data from workers"
    # Pulls JSON results from a ZeroMQ PULL socket until a message with
    # type "finish" arrives.  With the hard-coded flag below, results are
    # stored per soil grid cell and written row by row through
    # write_row_to_grids once a row is complete.

    # data[srow][scol] holds the create_output() result of one soil cell
    data = defaultdict(dict)
    # next soil grid row that has not been written yet
    next_row = 0

    # The template grid file stays open for the whole receive loop because
    # row_to_data_col_count() reads it lazily, line by line.
    with open(PATHS[USER]["PATH_TO_SOIL_DIR"] +
              "buek1000_100_gk5.asc") as template_grid_file:

        # Reads the template grid up to and including `row` (if not read
        # yet) and keeps, per row, the number of cells that are not -9999.
        # With a truthy `sub` the stored count of `row` is reduced by `sub`
        # and None is returned; otherwise the stored count is returned, or
        # None when `row` has no entry.
        # NOTE(review): relies on the module-level globals tgf_at_row,
        # rtd_col_count and template_np_grid, which are initialised outside
        # this function; tgf_at_row presumably starts at -1 - confirm.
        def row_to_data_col_count(row, sub=None):
            global tgf_at_row
            global rtd_col_count
            global template_np_grid
            # dimensions used to allocate the in-memory copy of the grid
            scols = 3653
            srows = 5001
            if tgf_at_row < 0:
                # first call: allocate the grid filled with -9999 and skip
                # the first 6 lines of the file (presumably the ASCII grid
                # header - confirm)
                template_np_grid = np.full((srows, scols),
                                           -9999,
                                           dtype=np.int32)
                for _ in range(0, 6):
                    template_grid_file.next()

            for r in xrange(tgf_at_row, row):

                line = template_grid_file.next()
                # incremented before use: the line just read belongs to the
                # new value of tgf_at_row
                tgf_at_row += 1

                col = -1
                count = 0
                for col_str in line.strip().split(" "):
                    col += 1
                    if int(col_str) == -9999:
                        continue
                    template_np_grid[tgf_at_row, col] = int(col_str)
                    count += 1

                rtd_col_count[tgf_at_row] = count

            if row in rtd_col_count:
                if sub:
                    rtd_col_count[row] -= sub
                else:
                    return rtd_col_count[row]

            return None

        i = 1
        context = zmq.Context()
        socket = context.socket(zmq.PULL)
        if LOCAL_RUN:
            socket.connect("tcp://localhost:7777")
        else:
            socket.connect("tcp://cluster2:17777")
        # receive timeout in milliseconds; a timed-out recv raises and is
        # swallowed by the except branch below
        socket.RCVTIMEO = 1000
        leave = False
        write_normal_output_files = False
        #start_writing_lines_threshold = 270
        while not leave:

            try:
                result = socket.recv_json(encoding="latin-1")
            except:
                # NOTE(review): the bare except also catches
                # KeyboardInterrupt.
                #for crop_id, production_situation, period, grcp, climate_resolution, soil_resolution in data.keys():
                #    if len(data[(crop_id, production_situation, period, grcp, climate_resolution, soil_resolution)]) > 0:
                #        write_data(crop_id, production_situation, period, grcp, climate_resolution, soil_resolution, data)
                continue

            if result["type"] == "finish":
                print "received finish message"
                leave = True

            elif not write_normal_output_files:
                print "received work result ", i, " customId: ", result.get(
                    "customId", "")

                # customId is "|"-separated; parts 0 and 1 have the form
                # "<char>row/col<char>" (first and last character stripped)
                custom_id = result["customId"]
                ci_parts = custom_id.split("|")
                crow, ccol = map(int, ci_parts[0][1:-1].split("/"))
                srow, scol = map(int, ci_parts[1][1:-1].split("/"))

                data[srow][scol] = create_output(result)
                # one cell of row srow has arrived: reduce its open count
                row_to_data_col_count(srow, sub=1)

                # write every leading row whose open count has reached 0
                while row_to_data_col_count(next_row) == 0:
                    write_row_to_grids(data, template_np_grid, next_row,
                                       data[srow][scol].keys())
                    next_row += 1

                i = i + 1

            elif write_normal_output_files:
                # Not taken with the hard-coded flag above: writes every
                # result to its own CSV file named after the grid cell.
                print "received work result ", i, " customId: ", result.get(
                    "customId", "")

                custom_id = result["customId"]
                ci_parts = custom_id.split("|")
                crow, ccol = map(int, ci_parts[0][1:-1].split("/"))
                srow, scol = map(int, ci_parts[1][1:-1].split("/"))
                file_name = "crow" + str(crow) + "-ccol" + str(
                    ccol) + "-srow" + str(srow) + "-scol" + str(scol)

                with open("out/out-" + file_name + ".csv", 'wb') as _:
                    writer = csv.writer(_, delimiter=",")

                    for data_ in result.get("data", []):
                        results = data_.get("results", [])
                        orig_spec = data_.get("origSpec", "")
                        output_ids = data_.get("outputIds", [])

                        if len(results) > 0:
                            writer.writerow([orig_spec.replace("\"", "")])
                            for row in monica_io.write_output_header_rows(
                                    output_ids,
                                    include_header_row=True,
                                    include_units_row=True,
                                    include_time_agg=False):
                                writer.writerow(row)

                            for row in monica_io.write_output(
                                    output_ids, results):
                                writer.writerow(row)

                        # blank row after every data section
                        writer.writerow([])

                i = i + 1
def run_consumer():
    """Collect results from the workers until a "finish" message arrives.

    ``key=value`` command line arguments override the matching ``config``
    entries ("user", "port", "server").  A PULL socket is connected to
    ``tcp://<server>:<port>`` and polled with a one second receive timeout.

    With ``write_normal_output_files`` False (the hard-coded setting) every
    result is handed to ``write_agmip_calibration_output_file``.  Otherwise
    the result is written as CSV to the path built from the customId entries
    "sim_files" and "output_filename" (header row without units row).
    """
    config = {
        "user": "******",
        "port": "77776",
        "server": "localhost"
    }
    for arg in sys.argv[1:]:
        # Split at the first "=" only; arguments without "=" are ignored
        # instead of raising ValueError.
        key, sep, value = arg.partition("=")
        if sep and key in config:
            config[key] = value

    # Not used below; the lookup is kept so that a user without a PATHS
    # entry is still rejected here (assumes PATHS is a dict - confirm).
    paths = PATHS[config["user"]]

    received_env_count = 1
    context = zmq.Context()
    socket = context.socket(zmq.PULL)
    socket.connect("tcp://" + config["server"] + ":" + config["port"])
    socket.RCVTIMEO = 1000
    leave = False
    write_normal_output_files = False

    while not leave:

        try:
            result = socket.recv_json(encoding="latin-1")
        except Exception:
            # Receive timeout or unusable message: keep polling.
            # KeyboardInterrupt/SystemExit are not subclasses of Exception,
            # so the consumer can still be stopped while it waits.
            continue

        if result["type"] == "finish":
            print("Received finish message")
            leave = True

        elif not write_normal_output_files:
            print("Received work result 2 - ", received_env_count, " customId: ", result["customId"])
            write_agmip_calibration_output_file(result)
            received_env_count += 1

        else:
            print("\n")
            print("received work result ", received_env_count, " customId: ", str(result.get("customId", "").values()))

            custom_id = result["customId"]
            output_file = custom_id["sim_files"] + custom_id["output_filename"]
            print(result)

            print("Write output file:", output_file)
            with open(output_file, 'wb') as out_file:
                writer = csv.writer(out_file, delimiter=",")

                for data_ in result.get("data", []):
                    print("Data", data_)

                    results = data_.get("results", [])
                    orig_spec = data_.get("origSpec", "")
                    output_ids = data_.get("outputIds", [])
                    print("Results:", results)
                    if results:
                        writer.writerow([orig_spec.replace("\"", "")])
                        for row in monica_io.write_output_header_rows(
                                output_ids,
                                include_header_row=True,
                                include_units_row=False,
                                include_time_agg=False):
                            writer.writerow(row)

                        for row in monica_io.write_output(output_ids, results):
                            writer.writerow(row)

                    # Blank row after every data section.
                    writer.writerow([])

            received_env_count += 1
Пример #10
0
def collector():
    """Collect result messages from the workers until "finish" arrives.

    Connects a ZeroMQ DEALER socket (to localhost when ``LOCAL_RUN`` is
    truthy, to ``cluster1`` otherwise) and receives JSON messages with a
    receive timeout of 1000 ms.

    Every work result is handled in one of two ways:

    * default (``write_normal_output_files`` is False): the ``"yearly"``
      and ``"crop"`` sections are converted with ``create_year_output`` /
      ``create_crop_output`` and buffered per region; a region is handed to
      ``write_data`` once it holds more than
      ``start_writing_lines_threshold`` yearly rows, and every non-empty
      region is handed over whenever a receive attempt yields nothing;
    * otherwise each message is dumped as ``out/out-<n>.csv``.
    """

    def true_false_string(str_in):
        "only the text 'false' (in any casing) yields False"
        return str_in.lower() != "false"

    year_data = defaultdict(list)
    crop_data = defaultdict(list)
    # Passed on to write_data, but never filled in this function.
    pheno_data = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    # Suffix taken from the most recently received customId; it is only read
    # once year_data holds rows, i.e. after at least one message set it.
    suffix = None

    def write_regions(min_lines):
        "hand every region with more than min_lines yearly rows to write_data"
        # Iterate over a snapshot of the keys, as the original keys() list did.
        for region_id in list(year_data):
            if len(year_data[region_id]) > min_lines:
                write_data(region_id, year_data, crop_data, pheno_data, suffix)

    i = 0
    context = zmq.Context()
    socket = context.socket(zmq.DEALER)
    socket.setsockopt(zmq.IDENTITY, "ts_sustag_nrw")
    if LOCAL_RUN:
        # NOTE(review): 77773 is above the highest TCP port (65535) --
        # confirm the intended local port.
        socket.connect("tcp://localhost:77773")
    else:
        socket.connect("tcp://cluster1:7777")
    socket.RCVTIMEO = 1000
    leave = False
    write_normal_output_files = False
    start_writing_lines_threshold = 1000
    while not leave:

        try:
            result = socket.recv_json()
        except (zmq.Again, ValueError):
            # zmq.Again: nothing arrived within RCVTIMEO; ValueError: the
            # payload was not valid JSON. Either way use the pause to write
            # out whatever is buffered. KeyboardInterrupt/SystemExit and
            # genuine socket errors are deliberately not caught here, so the
            # loop can be stopped.
            write_regions(0)
            continue

        if result["type"] == "finish":
            print("received finish message")
            leave = True

        elif not write_normal_output_files:
            buffered = len(next(iter(year_data.values()), []))
            print("received work result  %s  customId:  %s  len(year_data):  %s"
                  % (i, result.get("customId", ""), buffered))

            # customId is a "|"-separated record; the fields used below are
            # 0 rotation, 1 production level, 3 "(row/col)", 4 region id,
            # 6 use secondary yields, 7 start recording, 8 residue humus
            # balance, 9 file suffix, 10 KA5 texture, 11 soil type.
            custom_id = result["customId"]
            ci_parts = custom_id.split("|")
            rotation = ci_parts[0]
            prod_level = ci_parts[1]
            row_, col_ = ci_parts[3][1:-1].split("/")
            row, col = (int(row_), int(col_))
            region_id = ci_parts[4]
            use_secondary_yields = true_false_string(ci_parts[6])
            start_recording_out = int(ci_parts[7])
            residue_humus_balance = true_false_string(ci_parts[8])
            suffix = ci_parts[9]
            KA5_txt = ci_parts[10]
            soil_type = ci_parts[11]

            for data in result.get("data", []):
                results = data.get("results", [])
                orig_spec = data.get("origSpec", "")
                output_ids = data.get("outputIds", [])
                if len(results) > 0:
                    # origSpec arrives with its JSON quotes still attached.
                    if orig_spec == '"yearly"':
                        res = create_year_output(output_ids, row, col, rotation, prod_level, results, start_recording_out, KA5_txt, soil_type)
                        year_data[region_id].extend(res)
                    elif orig_spec == '"crop"':
                        res = create_crop_output(output_ids, row, col, rotation, prod_level, results, use_secondary_yields, start_recording_out, residue_humus_balance)
                        crop_data[region_id].extend(res)
                    # Phenology sections (anthesis/maturity/Harvest) are
                    # currently not collected.

            write_regions(start_writing_lines_threshold)

            i = i + 1

        else:
            print("received work result  %s  customId:  %s"
                  % (i, result.get("customId", "")))

            with open("out/out-" + str(i) + ".csv", 'wb') as _:
                writer = csv.writer(_, delimiter=",")

                for data in result.get("data", []):
                    results = data.get("results", [])
                    orig_spec = data.get("origSpec", "")
                    output_ids = data.get("outputIds", [])

                    if len(results) > 0:
                        writer.writerow([orig_spec])
                        for row in monica_io.write_output_header_rows(output_ids,
                                                                      include_header_row=True,
                                                                      include_units_row=True,
                                                                      include_time_agg=False):
                            writer.writerow(row)

                        for row in monica_io.write_output(output_ids, results):
                            writer.writerow(row)

                    # Blank row separates the sections within the file.
                    writer.writerow([])

            i = i + 1