def main():
    """Fetch REDCap data, split it per site and hand the files off for sending.

    Steps, in order:

    1. Parse ``-c`` (configuration directory) and ``-k/--keep`` from the
       command line and load ``setup.json`` from that directory.
    2. Configure logging, initialise the REDCap interface and fetch the data.
    3. Apply three XSL stylesheets one after another: the one named by
       ``setup['xml_formatting_tranform_xsl']``, then
       ``groupby_siteid_transform.xsl`` and ``remove_junktags_transform.xsl``
       from ``proj_root + 'bin/utils/'``.
    4. Move every ``start_date`` value 180 days into the past.
    5. Write one ``smi<id>.xml`` file per child of the resulting root element
       into the temp folder and pass the file names and ids to
       ``parse_site_details_and_send``.

    Side effects: assigns the module globals ``configuration_directory``,
    ``do_keep_gen_files``, ``tmp_folder``, ``setup`` and ``gsmlogger``.
    """
    global configuration_directory
    global do_keep_gen_files
    global tmp_folder
    global setup
    global gsmlogger

    # obtaining command line arguments for path to config directory
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', dest='configuration_directory_path',
        default=default_configuration_directory,
        required=False,
        help='Specify the path to the configuration directory')

    # read the optional argument `-k` for keeping the generated files
    parser.add_argument(
        '-k', '--keep',
        default=default_do_keep_gen_files,
        required=False,
        help='Specify `yes` to preserve the files generated during execution')

    args = vars(parser.parse_args())
    configuration_directory = args['configuration_directory_path'] + '/'
    # NOTE: any value other than None (not only `yes`) turns keeping on.
    do_keep_gen_files = args['keep'] is not None

    setup = gsm_lib.read_config(configuration_directory, 'setup.json')
    site_catalog_file = configuration_directory + setup['site_catalog']
    system_log_file = setup['system_log_file']

    # Configure logging
    gsmlogger = GSMLogger()
    gsmlogger.configure_logging(system_log_file)

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory
    properties = rt.init_redcap_interface(setup, gsmlogger.logger)

    # Stylesheets are applied strictly in this order:
    #   1. removes junk data, renames elements, extracts site_id and adds a
    #      new site_id tag
    #   2. groups the data based on site_id
    #   3. removes all the nodes which are not set
    stylesheet_paths = [
        configuration_directory + setup['xml_formatting_tranform_xsl'],
        proj_root + 'bin/utils/groupby_siteid_transform.xsl',
        proj_root + 'bin/utils/remove_junktags_transform.xsl',
    ]

    # get data from the redcap for the fields listed in the source_data_schema.xml
    response = rt.get_data_from_redcap(properties, gsmlogger.logger)

    xml_doc = etree.fromstring(response)
    for stylesheet_path in stylesheet_paths:
        # Each stylesheet is parsed only when its turn comes, so a broken
        # later stylesheet is reported after the earlier stages have run.
        apply_stylesheet = etree.XSLT(etree.parse(stylesheet_path))
        xml_doc = apply_stylesheet(xml_doc)

    # Serialise and re-parse with remove_blank_text=True so the tree used
    # below carries no whitespace-only text nodes.
    serialized = etree.tostring(xml_doc, method='xml', pretty_print=True)
    tree = etree.fromstring(serialized, etree.XMLParser(remove_blank_text=True))

    # Loop through the start_date elements and update their values
    lookback = timedelta(days=180)
    for start_date in tree.iter('start_date'):
        original_day = datetime.datetime.strptime(start_date.text, "%Y-%m-%d").date()
        start_date.text = str(original_day - lookback)

    # writing data to smi+site_id.xml. This xml will be saved to sftp of the site as smi.xml
    smi_filenames = []
    smi_ids = []
    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)

    for site in tree:
        site_id = site.attrib['id']
        file_name = tmp_folder + 'smi' + site_id + '.xml'
        gsm_lib.write_element_tree_to_file(ET.ElementTree(site), file_name)
        smi_filenames.append(file_name)
        smi_ids.append(site_id)

    # Single-argument parenthesised form prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('Using smi_filenames: ')
    pprint.pprint(smi_filenames)
    parse_site_details_and_send(site_catalog_file, smi_filenames, smi_ids, gsmlogger)
def _index_person_records(person_index_data):
    """Return ``{research_subject_id: (yob, mrn, facility_code)}``.

    One entry is produced per ``item`` element of ``person_index_data``; when
    two items share a research_subject_id the later one wins.
    """
    person_index = {}
    for item in person_index_data.iter("item"):
        person_index[item.findtext("research_subject_id")] = (
            item.findtext("yob"),
            item.findtext("mrn"),
            item.findtext("facility_code"),
        )
    return person_index


def _merge_subject_records(smi_data, person_index, logger):
    """Build the subject-map and subject-map-exception trees.

    Every ``item`` of ``smi_data`` whose research_subject_id is present in
    ``person_index`` is handled; all other items are ignored.

    * yob equal on both sides: the item gets ``mrn`` and ``facility_code``
      children, loses its ``yob`` child and is moved under the subject-map
      root.
    * yob different: a new ``item`` holding the id and both yob values is
      added under the exceptions root.

    Returns a tuple ``(subjectmap_root, subjectmap_exceptions_root,
    exceptions)`` where ``exceptions`` is True once at least one exception
    item with a non-None research_subject_id text has been created.
    """
    subjectmap_root = etree.Element("subject_map_records")
    subjectmap_exceptions_root = etree.Element("subject_map_exception_records")
    exceptions = False

    # Snapshot the items first: matched items are re-parented below, and the
    # traversal should not depend on the tree that is being emptied.
    for item in list(smi_data.iter("item")):
        subject_id = item.findtext("research_subject_id")
        if subject_id not in person_index:
            continue

        logger.debug("Processing research_subject_id %s", subject_id)
        index_yob, index_mrn, index_facility_code = person_index[subject_id]
        smi_yob = item.findtext("yob")

        if index_yob == smi_yob:
            logger.debug("yob matched for research_subject_id %s", subject_id)
            etree.SubElement(item, "mrn").text = index_mrn
            etree.SubElement(item, "facility_code").text = index_facility_code
            item.remove(item.find("yob"))
            subjectmap_root.append(item)
            continue

        logger.debug("yob not matched for research_subject_id %s", subject_id)
        exception_item = etree.Element("item")
        research_subject_id = etree.SubElement(exception_item, "research_subject_id")
        research_subject_id.text = subject_id
        if research_subject_id.text is not None:
            exceptions = True
        etree.SubElement(exception_item, "Person_Index_YOB").text = index_yob
        etree.SubElement(exception_item, "HCVTarget_YOB").text = smi_yob
        subjectmap_exceptions_root.append(exception_item)

    return subjectmap_root, subjectmap_exceptions_root, exceptions


def _write_subject_map_csv(subject_map_file, subjectmap_root):
    """Write the matched records to ``subject_map_file`` as quoted CSV.

    An IOError while opening or writing is re-raised through
    ``GSMLogger().LogException``; the file is closed in either case.
    """
    try:
        with open(subject_map_file, "w") as subject_map_csv:
            subject_map_csv.write('"research_subject_id","start_date","end_date","mrn","facility_code"\n')
            for item in subjectmap_root.iter("item"):
                subject_map_csv.write(
                    '"{0}","{1}","{2}","{3}","{4}"\n'.format(
                        gsm_lib.handle_blanks(item.findtext("research_subject_id")),
                        gsm_lib.handle_blanks(item.findtext("start_date")),
                        gsm_lib.handle_blanks(item.findtext("end_date")),
                        gsm_lib.handle_blanks(item.findtext("mrn")),
                        gsm_lib.handle_blanks(item.findtext("facility_code")),
                    )
                )
    except IOError:
        raise GSMLogger().LogException("Could not open file %s for write", subject_map_file)


def _write_subject_map_exceptions_csv(subject_map_exceptions_file, subjectmap_exceptions_root):
    """Write the yob mismatches to ``subject_map_exceptions_file`` as quoted CSV.

    Only a failure to open the file is converted through
    ``GSMLogger().LogException``; the file is closed even if a write fails.
    """
    try:
        subject_map_exceptions_csv = open(subject_map_exceptions_file, "w")
    except IOError:
        raise GSMLogger().LogException("Could not open file %s for write", subject_map_exceptions_file)

    with subject_map_exceptions_csv:
        subject_map_exceptions_csv.write('"research_subject_id","person_index_yob","redcap_yob"\n')
        for item in subjectmap_exceptions_root.iter("item"):
            # `.find(...).text` (not `findtext`) is kept on purpose so that
            # handle_blanks receives exactly what it received before.
            subject_map_exceptions_csv.write(
                '"{0}","{1}","{2}"\n'.format(
                    gsm_lib.handle_blanks(item.find("research_subject_id").text),
                    gsm_lib.handle_blanks(item.find("Person_Index_YOB").text),
                    gsm_lib.handle_blanks(item.find("HCVTarget_YOB").text),
                )
            )


def _keep_or_remove_temp_file(path, keep):
    """Report what happens to the generated file and delete it unless ``keep``."""
    # Single-argument parenthesised form prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    if keep:
        print(" * Keeping the temporary file: " + path)
    else:
        print(" * Removing the temporary file: " + path)
        os.remove(path)


def main():
    """Merge the person index with smi.xml and distribute the resulting CSVs.

    Steps, in order:

    1. Parse ``-c`` (configuration directory) and ``-k/--keep`` from the
       command line and load ``setup.json`` from that directory.
    2. Configure logging, initialise the REDCap interface, fetch the data and
       transform it with ``person_index_transform.xsl``.
    3. Obtain the smi file path via ``get_smi_and_parse`` and merge both
       documents on research_subject_id, comparing yob.
    4. Write ``subject_map.csv``, delete the smi file, and send the CSV with
       ``parse_site_details_and_send(..., "sftp")``.
    5. If any yob mismatch was recorded, write
       ``subject_map_exceptions.csv`` and send it with
       ``parse_site_details_and_send(..., "email")``.

    Side effects: assigns the module globals ``configuration_directory``,
    ``do_keep_gen_files``, ``setup`` and ``gsmlogger``.
    """
    global configuration_directory
    global do_keep_gen_files
    global setup
    global gsmlogger

    # obtaining command line arguments for path to config directory
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        dest="configuration_directory_path",
        default=default_configuration_directory,
        required=False,
        help="Specify the path to the configuration directory",
    )

    # read the optional argument `-k` for keeping the generated files
    parser.add_argument(
        "-k",
        "--keep",
        default=default_do_keep_gen_files,
        required=False,
        help="Specify `yes` to preserve the files generated during execution",
    )

    args = vars(parser.parse_args())
    configuration_directory = args["configuration_directory_path"] + "/"
    # NOTE: any value other than None (not only `yes`) turns keeping on.
    do_keep_gen_files = args["keep"] is not None

    # read setup options
    setup = gsm_lib.read_config(configuration_directory, "setup.json")
    site_catalog_file = configuration_directory + setup["site_catalog"]
    system_log_file = setup["system_log_file"]

    # Configure logging
    gsmlogger = GSMLogger()
    gsmlogger.configure_logging(system_log_file)

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory
    properties = rt.init_redcap_interface(setup, gsmlogger.logger)

    # gets data from the person index for the fields listed in the source_data_schema.xml
    response = rt.get_data_from_redcap(properties, gsmlogger.logger)
    xml_tree = etree.fromstring(response)

    # XSL Transformation : transforms the person_index data
    person_index_transform = etree.XSLT(etree.parse(proj_root + "bin/utils/person_index_transform.xsl"))
    person_index_data = person_index_transform(xml_tree)

    # retrieve smi.xml from the sftp server
    smi_path = get_smi_and_parse(site_catalog_file)
    if not os.path.exists(smi_path):
        raise GSMLogger().LogException("Error: file " + smi_path + " not found")

    # The file is read by etree.parse only; no separate handle is opened, so
    # nothing is left open when the file is deleted further down.
    smi_data = etree.parse(smi_path)

    # sorting both the xml files.
    gsm_lib.sort_element_tree(smi_data)
    gsm_lib.sort_element_tree(person_index_data)

    person_index = _index_person_records(person_index_data)
    subjectmap_root, subjectmap_exceptions_root, exceptions = _merge_subject_records(
        smi_data, person_index, gsmlogger.logger
    )

    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
    subject_map_file = tmp_folder + "subject_map.csv"
    gsmlogger.logger.info("Using path subject map file path: " + subject_map_file)
    _write_subject_map_csv(subject_map_file, subjectmap_root)

    # remove the downloaded smi.xml now that its content has been merged
    try:
        os.remove(smi_path)
    except OSError:
        raise GSMLogger().LogException("Could not remove file %s ", smi_path)

    # send the subject_map.csv to EMR team (sftp server)
    parse_site_details_and_send(site_catalog_file, subject_map_file, "sftp")
    _keep_or_remove_temp_file(subject_map_file, do_keep_gen_files)

    # send subject_map_exceptions.csv as email attachment
    if exceptions:
        subject_map_exceptions_file = tmp_folder + "subject_map_exceptions.csv"
        _write_subject_map_exceptions_csv(subject_map_exceptions_file, subjectmap_exceptions_root)
        parse_site_details_and_send(site_catalog_file, subject_map_exceptions_file, "email")
        _keep_or_remove_temp_file(subject_map_exceptions_file, do_keep_gen_files)