Example #1
def ARCGIS(workspace): 
    """
    Set workspace (where datasets to be processed are located) below
    """
    arcpy.env.workspace = workspace
    ws = arcpy.env.workspace
    print 'ws: ',ws
    """
    Get ArcGIS Install Location
    """
    ARCGIS_INSTALL_LOCATION = arcpy.GetInstallInfo()['InstallDir']

    """
    Get location of XSL Translator to go from ArcGIS to FGDC
    """
    FGDC_XSL_FILE = ARCGIS_INSTALL_LOCATION + "Metadata\\Translator\\Transforms\\ArcGIS2FGDC.xsl"


    """
    Set output location for OGP metadata and log file, then instantiate Logger
    """
    if os.path.exists(ws+"\\output\\"):
        OUTPUT_LOCATION = ws+"\\output\\"
    else:
        os.mkdir(ws+"\\output\\")
        OUTPUT_LOCATION = ws+"\\output\\"
    print 'output location: ',OUTPUT_LOCATION
    d=datetime.today()
    LOG_NAME = "OGP_MD_LOG_" + d.strftime("%y%m%d%M%S") + ".txt"
    sys.stdout = Logger(OUTPUT_LOCATION,LOG_NAME)

    files = arcpy.ListFiles()
    datasets = arcpy.ListDatasets()
    fields = ['LayerId', 'Name', 'CollectionId', 'Institution', 'InstitutionSort',
              'Access', 'DataType', 'DataTypeSort', 'Availability', 'LayerDisplayName',
              'LayerDisplayNameSort', 'Publisher', 'PublisherSort', 'Originator',
              'OriginatorSort', 'ThemeKeywords', 'PlaceKeywords', 'Abstract', 'MaxY',
              'MinY', 'MinX', 'MaxX', 'CenterX', 'CenterY', 'HalfWidth', 'HalfHeight',
              'Area', 'ContentDate','Location']
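    #note: each name in fields must have a matching UPPERCASE local variable assigned
    #in the loop below, because values are looked up with locals()[field.upper()]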

    if os.path.exists(ws + "\\temp.xml"):
        print "removed temp xml file"
        os.remove(ws + "\\temp.xml")

    """
    Set UTC timezone for ContentDate field
    """
    utc = pytz.utc

    """
    start the clock! obviously not at all necessary
    """
    start = clock()

    for i in datasets[:5]:

        """
         converting metadata from ArcGIS to FGDC using XSL transformation (super fast) and
         writing it to a temporary XML file that is then parsed and translated into
         final OGP format
        """
        print "======================================"
        print "Current item is " + i 
        print "======================================\n"
       
        print "Converting ArcGIS metadata into temporary FGDC XML file"
        
        XSLtrans = arcpy.XSLTransform_conversion(i,FGDC_XSL_FILE,"temp.xml")
        
        print XSLtrans.getMessages(),'\n'
        
        FGDCtree=ElementTree()
        root = FGDCtree.parse(ws + "\\temp.xml")
             
        DATATYPE = dataTypeParseFGDC(root)
        DATATYPESORT = DATATYPE

        #create string representation of FGDC md to be appended at end of OGP md
        FGDC_TEXT = ET.tostring(root)

        #layerID equals filename sans extension 
        LAYERID = i[:i.find(".")]

        #name equals FGDC title field
        #NAME = root.find("idinfo/citation/citeinfo/title").text
        try:
            NAME = root.findtext("idinfo/citation/citeinfo/title")
        except AttributeError as e:
            print "Name field doesn't exist! Setting to UNKNOWN for now"
            NAME = "UNKNOWN"
            
        COLLECTIONID = "initial collection"
        
        INSTITUTION = "University of Minnesota"
        INSTITUTIONSORT = "University of Minnesota"
        
        try:
            ACCESS = root.find("idinfo/accconst").text
        except AttributeError as e:
            print "Access field doesn't exist! Setting to UNKNOWN for now"        
            ACCESS = "UNKNOWN"

        AVAILABILITY = "Online"

        LAYERDISPLAYNAME = NAME
        LAYERDISPLAYNAMESORT = NAME
        try:
            #PUBLISHER = root.find("idinfo/citation/citeinfo/pubinfo/publish").text
            PUBLISHER = root.findtext("idinfo/citation/citeinfo/pubinfo/publish")
        except AttributeError as e:
            print "Publisher field doesn't exist! Setting to UNKNOWN for now"
            PUBLISHER = "UNKNOWN"
        finally:
            PUBLISHERSORT = PUBLISHER
        
        try:
            ORIGINATOR = root.find("idinfo/citation/citeinfo/origin").text
        except AttributeError as e:
            print "Originator field doesn't exist! Setting to UNKNOWN for now"
            ORIGINATOR = "UNKNOWN"
        finally:
            ORIGINATORSORT = ORIGINATOR
        #Combine multiple Keyword elements into a string rep

        THEMEKEYWORDS= keywordParse(FGDCtree,"themekey")
        PLACEKEYWORDS = keywordParse(FGDCtree,"placekey")
        
        try:
            ABSTRACT = root.find("idinfo/descript/abstract").text
        except AttributeError as e:
            print "No abstract found. Setting to UNKNOWN for now"
            ABSTRACT = "UNKNOWN"
        
        try:
            MINX=root.find("idinfo/spdom/bounding/westbc").text
            MINY=root.find("idinfo/spdom/bounding/southbc").text
            MAXX=root.find("idinfo/spdom/bounding/eastbc").text
            MAXY=root.find("idinfo/spdom/bounding/northbc").text
        except AttributeError as e:
            print "extent information not found!"
            
        
        #SRSPROJECTIONCODE=
        #try:
        if root.find("idinfo/timeperd/timeinfo/sngdate/caldate") is not None:
            dateText = root.find("idinfo/timeperd/timeinfo/sngdate/caldate").text
            year = int(dateText[0:4])
            month = int(dateText[4:6])
            day = int(dateText[6:8])
            date = datetime(year,month,day)
            CONTENTDATE = date.isoformat() + "Z"
        elif root.find("idinfo/timeperd/timeinfo/rngdates/begdate") is not None:
            dateText = root.find("idinfo/timeperd/timeinfo/rngdates/begdate").text
            year = int(dateText[0:4])
            month = int(dateText[4:6])
            day = int(dateText[6:8])
            date = datetime(year,month,day)
            CONTENTDATE = date.isoformat() + "Z"
        else:
            #neither a single date nor a begin date was found in the record
            print "No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"


        """
        LOCATION
        <field name="Location">
        {
        "ArcGIS93Rest": ["(the url for the REST service)"],
        "tilecache": ["(the url for the tilecache, which I believe ArcGIS server does not have)"],
        "download": "(downloading URL for the layer)","wfs": "(WFS URL, if has one)"
        }
        </field>
        """
##        #base URL for all images
##        REST_URL_BASE = "http://lib-geo1.lib.umn.edu:6080/arcgis/rest/services/OGP/mosaic_ogp_4/ImageServer/exportImage?"
##
##        #parameters to pass to URL, in the form of a dictionary
##        DOWNLOAD_URL_PARAMS = {}
##        DOWNLOAD_URL_PARAMS['bbox'] = MINX+','+MINY+','+MAXX+','+MAXY
##        DOWNLOAD_URL_PARAMS['bboxSR'] = '4269'
##        DOWNLOAD_URL_PARAMS['imageSR'] = '26915'
##        DOWNLOAD_URL_PARAMS['format'] = 'tiff'
##        DOWNLOAD_URL_PARAMS['pixelType'] = 'U8'
##        DOWNLOAD_URL_PARAMS['size'] = '3000,3000'
##        DOWNLOAD_URL_PARAMS['restrictUTurns'] = 'esriNoDataMatchAny'
##        DOWNLOAD_URL_PARAMS['interpolation'] = 'RSP_BilinearInterpolation'
##        cursor = arcpy.SearchCursor(u"\\mosaic245.gdb\\mosaic_ogp_1","name = '"+ LAYERID+"'")
##        raster_ID = cursor.next().OBJECTID
##        DOWNLOAD_URL_PARAMS['mosaicRule'] ={}   
##        DOWNLOAD_URL_PARAMS['mosaicRule']['mosaicMethod'] = 'esriMosaicLockRaster'
##        DOWNLOAD_URL_PARAMS['mosaicRule']['lockRasterIds'] = [raster_ID]
##        DOWNLOAD_URL_PARAMS['f']='image'
##        
##        DOWNLOAD_URL = REST_URL_BASE + urllib.urlencode(DOWNLOAD_URL_PARAMS)
##
##        REST_URL= REST_URL_BASE + urllib.urlencode({'mosaicRule': DOWNLOAD_URL_PARAMS['mosaicRule']})
##        
##        LOCATION = '{"ArcGIS93Rest" : "' + REST_URL + '", "tilecache" : "None", "download" : "' + DOWNLOAD_URL + '", "wfs" : "None"}'

        """
        Put it all together to make the OGP metadata xml file
        """
        OGPtree = ElementTree()
        OGProot = Element("add",allowDups="false")
        doc = SubElement(OGProot,"doc")
        OGPtree._setroot(OGProot)
        try:
            for field in fields:
                fieldEle = SubElement(doc,"field",name=field)
                fieldEle.text = locals()[field.upper()]
        except KeyError as e:
            print "Nonexistant key: ", field 
        fgdc_text=SubElement(doc,"field",name="FgdcText")
        fgdc_text.text = '<?xml version="1.0"?>'+FGDC_TEXT
        
        if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
            existsPrompt =raw_input('OGP metadata already exists! Overwrite? (Y/N): ')
            if existsPrompt == 'Y':
                OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
                print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                pass
        else:
            OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
            print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"



        os.remove(ws+"\\temp.xml")

        print "Next!\n"

    end = clock()
    if (end-start) > 60:
        mins = str(math.ceil((end - start) / 60))
        secs = str(math.ceil((end - start) % 60))
        print str(len(datasets)) + " datasets processed in " + mins + " minutes and " + secs + " seconds"
    else:
        secs = str(math.ceil(end-start))
        print str(len(datasets)) + " datasets processed in " + secs + " seconds"
    sys.stdout=sys.__stdout__
    print 'Done. See log file for information'
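A minimal invocation sketch for the example above (usage is assumed, not shown in the source); it presumes the module-level imports the function relies on (arcpy, pytz, ElementTree, Logger, dataTypeParseFGDC, keywordParse) are in place, and the workspace path is only a placeholder:

    ARCGIS("C:\\gis_data\\to_process")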
Example #2
def MGMG(workspace, error_tolerance):
    """
    Set workspace (where datasets to be processed are located) below
    """
    ws = workspace
    max_errors = error_tolerance

    # Set output location for OGP metadata and log file, then instantiate Logger
    output_random = random.randint(0, 100000000)
    if os.path.exists(ws + "output" + str(output_random) + "\\"):
        OUTPUT_LOCATION = ws + "output" + str(output_random) + "\\"
    else:
        os.mkdir(ws + "output" + str(output_random) + "\\")
        OUTPUT_LOCATION = ws + "output" + str(output_random) + "\\"

    if os.path.exists(ws + "error" + str(output_random) + "\\"):
        ERROR_LOCATION = ws + "error" + str(output_random) + "\\"
    else:
        os.mkdir(ws + "error" + str(output_random) + "\\")
        ERROR_LOCATION = ws + "error" + str(output_random) + "\\"
    print "output location: ", OUTPUT_LOCATION
    print "error location: ", ERROR_LOCATION

    d = datetime.today()
    LOG_NAME = "OGP_MD_LOG_" + d.strftime("%y%m%d%M%S") + ".txt"

    sys.stdout = Logger(OUTPUT_LOCATION, LOG_NAME)

    files = glob.glob(ws + "*[!aux].xml")

    fields = [
        "LayerId",
        "Name",
        "CollectionId",
        "Institution",
        "InstitutionSort",
        "Access",
        "DataType",
        "DataTypeSort",
        "Availability",
        "LayerDisplayName",
        "LayerDisplayNameSort",
        "Publisher",
        "PublisherSort",
        "Originator",
        "OriginatorSort",
        "ThemeKeywords",
        "PlaceKeywords",
        "Abstract",
        "MaxY",
        "MinY",
        "MinX",
        "MaxX",
        "ContentDate",
        "Location",
        "WorkspaceName",
    ]

    # Set UTC timezone for ContentDate field

    utc = pytz.utc

    # List of layers and their respective URLs from Datafinder.org

    df_lyrs_file = open("G:\\GoogleDrive\\Google Drive\\GitHub\\OGP-metadata-py\\tests\\datafinder_layers.json", "r")
    df_lyrs = json.loads(df_lyrs_file.read())
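    #each entry in datafinder_layers.json is assumed to look roughly like
    #  "<layer id>": {"name": "...", "WorkspaceName": "...",
    #                 "ArcGISRest": "<REST endpoint URL>", "ServerType": "..."}
    #judging from the lookups performed below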

    # start the clock! obviously not at all necessary

    start = clock()

    for i in files:
        print "full file path is ", i
        error_counter = 0
        FGDCtree = et.ElementTree()
        root = FGDCtree.parse(i)

        # layerID equals filename sans extension
        LAYERID = i[i.rfind("\\") + 1 : i.rfind(".")]
        # print 'layerid is ', LAYERID

        try:
            if df_lyrs.has_key(LAYERID):
                NAME = unicode(df_lyrs[LAYERID]["name"])
            else:
                NAME = LAYERID
        except AttributeError as e:
            print "Name field doesn't exist! Setting to UNKNOWN for now"
            NAME = "UNKNOWN"
            error_counter += 1

        try:
            if df_lyrs.has_key(LAYERID):
                WORKSPACENAME = unicode(df_lyrs[LAYERID]["WorkspaceName"])
        except AttributeError as e:
            print "Workspace name error: ", e
            error_counter += 1
        DATATYPE = dataTypeParseMGMG(root)
        if DATATYPE == "Undefined":
            error_counter += 1
        DATATYPESORT = DATATYPE

        # create string representation of MGMG md to be appended at end of OGP md
        MGMG_TEXT = et.tostring(root)

        COLLECTIONID = "initial collection"

        INSTITUTION = "University of Minnesota"
        INSTITUTIONSORT = "University of Minnesota"

        # to avoid authentication complications, for now we'll just set access field to public
        ACCESS = "Public"
        #        try:
        #            ACCESS = root.find("idinfo/accconst").text
        #        except AttributeError as e:
        #            print "Access Constraints field doesn't exist! Setting to UNKNOWN for now"
        #            ACCESS = "UNKNOWN"

        AVAILABILITY = "Online"

        # set Display Name equal to title element
        try:
            LAYERDISPLAYNAME = root.findtext("idinfo/citation/citeinfo/title")

        except AttributeError:
            print "No Title element found..."
            LAYERDISPLAYNAME = "UNKNOWN"
            error_counter += 1

        LAYERDISPLAYNAMESORT = LAYERDISPLAYNAME

        try:
            PUBLISHER = root.findtext("idinfo/citation/citeinfo/pubinfo/publish")
        except AttributeError as e:
            print "Publisher field doesn't exist! Setting to UNKNOWN for now"
            PUBLISHER = "UNKNOWN"
            error_counter += 1
        finally:
            PUBLISHERSORT = PUBLISHER

        try:
            ORIGINATOR = root.find("idinfo/citation/citeinfo/origin").text
        except AttributeError as e:
            print "Originator field doesn't exist! Setting to UNKNOWN for now"
            ORIGINATOR = "UNKNOWN"
            error_counter += 1
        finally:
            ORIGINATORSORT = ORIGINATOR

        # Combine multiple Keyword elements into a string rep

        THEMEKEYWORDS = keywordParse(FGDCtree, "themekey")
        PLACEKEYWORDS = keywordParse(FGDCtree, "placekey")

        try:
            ABSTRACT = root.find("idinfo/descript/abstract").text
        except AttributeError as e:
            print "No abstract found. Setting to UNKNOWN for now"
            ABSTRACT = "UNKNOWN"

        try:
            MINX = root.find("idinfo/spdom/bounding/westbc").text
            try:
                MINXf = float(MINX)
            except (TypeError, ValueError):
                print "Invalid West Bounding Coordinate of:", MINX
                error_counter += 1

        except AttributeError as e:
            print "West Bounding Coordinate not found!"
            error_counter += 1

        try:
            MINY = root.find("idinfo/spdom/bounding/southbc").text
            try:
                MINYf = float(MINY)
            except (TypeError, ValueError):
                print "Invalid South Bounding Coordinate of:", MINY
                error_counter += 1
        except AttributeError as e:
            print "South Bounding Coordinate not found!"
            error_counter += 1

        try:
            MAXX = root.find("idinfo/spdom/bounding/eastbc").text
            try:
                MAXXf = float(MAXX)
            except (TypeError, ValueError):
                print "Invalid East Bounding Coordinate of:", MAXX
                error_counter += 1
        except AttributeError as e:
            print "East Bounding Coordinate not found!"
            error_counter += 1

        try:
            MAXY = root.find("idinfo/spdom/bounding/northbc").text
            try:
                MAXYf = float(MAXY)
            except (TypeError, ValueError):
                print "Invalid North Bounding Coordinate of:", MAXY
                error_counter += 1
        except AttributeError as e:
            print "North Bounding Coordinate not found!"
            error_counter += 1

        try:
            if root.find("idinfo/timeperd/timeinfo/sngdate/caldate") is not None:
                dateText = root.find("idinfo/timeperd/timeinfo/sngdate/caldate").text
                if len(dateText) == 4:
                    # if the length is 4, we'll assume it's referring to the year.
                    year = int(dateText)
                    # we'll take January 1st as the fill in month/day for ISO format
                    date = datetime(year, 1, 1)
                    CONTENTDATE = date.isoformat() + "Z"
                elif len(dateText) == 8:
                    year = int(dateText[0:4])
                    month = int(dateText[4:6])
                    day = int(dateText[6:])
                    date = datetime(year, month, day)
                    CONTENTDATE = date.isoformat() + "Z"

        except AttributeError as e:
            print "No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"
            error_counter += 1

        except TypeError as e:
            print "|-|-|-| No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"
            error_counter += 1

        # location is too specific at the moment

        ##        #base URL for all images
        ##        REST_URL_BASE = "http://lib-geo1.lib.umn.edu:6080/arcgis/rest/services/OGP/mosaic_ogp_4/ImageServer/exportImage?"
        ##
        ##        #parameters to pass to URL, in the form of a dictionary
        ##        DOWNLOAD_URL_PARAMS = {}
        ##        DOWNLOAD_URL_PARAMS['bbox'] = MINX+','+MINY+','+MAXX+','+MAXY
        ##        DOWNLOAD_URL_PARAMS['bboxSR'] = '4269'
        ##        DOWNLOAD_URL_PARAMS['imageSR'] = '26915'
        ##        DOWNLOAD_URL_PARAMS['format'] = 'tiff'
        ##        DOWNLOAD_URL_PARAMS['pixelType'] = 'U8'
        ##        DOWNLOAD_URL_PARAMS['size'] = '3000,3000'
        ##        DOWNLOAD_URL_PARAMS['restrictUTurns'] = 'esriNoDataMatchAny'
        ##        DOWNLOAD_URL_PARAMS['interpolation'] = 'RSP_BilinearInterpolation'
        ##        cursor = arcpy.SearchCursor(u"\\mosaic245.gdb\\mosaic_ogp_1","name = '"+ LAYERID+"'")
        ##        raster_ID = cursor.next().OBJECTID
        ##        DOWNLOAD_URL_PARAMS['mosaicRule'] ={}
        ##        DOWNLOAD_URL_PARAMS['mosaicRule']['mosaicMethod'] = 'esriMosaicLockRaster'
        ##        DOWNLOAD_URL_PARAMS['mosaicRule']['lockRasterIds'] = [raster_ID]
        ##        DOWNLOAD_URL_PARAMS['f']='image'
        ##
        ##        DOWNLOAD_URL = REST_URL_BASE + urllib.urlencode(DOWNLOAD_URL_PARAMS)
        ##
        ##        REST_URL= REST_URL_BASE + urllib.urlencode({'mosaicRule': DOWNLOAD_URL_PARAMS['mosaicRule']})

        ##        LOCATION = '{"ArcGIS93Rest" : "' + REST_URL + '", "tilecache" : "None", "download" : "' + DOWNLOAD_URL + '", "wfs" : "None"}'
        try:
            if df_lyrs.has_key(LAYERID):
                ARCGISREST = df_lyrs[LAYERID]["ArcGISRest"]
                SERVERTYPE = df_lyrs[LAYERID]["ServerType"]
                # print 'Found',LAYERID,' so removing it from df_lyrs'
                df_lyrs.pop(LAYERID)
                if root.find("idinfo/citation/citeinfo/onlink") is not None:
                    DOWNLOAD_URL = root.find("idinfo/citation/citeinfo/onlink").text
                    LOCATION = json.dumps(
                        {"ArcGISRest": ARCGISREST, "download": DOWNLOAD_URL, "ServerType": SERVERTYPE}
                    )
                else:
                    LOCATION = json.dumps({"ArcGISRest": ARCGISREST, "ServerType": SERVERTYPE})
            else:
                if root.find("idinfo/citation/citeinfo/onlink") is not None:
                    DOWNLOAD_URL = root.find("idinfo/citation/citeinfo/onlink").text
                    LOCATION = json.dumps({"download": DOWNLOAD_URL})
        except AttributeError as e:
            print "LOCATION error: ", e
            error_counter += 1
        print "number of errors: ", error_counter

        # Put it all together to make the OGP metadata xml file

        OGPtree = et.ElementTree()
        OGProot = et.Element("add", allowDups="false")
        doc = et.SubElement(OGProot, "doc")
        OGPtree._setroot(OGProot)
        try:
            for field in fields:
                fieldEle = et.SubElement(doc, "field", name=field)
                fieldEle.text = locals()[field.upper()]
        except KeyError as e:
            print "Nonexistant key: ", field
            error_counter += 1

        # TODO eventually change OGP code base to recognize MgmgText as a
        # field and parse the metadata accordingly
        # mgmg_text = et.SubElement(doc,"field",name="MgmgText")
        mgmg_text = et.SubElement(doc, "field", name="FgdcText")
        mgmg_text.text = '<?xml version="1.0"?>' + MGMG_TEXT

        # check to see which etree module was used. If lxml was used
        # then we can pretty print the results. Otherwise we need to
        # remove the pretty print kwarg
        if et.__name__[0] == "l":
            if error_counter <= max_errors:
                if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input("OGP metadata already exists! Overwrite? (Y/N): ")
                    if existsPrompt == "Y":
                        OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml", pretty_print=True)
                        print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml", pretty_print=True)

                    print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                print "File exceeded error tolerance of ", max_errors, ", so into the error folder it goes"
                if os.path.exists(ERROR_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input("OGP metadata already exists! Overwrite? (Y/N): ")
                    if existsPrompt == "Y":
                        OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml", pretty_print=True)
                        print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml", pretty_print=True)

                    print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
        else:
            if error_counter <= max_errors:
                if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input("OGP metadata already exists! Overwrite? (Y/N): ")
                    if existsPrompt == "Y":
                        OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
                        print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
                    print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                print "File exceeded error tolerance of ", max_errors, ", so into the error folder it goes"
                if os.path.exists(ERROR_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input("OGP metadata already exists! Overwrite? (Y/N): ")
                    if existsPrompt == "Y":
                        OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml")
                        print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml")

                    print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"

        print "\n"

    end = clock()

    if (end - start) > 60:
        mins = str(math.ceil((end - start) / 60))
        secs = str(math.ceil((end - start) % 60))
        print str(len(files)) + " datasets processed in " + mins + " minutes and " + secs + " seconds"
        print "Done. See log file for information"

    else:
        secs = str(math.ceil(end - start))
        print str(len(files)) + " datasets processed in " + secs + " seconds"
        print "Done. See log file for information"
Example #3
def FGDC(workspace, error_tolerance): 
    """
    Set workspace (where datasets to be processed are located) below
    """
    ws = workspace

    max_errors = error_tolerance
    
    """
    Set output location for OGP metadata and log file, then instantiate Logger
    """
    if os.path.exists(ws+"output\\"):
        OUTPUT_LOCATION = ws+"output\\"
    else:
        os.mkdir(ws+"output\\")
        OUTPUT_LOCATION = ws+"output\\"
    print 'output location: ',OUTPUT_LOCATION
    
    d=datetime.today()
    LOG_NAME = "OGP_MD_LOG_" + d.strftime("%y%m%d%M%S") + ".txt"
    
    sys.stdout = Logger(OUTPUT_LOCATION,LOG_NAME)

    files = glob.glob(ws+'*[!aux].xml')
   
##    fields = ['LayerId', 'Name', 'CollectionId', 'Institution', 'InstitutionSort',
##              'Access', 'DataType', 'DataTypeSort', 'Availability', 'LayerDisplayName',
##              'LayerDisplayNameSort', 'Publisher', 'PublisherSort', 'Originator',
##              'OriginatorSort', 'ThemeKeywords', 'PlaceKeywords', 'Abstract', 'MaxY',
##              'MinY', 'MinX', 'MaxX', 'ContentDate','Location']
    fields = ['LayerId', 'Name', 'CollectionId', 'Institution', 'InstitutionSort',
          'Access', 'DataType', 'DataTypeSort', 'Availability', 'LayerDisplayName',
          'LayerDisplayNameSort', 'Publisher', 'PublisherSort', 'Originator',
          'OriginatorSort', 'ThemeKeywords', 'PlaceKeywords', 'Abstract', 'MaxY',
          'MinY', 'MinX', 'MaxX', 'ContentDate']
  
    #Set UTC timezone for ContentDate field
 
    utc = pytz.utc




    #start the clock! obviously not at all necessary 
      
    start = clock()

    for i in files:
        error_counter = 0
        FGDCtree = et.ElementTree()
        root = FGDCtree.parse(i)
             
        DATATYPE = dataTypeParseFGDC(root)
        DATATYPESORT = DATATYPE

        #create string representation of FGDC md to be appended at end of OGP md
        FGDC_TEXT = et.tostring(root)

        #layerID equals filename sans extension 
        LAYERID = i[i.rfind('\\')+1:i.rfind(".")]
        print 'layerid is ', LAYERID
        
        try:
            NAME = root.findtext("idinfo/citation/citeinfo/title")
        except AttributeError as e:
            print "Name field doesn't exist! Setting to UNKNOWN for now"
            NAME = "UNKNOWN"
            
        COLLECTIONID = "initial collection"
        
        INSTITUTION = "University of Minnesota"
        INSTITUTIONSORT = "University of Minnesota"
        
        #to avoid authentication complications, for now we'll just set access field to public
        ACCESS= "public"
        #try:
        #    ACCESS = root.find("idinfo/accconst").text
        #except AttributeError as e:
        #    print "Access Constraints field doesn't exist! Setting to UNKNOWN for now"        
        #    ACCESS = "UNKNOWN"
        AVAILABILITY = "Online"

        #set Display Name equal to title element   
        try:
            LAYERDISPLAYNAME = root.findtext("idinfo/citation/citeinfo/title")
 
        except AttributeError:
            print 'No Title element found...'
            LAYERDISPLAYNAME = "UNKNOWN"
        
        LAYERDISPLAYNAMESORT = LAYERDISPLAYNAME
        
                
        try:
            #PUBLISHER = root.find("idinfo/citation/citeinfo/pubinfo/publish").text
            PUBLISHER = root.findtext("idinfo/citation/citeinfo/pubinfo/publish")
        except AttributeError as e:
            print "Publisher field doesn't exist! Setting to UNKNOWN for now"
            PUBLISHER = "UNKNOWN"
        finally:
            PUBLISHERSORT = PUBLISHER
        
        try:
            ORIGINATOR = root.find("idinfo/citation/citeinfo/origin").text
        except AttributeError as e:
            print "Originator field doesn't exist! Setting to UNKNOWN for now"
            ORIGINATOR = "UNKNOWN"
        finally:
            ORIGINATORSORT = ORIGINATOR
        #Combine multiple Keyword elements into a string rep

        THEMEKEYWORDS= keywordParse(FGDCtree,"themekey")
        PLACEKEYWORDS = keywordParse(FGDCtree,"placekey")
        
        try:
            ABSTRACT = root.find("idinfo/descript/abstract").text
        except AttributeError as e:
            print "No abstract found. Setting to UNKNOWN for now"
            ABSTRACT = "UNKNOWN"
        
        try:
            MINX=root.find("idinfo/spdom/bounding/westbc").text
            MINY=root.find("idinfo/spdom/bounding/southbc").text
            MAXX=root.find("idinfo/spdom/bounding/eastbc").text
            MAXY=root.find("idinfo/spdom/bounding/northbc").text
        except AttributeError as e:
            print "extent information not found!"
            
        
        #default so CONTENTDATE is always defined, even when no usable date is found
        CONTENTDATE = "UNKNOWN"
        try:

            if root.find("idinfo/timeperd/timeinfo/sngdate/caldate") is not None:
                dateText = root.find("idinfo/timeperd/timeinfo/sngdate/caldate").text
                if len(dateText) == 4:
                    #if it's just 4 digits, lets assume it's the year and convert it to integer
                    year = int(dateText)

                    #set month to January
                    month = 1

                    #set day to the 1st
                    day = 1

                    #put em all together
                    date = datetime(year,month,day)

                    #now format it ISO style
                    CONTENTDATE = date.isoformat() + "Z"
                elif len(dateText) == 8:
                    #print dateText
                    year = int(dateText[0:4])
                    month = int(dateText[4:6])
                    day = int(dateText[6:])
                    date = datetime(year,month,day)
                    CONTENTDATE = date.isoformat() + "Z"
            elif root.find("idinfo/timeperd/timeinfo/rngdates/begdate") is not None:
                dateText = root.find("idinfo/timeperd/timeinfo/rngdates/begdate").text
                if len(dateText) == 4:
                    #if it's just 4 digits, lets assume it's the year
                    year = int(dateText)

                    #set month to January
                    month = 1

                    #set day to the 1st
                    day = 1

                    #put em all together
                    date = datetime(year,month,day)

                    #now format it ISO style
                    CONTENTDATE = date.isoformat() + "Z"
                elif len(dateText) == 8:
                    #print dateText
                    year = int(dateText[0:4])
                    month = int(dateText[4:6])
                    day = int(dateText[6:])
                    date = datetime(year,month,day)
                    CONTENTDATE = date.isoformat() + "Z"
        except AttributeError as e:
            print "No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"

        except TypeError as e:
            print "|-|-|-| No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"

        print CONTENTDATE

        """
        LOCATION
        <field name="Location">
        {
        "ArcGIS93Rest": ["(the url for the REST service)"],
        "tilecache": ["(the url for the tilecache, which I believe ArcGIS server does not have)"],
        "download": "(downloading URL for the layer)","wfs": "(WFS URL, if has one)"
        }
        </field>
        """
##        #base URL for all images
##        REST_URL_BASE = "http://lib-geo1.lib.umn.edu:6080/arcgis/rest/services/OGP/mosaic_ogp_4/ImageServer/exportImage?"
##
##        #parameters to pass to URL, in the form of a dictionary
##        DOWNLOAD_URL_PARAMS = {}
##        DOWNLOAD_URL_PARAMS['bbox'] = MINX+','+MINY+','+MAXX+','+MAXY
##        DOWNLOAD_URL_PARAMS['bboxSR'] = '4269'
##        DOWNLOAD_URL_PARAMS['imageSR'] = '26915'
##        DOWNLOAD_URL_PARAMS['format'] = 'tiff'
##        DOWNLOAD_URL_PARAMS['pixelType'] = 'U8'
##        DOWNLOAD_URL_PARAMS['size'] = '3000,3000'
##        DOWNLOAD_URL_PARAMS['restrictUTurns'] = 'esriNoDataMatchAny'
##        DOWNLOAD_URL_PARAMS['interpolation'] = 'RSP_BilinearInterpolation'
##        cursor = arcpy.SearchCursor(u"\\mosaic245.gdb\\mosaic_ogp_1","name = '"+ LAYERID+"'")
##        raster_ID = cursor.next().OBJECTID
##        DOWNLOAD_URL_PARAMS['mosaicRule'] ={}   
##        DOWNLOAD_URL_PARAMS['mosaicRule']['mosaicMethod'] = 'esriMosaicLockRaster'
##        DOWNLOAD_URL_PARAMS['mosaicRule']['lockRasterIds'] = [raster_ID]
##        DOWNLOAD_URL_PARAMS['f']='image'
##        
##        DOWNLOAD_URL = REST_URL_BASE + urllib.urlencode(DOWNLOAD_URL_PARAMS)
##
##        REST_URL= REST_URL_BASE + urllib.urlencode({'mosaicRule': DOWNLOAD_URL_PARAMS['mosaicRule']})
##        
##        LOCATION = '{"ArcGIS93Rest" : "' + REST_URL + '", "tilecache" : "None", "download" : "' + DOWNLOAD_URL + '", "wfs" : "None"}'

        """
        Put it all together to make the OGP metadata xml file
        """
        
        OGPtree = et.ElementTree()
        OGProot = et.Element("add",allowDups="false")
        doc = et.SubElement(OGProot,"doc")
        OGPtree._setroot(OGProot)
        try:
            for field in fields:
                fieldEle = et.SubElement(doc,"field",name=field)
                fieldEle.text = locals()[field.upper()]
        except KeyError as e:
            print "Nonexistant key: ", field
            error_counter += 1
        

        fgdc_text = et.SubElement(doc,"field",name="FgdcText")
        fgdc_text.text = '<?xml version="1.0"?>'+FGDC_TEXT

        #check to see which etree module was used. If lxml was used
        #then we can pretty print the results. Otherwise we need to
        #remove the pretty print kwarg
        if et.__name__[0] == "l":
            if error_counter <= max_errors:
                if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt =raw_input('OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml",pretty_print=True)
                        print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml",pretty_print=True)
                    
                    print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                print 'File exceeded error tolerance of ', max_errors,', so into the error folder it goes'
                if os.path.exists(ERROR_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt =raw_input('OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml",pretty_print=True)
                        print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml",pretty_print=True)
                    
                    print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
        else:
            if error_counter <= max_errors:
                if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt =raw_input('OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
                        print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")     
                    print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                print 'File exceeded error tolerance of ', max_errors,', so into the error folder it goes'
                if os.path.exists(ERROR_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt =raw_input('OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml")
                        print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml")
                    
                    print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"

        print "\n"
        
    end = clock()
    if (end-start) > 60:
        mins = str(math.ceil((end - start) / 60))
        secs = str(math.ceil((end - start) % 60))
        print str(len(files)) + " datasets processed in " + mins + " minutes and " + secs + " seconds"
    else:
        secs = str(math.ceil(end-start))
        print str(len(files)) + " datasets processed in " + secs + " seconds"
    sys.stdout=sys.__stdout__
    print 'Done. See log file for information'
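A minimal invocation sketch for the FGDC example above (assumed usage); as with MGMG, the workspace path needs a trailing backslash and the second argument is the per-record error tolerance:

    FGDC("C:\\gis_data\\fgdc_records\\", 3)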
Example #4
def MGMG(workspace, error_tolerance):
    """
    Set workspace (where datasets to be processed are located) below
    """
    ws = workspace
    max_errors = error_tolerance

    #Set output location for OGP metadata and log file, then instantiate Logger
    output_random = random.randint(0, 100000000)
    if os.path.exists(ws + "output" + str(output_random) + "\\"):
        OUTPUT_LOCATION = ws + "output" + str(output_random) + "\\"
    else:
        os.mkdir(ws + "output" + str(output_random) + "\\")
        OUTPUT_LOCATION = ws + "output" + str(output_random) + "\\"

    if os.path.exists(ws + "error" + str(output_random) + "\\"):
        ERROR_LOCATION = ws + "error" + str(output_random) + "\\"
    else:
        os.mkdir(ws + "error" + str(output_random) + "\\")
        ERROR_LOCATION = ws + "error" + str(output_random) + "\\"
    print 'output location: ', OUTPUT_LOCATION
    print 'error location: ', ERROR_LOCATION

    d = datetime.today()
    LOG_NAME = "OGP_MD_LOG_" + d.strftime("%y%m%d%M%S") + ".txt"

    sys.stdout = Logger(OUTPUT_LOCATION, LOG_NAME)

    files = glob.glob(ws + '*[!aux].xml')

    fields = [
        'LayerId', 'Name', 'CollectionId', 'Institution', 'InstitutionSort',
        'Access', 'DataType', 'DataTypeSort', 'Availability',
        'LayerDisplayName', 'LayerDisplayNameSort', 'Publisher',
        'PublisherSort', 'Originator', 'OriginatorSort', 'ThemeKeywords',
        'PlaceKeywords', 'Abstract', 'MaxY', 'MinY', 'MinX', 'MaxX',
        'ContentDate', 'Location', 'WorkspaceName'
    ]

    #Set UTC timezone for ContentDate field

    utc = pytz.utc

    #List of layers and their respective URLs from Datafinder.org

    df_lyrs_file = open(
        "G:\\GoogleDrive\\Google Drive\\GitHub\\OGP-metadata-py\\tests\\datafinder_layers.json",
        'r')
    df_lyrs = json.loads(df_lyrs_file.read())

    #start the clock! obviously not at all necessary

    start = clock()

    for i in files:
        print 'full file path is ', i
        error_counter = 0
        FGDCtree = et.ElementTree()
        root = FGDCtree.parse(i)

        #layerID equals filename sans extension
        LAYERID = i[i.rfind('\\') + 1:i.rfind(".")]
        #print 'layerid is ', LAYERID

        try:
            if df_lyrs.has_key(LAYERID):
                NAME = unicode(df_lyrs[LAYERID]["name"])
            else:
                NAME = LAYERID
        except AttributeError as e:
            print "Name field doesn't exist! Setting to UNKNOWN for now"
            NAME = "UNKNOWN"
            error_counter += 1

        try:
            if df_lyrs.has_key(LAYERID):
                WORKSPACENAME = unicode(df_lyrs[LAYERID]["WorkspaceName"])
        except AttributeError as e:
            print 'Workspace name error: ', e
            error_counter += 1
        DATATYPE = dataTypeParseMGMG(root)
        if DATATYPE == "Undefined":
            error_counter += 1
        DATATYPESORT = DATATYPE

        #create string representation of MGMG md to be appended at end of OGP md
        MGMG_TEXT = et.tostring(root)

        COLLECTIONID = "initial collection"

        INSTITUTION = "University of Minnesota"
        INSTITUTIONSORT = "University of Minnesota"

        #to avoid authentication complications, for now we'll just set access field to public
        ACCESS = "Public"
        #        try:
        #            ACCESS = root.find("idinfo/accconst").text
        #        except AttributeError as e:
        #            print "Access Constraints field doesn't exist! Setting to UNKNOWN for now"
        #            ACCESS = "UNKNOWN"

        AVAILABILITY = "Online"

        #set Display Name equal to title element
        try:
            LAYERDISPLAYNAME = root.findtext("idinfo/citation/citeinfo/title")

        except AttributeError:
            print 'No Title element found...'
            LAYERDISPLAYNAME = "UNKNOWN"
            error_counter += 1

        LAYERDISPLAYNAMESORT = LAYERDISPLAYNAME

        try:
            PUBLISHER = root.findtext(
                "idinfo/citation/citeinfo/pubinfo/publish")
        except AttributeError as e:
            print "Publisher field doesn't exist! Setting to UNKNOWN for now"
            PUBLISHER = "UNKNOWN"
            error_counter += 1
        finally:
            PUBLISHERSORT = PUBLISHER

        try:
            ORIGINATOR = root.find("idinfo/citation/citeinfo/origin").text
        except AttributeError as e:
            print "Originator field doesn't exist! Setting to UNKNOWN for now"
            ORIGINATOR = "UNKNOWN"
            error_counter += 1
        finally:
            ORIGINATORSORT = ORIGINATOR

        #Combine multiple Keyword elements into a string rep

        THEMEKEYWORDS = keywordParse(FGDCtree, "themekey")
        PLACEKEYWORDS = keywordParse(FGDCtree, "placekey")

        try:
            ABSTRACT = root.find("idinfo/descript/abstract").text
        except AttributeError as e:
            print "No abstract found. Setting to UNKNOWN for now"
            ABSTRACT = "UNKNOWN"

        try:
            MINX = root.find("idinfo/spdom/bounding/westbc").text
            try:
                MINXf = float(MINX)
            except (TypeError, ValueError):
                print 'Invalid West Bounding Coordinate of:', MINX
                error_counter += 1

        except AttributeError as e:
            print "West Bounding Coordinate not found!"
            error_counter += 1

        try:
            MINY = root.find("idinfo/spdom/bounding/southbc").text
            try:
                MINYf = float(MINY)
            except (TypeError, ValueError):
                print 'Invalid South Bounding Coordinate of:', MINY
                error_counter += 1
        except AttributeError as e:
            print "South Bounding Coordinate not found!"
            error_counter += 1

        try:
            MAXX = root.find("idinfo/spdom/bounding/eastbc").text
            try:
                MAXXf = float(MAXX)
            except (TypeError, ValueError):
                print 'Invalid East Bounding Coordinate of:', MAXX
                error_counter += 1
        except AttributeError as e:
            print "East Bounding Coordinate not found!"
            error_counter += 1

        try:
            MAXY = root.find("idinfo/spdom/bounding/northbc").text
            try:
                MAXYf = float(MAXY)
            except (TypeError, ValueError):
                print 'Invalid North Bounding Coordinate of:', MAXY
                error_counter += 1
        except AttributeError as e:
            print "North Bounding Coordinate not found!"
            error_counter += 1

        try:
            if root.find(
                    "idinfo/timeperd/timeinfo/sngdate/caldate") is not None:
                dateText = root.find(
                    "idinfo/timeperd/timeinfo/sngdate/caldate").text
                if len(dateText) == 4:
                    #if the length is 4, we'll assume it's referring to the year.
                    year = int(dateText)
                    #we'll take January 1st as the fill in month/day for ISO format
                    date = datetime(year, 1, 1)
                    CONTENTDATE = date.isoformat() + "Z"
                elif len(dateText) == 8:
                    year = int(dateText[0:4])
                    month = int(dateText[4:6])
                    day = int(dateText[6:])
                    date = datetime(year, month, day)
                    CONTENTDATE = date.isoformat() + "Z"

        except AttributeError as e:
            print "No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"
            error_counter += 1

        except TypeError as e:
            print "|-|-|-| No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"
            error_counter += 1

        # location is too specific at the moment

    ##        #base URL for all images
    ##        REST_URL_BASE = "http://lib-geo1.lib.umn.edu:6080/arcgis/rest/services/OGP/mosaic_ogp_4/ImageServer/exportImage?"
    ##
    ##        #parameters to pass to URL, in the form of a dictionary
    ##        DOWNLOAD_URL_PARAMS = {}
    ##        DOWNLOAD_URL_PARAMS['bbox'] = MINX+','+MINY+','+MAXX+','+MAXY
    ##        DOWNLOAD_URL_PARAMS['bboxSR'] = '4269'
    ##        DOWNLOAD_URL_PARAMS['imageSR'] = '26915'
    ##        DOWNLOAD_URL_PARAMS['format'] = 'tiff'
    ##        DOWNLOAD_URL_PARAMS['pixelType'] = 'U8'
    ##        DOWNLOAD_URL_PARAMS['size'] = '3000,3000'
    ##        DOWNLOAD_URL_PARAMS['restrictUTurns'] = 'esriNoDataMatchAny'
    ##        DOWNLOAD_URL_PARAMS['interpolation'] = 'RSP_BilinearInterpolation'
    ##        cursor = arcpy.SearchCursor(u"\\mosaic245.gdb\\mosaic_ogp_1","name = '"+ LAYERID+"'")
    ##        raster_ID = cursor.next().OBJECTID
    ##        DOWNLOAD_URL_PARAMS['mosaicRule'] ={}
    ##        DOWNLOAD_URL_PARAMS['mosaicRule']['mosaicMethod'] = 'esriMosaicLockRaster'
    ##        DOWNLOAD_URL_PARAMS['mosaicRule']['lockRasterIds'] = [raster_ID]
    ##        DOWNLOAD_URL_PARAMS['f']='image'
    ##
    ##        DOWNLOAD_URL = REST_URL_BASE + urllib.urlencode(DOWNLOAD_URL_PARAMS)
    ##
    ##        REST_URL= REST_URL_BASE + urllib.urlencode({'mosaicRule': DOWNLOAD_URL_PARAMS['mosaicRule']})

    ##        LOCATION = '{"ArcGIS93Rest" : "' + REST_URL + '", "tilecache" : "None", "download" : "' + DOWNLOAD_URL + '", "wfs" : "None"}'
        try:
            if df_lyrs.has_key(LAYERID):
                ARCGISREST = df_lyrs[LAYERID]["ArcGISRest"]
                SERVERTYPE = df_lyrs[LAYERID]["ServerType"]
                #print 'Found',LAYERID,' so removing it from df_lyrs'
                df_lyrs.pop(LAYERID)
                if root.find("idinfo/citation/citeinfo/onlink") is not None:
                    DOWNLOAD_URL = root.find(
                        "idinfo/citation/citeinfo/onlink").text
                    LOCATION = json.dumps({
                        'ArcGISRest': ARCGISREST,
                        'download': DOWNLOAD_URL,
                        'ServerType': SERVERTYPE
                    })
                else:
                    LOCATION = json.dumps({
                        'ArcGISRest': ARCGISREST,
                        'ServerType': SERVERTYPE
                    })
            else:
                if root.find("idinfo/citation/citeinfo/onlink") is not None:
                    DOWNLOAD_URL = root.find(
                        "idinfo/citation/citeinfo/onlink").text
                    LOCATION = json.dumps({'download': DOWNLOAD_URL})
        except AttributeError as e:
            print 'LOCATION error: ', e
            error_counter += 1
        print 'number of errors: ', error_counter

        #Put it all together to make the OGP metadata xml file

        OGPtree = et.ElementTree()
        OGProot = et.Element("add", allowDups="false")
        doc = et.SubElement(OGProot, "doc")
        OGPtree._setroot(OGProot)
        try:
            for field in fields:
                fieldEle = et.SubElement(doc, "field", name=field)
                fieldEle.text = locals()[field.upper()]
        except KeyError as e:
            print "Nonexistant key: ", field
            error_counter += 1

        #TODO eventually change OGP code base to recognize MgmgText as a
        #field and parse the metadata accordingly
        #mgmg_text = et.SubElement(doc,"field",name="MgmgText")
        mgmg_text = et.SubElement(doc, "field", name="FgdcText")
        mgmg_text.text = '<?xml version="1.0"?>' + MGMG_TEXT

        #check to see which etree module was used. If lxml was used
        #then we can pretty print the results. Otherwise we need to
        #remove the pretty print kwarg
        if et.__name__[0] == "l":
            if error_counter <= max_errors:
                if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input(
                        'OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml",
                                      pretty_print=True)
                        print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml",
                                  pretty_print=True)

                    print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                print 'File exceeded error tolerance of ', max_errors, ', so into the error folder it goes'
                if os.path.exists(ERROR_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input(
                        'OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml",
                                      pretty_print=True)
                        print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml",
                                  pretty_print=True)

                    print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
        else:
            if error_counter <= max_errors:
                if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input(
                        'OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
                        print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
                    print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                print 'File exceeded error tolerance of ', max_errors, ', so into the error folder it goes'
                if os.path.exists(ERROR_LOCATION + LAYERID + "_OGP.xml"):
                    existsPrompt = raw_input(
                        'OGP metadata already exists! Overwrite? (Y/N): ')
                    if existsPrompt == 'Y':
                        OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml")
                        print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"
                    else:
                        pass
                else:
                    OGPtree.write(ERROR_LOCATION + LAYERID + "_OGP.xml")

                    print "Output file: " + ERROR_LOCATION + LAYERID + "_OGP.xml"

        print "\n"

    end = clock()

    if (end - start) > 60:
        mins = str(math.ceil((end - start) / 60))
        secs = str(math.ceil((end - start) % 60))
        print str(
            len(files)
        ) + " datasets processed in " + mins + " minutes and " + secs + " seconds"
        print 'Done. See log file for information'

    else:
        secs = str(math.ceil(end - start))
        print str(len(files)) + " datasets processed in " + secs + " seconds"
        print 'Done. See log file for information'
Example #5
def ARCGIS(workspace):
    """
    Set workspace (where datasets to be processed are located) below
    """
    arcpy.env.workspace = workspace
    ws = arcpy.env.workspace
    print 'ws: ', ws
    """
    Get ArcGIS Install Location
    """
    ARCGIS_INSTALL_LOCATION = arcpy.GetInstallInfo()['InstallDir']
    """
    Get location of XSL Translator to go from ArcGIS to FGDC
    """
    FGDC_XSL_FILE = ARCGIS_INSTALL_LOCATION + "Metadata\\Translator\\Transforms\\ArcGIS2FGDC.xsl"
    """
    Set output location for OGP metadata and log file, then instantiate Logger
    """
    if os.path.exists(ws + "\\output\\"):
        OUTPUT_LOCATION = ws + "\\output\\"
    else:
        os.mkdir(ws + "\\output\\")
        OUTPUT_LOCATION = ws + "\\output\\"
    print 'output location: ', OUTPUT_LOCATION
    d = datetime.today()
    LOG_NAME = "OGP_MD_LOG_" + d.strftime("%y%m%d%M%S") + ".txt"
    sys.stdout = Logger(OUTPUT_LOCATION, LOG_NAME)

    files = arcpy.ListFiles()
    datasets = arcpy.ListDatasets()
    fields = [
        'LayerId', 'Name', 'CollectionId', 'Institution', 'InstitutionSort',
        'Access', 'DataType', 'DataTypeSort', 'Availability',
        'LayerDisplayName', 'LayerDisplayNameSort', 'Publisher',
        'PublisherSort', 'Originator', 'OriginatorSort', 'ThemeKeywords',
        'PlaceKeywords', 'Abstract', 'MaxY', 'MinY', 'MinX', 'MaxX', 'CenterX',
        'CenterY', 'HalfWidth', 'HalfHeight', 'Area', 'ContentDate', 'Location'
    ]

    if os.path.exists(ws + "\\temp.xml"):
        print "removed temp xml file"
        os.remove(ws + "\\temp.xml")
    """
    Set UTC timezone for ContentDate field
    """
    utc = pytz.utc

    """
    start the clock! obviously not at all necessary
    """
    start = clock()

    for i in datasets[:5]:
        """
         converting metadata from ArcGIS to FGDC using XSL transformation (super fast) and
         writing it to a temporary XML file that is then parsed and translated into
         final OGP format
        """
        print "======================================"
        print "Current item is " + i
        print "======================================\n"

        print "Converting ArcGIS metadata into temporary FGDC XML file"

        XSLtrans = arcpy.XSLTransform_conversion(i, FGDC_XSL_FILE, "temp.xml")

        print XSLtrans.getMessages(), '\n'

        FGDCtree = ElementTree()
        root = FGDCtree.parse(ws + "\\temp.xml")

        DATATYPE = dataTypeParseFGDC(root)
        DATATYPESORT = DATATYPE

        #create string representation of FGDC md to be appended at end of OGP md
        FGDC_TEXT = ET.tostring(root)

        #layerID equals filename sans extension
        LAYERID = i[:i.find(".")]

        #name equals FGDC title field
        #NAME = root.find("idinfo/citation/citeinfo/title").text
        try:
            NAME = root.findtext("idinfo/citation/citeinfo/title")
        except AttributeError as e:
            print "Name field doesn't exist! Setting to UNKNOWN for now"
            NAME = "UNKNOWN"

        COLLECTIONID = "initial collection"

        INSTITUTION = "University of Minnesota"
        INSTITUTIONSORT = "University of Minnesota"

        try:
            ACCESS = root.find("idinfo/accconst").text
        except AttributeError as e:
            print "Access field doesn't exist! Setting to UNKNOWN for now"
            ACCESS = "UNKNOWN"

        AVAILABILITY = "Online"

        LAYERDISPLAYNAME = NAME
        LAYERDISPLAYNAMESORT = NAME
        try:
            #PUBLISHER = root.find("idinfo/citation/citeinfo/pubinfo/publish").text
            PUBLISHER = root.findtext(
                "idinfo/citation/citeinfo/pubinfo/publish")
        except AttributeError as e:
            print "Publisher field doesn't exist! Setting to UNKNOWN for now"
            PUBLISHER = "UNKNOWN"
        finally:
            PUBLISHERSORT = PUBLISHER

        try:
            ORIGINATOR = root.find("idinfo/citation/citeinfo/origin").text
        except AttributeError as e:
            print "Originator field doesn't exist! Setting to UNKNOWN for now"
            ORIGINATOR = "UNKNOWN"
        finally:
            ORIGINATORSORT = ORIGINATOR
        #Combine multiple Keyword elements into a string rep

        THEMEKEYWORDS = keywordParse(FGDCtree, "themekey")
        PLACEKEYWORDS = keywordParse(FGDCtree, "placekey")

        try:
            ABSTRACT = root.find("idinfo/descript/abstract").text
        except AttributeError as e:
            print "No abstract found. Setting to UNKNOWN for now"
            ABSTRACT = "UNKNOWN"

        try:
            MINX = root.find("idinfo/spdom/bounding/westbc").text
            MINY = root.find("idinfo/spdom/bounding/southbc").text
            MAXX = root.find("idinfo/spdom/bounding/eastbc").text
            MAXY = root.find("idinfo/spdom/bounding/northbc").text
        except AttributeError as e:
            print "extent information not found!"

        #SRSPROJECTIONCODE=
        #try:
        if root.find("idinfo/timeperd/timeinfo/sngdate/caldate") is not None:
            dateText = root.find(
                "idinfo/timeperd/timeinfo/sngdate/caldate").text
            year = int(dateText[0:4])
            month = int(dateText[4:6])
            day = int(dateText[6:8])
            date = datetime(year, month, day)
            CONTENTDATE = date.isoformat() + "Z"
        elif root.find(
                "idinfo/timeperd/timeinfo/rngdates/begdate") is not None:
            dateText = root.find(
                "idinfo/timeperd/timeinfo/rngdates/begdate").text
            year = int(dateText[0:4])
            month = int(dateText[4:6])
            day = int(dateText[6:8])
            date = datetime(year, month, day)
            CONTENTDATE = date.isoformat() + "Z"
        else:
            #neither a single date nor a begin date was found in the record
            print "No content date found! setting to UNKNOWN for now"
            CONTENTDATE = "UNKNOWN"
        """
        LOCATION
        <field name="Location">
        {
        "ArcGIS93Rest": ["(the url for the REST service)"],
        "tilecache": ["(the url for the tilecache, which I believe ArcGIS server does not have)"],
        "download": "(downloading URL for the layer)","wfs": "(WFS URL, if has one)"
        }
        </field>
        """
        ##        #base URL for all images
        ##        REST_URL_BASE = "http://lib-geo1.lib.umn.edu:6080/arcgis/rest/services/OGP/mosaic_ogp_4/ImageServer/exportImage?"
        ##
        ##        #parameters to pass to URL, in the form of a dictionary
        ##        DOWNLOAD_URL_PARAMS = {}
        ##        DOWNLOAD_URL_PARAMS['bbox'] = MINX+','+MINY+','+MAXX+','+MAXY
        ##        DOWNLOAD_URL_PARAMS['bboxSR'] = '4269'
        ##        DOWNLOAD_URL_PARAMS['imageSR'] = '26915'
        ##        DOWNLOAD_URL_PARAMS['format'] = 'tiff'
        ##        DOWNLOAD_URL_PARAMS['pixelType'] = 'U8'
        ##        DOWNLOAD_URL_PARAMS['size'] = '3000,3000'
        ##        DOWNLOAD_URL_PARAMS['restrictUTurns'] = 'esriNoDataMatchAny'
        ##        DOWNLOAD_URL_PARAMS['interpolation'] = 'RSP_BilinearInterpolation'
        ##        cursor = arcpy.SearchCursor(u"\\mosaic245.gdb\\mosaic_ogp_1","name = '"+ LAYERID+"'")
        ##        raster_ID = cursor.next().OBJECTID
        ##        DOWNLOAD_URL_PARAMS['mosaicRule'] ={}
        ##        DOWNLOAD_URL_PARAMS['mosaicRule']['mosaicMethod'] = 'esriMosaicLockRaster'
        ##        DOWNLOAD_URL_PARAMS['mosaicRule']['lockRasterIds'] = [raster_ID]
        ##        DOWNLOAD_URL_PARAMS['f']='image'
        ##
        ##        DOWNLOAD_URL = REST_URL_BASE + urllib.urlencode(DOWNLOAD_URL_PARAMS)
        ##
        ##        REST_URL= REST_URL_BASE + urllib.urlencode({'mosaicRule': DOWNLOAD_URL_PARAMS['mosaicRule']})
        ##
        ##        LOCATION = '{"ArcGIS93Rest" : "' + REST_URL + '", "tilecache" : "None", "download" : "' + DOWNLOAD_URL + '", "wfs" : "None"}'
        """
        Put it all together to make the OGP metadata xml file
        """
        OGPtree = ElementTree()
        OGProot = Element("add", allowDups="false")
        doc = SubElement(OGProot, "doc")
        OGPtree._setroot(OGProot)
        try:
            for field in fields:
                fieldEle = SubElement(doc, "field", name=field)
                fieldEle.text = locals()[field.upper()]
        except KeyError as e:
            print "Nonexistant key: ", field
        fgdc_text = SubElement(doc, "field", name="FgdcText")
        fgdc_text.text = '<?xml version="1.0"?>' + FGDC_TEXT

        if os.path.exists(OUTPUT_LOCATION + LAYERID + "_OGP.xml"):
            existsPrompt = raw_input(
                'OGP metadata already exists! Overwrite? (Y/N): ')
            if existsPrompt == 'Y':
                OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
                print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"
            else:
                pass
        else:
            OGPtree.write(OUTPUT_LOCATION + LAYERID + "_OGP.xml")
            print "Output file: " + OUTPUT_LOCATION + LAYERID + "_OGP.xml"

        os.remove(ws + "\\temp.xml")

        print "Next!\n"

    end = clock()
    if (end - start) > 60:
        mins = str(math.ceil((end - start) / 60))
        secs = str(math.ceil((end - start) % 60))
        print str(
            len(datasets)
        ) + " datasets processed in " + mins + " minutes and " + secs + " seconds"
    else:
        secs = str(math.ceil(end - start))
        print str(
            len(datasets)) + " datasets processed in " + secs + " seconds"
    sys.stdout = sys.__stdout__
    print 'Done. See log file for information'