Пример #1
0
    def process(self, lang):
        """Walk every top-level entry of the ``<lang>.xml`` resource file.

        The file name is built from ``lang`` and opened relative to the
        current working directory.  When the document root is a
        ``resources`` element, each of its children is passed to
        ``self.walk`` together with a one-element tuple holding the
        child's ``name`` attribute and the language code.  Documents with
        any other root tag are ignored.
        """
        assert len(lang) == 2, 'Language name must be two letters long'

        tree = ElementTree(file='%s.xml' % lang)
        resources = tree.getroot()

        # Only <resources> documents are processed; anything else is a no-op.
        if resources.tag != 'resources':
            return

        for entry in resources:
            key_path = (entry.get('name'),)
            self.walk(entry, key_path, lang)
Пример #2
0
    def process(self, lang):
        """Walk the resource entries of ``<lang>.xml`` inside ``self._dirname``.

        Each child of a ``resources`` root element is handed to
        ``self.walk`` along with a one-element tuple containing its
        ``name`` attribute and the language code.  A document whose root
        has a different tag is skipped without error.
        """
        assert len(lang) == 2, "Language name must be two letters long"

        xml_path = os.path.join(self._dirname, "%s.xml" % lang)
        top = ElementTree(file=xml_path).getroot()

        # Nothing to do for documents that are not resource files.
        if top.tag != "resources":
            return

        for node in top:
            self.walk(node, (node.get("name"),), lang)
    def process(self, lang):
        """Feed the children of ``<lang>.xml`` to ``self.walk``.

        The file is opened by its bare name, so it is resolved against
        the current working directory.  Children are visited only when
        the root tag is ``resources``; each call receives the child, a
        one-element tuple with the child's ``name`` attribute, and
        ``lang``.
        """
        assert len(lang) == 2, 'Language name must be two letters long'

        filename = '%s.xml' % lang
        document_root = ElementTree(file=filename).getroot()

        # An empty tuple stands in for "no children" when the root is not
        # a <resources> element, so the loop below simply does nothing.
        is_resource_file = document_root.tag == 'resources'
        children = document_root if is_resource_file else ()

        for element in children:
            self.walk(element, (element.get('name'), ), lang)
Пример #4
0
def mark_changed_publications(modified_since):
    """
    Flag every publication Symplectic reports as modified since a date.

    Downloads the ``search-publications`` response for ``modified_since``
    into a local XML file, parses it, deletes the file and passes each
    child of the response's root element to
    ``_flag_publication_as_needing_refetch``.

    ``modified_since`` is a date string in the form yyyy-mm-dd.  The time
    suffix ``T00:00:00Z`` is appended in URL-encoded form (``:`` becomes
    ``%3A``).

    Returns None.  Errors raised by the download, the XML parser or the
    file removal propagate to the caller.
    """
    # symplectic api url and local file path
    url = "".join([
        SYMPLECTIC_API_URL,
        'search-publications?modified-since-when=',
        modified_since,
        'T00%3A00%3A00Z',
    ])
    tmp_filename = "".join([
        SYMPLECTIC_LOCAL_XML_FOLDER,
        SYMPLECTIC_LOCAL_PUBSMODIFIED_FOLDER,
        modified_since,
        '.xml',
    ])
    # get xml document from symplectic api and store on hd; urlretrieve
    # returns (filename, headers) and only the filename is needed
    tmp_filename = urllib.urlretrieve(url, tmp_filename)[0]
    try:
        # parse xml file
        search_publications_etree = ElementTree(file=tmp_filename)
    finally:
        # delete local file from hd, even when parsing fails, so that a
        # malformed response does not leave the download behind
        os.remove(tmp_filename)

    # publication lite elements are held in a subtree BUT the subtree is
    # the root element
    search_publications_subtree = search_publications_etree.getroot()
    # check if any publication elements in subtree
    if search_publications_subtree is None or \
            len(search_publications_subtree) < 1:
        return
    # for each publication element in subtree (iterating the element
    # yields its children; getchildren() is deprecated)
    for search_publication_element in search_publications_subtree:
        _flag_publication_as_needing_refetch(search_publication_element)
Пример #5
0
    def GetElementsFromXML(self, filename):
        """Extract a dictionary of elements from the gcc_xml file.

        Parses ``filename`` and returns a dict that maps the ``id``
        attribute of each direct child of the ``GCC_XML`` root element to
        the tuple ``(element, None)``.  Children with a missing or empty
        ``id`` are left out.

        Raises InvalidXMLError when the parser raises ExpatError or when
        the root tag is not ``GCC_XML``.
        """
        tree = ElementTree()
        try:
            tree.parse(filename)
        except ExpatError:
            # Call form of raise: same behaviour as the old
            # ``raise Class, message`` statement, but valid on every
            # Python version.
            raise InvalidXMLError('Not a XML file: %s' % filename)

        root = tree.getroot()
        if root.tag != 'GCC_XML':
            raise InvalidXMLError('Not a valid GCC_XML file')

        # build a dictionary of id -> element, None
        elements = {}
        for element in root:
            # named element_id so the builtin id() is not shadowed
            element_id = element.get('id')
            if element_id:
                elements[element_id] = element, None
        return elements
Пример #6
0
    def GetElementsFromXML(self, filename):
        """Extract a dictionary of elements from the gcc_xml file.

        The file named by ``filename`` is parsed and every direct child
        of its ``GCC_XML`` root that carries a non-empty ``id`` attribute
        is recorded.  The result maps that id to ``(element, None)``.

        Raises InvalidXMLError if parsing raises ExpatError or if the
        root element is not tagged ``GCC_XML``.
        """
        tree = ElementTree()
        try:
            tree.parse(filename)
        except ExpatError:
            # The exception is instantiated explicitly; the old
            # ``raise Class, message`` statement only exists in Python 2.
            raise InvalidXMLError('Not a XML file: %s' % filename)

        root = tree.getroot()
        if root.tag != 'GCC_XML':
            raise InvalidXMLError('Not a valid GCC_XML file')

        # build a dictionary of id -> element, None
        elements = {}
        for element in root:
            # avoid reusing the name of the builtin id()
            element_id = element.get('id')
            if element_id:
                elements[element_id] = element, None
        return elements
Пример #7
0
    def body(self, xml):
        """ Body importer

        Accepts either an XML string or an already parsed element.  The
        root must be an ``object`` element with a ``name`` attribute, and
        that name must equal ``self.context.__name__`` when the context
        has one; otherwise AttributeError is raised.  Each ``property``
        child is assigned to ``self.attribute`` and each ``object`` child
        to ``self.child``.  Afterwards an ObjectModifiedEvent is notified
        for the context and, if the context provides INews, the
        transaction is committed.
        """
        if isinstance(xml, (str, unicode)):
            builder = XMLTreeBuilder()
            builder.feed(xml)
            elem = ElementTree(builder.close()).getroot()
        else:
            elem = xml

        if elem.tag != 'object':
            raise AttributeError('Invalid xml root element %s' % elem.tag)

        name = elem.get('name')
        if not name:
            raise AttributeError('No name provided for object')

        if hasattr(self.context, '__name__') and (name != self.context.__name__):
            message = ('XML root object name %s '
                       'should match context name %s')
            raise AttributeError(message % (name, self.context.__name__))

        # Child tag -> attribute of self that receives the child element.
        targets = {'property': 'attribute', 'object': 'child'}
        for node in elem.getchildren():
            target = targets.get(node.tag)
            if target is not None:
                setattr(self, target, node)

        event.notify(ObjectModifiedEvent(self.context))

        if not INews.providedBy(self.context):
            return

        context_name = getattr(self.context, '__name__', '(no name)')
        logger.info('Commit transaction import for %s' % context_name)
        transaction.commit()
# Fragment of a migration script: rewrites category XML files to version 1.1.
# persistence_path, cat_path and comp_path are defined outside this excerpt.
prop_path = os.path.join(persistence_path, "Property")
# Abort unless the project directory and all three sub-paths exist.
# NOTE(review): the script exits with status 0 even on this error path.
if not os.path.exists(persistence_path) or \
        not os.path.exists(cat_path) or \
        not os.path.exists(comp_path) or \
        not os.path.exists(prop_path):
    print "INVALID PROJECT DIRECTORY"
    exit(0)
    
et = ElementTree()
    
#transform categories
print "Transforming Categories..."
for cat_fname in os.listdir(cat_path):
    fpath = os.path.join(cat_path, cat_fname)
    et.parse(fpath)
    version = et.getroot().get("version")
    if not version:
        # No "version" attribute on the root: name and description are
        # read from the text of child elements and moved into attributes
        # of a new <category> root.
        print "\tTransforming %s..." % cat_fname
        root = Element("category",
                        {"version": "1.1",
                         "name": et.find("name").text.strip(),
                         "description": et.find("description").text.strip()})
        # et is rebound to a tree around the new root; the next
        # iteration's et.parse() replaces that root again.
        et = ElementTree(root)
        # NOTE(review): `indent` is not a keyword of the standard library
        # ElementTree.write() - presumably a project-specific ElementTree;
        # confirm against the file's imports.
        et.write(fpath, indent=True)
    elif version == "1.0":
        # Version 1.0: name and description are already root attributes
        # and are copied onto the new <category> root.
        print "\tTransforming %s..." % cat_fname
        root = Element("category",
                        {"version": "1.1",
                         "name": et.getroot().get("name"),
                         "description": et.getroot().get("description")})
        et = ElementTree(root)
Пример #9
0
# Fragment of a migration script: rewrites category XML files to version 1.1.
# persistence_path, cat_path and comp_path are defined outside this excerpt.
prop_path = os.path.join(persistence_path, "Property")
# Abort unless the project directory and all three sub-paths exist.
# NOTE(review): the script exits with status 0 even on this error path.
if not os.path.exists(persistence_path) or \
        not os.path.exists(cat_path) or \
        not os.path.exists(comp_path) or \
        not os.path.exists(prop_path):
    print "INVALID PROJECT DIRECTORY"
    exit(0)

et = ElementTree()

#transform categories
print "Transforming Categories..."
for cat_fname in os.listdir(cat_path):
    fpath = os.path.join(cat_path, cat_fname)
    et.parse(fpath)
    version = et.getroot().get("version")
    if not version:
        # No "version" attribute on the root: name and description are
        # read from the text of child elements and moved into attributes
        # of a new <category> root.
        print "\tTransforming %s..." % cat_fname
        root = Element(
            "category", {
                "version": "1.1",
                "name": et.find("name").text.strip(),
                "description": et.find("description").text.strip()
            })
        # et is rebound to a tree around the new root; the next
        # iteration's et.parse() replaces that root again.
        et = ElementTree(root)
        # NOTE(review): `indent` is not a keyword of the standard library
        # ElementTree.write() - presumably a project-specific ElementTree;
        # confirm against the file's imports.
        et.write(fpath, indent=True)
    elif version == "1.0":
        # Version 1.0 branch; the rest of this statement lies outside the
        # excerpt.
        print "\tTransforming %s..." % cat_fname
        root = Element(
            "category", {
                "version": "1.1",
Пример #10
0
import xml
import re

# Work inside the 'votes' directory and keep only the directory entries
# whose names start with 'votes' (re.match anchors at the beginning).
os.chdir('votes')
cwdfiles=os.listdir(os.getcwd())
votesfiles=filter(lambda s:re.match('votes',s), cwdfiles)


# <top> collects one <assent> element per votes file that contains acts.
topelement=Element('top')
# Position passed to topelement.insert(); advanced after every insert.
i=1

for vf in votesfiles:
	print vf
	try:
		votetree=ElementTree(file=vf)
		voteroot=votetree.getroot()
		# The root's 'date' attribute must start with YYYY-MM-DD;
		# otherwise the script exits.
		date=voteroot.get('date')
		m=re.match('(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})',date)
		if not m:
			print "internal error in date format"
			sys.exit()
		# year/month/day plus the full date string become the attributes
		# of the <assent> element.
		mgd=m.groupdict()
		mgd.update({'date':date})
		acts=votetree.findall('//royal_assent/act')
		if len(acts)>0:
			assent=Element('assent',mgd)
			# Insert the found <act> elements in their original order.
			for j in range(len(acts)):
				assent.insert(j,acts[j])
			topelement.insert(i,assent)
			i=i+1
	# The handler body for XML parse errors lies outside this excerpt.
	except xml.parsers.expat.ExpatError, errorinst:
Пример #11
0
def process(directory, option, file_out, use_file_out, xml_file, group, verbose, recurse, progress):
    """Erase packages from, or add a package group to, an XML document.

    The document in ``xml_file`` is parsed and the package names are
    obtained from ``get_names_from_dir(directory, recurse, pb, progress)``.

    * ``option == comps_opt.ERASE``: every ``packagereq`` element found
      three levels below the root whose text is one of those names is
      removed from its parent.
    * ``option == comps_opt.ADD``: a new ``<group>`` element, with ``group``
      as both id and name and one mandatory ``packagereq`` per name, is
      appended to the root.
    * anything else: ``die`` is called with an error message.

    The result is written to ``file_out`` when ``use_file_out`` is true,
    otherwise it is dumped via ``dump``.  ``verbose`` enables status
    messages on standard output; ``progress`` enables progress bar updates.
    """
    if verbose:
        print("Inside process...")

    # Ask the shell for the terminal width; int() raises ValueError when the
    # output is not a number (for example empty), in which case 60 is used.
    col = commands.getoutput("echo \"$COLUMNS\"")
    try:
        columns = int(col)
    except ValueError:
        columns = 60
    pb = progress_bar.pb("Progress: ", "-", columns, sys.stderr)

    tree = ElementTree(file=xml_file)
    elem = tree.getroot()

    if verbose:
        print("Getting rpm_names")

    rpm_names = get_names_from_dir(directory, recurse, pb, progress)

    if verbose:
        print("Processing names")

    if option == comps_opt.ERASE:
        # Handle the ERASE operations
        for subelem in elem:
            for subsub in subelem:
                # Iterate over a snapshot: removing children from `subsub`
                # while iterating it directly skips the sibling that follows
                # each removed element, and changes len(subsub) mid-loop.
                candidates = list(subsub)
                total = len(candidates)
                for position, subsubsub in enumerate(candidates):
                    if progress:
                        pb.progress((position + 1.0) / total)

                    if subsubsub.tag == 'packagereq' and subsubsub.text in rpm_names:
                        subsub.remove(subsubsub)
                        if verbose:
                            print("Found %s, removing" % subsubsub.text)
    elif option == comps_opt.ADD:
        # Handle the ADD operations
        # Local import so the block does not depend on the file's import
        # section; escape() keeps '&', '<' and '>' in names from producing
        # malformed XML.
        from xml.sax.saxutils import escape

        pieces = [
            "<group>\n",
            "<id>%s</id>\n" % escape("%s" % group),
            "<name>%s</name>\n" % escape("%s" % group),
            "<packagelist>\n",
        ]

        p = 0.0
        for name in rpm_names:
            p = p + 1.0
            if progress:
                percentage = p / len(rpm_names)
                pb.progress(percentage)

            pieces.append(
                "<packagereq type=\"mandatory\">%s</packagereq>\n"
                % escape("%s" % name))

        pieces.append("</packagelist>\n")
        pieces.append("</group>\n")
        node = fromstring("".join(pieces))
        elem.append(node)
    else:
        die("Some unknown error has occured. Neither 'ADD' nor 'ERASE' was specified, somehow")

    if progress:
        pb.clear()

    if verbose:
        print("Ending, outputing XML")

    if use_file_out:
        # `tree` already is an ElementTree; wrapping it in another
        # ElementTree fails because an ElementTree is not an element.
        tree.write(file_out)
    else:
        dump(tree)
Пример #12
0
def update_publication(publication_object):
    """
    Refresh a publication object from the Symplectic API.

    Downloads the XML for ``publication_object.guid`` into a local file,
    parses it and deletes the file.  When the root element's ``id``
    matches the guid, the object's ``new_id``, ``is_deleted``, ``type``,
    ``created_when`` and ``last_modified_when`` are set from the root's
    attributes, ``needs_refetch`` is cleared and the object is saved.
    Each child of the ``bibliographic-records`` subtree is then passed to
    ``_create_biblio_object`` together with the publication object.

    Returns None.  Nothing is done when ``publication_object`` is falsy,
    its guid is the empty string, or the returned XML is for a different
    id.
    """
    # checking
    if not publication_object or publication_object.guid == '':
        return

    # symplectic api url and local file path
    url = SYMPLECTIC_API_URL + 'publications/' + publication_object.guid

    tmp_filename = "".join([
        SYMPLECTIC_LOCAL_XML_FOLDER,
        SYMPLECTIC_LOCAL_PUBS_FOLDER,
        str(publication_object.guid),
        '.xml',
    ])

    # get xml document from symplectic api and store on hd; urlretrieve
    # returns (filename, headers) and only the filename is needed
    tmp_filename = urllib.urlretrieve(url, tmp_filename)[0]
    try:
        # parse xml file
        pub_etree = ElementTree(file=tmp_filename)
    finally:
        # delete local file from hd, even when parsing fails, so that a
        # malformed response does not leave the download behind
        os.remove(tmp_filename)

    # ++++++PUBLICATION++++++
    # publication element
    pub_element = pub_etree.getroot()
    # no loading-of/blank publication object required as updating the one
    # passed in
    # check returned xml element is valid and for correct publication
    if pub_element is None:
        return
    if publication_object.guid != pub_element.get('id', ''):
        return

    # publication attributes (pub_element is known not to be None here)
    publication_object.new_id = pub_element.get('new-id')
    publication_object.is_deleted = (
        pub_element.get('is-deleted', 'false') == 'true'
    )
    # XML attribute names use hyphens; the object's fields use underscores
    for attr_name in ("type", "created-when", "last-modified-when"):
        setattr(
            publication_object,
            attr_name.replace("-", "_"),
            pub_element.get(attr_name, ""),
        )

    # just fetched latest version from symplectic
    publication_object.needs_refetch = False
    # save updated publication object
    publication_object.save()

    # ++++++BIBLIOGRAPHIC-RECORD++++++
    # bibliographic-record elements are held in a subtree
    biblio_subtree = pub_etree.find(
        SYMPLECTIC_NAMESPACE + 'bibliographic-records'
    )
    # check if any bibliographic-record elements in subtree
    if biblio_subtree is None or len(biblio_subtree) < 1:
        return
    # for each bibliographic-record element in subtree (iterating the
    # element yields its children; getchildren() is deprecated)
    for biblio_element in biblio_subtree:
        _create_biblio_object(biblio_element, publication_object)
Пример #13
0
def _get_users(researcher_object):
    """
    Asks Symplectic API for User info about specified researcher
    Specify which researcher using proprietary-id
    Receives XML File as response
    Parses XML File to find symplectic ID for each User

    Returns the ``id`` attribute of the first user element when its
    ``proprietary-id`` equals ``str(researcher_object.person_id)``; that
    id is also stored in ``researcher_object.symplectic_id`` and the
    object is saved.  Returns "" when the response has no root element,
    and None (implicitly) when the root has no children.

    Raises ESymplecticGetFileError when the download raises
    urllib2.URLError, and ESymplecticExtractUserGUIDError when the first
    user element is for a different proprietary id.
    """
    # symplectic api url and local file path
    url = "".join([
        SYMPLECTIC_API_URL,
        'search-users?',
        '&include-deleted=true',
        '&authority=',
        AUTHENTICATING_AUTHORITY,
        '&proprietary-id=',
        str(researcher_object.person_id)
        ])

    # NOTE(review): the next two lines look damaged (redacted text); the
    # assignment of tmp_filename that the code below relies on is not
    # visible here - restore it from the original source.
    #'&username='******'.xml'
        ])

    #get xml document from symplectic api and store on hd
    # NOTE(review): only urllib2.URLError is translated; confirm whether
    # urllib.urlretrieve can raise other errors that should be wrapped too.
    try:
        (tmp_filename, http_headers) = urllib.urlretrieve(url, tmp_filename)
    except urllib2.URLError:
        raise ESymplecticGetFileError("""
            Could not HTTP GET the XML file of User GUID from Symplectic API
            """)

    #parse xml file
    # NOTE(review): if parsing raises, the os.remove() below is not
    # reached and the downloaded file stays on disk.
    users_etree = ElementTree(file=tmp_filename)
    usersresponse_element = users_etree.getroot()

    #delete local file from hd
    #try:
    os.remove(tmp_filename)
    #except:
    #pass
    #check if any user elements in tree
    if usersresponse_element is None:
        return ""
    # for each retrieved user element in tree (should only be 1)
    # Both branches below leave the function, so only the first child is
    # ever examined.
    for user_element in usersresponse_element.getchildren():
        # pull out of xml what symplectic says this researcher's proprietary
        # id and symplectic-id are
        proprietary_id = user_element.attrib.get("proprietary-id")
        id = user_element.attrib.get("id")
        # if arkestra and symplectic agree this is the same person
        if str(researcher_object.person_id) == proprietary_id:
            # researcher_object.symplectic_int_id = id # int_id version
            researcher_object.symplectic_id = id  # guid version
            researcher_object.save()
            # force return after 1 (should only be 1 person per xml file
            # anyway)

            return id
        else:
            raise ESymplecticExtractUserGUIDError("""
                ID returned by Symplectic API not for correct Arkestra User
                (Proprietary ID doesnt match
                """)
Пример #14
0
import sys
import xml
import re

# Work inside the 'votes' directory and keep only the directory entries
# whose names start with 'votes' (re.match anchors at the beginning).
os.chdir('votes')
cwdfiles = os.listdir(os.getcwd())
votesfiles = filter(lambda s: re.match('votes', s), cwdfiles)

# <top> collects one <assent> element per votes file that contains acts.
topelement = Element('top')
# Position passed to topelement.insert(); advanced after every insert.
i = 1

for vf in votesfiles:
    print vf
    try:
        votetree = ElementTree(file=vf)
        voteroot = votetree.getroot()
        # The root's 'date' attribute must start with YYYY-MM-DD;
        # otherwise the script exits.
        date = voteroot.get('date')
        m = re.match('(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})', date)
        if not m:
            print "internal error in date format"
            sys.exit()
        # year/month/day plus the full date string become the attributes
        # of the <assent> element.
        mgd = m.groupdict()
        mgd.update({'date': date})
        acts = votetree.findall('//royal_assent/act')
        if len(acts) > 0:
            assent = Element('assent', mgd)
            # Insert the found <act> elements in their original order.
            for j in range(len(acts)):
                assent.insert(j, acts[j])
            topelement.insert(i, assent)
            i = i + 1
    # The handler body for XML parse errors lies outside this excerpt.
    except xml.parsers.expat.ExpatError, errorinst: