Example No. 1
def handler(req):
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id='
    
    ## uris should be of the form
    ## http://rguha.ath.cx/~rguha/cicc/rest/depict/SMILES
    uriParts = req.uri.split('/')
    ids = ','.join([x.strip() for x in uriParts[-1].split(',')])
    url = url+ids

    if req.method not in ['GET']:
        req.err_headers_out['Allow'] = 'GET'
        raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED

    result = ''
    headingCounts = {}
    narticle = 0
    
    data = ''.join(urllib.urlopen(url).readlines())
    doc = XML(data)
    for article in doc.findall('PubmedArticle'):
        narticle += 1
        for mh in article.findall('MedlineCitation/MeshHeadingList/MeshHeading/DescriptorName'):
            if mh.text in headingCounts.keys():
                headingCounts[mh.text] += 1
            else:
                headingCounts[mh.text] = 1

    ## most frequent first
    headingCounts = sorted(headingCounts.items(), key = operator.itemgetter(1), reverse=True)
    for key,item in headingCounts:
        result += '%s # %d/%d\n' % (key, item, narticle)
    
    req.content_type = 'text/plain'
    req.write(result)
    return apache.OK
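For reference, the MeSH-heading count in this handler can be reproduced with nothing but the standard library; a minimal standalone sketch follows, where the two-article XML sample is made up for illustration and is not real efetch output.

from collections import Counter
from xml.etree.ElementTree import XML

sample = """<PubmedArticleSet>
  <PubmedArticle><MedlineCitation><MeshHeadingList>
    <MeshHeading><DescriptorName>Humans</DescriptorName></MeshHeading>
    <MeshHeading><DescriptorName>Neoplasms</DescriptorName></MeshHeading>
  </MeshHeadingList></MedlineCitation></PubmedArticle>
  <PubmedArticle><MedlineCitation><MeshHeadingList>
    <MeshHeading><DescriptorName>Humans</DescriptorName></MeshHeading>
  </MeshHeadingList></MedlineCitation></PubmedArticle>
</PubmedArticleSet>"""

doc = XML(sample)
articles = doc.findall('PubmedArticle')
counts = Counter(mh.text
                 for article in articles
                 for mh in article.findall('MedlineCitation/MeshHeadingList/'
                                           'MeshHeading/DescriptorName'))
for heading, n in counts.most_common():          # most frequent first
    print('%s # %d/%d' % (heading, n, len(articles)))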
Example No. 2
    def convert(self, data, cache, **kwargs):
        bodydom = Element('div')
        kmldom = XML(data)
        ns = kmldom.tag.strip('kml')
        placemarks = kmldom.findall('.//%sPlacemark' % ns)
        for placemark in placemarks:
            titles = placemark.findall(ns + 'name')
            for title in titles:
                t = Element('h2')
                t.text = title.text
                bodydom.append(t)

            descriptions = placemark.findall(ns+'description')
            for desc in descriptions:
                if desc.text:
                    try:
                        text = desc.text.encode('ascii', 'xmlcharrefreplace').strip()
                    except:
                        text = desc.text.strip()
                    text = sanitize(text)
                    d = XML('<div>' + text.encode('ascii', 'xmlcharrefreplace') + '</div>')
                    bodydom.append(d)

        body = tostring(bodydom)
        cache.setData(body)
        return cache
Example No. 3
    def convert(self, data, cache, **kwargs):
        bodydom = Element('div')
        kmldom = XML(data)
        ns = kmldom.tag.strip('kml')
        placemarks = kmldom.findall('.//%sPlacemark' % ns)
        for placemark in placemarks:
            titles = placemark.findall(ns + 'name')
            for title in titles:
                t = Element('h2')
                t.text = title.text
                bodydom.append(t)

            descriptions = placemark.findall(ns + 'description')
            for desc in descriptions:
                if desc.text:
                    try:
                        text = desc.text.encode('ascii',
                                                'xmlcharrefreplace').strip()
                    except:
                        text = desc.text.strip()
                    text = sanitize(text)
                    d = XML('<div>' +
                            text.encode('ascii', 'xmlcharrefreplace') +
                            '</div>')
                    bodydom.append(d)

        body = tostring(bodydom)
        cache.setData(body)
        return cache
Example No. 4
 def DataFeedQuery(self,start_date,end_date,table_id,report_type,parse_as="flat",sort='',filters='',max_results='50',chart_type="table"):
     m_indx = report_type.index("m-")
     d_indx = report_type.index("d-")
     dimensions = report_type[d_indx+2:m_indx-1]
     # prefix each dimension with "ga:"
     dim = ",".join("ga:" + d for d in dimensions.split(","))
     metrics = report_type[m_indx+2:]
     # prefix each metric with "ga:"
     metr = ",".join("ga:" + m for m in metrics.split(","))
     #try:            
     print "TEST3"
     if max_results == '':
         max_results = 50
     self.data_feed_query = gdata.analytics.client.DataFeedQuery({
         'ids': table_id,
         'start-date': start_date,
         'end-date': end_date,
         'dimensions':dim,
         'metrics':metr,
         'sort': sort,
         'filters': filters,
         'max-results': max_results})
     print self.data_feed_query
     self.data_feed = self.my_client.GetDataFeed(self.data_feed_query)
     self.parsed_data_feed = XML(str(self.data_feed))
     print self.parsed_data_feed
     if parse_as == "raw":
         return self.parsed_data_feed,len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')),True
     else:
         return self.ParseDataFeedResults(self.parsed_data_feed,parse_as),len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')),True
     """except:
Example No. 5
def process_rss_feed(request, link_id_encoded, website_encode_id):
    from atrinsic.base.models import Link, Website
    from atrinsic.web.helpers import base36_decode
    from elementtree.ElementTree import XML, tostring
    import urllib2
    link = Link.objects.get(pk=base36_decode(link_id_encoded))
    website = Website.objects.get(pk=base36_decode(website_encode_id))
    tracking_url = link.track_html_ape(website, link_only=True)
    try:
        raw_response = urllib2.urlopen(link.link_content)
    except:
        return AQ_render_to_response(request,
                                     'base/custom_error.html', {
                                         'errmsg': RSS_TIMEOUT,
                                     },
                                     context_instance=RequestContext(request))

    tree = XML(raw_response.read())
    for node in tree.getiterator('link'):
        domain_position = node.text.find(".com") + 4
        node.text = tracking_url + "&url=" + node.text[domain_position:]

    return render_to_response("blank_xml_template.html",
                              {"XML": tostring(tree)},
                              mimetype="application/xhtml+xml")
Example No. 6
def extract_title(data):
    kmldom = XML(data)
    ns = kmldom.tag.strip('kml')
    titles = kmldom.findall('.//%sname' % ns)
    if titles:
        return titles[0].text.strip()
    else:
        return 'N/A'
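A side note on the `ns = kmldom.tag.strip('kml')` trick used throughout these KML examples: `strip('kml')` removes the characters k, m and l from both ends of the tag, which happens to leave the '{namespace}' prefix intact. A sketch of a less fragile way to recover it (the helper name is ours, not from the original code):

def kml_namespace(root_tag):
    """Return '{uri}' from a qualified tag such as '{http://www.opengis.net/kml/2.2}kml'."""
    return root_tag[:root_tag.index('}') + 1] if root_tag.startswith('{') else ''

# kml_namespace('{http://www.opengis.net/kml/2.2}kml') -> '{http://www.opengis.net/kml/2.2}'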
Example No. 7
def extractfeatures_from_file(data):
    kmldom = XML(data)
    ns = kmldom.tag.strip('kml')
    points = kmldom.findall('.//%sPoint' % ns)
    lines = kmldom.findall('.//%sLineString' % ns)
    polygons = kmldom.findall('.//%sPolygon' % ns)
    mpoint = []
    mline = []
    mpoly = []
    for point in points:
        coordinates = point.findall('.//%scoordinates' % ns)
        for coordinate in coordinates:
            latlon = coordinate.text.strip().split(',')
            coords = [float(c) for c in latlon]
            try:
                p = Point(coords)
                mpoint.append(p)
            except:
                logger.info('invalid point geometry: %s' % coordinates[:10] )

    for line in lines:
        coordinates = line.findall('.//%scoordinates' % ns)
        for coordinate in coordinates:
            latlons = coordinate.text.split()
            coords = []
            for latlon in latlons:
                coords.append([float(c) for c in latlon.split(',')])
            try:
                l = LineString(coords)
                mline.append(l)
            except:
                logger.info('invalid linestring geometry: %s' % coordinates[:10] )

    for polygon in polygons:
        coordinates = polygon.findall('.//%scoordinates' % ns)
        for coordinate in coordinates:
            latlons = coordinate.text.split()
            coords = []
            for latlon in latlons:
                coords.append([float(c) for c in latlon.split(',')])
            try:
                l = Polygon(coords)
                mpoly.append(l)

            except:
                logger.info('invalid polygon geometry: %s' % coordinates[:10] )

    result = {'MultiPoint':None, 'MultiLineString':None, 'MultiPolygon':None}
    if mpoint:
        result['MultiPoint'] =  MultiPoint(mpoint)
    if mline:
        result['MultiLineString'] = MultiLineString(mline)
    if mpoly:
        result['MultiPolygon'] = MultiPolygon(mpoly)


    return result
Example No. 8
def extract_description(data):
    kmldom = XML(data)
    ns = kmldom.tag.strip('kml')
    descriptions = kmldom.findall('.//%sdescription' % ns)
    desc = ''
    for description in descriptions:
        if 'Double click to zoom in' != description.text.strip():
            desc += description.text.strip()

    return desc
Example No. 9
 def create(self, trans, payload, **kwd):
     """
     POST /api/forms
     Creates a new form.
     """
     if not trans.user_is_admin():
         trans.response.status = 403
         return "You are not authorized to create a new form."
     xml_text = payload.get('xml_text', None)
     if xml_text is None:
         trans.response.status = 400
         return "Missing required parameter 'xml_text'."
         #enhance to allow creating from more than just xml
     form_definition = form_factory.from_elem(XML(xml_text))
     trans.sa_session.add(form_definition)
     trans.sa_session.flush()
     encoded_id = trans.security.encode_id(form_definition.id)
     item = form_definition.to_dict(view='element',
                                    value_mapper={
                                        'id':
                                        trans.security.encode_id,
                                        'form_definition_current_id':
                                        trans.security.encode_id
                                    })
     item['url'] = url_for('form', id=encoded_id)
     return [item]
Example No. 10
def test_input_value_wrapper(tool):
    parameter = IntegerToolParameter(
        tool,
        XML('<param name="blah" type="integer" size="4" value="10" min="0" />')
    )
    wrapper = InputValueWrapper(parameter, "5")
    assert str(wrapper) == "5"
Example No. 11
    def __test_arbitrary_path_rewriting(self):
        self.job.parameters = [
            JobParameter(name="index_path", value="\"/old/path/human\"")
        ]
        xml = XML('''<param name="index_path" type="select">
            <option value="/old/path/human">Human</option>
            <option value="/old/path/mouse">Mouse</option>
        </param>''')
        parameter = SelectToolParameter(self.tool, xml)

        def get_field_by_name_for_value(name, value, trans, other_values):
            assert value == "/old/path/human"
            assert name == "path"
            return ["/old/path/human"]

        parameter.options = Bunch(
            get_field_by_name_for_value=get_field_by_name_for_value)
        self.tool.set_params({"index_path": parameter})
        self.tool._command_line = "prog1 $index_path.fields.path"

        def test_path_rewriter(v):
            if v:
                v = v.replace("/old", "/new")
            return v

        self._set_compute_environment(path_rewriter=test_path_rewriter)
        command_line, extra_filenames = self.evaluator.build()
        self.assertEquals(command_line, "prog1 /new/path/human")
Example No. 12
    def test_conditional_evaluation(self):
        select_xml = XML(
            '''<param name="always_true" type="select"><option value="true">True</option></param>'''
        )
        parameter = SelectToolParameter(self.tool, select_xml)

        conditional = Conditional()
        conditional.name = "c"
        conditional.test_param = parameter
        when = ConditionalWhen()
        when.inputs = {"thresh": self.tool.test_thresh_param()}
        when.value = "true"
        conditional.cases = [when]
        self.tool.set_params({"c": conditional})
        self.job.parameters = [
            JobParameter(
                name="c",
                value=
                '''{"thresh": 4, "always_true": "true", "__current_case__": 0}'''
            )
        ]
        self.tool._command_line = "prog1 --thresh=${c.thresh} --test_param=${c.always_true}"
        self._set_compute_environment()
        command_line, extra_filenames = self.evaluator.build()
        self.assertEquals(command_line, "prog1 --thresh=4 --test_param=true")
Example No. 13
 def __split_models(self, xmlDoc):
     """generator that takes parameter xmlDoc and splits it into many
     xml files, with only one model per each"""
     elem = XML(xmlDoc)
     models = elem.find("Models")
     if models:
         elem.remove(models)
         for model in models:    
             to_return = copy.deepcopy(elem)    
             new_models = Element("Models")
             for a in models.attrib:
                 new_models.attrib[a] = models.attrib[a]
             new_models.append(model)
             to_return.append(new_models)
             yield (model.attrib['id'], to_return)
     else:
         pass #TODO return error
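A standalone sketch of the same Models-splitting idea, runnable outside the class; the input document and its attributes are invented for illustration.

import copy
from xml.etree.ElementTree import XML, Element

def split_models(xml_text):
    """Yield (model id, document copy) pairs, one Model per copy."""
    elem = XML(xml_text)
    models = elem.find("Models")
    if models is None:
        return
    elem.remove(models)
    for model in models:
        single = copy.deepcopy(elem)
        wrapper = Element("Models", dict(models.attrib))
        wrapper.append(model)
        single.append(wrapper)
        yield model.attrib['id'], single

doc = '<Job><Models kind="demo"><Model id="m1"/><Model id="m2"/></Models></Job>'
for model_id, single_doc in split_models(doc):
    print(model_id)    # -> m1, then m2; single_doc is a full copy holding only that Model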
Example No. 14
 def xml2ListOfDict_Tutorial(xml):
     """Same as previous function, but the hard (wrong) way."""
     lis = []
     for elt in XML(xml):
         dic = {}
         for child in elt:
             dic[child.tag] = child.text
         lis.append(dic)
     return lis
Example No. 15
def quick_output(format, format_source=None, change_format_xml=None):
    test_output = ToolOutput("test_output")
    test_output.format = format
    test_output.format_source = format_source
    if change_format_xml:
        test_output.change_format = XML(change_format_xml)
    else:
        test_output.change_format = None
    return test_output
Example No. 16
def xmltidy(text):
    """send back good lookin' xml (basically a syntax checker)"""
    try:
        elem = XML(text)
        text = tostring(elem)
        return text
    except:
        #print text
        return ''
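Typical use as a quick well-formedness check (illustrative values):

print(xmltidy('<a><b>text</b></a>'))      # '<a><b>text</b></a>'
print(repr(xmltidy('<a><b>text</a>')))    # '' -- mismatched tag, so the parse fails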
Example No. 17
def test_select_wrapper_simple_options(tool):
    xml = XML('''<param name="blah" type="select">
        <option value="x">I am X</option>
        <option value="y" selected="true">I am Y</option>
        <option value="z">I am Z</option>
    </param>''')
    parameter = SelectToolParameter(tool, xml)
    wrapper = SelectToolParameterWrapper(parameter, "x", tool.app)
    assert str(wrapper) == "x"
    assert wrapper.name == "blah"
    assert wrapper.value_label == "I am X"
Example No. 18
def process_rss_feed(request,link_id_encoded,website_encode_id):
    from atrinsic.base.models import Link,Website
    from atrinsic.web.helpers import base36_decode
    from elementtree.ElementTree import XML,tostring
    import urllib2
    link = Link.objects.get(pk=base36_decode(link_id_encoded))
    website = Website.objects.get(pk=base36_decode(website_encode_id))
    tracking_url = link.track_html_ape(website,link_only=True)
    try:
        raw_response = urllib2.urlopen(link.link_content)
    except:
        return AQ_render_to_response(request, 'base/custom_error.html', {
                'errmsg' : RSS_TIMEOUT,
            }, context_instance=RequestContext(request))

    tree = XML(raw_response.read())
    for node in tree.getiterator('link'):
        domain_position = node.text.find(".com") + 4
        node.text = tracking_url + "&url=" + node.text[domain_position:]
    
    return render_to_response("blank_xml_template.html", {"XML":tostring(tree)}, mimetype="application/xhtml+xml")
Example No. 19
 def test_evaluation_of_optional_datasets(self):
     # Make sure optional dataset don't cause evaluation to break and
     # evaluate in cheetah templates as 'None'.
     select_xml = XML(
         '''<param name="input1" type="data" optional="true"></param>''')
     parameter = DataToolParameter(self.tool, select_xml)
     self.job.parameters = [JobParameter(name="input1", value=u'null')]
     self.tool.set_params({"input1": parameter})
     self.tool._command_line = "prog1 --opt_input='${input1}'"
     self._set_compute_environment()
     command_line, extra_filenames = self.evaluator.build()
     self.assertEquals(command_line, "prog1 --opt_input='None'")
Example No. 20
def _setup_blast_tool(tool, multiple=False):
    tool.app.write_test_tool_data("blastdb.loc",
                                  "val1\tname1\tpath1\nval2\tname2\tpath2\n")
    xml = XML(
        '''<param name="database" type="select" label="Nucleotide BLAST database" multiple="%s">
        <options from_file="blastdb.loc">
            <column name="value" index="0"/>
            <column name="name" index="1"/>
            <column name="path" index="2"/>
        </options>
    </param>''' % multiple)
    parameter = SelectToolParameter(tool, xml)
    return parameter
Example No. 21
    def param(self):
        if not self._param:
            multi_text = ""
            if self.multiple:
                multi_text = 'multiple="True"'
            optional_text = ""
            if self.optional:
                optional_text = 'optional="True"'
            template_xml = '''<param name="data2" type="data" ext="txt" %s %s></param>'''
            self.param_xml = XML(template_xml % (multi_text, optional_text))
            self._param = basic.DataToolParameter(self.mock_tool,
                                                  self.param_xml)

        return self._param
Example No. 22
def _drilldown_parameter(tool):
    xml = XML(
        '''<param name="some_name" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true">
        <options>
            <option name="Heading 1" value="heading1">
                <option name="Option 1" value="option1"/>
                <option name="Option 2" value="option2"/>
                <option name="Heading 1" value="heading1">
                    <option name="Option 3" value="option3"/>
                    <option name="Option 4" value="option4"/>
                </option>
            </option>
            <option name="Option 5" value="option5"/>
        </options>
    </param>''')
    parameter = DrillDownSelectToolParameter(tool, xml)
    return parameter
Example No. 23
    def param(self):
        if not self._param:
            multi_text = ""
            if self.multiple:
                multi_text = 'multiple="True"'
            optional_text = ""
            if self.optional:
                optional_text = 'optional="True"'
            options_text = self.options_xml
            data_ref_text = ""
            if self.set_data_ref:
                data_ref_text = 'data_ref="input_bam"'
            template_xml = '''<param name="my_name" type="%s" %s %s %s>%s</param>'''
            self.param_xml = XML(template_xml %
                                 (self.type, data_ref_text, multi_text,
                                  optional_text, options_text))
            self._param = basic.SelectToolParameter(self.mock_tool,
                                                    self.param_xml)

        return self._param
Example No. 24
    def param(self):
        if not self._param:
            multi_text = ""
            if self.multiple:
                multi_text = 'multiple="True"'
            optional_text = ""
            if self.optional:
                optional_text = 'optional="True"'
            data_ref_text = ""
            if self.set_data_ref:
                data_ref_text = 'data_ref="input_tsv"'
            template_xml = '''<param name="my_name" type="%s" %s %s %s %s></param>'''
            self.param_xml = XML(template_xml %
                                 (self.type, data_ref_text, multi_text,
                                  optional_text, self.other_attributes))
            self._param = basic.ColumnListParameter(self.mock_tool,
                                                    self.param_xml)
            self._param.ref_input = bunch.Bunch(formats=[
                model.datatypes_registry.get_datatype_by_extension("tabular")
            ])

        return self._param
Example No. 25
    def test_context(self):
        if self._test_context is None:
            option_xml = ""
            if self.filtered_param:
                option_xml = '''<options><filter type="data_meta" ref="data1" key="dbkey" /></options>'''
            param_xml = XML(
                '''<param name="data2" type="data" ext="txt">%s</param>''' %
                option_xml)
            self.param = basic.DataToolParameter(
                tool=self.tool,
                elem=param_xml,
            )

            self._test_context = dataset_matcher.DatasetMatcher(
                trans=bunch.Bunch(
                    app=self.app,
                    get_current_user_roles=lambda: self.current_user_roles,
                    workflow_building_mode=True,
                ),
                param=self.param,
                value=[],
                other_values=self.other_values)

        return self._test_context
Example No. 26
def parsePodcast(podcastXML, config, filemode=None):
    """
    Access to the podcast and return all information in a podcast object
    
    return None when no info available, when error raise MyCancel
    """
    podcastInfo = PodcastInfo()
    podcastNode = XML(podcastXML)
    channelNode = podcastNode.find('channel')

    ### TODO handle when title is empty, use the program title
    titlePodcast = channelNode.findtext('title', '')
    titlePodcastAscii = titlePodcast.encode('ascii', 'ignore')

    if not titlePodcast:
        titlePodcast = 'UNDEFINED'
        titlePodcastAscii = 'UNDEFINED'
    else:
        titlePodcastAscii = getCroppedFilename(titlePodcastAscii)
        titlePodcastAscii = cleanString(titlePodcastAscii)

    ## TODO support the podcast named the same way... add a hashcode after the title, and a main podcast.xml file at the root

    # the target local directory
    targetDirectory = os.path.join(config.podcastDownloadPath,
                                   titlePodcastAscii)
    chandescription = channelNode.findtext('description', '')
    #chanImage         = getXMLAttrText(channeldom, 'itunes:image', 'href')

    ####
    podcastInfo.title = titlePodcast
    podcastInfo.description = chandescription
    #podcastInfo.image = chanImage
    podcastInfo.titleAscii = titlePodcastAscii
    podcastInfo.targetDirectory = targetDirectory

    # Get the local info, and a link on the element node, to be modified later
    podcastLocalInfo = getPodcastLocalItems(podcastInfo)

    # parse the item list
    #items = channeldom.getElementsByTagName('item')
    itemExist = channelNode.find('item') != None

    if not itemExist:
        # return empty mark
        return None

    #for item in items:
    for itemNode in channelNode.getiterator('item'):
        descr = ''
        type = ''
        length = ''

        title = itemNode.findtext('title', '')
        descr = itemNode.findtext('description', '')
        #pubDate    = getXMLTagText(item, 'pubDate')
        #duration   = getXMLTagText(item, 'itunes:duration')

        enclosureNode = itemNode.find('enclosure')
        if enclosureNode == None:
            continue

        # the url can be a redirect; urllib follows this link for download
        url = enclosureNode.get('url')
        if url == None:
            # when no url, continue
            continue

        # search in the local if the file is already here, already downloaded, and add it
        foundLocalItem = None
        for podlocalitem in podcastLocalInfo.itemsInfo:
            if podlocalitem.url == url:
                foundLocalItem = podlocalitem
        if foundLocalItem != None:
            podcastInfo.itemsInfo.append(foundLocalItem)
            continue

        # type is not always defined?!, can test with urllib when downloaded
        type = enclosureNode.get('type')
        # Length only used for information as list, exact size is found later during download
        length = enclosureNode.get('length')

        #####################
        podItem = PodcastItem()
        podItem.title = title
        podItem.description = descr
        podItem.url = url
        podItem.type = type
        podItem.length = length
        podcastInfo.itemsInfo.append(podItem)

    # TODO ?? return when ! config.podcastDownload

    # init the titles and the filename, filelocation
    for podItem in podcastInfo.itemsInfo:

        # the display title
        title = podItem.title

        # when is local, don't process filenames and add >> in the titles
        if podItem.isLocal:
            # check that the file exists and is fully downloaded; if not, try to download ...
            if not os.path.exists(podItem.fileLocation):
                podItem.isLocal = False
                podItem.flagfinish = False

            elif podItem.flagfinish:
                title = '>> ' + title
            else:
                title = '<> ' + title

        # when not local, or not file found
        else:

            # TODO test that valid name, no special char, len > sufficient, and not always the same name,
            # TODO OR add something in the MEDIA xml file

            ## TODO when a podcast title is reused many times, use a hashcode after the title filename

            ### PROBLEM with the path length limit: when the filename is too long, perhaps fall back to equivalents...

            #  podcastInfo.useTitleForName or
            if filemode == "title":
                filename = podItem.title
                podcastInfo.useTitleForName
            else:
                filename = getLastStringPart(
                    podItem.url.encode('ascii', 'ignore'), '/')
                filename = getBeforeStringPart(filename, '?')

                ### TODO test if this filename already exists in the list..?? -> if YES MARK it using a poditem flag, and use title...

            filename = getCroppedFilename(filename)  # 42-4
            filename = cleanString(filename)

            fileLocation = targetDirectory + '\\' + filename

            # set the properties
            podItem.filename = filename
            podItem.fileLocation = fileLocation

            # When the file exists but has no entry in the xml, add it here
            if os.path.exists(podItem.fileLocation):
                title = '>? ' + title

        # process size
        size = 0
        if podItem.size != 0:
            size = podItem.size
        elif podItem.length and len(podItem.length) > 0:
            size = round(long(podItem.length) / 1000000.0, 1)
        # set size and title
        if size != 0:
            podItem.size = size
            title = title + ' (' + str(size) + 'Mo)'

        podcastInfo.itemFilenames.append(podItem.filename)
        podcastInfo.titles2display.append(title)

    # search the folder for already downloaded files that are not referenced in the xml
    appendLocalMedia(podcastInfo, podcastLocalInfo)

    # return the podcast info with items
    return podcastInfo
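The enclosure handling in the item loop above reduces to the pattern below; the feed is a minimal made-up example, not a real podcast.

from xml.etree.ElementTree import XML

rss = """<rss version="2.0"><channel><title>Demo podcast</title>
  <item>
    <title>Episode 1</title>
    <enclosure url="http://example.org/ep1.mp3" type="audio/mpeg" length="123456"/>
  </item>
</channel></rss>"""

channel = XML(rss).find('channel')
for item in channel.findall('item'):
    enclosure = item.find('enclosure')
    if enclosure is None or enclosure.get('url') is None:
        continue                         # skip items without a downloadable file
    print('%s -> %s (%s bytes)' % (item.findtext('title', ''),
                                   enclosure.get('url'),
                                   enclosure.get('length')))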
Example No. 27
 def test_thresh_param(self):
     elem = XML('<param name="thresh" type="integer" value="5" />')
     return IntegerToolParameter(self, elem)
Example No. 28
# (assumed) imports and feed preamble: the original snippet is cut off here and
# the text below follows the standard RFC 4287 sample feed
import StringIO
from elementtree.ElementTree import ElementTree, XML, SubElement

content = """<?xml version="1.0" encoding="utf-8"?>
   <feed xmlns="http://www.w3.org/2005/Atom">

     <title>Example Feed</title>
     <link href="http://example.org/"/>
     <updated>2003-12-13T18:30:02Z</updated>
     <author>
       <name>John Doe</name>
     </author>
     <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>

     <entry>
       <title type="xhtml">Atom-Powered <br/> Robots Run Amok</title>
       <link href="http://example.org/2003/12/13/atom03"/>
       <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
       <updated>2003-12-13T18:30:02Z</updated>
       <summary>Some text.</summary>
     </entry>

   </feed>"""

etree = ElementTree(file=StringIO.StringIO(content))
feed = XML(content)

print etree
print feed

#print len(feed)
#print feed[0]
#print feed.keys()

ATOM = "http://www.w3.org/2005/Atom"

entry = etree.getiterator('{%s}entry'%ATOM)[0]
new_lin = SubElement(entry, '{%s}link'%ATOM)
new_lin.set('rel', 'source')
new_lin.set('href', 'http://somthing.org')
Example No. 29
    for index in range(1,typeID_List_length-1):
        typeID = typeID_List[index]
        typeID_Progress = 100.0 * ((1.0 * index) / typeID_List_length)

        if (verbose == 1):
            print "Getting eve-central data for typeID ", typeID_List[
                index], " - ", typeName_List[index]
        contents = eve_central_query_market_data(regionID, option)
        if (verbose == 1):
            print "Response from eve-central:"
            print contents
            print "END RESPONSE"

        if (contents[0:5] == "<?xml"):
            # Parse the XML response from EVE-Central
            # See usage here for ElementTree:  http://effbot.org/zone/element-index.htm
            tree = XML(contents)  # From a string
            if (verbose == 1):
                print "XML received from eve-central:"
                print tree
            #print "XML Element count = ", len(tree)

            # the tree root is the toplevel html element
            tree_map = dict((c, p) for p in tree.getiterator() for c in p)
            root = tree

            for c in root.getchildren():
                if (verbose == 1):
                    print c.tag
                for d in c.getchildren():
                    if (verbose == 1):
                        print "    ", d.tag
Example No. 30
def parsePodcast(podcastXML, config, filemode=None):
    """
    Access to the podcast and return all information in a podcast object
    
    return None when no info available, when error raise MyCancel
    """
    podcastInfo = PodcastInfo()                    
    podcastNode = XML(podcastXML)
    channelNode = podcastNode.find('channel')
    
    ### TODO handle when title is empty, use the program title
    titlePodcast = channelNode.findtext('title', '')
    titlePodcastAscii = titlePodcast.encode('ascii', 'ignore')

    if not titlePodcast:
        titlePodcast = 'UNDEFINED'
        titlePodcastAscii = 'UNDEFINED'
    else:
        titlePodcastAscii = getCroppedFilename(titlePodcastAscii)
        titlePodcastAscii = cleanString(titlePodcastAscii)
    
    ## TODO support the podcast named the same way... add a hashcode after the title, and a main podcast.xml file at the root
    
    # the target local directory
    targetDirectory = os.path.join(config.podcastDownloadPath, titlePodcastAscii)
    chandescription   = channelNode.findtext('description', '')
    #chanImage         = getXMLAttrText(channeldom, 'itunes:image', 'href')

    ####
    podcastInfo.title = titlePodcast
    podcastInfo.description = chandescription
    #podcastInfo.image = chanImage    
    podcastInfo.titleAscii = titlePodcastAscii
    podcastInfo.targetDirectory = targetDirectory    
    
    # Get the local info, and a link on the element node, to be modified later
    podcastLocalInfo = getPodcastLocalItems(podcastInfo)
    
    # parse the item list
    #items = channeldom.getElementsByTagName('item') 
    itemExist = channelNode.find('item') != None
                        
    if not itemExist:
        # return empty mark
        return None
    
    #for item in items:
    for itemNode in channelNode.getiterator('item'):
        descr = ''
        type = ''
        length = ''
        
        title = itemNode.findtext('title', '')
        descr      = itemNode.findtext('description', '')                                      
        #pubDate    = getXMLTagText(item, 'pubDate')
        #duration   = getXMLTagText(item, 'itunes:duration')     

        enclosureNode = itemNode.find('enclosure')
        if enclosureNode == None:
            continue
        
        # the url can be a redirect; urllib follows this link for download
        url = enclosureNode.get('url')
        if url == None:
            # when no url, continue
            continue 
        
        # search in the local if the file is already here, already downloaded, and add it
        foundLocalItem = None        
        for podlocalitem in podcastLocalInfo.itemsInfo:
            if podlocalitem.url == url:
                foundLocalItem = podlocalitem
        if foundLocalItem != None:            
            podcastInfo.itemsInfo.append(foundLocalItem)    
            continue
        
        # type is not always defined?!, can test with urllib when downloaded
        type       = enclosureNode.get('type')
        # Length only used for information as list, exact size is found later during download
        length       = enclosureNode.get('length')
                                                                
        #####################
        podItem = PodcastItem()
        podItem.title = title
        podItem.description = descr
        podItem.url = url
        podItem.type = type
        podItem.length = length
        podcastInfo.itemsInfo.append(podItem)    


    # TODO ?? return when ! config.podcastDownload

    # init the titles and the filename, filelocation
    for podItem in podcastInfo.itemsInfo :
        
        # the display title 
        title = podItem.title
        
        # when is local, don't process filenames and add >> in the titles
        if podItem.isLocal:
            # check that the file exists and is fully downloaded; if not, try to download ...
            if not os.path.exists(podItem.fileLocation) : 
                podItem.isLocal = False
                podItem.flagfinish = False
                
            elif podItem.flagfinish:
                title = '>> ' + title                  
            else: 
                title = '<> ' + title              
        
        # when not local, or not file found
        else :
            
            # TODO test that valid name, no special char, len > sufficient, and not always the same name, 
            # TODO OR add something in the MEDIA xml file
            
            ## TODO when a podcast title is reused many times, use a hashcode after the title filename

            
            ### PROBLEM with the path length limit: when the filename is too long, perhaps fall back to equivalents...
            
            #  podcastInfo.useTitleForName or
            if  filemode == "title":
                filename = podItem.title
                podcastInfo.useTitleForName
            else:
                filename = getLastStringPart(podItem.url.encode('ascii', 'ignore'), '/')    
                filename = getBeforeStringPart(filename, '?')
                
                ### TODO test if this filename already exists in the list..?? -> if YES MARK it using a poditem flag, and use title...
                
                
            filename = getCroppedFilename(filename) # 42-4   
            filename = cleanString(filename)
            
            fileLocation = targetDirectory + '\\' + filename
            
            # set the properties
            podItem.filename = filename
            podItem.fileLocation = fileLocation
            
            # When the file exists but has no entry in the xml, add it here
            if os.path.exists(podItem.fileLocation) : 
                title = '>? ' + title   
            
        # process size
        size = 0
        if podItem.size != 0 :
            size = podItem.size
        elif podItem.length and len(podItem.length) > 0:
            size = round(long(podItem.length) / 1000000.0 , 1)     
        # set size and title
        if size != 0:
            podItem.size = size
            title = title + ' (' + str(size) + 'Mo)'
        
        podcastInfo.itemFilenames.append(podItem.filename)
        podcastInfo.titles2display.append(title)
    
    
    # search the folder for already downloaded files that are not referenced in the xml
    appendLocalMedia(podcastInfo, podcastLocalInfo)
    
    # return the podcast info with items
    return podcastInfo
Example No. 31
 def AccountFeedQuery(self,max_results='50'):
     # AccountFeedQuery simplifies constructing API queries and uri-encodes params.
     self.account_feed_query = AccountFeedQuery({'max-results': max_results})
     self.account_feed = self.my_client.GetDataFeed(self.account_feed_query)
     self.parsed_account_feed = XML(str(self.account_feed))
     return self.parsed_account_feed
Example No. 32
class AqAnalytics(object):
    def __init__(self,user_name,password):
        self.SOURCE_APP_NAME = 'Atrinsic-Network'
        self.my_client = gdata.analytics.client.AnalyticsClient(source=self.SOURCE_APP_NAME)
        self.account_feed = ''
        self.data_feed = ''
        self.table_ids = None
        self.user_name = user_name
        self.password = password

    def authenticate(self):
        try:
            self.my_client.client_login(self.user_name, self.password,self.SOURCE_APP_NAME,'analytics')
        except gdata.client.BadAuthentication:
            raise Exception('Invalid user credentials given.')
        except gdata.client.Error:
            raise Exception('Invalid user credentials given.')
        return True
        
    def AccountFeedQuery(self,max_results='50'):
        # AccountFeedQuery simplifies constructing API queries and uri-encodes params.
        self.account_feed_query = AccountFeedQuery({'max-results': max_results})
        self.account_feed = self.my_client.GetDataFeed(self.account_feed_query)
        self.parsed_account_feed = XML(str(self.account_feed))
        return self.parsed_account_feed
        
    def GetSiteList(self,feed_query = None):
        if feed_query == None:
            feed_query = self.AccountFeedQuery()
        complete_feed = []
        feed_details = {}
        for elem in feed_query:
            if list(elem):
                for node in elem:
                    NS1='{http://schemas.google.com/analytics/2009}'
                    NS2='{http://www.w3.org/2005/Atom}'
                    if node.tag[len(NS1):] == "tableId":
                        feed_details['table_id']=node.text
                    elif node.tag[len(NS2):] == "title":
                        feed_details['site_url']=node.text
                if feed_details:
                    complete_feed.append(feed_details)
                feed_details = {}
            
        return complete_feed
    def DataFeedQuery(self,start_date,end_date,table_id,report_type,parse_as="flat",sort='',filters='',max_results='50',chart_type="table"):
        m_indx = report_type.index("m-")
        d_indx = report_type.index("d-")
        dimensions = report_type[d_indx+2:m_indx-1]
        # prefix each dimension with "ga:"
        dim = ",".join("ga:" + d for d in dimensions.split(","))
        metrics = report_type[m_indx+2:]
        # prefix each metric with "ga:"
        metr = ",".join("ga:" + m for m in metrics.split(","))
        #try:            
        print "TEST3"
        if max_results == '':
            max_results = 50
        self.data_feed_query = gdata.analytics.client.DataFeedQuery({
            'ids': table_id,
            'start-date': start_date,
            'end-date': end_date,
            'dimensions':dim,
            'metrics':metr,
            'sort': sort,
            'filters': filters,
            'max-results': max_results})
        print self.data_feed_query
        self.data_feed = self.my_client.GetDataFeed(self.data_feed_query)
        self.parsed_data_feed = XML(str(self.data_feed))
        print self.parsed_data_feed
        if parse_as == "raw":
            return self.parsed_data_feed,len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')),True
        else:
            return self.ParseDataFeedResults(self.parsed_data_feed,parse_as),len(self.parsed_data_feed.getiterator('{http://www.w3.org/2005/Atom}entry')),True
        """except:
            'body', 'headers', 'message', 'reason', 'status'
            error = sys.exc_info()[1]
            error_xml = XML(error.message[error.message.index("<"):])
            error_list = []
            for node in error_xml.getiterator("{http://schemas.google.com/g/2005}error"):
                for err in node.getchildren():
                    if err.tag == "{http://schemas.google.com/g/2005}internalReason":
                        error_list.append(err.text)
            return (False,False,error_list)"""
            
    def ParseDataFeedResults(self,data_feed_query,parse_as):
        NS = '{http://schemas.google.com/analytics/2009}'
        e_list = []
        headers = []
        self.parse_as = parse_as
        for elem in data_feed_query.getiterator('{http://www.w3.org/2005/Atom}entry'):
            if parse_as == "dict":
                e_dict = {}
            elif parse_as == "array" or parse_as == "flat":
                e_dict = []
            else:
                e_dict = ""
            for node in elem.getchildren():
                e_dict,headers = self.SuperAppend(node,e_dict,headers)
                
            if parse_as == "flat":
                for x in e_dict:
                    e_list.append(x)
            else:
                e_list.append(e_dict)
        return e_list,headers
        
    def SuperAppend(self,node,e_dict,headers):
        if node.attrib.has_key('value'):
            try:
                headers.index(node.attrib['name'][3:])
            except:
                headers.append(node.attrib['name'][3:])
            if self.parse_as == "dict":
                e_dict[node.attrib['name'][3:]] = node.attrib['value']
            elif self.parse_as == "array" or self.parse_as == "flat":
                e_dict.append(node.attrib['value'])
            else:
                e_dict = node.attrib['value']
        return e_dict,headers
Example No. 33
    for index in range(1,typeID_List_length-1):
        typeID = typeID_List[index]
        typeID_Progress = 100.0 * ((1.0*index)/typeID_List_length)

        if (verbose == 1):
            print "Getting eve-central data for typeID ", typeID_List[index], " - ", typeName_List[index]
        contents = eve_central_query_market_data(regionID,option)
        if (verbose == 1):
            print "Response from eve-central:"
            print contents
            print "END RESPONSE"
        
        if (contents[0:5] == "<?xml"):
            # Parse the XML response from EVE-Central
            # See usage here for ElementTree:  http://effbot.org/zone/element-index.htm
            tree = XML(contents)        # From a string
            if (verbose == 1):
                print "XML received from eve-central:"
                print tree
            #print "XML Element count = ", len(tree)

            # the tree root is the toplevel html element
            tree_map = dict((c, p) for p in tree.getiterator() for c in p)
            root = tree

            for c in root.getchildren():
                if (verbose == 1):
                    print c.tag
                for d in c.getchildren():
                    if (verbose == 1):
                        print "    ", d.tag
Example No. 34
 def _parameter_for(self, **kwds):
     content = kwds["xml"]
     param_xml = XML( content )
     return basic.ToolParameter.build( self.mock_tool, param_xml )
Example No. 35
 def xml2ListOfDict(xml):
     """Convert an xmlDoc string to a python list of dictionaries."""
     return [
         dict([(child.tag, child.text) for child in elt])
         for elt in XML(xml)
     ]
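Example use of the one-liner (illustrative input, assuming the function is importable at module level):

rows = xml2ListOfDict('<rows>'
                      '<row><id>1</id><name>alpha</name></row>'
                      '<row><id>2</id><name>beta</name></row>'
                      '</rows>')
# rows == [{'id': '1', 'name': 'alpha'}, {'id': '2', 'name': 'beta'}]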
Example No. 36
    def create(self, trans, payload, **kwd):
        """
        POST /api/request_types
        Creates a new request type (external_service configuration).
        """
        if not trans.user_is_admin():
            trans.response.status = 403
            return "You are not authorized to create a new request type (external_service configuration)."
        xml_text = payload.get('xml_text', None)
        if xml_text is None:
            trans.response.status = 400
            return "Missing required parameter 'xml_text'."
        elem = XML(xml_text)
        request_form_id = payload.get('request_form_id', None)
        if request_form_id is None:
            trans.response.status = 400
            return "Missing required parameter 'request_form_id'."
        request_form = trans.sa_session.query(
            trans.app.model.FormDefinition).get(
                trans.security.decode_id(request_form_id))
        sample_form_id = payload.get('sample_form_id', None)
        if sample_form_id is None:
            trans.response.status = 400
            return "Missing required parameter 'sample_form_id'."
        sample_form = trans.sa_session.query(
            trans.app.model.FormDefinition).get(
                trans.security.decode_id(sample_form_id))
        external_service_id = payload.get('external_service_id', None)
        if external_service_id is None:
            trans.response.status = 400
            return "Missing required parameter 'external_service_id'."
        external_service = trans.sa_session.query(
            trans.app.model.ExternalService).get(
                trans.security.decode_id(external_service_id))
        request_type = request_type_factory.from_elem(elem, request_form,
                                                      sample_form,
                                                      external_service)
        #FIXME: move permission building/setting to separate abstract method call and
        #allow setting individual permissions by role (currently only one action, so not strictly needed)
        role_ids = payload.get('role_ids', [])
        roles = [
            trans.sa_session.query(trans.model.Role).get(
                trans.security.decode_id(i)) for i in role_ids
        ]  # if trans.app.security_agent.ok_to_display( trans.user, i ) ]
        permissions = {}
        if roles:
            #yikes, there has to be a better way?
            for k, v in trans.model.RequestType.permitted_actions.items():
                permissions[trans.app.security_agent.get_action(
                    v.action)] = roles
        if permissions:
            trans.app.security_agent.set_request_type_permissions(
                request_type, permissions)

        #flush objects
        trans.sa_session.add(request_type)
        trans.sa_session.flush()
        encoded_id = trans.security.encode_id(request_type.id)
        item = request_type.to_dict(view='element',
                                    value_mapper={
                                        'id': trans.security.encode_id,
                                        'request_form_id':
                                        trans.security.encode_id,
                                        'sample_form_id':
                                        trans.security.encode_id
                                    })
        item['url'] = url_for('request_type', id=encoded_id)
        return [item]
Example No. 37
class EpubDocument(object):
    """A class that parses and provides
    data about an ePub file"""

    def __init__(self, fname):
        # This is done according to this:
        # http://stackoverflow.com/questions/1388467/reading-epub-format

        print(("Opening:", fname))
        try:
            self.book = zipfile.ZipFile(fname, "r")
        except zipfile.BadZipfile:
            raise ValueError("Invalid format")

        f = self.book.open('META-INF/container.xml')
        self.container = XML(f.read())
        f.close()
        roots = self.container.findall(
                './/{urn:oasis:names:tc:opendocument:xmlns:container}rootfile')
        self.roots = []
        for r in roots:
            self.roots.append(r.attrib['full-path'])
        opf = self.book.open(self.roots[0])
        self.basepath = os.path.dirname(self.roots[0]) + "/"
        if self.basepath == '/':
            self.basepath = ""
        print(("BASEPATH:", self.basepath))

        data = opf.read()
        self.opf = XML(data)
        opf.close()
        self.manifest = self.opf.find('{http://www.idpf.org/2007/opf}manifest')
        self.manifest_dict = {}
        for elem in self.manifest.findall(
                            '{http://www.idpf.org/2007/opf}item'):
            self.manifest_dict[elem.attrib['id']] = self.basepath + \
                                                    elem.attrib['href']

        self.spine = self.opf.find('{http://www.idpf.org/2007/opf}spine')

        self.tocentries = []
        self.toc_id = self.spine.attrib.get('toc', None)
        if self.toc_id:
            self.toc_fn = self.manifest_dict[self.toc_id]
            print(("TOC:", self.toc_fn))
            f = self.book.open(self.toc_fn)
            data = f.read()
            self.toc = XML(data)
            self.navmap = self.toc.find(
                            '{http://www.daisy.org/z3986/2005/ncx/}navMap')
            # FIXME: support nested navpoints
            self.navpoints = self.navmap.findall(
                        './/{http://www.daisy.org/z3986/2005/ncx/}navPoint')
            for np in self.navpoints:
                label = np.find(
                    '{http://www.daisy.org/z3986/2005/ncx/}navLabel').find(
                            '{http://www.daisy.org/z3986/2005/ncx/}text').text
                content = np.find(
                 '{http://www.daisy.org/z3986/2005/ncx/}content').attrib['src']
                if label and content:
                    self.tocentries.append([label, content])

        self.itemrefs = self.spine.findall(
                                    '{http://www.idpf.org/2007/opf}itemref')
        print(("IR:", self.itemrefs))
        self.spinerefs = [
            self.manifest_dict[item.attrib['idref']][len(self.basepath):]
                                                    for item in self.itemrefs]
        # I found one book that has a spine but no navmap:
        # "Der schwarze Baal" from manybooks.net
        # Also another has more entries on the spine than on the navmap
        # (Dinosauria, from feedbooks).
        # So, we need to merge these suckers. I will assume it's not completely
        # insane and the spine is always more complete.

        spinerefs2 = [[x, x] for x in self.spinerefs]

        for te in self.tocentries:
            idx = self.spinerefs.index(te[1])
            spinerefs2[idx] = te

        self.tocentries = spinerefs2
        # if not self.tocentries:
            # # Alternative toc
            # self.tocentries = [[item.attrib['idref'],
             #self.manifest_dict[item.attrib['idref']][len(self.basepath):]]
                                                    #for item in self.itemrefs]

        print((self.tocentries))
        print((self.spinerefs))

    def getData(self, path):
        """Return the contents of a file in the document"""

        path = "%s%s" % (self.basepath, path)
        try:
            f = self.book.open(path)
        except KeyError:  # File missing in the zip
            return []
        data = f.read()
        f.close()
        return data
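The container.xml lookup at the top of __init__ reduces to the sketch below; 'book.epub' is a placeholder path, and the namespace string is the one fixed by the EPUB OCF spec.

import zipfile
from xml.etree.ElementTree import XML

CONTAINER_NS = '{urn:oasis:names:tc:opendocument:xmlns:container}'

def opf_paths(epub_path):
    """Return the full-path of every rootfile listed in META-INF/container.xml."""
    with zipfile.ZipFile(epub_path, 'r') as book:
        container = XML(book.read('META-INF/container.xml'))
        return [r.attrib['full-path']
                for r in container.findall('.//%srootfile' % CONTAINER_NS)]

# opf_paths('book.epub') might return e.g. ['OEBPS/content.opf']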