Пример #1
0
    def load(self,dataset,report_func=None):
        """Load a previously cached dataset from its pickle file.

        :param dataset: value handed to ``exists``, ``self._get_file_profile``
            and ``self.filename`` to locate the cache file. A falsy value
            disables loading.
        :param report_func: optional callable, invoked after a successful
            load as ``report_func("load", filename, result, self,
            _accounting)``; the accounting dictionary is cleared afterwards.
        :returns: the populated ``DataSet``; ``None`` when ``dataset`` is
            falsy, caching is switched off, ``dataset`` does not exist, or
            no cache file is present.

        The pickled payload is a mapping.  The special key ``"_MISSING"``
        carries the arguments for ``DataSet.add_missing``; every other value
        is a sequence whose first item is a type tag: ``0`` means
        ``(0, data, metadata)`` and is rebuilt as a ``Timeseries``, ``1``
        means ``(1, array)`` and is stored as is.  Other tags are skipped.

        When ``options.cache_date`` is set and the cache file is missing, the
        error is logged and the process is terminated with ``sys.exit(-1)``.
        """
        options = self._options
        if not dataset or not options.switch_cache or not exists(dataset):
            return

        # get profile from timeseries file if exists
        profile = self._get_file_profile(dataset,options.profile)
        _accounting['cache.load.profile'] = profile

        # Called exactly as before; its return value is not needed here.
        self._make_cachedir()
        f = self.filename(dataset)
        _accounting['cache.load.file'] = f

        if not exists(f):
            requested_date = options.cache_date
            if requested_date:
                logger.error('Requested date (%s) does not exists',requested_date)
                sys.exit(-1)
            return None

        result = DataSet()

        # SECURITY: unpickling executes code embedded in the file, so the
        # cache file must come from a trusted location.
        # NOTE(review): text mode 'r' is kept because the code writing the
        # cache is not visible here; binary pickle protocols need 'rb'.
        with open(f,'r') as stream:
            p = pickle.load(stream)

        for k,v in p.items():
            if k=="_MISSING":
                result.add_missing(*v)
            elif v[0]==0:    # Timeseries
                result[k]=Timeseries(data=v[1],metadata=v[2],name=k)
            elif v[0]==1:    # Numpy Array
                result[k]=v[1]

        _accounting['cache.load.missing'] = ','.join(result.missing)
        _accounting['cache.load.series'] = ','.join(result.keys())

        # Report
        if report_func:
            report_func("load",f,result,self,_accounting)
            _accounting.clear()

        return result
Пример #2
0
    def __init__(self,name,profile=None,options=Options()):
        """Set up an empty provider: no queued requests, no results.

        :param name: stored as ``self.name``.
        :param profile: connection profile, stored as ``self._profile``.
        :param options: options object, stored as ``self._options``.  The
            default instance is created once, when the method is defined.
        """
        # Only the (disabled) debug trace used this value; the attribute is
        # still read exactly as before.
        _counter = options.counter

        self._options, self.name = options, name

        # Retry budget -- TODO: to get from options
        self._retries = 3

        # Parsed request tuples (urlparse results) waiting to be executed
        self.requesting = []

        # Hook for fetching data values; unset at construction time
        self.get_values = None

        # Information set holding the results, and the missing variables
        self._res = DataSet()
        self._missing = []

        # Connection profile
        self._profile = profile

        # Request option flags
        self._opt_request_req = False
        self._opt_delete_base_kvars = False
        self._opt_discipline_inline_function_single = True

        # Base parameters shared by data providers
        for _key,_expression in (('LASTYEAR','(($THISYEAR-$YUPD))'),
                                 ('PREVYEAR','(($LASTYEAR-1))')):
            self._append_param(_key,_expression)

        # Raised on a password violation (dont make other requests)
        self._password_violation = False
Пример #3
0
Файл: CFA.py Проект: exedre/e4t
    def execute(self):
        """Download ``self._url`` and extract the table titled ``self._table``.

        The page is fetched with ``urllib2`` (through the proxy returned by
        ``get_proxy()``, if any) and parsed with ``BeautifulSoup``.  The first
        ``<table>`` whose first cell starts with ``"Table <self._table>"``
        (case-insensitive) is scanned row by row:

        * a single-cell row matching the next expected section title opens a
          new block (block indexes start at 0);
        * a 7-cell row is a data row of the current block;
        * a 4-cell row ``"<N> Arrangement(s)"`` is only compared with the
          number of rows collected for the block (debug log);
        * any other non-empty row is reported in the debug log.

        :returns: a ``DataSet`` mapping ``"<COLUMN>_<block index>"`` to the
            list of cell texts of that column, or ``None`` when the table is
            not found in the page.  Non-empty ``POQ`` cells get ``\\%``
            appended; every other cell has leading ``*`` characters removed.
        """
        logger.debug('begin')

        ### Proxy if any
        proxy_info = get_proxy()
        if proxy_info:
            pip={
                'http':proxy_info['proxy'],
                'https':proxy_info['proxy'],
            }
            proxy = urllib2.ProxyHandler(pip)
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

        page = urllib2.urlopen(self._url)
        try:
            logger.debug('Got %s',self._url)
            soup = BeautifulSoup(page)
        finally:
            # The markup has been consumed by the parser; release the socket.
            page.close()

        ### Look for the requested table
        table = None
        title_re = '^Table %s' % self._table
        for candidate in soup.findAll('table'):
            first_row = candidate.tr
            first_cell = first_row.td if first_row is not None else None
            if first_cell is None:
                # Tables without a <tr>/<td> cannot carry the title.
                continue
            if re.match(title_re,str(first_cell.text),re.I):
                table = candidate
                break

        if table is None:
            logger.error('Table %s not in page',self._table)
            return

        ### Collect the cell texts of every row
        res = []
        for tTr in table.findAll('tr'):
            cells = []
            for i,td in enumerate(tTr.findAll('td')):
                txt = td.text
                if txt == ' ':
                    continue
                if re.match(r'^--$',txt):
                    txt = ''
                elif re.match(',',txt):
                    # NOTE(review): re.match anchors at the start, so commas
                    # are only removed from texts that BEGIN with one.  Left
                    # unchanged: a search-based fix would also strip commas
                    # from member names -- confirm the intent.
                    txt = txt.replace(',','')
                elif i==0:
                    # Drop a trailing digit (optionally followed by '/') from
                    # the first cell -- presumably a footnote marker.
                    m = re.match(r'^.+([0-9]\/?)$',txt)
                    if m:
                        txt = txt[:-len(m.group(1))]
                cells.append(txt)
            res.append(cells)

        ### Group the data rows by section title
        titles = [ 'Stand-by Arrangements',
                   'Extended Arrangements',
                   'Flexible Credit Line',
                   'Precautionary' ]
        ttl = titles.pop(0)
        lk = -1
        blocks = {}
        for l in res:
            if len(l)==1 and re.match("^%s"%ttl,l[0],re.I):
                lk += 1
                if titles:
                    ttl = titles.pop(0)
                    logger.debug('Next Title %s',ttl)
            elif len(l)==7:
                blocks.setdefault(lk,[]).append(l)
            elif len(l)==4 and re.search('Arrangements?$',l[0]):
                m = re.match('^([0-9]+) Arrangements?$',l[0])
                # .get() keeps a totals row that precedes any data row from
                # raising KeyError.
                if m and len(blocks.get(lk,()))==int(m.group(1)):
                    logger.debug("Wow right arrangement found")
            elif len(l)>0:
                logger.debug('LINE NOT INSERTED %d > %s', len(l), l)

        # Make the result vectors
        #
        columns = ('MEMBER',
                   'EFDATE',
                   'EXDATE',
                   'AGREED',
                   'UNDRAWN',
                   'OUTSTANDING',
                   'POQ',
                   )
        _ds = DataSet()
        for k,w in sorted(blocks.items()):
            for i,n in enumerate(columns):
                name = "%s_%d" % (n,k)
                # DataSet's interface is not visible here, so its own
                # has_key() is used as before.
                if not _ds.has_key(name):
                    _ds[name]=[]
                for m in w:
                    if n=='POQ' and len(m[i])>0:
                        _ds[name].append(m[i]+"\\%")
                    else:
                        _ds[name].append(m[i].lstrip('*'))

        return _ds
Пример #4
0
class DataProvider(object):
    """
    General access class to Data Providers with URI notation like:

    * ``dstream://Datastream/AGINDEMIF`` or
    * ``flinp://DB22/ECB_STS1/M.BG.N.PROD.NS0020.4.000?name=IPBUL&start=$NOW-24M&end=$NOW&proc=weighted_avg&(IPBUL2000AVG)check``

    Requests are queued with :meth:`append` and executed by :meth:`request`.

    The methods below use ``self.provider``, ``self.get``,
    ``self._inline_processor``, ``self._append_param`` and
    ``self._append_option``, none of which is defined in the part of the
    class shown here (presumably supplied elsewhere in the class or by
    subclasses -- TODO confirm).
    """
    # library functions
    _func = {
        'ISO2' :     { 'def' : ndc.get_country_alpha2,  'args' : ( 'code', ) },
        'ISO3' :     { 'def' : ndc.get_country_alpha3,  'args' : ( 'code', ) },
        'ISON' :     { 'def' : ndc.get_country_numeric, 'args' : ( 'code', ) },
        'ISOAREA':   { 'def' : ndc.get_area_name,       'args' : ( 'code', ) },
        'ISOREGION': { 'def' : ndc.get_region_name,     'args' : ( 'code', ) }
        }


    def __init__(self,name,profile=None,options=Options()):
        """
        Create a provider with an empty request queue and result set.

        :param name: stored as ``self.name``
        :param profile: connection profile, stored as ``self._profile``
        :param options: options object, stored as ``self._options``; the
            default instance is created once, when the class is defined
        """
        # Only the (disabled) debug trace used this value; the attribute is
        # still read exactly as before.
        counter = options.counter

        self._options = options
        self.name = name
        self._retries = 3 # TODO: to get from options

        # tuple extracting from urlparse
        self.requesting = []

        # get data values
        self.get_values = None

        # The Information Set
        self._res = DataSet()

        # Missing Variables
        self._missing = []

        # Connection Profile
        self._profile=profile

        # Options for request
        self._opt_request_req = False
        self._opt_delete_base_kvars = False
        self._opt_discipline_inline_function_single = True

        # Base parameters for data providers
        self._append_param('LASTYEAR','(($THISYEAR-$YUPD))')
        self._append_param('PREVYEAR','(($LASTYEAR-1))')

        # Password violation (dont make other requests)
        self._password_violation = False

    def append(self,url):
        """
        Append a data request URL to the list

        The URL is stripped, upper-cased and cleaned by ``stripcomments``;
        an empty result is ignored.  URLs whose scheme is ``option`` are
        handed to ``self._append_option``; every other URL is parsed and
        queued in ``self.requesting`` as a 6-item list (the ``urlparse``
        fields, with the original scheme in position 0).

        >>> dp = DataProvider('example')
        >>> dp.append('dstream://Datastream/AGINDEMIF')
        >>> dp.append('dstream://Datastream/PCH#(AGINDEMIF,1Y)')
        >>> dp.append('option://param/TEST?VALUE')
        """

        url=stripcomments(url.rstrip('\n ').upper())
        if len(url)==0:
            return

        # Needs double parsing because urlparse function
        # cant parse (?...&...)
        # if schema is not http:

        # management of # character: it is masked so that urlparse does not
        # treat the rest of the URL as a fragment
        URL = url.replace('#','__A~~~~A__')

        # first pass of parsing to get scheme
        up = urlparse(URL)
        is_option = re.search('^option$',up.scheme,re.IGNORECASE)

        # substitute parameters when scheme is not option (CHECK)
        if not is_option and '$' in url:
            url = Template(url).safe_substitute(self._options.define_set)

        # mask # again (the substitution worked on the unmasked URL)
        url = url.replace('#','__A~~~~A__')

        # ...second urlparse using a fake http: schema.  The scheme is
        # escaped because '+' and '.' are legal scheme characters but regex
        # metacharacters.
        h_url = re.sub("^%s://" % re.escape(up.scheme), "http://" , url,flags=re.I)
        h_pa  = urlparse(h_url)

        parsed = list(h_pa)
        parsed[0] = unicode(up.scheme)
        # restore # in the path and normalise backslashes
        parsed[2] = parsed[2].replace('__A~~~~A__','#').replace('\\','/')

        # for option URI insert in the option list
        if is_option:
            self._append_option(parsed)
            return

        # otherwise add to requesting list
        self.requesting.append(parsed)


    def info(self):
        """
        Describe the queued requests.

        :returns: the concatenation, without separator, of one
            ``"<HOSTNAME>|<series list>|<query string>"`` entry per queued
            request; ``""`` when nothing is queued.
        """
        entries = []
        for req in self.requesting:
            hostname=req[1].upper()
            # The loop variable must not share a name with the accumulator:
            # in Python 2 list-comprehension variables leak into the
            # enclosing scope and used to overwrite the result string.
            series = [_replace_funcs(path,self._func) for path in [req[2][1:],]]
            xparams = req[4]
            entries.append("%s|%s|%s" % (hostname,series,xparams))
        return "".join(entries)


    def mk_request(self,sources,serie):
        """The mk_request transform the request string in the structure understood by the provider driver

        This base implementation returns ``sources`` unchanged and ignores
        ``serie``.
        """
        return sources


    def request(self,profile=None):
        """
        Request data

        Opens the provider (when it has an ``open`` method), runs every
        queued request through :meth:`mget` unless
        ``self._options.only_options`` is set, and closes the provider (when
        it has a ``close`` method) even if a request raises.

        :param profile: only recorded in the accounting dictionary
        :returns: ``self._res``, a ``DataSet`` created at the start of the
            call, after ``add_missing`` has been called with the names in
            ``self._missing``
        """
        if hasattr(self.provider,'open'):
            self.provider.open()

        try:
            self._res = DataSet() # Base Dataset is empty

            # accounting stuff
            _accounting['datareq.profile']=profile

            for req in self.requesting:

                kvars = udict({ 'name':  None,
                                'start': None,
                                'end':   None,
                                'proc':  None,
                                'check': None })

                hostname=req[1].upper()
                series = [ _replace_funcs(req[2][1:],self._func) ]
                xparams = req[4]

                _accounting["datareq.series.%s"%','.join(series)]='%s | %s (%s%s)' % (
                    hostname,str(xparams),
                    'R' if self._opt_request_req else "S",
                    'K' if self._opt_delete_base_kvars else "-" )

                # Replace self._funcs
                xparams = _replace_funcs(xparams,self._func)
                if len(xparams)>0:
                    kvars2 = udict(parse_qsl(xparams))
                    accepted(kvars,
                             'NAME',
                             'START',
                             'END',
                             'PROC',
                             'CHECK')

                    kvars.update(kvars2)

                    # NAME stays None when the query string does not set it
                    if 'NAME' in kvars and kvars['NAME'] is not None:
                        kvars['NAME']=kvars['NAME'].strip()

                    if self._opt_delete_base_kvars:
                        # NOTE(review): kvars2 is not read after this point
                        # in the visible code -- confirm what the pruning is
                        # meant to feed.
                        for _k in kvars.keys():
                            if _k in kvars2:
                                del kvars2[_k]

                if self._options.only_options:
                    continue

                if self._opt_request_req:
                    reqs = self.mk_request(req,hostname)
                else:
                    reqs = self.mk_request(series,hostname)

                with Timer() as t:
                    res  = self.mget(reqs,**kvars)
                _accounting['%s.request.%s.time' % (self.name,kvars['NAME'])] = t.msecs
                _accounting['%s.request.%s.req' % (self.name,kvars['NAME'])] = req
                if res:
                    _accounting['%s.request.%s.res' % (self.name,kvars['NAME'])] = res
        finally:
            # Release the provider even when a request fails.
            if hasattr(self.provider,'close'):
                self.provider.close()

        _accounting['%s.request.missing' % (self.name)] = ','.join(self._missing)

        self._res.add_missing(*self._missing)

        return self._res

    def mget(self,reqs,**kw):
        """
        Fetch every series in ``reqs`` and merge it into ``self._res``.

        :param reqs: iterable of series identifiers passed to ``self.get``
        :param kw: request parameters forwarded to ``self.get``.  When
            ``NAME`` is absent or falsy it is set to the series being
            fetched.
        :returns: whatever ``self.get`` returned for the last series
            (``None`` if it raised ``ValueError``), or ``None`` when
            ``reqs`` is empty

        A series for which ``self.get`` returns a falsy value or raises
        ``ValueError`` has its name(s) -- ``NAME`` split on commas -- added
        to ``self._missing``.
        """
        _ts = None   # return value when reqs is empty

        for serie in reqs:
            # NOTE(review): once NAME has been defaulted to the first series
            # it stays set for the following ones -- confirm this is wanted
            # when reqs holds several series.
            if kw.get('NAME'):
                name = kw['NAME']
            else:
                name = serie
                kw['NAME'] = serie

            _ts = None
            try:
                # gets data from provider
                _ts = self.get(serie,**kw)
            except ValueError:
                logger.debug('Not saving %s in information set - series missing',serie)

            # ...save in results
            if _ts:
                self._res.update(_ts)
            #    or in missing list
            elif kw['NAME'] is not None:
                self._missing.extend(kw['NAME'].split(','))
            else:
                logger.warn('Anonymous MISSNG found')

            _accounting['load.inline.processors.discipline']=self._opt_discipline_inline_function_single

            # "single" discipline: post-process after every series
            if self._opt_discipline_inline_function_single==True:
                self._res = self._inline_processor(self._res,name,kw)

        # otherwise post-process once, after all the series
        if self._opt_discipline_inline_function_single!=True:
            self._res = self._inline_processor(self._res,None,kw)

        return _ts
Пример #5
0
    def request(self,profile=None):
        """
        Request data

        Opens the provider (when it has an ``open`` method), runs every
        queued request through ``self.mget`` unless
        ``self._options.only_options`` is set, and closes the provider (when
        it has a ``close`` method) even if a request raises.

        :param profile: only recorded in the accounting dictionary
        :returns: ``self._res``, a ``DataSet`` created at the start of the
            call, after ``add_missing`` has been called with the names in
            ``self._missing``
        """
        if hasattr(self.provider,'open'):
            self.provider.open()

        try:
            self._res = DataSet() # Base Dataset is empty

            # accounting stuff
            _accounting['datareq.profile']=profile

            for req in self.requesting:

                kvars = udict({ 'name':  None,
                                'start': None,
                                'end':   None,
                                'proc':  None,
                                'check': None })

                hostname=req[1].upper()
                series = [ _replace_funcs(req[2][1:],self._func) ]
                xparams = req[4]

                _accounting["datareq.series.%s"%','.join(series)]='%s | %s (%s%s)' % (
                    hostname,str(xparams),
                    'R' if self._opt_request_req else "S",
                    'K' if self._opt_delete_base_kvars else "-" )

                # Replace self._funcs
                xparams = _replace_funcs(xparams,self._func)
                if len(xparams)>0:
                    kvars2 = udict(parse_qsl(xparams))
                    accepted(kvars,
                             'NAME',
                             'START',
                             'END',
                             'PROC',
                             'CHECK')

                    kvars.update(kvars2)

                    # NAME stays None when the query string does not set it
                    if 'NAME' in kvars and kvars['NAME'] is not None:
                        kvars['NAME']=kvars['NAME'].strip()

                    if self._opt_delete_base_kvars:
                        # NOTE(review): kvars2 is not read after this point
                        # in the visible code -- confirm what the pruning is
                        # meant to feed.
                        for _k in kvars.keys():
                            if _k in kvars2:
                                del kvars2[_k]

                if self._options.only_options:
                    continue

                if self._opt_request_req:
                    reqs = self.mk_request(req,hostname)
                else:
                    reqs = self.mk_request(series,hostname)

                with Timer() as t:
                    res  = self.mget(reqs,**kvars)
                _accounting['%s.request.%s.time' % (self.name,kvars['NAME'])] = t.msecs
                _accounting['%s.request.%s.req' % (self.name,kvars['NAME'])] = req
                if res:
                    _accounting['%s.request.%s.res' % (self.name,kvars['NAME'])] = res
        finally:
            # Release the provider even when a request fails.
            if hasattr(self.provider,'close'):
                self.provider.close()

        _accounting['%s.request.missing' % (self.name)] = ','.join(self._missing)

        self._res.add_missing(*self._missing)

        return self._res