Example #1
# Imports inferred from usage. filezip, the log/zip-name templates
# (log_info_tpl, log_info_tpl1, log_info_tpl2, zip_name_tpl), and the celery
# task decorator that binds data_download.request are defined elsewhere in
# the original module.
import ast
import datetime
import json
import os
import socket

from celery import group, subtask

import filezip  # project helper module for zipping downloaded files

def data_download(data, basedir='/data/static/', clustered=False, **kwargs):
    '''
    Download multiple data sets from multiple data sources.

    Example of simple cart data:
        {"SCI-1": {"quantity": 1, "id": "SCI-1",
                   "name": "North Canadian River at Shawnee, OK (07241800)",
                   "parameter": "Discharge, cubic feet per second",
                   "query": "{'source': 'USGS', 'webservice_type': 'uv',
                              'sites': '07241800', 'parameterCd': '00060',
                              'startDT': '2007-10-01', 'endDT': '2013-04-04'}"}}

    query['source'] is used to import a module that provides a save function;
    that function returns a URL to the file it just downloaded. filezip then
    creates a zip file from the list of URLs, and the task returns a URL to
    the zip of all data downloaded from the different sources.

    TODO: currently runs in a serial fashion; update to celery groups so that
    multiple parallel subtasks are generated.
    '''
    if not data:
        raise ValueError('No data')
    # The cart may arrive either as JSON or as a Python literal string.
    try:
        data = json.loads(data)
    except ValueError:
        data = ast.literal_eval(data)
    newDir = os.path.join(basedir, 'ows_tasks/', str(data_download.request.id))
    os.makedirs(newDir, exist_ok=True)
    os.chdir(newDir)
    logger = open(os.path.join(newDir, 'task_log.txt'), 'w')
    # Consolidate sources: group the shopping-cart items by data source.
    data_by_source = {}
    for itm, value in data.items():
        value['query'] = ast.literal_eval(value['query'])
        data_by_source.setdefault(value['query']['source'], []).append(value)
    stask = []
    taskname_tmpl = 'owsq.data.download.%s.save'
    for itm, value in data_by_source.items():
        logger.write(log_info_tpl % (itm, str(len(value)), 'Subtask Created'))
        # One subtask per source, e.g. owsq.data.download.USGS.save
        stask.append(subtask(taskname_tmpl % itm, args=(newDir, itm), kwargs={'data_items': value}))
    job = group(stask)
    result = job.apply_async()
    logger.write(log_info_tpl1 % ('Subtask Submission', 'Subtasks Running'))
    # Block until every per-source subtask finishes, collecting their results.
    aggregate_results = result.join()
    logger.write(log_info_tpl1 % ('Data query successful', 'Subtasks completed'))

    urls = []
    for res in aggregate_results:
        urls.extend(res)
    # Build the zip name once so the logged URL matches the file actually written.
    outname = zip_name_tpl % datetime.datetime.now().isoformat()
    zipurl = 'http://%s/%s/%s' % (socket.gethostname(), 'request', outname)
    logger.write(log_info_tpl2 % ('Data Zip URL', zipurl, '30 days'))
    logger.close()
    if clustered:
        return filezip.makezip(urls, outname, os.path.join(basedir, 'request/'))
    else:
        return filezip.makezip(newDir, outname, os.path.join(basedir, 'request/'), local=True)
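
A minimal invocation sketch, assuming data_download is registered as a celery task (so .delay() and .request.id work) and that per-source save tasks such as owsq.data.download.USGS.save exist; the cart payload follows the format shown in the docstring:

# Hypothetical usage; cart keys and query values follow the docstring example.
cart = {
    "SCI-1": {
        "quantity": 1,
        "id": "SCI-1",
        "name": "North Canadian River at Shawnee, OK (07241800)",
        "parameter": "Discharge, cubic feet per second",
        "query": ("{'source': 'USGS', 'webservice_type': 'uv', "
                  "'sites': '07241800', 'parameterCd': '00060', "
                  "'startDT': '2007-10-01', 'endDT': '2013-04-04'}"),
    }
}
async_result = data_download.delay(json.dumps(cart))
zip_url = async_result.get()  # URL to the zip of everything downloaded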
Example #2
# Imports inferred from usage; fz is assumed to be the project's filezip
# helper module (it provides catname2catid, getEventResult_Country, makezip,
# and notify_email).
import os
import socket
from datetime import datetime

import filezip as fz  # assumption: fz aliases the filezip helper module

def modiscountry(product, country, start_date, end_date, notify=None, outpath=None):
    # Default the output path: the static request directory on the production
    # host, otherwise the current working directory.
    if socket.gethostname() == 'static.cybercommons.org' and not outpath:
        outpath = '/static/request'
    elif not outpath:
        outpath = os.getcwd()
    cat_ids = fz.catname2catid(product, '807')
    files = []
    # Coerce ISO date strings to datetimes, handling a mixed pair where only
    # one bound is already a datetime.
    try:
        if not isinstance(start_date, datetime):
            start_date = datetime.strptime(start_date, '%Y-%m-%d')
        if not isinstance(end_date, datetime):
            end_date = datetime.strptime(end_date, '%Y-%m-%d')
    except ValueError:
        return 'Valid dates are in YYYY-MM-DD format'
    for cat_id in cat_ids:
        files += fz.getEventResult_Country(cat_id=cat_id, country=country,
                                           start_date=start_date,
                                           end_date=end_date, var_id='URL')
    outfile = '%s_%s_%s_%s.zip' % (product, country,
                                   start_date.strftime('%Y%m%d'),
                                   end_date.strftime('%Y%m%d'))
    download = fz.makezip(files, outname=outfile, outpath=outpath)
    if notify:
        message = """You can download your file at: %s
This link will expire in 48 hours""" % download
        fz.notify_email(notify,
                        "Your MODIS extract for %s %s %s %s has completed"
                        % (product, country, start_date, end_date),
                        message)
    return download
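
A minimal usage sketch; the product and country values are illustrative only, and fz.notify_email delivery depends on the project's mail configuration:

# Hypothetical call; 'MOD13Q1' and 'Kenya' are placeholder arguments.
url = modiscountry('MOD13Q1', 'Kenya', '2012-01-01', '2012-12-31',
                   notify='user@example.com')
print(url)  # link to the generated zip under the static request path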