Example #1
    def _get_data(self, indicator="NY.GNS.ICTR.GN.ZS", country='US',
                  start=2002, end=2005):
    
        if isinstance(country, six.string_types):
            country = [country]
    
        countries = ';'.join(country)
        
        # Build URL for api call
        """
        url = ("http://api.worldbank.org/countries/" + countries + "/indicators/" +
               indicator + "?date=" + str(start) + ":" + str(end) +
               "&per_page=25000&format=json")
        """
        endpoint = "/countries/{countries}/indicators/{indicator}"\
            .format(countries=countries, indicator=indicator)
        url = self._url(endpoint)
        params = {
            'date': "%s:%s" % (start, end),
            'per_page': 25000,
            'format': 'json'
        }

        # Download
        response = self.session.get(url, params=params)
        data = response.content

        # Check to see if there is a possible problem
        possible_message = json.loads(data)[0]
        if 'message' in possible_message.keys():
            msg = possible_message['message'][0]
            try:
                msg = msg['key'].split() + ["\n "] + msg['value'].split()
                wb_err = ' '.join(msg)
            except:
                wb_err = ""
                if 'key' in msg.keys():
                    wb_err = msg['key'] + "\n "
                if 'value' in msg.keys():
                    wb_err += msg['value']
            error_msg = "Problem with a World Bank Query \n %s"
            return None, error_msg % wb_err
    
        if 'total' in possible_message.keys():
            if possible_message['total'] == 0:
                return None, "No results from world bank."
            
        # Parse JSON file
        data = json.loads(data)[1]
        country = [x['country']['value'] for x in data]
        iso_code = [x['country']['id'] for x in data]
        year = [x['date'] for x in data]
        value = [x['value'] for x in data]
        # Prepare output
        out = pandas.DataFrame([country, iso_code, year, value]).T
        out.columns = ['country', 'iso_code', 'year', indicator]
        return out, "Success"
Example #2
def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country="US", start=2002, end=2005):

    if type(country) == str:
        country = [country]

    countries = ";".join(country)

    # Build URL for api call
    url = (
        "http://api.worldbank.org/countries/"
        + countries
        + "/indicators/"
        + indicator
        + "?date="
        + str(start)
        + ":"
        + str(end)
        + "&per_page=25000&format=json"
    )

    # Download
    with urlopen(url) as response:
        data = response.read()

    # Check to see if there is a possible problem
    possible_message = json.loads(data)[0]
    if "message" in possible_message.keys():
        msg = possible_message["message"][0]
        try:
            msg = msg["key"].split() + ["\n "] + msg["value"].split()
            wb_err = " ".join(msg)
        except:
            wb_err = ""
            if "key" in msg.keys():
                wb_err = msg["key"] + "\n "
            if "value" in msg.keys():
                wb_err += msg["value"]
        error_msg = "Problem with a World Bank Query \n %s"
        return None, error_msg % wb_err

    if "total" in possible_message.keys():
        if possible_message["total"] == 0:
            return None, "No results from world bank."

    # Parse JSON file
    data = json.loads(data)[1]
    country = [x["country"]["value"] for x in data]
    iso_code = [x["country"]["id"] for x in data]
    year = [x["date"] for x in data]
    value = [x["value"] for x in data]
    # Prepare output
    out = pandas.DataFrame([country, iso_code, year, value]).T
    out.columns = ["country", "iso_code", "year", indicator]
    return out, "Success"
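
Because this variant reports failures through its second return value instead of raising, callers need to check the first element for None. A minimal usage sketch of that contract (the indicator code and country list here are illustrative):

df, msg = _get_data(indicator="NY.GDP.MKTP.CD", country=["US", "GB"],
                    start=2000, end=2010)
if df is None:
    # msg carries the World Bank error text or "No results from world bank."
    raise RuntimeError(msg)
print(df.head())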
Example #3
def _get_data(indicator="NY.GNS.ICTR.GN.ZS",
              country='US',
              start=2002,
              end=2005):

    if type(country) == str:
        country = [country]

    countries = ';'.join(country)

    # Build URL for api call
    url = ("http://api.worldbank.org/countries/" + countries + "/indicators/" +
           indicator + "?date=" + str(start) + ":" + str(end) +
           "&per_page=25000&format=json")

    # Download
    with urlopen(url) as response:
        data = response.read()

    # Check to see if there is a possible problem
    possible_message = json.loads(data)[0]
    if 'message' in possible_message.keys():
        msg = possible_message['message'][0]
        try:
            msg = msg['key'].split() + ["\n "] + msg['value'].split()
            wb_err = ' '.join(msg)
        except:
            wb_err = ""
            if 'key' in msg.keys():
                wb_err = msg['key'] + "\n "
            if 'value' in msg.keys():
                wb_err += msg['value']
        error_msg = "Problem with a World Bank Query \n %s"
        return None, error_msg % wb_err

    if 'total' in possible_message.keys():
        if possible_message['total'] == 0:
            return None, "No results from world bank."

    # Parse JSON file
    data = json.loads(data)[1]
    country = [x['country']['value'] for x in data]
    iso_code = [x['country']['id'] for x in data]
    year = [x['date'] for x in data]
    value = [x['value'] for x in data]
    # Prepare output
    out = pandas.DataFrame([country, iso_code, year, value]).T
    out.columns = ['country', 'iso_code', 'year', indicator]
    return out, "Success"
Example #4
def test_main_classification(mock_parameters, mock_save_results,
                             mock_get_results, mock_fetch_data, method, name):
    # create mock objects from database
    mock_parameters.return_value = {'type': method}
    mock_fetch_data.return_value = fx.inputs_classification(
        include_categorical=True)
    mock_get_results.return_value = None

    main(job_id=None, generate_pfa=True)

    pfa = mock_save_results.call_args[0][0]
    pfa_dict = json.loads(pfa)

    # NOTE: this does not work due to bug in jsonpickle
    # deserialize model
    # estimator = deserialize_sklearn_estimator(pfa_dict['metadata']['estimator'])
    # assert estimator.__class__.__name__ == name

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    engine.action({
        'stress_before_test1': 10.,
        'iq': 10.,
        'agegroup': '50-59y'
    })
Example #5
def get_indicators():
    '''Download information about all World Bank data series
    '''
    url = 'http://api.worldbank.org/indicators?per_page=50000&format=json'
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    # Clean fields
    data.source = [x['value'] for x in data.source]
    fun = lambda x: x.encode('ascii', 'ignore')
    data.sourceOrganization = data.sourceOrganization.apply(fun)
    # Clean topic field

    def get_value(x):
        try:
            return x['value']
        except:
            return ''
    fun = lambda x: [get_value(y) for y in x]
    data.topics = data.topics.apply(fun)
    data.topics = data.topics.apply(lambda x: ' ; '.join(x))
    # Clean output
    data = data.sort_values('id')
    data.index = pandas.Index(range(data.shape[0]))
    return data
Example #6
def get_indicators():
    """Download information about all World Bank data series
    """
    url = "http://api.worldbank.org/indicators?per_page=50000&format=json"
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    # Clean fields
    data.source = [x["value"] for x in data.source]
    fun = lambda x: x.encode("ascii", "ignore")
    data.sourceOrganization = data.sourceOrganization.apply(fun)
    # Clean topic field

    def get_value(x):
        try:
            return x["value"]
        except:
            return ""

    fun = lambda x: [get_value(y) for y in x]
    data.topics = data.topics.apply(fun)
    data.topics = data.topics.apply(lambda x: " ; ".join(x))
    # Clean output
    data = data.sort_values("id")
    data.index = pandas.Index(range(data.shape[0]))
    return data
Example #7
def read_csv():
    # read the csv file
    df = pd.read_csv(file_name)

    # for each row in the csv...
    for index, row in df.iterrows():
        print('building a person ...')

        root = ET.Element('Person')
        subFirstName = ET.SubElement(root, 'FirstName')
        subFirstName.text = str(row['FirstName'])
        subLastName = ET.SubElement(root, 'LastName')
        subLastName.text = str(row['LastName'])
        subCpr = ET.SubElement(root, 'cprnumber')
        subCpr.text = str(row['DateOfBirth']).replace('-', '') + '-' + str(
            random.randint(1000, 9999))
        subEmail = ET.SubElement(root, 'email')
        subEmail.text = str(row['Email'])

        objectThing = prettify(root)
        print(objectThing)

        # send the xml object to the nemID service to get a nemId
        response = requests.post('http://localhost:8080/nemId',
                                 data=ET.tostring(root),
                                 headers={"Content-Type": "text/xml"})
        nemId = json.loads(response.text)

        # save ToMsgPack
        saveToMsgPack(nemId, subCpr.text)
Example #8
def getOneBatch(location_list, len_of_locations):
    '''
    Retrieve up to 20 requests from the API for the English station names
    :param location_list: List of coordinates for stations
    :param len_of_locations: Number of stations to be searched
    '''
    ops_url_list = ['' for i in range(len_of_locations)]
    station_names_en = ['' for i in range(len_of_locations)]
    main_query = '/v3/place/text?city=' + city_name + '&output=json&offset=1&page=1&key=' + api_key_web_service + '&citylimit=true&language=en&types=150500&location='

    BURL = 'https://restapi.amap.com/v3/batch?key=' + api_key_web_service
    BPARAMS = '{"ops": ['
    for x in range(len_of_locations):
        ops_url_list[x] = main_query + location_list[x]
        BPARAMS += '{"url": "' + ops_url_list[x] + '"}'
        BPARAMS += ']}' if (x == len_of_locations - 1) else ','

    body = json.loads(BPARAMS)
    url = 'https://restapi.amap.com/v3/batch'
    params = {'key': api_key_web_service}
    responseBatchEn = requests.post(url, params=params, json=body)
    dataEn = responseBatchEn.json()

    for x in range(len_of_locations):
        station_names_en[x] = str(
            dataEn[x]['body']['pois'][0]['name']).split('(')[0]

    return station_names_en
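
Assembling BPARAMS by string concatenation is fragile: any URL containing a double quote would produce invalid JSON. A sketch of the same batch request built from plain Python objects, reusing the main_query, location_list, and api_key_web_service names assumed above; requests serializes the json= payload itself:

import requests

# Build the ops list as dicts; JSON serialization is left to requests,
# which escapes special characters correctly.
ops = [{"url": main_query + loc} for loc in location_list]
response = requests.post('https://restapi.amap.com/v3/batch',
                         params={'key': api_key_web_service},
                         json={"ops": ops})
dataEn = response.json()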
Example #9
    def make_json(self):
        # Build a dict whose keys are the workbook's sheets and whose
        # values are each sheet's rows as a list of JSON objects
        data_parced = pd.read_excel(self.input_file, None)
        dict_jsons = {}

        for sheet, frame in data_parced.items():
            # Convert the sheet's DataFrame into a JSON string and parse it back
            json_obj_list = json.loads(
                frame.to_json(None, orient='records', date_format='iso'))
            for item in json_obj_list:
                for key, value in item.items():
                    # Replace the cell's data with a JSON object when the
                    # data is itself a JSON string
                    item[key] = get_normalized_value(value)
            dict_jsons[sheet] = json_obj_list

        with open(self.output_file, 'w') as result:
            # Write the resulting dict to the file at the given path
            json.dump(dict_jsons,
                      result,
                      ensure_ascii=False,
                      indent=4,
                      separators=(',', ': '))
Example #10
def get_indicators():
    '''Download information about all World Bank data series
    '''
    url = 'http://api.worldbank.org/indicators?per_page=50000&format=json'
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    # Clean fields
    data.source = [x['value'] for x in data.source]
    fun = lambda x: x.encode('ascii', 'ignore')
    data.sourceOrganization = data.sourceOrganization.apply(fun)

    # Clean topic field

    def get_value(x):
        try:
            return x['value']
        except:
            return ''

    fun = lambda x: [get_value(y) for y in x]
    data.topics = data.topics.apply(fun)
    data.topics = data.topics.apply(lambda x: ' ; '.join(x))
    # Clean output
    data = data.sort_values('id')
    data.index = pandas.Index(range(data.shape[0]))
    return data
Example #11
 def send(query: str):
     host = 'http://localhost:19002/query/service'
     data = dict()
     data['statement'] = query
     data = urllib.parse.urlencode(data).encode('utf-8')
     with urllib.request.urlopen(host, data) as handler:
         result = json.loads(handler.read())
         return result['status']
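
The helper URL-encodes a single SQL++ statement, posts it to the local AsterixDB query service, and returns only the "status" field of the JSON response. A hypothetical call:

status = send('SELECT VALUE 1;')
print(status)  # "success" is expected when the service accepts the statement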
Example #12
def get_country_data(country):
    print(f'LOG: getting country {country} data')

    base_url = 'https://api.covid19api.com'  ## open API

    url = f'{base_url}/country/{country}?'

    payload = {}
    headers = {}

    try:
        response = requests.request("GET",
                                    url,
                                    headers=headers,
                                    data=payload,
                                    timeout=3.0)
    except Timeout:
        print(f'LOG: timeout while requesting country {country} data')
        return pd.DataFrame()

    # json_response = response.json()
    try:
        json_data = json.loads(response.text)
    except ValueError as err:
        print(f'LOG: error while loading json country {country} data ({err})')
        return pd.DataFrame()

    df = json_normalize(json_data)

    if (df.empty or response.status_code != 200):
        print(f'LOG: error in request country {country} data')
        return df

    rename = {
        'Country': 'country',
        'CountryCode': 'country_code',
        'Lat': 'lat',
        'Lon': 'lon',
        'Confirmed': 'confirmed',
        'Deaths': 'deaths',
        'Recovered': 'recovered',
        'Active': 'active',
        'Date': 'date',
    }

    columns = [
        'country', 'country_code', 'lat', 'lon', 'confirmed', 'deaths',
        'recovered', 'active', 'date'
    ]

    df = df.rename(columns=rename)

    df = df[columns]

    df['date'] = df['date'].apply(
        lambda x: datetime.strptime(x, "%Y-%m-%dT%H:%M:%SZ"))

    return df
Example #13
def _load_intermediate_data(job_ids):
    jobs_data = [io_helper.get_results(job_id).data for job_id in job_ids]
    # chain all results together, ignore empty results
    data = list(itertools.chain(*[json.loads(d) for d in jobs_data if d]))

    if not data:
        raise errors.UserError('Intermediate jobs {} do not have any data.'.format(job_ids))

    return data
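
The flattening step leans on itertools.chain: each job contributes a JSON-encoded list, empty payloads are filtered out, and chain(*...) concatenates the rest. A self-contained illustration with made-up payloads:

import itertools
import json

jobs_data = ['[1, 2]', '', '[3]']  # two results plus one empty payload
data = list(itertools.chain(*[json.loads(d) for d in jobs_data if d]))
assert data == [1, 2, 3]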
Example #14
def collect_limit(start_date, end_date):
    log.info("collect start, start date: {}, end date: {}".format(
        start_date, end_date))
    # Fetch basic stock data
    stock_basic = pro.query(
        'stock_basic',
        exchange='',
        list_status='L',
        fields='ts_code, symbol, area, industry, list_date')
    calendar_list = pro.query(
        'trade_cal',
        exchange="SSE",
        start_date=start_date,
        end_date=end_date,
        is_open="1",
        fields='exchange, cal_date, is_open, pretrade_date')
    count = 0
    for index, cal_row in calendar_list.iterrows():
        # Fetch the stocks that hit their upward price limit on this date
        limit_list = pro.limit_list(trade_date=cal_row['cal_date'],
                                    limit_type='U')
        limit_list_mg = pd.merge(limit_list,
                                 stock_basic,
                                 how='left',
                                 on='ts_code')
        limit_list_mg['limit_num'] = 1
        for index, row in limit_list_mg.iterrows():
            # The date 30 days ago
            day30 = (datetime.date.today() -
                     datetime.timedelta(30)).strftime("%Y%m%d")
            if 'ST' in row['name'] or row['list_date'] > day30:
                continue
            limit_stock = r.hget(
                STOCK_LIMIT_UP.format(cal_row['pretrade_date']),
                row['ts_code'])
            if limit_stock is not None:
                stock = Stock()
                stock.__dict__ = json.loads(limit_stock)
                row['limit_num'] = stock.limit_num + 1
            r.hset(STOCK_LIMIT_UP.format(row['trade_date']), row['ts_code'],
                   row.to_json(orient='index', force_ascii=False))

        # Fetch the stocks that hit their downward price limit on this date
        limit_list = pro.limit_list(trade_date=cal_row['cal_date'],
                                    limit_type='D')
        limit_list_mg = pd.merge(limit_list,
                                 stock_basic,
                                 how='left',
                                 on='ts_code')
        for index, row in limit_list_mg.iterrows():
            if 'ST' in row['name']:
                continue
            r.hset(STOCK_LIMIT_DOWN.format(row['trade_date']), row['ts_code'],
                   row.to_json(orient='index', force_ascii=False))
    log.info("collect end")
Example #15
def test_main_partial(mock_parameters, mock_save_results, mock_get_results, mock_fetch_data, method, name):
    # create mock objects from database
    mock_parameters.return_value = {'type': method}
    mock_fetch_data.return_value = fx.inputs_regression()
    mock_get_results.return_value = None

    main(job_id=None, generate_pfa=False)

    js = json.loads(mock_save_results.call_args[0][0])
    estimator = deserialize_sklearn_estimator(js['estimator'])
    assert estimator.__class__.__name__ == name
Example #16
def aggregate_stats(job_ids):
    """Get all partial statistics from all nodes and aggregate them.
    :input job_ids: list of job_ids with intermediate results
    """
    # Read intermediate inputs from jobs
    logging.info("Fetching intermediate data...")
    results = [json.loads(io_helper.get_results(str(job_id)).data) for job_id in job_ids]

    corr, columns = _aggregate_results(results)

    _save_corr_heatmap(corr, columns)
Example #17
def convert_df(raw: str, config: Dict[str, Any]) -> DataFrame:
    """Convert raw string to DataFrame, currently only supports json/csv.

    :rtype: DataFrame
    :param raw: the raw source string in json/csv format,
        this is to be converted to DataFrame
    :param config: the config of the data source specified in task config,
        see `configs/*.py`
    :return: the converted `DataFrame`
    """
    ftype = "json" if "file_format" not in config else config["file_format"]
    df = None
    if ftype == "jsonl":
        jlines = raw.split("\n")
        df = DataFrame()
        for jline in jlines:
            if len(jline) < 3:
                continue
            line = json.loads(jline)
            df = df.append(Series(line), ignore_index=True)
    elif ftype == "json":
        if "json_path" in config:
            extracted_json = json_extract(raw, config["json_path"])
        elif "json_path_nested" in config:
            extracted_json = json_unnest(raw, config["json_path_nested"],
                                         config["fields"], {}, [])
        else:
            extracted_json = raw
        data = pd_json.loads(extracted_json)
        df = pd_json.json_normalize(data)
    elif ftype == "csv":
        if "header" in config:
            df = pd.read_csv(StringIO(raw), names=config["header"])
        else:
            df = pd.read_csv(StringIO(raw))
    # convert timezone according to config
    tz = None
    if "timezone" in config:
        tz = pytz.timezone(config["timezone"])
    elif "country_code" in config:
        tz = get_country_tz(config["country_code"])
    # TODO: support multiple countries/timezones in the future if needed
    if "date_fields" in config:
        for date_field in config["date_fields"]:
            df[date_field] = pd.to_datetime(df[date_field])
        if tz is not None:
            df["tz"] = get_tz_str(tz)
            for date_field in config["date_fields"]:
                df[date_field] = (
                    df[date_field].dt.tz_localize(tz).dt.tz_convert(pytz.utc))
                df[date_field] = df[date_field].astype("datetime64[ns]")
    return df
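
The config keys the function reads (file_format, json_path, header, timezone, date_fields, and so on) come from the task configs mentioned in the docstring. A hypothetical config and call for a CSV source:

config = {
    "file_format": "csv",
    "header": ["day", "visits"],  # the raw data has no header row
    "timezone": "UTC",
    "date_fields": ["day"],
}
raw = "2020-01-01,10\n2020-01-02,12\n"
df = convert_df(raw, config)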
Example #18
def get_countries():
    '''Query information about countries
    '''
    url = 'http://api.worldbank.org/countries/all?format=json'
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    data.adminregion = [x['value'] for x in data.adminregion]
    data.incomeLevel = [x['value'] for x in data.incomeLevel]
    data.lendingType = [x['value'] for x in data.lendingType]
    data.region = [x['value'] for x in data.region]
    data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'})
    return data
Example #19
def get_countries():
    """Query information about countries
    """
    url = "http://api.worldbank.org/countries/?per_page=1000&format=json"
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    data.adminregion = [x["value"] for x in data.adminregion]
    data.incomeLevel = [x["value"] for x in data.incomeLevel]
    data.lendingType = [x["value"] for x in data.lendingType]
    data.region = [x["value"] for x in data.region]
    data = data.rename(columns={"id": "iso3c", "iso2Code": "iso2c"})
    return data
Example #20
def get_countries():
    '''Query information about countries
    '''
    url = 'http://api.worldbank.org/countries/?per_page=1000&format=json'
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    data.adminregion = [x['value'] for x in data.adminregion]
    data.incomeLevel = [x['value'] for x in data.incomeLevel]
    data.lendingType = [x['value'] for x in data.lendingType]
    data.region = [x['value'] for x in data.region]
    data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'})
    return data
Example #21
    def _process_json(self, json_string):
        loaded_json = []
        if json_string:
            json_string = re.sub("\",\"\"", ",\"", json_string)
            json_string = re.sub('"\n\t\"', "", json_string)
            json_string = re.sub('}\]\[\]', '}]', json_string)

            if json_string == "[[][]]":
                loaded_json = []
            else:
                try:
                    loaded_json = json.loads(json_string)
                except ValueError:
                    loaded_json = None
        return loaded_json
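
The cleanup leaves three outcomes: the sentinel "[[][]]" maps to an empty list, well-formed JSON parses normally, and anything still malformed yields None. An illustrative check, with parser standing in for an instance of the enclosing class:

assert parser._process_json("[[][]]") == []
assert parser._process_json('[{"a": 1}]') == [{"a": 1}]
assert parser._process_json('{"broken": ') is None
assert parser._process_json("") == []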
Example #22
    def create(self, path: str):
        query = 'create %s;\n' % self._dataverse
        query += 'use %s;\n' % self._dataverse
        host = 'http://localhost:19002/query/service'
        data = {}
        query += 'create type Schema as open{ \n' \
                 'id: int64};\n'
        query += 'create dataset %s(Schema) primary key id;\n' % self._dataset
        query += 'LOAD DATASET %s USING localfs\n ' \
                 '((\"path\"=\"127.0.0.1://%s\"),(\"format\"=\"adm\"));\n' % (self._dataset, path)

        data['statement'] = query
        data = urllib.parse.urlencode(data).encode('utf-8')
        with urllib.request.urlopen(host, data) as handler:
            result = json.loads(handler.read())
            ret_array = result['results']
Example #23
def _load_intermediate_data(job_ids):
    data = []
    for job_id in job_ids:
        job_result = io_helper.get_results(job_id)

        # log errors (e.g. about missing data), but do not reraise them
        if job_result.error:
            logging.warning(job_result.error)
        else:
            pfa = json.loads(job_result.data)
            data.append(pfa)

    if not data:
        raise errors.UserError('All jobs {} returned an error.'.format(job_ids))

    return data
Example #24
def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US',
              start=2002, end=2005):
    # Build URL for api call
    url = "http://api.worldbank.org/countries/" + country + "/indicators/" + \
        indicator + "?date=" + str(start) + ":" + str(end) + "&per_page=25000" + \
        "&format=json"
    # Download
    with urlopen(url) as response:
        data = response.read()
    # Parse JSON file
    data = json.loads(data)[1]
    country = [x['country']['value'] for x in data]
    iso2c = [x['country']['id'] for x in data]
    year = [x['date'] for x in data]
    value = [x['value'] for x in data]
    # Prepare output
    out = pandas.DataFrame([country, iso2c, year, value]).T
    return out
Example #25
def hello_gcs(event, context):
    """Triggered by a change to a Cloud Storage bucket.
    Args:
         event (dict): Event payload.
         context (google.cloud.functions.Context): Metadata for the event.
    """
    file = event
    bucket = storage.get_bucket(file['bucket'])
    blob = bucket.get_blob(file['name'])
    bstr = str(blob.download_as_string(), 'utf-8')
    for bline in bstr.splitlines():
        json_data = json.loads(bline)
        doc_ref = db.collection(u'data').document(json_data['entityId'])
        doc_ref.set(json_data['data'])

    print(f"bucket : {file['bucket']}")
    print(f"selfLink : {file['selfLink']}")
    print(f"Processing file : {file['name']}.")
Example #26
def get_data(ticker, start_date=None, end_date=None, index_as_date=True):
    '''Downloads historical stock price data into a pandas data frame 
    
       @param: ticker
       @param: start_date = None
       @param: end_date = None
       @param: index_as_date = True
    '''

    site = build_url(ticker, start_date, end_date)
    resp = requests.get(site)
    html = resp.content
    html = html.decode()

    start = html.index('"HistoricalPriceStore"')
    end = html.index("firstTradeDate")

    needed = html[start:end]
    needed = needed.strip('"HistoricalPriceStore":')
    needed = needed.strip(""","isPending":false,'""")
    needed = needed + "}"

    temp = loads(needed)
    result = json_normalize(temp['prices'])
    result = result[[
        "date", "open", "high", "low", "close", "adjclose", "volume"
    ]]

    # fix date field
    result['date'] = result['date'].map(
        lambda x: pd.datetime.fromtimestamp(x).date())

    result['ticker'] = ticker.upper()

    result = result.dropna()
    result = result.reset_index(drop=True)

    if index_as_date:

        result = result.sort_values("date")
        result.index = result.date.copy()
        del result["date"]

    return result
Example #27
def get_countries():
    """Query information about countries
    
    Provides information such as: 
        country code, region, income level, capital city, latitude and longitude
    """
    url = "http://api.worldbank.org/countries/?per_page=1000&format=json"
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    data.adminregion = [x["value"] for x in data.adminregion]
    data.incomeLevel = [x["value"] for x in data.incomeLevel]
    data.lendingType = [x["value"] for x in data.lendingType]
    data.region = [x["value"] for x in data.region]
    data.latitude = [float(x) if x != "" else np.nan for x in data.latitude]
    data.longitude = [float(x) if x != "" else np.nan for x in data.longitude]
    data = data.rename(columns={"id": "iso3c", "iso2Code": "iso2c"})
    return data
Example #28
def _get_data(ticker):
    """ Downloads historical stock price data into a pandas data frame
    Args:
        ticker: stock ticker
    returns:
        price of stock"""
    site = _build_url(ticker)
    resp = requests.get(site)
    html = resp.content.decode()
    start = html.index('"HistoricalPriceStore"')
    end = html.index("firstTradeDate")
    needed = html[start:end]
    needed = needed.strip('"HistoricalPriceStore":').strip(
        ""","isPending":false,'""") + "}"
    temp = loads(needed)
    result = json_normalize(temp['prices'])
    return result[[
        "date", "open", "high", "low", "close", "adjclose", "volume"
    ]]['adjclose'][0]
Example #29
def get_countries():
    '''Query information about countries
    
    Provides information such as: 
        country code, region, income level, capital city, latitude and longitude
    '''
    url = 'http://api.worldbank.org/countries/?per_page=1000&format=json'
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    data.adminregion = [x['value'] for x in data.adminregion]
    data.incomeLevel = [x['value'] for x in data.incomeLevel]
    data.lendingType = [x['value'] for x in data.lendingType]
    data.region = [x['value'] for x in data.region]
    data.latitude = [float(x) if x != "" else np.nan for x in data.latitude]
    data.longitude = [float(x) if x != "" else np.nan for x in data.longitude]
    data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'})
    return data
Example #30
def test_main_distributed(mock_parameters, mock_save_results, mock_get_results, mock_fetch_data, method, name):
    mock_parameters.return_value = {'type': method}
    mock_fetch_data.return_value = fx.inputs_regression()
    mock_get_results.return_value = None

    # run intermediate job
    main(job_id=None, generate_pfa=False)

    mock_get_results.return_value = mock.MagicMock(data=mock_save_results.call_args[0][0])

    # generate PFA
    main(job_id='1', generate_pfa=True)

    pfa = mock_save_results.call_args_list[1][0][0]
    pfa_dict = json.loads(pfa)

    # make some prediction with PFA
    from titus.genpy import PFAEngine
    engine, = PFAEngine.fromJson(pfa_dict)
    engine.action({'stress_before_test1': 10., 'iq': 10., 'agegroup': '-50y'})
Example #31
def _get_data(indicator="NY.GNS.ICTR.GN.ZS",
              country='US',
              start=2002,
              end=2005):
    # Build URL for api call
    url = ("http://api.worldbank.org/countries/" + country + "/indicators/" +
           indicator + "?date=" + str(start) + ":" + str(end) +
           "&per_page=25000&format=json")
    # Download
    with urlopen(url) as response:
        data = response.read()
    # Parse JSON file
    data = json.loads(data)[1]
    country = [x['country']['value'] for x in data]
    iso2c = [x['country']['id'] for x in data]
    year = [x['date'] for x in data]
    value = [x['value'] for x in data]
    # Prepare output
    out = pandas.DataFrame([country, iso2c, year, value]).T
    return out
Example #32
def dayopen():
    nse = Nse()
    print(nse)

    companies_list = nse.get_stock_codes(cached=False)
    my_dict = companies_list
    w = csv.writer(open("stocklist.csv", "w", newline=''))
    for key, val in my_dict.items():
        w.writerow([key, val])

    stock_list = csv.reader(open('stocklist.csv'))

    next(stock_list)

    #creates new data frame
    final = pd.DataFrame()

    # dates configuration
    end_day = date.today()
    start_day = end_day - timedelta(365)

    for company in stock_list:
        try:

            symbol, name = company

            df1 = nse.get_quote(symbol.format(symbol), as_json=True)
            # df10 = nse.get_history(symbol.format(symbol), start=start_day, end=end_day,as_json=True)
            # datax = pd_json.loads(df10)
            # df11 = pd.json_normalize(datax)
            # df12 = pd.DataFrame(df11)
            data = pd_json.loads(df1)  # load
            df = pd.json_normalize(data)  # normalise
            df2 = pd.DataFrame(df)
        except:
            continue
        selected = df2.iloc[0:, [1, 6, 65, 11, 20, 67]]
        final = pd.concat([final, selected])
        final = final.reset_index(drop=True)
        print(final)
        final.to_csv('dayopendata.csv', index=None, header=True)
Example #33
def aggregate_kmeans(job_ids):
    """Compute merging of clusters according to least merging error (e.g. smallest distance between centroids)
    :input job_ids: list of job_ids with intermediate results
    """
    # Read intermediate inputs from jobs
    logging.info("Fetching intermediate data...")
    data = [
        json.loads(io_helper.get_results(str(job_id)).data)
        for job_id in job_ids
    ]

    local_centroids = [
        np.array(x['centroids']) for x in data if x['centroids']
    ]
    indep_vars = data[0]['indep_vars']

    # Aggregate clusters remotely
    remote_centroids = remote.aggregate_clusters(local_centroids)
    logging.info("Centroids:\n{}".format(remote_centroids))

    # Create fake KMeans estimator and assign it our centroids
    estimator = KMeans()
    estimator.cluster_centers_ = np.array(remote_centroids)

    # Generate PFA for kmeans and add centroids to metadata
    featurizer = _create_featurizer(indep_vars)
    types = [(var['name'], var['type']['name']) for var in indep_vars]
    pfa = sklearn_to_pfa(estimator, types, featurizer.generate_pretty_pfa())

    # Add serialized model as metadata
    pfa['metadata'] = {
        'centroids': json.dumps(np.array(remote_centroids).tolist())
    }

    # Save or update job_result
    logging.info('Saving PFA to job_results table')
    pfa = json.dumps(pfa)
    io_helper.save_results(pfa, shapes.Shapes.PFA)
    logging.info("DONE")
Example #34
    def get_input(self, input_str, **kwargs):
        """Get inputs.

        Principales identificadores de la base de datos Tempus3.

        * Ejemplo 1: Identificador del elemento operación estadística.
        Existen tres códigos para la identificación de la operación estadística
        "Índice de Precios de Consumo (IPC)":
            - código alfabético Tempus3 interno (IPC)
            - código numérico Tempus3 interno (Id=25)
            - código de la operación estadística en el Inventario de
              Operaciones Estadísticas (IOE30138)
            (Ver operaciones disponibles:
            https://servicios.ine.es/wstempus/js/ES/OPERACIONES_DISPONIBLES)
        * Ejemplo 2: Identificador de la variable "Provincias":
            - código numérico Tempus3 interno (Id=115)
            (Ver Variables: https://servicios.ine.es/wstempus/js/ES/VARIABLES)

        Identificador de las tablas PcAxis

        * Ejemplo 1: Identificador de la tabla "Gastos internos totales en
        actividades de I+D por años y sectores/unidad"
            - código alfanumérico PcAxis interno (Id=/t14/p057/a2016/l0/01001.px)
            (Ver Obtención del identificador de una tabla utilizando INEbase )

        Parameters
        ----------
        inputs : str
        kwargs : dict, optional

        See Also
        --------
        get_function
        """
        endpoint_input = f"{self.endpoint}/{input_str}"
        r = requests.get(endpoint_input, params=kwargs, verify=False)
        r_dict = json.loads(r.text)
        return r_dict
Example #35
    def get_functions(self, function="OPERACIONES_DISPONIBLES"):
        """Get functions availables.
 
        Parameters
        ----------
        function : str
            Function can take below values:
            * Operaciones: OPERACIONES_DISPONIBLES, OPERACIÓN...
            * Variables: VARIABLES, VARIABLES_OPERACION...
            * Valores: VALORES_VARIABLES, VALORES_VARIABLEOPERACION...
            * Tablas: TABLAS_OPERACION, GRUPOS_TABLA...
            * Series: SERIE, SERIES_OPERACION...
            * Publicaciones: PUBLICACIONES, PUBLICACIONES_OPERACION...
            * Datos: DATOS_SERIE, DATOS_TABLA...

        Returns
        -------
        function : pandas.DataFrame
        """
        endpoint_function = f"{self.endpoint}/{function}"
        r = requests.get(endpoint_function, verify=False)
        r_dict = json.loads(r.text)
        return pd.DataFrame(r_dict)
Example #36
 def __init__(self,
              channel: str,
              arch: str = "linux-64",
              base_url: str = "https://conda.anaconda.org/",
              ttl=600):
     # setup cache
     self.ttl = ttl
      # normal settings
     logger.info(f"RETRIEVING: {channel}, {arch}")
     if '{channel}' in base_url and '{arch}' in base_url:
         url_prefix = base_url.format(channel=channel, arch=arch)
     elif '{channel}' in base_url:
         url_prefix = base_url.format(
             channel=channel).rstrip('/') + f"/{arch}"
     else:
         url_prefix = f"{base_url.rstrip('/')}/{channel}/{arch}"
     repodata_url = f"{url_prefix}/repodata.json.bz2"
     data = requests.get(repodata_url)
     repodata = json.loads(bz2.decompress(data.content))
     self.channel = channel
     self.arch = arch
     self.graph = build_repodata_graph(repodata, arch, url_prefix)
     logger.info(f"GRAPH BUILD FOR {repodata_url}")
Example #37
    def get_countries(self):
        '''Query information about countries
        '''

        # Build URL for api call
        endpoint = '/countries'
        url = self._url(endpoint)
        params = {
            'per_page': 1000,
            'format': 'json'
        }

        # Download
        response = self.session.get(url, params=params)
        data = response.content

        data = json.loads(data)[1]
        data = pandas.DataFrame(data)
        data.adminregion = [x['value'] for x in data.adminregion]
        data.incomeLevel = [x['value'] for x in data.incomeLevel]
        data.lendingType = [x['value'] for x in data.lendingType]
        data.region = [x['value'] for x in data.region]
        data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'})
        return data
Example #38
    def get_indicators(self):
        '''Download information about all World Bank data series
        '''

        # Build URL for api call
        endpoint = '/indicators'
        url = self._url(endpoint)
        params = {
            'per_page': 50000,
            'format': 'json'
        }

        # Download
        response = self.session.get(url, params=params)
        data = response.content

        data = json.loads(data)[1]
        data = pandas.DataFrame(data)
        # Clean fields
        data.source = [x['value'] for x in data.source]
        fun = lambda x: x.encode('ascii', 'ignore')
        data.sourceOrganization = data.sourceOrganization.apply(fun)
        # Clean topic field

        def get_value(x):
            try:
                return x['value']
            except:
                return ''
        fun = lambda x: [get_value(y) for y in x]
        data.topics = data.topics.apply(fun)
        data.topics = data.topics.apply(lambda x: ' ; '.join(x))
        # Clean output
        data = data.sort_values('id')
        data.index = pandas.Index(range(data.shape[0]))
        return data
Example #39
def get_COOPS_json(begin_dt, end_dt, base_url):

    """Function accepts: a base url (API endpoint), a beginning and end datetime string in the form 'YYYYMMDD mm:ss'
    which are <= 1 year apart, passing these to the query_builder function.
    Function returns the hourly prediction data as a PANDAS DataFrame Object where the returned time becomes the
    datetime index."""

    # import dependencies

    import pandas as pd
    import numpy as np
    from pandas.io.common import urlopen
    from pandas.io import json

    # construct the query

    query, query_dict = query_builder(begin_dt, end_dt, base_url)

    # execute query and read response

    with urlopen(query) as response:
        data = response.read()

        # convert json object to python dictionary and extract time and values for predictions

        data = json.loads(data)['predictions']

        # read into PANDAS DataFrame, then manipulate DataFrame object
        data = pd.DataFrame(data)
        data.columns = ['Date_Time', 'Level']
        data.index = data.Date_Time
        data.index = pd.to_datetime(data.index)
        data = data.drop('Date_Time', axis=1)

        # reindex to fill in any missing time values, this needs
        # work to initialize the range on the data/query vs. hardcoding as it
        # currently stands.

        periods, begin, end = dt_periodizer(query_dict)

        begin_string = begin.strftime('%Y-%m-%d %H:%M:%S')

        rng = pd.date_range(begin_string, periods=periods, freq='6min')

        # the actual reindex itself needs to be reworked for a better fill
        # a good start might be the median of the points directly above and
        # below the missing dt index. Since this is very few points typically
        # I am filling them with 100 for easy removal later. I would rather
        # remove the points than fill in a non-measured value.

        data = data.reindex(rng, fill_value=100)

        # convert value from string to float
        data.Level = data.Level.astype(float)

        # adjust level to account for distance of Carkeek from NOAA
        # monitoring station (+ 5.5%)
        level_adjust = data.Level.values + (.05 * data.Level.values)
        data.Level = np.round(level_adjust, decimals=2)

        # add date column to dataframe for later use with weather data
        data['Date'] = data.index.date

        # add a column for hourly re-sample

        # data['Hour'] = data.index.hour
        # data['Time'] = data.index.time

        # return DataFrame object

        return data
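
The reindex pattern used above, in isolation: missing 6-minute timestamps are inserted and filled with the sentinel value 100 so they can be dropped later. A self-contained illustration:

import pandas as pd

rng = pd.date_range('2020-01-01 00:00:00', periods=4, freq='6min')
partial = pd.DataFrame({'Level': [1.0, 2.0]}, index=rng[[0, 2]])
full = partial.reindex(rng, fill_value=100)  # rows 1 and 3 get Level == 100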
Example #40
 for prop in props:
     # big_df = pd.read_pickle(prop + '-O_' + cn + '.pkl')
     # '''
     print('-------------------')
     big_df = pd.DataFrame()
     for el in elements:
         indexes_to_drop = []
         df = pd.read_pickle(prop + '_cn.pkl')
         comps_to_remove = []
         for i, row in df.iterrows():
             if el not in row['metadata']['_structure']['elements'] \
                     or 'O' not in row['metadata']['_structure']['elements']:
                 indexes_to_drop.append(i)
             if el in row['metadata']['_structure']['elements']:
                 try:
                     df.set_value(i, el + '_cn', json.loads(row[cn])[el])
                 except KeyError:
                     for sp in json.loads(row[cn]).keys():
                         if el in sp:
                             df.set_value(i, el + '_cn', json.loads(row[cn])[sp])
                 except TypeError:
                     pass
             if row['is_ordered'] < 1:
                 comps_to_remove.append(row['reduced_cell_formula'])
         df.drop(df.index[indexes_to_drop], inplace=True)
         for comp in comps_to_remove:
             df.drop(df[df['reduced_cell_formula'] == comp].index, inplace=True)
         df_groupby = df.groupby(['reduced_cell_formula', 'is_' + prop], as_index=False).mean()
         # Find all compounds with CN > 8
         # print df_groupby[df_groupby[el + '_cn'] > 8]
         # for i, row in df_groupby[df_groupby[el + '_cn'] > 8].iterrows():
Example #41
 def _get_resee(self, html_re_see):
     obj_re_see = json.loads(html_re_see)
     return obj_re_see
Example #42
 def _get_product_comments(self, html_cont_comments):
     obj_comments = json.loads(html_cont_comments)
     return obj_comments
Example #43
 def _get_product_price(self, html_cont_price):
     json_price = re.findall(r"\[(.+)\]", html_cont_price)[0]
     obj_price = json.loads(json_price)
     return obj_price
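
This extraction targets JSONP-style responses where the JSON sits inside a bracketed callback payload; the greedy group grabs everything between the first '[' and the last ']'. An illustrative round trip with a made-up payload:

import json
import re

html_cont_price = 'updatePrice([{"id": "J_123", "p": "59.00"}])'
json_price = re.findall(r"\[(.+)\]", html_cont_price)[0]
obj_price = json.loads(json_price)
assert obj_price == {"id": "J_123", "p": "59.00"}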