예제 #1
0
def parse_json_checkin(json, url=None):
    """Extract (user id, venue id, local time) from a Foursquare checkin.

    `json` may be raw JSON text or an already parsed dictionary; return
    None when it is empty, unparsable, or missing a required field.
    """
    if not json:
        return None
    if isinstance(json, dict):
        checkin = json['checkin']
    else:
        try:
            checkin = ujson.loads(json)
        except (TypeError, ValueError) as not_json:
            print(not_json, json, url)
            return None
    uid = u.get_nested(checkin, ['user', 'id'])
    vid = u.get_nested(checkin, ['venue', 'id'])
    created = u.get_nested(checkin, 'createdAt')
    offset = u.get_nested(checkin, 'timeZoneOffset', 0)
    if uid is None or vid is None or created is None:
        return None
    local_time = datetime.fromtimestamp(created, tz=pytz.utc)
    # Shifting by the offset makes the date deliberately non-UTC: storing
    # the real timezone would make pymongo convert back to UTC at insertion,
    # whereas we want local time back unchanged when reading from the DB.
    local_time += timedelta(minutes=offset)
    return int(uid), str(vid), local_time
예제 #2
0
def parse_tweet(tweet):
    """Return a FullCheckIn from `tweet` or None if it is not located in a
    valid city.

    A tweet qualifies when it carries GPS coordinates falling inside one of
    our known cities and links to a Foursquare/Swarm checkin URL.
    """
    loc = u.get_nested(tweet, 'coordinates')
    city = None
    if not loc:
        # In that case, we would have to follow the link to know whether the
        # checkin falls within our cities but that's too costly so we drop it
        # (and introduce a bias toward open sharing users I guess)
        return None
    lon, lat = loc['coordinates']
    city = find_town(lat, lon, CITIES_TREE)
    if not (city and city in cities.SHORT_KEY):
        return None
    tid = u.get_nested(tweet, 'id_str')
    urls = u.get_nested(tweet, ['entities', 'urls'], [])

    # short url of the checkin that need to be expand, either using bitly API
    # or by VenueIdCrawler. Once we get the full URL, we still need to request
    # 4SQ (500 per hours) to get info.
    def is_foursquare_url(link):
        # Named function instead of an assigned lambda (PEP 8, E731); the
        # old lambda's parameter `u` also shadowed the module alias `u`.
        return '4sq.com' in link or 'swarmapp.com' in link

    fsq_urls = [url['expanded_url'] for url in urls
                if is_foursquare_url(url['expanded_url'])]
    if not fsq_urls:
        return None
    lid = str(fsq_urls[0])
    uid = u.get_nested(tweet, ['user', 'id_str'])
    msg = u.get_nested(tweet, 'text')
    try:
        time = datetime.strptime(tweet['created_at'], UTC_DATE)
        time = cities.utc_to_local(city, time)
    except ValueError:
        print('time: {}'.format(tweet['created_at']))
        return None
    return FullCheckIn('', lid, '', city, loc, time, tid, uid, msg)
def parse_json_checkin(json, url=None):
    """Return salient info about a Foursquare checkin `json` that can be
    either JSON text or already parsed as a dictionary."""
    if not json:
        return None
    if isinstance(json, dict):
        checkin = json['checkin']
    else:
        # Raw text: attempt to decode it as JSON, bailing out on failure.
        try:
            checkin = ujson.loads(json)
        except (TypeError, ValueError) as not_json:
            print(not_json, json, url)
            return None
    uid = u.get_nested(checkin, ['user', 'id'])
    vid = u.get_nested(checkin, ['venue', 'id'])
    timestamp = u.get_nested(checkin, 'createdAt')
    offset = u.get_nested(checkin, 'timeZoneOffset', 0)
    if uid is None or vid is None or timestamp is None:
        return None
    when = datetime.fromtimestamp(timestamp, tz=pytz.utc)
    # The offset shift makes the date no longer UTC on purpose: attaching
    # the correct timezone would let pymongo convert back to UTC at
    # insertion, whereas we want local time stored and returned as-is.
    when += timedelta(minutes=offset)
    return int(uid), str(vid), when
예제 #4
0
 def job() -> None:
     """Fetch Yahoo statement payloads and bulk-load the parsed rows.

     For every (payload, isin) response, traverse the three statement
     histories (income, cash flow, balance sheet) and hand all resulting
     model rows to ETLBase.load_data in one batch.
     """
     data: List[Base] = []
     responses: List[Tuple] = fetch_yahoo_responses()
     # (model class, payload section, key holding the statement list) for
     # each statement type — replaces three copy-pasted stanzas.
     statement_sources = (
         (IncomeStatement, 'incomeStatementHistory', 'incomeStatementHistory'),
         (CashFlowStatement, 'cashflowStatementHistory', 'cashflowStatements'),
         (BalanceSheetStatement, 'balanceSheetHistory',
          'balanceSheetStatements'),
     )
     for response in responses:
         payload: Dict = response[0]
         isin: str = response[1]
         for model, section, key in statement_sources:
             statements: List = get_nested(payload, section, key, default=[])
             # extend() instead of `data = data + ...` avoids rebuilding the
             # accumulator list on every iteration.
             data.extend(
                 traverse_statement_history(
                     Model=model,  # type: ignore
                     isin=isin,
                     statements=statements))
     ETLBase.load_data(data)
예제 #5
0
async def task(i):
    """Worker `i`: pop checkin items from Redis, record their metadata, and
    download/upload the referenced video until STOP_FLAG is set."""
    # Opened for its side effect only (creates the per-worker file); it is
    # never written below — TODO(review): confirm whether it can be dropped.
    aweme_file = open(f"aweme_meta/{i}.txt", "a")
    try:
        while not STOP_FLAG:
            item = redis_client.lpop("tiktok:aweme")
            if item is None:
                # Queue empty: back off before polling again.
                await asyncio.sleep(5)
                continue
            # Items are encoded as "<key>$<aweme json>".
            [key, aweme] = item.decode("utf-8").split("$", 1)
            get_meta_file(meta_file_dir).write(f"{key}\t{aweme}\n")
            aweme = json.loads(aweme)

            file_name = aweme.get("aweme_id") + ".mp4"
            download_urls = get_nested(aweme,
                                       ["video", "download_addr", "url_list"])
            if not download_urls:
                continue
            try:
                # Try mirror URLs in order; first successful download wins.
                for url in download_urls:
                    try:
                        await download_video(url, file_name)
                        break
                    except Exception as e:
                        logger.warning(e)
                        continue
                await upload_video(file_name)
            except Exception as e:
                logger.warning(e)
    finally:
        # Fix: this handle previously leaked for the life of the process.
        aweme_file.close()
0
 def leaf_criterium(keys, node):
     """Decide whether `node` should be treated as a leaf.

     A dict node is rejected unless the parser tree entry at `keys` is
     explicitly typed "dict"; every non-dict node is accepted.
     """
     try:
         expected = get_nested(parser_tree, keys).type
     except (AttributeError, KeyError):
         expected = None
     return not (isinstance(node, dict) and expected != "dict")
예제 #7
0
    def push_images(self):
        """Push images to registries"""

        # Lazily build the tag -> docker image mapping when the push context
        # was not populated by a prior --generate run.
        if not self._push_context:
            log.debug(
                "--generate images is not specified. Generate push context...")
            for image_tag, _ in self.build_tags():
                self._push_context[image_tag] = docker_client.images.get(
                    image_tag)

        for registry, registry_spec in self.repository.items():
            # We will want to push README first.
            # Credentials come from the environment variables named in the
            # registry spec.
            login_payload: Dict = {
                "username": os.getenv(registry_spec['user']),
                "password": os.getenv(registry_spec['pwd']),
            }
            api_url: str = get_nested(registry_spec, ['urls', 'api'])

            for package, registry_url in registry_spec['registry'].items():
                # Drop the registry host; keep only the repository path.
                _, _url = registry_url.split('/', maxsplit=1)
                readme_path = Path('docs', package, 'README.md')
                repo_url: str = f"{get_nested(registry_spec, ['urls', 'repos'])}/{_url}/"
                self.push_readmes(api_url, repo_url, readme_path,
                                  login_payload)

            if FLAGS.readmes:
                log.info("--readmes is specified. Exit after pushing readmes.")
                return

            # Then push image to registry.
            for image_tag in self.push_tags():
                image = self._push_context[image_tag]
                reg, tag = image_tag.split(":")
                # Select the registry URL whose package key matches the repo
                # component of the tag.
                registry = ''.join([
                    v for k, v in registry_spec['registry'].items() if reg in k
                ])
                log.info(f"Uploading {image_tag} to {registry}")

                # NOTES: about concurrent pushing
                #   This would change most of our build logics
                #   since DockerClient is essentially a requests.Session,
                #   which doesn't have support for asynchronous requests.
                #   If we want to implement aiohttp then we might want to
                #   run docker from shell commands.
                self.background_upload(image, tag, registry)

                # separate release latest tags for yatai-service
                if all(
                        map(image_tag.__contains__,
                            ['yatai-service', '3.8', 'slim'])):
                    log.info(f"Uploading {image_tag} as latest to {registry}")
                    tag = 'latest'
                    self.background_upload(image, tag, registry)
def parse_tweet(tweet):
    """Return a CheckIn from `tweet` or None if it is not located in a valid
    city"""
    loc = u.get_nested(tweet, 'coordinates')
    if not loc:
        # Without coordinates we would have to follow the checkin link to
        # locate the tweet, which is too costly, so such tweets are dropped
        # (introducing a bias toward users who share their position openly).
        return None
    lon, lat = loc['coordinates']
    city = find_town(lat, lon, CITIES_TREE)
    if not city or city not in cities.SHORT_KEY:
        return None
    tid = u.get_nested(tweet, 'id_str')
    is_foursquare_url = lambda u: '4sq.com' in u or 'swarmapp.com' in u
    # Short checkin URLs still need expanding (bitly API or VenueIdCrawler)
    # and then a 4SQ request (500 per hour) to retrieve the full info.
    fsq_urls = [link['expanded_url']
                for link in u.get_nested(tweet, ['entities', 'urls'], [])
                if is_foursquare_url(link['expanded_url'])]
    if not fsq_urls:
        return None
    lid = str(fsq_urls[0])
    uid = u.get_nested(tweet, ['user', 'id_str'])
    msg = u.get_nested(tweet, 'text')
    try:
        time = datetime.strptime(tweet['created_at'], UTC_DATE)
        time = cities.utc_to_local(city, time)
    except ValueError:
        print('time: {}'.format(tweet['created_at']))
        return None
    return FullCheckIn('', lid, '', city, loc, time, tid, uid, msg)
예제 #9
0
파일: manager.py 프로젝트: aarnphm/BentoML
    def push_images(self) -> None:
        """Push images to registries"""

        # Lazily build the tag -> docker image mapping when the push context
        # was not populated by a prior --generate run.
        if not self._push_context:
            log.debug(
                "--generate images is not specified. Generate push context...")
            for image_tag, _ in self.build_tags(
            )[1]:  # get non base image tags
                self._push_context[image_tag] = docker_client.images.get(
                    image_tag)

        for registry, registry_spec in self.repository.items():
            # We will want to push README first.
            # Credentials come from the environment variables named in the
            # registry spec.
            login_payload: t.Dict = {
                "username": os.getenv(registry_spec["user"]),
                "password": os.getenv(registry_spec["pwd"]),
            }
            api_url: str = get_nested(registry_spec, ["urls", "api"])

            for package, registry_url in registry_spec["registry"].items():
                # Drop the registry host; keep only the repository path.
                _, _url = registry_url.split("/", maxsplit=1)
                readme_path = Path("generated", package, "README.md")
                repo_url: str = (
                    f"{get_nested(registry_spec, ['urls', 'repos'])}/{_url}/")
                self.push_readmes(api_url, repo_url, readme_path,
                                  login_payload)

            if FLAGS.readmes:
                log.info("--readmes is specified. Exit after pushing readmes.")
                return

            # Then push image to registry.
            with ThreadPoolExecutor(max_workers=5) as executor:
                for image_tag in self.push_tags():
                    image = self._push_context[image_tag]
                    reg, tag = image_tag.split(":")
                    # Select the registry URL whose package key matches the
                    # repo component of the tag.
                    registry = "".join([
                        v for k, v in registry_spec["registry"].items()
                        if reg in k
                    ])
                    log.info(f"Uploading {image_tag} to {registry}")
                    future = executor.submit(self.background_upload, image,
                                             tag, registry)
                    # NOTE(review): logs the bound method object, not the
                    # upload result — presumably intentional fire-and-forget;
                    # confirm whether future.result() was meant instead.
                    log.info(future.result)
예제 #10
0
def get_count(obj, field):
    """If available, return how many item of type 'field' are in 'obj'"""
    counter_path = [field, 'count']
    return get_nested(obj, counter_path, 0)
예제 #11
0
def get_loc(vid):
    """Return coordinated of the venue `vid` (or None if it's not in DB)."""
    venue = DB.venue.find_one({'_id': vid}, {'loc': 1})
    if not venue:
        return None
    return u.get_nested(venue, ['loc', 'coordinates'])
 def process_response(cls, response: Dict, isin: str) -> Base:
     """Build one income-statement model row from a raw Yahoo `response`.

     Each column is the 'raw' value of the matching camelCase payload
     field; the report date is derived from the 'endDate' timestamp.
     """
     # model column -> camelCase key in the Yahoo payload; replaces 20+
     # copy-pasted get_nested lines with one data-driven loop.
     columns = {
         'total_revenue': 'totalRevenue',
         'cost_of_revenue': 'costOfRevenue',
         'gross_profit': 'grossProfit',
         'research_development': 'researchDevelopment',
         'selling_general_administrative': 'sellingGeneralAdministrative',
         'non_recurring': 'nonRecurring',
         'other_operating_expenses': 'otherOperatingExpenses',
         'total_operating_expenses': 'totalOperatingExpenses',
         'operating_income': 'operatingIncome',
         'total_other_income_expense_net': 'totalOtherIncomeExpenseNet',
         'ebit': 'ebit',
         'interest_expense': 'interestExpense',
         'income_before_tax': 'incomeBeforeTax',
         'income_tax_expense': 'incomeTaxExpense',
         'minority_interest': 'minorityInterest',
         'net_income_from_continuing_ops': 'netIncomeFromContinuingOps',
         'discontinued_operations': 'discontinuedOperations',
         'extraordinary_items': 'extraordinaryItems',
         'effect_of_accounting_charges': 'effectOfAccountingCharges',
         'other_items': 'otherItems',
         'net_income': 'netIncome',
         'net_income_applicable_to_common_shares':
         'netIncomeApplicableToCommonShares',
     }
     record = {
         'isin': isin,
         'report_date': datetime.fromtimestamp(
             get_nested(response, 'endDate', 'raw')).date(),
     }
     for column, key in columns.items():
         record[column] = get_nested(response, key, 'raw')
     result: Base = cls(**record)
     return result
def get_count(obj, field):
    """If available, return how many item of type 'field' are in 'obj'"""
    counter_path = [field, "count"]
    return get_nested(obj, counter_path, 0)
 def process_response(cls, response: Dict, isin: str) -> Base:
     """Build one balance-sheet model row from a raw Yahoo `response`.

     Each column is the 'raw' value of the matching camelCase payload
     field; the report date is derived from the 'endDate' timestamp.
     """
     # model column -> camelCase key in the Yahoo payload; replaces 20+
     # copy-pasted get_nested lines with one data-driven loop.
     columns = {
         'cash': 'cash',
         'short_term_investments': 'shortTermInvestments',
         'net_receivables': 'netReceivables',
         'total_current_assets': 'totalCurrentAssets',
         'property_plant_equipment': 'propertyPlantEquipment',
         'intangible_assets': 'intangibleAssets',
         'other_assets': 'otherAssets',
         'deferred_long_term_asset_charges': 'deferredLongTermAssetCharges',
         'total_assets': 'totalAssets',
         'accounts_payable': 'accountsPayable',
         'short_long_term_debt': 'shortLongTermDebt',
         'other_current_liab': 'otherCurrentLiab',
         'long_term_debt': 'longTermDebt',
         'other_liab': 'otherLiab',
         'deferred_long_term_liab': 'deferredLongTermLiab',
         'total_current_liabilities': 'totalCurrentLiabilities',
         'total_liab': 'totalLiab',
         'common_stock': 'commonStock',
         'retained_earnings': 'retainedEarnings',
         'treasury_stock': 'treasuryStock',
         'other_stockholder_equity': 'otherStockholderEquity',
         'total_stockholder_equity': 'totalStockholderEquity',
         'net_tangible_assets': 'netTangibleAssets',
     }
     record = {
         'isin': isin,
         'report_date': datetime.fromtimestamp(
             get_nested(response, 'endDate', 'raw')).date(),
     }
     for column, key in columns.items():
         record[column] = get_nested(response, key, 'raw')
     result: Base = cls(**record)
     return result
예제 #15
0
 def process_response(cls, response: Dict, isin: str) -> Base:
     """Build one cash-flow-statement model row from a raw Yahoo `response`.

     Each column is the 'raw' value of the matching camelCase payload
     field; the report date is derived from the 'endDate' timestamp.
     """
     # model column -> camelCase key in the Yahoo payload; replaces the
     # copy-pasted get_nested lines with one data-driven loop.
     columns = {
         'net_income': 'netIncome',
         'change_to_netincome': 'changeToNetincome',
         'change_to_account_receivables': 'changeToAccountReceivables',
         'change_to_liabilities': 'changeToLiabilities',
         'total_cash_from_operating_activities':
         'totalCashFromOperatingActivities',
         'capital_expenditures': 'capitalExpenditures',
         'other_cashflows_from_investing_activities':
         'otherCashflowsFromInvestingActivities',
         'total_cashflows_from_investing_activities':
         'totalCashflowsFromInvestingActivities',
         'dividends_paid': 'dividendsPaid',
         'net_borrowings': 'netBorrowings',
         'other_cashflows_from_financing_activities':
         'otherCashflowsFromFinancingActivities',
         'total_cash_from_financing_activities':
         'totalCashFromFinancingActivities',
         'effect_of_exchange_rate': 'effectOfExchangeRate',
         'change_in_cash': 'changeInCash',
         'repurchase_of_stock': 'repurchaseOfStock',
         'issuance_of_stock': 'issuanceOfStock',
     }
     record = {
         'isin': isin,
         'report_date': datetime.fromtimestamp(
             get_nested(response, 'endDate', 'raw')).date(),
     }
     for column, key in columns.items():
         record[column] = get_nested(response, key, 'raw')
     result: Base = cls(**record)
     return result