def getAccumulatedValue(mappingFunc, wantedKeys, inputList):
    """
    [Function] mappingFunc ([Iterable] positions -> [Dictionary] key -> value),
    [Set] wantedKeys (set of [String] keys, or None),
    [Iterable] inputList, whose elements are the positions of month 1, 2...n
        taken from one of the below reports:

        1. Profit Loss summary with tax lot details;
        2. Daily interest accrual detail;

    => [Iterator] ([Dictionary] key -> value), one running total per element
        of inputList.

    Every element of inputList is turned into a dictionary by mappingFunc,
    optionally reduced to wantedKeys by keepKeysFromDict, and the dictionaries
    are then accumulated with addDictValues.

    If wantedKeys is None, no key filtering is done.
    """
    mappedValues = map(mappingFunc, inputList)

    # Identity test: None is a singleton, and '!=' could be overridden by the
    # object passed as wantedKeys.
    if wantedKeys is not None:
        mappedValues = map(partial(keepKeysFromDict, wantedKeys), mappedValues)

    return accumulate(mappedValues, addDictValues)
def getDateFromLines(lines):
    """
    [Iterable] lines => [String] date (yyyy-mm-dd)

    Search for the line whose first cell is the date header and return the
    date at the end of that header. The date header looks like:

        'Valuation Period: From 01/02/2020 to 29/02/2020'

    The last whitespace separated token of the header is parsed as dd/mm/yyyy.
    When no such line exists, lognRaise() is called with an error message.
    """
    def isDateLine(line):
        # Guard against empty rows before looking at the first cell.
        return len(line) > 0 \
            and isinstance(line[0], str) \
            and line[0].lower().startswith('valuation period:')

    dateLine = firstOf(isDateLine, lines)
    dateHeader = lognRaise('getDateFromLines(): failed get date line') \
        if dateLine is None else dateLine[0]

    return datetime.strftime(
        datetime.strptime(dateHeader.split()[-1], '%d/%m/%Y'),
        '%Y-%m-%d')
def createLqaPositions(portfolio, date, mode='production'):
    """
    [String] portfolio, [String] date (yyyymmdd), [String] mode
        => ( [Iterator] non-clo positions
           , [Iterator] clo positions
           )

    The portfolio positions are split by isGenevaPosition: the Geneva ones go
    through getGenevaLqaPositions, the others through getBlpLqaPositions,
    whose result is unpacked into the leading elements of a tuple.

    The first returned element consolidates element 1 of that tuple together
    with the Geneva LQA positions; the second consolidates element 0.
    NOTE(review): the non-clo / clo labels are taken from the original
    documentation; confirm against getBlpLqaPositions.
    """
    positions = list(getPortfolioPositions(portfolio, date, mode))

    # Bloomberg groups first, Geneva LQA positions last.
    groups = (
        *getBlpLqaPositions(filterfalse(isGenevaPosition, positions)),
        getGenevaLqaPositions(filter(isGenevaPosition, positions)),
    )

    return (
        consolidate(chain(groups[1], groups[2])),
        consolidate(groups[0]),
    )
def getCashFromBalancenActivityFiles(balanceFile, activityFile):
    """
    [String] balanceFile, [String] activityFile
        => ( [String] date
           , [Iterable] cash entries
           )

    The date is taken from the two file names, which must agree; otherwise
    lognRaise() is called. The cash read from the balance file and from the
    activity file is merged with mergeDictionary, and 'date' and 'custodian'
    (empty string) are merged with each value of the merged dictionary.
    """
    lognContinue(
        'getCashFromBalancenActivityFiles(): {0}, {1}'.format(
            balanceFile, activityFile),
        0)

    balanceDate = getDateFromFileName(balanceFile)
    activityDate = getDateFromFileName(activityFile)
    date = lognRaise('checkFileDates(): inconsistant dates from filenames') \
        if balanceDate != activityDate else balanceDate

    mergedCash = mergeDictionary(
        getCashFromBalance(fileToLines(balanceFile)),
        getCashFromActivity(fileToLines(activityFile)))

    cashEntries = map(
        partial(mergeDictionary, {'date': date, 'custodian': ''}),
        mergedCash.values())

    return (date, cashEntries)
def reset_defaults(load, save):
    """
    Load the settings, apply every entry of DEFAULT_SETTINGS, and save them.

    :param load: callable taking no arguments and returning the settings.
    :param save: callable receiving the updated settings.

    Each default is applied through ``with_key_value(key, value, settings)``.
    The defaults are applied starting from the last entry of
    DEFAULT_SETTINGS, which is the order a right-to-left composition of the
    per-entry updaters produces.
    """
    updaters = [
        curry(with_key_value, key, value)
        for key, value in DEFAULT_SETTINGS.items()
    ]

    settings = load()
    for apply_default in reversed(updaters):
        settings = apply_default(settings)
    save(settings)
def sym_transform_feature_union(estimator):
    """
    Build the symbolic transform of a feature-union style estimator.

    Each entry of ``estimator.transformer_list`` contributes the symbolic
    transform of its second element, multiplied by the weight stored in
    ``estimator.transformer_weights`` under its first element. The weighted
    transforms are combined with ``cart``.

    NOTE(review): ``estimator.transformer_weights`` must support item lookup
    for every transformer name; a ``None`` value or a missing name raises.
    Confirm whether unweighted unions need to be supported.
    """
    names = tuple(map(tupget(0), estimator.transformer_list))
    weight_for = estimator.transformer_weights.__getitem__

    symbolic = map(compose(sym_transform, tupget(1)),
                   estimator.transformer_list)
    weights = map(lambda name: RealNumber(weight_for(name)), names)

    return cart(*(weight * transform
                  for weight, transform in zip(weights, symbolic)))
def __init__(self, inputs, calls, outputs, origin=None):
    '''
    A Function represents a function in the computational sense. Function
    objects are the intermediary between fitted estimators and generated
    code: adapters return Function objects, and sklearn2code converts
    Function objects into working code. A Function is composed of Expression
    objects (including Variable objects) and other Function objects. It knows
    its inputs (Variable objects), its internal calls (made up of Variable
    objects and other Function objects), and its outputs (general Expression
    objects).

    Parameters
    ----------
    inputs : tuple of Variables
        The input variables for this function. Normalized with ``tupify``
        and ``safe_symbol``.

    calls : tuple of pairs (tuple of Variables, (Function, tuple of inputs))
        The other function calls made by this function. The first element
        of each pair holds the variables to which the outputs of the call
        are assigned; the second holds the called Function and the inputs
        passed to it. The number of assigned variables must match the number
        of outputs of the called Function, the number of inputs must match
        its number of inputs, and no variable may be assigned twice. These
        constraints are checked by ``_validate``.

    outputs : tuple of expressions
        The actual calculations made by this Function. The return values of
        the function are the results of these expressions.

    origin : optional
        Accepted but neither stored nor used by this constructor.
    '''
    # Turns any iterable of names/variables into a tuple of safe symbols.
    to_symbols = compose(tuple, curry(map)(safe_symbol))

    self.inputs = tuple(map(safe_symbol, tupify(inputs)))

    # First normalize the assigned variables (element 0 of each call) ...
    assigned = tupsmap(0, to_symbols, calls)
    # ... then the inputs handed to the called Function (inside element 1).
    self.calls = tupsmap(1, tupfun(identity, to_symbols), assigned)

    self.outputs = tupify(outputs)
    self._validate()
def getRawPositionsFromLines(lines):
    """
    [Iterator] ([List]) lines => [Iterator] ([Dictionary]) positions

    The first line holds the headers: its cells are stripped and read up to
    (not including) the first empty one. The lines that follow are read
    until the first empty line, i.e., a line without cells or whose first
    cell is blank. Each of them becomes a dictionary header -> stripped cell.
    """
    def clean(value):
        # Only strings are stripped, other cell values pass through.
        if isinstance(value, str):
            return value.strip()
        return value

    def isBlank(line):
        return len(line) == 0 or clean(line[0]) == ''

    headAndRest = headnRemain(lines)
    headers = list(
        takewhile(lambda header: header != '', map(clean, headAndRest[0])))
    dataLines = takewhile(lambda line: not isBlank(line), headAndRest[1])

    def toPosition(line):
        return dict(zip(headers, map(clean, line)))

    return map(toPosition, dataLines)
def writeAssetAllocationCsv(portfolio, date, mode, reportingCurrency,
                            countryGroups, assetTypeTuples):
    """
    [String] portfolio, [String] date (yyyymmdd), [String] mode,
    [String] reportingCurrency
    [List] countryGroups,
        (e.g., ['China - Hong Kong', 'China - Mainland', 'Singapore'])
    [List] assetTypeTuples (each assetTypeTuple is like
        ('Fixed Income', 'Corporate', 'Investment Grade'))

    => the result of writeCsv() (documented originally as the output csv
       file name)

    Side effect: create a csv file named
    <portfolio>_asset_allocation_<date>.csv, with one row per asset type
    tuple and one value per country group. Each value is
    sumMarketValueInCurrency(date, reportingCurrency, ...) applied to that
    asset type / country group entry of the allocation.
    """
    outputFile = portfolio + '_asset_allocation_' + date + '.csv'
    blpData = getBlpData(date, mode)

    allocation = getAssetCountryAllocation(
        date, blpData, assetTypeTuples, countryGroups,
        getPortfolioPositions(portfolio, date, mode))

    def valuesOfAssetType(assetTypeTuple):
        byCountryGroup = allocation[assetTypeTuple]
        return map(
            partial(sumMarketValueInCurrency, date, reportingCurrency),
            map(lambda countryGroup: byCountryGroup[countryGroup],
                countryGroups))

    return writeCsv(outputFile, map(valuesOfAssetType, assetTypeTuples))
def getGenevaPositions(portfolio, date, mode):
    """
    [String] portfolio, [String] date (yyyymmdd), [String] mode
        => [Iterator] Investment positions of the portfolio on that date

    The positions are read from the Geneva investment positions report
    (Excel format) named <portfolio>_Investment_Positions_<date>.xlsx in the
    input directory of the mode. Every position gets a 'Remarks1' field
    naming that report.
    """
    positionFile = join(
        getInputDirectory(mode),
        portfolio + '_Investment_Positions_' + date + '.xlsx')

    # lognContinue() logs the message and hands back the file to read.
    lines = fileToLines(
        lognContinue(
            'readGenevaInvestmentPositionFile(): {0}'.format(positionFile),
            positionFile))

    def addRemarks(position):
        return mergeDict(
            position, {'Remarks1': 'Geneva investment positions report'})

    return map(addRemarks, getPositions(lines)[1])
def loadAssetTypeSpecialCaseFromFile(file):
    """
    [String] file => [Dictionary] ID -> [Dictionary] security info

    The file is looked up in the data directory. For every position read:

    1. 'Portfolio' is converted to a string of its integer value when it is
       a float, otherwise left as it is;
    2. 'AssetType' is split on commas into a tuple of stripped strings.
    """
    def toTuple(text):
        return tuple(map(lambda part: part.strip(), text.split(',')))

    def normalize(position):
        return mergeDict(
            position,
            {
                'Portfolio': position['Portfolio']
                if not isinstance(position['Portfolio'], float)
                else str(int(position['Portfolio'])),
                'AssetType': toTuple(position['AssetType']),
            })

    dataDirectory = getDataDirectory()
    positions = map(
        normalize, getRawPositions(fileToLines(join(dataDirectory, file))))

    return dict((position['ID'], position) for position in positions)
def checkInconsistency(positions):
    """
    [Iterable] positions => [Iterable] positions (the argument itself)

    Group the positions by (Date, Currency, TargetCurrency) and run
    checkGroupConsistency on every group. The results of the checks are
    discarded; the function relies on checkGroupConsistency to signal a
    problem.

    NOTE(review): the argument is iterated for the grouping and then
    returned as it is, so a one-shot iterator would come back exhausted;
    confirm callers pass a list.
    """
    groups = groupbyToolz(
        lambda p: (p['Date'], p['Currency'], p['TargetCurrency']),
        positions)

    for group in groups.values():
        checkGroupConsistency(group)

    return positions
def processFiles(files, outputDir):
    """
    [Iterable] files, [String] outputDir
        => ([List] output files, [List] successful files, [List] failed files)

    Holding files are those whose file name (without path, case insensitive)
    starts with 'boc broker statement'; cash files start with
    'boc bank statement'. Holding files are processed first because the date
    they produce is needed to process the cash files.

    Any error (Exception) is logged and reported by returning all the files
    as failed. Interpreter level signals such as KeyboardInterrupt and
    SystemExit are not swallowed.
    """
    def nameStartsWith(prefix):
        return lambda file: \
            getFilenameWithoutPath(file).lower().startswith(prefix)

    isHoldingFile = nameStartsWith('boc broker statement')
    isCashFile = nameStartsWith('boc bank statement')

    try:
        # The files are scanned twice below, and returned on failure, so a
        # one-shot iterator must be materialized first.
        files = list(files)

        date, outputHoldingCsvFiles, successfulHoldingFiles, failedHoldingFiles = \
            processHoldingFiles(
                [file for file in files if isHoldingFile(file)], outputDir)

        outputCashCsvFiles, successfulCashFiles, failedCashFiles = \
            processCashFiles(
                [file for file in files if isCashFile(file)], date, outputDir)

        return outputHoldingCsvFiles + outputCashCsvFiles \
            , successfulHoldingFiles + successfulCashFiles \
            , failedHoldingFiles + failedCashFiles

    except Exception:
        logger.exception('processFiles()')
        return [], [], files
def to_pandas(cls,
              response: Sequence[np.ndarray],
              data: Optional[Any] = None,
              index: Optional[Sequence[Text]] = None) -> Any:
    # pylint: disable=no-value-for-parameter
    """Assemble the response arrays into a single DataFrame.

    Steps, in order:

    1. Each response array is paired with the matching entry of
       ``cls._matrix``, turned into a view by ``cls._view`` and the views
       are folded together with ``cls._pivot``.
    2. The ``#timestamp`` column is localized to UTC and ``#index`` becomes
       the index.
    3. The ``before_merge`` hooks of every variable binding in the matrix
       are applied to the ``(df, data)`` pair.
    4. When ``data`` is not None it is outer-merged on the index; the index
       is then dropped.
    5. When ``index`` is not None it is set as the index.
    6. The ``after_merge`` hooks of every variable binding are applied to
       the DataFrame.

    :param response: arrays to convert, one per entry of ``cls._matrix``.
    :param data: optional frame merged into the result (after the
        ``before_merge`` hooks had a chance to replace it).
    :param index: optional column label(s) used as the final index.
    :return: the resulting DataFrame.
    """
    matrix = cls._matrix

    views = [cls._view(arr, col) for arr, col in zip(response, matrix)]
    df = reduce(cls._pivot, views)
    df['#timestamp'] = df['#timestamp'].dt.tz_localize('UTC')
    df = df.reset_index().set_index('#index')

    # Hooks receive and return the (df, data) pair.
    merge_state = (df, data)
    before_binds = [var_bind for col in matrix for var_bind in col]  # pylint: disable=not-an-iterable
    for var_bind in before_binds:
        merge_state = cast(
            Tuple[Any, Optional[Any]],
            compose(*var_bind._hooks['before_merge'])(merge_state)  # pylint: disable=protected-access
        )
    df, data = merge_state

    if data is not None:
        df = df.merge(data, how='outer', left_index=True, right_index=True)
    df = df.reset_index(drop=True)

    if index is not None:
        df = df.set_index(index)

    after_binds = [var_bind for col in matrix for var_bind in col]  # pylint: disable=not-an-iterable
    for var_bind in after_binds:
        df = compose(*var_bind._hooks['after_merge'])(df)  # pylint: disable=protected-access

    return df
def getRawPositions(lines):
    """
    [Iterable] lines => [Iterator] ([Dictionary]) positions

    Lines are read until the first empty one (no cells, or an empty first
    cell). The first of them holds the headers, the others become positions
    (header -> cell).

    Only a few headers are useful in the output csv, therefore those are
    mapped to the field names of the output csv; the others are kept as they
    are. When a header spans several text lines, only its last text line is
    used. Headers are read up to the first empty cell.
    """
    headerMap = {
        'Account Name': 'portfolio',
        'Currency': 'currency',
        'Currency(or Equiv.)': 'currency',
        'Ledger Balance': 'balance',
        'Ledger Balance(Total Equiv.)': 'balance'
    }

    def hasContent(line):
        return len(line) > 0 and line[0] != ''

    def toHeaders(line):
        lastTextLines = map(
            lambda s: s.split('\n')[-1],
            takewhile(lambda s: s != '', line))
        return [headerMap.get(s, s) for s in lastTextLines]

    contentLines = takewhile(hasContent, lines)

    # pop() takes the header line, the remaining lines are the positions.
    headers = toHeaders(pop(contentLines))

    return map(lambda line: dict(zip(headers, line)), contentLines)
def getAccumulatedTimeWeightedCapital(bondConnectOnly, sortedCLPositions):
    """
    [Bool] bondConnectOnly
    [Iterable] ([String] period end date, [List] positions of that period)
        => [Iterable] Float (time weighted capital at each period end date)

    The positions are accumulated over the periods, so that each period end
    date is evaluated against all the cash ledger entries up to that date.

    For bond connect, only the entries whose
    'GroupWithinCurrency_OpeningBalDesc' contains 'BOCHK_BC' are kept, and
    their 'Transfer' transactions are treated as 'Deposit'.
    """
    def toBondConnectEntries(entries):
        bondConnectEntries = filter(
            lambda p: 'BOCHK_BC' in p['GroupWithinCurrency_OpeningBalDesc'],
            entries)
        return map(
            lambda p: p if p['TranDescription'] != 'Transfer'
            else mergeDict(p, {'TranDescription': 'Deposit'}),
            bondConnectEntries)

    def extendPeriod(previous, current):
        # keep the latest end date, with all the positions so far
        return (current[0], previous[1] + current[1])

    periods = accumulate(sortedCLPositions, extendPeriod)

    if bondConnectOnly:
        periods = map(
            lambda t: (t[0], list(toBondConnectEntries(t[1]))), periods)

    return map(lambda t: getTimeWeightedCapital(t[0], t[1]), periods)
def kickstart(yaml_path=None, args=None):
    """
    Kicks everything off by creating the configuration function pipeline

    A first CLIConfigurator is run on an empty map only to discover the
    pylarion_path, exporter_config and environment_file settings needed to
    build the other configurators. The full pipeline is then composed from
    the CLI, (optional) Jenkins, YAML, OS environment and Pylarion
    configurators and run on the same empty map. With a right-to-left
    composition the Pylarion configurator runs first and the CLI one last.

    If the resulting map does not satisfy ConfigRecord, the problem is
    reported and the process exits with status 1.

    :param yaml_path: currently overwritten by the "exporter_config" value of
        the bootstrap CLI pass before it is read.
        NOTE(review): confirm whether the argument should take precedence.
    :param args: argument list handed to the CLIConfigurator of the pipeline.
    :return: dict with the configurators ("pyl_cfg", "env_cfg", "yml_cfg",
        "cli_cfg") and the final ConfigRecord ("config").
    """
    # Create the CLIConfigurator first, because it may override defaults (eg, it can override the
    # default location of pylarion_path or exporter_config, which are needed by PylarionConfigurator
    # and YAMLConfigurator)
    bootstrap_cfg = CLIConfigurator()
    start_map = pyr.m()
    init_map = bootstrap_cfg(start_map)
    pyl_path = init_map.get("pylarion_path")
    yaml_path = init_map.get("exporter_config")
    env_path = init_map.get("environment_file")

    pyl_cfg = PylarionConfigurator(path=pyl_path)
    env_cfg = OSEnvironmentConfigurator()
    yml_cfg = YAMLConfigurator(cfg_path=yaml_path)
    jnk_cfg = JenkinsConfigurator(env_path) if env_path else None
    cli_cfg = CLIConfigurator(args=args)

    if env_path:
        pipeline = compose(cli_cfg, jnk_cfg, yml_cfg, env_cfg, pyl_cfg)
    else:
        pipeline = compose(cli_cfg, yml_cfg, env_cfg, pyl_cfg)
    end_map = pipeline(start_map)

    log.log(DEFAULT_LOG_LEVEL, "================ end_map ===================")
    dprint(end_map)

    try:
        final = ConfigRecord(**end_map)
    except pyr._checked_types.InvariantException as ex:
        # Function-call form prints the same text on Python 2 and Python 3.
        print(ex)
        if ex.missing_fields:
            log.error("Following fields not configured: " + str(ex.missing_fields))
        # NOTE: ex.invariant_errors is deliberately not reported; the original
        # check was disabled with a constant False condition.
        log.error("Please correct the above and run again")
        sys.exit(1)

    log.log(logging.INFO, "================= final ====================")
    dprint(final, log_lvl=logging.INFO)
    log.log(logging.INFO, "============================================\n")

    result = {"pyl_cfg": pyl_cfg,
              "env_cfg": env_cfg,
              "yml_cfg": yml_cfg,
              "cli_cfg": cli_cfg,
              "config": final}
    return result
def addRepoHeaders(file):
    """
    [String] file => [String] output file

    Assume: the input file is a repo XML file of one of 3 types: repo loan
    master file, repo transaction file, repo rerate file (dummy rerate files
    count as rerate files). Any other file gets the type 'others'.

    The function reads the input XML file, puts the Geneva headers of its
    type before the content and the footers after it, and saves the result
    in the same folder. The output file name is the input file name with
    '_WithHeaders' inserted before its last 4 characters (the extension).
    """
    logger.debug('addRepoHeaders(): {0}'.format(file))

    def fileTypeOf(name):
        if isRepoMaster(name):
            return 'loan_master'
        if isRepoTrade(name):
            return 'transaction'
        if isRepoRerate(name) or isRepoDummyRerate(name):
            return 'rerate'
        return 'others'

    def outputFileOf(name):
        folder = getParentFolder(name)
        baseName = getFilenameWithoutPath(name)
        return join(folder, baseName[0:-4] + '_WithHeaders' + baseName[-4:])

    content = fileToLines(file)
    headersAndFooters = getRepoHeaders(fileTypeOf(file))
    outputLines = chain(headersAndFooters[0], content, headersAndFooters[1])

    outputFile = outputFileOf(file)
    with open(outputFile, 'w') as f:
        f.writelines(outputLines)

    return outputFile
def convertAccumulateExcelToCSV(file):
    """
    [String] file => the result of writeCsv() (documented originally as the
                     csv file)

    Read an accumulated trade excel file and write it as csv, with the
    'Trade Date' and 'Settlement Date' columns converted to yyyy-mm-dd, so
    that it is consistent with the daily addon from the bloomberg aim trade
    file.

    The csv file name is the same as the excel file name, except that its
    extension is '.csv' instead of '.xlsx' or '.xls'. Any other extension
    is reported through lognRaise().

    This is a utility function that needs to run only once, to convert the
    excel version of the accumulated trade file into csv format. After that,
    daily trades just need to be added to that csv file.
    """
    def toOutputFileName(fn):
        if fn.endswith('.xlsx'):
            return fn[0:-4] + 'csv'
        if fn.endswith('.xls'):
            return fn[0:-3] + 'csv'
        return lognRaise(
            'convertAccumulateExcelToCSV(): invalid input file {0}'.format(fn))

    def toHeaders(line):
        # The second header is an empty string, but it needs to be kept.
        # From the third column on, headers stop at the first empty string.
        return [
            cell for _, cell in takewhile(
                lambda t: t[0] < 2 or t[1] != '', enumerate(line))
        ]

    def toDateString(value):
        # Excel ordinals come as floats, the rest as date strings.
        if isinstance(value, float):
            return datetime.strftime(fromExcelOrdinal(value), '%Y-%m-%d')

        try:
            return datetime.strftime(
                datetime.strptime(value, '%m/%d/%Y'), '%Y-%m-%d')
        except ValueError:
            # not month first, so try day first
            return datetime.strftime(
                datetime.strptime(value, '%d/%m/%Y'), '%Y-%m-%d')

    def toLineItems(headers, line):
        return map(
            lambda t: t[1] if t[0] not in ['Trade Date', 'Settlement Date']
            else toDateString(t[1]),
            zip(headers, line))

    lines = fileToLines(file)
    headers = toHeaders(pop(lines))
    rows = chain([headers], map(partial(toLineItems, headers), lines))

    return writeCsv(toOutputFileName(file), rows, delimiter=',')
def test_compose():
    """Check composition results and the metadata of composed functions."""
    def weighted_sum(a, b, c=10):
        return (a + b) * c

    # (functions to compose, positional args, keyword args, expected result)
    cases = [
        ((), (0,), {}, 0),
        ((inc,), (0,), {}, 1),
        ((double, inc), (0,), {}, 2),
        ((str, iseven, inc, double), (3,), {}, "False"),
        ((str, add), (1, 2), {}, '3'),
        ((str, inc, weighted_sum), (1, 2), {'c': 3}, '10'),
    ]
    for funcs, args, kwargs, expected in cases:
        assert compose(*funcs)(*args, **kwargs) == expected

    # Two functions with different names; the expected name and doc of the
    # composition are built from these exact names.
    def f(a):
        return a

    def g(a):
        return a

    named = compose(f, g)
    assert named.__name__ == 'f_of_g'
    assert named.__doc__ == 'lambda *args, **kwargs: f(g(*args, **kwargs))'

    # An object with no __name__ makes the composition fall back to generic
    # metadata.
    nameless = object()
    anonymous = compose(f, nameless)
    assert anonymous.__name__ == 'Compose'
    assert anonymous.__doc__ == 'A composition of functions'
def getPortfolioNames():
    """
    => [Dictionary] ([String] portfolio code => [String] portfolio name)

    The names are read from the file in the data directory whose name (case
    insensitive) starts with 'steven zhang portfolio names'. The candidates
    are passed through _checkOnlyOne() before the first one is used.

    The portfolio code is the 'NameSort' field and the portfolio name the
    'NameLine1' field of each entry of that file.
    """
    isNamesFile = lambda fn: \
        fn.lower().startswith('steven zhang portfolio names')

    candidates = list(filter(isNamesFile, getFiles(getDataDirectory())))
    checked = _checkOnlyOne(candidates)
    namesFile = join(getDataDirectory(), checked[0])

    return dict(
        (entry['NameSort'], entry['NameLine1'])
        for entry in _getGenevaPortfolioNamesFromFile(namesFile))
def getTimeWeightedCapital(reportDate, positions):
    """
    [String] report date (yyyy-mm-dd),
    [Iterable] cash ledger positions,
        => [Float] time weighted capital

    The result is the sum of two parts, each weighting 'BookAmount' by the
    fraction of a 365 day year between 'CashDate' and the report date:

    1. internal cash flow, i.e., 'Mature', 'Paydown' and 'Sell' entries,
       weighted by the number of days between the two dates;
    2. external cash flow, i.e., 'Deposit' and 'Withdraw' entries, weighted
       by the number of days between the two dates plus one.

    Entries with any other 'TranDescription' are ignored. Without matching
    entries the result is 0.
    """
    # The positions are scanned once per part, so a one-shot iterator must
    # be materialized; otherwise the second part would silently be 0.
    positions = list(positions)

    def daysToReportDate(cashDate):
        return (datetime.strptime(reportDate, '%Y-%m-%d')
                - datetime.strptime(cashDate, '%Y-%m-%d')).days

    internalCF = sum(
        p['BookAmount'] * daysToReportDate(p['CashDate']) / 365.0
        for p in positions
        if p['TranDescription'] in ('Mature', 'Paydown', 'Sell'))

    externalCF = sum(
        p['BookAmount'] * (daysToReportDate(p['CashDate']) + 1) / 365.0
        for p in positions
        if p['TranDescription'] in ('Deposit', 'Withdraw'))

    return internalCF + externalCF
def getAllPositionsBlp(date, mode):
    """
    [String] date (yyyymmdd), [String] mode
        => [Iterator] positions of all portfolios on the date from Bloomberg

    The positions are read from risk_m2_mav_<date>.xlsx in the input
    directory of the mode.

    The 'AsOfDate' of the positions is not the date argument, but the last
    whitespace separated token of the date line in the file, i.e., the line
    whose second cell starts with 'Risk-Mon Steven'. lognRaise() is called
    when there is no such line.

    The positions start at the line whose first cell is 'Name'. Positions
    with an empty 'Account Code' are dropped; a float 'Account Code' is
    converted to the string of its integer value.
    """
    positionFile = join(
        getInputDirectory(mode), 'risk_m2_mav_' + date + '.xlsx')

    lines = fileToLines(
        lognContinue(
            'getAllPositionsBlp(): {0}'.format(positionFile), positionFile))

    def isDateLine(line):
        # Cells read from Excel are not always strings, so check the type
        # before calling a string method on the cell.
        return len(line) > 1 \
            and isinstance(line[1], str) \
            and line[1].startswith('Risk-Mon Steven')

    dateLine = firstOf(isDateLine, lines)
    dateString = lognRaise('Failed to find date line') \
        if dateLine is None else dateLine[1]
    asOfDate = dateString.split()[-1]

    def updatePosition(position):
        accountCode = position['Account Code']
        return mergeDict(
            position,
            {
                'AsOfDate': asOfDate,
                'Remarks1': 'Bloomberg MAV Risk-Mon Steven',
                'Account Code': str(int(accountCode))
                if isinstance(accountCode, float) else accountCode,
            })

    # Skip everything before the header line; empty lines cannot be the
    # header line and are skipped as well.
    positionLines = dropwhile(
        lambda line: len(line) == 0 or line[0] != 'Name', lines)

    return map(
        updatePosition,
        filterfalse(
            lambda p: p['Account Code'] == '',
            getRawPositions(positionLines)))
def processHoldingFiles(files, outputDir):
    """
    [Iterable] files, [String] output directory
        => ( [String] date (yyyy-mm-dd)
           , [List] output files
           , [List] successfully processed files
           , [List] failed processed files
           )

    The date is the latest of the dates found in the file names. Each file is
    then converted with writeHoldingCsv(); a file whose conversion raises an
    Exception is reported in the failed list instead of stopping the run.

    The result is a flat 4-tuple, as documented above and as unpacked by
    processFiles().

    Raises IndexError when there are no files, because there is no date to
    report.
    """
    # The files are needed twice (date, then conversion), so a one-shot
    # iterator such as a filter object must be materialized first.
    files = list(files)

    date = sorted(
        map(dateFromFilename, map(getFilenameWithoutPath, files)))[-1]

    outputFiles, successfulFiles, failedFiles = [], [], []
    for file in files:
        try:
            outputFile = writeHoldingCsv(outputDir, file)
        except Exception:
            failedFiles.append(file)
        else:
            outputFiles.append(outputFile)
            successfulFiles.append(file)

    return date, outputFiles, successfulFiles, failedFiles
def get_rcv3_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `RCv3Node`.

    The load balancer pools are listed first; the nodes of every pool are
    then requested in parallel and the per-pool node lists are concatenated
    into one list. A `NoSuchEndpoint` error results in an empty list.
    """
    def nodes_from_listing(description, listing_result):
        _, nodes = listing_result
        return [
            RCv3Node(node_id=node['id'], description=description,
                     cloud_server_id=get_in(('cloud_server', 'id'), node))
            for node in nodes
        ]

    def nodes_request(pool):
        nodes_path = append_segments(
            'load_balancer_pools', pool['id'], 'nodes')
        return service_request(
            ServiceType.RACKCONNECT_V3, 'GET', nodes_path
        ).on(partial(nodes_from_listing, RCv3Description(lb_id=pool['id'])))

    def request_nodes_of_pools(pools_result):
        _, pools = pools_result
        return parallel([nodes_request(pool) for pool in pools])

    pools_eff = service_request(
        ServiceType.RACKCONNECT_V3, 'GET', 'load_balancer_pools')

    return pools_eff.on(request_nodes_of_pools).on(
        success=compose(list, concat),
        error=catch(NoSuchEndpoint, lambda _: []))
def split_cf_messages(format_message, var_length_key, event,
                      separator=', ', max_length=255):
    """
    Try to split cloud feed log events out into multiple events if the
    message is too long (the variable-length variable would cause the
    message to be too long.)

    :param str format_message: The format string to use to format the event
    :param str var_length_key: The key in the event dictionary that contains
        the variable-length part of the formatted message.
    :param dict event: The event dictionary
    :param str separator: The separator to use to join the various elements
        that should be varied.  (e.g. if the elements in "var_length_key"
        are ["1", "2", "3"] and the separator is "; ", "var_length_key" will
        be represented as "1; 2; 3")
    :param int max_length: The maximum length of the formatted message.

    :return: `list` of (event dictionary, format string) tuples, where each
        event dictionary carries its share of the variable-length field
        already joined into a string.
    """
    def formatted_length(candidate):
        return len(format_message.format(**candidate))

    with_field = assoc(event, var_length_key)

    def render(elements):
        # copy of the event with the elements joined into a single string
        return with_field(separator.join(map(str, elements)))

    if formatted_length(event) > max_length:
        pieces = split(
            render, event[var_length_key], max_length, formatted_length)
        return [(piece, format_message) for piece in pieces]

    return [(render(event[var_length_key]), format_message)]
def get_tenant_metrics(tenant_id, scaling_groups, grouped_servers,
                       _print=False):
    """
    Produce per-group metrics for all the groups of a tenant

    :param tenant_id: ID of the tenant, copied into every metric.
    :param list scaling_groups: Tenant's scaling groups as dict from CASS
    :param dict grouped_servers: Servers from Nova grouped based on
        scaling group ID.
    :param bool _print: when true, print a progress line for the tenant.
    :return: generator of `GroupMetrics` built from (tenant ID, group ID,
        desired, number of active servers, number of servers that are
        neither active nor in a bad state). Bad states are SHUTOFF, ERROR
        and DELETED.

    A group that has servers but is not among ``scaling_groups`` is reported
    with a desired of 0 and the group ID found in the metadata of its first
    server.
    """
    if _print:
        print('processing tenant {} with groups {} and servers {}'.format(
              tenant_id, len(scaling_groups), len(grouped_servers)))

    groups = {g['groupId']: g for g in scaling_groups}
    bad_states = (ServerState.SHUTOFF, ServerState.ERROR,
                  ServerState.DELETED)

    # list() keeps this working where dict.keys() returns a view, which
    # cannot be concatenated with "+".
    group_ids = set(list(groups.keys()) + list(grouped_servers.keys()))

    for group_id in group_ids:
        raw_servers = grouped_servers.get(group_id, [])
        if group_id in groups:
            group = groups[group_id]
        else:
            group = {
                'groupId': group_id_from_metadata(
                    raw_servers[0]['metadata']),
                'desired': 0}

        # A real list is required: the servers are counted three times.
        servers = [NovaServer.from_server_details_json(server)
                   for server in raw_servers]
        active = sum(1 for s in servers if s.state == ServerState.ACTIVE)
        bad = sum(1 for s in servers if s.state in bad_states)

        yield GroupMetrics(tenant_id, group['groupId'], group['desired'],
                           active, len(servers) - bad - active)
def testMultipartTaxlotReport(self):
    """
    Positions of portfolio 12307 in the multipart tax lot report sample:
    number of cash and non-cash positions, the HKD cash quantity and the
    figures of the '1088 HK' position.
    """
    inputFile = join(
        currentDir(), 'samples', 'all funds tax lot 2021-03-31.txt')
    positions = [
        p for p in readMultipartTaxlotReport('utf-16', '\t', inputFile)
        if p['Portfolio'] == '12307'
    ]

    # cash positions
    cash = [p for p in positions if isTaxlotCash(p)]
    self.assertEqual(2, len(cash))

    hkdCash = firstOf(lambda p: p['InvestID'] == 'HKD', cash)
    self.assertAlmostEqual(29762442.60, hkdCash['Quantity'])

    # everything else
    nonCash = [p for p in positions if not isTaxlotCash(p)]
    self.assertEqual(114, len(nonCash))

    equity = firstOf(lambda p: p['InvestID'] == '1088 HK', nonCash)
    self.assertEqual(761500, equity['Quantity'])
    self.assertAlmostEqual(14.687, equity['UnitCost'], 3)
    self.assertEqual(16.02, equity['MarketPrice'])
    self.assertEqual(1569143.80, equity['MarketValueBook'])
    self.assertEqual(0, equity['AccruedInterestBook'])

    unrealizedGainLoss = equity['UnrealizedPriceGainLossBook'] \
        + equity['UnrealizedFXGainLossBook']
    self.assertEqual(134206.70, unrealizedGainLoss)
def testMultipartTaxlotReport2(self):
    """
    Positions of portfolio 60001 in the multipart tax lot report sample:
    number of cash and fixed deposit positions, the USD cash quantity, one
    fixed deposit and the figures of the 'US06120TAA60' position.
    """
    inputFile = join(
        currentDir(), 'samples', 'all funds tax lot 2021-03-31.txt')
    positions = [
        p for p in readMultipartTaxlotReport('utf-16', '\t', inputFile)
        if p['Portfolio'] == '60001'
    ]

    # cash positions
    cash = [p for p in positions if isTaxlotCash(p)]
    self.assertEqual(2, len(cash))

    usdCash = firstOf(lambda p: p['InvestID'] == 'USD', cash)
    self.assertAlmostEqual(-7198256.77, usdCash['Quantity'])

    # fixed deposits
    fixedDeposits = [p for p in positions if isTaxlotFixedDeposit(p)]
    self.assertEqual(13, len(fixedDeposits))

    deposit = firstOf(
        lambda p: p['InvestID'] == 'IB Fixed Deposit 0.651 07/08/2021',
        fixedDeposits)
    self.assertEqual(5000000, deposit['Quantity'])
    self.assertEqual(7504.58, deposit['AccruedInterestBook'])

    # a bond, searched among all the positions
    bond = firstOf(lambda p: p['InvestID'] == 'US06120TAA60', positions)
    self.assertEqual(7273000, bond['Quantity'])
    self.assertAlmostEqual(112.186, bond['UnitCost'], 3)
    self.assertEqual(112.449, bond['MarketPrice'])
    self.assertEqual(8178415.77, bond['MarketValueBook'])
    self.assertAlmostEqual(139399.17, bond['AccruedInterestBook'])

    unrealizedGainLoss = bond['UnrealizedPriceGainLossBook'] \
        + bond['UnrealizedFXGainLossBook']
    self.assertEqual(19121.06, unrealizedGainLoss)
def optimum_polyfit(x, y, score=functoolz.compose(np.max, np.abs), max_degree=50, stop_at=1e-10):
    """
    Optimize the degree of a polyfit polynomial so that score(y - poly(x)) is minimized.

    Degrees 1 up to (but not including) max_degree are tried in increasing
    order.

    :param x: The x coordinates of the sample points.
    :param y: The y coordinates of the sample points.
    :param score: The score function that is applied to y - poly(x). Default: max deviation.
    :param max_degree: The exclusive upper bound of the degrees to try; must
        be at least 2. Degrees for which the fit raises a LinAlgError are
        skipped.
    :param stop_at: If a score lower than or equal to this is reached, the
        function returns early
    :return: A tuple (poly1d object, degree, score)
    :raises ValueError: if max_degree leaves no degree to try.
    """
    if max_degree < 2:
        raise ValueError(
            "max_degree must be at least 2, got {!r}".format(max_degree))

    # NumPy 2.0 removed np.RankWarning from the main namespace in favour of
    # np.exceptions.RankWarning; older releases only have the former.
    try:
        rank_warning = np.exceptions.RankWarning
    except AttributeError:
        rank_warning = np.RankWarning

    scores = np.empty(max_degree - 1, dtype=np.float64)
    # Ignore rank warnings now, but do not ignore for the final polynomial if not early returning
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', rank_warning)
        for deg in range(1, max_degree):
            try:
                poly = np.poly1d(np.polyfit(x, y, deg))
            except np.linalg.LinAlgError:
                # A failed fit must never win: give it the worst score.
                scores[deg - 1] = np.finfo(np.float64).max
                continue
            scores[deg - 1] = score(y - poly(x))
            # Early return if we found a polynomial that is good enough
            if scores[deg - 1] <= stop_at:
                return poly, deg, scores[deg - 1]
    # Find minimum score
    deg = np.argmin(scores) + 1
    # Compute polyfit for that degree
    poly = np.poly1d(np.polyfit(x, y, deg))
    return poly, deg, np.min(scores)
def getHTMPositionsFromFiles(files):
    """
    [Iterable] files (CL trustee excel files)
        => [Iterable] HTM positions from these files, with ISIN code added to
           each position.

    Only the positions whose 'AssetType' is 'HTMBond' are kept. The bond
    identifier is the first word of the 'Description' of a position; it is
    used as the ISIN code, unless it is one of the known non-ISIN
    identifiers, which are mapped to their ISIN code.

    All the files are read when the function is called. Without files the
    result is empty.
    """
    # some bond identifiers are not ISIN, map them to ISIN
    bondIsinMap = {
        'DBANFB12014': 'HK0000175916',  # Dragon Days Ltd 6% 03/21/22
        'HSBCFN13014': 'HK0000163607'   # New World Development 6% Sept 2023
    }

    def addISINCode(position):
        identifier = position['Description'].split()[0]
        return mergeDictionary(
            position,
            {'ISIN': bondIsinMap.get(identifier, identifier)})

    def htmPositionsFromFile(file):
        return map(
            addISINCode,
            filter(lambda p: p['AssetType'] == 'HTMBond', readFile(file)))

    # A list (not a generator) so that every file is read right away, and
    # chain.from_iterable so that an empty list of files is not an error.
    return chain.from_iterable(
        [htmPositionsFromFile(file) for file in files])
def nested_sequence(seq, get_effect=attrgetter('effect'),
                    fallback_dispatcher=base_dispatcher):
    """
    Return a function of Intent -> a that performs an effect retrieved from
    the intent (by accessing its `effect` attribute, by default) with the
    given intent-sequence.

    A demonstration is best::

        SequenceDispatcher([
            (BoundFields(effect=mock.ANY, fields={...}),
             nested_sequence([(SomeIntent(), perform_some_intent)]))
        ])

    The point is that sometimes you have an intent that wraps another effect,
    and you want to ensure that the nested effects follow some sequence in
    the context of that wrapper intent.

    `get_effect` defaults to attrgetter('effect'), so you can override it if
    your intent stores its nested effect in a different attribute. Or, more
    interestingly, if it's something other than a single effect, e.g. for
    ParallelEffects see the :func:`parallel_nested_sequence` function.

    :param seq: sequence of intents like :obj:`SequenceDispatcher` takes
    :param get_effect: callable to get the inner effect from the wrapper
        intent.
    :param fallback_dispatcher: an optional dispatcher to compose onto the
        sequence dispatcher.
    """
    def perform_nested(*args, **kwargs):
        # Whatever the performer is called with goes to get_effect; the
        # effect it returns is performed against the sequence.
        return perform_sequence(
            seq, get_effect(*args, **kwargs),
            fallback_dispatcher=fallback_dispatcher)

    return perform_nested
def get_rcv3_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `RCv3Node`.

    Lists the load balancer pools, requests the nodes of each pool in
    parallel, and flattens the per-pool results into a single list. If the
    RackConnect endpoint does not exist (`NoSuchEndpoint`), the result is an
    empty list.
    """
    def to_nodes(pool_description, nodes_response):
        _, node_dicts = nodes_response
        return [
            RCv3Node(node_id=node_dict['id'],
                     description=pool_description,
                     cloud_server_id=get_in(('cloud_server', 'id'),
                                            node_dict))
            for node_dict in node_dicts
        ]

    def list_nodes_of_each_pool(pools_response):
        _, pool_dicts = pools_response
        node_requests = []
        for pool_dict in pool_dicts:
            request = service_request(
                ServiceType.RACKCONNECT_V3, 'GET',
                append_segments('load_balancer_pools', pool_dict['id'],
                                'nodes'))
            node_requests.append(
                request.on(
                    partial(to_nodes,
                            RCv3Description(lb_id=pool_dict['id']))))
        return parallel(node_requests)

    list_pools = service_request(
        ServiceType.RACKCONNECT_V3, 'GET', 'load_balancer_pools')

    flatten = compose(list, concat)
    on_missing_endpoint = catch(NoSuchEndpoint, lambda _: [])
    return list_pools.on(list_nodes_of_each_pool).on(
        success=flatten, error=on_missing_endpoint)
def apply_transforms(self):
    """
    Apply the pending transforms to the data, in place, and clear them.

    The transforms in ``self.transforms`` are applied in list order to both
    the first and the second element of every item of ``self.data``; each
    item is replaced by the resulting 2-tuple. Progress is shown with tqdm.

    :return: self, to allow chaining.
    """
    pending = list(self.transforms)
    self.transforms = []

    def run_all(value):
        for transform in pending:
            value = transform(value)
        return value

    # In place transformation to save some mem.
    for i in tqdm(range(len(self.data)), total=len(self.data)):
        first = run_all(self.data[i][0])
        second = run_all(self.data[i][1])
        self.data[i] = (first, second)
    return self
def extract_and_capitalize_headlines_from_corpus(corpus_dir, docids):
    """
    Iterate through the documents `docids` in `corpus_dir`, extract the
    headlines, capitalize them and return them.

    A document is only processed when both its `.auxil` and `.paf` files
    exist; other documents are skipped silently.

    Parameter:
    ---------------
    corpus_dir: string

    docids: list of string
        the documents to be processed

    Return:
    --------------
    generator of (error, result):
        - (None, (docid, headlines)) with headlines as list<list<str>> when
          the titles of the document were extracted;
        - (exception, None) when extracting the titles raised. Nothing else
          is yielded for that document.
    """
    def capitalize(title):
        # pipeline:
        # -> get features
        # -> get tokens
        # -> capitalize headline
        tokens = map(partial(get_in, ["token"]), get_in(["features"], title))
        return make_capitalized_title(title_words=tokens)

    for docid in docids:
        p = Path(corpus_dir) / Path(docid)
        auxil_p = p.with_suffix(".auxil")
        paf_p = p.with_suffix(".paf")
        if not (auxil_p.exists() and paf_p.exists()):
            continue

        try:
            titles, _ = separate_title_from_body(str(auxil_p), str(paf_p))
        except Exception as e:
            yield (e, None)
            # No titles for this document: do not fall through to the
            # success result with undefined or stale titles.
            continue

        yield (None, (p.name, [capitalize(title) for title in titles]))
def apply(self, fn):
    """
    Add a function to the current list of functions.

    The given function will be executed last in the list of functions:
    it receives the output of whatever ``self.func`` currently computes.

    :param fn: callable taking the result of the current function chain.
    """
    if self.func is functoolz.identity:
        # Nothing queued yet: no need to wrap the identity function.
        self.func = fn
    else:
        # Always build a new composition through the public API. Appending
        # to the internals of an existing ``Compose`` object depends on a
        # private attribute layout and would also modify a composition
        # object that the caller handed in and may still be using.
        self.func = functoolz.compose(fn, self.func)
def test_compose():
    """
    ``compose`` with zero, one and several functions, including an
    innermost function that takes multiple and keyword arguments.
    """
    def scaled_sum(a, b, c=10):
        return (a + b) * c

    # No functions: behaves as identity.
    assert compose()(0) == 0
    # Single function.
    assert compose(inc)(0) == 1
    # Right-to-left application.
    assert compose(double, inc)(0) == 2
    assert compose(str, iseven, inc, double)(3) == "False"
    # The innermost function receives all positional / keyword arguments.
    assert compose(str, add)(1, 2) == '3'
    assert compose(str, inc, scaled_sum)(1, 2, c=3) == '10'
def split_execute_convergence(event, max_length=event_max_length):
    """
    Try to split an execute-convergence event into multiple events when its
    JSON form is longer than ``max_length``.

    Only the 'servers' and 'lb_nodes' entries are split out, largest first.
    After an entry has been split out it is deleted from ``event`` itself,
    so the passed-in dictionary is modified. Splitting stops as soon as the
    remaining event fits.

    Experimentally determined that probably logs cut off at around 75k
    characters - we're going to limit it to 50k.

    :param dict event: The 'execute-convergence' type event dictionary to
        split
    :param int max_length: The maximum length of the entire JSON-formatted
        dictionary.

    :return: `list` of `tuple` of (`dict`, `str`). The `dict`s in the tuple
        represent the split up event dicts, and the `str` the format string
        for each. If the event does not need to be split, the list will
        only have one tuple.
    """
    message = "Executing convergence"
    events = [(event, message)]
    if _json_len(event) <= max_length:
        return events

    # Handle whichever of the two entries serializes longest first.
    splittable = sorted(('servers', 'lb_nodes'),
                        key=lambda name: _json_len(event.get(name)),
                        reverse=True)

    # simplified event which serves as a base for the split out events
    bulky_keys = ('desired', 'servers', 'lb_nodes', 'steps')
    base_event = keyfilter(lambda k: k not in bulky_keys, event)

    for name in splittable:
        pieces = split(assoc(base_event, name), event[name],
                       max_length, _json_len)
        events.extend((piece, message) for piece in pieces)
        del event[name]
        if _json_len(event) <= max_length:
            break

    return events
def polarion_safe_string(string):
    """
    Make a string acceptable to Polarion.

    Surrounding whitespace is stripped, every "." becomes "-" and every
    ":" becomes a space, in that order.

    :param string: the text to sanitize
    :return: the sanitized text
    """
    def _trim(text):
        return text.strip()

    def _dots_to_dashes(text):
        return text.replace(".", "-")

    def _colons_to_spaces(text):
        return text.replace(":", " ")

    # Listed in the order they run. To handle another character, add a
    # step here; the list is reversed because compose applies its
    # arguments right to left.
    steps = [_trim, _dots_to_dashes, _colons_to_spaces]
    sanitize = ftz.compose(*reversed(steps))
    return sanitize(string)
def get_scaling_group_servers(tenant_id, authenticator, service_name, region,
                              server_predicate=None, clock=None):
    """
    Return tenant's servers that belong to a scaling group as
    {group_id: [server1, server2]} ``dict``. No specific ordering is
    guaranteed.

    The grouping is attached as a callback to whatever
    ``get_all_server_details`` returns, and that same object is returned.

    :param server_predicate: `callable` taking single server as arg and
        returns True if the server should be included, False otherwise.
        When omitted, servers are kept based on their own truthiness.
    """
    group_key = 'rax:auto_scaling_group_id'

    def has_group_id(server):
        return 'metadata' in server and group_key in server['metadata']

    def group_id(server):
        return server['metadata'][group_key]

    if server_predicate is None:
        def server_predicate(server):
            return server

    # compose applies right to left: drop servers without a group id,
    # apply the caller's predicate, then group what remains.
    only_grouped = filter(has_group_id)
    only_wanted = filter(server_predicate)
    by_group = groupby(group_id)
    servers_apply = compose(by_group, only_wanted, only_grouped)

    d = get_all_server_details(tenant_id, authenticator, service_name,
                               region, clock=clock)
    d.addCallback(servers_apply)
    return d
def get_facet_items_dict(facet, limit=None, exclude_active=False,
                         sort_key='count'):
    '''
    Monkey-Patch of ckan/lib/helpers/get_facet_items_dict()

    CKANs sorting of facet items is hardcoded
    (https://github.com/ckan/ckan/issues/3271)
    Also: refactored to be a bit more functional (SCNR)

    :param facet: name of the facet to look up in ``c.search_facets``
    :param limit: maximum number of items to return; when None the value
        from ``c.search_facets_limits`` is used if available. Zero or a
        negative value means "no limit".
    :param exclude_active: when True, items that are currently selected in
        the request parameters are left out
    :param sort_key: 'name' requests sorting by name (only honoured when
        ``str_to_int`` is truthy for at least one name); anything else
        sorts by count
    :returns: list of facet item dicts, each with an added ``active`` key,
        sorted in descending order; an empty list when the facet cannot be
        looked up
    '''
    try:
        f = c.search_facets.get(facet)['items']
    except Exception:
        # Best effort: a missing facet or missing search results simply
        # means there is nothing to show. Unlike a bare ``except`` this
        # does not swallow KeyboardInterrupt / SystemExit.
        return []

    def active(facet_item):
        if (facet, facet_item['name']) not in tk.request.params.items():
            return dict(active=False, **facet_item)
        elif not exclude_active:
            return dict(active=True, **facet_item)
        # Active item while exclude_active is set: falls through and
        # returns None, which the isdict step removes.

    def sort_facet(f):
        # Materialize first: where map/filter are lazy, inspecting the
        # names would otherwise consume items before sorted() sees them.
        f = list(f)
        key = 'count'
        if sort_key == 'name' and any(str_to_int(i['name']) for i in f):
            key = 'name'
        return sorted(f, key=lambda item: item[key], reverse=True)

    # limit is assigned inside this function, which makes it a local name
    # there, so the outer value has to be passed in as a parameter
    def set_limit(facs, limit):
        if c.search_facets_limits and limit is None:
            limit = c.search_facets_limits.get(facet)
        # zero treated as infinite for hysterical raisins
        if limit is not None and limit > 0:
            return facs[:limit]
        return facs

    filter_empty_name = partial(filter, lambda i: len(i['name'].strip()) > 0)
    isdict = partial(filter, lambda i: isinstance(i, dict))

    # compose applies right to left: drop blank names, tag each item as
    # active or not, drop the excluded (None) items, then sort.
    facets = compose(sort_facet,
                     isdict,
                     partial(map, active),
                     filter_empty_name)(f)
    return set_limit(facets, limit)
def _find_matching_event_abi(cls, event_name=None, argument_names=None):
    """
    Return the single event entry of ``cls.abi`` that matches the given
    criteria.

    :param event_name: when not None, only events with this name match
    :param argument_names: when not None, passed to
        ``filter_by_argument_name`` to narrow the candidates
    :returns: the one matching ABI entry
    :raises ValueError: when no event or more than one event matches
    """
    filters = [
        functools.partial(filter_by_type, 'event'),
    ]

    if event_name is not None:
        filters.append(functools.partial(filter_by_name, event_name))

    if argument_names is not None:
        filters.append(
            functools.partial(filter_by_argument_name, argument_names)
        )

    filter_fn = compose(*filters)

    event_abi_candidates = filter_fn(cls.abi)

    if len(event_abi_candidates) == 1:
        return event_abi_candidates[0]
    elif not event_abi_candidates:
        # This lookup is restricted to entries of type 'event', so the
        # messages name events rather than functions.
        raise ValueError("No matching events found")
    else:
        raise ValueError("Multiple events found")
def get_all_scaling_group_servers(changes_since=None,
                                  server_predicate=identity):
    """
    Return tenant's servers that belong to any scaling group as
    {group_id: [server1, server2]} ``dict``. No specific ordering is
    guaranteed.

    Servers whose 'metadata' entry is missing or is not a dict are ignored,
    as are servers for which ``group_id_from_metadata`` yields None.

    :param datetime changes_since: Get server since this time. Must be UTC
    :param server_predicate: function of server -> bool that determines
        whether the server should be included in the result.
    :return: the result of ``get_all_server_details(changes_since)`` with
        the grouping chained on via ``.on``; the grouping maps group IDs to
        lists of servers.
    """
    def has_group_id(server):
        if 'metadata' not in server:
            return False
        return isinstance(server['metadata'], dict)

    def group_id(server):
        return group_id_from_metadata(server['metadata'])

    # Steps in the order they run; compose itself applies right to left.
    only_with_metadata = filter(has_group_id)
    only_wanted = filter(server_predicate)
    by_group = groupby(group_id)
    drop_ungrouped = keyfilter(lambda k: k is not None)

    servers_apply = compose(drop_ungrouped,
                            by_group,
                            only_wanted,
                            only_with_metadata)

    return get_all_server_details(changes_since).on(servers_apply)
data = json.dumps(data) return get_request(method, url, data=data, **kwargs).on( success=lambda r: (r[0], json.loads(r[1]))) def content_request(effect): """Only return the content part of a response.""" return effect.on(success=lambda r: r[1]) _request = wrappers( get_request, request_with_auth, request_with_status_check, request_with_json) _request = compose(content_request, _request) def request(method, url, *args, **kwargs): """ Make an HTTP request, with a number of conveniences. Accepts the same arguments as :class:`Request`, in addition to these: :param tuple success_codes: integer HTTP codes to accept as successful :param data: python object, to be encoded with json :param get_auth_headers: a function to retrieve auth tokens :param refresh_auth_info: a function to refresh the auth cache :param tuple reauth_codes: integer HTTP codes upon which to reauthenticate """ return _request(method, url, *args, **kwargs)
def setUp(self):
    """
    Provide a fixed reference time and a helper that maps ``freeze`` over
    an iterable and collects the results into a ``set``.
    """
    freeze_each = map(freeze)
    self.freeze = compose(set, freeze_each)
    self.now = datetime(2010, 5, 31)
import sys
from toolz.functoolz import compose
from toolz.itertoolz import map, zip, second, drop, nth, iterate, first
from functools import partial
import itertools
import sh
''' samtools mpileup -cf ref.fasta hu.bam -g | bcftools view -'''
''' use this to create consensus, and bioframes.py to create the VCF comparison
#NOTE: freebayes requires ALL reads to be tagged with an RG, which requires a slight change to
# tagreads.py: https://github.com/VDBWRAIR/ngs_mapper/blob/9523d32effd268543611b60758991a99373a65f5/ngs_mapper/tagreads.py#L56-L59 '''
''' how to handle file globbing and multiple dirs at once? And: a compiled report as final task target?'''

# Maps a sorted, concatenated set of bases to its single-letter ambiguity code.
AMBIGUITY_TABLE = {
    'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N',
    'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': 'Y', 'GT': 'K',
    'ACG': 'V', 'ACT': 'H', 'AGT': 'D', 'CGT': 'B', 'ACGT': 'N'
}

# Sort the given bases, join them into one string and look up the code;
# raises KeyError for combinations missing from AMBIGUITY_TABLE.
get_degen = compose(AMBIGUITY_TABLE.__getitem__, ''.join, sorted)

# Despite the name this replaces the character at index x with '-'
# (s[x] itself is dropped), so the length of s is unchanged.
insert_gap = lambda s, x: s[:x]+ '-' + s[x+1:]

from operator import methodcaller as call, attrgetter


def make_dict(classes):
    """
    Map each class in `classes` to itself, keyed by its ``__name__``.

    ``__name__`` is an attribute, not a method, so it is read directly
    instead of through ``methodcaller``; a single pass over `classes`
    also works when it is a one-shot iterator.
    """
    return {cls.__name__: cls for cls in classes}


#TODO: fix ambiguous base definition
#TODO: this doesn't work for flu sequences which have multiple sections, \
# and the VCFs have multiple references. need to group by reference
def fix_fb_df(df):
    """
    Normalize the REF/ALT columns of `df` in place: ALT is reduced to its
    first element and both columns are converted to strings.
    """
    #Freebayes only ever reports one ALT?
    df.ALT = df.ALT.apply(lambda x: x[0])
    # the vcf library reports alts as _Substitution/whatever objects. extract the string.
    df.REF, df.ALT = df.REF.apply(str), df.ALT.apply(str)
    '''#TODO: this re-definition of ambiguous bases translates mult-base sections (e.g. AC) into single base alts or something'''
    ambiguous = ((df.AO / df.DP.apply(float)) < 0.8)
def zero_coverage_positions(bam_file, ref_file):
    """
    Run ``samtools mpileup`` on `bam_file` against `ref_file` and map each
    output line to the integer in its second whitespace-separated field.

    NOTE(review): no coverage filtering is visible here, so every line of
    the pileup contributes a value -- confirm against the callers whether
    "zero coverage" is derived elsewhere.
    """
    pileup_lines = sh.samtools('mpileup', bam_file, f=ref_file, _iter=True)
    # split the line, take the second field, convert it to int
    parse_position = compose(int, second, unicode.split)
    return map(parse_position, pileup_lines)
def factory(cls, *args, **kwargs):
    """
    Return a callable that feeds the result of the callable produced by
    ``Contract.factory(*args, **kwargs)`` into ``cls``.
    """
    inner_factory = Contract.factory(*args, **kwargs)
    return compose(cls, inner_factory)
""" Format logs based on specification """ import json import math from toolz.curried import assoc from toolz.dicttoolz import keyfilter from toolz.functoolz import compose, curry from twisted.python.failure import Failure from otter.log.formatters import LoggingEncoder _json_len = compose(len, curry(json.dumps, cls=LoggingEncoder)) # Maximum length of entire JSON-formatted event dictionary event_max_length = 50000 def split_execute_convergence(event, max_length=event_max_length): """ Try to split execute-convergence event out into multiple events if there are too many CLB nodes, too many servers, or too many steps. The problem is mainly the servers, since they take up the most space. Experimentally determined that probably logs cut off at around 75k, characters - we're going to limit it to 50k.
def uppercase_first_letter_compose(word):
    """
    Upper-case `word` and return the element at index 0 of the result,
    expressed as a toolz composition.
    """
    take_first = get(0)

    def to_upper(text):
        return text.upper()

    # compose applies right to left: upper-case first, then index.
    upper_then_first = functoolz.compose(take_first, to_upper)
    return upper_then_first(word)
def printable_train_data(malform_data_dir,
                         okform_data_dir,
                         ids,
                         extractor, feature_names, start, end=None,
                         title_transform_func=make_capitalized_title,
                         exclude_labels=None,
                         exclude_word_positions=set([0])):
    """
    Adapted to PULS requirement:

    - auxil file is read to get the additional preprocessed features

    Parameters
    ------------
    malform_data_dir: string
        the directory where the malformed data reside
    okform_data_dir: string
        the directory where the correctly formed data reside
    ids: list of string
        document ids
    extractor: FeatureExtractor
        the feature extractor
    feature_names: list of string
        the feature names (the label column 'y' is appended internally;
        the passed-in list is left untouched)
    start, end: int
        range of positions in `ids` to process; positions before `start`
        are skipped and processing stops at `end` (if given)
    title_transform_func: function
        function that accepts the title and transforms it
        into some badly capitalized version
    exclude_labels: iterable of str
        labels that we don't consider; None or empty excludes nothing
    exclude_word_positions: set of int
        token positions within the title that are left out

    Returns
    ------------
    Generator of (str, unicode):
        the document id and its sentence, where each line of the string
        is one token of the sentence (tab-separated feature values).
        Documents that fail validation or raise are logged and skipped.
    """
    # Build a new list instead of `+=` so repeated calls with the same list
    # object do not keep appending the label column to the caller's list.
    feature_names = list(feature_names) + ['y']  # add the label feature name

    malform_data_dir = Path(malform_data_dir)

    # take care of this ["tickerSymbol",["NYSE","SKT"]]
    # /cs/taatto/home/hxiao/capitalization-recovery/corpus/puls-format-capitalized/3987E0BD03749C996A04B881079AD753.auxil
    clean_tag = (lambda t: t[0] if isinstance(t, list) else t)
    get_tokens = partial(map, partial(get_in, ['token']))
    get_tags = partial(map, compose(clean_tag, partial(get_in, ['pos'])))
    get_lemmas = partial(map, partial(get_in, ['lemma']))

    n_collected = 0

    for i, id_ in enumerate(ids):
        if i < start:
            continue

        if i % 1000 == 0:
            logger.info("Collected %d" % n_collected)
            logger.info("Finished %d" % i)

        if end is not None and i >= end:
            logger.info("Reached %d. Terminate." % end)
            break

        try:
            malform_auxil_path = (malform_data_dir /
                                  Path(id_)).with_suffix('.auxil')
            with malform_auxil_path.open(encoding='utf8') as f:
                logger.debug('processing: {}'.format(id_))
                # to get the last line
                lines = f.readlines()
                if len(lines) == 0:
                    raise EmptyFileError(
                        'auxil file empty: {}'.format(malform_auxil_path))
                l = lines[-1]
                data = json.loads(l.strip())

            okform_auxil_path = str(
                (okform_data_dir / Path(id_)).with_suffix('.auxil'))
            okform_paf_path = str(
                (okform_data_dir / Path(id_)).with_suffix('.paf'))

            good_title_sents, body_sents = separate_title_from_body(
                okform_auxil_path,
                okform_paf_path
            )

            # extract the tokens
            doc = [[t['token'] for t in sent['features']]
                   for sent in body_sents]

            good_title_sents = list(good_title_sents)
            bad_title_sents = data['sents']
            if not isinstance(bad_title_sents, list):
                raise InvalidTitleError(
                    'bad_title_sents not a list: {}'.format(
                        bad_title_sents)
                )

            # we only consider headline that contains only ONE sentence
            if not (len(good_title_sents) == 1 and
                    len(bad_title_sents) == 1):
                raise TitleInconsistencyError(
                    '# of title sentences more than 1: {}'.format(id_)
                )

            good_sent = good_title_sents[0]
            bad_sent = bad_title_sents[0]

            # Materialize the mapped values: they are measured, compared
            # and iterated more than once below.
            good_title_tokens = list(get_tokens(good_sent['features']))
            bad_title_tokens = list(get_tokens(bad_sent['features']))

            # some validity checking
            if len(good_title_tokens) != len(bad_title_tokens):
                raise TitleInconsistencyError('{}\n{}'.format(
                    good_title_tokens, bad_title_tokens)
                )

            good_title_tokens_lower = [s.lower() for s in good_title_tokens]
            bad_title_tokens_lower = [s.lower() for s in bad_title_tokens]
            if good_title_tokens_lower != bad_title_tokens_lower:
                raise TitleInconsistencyError('{}\n{}'.format(
                    good_title_tokens_lower,
                    bad_title_tokens_lower)
                )

            tags = list(get_tags(bad_sent['features']))
            lemmas = list(get_lemmas(bad_sent['features']))

            # tag validity checking
            for tag in tags:
                if not (tag is None or isinstance(tag, basestring)):
                    raise InvalidTitleError(
                        '{}: tag {} not string'.format(id_, tag)
                    )

            # get malformed title tokens
            words = convert_to_trainable_format(
                good_title_tokens,
                title_transform_func,
                extractor,
                doc=doc,
                pos=tags,
                lemma=lemmas
            )

            # format the features in the required form
            token_lines = []
            for position, word in enumerate(words):
                if position in exclude_word_positions:
                    continue
                # An unset/empty exclude_labels excludes nothing; requiring
                # it to be truthy would drop every token by default.
                if exclude_labels and word['y'] in exclude_labels:
                    continue
                token_lines.append(u'\t'.join(
                    [unicode(word[feature_name])
                     for feature_name in feature_names]))
            res = u''.join(line + u'\n' for line in token_lines)

            n_collected += 1
            yield id_, res
        except (IOError, TitleInconsistencyError,
                InvalidTitleError, EmptyFileError):
            # Expected data problems: skip the document quietly.
            logger.debug(traceback.format_exc())
            continue
        except Exception:
            # Not a bare `except:`: the yield above sits inside this try,
            # and swallowing GeneratorExit (raised there when the consumer
            # closes the generator) or KeyboardInterrupt would make the
            # loop carry on when it must stop.
            logger.error(traceback.format_exc())
            continue
#!/usr/bin/env python3
"""
File utilities
"""
from toolz import functoolz
import operator
import numpy as np
from .NumPy import numpy_resize_insert


def _strip_newline(s):
    """Remove leading and trailing newline characters from `s`."""
    return s.strip("\n")


# A line counts when it is non-empty after stripping whitespace.
__standard_isline = functoolz.compose(bool, str.strip)


# Utility to get nth CSV column
def _csv_nthcol(n):
    """
    Build a function returning element `n` of ``s.partition(',')``.

    NOTE(review): ``partition`` yields (head, separator, tail), so only
    n=0 is a true column; n=1 is the separator and n=2 is everything after
    the first comma -- confirm before using with n > 0.
    """
    def _partition_on_comma(s):
        return s.partition(',')

    return functoolz.compose(operator.itemgetter(n), _partition_on_comma)


_csv_firstcol = _csv_nthcol(0)


def count_lines(flo, isline=__standard_isline):
    """
    Count the lines in a file.

    Takes a file-like object. Strings are treated as filenames and opened
    for reading. Only lines for which `isline` is truthy are counted.

    Returns the number of lines.
    """
    # A string is a path: open it and count on the resulting file object.
    if isinstance(flo, str):
        with open(flo, "r") as infile:
            return count_lines(infile, isline=isline)

    return sum(1 for line in flo if isline(line))
def test_apply_composed(self):
    """Test apply on functoolz composed function"""
    chunker = overlapping_chunks(self.data1, 3, 3)
    # Hand over an already-composed function rather than a plain callable.
    composed_square = functoolz.compose(functoolz.identity, np.square)
    chunker.apply(composed_square)
    expected = np.square(self.result1)
    assert_array_equal(chunker.as_array(), expected)