Example #1
def getAccumulatedValue(mappingFunc, wantedKeys, inputList):
	"""
	[Set] ([String] wantedKeys),
	[Function] Iterable -> [Dictionary] [String] key -> [Float] value
	[Iterable] inputList, whose elements are positions from the reports
		below, for month 1, 2 ... n

		1. Profit Loss summary with tax lot details;
		2. Daily interest accrual detail;

	=> [Iterable] ([Dictionary] key -> value)

	If the wantedKeys parameter is None, then no filtering is done.
	"""
	return \
	compose(
		lambda values: accumulate(values, addDictValues)
	  , partial(map, partial(keepKeysFromDict, wantedKeys))
	  , partial(map, mappingFunc)
	)(inputList) \
	if wantedKeys is not None else \
	compose(
		lambda values: accumulate(values, addDictValues)
	  , partial(map, mappingFunc)
	)(inputList)
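
A minimal, self-contained sketch of the same pattern. addDictValues and keepKeysFromDict are project helpers that are not shown above, so the stand-in versions below are assumptions for illustration only:

from functools import partial
from itertools import accumulate
from toolz.functoolz import compose

# Stand-in helpers (assumed): add dictionaries value-wise, keep only wanted keys.
addDictValues = lambda d1, d2: {k: d1.get(k, 0) + d2.get(k, 0) for k in set(d1) | set(d2)}
keepKeysFromDict = lambda keys, d: {k: v for k, v in d.items() if k in keys}

monthly = [{'interest': 1.0, 'fee': 0.5}, {'interest': 2.0, 'fee': 0.5}]

runningTotals = compose(
	lambda values: accumulate(values, addDictValues)
  , partial(map, partial(keepKeysFromDict, {'interest'}))
)(monthly)

print(list(runningTotals))   # [{'interest': 1.0}, {'interest': 3.0}]
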
Example #2
def getDateFromLines(lines):
    """
	[Iterable] lines => [String] date (yyyy-mm-dd)

	Search for the line that contains the date and return it as a string.
	"""

    # [Iterable] lines => [Dictionary] line (or None if not found)
    findDateLine = partial(
     firstOf
      , lambda line: \
        isinstance(line[0], str) and line[0].lower().startswith('valuation period:')
    )
    """
	[String] date header => [String] date (yyyy-mm-dd)
	The date header looks like: 
		'Valuation Period: From 01/02/2020 to 29/02/2020'
	"""
    getDateFromString = compose(
        lambda s: datetime.strftime(datetime.strptime(s, '%d/%m/%Y'),
                                    '%Y-%m-%d'), lambda s: s.split()[-1])


    return \
    compose(
     getDateFromString
      , lambda line: lognRaise('getDateFromLines(): failed to get date line') \
           if line is None else line[0]
      , findDateLine
    )(lines)
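
The date-parsing step above is self-contained and can be checked on its own; a quick sketch of the same composition:

from datetime import datetime
from toolz.functoolz import compose

getDateFromString = compose(
    lambda s: datetime.strftime(datetime.strptime(s, '%d/%m/%Y'), '%Y-%m-%d')
  , lambda s: s.split()[-1]
)

print(getDateFromString('Valuation Period: From 01/02/2020 to 29/02/2020'))   # 2020-02-29
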
Example #3
def createLqaPositions(portfolio, date, mode='production'):
	"""
	[String] portfolio, [String] date (yyyymmdd), [String] mode
		=> ( [Iterator] non-clo positions
		   , [Iterator] clo positions
		   )
	"""
	processGenevaPositions = compose(
		getGenevaLqaPositions
	  , partial(filter, isGenevaPosition)
	)


	processBlpPositions = compose(
		getBlpLqaPositions
	  , partial(filterfalse, isGenevaPosition)
	)


	return compose(
		lambda t: ( consolidate(chain(t[1], t[2]))
				  , consolidate(t[0])
				  )
	  , lambda positions: ( *processBlpPositions(positions)
						  , processGenevaPositions(positions)
						  )
	  , list
	  , getPortfolioPositions
	)(portfolio, date, mode)
Example #4
def getCashFromBalancenActivityFiles(balanceFile, activityFile):
    """
	[String] balanceFile, [String] activityFile
		=> ( [String] date
		   , [Iterable] cash entries
		   )
	"""
    checkFileDates = compose(
     lambda t: lognRaise('checkFileDates(): inconsistent dates from filenames') \
         if t[0] != t[1] else t[0]
      , lambda file1, file2: ( getDateFromFileName(file1)
             , getDateFromFileName(file2)
             )
    )


    processFiles = lambda date, balFile, actFile: compose(
     partial(map, partial(mergeDictionary, {'date': date, 'custodian':''}))
      , lambda d: d.values()
      , lambda _1, balFile, actFile: \
        mergeDictionary( getCashFromBalance(fileToLines(balFile))
               , getCashFromActivity(fileToLines(actFile))
               )
    )(date, balFile, actFile)

    return compose(
        lambda date: (date, processFiles(date, balanceFile, activityFile)),
        lambda _: checkFileDates(balanceFile, activityFile),
        lambda _1, _2: lognContinue(
            'getCashFromBalancenActivityFiles(): {0}, {1}'.format(
                balanceFile, activityFile), 0))(balanceFile, activityFile)
Example #5
def reset_defaults(load,save):
    with_defaults = [curry(with_key_value,k,v) for k,v in DEFAULT_SETTINGS.items()]
    compose(
            save,
            compose(*with_defaults),
            load,
        )()
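
A sketch of the same reset pipeline with stand-in helpers; with_key_value, DEFAULT_SETTINGS, load and save are not shown above, so their shapes here are assumptions for illustration only:

from toolz.functoolz import compose, curry

DEFAULT_SETTINGS = {'theme': 'light', 'autosave': True}

# Assumed signature: returns a copy of settings with one key set.
with_key_value = lambda key, value, settings: {**settings, key: value}

def reset_defaults(load, save):
    with_defaults = [curry(with_key_value, k, v) for k, v in DEFAULT_SETTINGS.items()]
    compose(save, compose(*with_defaults), load)()

# load returns the current settings, save just prints them here.
reset_defaults(lambda: {'theme': 'dark'}, print)   # {'theme': 'light', 'autosave': True}
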
Example #6
def sym_transform_feature_union(estimator):
    keys = tuple(map(tupget(0), estimator.transformer_list))
    transformers = map(compose(sym_transform, tupget(1)),
                       estimator.transformer_list)
    weights = map(
        compose(RealNumber, estimator.transformer_weights.__getitem__), keys)
    return cart(*starmap(__mul__, zip(weights, transformers)))
Example #7
 def __init__(self, inputs, calls, outputs, origin=None):
     '''
     A Function represents a function in the computational sense.  Function objects are the 
     intermediary between fitted estimators and generated code.  Adapters return Function 
     objects, and sklearn2code converts Function objects into working code.  A Function object
     is composed of Expression objects (including Variable objects) and other Function objects.  
     It knows its inputs (Variable objects), its internal calls (made up of Variable objects 
     and other Function objects), and its outputs (general Expression objects).  
     
     Parameters
     ----------
     inputs : tuple of Variables
         The input variables for this function.
     
     calls : tuple of pairs with (tuples of Variables, pairs of Function objects and tuples of their inputs)
         The values are other function calls made by this function.  The keys are 
         variables to which the outputs are assigned.  The number of output variables in the
         key must match the number of outputs in the Function.  The length of the tuple of inputs must match the
         number of inputs for the function.  Also, no two keys may contain 
         the same variable.  These constraints are checked.
         
     outputs : tuple of expressions
         The actual calculations made by this Function.  The return values of the function
         are the results of the computations expressed by the expressions.
     
     '''
     self.inputs = tuple(map(safe_symbol, tupify(inputs)))
     self.calls = tupsmap(
         1, tupfun(identity, compose(tuple,
                                     curry(map)(safe_symbol))),
         tupsmap(0, compose(tuple,
                            curry(map)(safe_symbol)), calls))
     self.outputs = tupify(outputs)
     self._validate()
Example #8
def getRawPositionsFromLines(lines):
    """
	[Iterator] ([List]) lines => [Iterator] ([Dictionary]) positions
	"""
    stripIfString = lambda x: x.strip() if isinstance(x, str) else x

    # [Iterable] line => [List] headers
    getHeaders = compose(list, partial(takewhile, lambda x: x != ''),
                         partial(map, stripIfString))

    # [List] headers, [List] line => [Dictionary] position
    toPosition = lambda headers, line: compose(dict, partial(
        zip, headers), partial(map, stripIfString))(line)


    emptyLine = lambda line: \
     len(line) == 0 or stripIfString(line[0]) == ''


    return \
    compose(
     lambda t: map(partial(toPosition, t[0]), t[1])
      , lambda t: ( getHeaders(t[0])
         , takewhile(lambda line: not emptyLine(line), t[1])
         )
      , headnRemain
    )(lines)
Example #9
def writeAssetAllocationCsv(portfolio, date, mode, reportingCurrency, countryGroups, assetTypeTuples):
	"""
	[String] portfolio,
	[String] date (yyyymmdd),
	[String] mode,
	[String] reportingCurrency
	[List] countryGroups (e.g., ['China - Hong Kong', 'China - Mainland', 'Singapore'])
	[List] assetTypeTuples (each assetTypeTuple is like ('Fixed Income', 'Corporate', 'Investment Grade'))
		
		=> [String] output csv file name

	Side effect: create a csv file.
	"""
	assetTypeToValues = lambda d, countryGroups, assetypeTuple: \
	compose(
		partial(map, partial(sumMarketValueInCurrency, date, reportingCurrency))
	  , lambda d: map(lambda cg: d[cg], countryGroups)
	  , lambda assetypeTuple: d[assetypeTuple]
	)(assetypeTuple)


	return \
	compose(
		partial(writeCsv, portfolio + '_asset_allocation_' + date + '.csv')
	  , lambda d: map(partial(assetTypeToValues, d, countryGroups), assetTypeTuples)
	  , partial(getAssetCountryAllocation, date, getBlpData(date, mode), assetTypeTuples, countryGroups)
	  , getPortfolioPositions
	)(portfolio, date, mode)
Example #10
def getGenevaPositions(portfolio, date, mode):
    """
	[String] portfolio, [String] date (yyyymmdd), [String] mode
		=> [Iterator] Investment positions of the portfolio on that date
	"""
    """
		[String] file (Geneva investment positions report, Excel format)
			=> [Iterator] positions
	"""
    readGenevaInvestmentPositionFile = compose(
        partial(
            map, lambda p: mergeDict(
                p, {'Remarks1': 'Geneva investment positions report'})),
        lambda lines: getPositions(lines)[1], fileToLines,
        lambda file: lognContinue(
            'readGenevaInvestmentPositionFile(): {0}'.format(file), file))
    """
		[String] portfolio, [String] date (yyyymmdd), [String] mode
			=> [String] file 
	"""
    getGenevaInvestmentPositionFile = lambda portfolio, date, mode: \
     join( getInputDirectory(mode)
      , portfolio + '_Investment_Positions_' + date + '.xlsx'
      )


    return \
    compose(
     readGenevaInvestmentPositionFile
      , getGenevaInvestmentPositionFile
    )(portfolio, date, mode)
Example #11
def loadAssetTypeSpecialCaseFromFile(file):
    """
	[String] file => [Dictionary] ID -> [Dictionary] security info
	"""
    stringToTuple = compose(tuple, partial(map, lambda s: s.strip()),
                            lambda s: s.split(','))


    updatePosition = lambda position: mergeDict(
     position
      , { 'Portfolio': str(int(position['Portfolio'])) \
           if isinstance(position['Portfolio'], float) \
           else position['Portfolio']
       , 'AssetType': stringToTuple(position['AssetType'])
       }
    )


    return \
    compose(
     dict
      , partial(map, lambda p: (p['ID'], p))
      , partial(map, updatePosition)
      , getRawPositions
      , fileToLines
      , partial(join, getDataDirectory())
    )(file)
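
The stringToTuple helper above is self-contained and easy to sanity-check:

from functools import partial
from toolz.functoolz import compose

stringToTuple = compose(tuple, partial(map, lambda s: s.strip()),
                        lambda s: s.split(','))

print(stringToTuple('Fixed Income, Corporate, Investment Grade'))
# ('Fixed Income', 'Corporate', 'Investment Grade')
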
Example #12
 def checkInconsistency(positions):
     compose(
         partial(valmap, checkGroupConsistency),
         partial(
             groupbyToolz, lambda p:
             (p['Date'], p['Currency'], p['TargetCurrency'])))(positions)
     return positions
Example #13
def processFiles(files, outputDir):
	"""
	[Iterable] files, [String] outputDir
		=> ([List] output files, [List] successful files, [List] failed files)
	
	This function does not throw any exceptions.
	"""
	isHoldingFile = compose(
		lambda s: s.lower().startswith('boc broker statement')
	  , getFilenameWithoutPath
	)

	isCashFile = compose(
		lambda s: s.lower().startswith('boc bank statement')
	  , getFilenameWithoutPath
	)


	try:
		date, outputHoldingCsvFiles, successfulHoldingFiles, failedHoldingFiles = \
			processHoldingFiles(filter(isHoldingFile, files), outputDir)

		outputCashCsvFiles, successfulCashFiles, failedCashFiles = \
			processCashFiles(filter(isCashFile, files), date, outputDir)

		return outputHoldingCsvFiles + outputCashCsvFiles \
			 , successfulHoldingFiles + successfulCashFiles \
			 , failedHoldingFiles + failedCashFiles

	except:
		logger.exception('processFiles()')
		return [], [], files
Example #14
 def to_pandas(cls,
               response: Sequence[np.ndarray],
               data: Optional[Any] = None,
               index: Optional[Sequence[Text]] = None) -> Any:
     # pylint: disable=no-value-for-parameter
     """Reduce stuff."""
     matrix = cls._matrix
     df = reduce(
         cls._pivot,
         [cls._view(arr, col) for arr, col in zip(response, matrix)])
     df['#timestamp'] = df['#timestamp'].dt.tz_localize('UTC')
     df = df.reset_index().set_index('#index')
     df, data = reduce(
         lambda acc, var_bind: cast(
             Tuple[Any, Optional[Any]],
             compose(*var_bind._hooks['before_merge'])(acc)  # pylint: disable=protected-access
         ),
         [var_bind for col in matrix for var_bind in col],  # pylint: disable=not-an-iterable
         (df, data))
     if data is not None:
         df = df.merge(data, how='outer', left_index=True, right_index=True)
     df = df.reset_index(drop=True)
     if index is not None:
         df = df.set_index(index)
     df = reduce(
         lambda acc, var_bind: (
             compose(*var_bind._hooks['after_merge'])(acc)  # pylint: disable=protected-access
         ),
         [var_bind for col in matrix for var_bind in col],  # pylint: disable=not-an-iterable
         df)
     return df
Example #15
def getRawPositions(lines):

    nonEmptyLine = lambda line: len(line) > 0 and line[0] != ''

    headerMap = {
        'Account Name': 'portfolio',
        'Currency': 'currency',
        'Currency(or Equiv.)': 'currency',
        'Ledger Balance': 'balance',
        'Ledger Balance(Total Equiv.)': 'balance'
    }
    """
		[List] line => [List] Headers

		Only a few fields (headers) will be useful in the output csv, therefore 
		we map those headers to field names in the output csv.
	"""
    getHeadersFromLine = compose(
        list, partial(map, lambda s: headerMap[s] if s in headerMap else s),
        partial(map, lambda s: s.split('\n')[-1]),
        partial(takewhile, lambda s: s != ''))


    return \
    compose(
      partial(map, dict)
      , lambda t: map(partial(zip, getHeadersFromLine(t[0])), t[1])
      , lambda lines: (pop(lines), lines)
      , partial(takewhile, nonEmptyLine)
    )(lines)
Example #16
def getAccumulatedTimeWeightedCapital(bondConnectOnly, sortedCLPositions):
	"""
	[Bool] bondConnectOnly
	[Iterable] ([String] period end date, [List] positions of that period)
	=> [Iterable] Float (time weighted capital at each period end date)

	"""

	"""
		[Iterable] cash ledger entries => [Iterable] cash ledger entries

		filter and change the entries for bond connect calculation.
	"""
	mappingFunc = compose(
		partial( map 
			   , lambda p: mergeDict(p, {'TranDescription': 'Deposit'}) \
					if p['TranDescription'] == 'Transfer' else p
			   )
	  , partial( filter
			   , lambda p: 'BOCHK_BC' in p['GroupWithinCurrency_OpeningBalDesc']
			   )
	)


	return \
	compose(
		partial(map, lambda t: getTimeWeightedCapital(t[0], t[1]))
	  , partial(map, lambda t: (t[0], list(mappingFunc(t[1])))) \
	  	if bondConnectOnly else partial(map, lambda t: t)
	  , lambda sortedCLPositions: \
	  		accumulate(sortedCLPositions, lambda t1, t2: (t2[0], t1[1] + t2[1]))
	)(sortedCLPositions)
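
The accumulate step above keeps the latest period-end date while concatenating all positions seen so far; a minimal sketch of that step alone, with simplified position lists:

from itertools import accumulate

sortedCLPositions = [ ('2020-01-31', ['p1'])
                    , ('2020-02-29', ['p2'])
                    , ('2020-03-31', ['p3'])
                    ]

running = accumulate(sortedCLPositions, lambda t1, t2: (t2[0], t1[1] + t2[1]))
print(list(running))
# [('2020-01-31', ['p1']), ('2020-02-29', ['p1', 'p2']), ('2020-03-31', ['p1', 'p2', 'p3'])]
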
Example #17
def kickstart(yaml_path=None, args=None):
    """
    Kicks everything off by creating the configuration function pipeline

    :return:
    """
    # Create the CLIConfigurator first, because it may override defaults (eg, it can override the
    # default location of pylarion_path or exporter_config, which are needed by PylarionConfigurator
    # and YAMLConfigurator)

    cli_cfg = CLIConfigurator()
    start_map = pyr.m()
    init_map = cli_cfg(start_map)
    pyl_path = init_map.get("pylarion_path")
    yaml_path = init_map.get("exporter_config")
    env_path = init_map.get("environment_file")

    pyl_cfg = PylarionConfigurator(path=pyl_path)
    env_cfg = OSEnvironmentConfigurator()
    yml_cfg = YAMLConfigurator(cfg_path=yaml_path)
    jnk_cfg = None
    if env_path:
        jnk_cfg = JenkinsConfigurator(env_path)
    cli_cfg = CLIConfigurator(args=args)

    if env_path:
        pipeline = compose(cli_cfg, jnk_cfg, yml_cfg, env_cfg, pyl_cfg)
    else:
        pipeline = compose(cli_cfg, yml_cfg, env_cfg, pyl_cfg)
    end_map = pipeline(start_map)

    log.log(DEFAULT_LOG_LEVEL, "================ end_map ===================")
    dprint(end_map)

    try:
        final = ConfigRecord(**end_map)
    except pyr._checked_types.InvariantException as ex:
        print(ex)
        if ex.missing_fields:
            log.error("Following fields not configured: " + str(ex.missing_fields))
        if False and  ex.invariant_errors:
            log.error("Invariants broken: " + str(ex.invariant_errors))
        log.error("Please correct the above and run again")
        sys.exit(1)
    log.log(logging.INFO, "================= final ====================")
    dprint(final, log_lvl=logging.INFO)
    log.log(logging.INFO, "============================================\n")

    result = {"pyl_cfg": pyl_cfg,
              "env_cfg": env_cfg,
              "yml_cfg": yml_cfg,
              "cli_cfg": cli_cfg,
              "config": final}
    return result
Example #18
def addRepoHeaders(file):
	"""
	[String] file => [String] output file

	Assume: the input file is a repo XML file of one of 3 types:
	repo loan master file, repo transaction file, repo rerate file.

	The function reads the input XML file, adds the appropriate Geneva headers
	to its content, and saves the output file into the same folder.
	"""
	logger.debug('addRepoHeaders(): {0}'.format(file))

	# [String] file => [String] file type
	getFileTypeFromName = lambda file: \
		'loan_master' if isRepoMaster(file) else \
		'transaction' if isRepoTrade(file) else \
		'rerate' if isRepoRerate(file) or isRepoDummyRerate(file) \
		else 'others'


	# [String] file => [Tuple] (headers, footers)
	getHeaderForFile = compose(
		getRepoHeaders
	  , getFileTypeFromName
	)


	def getOutputFilename(file):

		getOutputFile = compose(
			lambda t: t[0] + '_WithHeaders' + t[1]
		  , lambda file: (file[0:-4], file[-4:])
		  , getFilenameWithoutPath
		)

		return join(getParentFolder(file), getOutputFile(file))
	# end of getOutputFilename()


	def writeLinesToFile(lines, fileName):
		with open(fileName, 'w') as f:
			f.writelines(lines)

		return fileName
	# end of writeLinesToFile()


	return \
	compose(
		lambda lines: writeLinesToFile(lines, getOutputFilename(file))
	  , lambda t: chain(t[1][0], t[0], t[1][1])
	  , lambda file: ( fileToLines(file)
	  				 , getHeaderForFile(file))
	)(file)
Example #19
def convertAccumulateExcelToCSV(file):
    """
	[String] file => [String] file

	Read an accumulative trade Excel file and write it as csv. We need to make
	sure dates are formatted as yyyy-mm-dd, so that they are consistent with
	the daily addon from the Bloomberg AIM trade file.

	The csv file name is the same as the excel file, except that its file
	extension is '.csv' instead of '.xlsx'

	This is a utility function that needs to run only once, to convert the
	Excel version of the accumulative trade file into csv format. After that,
	we just need to add daily trades to that csv file.
	"""
    getOutputFileName = lambda fn: \
     fn[0:-4] + 'csv' if fn.endswith('.xlsx') else \
     fn[0:-3] + 'csv' if fn.endswith('.xls') else \
     lognRaise('convertAccumulateExcelToCSV(): invalid input file {0}'.format(fn))
    """
		[List] line => [List] headers
		Note the second header is an empty string, but we need to keep it. All
		other empty strings in the list are ignored
	"""
    getHeaders = compose(list, partial(map, lambda t: t[1]),
                         partial(takewhile, lambda t: t[0] < 2 or t[1] != ''),
                         lambda line: zip(count(), line))

    def toDatetimeString(value):
        if isinstance(value, float):
            return datetime.strftime(fromExcelOrdinal(value), '%Y-%m-%d')
        else:
            try:
                return datetime.strftime(datetime.strptime(value, '%m/%d/%Y'),
                                         '%Y-%m-%d')
            except ValueError:
                return datetime.strftime(datetime.strptime(value, '%d/%m/%Y'),
                                         '%Y-%m-%d')


    getLineItems = lambda headers, line: compose(
     partial( map
         , lambda t: toDatetimeString(t[1]) \
           if t[0] in ['Trade Date', 'Settlement Date'] else t[1]
         )
      , lambda headers, line: zip(headers, line)
    )(headers, line)

    return compose(
        lambda rows: writeCsv(getOutputFileName(file), rows, delimiter=','),
        lambda t: chain([t[0]], map(partial(getLineItems, t[0]), t[1])),
        lambda lines: (getHeaders(pop(lines)), lines), fileToLines)(file)
Example #20
def test_compose():
    assert compose()(0) == 0
    assert compose(inc)(0) == 1
    assert compose(double, inc)(0) == 2
    assert compose(str, iseven, inc, double)(3) == "False"
    assert compose(str, add)(1, 2) == '3'

    def f(a, b, c=10):
        return (a + b) * c

    assert compose(str, inc, f)(1, 2, c=3) == '10'

    # Define two functions with different names
    def f(a):
        return a

    def g(a):
        return a

    composed = compose(f, g)
    assert composed.__name__ == 'f_of_g'
    assert composed.__doc__ == 'lambda *args, **kwargs: f(g(*args, **kwargs))'

    # Create an object with no __name__.
    h = object()

    composed = compose(f, h)
    assert composed.__name__ == 'Compose'
    assert composed.__doc__ == 'A composition of functions'
Example #21
def getPortfolioNames():
    """
	[Dictionary] ([String] portfolio code => [String] portfolio name)
	"""
    file = compose(
        lambda L: join(getDataDirectory(), L[0]), _checkOnlyOne, list,
        partial(
            filter,
            lambda fn: fn.lower().startswith('steven zhang portfolio names')),
        getFiles, getDataDirectory)()

    return compose(dict, partial(map, lambda p:
                                 (p['NameSort'], p['NameLine1'])),
                   _getGenevaPortfolioNamesFromFile)(file)
Example #22
def getTimeWeightedCapital(reportDate, positions):
	"""
	[String] report date (yyyy-mm-dd),
	[List] cash ledger positions,
		=> [Float] time weighted capital
	"""
	stringToDate = lambda d: \
		datetime.strptime(d, '%Y-%m-%d')


	# [String] day1 (yyyy-mm-dd), [String] day2 (yyyy-mm-dd) => [Int] days
	getDaysDifference = lambda day1, day2: \
		(stringToDate(day2) - stringToDate(day1)).days


	"""
		[String] report date (yyyy-mm-dd),
		[Iterable] cash ledger positions
			=> [Float] time weighted capital

		Calculate time weighted capital for internal cash flow, e.g., bond maturity
	"""
	getTimeWeightAmountInternalCF = lambda reportDate, positions: \
	compose(
		sum
	  , partial( map
	  		   , lambda p: p['BookAmount'] * getDaysDifference(p['CashDate'], reportDate)/365.0)
	  , partial(filter, lambda p: p['TranDescription'] in ['Mature', 'Paydown', 'Sell'])
	)(positions)


	"""
		[String] report date (yyyy-mm-dd),
		[Iterable] cash ledger positions
			=> [Float] time weighted capital

		Calculate time weighted capital for external cash flow, i.e., deposit
		and withdrawal
	"""
	getTimeWeightAmountExternalCF = lambda reportDate, positions: \
	compose(
		sum
	  , partial( map
	  		   , lambda p: p['BookAmount'] * (getDaysDifference(p['CashDate'], reportDate) + 1)/365.0)
	  , partial(filter, lambda p: p['TranDescription'] in ['Deposit', 'Withdraw'])
	)(positions)


	return getTimeWeightAmountInternalCF(reportDate, positions) \
		 + getTimeWeightAmountExternalCF(reportDate, positions)
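
A worked example with made-up ledger entries, assuming the function above is in scope. A 1,000,000 deposit on 2020-01-01 spans 90 days to a 2020-03-31 report date, so it contributes 1,000,000 * (90 + 1) / 365, approximately 249,315.07; a 500,000 bond maturity on 2020-03-01 contributes 500,000 * 30 / 365, approximately 41,095.89:

positions = [ {'TranDescription': 'Deposit', 'BookAmount': 1000000.0, 'CashDate': '2020-01-01'}
            , {'TranDescription': 'Mature' , 'BookAmount': 500000.0 , 'CashDate': '2020-03-01'}
            ]

print(getTimeWeightedCapital('2020-03-31', positions))   # ~ 290410.96
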
Example #23
def getAllPositionsBlp(date, mode):
    """
	[String] date (yyyymmdd), [String] mode
		=> [Iterator] positions of all portfolios on the date from Bloomberg
	"""
    getBlpPositionFile = lambda date, mode: \
     join(getInputDirectory(mode), 'risk_m2_mav_' + date + '.xlsx')

    # [Iterable] lines => [List] line that contains the date
    findDateLine = partial(
        firstOf,
        lambda line: len(line) > 1 and line[1].startswith('Risk-Mon Steven'))

    # [String] The string containing date => [String] date (yyyymmdd)
    # it looks like: Risk Report LQA Master as of 20200429
    getDateFromString = lambda s: s.split()[-1]

    getDateFromLines = compose(
        getDateFromString, lambda line: lognRaise('Failed to find date line')
        if line is None else line[1], findDateLine)

    floatToString = lambda x: str(int(x)) if isinstance(x, float) else x


    updatePosition = lambda date, position: \
     mergeDict( position
        , { 'AsOfDate': date
          , 'Remarks1': 'Bloomberg MAV Risk-Mon Steven'
          , 'Account Code': floatToString(position['Account Code'])
          }
        )


    getPositions = lambda date, lines: \
    compose(
     partial(map, partial(updatePosition, date))
      , partial(filterfalse, lambda p: p['Account Code'] == '')
      , getRawPositions
      , lambda lines: dropwhile(lambda line: line[0] != 'Name', lines)
    )(lines)


    return \
    compose(
     lambda t: getPositions(t[0], t[1])
      , lambda lines: (getDateFromLines(lines), lines)
      , fileToLines
      , lambda file: lognContinue('getAllPositionsBlp(): {0}'.format(file), file)
      , getBlpPositionFile
    )(date, mode)
Example #24
def test_compose():
    assert compose()(0) == 0
    assert compose(inc)(0) == 1
    assert compose(double, inc)(0) == 2
    assert compose(str, iseven, inc, double)(3) == "False"
    assert compose(str, add)(1, 2) == '3'

    def f(a, b, c=10):
        return (a + b) * c

    assert compose(str, inc, f)(1, 2, c=3) == '10'

    # Define two functions with different names
    def f(a):
        return a

    def g(a):
        return a

    composed = compose(f, g)
    assert composed.__name__ == 'f_of_g'
    assert composed.__doc__ == 'lambda *args, **kwargs: f(g(*args, **kwargs))'

    # Create an object with no __name__.
    h = object()

    composed = compose(f, h)
    assert composed.__name__ == 'Compose'
    assert composed.__doc__ == 'A composition of functions'
Example #25
def processHoldingFiles(files, outputDir):
	"""
	[Iterable] files, [String] output directory => 
		( [String] date (yyyy-mm-dd)
		, [List] output files
		, [List] successfully processed files
		, [List] failed processed files
		)
	"""
	getDateFromFiles = compose(
		lambda L: L[-1]
	  , sorted
	  , partial(map, dateFromFilename)
	  , partial(map, getFilenameWithoutPath)
	)

	def getResult(acc, file):
		"""
		[Tuple] ([List] output files, [List] successful files, [List] failed files) acc
		[String] file
			=> acc
		"""
		try:
			return ( acc[0] + [writeHoldingCsv(outputDir, file)]
				   , acc[1] + [file]
				   , acc[2]
				   )
		except:
			return (acc[0], acc[1], acc[2] + [file])
	# end of getResult()

	return ( getDateFromFiles(files)
		   , *reduce(getResult, files, ([], [], []))
		   )
Example #26
def get_rcv3_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `RCv3Node`.
    """
    eff = service_request(ServiceType.RACKCONNECT_V3, 'GET',
                          'load_balancer_pools')

    def on_listing_pools(lblist_result):
        _, body = lblist_result
        return parallel([
            service_request(ServiceType.RACKCONNECT_V3, 'GET',
                            append_segments('load_balancer_pools',
                                            lb_pool['id'], 'nodes')).on(
                partial(on_listing_nodes,
                        RCv3Description(lb_id=lb_pool['id'])))
            for lb_pool in body
        ])

    def on_listing_nodes(rcv3_description, lbnodes_result):
        _, body = lbnodes_result
        return [
            RCv3Node(node_id=node['id'], description=rcv3_description,
                     cloud_server_id=get_in(('cloud_server', 'id'), node))
            for node in body
        ]

    return eff.on(on_listing_pools).on(
        success=compose(list, concat),
        error=catch(NoSuchEndpoint, lambda _: []))
Example #27
def split_cf_messages(format_message,
                      var_length_key,
                      event,
                      separator=', ',
                      max_length=255):
    """
    Try to split cloud feed log events out into multiple events if the message
    is too long (the variable-length variable would cause the message to be
    too long.)

    :param str format_message: The format string to use to format the event
    :param str var_length_key: The key in the event dictionary that contains
        the variable-length part of the formatted message.
    :param dict event: The event dictionary
    :param str separator: The separator to use to join the various elements
        that should be varied.  (e.g. if the elements in "var_length_key" are
        ["1", "2", "3"] and the separator is "; ", "var_length_key" will be
        represented as "1; 2; 3")
    :param int max_length: The maximum length of the formatted message.

    :return: `list` of event dictionaries with the formatted message and
        the split event field.
    """
    def length_calc(e):
        return len(format_message.format(**e))

    render = compose(assoc(event, var_length_key), separator.join,
                     curry(map, str))

    if length_calc(event) <= max_length:
        return [(render(event[var_length_key]), format_message)]

    events = split(render, event[var_length_key], max_length, length_calc)
    return [(e, format_message) for e in events]
Example #28
def get_tenant_metrics(tenant_id, scaling_groups, grouped_servers,
                       _print=False):
    """
    Produce per-group metrics for all the groups of a tenant

    :param list scaling_groups: Tenant's scaling groups as dict from CASS
    :param dict grouped_servers: Servers from Nova grouped based on
        scaling group ID.
    :return: generator of (tenantId, groupId, desired, actual) GroupMetrics
    """
    if _print:
        print('processing tenant {} with groups {} and servers {}'.format(
              tenant_id, len(scaling_groups), len(grouped_servers)))

    groups = {g['groupId']: g for g in scaling_groups}

    for group_id in set(groups.keys() + grouped_servers.keys()):
        servers = grouped_servers.get(group_id, [])
        if group_id in groups:
            group = groups[group_id]
        else:
            group = {'groupId': group_id_from_metadata(servers[0]['metadata']),
                     'desired': 0}
        servers = map(NovaServer.from_server_details_json, servers)
        _len = compose(len, list, flip(filter, servers))
        active = _len(lambda s: s.state == ServerState.ACTIVE)
        bad = _len(lambda s: s.state in (ServerState.SHUTOFF,
                                         ServerState.ERROR,
                                         ServerState.DELETED))
        yield GroupMetrics(tenant_id, group['groupId'], group['desired'],
                           active, len(servers) - bad - active)
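
A small sketch of the counting idiom used above: toolz's flip is curried, so flip(filter, states) is a function of the predicate, and compose(len, list, ...) turns it into a "count the matching items" helper:

from toolz.functoolz import compose, flip

states = ['ACTIVE', 'ERROR', 'ACTIVE', 'BUILD']
_len = compose(len, list, flip(filter, states))

print(_len(lambda s: s == 'ACTIVE'))   # 2
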
Example #29
    def testMultipartTaxlotReport(self):
        file = join(currentDir(), 'samples',
                    'all funds tax lot 2021-03-31.txt')
        positions = compose(
            list, partial(filter, lambda p: p['Portfolio'] == '12307'),
            readMultipartTaxlotReport)('utf-16', '\t', file)

        cashPositions = list(filter(isTaxlotCash, positions))
        self.assertEqual(2, len(cashPositions))

        p = firstOf(lambda p: p['InvestID'] == 'HKD', cashPositions)
        self.assertAlmostEqual(29762442.60, p['Quantity'])

        otherPositions = list(filterfalse(isTaxlotCash, positions))
        self.assertEqual(114, len(otherPositions))

        p = firstOf(lambda p: p['InvestID'] == '1088 HK', otherPositions)
        self.assertEqual(761500, p['Quantity'])
        self.assertAlmostEqual(14.687, p['UnitCost'], 3)
        self.assertEqual(16.02, p['MarketPrice'])
        self.assertEqual(1569143.80, p['MarketValueBook'])
        self.assertEqual(0, p['AccruedInterestBook'])
        self.assertEqual(
            134206.70,
            p['UnrealizedPriceGainLossBook'] + p['UnrealizedFXGainLossBook'])
Example #30
    def testMultipartTaxlotReport2(self):
        file = join(currentDir(), 'samples',
                    'all funds tax lot 2021-03-31.txt')
        positions = compose(
            list, partial(filter, lambda p: p['Portfolio'] == '60001'),
            readMultipartTaxlotReport)('utf-16', '\t', file)

        cashPositions = list(filter(isTaxlotCash, positions))
        self.assertEqual(2, len(cashPositions))

        p = firstOf(lambda p: p['InvestID'] == 'USD', cashPositions)
        self.assertAlmostEqual(-7198256.77, p['Quantity'])

        fdPositions = list(filter(isTaxlotFixedDeposit, positions))
        self.assertEqual(13, len(fdPositions))

        p = firstOf(
            lambda p: p['InvestID'] == 'IB Fixed Deposit 0.651 07/08/2021',
            fdPositions)
        self.assertEqual(5000000, p['Quantity'])
        self.assertEqual(7504.58, p['AccruedInterestBook'])

        p = firstOf(lambda p: p['InvestID'] == 'US06120TAA60', positions)
        self.assertEqual(7273000, p['Quantity'])
        self.assertAlmostEqual(112.186, p['UnitCost'], 3)
        self.assertEqual(112.449, p['MarketPrice'])
        self.assertEqual(8178415.77, p['MarketValueBook'])
        self.assertAlmostEqual(139399.17, p['AccruedInterestBook'])
        self.assertEqual(
            19121.06,
            p['UnrealizedPriceGainLossBook'] + p['UnrealizedFXGainLossBook'])
Example #31
def optimum_polyfit(x, y, score=functoolz.compose(np.max, np.abs), max_degree=50, stop_at=1e-10):
    """
    Optimize the degree of a polyfit polynomial so that score(y - poly(x)) is minimized.

    :param max_degree: The maximum degree to try. LinAlgErrors are automatically ignored.
    :param stop_at: If a score lower than this is reached, the function returns early
    :param score: The score function that is applied to y - poly(x). Default: max deviation.
    :return A tuple (poly1d object, degree, score)
    """
    scores = np.empty(max_degree - 1, dtype=np.float64)
    # Ignore rank warnings now, but do not ignore for the final polynomial if not early returning
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', np.RankWarning)
        for deg in range(1, max_degree):
            # Set score to max float value
            try:
                poly = np.poly1d(np.polyfit(x, y, deg))
            except np.linalg.LinAlgError:
                scores[deg - 1] = np.finfo(np.float64).max
                continue
            scores[deg - 1] = score(y - poly(x))
            # Early return if we found a polynomial that is good enough
            if scores[deg - 1] <= stop_at:
                return poly, deg, scores[deg - 1]
    # Find minimum score
    deg = np.argmin(scores) + 1
    # Compute polyfit for that degree
    poly = np.poly1d(np.polyfit(x, y, deg))
    return poly, deg, np.min(scores)
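
A quick usage sketch, assuming the function above is importable; the score reported is the maximum absolute deviation of the fitted polynomial:

import numpy as np

x = np.linspace(0.0, 2.0 * np.pi, 200)
y = np.sin(x)

poly, degree, score = optimum_polyfit(x, y)
print(degree, score)   # a moderate degree and a very small maximum deviation
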
Example #32
def split_cf_messages(format_message, var_length_key, event, separator=', ',
                      max_length=255):
    """
    Try to split cloud feed log events out into multiple events if the message
    is too long (the variable-length variable would cause the message to be
    too long.)

    :param str format_message: The format string to use to format the event
    :param str var_length_key: The key in the event dictionary that contains
        the variable-length part of the formatted message.
    :param dict event: The event dictionary
    :param str separator: The separator to use to join the various elements
        that should be varied.  (e.g. if the elements in "var_length_key" are
        ["1", "2", "3"] and the separator is "; ", "var_length_key" will be
        represented as "1; 2; 3")
    :param int max_length: The maximum length of the formatted message.

    :return: `list` of event dictionaries with the formatted message and
        the split event field.
    """
    def length_calc(e):
        return len(format_message.format(**e))

    render = compose(assoc(event, var_length_key), separator.join,
                     curry(map, str))

    if length_calc(event) <= max_length:
        return [(render(event[var_length_key]), format_message)]

    events = split(render, event[var_length_key], max_length, length_calc)
    return [(e, format_message) for e in events]
Example #33
def getHTMPositionsFromFiles(files):
    """
	[Iterable] files (CL trustee excel files)
		=> [Iterable] HTM positions from these files, with ISIN code added to each
			position.
	"""
    def addISINCode(position):

        # some bond identifiers are not ISIN, map them to ISIN
        bondIsinMap = {
            'DBANFB12014': 'HK0000175916',  # Dragon Days Ltd 6% 03/21/22
            'HSBCFN13014': 'HK0000163607'  # New World Development 6% Sept 2023
        }

        getIdentifier = lambda p: p['Description'].split()[0]
        idToISIN = lambda id: bondIsinMap[id] if id in bondIsinMap else id

        return \
        compose(
         lambda isin: mergeDictionary(
           position
           , {'ISIN': isin}
         )
          , idToISIN
          , getIdentifier
        )(position)

    # End of addISINCode()

    htmPositionsFromFile = compose(
        partial(map, addISINCode),
        partial(filter, lambda p: p['AssetType'] == 'HTMBond'), readFile)

    return reduce(chain, map(htmPositionsFromFile, files))
Example #34
def nested_sequence(seq, get_effect=attrgetter('effect'),
                    fallback_dispatcher=base_dispatcher):
    """
    Return a function of Intent -> a that performs an effect retrieved from the
    intent (by accessing its `effect` attribute, by default) with the given
    intent-sequence.

    A demonstration is best::

        SequenceDispatcher([
            (BoundFields(effect=mock.ANY, fields={...}),
             nested_sequence([(SomeIntent(), perform_some_intent)]))
        ])

    The point is that sometimes you have an intent that wraps another effect,
    and you want to ensure that the nested effects follow some sequence in the
    context of that wrapper intent.

    `get_effect` defaults to attrgetter('effect'), so you can override it if
    your intent stores its nested effect in a different attribute. Or, more
    interestingly, if it's something other than a single effect, e.g. for
    ParallelEffects see the :func:`parallel_nested_sequence` function.

    :param seq: sequence of intents like :obj:`SequenceDispatcher` takes
    :param get_effect: callable to get the inner effect from the wrapper
        intent.
    :param fallback_dispatcher: an optional dispatcher to compose onto the
        sequence dispatcher.
    """
    return compose(
        partial(perform_sequence, seq,
                fallback_dispatcher=fallback_dispatcher),
        get_effect)
Example #35
def get_rcv3_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `RCv3Node`.
    """
    eff = service_request(ServiceType.RACKCONNECT_V3, 'GET',
                          'load_balancer_pools')

    def on_listing_pools(lblist_result):
        _, body = lblist_result
        return parallel([
            service_request(
                ServiceType.RACKCONNECT_V3, 'GET',
                append_segments('load_balancer_pools', lb_pool['id'],
                                'nodes')).on(
                                    partial(
                                        on_listing_nodes,
                                        RCv3Description(lb_id=lb_pool['id'])))
            for lb_pool in body
        ])

    def on_listing_nodes(rcv3_description, lbnodes_result):
        _, body = lbnodes_result
        return [
            RCv3Node(node_id=node['id'],
                     description=rcv3_description,
                     cloud_server_id=get_in(('cloud_server', 'id'), node))
            for node in body
        ]

    return eff.on(on_listing_pools).on(success=compose(list, concat),
                                       error=catch(NoSuchEndpoint,
                                                   lambda _: []))
Example #36
 def apply_transforms(self):
     fn = compose(*self.transforms[::-1])
     self.transforms = []
     # In place transformation to save some mem.
     for i in tqdm(range(len(self.data)), total=len(self.data)):
         self.data[i] = (fn(self.data[i][0]), fn(self.data[i][1]))
     return self
Example #37
def extract_and_capitalize_headlines_from_corpus(corpus_dir, docids):
    """
    Iterate through all the files in `corpus_dir`,
    extract the headlines, capitalized and return them
    
    Parameter:
    ---------------
    corpus_dir: string

    docids: list of string
        the document to be processed

    Return:
    --------------
    generator of (docid, headlines): (str, list<list<str>>)
    """
    get_tokens = partial(map, partial(get_in, ["token"]))
    get_features = partial(get_in, ["features"])

    make_capitalized_title_new = lambda words: make_capitalized_title(title_words=words)

    for docid in docids:
        p = Path(corpus_dir) / Path(docid)
        auxil_p = p.with_suffix(".auxil")
        paf_p = p.with_suffix(".paf")
        if auxil_p.exists() and paf_p.exists():
            try:
                titles, _ = separate_title_from_body(str(auxil_p), str(paf_p))
            except Exception as e:
                yield (e, None)
            # pipeline:
            # -> get features
            # -> get tokens
            # -> capitalize headline
            yield (None, (p.name, list(map(compose(make_capitalized_title_new, get_tokens, get_features), titles))))
Example #38
 def apply(self, fn):
     """
     Add a function to the current list of functions. The given function
     will be executed last in the list of functions.
     """
     if self.func == functoolz.identity:
         self.func = fn
     elif isinstance(self.func, functoolz.Compose):
         self.func.funcs.append(fn)
     else:
         self.func = functoolz.compose(fn, self.func)
Example #39
def test_compose():
    assert compose()(0) == 0
    assert compose(inc)(0) == 1
    assert compose(double, inc)(0) == 2
    assert compose(str, iseven, inc, double)(3) == "False"
    assert compose(str, add)(1, 2) == '3'

    def f(a, b, c=10):
        return (a + b) * c

    assert compose(str, inc, f)(1, 2, c=3) == '10'
Example #40
def split_execute_convergence(event, max_length=event_max_length):
    """
    Try to split execute-convergence event out into multiple events if there
    are too many CLB nodes, too many servers, or too many steps.

    The problem is mainly the servers, since they take up the most space.

    Experimentally determined that logs probably get cut off at around 75k
    characters - we're going to limit it to 50k.

    :param dict event: The 'execute-convergence' type event dictionary to split
    :param int max_length: The maximum length of the entire JSON-formatted
        dictionary.

    :return: `list` of `tuple` of (`dict`, `str`).  The `dict`s in the tuple
        represent the split-up event dicts, and the `str` the format string
        for each.  If the event does not need to be split, the list will only
        have one tuple.
    """
    message = "Executing convergence"
    if _json_len(event) <= max_length:
        return [(event, message)]

    events = [(event, message)]
    large_things = sorted(('servers', 'lb_nodes'),
                          key=compose(_json_len, event.get),
                          reverse=True)

    # simplified event which serves as a base for the split out events
    base_event = keyfilter(
        lambda k: k not in ('desired', 'servers', 'lb_nodes', 'steps'),
        event)

    for thing in large_things:
        split_up_events = split(
            assoc(base_event, thing), event[thing], max_length,
            _json_len)
        events.extend([(e, message) for e in split_up_events])
        del event[thing]
        if _json_len(event) <= max_length:
            break

    return events
Example #41
def polarion_safe_string(string):
    """
    Polarion doesn't like . in a string
    :param string:
    :return:
    """
    # As we find other characters to replace, add a new
    # function and compose them
    def no_dot(s):
        return s.replace(".", "-")

    def no_newline(s):
        return s.strip()

    def no_colon(s):
        return s.replace(":", " ")

    safe = ftz.compose(no_colon, no_dot, no_newline)
    return safe(string)
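
A quick check of the composed sanitizers, assuming the function above is in scope (strip runs first, then '.' becomes '-', then ':' becomes ' '):

print(polarion_safe_string(' my.test:case \n'))   # my-test case
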
Example #42
def get_scaling_group_servers(tenant_id, authenticator, service_name, region,
                              server_predicate=None, clock=None):
    """
    Return tenant's servers that belong to a scaling group as
    {group_id: [server1, server2]} ``dict``. No specific ordering is guaranteed

    :param server_predicate: `callable` taking single server as arg and returns True
                              if the server should be included, False otherwise
    """

    def has_group_id(s):
        return 'metadata' in s and 'rax:auto_scaling_group_id' in s['metadata']

    def group_id(s):
        return s['metadata']['rax:auto_scaling_group_id']

    server_predicate = server_predicate if server_predicate is not None else lambda s: s
    servers_apply = compose(groupby(group_id), filter(server_predicate), filter(has_group_id))

    d = get_all_server_details(tenant_id, authenticator, service_name, region, clock=clock)
    d.addCallback(servers_apply)
    return d
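
The servers_apply pipeline relies on the curried variants of groupby and filter from toolz.curried, so each single-argument call returns a function; a stand-alone sketch of the same grouping:

from toolz.curried import compose, filter, groupby

servers = [ {'id': 's1', 'metadata': {'rax:auto_scaling_group_id': 'g1'}}
          , {'id': 's2', 'metadata': {'rax:auto_scaling_group_id': 'g2'}}
          , {'id': 's3', 'metadata': {}}
          ]

has_group_id = lambda s: 'metadata' in s and 'rax:auto_scaling_group_id' in s['metadata']
group_id = lambda s: s['metadata']['rax:auto_scaling_group_id']

servers_apply = compose(groupby(group_id), filter(has_group_id))
print(servers_apply(servers))   # {'g1': [{'id': 's1', ...}], 'g2': [{'id': 's2', ...}]}
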
Example #43
def get_facet_items_dict(facet, limit=None, exclude_active=False, sort_key='count'):
    '''
    Monkey-Patch of ckan/lib/helpers/get_facet_items_dict()
    CKANs sorting of facet items is hardcoded (https://github.com/ckan/ckan/issues/3271)
    Also: refactored to be a bit more functional (SCNR)
    '''

    try:
        f = c.search_facets.get(facet)['items']
    except:
        return []

    def active(facet_item):
        if not (facet, facet_item['name']) in tk.request.params.items():
            return dict(active=False, **facet_item)
        elif not exclude_active:
            return dict(active=True, **facet_item)

    def sort_facet(f):
        key = 'count'
        names = map(lambda i: i['name'], f)
        if sort_key == 'name' and any(map(str_to_int, names)):
            key = 'name'
        return sorted(f, key=lambda item: item[key], reverse=True)

    # for some reason limit is not in scope here, so it must be a param
    def set_limit(facs, limit):
        if c.search_facets_limits and limit is None:
            limit = c.search_facets_limits.get(facet)
        # zero treated as infinite for hysterical raisins
        if limit is not None and limit > 0:
            return facs[:limit]
        return facs

    filter_empty_name = partial(filter, lambda i: len(i['name'].strip()) > 0)
    isdict = partial(filter, lambda i: isinstance(i, dict))
    facets = compose(sort_facet, isdict, partial(map, active), filter_empty_name)(f)
    return set_limit(facets, limit)
Example #44
    def _find_matching_event_abi(cls, event_name=None, argument_names=None):
        filters = [
            functools.partial(filter_by_type, 'event'),
        ]

        if event_name is not None:
            filters.append(functools.partial(filter_by_name, event_name))

        if argument_names is not None:
            filters.append(
                functools.partial(filter_by_argument_name, argument_names)
            )

        filter_fn = compose(*filters)

        event_abi_candidates = filter_fn(cls.abi)

        if len(event_abi_candidates) == 1:
            return event_abi_candidates[0]
        elif not event_abi_candidates:
            raise ValueError("No matching functions found")
        else:
            raise ValueError("Multiple functions found")
Example #45
def get_all_scaling_group_servers(changes_since=None,
                                  server_predicate=identity):
    """
    Return tenant's servers that belong to any scaling group as
    {group_id: [server1, server2]} ``dict``. No specific ordering is guaranteed

    :param datetime changes_since: Get server since this time. Must be UTC
    :param server_predicate: function of server -> bool that determines whether
        the server should be included in the result.
    :return: dict mapping group IDs to lists of Nova servers.
    """

    def has_group_id(s):
        return 'metadata' in s and isinstance(s['metadata'], dict)

    def group_id(s):
        return group_id_from_metadata(s['metadata'])

    servers_apply = compose(keyfilter(lambda k: k is not None),
                            groupby(group_id),
                            filter(server_predicate),
                            filter(has_group_id))

    return get_all_server_details(changes_since).on(servers_apply)
Example #46
        data = json.dumps(data)
    return get_request(method, url, data=data, **kwargs).on(
        success=lambda r: (r[0], json.loads(r[1])))


def content_request(effect):
    """Only return the content part of a response."""
    return effect.on(success=lambda r: r[1])


_request = wrappers(
    get_request,
    request_with_auth,
    request_with_status_check,
    request_with_json)
_request = compose(content_request, _request)


def request(method, url, *args, **kwargs):
    """
    Make an HTTP request, with a number of conveniences. Accepts the same
    arguments as :class:`Request`, in addition to these:

    :param tuple success_codes: integer HTTP codes to accept as successful
    :param data: python object, to be encoded with json
    :param get_auth_headers: a function to retrieve auth tokens
    :param refresh_auth_info: a function to refresh the auth cache
    :param tuple reauth_codes: integer HTTP codes upon which to reauthenticate
    """
    return _request(method, url, *args, **kwargs)
Example #47
 def setUp(self):
     self.now = datetime(2010, 5, 31)
     self.freeze = compose(set, map(freeze))
Example #48
import sys
from toolz.functoolz import compose
from toolz.itertoolz import map, zip, second, drop, nth, iterate, first
from functools import partial
import itertools
import sh
''' samtools mpileup -cf ref.fasta hu.bam -g | bcftools view  -'''

'''
use this to create consensus, and bioframes.py to create the VCF comparison
#NOTE: freebayes requires ALL reads to be tagged with an RG, which requires a slight change to
# tagreads.py:  https://github.com/VDBWRAIR/ngs_mapper/blob/9523d32effd268543611b60758991a99373a65f5/ngs_mapper/tagreads.py#L56-L59
'''
''' how to handle file globbing and multiple dirs at once? And: a compiled report as final task target?'''
AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N', 'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': 'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D', 'CGT': 'B', 'ACGT': 'N' }
get_degen = compose(AMBIGUITY_TABLE.__getitem__, ''.join, sorted)
insert_gap = lambda s, x: s[:x]+ '-' + s[x+1:]
from operator import methodcaller as call, attrgetter
def make_dict(classes):
    return dict(zip(map(attrgetter('__name__'), classes), classes))
#TODO: fix ambiguous base definition
#TODO: this doesn't work for flu sequences which have multiple sections, \
#    and the  VCFs have multiple references. need to group by reference
def fix_fb_df(df):
    #Freebayes only ever reports one ALT?
    df.ALT = df.ALT.apply(lambda x: x[0])
    # the vcf library reports alts as _Substitution/whatever objects. extract the string.
    df.REF, df.ALT = df.REF.apply(str), df.ALT.apply(str)
    '''#TODO: this re-definition of ambiguous bases translates mult-base
    sections (e.g. AC) into single base alts or something'''
    ambiguous = ((df.AO / df.DP.apply(float)) < 0.8)
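
get_degen above maps a collection of bases to its IUPAC ambiguity code via AMBIGUITY_TABLE; a quick check, assuming the definitions above are in scope:

print(get_degen(['T', 'A']))   # 'W'  (sorted and joined to 'AT')
print(get_degen('GC'))         # 'S'  (sorted and joined to 'CG')
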
Example #49
def zero_coverage_positions(bam_file, ref_file):
    pileup = sh.samtools('mpileup', bam_file, f=ref_file, _iter=True)
    return map(compose(int, second, unicode.split), pileup)
Example #50
 def factory(cls, *args, **kwargs):
     return compose(cls, Contract.factory(*args, **kwargs))
Example #51
"""
Format logs based on specification
"""
import json
import math

from toolz.curried import assoc
from toolz.dicttoolz import keyfilter
from toolz.functoolz import compose, curry

from twisted.python.failure import Failure

from otter.log.formatters import LoggingEncoder


_json_len = compose(len, curry(json.dumps, cls=LoggingEncoder))

# Maximum length of entire JSON-formatted event dictionary
event_max_length = 50000


def split_execute_convergence(event, max_length=event_max_length):
    """
    Try to split execute-convergence event out into multiple events if there
    are too many CLB nodes, too many servers, or too many steps.

    The problem is mainly the servers, since they take up the most space.

    Experimentally determined that logs probably get cut off at around 75k
    characters - we're going to limit it to 50k.
Example #52
def uppercase_first_letter_compose(word):

    comp = functoolz.compose(get(0), lambda c: c.upper())
    return comp(word)
Example #53
def printable_train_data(malform_data_dir,
                         okform_data_dir,
                         ids,
                         extractor, feature_names,
                         start, end=None,
                         title_transform_func=make_capitalized_title,
                         exclude_labels=None,
                         exclude_word_positions=set([0])):
    """

    Adapted to PULS requirement:
    
    - auxil file is read to get the additional preprocessed features

    Parameters
    ------------
    malform_data_dir: string
        the directory where the malformed data reside
    okform_data_dir: string
        the directory where the correctly formed data reside
    ids: list of string
        document ids
    extractor: FeatureExtractor
        the feature extractor
    feature_names: list of string
        the feature names
    start, end: int
        how many titles to extract
    title_transform_func: function
        function that accepts the title and transforms it
        into some badly capitalized version
    exclude_labels: iterable of str
        labels that we don't consider

    Returns
    ------------
    Generator of str:
        each str is one sentence, each line in the str is one token in the sent
        
    """
    feature_names += ['y']  # add the label feature name
    malform_data_dir = Path(malform_data_dir)

    # take care of this ["tickerSymbol",["NYSE","SKT"]]
    # /cs/taatto/home/hxiao/capitalization-recovery/corpus/puls-format-capitalized/3987E0BD03749C996A04B881079AD753.auxil
    clean_tag = (lambda t: t[0] if isinstance(t, list) else t)
    get_tokens = partial(map, partial(get_in, ['token']))
    get_tags = partial(map, compose(clean_tag,
                                    partial(get_in, ['pos'])))
    get_lemmas = partial(map, partial(get_in, ['lemma']))

    n_collected = 0

    for i, id_ in enumerate(ids):
        if i < start:
            continue
            
        if i % 1000 == 0:
            logger.info("Collected %d" % n_collected)
            logger.info("Finished %d" % i)

        if end is not None and i >= end:
            logger.info("Reached %d. Terminate." % end)
            break

        try:
            malform_auxil_path = (malform_data_dir /
                                  Path(id_)).with_suffix('.auxil')
            with malform_auxil_path.open(encoding='utf8') as f:
                logger.debug('processing: {}'.format(id_))
                # to get the last line
                lines = f.readlines()
                if len(lines) == 0:
                    raise EmptyFileError('auxil file empty: {}'.format(malform_auxil_path))

                l = lines[-1]
                    
                data = json.loads(l.strip())

                okform_auxil_path = str((okform_data_dir /
                                         Path(id_)).with_suffix('.auxil'))
                okform_paf_path = str((okform_data_dir /
                                       Path(id_)).with_suffix('.paf'))

                good_title_sents, body_sents = separate_title_from_body(
                    okform_auxil_path,
                    okform_paf_path
                )

                # extract the tokens
                doc = [[t['token'] for t in sent['features']]
                       for sent in body_sents]

                good_title_sents = list(good_title_sents)

                bad_title_sents = data['sents']
                if not isinstance(bad_title_sents, list):
                    raise InvalidTitleError(
                        'bad_title_sents not a list: {}'.format(
                            bad_title_sents)
                    )

                # we only consider headline that contains only ONE sentence
                if (len(good_title_sents) == 1 and
                    len(bad_title_sents) == 1):
                    good_sent = good_title_sents[0]
                    bad_sent = bad_title_sents[0]
                    good_title_tokens = get_tokens(good_sent['features'])
                    bad_title_tokens = get_tokens(bad_sent['features'])

                    # some validity checking
                    if len(good_title_tokens) != len(bad_title_tokens):
                        raise TitleInconsistencyError('{}\n{}'.format(
                            good_title_tokens, bad_title_tokens)
                        )

                    good_title_tokens_lower = map(lambda s: s.lower(),
                                                  good_title_tokens)
                    bad_title_tokens_lower = map(lambda s: s.lower(),
                                                 bad_title_tokens)
                    if (good_title_tokens_lower != bad_title_tokens_lower):
                            raise TitleInconsistencyError('{}\n{}'.format(
                                good_title_tokens_lower,
                                bad_title_tokens_lower)
                            )

                    tags = get_tags(bad_sent['features'])
                    lemmas = get_lemmas(bad_sent['features'])

                    # tag validity checking
                    for tag in tags:
                        if not (tag is None or
                                isinstance(tag, basestring)):
                            raise InvalidTitleError(
                                '{}: tag {} not string'.format(id_, tag)
                            )

                    # get malformed title tokens
                    words = convert_to_trainable_format(
                        good_title_tokens,
                        title_transform_func,
                        extractor,
                        doc=doc,
                        pos=tags,
                        lemma=lemmas
                    )

                    # format the features in the required form
                    res = unicode()
                    for i, word in enumerate(words):
                        if (i not in exclude_word_positions
                            and exclude_labels
                            and word['y'] not in exclude_labels):
                            word_feature_str = u'\t'.join(
                                [unicode(word[feature_name])
                                 for feature_name in feature_names])
                            res += word_feature_str + '\n'
                    n_collected += 1
                    yield id_, res
                else:
                    raise TitleInconsistencyError(
                        '# of title sentences more than 1: {}'.format(id_)
                    )
        except (IOError, TitleInconsistencyError,
                InvalidTitleError, EmptyFileError):
            logger.debug(traceback.format_exc())
            continue
        except:
            logger.error(traceback.format_exc())
            continue
Example #54
#!/usr/bin/env python3
"""
File utilities
"""
from toolz import functoolz
import operator
import numpy as np
from .NumPy import numpy_resize_insert

_strip_newline = lambda s: s.strip("\n")
__standard_isline = functoolz.compose(bool, str.strip)
# Utility to get nth CSV column
_csv_nthcol = lambda n: functoolz.compose(operator.itemgetter(n), lambda s: s.partition(','))
_csv_firstcol = _csv_nthcol(0)

def count_lines(flo, isline=__standard_isline):
    """
    Count the lines in a file.

    Takes a file-like object. Strings are treated as filenames.
    Returns the number of lines.
    """
    # Open it if it is a string
    if isinstance(flo, str):
        with open(flo, "r") as infile:
            return count_lines(infile, isline=isline)
    # Actual counting code
    num_lines = 0
    for line in flo:
        num_lines += 1 if isline(line) else 0
    return num_lines
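
A quick usage sketch for count_lines, assuming the module above is importable; the default isline predicate treats blank and whitespace-only lines as non-lines:

import io

print(count_lines(io.StringIO('a\n\n b\n   \n')))   # 2
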
Example #55
 def test_apply_composed(self):
     "Test apply on functoolz composed function"
     cg = overlapping_chunks(self.data1, 3, 3)
     cg.apply(functoolz.compose(functoolz.identity, np.square))
     assert_array_equal(cg.as_array(), np.square(self.result1))