Exemplo n.º 1
0
def generateFilterByRegex(regexStrL, convIdxL):
    """
    Build a record filter that tests selected columns against regexes.

    regexStrL :: [str]
        Regular expression string list.
        The i-th pattern is applied to the i-th projected column.
    convIdxL :: [(str -> ANY), int]
        Column index list with converter.
        converters will be ignored.
        Specify 1 for first element.
        0 means all columns.
    return :: tuple(str) -> bool
        Filter function.
        It raises IOError when the projected record has more
        columns than there are regular expressions.

    """
    # Build real lists (not lazy map objects) so that len() below works
    # and the patterns can be reused for every record.
    regexL = [re.compile(regexStr) for regexStr in regexStrL]
    # Only the index half of each (converter, index) pair is used.
    idxL = [idx for _, idx in convIdxL]
    project1 = pysows.generateProject(idxL)

    def filterByRegex(rec):
        """
        rec :: tuple(str)
        return :: bool
            True when every projected column matches its pattern.
            Patterns are applied with re.match, i.e. anchored at the
            start of the column string only.

        """
        rec1 = project1(rec)
        if len(regexL) < len(rec1):
            raise IOError("key length %d > number of regex %d." % (len(rec1), len(regexL)))

        # zip() stops at the shorter input, so surplus patterns (more
        # regexes than projected columns) are simply not consulted.
        # The generator lets all() stop at the first non-matching column.
        return all(regex.match(x) is not None
                   for regex, x in zip(regexL, rec1))

    return filterByRegex
Exemplo n.º 2
0
def doMain():
    """
    Command-line entry point.

    Parses the options from sys.argv, opens the left and right inputs
    with the configured separator, builds a hash table from the left
    input via createHashTable, feeds the right input through hashJoin,
    and prints one output record per joined (left, right) pair,
    writing a newline after each record.

    """
    args = parseOpts(sys.argv[1:])

    # Build a concrete list: a lazy map object (Python 3) could only be
    # traversed once, while the output-record builder may need it for
    # every joined pair.
    outColumnIdxes = [
        prefixToIsLeft(prefixedIdx)
        for prefixedIdx in getColumnIndexListWithPrefix(args.out_columns)
    ]
    getOutRec = generateGetOutputRecord(outColumnIdxes)

    lReader = pysows.recordReader(args.left_input, args.separator)
    rReader = pysows.recordReader(args.right_input, args.separator)

    lKeyIdxL, rKeyIdxL = getKeyIndexLists(args)
    lGetKey = pysows.generateProject(lKeyIdxL)
    rGetKey = pysows.generateProject(rKeyIdxL)

    hashTable = createHashTable(lReader, lGetKey)
    resultIter = hashJoin(hashTable, rReader, rGetKey)

    for lRec, rRec in resultIter:
        pysows.printList(getOutRec(lRec, rRec))
        # Explicit write instead of a bare `print`: the bare form emits a
        # newline only as a Python 2 statement and is a silent no-op
        # expression on Python 3.
        # NOTE(review): presumably printList does not terminate the line
        # itself -- confirm against pysows.
        sys.stdout.write("\n")