示例#1
0
    def _getRecentColumnIdx(siteText, tableBeginIdx):
        # type: (str, int) -> int
        """Locate the "recent" column among the header cells of a table.

        The header cells are whatever ``getNextRowCells(siteText,
        tableBeginIdx, "th")`` yields. A cell counts as the recent column when
        its text contains ``"~1000"`` or ``"~1,000"``.

        :param siteText: page text that is handed to ``getNextRowCells``.
        :param tableBeginIdx: offset in ``siteText`` handed to
            ``getNextRowCells``.
        :return: the lowest index of a matching header cell, or
            ``COLUMN_ID_NOT_FOUND`` when no cell matches.
        """
        headerCells = getNextRowCells(siteText, tableBeginIdx, "th")

        # Every cell is inspected; only the leftmost hit is reported.
        matchingIdxs = [
            idx
            for idx, cell in enumerate(headerCells)
            if "~1000" in cell or "~1,000" in cell
        ]

        if not matchingIdxs:
            return COLUMN_ID_NOT_FOUND
        return matchingIdxs[0]
示例#2
0
def _getOverallAndRecentColumnIdx(siteText, tableBeginIdx):
    """Find the indices of the "overall" and "recent" header columns.

    The header cells are whatever ``getNextRowCells(siteText, tableBeginIdx,
    "th")`` returns. A cell is the overall column when it contains
    ``"Общий"`` or ``"Overall"``; it is the recent column when it contains
    ``"~1000"`` or ``"~1,000"``. If several cells match, the lowest index wins.

    :param siteText: page text that is handed to ``getNextRowCells``.
    :param tableBeginIdx: offset in ``siteText`` handed to
        ``getNextRowCells``.
    :return: ``(overallColumnIdx, recentColumnIdx)``; ``recentColumnIdx`` is
        ``COLUMN_ID_NOT_FOUND`` when no recent column exists.
    :raises AssertionError: when no overall column is present (only while
        assertions are enabled).
    """
    ths = getNextRowCells(siteText, tableBeginIdx, "th")

    overallColumnIdx = COLUMN_ID_NOT_FOUND
    recentColumnIdx = COLUMN_ID_NOT_FOUND

    # Walk right-to-left so that the leftmost matching cell is assigned last
    # and therefore is the one that is kept.
    for i, th in reversed(tuple(enumerate(ths))):
        if "Общий" in th or "Overall" in th:
            overallColumnIdx = i
        if "~1000" in th or "~1,000" in th:
            recentColumnIdx = i

    # Wrap ``ths`` in a 1-tuple: if it is itself a tuple, ``"%s" % ths`` would
    # unpack it as the format arguments and raise TypeError (or print only the
    # single element) instead of reporting the real problem.
    assert overallColumnIdx != COLUMN_ID_NOT_FOUND, (
        "No overall column found in %s" % (ths,)
    )

    return overallColumnIdx, recentColumnIdx
示例#3
0
def _getTrsWithData(siteText, tableBeginIdx):
    iterations = 0

    headerEndIdx = siteText.find("</tr>", tableBeginIdx)
    tableEndIdx = siteText.find("</table>", headerEndIdx)
    nextTrBeginIdx = headerEndIdx

    trs = list()

    while nextTrBeginIdx != -1 and nextTrBeginIdx < tableEndIdx:
        nowTrBeginIdx = nextTrBeginIdx

        tds = getNextRowCells(siteText, nowTrBeginIdx)
        trs.append(tds)

        nextTrBeginIdx = siteText.find("<tr", nowTrBeginIdx + 1)

        assert iterations < MAX_ITERATIONS, "Too many iterations: %s" % iterations
        iterations += 1

    return trs