Пример #1
0
def main():
    fileName = arsutils.getRemoveCmdArg("-file")
    zipCode = arsutils.getRemoveCmdArg("-reverseZip")
    if None == fileName and None == zipCode:
        usage()
        sys.exit(0)

    if 1 != len(sys.argv):
        usage()
        sys.exit(0)

    if None != zipCode:
        retrieveYpReverseAreaCode(zipCode)
        return

    fo = open(fileName, "rb")
    htmlTxt = fo.read()
    fo.close()
    (resultType, resultBody) = reversePhoneLookup(htmlTxt, fileName, fDebug=True)

    if MODULE_DOWN == resultType:
        print "module down"
    if PARSING_FAILED == resultType:
        print "parsing failed"
    if RESULTS_DATA == resultType:
        print "got BOXOFFICE"
        print udfPrettyPrint(resultBody)
Пример #2
0
def main():
    itemId = arsutils.getRemoveCmdArg("-item")
    if None == itemId:
        itemId = arsutils.getRemoveCmdArg("--item")
    if itemId:
        (resultType, resultBody) = getLyricsItem(itemId, None, dbgLevel=1)
    else:
        if 2 != len(sys.argv):
            usage()
            sys.exit(0)

        arg = sys.argv[1]
        print "arg=%s" % arg
        argParts = arg.split(";")
        if 5 != len(argParts):
            print "len(argParts) = %d, not 5" % len(argParts)
            usage()
            sys.exit(0)

        artist,title,album,composer,fullText = argParts

        (resultType, resultBody) = getLyricsSearch(artist, title, album, composer, fullText, None, dbgLevel=1)

    if MODULE_DOWN == resultType:
        print "module down"
    if UNKNOWN_FORMAT == resultType:
        print "unknown format"
    if NO_RESULTS == resultType:
        print "no results"
    if LYRICS_ITEM == resultType:
        print "got LYRICS_ITEM"
        #print udfPrettyPrint(resultBody)
    if LYRICS_SEARCH == resultType:
        print "got LYRICS_SEARCH"
        print udfPrettyPrint(resultBody)
Пример #3
0
def main():
    global g_dbName
    dbs = iPediaServer.getIpediaDbList()
    if 0==len(dbs):
        print "No databases available"

    dbs.sort()

    fListDbs = arsutils.fDetectRemoveCmdFlag("-listdbs")
    if fListDbs:
        for dbName in dbs:
            print dbName
        sys.exit(0)

    dbName=arsutils.getRemoveCmdArg("-db")

    if dbName:
        if dbName in dbs:
            print "Using database '%s'" % dbName
        else:
            print "Database '%s' doesn't exist" % dbName
            print "Available databases:"
            for name in dbs:
                print "  %s" % name
            sys.exit(0)
    else: 
        dbName=dbs[-1] # use the latest database

    print "Using database '%s'" % dbName
    g_dbName = dbName
    validateRedirects()
    closeConn()
Пример #4
0
def main():
    global g_dbName
    dbs = iPediaServer.getIpediaDbList()
    if 0 == len(dbs):
        print "No databases available"

    dbs.sort()

    fListDbs = arsutils.fDetectRemoveCmdFlag("-listdbs")
    if fListDbs:
        for dbName in dbs:
            print dbName
        sys.exit(0)

    dbName = arsutils.getRemoveCmdArg("-db")

    if dbName:
        if dbName in dbs:
            print "Using database '%s'" % dbName
        else:
            print "Database '%s' doesn't exist" % dbName
            print "Available databases:"
            for name in dbs:
                print "  %s" % name
            sys.exit(0)
    else:
        dbName = dbs[-1]  # use the latest database

    print "Using database '%s'" % dbName
    g_dbName = dbName
    validateRedirects()
    closeConn()
Пример #5
0
def main():
    # _test()
    # return
    spider = fDetectRemoveCmdFlag("-spider")
    reindex = fDetectRemoveCmdFlag("-reindex")
    update = fDetectRemoveCmdFlag("-update")
    force = fDetectRemoveCmdFlag("-force")
    test_query = getRemoveCmdArg("-test")
    # undocumented flags just for testing ;)
    backup = fDetectRemoveCmdFlag("--backup-index")
    use_old_index = fDetectRemoveCmdFlag("--update-old-index")
    update_index = fDetectRemoveCmdFlag("--force-index-update")
    bump_version = fDetectRemoveCmdFlag("--bump_version")
    if test_query:
        formats = FORMATS_ALL
        if -1 != test_query.find(";"):
            test_query, formats = [s.strip() for s in test_query.split(";", 1)]
        type = SEARCH_ANY
        if -1 != test_query.find(":"):
            t, q = [s.strip() for s in test_query.split(":", 1)]
            if t in SEARCH_TYPES:
                type, test_query = t, q
        for doc in _find_proxy(test_query, formats, type):
            d = _doc_to_tuple(doc)
            if 0 != len(d[1]):
                print "%s, \"%s (%s)\"" % (d[2], d[0], d[1])
            else:
                print "%s, \"%s\"" % (d[2], d[0])
        return
    if spider and reindex:
        update = True
    if update:
        update_all(force, force_index_update = update_index, update_old_index = use_old_index )
        return
    if spider:
        spider_all(force)
        return
    if reindex:
        reindex_all(use_old_index )
        return
    if backup:
        _backup_index()
        return
    if bump_version:
        _bump_version()
        return

    print """
usage: ebooks.py (-spider | -reindex | -update | -test "phrase") [-force]
Options:
    -spider  - performs only incremental spidering of all data
    -reindex - only reindexes existing spidered data.
    -update  - performs spidering and reindexing (only if fresh data was
               spidered).
    -force   - (only with -spider or -update) discards previously spidered data
               prior to spidering.
    -test    - tests searching in the index.
"""
    return 1
Пример #6
0
def main():
    itemId = arsutils.getRemoveCmdArg("-item")
    if None == itemId:
        itemId = arsutils.getRemoveCmdArg("--item")
    if itemId:
        (resultType, resultBody) = getLyricsItem(itemId, None, dbgLevel=1)
    else:
        if 2 != len(sys.argv):
            usage()
            sys.exit(0)

        arg = sys.argv[1]
        print "arg=%s" % arg
        argParts = arg.split(";")
        if 5 != len(argParts):
            print "len(argParts) = %d, not 5" % len(argParts)
            usage()
            sys.exit(0)

        artist, title, album, composer, fullText = argParts

        (resultType, resultBody) = getLyricsSearch(artist,
                                                   title,
                                                   album,
                                                   composer,
                                                   fullText,
                                                   None,
                                                   dbgLevel=1)

    if MODULE_DOWN == resultType:
        print "module down"
    if UNKNOWN_FORMAT == resultType:
        print "unknown format"
    if NO_RESULTS == resultType:
        print "no results"
    if LYRICS_ITEM == resultType:
        print "got LYRICS_ITEM"
        #print udfPrettyPrint(resultBody)
    if LYRICS_SEARCH == resultType:
        print "got LYRICS_SEARCH"
        print udfPrettyPrint(resultBody)
Пример #7
0
def main():
    fDumpCache = arsutils.fDetectRemoveCmdFlag("-dump-cache")
    if not fDumpCache:
        fDumpCache = arsutils.fDetectRemoveCmdFlag("--dump-cache")

    if fDumpCache:
        dumpCache()
        sys.exit(0)

    fileName = arsutils.getRemoveCmdArg("-file")

    if None != fileName:
        if 1 != len(sys.argv):
            usage()
            print sys.argv
            sys.exit(0)
        fo = open(fileName, "rb")
        htmlTxt = fo.read()
        fo.close()
        (resultType, resultBody) = parseGas(htmlTxt, url=fileName, dbgLevel=1)
    else:
        if 2 != len(sys.argv):
            usage()
            sys.exit(0)
        zipCode = sys.argv[1]
        print "zipCode: %s" % zipCode
        (resultType, resultBody) = getGasPricesForZip(cookielib.CookieJar(),
                                                      zipCode,
                                                      dbgLevel=1)

    if RETRIEVE_FAILED == resultType:
        print "retrieve failed"
    if PARSING_FAILED == resultType:
        htmlTxt = resultBody[1]
        #print htmlTxt
        print "parsing failed"
    if NO_RESULTS == resultType:
        print "no results"
    if LOCATION_UNKNOWN == resultType:
        print "location unknown"
    if GAS_DATA == resultType:
        print udfPrettyPrint(resultBody)
Пример #8
0
def main():
    fDumpCache = arsutils.fDetectRemoveCmdFlag("-dump-cache")
    if not fDumpCache:
        fDumpCache = arsutils.fDetectRemoveCmdFlag("--dump-cache")

    if fDumpCache:
        dumpCache()
        sys.exit(0)

    fileName = arsutils.getRemoveCmdArg("-file")


    if None != fileName:
        if 1 != len(sys.argv):
            usage()
            print sys.argv
            sys.exit(0)
        fo = open(fileName, "rb")
        htmlTxt = fo.read()
        fo.close()
        (resultType, resultBody) = parseGas(htmlTxt, url=fileName, dbgLevel=1)
    else:
        if 2 != len(sys.argv):
            usage()
            sys.exit(0)
        zipCode = sys.argv[1]
        print "zipCode: %s" % zipCode
        (resultType, resultBody) = getGasPricesForZip(cookielib.CookieJar(), zipCode, dbgLevel=1)

    if RETRIEVE_FAILED == resultType:
        print "retrieve failed"
    if PARSING_FAILED == resultType:
        htmlTxt = resultBody[1]
        #print htmlTxt
        print "parsing failed"
    if NO_RESULTS == resultType:
        print "no results"
    if LOCATION_UNKNOWN == resultType:
        print "location unknown"
    if GAS_DATA == resultType:
        print udfPrettyPrint(resultBody)
Пример #9
0
    elif "KJKLAP1" == computerName:
        g_serverToUse = "kjk"
    elif "TLAP" == computerName:
        g_serverToUse = "kjk"
    elif "RABBAN" == computerName:
        g_serverToUse = "andrzej"
    elif "GIZMO" == computerName:
        g_serverToUse = "pc"
    else:
        print "Don't know the server for computer %s" % computerName
        sys.exit(0)


if __name__ == "__main__":
    serverToUse = None
    serverToUse = arsutils.getRemoveCmdArg("-server")
    if None == serverToUse:
        detectAndSetServerToUse()
    else:
        if not g_serverList.has_key(serverToUse):
            print "server %s is not known. Known servers:" % serverToUse
            for serverName in g_serverList.keys():
                print "   %s" % serverName
            sys.exit(0)
        else:
            g_serverToUse = serverToUse

    if arsutils.fDetectRemoveCmdFlag("-raw"):
        g_showRawOutput = True

    printUsedServer()
Пример #10
0
        txt = article.getTxt()
        converted = articleconvert.convertArticle(title,txt)
        print "TITLE: %s" % title
        print "ORIGINAL: %s" % txt
        print "CONVERTED: %s" % converted
        return

# Script entry point: parses the command-line options and validates their
# combinations.  (The visible snippet ends inside the last check.)
if __name__=="__main__":
    fRandom = arsutils.fDetectRemoveCmdFlag("-random")
    fDump = arsutils.fDetectRemoveCmdFlag("-dump")

    # -dump and -random are mutually exclusive
    if fDump and fRandom:
        print "Can't use -dump and -random at the same time"
        usageAndExit()

    title = arsutils.getRemoveCmdArg("-title")

    fSave = arsutils.fDetectRemoveCmdFlag("-save")
    fForceConvert = arsutils.fDetectRemoveCmdFlag("-forceconvert")

    # exactly one of -title / -random has to be given
    if title and fRandom:
        print "Can't use -title and -random at the same time"
        usageAndExit()

    if not title and not fRandom:
        print "Have to provide either -title or -random"
        usageAndExit()

    # With all options consumed, sys.argv should hold just the script name
    # and the dump file name (the arsutils helpers presumably remove the
    # options they recognize from sys.argv).
    if len(sys.argv) != 2:
        print "Have to provide *.sql or *.sql.bz2 file name with wikipedia dump"
Пример #11
0
        if count % 20000 == 0:
            print "processed %d articles, found %d small" % (count,len(articles))
    print "Articles without comma in orig: %d" % countNoComma
    avgSize = float(totalSizeNoComma)/float(countNoComma)
    print "Average size: %.2f" % avgSize
    return articles

def dumpArticles(fileName,articles):
    """Write the title and text of every article to the file fileName.

    For each article two entries are written: the stripped title as
    !'<title>' and the stripped text as '<text>', each terminated by a
    newline.  An existing file is overwritten.

    fileName -- path of the output file
    articles -- iterable of objects providing getTitle() and getText()
    """
    fo = open(fileName, "wb")
    try:
        for article in articles:
            fo.write("!'%s'\n" % article.getTitle().strip())
            fo.write("'%s'\n" % (article.getText().strip()) )
    finally:
        # release the handle even if an accessor or a write raises
        fo.close()

if __name__=="__main__":
    size = arsutils.getRemoveCmdArg("-size")
    if None == size:
        size = DEFAULT_SIZE
    else:
        size = int(size)

    # now we should only have file name
    if len(sys.argv) != 2:
        print "Have to provide *.sql or *.sql.bz2 file name with wikipedia dump"
        usageAndExit()
    fileName = sys.argv[1]
    fileNameOutConv = wikipediasql.getSmallConverterFileName(fileName)
    fileNameOutOrig = wikipediasql.getSmallOrigFileName(fileName)

    articles = findConvertedArticlesUnderThreshold(fileName,size)
    dumpArticles(fileNameOutConv,articles)
Пример #12
0
    print "Articles without comma in orig: %d" % countNoComma
    avgSize = float(totalSizeNoComma) / float(countNoComma)
    print "Average size: %.2f" % avgSize
    return articles


def dumpArticles(fileName, articles):
    """Dump the title and text of each article into the file fileName.

    Every article produces two entries, each ending in a newline: the
    stripped title written as !'<title>' and the stripped text written as
    '<text>'.  The file is created or overwritten.

    fileName -- path of the file to write
    articles -- iterable of objects providing getTitle() and getText()
    """
    fo = open(fileName, "wb")
    try:
        for article in articles:
            fo.write("!'%s'\n" % article.getTitle().strip())
            fo.write("'%s'\n" % (article.getText().strip()))
    finally:
        # always close the file, also when writing an article fails
        fo.close()


if __name__ == "__main__":
    size = arsutils.getRemoveCmdArg("-size")
    if None == size:
        size = DEFAULT_SIZE
    else:
        size = int(size)

    # now we should only have file name
    if len(sys.argv) != 2:
        print "Have to provide *.sql or *.sql.bz2 file name with wikipedia dump"
        usageAndExit()
    fileName = sys.argv[1]
    fileNameOutConv = wikipediasql.getSmallConverterFileName(fileName)
    fileNameOutOrig = wikipediasql.getSmallOrigFileName(fileName)

    articles = findConvertedArticlesUnderThreshold(fileName, size)
    dumpArticles(fileNameOutConv, articles)
Пример #13
0
        converted = articleconvert.convertArticle(title, txt)
        print "TITLE: %s" % title
        print "ORIGINAL: %s" % txt
        print "CONVERTED: %s" % converted
        return


# Script entry point: reads the command-line options and rejects invalid
# combinations.  (The visible snippet stops inside the final check.)
if __name__ == "__main__":
    fRandom = arsutils.fDetectRemoveCmdFlag("-random")
    fDump = arsutils.fDetectRemoveCmdFlag("-dump")

    # -dump cannot be combined with -random
    if fDump and fRandom:
        print "Can't use -dump and -random at the same time"
        usageAndExit()

    title = arsutils.getRemoveCmdArg("-title")

    fSave = arsutils.fDetectRemoveCmdFlag("-save")
    fForceConvert = arsutils.fDetectRemoveCmdFlag("-forceconvert")

    # exactly one of -title / -random is required
    if title and fRandom:
        print "Can't use -title and -random at the same time"
        usageAndExit()

    if not title and not fRandom:
        print "Have to provide either -title or -random"
        usageAndExit()

    # Once the options are consumed, sys.argv should contain only the
    # script name and the dump file name (the arsutils helpers presumably
    # remove the options they recognize from sys.argv).
    if len(sys.argv) != 2:
        print "Have to provide *.sql or *.sql.bz2 file name with wikipedia dump"
Пример #14
0
        g_serverToUse = "kjk"
    elif "KJKLAP1" == computerName:
        g_serverToUse = "kjk"
    elif "TLAP" == computerName:
        g_serverToUse = "kjk"
    elif "RABBAN" == computerName:
        g_serverToUse = "andrzej"
    elif "GIZMO" == computerName:
        g_serverToUse = "pc"
    else:
        print "Don't know the server for computer %s" % computerName
        sys.exit(0)

if __name__=="__main__":
    serverToUse = None
    serverToUse = arsutils.getRemoveCmdArg("-server")
    if None == serverToUse:
        detectAndSetServerToUse()
    else:
        if not g_serverList.has_key(serverToUse):
            print "server %s is not known. Known servers:" % serverToUse
            for serverName in g_serverList.keys():
                print "   %s" % serverName
            sys.exit(0)
        else:
            g_serverToUse = serverToUse

    if arsutils.fDetectRemoveCmdFlag("-raw"):
        g_showRawOutput = True

    printUsedServer()