Пример #1
0
########################### SCRIPT ########################################

if ',' in fileName:
    fileNames = fileName.split(',')
else:
    fileNames = [fileName]

globalIndx = 0

for fileName in fileNames:
    for listNum in range(numLists):
        print "LIST", listNum

        #read the file
        articles, titles, identifiers, downloaded_articles = loadFile(
            fileName + '.' + str(listNum))

        #need this information only once (original articles)
        if listNum == 0:
            ARTICLES = articles
            TITLES = titles
            IDENTIFIERS = identifiers
            DOWNLOADED_ARTICLES = [[] for q in range(len(ARTICLES))]

            #calculate the context dictionary

            vectorizer1, vectorizer2 = getContextDictionary(articles)

#now to store everything
pickle.dump([vectorizer1, vectorizer2], open(sys.argv[4], "wb"))
########################### SCRIPT ########################################

# Accept either a single base name or a comma-separated list of base names;
# either way fileNames ends up as a list of strings.
fileNames = fileName.split(',') if ',' in fileName else [fileName]

# Initialised to zero before the loop over fileNames starts.
globalIndx = 0

for fileName in fileNames:
    for listNum in range(numLists):
        print "LIST", listNum

        #read the file
        articles, titles, identifiers, downloaded_articles = loadFile(fileName+'.'+str(listNum))
        print "LEN ARTICLES", len(articles)
        print "final LEN IDENTIFIERS", len(IDENTIFIERS)

        #need this information only once (original articles)
        if listNum==0:
            ARTICLES = articles
            TITLES = titles
            IDENTIFIERS = identifiers
            DOWNLOADED_ARTICLES = [[] for q in range(len(ARTICLES))]

            #calculate the context dictionary
            if not vectorizer1:
                vectorizer1, vectorizer2 = getContextDictionary(articles)

        for indx in range(len(articles)):