########################### SCRIPT ########################################
# Load every article list and pickle the two context vectorizers.
#
# Reads from the surrounding script:
#   fileName   -- one path prefix, or several prefixes separated by commas
#   numLists   -- how many "<prefix>.<n>" list files exist per prefix
#   sys.argv[4] -- path the pickled [vectorizer1, vectorizer2] is written to

# str.split returns a one-element list when there is no comma, so no
# separate single-file branch is needed.
fileNames = fileName.split(',')

globalIndx = 0
for fileName in fileNames:
    for listNum in range(numLists):
        # One pre-formatted argument prints "LIST <n>" identically under the
        # Python 2 print statement and the Python 3 print function.
        print("LIST %s" % listNum)

        # read the file
        articles, titles, identifiers, downloaded_articles = loadFile(
            fileName + '.' + str(listNum))

        # need this information only once (original articles)
        if listNum == 0:
            ARTICLES = articles
            TITLES = titles
            IDENTIFIERS = identifiers
            DOWNLOADED_ARTICLES = [[] for _ in range(len(ARTICLES))]

        # calculate the context dictionary
        # NOTE(review): the source this was recovered from had lost its
        # indentation; this call is assumed to sit at loop level (run for
        # every list, last result wins) rather than inside the
        # `listNum == 0` branch -- confirm against the original layout.
        vectorizer1, vectorizer2 = getContextDictionary(articles)

# now to store everything
# Build the payload before opening the output so that a missing vectorizer
# (e.g. numLists == 0) fails before the destination file is truncated.
contextVectorizers = [vectorizer1, vectorizer2]
# Context manager guarantees the pickle file is flushed and closed.
with open(sys.argv[4], "wb") as vectorizerFile:
    pickle.dump(contextVectorizers, vectorizerFile)
########################### SCRIPT ######################################## if ',' in fileName: fileNames = fileName.split(',') else: fileNames = [fileName] globalIndx = 0 for fileName in fileNames: for listNum in range(numLists): print "LIST", listNum #read the file articles, titles, identifiers, downloaded_articles = loadFile(fileName+'.'+str(listNum)) print "LEN ARTICLES", len(articles) print "final LEN IDENTIFIERS", len(IDENTIFIERS) #need this information only once (original articles) if listNum==0: ARTICLES = articles TITLES = titles IDENTIFIERS = identifiers DOWNLOADED_ARTICLES = [[] for q in range(len(ARTICLES))] #calculate the context dictionary if not vectorizer1: vectorizer1, vectorizer2 = getContextDictionary(articles) for indx in range(len(articles)):