import os
from vectorize import Vectorizer

# Directory that holds the "neg" and "pos" sub-folders read by inpYielder.
# The path is relative, so it is resolved against the current working directory.
DATA_DIR = "../../data/movie/polarity_dataset_v1.0/tokens"

def inpYielder(root=None):
    """Yield ``(label, text)`` pairs for every file in the dataset folders.

    Files in the ``neg`` sub-directory are yielded first with label 0,
    followed by files in the ``pos`` sub-directory with label 1. Each file
    name is printed just before its contents are read.

    root -- directory containing the ``neg`` and ``pos`` folders; when
            omitted, the module-level DATA_DIR is used.
    """
    if root is None:
        root = DATA_DIR
    for label, subdir in ((0, "neg"), (1, "pos")):
        class_dir = os.path.join(root, subdir)
        for fileName in os.listdir(class_dir):
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print(fileName)
            # Read and close the file before yielding so that no handle is
            # left open if the consumer stops iterating early.
            with open(os.path.join(class_dir, fileName), "r") as f:
                text = f.read()
            yield label, text
    
# Configure a vectorizer for the Bo Pang polarity data with ngram=1.
# NOTE(review): the meaning of the three positional strings is defined by
# Vectorizer in vectorize.py - confirm there.
vectorizer = Vectorizer(
    "Bo Pang's movie data",
    "Movie",
    "polarity_dataset_v1.0",
    ngram=1,
)

# Pass the generator function itself (not its result) to the vectorizer.
vectorizer.run(inpYielder)
import os.path
from csv import reader
from vectorize import Vectorizer

# Directory that contains processed_reviews.tsv, read by score_review_pairs.
# The path is relative, so it is resolved against the current working directory.
PATH = "../../data/movie/amazon"

def score_review_pairs(data_dir=None):
    """Yield ``(score, review)`` pairs read from ``processed_reviews.tsv``.

    The file is parsed as tab-delimited text with ``"`` as the quote
    character. The first column is converted with ``int``; the second is
    yielded unchanged. Blank lines are skipped.

    data_dir -- directory containing ``processed_reviews.tsv``; when
                omitted, the module-level PATH is used.

    Raises ValueError if a non-blank row does not have exactly two columns
    or if its first column cannot be converted by ``int``.
    """
    if data_dir is None:
        data_dir = PATH
    # The with-block releases the file handle once the generator is exhausted
    # or closed; the handle was previously never closed explicitly.
    with open(os.path.join(data_dir, "processed_reviews.tsv"), "r") as tsv:
        for row in reader(tsv, delimiter="\t", quotechar="\""):
            if not row:
                # csv.reader returns an empty list for a blank line, which
                # would otherwise break the two-value unpacking below.
                continue
            score, review = row
            yield int(score), review
        
# Configure a vectorizer for the Amazon review data with ngram=3.
# NOTE(review): the meaning of the three positional strings is defined by
# Vectorizer in vectorize.py - confirm there.
vectorizer = Vectorizer("Amazon dvd", "Movie", "amazon", ngram=3)

# Pass the generator function itself (not its result) to the vectorizer.
vectorizer.run(score_review_pairs)
import os.path
from csv import reader
from vectorize import Vectorizer

# Directory that contains processed_reviews.tsv, read by score_review_pairs.
# The path is relative, so it is resolved against the current working directory.
PATH = "../../data/movie/amazon"


def score_review_pairs(data_dir=None):
    """Yield ``(score, review)`` pairs read from ``processed_reviews.tsv``.

    The file is parsed as tab-delimited text with ``"`` as the quote
    character. The first column is converted with ``int``; the second is
    yielded unchanged. Blank lines are skipped.

    data_dir -- directory containing ``processed_reviews.tsv``; when
                omitted, the module-level PATH is used.

    Raises ValueError if a non-blank row does not have exactly two columns
    or if its first column cannot be converted by ``int``.
    """
    if data_dir is None:
        data_dir = PATH
    # The with-block releases the file handle once the generator is exhausted
    # or closed; the handle was previously never closed explicitly.
    with open(os.path.join(data_dir, "processed_reviews.tsv"), "r") as tsv:
        for row in reader(tsv, delimiter="\t", quotechar="\""):
            if not row:
                # csv.reader returns an empty list for a blank line, which
                # would otherwise break the two-value unpacking below.
                continue
            score, review = row
            yield int(score), review


# Configure a vectorizer for the Amazon review data with ngram=3.
# NOTE(review): the meaning of the three positional strings is defined by
# Vectorizer in vectorize.py - confirm there.
vectorizer = Vectorizer("Amazon dvd", "Movie", "amazon", ngram=3)

# Pass the generator function itself (not its result) to the vectorizer.
vectorizer.run(score_review_pairs)
import os
from vectorize import Vectorizer

# Directory that holds the "neg" and "pos" sub-folders read by inpYielder.
# The path is relative, so it is resolved against the current working directory.
DATA_DIR = "../../data/movie/polarity_dataset_v1.0/tokens"


def inpYielder(root=None):
    """Yield ``(label, text)`` pairs for every file in the dataset folders.

    Files in the ``neg`` sub-directory are yielded first with label 0,
    followed by files in the ``pos`` sub-directory with label 1. Each file
    name is printed just before its contents are read.

    root -- directory containing the ``neg`` and ``pos`` folders; when
            omitted, the module-level DATA_DIR is used.
    """
    if root is None:
        root = DATA_DIR
    for label, subdir in ((0, "neg"), (1, "pos")):
        class_dir = os.path.join(root, subdir)
        for fileName in os.listdir(class_dir):
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print(fileName)
            # Read and close the file before yielding so that no handle is
            # left open if the consumer stops iterating early.
            with open(os.path.join(class_dir, fileName), "r") as f:
                text = f.read()
            yield label, text

# Configure a vectorizer for the Bo Pang polarity data with ngram=1.
# NOTE(review): the meaning of the three positional strings is defined by
# Vectorizer in vectorize.py - confirm there.
vectorizer = Vectorizer(
    "Bo Pang's movie data",
    "Movie",
    "polarity_dataset_v1.0",
    ngram=1,
)

# Pass the generator function itself (not its result) to the vectorizer.
vectorizer.run(inpYielder)