#Percentage of total term features to keep
# NOTE(review): feature_percent is assigned again further down in this file
# (feature_percent = 4), so the value 17 is only seen by code that runs
# before that later assignment.
feature_percent = 17



#----------------Begin Program--------------------------


#Corpus Data
# Fetch the RCV1 dataset through scikit-learn and keep two pieces of it:
# the `target` matrix, materialized with .toarray(), and the `sample_id`
# values.
from sklearn.datasets import fetch_rcv1
rcv1_info = fetch_rcv1()
sklearn_labelMatrix = rcv1_info.target.toarray()
sklearn_docIDs = rcv1_info.sample_id
# Rebind the name to an empty list so it no longer references the fetched
# object (presumably to let that memory be reclaimed -- TODO confirm intent).
rcv1_info = []
# Load the corpus with the project-local getRCV1V2 loader from a hard-coded
# absolute path, with testset=0.
# NOTE(review): rcv1_data is assigned again later in this file from
# getRCV1(...); verify whether this earlier load is still needed.
from tools.getRCV1V2 import getRCV1V2
rcv1_data = getRCV1V2("/Volumes/Files/Work/Research/Information Retrieval/1) Data/Reuters/RCV/RCV/RCV1-V2/Raw Data/", testset=0)

#Feature Extraction
# Vectorizers from scikit-learn, stop words from NLTK, and project-local
# helpers from the `tools` package.
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from nltk.corpus import stopwords
# English stop-word list from NLTK. This reads the NLTK "stopwords" corpus,
# which has to be available locally for the call to succeed.
stop_words = stopwords.words("english")
import tools.cooccurence_main as cooccurence_main
from tools.text_processing import tokenize, get_TF, get_TFIDF, freqToProbability
import numpy

#Classification
from tools.CopulaClassifier import CopulaClassifier

#Evaluation
from sklearn.metrics import f1_score, precision_score, recall_score
# NOTE(review): the name is spelled "coorelation" (sic). It is left unchanged
# because code outside this section may refer to it; how the value is used is
# not visible here.
coorelation_boost = 8
#Percentage of total term features to keep
# NOTE(review): this replaces the value assigned by the earlier
# `feature_percent = 17` at the top of the file.
feature_percent = 4

#-------------------------- Begin Program --------------------------

#Corpus Data
# Fetch the RCV1 dataset through scikit-learn and keep two pieces of it:
# the `target` matrix, materialized with .toarray(), and the `sample_id`
# values.
# NOTE(review): the same fetch and the same two derived values already appear
# earlier in this file; this section repeats them and rebinds the names.
from sklearn.datasets import fetch_rcv1
rcv1_info = fetch_rcv1()
sklearn_labelMatrix = rcv1_info.target.toarray()
sklearn_docIDs = rcv1_info.sample_id
# Rebind the name to an empty list so it no longer references the fetched
# object (presumably to let that memory be reclaimed -- TODO confirm intent).
rcv1_info = []
# Hard-coded absolute path; it is passed to both getRCV1V2 and getRCV1 below.
RCV1V2Path = "/Volumes/Files/Work/Research/Information Retrieval/1) Data/Reuters/RCV/RCV/RCV1-V2/Raw Data/"

# Load via the project-local getRCV1V2 loader with testset=1.
from tools.getRCV1V2 import getRCV1V2
rcv1v2_data = getRCV1V2(RCV1V2Path, testset=1)

# Load via the project-local getRCV1 loader, which receives both the RCV1
# path and the RCV1-V2 path, with testset=1. This rebinds rcv1_data, which
# was assigned earlier in the file from getRCV1V2(..., testset=0).
from tools.getRCV1 import getRCV1
RCV1Path = "/Volumes/Files/Work/Research/Information Retrieval/1) Data/Reuters/RCV/RCV/rcv1/Data/"
rcv1_data = getRCV1(RCV1Path, RCV1V2Path, testset=1)

#Feature Extraction
# Vectorizers from scikit-learn, stop words from NLTK, and project-local
# helpers from the `tools` package.
# NOTE(review): these imports and the stop_words assignment repeat statements
# that already appear earlier in this file.
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from nltk.corpus import stopwords
# English stop-word list from NLTK. This reads the NLTK "stopwords" corpus,
# which has to be available locally for the call to succeed.
stop_words = stopwords.words("english")
import tools.cooccurence_main as cooccurence_main
from tools.text_processing import tokenize, get_TF, get_TFIDF, freqToProbability
import numpy

#Classification
from tools.CopulaClassifier import CopulaClassifier