示例#1
0
    def __init__(self, *args: any, shuffle=False, **kwargs):
        """Keep the positional arguments as options plus a consumable copy.

        Args:
            *args: The option values; stored in order in ``self.options``.
            shuffle: Keyword-only flag stored on ``self.shuffle``. When truthy,
                ``shuffler`` is applied to the working copy right away.
            **kwargs: Forwarded unchanged to the parent initialiser.
        """
        super().__init__(**kwargs)

        # ``options`` is the master list; ``working_copy`` is a separate list
        # object with the same elements, so reordering it leaves ``options``
        # untouched.
        self.options: List[any] = list(args)
        self.working_copy: List[any] = list(self.options)
        self.shuffle = shuffle

        if not self.shuffle:
            return
        shuffler(self.working_copy)
    def __init__(self, classifierName, posFile, negFile):
        """Load labelled samples, derive word features and select a classifier.

        Args:
            classifierName: One of 'NaiveBayesClassifier', 'MaxEntropy',
                'MultinomialNB', 'BernoulliNB', 'LogisticRegression',
                'SGDClassifier', 'LinearSVC' or 'NuSVC'.
            posFile: File of positive samples, passed to ``pd.read_table``.
            negFile: File of negative samples, passed to ``pd.read_table``.

        Raises:
            ValueError: If ``classifierName`` is not one of the names above.

        Sets ``self._name``, ``self.word_features``, ``self.X`` (text column),
        ``self.T`` (sentiment column) and ``self.classifier``.
        """
        self._name = classifierName

        # NOTE(review): newer pandas releases may reject '\n' as a delimiter;
        # confirm against the pandas version this project pins.
        pos = pd.read_table(posFile, delimiter='\n', header=None, names=["text"])
        pos['sentiment'] = 1  # 1 for positive

        neg = pd.read_table(negFile, delimiter='\n', header=None, names=["text"])
        neg['sentiment'] = 2  # 2 for negative

        # Tokenize positive samples first, then negative ones, so the
        # insertion order of the frequency table matches that sequence.
        tokens = []
        for frame in (pos, neg):
            for sentence in frame['text']:
                tokens.extend(word_tokenize(str(sentence)))

        all_words = nltk.FreqDist(token.lower() for token in tokens)

        # Keep the first 80% of the distinct lower-cased words as features.
        self.word_features = list(all_words.keys())[:int(len(all_words) * 0.8)]

        # ``.values`` replaces ``get_values()``, which pandas removed in 1.0.
        documents = np.concatenate((pos.values, neg.values), axis=0)

        # Shuffle the rows in place. numpy's shuffle is used because the
        # stdlib ``random.shuffle`` swaps row *views* of a 2-D array and can
        # duplicate rows.
        np.random.shuffle(documents)

        # Prepare X and T as column slices (text, sentiment label).
        self.X = documents[:, 0:1]
        self.T = documents[:, 1:2]

        if classifierName == 'NaiveBayesClassifier':
            self.classifier = nltk.NaiveBayesClassifier
        elif classifierName == 'MaxEntropy':
            self.classifier = nltk.MaxentClassifier
        elif classifierName == 'MultinomialNB':
            self.classifier = SklearnClassifier(MultinomialNB())
        elif classifierName == 'BernoulliNB':
            self.classifier = SklearnClassifier(BernoulliNB())
        elif classifierName == 'LogisticRegression':
            self.classifier = SklearnClassifier(LogisticRegression())
        elif classifierName == 'SGDClassifier':
            self.classifier = SklearnClassifier(SGDClassifier())
        elif classifierName == 'LinearSVC':
            # Imported locally so the block does not depend on the file's
            # import list already exposing the SVM estimators.
            from sklearn.svm import LinearSVC
            self.classifier = SklearnClassifier(LinearSVC())
        elif classifierName == 'NuSVC':
            from sklearn.svm import NuSVC
            self.classifier = SklearnClassifier(NuSVC())
        else:
            raise ValueError('Not a valid classifier name')
示例#3
0
    def exhausted(self) -> bool:
        """Report whether the working copy has run out, refilling it if so.

        Returns:
            True when ``self.working_copy`` was empty at the time of the call,
            False otherwise.

        When the working copy is empty it is replaced by a fresh copy of
        ``self.options`` (passed through ``shuffler`` if ``self.shuffle`` is
        truthy), so the next call starts a new round.
        """
        # The consumable list is ``working_copy``; ``options`` is the master
        # list the reset copies from, so emptiness must be tested on the copy.
        result = not self.working_copy

        # auto-reset
        if result:
            self.working_copy = [*self.options]
            if self.shuffle:
                shuffler(self.working_copy)

        return result
示例#4
0
    def shuffle(self, value):
        """
        Switch shuffling for this queue on or off.

        queue.shuffle(True) turns on shuffling: the pending queue becomes a
        shuffled deque of every entry in ``self._urls``.

        queue.shuffle(False) turns off shuffling: the pending queue becomes
        the entries of ``self._urls`` that follow the current one, in their
        original order (even if already played).

        Calling with the value that is already set does nothing.

        Raises:
            ValueError: When turning shuffling off and ``self.current`` is
                not present in ``self._urls``.
        """

        if value is self._shuffle_tracks:
            return
        self._shuffle_tracks = value
        if value:
            q = collections.deque(self._urls)
            shuffler(q)
        else:
            # find current in urls
            i = self._urls.index(self.current)
            # set queue to all songs after current
            q = collections.deque(self._urls[i + 1:])
        # Both branches must publish their queue; previously the ordered
        # queue built for the "off" case was computed and then discarded.
        self._queued = q
# from src.modules.classes import *
import src.modules.loss_funcs as lf
from src.modules.helper_functions import *
from src.modules.eval_funcs import *
import src.modules.reporting as rpt

# %% ENERGY DISTRIBUTIONS
# Select the particle type and build the dataset path under the project root.
particle = 'muon_neutrino'
dataset = (
    get_project_root()
    + get_path_from_root('/CubeML/data/oscnext-genie-level5-v01-01-pass2')
)

train, val, test = split_files_in_dataset(dataset, particle=particle)

# * Get random files
# For each split: an index array over its files passed through ``shuffler``,
# plus two empty accumulators for later cells to fill.
rand_train, energy_train, n_in_file_train = np.arange(len(train)), [], []
shuffler(rand_train)

rand_val, energy_val, n_in_file_val = np.arange(len(val)), [], []
shuffler(rand_val)

rand_test, energy_test, n_in_file_test = np.arange(len(test)), [], []
shuffler(rand_test)

n_wanted, key = 50000, 'raw/true_primary_time'
示例#6
0
 def _create_loop_queue(self):
     """Build ``self._loop_queue`` from ``self._urls`` when looping is on.

     Nothing happens unless ``self._loop_tracks`` is exactly ``True``. The
     new deque is passed through ``shuffler`` first when
     ``self._shuffle_tracks`` is exactly ``True``.
     """
     # Guard clause: identity check on purpose, truthy non-bool values do
     # not count as "looping enabled".
     if self._loop_tracks is not True:
         return

     loop_queue = collections.deque(self._urls)
     if self._shuffle_tracks is True:
         shuffler(loop_queue)
     self._loop_queue = loop_queue