def __init__(self, min_df=2, max_per=1.0, binarize=False, transform=None,
             replace_num='#', source=None, subdir=None, pseudotype=None,
             splits_file=None, stage='training'):
    """Set up the 'property' count-feature extractor.

    Every argument is handed through unchanged to
    ``FeatureExtractorCounts.__init__``; this initializer only contributes
    the fixed extractor name ``'property'`` and the prefix ``'_p_'``.
    The meaning of the individual options is defined by the base
    initializer, not here.
    """
    base_kwargs = dict(
        name='property',
        prefix='_p_',
        min_df=min_df,
        max_per=max_per,
        binarize=binarize,
        transform=transform,
        source=source,
        subdir=subdir,
        pseudotype=pseudotype,
        splits_file=splits_file,
        replace_num=replace_num,
        stage=stage,
    )
    FeatureExtractorCounts.__init__(self, **base_kwargs)
def __init__(self, test_fold=0, dev_subfold=None):
    """Set up the 'dataset' count-feature extractor.

    Delegates to ``FeatureExtractorCounts.__init__`` with the fixed name
    ``'dataset'`` and prefix ``'_d_'``. OOV handling is switched on,
    the document threshold is 1 and counts are binarized; only the fold
    selection (``test_fold`` / ``dev_subfold``) is caller-controlled.
    """
    fixed_options = dict(
        add_oov=True,
        min_doc_threshold=1,
        binarize=True,
        test_fold=test_fold,
        dev_subfold=dev_subfold,
    )
    # Name and prefix are passed positionally, as the base initializer
    # is called that way throughout this variant of the code.
    FeatureExtractorCounts.__init__(self, 'dataset', '_d_', **fixed_options)
def __init__(self, n=1, min_df=2, max_per=1.0, binarize=False, transform=None,
             replace_num='#', source=None, subdir=None, pseudotype=None,
             splits_file=None, stage='training', lower=True,
             scale_factor=None):
    """Set up the 'ngrams' count-feature extractor.

    ``n`` is stored on the instance as an integer (``self.n``) before the
    base initializer runs. The remaining arguments are forwarded unchanged
    to ``FeatureExtractorCounts.__init__`` along with a prefix and suffix
    derived from ``n``.
    """
    self.n = int(n)

    # The prefix is built from ``n`` exactly as supplied, whereas the
    # suffix is built from the integer-normalized ``self.n``.
    ngram_prefix = ''.join(('_n', str(n), '_'))
    ngram_suffix = ''.join((',n=', str(self.n)))

    base_kwargs = dict(
        name='ngrams',
        prefix=ngram_prefix,
        min_df=min_df,
        max_per=max_per,
        binarize=binarize,
        transform=transform,
        source=source,
        subdir=subdir,
        pseudotype=pseudotype,
        splits_file=splits_file,
        replace_num=replace_num,
        stage=stage,
        suffix=ngram_suffix,
        lower=lower,
        scale_factor=scale_factor,
    )
    FeatureExtractorCounts.__init__(self, **base_kwargs)
def __init__(self, test_fold=0, dev_subfold=None, binarize=False, clusters=''):
    """Set up the 'brownclusters' count-feature extractor.

    The prefix embeds the ``clusters`` string (``'_bc-<clusters>_'``).
    After the base initializer has run, ``clusters`` is recorded in
    ``self.params`` and the directory name is extended through
    ``FeatureExtractorCountsBrownClusters.extend_dirname``.
    """
    cluster_prefix = ''.join(('_bc-', clusters, '_'))

    base_options = dict(
        add_oov=True,
        min_doc_threshold=1,
        binarize=binarize,
        test_fold=test_fold,
        dev_subfold=dev_subfold,
    )
    FeatureExtractorCounts.__init__(self, 'brownclusters', cluster_prefix,
                                    **base_options)

    # The parameter must be stored before the dirname is extended,
    # matching the original statement order.
    self.params['clusters'] = clusters
    FeatureExtractorCountsBrownClusters.extend_dirname(self)
def __init__(self, test_fold=0, dev_subfold=None, n=1, min_doc_threshold=1,
             binarize=True, concat_oov_counts=False, append_dataset=False,
             source='normalized'):
    """Set up the 'ngrams' count-feature extractor (params-based variant).

    Calls ``FeatureExtractorCounts.__init__`` with OOV handling enabled and
    a prefix derived from ``n``, then records the n-gram specific settings
    in ``self.params`` and extends the directory name through
    ``FeatureExtractorCountsNgrams.extend_dirname``.

    ``concat_oov_counts`` and ``append_dataset`` are passed through
    ``ast.literal_eval(str(...))``, so both real Python literals and their
    string spellings (e.g. ``'True'``) are accepted.
    """
    ngram_prefix = ''.join(('_n', str(n), '_'))

    base_options = dict(
        add_oov=True,
        min_doc_threshold=min_doc_threshold,
        binarize=binarize,
        test_fold=test_fold,
        dev_subfold=dev_subfold,
    )
    FeatureExtractorCounts.__init__(self, 'ngrams', ngram_prefix,
                                    **base_options)

    # Extractor-specific parameters are stored one by one after the base
    # initializer, in the same order as the original code.
    self.params['n'] = int(n)
    self.params['concat_oov_counts'] = ast.literal_eval(
        str(concat_oov_counts))
    self.params['append_dataset'] = ast.literal_eval(str(append_dataset))
    self.params['source'] = source

    FeatureExtractorCountsNgrams.extend_dirname(self)
def __init__(self, min_df=2, max_per=1.0, binarize=True, transform=None,
             replace_num='#', subdir=None, source=None, splits_file=None,
             pseudotype=None, stage='training', lower=True,
             scale_factor=None, shorten=None):
    """Set up the 'list' count-feature extractor.

    Forwards its arguments to ``FeatureExtractorCounts.__init__`` with the
    fixed name ``'list'`` and prefix ``'_l_'``, with two adjustments:

    * when ``shorten`` is given it is stored as an int on ``self.shorten``
      and appended to the suffix as ``',shorten=<value>'``;
    * when ``subdir == 'brown'`` the ``replace_num`` argument is overridden
      with ``None``.
    """
    list_suffix = ''
    # NOTE: ``self.shorten`` is only assigned when ``shorten`` is provided;
    # with the default of None the attribute is not set by this method.
    if shorten is not None:
        self.shorten = int(shorten)
        list_suffix = ',shorten=' + str(self.shorten)

    # The 'brown' subdirectory disables number replacement.
    if subdir == 'brown':
        replace_num = None

    base_kwargs = dict(
        name='list',
        prefix='_l_',
        min_df=min_df,
        max_per=max_per,
        binarize=binarize,
        transform=transform,
        replace_num=replace_num,
        source=source,
        subdir=subdir,
        pseudotype=pseudotype,
        splits_file=splits_file,
        stage=stage,
        lower=lower,
        suffix=list_suffix,
        scale_factor=scale_factor,
    )
    FeatureExtractorCounts.__init__(self, **base_kwargs)