def setUpClass(self):
    """Configure the popcon test fixtures and create the Recommender.

    NOTE(review): Recommender() is built without arguments, so the values
    set here presumably reach it through shared Config state -- confirm.
    """
    config = Config()
    # Paths of the sample popcon index, submissions and clusters.
    config.popcon_index = "test_data/.sample_pxi"
    config.popcon_dir = "test_data/popcon_dir"
    config.clusters_dir = "test_data/clusters_dir"
    config.popcon = 0

    self.rec = Recommender()
def test_error_train_on_run_apprec(self):
    """apprec.run() must return ERROR_TRAIN for the 'mlbva' strategy when
    the training file path points at "error.txt".

    The strategy and the training path are module-wide state, so they are
    restored in a ``finally`` clause: an exception raised by apprec.run()
    must not leak the altered values into the tests that run afterwards.
    """
    config = Config()
    original_strategy = config.strategy
    original_training_path = MachineLearningData.MACHINE_LEARNING_TRAINING

    config.strategy = 'mlbva'
    MachineLearningData.MACHINE_LEARNING_TRAINING = "error.txt"
    try:
        result = apprec.run()
    finally:
        # Always undo the global changes, even when run() raises.
        config.strategy = original_strategy
        MachineLearningData.MACHINE_LEARNING_TRAINING = \
            original_training_path

    self.assertEqual(apprec.ERROR_TRAIN, result)
def reset(self, params, rep):
    """Prepare recommender, user and sample size for a content experiment.

    Nothing is done unless ``params['name']`` starts with "content".
    ``rep`` is accepted but not used here.
    """
    if not params['name'].startswith("content"):
        return

    config = Config()
    # The AppAxi index is expected to be already built at this path.
    config.axi = "data/AppAxi"
    config.index_mode = "old"
    config.weight = params['weight']

    self.rec = Recommender(config)
    self.rec.set_strategy(params['strategy'])
    self.repo_size = self.rec.items_repository.get_doccount()

    self.user = LocalSystem()
    self.user.app_pkg_profile(self.rec.items_repository)

    # Sample size is a fraction (params['sample']) of the user profile.
    profile_length = len(self.user.pkg_profile)
    self.sample_size = int(profile_length * params['sample'])
def __init__(self):
    """
    Set initial parameters: open the xapian indexes, load the filter
    lists, pick the weighting scheme and select the strategy.
    """
    self.cfg = Config()
    config = self.cfg

    # Xapian indexes; the popcon one is opened only when enabled.
    self.axi_desktopapps = xapian.Database(config.axi_desktopapps)
    if config.popcon:
        self.popcon_desktopapps = xapian.Database(
            config.popcon_desktopapps)

    # Valid desktopapps and tags.
    # File format: one package or tag name per line, '#' lines skipped.
    self.valid_desktopapps = []
    self.valid_tags = []
    logging.info("Loading recommender filters")

    desktopapps_path = os.path.join(config.filters_dir, "desktopapps")
    with open(desktopapps_path) as desktopapps_file:
        self.valid_desktopapps = [entry.strip()
                                  for entry in desktopapps_file
                                  if not entry.startswith("#")]

    debtags_path = os.path.join(config.filters_dir, "debtags")
    with open(debtags_path) as debtags_file:
        self.valid_tags = [entry.strip()
                           for entry in debtags_file
                           if not entry.startswith("#")]

    # Xapian weighting scheme: BM25 when requested, TradWeight otherwise.
    if config.weight == "bm25":
        self.weight = xapian.BM25Weight(config.bm25_k1, config.bm25_k2,
                                        config.bm25_k3, config.bm25_b,
                                        config.bm25_nl)
    else:
        self.weight = xapian.TradWeight()

    self.set_strategy(config.strategy)
def load_summary(self):
    """Fill ``self.summary`` for the package named ``self.name``.

    When ``self.connect_to_dde`` succeeds for the configured server and
    port, the summary is read from the JSON document at
    ``cfg.dde_url % self.name`` (key path ``['r']['description']``).
    Otherwise it falls back to the summary of the apt candidate version.
    """
    cfg = Config()
    if self.connect_to_dde(cfg.dde_server, cfg.dde_port):
        # Local import: keeps this block self-contained.
        from contextlib import closing

        # Close the HTTP response explicitly instead of leaking the
        # underlying socket until garbage collection.
        with closing(urllib.urlopen(cfg.dde_url % self.name)) as response:
            json_data = json.load(response)
        self.summary = json_data['r']['description']
    else:
        pkg_version = apt.Cache()[self.name].candidate
        self.summary = pkg_version.summary
def stopwords(self):
    """Return ``self._stopwords``, filling it from file while it is empty.

    When ``self._stopwords`` is falsy, every line of the file at
    ``Config().stopwords`` is stripped and added to it before returning.
    """
    if self._stopwords:
        return self._stopwords

    with open(Config().stopwords, 'r') as stopwords_file:
        for line in stopwords_file:
            self._stopwords.add(line.strip())
    return self._stopwords
def setUpClass(self):
    """Open the configured axi, build a sample index and a test user."""
    self.axi = xapian.Database(Config().axi)

    sample_packages = [
        "gimp", "aaphoto", "eog", "emacs", "dia",
        "ferret", "festival", "file", "inkscape", "xpdf",
    ]
    sample_path = "apprecommender/tests/test_data/.sample_axi"
    self.sample_axi = SampleAptXapianIndex(sample_packages, self.axi,
                                           sample_path)

    # Every profile package gets the same value of 1.
    profile = dict.fromkeys(("gimp", "aaphoto", "eog", "emacs"), 1)
    self.user = User(profile)
def __init__(self):
    """Open the xapian database and build the stemmer and filters.

    Valid debtags are read from the "debtags" file inside
    ``Config().filters_dir``; lines starting with '#' are skipped.
    """
    self.axi = xapian.Database(MachineLearningData.XAPIAN_DATABASE_PATH)
    self.stemmer = Stemmer.Stemmer('english')

    debtags_path = path.join(Config().filters_dir, "debtags")
    valid_tags = []
    with open(debtags_path) as tags_file:
        for line in tags_file:
            if not line.startswith("#"):
                valid_tags.append(line.strip())

    self.filter_tag = FilterTag(valid_tags)
    self.filter_description = FilterDescription()
def get_user(self, no_auto_pkg_profile):
    """Build and return a LocalSystem user with a filtered profile.

    The profile is filtered with the "desktopapps" file from
    ``Config().filters_dir`` and then passed through
    ``maximal_pkg_profile()``. When ``no_auto_pkg_profile`` is truthy,
    ``no_auto_pkg_profile()`` is applied to the user as well.
    """
    desktopapps_filter = os.path.join(Config().filters_dir, "desktopapps")

    local_user = LocalSystem()
    local_user.filter_pkg_profile(desktopapps_filter)
    local_user.maximal_pkg_profile()
    if no_auto_pkg_profile:
        local_user.no_auto_pkg_profile()

    return local_user
def run_apprecommender(options):
    """Run one recommendation round and return a status code.

    Returns SUCCESS when the recommendation completes, ERROR_INIT when a
    xapian database cannot be opened, and ERROR_TRAIN when an IOError is
    raised while the configured strategy contains "ml".
    ``options`` is not used by this function.
    """
    recommendation_size = 20
    no_auto_pkg_profile = True

    try:
        AppRecommender().make_recommendation(recommendation_size,
                                             no_auto_pkg_profile)
    except xapian.DatabaseOpeningError:
        return ERROR_INIT
    except IOError:
        if "ml" in Config().strategy:
            return ERROR_TRAIN
        # NOTE(review): for non-ml strategies the IOError is swallowed and
        # None is returned -- confirm this is intended.
        return None

    return SUCCESS
def load(self):
    """Parse ``sys.argv[1:]`` with getopt and apply the options to Config.

    The parsed ``(option, value)`` pairs are stored in ``self.options``.
    On a getopt syntax error the logger is set up, the error is logged,
    ``self.usage()`` is called and the process exits.
    """
    config = Config()
    short_options = 'hdvo:d:v:s:z:idvo:tdvo:b:n:cdvo'
    # The long form of -n was declared as 'nrecommendation' (without an
    # argument) while the handler below tested '--num-recommendations',
    # so the long form could only ever reach the "unhandled option"
    # assertion. Declare the handled name with an argument and keep the
    # old spelling as an alias.
    long_options = ['help', 'debug', 'verbose', 'strategy=',
                    'profile_size=', 'init', 'train', 'because',
                    'num-recommendations=', 'nrecommendation=',
                    'contribute']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_options,
                                   long_options)
        self.options = opts
    except getopt.GetoptError as error:
        config.set_logger()
        logging.error('Bad syntax: {}'.format(str(error)))
        self.usage()
        sys.exit()

    for o, p in opts:
        if o in ('-h', '--help'):
            self.usage()
            sys.exit()
        elif o in ('-d', '--debug'):
            config.debug = 1
        elif o in ('-v', '--verbose'):
            config.verbose = 1
        elif o in ('-s', '--strategy'):
            config.strategy = p
        elif o in ('-z', '--profile_size'):
            config.profile_size = int(p)
        elif o in ('-i', '--init'):
            # Recognised here but not stored on the config.
            continue
        elif o in ('-t', '--train'):
            continue
        elif o in ('-b', '--because'):
            config.because = True
        elif o in ('-n', '--num-recommendations', '--nrecommendation'):
            config.num_recommendations = int(p)
        elif o in ('-c', '--contribute'):
            continue
        else:
            assert False, "unhandled option"
def load(self):
    """Parse ``sys.argv[1:]`` with getopt and apply the options to Config.

    The parsed ``(option, value)`` pairs are stored in ``self.options``.
    On a getopt syntax error the logger is set up, the error is logged,
    ``self.usage()`` is called and the process exits.
    """
    config = Config()
    short_options = "hdvo:f:b:a:e:p:m:u:l:c:x:w:s:z:r:n:idvo:tdvo"
    long_options = [
        "help", "debug", "verbose", "output=", "filtersdir=",
        "pkgsfilter=", "axi=", "dde=", "popconindex=", "popcondir=",
        "indexmode=", "clustersdir=", "kmedoids=", "maxpopcon=",
        "weight=", "strategy=", "profile_size=", "profiling=",
        "neighbors=", "init", "train"
    ]
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_options,
                                   long_options)
        self.options = opts
    except getopt.GetoptError as error:
        config.set_logger()
        logging.error("Bad syntax: %s" % str(error))
        self.usage()
        sys.exit()

    # getopt reports long options under their declared names, so every
    # handler must test the spelling listed in long_options above.
    for o, p in opts:
        if o in ("-h", "--help"):
            self.usage()
            sys.exit()
        elif o in ("-d", "--debug"):
            config.debug = 1
        elif o in ("-v", "--verbose"):
            config.verbose = 1
        elif o in ("-o", "--output"):
            config.output = p
        elif o in ("-f", "--filtersdir"):
            config.filters_dir = p
        elif o in ("-b", "--pkgsfilter"):
            config.pkgs_filter = p
        elif o in ("-a", "--axi"):
            config.axi = p
        elif o in ("-e", "--dde"):
            config.dde_url = p
        elif o in ("-p", "--popconindex"):
            config.popcon_index = p
        elif o in ("-m", "--popcondir"):
            config.popcon_dir = p
        elif o in ("-u", "--indexmode"):
            # Was "--index_mode", which is not a declared long option.
            config.index_mode = p
        elif o in ("-l", "--clustersdir"):
            config.clusters_dir = p
        elif o in ("-c", "--kmedoids"):
            config.k_medoids = int(p)
        elif o in ("-x", "--maxpopcon"):
            # Was "--max_popcon", which is not a declared long option.
            config.max_popcon = int(p)
        elif o in ("-w", "--weight"):
            config.weight = p
        elif o in ("-s", "--strategy"):
            config.strategy = p
        elif o in ("-z", "--profile_size"):
            config.profile_size = int(p)
        elif o in ("-r", "--profiling"):
            # Was paired with "-z", already taken by --profile_size;
            # "r:" is the otherwise unhandled short option declared above.
            config.profiling = p
        elif o in ("-n", "--neighbors"):
            config.k_neighbors = int(p)
        elif o in ("-i", "--init"):
            # Recognised here but not stored on the config.
            continue
        elif o in ("-t", "--train"):
            continue
        else:
            assert False, "unhandled option"
class BagOfWords():
    """Package classifier that fits a GaussianNB model on TF-IDF features
    built from package description terms, debtags and section."""

    # Locations of the pickled artefacts, all below the user data dir.
    USER_DATA_DIR = Config().user_data_dir
    BAG_OF_WORDS_DIR = USER_DATA_DIR + 'bag_of_words/'
    BAG_OF_WORDS_MODEL = BAG_OF_WORDS_DIR + 'bag_of_words_model.pickle'
    BAG_OF_WORDS_TERMS = BAG_OF_WORDS_DIR + 'bag_of_words_terms.pickle'
    BAG_OF_WORDS_DEBTAGS = BAG_OF_WORDS_DIR + 'bag_of_words_debtags.pickle'
    BAG_OF_WORDS_PKGS_CLASSIFICATION = BAG_OF_WORDS_DIR + \
        'bow_pkgs_classification.pickle'

    # Status codes.
    MODEL_ALREADY_CREATED = 1
    CREATED_MODEL = 0

    @staticmethod
    def save(bag_of_words, file_path):
        """Pickle ``bag_of_words`` into ``file_path``."""
        with open(file_path, 'wb') as text:
            pickle.dump(bag_of_words, text)

    @staticmethod
    def load(file_path):
        """Return the object unpickled from ``file_path``."""
        with open(file_path, 'rb') as text:
            bag_of_words = pickle.load(text)

        return bag_of_words

    def __init__(self):
        self.vectorizer = TfidfVectorizer(max_df=0.8, max_features=5000,
                                          min_df=5, stop_words='english',
                                          use_idf=True)

    def check_dir(self):
        """Return True when the bag-of-words directory exists."""
        return os.path.exists(BagOfWords.BAG_OF_WORDS_DIR)

    def combine_pkg_info(self, description, debtags, section):
        """Append ``debtags`` and ``section`` to ``description`` in place
        and return that same list."""
        description.extend(debtags)
        description.append(section)

        return description

    def classify_pkg(self, attribute_vector, transform=True):
        """Return the label predicted for one package.

        With ``transform`` the vector is first run through the fitted
        vectorizer; otherwise it is handed to the classifier unchanged.
        """
        if transform:
            pkg_feature = self.vectorizer.transform([attribute_vector])
            pkg_feature = pkg_feature.toarray()
        else:
            pkg_feature = attribute_vector

        label = self.classifier.predict(pkg_feature)
        return label[0]

    def create_pkg_data(self, pkg, axi, cache, ml_data):
        """Return description terms, debtags and section of ``pkg``
        joined into one space-separated string."""
        description = self.get_pkg_description(pkg, cache, ml_data)
        debtags = self.get_pkg_debtags(pkg, axi, ml_data)
        section = self.get_pkg_section(pkg, cache, ml_data)

        return ' '.join(self.combine_pkg_info(description, debtags,
                                              section))

    def get_pkgs_classification(self, pkgs_list):
        """Return the stored classification (last element of each stored
        row) for every package in ``pkgs_list``."""
        # The file is written by pickle in binary mode, so read it the
        # same way.
        with open(MachineLearningData.PKGS_CLASSIFICATIONS, 'rb') as pkgs:
            pkgs_data = pickle.load(pkgs)

        return [pkgs_data[pkg_name][-1] for pkg_name in pkgs_list]

    def get_pkg_description(self, pkg, cache, ml_data):
        return ml_data.get_pkg_terms(cache, pkg)

    def get_pkg_debtags(self, pkg, axi, ml_data):
        """Return the debtags of ``pkg`` with '::' replaced by '_'."""
        return [debtag.replace('::', '_')
                for debtag in ml_data.get_pkg_debtags(axi, pkg)]

    def get_pkg_section(self, pkg, cache, ml_data):
        return ml_data.get_pkg_section(cache, pkg)

    def get_used_terms_and_debtags(self, features_lists):
        """Split feature names into ``(terms, debtags)``.

        Names containing '_' are treated as debtags and get their '_'
        turned back into '::'; every other name is a term.
        """
        terms, debtags = [], []
        for feature in features_lists:
            if '_' in feature:
                debtags.append(feature.replace('_', '::'))
            else:
                terms.append(feature)

        return terms, debtags

    def prepare_data(self, pkg_list, axi, cache, ml_data):
        """Return ``(descriptions, classifications)`` for ``pkg_list``."""
        pkgs_description = [
            self.create_pkg_data(pkg, axi, cache, ml_data)
            for pkg in pkg_list
        ]
        pkgs_classification = self.get_pkgs_classification(pkg_list)

        return (pkgs_description, pkgs_classification)

    def save_features(self, features, path):
        """Pickle ``features`` into ``path``, creating the bag-of-words
        directory first when it is missing."""
        if not self.check_dir():
            os.mkdir(BagOfWords.BAG_OF_WORDS_DIR)

        # 'wb' replaces the invalid mode 'wa': pickle needs a binary file
        # opened for writing.
        with open(path, 'wb') as feature_file:
            pickle.dump(features, feature_file)

    def save_pkgs_features(self, path, pkgs_list, features_array,
                           pkg_classification):
        """Pickle a dict mapping each package to its feature row followed
        by its classification."""
        pkgs_classification = {}
        for index, pkg in enumerate(pkgs_list):
            value = features_array[index, :].tolist()
            value.append(pkg_classification[index])
            pkgs_classification[pkg] = value

        with open(path, 'wb') as bow_pkgs_classification:
            pickle.dump(pkgs_classification, bow_pkgs_classification)

    def train_model(self, pkgs_list, axi, save_files=True):
        """Fit the vectorizer and a GaussianNB classifier on ``pkgs_list``.

        With ``save_files`` the used terms, debtags and per-package
        feature rows are pickled as well. Returns
        ``BagOfWords.CREATED_MODEL``.
        """
        cache = Cache()
        ml_data = MachineLearningData()

        pkgs_description, pkg_classification = self.prepare_data(
            pkgs_list, axi, cache, ml_data)

        pkg_features = self.vectorizer.fit_transform(pkgs_description)
        features_array = pkg_features.toarray()
        terms, debtags = self.get_used_terms_and_debtags(
            self.vectorizer.get_feature_names())

        self.classifier = GaussianNB()
        self.classifier.fit(features_array, pkg_classification)

        path = BagOfWords.BAG_OF_WORDS_PKGS_CLASSIFICATION

        if save_files:
            self.save_features(terms, BagOfWords.BAG_OF_WORDS_TERMS)
            self.save_features(debtags, BagOfWords.BAG_OF_WORDS_DEBTAGS)
            self.save_pkgs_features(path, pkgs_list, features_array,
                                    pkg_classification)

        return BagOfWords.CREATED_MODEL
def load_details(self):
    """Load the package details from DDE when reachable, else from apt.

    Reachability is whatever ``self.connect_to_dde`` reports for the
    configured DDE server and port.
    """
    config = Config()
    dde_available = self.connect_to_dde(config.dde_server, config.dde_port)
    if not dde_available:
        self.load_details_from_apt()
        return

    self.load_details_from_dde(config.dde_url)
class MachineLearningData():
    """Builds and stores the per-package term/debtag tables and the
    package classifications used by the machine-learning strategies."""

    XAPIAN_DATABASE_PATH = path.expanduser(
        '~/.app-recommender/axi_desktopapps/')
    USER_DATA_DIR = Config().user_data_dir
    BASE_DIR = Config().base_dir

    # Files written below the user data dir.
    PKG_DATA_PATH = USER_DATA_DIR + 'pkg_data.txt'
    PKGS_CLASSIFICATIONS = USER_DATA_DIR + 'pkgs_classifications.txt'
    MACHINE_LEARNING_TERMS = USER_DATA_DIR + 'machine_learning_terms.txt'
    MACHINE_LEARNING_DEBTAGS = USER_DATA_DIR + 'machine_learning_debtags.txt'
    MACHINE_LEARNING_TRAINING = USER_DATA_DIR + 'machine_learning_training.txt'

    def __init__(self):
        """Open the xapian database and build the stemmer and filters.

        Valid debtags come from the "debtags" file in
        ``Config().filters_dir``; lines starting with '#' are skipped.
        """
        self.axi = xapian.Database(MachineLearningData.XAPIAN_DATABASE_PATH)
        self.stemmer = Stemmer.Stemmer('english')

        with open(path.join(Config().filters_dir, "debtags")) as tags:
            valid_tags = [
                line.strip() for line in tags
                if not line.startswith("#")
            ]

        self.filter_tag = FilterTag(valid_tags)
        self.filter_description = FilterDescription()

    def create_data(self, labels):
        """Classify the packages, build their term/debtag table, pickle
        the results and return the table (a dict keyed by package)."""
        if not path.exists(MachineLearningData.USER_DATA_DIR):
            makedirs(MachineLearningData.USER_DATA_DIR)

        pkgs = self.get_pkgs_classification(data_cl.square_percent_function,
                                            labels)

        cache = apt.Cache()

        terms_name = self.get_terms_for_all_pkgs(cache, pkgs.keys())
        debtags_name = self.get_debtags_for_all_pkgs(self.axi, pkgs.keys())

        debtags_name = self.filter_debtags(debtags_name)
        debtags_name = sorted(debtags_name)
        terms_name = self.filter_terms(terms_name)
        terms_name = sorted(terms_name)

        pkgs_classifications = (self.get_pkgs_table_classification(
            self.axi, pkgs, cache, debtags_name, terms_name))

        self.save_pkg_data(terms_name,
                           MachineLearningData.MACHINE_LEARNING_TERMS)
        self.save_pkg_data(debtags_name,
                           MachineLearningData.MACHINE_LEARNING_DEBTAGS)
        self.save_pkg_data(pkgs_classifications,
                           MachineLearningData.PKGS_CLASSIFICATIONS)

        return pkgs_classifications

    def get_pkgs_classification(self, percent_function, labels):
        """Return a dict mapping each package to one of ``labels``.

        Packages are ranked by ``percent_function(modify, access, now)``
        in descending order and cut into ``len(labels)`` equally sized
        groups; the i-th group receives ``labels[i]``. Packages left over
        by the integer division receive the last label.
        """
        pkgs_percent = {}
        pkgs_classification = {}

        time_now = calendar.timegm(time.gmtime())

        pkg_time = PkgTime()
        pkg_data = pkg_time.get_package_data()

        for name, time_values in pkg_data.items():
            modify = time_values[0]
            access = time_values[1]

            pkgs_percent[name] = percent_function(modify, access, time_now)

        pkgs = pkgs_percent.keys()
        pkgs = sorted(pkgs, key=lambda pkg: pkgs_percent[pkg])
        pkgs = list(reversed(pkgs))

        # Explicit floor division: the slice bounds below must be integers.
        size = len(pkgs) // len(labels)
        for index, label in enumerate(labels):
            index_begin = size * index
            index_end = index_begin + size
            classifications = dict.fromkeys(pkgs[index_begin:index_end],
                                            label)
            pkgs_classification.update(classifications)

        # Remainder of the division. The original compared against
        # len(labels), indexed a single package name (so dict.fromkeys
        # iterated over its characters) and used label[-1], the last
        # character of the last label; leftovers were therefore never
        # classified correctly.
        index_begin = size * len(labels)
        if index_begin < len(pkgs):
            classifications = dict.fromkeys(pkgs[index_begin:],
                                            labels[-1])
            pkgs_classification.update(classifications)

        return pkgs_classification

    def get_pkg_data(self, axi, pkg_name, data_type):
        """Return index terms of ``pkg_name`` selected by ``data_type``.

        Terms starting with ``data_type`` are returned with that prefix
        removed. When ``data_type`` is 'term', terms whose first
        character is lowercase are returned unchanged.
        """
        pkg_name = 'XP' + pkg_name

        query = xapian.Query(xapian.Query.OP_OR, [pkg_name])
        enquire = xapian.Enquire(axi)
        enquire.set_query(query)

        mset = enquire.get_mset(0, 10)

        pkg_info = []
        for pkg in mset:
            for term in axi.get_document(pkg.docid).termlist():
                pkg_term = term.term

                if pkg_term.startswith(data_type):
                    pkg_info.append(pkg_term[len(data_type):])
                elif data_type == 'term':
                    if pkg_term[0].islower():
                        pkg_info.append(pkg_term)

        return pkg_info

    def get_pkg_debtags(self, axi, pkg_name):
        return self.get_pkg_data(axi, pkg_name, 'XT')

    def get_pkg_terms(self, cache, pkg_name):
        """Return the stemmed, filtered words of the candidate version's
        description; non-letters are treated as separators."""
        description = cache[pkg_name].candidate.description.strip()
        description = re.sub('[^a-zA-Z]', ' ', description)

        tokens = description.lower().split()
        stems = [
            self.stemmer.stemWord(token) for token in tokens
            if self.filter_description(token)
        ]

        return stems

    def get_pkg_section(self, cache, pkg_name):
        return cache[pkg_name].section

    def get_debtags_name(self, file_path):
        """Return the stripped lines of ``file_path`` as a list."""
        with open(file_path, 'r') as text:
            debtags_name = [debtag.strip() for debtag in text]

        return debtags_name

    def create_row_table_list(self, labels_name, pkg_elements):
        """Return a 0/1 list telling, for each name in ``labels_name``,
        whether it occurs in ``pkg_elements``."""
        # Set membership avoids rescanning pkg_elements for every label.
        present = set(pkg_elements)
        return [1 if name in present else 0 for name in labels_name]

    def get_terms_for_all_pkgs(self, cache, pkgs):
        """Return the set of description terms over all ``pkgs``."""
        pkg_terms = set()
        for pkg in pkgs:
            pkg_terms.update(self.get_pkg_terms(cache, pkg))

        return pkg_terms

    def get_debtags_for_all_pkgs(self, axi, pkgs):
        """Return the set of debtags over all ``pkgs``."""
        pkg_debtags = set()
        for pkg in pkgs:
            pkg_debtags.update(self.get_pkg_debtags(axi, pkg))

        return pkg_debtags

    def filter_terms(self, terms):
        """Return the terms accepted by ``self.filter_description``."""
        return [term for term in terms if self.filter_description(term)]

    def filter_debtags(self, debtags):
        """Return the debtags accepted by ``self.filter_tag``, which is
        called with the 'XT'-prefixed tag."""
        return [tag for tag in debtags if self.filter_tag('XT' + tag)]

    def get_pkgs_table_classification(self, axi, pkgs, cache,
                                      debtags_name, terms_name):
        """Return a dict mapping each package to a row made of its debtag
        flags, its term flags and finally its classification."""
        pkgs_classification = {}

        for key, value in pkgs.items():
            pkgs_classification[key] = []

            debtags = self.get_pkg_debtags(axi, key)
            debtags = self.create_row_table_list(debtags_name, debtags)
            pkgs_classification[key].extend(debtags)

            terms = self.get_pkg_terms(cache, key)
            terms = self.create_row_table_list(list(terms_name), terms)
            pkgs_classification[key].extend(terms)

            pkgs_classification[key].append(value)

        return pkgs_classification

    def save_pkg_data(self, pkg_data, file_path):
        """Pickle ``pkg_data`` into ``file_path``."""
        with open(file_path, 'wb') as text:
            pickle.dump(pkg_data, text)
def setUpClass(self):
    """Open the xapian database found at ``Config().axi``."""
    self.axi = xapian.Database(Config().axi)
import os import re import commands from apprecommender.data_classification import get_time_from_package from apprecommender.config import Config from apprecommender.user import LocalSystem USER_DATA_DIR = Config().user_data_dir class PkgTime: def __init__(self): pass def create_pkg_data(self): user = LocalSystem() user.maximal_pkg_profile() user.no_auto_pkg_profile() user_pkgs = user.pkg_profile pkgs_time = self.get_packages_time(user_pkgs) self.save_package_time(pkgs_time) return pkgs_time def get_best_time(self, pkg): valid_regex = re.compile( r'/usr/bin/|/usr/sbin|/usr/game/|/usr/lib/.+/') pkg_files = commands.getoutput('dpkg -L {}'.format(pkg))
# iterations = 3 # content_based = ['cb'] # collaborative = ['knn_eset'] # hybrid = ['knnco'] # profile_size = [50,100] # neighbors = [50] iterations = 20 content_based = ['cb', 'cbt', 'cbd', 'cbh', 'cb_eset', 'cbt_eset', 'cbd_eset', 'cbh_eset'] collaborative = ['knn_eset', 'knn', 'knn_plus'] hybrid = ['knnco', 'knnco_eset'] profile_size = [10, 20, 40, 60, 80, 100, 140, 170, 200, 240] neighbors = [3, 5, 10, 20, 30, 50, 70, 100, 150, 200] cfg = Config() cfg.strategy = sys.argv[1] # user = # PopconSystem("/root/.app-recommender/popcon-entries/4a/4a67a295ec14826db2aa1d90be2f1623") user = PopconSystem( "/root/.app-recommender/popcon-entries/8b/8b44fcdbcf676e711a153d5db09979d7") # noqa # user = PopconSystem(sys.argv[1]) user.filter_pkg_profile(cfg.pkgs_filter) user.maximal_pkg_profile() if cfg.strategy in content_based: run_content(user, cfg) if cfg.strategy in collaborative: run_collaborative(user, cfg) if cfg.strategy in hybrid:
# collaborative_strategies = ['knn'] strategy_category = sys.argv[1] if strategy_category == "content": strategies = content_strategies sizes = profile_size option_str = "profile" elif strategy_category == "collaborative": strategies = collaborative_strategies sizes = neighbor_size option_str = "neighborhood" else: print "Usage: profile-suite strategy_category sample_file" exit(1) cfg = Config() population_sample = [] sample_file = sys.argv[2] sample_str = sample_file.split('/')[-1] with open(sample_file, 'r') as f: for line in f.readlines(): user_id = line.strip('\n') population_sample.append( os.path.join(cfg.popcon_dir, user_id[:2], user_id)) sample_dir = ("results/%s/%s" % (strategy_category, sample_str)) if not os.path.exists(sample_dir): os.makedirs(sample_dir) for strategy in strategies: cfg.strategy = strategy
import os import pickle import sys import getopt sys.path.insert(0, "{0}/../".format(os.path.dirname(__file__))) from apprecommender.ml.cross_validation import (CrossValidationBVA, CrossValidationBOW) from apprecommender.evaluation import (SimpleAccuracy, Precision, Recall, FPR, F_score) from apprecommender.ml.data import MachineLearningData from apprecommender.ml.bag_of_words import BagOfWords from apprecommender.config import Config BASE_DIR = Config().base_dir CROSS_VALIDATION_FOLDER = BASE_DIR + '/cross_validation_data/' def get_strategy(ml_strategy_str, pkg_data, partition_size, rounds, metrics_list, labels): if ml_strategy_str == 'bow': return CrossValidationBOW(pkg_data, partition_size, rounds, metrics_list, labels) else: return CrossValidationBVA(pkg_data, partition_size, rounds, metrics_list, labels) def get_pkg_data(ml_strategy_str, ml_data, labels): if ml_strategy_str == 'bow':
def load(self):
    """Parse ``sys.argv[1:]`` with getopt and apply the options to Config.

    The parsed ``(option, value)`` pairs are stored in ``self.options``.
    On a getopt syntax error the logger is set up, the error is logged,
    ``self.usage()`` is called and the process exits.
    """
    config = Config()
    short_options = "hdvo:f:b:a:e:p:m:u:l:c:x:w:s:z:r:n:idvo:tdvo"
    long_options = ["help", "debug", "verbose", "output=", "filtersdir=",
                    "pkgsfilter=", "axi=", "dde=", "popconindex=",
                    "popcondir=", "indexmode=", "clustersdir=",
                    "kmedoids=", "maxpopcon=", "weight=", "strategy=",
                    "profile_size=", "profiling=", "neighbors=", "init",
                    "train"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_options,
                                   long_options)
        self.options = opts
    except getopt.GetoptError as error:
        config.set_logger()
        logging.error("Bad syntax: %s" % str(error))
        self.usage()
        sys.exit()

    # getopt reports long options under their declared names, so every
    # handler must test the spelling listed in long_options above.
    for o, p in opts:
        if o in ("-h", "--help"):
            self.usage()
            sys.exit()
        elif o in ("-d", "--debug"):
            config.debug = 1
        elif o in ("-v", "--verbose"):
            config.verbose = 1
        elif o in ("-o", "--output"):
            config.output = p
        elif o in ("-f", "--filtersdir"):
            config.filters_dir = p
        elif o in ("-b", "--pkgsfilter"):
            config.pkgs_filter = p
        elif o in ("-a", "--axi"):
            config.axi = p
        elif o in ("-e", "--dde"):
            config.dde_url = p
        elif o in ("-p", "--popconindex"):
            config.popcon_index = p
        elif o in ("-m", "--popcondir"):
            config.popcon_dir = p
        elif o in ("-u", "--indexmode"):
            # Was "--index_mode", which is not a declared long option.
            config.index_mode = p
        elif o in ("-l", "--clustersdir"):
            config.clusters_dir = p
        elif o in ("-c", "--kmedoids"):
            config.k_medoids = int(p)
        elif o in ("-x", "--maxpopcon"):
            # Was "--max_popcon", which is not a declared long option.
            config.max_popcon = int(p)
        elif o in ("-w", "--weight"):
            config.weight = p
        elif o in ("-s", "--strategy"):
            config.strategy = p
        elif o in ("-z", "--profile_size"):
            config.profile_size = int(p)
        elif o in ("-r", "--profiling"):
            # Was paired with "-z", already taken by --profile_size;
            # "r:" is the otherwise unhandled short option declared above.
            config.profiling = p
        elif o in ("-n", "--neighbors"):
            config.k_neighbors = int(p)
        elif o in ("-i", "--init"):
            # Recognised here but not stored on the config.
            continue
        elif o in ("-t", "--train"):
            continue
        else:
            assert False, "unhandled option"
def __init__(self):
    """Keep a Config instance on the object as ``self.config``."""
    config = Config()
    self.config = config
You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. """ import sys import os import logging sys.path.insert(0, '../') from apprecommender.config import Config from apprecommender.data import PopconXapianIndex if __name__ == '__main__': cfg = Config() cfg.index_mode = "recluster" logging.info("Starting clustering experiments") logging.info("Medoids: %d\t Max popcon:%d" % (cfg.k_medoids, cfg.max_popcon)) cfg.popcon_dir = os.path.expanduser( "~/org/popcon.debian.org/popcon-mail/popcon-entries/") cfg.popcon_index = cfg.popcon_index + ("_%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)) cfg.clusters_dir = cfg.clusters_dir + ("_%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)) pxi = PopconXapianIndex(cfg) logging.info("Overall dispersion: %f\n" % pxi.cluster_dispersion) # Write clustering log output = open(("results/clustering/%dmedoids%dmax" % (cfg.k_medoids, cfg.max_popcon)), 'w')
def setUp(self):
    """Disable the root logger and remember the configured
    ``axi_desktopapps`` value on the test instance."""
    root_logger = logging.getLogger()
    root_logger.disabled = True

    config = Config()
    self.axi_desktopapps = config.axi_desktopapps
def tearDown(self):
    """Write the value kept in ``self.axi_desktopapps`` back to Config."""
    saved_path = self.axi_desktopapps
    Config().axi_desktopapps = saved_path
def test_error_init_on_run_apprec(self):
    """apprec.run() must return ERROR_INIT when ``axi_desktopapps`` is
    set to the bogus value "asd"."""
    config = Config()
    config.axi_desktopapps = "asd"

    outcome = apprec.run()

    self.assertEqual(apprec.ERROR_INIT, outcome)
# 'os' is needed for the os.path calls below; it was not imported in the
# visible part of this script.
import os
import sys
sys.path.insert(0, '../')
import logging
import datetime

from apprecommender.config import Config
from apprecommender.data import FilteredPopconXapianIndex

if __name__ == '__main__':
    # Every input and output lives below ~/.app-recommender/.
    base_dir = os.path.expanduser("~/.app-recommender/")
    axi_path = os.path.join(base_dir, "axi_XD")
    path = os.path.join(base_dir, "popcon_XD")
    popcon_dir = os.path.join(base_dir, "popcon-entries")
    tags_filter = os.path.join(base_dir, "filters/debtags")

    # set up config for logging
    cfg = Config()

    begin_time = datetime.datetime.now()
    logging.info("Popcon indexing started at %s" % begin_time)

    # Build the filtered popcon index from the paths defined above.
    index = FilteredPopconXapianIndex(path, popcon_dir, axi_path,
                                      tags_filter)

    end_time = datetime.datetime.now()
    logging.info("Popcon indexing completed at %s" % end_time)
    logging.info("Number of documents (submissions): %d" %
                 index.get_doccount())

    delta = end_time - begin_time
    logging.info("Time elapsed: %d seconds." % delta.seconds)
return 0 return sum(self.f05) / len(self.f05) def get_mcc_summary(self): if not self.mcc: return 0 return sum(self.mcc) / len(self.mcc) if __name__ == '__main__': if len(sys.argv) < 3: print "Usage: k-suite strategy_str sample_file" exit(1) threshold = 20 iterations = 30 neighbors = [3, 5, 10, 50, 100, 150, 200, 300, 400, 500] cfg = Config() cfg.strategy = sys.argv[1] sample_file = sys.argv[2] population_sample = [] with open(sample_file, 'r') as f: for line in f.readlines(): user_id = line.strip('\n') population_sample.append( os.path.join(cfg.popcon_dir, user_id[:2], user_id)) # setup dictionaries and files roc_summary = {} recommended = {} precision_summary = {} f05_summary = {} mcc_summary = {} sample_dir = ("results/k-suite/%s" % sample_file.split('/')[-1])