def __init__(self, dataset_dir, users_file='users.tsv.gz', excluded_users=None,
             default_location_source='geo-median'):
    """Load the optional dataset settings and record data-file paths.

    Only ``dataset.json`` is read here; every other file is merely located
    (its path is stored on the instance) and not opened.

    Args:
        dataset_dir: directory in which the dataset files are looked up.
        users_file: name of the users file, joined onto ``dataset_dir``.
        excluded_users: object stored as ``self.excluded_users``. When
            omitted or ``None`` a new empty ``set`` is created for this
            instance.
        default_location_source: label inserted into the home-locations
            file name, ``users.home-locations.<label>.tsv.gz``.
    """
    settings_fname = os.path.join(dataset_dir, 'dataset.json')
    if os.path.exists(settings_fname):
        # Use a context manager so the handle is closed deterministically
        # instead of lingering until garbage collection.
        with open(settings_fname, 'r') as settings_file:
            self._settings = jsonlib.load(settings_file)
    else:
        self._settings = {}

    # prepare for all data
    self._dataset_dir = dataset_dir
    self._users_fname = os.path.join(dataset_dir, users_file)
    self._users_with_locations_fname = os.path.join(
        dataset_dir,
        'users.home-locations.' + default_location_source + '.tsv.gz')
    self._mention_network_fname = os.path.join(
        dataset_dir, 'mention_network.elist')
    self._bi_mention_network_fname = os.path.join(
        dataset_dir, 'bi_mention_network.elist')

    # A `set()` default in the signature would be created once and shared
    # by every instance; build a fresh one per instance instead. A set
    # supplied by the caller is kept by reference, as before.
    if excluded_users is None:
        excluded_users = set()
    self.excluded_users = excluded_users
def __init__(self, dataset_dir, users_file=None):
    """Load the optional dataset settings and record data-file paths.

    Only ``dataset.json`` is read here; the posts, users and mention
    network files are merely located (their paths are stored).

    Args:
        dataset_dir: directory in which the dataset files are looked up.
        users_file: path of the users file, used exactly as given (it is
            not joined onto ``dataset_dir``). When ``None``, the path
            defaults to ``users.json.gz`` inside ``dataset_dir``.
    """
    settings_fname = os.path.join(dataset_dir, 'dataset.json')
    if os.path.exists(settings_fname):
        # Use a context manager so the handle is closed deterministically
        # instead of lingering until garbage collection.
        with open(settings_fname, 'r') as settings_file:
            self._settings = jsonlib.load(settings_file)
    else:
        self._settings = {}

    # prepare for all data
    self._posts_fname = os.path.join(dataset_dir, 'posts.json.gz')
    if users_file is None:
        self._users_fname = os.path.join(dataset_dir, 'users.json.gz')
    else:
        # NOTE: We should probably do some format verification here
        self._users_fname = users_file
    self._mention_network_fname = os.path.join(
        dataset_dir, 'mention_network.elist')
def __init__(self, dataset_dir, users_file=None):
    """Load the optional dataset settings and record data-file paths.

    Only ``dataset.json`` is read here; the posts, users and mention
    network files are merely located (their paths are stored).

    Args:
        dataset_dir: directory in which the dataset files are looked up.
        users_file: path of the users file, used exactly as given (it is
            not joined onto ``dataset_dir``). When ``None``, the path
            defaults to ``users.json.gz`` inside ``dataset_dir``.
    """
    settings_fname = os.path.join(dataset_dir, 'dataset.json')
    if os.path.exists(settings_fname):
        # Use a context manager so the handle is closed deterministically
        # instead of lingering until garbage collection.
        with open(settings_fname, 'r') as settings_file:
            self._settings = jsonlib.load(settings_file)
    else:
        self._settings = {}

    # prepare for all data
    self._posts_fname = os.path.join(dataset_dir, 'posts.json.gz')
    if users_file is None:
        self._users_fname = os.path.join(dataset_dir, 'users.json.gz')
    else:
        # NOTE: We should probably do some format verification here
        self._users_fname = users_file
    self._mention_network_fname = os.path.join(
        dataset_dir, 'mention_network.elist')
def __init__(self, dataset_dir, users_file='users.tsv.gz', excluded_users=None,
             default_location_source='geo-median'):
    """Load the optional dataset settings and record data-file paths.

    Only ``dataset.json`` is read here; every other file is merely located
    (its path is stored on the instance) and not opened.

    Args:
        dataset_dir: directory in which the dataset files are looked up.
        users_file: name of the users file, joined onto ``dataset_dir``.
        excluded_users: object stored as ``self.excluded_users``. When
            omitted or ``None`` a new empty ``set`` is created for this
            instance.
        default_location_source: label inserted into the home-locations
            file name, ``users.home-locations.<label>.tsv.gz``.
    """
    settings_fname = os.path.join(dataset_dir, 'dataset.json')
    if os.path.exists(settings_fname):
        # Use a context manager so the handle is closed deterministically
        # instead of lingering until garbage collection.
        with open(settings_fname, 'r') as settings_file:
            self._settings = jsonlib.load(settings_file)
    else:
        self._settings = {}

    # prepare for all data
    self._dataset_dir = dataset_dir
    self._users_fname = os.path.join(dataset_dir, users_file)
    self._users_with_locations_fname = os.path.join(
        dataset_dir,
        'users.home-locations.' + default_location_source + '.tsv.gz')
    self._mention_network_fname = os.path.join(
        dataset_dir, 'mention_network.elist')
    self._bi_mention_network_fname = os.path.join(
        dataset_dir, 'bi_mention_network.elist')

    # A `set()` default in the signature would be created once and shared
    # by every instance; build a fresh one per instance instead. A set
    # supplied by the caller is kept by reference, as before.
    if excluded_users is None:
        excluded_users = set()
    self.excluded_users = excluded_users