def grep_articles(self):
    # Grep each docid against the Washington Post article dump; matching lines
    # are appended to wp.txt. The *.txt glob is expanded by the shell.
    for docid in self.docid_set:
        self.logger.info('%s', docid)
        cmd = '/usr/bin/grep'
        param1 = docid
        param2 = FileLocations.get_dropbox_datasets_path() + \
            'washingtonpost/WashingtonPost/data/*.txt'
        self.logger.info('%s %s %s', cmd, param1, param2)
        full_cmd = cmd + ' ' + param1 + ' ' + param2 + ' >> wp.txt'
        process = subprocess.Popen(full_cmd, shell=True, stdout=subprocess.PIPE)
        process.wait()
        self.logger.info('return code %d', process.returncode)

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.propagate = False
    logger.setLevel(logging.INFO)

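    # Build the document list (Dexter or Washington Post), the golden spotter
    # and the golden salience annotations used when training the model.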
    smb = BaseDocToSentiment()
    datasetDexter = DatasetDexter
    wikipediaDataset = WikipediaDataset()

    if use_dexter_dataset:
        document_list = datasetDexter.get_dexter_dataset(
            path=FileLocations.get_dropbox_dexter_path())
    if use_wahington_post_dataset:
        document_list = datasetDexter.get_dexter_dataset(
            path=FileLocations.get_dropbox_datasets_path() + 'washingtonpost/',
            filename="washington_post.json")

    spotter = GoldenSpotter(document_list, wikipediaDataset)
    golden_saliency_by_entid_by_docid = datasetDexter.get_golden_saliency_by_entid_by_docid(
        document_list, wikipediaDataset)

    if train_model:
        salience_by_entity_by_doc_id = smb.build_output_using_dexter_dataset(
            spotter,
            golden_saliency_by_entid_by_docid,
            output_filename,
            document_to_feature_converter,
            None,
            train_docid_set,
            wikipediaDataset)
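
    # SEL feature extraction: rebuild the Washington Post document list with
    # the golden spotter and extract light/heavy features for the configured
    # docnum range (min_number/max_number are assumed to be set earlier).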
    aws_util = AWSUtil()
    smb = SelModelBuilder()


    # if build_model:
    #     sentiment_processor = smb.train_and_save_model(filename)
    # else:
    #     sentiment_processor = SentimentProcessor()
    #     sentiment_processor.load_model(filename)

    dd = smb.get_dexter_datset()
    wikipediaDataset = WikipediaDataset()

    document_list = dd.get_dexter_dataset(
        path=FileLocations.get_dropbox_datasets_path() + 'washingtonpost/',
        filename="washington_post.json")
    spotter = GoldenSpotter(document_list, wikipediaDataset)

    golden_saliency_by_entid_by_docid = dd.get_golden_saliency_by_entid_by_docid(
        document_list, wikipediaDataset)

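    # Output paths for the combined, heavy and light SEL feature files,
    # keyed by the docnum range being processed.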
    output_filename = FileLocations.get_dropbox_intermediate_path() + \
        'sel_all_features_golden_spotter.washington_post.docnum.' + str(min_number) + '-' + str(max_number) + '.txt'
    heavy_feature_filename = FileLocations.get_temp_path() + \
        'sel_heavy_features_golden_spotter.washington_post.docnum.' + str(min_number) + '-' + str(max_number) + '.txt'
    light_feature_filename = FileLocations.get_temp_path() + \
        'sel_light_features_golden_spotter.washington_post.docnum.' + str(min_number) + '-' + str(max_number) + '.txt'

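    # Feature converter: turns each spotted document into SEL feature vectors;
    # the light/heavy feature file names above are passed through for output.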
    document_to_feature_converter = SelFeatureExtractor(
        spotter, binary_classifier_threshold=0.5, min_candidates_to_pass_through=5000,
        binary_classifier=None, light_feature_filename=light_feature_filename,
        heavy_feature_filename=heavy_feature_filename, num_light_features=23,
        break_early=break_early)