def batch_extract_content(websiteElementsPath, urlData):
    """Run the keyterm pipeline over every URL in ``urlData["URL"]``.

    For each URL the page is crawled, candidate keyterms are extracted and
    their features are computed (with ``lang=utils.LANG_FR``). When the
    resulting candidate frame is not empty, a ``RelevanceFilter`` built with
    ``topk=10`` selects the keyterms to keep.

    Args:
        websiteElementsPath: Passed unchanged to ``WebsiteDataExtractor``.
        urlData: Object indexable by ``"URL"`` that yields the URLs to
            process (presumably a pandas DataFrame -- confirm with callers).

    Returns:
        A ``pd.DataFrame`` built from ``urlData["URL"]`` with an added
        ``"keyterms"`` column. Each entry is the selected keyterms of that
        URL joined by commas, or an empty string when the candidate frame
        was empty.
    """
    # NOTE: a parenthesised single-argument print emits the same text when
    # parsed as a Python 2 print statement.

    ## 1) Extract webpage data
    print("[INFO] ==== Extracting webpage data ====")
    crawler = WebsiteDataExtractor(websiteElementsPath)

    out = pd.DataFrame(urlData["URL"])

    joined_terms = []
    for url in urlData["URL"]:
        print(url)

        page_data = crawler.crawlPage(url)

        ## 2) Extract candidate keyterms
        print("[INFO] ==== Extracting candidate keyterms ====")
        extractor = KeyTermExtractor(page_data)
        extractor.execute()

        ## 3) Compute candidate keyterm features
        print("[INFO] ==== Computing candidate keyterm features ====")
        candidates = KeyTermFeatures(
            url, page_data, extractor.result_dict, lang=utils.LANG_FR
        ).compute_features()

        if candidates.empty:
            # Nothing to rank: this URL contributes an empty string below.
            top_terms = []
        else:
            ## 4) Filter for relevancy and output top 10 keyterms
            print("[INFO] ==== Selecting relevant keyterms ====")
            top_terms = RelevanceFilter(
                candidates,
                "dataset/keyterm-classifier-model-v2.pickle",
                topk=10,
            ).select_relevant()

        joined_terms.append(",".join(top_terms))

    out["keyterms"] = joined_terms
    return out
from keyterm_extractor import KeyTermExtractor, KeyTermExtractor2
from keyterm_features import KeyTermFeatures
from keyterm_classifier import RelevanceFilter


# Demo entry point: runs both keyterm extractors on a single hard-coded
# article URL and pretty-prints what each one produced.
# NOTE(review): `WebsiteDataExtractor` and `pprint` are not imported in the
# visible part of the file -- presumably imported elsewhere; confirm.
if __name__ == "__main__":
    # Fixed sample page used for the demo run.
    url = 'http://www.generation-nt.com/blackview-a8-smartphone-petit-budget-pas-cher-mwc-2016-actualite-1925283.html'

    ## 1) Extract webpage data
    print "[INFO] ==== Extracting webpage data ===="
    data_extractor = WebsiteDataExtractor("dataset/WebsiteElementsPathDef.xml")
    data_dict = data_extractor.crawlPage(url)

    ## 2) Extract candidate keyterms
    print "[INFO] ==== Extracting candidate keyterms ===="
    # First extractor: built from the crawled page data only.
    keyterm_extractor = KeyTermExtractor(data_dict)
    keyterm_extractor.execute()

    # Second extractor: same page data, with the language given as "french".
    keyterm_extractor2 = KeyTermExtractor2(data_dict, lang="french")
    keyterm_extractor2.execute()

    ## 3) Dump the result of each extractor for side-by-side inspection
    print "======== Results from Extractor 1 ========"
    pprint.pprint(keyterm_extractor.result_dict)
    # Disabled debug output: number of terms per n-gram size (1 to 4).
    # print "Nr t1grams: " + str(len(keyterm_extractor.result_dict['t1gram']['term']))
    # print "Nr t2grams: " + str(len(keyterm_extractor.result_dict['t2gram']['term']))
    # print "Nr t3grams: " + str(len(keyterm_extractor.result_dict['t3gram']['term']))
    # print "Nr t4grams: " + str(len(keyterm_extractor.result_dict['t4gram']['term']))

    print "======== Results from Extractor 2 ========"
    pprint.pprint(keyterm_extractor2.result_dict)
    # Disabled debug output: number of distinct 1-gram terms from extractor 2.
    # print "Nr t1grams: " + str(len(set(keyterm_extractor2.result_dict['t1gram'])))