# ---- Example 1 ----
def splitfilebycategory(filename, combined_results):
    '''
    Split a tweet file into per-category line lists using classifier output.

    Inputs:
        filename: file to split (one json tweet per line, cp1252-encoded)
        combined_results: output from performClassification in Jobs_Classifier

            {
                'apple': [0,0,1,0...],
                'twitter': [0,0,0,...],
            }
    Returns:
        {
            'apple': [json_tweet, json_tweet],
            ...
        }
    '''
    split_map = {NO_CATEGORY: []}
    category_names = []

    # One bucket per trained classifier, plus the NO_CATEGORY bucket above.
    for state in SVMStatesClassifier.objects.all():
        category_names.append(state.classifier_name)
        split_map[state.classifier_name] = []

    with codecs.open(filename, encoding='cp1252') as infile:
        for line_no, raw_line in enumerate(infile):
            for name in category_names:
                if combined_results[name][line_no] == POSITIVE:
                    split_map[name].append(raw_line)
            # NOTE(review): every line is also kept under NO_CATEGORY,
            # regardless of whether a category matched — looks intentional
            # (an "all tweets" bucket), confirm against callers.
            split_map[NO_CATEGORY].append(raw_line)

    for name, lines in split_map.iteritems():
        debugPrint(name, len(lines))
    return split_map
# ---- Example 2 ----
def selectFeatureAndNegSamples(keyinfo,
                               feat_type,
                               feat_size,
                               neg_sample_by_feat_size,
                               debug_category=''):
    '''
    High level function to select features of type and select negative samples using the selected features
    Params:
        keyinfo: output from getKeyInfoForClassifier
        feat_type: name of feature, from CS4242_Assg2.constants
        feat_size: number of features to select
        neg_sample_by_feat_size: number of negative samples to select
        debug_category (optional): category name for writing debug files
    Output:
        tuple
        [0]: [] of selected features
        [1]: [] of selected negative samples
    '''
    positive_tweets = keyinfo[POSITIVE][PROCESSED_TWEETS]
    negative_tweets = keyinfo[NEGATIVE][PROCESSED_TWEETS]

    chosen_features = []
    chosen_neg_samples = []

    # Feature type must be present in all three feature maps to be usable.
    feat_available = (feat_type in keyinfo[UNIQUE_FEATURES]
                      and feat_type in keyinfo[POSITIVE][FEATURES]
                      and feat_type in keyinfo[NEGATIVE][FEATURES])
    if not feat_available:
        debugPrint("%s not in use" % feat_type)
        return (chosen_features, chosen_neg_samples)

    positive_features = keyinfo[POSITIVE][FEATURES][feat_type]

    # Rank features by chi-squared score; entries are (feature, score) pairs.
    chi2_ranked = selectFeatureByChi2(keyinfo[UNIQUE_FEATURES][feat_type],
                                      positive_features,
                                      keyinfo[NEGATIVE][FEATURES][feat_type],
                                      len(positive_tweets),
                                      len(negative_tweets), feat_size)
    writeDebugListToFile(
        "%s_%s_chi2_sel_feat.txt" % (debug_category, feat_type), chi2_ranked)

    chosen_features = [pair[0] for pair in chi2_ranked]
    writeDebugListToFile(
        "%s_%s_chi2_sel_feat_only.txt" % (debug_category, feat_type),
        chosen_features)

    debugPrint("%s feature count (intial): %s" %
               (feat_type, len(positive_features)))
    debugPrint("%s feature count (selected): %s" %
               (feat_type, len(chosen_features)))

    # Keep only negative tweets that actually contain a selected feature.
    chosen_neg_samples = selectTweetIfFeatureExists(negative_tweets,
                                                    neg_sample_by_feat_size,
                                                    chosen_features, feat_type)
    debugPrint("%s selected neg tweet count: %s" %
               (feat_type, len(chosen_neg_samples)))

    return (chosen_features, chosen_neg_samples)
# ---- Example 3 ----
def performSA(test_data_list, category=None):
    '''
    Performs Sentimental Analysis on a given list of json tweets.

    Params:
        test_data_list: list of raw json tweet lines (cp1252-encoded)
        category: category in which input data belongs to.
                    None if uncategorized (run every category's SVM)

    Returns:
    {
        'twitter': [[{TWEET_ID: id, TWEET_FULL: 'lalala',
                              FEATURE_CREATED_AT: 'Sun Oct 16 22:28:08 +0000 2011', TWEET_USER_ID: 14883342}], [0,1,2],],
        'microsoft':[]
    }
    '''

    tweet_features_list = []
    tweet_id_list = []
    svmstates = SVMStatesSentimental.objects.all()
    debugPrint(">> Extracting features")

    # Extract features per tweet; tweet_features_list and tweet_id_list are
    # parallel lists (same index refers to the same tweet).
    for line in test_data_list:
        json_data = json.loads(line, encoding='cp1252')
        featureline = extractSentiFeaturesFromTweet(json_data)
        tweet_features_list.append(featureline)
        tweet_id_list.append({
            TWEET_ID: json_data['id_str'],
            TWEET_FULL: json_data['text'],
            FEATURE_CREATED_AT: json_data['created_at'],
            TWEET_USER_ID: json_data['user']['id']
        })

    # Build a second feature list where each reply also accumulates the
    # feature counts of the tweet it replies to; used when the
    # FEATURE_SA_REPLIES flag is enabled for a classifier.
    tweet_features_list_replyconcat = copy.deepcopy(tweet_features_list)
    for featureline in tweet_features_list_replyconcat:
        if featureline[TWEET_FEATURES][FEATURE_SA_REPLY_TO_ID] != "":
            for fline2 in tweet_features_list_replyconcat:
                if fline2[TWEET_FEATURES][
                        FEATURE_SA_TWEETID_STR] == featureline[TWEET_FEATURES][
                            FEATURE_SA_REPLY_TO_ID]:
                    # if ids match, featureline is a reply to fline2:
                    # merge fline2's feature-value counts into featureline.
                    for key, value in fline2[TWEET_FEATURES].iteritems():
                        if key != FEATURE_SA_TWEETID_STR and key != FEATURE_SA_REPLY_TO_ID and key != FEATURE_SA_CAPS_PERCENTAGE:
                            for key2, value2 in value[FEATURE_VALUE].iteritems(
                            ):
                                if key2 not in featureline[TWEET_FEATURES][
                                        key][FEATURE_VALUE]:
                                    featureline[TWEET_FEATURES][key][
                                        FEATURE_VALUE][key2] = 0
                                featureline[TWEET_FEATURES][key][
                                    FEATURE_VALUE][key2] += value2

    debugPrint(">> Classifying with SVM")
    combined_results = {}
    for svm in svmstates:
        if category is None or category == svm.classifier_name:
            featurematrix_classifier = svm.featurematrix
            features_enabled = svm.features_enabled

            debugPrint("Classifying for %s" %
                       featurematrix_classifier.category)
            if FEATURE_SA_REPLIES in features_enabled:
                svm_matrix = getSVMMatrixForSA(
                    featurematrix_classifier, features_enabled,
                    tweet_features_list_replyconcat)
            else:
                svm_matrix = getSVMMatrixForSA(featurematrix_classifier,
                                               features_enabled,
                                               tweet_features_list)

            debugPrint("Perform SVM Classification for %s" %
                       svm.classifier_name)
            reslist = performSVMClassificationForSA(svm, svm_matrix)

            if FEATURE_SA_TEMPORAL in features_enabled:
                # Temporal smoothing: if the same user tweets again within
                # TEMPORAL_INFO_TIMEFRAME_MINS of their previous tweet, reuse
                # that previous tweet's sentiment.
                ti_dict = {}
                for idx, res in enumerate(reslist):
                    user = tweet_id_list[idx][TWEET_USER_ID]
                    created_at = dateutil.parser.parse(
                        tweet_id_list[idx][FEATURE_CREATED_AT])

                    if user in ti_dict:
                        # BUGFIX: the window must be tested against the
                        # *current* tweet's timestamp — the old code compared
                        # last_tweet_time against a window centred on itself,
                        # which was always true. Also the timeframe constant
                        # is in minutes (per its name), not hours.
                        window = datetime.timedelta(
                            minutes=TEMPORAL_INFO_TIMEFRAME_MINS)
                        last_time = ti_dict[user]['last_tweet_time']
                        if last_time - window <= created_at <= last_time + window:
                            reslist[idx] = ti_dict[user]['sentiment']

                        ti_dict[user]['last_tweet_time'] = created_at
                    else:
                        # First tweet seen for this user: record and continue.
                        ti_dict[user] = {
                            'sentiment': res,
                            'last_tweet_time': created_at
                        }

            combined_results[svm.classifier_name] = (tweet_id_list, reslist)
    return combined_results
# ---- Example 4 ----
def performTrainingForSA(data_filename,
                         label_filename,
                         features_used=FEATURES_SA_DEFAULT,
                         job_id=None):
    '''
    Train one sentiment-analysis SVM per category and persist the states.

    Params:
        data_filename: training data file, one json tweet per line
        label_filename: groundtruth label file
        features_used: feature-type names to extract/train on
        job_id (optional): JobStatusSA row id used to report progress

    Errors are appended to <BASE_DIR>/svmstates/errlog.txt instead of being
    raised, so a failed training job never crashes the caller.
    '''
    # extract & preprocess features
    try:
        debugPrint("feature extraction and preprocessing...")
        if job_id is not None:
            # Running as a background job: drop the inherited DB connection
            # so the ORM opens a fresh one for this process.
            connection.close()
            jobstatus = JobStatusSA.objects.get(id=job_id)

        # BUGFIX: honour the filename parameters instead of the hard-coded
        # PATH_GROUNDTRUTH_TRAINING / PATH_TRAINING_DATA constants, matching
        # performTraining()'s behaviour.
        gen = parseLabelFile(label_filename)
        categories_list = gen['categories']
        groundtruth_list = gen['groundtruth_list']
        all_keyinfo = getKeyInfoForSA(data_filename, categories_list,
                                      groundtruth_list, features_used)

        for category, keyinfo in all_keyinfo.iteritems():
            debugPrint("training category: %s" % category)
            if job_id is not None:
                updateJobStatus(jobstatus, "Training Category: %s" % category)
            pos_tweets = keyinfo[CLASS_SVM_POSITIVE][PROCESSED_TWEETS]
            neg_tweets = keyinfo[CLASS_SVM_NEGATIVE][PROCESSED_TWEETS]
            neu_tweets = keyinfo[CLASS_SVM_NEUTRAL][PROCESSED_TWEETS]

            # feature selection
            debugPrint(">> feature selection")

            # create feature matrix for each tweet
            debugPrint(">> get feature matrix")
            training_tweets = {
                CLASS_SVM_POSITIVE: pos_tweets,
                CLASS_SVM_NEGATIVE: neg_tweets,
                CLASS_SVM_NEUTRAL: neu_tweets
            }

            # Only the selected feature list is used; the second element of
            # the tuple (training tweet selection) is currently disabled.
            selected_feat = selectFeaturesForTraining(keyinfo,
                                                      features_used)[0]
            writeDebugListToFile("%s_sa_selected_feat.txt" % category,
                                 selected_feat)
            writeDebugListToFile("%s_sa_pos_tweets.txt" % category, pos_tweets)
            writeDebugListToFile("%s_sa_neg_tweets.txt" % category, neg_tweets)
            writeDebugListToFile("%s_sa_neu_tweets.txt" % category, neu_tweets)

            feature_matrix = getFeatureMatrixForSA(category, training_tweets,
                                                   selected_feat,
                                                   features_used)

            debugPrint("feature count: %s" %
                       len(feature_matrix.feature_to_id_map))
            writeDebugCountDictToFile("%s_sa_feature_to_id_map.txt" % category,
                                      feature_matrix.feature_to_id_map)
            writeDebugListToFile(
                "%s_sa_tweet_feature_matrix_list.txt" % category,
                feature_matrix.tweet_feature_matrix_list)

            # create svm matrix
            debugPrint(">> create svm matrix")
            if job_id is not None:
                updateJobStatus(
                    jobstatus,
                    "Creating SVM Matrix for category %s" % (category))
            svm_matrix = getSVMMatrixForSA(feature_matrix, features_used)
            createSVMForSA(category, feature_matrix, svm_matrix, features_used)
            writeDebugListToFile("%s_sa_svm_matrix_X.txt" % category,
                                 svm_matrix[SVM_X])
            writeDebugListToFile("%s_sa_svm_matrix_Y.txt" % category,
                                 svm_matrix[SVM_Y])

            debugPrint("training completed for category: %s" % category)
            if job_id is not None:
                updateJobStatus(
                    jobstatus,
                    "Training completed for category: %s" % (category))

        if job_id is not None:
            updateJobStatus(jobstatus, "Completed!")
    except Exception:
        # Narrowed from a bare except (which also swallowed SystemExit and
        # KeyboardInterrupt); 'with' closes the log handle the original
        # open() leaked.
        with open("%s/svmstates/errlog.txt" % (BASE_DIR), "a") as errlog:
            traceback.print_exc(file=errlog)
# ---- Example 5 ----
def selectFeaturesForTraining(keyinfo, feat_used):
    '''
    Collect the features to train on for one category.

    Feature *selection* (chi2 ranking / per-type caps) is currently
    disabled: every unique feature of each enabled feature type is kept,
    and the returned training_tweets dict always contains empty lists
    (callers build their own tweet sets).

    Params:
        keyinfo: one category's entry from getKeyInfoForSA
        feat_used: feature-type names to include
    Returns:
        tuple
        [0]: [] of selected features (all unique features of enabled types)
        [1]: {CLASS_SVM_POSITIVE/NEUTRAL/NEGATIVE: []} — always empty,
             kept for interface compatibility with callers
    '''
    # Cleanup: the original also built sample_size_set, tweet_set,
    # size/max_size and a per-type feature_set that were only referenced by
    # commented-out selection code; those dead locals have been removed.
    all_features = keyinfo[UNIQUE_FEATURES]

    selected_feat = []
    training_tweets = {
        CLASS_SVM_POSITIVE: [],
        CLASS_SVM_NEUTRAL: [],
        CLASS_SVM_NEGATIVE: []
    }

    for feat_type in feat_used:
        # Replies/temporal are post-processing flags, not extractable
        # feature types, so they are skipped here.
        if feat_type == FEATURE_SA_REPLIES or feat_type == FEATURE_SA_TEMPORAL:
            continue

        if feat_type in all_features:
            sel_feat = all_features[feat_type]
            selected_feat += sel_feat
            debugPrint("%s sa feature count: %s" % (feat_type, len(sel_feat)))

    return selected_feat, training_tweets
# ---- Example 6 ----
def getKeyInfoForSA(data_filename, categories_list, groundtruth_list,
                    features_used):
    '''
    Extract per-tweet features and bucket them by category and sentiment.

    Params:
        data_filename: input data file in json format (cp1252-encoded,
                       one tweet per line, aligned with groundtruth_list)
        categories_list: [cat1, cat2, cat3]
        groundtruth_list: [{CATEGORY: category, POLARITY: polarity, TWEET_ID: tweetid}]
        features_used: features list, e.g. FEATURES_SA_DEFAULT

    Returns:
    {
        'category' : {
            CLASS_SVM_POSITIVE / CLASS_SVM_NEGATIVE / CLASS_SVM_NEUTRAL: {
                PROCESSED_TWEETS : [{
                    TWEET_FULL: This was a triumph,
                    TWEET_FEATURES: {
                        FEATURE_TEXT: {FEATURE_COUNT: number, FEATURE_VALUE: {feature: count}} ,
                    }
                }],
                FEATURES: {
                    FEATURE_TEXT: { feature: df } ...
                }
            },
            UNIQUE_FEATURES: {
                FEATURE_TEXT: { feature: df } , ...
            }
        }
    }
    A NO_CATEGORY entry aggregates the tweets of every category.
    '''
    returnmap = {}

    # BUGFIX: operate on a local copy — the original appended NO_CATEGORY to
    # the caller's list in place, mutating the argument.
    categories = list(categories_list)
    categories.append(NO_CATEGORY)

    # Maps a groundtruth polarity to the sentiment bucket it belongs in;
    # unknown polarities are skipped (same as the original if/elif chain).
    polarity_to_class = {
        POLARITY_POSITIVE: CLASS_SVM_POSITIVE,
        POLARITY_NEGATIVE: CLASS_SVM_NEGATIVE,
        POLARITY_NEUTRAL: CLASS_SVM_NEUTRAL
    }

    # Initialize category dictionaries
    for category in categories:
        returnmap[category] = {
            CLASS_SVM_POSITIVE: {PROCESSED_TWEETS: []},
            CLASS_SVM_NEGATIVE: {PROCESSED_TWEETS: []},
            CLASS_SVM_NEUTRAL: {PROCESSED_TWEETS: []}
        }

    with codecs.open(data_filename, encoding='cp1252') as k:
        debugPrint(">> extracting features from tweet")
        for idx, line in enumerate(k):
            # extract all features from tweet
            json_data = json.loads(line, encoding='cp1252')
            tweet_keyinfo = extractSentiFeaturesFromTweet(
                json_data, features_used)

            # Classify into sentiment positive/negative/neutral, both under
            # the tweet's own category and the NO_CATEGORY aggregate.
            gt_item = groundtruth_list[idx]
            svm_class = polarity_to_class.get(gt_item[POLARITY])
            if svm_class is not None:
                returnmap[NO_CATEGORY][svm_class][
                    PROCESSED_TWEETS].append(tweet_keyinfo)
                returnmap[gt_item[CATEGORY]][svm_class][
                    PROCESSED_TWEETS].append(tweet_keyinfo)

        # collate unique features
        debugPrint(">> collating unique features...")
        for category in categories:
            debugPrint(">> collating for %s" % category)
            unique_features_dict = {}
            for feature in features_used:
                # Replies/temporal are post-processing flags, not
                # extractable feature types.
                if feature == FEATURE_SA_REPLIES or feature == FEATURE_SA_TEMPORAL:
                    continue
                unique_features_dict[feature] = {}

            # The same unique_features_dict is passed to all three calls so
            # it accumulates features across the classes (same as original;
            # order preserved: positive, negative, neutral).
            for svm_class in (CLASS_SVM_POSITIVE, CLASS_SVM_NEGATIVE,
                              CLASS_SVM_NEUTRAL):
                returnmap[category][svm_class][
                    FEATURES] = getUniqueFeaturesForClass(
                        returnmap[category][svm_class][PROCESSED_TWEETS],
                        unique_features_dict, features_used)

            # resolve global unique features
            returnmap[category][UNIQUE_FEATURES] = unique_features_dict

    return returnmap
# ---- Example 7 ----
def performTraining(data_filename,
                    label_filename,
                    features_used=FEATURES_DEFAULT,
                    job_id=None):
    '''
    Train one topic-classification SVM per category and persist the states.

    Params:
        data_filename: training data file, one json tweet per line
        label_filename: groundtruth label file
        features_used: feature-type names to extract/train on
        job_id (optional): JobStatus row id used to report progress

    Errors are appended to <BASE_DIR>/svmstates/errlog.txt instead of being
    raised, so a failed training job never crashes the caller.
    '''
    try:
        # extract & preprocess features
        debugPrint("feature extraction and preprocessing...")
        if job_id is not None:
            # Running as a background job: drop the inherited DB connection
            # so the ORM opens a fresh one for this process.
            connection.close()
            jobstatus = JobStatus.objects.get(id=job_id)
            updateJobStatus(jobstatus, "Acquiring Key info")

        gen = parseLabelFile(label_filename)
        categories_list = gen['categories']
        groundtruth_list = gen['groundtruth_list']
        all_keyinfo = getKeyInfoForClassifier(data_filename, categories_list,
                                              groundtruth_list, features_used)

        for category, keyinfo in all_keyinfo.iteritems():
            debugPrint("training category: %s" % category)
            if job_id is not None:
                updateJobStatus(jobstatus,
                                "Training category: %s" % (category))
            pos_tweets = keyinfo[POSITIVE][PROCESSED_TWEETS]
            pos_sample_size = len(pos_tweets)
            neg_tweets = keyinfo[NEGATIVE][PROCESSED_TWEETS]
            neg_sample_size = len(neg_tweets)

            # feature selection: pick features and a balanced negative set
            debugPrint(">> feature selection")
            if job_id is not None:
                updateJobStatus(
                    jobstatus,
                    "Feature selection on category: %s" % (category))
            select_results = selectTrainingFeaturesAndNegSamples(
                keyinfo, features_used, pos_sample_size, category)
            selected_feat = select_results[0]
            selected_neg_tweets = select_results[1]

            writeDebugListToFile("%s_selected_feat.txt" % category,
                                 selected_feat)
            writeDebugListToFile("%s_selected_neg_tweets.txt" % category,
                                 selected_neg_tweets)
            writeDebugListToFile("%s_pos_tweets.txt" % category, pos_tweets)

            # create feature matrix for each tweet
            debugPrint(">> get feature matrix")
            training_tweets = {
                POSITIVE: pos_tweets,
                NEGATIVE: selected_neg_tweets
            }
            feature_matrix = getFeatureMatrix(category, training_tweets,
                                              selected_feat, features_used)
            writeDebugCountDictToFile("%s_feature_to_id_map.txt" % category,
                                      feature_matrix.feature_to_id_map)
            writeDebugListToFile("%s_tweet_feature_ids_list.txt" % category,
                                 feature_matrix.tweet_feature_ids_list)

            debugPrint('feature count: %s' %
                       len(feature_matrix.feature_to_id_map))
            debugPrint("positive tweets count: %s" % pos_sample_size)
            debugPrint("negative tweets count: %s" % len(selected_neg_tweets))

            # create svm matrix and train/persist the classifier
            debugPrint(">> create svm matrix")

            if job_id is not None:
                updateJobStatus(
                    jobstatus,
                    "Creating SVM Matrix for category %s" % (category))
            svm_matrix = getSVMMatrixForClassification(feature_matrix)
            writeDebugListToFile("%s_svm_matrix_X.txt" % category,
                                 svm_matrix[SVM_X])
            writeDebugListToFile("%s_svm_matrix_Y.txt" % category,
                                 svm_matrix[SVM_Y])
            createSVM(category, feature_matrix, svm_matrix)

            debugPrint("training completed for category: %s" % category)
            if job_id is not None:
                updateJobStatus(
                    jobstatus,
                    "Training completed for category: %s" % (category))

        if job_id is not None:
            updateJobStatus(jobstatus, "Completed!")

    except Exception:
        # Narrowed from a bare except (which also swallowed SystemExit and
        # KeyboardInterrupt); 'with' closes the log handle the original
        # open() leaked.
        with open("%s/svmstates/errlog.txt" % (BASE_DIR), "a") as errlog:
            traceback.print_exc(file=errlog)