Пример #1
0
def build_key2data(keywords, names):
    """Collect the generated feature data of every listed person.

    One ``featureFilter.Filter`` is created per ``LONG_TIME`` constant
    (duration, digraph, trigraph, digraphRatio), all sharing ``keywords``.
    For each name the events are read from ``main_directory/<name>`` using
    the first filter; every filter is then paired positionally with an
    entry of ``featureGenerator.ALL_TYPES`` and handed to
    ``featureGenerator.create_data``.

    Returns a dict mapping each name to the merged ``create_data`` results.
    Entries produced by a later (filter, feature type) pair overwrite equal
    keys from earlier pairs, as with ``dict.update``.
    """
    filters = [
        featureFilter.Filter(long_time=source.LONG_TIME, keywords=keywords)
        for source in (duration, digraph, trigraph, digraphRatio)
    ]
    feature_types = featureGenerator.ALL_TYPES

    people = {}
    for name in names:
        print("Building " + name)
        person_dir = os.path.join(main_directory, name)
        # Parsing always goes through the first filter only.
        events, _ = parseFile.get_events(
            person_dir,
            filters[0],
            is_uniform=False,
            with_sample=False,
        )
        merged = {}
        people[name] = merged
        # zip stops at the shorter of the two sequences.
        for feature_filter, feature_type in zip(filters, feature_types):
            merged.update(
                featureGenerator.create_data(events, feature_filter,
                                             feature_type))
    return people
Пример #2
0
def generate_people(filters, feature_types, is_uniform=False):
    """Build a ``Person`` for every sub-directory of ``main_directory``.

    Args:
        filters: Non-empty sequence of filters. The first one is used to
            parse the events; the whole sequence is passed on to ``Person``.
        feature_types: Passed through to ``Person`` unchanged.
        is_uniform: Forwarded to both ``parseFile.get_events`` and
            ``Person``.

    Returns:
        A list with one ``Person`` per directory found directly inside
        ``main_directory``, in ``os.listdir`` order.

    Raises:
        IndexError: If ``filters`` is empty.
    """
    # The filters are expected to be interchangeable for parsing purposes,
    # so the first one serves as the parsing filter.
    default_filter = filters[0]
    people = []
    for name in os.listdir(main_directory):
        path = os.path.join(main_directory, name)
        if not os.path.isdir(path):
            # Plain files are not people; skip them without announcing a
            # build that never happens.
            continue
        print("Building " + name)
        events, _ = parseFile.get_events(path, default_filter, is_uniform,
                                         with_sample=False)
        people.append(Person(name, events, filters, feature_types, is_uniform))
    return people
Пример #3
0
def build_key2data(keywords, names):
    """Map every name in ``names`` to its generated feature data.

    Four ``featureFilter.Filter`` objects are built (duration, digraph,
    trigraph and digraphRatio ``LONG_TIME``), each with the same
    ``keywords``. Events for a person are loaded from
    ``main_directory/<name>`` with the first filter, then each filter is
    matched by position with an entry of ``featureGenerator.ALL_TYPES`` and
    passed to ``featureGenerator.create_data``.

    Returns a dict ``{name: merged create_data results}``; later results
    replace earlier ones on equal keys.
    """

    def make_filter(long_time):
        # All filters share the keywords and differ only in long_time.
        return featureFilter.Filter(long_time=long_time, keywords=keywords)

    filters = [
        make_filter(duration.LONG_TIME),
        make_filter(digraph.LONG_TIME),
        make_filter(trigraph.LONG_TIME),
        make_filter(digraphRatio.LONG_TIME),
    ]
    feature_types = featureGenerator.ALL_TYPES

    people = {}
    for person_name in names:
        print("Building " + person_name)
        source_path = os.path.join(main_directory, person_name)
        # Only the first filter is used when reading the raw events.
        parsed = parseFile.get_events(source_path, filters[0],
                                      is_uniform=False, with_sample=False)
        events, _ = parsed
        people[person_name] = {}
        for current_filter, feature_type in zip(filters, feature_types):
            generated = featureGenerator.create_data(events, current_filter,
                                                     feature_type)
            people[person_name].update(generated)
    return people
Пример #4
0
# Script section: build per-person feature data and pickle it to disk.
names = [
    'Gal Oz-Ari', 'Gil Boazi', 'Nir Yaron', 'Guy Levanon',
    'Yonathan Schwammenthal', 'Matan Levine', 'Ohad Ben-Or', 'Dor Aharonson',
    'Yuval Itkin', 'Yonatan Caspi', 'Noam Greenberg', 'Adi Asher', 'Yovel Rom',
]

# Filter the events and get feature arrays for the people above.
keywords = ('java', 'Java', 'Eclipse', 'IntelliJ', 'IDEA')
# NOTE(review): `languages` is assigned but not passed to the filters below;
# confirm whether they were meant to receive it.
languages = [featureFilter.ENGLISH]
filters = [
    featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords),
    featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords),
    featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords),
    featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords),
]
feature_types = featureGenerator.ALL_TYPES
people = dict()  # name -> merged results of featureGenerator.create_data
for name in names:
    print("Building " + name)
    path = os.path.join(main_directory, name)
    # Events are always parsed with the first filter.
    events, _ = parseFile.get_events(path, filters[0], is_uniform=False,
                                     with_sample=False)
    people[name] = dict()
    # NOTE(review): the loop variable `filter` shadows the builtin for the
    # rest of the module; the name is kept because code further down may
    # read it.
    for filter, feature_type in zip(filters, feature_types):
        keys2data = featureGenerator.create_data(events, filter, feature_type)
        people[name].update(keys2data)

loc = r"F:\Clouds\Dropbox\SMOP\AnalysisCompressed\eclipse_english_people2key2data.p"
# Use a context manager so the file is flushed and closed even if pickling
# fails part-way through.
with open(loc, "wb+") as out_file:
    pickle.dump(people, out_file)

#find best features (most common of each type)
def best_keys2(people, feature_type):
    #for people from featureGenerator.create_data
    keys = dict()
    for p in people.values():
        for k,li in p.items():
            if k[-1] == feature_type:
Пример #5
0
# Script section: build per-person feature data for all languages and pickle
# it to disk.
names = names3

# Filter the events and get feature arrays for the people above.
keywords = tuple()  # alternative: ('java', 'Java', 'Eclipse', 'IntelliJ', 'IDEA')
languages = featureFilter.ALL_LANGUAGES  # alternative: [featureFilter.ENGLISH]
filters = [
    featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords,
                         languages=languages),
    featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords,
                         languages=languages),
    featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords,
                         languages=languages),
    featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords,
                         languages=languages),
]
feature_types = featureGenerator.ALL_TYPES
people = dict()  # name -> merged results of featureGenerator.create_data
for name in names:
    print("Building " + name)
    path = os.path.join(main_directory, name)
    # Events are always parsed with the first filter.
    events, _ = parseFile.get_events(path, filters[0], is_uniform=False,
                                     with_sample=False)
    people[name] = dict()
    # NOTE(review): the loop variable `filter` shadows the builtin for the
    # rest of the module; the name is kept because code further down may
    # read it.
    for filter, feature_type in zip(filters, feature_types):
        keys2data = featureGenerator.create_data(events, filter, feature_type)
        people[name].update(keys2data)

# Raw string: the backslashes in this Windows path are literal. The non-raw
# form relied on invalid escape sequences (\C, \D, \S, \A, \g), which Python
# warns about and plans to reject.
loc = r"F:\Clouds\Dropbox\SMOP\AnalysisCompressed\general_people2key2data.p"
# Use a context manager so the file is flushed and closed even if pickling
# fails part-way through.
with open(loc, "wb+") as out_file:
    pickle.dump(people, out_file)


#find best features (most common of each type)
def best_keys2(people, feature_type):
    #for people from featureGenerator.create_data
    keys = dict()
    for p in people.values():
        for k,li in p.items():