Пример #1
0
def genConferenceIdFeature(instances, paperList, maxConferenceId):
    """Emit one feature line per instance holding its paper's conference id.

    Args:
        instances: iterable of indexable records; element 1 is the paper id
            used for the lookup (element 0 is read but not used here). The
            lookup table is keyed by int, so the paper id must compare equal
            to an int key.
        paperList: iterable of indexable rows; column 0 is parsed as the
            paper id and column 3 as that paper's conference id.
        maxConferenceId: handed unchanged to the Feature constructor.

    Returns:
        The Feature after one addLine call per instance and a final fix().

    Raises:
        KeyError: if an instance's paper id has no row in paperList.
    """
    sys.stderr.write("genConferenceIdFeature\n")
    # Tuple elements are evaluated left to right, so the paper id is parsed
    # before the conference id for every row.
    conferenceByPaper = dict(
        (int(row[0]), int(row[3])) for row in paperList
    )
    result = Feature(maxConferenceId)
    for record in instances:
        _unusedAuthor, paper = record[0], record[1]
        # Stored ids are shifted up by one; presumably so that a "missing"
        # id of -1 lands on index 0 -- confirm against the data.
        shiftedId = conferenceByPaper[paper] + 1
        result.addLine([[shiftedId, 1.0]])
    result.fix()
    return result
Пример #2
0
def genJournalIdFeature(instances, paperList, maxJournalId):
    """Emit one feature line per instance holding its paper's journal id.

    Args:
        instances: iterable of indexable records; element 1 is the paper id
            used for the lookup (element 0 is read but not used here). The
            lookup table is keyed by int, so the paper id must compare equal
            to an int key.
        paperList: iterable of indexable rows; column 0 is parsed as the
            paper id and column 4 as that paper's journal id.
        maxJournalId: handed unchanged to the Feature constructor.

    Returns:
        The Feature after one addLine call per instance and a final fix().

    Raises:
        KeyError: if an instance's paper id has no row in paperList.
    """
    sys.stderr.write("genJournalIdFeature\n")
    # Tuple elements are evaluated left to right, so the paper id is parsed
    # before the journal id for every row.
    journalByPaper = dict(
        (int(row[0]), int(row[4])) for row in paperList
    )
    result = Feature(maxJournalId)
    for record in instances:
        _unusedAuthor, paper = record[0], record[1]
        # Stored ids are shifted up by one; presumably so that a "missing"
        # id of -1 lands on index 0 -- confirm against the data.
        shiftedId = journalByPaper[paper] + 1
        result.addLine([[shiftedId, 1.0]])
    result.fix()
    return result
Пример #3
0
def genCoauthorFeature(instances, pathFname, maxAuthorId):
    """Build a feature listing, per instance, the ids grouped under its paper.

    The CSV at pathFname is read once (its first row is skipped as a header)
    and the integer in column 1 is grouped by the integer in column 0. Each
    instance then contributes one feature line containing an [id, 1.0] pair
    for every id grouped under the instance's paper id (element 1).

    NOTE(review): the lookup is keyed by the instance's paper id, so column 0
    of the CSV is presumably a paper id and column 1 an author id -- confirm
    against the file's header row.

    Args:
        instances: iterable of indexable records; element 1 is the paper id
            used for the lookup. It must compare equal to an int key.
        pathFname: path of the CSV file to read.
        maxAuthorId: handed unchanged to the Feature constructor.

    Returns:
        The Feature after one addLine call per instance and a final fix().

    Raises:
        KeyError: if an instance's paper id never appears in column 0 of
            the CSV.
    """
    sys.stderr.write("genCoauthorFeature\n")
    idsByKey = {}
    # The with-block closes the handle even if a row fails to parse; the
    # previous file(...) call leaked it and only exists on Python 2.
    with open(pathFname) as csvFile:
        csvReader = csv.reader(csvFile)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration here.
        next(csvReader, None)
        counter = Counter("paperAuthorDict")
        for row in csvReader:
            counter.inc()
            key, member = int(row[0]), int(row[1])
            idsByKey.setdefault(key, set()).add(member)
    feature = Feature(maxAuthorId)
    counter = Counter("instance", 1000)
    for instance in instances:
        counter.inc()
        paperId = instance[1]
        # A real list (not a lazy map object) is passed on every Python
        # version; the grouped ids are already ints.
        feature.addLine([[groupedId, 1.0] for groupedId in idsByKey[paperId]])
    feature.fix()
    return feature