Exemplo n.º 1
0
        }, {
            DataSource.NAME: 'Twitter',
            DataSource.PATH: '../../data/SemEval-2014.csv'
        }]

        # Rebuild each source descriptor: keep its name, make the path
        # absolute, and record the file extension (as returned by
        # os.path.splitext, i.e. including the leading dot) as its type.
        srcs = [
            {
                DataSource.NAME: entry[DataSource.NAME],
                DataSource.PATH: os.path.abspath(entry[DataSource.PATH]),
                DataSource.TYPE: os.path.splitext(entry[DataSource.PATH])[1],
            }
            for entry in srcs
        ]

        # Report, per source, the statistics stored under the REGULAR
        # ("usable") and BROKEN ("unusable") keys of getStats().
        for entry in srcs:
            header = "Name: '%s'\nType: '%s'" % (
                entry[DataSource.NAME], entry[DataSource.TYPE])
            print(header)
            source = DataSource(entry)
            stats = source.getStats()
            sections = (
                ("\nUsable data:", DataSource.REGULAR),
                ("\nUnusable data:", DataSource.BROKEN),
            )
            for title, statsKey in sections:
                print(title)
                pprint.pprint(stats[statsKey])
            print("-----------------")

        print("Finished!")

        for src in srcs:
            srcName = src[DataSource.NAME]
            dSrc = DataSource(src)
            srcStats = dSrc.getStats()
            kws, ocs = [], []
            for word in srcStats[DataSource.REGULAR].items():
                ocs.append(word[1][DataStats.AMOUNT])
Exemplo n.º 2
0
             DataSource.PATH:'../scraping/data/sentences_data.txt'
         },
         {
             DataSource.NAME:'Twitter',
             DataSource.PATH:'../../data/SemEval-2014.csv'
         }
 ]
 
 # Rebuild each source descriptor: keep its name, make the path absolute,
 # and record the file extension (as returned by os.path.splitext, i.e.
 # including the leading dot) as its type.
 srcs = [
     {
         DataSource.NAME: entry[DataSource.NAME],
         DataSource.PATH: os.path.abspath(entry[DataSource.PATH]),
         DataSource.TYPE: os.path.splitext(entry[DataSource.PATH])[1],
     }
     for entry in srcs
 ]
 
 # Report, per source, the statistics stored under the REGULAR ("usable")
 # and BROKEN ("unusable") keys of getStats().
 for entry in srcs:
     header = "Name: '%s'\nType: '%s'" % (
         entry[DataSource.NAME], entry[DataSource.TYPE])
     print(header)
     source = DataSource(entry)
     stats = source.getStats()
     sections = (
         ("\nUsable data:", DataSource.REGULAR),
         ("\nUnusable data:", DataSource.BROKEN),
     )
     for title, statsKey in sections:
         print(title)
         pprint.pprint(stats[statsKey])
     print("-----------------")
 
 print("Finished!")
 
 for src in srcs:
     srcName = src[DataSource.NAME]
     dSrc = DataSource(src)
     srcStats = dSrc.getStats()
     kws,ocs = [],[]
     for word in srcStats[DataSource.REGULAR].items():
         ocs.append(word[1][DataStats.AMOUNT])