Example #1

    def main(self):
        if not os.path.exists(self.args.file):
            sys.exit('Input file %s not found' % self.args.file)
        if not os.path.exists(self.args.output):
            sys.exit('Output directory %s not found' % self.args.output)

        mrs.main(self.MAPREDUCE_CLASS, args=self._parsed_etl_args.job_args)
Example #2
        yield (ds, self.callback)

        itr = itr + 1
        while True:
            # iteratively map reduce (count counts)
            ds = job.map_data(ds, mapper=self.map_counts)  # key=int, val=int
            ds = job.reduce_data(
                    ds, self.reduce,
                    outdir="%s/counts_of_counts%d" % (self.args[-1], itr),
                    format=mrs.fileformats.TextWriter)
            itr = itr + 1
            yield (ds, self.callback)

    # count the counts
    def map_counts(self, key, count):
        yield (count, 1)

    # count the words
    def map_words(self, line_num, line_text):
        for word in line_text.split():
            word = word.strip(string.punctuation).lower()
            if word:
                yield (word, 1)

    # aggregate the counts
    def reduce(self, key, counts):
        yield sum(counts)


if __name__ == '__main__':
    mrs.main(IterativeWordCount)

# vim: et sw=4 sts=4
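The driver above yields (ds, self.callback) pairs but the callback itself never appears in the fragment. Below is a minimal sketch of one possible stopping rule; the signature and the return-False-to-stop convention are assumptions about Mrs, not taken from the snippet:

    def callback(self, ds):
        # Hypothetical stopping rule: give up after a fixed number of
        # iterations.  The exact contract Mrs expects is assumed here.
        self.rounds = getattr(self, 'rounds', 0) + 1
        return self.rounds < 10  # assumed: True means keep iterating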
Example #4
class invertedIndexing(mrs.MapReduce):
    
    # Mapper takes in a key, which is the indexNumber and a value, which is a line 
    def map(self, indexNumber, line):
        # Reads a line and creates list of the words in the line
        line = line.split() 
        for word in line:
            # Converts all letters to lowercase
            word = word.lower()
            # Converts every instance of punctuation to a space
            word = word.translate(str.maketrans(string.punctuation, ' '*len(string.punctuation)))
            # Map the word only once it has been validated: it contains only
            # letters, is longer than 3 characters, and is not a stopword.
            if word.isalpha() and len(word) > 3 and word not in stopWords:
                yield (word, indexNumber + 1)  # lowercase word, stripped of punctuation, plus the index number

    # The reducer takes a key (a word) and an iterable of index numbers, and yields the list of distinct index numbers
    def reduce(self, word, indexNumber):
        indexNumbers = []

        for currentIndex in indexNumber:
            # Record at most the first 50 lines; if a word appears twice on
            # one line, record that line only once.
            if len(indexNumbers) >= 50:
                break
            if currentIndex not in indexNumbers:
                indexNumbers.append(currentIndex)
        yield indexNumbers

if __name__ == '__main__':
    mrs.main(invertedIndexing)

Example #5

#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import mrs
import string

class WordCount(mrs.MapReduce):
    """Count the number of occurrences of each word in a set of documents.

    Word Count is the classic "hello world" MapReduce program. This is a
    working example and is provided to demonstrate a simple Mrs program. It is
    further explained in the tutorials provided in the docs directory.
    """
    def map(self, line_num, line_text):
        for word in line_text.split():
            word = word.strip(string.punctuation).lower()
            if word:
                yield (word, 1)

    def reduce(self, word, counts):
        yield sum(counts)

if __name__ == '__main__':
    mrs.main(WordCount)

# vim: et sw=4 sts=4
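Mrs programs such as WordCount are launched directly from the command line, with inputs first and the output location last (the positional-argument convention is visible in the other examples on this page). A hypothetical invocation; the file names are made up:

# Hypothetical invocation (file names made up):
#   $ python wordcount.py input.txt output_dir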
Example #6

        self.items.append(node)

    def add_edge(self, edge):
        self.items.append(edge)

    def get_path_string(self, remove_cycles=True, lexicalize=False,
            max_length=5):
        to_output = []
        i = len(self.items) - 1
        while i >= 0:
            item = self.items[i]
            if lexicalize or not isinstance(item, int):
                to_output.append(str(item))  # str() so int node ids can be joined below
            if isinstance(item, int):
                j = i - 1
                while j >= 0:
                    if self.items[j] == self.items[i]:
                        i = j
                    j -= 1
            i -= 1
        if len(to_output) > max_length or not to_output:
            return None
        to_output.reverse()
        return '-'.join(to_output)
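A worked trace of the cycle removal above, on hypothetical items (ints stand for node ids, strings for edge labels):

# Worked trace (hypothetical items):
#   self.items == [1, 'knows', 2, 'likes', 1, 'sees', 3]
# Scanning backwards, node 1 repeats, so i jumps to its first occurrence
# and the 1 -knows-> 2 -likes-> 1 cycle is dropped.  With lexicalize=False
# only the surviving edge label remains: get_path_string() == 'sees'.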


if __name__ == '__main__':
    mrs.main(RandomWalkAnalyzer)

# vim: et sw=4 sts=4
Example #7

    def reduce(self, key, jobs):
        """ run the job """
        for job in jobs:
            # execute the job string
            print "\nRunning cmd %d\n\t%s" % (key,job)
            os.system(job)

        print "============= Done! ==================="
        yield ["Done"]


    '''
    Define a set of command line parameters whose values will be 
    passed to your program in the opts parameter of the __init__ method.
    '''
    @classmethod
    def update_parser(cls, parser):
        # TODO: add your option(s) here
        parser.add_option('--myopt',
                type='int',
                dest='myopt',
                default=1,
                help='Myopt determines blah blah blah...',
                )
        return parser

if __name__ == '__main__':
    mrs.main(JobLauncher)

# vim: et sw=4 sts=4
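The value parsed from --myopt above arrives on the opts object handed to the program, as the comment in the snippet describes. A minimal sketch of reading it; the base-class __init__ signature is assumed from that comment, not shown in the snippet:

import mrs

class JobLauncher(mrs.MapReduce):
    def __init__(self, opts, args):
        super(JobLauncher, self).__init__(opts, args)
        # opts.myopt carries the parsed --myopt value (default 1)
        self.myopt = opts.myopt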
Example #8

            for k, v in zip(tempK, i[1]):
                # print(k)
                matrixC[k[0]][k[1]] = v
        print(len(matrixA))
        # for i in matrixC:
        #     print(i)

        sys.stdout.flush()

        return 0

    @classmethod
    def update_parser(cls, parser):
        parser.add_option('-P',
                          '--num_processes',
                          dest='num_processes',
                          type='int',
                          help='Number of processes to use',
                          default=2)

        parser.add_option('-N',
                          '--row_size',
                          dest='row_size',
                          type='int',
                          help='Row size of the matrix',
                          default=16)
        return parser

if __name__ == '__main__':
    mrs.main(MatrixMultiplication)
Example #9
        print(self.args[0])
        direc = self.args[0]
        fileList = glob.glob(direc + '/*.txt')
        print(fileList)
        print("----------------------------------")
        return job.file_data(fileList)

    def map(self, key, value):
        # print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        # print(value)
        words = re.split(r'[\W0-9]', value, flags=re.UNICODE)
        for i in words:
            if i != '':
                i = i.lower()
                # print(i)
                yield i, 1

    def reduce(self, key, values):
        length = len(tuple(values))
        # print length
        yield length


if __name__ == '__main__':
    mrs.main(WordCount)

# j = mrs.job.Job
# wc = WordCount
# wc.input_data(j)
# wc.map("AAA 2.1 2 , bc2 3z bbb",1)
Example #10
    """

    @mrs.output_serializers(key=mrs.MapReduce.str_serializer,
                            value=mrs.MapReduce.int_serializer)
    def map(self, key, value):
        for word in value.split():
            word = word.strip(string.punctuation).lower()
            if word:
                yield (word, 1)

    def reduce(self, key, values):
        yield sum(values)

    combine = reduce

    def input_data(self, job):
        if len(self.args) < 2:
            print("Requires input(s) and an output.", file=sys.stderr)
            return None
        inputs = []
        # Each argument except the last names a manifest file whose lines
        # are the actual input files; the last argument is the output.
        for filename in self.args[:-1]:
            with open(filename) as f:
                for line in f:
                    inputs.append(line.strip())
        return job.file_data(inputs)

if __name__ == '__main__':
    mrs.main(WordCount2)

# vim: et sw=4 sts=4
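WordCount2 reuses its reducer as a combiner (combine = reduce above). That is only safe because summation is associative, so pre-aggregating partial counts on each map task cannot change the final total. A quick self-contained sanity check:

# Local pre-aggregation must not change the final result; summation is
# associative, so combining partial sums first is safe:
assert sum([1, 1, 1, 1]) == sum([sum([1, 1]), sum([1, 1])])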
Example #11
        job.wait(output)
        output.fetchall()
        for key, value in output.data():
            if key:
                inside = value
            else:
                outside = value

        pi = 4 * inside / (inside + outside)
        print(pi)
        sys.stdout.flush()

        return 0

    @classmethod
    def update_parser(cls, parser):
        parser.add_option('-p', '--points',
                        dest='num_points', type='int',
                        help='Number of points for each map task',
                        default=1000)

        parser.add_option('-t', '--tasks',
                        dest='num_tasks', type='int',
                        help='Number of map tasks to use',
                        default=40)

        return parser

if __name__ == '__main__':
    mrs.main(SamplePi)
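The fragment above only shows the driver; the map step that feeds it is missing. A minimal sketch of what such a map could look like, keyed the way the driver expects (True for points inside the quarter circle, False otherwise). The method body and the use of opts.num_points are assumptions, not recovered code:

import random

import mrs

class SamplePi(mrs.MapReduce):  # sketch only; the real class continues above
    def map(self, key, value):
        # Each map task draws num_points random points in the unit square
        # and counts how many land inside the quarter circle of radius 1.
        n = int(self.opts.num_points)
        inside = sum(1 for _ in range(n)
                     if random.random() ** 2 + random.random() ** 2 <= 1)
        yield (True, inside)
        yield (False, n - inside)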
Example #12
            dest='pruner', action='extend', search=['specmethod'],
            help='Pruning method for generating speculative children',
            default='OneCompleteIteration',
            )
    parser.add_option('--total-tokens',
            dest='tokens', type='int',
            help='Number of tokens to use (only for the TokenPruner).  This is'
            ' the difference between the number of desired particles and the'
            ' number of available processors.',
            default=0,
            )
    parser.add_option('--min-tokens',
            dest='min_tokens', type='int',
            help='The minimum number of tokens that each particle can have. '
            'This cannot be greater than the total number of tokens available.',
            default=0,
            )

    # There are some sticky issues involved with doing this speculatively
    # that I haven't worried about.  If we ever feel like we should do this,
    # we need to make some changes to the code.  Until then, disabling it is
    # better than leaving it in and having it not work.
    parser.remove_option('--transitive-best')

    return parser

if __name__ == '__main__':
    mrs.main(SpecExPSO, update_parser=update_parser)

# vim: et sw=4 sts=4
Example #13
        Q, anchors = self.get_qank()
        V, K = Q.shape[0], len(anchors)

        P_w = np.diag(Q.sum(axis=1))
        for word in range(V):
            if np.isnan(P_w[word, word]):
                P_w[word, word] = 1e-16

        C = np.zeros((V, K))
        for part in values:
            C += np.loads(part)

        # Recover the topic matrix: each column of A becomes a probability
        # distribution over the V vocabulary words.
        A = np.dot(P_w, C)
        for k in range(K):
            A[:, k] = A[:, k] / A[:, k].sum()
        yield A.dumps()  # yield the recovered topics rather than the raw counts

    @classmethod
    def update_parser(cls, parser):
        parser.add_option('-t',
                          '--tasks',
                          dest='num_tasks',
                          type=int,
                          help='Number of map tasks to use',
                          default=20)
        return parser


if __name__ == '__main__':
    mrs.main(TopicRecover)
Example #14

    def reduce(self, key, values):
        # All we do is aggregate all of the information we've seen
        topic_info = TopicInfo()
        for value in values:
            topic_info.aggregate(value)
        # Then output it as a pickle, for easy analysis later.
        yield topic_info

    @classmethod
    def update_parser(cls, parser):
        parser.add_option('-d', '--dataset',
                dest='dataset',
                help='Database name of the dataset to use',
                )
        parser.add_option('-a', '--analysis',
                dest='analysis',
                help='Database name of the analysis to use',
                )
        parser.add_option('-o', '--outdir',
                dest='outdir',
                help='Directory to store the output',
                )
        return parser


if __name__ == '__main__':
    mrs.main(DependencyParse)

# vim: et sw=4 sts=4
Example #15
                        continue
                    Q[w_i.token, w_j.token] += norm

        yield '', pickle.dumps(scipy.sparse.coo_matrix(Q))

    @mrs.output_serializers(key=mrs.str_serializer, value=mrs.raw_serializer)
    def reduce(self, key, values):
        corpus = self.get_corpus()
        V = len(corpus.vocabulary)
        Q = np.zeros((V, V))

        for Q_part in values:
            Q += pickle.loads(Q_part)
        Q /= Q.sum()

        yield Q.dumps()

    @classmethod
    def update_parser(cls, parser):
        parser.add_option('-t',
                          '--tasks',
                          dest='num_tasks',
                          type=int,
                          help='Number of map tasks to use',
                          default=20)
        return parser


if __name__ == '__main__':
    mrs.main(ConstructQ)
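After the global Q /= Q.sum() in the reducer above, Q behaves like a joint distribution over word pairs. A tiny self-contained check of that normalization step:

import numpy as np

Q = np.array([[2.0, 1.0],
              [1.0, 0.0]])
Q /= Q.sum()
assert abs(Q.sum() - 1.0) < 1e-12  # entries now sum to one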
Example #16
                word = word.split()

                for w in word:
                    w = w.strip(string.punctuation).lower()
                    if w and w not in stopWords and not w[0].isdigit():
                        yield (w, line_num + 1)  # Start counting at 1

            else:
                word = word.strip(string.punctuation).lower()
                if word and word not in stopWords and not word[0].isdigit():
                    yield (word, line_num + 1)  # Start counting at 1

    def reduce(self, word, line_num):
        lineNumbers = []
        for i in line_num:
            # At most 50 lines
            if len(lineNumbers) >= 50:
                break
            # If a word repeats on a line, write the line only once
            if i not in lineNumbers:
                lineNumbers.append(i)

        yield lineNumbers


if __name__ == '__main__':
    mrs.main(WordIndex)
Example #17
			if int(j) < 10:
				j = '0' + j
			if int(i) < 10:
				i = '0' + i
			if line:
				if MatrixMultiply.CurrentFile == '1':
					for k in range(MatrixMultiply.Col):
						if k < 10:
							yield ('0'+str(k)+' '+j, i+' '+val)
						else:
							yield (str(k)+' '+j, i+' '+val)
				if MatrixMultiply.CurrentFile == '2':
					for k in range(MatrixMultiply.Row):
						if k < 10:
							yield (i+' '+'0'+str(k), j+' '+val)
						else:
							yield (i+' '+str(k), j+' '+val)

	def reduce(self, key, values):
		val = []
		# First pass: collect the Col values contributed by matrix 1
		for k in range(MatrixMultiply.Col):
			indexTemp, valTemp = next(values).split(' ')
			val.append(int(valTemp))
		# Second pass: multiply elementwise by the matrix 2 values
		for k in range(MatrixMultiply.Col):
			indexTemp, valTemp = next(values).split(' ')
			val[k] *= int(valTemp)
		yield sum(val)

if __name__ == '__main__':
    mrs.main(MatrixMultiply)
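The zero padding in the mapper above makes string comparison agree with numeric order, which matters anywhere the framework sorts these composite string keys; without it, '10' would sort before '2'. A quick demonstration:

# Unpadded string keys sort in the wrong numeric order...
assert sorted(['10 00', '2 00']) == ['10 00', '2 00']
# ...zero-padded keys keep numeric and lexicographic order in agreement.
assert sorted(['02 00', '10 00']) == ['02 00', '10 00']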
Example #18
            default=0.4,
            )
    parser.add_option('-f', '--func', metavar='FUNCTION',
            dest='func', action='extend', search=['amlpso.functions'],
            help='Function to optimize',
            default='sphere.Sphere',
            )
    parser.add_option('-t', '--top', metavar='TOPOLOGY',
            dest='top', action='extend', search=['amlpso.topology'],
            help='Topology to use',
            default='Isolated',
            )
    parser.add_option('-o', '--out', metavar='OUTPUTTER',
            dest='out', action='extend', search=['amlpso.output'],
            help='Style of output',
            default='Basic',
            )
    parser.add_option('--hey-im-testing',
            dest='hey_im_testing', action='store_true',
            help='Ignore errors from uncommitted changes (for testing only!)',
            default=False,
            )

    return parser


if __name__ == '__main__':
    mrs.main(PI, update_parser)

# vim: et sw=4 sts=4
Example #19

        # Then output it as a pickle, for easy analysis later.
        yield topic_info

    @classmethod
    def update_parser(cls, parser):
        parser.add_option(
            '-d',
            '--dataset',
            dest='dataset',
            help='Database name of the dataset to use',
        )
        parser.add_option(
            '-a',
            '--analysis',
            dest='analysis',
            help='Database name of the analysis to use',
        )
        parser.add_option(
            '-o',
            '--outdir',
            dest='outdir',
            help='Directory to store the output',
        )
        return parser


if __name__ == '__main__':
    mrs.main(DependencyParse)

# vim: et sw=4 sts=4
Example #20
                inside = value
            else:
                outside = value

        pi = 4 * inside / (inside + outside)
        print(pi)
        sys.stdout.flush()

        return 0

    @classmethod
    def update_parser(cls, parser):
        parser.add_option('-p',
                          '--points',
                          dest='num_points',
                          type='int',
                          help='Number of points for each map task',
                          default=1000)

        parser.add_option('-t',
                          '--tasks',
                          dest='num_tasks',
                          type='int',
                          help='Number of map tasks to use',
                          default=40)

        return parser


if __name__ == '__main__':
    mrs.main(SamplePi)
Example #21
        for job in jobs:
            # execute the job string
            print "\nRunning cmd %d\n\t%s" % (key, job)
            os.system(job)

        print "============= Done! ==================="
        yield ["Done"]

    '''
    Define a set of command line parameters whose values will be 
    passed to your program in the opts parameter of the __init__ method.
    '''

    @classmethod
    def update_parser(cls, parser):
        # TODO: add your option(s) here
        parser.add_option(
            '--myopt',
            type='int',
            dest='myopt',
            default=1,
            help='Myopt determines blah blah blah...',
        )
        return parser


if __name__ == '__main__':
    mrs.main(JobLauncher)

# vim: et sw=4 sts=4
Example #22

    def node_pair_map(self, key, value):
        """Emit an entry for each pair of nodes in the walks."""
        for i, start_node in enumerate(value):
            for end_node in value[i + 1:]:
                yield (start_node, end_node)

    def normalize_reduce(self, key, values):
        """Make a conditional probability distribution given the node `key`."""
        counts = defaultdict(int)
        for v in values:
            counts[v] += 1

        distribution = {}
        total = 0
        for node, count in counts.items():
            if count >= MIN_COUNT:
                distribution[node] = count
                total += count

        # Normalize the surviving counts into probabilities (true division).
        for node in distribution:
            distribution[node] /= total

        if distribution:
            yield distribution


if __name__ == '__main__':
    mrs.main(RandomWalkAnalyzer)

# vim: et sw=4 sts=4
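A tiny trace of normalize_reduce above; MIN_COUNT is assumed to be 2 for the example:

# values = ['a', 'a', 'b', 'a', 'c']  ->  counts {'a': 3, 'b': 1, 'c': 1}
# Only 'a' passes the MIN_COUNT filter, so total == 3 and the yielded
# distribution is {'a': 1.0}.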
Example #23
        "they'll", "they're", "they've", "this", "those", "through", "to",
        "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll",
        "we're", "we've", "were", "what", "what's", "when", "when's", "where",
        "where's", "which", "while", "who", "who's", "whom", "why", "why's",
        "with", "would", "you", "you'd", "you'll", "you're", "you've", "your",
        "yours", "yourself", "yourselves"
    ]


WORD_RE = re.compile(r"[\w']+")  # matches words, including possessives like "word's"
STOP_WORDS = getStopWords()


class MRSInvertedIndex(mrs.MapReduce):
    """Count the number of occurrences of each word in a set of documents.
    """
    def map(self, line_num, line_text):
        for word in WORD_RE.findall(line_text):
            word = word.strip(string.punctuation).lower()
            if word not in STOP_WORDS and not word.isdigit():
                yield (word, line_num)

    def reduce(self, word, line_num_list):
        yield word, list(line_num_list)


if __name__ == '__main__':
    mrs.main(MRSInvertedIndex)
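For reference, each reduce above emits a (word, [line numbers]) pair. A hypothetical two-line input where "apple" appears on both lines would produce something like:

# Hypothetical output:
#   ('apple', [0, 1])
# where the line-number keys are whatever Mrs assigned to the input lines.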