/
xpipe.py
executable file
·205 lines (154 loc) · 5.41 KB
/
xpipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/usr/bin/env python
import os
import fcntl
import sys
import re
from select import select
from subprocess import Popen, PIPE
import time
# XPipe prototype - might rewrite in C later to reduce memory overhead and dependencies
# Making this a binary will also allow us to use #! in pipe files, and use them as programs themselves, which would be neat
# TODO: special STDIN and STDOUT commands, so we can handle them
def debug_print(x):
    """Accept a debug message and discard it.

    Tracing is switched off in this prototype, so the argument is ignored
    and nothing is written anywhere.
    """
    return None
def make_nonblocking(fd):
    """Turn on O_NONBLOCK for the file descriptor *fd*.

    The descriptor's current status flags are fetched first so that the
    flags already set on it are kept.
    """
    current_flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    wanted_flags = current_flags | os.O_NONBLOCK
    fcntl.fcntl(fd, fcntl.F_SETFL, wanted_flags)
# Switch our own standard input and standard output (in that order) to
# non-blocking mode.
for _std_stream in (sys.stdin, sys.stdout):
    make_nonblocking(_std_stream.fileno())
class GraphNodeStream:
    """One named end ('stdin' or 'stdout') of a graph node.

    Bundles the owning node, the underlying file-like object (which may be
    None for an end that does not exist) and a short label used in repr().
    Because it provides fileno(), an instance can be handed to select().
    """

    def __init__(self, node, stream, name):
        self.name = name
        self.stream = stream
        self.node = node

    def fileno(self):
        """Return the file descriptor of the wrapped stream."""
        return self.stream.fileno()

    def is_live(self):
        """Return True when a stream is attached and it has not been closed."""
        if self.stream is None:
            return False
        return not self.stream.closed

    def __repr__(self):
        # Rendered as "<owning node's repr>.<stream label>".
        return repr(self.node) + "." + self.name
class GraphNode:
    """A vertex of the pipe graph.

    A node with a command string spawns that command through the shell when
    execute() is called.  A node whose command is None spawns nothing; its
    `stdin` / `stdout` attributes are expected to be assigned by the caller.

    Attributes:
        name:    identifier of the node.
        command: shell command line, or None.
        process: the Popen object once execute() has run, else None.
        stdin:   GraphNodeStream to write into, or None until set.
        stdout:  GraphNodeStream to read from, or None until set.
        outputs: list of GraphNode objects that receive this node's output.
    """

    def __init__(self, name, command):
        self.name = name
        self.command = command
        self.process = None
        self.stdin = None
        self.stdout = None
        self.outputs = []

    def execute(self):
        """Start the node's command, if it has one, and wrap its pipes.

        Both pipe ends are switched to non-blocking mode before they are
        wrapped in GraphNodeStream objects.  Nodes without a command are
        left untouched.
        """
        if self.command is None:
            return
        self.process = Popen(self.command, shell=True, stdin=PIPE, stdout=PIPE, stderr=None)
        make_nonblocking(self.process.stdout.fileno())
        make_nonblocking(self.process.stdin.fileno())
        self.stdin = GraphNodeStream(self, self.process.stdin, 'stdin')
        self.stdout = GraphNodeStream(self, self.process.stdout, 'stdout')

    # Returns False iff this node will never produce more data
    def is_live(self):
        """Return True for command-less nodes and for running processes."""
        if self.process is not None:
            # poll() refreshes returncode without blocking.
            self.process.poll()
        return self.command is None or (self.process is not None and self.process.returncode is None)

    def is_readable(self):
        """Return True when the node has an open stdout stream.

        A node whose stdout has not been set up yet (execute() not called)
        is reported as not readable instead of raising AttributeError.
        """
        return self.stdout is not None and self.stdout.is_live()

    def is_writable(self):
        """Return True when the node has an open stdin stream.

        A node whose stdin has not been set up yet (execute() not called)
        is reported as not writable instead of raising AttributeError.
        """
        return self.stdin is not None and self.stdin.is_live()

    def __repr__(self):
        if self.command is None:
            return "(" + self.name + ")"
        return "(" + self.name + ": " + self.command + ")"
### Read input files

# Map of command names to shell command strings (later, the values are
# replaced by GraphNode instances).
cmds = {}
# List of (source name, sink name) tuples, one per EDGE line.
graph = []

# Recognised line formats: "CMD <name> <shell command>", "EDGE <src> <sink>"
# and "#" comments.  Blank lines are ignored.
cmd_pattern = re.compile(r"CMD\s+(\w+)\s+(.*)\s*")
edge_pattern = re.compile(r"EDGE\s+(\w+)\s+(\w+)\s*")
comment_pattern = re.compile(r"\s*#.*")

# Fail with a usage message instead of an IndexError traceback.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s PIPEFILE\n" % sys.argv[0])
    sys.exit(1)

lineno = 0
# The context manager closes the pipe file on every exit path, including
# the sys.exit() below.
with open(sys.argv[1]) as f:
    for lineno, line in enumerate(f, 1):
        # skip blank lines
        if line.strip() == "":
            continue

        m = cmd_pattern.match(line)
        if m:
            name, command = m.groups()
            cmds[name] = command
            continue

        m = edge_pattern.match(line)
        if m:
            src, sink = m.groups()
            graph.append((src, sink))
            continue

        if comment_pattern.match(line):
            continue

        # If we get here, no pattern matched.  Diagnostics go to stderr so
        # they never mix with pipeline data on stdout.
        sys.stderr.write("Error on line %d: Invalid line\n" % lineno)
        sys.exit(1)
### Construct graph structure

# First pass - replace every command string with a GraphNode object.
for cmd in list(cmds):
    cmds[cmd] = GraphNode(cmd, cmds[cmd])

# Create the special STDOUT and STDIN nodes.  They have no command; instead
# they are wired directly to this process's own standard streams.  STDOUT
# can only be written to, STDIN can only be read from, so the opposite end
# of each wraps None.
fake_stdout = GraphNode('STDOUT', None)
fake_stdout.stdin = GraphNodeStream(fake_stdout, sys.stdout, 'stdin')
fake_stdout.stdout = GraphNodeStream(fake_stdout, None, 'stdout')
cmds['STDOUT'] = fake_stdout

fake_stdin = GraphNode('STDIN', None)
fake_stdin.stdin = GraphNodeStream(fake_stdin, None, 'stdin')
fake_stdin.stdout = GraphNodeStream(fake_stdin, sys.stdin, 'stdout')
cmds['STDIN'] = fake_stdin

# Second pass - fill in output lists with GraphNode objects.
for src, sink in graph:
    for cmd in (src, sink):
        if cmd not in cmds:
            # Diagnostics go to stderr; stdout carries pipeline data.
            sys.stderr.write("Invalid edge: Reference to nonexistent command " + cmd + "\n")
            sys.exit(1)
    if cmds[sink] is fake_stdin:
        # STDIN's input side wraps no stream, so it is never writable and
        # the edge's source would never be drained.  Reject it up front.
        sys.stderr.write("Invalid edge: STDIN cannot be used as a sink\n")
        sys.exit(1)
    cmds[src].outputs.append(cmds[sink])
### Process pipeline data in a loop

# Start every node.  Nodes without a command (the STDIN/STDOUT stand-ins)
# spawn nothing.
for node in cmds.values():
    node.execute()
def cleanup_process(cmd):
    """Remove the finished node named *cmd* from the graph.

    The node is unlinked from every producer that listed it as an output
    and deleted from `cmds`.  Each of its consumers then has its stdin
    closed, which signals end-of-input, but only once no remaining node
    still feeds that consumer.  Closing earlier would break the other
    producers of a consumer that has several inputs.  The STDOUT stand-in
    is never closed.

    Args:
        cmd: key of the finished node in the module-level `cmds` map.
    """
    finished = cmds[cmd]
    # Snapshot before unlinking, since a self-loop would otherwise vanish.
    downstream = list(finished.outputs)

    # Drop every edge that points at the finished node, duplicates included.
    for other in cmds.values():
        other.outputs[:] = [o for o in other.outputs if o is not finished]
    del cmds[cmd]

    for out in downstream:
        if out is fake_stdout:
            continue
        still_fed = any(o is out for other in cmds.values() for o in other.outputs)
        if still_fed:
            # Another producer is still writing to this consumer.
            continue
        if out.stdin is not None and out.stdin.stream is not None:
            out.stdin.stream.close()
# Largest chunk pulled from a producer in a single read.
READ_CHUNK_SIZE = 65536
# Longest time, in seconds, to idle after a pass that moved no data.
IDLE_TIMEOUT = 0.1

# Keep pumping while at least one spawned command is still registered.
while any(node.process is not None for node in cmds.values()):
    sources = [node.stdout for node in cmds.values() if node.is_readable()]  # read from stdout streams
    sinks = [node.stdin for node in cmds.values() if node.is_writable()]  # write to stdin streams
    debug_print("AVAILABLE: " + str(sources) + str(sinks))

    # Readable and writable sets are polled separately, since we can't act
    # unless a producer is readable AND all of its consumers are writable.
    r, _, _ = select(sources, [], [])
    _, w, _ = select([], sinks, [])
    debug_print("SELECTED: " + str(r) + str(w))

    progressed = False
    stalled_sinks = []
    for readable in r:
        debug_print("trying " + str(readable))
        not_ready = [out.stdin for out in readable.node.outputs if out.stdin not in w]
        if not_ready:
            # At least one consumer cannot take data now; leave the
            # producer's data where it is and remember who we wait for.
            debug_print(str(not_ready) + " not writable, skipping " + str(readable))
            stalled_sinks.extend(not_ready)
            continue

        debug_print(" can read!")
        data = readable.stream.read(READ_CHUNK_SIZE)
        if not data:
            # FIXME: possible race condition if the process writes more
            # data and then dies right here
            if readable.node is fake_stdin or not readable.node.is_live():
                debug_print(str(readable.node) + " is done, cleaning up")
                cleanup_process(readable.node.name)
                progressed = True
            # Nothing to forward.  If the process has not exited yet it is
            # re-checked on a later pass.
            continue

        progressed = True
        debug_print(" read data from " + str(readable.node) + ": " + str(data))
        for out in readable.node.outputs:
            out.stdin.stream.write(data)
            out.stdin.stream.flush()
            debug_print(" wrote data to " + str(out))

    if not progressed:
        # Every ready producer is stuck.  Without a pause, the two select()
        # calls above would return immediately again and the loop would
        # spin.  Wait until a stalled consumer becomes writable or another
        # producer becomes readable.  The timeout bounds the wait, so a
        # process exit that produces no fd event is still noticed.
        idle_sources = [s for s in sources if s not in r]
        idle_sinks = [s for s in stalled_sinks if s.is_live()]
        select(idle_sources, idle_sinks, [], IDLE_TIMEOUT)