Context Navigation

source: fedd/federation/local_emulab_segment.py @ 595191c

Branches/tags: axis_example, compt_changes, info-ops, version-2.00, version-3.01, version-3.02

Last change on this file since 595191c was 595191c, checked in by Ted Faber <faber@…>, 15 years ago
longer timeout
Property mode set to `100644`
File size: 7.7 KB

Line
1	#!/usr/local/bin/python
2
3	import sys, os
4	import re
5
6	import tempfile
7	import subprocess
8	import logging
9	import time
10	import signal
11
12	import util
13
class local_emulab_segment:
    """
    Base class for operations run as local commands on an Emulab testbed.

    Provides a file copy helper and a command runner with an optional
    timeout.  When the instance is in debug mode, neither helper does any
    real work; the requested action is only logged.
    """

    class cmd_timeout(RuntimeError):
        """Raised by cmd_with_timeout when a command outlives its timeout."""
        pass

    def __init__(self, log=None, keyfile=None, debug=False):
        """
        Store the configuration shared by the segment operations.

        log     -- logger to report to; when not given, the logger named
                   'fedd.access.proxy_emulab_segment' is used
        keyfile -- saved as self.certfile (not read by this class)
        debug   -- when true, commands and copies are logged, not executed
        """
        self.log = log or logging.getLogger(
                'fedd.access.proxy_emulab_segment')
        self.certfile = keyfile
        self.debug = debug
        # Expose the exception class on the instance so callers can write
        # "except self.cmd_timeout".
        self.cmd_timeout = local_emulab_segment.cmd_timeout

    def copy_file(self, src, dest, size=1024):
        """
        Exceedingly simple file copy.

        Delegates to util.copy_file(src, dest, size) unless debug is set,
        in which case the copy is only logged.
        """
        if not self.debug:
            util.copy_file(src, dest, size)
        else:
            self.log.debug("Copy %s to %s" % (src, dest))

    def cmd_with_timeout(self, cmd, wname=None, timeout=None):
        """
        Run a shell command and report whether it exited with status 0.

        cmd     -- command line, passed to the shell (shell=True)
        wname   -- accepted for interface compatibility; not used here
        timeout -- maximum run time in seconds; None or 0 waits forever

        The command's stdout and stderr are redirected to /dev/null when
        that file can be opened; otherwise they are inherited.

        If debug is set the command is only logged and True is returned,
        except that a timeout of exactly 0 raises cmd_timeout so the
        timeout path can be exercised without running anything.

        Raises cmd_timeout if timeout is given and the command is still
        running after that many seconds; the process is sent SIGKILL first.
        """
        self.log.debug("[cmd_with_timeout]: %s" % cmd)

        if self.debug:
            if timeout == 0:
                self.log.debug("debug timeout raised on %s " % cmd)
                raise self.cmd_timeout()
            return True

        try:
            dnull = open("/dev/null", "w")
        except IOError:
            self.log.debug("[cmd_with_timeout]: failed to open /dev/null " +
                    "for redirect")
            dnull = None

        try:
            if dnull is not None:
                sub = subprocess.Popen(cmd, shell=True, stdout=dnull,
                        stderr=dnull, close_fds=True)
            else:
                sub = subprocess.Popen(cmd, shell=True, close_fds=True)

            if not timeout:
                return sub.wait() == 0

            # Poll once a second until the command exits or the time
            # allowance is used up.
            rv = sub.poll()
            waited = 0
            while rv is None and waited < timeout:
                time.sleep(1)
                rv = sub.poll()
                waited += 1

            # Only a process that is still running counts as timed out; one
            # that finished during the last sleep must not be killed.
            if rv is None:
                self.log.debug("Process exceeded runtime: %s" % cmd)
                os.kill(sub.pid, signal.SIGKILL)
                raise self.cmd_timeout()
            return rv == 0
        finally:
            # The child holds its own copy of the descriptor, so ours can be
            # released whether or not the command is still running.
            if dnull is not None:
                dnull.close()
78
class start_segment(local_emulab_segment):
    """
    Callable that creates (if needed), configures and swaps in an Emulab
    experiment using the local testbed command line tools.
    """

    def __init__(self, log=None, keyfile=None, debug=False):
        """
        Initialize the base class and the placeholder experiment script.

        The arguments are passed unchanged to local_emulab_segment.__init__.
        """
        local_emulab_segment.__init__(self, log=log,
                keyfile=keyfile, debug=debug)
        # Minimal one-node ns script used to create a placeholder
        # experiment before the real topology is installed with modexp.
        self.null = """
set ns [new Simulator]
source tb_compat.tcl

set a [$ns node]

$ns rtproto Session
$ns run
"""

    def get_state(self, pid, eid):
        """
        Return the state of experiment pid/eid as reported by expinfo.

        The result is one of 'active', 'swapped', 'swapping' or 'none'
        ('none' meaning expinfo reported no such experiment).  In debug
        mode expinfo is not run and 'swapped' is returned.

        Raises service_error if expinfo fails without reporting a missing
        experiment, or if no recognized state could be parsed.
        """
        # command to test experiment state
        expinfo_exec = "/usr/testbed/bin/expinfo"
        # Regular expressions to parse the expinfo response
        state_re = re.compile(r"State:\s+(\w+)")
        no_exp_re = re.compile(r"^No\s+such\s+experiment")
        swapping_re = re.compile(r"^No\s+information\s+available.")
        state = None    # Experiment state parsed from expinfo
        # The expinfo command, run directly (no shell) as an argument list.
        cmd = [expinfo_exec, pid, eid]

        if self.debug:
            state = 'swapped'
            rv = 0
        else:
            dev_null = None
            try:
                dev_null = open("/dev/null", "a")
            except IOError as e:
                # Not fatal: with stderr=None the child inherits our stderr.
                self.log.error("[get_state]: can't open /dev/null: %s" % e)

            try:
                self.log.debug("Checking state")
                # universal_newlines gives text lines on every Python
                # version, so the str patterns above always apply.
                status = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                        stderr=dev_null, close_fds=True,
                        universal_newlines=True)
                for line in status.stdout:
                    m = state_re.match(line)
                    if m:
                        state = m.group(1)
                    else:
                        for reg, st in ((no_exp_re, "none"),
                                (swapping_re, "swapping")):
                            m = reg.match(line)
                            if m:
                                state = st
                rv = status.wait()
            finally:
                if dev_null is not None:
                    dev_null.close()

        # NOTE(review): service_error is not among the imports visible in
        # this file; confirm it is brought into scope, otherwise the raises
        # below fail with NameError.

        # If the experiment is not present the subcommand returns a
        # non-zero return value.  If we successfully parsed a "none"
        # outcome, ignore the return code.
        if rv != 0 and state != 'none':
            raise service_error(service_error.internal,
                    "Cannot get status of segment:%s/%s" % (pid, eid))
        elif state not in ('active', 'swapped', 'swapping', 'none'):
            raise service_error(service_error.internal,
                    "Cannot get status of segment:%s/%s" % (pid, eid))
        else:
            self.log.debug("State is %s" % state)
            return state

    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
        """
        Start a sub-experiment on a federant.

        Get the current state, modify or create as appropriate, ship data
        and configs and start the experiment.  There are small ordering
        differences based on the initial state of the sub-experiment.

        parent  -- not used by this method
        eid     -- experiment name
        pid     -- project name
        user    -- not used by this method
        tclfile -- path of the experiment description handed to modexp
        tmpdir  -- local staging directory; its plain files are copied to
                   the experiment's tmp directory and the plain files in
                   its "software" subdirectory to the project software
                   directory
        timeout -- not used; the command timeouts are fixed below

        Returns True if the experiment ends up swapped in, False on any
        command or copy failure.  Raises service_error if the experiment
        state cannot be determined or null.tcl cannot be written.
        """
        # Configuration directories on this machine
        proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
        softdir = "/proj/%s/software/%s" % (pid, eid)
        # Software staging directory
        lsoftdir = "%s/software" % tmpdir

        state = self.get_state(pid, eid)

        if state == 'none':
            # Create a null copy of the experiment so that we capture any
            # logs there if the modify fails.  Emulab software discards the
            # logs from a failed startexp
            try:
                with open("%s/null.tcl" % tmpdir, "w") as f:
                    f.write(self.null + "\n")
            except IOError as e:
                raise service_error(service_error.internal,
                        "Cannot stage null.tcl: %s" % e.strerror)

            timedout = False
            try:
                if not self.cmd_with_timeout(
                        ("/usr/testbed/bin/startexp -i -f -w -p %s " +
                            "-e %s %s/null.tcl") % (pid, eid, tmpdir),
                        "startexp", timeout=60 * 10):
                    return False
            except self.cmd_timeout:
                timedout = True

            # A startexp that was killed may still have created the
            # experiment; only continue if it is there and swapped out.
            if timedout:
                state = self.get_state(pid, eid)
                if state != "swapped":
                    return False

        # Set up the experiment's file space
        if not self.cmd_with_timeout("/bin/rm -rf %s" % proj_dir):
            return False
        # Clear and create the software and configuration directories
        if not self.cmd_with_timeout("/bin/rm -rf %s/*" % softdir):
            return False
        if not self.cmd_with_timeout('mkdir -p %s' % proj_dir):
            return False
        if os.path.isdir(lsoftdir):
            if not self.cmd_with_timeout('mkdir -p %s' % softdir):
                return False

        try:
            for f in os.listdir(tmpdir):
                if not os.path.isdir("%s/%s" % (tmpdir, f)):
                    self.copy_file("%s/%s" % (tmpdir, f),
                            "%s/%s" % (proj_dir, f))
            if os.path.isdir(lsoftdir):
                for f in os.listdir(lsoftdir):
                    if not os.path.isdir("%s/%s" % (lsoftdir, f)):
                        self.copy_file("%s/%s" % (lsoftdir, f),
                                "%s/%s" % (softdir, f))
        except IOError as e:
            self.log.error("Error copying file: %s" % e)
            return False

        # Stage the new configuration (active experiments will stay swapped
        # in now)
        self.log.info("[start_segment]: Modifying %s" % eid)
        try:
            if not self.cmd_with_timeout(
                    "/usr/testbed/bin/modexp -r -s -w %s %s %s" %
                        (pid, eid, tclfile),
                    "modexp", timeout=60 * 10):
                return False
        except self.cmd_timeout:
            self.log.error("Modify command failed to complete in time")
            # There's really no way to see if this succeeded or failed, so
            # if it hangs, assume the worst.
            return False

        # Active experiments are still swapped in; this swaps the others in.
        if state != 'active':
            self.log.info("[start_segment]: Swapping %s" % eid)
            timedout = False
            try:
                if not self.cmd_with_timeout(
                        "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
                        "swapexp", timeout=25 * 60):
                    return False
            except self.cmd_timeout:
                timedout = True

            # If the command was terminated, but completed successfully,
            # report success.
            if timedout:
                self.log.debug("[start_segment]: swapin timed out " +
                        "checking state")
                state = self.get_state(pid, eid)
                self.log.debug("[start_segment]: state is %s" % state)
                return state == 'active'
        # Everything has gone OK.
        return True
247
class stop_segment(local_emulab_segment):
    """
    Callable that swaps out an Emulab experiment with the local swapexp
    tool, after removing the experiment's project software directory.
    """

    def __init__(self, log=None, keyfile=None, debug=False):
        """Pass log, keyfile and debug through to the base class."""
        local_emulab_segment.__init__(
                self, log=log, keyfile=keyfile, debug=debug)

    def __call__(self, parent, user, pid, eid):
        """
        Stop a sub experiment by calling swapexp on the federant.

        Returns the result of the swapexp command (true on exit status 0),
        or False if a command exceeded its timeout.  The parent and user
        arguments are not used by this method.
        """
        self.log.info("[stop_segment]: Stopping %s" % eid)

        clean_software = "rm -rf /proj/%s/software/%s" % (pid, eid)
        swap_out = "/usr/testbed/bin/swapexp -w %s %s out" % (pid, eid)

        try:
            # Clean out tar files: we've gone over quota in the past.  The
            # outcome of the cleanup does not affect the result.
            self.cmd_with_timeout(clean_software)
            return self.cmd_with_timeout(swap_out, timeout=60 * 10)
        except self.cmd_timeout:
            return False
268

Note: See TracBrowser for help on using the repository browser.

Download in other formats: