Context Navigation

source: fedd/federation/experiment_control.py @ 1b376ca

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since 1b376ca was dceeef9, checked in by Ted Faber <faber@…>, 16 years ago
Restrict ssh (and allow 0 counts in Allbeds)
Property mode set to `100644`
File size: 60.3 KB

Line
1	#!/usr/local/bin/python
2
3	import os,sys
4
5	import re
6	import random
7	import string
8	import subprocess
9	import tempfile
10	import copy
11	import pickle
12	import logging
13
14	import traceback
15	# For parsing visualization output and splitter output
16	import xml.parsers.expat
17
18	from threading import *
19	from subprocess import *
20
21	from util import *
22	from fedid import fedid, generate_fedid
23	from remote_service import xmlrpc_handler, soap_handler, service_caller
24	from service_error import service_error
25
26
class nullHandler(logging.Handler):
    """
    Logging handler that discards every record handed to it.
    """
    def emit(self, record):
        return None

# Give the module logger at least one handler (presumably so that logging
# is quiet until the application installs a real one - confirm).
fl = logging.getLogger("fedd.experiment_control")
fl.addHandler(nullHandler())
32
33	class experiment_control_local:
34	"""
35	Control of experiments that this system can directly access.
36
37	Includes experiment creation, termination and information dissemination.
38	Thread safe.
39	"""
40
class thread_pool:
    """
    Bookkeeping shared by a set of threads that were all started for the
    same task: a count of threads that have started and a count of threads
    that have terminated, protected by one condition variable that is
    signalled every time either count changes.
    """
    def __init__(self):
        """
        Create an empty pool (nothing started, nothing terminated).
        """
        self.started = 0
        self.terminated = 0
        self.changed = Condition()

    def acquire(self):
        """
        Take the lock that guards the pool's counters.
        """
        self.changed.acquire()

    def release(self):
        """
        Give up the lock that guards the pool's counters.
        """
        self.changed.release()

    def wait(self, timeout = None):
        """
        Block until some pool thread reports a start or a stop (or until
        timeout expires).  The pool's lock must be held, as for any
        Condition.wait().
        """
        self.changed.wait(timeout)

    def start(self):
        """
        Report that one more pool thread has started; called by the thread
        itself.
        """
        cond = self.changed
        cond.acquire()
        self.started = self.started + 1
        cond.notifyAll()
        cond.release()

    def terminate(self):
        """
        Report that one more pool thread has finished; called by the thread
        itself.
        """
        cond = self.changed
        cond.acquire()
        self.terminated = self.terminated + 1
        cond.notifyAll()
        cond.release()

    def clear(self):
        """
        Reset both counters to zero and wake any waiters.
        """
        cond = self.changed
        cond.acquire()
        self.started, self.terminated = 0, 0
        cond.notifyAll()
        cond.release()
99
class pooled_thread(Thread):
    """
    One of a set of threads dedicated to a specific task.  Coordinates
    through a thread_pool-style object (anything providing start() and
    terminate()) passed as pdata.

    After the thread has run, rv holds the target's return value and
    exception holds the exception that ended the target (None if the target
    returned normally).
    """
    def __init__(self, group=None, target=None, name=None, args=(),
            kwargs=None, pdata=None, trace_file=None):
        """
        Same arguments as threading.Thread plus:

        pdata       pool object notified when this thread starts and stops
        trace_file  accepted for callers that pass it; not used here
        """
        # Build a fresh dict per thread rather than sharing one mutable
        # default between every instance.
        if kwargs is None:
            kwargs = { }
        Thread.__init__(self, group, target, name, args, kwargs)
        self.rv = None          # Return value of the ops in this thread
        self.exception = None   # Exception that terminated this thread
        self.target = target    # Target function to run on start()
        self.args = args        # Positional args to pass to target
        self.kwargs = kwargs    # Keyword args to pass to target
        self.pdata = pdata      # thread_pool for this class
        # Logger for this thread
        self.log = logging.getLogger("fedd.experiment_control")

    def run(self):
        """
        Emulate Thread.run, except add pool data manipulation and error
        logging.  Exceptions from the target are recorded in
        self.exception and logged rather than propagated.
        """
        if self.pdata:
            self.pdata.start()

        try:
            if self.target:
                try:
                    # Unpack the stored arguments; passing the tuple itself
                    # would hand the target a single positional argument.
                    self.rv = self.target(*self.args, **self.kwargs)
                except:
                    # Deliberately broad: whatever stops the target is
                    # recorded for whoever collects this thread.
                    self.exception = sys.exc_info()[1]
                    if isinstance(self.exception, service_error):
                        self.log.error("Thread exception: %s %s" % \
                                (self.exception.code_string(),
                                    self.exception.desc))
                    else:
                        self.log.error(("Unexpected thread exception: %s" +\
                                "Trace %s") % (self.exception,\
                                    traceback.format_exc()))
        finally:
            # Always report termination so a waiter on the pool cannot be
            # left hanging if the logging above fails.
            if self.pdata:
                self.pdata.terminate()
139
# Callers, built by service_caller, for the RequestAccess, ReleaseAccess and
# Ns2Split operations.  They are invoked below as self.call_<Operation>(uri,
# request, cert_file, cert_pwd, trusted_certs).
call_RequestAccess = service_caller('RequestAccess')
call_ReleaseAccess = service_caller('ReleaseAccess')
call_Ns2Split      = service_caller('Ns2Split')
143
def __init__(self, config=None, auth=None):
    """
    Initialize the various attributes, most from the config object.

    config  configuration object queried through has_section(),
            has_option(), get() and getboolean(); despite the None
            default it is required, since it is dereferenced at once
    auth    authorizer used to record access attributes; if omitted a
            local one is created

    Raises service_error (internal) if the SSH public key file is unset or
    unreadable, if the SSH private key file is unset, or if no accessdb is
    configured.
    """
    self.thread_with_rv = experiment_control_local.pooled_thread
    self.thread_pool = experiment_control_local.thread_pool

    self.cert_file = None
    self.cert_pwd = None
    self.trusted_certs = None

    # Walk through the various relevant certificate-specifying config
    # attributes until the local certificate attributes can be resolved.
    # The walk is from most specific to most general specification.
    for s in ("experiment_control", "globals"):
        if config.has_section(s):
            if config.has_option(s, "cert_file"):
                if not self.cert_file:
                    self.cert_file = config.get(s, "cert_file")
                    self.cert_pwd = config.get(s, "cert_pwd")

            if config.has_option(s, "trusted_certs"):
                if not self.trusted_certs:
                    self.trusted_certs = config.get(s, "trusted_certs")

    self.exp_stem = "fed-stem"
    self.log = logging.getLogger("fedd.experiment_control")
    set_log_level(config, "experiment_control", self.log)
    self.muxmax = 2
    self.nthreads = 2
    self.randomize_experiments = False

    self.scp_exec = "/usr/bin/scp"
    self.splitter = None
    self.ssh_exec = "/usr/bin/ssh"
    self.ssh_keygen = "/usr/bin/ssh-keygen"
    self.ssh_identity_file = None

    self.debug = config.getboolean("experiment_control", "create_debug")
    self.state_filename = config.get("experiment_control",
            "experiment_state_file")
    self.splitter_url = config.get("experiment_control", "splitter_url")
    self.fedkit = config.get("experiment_control", "fedkit")
    accessdb_file = config.get("experiment_control", "accessdb")

    self.ssh_pubkey_file = config.get("experiment_control",
            "ssh_pubkey_file")
    self.ssh_privkey_file = config.get("experiment_control",
            "ssh_privkey_file")
    # NB for internal master/slave ops, not experiment setup
    # NOTE(review): the third positional argument is used as a default
    # here; the standard library ConfigParser.get() takes `raw` in that
    # position - confirm the config object supports defaults this way.
    self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")
    self.state = { }
    self.state_lock = Lock()
    self.tclsh = "/usr/local/bin/otclsh"
    self.tcl_splitter = config.get("splitter", "tcl_splitter") or \
            config.get("experiment_control", "tcl_splitter",
                    "/usr/testbed/lib/ns2ir/parse.tcl")
    mapdb_file = config.get("experiment_control", "mapdb")
    self.trace_file = sys.stderr

    # Default start commands and gateway parameters
    self.def_expstart = \
            "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " +\
            "/tmp/federate"
    self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " +\
            "FEDDIR/hosts"
    self.def_gwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " +\
            "/tmp/bridge.log"
    self.def_mgwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " +\
            "/tmp/bridge.log"
    self.def_gwimage = "FBSD61-TUNNEL2"
    self.def_gwtype = "pc"
    self.local_access = { }

    if auth:
        self.auth = auth
    else:
        self.log.error(\
                "[access]: No authorizer initialized, creating local one.")
        # Store the new authorizer on the instance: read_accessdb() and
        # read_state() below both use self.auth.
        # NOTE(review): authorizer is not imported by name in the imports
        # visible in this file - confirm it is in scope.
        self.auth = authorizer()

    if self.ssh_pubkey_file:
        try:
            f = open(self.ssh_pubkey_file, 'r')
            try:
                self.ssh_pubkey = f.read()
            finally:
                f.close()
        except IOError:
            raise service_error(service_error.internal,
                    "Cannot read sshpubkey")
    else:
        raise service_error(service_error.internal,
                "No SSH public key file?")

    if not self.ssh_privkey_file:
        raise service_error(service_error.internal,
                "No SSH private key file?")

    if mapdb_file:
        self.read_mapdb(mapdb_file)
    else:
        self.log.warning(
                "[experiment_control] No testbed map, using defaults")
        self.tbmap = {
                'deter':'https://users.isi.deterlab.net:23235',
                'emulab':'https://users.isi.deterlab.net:23236',
                'ucb':'https://users.isi.deterlab.net:23237',
                }

    if accessdb_file:
        self.read_accessdb(accessdb_file)
    else:
        raise service_error(service_error.internal,
                "No accessdb specified in config")

    # Grab saved state.  OK to do this w/o locking because it's read only
    # and only one thread should be in existence that can see self.state at
    # this point.
    if self.state_filename:
        self.read_state()

    # Dispatch tables: operation name -> handler wrapping the method
    self.soap_services = {\
            'Create': soap_handler('Create', self.create_experiment),
            'Vtopo': soap_handler('Vtopo', self.get_vtopo),
            'Vis': soap_handler('Vis', self.get_vis),
            'Info': soap_handler('Info', self.get_info),
            'Terminate': soap_handler('Terminate',
                self.terminate_experiment),
            }

    self.xmlrpc_services = {\
            'Create': xmlrpc_handler('Create', self.create_experiment),
            'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
            'Vis': xmlrpc_handler('Vis', self.get_vis),
            'Info': xmlrpc_handler('Info', self.get_info),
            'Terminate': xmlrpc_handler('Terminate',
                self.terminate_experiment),
            }
286
def copy_file(self, src, dest, size=1024):
    """
    Exceedingly simple file copy.

    Copies src to dest in chunks of at most size characters.  Both files
    are closed even if the copy fails part way through; IOError from
    open/read/write propagates to the caller.
    """
    s = open(src, 'r')
    try:
        d = open(dest, 'w')
        try:
            while True:
                buf = s.read(size)
                if not buf:
                    break
                d.write(buf)
        finally:
            d.close()
    finally:
        s.close()
300
301	# Call while holding self.state_lock
302	def write_state(self):
303	"""
304	Write a new copy of experiment state after copying the existing state
305	to a backup.
306
307	State format is a simple pickling of the state dictionary.
308	"""
309	if os.access(self.state_filename, os.W_OK):
310	self.copy_file(self.state_filename, \
311	"%s.bak" % self.state_filename)
312	try:
313	f = open(self.state_filename, 'w')
314	pickle.dump(self.state, f)
315	except IOError, e:
316	self.log.error("Can't write file %s: %s" % \
317	(self.state_filename, e))
318	except pickle.PicklingError, e:
319	self.log.error("Pickling problem: %s" % e)
320	except TypeError, e:
321	self.log.error("Pickling problem (TypeError): %s" % e)
322
323	# Call while holding self.state_lock
324	def read_state(self):
325	"""
326	Read a new copy of experiment state. Old state is overwritten.
327
328	State format is a simple pickling of the state dictionary.
329	"""
330	try:
331	f = open(self.state_filename, "r")
332	self.state = pickle.load(f)
333	self.log.debug("[read_state]: Read state from %s" % \
334	self.state_filename)
335	except IOError, e:
336	self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
337	% (self.state_filename, e))
338	except pickle.UnpicklingError, e:
339	self.log.warning(("[read_state]: No saved state: " + \
340	"Unpickling failed: %s") % e)
341
342	for k in self.state.keys():
343	try:
344	# This list should only have one element in it, but phrasing it
345	# as a for loop doesn't cost much, really. We have to find the
346	# fedid elements anyway.
347	for eid in [ f['fedid'] \
348	for f in self.state[k]['experimentID']\
349	if f.has_key('fedid') ]:
350	self.auth.set_attribute(self.state[k]['owner'], eid)
351	except KeyError, e:
352	self.log.warning("[read_state]: State ownership or identity " +\
353	"misformatted in %s: %s" % (self.state_filename, e))
354
355
def read_accessdb(self, accessdb_file):
    """
    Read the mapping from fedids that can create experiments to their name
    in the 3-level access namespace.  All will be asserted from this
    testbed and can include the local username and project that will be
    asserted on their behalf by this fedd.  Each fedid is also added to the
    authorization system with the "create" attribute.

    Two line formats are accepted (blank lines and lines starting with #
    are skipped):

        fedid:<hex> -> (<project>, <user>)
        fedid:<hex> -> <user>

    The result is stored in self.accessdb, a dict of
    fedid -> [ (project, user), ... ] where project is None for the second
    form.  Unparsable lines are logged and skipped.

    Raises service_error (internal) if the file cannot be opened or read.
    """
    self.accessdb = {}
    # These are the regexps for parsing the db.  Whitespace around the
    # punctuation is optional; lines are stripped before matching, so a
    # pattern demanding trailing whitespace could never match.
    name_expr = "[" + string.ascii_letters + string.digits + r"\.\-]+"
    fedid_expr = r"^\s*fedid:([" + string.hexdigits + "]+)"
    project_line = re.compile(fedid_expr + \
            r"\s*->\s*\(\s*(" + name_expr + r")\s*,\s*(" + name_expr + \
            r")\s*\)\s*$")
    user_line = re.compile(fedid_expr + \
            r"\s*->\s*(" + name_expr + r")\s*$")
    lineno = 0

    # Parse the mappings and store in self.accessdb
    try:
        f = open(accessdb_file, "r")
        try:
            for line in f:
                lineno += 1
                line = line.strip()
                if len(line) == 0 or line.startswith('#'):
                    continue

                m = project_line.match(line)
                if m:
                    project, user = m.group(2, 3)
                else:
                    m = user_line.match(line)
                    if m:
                        project, user = None, m.group(2)

                if not m:
                    self.log.warning(
                            "[experiment_control] Error parsing access " +\
                            "db %s at line %d" % (accessdb_file, lineno))
                    continue

                fid = fedid(hexstr=m.group(1))
                self.accessdb.setdefault(fid, []).append((project, user))
        finally:
            f.close()
    except IOError:
        # Parenthesized so the % applies to the whole message, not just
        # to the second literal.
        raise service_error(service_error.internal,
                ("Error opening/reading %s as experiment " +\
                        "control accessdb") % accessdb_file)

    # Initialize the authorization attributes
    for fid in self.accessdb.keys():
        self.auth.set_attribute(fid, 'create')
411
def read_mapdb(self, file):
    """
    Read a simple colon separated list of mappings for the
    label-to-testbed-URL mappings.  Clears or creates self.tbmap.

    Each non-blank, non-comment (#) line is "label:url"; only the first
    colon separates, so URLs may contain colons.  Lines without a colon
    are logged and skipped.  If the file cannot be opened or read a warning
    is logged and self.tbmap holds whatever was parsed up to that point
    (empty if the open itself failed).
    """

    self.tbmap = { }
    lineno = 0
    try:
        f = open(file, "r")
        try:
            for line in f:
                lineno += 1
                line = line.strip()
                if line.startswith('#') or len(line) == 0:
                    continue
                try:
                    label, url = line.split(':', 1)
                except ValueError:
                    e = sys.exc_info()[1]
                    # Parenthesized so all three values are formatted
                    # into the complete message.
                    self.log.warning(
                            ("[read_mapdb] Ignored bad line (%d) in " +\
                            "map db: %s %s") % (lineno, line, e))
                    continue
                self.tbmap[label] = url
        finally:
            # Only reached once the open succeeded, so f is always bound
            f.close()
    except IOError:
        e = sys.exc_info()[1]
        self.log.warning("[read_mapdb]: No saved map database: Can't " +\
                "open %s: %s" % (file, e))
437
def scp_file(self, file, user, host, dest=""):
    """
    scp a file to the remote host.  If debug is set the action is only
    logged.

    The copy uses only the identity in self.ssh_privkey_file.  Output from
    scp is discarded when /dev/null can be opened.  Returns True if scp
    exited with status 0 (always True in debug mode).
    """

    scp_cmd = [self.scp_exec, '-o', 'IdentitiesOnly yes', '-i',
            self.ssh_privkey_file, file, "%s@%s:%s" % (user, host, dest)]

    self.log.debug("[scp_file]: %s" % " ".join(scp_cmd))
    if self.debug:
        return True

    # Opened for writing: it receives scp's stdout and stderr
    try:
        dnull = open("/dev/null", "w")
    except IOError:
        self.log.debug("[scp_file]: failed to open /dev/null for redirect")
        dnull = None

    try:
        if dnull:
            rv = call(scp_cmd, stdout=dnull, stderr=dnull)
        else:
            rv = call(scp_cmd)
    finally:
        if dnull:
            dnull.close()

    return rv == 0
460
def ssh_cmd(self, user, host, cmd, wname=None):
    """
    Run a remote command on host as user.  If debug is set, the action is
    only logged.

    cmd is a shell fragment appended to the ssh command line and run
    through the local shell, so callers must do their own quoting and must
    never pass untrusted text.  wname is accepted for callers that label
    the operation; it is not used here.  Returns True if ssh exited with
    status 0 (always True in debug mode).
    """
    sh_str = "%s -o 'IdentitiesOnly yes' -i %s %s@%s %s" % \
            (self.ssh_exec, self.ssh_privkey_file,
                    user, host, cmd)

    self.log.debug("[ssh_cmd]: %s" % sh_str)
    if self.debug:
        return True

    # Opened for writing: it receives the command's stdout and stderr
    try:
        dnull = open("/dev/null", "w")
    except IOError:
        self.log.debug("[ssh_cmd]: failed to open /dev/null for redirect")
        dnull = None

    try:
        if dnull:
            sub = Popen(sh_str, shell=True, stdout=dnull, stderr=dnull)
        else:
            sub = Popen(sh_str, shell=True)
        return sub.wait() == 0
    finally:
        if dnull:
            dnull.close()
485
def ship_configs(self, host, user, src_dir, dest_dir):
    """
    Copy federant-specific configuration files to the federant.

    Creates dest_dir on host (mode 770), then copies every file in src_dir
    into it, recursing into subdirectories.  Returns False as soon as any
    remote command or copy fails, True otherwise.
    """
    if not self.ssh_cmd(user, host, "mkdir -p %s" % dest_dir):
        return False
    if not self.ssh_cmd(user, host, "chmod 770 %s" % dest_dir):
        return False

    for f in os.listdir(src_dir):
        src = "%s/%s" % (src_dir, f)
        # Test the full path: a bare listdir() name would be resolved
        # against the current working directory instead of src_dir.
        if os.path.isdir(src):
            if not self.ship_configs(host, user, src,
                    "%s/%s" % (dest_dir, f)):
                return False
        else:
            if not self.scp_file(src, user, host, dest_dir):
                return False
    return True
505
506	def start_segment(self, tb, eid, tbparams, tmpdir, timeout=0):
507	"""
508	Start a sub-experiment on a federant.
509
510	Get the current state, modify or create as appropriate, ship data and
511	configs and start the experiment. There are small ordering differences
512	based on the initial state of the sub-experiment.
513	"""
514	# ops node in the federant
515	host = "%s%s" % (tbparams[tb]['host'], tbparams[tb]['domain'])
516	user = tbparams[tb]['user'] # federant user
517	pid = tbparams[tb]['project'] # federant project
518	# XXX
519	base_confs = ( "hosts",)
520	tclfile = "%s.%s.tcl" % (eid, tb) # sub-experiment description
521	# command to test experiment state
522	expinfo_exec = "/usr/testbed/bin/expinfo"
523	# Configuration directories on the remote machine
524	proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
525	tarfiles_dir = "/proj/%s/tarfiles/%s" % (pid, eid)
526	rpms_dir = "/proj/%s/rpms/%s" % (pid, eid)
527	# Regular expressions to parse the expinfo response
528	state_re = re.compile("State:\s+(\w+)")
529	no_exp_re = re.compile("^No\s+such\s+experiment")
530	state = None # Experiment state parsed from expinfo
531	# The expinfo ssh command. Note the identity restriction to use only
532	# the identity provided in the pubkey given.
533	cmd = [self.ssh_exec, '-o', 'IdentitiesOnly yes', '-i',
534	self.ssh_privkey_file, "%s@%s" % (user, host),
535	expinfo_exec, pid, eid]
536
537	# Get status
538	self.log.debug("[start_segment]: %s"% " ".join(cmd))
539	dev_null = None
540	try:
541	dev_null = open("/dev/null", "a")
542	except IOError, e:
543	self.log.error("[start_segment]: can't open /dev/null: %s" %e)
544
545	if self.debug:
546	state = 'swapped'
547	rv = 0
548	else:
549	status = Popen(cmd, stdout=PIPE, stderr=dev_null)
550	for line in status.stdout:
551	m = state_re.match(line)
552	if m: state = m.group(1)
553	else:
554	m = no_exp_re.match(line)
555	if m: state = "none"
556	rv = status.wait()
557
558	# If the experiment is not present the subcommand returns a non-zero
559	# return value. If we successfully parsed a "none" outcome, ignore the
560	# return code.
561	if rv != 0 and state != "none":
562	raise service_error(service_error.internal,
563	"Cannot get status of segment %s:%s/%s" % (tb, pid, eid))
564
565	self.log.debug("[start_segment]: %s: %s" % (tb, state))
566	self.log.info("[start_segment]:transferring experiment to %s" % tb)
567
568	if not self.scp_file("%s/%s/%s" % (tmpdir, tb, tclfile), user, host):
569	return False
570	# Clear the federation files
571	if not self.ssh_cmd(user, host,
572	"/bin/sh -c \"'/bin/rm -rf %s'\"" % proj_dir):
573	return False
574	if not self.ssh_cmd(user, host,
575	"/bin/sh -c \"'mkdir -p %s'\"" % proj_dir):
576	return False
577	# Clear and create the tarfiles and rpm directories
578	for d in (tarfiles_dir, rpms_dir):
579	if not self.ssh_cmd(user, host,
580	"/bin/sh -c \"'/bin/rm -rf %s/*'\"" % d):
581	return False
582	if not self.ssh_cmd(user, host, "mkdir -p %s" % d,
583	"create tarfiles"):
584	return False
585
586	if state == 'active':
587	# Remote experiment is active. Modify it.
588	for f in base_confs:
589	if not self.scp_file("%s/%s" % (tmpdir, f), user, host,
590	"%s/%s" % (proj_dir, f)):
591	return False
592	if not self.ship_configs(host, user, "%s/%s" % (tmpdir, tb),
593	proj_dir):
594	return False
595	if os.path.isdir("%s/tarfiles" % tmpdir):
596	if not self.ship_configs(host, user,
597	"%s/tarfiles" % tmpdir, tarfiles_dir):
598	return False
599	if os.path.isdir("%s/rpms" % tmpdir):
600	if not self.ship_configs(host, user,
601	"%s/rpms" % tmpdir, tarfiles_dir):
602	return False
603	self.log.info("[start_segment]: Modifying %s on %s" % (eid, tb))
604	if not self.ssh_cmd(user, host,
605	"/usr/testbed/bin/modexp -r -s -w %s %s %s" % \
606	(pid, eid, tclfile), "modexp"):
607	return False
608	return True
609	elif state == "swapped":
610	# Remote experiment swapped out. Modify it and swap it in.
611	for f in base_confs:
612	if not self.scp_file("%s/%s" % (tmpdir, f), user, host,
613	"%s/%s" % (proj_dir, f)):
614	return False
615	if not self.ship_configs(host, user, "%s/%s" % (tmpdir, tb),
616	proj_dir):
617	return False
618	if os.path.isdir("%s/tarfiles" % tmpdir):
619	if not self.ship_configs(host, user,
620	"%s/tarfiles" % tmpdir, tarfiles_dir):
621	return False
622	if os.path.isdir("%s/rpms" % tmpdir):
623	if not self.ship_configs(host, user,
624	"%s/rpms" % tmpdir, tarfiles_dir):
625	return False
626	self.log.info("[start_segment]: Modifying %s on %s" % (eid, tb))
627	if not self.ssh_cmd(user, host,
628	"/usr/testbed/bin/modexp -w %s %s %s" % (pid, eid, tclfile),
629	"modexp"):
630	return False
631	self.log.info("[start_segment]: Swapping %s in on %s" % (eid, tb))
632	if not self.ssh_cmd(user, host,
633	"/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
634	"swapexp"):
635	return False
636	return True
637	elif state == "none":
638	# No remote experiment. Create one. We do this in 2 steps so we
639	# can put the configuration files and scripts into the new
640	# experiment directories.
641
642	# Tarfiles must be present for creation to work
643	if os.path.isdir("%s/tarfiles" % tmpdir):
644	if not self.ship_configs(host, user,
645	"%s/tarfiles" % tmpdir, tarfiles_dir):
646	return False
647	if os.path.isdir("%s/rpms" % tmpdir):
648	if not self.ship_configs(host, user,
649	"%s/rpms" % tmpdir, tarfiles_dir):
650	return False
651	self.log.info("[start_segment]: Creating %s on %s" % (eid, tb))
652	if not self.ssh_cmd(user, host,
653	"/usr/testbed/bin/startexp -i -f -w -p %s -e %s %s" % \
654	(pid, eid, tclfile), "startexp"):
655	return False
656	# After startexp the per-experiment directories exist
657	for f in base_confs:
658	if not self.scp_file("%s/%s" % (tmpdir, f), user, host,
659	"%s/%s" % (proj_dir, f)):
660	return False
661	if not self.ship_configs(host, user, "%s/%s" % (tmpdir, tb),
662	proj_dir):
663	return False
664	self.log.info("[start_segment]: Swapping %s in on %s" % (eid, tb))
665	if not self.ssh_cmd(user, host,
666	"/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
667	"swapexp"):
668	return False
669	return True
670	else:
671	self.log.debug("[start_segment]:unknown state %s" % state)
672	return False
673
def stop_segment(self, tb, eid, tbparams):
    """
    Stop a sub experiment by calling swapexp on the federant.

    tb is the testbed label, eid the experiment name and tbparams the
    per-testbed parameter dict.  Returns True if the remote swap-out
    command succeeded.
    """
    user = tbparams[tb]['user']
    # Address the ops node the same way start_segment does: host name
    # plus the testbed's domain suffix (if one was recorded).
    host = "%s%s" % (tbparams[tb]['host'], tbparams[tb].get('domain', ''))
    pid = tbparams[tb]['project']

    self.log.info("[stop_segment]: Stopping %s on %s" % (eid, tb))
    return self.ssh_cmd(user, host,
            "/usr/testbed/bin/swapexp -w %s %s out" % (pid, eid))
685
686
def generate_ssh_keys(self, dest, type="rsa" ):
    """
    Generate a set of keys for the gateways to use to talk.

    Keys are of type type ("rsa" or "dsa", case-insensitive), have an
    empty passphrase and are stored in the required dest file by
    ssh-keygen.

    Raises ValueError for an unsupported key type and service_error
    (internal) if /dev/null cannot be opened or ssh-keygen exits non-zero.
    """
    valid_types = ("rsa", "dsa")
    t = type.lower()
    if t not in valid_types:
        raise ValueError("Unsupported ssh key type %s (expected one of %s)" \
                % (type, ", ".join(valid_types)))
    cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]

    try:
        trace = open("/dev/null", "w")
    except IOError:
        raise service_error(service_error.internal,
                "Cannot open /dev/null??")

    try:
        # call() reports failure through its return value (checked
        # below); it raises OSError only if ssh-keygen cannot be run.
        self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
        rv = call(cmd, stdout=trace, stderr=trace)
    finally:
        trace.close()

    if rv != 0:
        raise service_error(service_error.internal,
                "Cannot generate nonce ssh keys.  %s return code %d" \
                        % (self.ssh_keygen, rv))
711
def gentopo(self, str):
    """
    Generate the topology data structure from the splitter's XML
    representation of it.

    The topology XML looks like:
        <experiment>
            <nodes>
                <node><vname></vname><ips>ip1:ip2</ips></node>
            </nodes>
            <lans>
                <lan>
                    <vname></vname><vnode></vnode><ip></ip>
                    <bandwidth></bandwidth><member>node:port</member>
                </lan>
            </lans>

    Returns a dict { 'node': [ ... ], 'lan': [ ... ] } with one dict per
    <node> and <lan> element, keyed by sub-element name.  bandwidth is
    converted to int and delay to float; the other recognized sub-elements
    stay as text.
    """
    class topo_parse:
        """
        Expat callbacks that accumulate the topology data structure.
        """
        def __init__(self):
            # How the text of each recognized sub-element is converted
            # before it is stored (text elements are stored unchanged).
            as_text = lambda text: text
            self.convert = { }
            for tag in ('vname', 'vnode', 'ips', 'ip', 'member'):
                self.convert[tag] = as_text
            self.convert['bandwidth'] = int
            self.convert['delay'] = float
            # The final data structure
            self.nodes = [ ]
            self.lans = [ ]
            self.topo = { 'node': self.nodes, 'lan' : self.lans }
            self.element = { }  # Element currently being filled in
            self.chars = ""     # Text seen since the last sub-element

        def end_element(self, name):
            # A closing node/lan files the finished element in its list;
            # a closing sub-element stores its converted text in the
            # element under construction.  Other tags are ignored.
            if name in ('node', 'lan'):
                self.topo[name].append(self.element)
                self.element = { }
            elif name in self.convert:
                self.element[name] = self.convert[name](self.chars)
                self.chars = ""

        def found_chars(self, data):
            # Trailing whitespace (e.g. layout between tags) is dropped
            self.chars = self.chars + data.rstrip()

    handler = topo_parse()
    xml_parser = xml.parsers.expat.ParserCreate()
    xml_parser.CharacterDataHandler = handler.found_chars
    xml_parser.EndElementHandler = handler.end_element

    xml_parser.Parse(str)

    return handler.topo
779
780
def genviz(self, topo):
    """
    Generate the visualization of the virtual topology.

    topo is the structure produced by gentopo ({'node': [...],
    'lan': [...]}).  The connections are written out as a graph, laid out
    with neato, and the node positions parsed from neato's output.

    Returns { 'node': [ {'name', 'x', 'y', 'type'}, ... ] } where type is
    'lan' for link/lan names and 'node' otherwise, or None if neato exits
    with a non-zero status.  Raises service_error (internal) for a
    malformed topology or if the temporary graph file cannot be written.
    """

    neato = "/usr/local/bin/neato"
    # Used to parse neato output: a (possibly quoted) name followed by an
    # attribute list containing pos="x,y".  Leading whitespace and any
    # attributes ahead of pos are optional.
    vis_re = re.compile(r'^\s*"?([\w\-]+)"?\s+\[.*pos="(\d+),(\d+)"')

    try:
        # Node names
        nodes = [ n['vname'] for n in topo['node'] ]
        topo_lans = topo['lan']
    except KeyError:
        raise service_error(service_error.internal, "Bad topology")

    lans = { }
    links = { }

    # Walk through the virtual topology, organizing the connections into
    # 2-node connections (links) and more-than-2-node connections (lans).
    # When a lan is created, it's added to the list of nodes (there's a
    # node in the visualization for the lan).
    for l in topo_lans:
        vname, vnode = l['vname'], l['vnode']
        if vname in links:
            if len(links[vname]) < 2:
                links[vname].append(vnode)
            else:
                # Third endpoint: promote the link to a lan
                nodes.append(vname)
                lans[vname] = links.pop(vname)
                lans[vname].append(vnode)
        elif vname in lans:
            lans[vname].append(vnode)
        else:
            links[vname] = [ vnode ]

    # Open up a temporary file for dot to turn into a visualization
    try:
        df, dotname = tempfile.mkstemp()
        dotfile = os.fdopen(df, 'w')
    except (IOError, OSError):
        raise service_error(service_error.internal,
                "Failed to open file in genviz")

    try:
        # Generate a dot/neato input file from the links, nodes and lans
        try:
            try:
                dotfile.write("graph G {\n")
                for n in nodes:
                    dotfile.write('\t"%s"\n' % n)
                for l in links.keys():
                    # Fails with TypeError unless there are exactly two
                    # endpoints
                    dotfile.write('\t"%s" -- "%s"\n' % tuple(links[l]))
                for l in lans.keys():
                    for n in lans[l]:
                        dotfile.write('\t "%s" -- "%s"\n' % (n, l))
                dotfile.write("}\n")
            finally:
                dotfile.close()
        except TypeError:
            raise service_error(service_error.internal,
                    "Single endpoint link in vtopo")
        except IOError:
            raise service_error(service_error.internal,
                    "Cannot write dot file")

        # Use dot to create a visualization
        dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005',
            '-Gmaxiter=2000', '-Gpack=true', dotname], stdout=PIPE)

        # Translate dot to vis format
        vis_nodes = [ ]
        vis = { 'node': vis_nodes }
        for line in dot.stdout:
            m = vis_re.match(line)
            if m:
                vn = m.group(1)
                vis_node = {'name': vn, \
                        'x': float(m.group(2)),\
                        'y' : float(m.group(3)),\
                    }
                if vn in links or vn in lans:
                    vis_node['type'] = 'lan'
                else:
                    vis_node['type'] = 'node'
                vis_nodes.append(vis_node)
        rv = dot.wait()
    finally:
        # Remove the temporary file on every path, including errors
        os.remove(dotname)

    if rv == 0:
        return vis
    else:
        return None
873
874	def get_access(self, tb, nodes, user, tbparam, master, export_project,
875	access_user):
876	"""
877	Get access to testbed through fedd and set the parameters for that tb
878	"""
879
880	translate_attr = {
881	'slavenodestartcmd': 'expstart',
882	'slaveconnectorstartcmd': 'gwstart',
883	'masternodestartcmd': 'mexpstart',
884	'masterconnectorstartcmd': 'mgwstart',
885	'connectorimage': 'gwimage',
886	'connectortype': 'gwtype',
887	'tunnelcfg': 'tun',
888	'smbshare': 'smbshare',
889	}
890
891	uri = self.tbmap.get(tb, None)
892	if not uri:
893	raise service_error(serice_error.server_config,
894	"Unknown testbed: %s" % tb)
895
896	# currently this lumps all users into one service access group
897	service_keys = [ a for u in user \
898	for a in u.get('access', []) \
899	if a.has_key('sshPubkey')]
900
901	if len(service_keys) == 0:
902	raise service_error(service_error.req,
903	"Must have at least one SSH pubkey for services")
904
905
906	for p, u in access_user:
907	self.log.debug(("[get_access] Attempting access from (%s, %s) " + \
908	"to %s") % ((p or "None"), u, uri))
909
910	if p:
911	# Request with user and project specified
912	req = {\
913	'destinationTestbed' : { 'uri' : uri },
914	'project': {
915	'name': {'localname': p},
916	'user': [ {'userID': { 'localname': u } } ],
917	},
918	'user': user,
919	'allocID' : { 'localname': 'test' },
920	'createAccess' : [ { 'sshPubkey' : self.ssh_pubkey } ],
921	'serviceAccess' : service_keys
922	}
923	else:
924	# Request with only user specified
925	req = {\
926	'destinationTestbed' : { 'uri' : uri },
927	'user': [ {'userID': { 'localname': u } } ],
928	'allocID' : { 'localname': 'test' },
929	'createAccess' : [ { 'sshPubkey' : self.ssh_pubkey } ],
930	'serviceAccess' : service_keys
931	}
932
933	if tb == master:
934	# NB, the export_project parameter is a dict that includes
935	# the type
936	req['exportProject'] = export_project
937
938	# node resources if any
939	if nodes != None and len(nodes) > 0:
940	rnodes = [ ]
941	for n in nodes:
942	rn = { }
943	image, hw, count = n.split(":")
944	if image: rn['image'] = [ image ]
945	if hw: rn['hardware'] = [ hw ]
946	if count and int(count) >0 : rn['count'] = int(count)
947	rnodes.append(rn)
948	req['resources']= { }
949	req['resources']['node'] = rnodes
950
951	try:
952	if self.local_access.has_key(uri):
953	# Local access call
954	req = { 'RequestAccessRequestBody' : req }
955	r = self.local_access[uri].RequestAccess(req,
956	fedid(file=self.cert_file))
957	r = { 'RequestAccessResponseBody' : r }
958	else:
959	r = self.call_RequestAccess(uri, req,
960	self.cert_file, self.cert_pwd, self.trusted_certs)
961	except service_error, e:
962	if e.code == service_error.access:
963	self.log.debug("[get_access] Access denied")
964	r = None
965	continue
966	else:
967	raise e
968
969	if r.has_key('RequestAccessResponseBody'):
970	# Through to here we have a valid response, not a fault.
971	# Access denied is a fault, so something better or worse than
972	# access denied has happened.
973	r = r['RequestAccessResponseBody']
974	self.log.debug("[get_access] Access granted")
975	break
976	else:
977	raise service_error(service_error.protocol,
978	"Bad proxy response")
979
980	if not r:
981	raise service_error(service_error.access,
982	"Access denied by %s (%s)" % (tb, uri))
983
984	e = r['emulab']
985	p = e['project']
986	tbparam[tb] = {
987	"boss": e['boss'],
988	"host": e['ops'],
989	"domain": e['domain'],
990	"fs": e['fileServer'],
991	"eventserver": e['eventServer'],
992	"project": unpack_id(p['name']),
993	"emulab" : e,
994	"allocID" : r['allocID'],
995	}
996	# Make the testbed name be the label the user applied
997	p['testbed'] = {'localname': tb }
998
999	for u in p['user']:
1000	tbparam[tb]['user'] = unpack_id(u['userID'])
1001
1002	for a in e['fedAttr']:
1003	if a['attribute']:
1004	key = translate_attr.get(a['attribute'].lower(), None)
1005	if key:
1006	tbparam[tb][key]= a['value']
1007
def release_access(self, tb, aid):
    """
    Release access to testbed through fedd.

    tb is the testbed label (looked up in self.tbmap) and aid the
    allocation ID returned when access was granted.  The response is not
    examined.  Raises service_error (server_config) for an unknown testbed.
    """

    uri = self.tbmap.get(tb, None)
    if not uri:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    if uri in self.local_access:
        # Local access call
        self.local_access[uri].ReleaseAccess(\
                { 'ReleaseAccessRequestBody' : {'allocID': aid},},
                fedid(file=self.cert_file))
    else:
        self.call_ReleaseAccess(uri, {'allocID': aid},
                self.cert_file, self.cert_pwd, self.trusted_certs)

    # TODO: better error coding
1028
def remote_splitter(self, uri, desc, master):
    """
    Ask the splitter service at uri to split the ns2 description desc with
    master as the master testbed, and return the splitter output as a list
    of lines.  A response without the expected body, or a body without
    output, raises a protocol service_error.
    """
    request = {
            'description' : { 'ns2description': desc },
            'master': master,
            'include_fedkit': bool(self.fedkit)
        }

    reply = self.call_Ns2Split(uri, request, self.cert_file, self.cert_pwd,
            self.trusted_certs)

    # Guard clauses: reject malformed replies first, then unpack the output
    if 'Ns2SplitResponseBody' not in reply:
        raise service_error(service_error.protocol, "Bad splitter response")

    body = reply['Ns2SplitResponseBody']
    if 'output' not in body:
        raise service_error(service_error.protocol,
                "Bad splitter response (no output)")

    return body['output'].splitlines()
1049
1050	class current_testbed:
1051	"""
1052	Object for collecting the current testbed description. The testbed
1053	description is saved to a file with the local testbed variables
1054	subsittuted line by line.
1055	"""
1056	def __init__(self, eid, tmpdir, fedkit):
1057	self.begin_testbed = re.compile("^#\s+Begin\s+Testbed\s+\((\w+)\)")
1058	self.end_testbed = re.compile("^#\s+End\s+Testbed\s+\((\w+)\)")
1059	self.current_testbed = None
1060	self.testbed_file = None
1061
1062	self.def_expstart = \
1063	"sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate";
1064	self.def_mexpstart = "sudo -H FEDDIR/make_hosts FEDDIR/hosts";
1065	self.def_gwstart = \
1066	"sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log";
1067	self.def_mgwstart = \
1068	"sudo -H FEDDIR/fed-tun.pl -f GWCONF >& /tmp/bridge.log";
1069	self.def_gwimage = "FBSD61-TUNNEL2";
1070	self.def_gwtype = "pc";
1071
1072	self.eid = eid
1073	self.tmpdir = tmpdir
1074	self.fedkit = fedkit
1075
1076	def __call__(self, line, master, allocated, tbparams):
1077	# Capture testbed topology descriptions
1078	if self.current_testbed == None:
1079	m = self.begin_testbed.match(line)
1080	if m != None:
1081	self.current_testbed = m.group(1)
1082	if self.current_testbed == None:
1083	raise service_error(service_error.req,
1084	"Bad request format (unnamed testbed)")
1085	allocated[self.current_testbed] = \
1086	allocated.get(self.current_testbed,0) + 1
1087	tb_dir = "%s/%s" % (self.tmpdir, self.current_testbed)
1088	if not os.path.exists(tb_dir):
1089	try:
1090	os.mkdir(tb_dir)
1091	except IOError:
1092	raise service_error(service_error.internal,
1093	"Cannot create %s" % tb_dir)
1094	try:
1095	self.testbed_file = open("%s/%s.%s.tcl" %
1096	(tb_dir, self.eid, self.current_testbed), 'w')
1097	except IOError:
1098	self.testbed_file = None
1099	return True
1100	else: return False
1101	else:
1102	m = self.end_testbed.match(line)
1103	if m != None:
1104	if m.group(1) != self.current_testbed:
1105	raise service_error(service_error.internal,
1106	"Mismatched testbed markers!?")
1107	if self.testbed_file != None:
1108	self.testbed_file.close()
1109	self.testbed_file = None
1110	self.current_testbed = None
1111	elif self.testbed_file:
1112	# Substitute variables and put the line into the local
1113	# testbed file.
1114	gwtype = tbparams[self.current_testbed].get('gwtype',
1115	self.def_gwtype)
1116	gwimage = tbparams[self.current_testbed].get('gwimage',
1117	self.def_gwimage)
1118	mgwstart = tbparams[self.current_testbed].get('mgwstart',
1119	self.def_mgwstart)
1120	mexpstart = tbparams[self.current_testbed].get('mexpstart',
1121	self.def_mexpstart)
1122	gwstart = tbparams[self.current_testbed].get('gwstart',
1123	self.def_gwstart)
1124	expstart = tbparams[self.current_testbed].get('expstart',
1125	self.def_expstart)
1126	project = tbparams[self.current_testbed].get('project')
1127	line = re.sub("GWTYPE", gwtype, line)
1128	line = re.sub("GWIMAGE", gwimage, line)
1129	if self.current_testbed == master:
1130	line = re.sub("GWSTART", mgwstart, line)
1131	line = re.sub("EXPSTART", mexpstart, line)
1132	else:
1133	line = re.sub("GWSTART", gwstart, line)
1134	line = re.sub("EXPSTART", expstart, line)
1135	line = re.sub("GWCONF", "FEDDIR`hostname`.gw.conf", line)
1136	line = re.sub("PROJDIR", "/proj/%s/" % project, line)
1137	line = re.sub("EID", self.eid, line)
1138	line = re.sub("FEDDIR", "/proj/%s/exp/%s/tmp/" % \
1139	(project, self.eid), line)
1140	if self.fedkit:
1141	line = re.sub("FEDKIT", os.path.basename(self.fedkit),
1142	line)
1143	print >>self.testbed_file, line
1144	return True
1145
class allbeds:
    """
    Process the Allbeds section.  Get access to each federant and save the
    parameters in tbparams

    Instances are called once per line of splitter output and return True
    when the line was part of the "# Begin Allbeds" .. "# End Allbeds"
    section (markers included), False otherwise.
    """
    def __init__(self, get_access):
        """
        get_access is the callable invoked for every testbed line; it is
        called as get_access(tb, nodes, user, tbparams, master,
        export_project, access_user).
        """
        self.begin_allbeds = re.compile(r"^#\s+Begin\s+Allbeds")
        self.end_allbeds = re.compile(r"^#\s+End\s+Allbeds")
        self.in_allbeds = False
        self.get_access = get_access

    def __call__(self, line, user, tbparams, master, export_project,
            access_user):
        """
        Process one line.  Inside the section each line is split on '|':
        the first field is the testbed label and the remaining fields are
        handed to get_access as the node list.
        """
        # Testbed access parameters
        if not self.in_allbeds:
            if self.begin_allbeds.match(line):
                self.in_allbeds = True
                return True
            return False

        if self.end_allbeds.match(line):
            self.in_allbeds = False
        else:
            # str.split takes a literal separator, not a regexp: the old
            # '\|' only matched a backslash followed by a bar, so the
            # fields were never separated.
            nodes = line.split('|')
            tb = nodes.pop(0)
            self.get_access(tb, nodes, user, tbparams, master,
                    export_project, access_user)
        return True
1175
class gateways:
    """
    Process the gateways sections of the splitter output.

    Instances are called once per line and return True when the line was
    part of a "# Begin gateways (tb)" .. "# End gateways (tb)" section.
    Each line inside a section produces a gateway configuration file under
    tmpdir/tb and copies the gateway keys there; the end marker writes the
    client.conf and seer.conf files for the testbed.
    """
    def __init__(self, eid, master, tmpdir, gw_pubkey,
            gw_secretkey, copy_file, fedkit):
        """
        eid is the experiment name, master the master testbed's label and
        tmpdir the staging directory.  gw_pubkey and gw_secretkey are the
        base names of the key files (found in tmpdir/keys), copy_file is
        called as copy_file(src, dest) and fedkit is true when a fedkit is
        in use.
        """
        self.begin_gateways = \
                re.compile(r"^#\s+Begin\s+gateways\s+\((\w+)\)")
        self.end_gateways = re.compile(r"^#\s+End\s+gateways\s+\((\w+)\)")
        self.current_gateways = None
        self.control_gateway = None
        self.active_end = { }

        self.eid = eid
        self.master = master
        self.tmpdir = tmpdir
        self.gw_pubkey_base = gw_pubkey
        self.gw_secretkey_base = gw_secretkey

        self.copy_file = copy_file
        self.fedkit = fedkit
        # This class used self.log and a bare "debug" name that were never
        # defined here; log through the module's logger instead.
        self.log = logging.getLogger("fedd.experiment_control")

    def gateway_conf_file(self, gw, master, eid, pubkey, privkey,
            active_end, tbparams, dtb, myname, desthost, type):
        """
        Produce a gateway configuration file from a gateways line.

        gw and dtb are the labels of the local and destination testbeds,
        myname and desthost the gateway host names on each side and type
        the gateway type written to the file.  active_end records, as
        "gw-dtb" keys, which side of a connection between two non-master
        testbeds was made active so the reverse direction becomes passive.
        Returns True when this end is the active one.  IOError from
        opening or writing the file propagates to the caller.
        """
        sproject = tbparams[gw].get('project', 'project')
        dproject = tbparams[dtb].get('project', 'project')
        sdomain = ".%s.%s%s" % (eid, sproject,
                tbparams[gw].get('domain', ".example.com"))
        ddomain = ".%s.%s%s" % (eid, dproject,
                tbparams[dtb].get('domain', ".example.com"))
        boss = tbparams[master].get('boss', "boss")
        fs = tbparams[master].get('fs', "fs")
        event_server = "%s%s" % \
                (tbparams[gw].get('eventserver', "event_server"),
                    tbparams[gw].get('domain', "example.com"))
        remote_event_server = "%s%s" % \
                (tbparams[dtb].get('eventserver', "event_server"),
                    tbparams[dtb].get('domain', "example.com"))
        seer_control = "%s%s" % \
                (tbparams[gw].get('control', "control"), sdomain)

        if self.fedkit:
            remote_script_dir = "/usr/local/federation/bin"
            local_script_dir = "/usr/local/federation/bin"
        else:
            remote_script_dir = "/proj/%s/exp/%s/tmp" % ( dproject, eid)
            local_script_dir = "/proj/%s/exp/%s/tmp" % ( sproject, eid)

        local_key_dir = "/proj/%s/exp/%s/tmp" % ( sproject, eid)
        remote_conf_dir = "/proj/%s/exp/%s/tmp" % ( dproject, eid)
        tunnel_cfg = tbparams[gw].get("tun", "false")

        # translate to lower case so the `hostname` hack for specifying
        # configuration files works.
        conf_file = ("%s%s.gw.conf" % (myname, sdomain)).lower()
        remote_conf_file = ("%s%s.gw.conf" % (desthost, ddomain)).lower()

        if dtb == master:
            active = "false"
        elif gw == master:
            active = "true"
        elif '%s-%s' % (dtb, gw) in active_end:
            # The other end already claimed the active role.  (This test
            # used to subscript the has_key method and raise TypeError.)
            active = "false"
        else:
            active_end['%s-%s' % (gw, dtb)] = 1
            active = "true"

        settings = [
                "Active: %s" % active,
                "TunnelCfg: %s" % tunnel_cfg,
                "BossName: %s" % boss,
                "FsName: %s" % fs,
                "EventServerName: %s" % event_server,
                "RemoteEventServerName: %s" % remote_event_server,
                "SeerControl: %s" % seer_control,
                "Type: %s" % type,
                "RemoteScriptDir: %s" % remote_script_dir,
                "EventRepeater: %s/fed_evrepeater" % local_script_dir,
                "RemoteExperiment: %s/%s" % (dproject, eid),
                "LocalExperiment: %s/%s" % (sproject, eid),
                "RemoteConfigFile: %s/%s" % \
                        (remote_conf_dir, remote_conf_file),
                "Peer: %s%s" % (desthost, ddomain),
                "Pubkeys: %s/%s" % (local_key_dir, pubkey),
                "Privkeys: %s/%s" % (local_key_dir, privkey),
            ]

        gwconfig = open("%s/%s/%s" % (self.tmpdir, gw, conf_file), "w")
        try:
            gwconfig.write("\n".join(settings) + "\n")
        finally:
            gwconfig.close()

        return active == "true"

    def _write_conf(self, path, lines, error):
        """
        Write lines to path, one per line, raising an internal
        service_error carrying the error text if that fails.
        """
        try:
            cc = open(path, 'w')
            try:
                cc.write("\n".join(lines) + "\n")
            finally:
                cc.close()
        except IOError:
            raise service_error(service_error.internal, error)

    def _end_section(self, tbparams):
        """
        Write client.conf and seer.conf for the section being closed, if a
        control gateway is known, and leave the section.
        """
        if self.control_gateway:
            mparams = tbparams[self.master]
            tb_dir = "%s/%s" % (self.tmpdir, self.current_gateways)

            client = [ "ControlGateway: %s" % self.control_gateway ]
            if 'smbshare' in mparams:
                client.append("SMBSHare: %s" % mparams['smbshare'])
            client.append("ProjectUser: %s" % mparams['user'])
            client.append("ProjectName: %s" % mparams['project'])
            self._write_conf("%s/client.conf" % tb_dir, client,
                    "Error creating client config")

            seer = [ ]
            if self.current_gateways != self.master:
                seer.append("ControlNode: %s" % self.control_gateway)
            seer.append("ExperimentID: %s/%s" % \
                    (mparams['project'], self.eid))
            self._write_conf("%s/seer.conf" % tb_dir, seer,
                    "Error creating seer config")
        else:
            self.log.error("[gateways]: No control gateway for %s" % \
                    self.current_gateways)
        self.current_gateways = None

    def _copy_key(self, src, dest, error):
        """
        Copy a key file, raising an internal service_error carrying the
        error text on IOError.
        """
        try:
            self.copy_file(src, dest)
        except IOError:
            # The error used to be constructed but never raised, so copy
            # failures went unnoticed.
            raise service_error(service_error.internal, error)

    def __call__(self, line, allocated, tbparams):
        """
        Process one line.  allocated holds the testbed labels seen so far
        and tbparams maps testbed labels to their parameter dicts.  Raises
        an internal service_error for mismatched markers, malformed
        gateway lines and file creation or copy failures.
        """
        # Process gateways
        if not self.current_gateways:
            m = self.begin_gateways.match(line)
            if not m:
                return False

            self.current_gateways = m.group(1)
            if self.current_gateways in allocated:
                # This test should always succeed
                tb_dir = "%s/%s" % (self.tmpdir, self.current_gateways)
                if not os.path.exists(tb_dir):
                    try:
                        os.mkdir(tb_dir)
                    except (IOError, OSError):
                        # os.mkdir fails with OSError, not IOError
                        raise service_error(service_error.internal,
                                "Cannot create %s" % tb_dir)
            else:
                # XXX
                self.log.error("[gateways]: Ignoring gateways for " + \
                        "unknown testbed %s" % self.current_gateways)
                self.current_gateways = None
            return True

        m = self.end_gateways.match(line)
        if m:
            if m.group(1) != self.current_gateways:
                raise service_error(service_error.internal,
                        "Mismatched gateway markers!?")
            self._end_section(tbparams)
            return True

        try:
            dtb, myname, desthost, gwtype = line.split(" ")
        except ValueError:
            raise service_error(service_error.internal,
                    "Bad gateway line: %s" % line)

        if gwtype == "control" or gwtype == "both":
            self.control_gateway = "%s.%s.%s%s" % (myname,
                    self.eid,
                    tbparams[self.current_gateways]['project'],
                    tbparams[self.current_gateways]['domain'])
        try:
            active = self.gateway_conf_file(self.current_gateways,
                    self.master, self.eid, self.gw_pubkey_base,
                    self.gw_secretkey_base,
                    self.active_end, tbparams, dtb, myname,
                    desthost, gwtype)
        except IOError:
            raise service_error(service_error.internal,
                    "Failed to write config file for %s" % \
                            self.current_gateways)

        gw_pubkey = "%s/keys/%s" % (self.tmpdir, self.gw_pubkey_base)
        gw_secretkey = "%s/keys/%s" % (self.tmpdir, self.gw_secretkey_base)

        pkfile = "%s/%s/%s" % \
                ( self.tmpdir, self.current_gateways, self.gw_pubkey_base)
        skfile = "%s/%s/%s" % \
                ( self.tmpdir, self.current_gateways,
                        self.gw_secretkey_base)

        if not os.path.exists(pkfile):
            self._copy_key(gw_pubkey, pkfile, "Failed to copy pubkey file")

        # Only the active end of a connection gets the secret key
        if active and not os.path.exists(skfile):
            self._copy_key(gw_secretkey, skfile,
                    "Failed to copy secretkey file")
        return True
1380
class shunt_to_file:
    """
    Copies the lines found between two regexp matches into a file.
    """
    def __init__(self, begin, end, filename):
        """
        begin and end are the regexps that start and stop the shunting;
        filename names the file that receives the lines in between.
        """
        self.filename = filename
        self.file = None
        self.in_shunt = False
        self.begin = re.compile(begin)
        self.end = re.compile(end)

    def __call__(self, line):
        """
        Offer a line of input to the shunt.  Returns True when the line
        was a marker or was inside the shunted region, False otherwise.
        """
        if self.in_shunt:
            if self.end.match(line):
                # End marker: finish the file and leave the region
                if self.file:
                    self.file.close()
                    self.file = None
                self.in_shunt = False
            elif self.file:
                self.file.write("%s\n" % line)
            return True

        if not self.begin.match(line):
            return False

        # Begin marker: enter the region and open the output file.  An
        # open failure is passed on to the caller with no file recorded.
        self.in_shunt = True
        try:
            self.file = open(self.filename, "w")
        except:
            self.file = None
            raise
        return True
1421
class shunt_to_list:
    """
    Same interface as shunt_to_file, but the lines between the markers are
    gathered in self.list, one element per line.
    """
    def __init__(self, begin, end):
        self.list = [ ]
        self.in_shunt = False
        self.begin = re.compile(begin)
        self.end = re.compile(end)

    def __call__(self, line):
        """
        Returns True when the line was a marker or was collected, False
        when it lies outside the shunted region.
        """
        if self.in_shunt:
            if self.end.match(line):
                self.in_shunt = False
            else:
                self.list.append(line)
            return True

        if self.begin.match(line):
            self.in_shunt = True
            return True
        return False
1446
class shunt_to_string:
    """
    Same interface as shunt_to_file, but the lines between the markers are
    appended, as given, to the single string self.str.
    """
    def __init__(self, begin, end):
        self.str = ""
        self.in_shunt = False
        self.begin = re.compile(begin)
        self.end = re.compile(end)

    def __call__(self, line):
        """
        Returns True when the line was a marker or was collected, False
        when it lies outside the shunted region.
        """
        if self.in_shunt:
            if self.end.match(line):
                self.in_shunt = False
            else:
                self.str += line
            return True

        if self.begin.match(line):
            self.in_shunt = True
            return True
        return False
1471
def create_experiment(self, req, fid):
    """
    The external interface to experiment creation called from the
    dispatcher.

    Creates a working directory, splits the incoming description using the
    splitter script and parses out the various subsections using the
    classes above.  Once each sub-experiment is created, use pooled threads
    to instantiate them and start it all up.

    req is the request dict, which must carry a CreateRequestBody, and fid
    identifies the caller.  Returns a dict holding the federants, the
    virtual topology, the visualization, the experiment IDs and the
    experiment's access certificate.  Raises service_error when access is
    denied, the request is malformed, the description cannot be split or
    staged, or the swap in fails (completely or, with fail_soft set,
    partially).
    """

    if not self.auth.check_attribute(fid, 'create'):
        raise service_error(service_error.access, "Create access denied")

    try:
        tmpdir = tempfile.mkdtemp(prefix="split-")
    except (IOError, OSError):
        # mkdtemp fails with OSError, which an IOError-only handler misses
        raise service_error(service_error.internal, "Cannot create tmp dir")

    gw_pubkey_base = "fed.%s.pub" % self.ssh_type
    gw_secretkey_base = "fed.%s" % self.ssh_type
    gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
    tclfile = tmpdir + "/experiment.tcl"
    tbparams = { }
    try:
        access_user = self.accessdb[fid]
    except KeyError:
        raise service_error(service_error.internal,
                "Access map and authorizer out of sync in " + \
                        "create_experiment for fedid %s" % fid)

    pid = "dummy"
    gid = "dummy"
    # XXX
    fail_soft = False

    try:
        os.mkdir(tmpdir+"/keys")
    except OSError:
        raise service_error(service_error.internal,
                "Can't make temporary dir")

    req = req.get('CreateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no CreateRequestBody)")
    # The tcl parser needs to read a file so put the content into that file
    descr = req.get('experimentdescription', None)
    if not descr:
        raise service_error(service_error.req, "No experiment description")
    file_content = descr.get('ns2description', None)
    if not file_content:
        raise service_error(service_error.req,
                "Only ns2descriptions supported")
    try:
        f = open(tclfile, 'w')
        try:
            f.write(file_content)
        finally:
            f.close()
    except IOError:
        raise service_error(service_error.internal,
                "Cannot write temp experiment description")

    # Pick a local name nobody else is using and reserve it with a
    # placeholder so that no other thread picks the same one.
    if 'experimentID' in req and 'localname' in req['experimentID']:
        eid = req['experimentID']['localname']
        self.state_lock.acquire()
        while eid in self.state:
            eid += random.choice(string.ascii_letters)
        # To avoid another thread picking this localname
        self.state[eid] = "placeholder"
        self.state_lock.release()
    else:
        eid = self.exp_stem
        for i in range(0,5):
            eid += random.choice(string.ascii_letters)
        self.state_lock.acquire()
        while eid in self.state:
            eid = self.exp_stem
            for i in range(0,5):
                eid += random.choice(string.ascii_letters)
        # To avoid another thread picking this localname
        self.state[eid] = "placeholder"
        self.state_lock.release()

    try:
        # This catches exceptions to clear the placeholder if necessary
        try:
            self.generate_ssh_keys(gw_secretkey, self.ssh_type)
        except ValueError:
            raise service_error(service_error.server_config,
                    "Bad key type (%s)" % self.ssh_type)

        user = req.get('user', None)
        if user is None:
            raise service_error(service_error.req, "No user")

        master = req.get('master', None)
        if not master:
            raise service_error(service_error.req,
                    "No master testbed label")
        export_project = req.get('exportProject', None)
        if not export_project:
            raise service_error(service_error.req, "No export project")

        if self.splitter_url:
            self.log.debug("Calling remote splitter at %s" % \
                    self.splitter_url)
            split_data = self.remote_splitter(self.splitter_url,
                    file_content, master)
        else:
            tclcmd = [self.tclsh, self.tcl_splitter, '-s', '-x',
                str(self.muxmax), '-m', master]

            if self.fedkit:
                tclcmd.append('-k')

            tclcmd.extend([pid, gid, eid, tclfile])

            self.log.debug("running local splitter %s", " ".join(tclcmd))
            tclparser = Popen(tclcmd, stdout=PIPE)
            split_data = tclparser.stdout

        allocated = { }         # Testbeds we can access

        # Objects to parse the splitter output (defined above)
        parse_current_testbed = self.current_testbed(eid, tmpdir,
                self.fedkit)
        parse_allbeds = self.allbeds(self.get_access)
        parse_gateways = self.gateways(eid, master, tmpdir,
                gw_pubkey_base, gw_secretkey_base, self.copy_file,
                self.fedkit)
        parse_vtopo = self.shunt_to_string(r"^#\s+Begin\s+Vtopo",
                    r"^#\s+End\s+Vtopo")
        parse_hostnames = self.shunt_to_file(r"^#\s+Begin\s+hostnames",
                    r"^#\s+End\s+hostnames", tmpdir + "/hosts")
        parse_tarfiles = self.shunt_to_list(r"^#\s+Begin\s+tarfiles",
                r"^#\s+End\s+tarfiles")
        parse_rpms = self.shunt_to_list(r"^#\s+Begin\s+rpms",
                r"^#\s+End\s+rpms")

        # Working on the split data.  Each parser claims the lines of its
        # own section; a line nobody claims is an error.
        for line in split_data:
            line = line.rstrip()
            if parse_current_testbed(line, master, allocated, tbparams):
                continue
            elif parse_allbeds(line, user, tbparams, master, export_project,
                    access_user):
                continue
            elif parse_gateways(line, allocated, tbparams):
                continue
            elif parse_vtopo(line):
                continue
            elif parse_hostnames(line):
                continue
            elif parse_tarfiles(line):
                continue
            elif parse_rpms(line):
                continue
            else:
                raise service_error(service_error.internal,
                        "Bad tcl parse? %s" % line)

        # Virtual topology and visualization
        vtopo = self.gentopo(parse_vtopo.str)
        if not vtopo:
            raise service_error(service_error.internal,
                    "Failed to generate virtual topology")

        vis = self.genviz(vtopo)
        if not vis:
            raise service_error(service_error.internal,
                    "Failed to generate visualization")

        # save federant information
        for k in allocated.keys():
            tbparams[k]['federant'] = {
                    'name': [ { 'localname' : eid} ],
                    'emulab': tbparams[k]['emulab'],
                    'allocID' : tbparams[k]['allocID'],
                    'master' : k == master,
                }

        # Copy tarfiles and rpms needed at remote sites into a staging area
        try:
            if self.fedkit:
                parse_tarfiles.list.append(self.fedkit)
            for t in parse_tarfiles.list:
                if not os.path.exists("%s/tarfiles" % tmpdir):
                    os.mkdir("%s/tarfiles" % tmpdir)
                self.copy_file(t, "%s/tarfiles/%s" % \
                        (tmpdir, os.path.basename(t)))
            for r in parse_rpms.list:
                if not os.path.exists("%s/rpms" % tmpdir):
                    os.mkdir("%s/rpms" % tmpdir)
                self.copy_file(r, "%s/rpms/%s" % \
                        (tmpdir, os.path.basename(r)))
        except (IOError, OSError):
            # os.mkdir reports OSError; both kinds carry strerror
            e = sys.exc_info()[1]
            raise service_error(service_error.internal,
                    "Cannot stage tarfile/rpm: %s" % e.strerror)

    except service_error:
        # If something goes wrong in the parse (usually an access error)
        # clear the placeholder state.  From here on out the code delays
        # exceptions.
        self.state_lock.acquire()
        del self.state[eid]
        self.state_lock.release()
        raise

    thread_pool_info = self.thread_pool()
    threads = [ ]

    for tb in [ k for k in allocated.keys() if k != master]:
        # Wait until we have a free slot to start the next testbed load
        thread_pool_info.acquire()
        while thread_pool_info.started - \
                thread_pool_info.terminated >= self.nthreads:
            thread_pool_info.wait()
        thread_pool_info.release()

        # Create and start a thread to start the segment, and save it to
        # get the return value later
        t = self.pooled_thread(target=self.start_segment,
                args=(tb, eid, tbparams, tmpdir, 0), name=tb,
                pdata=thread_pool_info, trace_file=self.trace_file)
        threads.append(t)
        t.start()

    # Wait until every launched thread has reported both its start and its
    # termination.  Comparing against the number of launched threads, not
    # against zero, means an experiment whose only testbed is the master
    # does not wait forever for a start that never comes, and the wait
    # cannot end while a launched thread has yet to report in.
    thread_pool_info.acquire()
    while thread_pool_info.started < len(threads) or \
            thread_pool_info.started > thread_pool_info.terminated:
        thread_pool_info.wait()
    thread_pool_info.release()

    # If none failed, start the master
    failed = [ t.getName() for t in threads if not t.rv ]

    if len(failed) == 0:
        if not self.start_segment(master, eid, tbparams, tmpdir):
            failed.append(master)

    succeeded = [tb for tb in allocated.keys() if tb not in failed]
    # If one failed clean up, unless fail_soft is set
    if failed:
        if not fail_soft:
            for tb in succeeded:
                self.stop_segment(tb, eid, tbparams)
            # Remove the placeholder
            self.state_lock.acquire()
            del self.state[eid]
            self.state_lock.release()

            raise service_error(service_error.federant,
                "Swap in failed on %s" % ",".join(failed))
    else:
        self.log.info("[start_segment]: Experiment %s started" % eid)

    # Generate an ID for the experiment (slice) and a certificate that the
    # allocator can use to prove they own it.  We'll ship it back through
    # the encrypted connection.
    (expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)

    self.log.debug("[start_experiment]: removing %s" % tmpdir)

    # Walk up tmpdir, deleting as we go
    for path, dirs, files in os.walk(tmpdir, topdown=False):
        for f in files:
            os.remove(os.path.join(path, f))
        for d in dirs:
            os.rmdir(os.path.join(path, d))
    os.rmdir(tmpdir)

    # The deepcopy prevents the allocation ID and other binaries from being
    # translated into other formats
    resp = { 'federant' : [ copy.deepcopy(tbparams[tb]['federant']) \
            for tb in tbparams.keys() \
                if 'federant' in tbparams[tb] ],
            'vtopo': vtopo,
            'vis' : vis,
            'experimentID' : [
                    { 'fedid': copy.copy(expid) },
                    { 'localname': eid },
                ],
            'experimentAccess': { 'X509' : expcert },
        }

    # Insert the experiment into our state and update the disk copy.  The
    # same record is reachable by fedid and by local name, and replaces
    # the placeholder.
    self.state_lock.acquire()
    self.state[expid] = { 'federant' : [ tbparams[tb]['federant'] \
            for tb in tbparams.keys() \
                if 'federant' in tbparams[tb] ],
            'vtopo': vtopo,
            'vis' : vis,
            'owner': fid,
            'experimentID' : [
                    { 'fedid': expid }, { 'localname': eid },
                ],
        }
    self.state[eid] = self.state[expid]
    if self.state_filename: self.write_state()
    self.state_lock.release()

    self.auth.set_attribute(fid, expid)
    self.auth.set_attribute(expid, expid)

    if not failed:
        return resp
    else:
        raise service_error(service_error.partial, \
                "Partial swap in on %s" % ",".join(succeeded))
1789
def check_experiment_access(self, fid, key):
    """
    Confirm that the fid has access to the experiment.  Though a request
    may be made in terms of a local name, the access attribute is always
    the experiment's fedid.

    key is either a fedid or a key of self.state that is translated to
    the experiment's fedid first.  Returns True when access is allowed and
    raises an access service_error when it is not or the experiment is
    unknown.  An internal service_error is raised when the stored
    experiment does not yield exactly one fedid.
    """
    if not isinstance(key, fedid):
        self.state_lock.acquire()
        try:
            # The finally clause releases the lock on every path out of
            # here, including unexpected exceptions.
            if key not in self.state:
                raise service_error(service_error.access, "Access Denied")
            try:
                kl = [ f['fedid'] for f in self.state[key]['experimentID']
                        if 'fedid' in f ]
            except (KeyError, TypeError):
                # TypeError covers entries that are not experiment records,
                # such as the "placeholder" string stored while an
                # experiment is being created.  The message is built as
                # one string: the old "..." + "..." % key applied % to the
                # half without a placeholder and raised TypeError.
                raise service_error(service_error.internal,
                        "No fedid for experiment %s when checking access(!?)"
                        % key)
            if len(kl) != 1:
                raise service_error(service_error.internal,
                        "multiple fedids for experiment %s when checking "
                        "access(!?)" % key)
            key = kl[0]
        finally:
            self.state_lock.release()

    if self.auth.check_attribute(fid, key):
        return True
    else:
        raise service_error(service_error.access, "Access Denied")
1823
1824
1825
def get_vtopo(self, req, fid):
    """
    Look up the virtual topology saved for an experiment.

    The experiment is named in the VtopoRequestBody by fedid or by
    localname; fid must pass check_experiment_access for it.  The result
    echoes the experiment key and carries the stored vtopo.  Malformed
    requests and unknown experiments raise a req service_error.
    """
    body = req.get('VtopoRequestBody', None)
    if not body:
        raise service_error(service_error.req,
                "Bad request format (no VtopoRequestBody)")

    exp = body.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    # A fedid takes precedence over a localname when both are given
    for keytype in ("fedid", "localname"):
        if keytype in exp:
            key = exp[keytype]
            break
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    answer = None
    self.state_lock.acquire()
    if key in self.state:
        answer = {
                'experiment' : {keytype: key },
                'vtopo': self.state[key]['vtopo'],
            }
    self.state_lock.release()

    if not answer:
        raise service_error(service_error.req, "No such experiment")
    return answer
1860
def get_vis(self, req, fid):
    """
    Look up the visualization saved for an experiment.

    The experiment is named in the VisRequestBody by fedid or by
    localname; fid must pass check_experiment_access for it.  The result
    echoes the experiment key and carries the stored vis.  Malformed
    requests and unknown experiments raise a req service_error.
    """
    body = req.get('VisRequestBody', None)
    if not body:
        raise service_error(service_error.req,
                "Bad request format (no VisRequestBody)")

    exp = body.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    # A fedid takes precedence over a localname when both are given
    for keytype in ("fedid", "localname"):
        if keytype in exp:
            key = exp[keytype]
            break
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    answer = None
    self.state_lock.acquire()
    if key in self.state:
        answer = {
                'experiment' : {keytype: key },
                'vis': self.state[key]['vis'],
            }
    self.state_lock.release()

    if not answer:
        raise service_error(service_error.req, "No such experiment")
    return answer
1895
def get_info(self, req, fid):
    """
    Return all the stored info about this experiment

    The experiment is named in the InfoRequestBody by fedid or by
    localname; fid must pass check_experiment_access for it.  Returns a
    deep copy of the stored state.  Malformed requests and unknown
    experiments raise a req service_error.
    """
    rv = None

    req = req.get('InfoRequestBody', None)
    if not req:
        # The message used to name VisRequestBody, the wrong request type
        raise service_error(service_error.req,
                "Bad request format (no InfoRequestBody)")
    exp = req.get('experiment', None)
    if exp:
        if 'fedid' in exp:
            key = exp['fedid']
        elif 'localname' in exp:
            key = exp['localname']
        else:
            raise service_error(service_error.req, "Unknown lookup type")
    else:
        raise service_error(service_error.req, "No request?")

    self.check_experiment_access(fid, key)

    # The state may be massaged by the service function that called
    # get_info (e.g., encoded for XMLRPC transport) so send a copy of the
    # state.
    self.state_lock.acquire()
    try:
        if key in self.state:
            rv = copy.deepcopy(self.state[key])
    finally:
        # A failed copy must not leave the state lock held
        self.state_lock.release()

    if rv: return rv
    else: raise service_error(service_error.req, "No such experiment")
1931
1932
def terminate_experiment(self, req, fid):
    """
    Stop an experiment's segments on every federant, release the
    allocations and forget the experiment's shared state.

    The experiment is named in the TerminateRequestBody by fedid or by
    localname; fid must pass check_experiment_access for it.  Returns a
    dict echoing the experiment identifier from the request.  Malformed
    requests and experiments with no saved state raise a req
    service_error.
    """
    body = req.get('TerminateRequestBody', None)
    if not body:
        raise service_error(service_error.req,
                "Bad request format (no TerminateRequestBody)")

    exp = body.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    self.state_lock.acquire()
    fed_exp = self.state.get(key, None)

    if not fed_exp:
        # Don't forget to release the lock
        self.state_lock.release()
        raise service_error(service_error.req, "No saved state")

    # The lock is held while a consistent temporary tbparams is built from
    # the saved state, dropped for the deallocations themselves and taken
    # again to remove the experiment state once they are done.

    # experimentID is a list of self-describing identifier dicts.  Gather
    # every fedid and localname in it - the keys of self.state that refer
    # to this experiment.
    ids = []
    for ident in fed_exp.get('experimentID', []):
        for kind in ('fedid', 'localname'):
            if kind in ident:
                ids.append(ident[kind])

    # Construct enough of the tbparams to make the stop_segment calls
    # work.  Federants with missing fields are skipped.
    tbparams = { }
    for fed in fed_exp['federant']:
        try:
            for name in fed['name']:
                eid = name.get('localname', None)
                if eid: break
            else:
                # No local name for this federant
                continue

            emulab = fed['emulab']
            proj = emulab['project']

            project = proj['name']['localname']
            tb = proj['testbed']['localname']
            user = proj['user'][0]['userID']['localname']

            domain = emulab['domain']
            host = "%s%s" % (emulab['ops'], domain)
            aid = fed['allocID']
        except KeyError:
            continue
        tbparams[tb] = {
                'user': user,
                'domain': domain,
                'project': project,
                'host': host,
                'eid': eid,
                'aid': aid,
            }
    self.state_lock.release()

    # Stop everyone.
    for tb, params in tbparams.items():
        self.stop_segment(tb, params['eid'], tbparams)

    # release the allocations
    for tb, params in tbparams.items():
        self.release_access(tb, params['aid'])

    # Remove the terminated experiment
    self.state_lock.acquire()
    for ident in ids:
        if ident in self.state:
            del self.state[ident]

    if self.state_filename:
        self.write_state()
    self.state_lock.release()

    return { 'experiment': exp }

Note: See TracBrowser for help on using the repository browser.

Download in other formats: