Context Navigation

source: fedd/federation/experiment_control.py @ 3132419

axis_examplecompt_changesinfo-opsversion-3.01version-3.02

Last change on this file since 3132419 was 3132419, checked in by Ted Faber <faber@…>, 14 years ago
Parallel starts now.
Property mode set to `100644`
File size: 86.9 KB

Line
1	#!/usr/local/bin/python
2
3	import os,sys
4
5	import re
6	import random
7	import string
8	import subprocess
9	import tempfile
10	import copy
11	import pickle
12	import logging
13	import signal
14	import time
15
16	import traceback
17	# For parsing visualization output and splitter output
18	import xml.parsers.expat
19
20	from threading import Lock, Thread, Condition
21	from subprocess import call, Popen, PIPE
22
23	from urlparse import urlparse
24	from urllib2 import urlopen
25
26	from util import *
27	from fedid import fedid, generate_fedid
28	from remote_service import xmlrpc_handler, soap_handler, service_caller
29	from service_error import service_error
30
31	import topdl
32	import list_log
33	from ip_allocator import ip_allocator
34	from ip_addr import ip_addr
35
36
class nullHandler(logging.Handler):
    """
    Logging handler that throws away every record handed to it.
    """

    def emit(self, record):
        """
        Accept record and do nothing with it.
        """
        return None


# Module-level logger for this file, with the do-nothing handler attached.
fl = logging.getLogger("fedd.experiment_control")
fl.addHandler(nullHandler())
42
43	class experiment_control_local:
44	"""
45	Control of experiments that this system can directly access.
46
47	Includes experiment creation, termination and information dissemination.
48	Thread safe.
49	"""
50
class ssh_cmd_timeout(RuntimeError):
    """
    RuntimeError subclass that adds no behavior of its own.

    NOTE(review): presumably raised when an ssh command times out; the
    raising code is not visible in this part of the file.
    """
52
class thread_pool:
    """
    A class to keep track of a set of threads all invoked for the same
    task.  Manages the mutual exclusion of the states.

    The pool only counts: it records how many threads have reported
    starting and how many have reported finishing, and guards both counters
    with one Condition so callers can block on changes.
    """
    def __init__(self, nthreads):
        """
        Start a pool in which at most nthreads threads should be active at
        once (enforced by callers through wait_for_slot()).
        """
        self.changed = Condition()
        self.started = 0
        self.terminated = 0
        self.nthreads = nthreads

    def acquire(self):
        """
        Get the pool's lock.
        """
        self.changed.acquire()

    def release(self):
        """
        Release the pool's lock.
        """
        self.changed.release()

    def wait(self, timeout = None):
        """
        Wait for a pool thread to start or stop.  The pool's lock must be
        held by the caller; timeout is in seconds, None waits indefinitely.
        """
        self.changed.wait(timeout)

    def start(self):
        """
        Called by a pool thread to report starting.
        """
        self.changed.acquire()
        try:
            self.started += 1
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def terminate(self):
        """
        Called by a pool thread to report finishing.
        """
        self.changed.acquire()
        try:
            self.terminated += 1
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def clear(self):
        """
        Clear all pool data.
        """
        self.changed.acquire()
        try:
            self.started = 0
            self.terminated = 0
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def wait_for_slot(self):
        """
        Wait until we have a free slot to start another pooled thread,
        i.e. until fewer than nthreads started threads are unfinished.
        """
        self.acquire()
        try:
            while self.started - self.terminated >= self.nthreads:
                self.wait()
        finally:
            self.release()

    def _all_done(self):
        """
        True when at least one thread has started since the last clear()
        and every started thread has terminated.  Call with the lock held.
        """
        return self.started != 0 and self.started <= self.terminated

    def wait_for_all_done(self, timeout=None):
        """
        Wait until all active threads finish (and at least one has
        started).  If a timeout (seconds) is given, return after waiting
        at most that long for termination.  If all threads are done (and
        one has started since the last clear()) return True, otherwise
        False.

        A timeout of 0 polls: it returns the current status without
        blocking.  (Testing the timeout for truth made 0 mean "no
        deadline" while still passing 0 to wait(), which spun the CPU.)
        """
        deadline = None
        if timeout is not None:
            deadline = time.time() + timeout

        self.acquire()
        try:
            while not self._all_done():
                if deadline is None:
                    self.wait()
                else:
                    remaining = deadline - time.time()
                    if remaining <= 0:
                        break
                    self.wait(remaining)
            return self._all_done()
        finally:
            self.release()
140
class pooled_thread(Thread):
    """
    One of a set of threads dedicated to a specific task.  Uses the
    thread_pool class for coordination: the pool object passed as pdata is
    told when this thread starts and when it finishes.

    After the thread has run, rv holds the target's return value and
    exception holds the exception that ended it (None if there was none).
    """
    def __init__(self, group=None, target=None, name=None, args=(),
            kwargs=None, pdata=None, trace_file=None):
        """
        Same arguments as threading.Thread, plus:

        pdata -- pool object whose start()/terminate() are called around
                 the target (optional)
        trace_file -- accepted for interface compatibility; not used here
        """
        # A shared {} default would be visible to every instance.
        if kwargs is None:
            kwargs = { }
        Thread.__init__(self, group, target, name, args, kwargs)
        self.rv = None          # Return value of the ops in this thread
        self.exception = None   # Exception that terminated this thread
        self.target = target    # Target function to run on start()
        self.args = args        # Args to pass to target
        self.kwargs = kwargs    # Additional kw args
        self.pdata = pdata      # thread_pool for this class
        # Logger for this thread
        self.log = logging.getLogger("fedd.experiment_control")

    def run(self):
        """
        Emulate Thread.run, except add pool data manipulation and error
        logging.  Exceptions from the target are recorded and logged, not
        propagated.
        """
        if self.pdata:
            self.pdata.start()

        try:
            if self.target:
                try:
                    # Unpack positional and keyword arguments the way
                    # Thread.run does.
                    self.rv = self.target(*self.args, **self.kwargs)
                except service_error as s:
                    self.exception = s
                    self.log.error("Thread exception: %s %s" % \
                            (s.code_string(), s.desc))
                except:
                    # Deliberately broad: whatever ended the thread is
                    # recorded so the creator can inspect it.
                    self.exception = sys.exc_info()[1]
                    self.log.error(("Unexpected thread exception: %s" +\
                            "Trace %s") % (self.exception,\
                                traceback.format_exc()))
        finally:
            # Always report completion so pool waiters are not left hanging.
            if self.pdata:
                self.pdata.terminate()
180
181	call_RequestAccess = service_caller('RequestAccess')
182	call_ReleaseAccess = service_caller('ReleaseAccess')
183	call_StartSegment = service_caller('StartSegment')
184	call_TerminateSegment = service_caller('TerminateSegment')
185	call_Ns2Split = service_caller('Ns2Split')
186
def __init__(self, config=None, auth=None):
    """
    Initialize the various attributes, most from the config object.

    config is queried through get()/getboolean() for the
    "experiment_control", "globals" and "splitter" sections.  auth is the
    authorizer to use; if none is supplied an error is logged and a local
    authorizer() is created and installed.

    Raises service_error (internal) when the ssh public key file is not
    configured or cannot be read, when no ssh private key file is
    configured, or when no accessdb is configured.
    """

    def parse_tarfile_list(tf):
        """
        Parse a tarfile list from the configuration. This is a set of
        paths and tarfiles separated by spaces.  Returns a list of
        (path, tarfile) pairs; a trailing unpaired word is ignored.
        """
        if tf is None:
            return [ ]
        words = tf.split()
        return list(zip(words[0::2], words[1::2]))

    self.thread_with_rv = experiment_control_local.pooled_thread
    self.thread_pool = experiment_control_local.thread_pool
    self.list_log = list_log.list_log

    # Certificate settings: experiment_control section first, then globals
    self.cert_file = config.get("experiment_control", "cert_file")
    if self.cert_file:
        self.cert_pwd = config.get("experiment_control", "cert_pwd")
    else:
        self.cert_file = config.get("globals", "cert_file")
        self.cert_pwd = config.get("globals", "cert_pwd")

    self.trusted_certs = config.get("experiment_control", "trusted_certs") \
            or config.get("globals", "trusted_certs")

    self.repodir = config.get("experiment_control", "repodir")
    self.repo_url = config.get("experiment_control", "repo_url",
            "https://users.isi.deterlab.net:23235")

    self.exp_stem = "fed-stem"
    self.log = logging.getLogger("fedd.experiment_control")
    set_log_level(config, "experiment_control", self.log)
    self.muxmax = 2
    self.nthreads = 2
    self.randomize_experiments = False

    self.splitter = None
    self.ssh_keygen = "/usr/bin/ssh-keygen"
    self.ssh_identity_file = None

    self.debug = config.getboolean("experiment_control", "create_debug")
    self.cleanup = not config.getboolean("experiment_control",
            "leave_tmpfiles")
    self.state_filename = config.get("experiment_control",
            "experiment_state")
    self.splitter_url = config.get("experiment_control", "splitter_uri")
    self.fedkit = parse_tarfile_list(\
            config.get("experiment_control", "fedkit"))
    self.gatewaykit = parse_tarfile_list(\
            config.get("experiment_control", "gatewaykit"))
    accessdb_file = config.get("experiment_control", "accessdb")

    self.ssh_pubkey_file = config.get("experiment_control",
            "ssh_pubkey_file")
    self.ssh_privkey_file = config.get("experiment_control",
            "ssh_privkey_file")
    # NB for internal master/slave ops, not experiment setup
    self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")

    # fedids that are given rights over every experiment (see read_state)
    self.overrides = set([])
    ovr = config.get('experiment_control', 'overrides')
    if ovr:
        for o in ovr.split(","):
            o = o.strip()
            if o.startswith('fedid:'): o = o[len('fedid:'):]
            self.overrides.add(fedid(hexstr=o))

    self.state = { }
    self.state_lock = Lock()
    self.tclsh = "/usr/local/bin/otclsh"
    self.tcl_splitter = config.get("splitter", "tcl_splitter") or \
            config.get("experiment_control", "tcl_splitter",
                    "/usr/testbed/lib/ns2ir/parse.tcl")
    mapdb_file = config.get("experiment_control", "mapdb")
    self.trace_file = sys.stderr

    self.def_expstart = \
            "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " +\
            "/tmp/federate"
    self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " +\
            "FEDDIR/hosts"
    self.def_gwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " +\
            "/tmp/bridge.log"
    self.def_mgwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " +\
            "/tmp/bridge.log"
    self.def_gwimage = "FBSD61-TUNNEL2"
    self.def_gwtype = "pc"
    self.local_access = { }

    if auth:
        self.auth = auth
    else:
        self.log.error(\
                "[access]: No authorizer initialized, creating local one.")
        # Must land on self: read_accessdb() and read_state() below use
        # self.auth.
        self.auth = authorizer()

    if self.ssh_pubkey_file:
        try:
            f = open(self.ssh_pubkey_file, 'r')
            try:
                self.ssh_pubkey = f.read()
            finally:
                f.close()
        except IOError:
            raise service_error(service_error.internal,
                    "Cannot read sshpubkey")
    else:
        raise service_error(service_error.internal,
                "No SSH public key file?")

    if not self.ssh_privkey_file:
        raise service_error(service_error.internal,
                "No SSH private key file?")

    if mapdb_file:
        self.read_mapdb(mapdb_file)
    else:
        self.log.warn("[experiment_control] No testbed map, using defaults")
        self.tbmap = {
                'deter':'https://users.isi.deterlab.net:23235',
                'emulab':'https://users.isi.deterlab.net:23236',
                'ucb':'https://users.isi.deterlab.net:23237',
                }

    if accessdb_file:
        self.read_accessdb(accessdb_file)
    else:
        raise service_error(service_error.internal,
                "No accessdb specified in config")

    # Grab saved state.  OK to do this w/o locking because it's read only
    # and only one thread should be in existence that can see self.state at
    # this point.
    if self.state_filename:
        self.read_state()

    # Dispatch tables
    self.soap_services = {\
            'New': soap_handler('New', self.new_experiment),
            'Create': soap_handler('Create', self.create_experiment),
            'Vtopo': soap_handler('Vtopo', self.get_vtopo),
            'Vis': soap_handler('Vis', self.get_vis),
            'Info': soap_handler('Info', self.get_info),
            'MultiInfo': soap_handler('MultiInfo', self.get_multi_info),
            'Terminate': soap_handler('Terminate',
                self.terminate_experiment),
    }

    self.xmlrpc_services = {\
            'New': xmlrpc_handler('New', self.new_experiment),
            'Create': xmlrpc_handler('Create', self.create_experiment),
            'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
            'Vis': xmlrpc_handler('Vis', self.get_vis),
            'Info': xmlrpc_handler('Info', self.get_info),
            'MultiInfo': xmlrpc_handler('MultiInfo', self.get_multi_info),
            'Terminate': xmlrpc_handler('Terminate',
                self.terminate_experiment),
    }
356
# Call while holding self.state_lock
def write_state(self):
    """
    Write a new copy of experiment state after copying the existing state
    to a backup ("<state file>.bak").

    State format is a simple pickling of the state dictionary.  Failures
    to write or pickle are logged, not raised.
    """
    if os.access(self.state_filename, os.W_OK):
        copy_file(self.state_filename, \
                "%s.bak" % self.state_filename)
    try:
        # Binary mode is what pickle expects; on Unix under Python 2 it
        # yields the same bytes as text mode did.
        f = open(self.state_filename, 'wb')
        try:
            pickle.dump(self.state, f)
        finally:
            # Close explicitly so the data is flushed before the lock
            # holder moves on.
            f.close()
    except IOError as e:
        self.log.error("Can't write file %s: %s" % \
                (self.state_filename, e))
    except pickle.PicklingError as e:
        self.log.error("Pickling problem: %s" % e)
    except TypeError as e:
        self.log.error("Pickling problem (TypeError): %s" % e)
378
# Call while holding self.state_lock
def read_state(self):
    """
    Read a new copy of experiment state.  Old state is overwritten.

    State format is a simple pickling of the state dictionary.  A missing,
    empty or unreadable state file is logged and leaves self.state as it
    was.  After loading, authorization attributes are (re)asserted for
    every experiment in the state.

    NOTE(review): pickle.load executes whatever the pickle describes; the
    state file must only be writable by trusted users.
    """

    def get_experiment_id(state):
        """
        Pull the fedid experimentID out of the saved state.  This is kind
        of a gross walk through the dict: the first entry of
        state['experimentID'] that carries a 'fedid' wins; None otherwise.
        """
        for e in state.get('experimentID', []):
            if 'fedid' in e:
                return e['fedid']
        return None

    def get_alloc_ids(state):
        """
        Pull the fedids of the identifiers of each allocation from the
        state.  Again, a dict dive that's best isolated.
        """
        return [ f['allocID']['fedid']
                for f in state.get('federant',[]) \
                    if 'allocID' in f and 'fedid' in f['allocID']]

    try:
        f = open(self.state_filename, "rb")
        try:
            self.state = pickle.load(f)
        finally:
            f.close()
        self.log.debug("[read_state]: Read state from %s" % \
                self.state_filename)
    except IOError as e:
        self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
                % (self.state_filename, e))
    except (pickle.UnpicklingError, EOFError) as e:
        # EOFError is what an empty or truncated state file produces
        self.log.warning(("[read_state]: No saved state: " + \
                "Unpickling failed: %s") % e)

    for s in self.state.values():
        try:
            eid = get_experiment_id(s)
            if not eid:
                raise KeyError("No experiment id")
            # Give the owner rights to the experiment
            self.auth.set_attribute(s['owner'], eid)
            # And holders of the eid as well
            self.auth.set_attribute(eid, eid)
            # allow overrides to control experiments as well
            for o in self.overrides:
                self.auth.set_attribute(o, eid)
            # Set permissions to allow reading of the software repo, if
            # any, as well.
            for a in get_alloc_ids(s):
                self.auth.set_attribute(a, 'repo/%s' % eid)
        except KeyError as e:
            self.log.warning("[read_state]: State ownership or identity " +\
                    "misformatted in %s: %s" % (self.state_filename, e))
447
448
def read_accessdb(self, accessdb_file):
    """
    Read the mapping from fedids that can create experiments to their name
    in the 3-level access namespace.  All will be asserted from this
    testbed and can include the local username and project that will be
    asserted on their behalf by this fedd.  Each fedid is also added to the
    authorization system with the "create" and "new" attributes.

    Accepted lines (blank lines and lines starting with '#' are skipped):
        fedid:<hex> -> (<project>, <user>)
        fedid:<hex> -> <user>
    Anything else is logged as a parse error and ignored.

    The result is stored in self.accessdb, a dict of
    fedid -> [ (project, user), ... ] with project None for user-only
    lines.

    Raises service_error (internal) if the file cannot be opened or read.
    """
    self.accessdb = {}
    # These are the regexps for parsing the db.  Whitespace around the
    # tokens is optional: each line is strip()ped before matching, so a
    # pattern that demands trailing whitespace could never match.
    name_expr = "[" + string.ascii_letters + string.digits + r"\.\-]+"
    fedid_expr = r"^\s*fedid:([" + string.hexdigits + r"]+)"
    project_line = re.compile(fedid_expr + \
            r"\s*->\s*\(\s*(" + name_expr + r")\s*,\s*(" + name_expr + \
            r")\s*\)\s*$")
    user_line = re.compile(fedid_expr + \
            r"\s*->\s*(" + name_expr + r")\s*$")
    lineno = 0

    # Parse the mappings and store in self.accessdb
    try:
        f = open(accessdb_file, "r")
        try:
            for line in f:
                lineno += 1
                line = line.strip()
                if len(line) == 0 or line.startswith('#'):
                    continue

                m = project_line.match(line)
                if m:
                    fid = fedid(hexstr=m.group(1))
                    project, user = m.group(2,3)
                    self.accessdb.setdefault(fid, []).append(
                            (project, user))
                    continue

                m = user_line.match(line)
                if m:
                    fid = fedid(hexstr=m.group(1))
                    self.accessdb.setdefault(fid, []).append(
                            (None, m.group(2)))
                    continue

                self.log.warning("[experiment_control] Error parsing " +\
                        "access db %s at line %d" % (accessdb_file, lineno))
        finally:
            f.close()
    except IOError:
        # Format the whole message; applying % to only the second half
        # raised TypeError instead of the intended service_error.
        raise service_error(service_error.internal,
                ("Error opening/reading %s as experiment " +\
                        "control accessdb") % accessdb_file)

    # Initialize the authorization attributes
    for fid in self.accessdb.keys():
        self.auth.set_attribute(fid, 'create')
        self.auth.set_attribute(fid, 'new')
505
def read_mapdb(self, file):
    """
    Read a simple colon separated list of mappings for the
    label-to-testbed-URL mappings.  Clears or creates self.tbmap.

    Each non-blank line not starting with '#' is split at its first colon
    into label and URL.  Lines without a colon are logged and ignored.  A
    file that cannot be opened or read is logged and leaves self.tbmap
    with whatever was parsed so far (empty if the open failed).
    """

    self.tbmap = { }
    lineno = 0
    try:
        f = open(file, "r")
        try:
            for line in f:
                lineno += 1
                line = line.strip()
                if line.startswith('#') or len(line) == 0:
                    continue
                try:
                    label, url = line.split(':', 1)
                    self.tbmap[label] = url
                except ValueError as e:
                    # Format the joined message: the second fragment alone
                    # has two placeholders for three values.
                    self.log.warning(("[read_mapdb] Ignored bad line " +\
                            "(%d) in map db: %s %s") % (lineno, line, e))
        finally:
            # Only reached when open() succeeded, so f is always bound.
            f.close()
    except IOError as e:
        self.log.warning("[read_mapdb]: No saved map database: Can't " +\
                "open %s: %s" % (file, e))
531
def generate_ssh_keys(self, dest, type="rsa" ):
    """
    Generate a set of keys for the gateways to use to talk.

    Keys are of type type ("rsa" or "dsa", case-insensitive) and are stored
    in the required dest file by running self.ssh_keygen with an empty
    passphrase.

    Raises ValueError for an unsupported key type and service_error
    (internal) if /dev/null cannot be opened or the key generator exits
    with a non-zero status.
    """
    valid_types = ("rsa", "dsa")
    t = type.lower()
    if t not in valid_types:
        raise ValueError("Unsupported ssh key type: %s" % type)
    cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]

    try:
        trace = open("/dev/null", "w")
    except IOError:
        raise service_error(service_error.internal,
                "Cannot open /dev/null??")

    try:
        self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
        # Output of the key generator is discarded
        rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
    finally:
        # Do not leak the /dev/null handle, whatever call() does
        trace.close()

    if rv != 0:
        raise service_error(service_error.internal,
                "Cannot generate nonce ssh keys.  %s return code %d" \
                        % (self.ssh_keygen, rv))
556
def gentopo(self, str):
    """
    Build the topology data structure from the XML text that the splitter
    produces.

    The topology XML looks like:
        <experiment>
            <nodes>
                <node><vname></vname><ips>ip1:ip2</ips></node>
            </nodes>
            <lans>
                <lan>
                    <vname></vname><vnode></vnode><ip></ip>
                    <bandwidth></bandwidth><member>node:port</member>
                </lan>
            </lans>

    Returns a dict with two keys, 'node' and 'lan', each holding a list of
    dicts built from the sub-elements of every <node> / <lan>.
    """
    class topo_parse:
        """
        Holder for the expat callbacks; collects the parsed nodes and lans.
        """
        def __init__(self):
            # How the text of each recognized sub-element is converted
            # before it is stored: most stay text, bandwidth becomes an
            # int and delay a float.
            as_text = lambda text: text
            self.converters = { 'bandwidth': int, 'delay': float }
            for tag in ('vname', 'vnode', 'ips', 'ip', 'member'):
                self.converters[tag] = as_text
            # The final data structure, keyed by the closing tag that
            # completes an entry
            self.topo = { 'node': [ ], 'lan': [ ] }
            self.element = { }  # Entry currently being filled in
            self.chars = ""     # Text collected since the last sub-element

        def end_element(self, name):
            # A closing <node>/<lan> files the current entry in the
            # matching list; a closing recognized sub-element stores its
            # (converted) text in the current entry.  Other tags are
            # ignored.
            if name in self.topo:
                self.topo[name].append(self.element)
                self.element = { }
            elif name in self.converters:
                self.element[name] = self.converters[name](self.chars)
                self.chars = ""

        def found_chars(self, data):
            # Text may arrive in pieces; trailing whitespace of each piece
            # is dropped.
            self.chars += data.rstrip()

    handler = topo_parse()
    xml_parser = xml.parsers.expat.ParserCreate()
    xml_parser.EndElementHandler = handler.end_element
    xml_parser.CharacterDataHandler = handler.found_chars

    xml_parser.Parse(str)

    return handler.topo
624
625
def genviz(self, topo):
    """
    Generate the visualization of the virtual topology.

    topo is a dict with 'node' and 'lan' lists (the structure gentopo()
    returns).  The topology is written as a graph to a temporary file,
    laid out by the external neato program, and the positions neato
    prints are collected.

    Returns { 'node': [ {'name', 'x', 'y', 'type'}, ... ] } where type is
    'lan' or 'node', or None if neato exits with a non-zero status.

    Raises service_error (internal) for a malformed topology, a link with
    a single endpoint, or failures to create/write the temporary file or
    open /dev/null.
    """

    neato = "/usr/local/bin/neato"
    # Used to parse neato output: an optionally quoted name followed by an
    # attribute list containing pos="x,y".
    # NOTE(review): the quantifiers after the leading \s and before pos=
    # are assumed; without them only lines with exactly one leading
    # whitespace character and exactly one character between '[' and
    # 'pos' would match.  Confirm against the neato output in use.
    vis_re = re.compile(r'^\s*"?([\w\-]+)"?\s+\[.*pos="(\d+),(\d+)"')

    try:
        # Node names
        nodes = [ n['vname'] for n in topo['node'] ]
        topo_lans = topo['lan']
    except KeyError as e:
        raise service_error(service_error.internal, "Bad topology: %s" %e)

    lans = { }
    links = { }

    # Walk through the virtual topology, organizing the connections into
    # 2-node connections (links) and more-than-2-node connections (lans).
    # When a lan is created, it's added to the list of nodes (there's a
    # node in the visualization for the lan).
    for l in topo_lans:
        name, member = l['vname'], l['vnode']
        if name in links:
            if len(links[name]) < 2:
                links[name].append(member)
            else:
                # Third endpoint: promote the link to a lan
                nodes.append(name)
                lans[name] = links.pop(name)
                lans[name].append(member)
        elif name in lans:
            lans[name].append(member)
        else:
            links[name] = [ member ]

    # Open up a temporary file for dot to turn into a visualization
    try:
        df, dotname = tempfile.mkstemp()
        dotfile = os.fdopen(df, 'w')
    except (IOError, OSError):
        raise service_error(service_error.internal,
                "Failed to open file in genviz")

    try:
        try:
            dnull = open('/dev/null', 'w')
        except IOError:
            raise service_error(service_error.internal,
                    "Failed to open /dev/null in genviz")

        try:
            # Generate a dot/neato input file from the links, nodes and
            # lans
            try:
                dotfile.write("graph G {\n")
                for n in nodes:
                    dotfile.write('\t"%s"\n' % n)
                for l in links.keys():
                    # A link with one endpoint makes this format fail with
                    # TypeError, which is reported below
                    dotfile.write('\t"%s" -- "%s"\n' % tuple(links[l]))
                for l in lans.keys():
                    for n in lans[l]:
                        dotfile.write('\t "%s" -- "%s"\n' % (n,l))
                dotfile.write("}\n")
                dotfile.close()
            except TypeError:
                raise service_error(service_error.internal,
                        "Single endpoint link in vtopo")
            except IOError:
                raise service_error(service_error.internal,
                        "Cannot write dot file")

            # Use dot to create a visualization
            dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005',
                '-Gmaxiter=2000', '-Gpack=true', dotname], stdout=PIPE,
                stderr=dnull, close_fds=True)
        finally:
            dnull.close()

        # Translate dot to vis format
        vis_nodes = [ ]
        vis = { 'node': vis_nodes }
        for line in dot.stdout:
            m = vis_re.match(line)
            if m:
                vn = m.group(1)
                vis_node = {'name': vn, \
                        'x': float(m.group(2)),\
                        'y' : float(m.group(3)),\
                    }
                if vn in links or vn in lans:
                    vis_node['type'] = 'lan'
                else:
                    vis_node['type'] = 'node'
                vis_nodes.append(vis_node)
        rv = dot.wait()
    finally:
        # Whatever happened, do not leave the temporary file (or an open
        # handle on it) behind
        if not dotfile.closed:
            dotfile.close()
        os.remove(dotname)

    if rv == 0:
        return vis
    else:
        return None
726
def get_access(self, tb, nodes, tbparam, master, export_project,
        access_user, services):
    """
    Get access to testbed through fedd and set the parameters for that tb.

    tb -- testbed label, looked up in self.tbmap
    nodes -- optional list of "image:hardware:count" resource strings
    tbparam -- dict updated in place: tbparam[tb] receives the allocID,
               the uri and the lower-cased fedAttr attributes of the reply
    master -- label of the master testbed; requests to it also carry the
              export project and the exported services
    export_project -- dict describing the project to export (may be None)
    access_user -- sequence of (project, user) pairs to try in turn
    services -- list extended in place with the services in the reply

    Each (project, user) pair is tried until one is not refused.  Raises
    service_error: server_config for an unknown testbed, access if every
    attempt is denied (or there is nothing to try), protocol for a
    malformed reply; other service_errors from the request propagate.
    """
    uri = self.tbmap.get(tb, None)
    if not uri:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    # Tweak search order so that if there are entries in access_user that
    # have a project matching the export project, we try them first
    if export_project and 'localname' in export_project:
        pn = export_project['localname']

        access_sequence = [ (p, u) for p, u in access_user if p == pn]
        access_sequence.extend([(p, u) for p, u in access_user if p != pn])
    else:
        access_sequence = access_user

    # Stays None if there is nothing to try or every attempt is denied
    r = None
    for p, u in access_sequence:
        self.log.debug(("[get_access] Attempting access from (%s, %s) " + \
                "to %s") %  ((p or "None"), u, uri))

        if p:
            # Request with user and project specified
            req = {\
                    'destinationTestbed' : { 'uri' : uri },
                    'credential': [ "project: %s" % p, "user: %s"  % u],
                    'allocID' : { 'localname': 'test' },
                }
        else:
            # Request with only user specified
            req = {\
                    'destinationTestbed' : { 'uri' : uri },
                    'credential': [ 'user: %s' % u ],
                    'user':  [ {'userID': { 'localname': u } } ],
                    'allocID' : { 'localname': 'test' },
                }

        if tb == master:
            # NB, the export_project parameter is a dict that includes
            # the type
            req['exportProject'] = export_project
            req['service'] = [
                    { 'name': 'userconfig', 'visibility': 'export'},
                    { 'name': 'SMB', 'visibility': 'export'},
                    { 'name': 'seer', 'visibility': 'export'},
                    { 'name': 'tmcd', 'visibility': 'export'},
                ]

        # node resources if any
        if nodes is not None and len(nodes) > 0:
            rnodes = [ ]
            for n in nodes:
                rn = { }
                image, hw, count = n.split(":")
                if image: rn['image'] = [ image ]
                if hw: rn['hardware'] = [ hw ]
                if count and int(count) >0 : rn['count'] = int(count)
                rnodes.append(rn)
            req['resources']= { }
            req['resources']['node'] = rnodes

        try:
            if uri in self.local_access:
                # Local access call
                req = { 'RequestAccessRequestBody' : req }
                r = self.local_access[uri].RequestAccess(req,
                        fedid(file=self.cert_file))
                r = { 'RequestAccessResponseBody' : r }
            else:
                r = self.call_RequestAccess(uri, req,
                        self.cert_file, self.cert_pwd, self.trusted_certs)
        except service_error as e:
            if e.code == service_error.access:
                self.log.debug("[get_access] Access denied")
                r = None
                continue
            else:
                raise

        if 'RequestAccessResponseBody' in r:
            # Through to here we have a valid response, not a fault.
            # Access denied is a fault, so something better or worse than
            # access denied has happened.
            r = r['RequestAccessResponseBody']
            self.log.debug("[get_access] Access granted")
            break
        else:
            raise service_error(service_error.protocol,
                        "Bad proxy response")

    if not r:
        raise service_error(service_error.access,
                "Access denied by %s (%s)" % (tb, uri))

    tbparam[tb] = {
            "allocID" : r['allocID'],
            "uri": uri,
            }
    if 'service' in r:
        services.extend(r['service'])

    # Add attributes to parameter space.  We don't allow attributes to
    # overlay any parameters already installed.
    for a in r.get('fedAttr', []):
        try:
            if a['attribute'] and \
                    isinstance(a['attribute'], basestring)\
                    and a['attribute'].lower() not in tbparam[tb]:
                tbparam[tb][a['attribute'].lower()] = a['value']
        except KeyError:
            self.log.error("Bad attribute in response: %s" % a)
841
def release_access(self, tb, aid, uri=None):
    """
    Release access to testbed through fedd.

    aid is the allocation identifier to release.  If uri is not supplied
    it is looked up from the testbed label tb in self.tbmap.  Raises
    service_error (server_config) when no uri can be determined.  The
    reply to the release request is not examined.
    """

    # An explicit uri wins; otherwise fall back to the testbed map
    if not uri:
        uri = self.tbmap.get(tb, None)
        if not uri:
            raise service_error(service_error.server_config,
                    "Unknown testbed: %s" % tb)

    if uri in self.local_access:
        # Access controller in this process: call it directly
        self.local_access[uri].ReleaseAccess(\
                { 'ReleaseAccessRequestBody' : {'allocID': aid},},
                fedid(file=self.cert_file))
        return

    # Remote access controller
    self.call_ReleaseAccess(uri, {'allocID': aid},
            self.cert_file, self.cert_pwd, self.trusted_certs)

    # better error coding
863
def remote_splitter(self, uri, desc, master):
    """
    Send the ns2 description desc to the Ns2Split service at uri and
    return the 'output' field of its reply as a list of lines.

    The request also carries master and whether this object has a fedkit
    and a gatewaykit configured.  Raises service_error (protocol) if the
    reply has no 'Ns2SplitResponseBody' or that body has no 'output'.
    """

    request = { }
    request['description'] = { 'ns2description': desc }
    request['master'] = master
    request['include_fedkit'] = bool(self.fedkit)
    request['include_gatewaykit'] = bool(self.gatewaykit)

    reply = self.call_Ns2Split(uri, request, self.cert_file, self.cert_pwd,
            self.trusted_certs)

    if 'Ns2SplitResponseBody' not in reply:
        raise service_error(service_error.protocol, "Bad splitter response")

    body = reply['Ns2SplitResponseBody']
    if 'output' not in body:
        raise service_error(service_error.protocol,
                "Bad splitter response (no output)")

    return body['output'].splitlines()
885
class start_segment:
    """
    Callable that issues one StartSegment request and remembers the reply.

    An instance is configured once with the credentials, logger and the
    function used to make the call, and can then be handed to a thread as
    its target.  After a successful call the raw reply is available in
    self.response.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None,
            log_collector=None):
        """
        debug -- stored; not consulted in this class
        log -- logger used for progress and error messages
        testbed -- label used in error messages
        cert_file, cert_pwd, trusted_certs -- passed through to caller
        caller -- function invoked as
                  caller(uri, req, cert_file, cert_pwd, trusted_certs)
        log_collector -- optional object with write(); receives the
                         allocation log returned by the segment
        """
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep what the creator passed in; this was hard-wired to None, so
        # the trusted certificates never reached the caller.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed
        self.log_collector = log_collector
        self.response = None

    def __call__(self, uri, aid, topo, master, attrs=None, connInfo=None,
            services=None):
        """
        Ask the service at uri to start the segment described by topo
        (an object with to_dict()) under allocation aid.

        Returns True if a StartSegmentResponseBody came back, False if a
        service_error occurred (it is logged, not raised).
        """
        req = {
                'allocID': { 'fedid' : aid },
                'segmentdescription': {
                    'topdldescription': topo.to_dict(),
                },
                'master': master,
            }

        if connInfo:
            req['connection'] = connInfo
        # Add services to request.  The master exports, everyone else
        # imports.
        if services:
            svcs = [ x.copy() for x in services]
            for s in svcs:
                if master: s['visibility'] = 'export'
                else: s['visibility'] = 'import'
            req['service'] = svcs
        if attrs:
            req['fedAttr'] = attrs

        try:
            self.log.debug("Calling StartSegment at %s " % uri)
            r = self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            if 'StartSegmentResponseBody' in r:
                lval = r['StartSegmentResponseBody'].get('allocationLog',
                        None)
                if lval and self.log_collector:
                    # Forward the remote allocation log line by line
                    for line in lval.splitlines(True):
                        self.log_collector.write(line)
                self.response = r
            else:
                raise service_error(service_error.internal,
                        "Bad response!?: %s" %r)
            return True
        except service_error as e:
            self.log.error("Start segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
942
943
944
class terminate_segment:
    """
    Callable that issues one TerminateSegment request.

    Configured once with credentials, logger and the function used to make
    the call, so an instance can be handed to a thread as its target.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None):
        """
        debug -- stored; not consulted in this class
        log -- logger used for error messages
        testbed -- label used in error messages
        cert_file, cert_pwd, trusted_certs -- passed through to caller
        caller -- function invoked as
                  caller(uri, req, cert_file, cert_pwd, trusted_certs)
        """
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep what the creator passed in; this was hard-wired to None, so
        # the trusted certificates never reached the caller.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed

    def __call__(self, uri, aid ):
        """
        Ask the service at uri to terminate the segment with allocation
        id aid.  Returns True on success, False if a service_error
        occurred (it is logged, not raised).
        """
        req = {
                'allocID': aid ,
            }
        try:
            # The reply carries nothing this caller needs
            self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            return True
        except service_error as e:
            self.log.error("Terminate segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
968
969
970	def allocate_resources(self, allocated, master, eid, expid,
971	tbparams, topo, tmpdir, alloc_log=None, log_collector=None,
972	attrs=None, connInfo={}, services=[]):
973	def get_vlan(r):
974	if r.has_key('StartSegmentResponseBody'):
975	srb = r['StartSegmentResponseBody']
976	if srb.has_key('fedAttr'):
977	for k, v in [ (a['attribute'], a['value']) \
978	for a in srb['fedAttr']]:
979	if k == 'vlan': return v
980	return None
981
982	started = { } # Testbeds where a sub-experiment started
983	# successfully
984
985	# XXX
986	fail_soft = False
987
988	non_transit = [ k for k in allocated.keys() \
989	if not topo[k].get_attribute('transit')]
990	transit = [ k for k in allocated.keys() \
991	if topo[k].get_attribute('transit')]
992
993	log = alloc_log or self.log
994
995	thread_pool = self.thread_pool(self.nthreads)
996	threads = [ ]
997
998	for tb in transit:
999	uri = tbparams[tb]['uri']
1000	if tbparams[tb].has_key('allocID') and \
1001	tbparams[tb]['allocID'].has_key('fedid'):
1002	aid = tbparams[tb]['allocID']['fedid']
1003	else:
1004	raise service_error(service_error.internal,
1005	"No alloc id for testbed %s !?" % tb)
1006
1007	m = re.search('(\d+)', tb)
1008	if m:
1009	to_repl = "unassigned%s" % m.group(1)
1010	else:
1011	raise service_error(service_error.internal,
1012	"Bad dynamic allocation name")
1013	break
1014
1015	ss = self.start_segment(log=log, debug=self.debug,
1016	testbed=tb, cert_file=self.cert_file,
1017	cert_pwd=self.cert_pwd,
1018	trusted_certs=self.trusted_certs,
1019	caller=self.call_StartSegment,
1020	log_collector=log_collector)
1021	t = self.pooled_thread(
1022	target=ss,
1023	args =(uri, aid, topo[tb], False, attrs, connInfo[tb],
1024	services),
1025	name=tb, pdata=thread_pool, trace_file=self.trace_file)
1026	threads.append(t)
1027	t.start()
1028	# Wait until the this transit node finishes (keep pinging the log,
1029	# though)
1030
1031	mins = 0
1032	while not thread_pool.wait_for_all_done(60.0):
1033	mins += 1
1034	alloc_log.info("Waiting for transit (it has been %d mins)" \
1035	% mins)
1036
1037	if t.rv:
1038	vlan = get_vlan(ss.response)
1039	if vlan is not None:
1040	for k, t in topo.items():
1041	for e in t.elements:
1042	for i in e.interface:
1043	vl = i.get_attribute('dragon_vlan')
1044	if vl is not None and vl == to_repl:
1045	i.set_attribute('dragon_vlan', vlan)
1046	else:
1047	break
1048	thread_pool.clear()
1049
1050
1051	failed = [ t.getName() for t in threads if not t.rv ]
1052
1053	if len(failed) == 0:
1054	for tb in non_transit:
1055	# Create and start a thread to start the segment, and save it
1056	# to get the return value later
1057	thread_pool.wait_for_slot()
1058	uri = self.tbmap.get(tb, None)
1059	if not uri:
1060	raise service_error(service_error.internal,
1061	"Unknown testbed %s !?" % tb)
1062
1063	if tbparams[tb].has_key('allocID') and \
1064	tbparams[tb]['allocID'].has_key('fedid'):
1065	aid = tbparams[tb]['allocID']['fedid']
1066	else:
1067	raise service_error(service_error.internal,
1068	"No alloc id for testbed %s !?" % tb)
1069
1070	t = self.pooled_thread(\
1071	target=self.start_segment(log=log, debug=self.debug,
1072	testbed=tb, cert_file=self.cert_file,
1073	cert_pwd=self.cert_pwd,
1074	trusted_certs=self.trusted_certs,
1075	caller=self.call_StartSegment,
1076	log_collector=log_collector),
1077	args=(uri, aid, topo[tb], tb == master,
1078	attrs, connInfo[tb], services),
1079	name=tb,
1080	pdata=thread_pool, trace_file=self.trace_file)
1081	threads.append(t)
1082	t.start()
1083
1084	# Wait until all finish (keep pinging the log, though)
1085	mins = 0
1086	while not thread_pool.wait_for_all_done(60.0):
1087	mins += 1
1088	alloc_log.info("Waiting for sub threads (it has been %d mins)" \
1089	% mins)
1090
1091	thread_pool.clear()
1092
1093	failed = [ t.getName() for t in threads if not t.rv ]
1094	succeeded = [tb for tb in allocated.keys() if tb not in failed]
1095
1096	# If one failed clean up, unless fail_soft is set
1097	if failed:
1098	if not fail_soft:
1099	thread_pool.clear()
1100	for tb in succeeded:
1101	# Create and start a thread to stop the segment
1102	thread_pool.wait_for_slot()
1103	uri = tbparams[tb]['uri']
1104	t = self.pooled_thread(\
1105	target=self.terminate_segment(log=log,
1106	testbed=tb,
1107	cert_file=self.cert_file,
1108	cert_pwd=self.cert_pwd,
1109	trusted_certs=self.trusted_certs,
1110	caller=self.call_TerminateSegment),
1111	args=(uri, tbparams[tb]['federant']['allocID']),
1112	name=tb,
1113	pdata=thread_pool, trace_file=self.trace_file)
1114	t.start()
1115	# Wait until all finish
1116	thread_pool.wait_for_all_done()
1117
1118	# release the allocations
1119	for tb in tbparams.keys():
1120	self.release_access(tb, tbparams[tb]['allocID'],
1121	tbparams[tb].get('uri', None))
1122	# Remove the placeholder
1123	self.state_lock.acquire()
1124	self.state[eid]['experimentStatus'] = 'failed'
1125	if self.state_filename: self.write_state()
1126	self.state_lock.release()
1127
1128	log.error("Swap in failed on %s" % ",".join(failed))
1129	return
1130	else:
1131	log.info("[start_segment]: Experiment %s active" % eid)
1132
1133
1134	# Walk up tmpdir, deleting as we go
1135	if self.cleanup:
1136	log.debug("[start_experiment]: removing %s" % tmpdir)
1137	for path, dirs, files in os.walk(tmpdir, topdown=False):
1138	for f in files:
1139	os.remove(os.path.join(path, f))
1140	for d in dirs:
1141	os.rmdir(os.path.join(path, d))
1142	os.rmdir(tmpdir)
1143	else:
1144	log.debug("[start_experiment]: not removing %s" % tmpdir)
1145
1146	# Insert the experiment into our state and update the disk copy
1147	self.state_lock.acquire()
1148	self.state[expid]['experimentStatus'] = 'active'
1149	self.state[eid] = self.state[expid]
1150	if self.state_filename: self.write_state()
1151	self.state_lock.release()
1152	return
1153
1154
def add_kit(self, e, kit):
    """
    Attach the software described by kit to the element e.

    kit is an iterable of (install, location) pairs; each pair is turned
    into a topdl.Software object.  When e.software is already a list the
    new objects are appended to it, otherwise e.software is replaced by a
    fresh list that holds only the new objects.  The tuple form is kept so
    that the older kit representation does not have to change.
    """
    packages = []
    for install, location in kit:
        packages.append(topdl.Software(install=install, location=location))

    if not isinstance(e.software, list):
        e.software = packages
    else:
        e.software.extend(packages)
1167
1168
def create_experiment_state(self, fid, req, expid, expcert,
        state='starting'):
    """
    Create the initial entry in the experiment's state and return the
    local name that was chosen for it.

    fid is the caller's fedid, req the request body, expid and expcert the
    experiment's fedid and the certificate that represents it, and state
    the value stored as 'experimentStatus'.

    If the request carries req['experimentID']['localname'] that name is
    used when possible.  If an experiment with that name exists, is
    accessible to fid and has status 'failed', it is removed (along with
    fid's access attribute for it) and the name is reused.  Otherwise
    random letters are appended until the name is free.  Without a
    requested name, self.exp_stem plus five random letters is used.

    The same record is stored under both the local name and expid, and the
    state file is rewritten when self.state_filename is set.
    """
    exp_req = req.get('experimentID', None)
    named = exp_req is not None and 'localname' in exp_req

    old_expid = None
    reusable = False
    if named:
        eid = exp_req['localname']
        # These helpers are called before the lock is taken, exactly as
        # before, in case they lock the state themselves.
        old_expid = self.get_experiment_fedid(eid)
        reusable = bool(old_expid and
                self.check_experiment_access(fid, old_expid))

    # One critical section for both the purge of a failed experiment and
    # the insertion of the new one, so nobody can grab the name in
    # between.  try/finally guarantees the lock is released even if
    # write_state (or anything else) raises.
    self.state_lock.acquire()
    try:
        if named:
            if reusable:
                old = self.state.get(eid, None)
                if old is not None and \
                        old.get('experimentStatus', None) == 'failed':
                    # remove the old access attribute
                    self.auth.unset_attribute(fid, old_expid)
                    del self.state[eid]
                    self.state.pop(old_expid, None)
            # Collision avoidance: grow the name one letter at a time
            while eid in self.state:
                eid += random.choice(string.ascii_letters)
        else:
            while True:
                eid = self.exp_stem + "".join(
                        [random.choice(string.ascii_letters)
                            for i in range(0, 5)])
                if eid not in self.state:
                    break

        # Initial state, reachable by local name and by fedid
        record = {
                'experimentID' : \
                        [ { 'localname' : eid }, {'fedid': expid } ],
                'experimentStatus': state,
                'experimentAccess': { 'X509' : expcert },
                'owner': fid,
                'log' : [],
            }
        self.state[eid] = record
        self.state[expid] = record
        if self.state_filename: self.write_state()
    finally:
        self.state_lock.release()

    return eid
1237
1238
def allocate_ips_to_topo(self, top):
    """
    Give every interface on every substrate of top an ip4_address
    attribute and build the matching /etc/hosts lines.

    Substrates are handled from most to fewest interfaces.  Each one gets
    a block of len(interfaces)+2 addresses from an allocator covering
    2**24 addresses starting at 10.0.0.0; the first address of the block
    is skipped and the rest are handed out in interface order.

    Returns (hosts, ips): the list of hosts-file lines and the allocator
    itself, because later steps may need more addresses for portals
    (specifically DRAGON portals).  Raises service_error when the
    allocator cannot satisfy a request.
    """
    by_size = sorted(top.substrates,
            key=lambda sub: len(sub.interfaces), reverse=True)
    ips = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
    seen = { }      # host name -> number of interfaces named so far
    hosts = [ ]

    for sub in by_size:
        block = ips.allocate(len(sub.interfaces)+2)
        if not block:
            raise service_error(service_error.req,
                    "Cannot allocate IP addresses")
        base, num = block
        if num < len(sub.interfaces) +2 :
            raise service_error(service_error.internal,
                    "Allocator returned wrong number of IPs??")

        addr = base + 1
        for iface in sub.interfaces:
            iface.attribute.append(
                    topdl.Attribute('ip4_address',
                        "%s" % ip_addr(addr)))
            hname = iface.element.name[0]
            first_time = not seen.has_key(hname)
            if first_time:
                seen[hname] = 0
            line = "%s\t%s-%s %s-%d" % \
                    (ip_addr(addr), hname, sub.name, hname, seen[hname])
            if first_time:
                # The first interface of a host also carries the bare
                # host name as an alias.
                line = "%s %s" % (line, hname)
            hosts.append(line)

            seen[hname] += 1
            addr += 1
    return hosts, ips
1284
def get_access_to_testbeds(self, testbeds, access_user,
        export_project, master, allocated, tbparams, services):
    """
    Request access to each testbed named in testbeds.

    access_user, export_project, master and services are handed through
    to self.get_access, which fills in the per-testbed parameters in
    tbparams.  Every testbed for which get_access returns is recorded in
    allocated (testbed name -> 1).
    """
    for testbed in testbeds:
        self.get_access(testbed, None, tbparams, master,
                export_project, access_user, services)
        allocated[testbed] = 1
1297
def split_topology(self, top, topo, testbeds, eid, master, tbparams):
    """
    Create the per-testbed sub-topologies needed to instantiate the
    experiment and store them in topo, keyed by testbed name.

    Each sub-topology starts as a clone of top.  Elements whose 'testbed'
    attribute names a different testbed are removed, after their
    interfaces have been detached from the substrates they sit on.  The
    indices are then rebuilt and, when self.fedkit is set, the fedkit is
    added to every remaining topdl.Computer.

    eid, master and tbparams are accepted but not used here.
    """
    for tb in testbeds:
        piece = top.clone()
        topo[tb] = piece

        # Collect first, remove afterwards, so the element list is not
        # modified while it is being walked.
        foreign = [ ]
        for elem in piece.elements:
            home = elem.get_attribute('testbed')
            if not home or home == tb:
                continue
            for iface in elem.interface:
                for substrate in iface.subs:
                    try:
                        substrate.interfaces.remove(iface)
                    except ValueError:
                        raise service_error(service_error.internal,
                                "Can't remove interface??")
            foreign.append(elem)

        for elem in foreign:
            piece.elements.remove(elem)
        piece.make_indices()

        if self.fedkit:
            for elem in piece.elements:
                if isinstance(elem, topdl.Computer):
                    self.add_kit(elem, self.fedkit)
1324
def new_portal_node(self, st, dt, tbparams, master, eid, myname, desthost,
        portal_type, iface_desc=()):
    """
    Build a portal node named myname and the connection information that
    goes with it.

    st and dt are the source and destination testbed names, used (with
    master) to look up project/domain/user settings in tbparams and to
    decide which end is "active": when either end is the master, the
    master end is active; otherwise the end whose name compares greater.
    iface_desc is a sequence of (substrate name, ((attribute, value),
    ...)) pairs, one per interface to create on the node.

    Returns (computer, info): a topdl.Computer carrying 'portal' and
    'portal_type' attributes, and a dict describing the ssh connection to
    the peer desthost.
    """
    dest = tbparams[dt]
    dproject = dest.get('project', 'project')
    ddomain = dest.get('domain', ".example.com")
    mparams = tbparams[master]
    mdomain = mparams.get('domain', '.example.com')
    mproject = mparams.get('project', 'project')
    muser = mparams.get('user', 'root')
    smbshare = mparams.get('smbshare', 'USERS')

    if master in (st, dt):
        is_active = (st == master)
    else:
        is_active = (st > dt)
    active = "%s" % is_active

    ifaces = [ ]
    for sub, attrs in iface_desc:
        attr_objs = [ topdl.Attribute(attribute=n, value=v)
                for n, v in attrs ]
        ifaces.append(topdl.Interface(substrate=sub, attribute=attr_objs))

    peer = "%s.%s.%s%s" % (desthost.lower(), eid.lower(),
            dproject.lower(), ddomain.lower())
    fed_attrs = [ { 'attribute': n, 'value': v } for n, v in (
            ('masterdomain', mdomain),
            ('masterexperiment', "%s/%s" % (mproject, eid)),
            ('active', active),
            # Move to SMB service description
            ('masteruser', muser),
            ('smbshare', smbshare),
        ) ]
    info = {
            "type" : "ssh",
            "portal": myname,
            'peer': peer,
            'fedAttr': fed_attrs,
        }

    node = topdl.Computer(
            name=myname,
            attribute=[
                topdl.Attribute(attribute='portal', value='true'),
                topdl.Attribute(attribute='portal_type',
                    value=portal_type),
            ],
            interface=ifaces,
        )
    return (node, info)
1381
def new_portal_substrate(self, st, dt, eid, tbparams):
    """
    Build the substrate and segment element that stand for the connection
    from testbed st to testbed dt.

    The substrate is named "<st>-<dt>" and marked with a 'portal'
    attribute.  The segment carries dt's allocation ID, dt's URI from
    self.tbmap, one interface on the new substrate, and 'domain' and
    'experiment' attributes taken from tbparams[dt] (with defaults).

    Returns (substrate, segment).
    """
    dest = tbparams[dt]
    ddomain = dest.get('domain', ".example.com")
    dproject = dest.get('project', 'project')

    portal_flag = topdl.Attribute(attribute='portal', value='true')
    tsubstrate = topdl.Substrate(name='%s-%s' % (st, dt),
            attribute=[ portal_flag ])

    segment_element = topdl.Segment(
            id=dest['allocID'],
            type='emulab',
            uri=self.tbmap.get(dt, None),
            interface=[ topdl.Interface(substrate=tsubstrate.name) ],
            attribute=[
                topdl.Attribute(attribute='domain', value=ddomain),
                topdl.Attribute(attribute='experiment',
                    value="%s/%s" % (dproject, eid)),
            ],
        )

    return (tsubstrate, segment_element)
1411
def new_dragon_topo(self, idx, sub, topo, tbs, tbparams):
    """
    Create the transit topology "dragon<idx>" for substrate sub and store
    it in topo.

    The new topology has one substrate (a copy of sub's capacity, with a
    'vlan' attribute of "unassigned<idx>") and one segment per testbed in
    tbs.  Each segment carries that testbed's allocation ID, URI and
    'dragon_endpoint', plus 'vlans' when tbparams has one for it.  The
    topology itself is tagged transit, dynamic and testbed=dragon.

    Raises service_error if sub has no capacity.
    """
    if sub.capacity is None:
        raise service_error(service_error.internal,
                "Cannot DRAGON split substrate w/o capacity")

    substr = topdl.Substrate(name="dragon%d" % idx,
            capacity=sub.capacity.clone(),
            attribute=[ topdl.Attribute(attribute='vlan',
                value='unassigned%d' % idx) ])

    segs = [ ]
    for tb in tbs.keys():
        params = tbparams[tb]
        seg = topdl.Segment(
                id=params['allocID'],
                type='emulab',
                uri=self.tbmap.get(tb, None),
                interface=[ topdl.Interface(substrate=substr.name) ],
                attribute=[ topdl.Attribute(
                    attribute='dragon_endpoint',
                    value=params['dragon']) ],
            )
        if 'vlans' in params:
            seg.set_attribute('vlans', params['vlans'])
        segs.append(seg)

    tags = [ topdl.Attribute(attribute=n, value=v) for n, v in (
            ("transit", 'true'),
            ("dynamic", 'true'),
            ("testbed", 'dragon'),
        ) ]
    topo["dragon%d" %idx] = \
            topdl.Topology(substrates=[substr], elements=segs,
                    attribute=tags)
1448
def create_dragon_substrate(self, sub, topo, tbs, tbparams, master, eid):
    """
    Mark the interfaces on substrate sub as DRAGON connected and create
    the matching dragon segment topology in topo.

    The dragon index is the number of keys already in topo that start
    with 'dragon'.  For every testbed in tbs, the copy of sub in that
    testbed's sub-topology is looked up by name and each of its
    interfaces gets 'dragon_vlan' = "unassigned<index>" and a
    'dragon_type' of 'lan' (when more than one element of sub is in that
    testbed) or 'link'.  Raises service_error if a testbed has no
    substrate with sub's name.

    master and eid are accepted but not used here.
    """
    dn = len([name for name in topo.keys() if name.startswith('dragon')])

    # Number of elements on this substrate in each testbed
    count = { }
    for iface in sub.interfaces:
        tb = iface.element.get_attribute('testbed')
        count[tb] = count.get(tb, 0) + 1

    for tb in tbs.keys():
        local = None
        for candidate in topo[tb].substrates:
            if candidate.name == sub.name:
                local = candidate
                break
        if not local:
            raise service_error(service_error.internal,
                    "No substrate %s in testbed %s" % (sub.name, tb))

        for iface in local.interfaces:
            iface.set_attribute('dragon_vlan', 'unassigned%d' % dn)
            if count[tb] > 1:
                kind = 'lan'
            else:
                kind = 'link'
            iface.set_attribute('dragon_type', kind)

    self.new_dragon_topo(dn, sub, topo, tbs, tbparams)
1480
def insert_internet_portals(self, sub, topo, tbs, tbparams, master, eid,
        segment_substrate, portals, connInfo):
    """
    Insert internet portals for substrate sub, which has nodes on more
    than one testbed, into the sub-topologies in topo.

    tbs maps each testbed on the substrate to the value used as the
    'ip4_address' of the portal interface facing that testbed.  For every
    ordered pair (st, dt) of distinct testbeds in tbs:

    - the first time the pair is seen, a portal substrate and segment
      (from new_portal_substrate) are added to topo[st] and remembered in
      segment_substrate[st][dt];
    - if a portal from st to dt already exists and carries fewer than
      self.muxmax non-portal interfaces, one more interface on sub is
      added to it;
    - otherwise a new portal node is created, given the fedkit and
      gatewaykit when those are set, appended to topo[st].elements and
      portals[st][dt], and its connection info is appended to
      connInfo[st].

    segment_substrate, portals and connInfo are updated in place.
    """
    # st == source testbed, dt == destination testbed.
    for st in tbs.keys():
        segment_substrate.setdefault(st, { })
        portals.setdefault(st, { })
        connInfo.setdefault(st, [ ])

        for dt in [ t for t in tbs.keys() if t != st ]:
            if dt not in segment_substrate[st]:
                # Put a substrate and a segment for the connected
                # testbed in there.
                tsubstrate, segment_element = \
                        self.new_portal_substrate(st, dt, eid, tbparams)
                segment_substrate[st][dt] = tsubstrate
                topo[st].substrates.append(tsubstrate)
                topo[st].elements.append(segment_element)

            new_portal = False
            if dt in portals[st]:
                # There's a portal set up to go to this destination.  See
                # if there's room to multiplex this connection on the
                # most recent one.  If so, add an interface to it; if
                # not, set up to add a portal below.
                portal = portals[st][dt][-1]
                mux = len([ i for i in portal.interface \
                        if not i.get_attribute('portal')])
                if mux == self.muxmax:
                    new_portal = True
                    # Overflow portals only carry experiment traffic
                    portal_type = "experiment"
                    myname = "%stunnel%d" % (dt, len(portals[st][dt]))
                    desthost = "%stunnel%d" % (st, len(portals[st][dt]))
                else:
                    portal.interface.append(
                            topdl.Interface(
                                substrate=sub.name,
                                attribute=[
                                    topdl.Attribute(
                                        attribute='ip4_address',
                                        value=tbs[dt])
                                ]))
            else:
                # First connection to this testbed, make an empty list
                # and set up to add the new portal below
                new_portal = True
                portals[st][dt] = [ ]
                myname = "%stunnel%d" % (dt, len(portals[st][dt]))
                desthost = "%stunnel%d" % (st, len(portals[st][dt]))

                # NOTE(review): the type choice is placed in this branch
                # because the overflow branch above sets its own
                # portal_type -- confirm against the original nesting.
                if dt == master or st == master: portal_type = "both"
                else: portal_type = "experiment"

            if new_portal:
                infs = (
                        (segment_substrate[st][dt].name,
                            (('portal', 'true'),)),
                        (sub.name,
                            (('ip4_address', tbs[dt]),))
                    )
                portal, info = self.new_portal_node(st, dt, tbparams,
                        master, eid, myname, desthost, portal_type,
                        infs)
                if self.fedkit:
                    self.add_kit(portal, self.fedkit)
                if self.gatewaykit:
                    self.add_kit(portal, self.gatewaykit)

                topo[st].elements.append(portal)
                portals[st][dt].append(portal)
                connInfo[st].append(info)
1572
def add_control_portal(self, st, dt, master, eid, topo, tbparams, connInfo):
    """
    Add a control portal from testbed st towards testbed dt.

    A portal substrate and segment are created with new_portal_substrate
    and a "control" portal node named "<dt>tunnel" (peer "<st>tunnel") is
    created with new_portal_node.  The node gets the fedkit and
    gatewaykit when those are set.  Substrate, segment and node are added
    to topo[st], and the node's connection info is appended to
    connInfo[st] (the list is created if st has none yet).
    """
    tsubstrate, segment_element = \
            self.new_portal_substrate(st, dt, eid, tbparams)

    iface_desc = ((tsubstrate.name, (('portal', 'true'),)),)
    portal, info = self.new_portal_node(st, dt, tbparams, master,
            eid, "%stunnel" % dt, "%stunnel" % st, "control", iface_desc)

    for kit in (self.fedkit, self.gatewaykit):
        if kit:
            self.add_kit(portal, kit)

    local = topo[st]
    local.substrates.append(tsubstrate)
    local.elements.append(segment_element)
    local.elements.append(portal)

    connInfo.setdefault(st, [ ]).append(info)
1594
def new_dragon_portal(self, st, dt, master, eid, myip, dip, idx,
        substrate, tbparams):
    """
    Build and return the control portal node that testbed st uses to
    reach testbed dt over a DRAGON link.

    The node is named "<dt>tunnel" and its peer is addressed by IP (dip)
    rather than by name.  It has one interface on substrate carrying the
    portal flag, myip as its 'ip4_address', a 'dragon_vlan' of
    "unassigned<idx>" and a 'dragon_type' of 'link'.  The fedkit and
    gatewaykit are added when those are set.

    Only the node is returned; the connection info that new_portal_node
    also produces is not used for DRAGON portals.
    """
    myname = "%stunnel" % dt
    desthost = "%s" % ip_addr(dip)

    # new_portal_node returns (node, info).  Unpack it so that add_kit
    # and the caller get the node itself, not the tuple.
    portal, info = self.new_portal_node(st, dt, tbparams, master,
            eid, myname, desthost, "control",
            ((substrate.name,(
                ('portal','true'),
                ('ip4_address', "%s" % ip_addr(myip)),
                ('dragon_vlan', 'unassigned%d' % idx),
                ('dragon_type', 'link'),)),))
    if self.fedkit:
        self.add_kit(portal, self.fedkit)
    if self.gatewaykit:
        self.add_kit(portal, self.gatewaykit)

    return portal
1614
def add_portals(self, top, topo, eid, master, tbparams, ip_allocator,
        connInfo):
    """
    For each substrate in the main topology, find those that have nodes
    on more than one testbed and insert portal nodes into the copies of
    those substrates in the sub-topologies.

    A substrate shared by exactly two testbeds that both have a 'dragon'
    entry in tbparams is connected with create_dragon_substrate; every
    other shared substrate gets internet portals.  Afterwards every
    non-master testbed that has no portal of type 'both' gets a control
    connection to the master: over DRAGON when both ends have a 'dragon'
    entry (addresses come from ip_allocator), otherwise through a pair of
    control portals.  Finally the new elements are incorporated into each
    sub-topology and substrates left without interfaces are dropped.

    Raises service_error if ip_allocator cannot supply the addresses for
    a DRAGON control link.
    """
    segment_substrate = { }
    portals = { }
    for s in top.substrates:
        # tbs will contain an ip address on this substrate that is in
        # each testbed.
        tbs = { }
        for i in s.interfaces:
            e = i.element
            tb = e.get_attribute('testbed')
            if tb and tb not in tbs:
                # Separate name for the inner loop so the outer loop
                # variable is not clobbered.
                for ei in e.interface:
                    if s in ei.subs:
                        tbs[tb] = ei.get_attribute('ip4_address')
        if len(tbs) < 2:
            continue

        # DRAGON will not create multi-site vlans yet
        if len(tbs) == 2 and \
                all(['dragon' in tbparams[x] for x in tbs]):
            self.create_dragon_substrate(s, topo, tbs, tbparams,
                    master, eid)
        else:
            self.insert_internet_portals(s, topo, tbs, tbparams, master,
                    eid, segment_substrate, portals, connInfo)

    # Make sure that all the slaves have a control portal back to the
    # master.
    for tb in [ t for t in tbparams.keys() if t != master ]:
        both = [ e for e in topo[tb].elements \
                if isinstance(e, topdl.Computer) and \
                    e.get_attribute('portal') and \
                    e.get_attribute('portal_type') == 'both' ]
        if len(both) != 0:
            continue

        if 'dragon' in tbparams[master] and 'dragon' in tbparams[tb]:
            idx = len([x for x in topo.keys() \
                    if x.startswith('dragon')])
            # A failed allocation is reported the same way
            # allocate_ips_to_topo does instead of failing on the
            # unpacking of a false value.
            block = ip_allocator.allocate(4)
            if not block:
                raise service_error(service_error.req,
                        "Cannot allocate IP addresses")
            dip = block[0] + 1
            mip = dip + 1
            csub = topdl.Substrate(
                    name="dragon-control-%s" % tb,
                    capacity=topdl.Capacity(100000.0, 'max'),
                    attribute=[
                        topdl.Attribute(
                            attribute='portal',
                            value='true'
                            )
                        ]
                    )
            # The slave's topology gets a segment standing for the
            # master, and its own portal.
            seg = self._dragon_control_segment(master, csub.name,
                    tbparams, eid)
            topo[tb].substrates.append(csub)
            topo[tb].elements.append(
                    self.new_dragon_portal(tb, master, master, eid,
                        dip, mip, idx, csub, tbparams))
            topo[tb].elements.append(seg)

            # And the master's topology gets the mirror image.
            mcsub = csub.clone()
            seg = self._dragon_control_segment(tb, csub.name,
                    tbparams, eid)
            topo[master].substrates.append(mcsub)
            topo[master].elements.append(
                    self.new_dragon_portal(master, tb, master, eid,
                        mip, dip, idx, mcsub, tbparams))
            topo[master].elements.append(seg)

            self.create_dragon_substrate(csub, topo,
                    {tb: 1, master:1}, tbparams, master, eid)
        else:
            self.add_control_portal(master, tb, master, eid, topo,
                    tbparams, connInfo)
            self.add_control_portal(tb, master, master, eid, topo,
                    tbparams, connInfo)

    # Connect the portal nodes into the topologies and clear out
    # substrates that are not in the topologies
    for tb in tbparams.keys():
        topo[tb].incorporate_elements()
        topo[tb].substrates = \
                [s for s in topo[tb].substrates \
                    if len(s.interfaces) >0]

def _dragon_control_segment(self, tb, substrate_name, tbparams, eid):
    """
    Return the topdl.Segment that stands for testbed tb on the DRAGON
    control substrate named substrate_name.
    """
    params = tbparams[tb]
    return topdl.Segment(
            id=params['allocID'],
            type='emulab',
            uri=self.tbmap.get(tb, None),
            interface=[
                topdl.Interface(substrate=substrate_name),
                ],
            attribute=[
                topdl.Attribute(attribute=n, value=v)
                    for n, v in (
                        ('domain',
                            params.get('domain', ".example.com")),
                        ('experiment', "%s/%s" % \
                                (params.get('project', 'project'),
                                    eid)),)
                ],
            )
1742
1743	def wrangle_software(self, expid, top, topo, tbparams):
1744	"""
1745	Copy software out to the repository directory, allocate permissions and
1746	rewrite the segment topologies to look for the software in local
1747	places.
1748	"""
1749
1750	# Copy the rpms and tarfiles to a distribution directory from
1751	# which the federants can retrieve them
1752	linkpath = "%s/software" % expid
1753	softdir ="%s/%s" % ( self.repodir, linkpath)
1754	softmap = { }
1755	# These are in a list of tuples format (each kit). This comprehension
1756	# unwraps them into a single list of tuples that initilaizes the set of
1757	# tuples.
1758	pkgs = set([ t for l in [self.fedkit, self.gatewaykit] \
1759	for p, t in l ])
1760	pkgs.update([x.location for e in top.elements \
1761	for x in e.software])
1762	try:
1763	os.makedirs(softdir)
1764	except IOError, e:
1765	raise service_error(
1766	"Cannot create software directory: %s" % e)
1767	# The actual copying. Everything's converted into a url for copying.
1768	for pkg in pkgs:
1769	loc = pkg
1770
1771	scheme, host, path = urlparse(loc)[0:3]
1772	dest = os.path.basename(path)
1773	if not scheme:
1774	if not loc.startswith('/'):
1775	loc = "/%s" % loc
1776	loc = "file://%s" %loc
1777	try:
1778	u = urlopen(loc)
1779	except Exception, e:
1780	raise service_error(service_error.req,
1781	"Cannot open %s: %s" % (loc, e))
1782	try:
1783	f = open("%s/%s" % (softdir, dest) , "w")
1784	self.log.debug("Writing %s/%s" % (softdir,dest) )
1785	data = u.read(4096)
1786	while data:
1787	f.write(data)
1788	data = u.read(4096)
1789	f.close()
1790	u.close()
1791	except Exception, e:
1792	raise service_error(service_error.internal,
1793	"Could not copy %s: %s" % (loc, e))
1794	path = re.sub("/tmp", "", linkpath)
1795	# XXX
1796	softmap[pkg] = \
1797	"%s/%s/%s" %\
1798	( self.repo_url, path, dest)
1799
1800	# Allow the individual segments to access the software.
1801	for tb in tbparams.keys():
1802	self.auth.set_attribute(tbparams[tb]['allocID']['fedid'],
1803	"/%s/%s" % ( path, dest))
1804
1805	# Convert the software locations in the segments into the local
1806	# copies on this host
1807	for soft in [ s for tb in topo.values() \
1808	for e in tb.elements \
1809	if getattr(e, 'software', False) \
1810	for s in e.software ]:
1811	if softmap.has_key(soft.location):
1812	soft.location = softmap[soft.location]
1813
1814
def new_experiment(self, req, fid):
    """
    The external interface to empty initial experiment creation called
    from the dispatcher.

    Checks that fid holds the 'new' attribute and is known to the access
    map, generates a fedid and certificate for the experiment, records an
    'empty' experiment state for it and grants fid, the experiment itself
    and every override fedid access to that state.

    Returns a dict with the experiment's IDs (local name and fedid), its
    status ('empty') and the X509 certificate the caller can use to prove
    ownership.  Raises service_error on access denial, on a request
    without a NewRequestBody, or if no temporary directory can be made.
    """
    if not self.auth.check_attribute(fid, 'new'):
        raise service_error(service_error.access, "New access denied")

    try:
        # Lookup only; a missing entry means the access map and the
        # authorizer disagree.
        self.accessdb[fid]
    except KeyError:
        raise service_error(service_error.internal,
                "Access map and authorizer out of sync in " + \
                        "new_experiment for fedid %s"  % fid)

    # Validate the request before touching the file system so a bad
    # request does not leave a temporary directory behind.
    req = req.get('NewRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no NewRequestBody)")

    try:
        tmpdir = tempfile.mkdtemp(prefix="split-")
    except EnvironmentError:
        # mkdtemp reports failure with OSError, which an IOError handler
        # does not catch; EnvironmentError covers both.
        raise service_error(service_error.internal, "Cannot create tmp dir")

    # Generate an ID for the experiment (slice) and a certificate that the
    # allocator can use to prove they own it.  We'll ship it back through
    # the encrypted connection.
    (expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)

    #now we're done with the tmpdir, and it should be empty
    if self.cleanup:
        self.log.debug("[new_experiment]: removing %s" % tmpdir)
        os.rmdir(tmpdir)
    else:
        self.log.debug("[new_experiment]: not removing %s" % tmpdir)

    eid = self.create_experiment_state(fid, req, expid, expcert,
            state='empty')

    # Let users touch the state
    self.auth.set_attribute(fid, expid)
    self.auth.set_attribute(expid, expid)
    # Override fedids can manipulate state as well
    for o in self.overrides:
        self.auth.set_attribute(o, expid)

    rv = {
            'experimentID': [
                {'localname' : eid }, { 'fedid': copy.copy(expid) }
            ],
            'experimentStatus': 'empty',
            'experimentAccess': { 'X509' : expcert }
        }

    return rv
1879
1880
1881	def create_experiment(self, req, fid):
1882	"""
1883	The external interface to experiment creation called from the
1884	dispatcher.
1885
1886	Creates a working directory, splits the incoming description using the
1887	splitter script and parses out the avrious subsections using the
1888	lcasses above. Once each sub-experiment is created, use pooled threads
1889	to instantiate them and start it all up.
1890	"""
1891
1892	req = req.get('CreateRequestBody', None)
1893	if not req:
1894	raise service_error(service_error.req,
1895	"Bad request format (no CreateRequestBody)")
1896
1897	# Get the experiment access
1898	exp = req.get('experimentID', None)
1899	if exp:
1900	if exp.has_key('fedid'):
1901	key = exp['fedid']
1902	expid = key
1903	eid = None
1904	elif exp.has_key('localname'):
1905	key = exp['localname']
1906	eid = key
1907	expid = None
1908	else:
1909	raise service_error(service_error.req, "Unknown lookup type")
1910	else:
1911	raise service_error(service_error.req, "No request?")
1912
1913	self.check_experiment_access(fid, key)
1914
1915	try:
1916	tmpdir = tempfile.mkdtemp(prefix="split-")
1917	os.mkdir(tmpdir+"/keys")
1918	except IOError:
1919	raise service_error(service_error.internal, "Cannot create tmp dir")
1920
1921	gw_pubkey_base = "fed.%s.pub" % self.ssh_type
1922	gw_secretkey_base = "fed.%s" % self.ssh_type
1923	gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
1924	gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
1925	tclfile = tmpdir + "/experiment.tcl"
1926	tbparams = { }
1927	try:
1928	access_user = self.accessdb[fid]
1929	except KeyError:
1930	raise service_error(service_error.internal,
1931	"Access map and authorizer out of sync in " + \
1932	"create_experiment for fedid %s" % fid)
1933
1934	pid = "dummy"
1935	gid = "dummy"
1936
1937	# The tcl parser needs to read a file so put the content into that file
1938	descr=req.get('experimentdescription', None)
1939	if descr:
1940	file_content=descr.get('ns2description', None)
1941	if file_content:
1942	try:
1943	f = open(tclfile, 'w')
1944	f.write(file_content)
1945	f.close()
1946	except IOError:
1947	raise service_error(service_error.internal,
1948	"Cannot write temp experiment description")
1949	else:
1950	raise service_error(service_error.req,
1951	"Only ns2descriptions supported")
1952	else:
1953	raise service_error(service_error.req, "No experiment description")
1954
1955	self.state_lock.acquire()
1956	if self.state.has_key(key):
1957	self.state[key]['experimentStatus'] = "starting"
1958	for e in self.state[key].get('experimentID',[]):
1959	if not expid and e.has_key('fedid'):
1960	expid = e['fedid']
1961	elif not eid and e.has_key('localname'):
1962	eid = e['localname']
1963	self.state_lock.release()
1964
1965	if not (eid and expid):
1966	raise service_error(service_error.internal,
1967	"Cannot find local experiment info!?")
1968
1969	try:
1970	# This catches exceptions to clear the placeholder if necessary
1971	try:
1972	self.generate_ssh_keys(gw_secretkey, self.ssh_type)
1973	except ValueError:
1974	raise service_error(service_error.server_config,
1975	"Bad key type (%s)" % self.ssh_type)
1976
1977	master = req.get('master', None)
1978	if not master:
1979	raise service_error(service_error.req,
1980	"No master testbed label")
1981	export_project = req.get('exportProject', None)
1982	if not export_project:
1983	raise service_error(service_error.req, "No export project")
1984
1985	# Translate to topdl
1986	if self.splitter_url:
1987	# XXX: need remote topdl translator
1988	self.log.debug("Calling remote splitter at %s" % \
1989	self.splitter_url)
1990	split_data = self.remote_splitter(self.splitter_url,
1991	file_content, master)
1992	else:
1993	tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
1994	str(self.muxmax), '-m', master]
1995
1996	if self.fedkit:
1997	tclcmd.append('-k')
1998
1999	if self.gatewaykit:
2000	tclcmd.append('-K')
2001
2002	tclcmd.extend([pid, gid, eid, tclfile])
2003
2004	self.log.debug("running local splitter %s", " ".join(tclcmd))
2005	# This is just fantastic. As a side effect the parser copies
2006	# tb_compat.tcl into the current directory, so that directory
2007	# must be writable by the fedd user. Doing this in the
2008	# temporary subdir ensures this is the case.
2009	tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
2010	cwd=tmpdir)
2011	split_data = tclparser.stdout
2012
2013	top = topdl.topology_from_xml(file=split_data, top="experiment")
2014
2015	hosts, ip_allocator = self.allocate_ips_to_topo(top)
2016	# Find the testbeds to look up
2017	testbeds = set([ a.value for e in top.elements \
2018	for a in e.attribute \
2019	if a.attribute == 'testbed'] )
2020
2021	allocated = { } # Testbeds we can access
2022	topo ={ } # Sub topologies
2023	connInfo = { } # Connection information
2024	services = [ ]
2025	self.get_access_to_testbeds(testbeds, access_user,
2026	export_project, master, allocated, tbparams, services)
2027	self.split_topology(top, topo, testbeds, eid, master, tbparams)
2028
2029	# Copy configuration files into the remote file store
2030	# The config urlpath
2031	configpath = "/%s/config" % expid
2032	# The config file system location
2033	configdir ="%s%s" % ( self.repodir, configpath)
2034	try:
2035	os.makedirs(configdir)
2036	except IOError, e:
2037	raise service_error(
2038	"Cannot create config directory: %s" % e)
2039	try:
2040	f = open("%s/hosts" % configdir, "w")
2041	f.write('\n'.join(hosts))
2042	f.close()
2043	except IOError, e:
2044	raise service_error(service_error.internal,
2045	"Cannot write hosts file: %s" % e)
2046	try:
2047	copy_file("%s" % gw_pubkey, "%s/%s" % \
2048	(configdir, gw_pubkey_base))
2049	copy_file("%s" % gw_secretkey, "%s/%s" % \
2050	(configdir, gw_secretkey_base))
2051	except IOError, e:
2052	raise service_error(service_error.internal,
2053	"Cannot copy keyfiles: %s" % e)
2054
2055	# Allow the individual testbeds to access the configuration files.
2056	for tb in tbparams.keys():
2057	asignee = tbparams[tb]['allocID']['fedid']
2058	for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
2059	self.auth.set_attribute(asignee, "%s/%s" % (configpath, f))
2060
2061	self.add_portals(top, topo, eid, master, tbparams, ip_allocator,
2062	connInfo)
2063	# Now get access to the dynamic testbeds
2064	for k, t in topo.items():
2065	if not t.get_attribute('dynamic'):
2066	continue
2067	tb = t.get_attribute('testbed')
2068	if tb:
2069	self.get_access(tb, None, user, tbparams, master,
2070	export_project, access_user, services)
2071	tbparams[k] = tbparams[tb]
2072	del tbparams[tb]
2073	allocated[k] = 1
2074	else:
2075	raise service_error(service_error.internal,
2076	"Dynamic allocation from no testbed!?")
2077
2078	self.wrangle_software(expid, top, topo, tbparams)
2079
2080	vtopo = topdl.topology_to_vtopo(top)
2081	vis = self.genviz(vtopo)
2082
2083	# save federant information
2084	for k in allocated.keys():
2085	tbparams[k]['federant'] = {
2086	'name': [ { 'localname' : eid} ],
2087	'allocID' : tbparams[k]['allocID'],
2088	'master' : k == master,
2089	'uri': tbparams[k]['uri'],
2090	}
2091	if tbparams[k].has_key('emulab'):
2092	tbparams[k]['federant']['emulab'] = \
2093	tbparams[k]['emulab']
2094
2095	self.state_lock.acquire()
2096	self.state[eid]['vtopo'] = vtopo
2097	self.state[eid]['vis'] = vis
2098	self.state[expid]['federant'] = \
2099	[ tbparams[tb]['federant'] for tb in tbparams.keys() \
2100	if tbparams[tb].has_key('federant') ]
2101	if self.state_filename:
2102	self.write_state()
2103	self.state_lock.release()
2104	except service_error, e:
2105	# If something goes wrong in the parse (usually an access error)
2106	# clear the placeholder state. From here on out the code delays
2107	# exceptions. Failing at this point returns a fault to the remote
2108	# caller.
2109
2110	self.state_lock.acquire()
2111	del self.state[eid]
2112	del self.state[expid]
2113	if self.state_filename: self.write_state()
2114	self.state_lock.release()
2115	raise e
2116
2117
2118	# Start the background swapper and return the starting state. From
2119	# here on out, the state will stick around a while.
2120
2121	# Let users touch the state
2122	self.auth.set_attribute(fid, expid)
2123	self.auth.set_attribute(expid, expid)
2124	# Override fedids can manipulate state as well
2125	for o in self.overrides:
2126	self.auth.set_attribute(o, expid)
2127
2128	# Create a logger that logs to the experiment's state object as well as
2129	# to the main log file.
2130	alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
2131	alloc_collector = self.list_log(self.state[eid]['log'])
2132	h = logging.StreamHandler(alloc_collector)
2133	# XXX: there should be a global one of these rather than repeating the
2134	# code.
2135	h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
2136	'%d %b %y %H:%M:%S'))
2137	alloc_log.addHandler(h)
2138
2139	attrs = [
2140	{
2141	'attribute': 'ssh_pubkey',
2142	'value': '%s/%s/config/%s' % \
2143	(self.repo_url, expid, gw_pubkey_base)
2144	},
2145	{
2146	'attribute': 'ssh_secretkey',
2147	'value': '%s/%s/config/%s' % \
2148	(self.repo_url, expid, gw_secretkey_base)
2149	},
2150	{
2151	'attribute': 'hosts',
2152	'value': '%s/%s/config/hosts' % \
2153	(self.repo_url, expid)
2154	},
2155	{
2156	'attribute': 'experiment_name',
2157	'value': eid,
2158	},
2159	]
2160
2161	# Start a thread to do the resource allocation
2162	t = Thread(target=self.allocate_resources,
2163	args=(allocated, master, eid, expid, tbparams,
2164	topo, tmpdir, alloc_log, alloc_collector, attrs, connInfo,
2165	services),
2166	name=eid)
2167	t.start()
2168
2169	rv = {
2170	'experimentID': [
2171	{'localname' : eid }, { 'fedid': copy.copy(expid) }
2172	],
2173	'experimentStatus': 'starting',
2174	}
2175
2176	return rv
2177
def get_experiment_fedid(self, key):
    """
    Find the fedid associated with the localname key in the state database.

    Returns the single fedid found in the 'experimentID' list of
    self.state[key], or None when key is not in the state database at all.

    Raises service_error (code service_error.internal) when the entry is
    not a dict, when it has no 'experimentID' list (or an entry of that
    list raises KeyError), or when the list does not hold exactly one
    fedid.
    """
    self.state_lock.acquire()
    try:
        if key not in self.state:
            return None

        entry = self.state[key]
        if not isinstance(entry, dict):
            raise service_error(service_error.internal,
                    "Unexpected state for %s" % key)

        try:
            kl = [ f['fedid'] for f in entry['experimentID'] \
                    if 'fedid' in f ]
        except KeyError:
            # NB: the message is built as one literal.  Splitting it with
            # "+" and then applying "%" formats only the last fragment
            # (% binds tighter than +) and raises a TypeError instead.
            raise service_error(service_error.internal,
                    "No fedid for experiment %s when getting fedid(!?)" \
                            % key)

        if not kl:
            raise service_error(service_error.internal,
                    "No fedid for experiment %s when getting fedid(!?)" \
                            % key)
        if len(kl) > 1:
            raise service_error(service_error.internal,
                    "multiple fedids for experiment %s when getting fedid(!?)" \
                            % key)
        return kl[0]
    finally:
        # Every exit path, normal or exceptional, gives the lock back.
        self.state_lock.release()
2209
def check_experiment_access(self, fid, key):
    """
    Verify that fid is allowed to touch the experiment named by key.

    key may be a fedid or a local name.  The access attribute is always
    the experiment's fedid, so anything that is not a fedid is first
    translated with get_experiment_fedid().

    Returns True when access is granted; raises service_error with code
    service_error.access otherwise.
    """
    if isinstance(key, fedid):
        attr = key
    else:
        attr = self.get_experiment_fedid(key)

    if not self.auth.check_attribute(fid, attr):
        raise service_error(service_error.access, "Access Denied")
    return True
2223
2224
def get_handler(self, path, fid):
    """
    Resolve a request for path made by fid.

    Logs the request, then returns a (filename, content type) tuple whose
    filename is path placed under self.repodir when fid holds the path
    attribute.  Returns (None, None) when it does not.
    """
    self.log.info("Get handler %s %s" % (path, fid))
    if not self.auth.check_attribute(fid, path):
        return (None, None)
    local_name = "%s/%s" % (self.repodir, path)
    return (local_name, "application/binary")
2231
def get_vtopo(self, req, fid):
    """
    Return the stored virtual topology for this experiment.

    req must contain a 'VtopoRequestBody' whose 'experiment' entry names
    the experiment by 'fedid' or 'localname'.  fid is the caller's
    identity, checked with check_experiment_access().

    Returns a dict with the 'experiment' identifier that was used for the
    lookup and the stored 'vtopo'.

    Raises service_error with code
      req     - malformed request or no such experiment
      partial - the experiment exists but has no vtopo yet; the message
                carries its experimentStatus
    plus whatever check_experiment_access() raises.
    """
    req = req.get('VtopoRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no VtopoRequestBody)")

    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
        keytype = "fedid"
    elif 'localname' in exp:
        key = exp['localname']
        keytype = "localname"
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    rv = None
    state = None
    self.state_lock.acquire()
    try:
        if key in self.state:
            if 'vtopo' in self.state[key]:
                rv = {
                        'experiment' : {keytype: key },
                        'vtopo': self.state[key]['vtopo'],
                    }
            else:
                state = self.state[key]['experimentStatus']
    finally:
        # A state entry without experimentStatus raises KeyError above;
        # the lock must still be released or every later request hangs.
        self.state_lock.release()

    if rv:
        return rv
    if state:
        raise service_error(service_error.partial,
                "Not ready: %s" % state)
    raise service_error(service_error.req, "No such experiment")
2275
def get_vis(self, req, fid):
    """
    Return the stored visualization for this experiment.

    req must contain a 'VisRequestBody' whose 'experiment' entry names
    the experiment by 'fedid' or 'localname'.  fid is the caller's
    identity, checked with check_experiment_access().

    Returns a dict with the 'experiment' identifier that was used for the
    lookup and the stored 'vis'.

    Raises service_error with code
      req     - malformed request or no such experiment
      partial - the experiment exists but has no vis yet; the message
                carries its experimentStatus
    plus whatever check_experiment_access() raises.
    """
    req = req.get('VisRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no VisRequestBody)")

    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
        keytype = "fedid"
    elif 'localname' in exp:
        key = exp['localname']
        keytype = "localname"
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    rv = None
    state = None
    self.state_lock.acquire()
    try:
        if key in self.state:
            if 'vis' in self.state[key]:
                rv = {
                        'experiment' : {keytype: key },
                        'vis': self.state[key]['vis'],
                    }
            else:
                state = self.state[key]['experimentStatus']
    finally:
        # A state entry without experimentStatus raises KeyError above;
        # the lock must still be released or every later request hangs.
        self.state_lock.release()

    if rv:
        return rv
    if state:
        raise service_error(service_error.partial,
                "Not ready: %s" % state)
    raise service_error(service_error.req, "No such experiment")
2319
def clean_info_response(self, rv):
    """
    Strip from an experiment state dict everything that does not belong
    in an info response, and return the same (modified) dict.

    - 'owner' is dropped.
    - 'log' (a list of strings) is replaced by the joined string
      'allocationLog'; a missing log yields an empty allocationLog.
    - Unless experimentStatus is 'active' the 'federant' list is dropped;
      for active experiments each federant loses 'allocID' and 'uri'.
    """
    # Owner should always be present, but be tolerant.
    rv.pop('owner', None)

    if 'log' not in rv:
        rv['allocationLog'] = ""
    else:
        rv['allocationLog'] = "".join(rv['log'])
        del rv['log']

    if rv['experimentStatus'] == 'active':
        # Keep the federants, minus their allocation ids and uris.
        for fed in rv.get('federant', []):
            for hidden in ('allocID', 'uri'):
                fed.pop(hidden, None)
    else:
        rv.pop('federant', None)
    return rv
2344
def get_info(self, req, fid):
    """
    Return all the stored info about this experiment.

    req must contain an 'InfoRequestBody' whose 'experiment' entry names
    the experiment by 'fedid' or 'localname'.  fid is the caller's
    identity, checked with check_experiment_access().

    Returns a deep copy of the experiment's state after it has been
    passed through clean_info_response().

    Raises service_error with code req for a malformed request or an
    unknown experiment, plus whatever check_experiment_access() raises.
    """
    req = req.get('InfoRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no InfoRequestBody)")

    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    # The state may be massaged by the service function that called
    # get_info (e.g., encoded for XMLRPC transport) so send a copy of the
    # state.
    rv = None
    self.state_lock.acquire()
    try:
        if key in self.state:
            rv = copy.deepcopy(self.state[key])
    finally:
        # deepcopy can fail on an uncopyable value; never keep the lock.
        self.state_lock.release()

    if rv:
        return self.clean_info_response(rv)
    raise service_error(service_error.req, "No such experiment")
2382
def get_multi_info(self, req, fid):
    """
    Return all the stored info that this fedid can access.

    Walks every state entry that is keyed by a fedid, skips the ones for
    which check_experiment_access() reports an access error, and collects
    a cleaned deep copy (see clean_info_response()) of the others.  req is
    not examined.

    Returns { 'info': [ ...cleaned state copies... ] }.  A service_error
    with a code other than access is passed on to the caller.
    """
    rv = { 'info': [ ] }

    self.state_lock.acquire()
    try:
        for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
            try:
                self.check_experiment_access(fid, key)
            except service_error:
                # sys.exc_info() fetches the exception without the
                # version-specific "except X, e" / "except X as e" forms.
                if sys.exc_info()[1].code == service_error.access:
                    continue
                raise

            if key in self.state:
                info = copy.deepcopy(self.state[key])
                rv['info'].append(self.clean_info_response(info))
    finally:
        # Released on every path, including unexpected exceptions raised
        # while copying or cleaning an entry.
        self.state_lock.release()
    return rv
2406
def terminate_experiment(self, req, fid):
    """
    Swap this experiment out on the federants and delete the shared
    information.

    req must contain a 'TerminateRequestBody' whose 'experiment' entry
    names the experiment by 'fedid' or 'localname'; an optional 'force'
    flag allows terminating an experiment that is still 'starting' or
    'terminating' and suppresses errors while releasing access.  fid is
    the caller's identity, checked with check_experiment_access().

    Returns { 'experiment': <the identifier from the request>,
    'deallocationLog': <text logged while deallocating> }.

    Raises service_error with code
      req      - malformed request or no saved state for the experiment
      partial  - experiment is being created/destroyed and force not set
      internal - the saved state has no experimentStatus
    plus errors from check_experiment_access() and, unless the experiment
    had failed or force is set, from release_access().
    """
    req = req.get('TerminateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no TerminateRequestBody)")
    force = req.get('force', False)
    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    dealloc_list = [ ]

    # Create a logger that logs to the dealloc_list as well as to the main
    # log file.
    dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
    h = logging.StreamHandler(self.list_log(dealloc_list))
    # XXX: there should be a global one of these rather than repeating the
    # code.
    h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
                '%d %b %y %H:%M:%S'))
    dealloc_log.addHandler(h)

    try:
        ids = [ ]       # keys of self.state that refer to this experiment
        tbparams = { }  # allocation key -> (uri, allocID)

        # Hold the lock only long enough to build a consistent picture of
        # what must be deallocated and to mark the experiment terminating.
        self.state_lock.acquire()
        try:
            fed_exp = self.state.get(key, None)
            if not fed_exp:
                raise service_error(service_error.req, "No saved state")

            # First make sure that the experiment creation is complete.
            status = fed_exp.get('experimentStatus', None)
            if not status:
                # No status??? trouble
                raise service_error(service_error.internal,
                        "Experiment has no status!?")
            if status in ('starting', 'terminating'):
                if not force:
                    raise service_error(service_error.partial,
                            'Experiment still being created or destroyed')
                self.log.warning('Experiment in %s state ' % status + \
                        'being terminated by force.')

            # experimentID is a list of dicts that are self-describing
            # identifiers.  This finds all the fedids and localnames - the
            # keys of self.state - and puts them into ids.
            for ident in fed_exp.get('experimentID', []):
                if 'fedid' in ident: ids.append(ident['fedid'])
                if 'localname' in ident: ids.append(ident['localname'])

            # Collect the allocation/segment ids into a dict keyed by the
            # fedid of the allocation (or the federant's index when there
            # is no fedid) holding a (uri, allocID) tuple.  Federants
            # missing either field are skipped.
            for i, fed in enumerate(fed_exp.get('federant', [])):
                try:
                    uri = fed['uri']
                    aid = fed['allocID']
                    k = aid.get('fedid', i)
                except KeyError:
                    continue
                tbparams[k] = (uri, aid)

            fed_exp['experimentStatus'] = 'terminating'
            if self.state_filename: self.write_state()
        finally:
            # Also reached when write_state() or a malformed state entry
            # raises, so a failed terminate cannot wedge the state lock.
            self.state_lock.release()

        # Stop everyone.  NB, wait_for_all waits until a thread starts and
        # then completes, so we can't wait if nothing starts.  So, no
        # tbparams, no start.
        if len(tbparams) > 0:
            thread_pool = self.thread_pool(self.nthreads)
            for k in tbparams.keys():
                # Create and start a thread to stop the segment
                thread_pool.wait_for_slot()
                uri, aid = tbparams[k]
                t = self.pooled_thread(
                        target=self.terminate_segment(log=dealloc_log,
                            testbed=uri,
                            cert_file=self.cert_file,
                            cert_pwd=self.cert_pwd,
                            trusted_certs=self.trusted_certs,
                            caller=self.call_TerminateSegment),
                        args=(uri, aid), name=k,
                        pdata=thread_pool, trace_file=self.trace_file)
                t.start()
            # Wait for completions
            thread_pool.wait_for_all_done()

        # Release the allocations (failed experiments have done this
        # already, and starting experiments may be in odd states, so we
        # ignore errors releasing those allocations).
        try:
            for k in tbparams.keys():
                # This releases access by uri
                uri, aid = tbparams[k]
                self.release_access(None, aid, uri=uri)
        except service_error:
            if status != 'failed' and not force:
                raise

        # Remove the terminated experiment
        self.state_lock.acquire()
        try:
            for ident in ids:
                if ident in self.state: del self.state[ident]
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

        return {
                'experiment': exp ,
                'deallocationLog': "".join(dealloc_list),
            }
    finally:
        # The handler only feeds dealloc_list, which is dead once this
        # call ends; leaving it attached would pile up one handler per
        # terminate request on the logger.
        dealloc_log.removeHandler(h)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: