Context Navigation

source: fedd/federation/experiment_control.py @ 9b8e269

axis_examplecompt_changesinfo-opsversion-3.01version-3.02

Last change on this file since 9b8e269 was 9b8e269, checked in by Ted Faber <faber@…>, 15 years ago
remove deprecated master field (and change a log message)
Property mode set to `100644`
File size: 79.5 KB

Line
1	#!/usr/local/bin/python
2
3	import os,sys
4
5	import re
6	import random
7	import string
8	import subprocess
9	import tempfile
10	import copy
11	import pickle
12	import logging
13	import signal
14	import time
15
16	import traceback
17	# For parsing visualization output and splitter output
18	import xml.parsers.expat
19
20	from threading import Lock, Thread, Condition
21	from subprocess import call, Popen, PIPE
22
23	from urlparse import urlparse
24	from urllib2 import urlopen
25
26	from util import *
27	from fedid import fedid, generate_fedid
28	from remote_service import xmlrpc_handler, soap_handler, service_caller
29	from service_error import service_error
30	from synch_store import synch_store
31	from experiment_partition import experiment_partition
32
33	import topdl
34	import list_log
35	from ip_allocator import ip_allocator
36	from ip_addr import ip_addr
37
38
class nullHandler(logging.Handler):
    """Logging handler that discards every record handed to it."""

    def emit(self, record):
        """Ignore record; nothing is written anywhere."""
        return None
41
# Module-level logger for "fedd.experiment_control".  A nullHandler is
# attached so that the logger always has at least one handler installed.
42	fl = logging.getLogger("fedd.experiment_control")
43	fl.addHandler(nullHandler())
44
45
46	# Right now, no support for composition.
class federated_service:
    """
    Record describing one service shared between testbeds.

    name      -- the service name (e.g. 'project_export')
    exporter  -- stored as given (presumably the exporting testbed)
    importers -- collection of testbed labels that import the service
    params    -- dict of service parameters
    reqs      -- list that accumulates the service requests returned by
                 access calls

    Each instance gets its own fresh importers/params/reqs containers
    when the caller does not supply them.  A container that is supplied
    is stored by reference, not copied.
    """
    def __init__(self, name, exporter=None, importers=None, params=None,
            reqs=None):
        # Defaults are created per instance: a literal [] or { } in the
        # signature would be shared by every instance, and reqs is
        # appended to after construction.
        if importers is None:
            importers = [ ]
        if params is None:
            params = { }
        if reqs is None:
            reqs = [ ]
        self.name = name
        self.exporter = exporter
        self.importers = importers
        self.params = params
        self.reqs = reqs
54
55	class experiment_control_local:
56	"""
57	Control of experiments that this system can directly access.
58
59	Includes experiment creation, termination and information dissemination.
60	Thread safe.
61	"""
62
class ssh_cmd_timeout(RuntimeError):
    """RuntimeError subclass; by its name, marks a timed-out ssh command."""
64
class thread_pool:
    """
    A class to keep track of a set of threads all invoked for the same
    task.  Manages the mutual exclusion of the states.

    started and terminated count the threads that have reported in through
    start() and terminate(); nthreads is the limit used by wait_for_slot().
    All counters are protected by the Condition in self.changed.
    """
    def __init__(self, nthreads):
        """
        Start a pool that allows nthreads simultaneously active threads.
        """
        self.changed = Condition()
        self.started = 0
        self.terminated = 0
        self.nthreads = nthreads

    def acquire(self):
        """
        Get the pool's lock.
        """
        self.changed.acquire()

    def release(self):
        """
        Release the pool's lock.
        """
        self.changed.release()

    def wait(self, timeout = None):
        """
        Wait for a pool thread to start or stop.  The lock must be held.
        """
        self.changed.wait(timeout)

    def _busy(self):
        """
        True while no thread has started or some started thread has not
        terminated.  Call while holding the lock.
        """
        return self.started == 0 or self.started > self.terminated

    def start(self):
        """
        Called by a pool thread to report starting.
        """
        self.changed.acquire()
        try:
            self.started += 1
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def terminate(self):
        """
        Called by a pool thread to report finishing.
        """
        self.changed.acquire()
        try:
            self.terminated += 1
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def clear(self):
        """
        Clear all pool data.
        """
        self.changed.acquire()
        try:
            self.started = 0
            self.terminated = 0
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def wait_for_slot(self):
        """
        Wait until we have a free slot to start another pooled thread
        """
        self.acquire()
        try:
            while self.started - self.terminated >= self.nthreads:
                self.wait()
        finally:
            # Release even if wait() is interrupted by an exception
            self.release()

    def wait_for_all_done(self, timeout=None):
        """
        Wait until all active threads finish (and at least one has
        started).  If a timeout is given, return after waiting that long
        for termination.  If all threads are done (and one has started
        since the last clear()) return True, otherwise False.

        A false timeout (None or 0) means wait without a deadline.
        """
        deadline = None
        if timeout:
            deadline = time.time() + timeout
        self.acquire()
        try:
            while self._busy():
                self.wait(timeout)
                if deadline is not None:
                    # One clock read decides both whether to give up and
                    # how long the next wait may last.
                    remaining = deadline - time.time()
                    if remaining <= 0:
                        break
                    timeout = remaining
            # Evaluate the result while the counters are still locked
            return not self._busy()
        finally:
            self.release()
152
class pooled_thread(Thread):
    """
    One of a set of threads dedicated to a specific task.  Uses the
    thread_pool class above for coordination.

    After the thread has run, rv holds the target's return value and
    exception holds the exception that ended it (None if there was none).
    """
    def __init__(self, group=None, target=None, name=None, args=(),
            kwargs=None, pdata=None, trace_file=None):
        """
        Same arguments as Thread, plus pdata, the pool object whose
        start()/terminate() methods are called around the target.
        trace_file is accepted but not used by this class.
        """
        if kwargs is None:
            kwargs = { }
        Thread.__init__(self, group, target, name, args, kwargs)
        self.rv = None          # Return value of the ops in this thread
        self.exception = None   # Exception that terminated this thread
        self.target = target    # Target function to run on start()
        self.args = args        # Args to pass to target
        self.kwargs = kwargs    # Additional kw args
        self.pdata = pdata      # thread_pool for this class
        # Logger for this thread
        self.log = logging.getLogger("fedd.experiment_control")

    def run(self):
        """
        Emulate Thread.run, except add pool data manipulation and error
        logging.
        """
        if self.pdata:
            self.pdata.start()

        try:
            if self.target:
                try:
                    # Unpack positional and keyword arguments, exactly as
                    # Thread.run would.
                    self.rv = self.target(*self.args, **self.kwargs)
                except service_error:
                    s = sys.exc_info()[1]
                    self.exception = s
                    self.log.error("Thread exception: %s %s" % \
                            (s.code_string(), s.desc))
                except:
                    # Deliberately catch everything: the failure is
                    # recorded for whoever collects this thread.
                    self.exception = sys.exc_info()[1]
                    self.log.error(("Unexpected thread exception: %s" +\
                            "Trace %s") % (self.exception,\
                            traceback.format_exc()))
        finally:
            # Always report termination so pool waiters cannot hang
            if self.pdata:
                self.pdata.terminate()
192
# Class-level service_caller objects, one for each remote operation named in
# the constructor argument.  The methods in this file invoke them as
# self.call_<Operation>(uri, request, cert_file, cert_pwd, trusted_certs).
193	call_RequestAccess = service_caller('RequestAccess')
194	call_ReleaseAccess = service_caller('ReleaseAccess')
195	call_StartSegment = service_caller('StartSegment')
196	call_TerminateSegment = service_caller('TerminateSegment')
197	call_Ns2Topdl = service_caller('Ns2Topdl')
198
def __init__(self, config=None, auth=None):
    """
    Initialize the various attributes, most from the config object.

    config is the configuration object; its get() and getboolean()
    methods are used to read the "experiment_control", "globals" and
    "ns2topdl" sections.  auth is the authorizer to use; if none is given
    a local one is created.

    Reads the SSH public key, the testbed map, the access database, the
    saved experiment state and the saved synch store, then builds the
    SOAP and XMLRPC dispatch tables.

    Raises service_error if the SSH public key file is unset or
    unreadable, if the SSH private key file is unset, or if no accessdb
    is configured.
    """

    def parse_tarfile_list(tf):
        """
        Parse a tarfile list from the configuration.  This is a set of
        paths and tarfiles separated by spaces.  Returns a list of
        (path, tarfile) tuples; a trailing unpaired word is ignored.
        """
        rv = [ ]
        if tf is not None:
            tl = tf.split()
            # Walk the words two at a time
            for i in range(0, len(tl) - 1, 2):
                rv.append((tl[i], tl[i+1]))
        return rv

    self.thread_with_rv = experiment_control_local.pooled_thread
    self.thread_pool = experiment_control_local.thread_pool
    self.list_log = list_log.list_log

    # Prefer the experiment_control certificate, fall back to the global
    self.cert_file = config.get("experiment_control", "cert_file")
    if self.cert_file:
        self.cert_pwd = config.get("experiment_control", "cert_pwd")
    else:
        self.cert_file = config.get("globals", "cert_file")
        self.cert_pwd = config.get("globals", "cert_pwd")

    self.trusted_certs = config.get("experiment_control", "trusted_certs") \
            or config.get("globals", "trusted_certs")

    self.repodir = config.get("experiment_control", "repodir")
    self.repo_url = config.get("experiment_control", "repo_url",
            "https://users.isi.deterlab.net:23235")

    self.exp_stem = "fed-stem"
    self.log = logging.getLogger("fedd.experiment_control")
    set_log_level(config, "experiment_control", self.log)
    self.muxmax = 2
    self.nthreads = 10
    self.randomize_experiments = False

    self.splitter = None
    self.ssh_keygen = "/usr/bin/ssh-keygen"
    self.ssh_identity_file = None

    self.debug = config.getboolean("experiment_control", "create_debug")
    self.cleanup = not config.getboolean("experiment_control",
            "leave_tmpfiles")
    self.state_filename = config.get("experiment_control",
            "experiment_state")
    self.store_filename = config.get("experiment_control",
            "synch_store")
    self.store_url = config.get("experiment_control", "store_url")
    self.splitter_url = config.get("experiment_control", "ns2topdl_uri")
    self.fedkit = parse_tarfile_list(\
            config.get("experiment_control", "fedkit"))
    self.gatewaykit = parse_tarfile_list(\
            config.get("experiment_control", "gatewaykit"))
    accessdb_file = config.get("experiment_control", "accessdb")

    self.ssh_pubkey_file = config.get("experiment_control",
            "ssh_pubkey_file")
    self.ssh_privkey_file = config.get("experiment_control",
            "ssh_privkey_file")
    # NB for internal master/slave ops, not experiment setup
    self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")

    # fedids listed here are granted rights on every experiment (see
    # read_state)
    self.overrides = set([])
    ovr = config.get('experiment_control', 'overrides')
    if ovr:
        for o in ovr.split(","):
            o = o.strip()
            if o.startswith('fedid:'): o = o[len('fedid:'):]
            self.overrides.add(fedid(hexstr=o))

    self.state = { }
    self.state_lock = Lock()
    self.tclsh = "/usr/local/bin/otclsh"
    self.tcl_splitter = config.get("ns2topdl", "tcl_splitter") or \
            config.get("experiment_control", "tcl_splitter",
                    "/usr/testbed/lib/ns2ir/parse.tcl")
    mapdb_file = config.get("experiment_control", "mapdb")
    self.trace_file = sys.stderr

    self.def_expstart = \
            "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " +\
            "/tmp/federate"
    self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " +\
            "FEDDIR/hosts"
    self.def_gwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " +\
            "/tmp/bridge.log"
    self.def_mgwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " +\
            "/tmp/bridge.log"
    self.def_gwimage = "FBSD61-TUNNEL2"
    self.def_gwtype = "pc"
    self.local_access = { }

    if auth:
        self.auth = auth
    else:
        self.log.error(\
                "[access]: No authorizer initialized, creating local one.")
        # Must be stored on self: read_accessdb and read_state below
        # register attributes through self.auth.
        self.auth = authorizer()

    if self.ssh_pubkey_file:
        try:
            f = open(self.ssh_pubkey_file, 'r')
            try:
                self.ssh_pubkey = f.read()
            finally:
                f.close()
        except IOError:
            raise service_error(service_error.internal,
                    "Cannot read sshpubkey")
    else:
        raise service_error(service_error.internal,
                "No SSH public key file?")

    if not self.ssh_privkey_file:
        raise service_error(service_error.internal,
                "No SSH private key file?")

    if mapdb_file:
        self.read_mapdb(mapdb_file)
    else:
        self.log.warn("[experiment_control] No testbed map, using defaults")
        self.tbmap = {
                'deter':'https://users.isi.deterlab.net:23235',
                'emulab':'https://users.isi.deterlab.net:23236',
                'ucb':'https://users.isi.deterlab.net:23237',
                }

    if accessdb_file:
        self.read_accessdb(accessdb_file)
    else:
        raise service_error(service_error.internal,
                "No accessdb specified in config")

    # Grab saved state.  OK to do this w/o locking because it's read only
    # and only one thread should be in existence that can see self.state at
    # this point.
    if self.state_filename:
        self.read_state()

    if self.store_filename:
        self.read_store()
    else:
        self.log.warning("No saved synch store")
        # An empty store instance, matching what read_store creates
        self.synch_store = synch_store()

    # Dispatch tables: both protocols expose the same operations
    handlers = (
            ('New', self.new_experiment),
            ('Create', self.create_experiment),
            ('Vtopo', self.get_vtopo),
            ('Vis', self.get_vis),
            ('Info', self.get_info),
            ('MultiInfo', self.get_multi_info),
            ('Terminate', self.terminate_experiment),
            ('GetValue', self.GetValue),
            ('SetValue', self.SetValue),
        )

    self.soap_services = dict(
            [ (n, soap_handler(n, m)) for n, m in handlers ])
    self.xmlrpc_services = dict(
            [ (n, xmlrpc_handler(n, m)) for n, m in handlers ])
381
382	# Call while holding self.state_lock
def write_state(self):
    """
    Write a new copy of experiment state after copying the existing state
    to a backup ("<state_filename>.bak").

    State format is a simple pickling of the state dictionary.  Errors
    are logged, not raised.  Call while holding self.state_lock.
    """
    if os.access(self.state_filename, os.W_OK):
        copy_file(self.state_filename, \
                "%s.bak" % self.state_filename)
    try:
        # Binary mode keeps the pickle intact on every platform
        f = open(self.state_filename, 'wb')
        try:
            pickle.dump(self.state, f)
        finally:
            # Close explicitly so the data is flushed before anyone
            # reads the file back.
            f.close()
    except IOError:
        e = sys.exc_info()[1]
        self.log.error("Can't write file %s: %s" % \
                (self.state_filename, e))
    except pickle.PicklingError:
        e = sys.exc_info()[1]
        self.log.error("Pickling problem: %s" % e)
    except TypeError:
        e = sys.exc_info()[1]
        self.log.error("Pickling problem (TypeError): %s" % e)
403
404	@staticmethod
405	def get_alloc_ids(state):
406	"""
407	Pull the fedids of the identifiers of each allocation from the
408	state. Again, a dict dive that's best isolated.
409
410	Used by read_store and read state
411	"""
412
413	return [ f['allocID']['fedid']
414	for f in state.get('federant',[]) \
415	if f.has_key('allocID') and \
416	f['allocID'].has_key('fedid')]
417
418	# Call while holding self.state_lock
def read_state(self):
    """
    Read a new copy of experiment state.  Old state is overwritten.

    State format is a simple pickling of the state dictionary.  After
    loading, the authorizer is told that the owner, the experiment id
    itself and every override fedid may access each experiment, and that
    each allocation id may read 'repo/<experiment id>'.

    Load failures are logged and leave self.state unchanged.  Call while
    holding self.state_lock.
    """

    def get_experiment_id(state):
        """
        Pull the fedid experimentID out of the saved state: the first
        entry of state['experimentID'] that carries a 'fedid', or None.
        """
        for e in state.get('experimentID', []):
            if 'fedid' in e:
                return e['fedid']
        return None

    try:
        # NOTE: unpickling executes whatever the file describes, so the
        # state file must only be writable by trusted users.
        f = open(self.state_filename, "rb")
        try:
            self.state = pickle.load(f)
        finally:
            f.close()
        self.log.debug("[read_state]: Read state from %s" % \
                self.state_filename)
    except IOError:
        e = sys.exc_info()[1]
        self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
                % (self.state_filename, e))
    except pickle.UnpicklingError:
        e = sys.exc_info()[1]
        self.log.warning(("[read_state]: No saved state: " + \
                "Unpickling failed: %s") % e)

    for s in self.state.values():
        try:
            eid = get_experiment_id(s)
            if eid:
                # Give the owner rights to the experiment
                self.auth.set_attribute(s['owner'], eid)
                # And holders of the eid as well
                self.auth.set_attribute(eid, eid)
                # allow overrides to control experiments as well
                for o in self.overrides:
                    self.auth.set_attribute(o, eid)
                # Set permissions to allow reading of the software repo, if
                # any, as well.
                for a in self.get_alloc_ids(s):
                    self.auth.set_attribute(a, 'repo/%s' % eid)
            else:
                raise KeyError("No experiment id")
        except KeyError:
            e = sys.exc_info()[1]
            self.log.warning(("[read_state]: State ownership or identity " +\
                    "misformatted in %s: %s") % (self.state_filename, e))
474
475
def read_accessdb(self, accessdb_file):
    """
    Read the mapping from fedids that can create experiments to their name
    in the 3-level access namespace.  All will be asserted from this
    testbed and can include the local username and project that will be
    asserted on their behalf by this fedd.  Each fedid is also added to
    the authorization system with the "create" and "new" attributes.

    Two line formats are recognized (blank lines and lines starting with
    '#' are skipped):
        fedid:<hex> ->(<project>,<user>)
        fedid:<hex> -> <user>
    Unparseable lines are logged and skipped.

    Sets self.accessdb, a dict of fedid -> list of (project, user) where
    project is None for the second format.  Raises service_error if the
    file cannot be opened or read.
    """
    self.accessdb = {}
    # These are the regexps for parsing the db
    name_expr = "[" + string.ascii_letters + string.digits + r"\.\-]+"
    fedid_expr = r"^\s*fedid:([" + string.hexdigits + "]+)"
    # Optional whitespace is allowed around every delimiter; lines are
    # stripped before matching, so nothing may be required at the end.
    project_line = re.compile(fedid_expr + \
            r"\s*->\s*\(\s*(" + name_expr + r")\s*,\s*(" + name_expr + \
            r")\s*\)\s*$")
    user_line = re.compile(fedid_expr + \
            r"\s*->\s*(" + name_expr + r")\s*$")
    lineno = 0

    # Parse the mappings and store in self.accessdb, a dict of
    # fedid -> [ (proj, user), ... ]
    try:
        f = open(accessdb_file, "r")
        try:
            for line in f:
                lineno += 1
                line = line.strip()
                if len(line) == 0 or line.startswith('#'):
                    continue

                m = project_line.match(line)
                if m:
                    entry = m.group(2, 3)
                else:
                    m = user_line.match(line)
                    if m:
                        entry = (None, m.group(2))
                    else:
                        self.log.warn(
                                ("[experiment_control] Error parsing access " +\
                                "db %s at line %d") % (accessdb_file, lineno))
                        continue

                fid = fedid(hexstr=m.group(1))
                self.accessdb.setdefault(fid, []).append(entry)
        finally:
            f.close()
    except IOError:
        # The whole message is formatted, not only its second half
        raise service_error(service_error.internal,
                ("Error opening/reading %s as experiment " +\
                        "control accessdb") % accessdb_file)

    # Initialize the authorization attributes
    for fid in self.accessdb.keys():
        self.auth.set_attribute(fid, 'create')
        self.auth.set_attribute(fid, 'new')
532
def read_mapdb(self, file):
    """
    Read a simple colon separated list of mappings for the
    label-to-testbed-URL mappings.  Clears or creates self.tbmap.

    Each useful line is "<label>:<url>", split at the first colon.
    Blank lines and lines starting with '#' are skipped; lines without a
    colon are logged and ignored.  A file that cannot be opened or read
    is logged and leaves self.tbmap with whatever was parsed so far.
    """

    self.tbmap = { }
    lineno = 0
    f = None
    try:
        try:
            f = open(file, "r")
            for line in f:
                lineno += 1
                line = line.strip()
                if line.startswith('#') or len(line) == 0:
                    continue
                try:
                    label, url = line.split(':', 1)
                    self.tbmap[label] = url
                except ValueError:
                    e = sys.exc_info()[1]
                    # Format the joined message, not just its second half
                    self.log.warn(("[read_mapdb] Ignored bad line (%d) in " +\
                            "map db: %s %s") % (lineno, line, e))
        except IOError:
            e = sys.exc_info()[1]
            self.log.warning(("[read_mapdb]: No saved map database: Can't " +\
                    "open %s: %s") % (file, e))
    finally:
        # f stays None when open() itself failed
        if f is not None:
            f.close()
558
def read_store(self):
    """
    Load the synch store from self.store_filename into self.synch_store
    and set the initial permissions on its contents.

    If the file cannot be read the problem is logged and an empty store
    is used.  For every key of the form 'fedid:<40 characters>...' whose
    fedid names an experiment in self.state, each allocation id of that
    experiment is given the key as an authorization attribute.
    """
    try:
        self.synch_store = synch_store()
        self.synch_store.load(self.store_filename)
        self.log.debug("[read_store]: Read store from %s" % \
                self.store_filename)
    except IOError:
        e = sys.exc_info()[1]
        # Report the store file, which is the one that failed to open
        self.log.warning("[read_store]: No saved store: Can't open %s: %s"\
                % (self.store_filename, e))
        # Start over with a clean store in case load() got part way
        self.synch_store = synch_store()

    # Set the initial permissions on data in the store.  XXX: This ad hoc
    # authorization attribute initialization is getting out of hand.
    for k in self.synch_store.all_keys():
        try:
            if k.startswith('fedid:'):
                # The 40 characters that follow the 'fedid:' prefix
                fid = fedid(hexstr=k[6:46])
                if fid in self.state:
                    for a in self.get_alloc_ids(self.state[fid]):
                        self.auth.set_attribute(a, k)
        except ValueError:
            self.log.warn("Cannot deduce permissions for %s" % k)
581
582
def write_store(self):
    """
    Save the synch store to self.store_filename, first copying any
    existing writable file to "<store_filename>.bak".

    The store's own save method does the pickling so that the data
    written is consistent.  Failures are logged, not raised.
    """
    target = self.store_filename
    backup = "%s.bak" % target

    # Keep the previous copy around before overwriting it
    if os.access(target, os.W_OK):
        copy_file(target, backup)

    try:
        self.synch_store.save(target)
    except IOError:
        err = sys.exc_info()[1]
        self.log.error("Can't write file %s: %s" % (target, err))
    except TypeError:
        err = sys.exc_info()[1]
        self.log.error("Pickling problem (TypeError): %s" % err)
601
602
def generate_ssh_keys(self, dest, type="rsa" ):
    """
    Generate a set of keys for the gateways to use to talk.

    Keys are of type type ("rsa" or "dsa", case insensitive) and are
    stored in the required dest file.  The keys are created with an
    empty passphrase by running self.ssh_keygen.

    Raises ValueError for an unsupported key type and service_error if
    /dev/null cannot be opened or the key generator exits non-zero.
    """
    valid_types = ("rsa", "dsa")
    t = type.lower()
    if t not in valid_types:
        raise ValueError("Unsupported ssh key type: %s" % type)
    cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]

    try:
        trace = open("/dev/null", "w")
    except IOError:
        raise service_error(service_error.internal,
                "Cannot open /dev/null??")

    try:
        # call() reports failure through its return code; it raises
        # OSError only if the program cannot be started.
        self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
        rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
    finally:
        trace.close()

    if rv != 0:
        raise service_error(service_error.internal,
                "Cannot generate nonce ssh keys.  %s return code %d" \
                        % (self.ssh_keygen, rv))
627
def gentopo(self, str):
    """
    Build the topology data structure from the splitter's XML
    representation of it.

    The topology XML looks like:
        <experiment>
            <nodes>
                <node><vname></vname><ips>ip1:ip2</ips></node>
            </nodes>
            <lans>
                <lan>
                    <vname></vname><vnode></vnode><ip></ip>
                    <bandwidth></bandwidth><member>node:port</member>
                </lan>
            </lans>

    Returns { 'node': [ ... ], 'lan': [ ... ] } where each list entry is
    a dict of the subelements of one <node> or <lan>.  bandwidth is
    converted to int and delay to float; the rest stay text.
    """
    class topo_parse:
        """
        Expat callbacks that accumulate the topology dict.
        """
        def __init__(self):
            # How to convert the text of each recognized subelement
            as_text = lambda text: text
            self.convert = { 'bandwidth': int, 'delay': float }
            for tag in ('vname', 'vnode', 'ips', 'ip', 'member'):
                self.convert[tag] = as_text
            # The final data structure, keyed by container element name
            self.topo = { 'node': [ ], 'lan': [ ] }
            self.element = { }      # Element currently being filled in
            self.chars = ""         # Text seen since the last subelement

        def end_element(self, name):
            # A closing <node>/<lan> files the element under that name;
            # a closing subelement stores its converted text.
            if name in self.topo:
                self.topo[name].append(self.element)
                self.element = { }
            elif name in self.convert:
                self.element[name] = self.convert[name](self.chars)
                self.chars = ""

        def found_chars(self, data):
            # Text may arrive in pieces; trailing whitespace is dropped
            self.chars = self.chars + data.rstrip()

    handlers = topo_parse()
    xml_parser = xml.parsers.expat.ParserCreate()
    xml_parser.EndElementHandler = handlers.end_element
    xml_parser.CharacterDataHandler = handlers.found_chars
    xml_parser.Parse(str)

    return handlers.topo
695
696
def genviz(self, topo):
    """
    Generate the visualization of the virtual topology.

    topo is a dict with 'node' and 'lan' lists as produced by gentopo.
    The connections are written as a graph, laid out with neato, and the
    positions parsed from neato's output.

    Returns { 'node': [ {'name', 'x', 'y', 'type'}, ... ] }, or None if
    neato exits with a non-zero status.  Raises service_error on a
    malformed topology, a link with a single endpoint, or a file error.
    """

    neato = "/usr/local/bin/neato"
    # Used to parse neato output: node name followed by its pos attribute
    vis_re = re.compile(
            r'^\s*"?([\w\-]+)"?\s+\[.*pos="([\d\.]+),([\d\.]+)"')

    try:
        # Node names
        nodes = [ n['vname'] for n in topo['node'] ]
        topo_lans = topo['lan']
    except KeyError:
        e = sys.exc_info()[1]
        raise service_error(service_error.internal, "Bad topology: %s" %e)

    lans = { }
    links = { }

    # Walk through the virtual topology, organizing the connections into
    # 2-node connections (links) and more-than-2-node connections (lans).
    # When a lan is created, it's added to the list of nodes (there's a
    # node in the visualization for the lan).
    for l in topo_lans:
        vname = l['vname']
        if vname in links:
            if len(links[vname]) < 2:
                links[vname].append(l['vnode'])
            else:
                # Third endpoint: promote the link to a lan
                nodes.append(vname)
                lans[vname] = links.pop(vname)
                lans[vname].append(l['vnode'])
        elif vname in lans:
            lans[vname].append(l['vnode'])
        else:
            links[vname] = [ l['vnode'] ]

    # Open up a temporary file for dot to turn into a visualization
    try:
        df, dotname = tempfile.mkstemp()
        dotfile = os.fdopen(df, 'w')
    except (IOError, OSError):
        raise service_error(service_error.internal,
                "Failed to open file in genviz")

    try:
        # Generate a dot/neato input file from the links, nodes and lans
        try:
            dotfile.write("graph G {\n")
            for n in nodes:
                dotfile.write('\t"%s"\n' % n)
            for l in links.keys():
                # A one-element link makes the format fail (TypeError)
                dotfile.write('\t"%s" -- "%s"\n' % tuple(links[l]))
            for l in lans.keys():
                for n in lans[l]:
                    dotfile.write('\t "%s" -- "%s"\n' % (n,l))
            dotfile.write("}\n")
            dotfile.close()
        except TypeError:
            raise service_error(service_error.internal,
                    "Single endpoint link in vtopo")
        except IOError:
            raise service_error(service_error.internal,
                    "Cannot write dot file")

        try:
            dnull = open('/dev/null', 'w')
        except IOError:
            raise service_error(service_error.internal,
                    "Failed to open /dev/null in genviz")

        # Use dot to create a visualization
        try:
            dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005',
                '-Gmaxiter=2000', '-Gpack=true', dotname], stdout=PIPE,
                stderr=dnull, close_fds=True)
        finally:
            dnull.close()

        # Translate dot to vis format
        vis_nodes = [ ]
        vis = { 'node': vis_nodes }
        for line in dot.stdout:
            m = vis_re.match(line)
            if m:
                vn = m.group(1)
                vis_node = {'name': vn, \
                        'x': float(m.group(2)),\
                        'y' : float(m.group(3)),\
                    }
                if vn in links or vn in lans:
                    vis_node['type'] = 'lan'
                else:
                    vis_node['type'] = 'node'
                vis_nodes.append(vis_node)
        rv = dot.wait()
    finally:
        # Never leave the temporary dot file behind, whatever happened
        if not dotfile.closed:
            dotfile.close()
        try:
            os.remove(dotname)
        except OSError:
            e = sys.exc_info()[1]
            self.log.warning("[genviz]: Cannot remove %s: %s" % \
                    (dotname, e))

    if rv == 0 : return vis
    else: return None
797
def get_access(self, tb, nodes, tbparam, access_user, masters):
    """
    Get access to testbed through fedd and set the parameters for that tb

    tb          -- testbed label, looked up in self.tbmap
    nodes       -- optional list of "image:hardware:count" strings
    tbparam     -- dict that receives tbparam[tb] = { 'allocID', 'uri',
                   plus the lower-cased fedAttr attributes returned }
    access_user -- sequence of (project, user) pairs to try in turn;
                   project may be None
    masters     -- dict of testbed label -> list of service objects
                   (with name, params, importers and reqs attributes)

    Each (project, user) is tried until one is granted access.  The
    service entries returned for exported services are appended to the
    reqs of the corresponding service object.

    Raises service_error if the testbed is unknown, more than one project
    export is requested, the response is malformed, or every attempt is
    denied (including when access_user is empty).
    """
    def get_export_project(svcs):
        """
        Look through the list of federated_service objects for this
        testbed for a project_export service, and extract the project
        parameter.
        """

        pe = [s for s in svcs if s.name=='project_export']
        if len(pe) == 1:
            return pe[0].params.get('project', None)
        elif len(pe) == 0:
            return None
        else:
            raise service_error(service_error.req,
                    "More than one project export is not supported")

    def service_request(idx, svc, visibility):
        """
        Build the request entry for one imported or exported service.
        """
        sr = {'id': idx, 'name': svc.name, 'visibility': visibility }
        if svc.params:
            sr['fedAttr'] = [ { 'attribute': k, 'value': v } \
                    for k, v in svc.params.items()]
        return sr

    uri = self.tbmap.get(tb, None)
    if not uri:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    export_svcs = masters.get(tb,[])
    import_svcs = [ s for m in masters.values() \
            for s in m \
                if tb in s.importers ]

    export_project = get_export_project(export_svcs)

    # Tweak search order so that if there are entries in access_user that
    # have a project matching the export project, we try them first
    if export_project:
        access_sequence = [ (p, u) for p, u in access_user \
                if p == export_project]
        access_sequence.extend([(p, u) for p, u in access_user \
                if p != export_project])
    else:
        access_sequence = access_user

    # Defined up front so that an empty access_sequence is reported as
    # "access denied" below, not as an unbound local.
    r = None
    e_keys = { }

    for p, u in access_sequence:
        self.log.debug(("[get_access] Attempting access from (%s, %s) " + \
                "to %s") %  ((p or "None"), u, uri))

        if p:
            # Request with user and project specified
            credential = [ "project: %s" % p, "user: %s" % u]
        else:
            # Request with only user specified
            credential = [ 'user: %s' % u ]

        req = {\
                'destinationTestbed' : { 'uri' : uri },
                'credential': credential,
                'allocID' : { 'localname': 'test' },
            }

        # Make the service request from the services we're importing and
        # exporting.  Keep track of the export request ids so we can
        # collect the resulting info from the access response.
        e_keys = { }
        if import_svcs or export_svcs:
            req['service'] = [ ]

        for i, s in enumerate(import_svcs):
            req['service'].append(
                    service_request('import%d' % i, s, 'import'))

        for i, s in enumerate(export_svcs):
            idx = 'export%d' % i
            e_keys[idx] = s
            req['service'].append(service_request(idx, s, 'export'))

        # node resources if any
        if nodes is not None and len(nodes) > 0:
            rnodes = [ ]
            for n in nodes:
                rn = { }
                image, hw, count = n.split(":")
                if image: rn['image'] = [ image ]
                if hw: rn['hardware'] = [ hw ]
                if count and int(count) >0 : rn['count'] = int(count)
                rnodes.append(rn)
            req['resources']= { }
            req['resources']['node'] = rnodes

        try:
            if uri in self.local_access:
                # Local access call
                req = { 'RequestAccessRequestBody' : req }
                r = self.local_access[uri].RequestAccess(req,
                        fedid(file=self.cert_file))
                r = { 'RequestAccessResponseBody' : r }
            else:
                r = self.call_RequestAccess(uri, req,
                        self.cert_file, self.cert_pwd, self.trusted_certs)
        except service_error:
            e = sys.exc_info()[1]
            if e.code == service_error.access:
                self.log.debug("[get_access] Access denied")
                r = None
                continue
            else:
                # Re-raise with the original traceback intact
                raise

        if 'RequestAccessResponseBody' in r:
            # Through to here we have a valid response, not a fault.
            # Access denied is a fault, so something better or worse than
            # access denied has happened.
            r = r['RequestAccessResponseBody']
            self.log.debug("[get_access] Access granted")
            break
        else:
            raise service_error(service_error.protocol,
                    "Bad proxy response")

    if not r:
        raise service_error(service_error.access,
                "Access denied by %s (%s)" % (tb, uri))

    tbparam[tb] = {
            "allocID" : r['allocID'],
            "uri": uri,
            }

    # Collect the responses corresponding to the services this testbed
    # exports.  These will be the service requests that we will include in
    # the start segment requests (with appropriate visibility values) to
    # import and export the segments.
    for s in r.get('service', []):
        sid = s.get('id', None)
        if sid and sid in e_keys:
            e_keys[sid].reqs.append(s)

    # Add attributes to parameter space.  We don't allow attributes to
    # overlay any parameters already installed.
    for a in r.get('fedAttr', []):
        try:
            name = a['attribute']
            if name and isinstance(name, basestring):
                key = name.lower()
                if key not in tbparam[tb]:
                    tbparam[tb][key] = a['value']
        except KeyError:
            self.log.error("Bad attribute in response: %s" % a)
953
def release_access(self, tb, aid, uri=None):
    """
    Release access to testbed through fedd.

    aid is the allocation id to release.  The request goes to uri when
    one is given, otherwise to the URI self.tbmap holds for tb.  A local
    access object registered for the URI is called directly; otherwise
    the ReleaseAccess service is called remotely.  Returns None.

    Raises service_error if no URI can be determined.
    """
    target = uri or self.tbmap.get(tb, None)
    if not target:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    if target in self.local_access:
        body = { 'ReleaseAccessRequestBody' : {'allocID': aid},}
        self.local_access[target].ReleaseAccess(body,
                fedid(file=self.cert_file))
    else:
        self.call_ReleaseAccess(target, {'allocID': aid},
                self.cert_file, self.cert_pwd, self.trusted_certs)

    # The response is not examined.  TODO: better error coding
975
def remote_ns2topdl(self, uri, desc):
    """
    Ask the Ns2Topdl service at uri to translate the ns2 description
    desc and return the result as a topdl.Topology.

    Raises service_error (protocol) if the response has no
    Ns2TopdlResponseBody or that body carries no topdl description.
    """

    req = {
            'description' : { 'ns2description': desc },
        }

    r = self.call_Ns2Topdl(uri, req, self.cert_file, self.cert_pwd,
            self.trusted_certs)

    if 'Ns2TopdlResponseBody' not in r:
        raise service_error(service_error.protocol, "Bad splitter response")

    ed = r['Ns2TopdlResponseBody'].get('experimentdescription', None)
    # ed is None when the body has no experimentdescription at all;
    # that is a protocol error too, not an AttributeError.
    if ed and 'topdldescription' in ed:
        return topdl.Topology(**ed['topdldescription'])

    raise service_error(service_error.protocol,
            "Bad splitter response (no output)")
995
class start_segment:
    """
    Callable that issues a StartSegment request for one testbed.

    caller is the function used to make the request; it is invoked as
    caller(uri, req, cert_file, cert_pwd, trusted_certs).  testbed is the
    label used to pick this testbed's services and in log messages.
    Lines of the allocationLog in the response are written to
    log_collector, if one is given.  After a successful call, response
    holds the full reply.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None,
            log_collector=None):
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep the certificates the creator supplied so they reach caller
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed
        self.log_collector = log_collector
        self.response = None

    def __call__(self, uri, aid, topo, masters, attrs=None, connInfo=None):
        """
        Send the StartSegment request for allocation aid to uri.

        topo must provide to_dict(); masters maps testbed labels to lists
        of service objects with importers and reqs attributes.  attrs and
        connInfo are added to the request when given.

        Returns True on success and False if the call raised a
        service_error (which is logged).
        """
        req = {
                'allocID': { 'fedid' : aid },
                'segmentdescription': {
                    'topdldescription': topo.to_dict(),
                },
            }

        if connInfo:
            req['connection'] = connInfo

        import_svcs = [ s for m in masters.values() \
                for s in m if self.testbed in s.importers]

        if import_svcs or self.testbed in masters:
            req['service'] = []

        # Copies are made so the stored requests keep their own visibility
        for s in import_svcs:
            for r in s.reqs:
                sr = copy.deepcopy(r)
                sr['visibility'] = 'import'
                req['service'].append(sr)

        for s in masters.get(self.testbed, []):
            for r in s.reqs:
                sr = copy.deepcopy(r)
                sr['visibility'] = 'export'
                req['service'].append(sr)

        if attrs:
            req['fedAttr'] = attrs

        try:
            self.log.debug("Calling StartSegment at %s " % uri)
            r = self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            if 'StartSegmentResponseBody' in r:
                lval = r['StartSegmentResponseBody'].get('allocationLog',
                        None)
                if lval and self.log_collector:
                    for line in lval.splitlines(True):
                        self.log_collector.write(line)
                self.response = r
            else:
                raise service_error(service_error.internal,
                        "Bad response!?: %s" %r)
            return True
        except service_error:
            e = sys.exc_info()[1]
            self.log.error("Start segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
1061
1062
1063
class terminate_segment:
    """
    Callable that sends one TerminateSegment request to a testbed.

    The constructor captures the credentials, the logger and the function
    used to make the remote call (caller).
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None):
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep the certificates handed in by the creator so that they are
        # actually passed on to caller.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed

    def __call__(self, uri, aid ):
        """
        Ask the testbed at uri to terminate the segment with allocation ID
        aid.  Returns True if the call completes and False if it raises a
        service_error (which is logged).  The response itself is ignored.
        """
        req = {
                'allocID': aid ,
            }
        try:
            self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            return True
        except service_error as e:
            self.log.error("Terminate segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
1087
1088
def allocate_resources(self, allocated, masters, eid, expid,
        tbparams, topo, tmpdir, alloc_log=None, log_collector=None,
        attrs=None, connInfo=None):
    """
    Start a segment on every testbed in allocated, one pooled thread per
    testbed, and record the outcome in the experiment state.

    tbparams supplies each testbed's uri (falling back to self.tbmap) and
    allocation ID, topo the per-testbed sub-topology and connInfo the
    per-testbed connection information (missing entries are sent as
    empty).  Messages go to alloc_log if given, otherwise to self.log.

    If any segment fails, the segments that did start are terminated,
    every allocation in tbparams is released and the experiment is marked
    'failed'.  Otherwise tmpdir is removed (when self.cleanup is set) and
    the experiment is marked 'active'.  Raises service_error if a testbed
    has no uri or no allocation ID.
    """
    if connInfo is None:
        connInfo = { }

    # XXX
    fail_soft = False

    log = alloc_log or self.log

    thread_pool = self.thread_pool(self.nthreads)
    threads = [ ]

    for tb in allocated.keys():
        # Create and start a thread to start the segment, and save it
        # to get the return value later
        thread_pool.wait_for_slot()
        uri = tbparams[tb].get('uri', self.tbmap.get(tb, None))
        if not uri:
            raise service_error(service_error.internal,
                    "Unknown testbed %s !?" % tb)

        if 'allocID' in tbparams[tb] and \
                'fedid' in tbparams[tb]['allocID']:
            aid = tbparams[tb]['allocID']['fedid']
        else:
            raise service_error(service_error.internal,
                    "No alloc id for testbed %s !?" % tb)

        t = self.pooled_thread(\
                target=self.start_segment(log=log, debug=self.debug,
                    testbed=tb, cert_file=self.cert_file,
                    cert_pwd=self.cert_pwd,
                    trusted_certs=self.trusted_certs,
                    caller=self.call_StartSegment,
                    log_collector=log_collector),
                args=(uri, aid, topo[tb], masters, attrs,
                    connInfo.get(tb, { })),
                name=tb,
                pdata=thread_pool, trace_file=self.trace_file)
        threads.append(t)
        t.start()

    # Wait until all finish (keep pinging the log, though)
    mins = 0
    revoked = False
    while not thread_pool.wait_for_all_done(60.0):
        mins += 1
        # Use log, not alloc_log: alloc_log is optional and may be None.
        log.info("Waiting for sub threads (it has been %d mins)" \
                % mins)
        # rv is compared with False explicitly so that threads without a
        # result yet are not counted as failures.
        if not revoked and any(t.rv == False for t in threads):
            # a testbed has failed.  Revoke this experiment's
            # synchronization values so that sub experiments will not
            # deadlock waiting for synchronization that will never happen
            self.log.info("A subexperiment has failed to swap in, " + \
                    "revoking synch keys")
            var_key = "fedid:%s" % expid
            for k in self.synch_store.all_keys():
                if k.startswith(var_key):
                    self.synch_store.revoke_key(k)
            revoked = True

    failed = [ t.getName() for t in threads if not t.rv ]
    succeeded = [tb for tb in allocated.keys() if tb not in failed]

    # If one failed clean up, unless fail_soft is set
    if failed:
        if not fail_soft:
            thread_pool.clear()
            for tb in succeeded:
                # Create and start a thread to stop the segment
                thread_pool.wait_for_slot()
                uri = tbparams[tb]['uri']
                t = self.pooled_thread(\
                        target=self.terminate_segment(log=log,
                            testbed=tb,
                            cert_file=self.cert_file,
                            cert_pwd=self.cert_pwd,
                            trusted_certs=self.trusted_certs,
                            caller=self.call_TerminateSegment),
                        args=(uri, tbparams[tb]['federant']['allocID']),
                        name=tb,
                        pdata=thread_pool, trace_file=self.trace_file)
                t.start()
            # Wait until all finish (if any are being stopped)
            if succeeded:
                thread_pool.wait_for_all_done()

            # release the allocations
            for tb in tbparams.keys():
                self.release_access(tb, tbparams[tb]['allocID'],
                        tbparams[tb].get('uri', None))
            # Mark the placeholder as failed
            self.state_lock.acquire()
            try:
                self.state[eid]['experimentStatus'] = 'failed'
                if self.state_filename: self.write_state()
            finally:
                self.state_lock.release()

            log.error("Swap in failed on %s" % ",".join(failed))
            return
    else:
        log.info("[start_segment]: Experiment %s active" % eid)


    # Walk up tmpdir, deleting as we go
    if self.cleanup:
        log.debug("[start_experiment]: removing %s" % tmpdir)
        for path, dirs, files in os.walk(tmpdir, topdown=False):
            for f in files:
                os.remove(os.path.join(path, f))
            for d in dirs:
                os.rmdir(os.path.join(path, d))
        os.rmdir(tmpdir)
    else:
        log.debug("[start_experiment]: not removing %s" % tmpdir)

    # Insert the experiment into our state and update the disk copy
    self.state_lock.acquire()
    try:
        self.state[expid]['experimentStatus'] = 'active'
        self.state[eid] = self.state[expid]
        if self.state_filename: self.write_state()
    finally:
        self.state_lock.release()
    return
1214
1215
def add_kit(self, e, kit):
    """
    Attach the software listed in kit to the object e.

    kit is a list of (install, location) tuples; each becomes a
    topdl.Software object.  If e.software is already a list the new
    objects are appended to it, otherwise e.software is replaced by the
    new list.  This keeps the old tuple representation of kits usable.
    """
    packages = [ ]
    for install, location in kit:
        packages.append(topdl.Software(install=install, location=location))

    if not isinstance(e.software, list):
        e.software = packages
    else:
        e.software.extend(packages)
1228
1229
def create_experiment_state(self, fid, req, expid, expcert,
        state='starting'):
    """
    Create the initial entry in the experiment's state.  The expid and
    expcert are the experiment's fedid and the certificate that represents
    that ID, which are installed in the experiment state.

    If the request includes a suggested local name, it is used if
    possible.  If the local name is already taken by a failed experiment
    that fid can access, that experiment is overwritten.  Otherwise random
    letters are appended until an unused local name is found.  Without a
    suggested name, self.exp_stem plus five random letters is used.

    The same state dictionary is stored under both the local name and
    expid.  The chosen local name is returned.
    """
    has_name = 'experimentID' in req and \
            'localname' in req['experimentID']
    old_expid = None
    accessible = False

    if has_name:
        eid = req['experimentID']['localname']
        # If there's an old failed experiment here with the same local name
        # and accessible by this user, we'll overwrite it, otherwise we'll
        # fall through and do the collision avoidance.
        old_expid = self.get_experiment_fedid(eid)
        if old_expid:
            try:
                accessible = self.check_experiment_access(fid, old_expid)
            except service_error as e:
                # Someone else's experiment holds this name: that is a
                # collision to avoid, not a reason to fail the request.
                if e.code != service_error.access:
                    raise

    # The lock is held from the overwrite decision through the insert so
    # that no other thread can claim the name in between.
    self.state_lock.acquire()
    try:
        if has_name:
            if accessible:
                status = self.state.get(eid, { }).get(
                        'experimentStatus', None)
                if status == 'failed':
                    # remove the old access attribute and both state keys
                    self.auth.unset_attribute(fid, old_expid)
                    self.state.pop(eid, None)
                    self.state.pop(old_expid, None)
            while eid in self.state:
                eid += random.choice(string.ascii_letters)
        else:
            while True:
                eid = self.exp_stem
                for i in range(0,5):
                    eid += random.choice(string.ascii_letters)
                if eid not in self.state:
                    break

        # Initial state
        self.state[eid] = {
                'experimentID' : \
                        [ { 'localname' : eid }, {'fedid': expid } ],
                'experimentStatus': state,
                'experimentAccess': { 'X509' : expcert },
                'owner': fid,
                'log' : [],
            }
        self.state[expid] = self.state[eid]
        if self.state_filename: self.write_state()
    finally:
        self.state_lock.release()

    return eid
1298
1299
def allocate_ips_to_topo(self, top):
    """
    Give every interface in the topology ip4_address and ip4_netmask
    attributes, based on the substrate it is attached to.  Substrates are
    processed from the most interfaces to the fewest, and addresses come
    from an ip_allocator covering 2**24 addresses starting at 10.0.0.0.

    Returns a list of /etc/hosts style lines and the allocator itself,
    because the caller may need to allocate more IPs (for portals).
    Raises service_error if the allocator cannot satisfy a request.
    """
    by_size = sorted(top.substrates,
            key=lambda sub: len(sub.interfaces), reverse=True)
    allocator = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
    seen_ifs = { }      # interfaces already named, per host
    host_lines = [ ]

    for sub in by_size:
        # two more addresses than there are interfaces
        wanted = len(sub.interfaces)+2

        block = allocator.allocate(wanted)
        if not block:
            raise service_error(service_error.req,
                    "Cannot allocate IP addresses")
        addr, granted = block
        if granted < wanted:
            raise service_error(service_error.internal,
                    "Allocator returned wrong number of IPs??")

        # Double the allocator's minimum until it holds the network, then
        # clear the low bits of an all-ones 32-bit mask
        span = allocator.min_alloc
        while span < wanted:
            span *= 2
        netmask = ((2**32-1) ^ (span-1))

        for iface in sub.interfaces:
            # the first address of the block is skipped
            addr += 1
            iface.attribute.append(
                    topdl.Attribute('ip4_address',
                        "%s" % ip_addr(addr)))
            iface.attribute.append(
                    topdl.Attribute('ip4_netmask',
                        "%s" % ip_addr(int(netmask))))

            hname = iface.element.name[0]
            if hname in seen_ifs:
                host_lines.append("%s\t%s-%s %s-%d" % \
                        (ip_addr(addr), hname, sub.name, hname,
                            seen_ifs[hname]))
            else:
                # The first interface of a host also carries the bare
                # host name
                seen_ifs[hname] = 0
                host_lines.append("%s\t%s-%s %s-%d %s" % \
                        (ip_addr(addr), hname, sub.name, hname,
                            seen_ifs[hname], hname))

            seen_ifs[hname] += 1
    return host_lines, allocator
1356
def get_access_to_testbeds(self, testbeds, access_user, allocated,
        tbparams, masters):
    """
    Request access to each testbed named in testbeds by calling
    self.get_access with access_user and masters.  tbparams is handed to
    get_access for each testbed, and every testbed for which the call
    returns is recorded in allocated with the value 1.
    """
    for testbed in testbeds:
        self.get_access(testbed, None, tbparams, access_user, masters)
        allocated[testbed] = 1
1368
def split_topology(self, top, topo, testbeds):
    """
    Fill topo with one sub-topology per testbed.  Each is a clone of top
    from which every element whose 'testbed' attribute is missing or names
    another testbed has been removed, together with that element's
    interfaces on their substrates.  Indices are rebuilt on each clone.

    Raises service_error if an interface is not found on one of its
    substrates.
    """
    for tb in testbeds:
        sub_topo = top.clone()
        topo[tb] = sub_topo
        # Iterate over a snapshot so elements can be removed from the clone
        for elem in list(sub_topo.elements):
            home = elem.get_attribute('testbed')
            # NB: elements without a testbed attribute won't appear in any
            # sub topologies.
            if home and home == tb:
                continue
            for iface in elem.interface:
                for substrate in iface.subs:
                    try:
                        substrate.interfaces.remove(iface)
                    except ValueError:
                        raise service_error(service_error.internal,
                                "Can't remove interface??")
            sub_topo.elements.remove(elem)
        sub_topo.make_indices()
1390
def wrangle_software(self, expid, top, topo, tbparams):
    """
    Copy software out to the repository directory, allocate permissions and
    rewrite the segment topologies to look for the software in local
    places.

    The packages copied are the locations in self.fedkit and
    self.gatewaykit plus the location of every software entry on the
    elements of top.  Each is fetched as a URL (bare paths are turned into
    file:// URLs) into <self.repodir>/<expid>/software, and every
    allocation in tbparams is given access to the copy.  Software
    locations in the topologies in topo are then replaced by the
    repository URLs.

    Raises service_error if the directory cannot be created or a package
    cannot be opened or copied.
    """

    # Copy the rpms and tarfiles to a distribution directory from
    # which the federants can retrieve them
    linkpath = "%s/software" % expid
    softdir ="%s/%s" % ( self.repodir, linkpath)
    # XXX
    urlpath = re.sub("/tmp", "", linkpath)
    softmap = { }
    # The kits are lists of (install, location) tuples.  This comprehension
    # unwraps them into a single list of locations that initializes the
    # set of packages.
    pkgs = set([ t for l in [self.fedkit, self.gatewaykit] \
            for p, t in l ])
    # Elements without software are skipped, as in the rewrite loop below
    pkgs.update([x.location for e in top.elements \
            for x in (getattr(e, 'software', None) or [])])
    try:
        os.makedirs(softdir)
    except EnvironmentError as e:
        # os.makedirs reports failure with OSError
        raise service_error(service_error.internal,
                "Cannot create software directory: %s" % e)
    # The actual copying.  Everything's converted into a url for copying.
    for pkg in pkgs:
        loc = pkg

        scheme, host, path = urlparse(loc)[0:3]
        dest = os.path.basename(path)
        if not scheme:
            if not loc.startswith('/'):
                loc = "/%s" % loc
            loc = "file://%s" %loc
        try:
            u = urlopen(loc)
        except Exception as e:
            raise service_error(service_error.req,
                    "Cannot open %s: %s" % (loc, e))
        try:
            try:
                # Packages are rpms and tarfiles: copy them as bytes
                f = open("%s/%s" % (softdir, dest) , "wb")
                try:
                    self.log.debug("Writing %s/%s" % (softdir,dest) )
                    data = u.read(4096)
                    while data:
                        f.write(data)
                        data = u.read(4096)
                finally:
                    f.close()
            finally:
                u.close()
        except Exception as e:
            raise service_error(service_error.internal,
                    "Could not copy %s: %s" % (loc, e))
        softmap[pkg] = \
                "%s/%s/%s" %\
                ( self.repo_url, urlpath, dest)

        # Allow the individual segments to access the software.
        for tb in tbparams.keys():
            self.auth.set_attribute(tbparams[tb]['allocID']['fedid'],
                    "/%s/%s" % ( urlpath, dest))

    # Convert the software locations in the segments into the local
    # copies on this host
    for soft in [ s for tb in topo.values() \
            for e in tb.elements \
                if getattr(e, 'software', False) \
                    for s in e.software ]:
        if soft.location in softmap:
            soft.location = softmap[soft.location]
1461
1462
def new_experiment(self, req, fid):
    """
    The external interface to empty initial experiment creation called from
    the dispatcher.

    Checks that fid holds the 'new' attribute and appears in the access
    map, generates a fedid and certificate for the experiment, creates its
    state entry with status 'empty' and grants fid, the experiment itself
    and the override fedids access to it.

    Returns the experiment's local name and fedid, the status 'empty' and
    the certificate.  Raises service_error if access is denied, the
    request has no NewRequestBody or a temporary directory cannot be
    created.
    """
    if not self.auth.check_attribute(fid, 'new'):
        raise service_error(service_error.access, "New access denied")

    # Validate everything before creating the temporary directory so that
    # a bad request leaves nothing behind on disk.
    try:
        self.accessdb[fid]
    except KeyError:
        raise service_error(service_error.internal,
                "Access map and authorizer out of sync in " + \
                        "new_experiment for fedid %s"  % fid)

    req = req.get('NewRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no NewRequestBody)")

    try:
        tmpdir = tempfile.mkdtemp(prefix="split-")
    except EnvironmentError:
        # mkdtemp reports failure with OSError
        raise service_error(service_error.internal, "Cannot create tmp dir")

    # Generate an ID for the experiment (slice) and a certificate that the
    # allocator can use to prove they own it.  We'll ship it back through
    # the encrypted connection.
    (expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)

    #now we're done with the tmpdir, and it should be empty
    if self.cleanup:
        self.log.debug("[new_experiment]: removing %s" % tmpdir)
        os.rmdir(tmpdir)
    else:
        self.log.debug("[new_experiment]: not removing %s" % tmpdir)

    eid = self.create_experiment_state(fid, req, expid, expcert,
            state='empty')

    # Let users touch the state
    self.auth.set_attribute(fid, expid)
    self.auth.set_attribute(expid, expid)
    # Override fedids can manipulate state as well
    for o in self.overrides:
        self.auth.set_attribute(o, expid)

    rv = {
            'experimentID': [
                {'localname' : eid }, { 'fedid': copy.copy(expid) }
            ],
            'experimentStatus': 'empty',
            'experimentAccess': { 'X509' : expcert }
        }

    return rv
1527
def create_experiment(self, req, fid):
    """
    The external interface to experiment creation called from the
    dispatcher.

    The request names an existing experiment (by fedid or local name) and
    carries an ns2 description.  The description is translated to topdl
    (by the remote translator at self.splitter_url if set, otherwise by
    the local tcl splitter), IP addresses are allocated, access to the
    testbeds is requested, the topology is split per testbed, and the
    hosts file, ssh keys and software are published in the repository.
    A background thread running allocate_resources then starts the
    segments.

    Returns the experiment's local name and fedid with the status
    'starting'.  A service_error raised while preparing the experiment
    removes the experiment's state and is re-raised to the caller.
    """

    req = req.get('CreateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no CreateRequestBody)")

    # Get the experiment access
    exp = req.get('experimentID', None)
    if not exp:
        raise service_error(service_error.req, "No request?")
    if 'fedid' in exp:
        key = exp['fedid']
        expid = key
        eid = None
    elif 'localname' in exp:
        key = exp['localname']
        eid = key
        expid = None
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    try:
        tmpdir = tempfile.mkdtemp(prefix="split-")
        os.mkdir(tmpdir+"/keys")
    except EnvironmentError:
        # mkdtemp and mkdir report failure with OSError
        raise service_error(service_error.internal, "Cannot create tmp dir")

    gw_pubkey_base = "fed.%s.pub" % self.ssh_type
    gw_secretkey_base = "fed.%s" % self.ssh_type
    gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
    gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
    tclfile = tmpdir + "/experiment.tcl"
    tbparams = { }
    try:
        access_user = self.accessdb[fid]
    except KeyError:
        raise service_error(service_error.internal,
                "Access map and authorizer out of sync in " + \
                        "create_experiment for fedid %s"  % fid)

    pid = "dummy"
    gid = "dummy"

    # The tcl parser needs to read a file so put the content into that file
    descr=req.get('experimentdescription', None)
    if not descr:
        raise service_error(service_error.req, "No experiment description")
    file_content=descr.get('ns2description', None)
    if not file_content:
        raise service_error(service_error.req,
                "Only ns2descriptions supported")
    try:
        f = open(tclfile, 'w')
        try:
            f.write(file_content)
        finally:
            f.close()
    except IOError:
        raise service_error(service_error.internal,
                "Cannot write temp experiment description")

    # Mark the experiment as starting and fill in whichever of the two
    # identifiers the request did not supply.
    self.state_lock.acquire()
    try:
        if key in self.state:
            self.state[key]['experimentStatus'] = "starting"
            for e in self.state[key].get('experimentID',[]):
                if not expid and 'fedid' in e:
                    expid = e['fedid']
                elif not eid and 'localname' in e:
                    eid = e['localname']
    finally:
        self.state_lock.release()

    if not (eid and expid):
        raise service_error(service_error.internal,
                "Cannot find local experiment info!?")

    try:
        # This catches exceptions to clear the placeholder if necessary
        try:
            self.generate_ssh_keys(gw_secretkey, self.ssh_type)
        except ValueError:
            raise service_error(service_error.server_config,
                    "Bad key type (%s)" % self.ssh_type)

        # Copy the service request
        tb_services = list(req.get('service',[]))
        # Translate to topdl
        if self.splitter_url:
            self.log.debug("Calling remote topdl translator at %s" % \
                    self.splitter_url)
            top = self.remote_ns2topdl(self.splitter_url, file_content)
        else:
            tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
                str(self.muxmax), '-m', 'dummy']

            tclcmd.extend([pid, gid, eid, tclfile])

            self.log.debug("running local splitter %s", " ".join(tclcmd))
            # This is just fantastic.  As a side effect the parser copies
            # tb_compat.tcl into the current directory, so that directory
            # must be writable by the fedd user.  Doing this in the
            # temporary subdir ensures this is the case.
            tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
                    cwd=tmpdir)
            split_data = tclparser.stdout

            top = topdl.topology_from_xml(file=split_data, top="experiment")

        hosts, ip_allocator = self.allocate_ips_to_topo(top)
        # Find the testbeds to look up
        testbeds = set([ a.value for e in top.elements \
                for a in e.attribute \
                    if a.attribute == 'testbed'])

        masters = { }           # testbeds exporting services
        for s in tb_services:
            # If this is a project_export request with the imports field
            # blank, fill it in.
            if s.get('name', '') == 'project_export':
                if 'import' not in s or len(s['import']) == 0:
                    s['import'] = [ tb for tb in testbeds \
                            if tb not in s.get('export',[])]
            # Add the service to masters
            for tb in s.get('export', []):
                if s.get('name', None) and s.get('import', None):
                    if tb not in masters:
                        masters[tb] = [ ]

                    params = { }
                    if 'fedAttr' in s:
                        for a in s['fedAttr']:
                            params[a.get('attribute', '')] = \
                                    a.get('value','')

                    masters[tb].append(federated_service(name=s['name'],
                        exporter=tb, importers=s.get('import',[]),
                        params=params))
                else:
                    self.log.error(
                            'Testbed service does not have name ' + \
                                    'and importers')


        allocated = { }         # Testbeds we can access
        topo ={ }               # Sub topologies
        connInfo = { }          # Connection information
        self.get_access_to_testbeds(testbeds, access_user, allocated,
                tbparams, masters)

        self.split_topology(top, topo, testbeds)

        # Copy configuration files into the remote file store
        # The config urlpath
        configpath = "/%s/config" % expid
        # The config file system location
        configdir ="%s%s" % ( self.repodir, configpath)
        try:
            os.makedirs(configdir)
        except EnvironmentError as e:
            # os.makedirs reports failure with OSError
            raise service_error(service_error.internal,
                    "Cannot create config directory: %s" % e)
        try:
            f = open("%s/hosts" % configdir, "w")
            try:
                f.write('\n'.join(hosts))
            finally:
                f.close()
        except IOError as e:
            raise service_error(service_error.internal,
                    "Cannot write hosts file: %s" % e)
        try:
            copy_file("%s" % gw_pubkey, "%s/%s" % \
                    (configdir, gw_pubkey_base))
            copy_file("%s" % gw_secretkey, "%s/%s" % \
                    (configdir, gw_secretkey_base))
        except IOError as e:
            raise service_error(service_error.internal,
                    "Cannot copy keyfiles: %s" % e)

        # Allow the individual testbeds to access the configuration files.
        for tb in tbparams.keys():
            asignee = tbparams[tb]['allocID']['fedid']
            for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
                self.auth.set_attribute(asignee, "%s/%s" % (configpath, f))

        part = experiment_partition(self.auth, self.store_url, self.tbmap,
                self.muxmax)
        part.add_portals(top, topo, eid, masters, tbparams, ip_allocator,
                connInfo, expid)
        # Now get access to the dynamic testbeds
        for k, t in topo.items():
            if not t.get_attribute('dynamic'):
                continue
            tb = t.get_attribute('testbed')
            if not tb:
                raise service_error(service_error.internal,
                        "Dynamic allocation from no testbed!?")
            # Same arguments as the call in get_access_to_testbeds
            self.get_access(tb, None, tbparams, access_user, masters)
            # The allocation is filed under the sub-topology's key
            tbparams[k] = tbparams[tb]
            del tbparams[tb]
            allocated[k] = 1
            store_keys = t.get_attribute('store_keys')
            # Give the testbed access to keys it exports or imports
            if store_keys:
                for sk in store_keys.split(" "):
                    self.auth.set_attribute(\
                            tbparams[k]['allocID']['fedid'], sk)

        self.wrangle_software(expid, top, topo, tbparams)

        vtopo = topdl.topology_to_vtopo(top)
        vis = self.genviz(vtopo)

        # save federant information
        for k in allocated.keys():
            tbparams[k]['federant'] = {
                    'name': [ { 'localname' : eid} ],
                    'allocID' : tbparams[k]['allocID'],
                    'uri': tbparams[k]['uri'],
                }
            if 'emulab' in tbparams[k]:
                tbparams[k]['federant']['emulab'] = \
                        tbparams[k]['emulab']

        self.state_lock.acquire()
        try:
            self.state[eid]['vtopo'] = vtopo
            self.state[eid]['vis'] = vis
            self.state[expid]['federant'] = \
                    [ tbparams[tb]['federant'] for tb in tbparams.keys() \
                        if 'federant' in tbparams[tb] ]
            if self.state_filename:
                self.write_state()
        finally:
            self.state_lock.release()
    except service_error:
        # If something goes wrong in the parse (usually an access error)
        # clear the placeholder state.  From here on out the code delays
        # exceptions.  Failing at this point returns a fault to the remote
        # caller.

        self.state_lock.acquire()
        try:
            self.state.pop(eid, None)
            self.state.pop(expid, None)
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()
        # Bare raise keeps the original traceback
        raise


    # Start the background swapper and return the starting state.  From
    # here on out, the state will stick around a while.

    # Let users touch the state
    self.auth.set_attribute(fid, expid)
    self.auth.set_attribute(expid, expid)
    # Override fedids can manipulate state as well
    for o in self.overrides:
        self.auth.set_attribute(o, expid)

    # Create a logger that logs to the experiment's state object as well as
    # to the main log file.
    alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
    alloc_collector = self.list_log(self.state[eid]['log'])
    h = logging.StreamHandler(alloc_collector)
    # XXX: there should be a global one of these rather than repeating the
    # code.
    h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
                '%d %b %y %H:%M:%S'))
    alloc_log.addHandler(h)

    attrs = [
            {
                'attribute': 'ssh_pubkey',
                'value': '%s/%s/config/%s' % \
                        (self.repo_url, expid, gw_pubkey_base)
            },
            {
                'attribute': 'ssh_secretkey',
                'value': '%s/%s/config/%s' % \
                        (self.repo_url, expid, gw_secretkey_base)
            },
            {
                'attribute': 'hosts',
                'value': '%s/%s/config/hosts' % \
                        (self.repo_url, expid)
            },
            {
                'attribute': 'experiment_name',
                'value': eid,
            },
        ]

    # transit and disconnected testbeds may not have a connInfo entry.
    # Fill in the blanks.
    for t in allocated.keys():
        if t not in connInfo:
            connInfo[t] = { }

    # Start a thread to do the resource allocation
    t = Thread(target=self.allocate_resources,
            args=(allocated, masters, eid, expid, tbparams,
                topo, tmpdir, alloc_log, alloc_collector, attrs, connInfo),
            name=eid)
    t.start()

    rv = {
            'experimentID': [
                {'localname' : eid }, { 'fedid': copy.copy(expid) }
            ],
            'experimentStatus': 'starting',
        }

    return rv
1850
def get_experiment_fedid(self, key):
    """
    Find the fedid associated with the localname key in the state database.

    Returns the single 'fedid' found in the experiment's 'experimentID'
    list, or None if key is not in the state.  Raises a service_error with
    the internal code if the state entry is not a dictionary, has no
    'experimentID' list, or does not hold exactly one fedid.  The state
    lock is held for the lookup and always released.
    """
    self.state_lock.acquire()
    try:
        if key not in self.state:
            return None

        entry = self.state[key]
        if not isinstance(entry, dict):
            raise service_error(service_error.internal,
                    "Unexpected state for %s" % key)

        try:
            kl = [ f['fedid'] for f in entry['experimentID'] \
                    if 'fedid' in f ]
        except KeyError:
            kl = [ ]

        # The messages are joined before formatting: "%" binds tighter
        # than "+", so formatting only the second literal would fail.
        if not kl:
            raise service_error(service_error.internal,
                    ("No fedid for experiment %s when getting " + \
                            "fedid(!?)") % key)
        if len(kl) > 1:
            raise service_error(service_error.internal,
                    ("multiple fedids for experiment %s when " + \
                            "getting fedid(!?)") % key)
        return kl[0]
    finally:
        self.state_lock.release()
1882
def check_experiment_access(self, fid, key):
    """
    Confirm that fid may access the experiment named by key.  The access
    attribute is always the experiment's fedid, so a key that is not a
    fedid is first translated with get_experiment_fedid.

    Returns True when access is allowed and raises a service_error with
    the access code otherwise.
    """
    if isinstance(key, fedid):
        exp_fedid = key
    else:
        exp_fedid = self.get_experiment_fedid(key)

    if not self.auth.check_attribute(fid, exp_fedid):
        raise service_error(service_error.access, "Access Denied")
    return True
1896
1897
def get_handler(self, path, fid):
    """
    Map a request for path by fid to a file under self.repodir.

    Returns (file name, "application/binary") when fid holds the
    attribute path, and (None, None) otherwise.  Every request is logged.
    """
    self.log.info("Get handler %s %s" % (path, fid))
    if not self.auth.check_attribute(fid, path):
        return (None, None)
    local_file = "%s/%s" % (self.repodir, path)
    return (local_file, "application/binary")
1904
def get_vtopo(self, req, fid):
    """
    Return the stored virtual topology for this experiment.

    The experiment is named in the request's VtopoRequestBody by 'fedid'
    or 'localname', and fid must have access to it.  The result echoes
    that identifier along with the stored 'vtopo'.

    Raises service_error for a malformed request, for an unknown
    experiment, and (with the partial code) for an experiment that has no
    vtopo yet.
    """
    rv = None
    state = None

    req = req.get('VtopoRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no VtopoRequestBody)")
    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")
    if 'fedid' in exp:
        key = exp['fedid']
        keytype = "fedid"
    elif 'localname' in exp:
        key = exp['localname']
        keytype = "localname"
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    # try/finally so the lock is released even if the state entry is
    # missing a field
    self.state_lock.acquire()
    try:
        if key in self.state:
            if 'vtopo' in self.state[key]:
                rv = { 'experiment' : {keytype: key },\
                        'vtopo': self.state[key]['vtopo'],\
                    }
            else:
                state = self.state[key]['experimentStatus']
    finally:
        self.state_lock.release()

    if rv:
        return rv
    if state:
        raise service_error(service_error.partial,
                "Not ready: %s" % state)
    raise service_error(service_error.req, "No such experiment")
1948
def get_vis(self, req, fid):
    """
    Return the stored visualization for this experiment.

    The experiment is named in the request's VisRequestBody by 'fedid' or
    'localname', and fid must have access to it.  The result echoes that
    identifier along with the stored 'vis'.

    Raises service_error for a malformed request, for an unknown
    experiment, and (with the partial code) for an experiment that has no
    visualization yet.
    """
    rv = None
    state = None

    req = req.get('VisRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no VisRequestBody)")
    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")
    if 'fedid' in exp:
        key = exp['fedid']
        keytype = "fedid"
    elif 'localname' in exp:
        key = exp['localname']
        keytype = "localname"
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    # try/finally so the lock is released even if the state entry is
    # missing a field
    self.state_lock.acquire()
    try:
        if key in self.state:
            if 'vis' in self.state[key]:
                rv = { 'experiment' : {keytype: key },\
                        'vis': self.state[key]['vis'],\
                    }
            else:
                state = self.state[key]['experimentStatus']
    finally:
        self.state_lock.release()

    if rv:
        return rv
    if state:
        raise service_error(service_error.partial,
                "Not ready: %s" % state)
    raise service_error(service_error.req, "No such experiment")
1992
def clean_info_response(self, rv):
    """
    Turn a copy of an experiment's state object into an info response by
    removing what the response does not carry.  rv is modified in place
    and returned.

    The 'owner' entry is dropped, the 'log' list is joined into the
    'allocationLog' string, and federant details are removed: the whole
    'federant' list unless the experiment is active, otherwise just each
    federant's 'allocID' and 'uri'.
    """
    # Remove the owner info (should always be there, but...)
    rv.pop('owner', None)

    # Convert the log into the allocationLog parameter and remove the
    # log entry (with defensive programming)
    rv['allocationLog'] = "".join(rv.get('log', []))
    rv.pop('log', None)

    if rv['experimentStatus'] == 'active':
        # remove the allocationID and uri info from each federant
        for federant in rv.get('federant', []):
            for hidden in ('allocID', 'uri'):
                if hidden in federant:
                    del federant[hidden]
    else:
        rv.pop('federant', None)
    return rv
2017
def get_info(self, req, fid):
    """
    Return all the stored info about this experiment.

    The experiment is named in the request's InfoRequestBody by 'fedid'
    or 'localname', and fid must have access to it.  A deep copy of the
    state is passed through clean_info_response and returned.  Raises
    service_error for a malformed request or an unknown experiment.
    """
    body = req.get('InfoRequestBody', None)
    if not body:
        raise service_error(service_error.req,
                "Bad request format (no InfoRequestBody)")

    exp = body.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")
    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    # The state may be massaged by the service function that called
    # get_info (e.g., encoded for XMLRPC transport) so send a copy of the
    # state.
    snapshot = None
    self.state_lock.acquire()
    if key in self.state:
        snapshot = copy.deepcopy(self.state[key])
    self.state_lock.release()

    if not snapshot:
        raise service_error(service_error.req, "No such experiment")
    return self.clean_info_response(snapshot)
2055
def get_multi_info(self, req, fid):
    """
    Return all the stored info that this fedid can access.

    Walks every key of self.state that is a fedid instance and calls
    check_experiment_access(fid, key) on it.  Experiments for which that
    call raises a service_error with code service_error.access are
    skipped; any other service_error is propagated.  req is not examined.

    Returns { 'info': [ ... ] } where each element is a deep copy of an
    accessible experiment's state passed through clean_info_response.
    The state lock is held for the whole walk and is always released,
    including when an exception escapes.
    """
    rv = { 'info': [ ] }

    self.state_lock.acquire()
    try:
        # Snapshot the fedid keys first so the loop does not iterate the
        # dictionary itself.
        for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
            try:
                self.check_experiment_access(fid, key)
            except service_error as e:
                if e.code == service_error.access:
                    continue
                # Not an access problem: re-raise with the original
                # traceback; the finally clause releases the lock.
                raise

            if key in self.state:
                info = copy.deepcopy(self.state[key])
                rv['info'].append(self.clean_info_response(info))
    finally:
        self.state_lock.release()
    return rv
2079
def terminate_experiment(self, req, fid):
    """
    Swap this experiment out on the federants and delete the shared
    information.

    req is the request dict; its 'TerminateRequestBody' entry must hold an
    'experiment' dict naming the experiment by 'fedid' or 'localname', and
    may hold a 'force' flag (default False).  fid is passed to
    check_experiment_access with the lookup key.

    The work proceeds in phases:
      1. under the state lock, check the experiment's status, collect its
         identifiers and its (uri, allocID) pairs, and mark it
         'terminating';
      2. without the lock, run one terminate_segment per allocation
         through a thread pool and wait for all of them;
      3. release each allocation with release_access;
      4. under the lock again, delete every state entry for the
         experiment;
      5. delete the experiment's values from the synch store.

    Returns { 'experiment': <the request's experiment dict>,
    'deallocationLog': <text logged during the deallocation> }.

    Raises service_error: code req for a malformed request or unknown
    experiment, partial when the experiment is 'starting' or 'terminating'
    and force is not set, internal when the stored state has no status.
    A service_error from release_access is propagated unless the
    experiment had status 'failed' or force is set.
    """
    req = req.get('TerminateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no TerminateRequestBody)")
    force = req.get('force', False)
    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")
    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    dealloc_list = [ ]

    # Create a logger that logs to the dealloc_list as well as to the main
    # log file.
    dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
    h = logging.StreamHandler(self.list_log(dealloc_list))
    # XXX: there should be a global one of these rather than repeating the
    # code.
    h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
        '%d %b %y %H:%M:%S'))
    dealloc_log.addHandler(h)

    # Named loggers live for the life of the process, so the handler added
    # above must be detached on every exit path or each call leaves another
    # handler (and its dealloc_list) attached to the logger.
    try:
        # Allocation/segment ids keyed by the fedid of the allocation (or
        # the federant's index) mapping to a tuple of uri, aid.
        tbparams = { }
        # All the fedids and localnames - the keys of self.state - that
        # refer to this experiment.
        ids = [ ]

        # Hold the lock to generate a consistent tbparams.  It is released
        # for the deallocations and reacquired to remove the experiment
        # state when the termination is complete.
        self.state_lock.acquire()
        try:
            fed_exp = self.state.get(key, None)
            if not fed_exp:
                raise service_error(service_error.req, "No saved state")

            # First make sure that the experiment creation is complete.
            status = fed_exp.get('experimentStatus', None)
            if not status:
                # No status??? trouble
                raise service_error(service_error.internal,
                        "Experiment has no status!?")
            if status in ('starting', 'terminating'):
                if not force:
                    raise service_error(service_error.partial,
                            'Experiment still being created or destroyed')
                self.log.warning('Experiment in %s state ' % status + \
                        'being terminated by force.')

            # experimentID is a list of dicts that are self-describing
            # identifiers.
            for ident in fed_exp.get('experimentID', []):
                if 'fedid' in ident: ids.append(ident['fedid'])
                if 'localname' in ident: ids.append(ident['localname'])

            # Federants lacking a uri or an allocID are skipped.
            for i, fed in enumerate(fed_exp.get('federant', [])):
                try:
                    uri = fed['uri']
                    aid = fed['allocID']
                    k = aid.get('fedid', i)
                except KeyError:
                    continue
                tbparams[k] = (uri, aid)

            fed_exp['experimentStatus'] = 'terminating'
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

        # Stop everyone. NB, wait_for_all waits until a thread starts and
        # then completes, so we can't wait if nothing starts. So, no
        # tbparams, no start.
        if tbparams:
            thread_pool = self.thread_pool(self.nthreads)
            for k, (uri, aid) in tbparams.items():
                # Create and start a thread to stop the segment
                thread_pool.wait_for_slot()
                t = self.pooled_thread(
                        target=self.terminate_segment(log=dealloc_log,
                            testbed=uri,
                            cert_file=self.cert_file,
                            cert_pwd=self.cert_pwd,
                            trusted_certs=self.trusted_certs,
                            caller=self.call_TerminateSegment),
                        args=(uri, aid), name=k,
                        pdata=thread_pool, trace_file=self.trace_file)
                t.start()
            # Wait for completions
            thread_pool.wait_for_all_done()

        # Release the allocations (failed experiments have done this
        # already, and starting experiments may be in odd states, so we
        # ignore errors releasing those allocations).  The try is per
        # allocation so that an ignored error on one allocation does not
        # prevent the remaining ones from being released.
        for uri, aid in tbparams.values():
            try:
                # This releases access by uri
                self.release_access(None, aid, uri=uri)
            except service_error:
                if status != 'failed' and not force:
                    raise

        # Remove the terminated experiment
        self.state_lock.acquire()
        try:
            for ident in ids:
                if ident in self.state: del self.state[ident]
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

        # Delete any synch points associated with this experiment. All
        # synch points begin with the fedid of the experiment.
        # NOTE(review): the 46-character prefix is "fedid:" (6 characters)
        # plus, presumably, a 40-character fedid string - confirm against
        # fedid's string form.
        fedid_keys = set(["fedid:%s" % f for f in ids \
                if isinstance(f, fedid)])
        for k in self.synch_store.all_keys():
            try:
                if len(k) > 45 and k[0:46] in fedid_keys:
                    self.synch_store.del_value(k)
            except synch_store.BadDeletionError:
                pass
        self.write_store()

        return {
                'experiment': exp,
                'deallocationLog': "".join(dealloc_list),
                }
    finally:
        dealloc_log.removeHandler(h)
2231
2232
def GetValue(self, req, fid):
    """
    Get a value from the synchronized store.

    req is the request dict; its 'GetValueRequestBody' entry must hold
    'name' (the key to read) and 'wait' (passed through to the store's
    get_value).  fid must pass self.auth.check_attribute for that name.

    Returns { 'name': name } with a 'value' entry added when the store
    returned something other than None.

    Raises service_error: code req when the body or one of its fields is
    missing, access when fid may not read the name, federant when the
    store reports the key as revoked.
    """
    req = req.get('GetValueRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no GetValueRequestBody)")

    # Report a missing field as a bad request, like a missing body, rather
    # than letting a bare KeyError escape.
    for field in ('name', 'wait'):
        if field not in req:
            raise service_error(service_error.req,
                    "Bad request format (no %s)" % field)
    name = req['name']
    wait = req['wait']

    if not self.auth.check_attribute(fid, name):
        raise service_error(service_error.access, "Access Denied")

    try:
        v = self.synch_store.get_value(name, wait)
    except synch_store.RevokedKeyError:
        # No more synch on this key
        raise service_error(service_error.federant,
                "Synch key %s revoked" % name)

    rv = { 'name': name }
    if v is not None:
        rv['value'] = v
    self.log.debug("[GetValue] got %s from %s", v, name)
    return rv
2259
2260
def SetValue(self, req, fid):
    """
    Set a value in the synchronized store.

    req is the request dict; its 'SetValueRequestBody' entry must hold
    'name' (the key to set) and 'value'.  fid must pass
    self.auth.check_attribute for that name.  After the value is stored,
    write_store is called.

    Returns { 'name': name, 'value': value }.

    Raises service_error: code req when the body or one of its fields is
    missing or when the store reports a collision (value already set),
    access when fid may not set the name, federant when the store reports
    the key as revoked.
    """
    req = req.get('SetValueRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no SetValueRequestBody)")

    # Report a missing field as a bad request, like a missing body, rather
    # than letting a bare KeyError escape.
    for field in ('name', 'value'):
        if field not in req:
            raise service_error(service_error.req,
                    "Bad request format (no %s)" % field)
    name = req['name']
    v = req['value']

    if not self.auth.check_attribute(fid, name):
        raise service_error(service_error.access, "Access Denied")

    try:
        self.synch_store.set_value(name, v)
        self.write_store()
        self.log.debug("[SetValue] set %s to %s", name, v)
    except synch_store.CollisionError:
        # Translate into a service_error
        raise service_error(service_error.req,
                "Value already set: %s" %name)
    except synch_store.RevokedKeyError:
        # No more synch on this key
        raise service_error(service_error.federant,
                "Synch key %s revoked" % name)
    return { 'name': name, 'value': v }

Note: See TracBrowser for help on using the repository browser.

Download in other formats: