Context Navigation

source: fedd/federation/experiment_control.py @ 181cf9c

axis_examplecompt_changesinfo-opsversion-2.00version-3.01version-3.02

Last change on this file since 181cf9c was 0fa1729, checked in by Ted Faber <faber@…>, 15 years ago
bug in cancelling partial swapin
Property mode set to `100644`
File size: 85.4 KB

Rev	Line
[6679c122]	1	#!/usr/local/bin/python
	2
	3	import os,sys
	4
	5	import re
	6	import random
	7	import string
	8	import subprocess
	9	import tempfile
	10	import copy
[eee2b2e]	11	import pickle
[c971895]	12	import logging
[79b6596]	13	import signal
	14	import time
[6679c122]	15
[3441fe3]	16	import traceback
[c971895]	17	# For parsing visualization output and splitter output
	18	import xml.parsers.expat
[3441fe3]	19
[6c57fe9]	20	from threading import Lock, Thread, Condition
	21	from subprocess import call, Popen, PIPE
[6679c122]	22
[db6b092]	23	from urlparse import urlparse
	24	from urllib2 import urlopen
	25
[ec4fb42]	26	from util import *
[51cc9df]	27	from fedid import fedid, generate_fedid
[9460b1e]	28	from remote_service import xmlrpc_handler, soap_handler, service_caller
[c971895]	29	from service_error import service_error
[6679c122]	30
[db6b092]	31	import topdl
[f07fa49]	32	import list_log
[db6b092]	33	from ip_allocator import ip_allocator
	34	from ip_addr import ip_addr
	35
[11a08b0]	36
	37	class nullHandler(logging.Handler):
	38	def emit(self, record): pass
	39
	40	fl = logging.getLogger("fedd.experiment_control")
	41	fl.addHandler(nullHandler())
	42
[ec4fb42]	43	class experiment_control_local:
[0ea11af]	44	"""
	45	Control of experiments that this system can directly access.
	46
	47	Includes experiment creation, termination and information dissemination.
	48	Thread safe.
	49	"""
[79b6596]	50
	51	class ssh_cmd_timeout(RuntimeError): pass
[6679c122]	52
class thread_pool:
    """
    A class to keep track of a set of threads all invoked for the same
    task.  Manages the mutual exclusion of the states.

    The counters self.started and self.terminated are only changed while
    holding the Condition self.changed, and every change notifies all
    waiters.
    """
    def __init__(self, nthreads):
        """
        Start a pool.  nthreads is the number of simultaneously active
        threads that wait_for_slot() allows.
        """
        self.changed = Condition()
        self.started = 0
        self.terminated = 0
        self.nthreads = nthreads

    def acquire(self):
        """
        Get the pool's lock.
        """
        self.changed.acquire()

    def release(self):
        """
        Release the pool's lock.
        """
        self.changed.release()

    def wait(self, timeout = None):
        """
        Wait for a pool thread to start or stop.  The caller must hold the
        pool's lock (see acquire()).
        """
        self.changed.wait(timeout)

    def start(self):
        """
        Called by a pool thread to report starting.
        """
        self.changed.acquire()
        try:
            self.started += 1
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def terminate(self):
        """
        Called by a pool thread to report finishing.
        """
        self.changed.acquire()
        try:
            self.terminated += 1
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def clear(self):
        """
        Clear all pool data.
        """
        self.changed.acquire()
        try:
            self.started = 0
            self.terminated = 0
            self.changed.notifyAll()
        finally:
            self.changed.release()

    def wait_for_slot(self):
        """
        Wait until we have a free slot to start another pooled thread
        """
        self.acquire()
        try:
            while self.started - self.terminated >= self.nthreads:
                self.wait()
        finally:
            self.release()

    def _unfinished(self):
        """
        True if no thread has started since the last clear() or if some
        started thread has not terminated.  Call with the lock held.
        """
        return self.started == 0 or self.started > self.terminated

    def wait_for_all_done(self, timeout=None):
        """
        Wait until all active threads finish (and at least one has
        started).  If a timeout (in seconds) is given, return after waiting
        at most that long for termination; a timeout of 0 just polls.  If
        all threads are done (and one has started since the last clear())
        return True, otherwise False.
        """
        # Compare against None so that a zero timeout is honoured rather
        # than being mistaken for "wait forever".
        deadline = None
        if timeout is not None:
            deadline = time.time() + timeout

        self.acquire()
        try:
            while self._unfinished():
                if deadline is None:
                    self.wait()
                else:
                    remaining = deadline - time.time()
                    if remaining <= 0:
                        break
                    self.wait(remaining)
            # Evaluate the result while still holding the lock so it is
            # consistent with the state that ended the loop.
            return not self._unfinished()
        finally:
            self.release()
[8bc5754]	140
class pooled_thread(Thread):
    """
    One of a set of threads dedicated to a specific task.  Uses the
    thread_pool class above for coordination.

    After the thread has run, self.rv holds the target's return value and
    self.exception holds the exception that ended the target, if any.
    """
    def __init__(self, group=None, target=None, name=None, args=(),
            kwargs=None, pdata=None, trace_file=None):
        """
        Same parameters as threading.Thread, plus pdata (a thread_pool-like
        object with start() and terminate(), or None) and trace_file
        (accepted for callers' benefit; not used by this class).
        """
        # Avoid sharing one default dict between all instances.
        if kwargs is None:
            kwargs = { }
        Thread.__init__(self, group, target, name, args, kwargs)
        self.rv = None          # Return value of the ops in this thread
        self.exception = None   # Exception that terminated this thread
        self.target = target    # Target function to run on start()
        self.args = args        # Args to pass to target
        self.kwargs = kwargs    # Additional kw args
        self.pdata = pdata      # thread_pool for this class
        # Logger for this thread
        self.log = logging.getLogger("fedd.experiment_control")

    def run(self):
        """
        Emulate Thread.run, except add pool data manipulation and error
        logging.  Exceptions from the target are logged and stored in
        self.exception rather than propagated.
        """
        if self.pdata:
            self.pdata.start()

        try:
            if self.target:
                try:
                    # Unpack positional and keyword arguments for the target
                    self.rv = self.target(*self.args, **self.kwargs)
                except service_error:
                    s = sys.exc_info()[1]
                    self.exception = s
                    self.log.error("Thread exception: %s %s" % \
                            (s.code_string(), s.desc))
                except:
                    # Catch-all at the thread boundary: record and log it
                    self.exception = sys.exc_info()[1]
                    self.log.error(("Unexpected thread exception: %s" +\
                            "Trace %s") % (self.exception,\
                                traceback.format_exc()))
        finally:
            # Always report termination so pool waiters are not stranded
            if self.pdata:
                self.pdata.terminate()
[6679c122]	180
[f069052]	181	call_RequestAccess = service_caller('RequestAccess')
	182	call_ReleaseAccess = service_caller('ReleaseAccess')
[cc8d8e9]	183	call_StartSegment = service_caller('StartSegment')
[5ae3857]	184	call_TerminateSegment = service_caller('TerminateSegment')
[f069052]	185	call_Ns2Split = service_caller('Ns2Split')
[058f58e]	186
def __init__(self, config=None, auth=None):
    """
    Initialize the various attributes, most from the config object.

    config is queried through get(section, option[, default]) and
    getboolean(section, option).  auth is the authorizer used to record
    access attributes; if none is given a local one is created.

    Raises service_error (internal) if the SSH public key file is not
    configured or cannot be read, if no SSH private key file is
    configured, or if no accessdb is configured.
    """

    def parse_tarfile_list(tf):
        """
        Parse a tarfile list from the configuration.  This is a set of
        paths and tarfiles separated by spaces.  Returns a list of
        (path, tarfile) pairs; a trailing unpaired word is ignored.
        """
        if tf is None:
            return [ ]
        words = tf.split()
        return list(zip(words[0::2], words[1::2]))

    self.thread_with_rv = experiment_control_local.pooled_thread
    self.thread_pool = experiment_control_local.thread_pool
    self.list_log = list_log.list_log

    # Prefer the experiment_control certificate, fall back to the global one
    self.cert_file = config.get("experiment_control", "cert_file")
    if self.cert_file:
        self.cert_pwd = config.get("experiment_control", "cert_pwd")
    else:
        self.cert_file = config.get("globals", "cert_file")
        self.cert_pwd = config.get("globals", "cert_pwd")

    self.trusted_certs = config.get("experiment_control", "trusted_certs") \
            or config.get("globals", "trusted_certs")

    self.repodir = config.get("experiment_control", "repodir")

    self.exp_stem = "fed-stem"
    self.log = logging.getLogger("fedd.experiment_control")
    set_log_level(config, "experiment_control", self.log)
    self.muxmax = 2
    self.nthreads = 2
    self.randomize_experiments = False

    self.splitter = None
    self.ssh_keygen = "/usr/bin/ssh-keygen"
    self.ssh_identity_file = None

    self.debug = config.getboolean("experiment_control", "create_debug")
    self.cleanup = not config.getboolean("experiment_control",
            "leave_tmpfiles")
    self.state_filename = config.get("experiment_control",
            "experiment_state")
    self.splitter_url = config.get("experiment_control", "splitter_uri")
    self.fedkit = parse_tarfile_list(\
            config.get("experiment_control", "fedkit"))
    self.gatewaykit = parse_tarfile_list(\
            config.get("experiment_control", "gatewaykit"))
    accessdb_file = config.get("experiment_control", "accessdb")

    self.ssh_pubkey_file = config.get("experiment_control",
            "ssh_pubkey_file")
    self.ssh_privkey_file = config.get("experiment_control",
            "ssh_privkey_file")
    # NB for internal master/slave ops, not experiment setup
    self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")

    # fedids that may control any experiment; a comma separated list in
    # the configuration, each optionally prefixed with "fedid:"
    self.overrides = set([])
    ovr = config.get('experiment_control', 'overrides')
    if ovr:
        for o in ovr.split(","):
            o = o.strip()
            if o.startswith('fedid:'):
                o = o[len('fedid:'):]
            self.overrides.add(fedid(hexstr=o))

    self.state = { }
    self.state_lock = Lock()
    self.tclsh = "/usr/local/bin/otclsh"
    self.tcl_splitter = config.get("splitter", "tcl_splitter") or \
            config.get("experiment_control", "tcl_splitter",
                    "/usr/testbed/lib/ns2ir/parse.tcl")
    mapdb_file = config.get("experiment_control", "mapdb")
    self.trace_file = sys.stderr

    self.def_expstart = \
            "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " +\
            "/tmp/federate"
    self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " +\
            "FEDDIR/hosts"
    self.def_gwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " +\
            "/tmp/bridge.log"
    self.def_mgwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " +\
            "/tmp/bridge.log"
    self.def_gwimage = "FBSD61-TUNNEL2"
    self.def_gwtype = "pc"
    self.local_access = { }

    if auth:
        self.auth = auth
    else:
        self.log.error(\
                "[access]: No authorizer initialized, creating local one.")
        # Store the new authorizer on the instance; read_state() and
        # read_accessdb() below use self.auth.
        self.auth = authorizer()

    if self.ssh_pubkey_file:
        try:
            f = open(self.ssh_pubkey_file, 'r')
            try:
                self.ssh_pubkey = f.read()
            finally:
                f.close()
        except IOError:
            raise service_error(service_error.internal,
                    "Cannot read sshpubkey")
    else:
        raise service_error(service_error.internal,
                "No SSH public key file?")

    if not self.ssh_privkey_file:
        raise service_error(service_error.internal,
                "No SSH private key file?")

    if mapdb_file:
        self.read_mapdb(mapdb_file)
    else:
        self.log.warning(
                "[experiment_control] No testbed map, using defaults")
        self.tbmap = {
                'deter':'https://users.isi.deterlab.net:23235',
                'emulab':'https://users.isi.deterlab.net:23236',
                'ucb':'https://users.isi.deterlab.net:23237',
                }

    if accessdb_file:
        self.read_accessdb(accessdb_file)
    else:
        raise service_error(service_error.internal,
                "No accessdb specified in config")

    # Grab saved state.  OK to do this w/o locking because it's read only
    # and only one thread should be in existence that can see self.state at
    # this point.
    if self.state_filename:
        self.read_state()

    # Dispatch tables
    self.soap_services = {\
            'Create': soap_handler('Create', self.create_experiment),
            'Vtopo': soap_handler('Vtopo', self.get_vtopo),
            'Vis': soap_handler('Vis', self.get_vis),
            'Info': soap_handler('Info', self.get_info),
            'MultiInfo': soap_handler('MultiInfo', self.get_multi_info),
            'Terminate': soap_handler('Terminate',
                self.terminate_experiment),
    }

    self.xmlrpc_services = {\
            'Create': xmlrpc_handler('Create', self.create_experiment),
            'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
            'Vis': xmlrpc_handler('Vis', self.get_vis),
            'Info': xmlrpc_handler('Info', self.get_info),
            'MultiInfo': xmlrpc_handler('MultiInfo', self.get_multi_info),
            'Terminate': xmlrpc_handler('Terminate',
                self.terminate_experiment),
    }
[19cc408]	352
[a97394b]	353	# Call while holding self.state_lock
def write_state(self):
    """
    Write a new copy of experiment state after copying the existing state
    to a backup (<state_filename>.bak).

    State format is a simple pickling of the state dictionary.  Failures
    to write or pickle are logged, not raised.  Call while holding
    self.state_lock.
    """
    if os.access(self.state_filename, os.W_OK):
        copy_file(self.state_filename, "%s.bak" % self.state_filename)

    f = None
    try:
        try:
            # Pickles are byte streams, so write the file in binary mode
            f = open(self.state_filename, 'wb')
            pickle.dump(self.state, f)
        except IOError:
            e = sys.exc_info()[1]
            self.log.error("Can't write file %s: %s" % \
                    (self.state_filename, e))
        except pickle.PicklingError:
            e = sys.exc_info()[1]
            self.log.error("Pickling problem: %s" % e)
        except TypeError:
            e = sys.exc_info()[1]
            self.log.error("Pickling problem (TypeError): %s" % e)
    finally:
        # Close explicitly so the state reaches the file before returning
        if f is not None:
            f.close()
[eee2b2e]	374
[a97394b]	375	# Call while holding self.state_lock
def read_state(self):
    """
    Read a new copy of experiment state.  Old state is overwritten.

    State format is a simple pickling of the state dictionary.  If the
    file cannot be opened or unpickled a warning is logged and the current
    state is kept.  For every experiment in the state, the owner, the
    experiment's own fedid and the configured overrides are given the
    experiment id as an authorization attribute, and each allocation id
    is given 'repo/<eid>'.  Call while holding self.state_lock.
    """

    def get_experiment_id(state):
        """
        Pull the fedid experimentID out of the saved state: the first
        entry of state['experimentID'] carrying a 'fedid', or None.
        """
        for e in state.get('experimentID', []):
            if 'fedid' in e:
                return e['fedid']
        return None

    def get_alloc_ids(state):
        """
        Pull the fedids of the identifiers of each allocation from the
        state.  Federants without an allocID fedid are skipped.
        """
        return [ f['allocID']['fedid']
                for f in state.get('federant', [])
                    if 'allocID' in f and 'fedid' in f['allocID'] ]

    f = None
    try:
        try:
            # Pickles are byte streams, so read the file in binary mode.
            # NOTE(review): pickle.load executes whatever the file
            # encodes; the state file must only be writable by trusted
            # users.
            f = open(self.state_filename, "rb")
            self.state = pickle.load(f)
            self.log.debug("[read_state]: Read state from %s" % \
                    self.state_filename)
        except IOError:
            e = sys.exc_info()[1]
            self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
                    % (self.state_filename, e))
        except (pickle.UnpicklingError, EOFError):
            # EOFError is what an empty or truncated state file produces
            e = sys.exc_info()[1]
            self.log.warning(("[read_state]: No saved state: " + \
                    "Unpickling failed: %s") % e)
    finally:
        if f is not None:
            f.close()

    for s in self.state.values():
        try:
            eid = get_experiment_id(s)
            if not eid:
                raise KeyError("No experiment id")
            # Give the owner rights to the experiment
            self.auth.set_attribute(s['owner'], eid)
            # And holders of the eid as well
            self.auth.set_attribute(eid, eid)
            # allow overrides to control experiments as well
            for o in self.overrides:
                self.auth.set_attribute(o, eid)
            # Set permissions to allow reading of the software repo, if
            # any, as well.
            for a in get_alloc_ids(s):
                self.auth.set_attribute(a, 'repo/%s' % eid)
        except KeyError:
            e = sys.exc_info()[1]
            self.log.warning("[read_state]: State ownership or identity " +\
                    "misformatted in %s: %s" % (self.state_filename, e))
[4064742]	443
	444
	445	def read_accessdb(self, accessdb_file):
[866c983]	446	"""
	447	Read the mapping from fedids that can create experiments to their name
	448	in the 3-level access namespace. All will be asserted from this
	449	testbed and can include the local username and porject that will be
	450	asserted on their behalf by this fedd. Each fedid is also added to the
	451	authorization system with the "create" attribute.
	452	"""
	453	self.accessdb = {}
	454	# These are the regexps for parsing the db
	455	name_expr = "[" + string.ascii_letters + string.digits + "\.\-]+"
	456	project_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	457	"\s->\(\s("+name_expr+")\s,\s("+name_expr+")\s\)\s$")
	458	user_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	459	"\s->\s(" + name_expr + ")\s*$")
	460	lineno = 0
	461
	462	# Parse the mappings and store in self.authdb, a dict of
	463	# fedid -> (proj, user)
	464	try:
	465	f = open(accessdb_file, "r")
	466	for line in f:
	467	lineno += 1
	468	line = line.strip()
	469	if len(line) == 0 or line.startswith('#'):
	470	continue
	471	m = project_line.match(line)
	472	if m:
	473	fid = fedid(hexstr=m.group(1))
	474	project, user = m.group(2,3)
	475	if not self.accessdb.has_key(fid):
	476	self.accessdb[fid] = []
	477	self.accessdb[fid].append((project, user))
	478	continue
	479
	480	m = user_line.match(line)
	481	if m:
	482	fid = fedid(hexstr=m.group(1))
	483	project = None
	484	user = m.group(2)
	485	if not self.accessdb.has_key(fid):
	486	self.accessdb[fid] = []
	487	self.accessdb[fid].append((project, user))
	488	continue
	489	self.log.warn("[experiment_control] Error parsing access " +\
	490	"db %s at line %d" % (accessdb_file, lineno))
	491	except IOError:
	492	raise service_error(service_error.internal,
	493	"Error opening/reading %s as experiment " +\
	494	"control accessdb" % accessdb_file)
	495	f.close()
	496
	497	# Initialize the authorization attributes
	498	for fid in self.accessdb.keys():
	499	self.auth.set_attribute(fid, 'create')
[34bc05c]	500
	501	def read_mapdb(self, file):
[866c983]	502	"""
	503	Read a simple colon separated list of mappings for the
	504	label-to-testbed-URL mappings. Clears or creates self.tbmap.
	505	"""
	506
	507	self.tbmap = { }
	508	lineno =0
	509	try:
	510	f = open(file, "r")
	511	for line in f:
	512	lineno += 1
	513	line = line.strip()
	514	if line.startswith('#') or len(line) == 0:
	515	continue
	516	try:
	517	label, url = line.split(':', 1)
	518	self.tbmap[label] = url
	519	except ValueError, e:
	520	self.log.warn("[read_mapdb] Ignored bad line (%d) in " +\
	521	"map db: %s %s" % (lineno, line, e))
	522	except IOError, e:
	523	self.log.warning("[read_mapdb]: No saved map database: Can't " +\
	524	"open %s: %s" % (file, e))
	525	f.close()
	526
def generate_ssh_keys(self, dest, type="rsa" ):
    """
    Generate a set of keys for the gateways to use to talk.

    Keys are of type type ("rsa" or "dsa", case-insensitive) and are
    stored in the required dest file by running self.ssh_keygen with an
    empty passphrase.

    Raises ValueError for an unsupported key type and service_error
    (internal) if /dev/null cannot be opened or the key generator exits
    with a non-zero status.
    """
    valid_types = ("rsa", "dsa")
    t = type.lower()
    if t not in valid_types:
        raise ValueError("Unsupported ssh key type: %s" % type)
    cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]

    try:
        trace = open("/dev/null", "w")
    except IOError:
        raise service_error(service_error.internal,
                "Cannot open /dev/null??")

    try:
        # call() returns the exit status; it may raise OSError if the
        # program cannot be executed.
        self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
        rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
    finally:
        trace.close()

    if rv != 0:
        raise service_error(service_error.internal,
                "Cannot generate nonce ssh keys.  %s return code %d" \
                        % (self.ssh_keygen, rv))
[6679c122]	551
def gentopo(self, str):
    """
    Build the topology data structure from the splitter's XML
    representation of it.

    The XML is expected to look like:
        <experiment>
            <nodes>
                <node><vname></vname><ips>ip1:ip2</ips></node>
            </nodes>
            <lans>
                <lan>
                    <vname></vname><vnode></vnode><ip></ip>
                    <bandwidth></bandwidth><member>node:port</member>
                </lan>
            </lans>

    Returns a dict with keys 'node' and 'lan', each a list of dicts keyed
    by sub-element name.  bandwidth values are converted to int and delay
    values to float; the other recognized sub-elements stay strings.
    """
    class topo_parse:
        """
        Expat callback holder that accumulates the topology.
        """
        def __init__(self):
            # Conversion applied to the text of each recognized
            # sub-element.  (The builtin str is shadowed by gentopo's
            # parameter, hence the explicit identity function.)
            as_text = lambda text: text
            self.convert = { 'bandwidth': int, 'delay': float }
            for tag in ('vname', 'vnode', 'ips', 'ip', 'member'):
                self.convert[tag] = as_text
            # The final data structure
            self.nodes = [ ]
            self.lans = [ ]
            self.topo = { 'node': self.nodes, 'lan': self.lans }
            self.element = { }  # Element currently being assembled
            self.chars = ""     # Text collected since the last sub-element

        def end_element(self, name):
            # A closing node/lan tag files the assembled element in the
            # matching list; a closing recognized sub-element stores its
            # converted text in the element being assembled.
            if name in self.topo:
                self.topo[name].append(self.element)
                self.element = { }
            elif name in self.convert:
                self.element[name] = self.convert[name](self.chars)
                self.chars = ""

        def found_chars(self, data):
            self.chars = self.chars + data.rstrip()

    collector = topo_parse()
    xml_parser = xml.parsers.expat.ParserCreate()
    xml_parser.EndElementHandler = collector.end_element
    xml_parser.CharacterDataHandler = collector.found_chars

    xml_parser.Parse(str)

    return collector.topo
	619
[0d830de]	620
	621	def genviz(self, topo):
[866c983]	622	"""
	623	Generate the visualization the virtual topology
	624	"""
	625
	626	neato = "/usr/local/bin/neato"
	627	# These are used to parse neato output and to create the visualization
	628	# file.
	629	vis_re = re.compile('^\s"?([\w\-]+)"?\s+\[.pos="(\d+),(\d+)"')
	630	vis_fmt = "<node><name>%s</name><x>%s</x><y>%s</y><type>" + \
	631	"%s</type></node>"
	632
	633	try:
	634	# Node names
	635	nodes = [ n['vname'] for n in topo['node'] ]
	636	topo_lans = topo['lan']
[cc8d8e9]	637	except KeyError, e:
	638	raise service_error(service_error.internal, "Bad topology: %s" %e)
[866c983]	639
	640	lans = { }
	641	links = { }
	642
	643	# Walk through the virtual topology, organizing the connections into
	644	# 2-node connections (links) and more-than-2-node connections (lans).
	645	# When a lan is created, it's added to the list of nodes (there's a
	646	# node in the visualization for the lan).
	647	for l in topo_lans:
	648	if links.has_key(l['vname']):
	649	if len(links[l['vname']]) < 2:
	650	links[l['vname']].append(l['vnode'])
	651	else:
	652	nodes.append(l['vname'])
	653	lans[l['vname']] = links[l['vname']]
	654	del links[l['vname']]
	655	lans[l['vname']].append(l['vnode'])
	656	elif lans.has_key(l['vname']):
	657	lans[l['vname']].append(l['vnode'])
	658	else:
	659	links[l['vname']] = [ l['vnode'] ]
	660
	661
	662	# Open up a temporary file for dot to turn into a visualization
	663	try:
	664	df, dotname = tempfile.mkstemp()
	665	dotfile = os.fdopen(df, 'w')
	666	except IOError:
	667	raise service_error(service_error.internal,
	668	"Failed to open file in genviz")
	669
[db6b092]	670	try:
	671	dnull = open('/dev/null', 'w')
	672	except IOError:
	673	service_error(service_error.internal,
[886307f]	674	"Failed to open /dev/null in genviz")
	675
[866c983]	676	# Generate a dot/neato input file from the links, nodes and lans
	677	try:
	678	print >>dotfile, "graph G {"
	679	for n in nodes:
	680	print >>dotfile, '\t"%s"' % n
	681	for l in links.keys():
	682	print >>dotfile, '\t"%s" -- "%s"' % tuple(links[l])
	683	for l in lans.keys():
	684	for n in lans[l]:
	685	print >>dotfile, '\t "%s" -- "%s"' % (n,l)
	686	print >>dotfile, "}"
	687	dotfile.close()
	688	except TypeError:
	689	raise service_error(service_error.internal,
	690	"Single endpoint link in vtopo")
	691	except IOError:
	692	raise service_error(service_error.internal, "Cannot write dot file")
	693
	694	# Use dot to create a visualization
	695	dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005', '-Gmaxiter=2000',
[886307f]	696	'-Gpack=true', dotname], stdout=PIPE, stderr=dnull,
[db6b092]	697	close_fds=True)
	698	dnull.close()
[866c983]	699
	700	# Translate dot to vis format
	701	vis_nodes = [ ]
	702	vis = { 'node': vis_nodes }
	703	for line in dot.stdout:
	704	m = vis_re.match(line)
	705	if m:
	706	vn = m.group(1)
	707	vis_node = {'name': vn, \
	708	'x': float(m.group(2)),\
	709	'y' : float(m.group(3)),\
	710	}
	711	if vn in links.keys() or vn in lans.keys():
	712	vis_node['type'] = 'lan'
	713	else:
	714	vis_node['type'] = 'node'
	715	vis_nodes.append(vis_node)
	716	rv = dot.wait()
	717
	718	os.remove(dotname)
	719	if rv == 0 : return vis
	720	else: return None
[d0ae12d]	721
def get_access(self, tb, nodes, user, tbparam, master, export_project,
        access_user):
    """
    Get access to testbed through fedd and set the parameters for that tb

    tb is the testbed label (looked up in self.tbmap), nodes an optional
    list of "image:hardware:count" strings, user the list of user
    descriptions whose 'access' entries supply the service SSH keys, and
    access_user a sequence of (project, user) pairs that are tried in
    order until one is granted access.  When tb is the master,
    export_project is included in the request.  On success tbparam[tb] is
    filled in from the response.

    Raises service_error: server_config for an unknown testbed, req if no
    service SSH key is supplied, access if every (project, user) pair is
    denied, protocol for a malformed response, and internal if an emulab
    response names no experiment creation user.
    """
    uri = self.tbmap.get(tb, None)
    if not uri:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    # currently this lumps all users into one service access group
    service_keys = [ a for u in user \
            for a in u.get('access', []) \
                if 'sshPubkey' in a ]

    if len(service_keys) == 0:
        raise service_error(service_error.req,
                "Must have at least one SSH pubkey for services")

    # Stays None if access_user is empty or every attempt is denied
    r = None
    for p, u in access_user:
        self.log.debug(("[get_access] Attempting access from (%s, %s) " + \
                "to %s") %  ((p or "None"), u, uri))

        req = {
                'destinationTestbed' : { 'uri' : uri },
                'allocID' : { 'localname': 'test' },
                'createAccess' : [ { 'sshPubkey' : self.ssh_pubkey } ],
                'serviceAccess' : service_keys
            }
        if p:
            # Request with user and project specified
            req['project'] = {
                    'name': {'localname': p},
                    'user': [ {'userID': { 'localname': u } } ],
                }
            req['user'] = user
        else:
            # Request with only user specified
            req['user'] = [ {'userID': { 'localname': u } } ]

        if tb == master:
            # NB, the export_project parameter is a dict that includes
            # the type
            req['exportProject'] = export_project

        # node resources if any
        if nodes:
            rnodes = [ ]
            for n in nodes:
                rn = { }
                image, hw, count = n.split(":")
                if image: rn['image'] = [ image ]
                if hw: rn['hardware'] = [ hw ]
                if count and int(count) > 0: rn['count'] = int(count)
                rnodes.append(rn)
            req['resources'] = { 'node': rnodes }

        try:
            if uri in self.local_access:
                # Local access call
                req = { 'RequestAccessRequestBody' : req }
                r = self.local_access[uri].RequestAccess(req,
                        fedid(file=self.cert_file))
                r = { 'RequestAccessResponseBody' : r }
            else:
                r = self.call_RequestAccess(uri, req,
                        self.cert_file, self.cert_pwd, self.trusted_certs)
        except service_error:
            e = sys.exc_info()[1]
            if e.code == service_error.access:
                # Denied for this pair; try the next one
                self.log.debug("[get_access] Access denied")
                r = None
                continue
            # Anything else is fatal; re-raise with the original traceback
            raise

        if 'RequestAccessResponseBody' in r:
            # Through to here we have a valid response, not a fault.
            # Access denied is a fault, so something better or worse than
            # access denied has happened.
            r = r['RequestAccessResponseBody']
            self.log.debug("[get_access] Access granted")
            break
        else:
            raise service_error(service_error.protocol,
                    "Bad proxy response")

    if not r:
        raise service_error(service_error.access,
                "Access denied by %s (%s)" % (tb, uri))

    if 'emulab' in r:
        e = r['emulab']
        p = e['project']
        tbparam[tb] = {
                "boss": e['boss'],
                "host": e['ops'],
                "domain": e['domain'],
                "fs": e['fileServer'],
                "eventserver": e['eventServer'],
                "project": unpack_id(p['name']),
                "emulab" : e,
                "allocID" : r['allocID'],
                "uri": uri,
                }
        # Make the testbed name be the label the user applied
        p['testbed'] = {'localname': tb }

        for u in p['user']:
            role = u.get('role', None)
            if role == 'experimentCreation':
                tbparam[tb]['user'] = unpack_id(u['userID'])
                break
        else:
            # No user with the experimentCreation role was returned
            raise service_error(service_error.internal,
                    "No createExperimentUser from %s" %tb)
        # Add attributes to parameter space.  We don't allow attributes to
        # overlay any parameters already installed.
        for a in e.get('fedAttr', []):
            try:
                if a['attribute'] and \
                        isinstance(a['attribute'], basestring)\
                        and a['attribute'].lower() not in tbparam[tb]:
                    tbparam[tb][a['attribute'].lower()] = a['value']
            except KeyError:
                self.log.error("Bad attribute in response: %s" % a)
    else:
        tbparam[tb] = {
            "allocID" : r['allocID'],
            "uri": uri,
            }
[db6b092]	860
[e19b75c]	861
def release_access(self, tb, aid, uri=None):
    """
    Release access to testbed through fedd

    aid is the allocation id to release.  The service contacted is uri if
    given, otherwise the entry for tb in self.tbmap; a service present in
    self.local_access is called directly, any other through
    call_ReleaseAccess.  Raises service_error (server_config) when no URI
    can be determined.  The response is not examined.
    """
    target = uri or self.tbmap.get(tb, None)
    if not target:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    if target not in self.local_access:
        resp = self.call_ReleaseAccess(target, {'allocID': aid},
                self.cert_file, self.cert_pwd, self.trusted_certs)
    else:
        local = self.local_access[target]
        resp = local.ReleaseAccess(
                { 'ReleaseAccessRequestBody' : {'allocID': aid},},
                fedid(file=self.cert_file))
        resp = { 'ReleaseAccessResponseBody': resp }

    # better error coding
[db6b092]	883
def remote_splitter(self, uri, desc, master):
    """
    Ask the Ns2Split service at uri to split the ns2 description desc and
    return the lines of its output.

    The request also names the master testbed and says whether a fedkit
    and a gatewaykit are configured.  Raises service_error (protocol) if
    the response has no Ns2SplitResponseBody or that body has no output.
    """
    request = {
            'description' : { 'ns2description': desc },
            'master': master,
            'include_fedkit': bool(self.fedkit),
            'include_gatewaykit': bool(self.gatewaykit)
        }

    reply = self.call_Ns2Split(uri, request, self.cert_file, self.cert_pwd,
            self.trusted_certs)

    if 'Ns2SplitResponseBody' not in reply:
        raise service_error(service_error.protocol, "Bad splitter response")

    body = reply['Ns2SplitResponseBody']
    if 'output' not in body:
        raise service_error(service_error.protocol,
                "Bad splitter response (no output)")

    return body['output'].splitlines()
[cc8d8e9]	905
class start_segment:
    """
    Callable that issues one StartSegment request through caller and
    records the response.

    caller is invoked as caller(uri, request, cert_file, cert_pwd,
    trusted_certs).  After a successful call self.response holds the
    response; any allocationLog in it is copied line by line to
    log_collector (if one was given).
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None,
            log_collector=None):
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep the caller-supplied trust anchors for the service call
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed
        self.log_collector = log_collector
        self.response = None

    def __call__(self, uri, aid, topo, master, attrs=None):
        """
        Start the segment described by topo (which must provide to_dict())
        for allocation fedid aid at uri.  attrs, if given, is sent as the
        request's fedAttr.  Returns True on success; a service_error
        (including a response without a StartSegmentResponseBody) is
        logged and reported as False.
        """
        req = {
                'allocID': { 'fedid' : aid },
                'segmentdescription': {
                    'topdldescription': topo.to_dict(),
                },
                'master': master,
            }
        if attrs:
            req['fedAttr'] = attrs

        try:
            self.log.debug("Calling StartSegment at %s " % uri)
            r = self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            if 'StartSegmentResponseBody' in r:
                lval = r['StartSegmentResponseBody'].get('allocationLog',
                        None)
                if lval and self.log_collector:
                    for line in lval.splitlines(True):
                        self.log_collector.write(line)
                self.response = r
            else:
                raise service_error(service_error.internal,
                        "Bad response!?: %s" %r)
            return True
        except service_error:
            e = sys.exc_info()[1]
            self.log.error("Start segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
[cc8d8e9]	950
	951
[5ae3857]	952
class terminate_segment:
    """
    Callable that issues one TerminateSegment request through caller.

    caller is invoked as caller(uri, request, cert_file, cert_pwd,
    trusted_certs).
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None):
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep the caller-supplied trust anchors for the service call
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed

    def __call__(self, uri, aid ):
        """
        Terminate the segment for allocation id aid at uri.  Returns True
        if the call completes; a service_error is logged and reported as
        False.  The response itself is not examined.
        """
        req = {
                'allocID': aid ,
            }
        try:
            self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            return True
        except service_error:
            e = sys.exc_info()[1]
            self.log.error("Terminate segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
[db6b092]	976
	977
def allocate_resources(self, allocated, master, eid, expid, expcert,
        tbparams, topo, tmpdir, alloc_log=None, log_collector=None,
        attrs=None):
    """
    Start every segment of an experiment and record the outcome in
    self.state.

    Segments are started in three phases, each in pooled threads:
    transit segments one at a time (the vlan each one reports replaces
    the matching "unassignedN" dragon_vlan placeholder in all
    topologies), then all slave segments in parallel, then the master.
    A phase is only entered if nothing has failed so far.

    allocated   -- dict whose keys name the testbeds to start
    master      -- name of the master testbed
    eid, expid  -- local name and fedid keys of the experiment in
                   self.state
    expcert     -- unused here
    tbparams    -- per-testbed parameters ('uri', 'allocID', 'federant')
    topo        -- dict mapping testbed name to its topology
    tmpdir      -- scratch directory, removed on success if self.cleanup
    alloc_log   -- logger for progress messages; self.log if omitted
    log_collector, attrs -- passed through to each start_segment

    On failure the testbeds not in the failed list are terminated, all
    accesses are released and the experiment is marked 'failed'; on
    success it is marked 'active'.  Returns None either way.  Raises
    service_error for missing uris or allocation ids.
    """
    def get_vlan(r):
        # Pull the 'vlan' fedAttr out of a StartSegment reply, if any.
        if 'StartSegmentResponseBody' in r:
            srb = r['StartSegmentResponseBody']
            if 'fedAttr' in srb:
                for a in srb['fedAttr']:
                    if a['attribute'] == 'vlan':
                        return a['value']
        return None

    def get_alloc_id(tb):
        # The fedid under which tb allocated resources for us.
        alloc = tbparams[tb].get('allocID', None)
        if alloc and 'fedid' in alloc:
            return alloc['fedid']
        raise service_error(service_error.internal,
                "No alloc id for testbed %s !?" % tb)

    def get_tbmap_uri(tb):
        uri = self.tbmap.get(tb, None)
        if not uri:
            raise service_error(service_error.internal,
                    "Unknown testbed %s !?" % tb)
        return uri

    def new_starter(tb):
        return self.start_segment(log=log, debug=self.debug,
                testbed=tb, cert_file=self.cert_file,
                cert_pwd=self.cert_pwd,
                trusted_certs=self.trusted_certs,
                caller=self.call_StartSegment,
                log_collector=log_collector)

    def wait_for_pool(what):
        # Block until the pool drains, pinging the log once a minute.
        # Uses log, not alloc_log: alloc_log may legitimately be None.
        mins = 0
        while not thread_pool.wait_for_all_done(60.0):
            mins += 1
            log.info("Waiting for %s (it has been %d mins)" % \
                    (what, mins))

    def failed_names():
        return [th.getName() for th in threads if not th.rv]

    # XXX
    fail_soft = False

    slaves = [k for k in allocated.keys() \
            if k != master and not topo[k].get_attribute('transit')]
    transit = [k for k in allocated.keys() \
            if topo[k].get_attribute('transit')]

    log = alloc_log or self.log

    thread_pool = self.thread_pool(self.nthreads)
    threads = []

    for tb in transit:
        uri = tbparams[tb]['uri']
        aid = get_alloc_id(tb)

        # The digits in the transit name identify the placeholder vlan
        # that this segment's real vlan replaces.
        m = re.search(r'(\d+)', tb)
        if not m:
            raise service_error(service_error.internal,
                    "Bad dynamic allocation name")
        to_repl = "unassigned%s" % m.group(1)

        # Labelled with tb (was master) so failures name the right testbed
        ss = new_starter(tb)
        t = self.pooled_thread(
                target=ss,
                args=(uri, aid, topo[tb], False, attrs),
                name=tb, pdata=thread_pool, trace_file=self.trace_file)
        threads.append(t)
        t.start()
        # Transit segments are started strictly one after another.
        wait_for_pool("transit node %s" % tb)

        if not t.rv:
            break

        vlan = get_vlan(ss.response)
        if vlan is not None:
            for seg_topo in topo.values():
                for e in seg_topo.elements:
                    for i in e.interface:
                        if i.get_attribute('dragon_vlan') == to_repl:
                            i.set_attribute('dragon_vlan', vlan)
        thread_pool.clear()

    failed = failed_names()

    if len(failed) == 0:
        for tb in slaves:
            # Create and start a thread to start the segment, and save it
            # to get the return value later
            thread_pool.wait_for_slot()
            uri = get_tbmap_uri(tb)
            aid = get_alloc_id(tb)

            t = self.pooled_thread(
                    target=new_starter(tb),
                    args=(uri, aid, topo[tb], False, attrs), name=tb,
                    pdata=thread_pool, trace_file=self.trace_file)
            threads.append(t)
            t.start()

        # Wait until all finish (keep pinging the log, though)
        wait_for_pool("sub threads")
        thread_pool.clear()

    # If none failed, start the master
    failed = failed_names()

    if len(failed) == 0:
        uri = get_tbmap_uri(master)
        aid = get_alloc_id(master)
        t = self.pooled_thread(
                target=new_starter(master),
                args=(uri, aid, topo[master], True, attrs),
                name=master, pdata=thread_pool,
                trace_file=self.trace_file)
        threads.append(t)
        t.start()
        # Wait until the master finishes (keep pinging the log, though)
        wait_for_pool("master")
        # update failed to include the master, if it failed
        failed = failed_names()

    succeeded = [tb for tb in allocated.keys() if tb not in failed]
    # If one failed clean up, unless fail_soft is set
    if failed:
        if not fail_soft:
            thread_pool.clear()
            for tb in succeeded:
                # Create and start a thread to stop the segment
                thread_pool.wait_for_slot()
                uri = tbparams[tb]['uri']
                t = self.pooled_thread(
                        target=self.terminate_segment(log=log,
                            testbed=tb,
                            cert_file=self.cert_file,
                            cert_pwd=self.cert_pwd,
                            trusted_certs=self.trusted_certs,
                            caller=self.call_TerminateSegment),
                        args=(uri, tbparams[tb]['federant']['allocID']),
                        name=tb,
                        pdata=thread_pool, trace_file=self.trace_file)
                t.start()
            # Wait until all finish
            thread_pool.wait_for_all_done()

            # release the allocations
            for tb in tbparams.keys():
                self.release_access(tb, tbparams[tb]['allocID'],
                        tbparams[tb].get('uri', None))
            # Mark the placeholder as failed; never leave the lock held
            # if writing the state file blows up.
            self.state_lock.acquire()
            try:
                self.state[eid]['experimentStatus'] = 'failed'
                if self.state_filename: self.write_state()
            finally:
                self.state_lock.release()

            log.error("Swap in failed on %s" % ",".join(failed))
            return
    else:
        log.info("[start_segment]: Experiment %s active" % eid)

    # Walk up tmpdir, deleting as we go
    if self.cleanup:
        log.debug("[start_experiment]: removing %s" % tmpdir)
        for path, dirs, files in os.walk(tmpdir, topdown=False):
            for f in files:
                os.remove(os.path.join(path, f))
            for d in dirs:
                os.rmdir(os.path.join(path, d))
        os.rmdir(tmpdir)
    else:
        log.debug("[start_experiment]: not removing %s" % tmpdir)

    # Insert the experiment into our state and update the disk copy
    self.state_lock.acquire()
    try:
        self.state[expid]['experimentStatus'] = 'active'
        self.state[eid] = self.state[expid]
        if self.state_filename: self.write_state()
    finally:
        self.state_lock.release()
    return
	1191
	1192
def add_kit(self, e, kit):
    """
    Attach the packages listed in kit to element e.

    kit is a sequence of (install, location) pairs; each becomes a
    topdl.Software object.  If e.software is already a list the new
    objects are appended to it, otherwise e.software is replaced by the
    new list.  This exists only to bridge the old tuple representation
    of kits.
    """
    packages = []
    for install_dir, where in kit:
        packages.append(topdl.Software(install=install_dir, location=where))

    if not isinstance(e.software, list):
        e.software = packages
    else:
        e.software.extend(packages)
	1205
	1206
	1207	def create_experiment_state(self, fid, req, expid, expcert):
	1208	"""
	1209	Create the initial entry in the experiment's state. The expid and
	1210	expcert are the experiment's fedid and certifacte that represents that
	1211	ID, which are installed in the experiment state. If the request
	1212	includes a suggested local name that is used if possible. If the local
	1213	name is already taken by an experiment owned by this user that has
	1214	failed, it is overwriutten. Otherwise new letters are added until a
	1215	valid localname is found. The generated local name is returned.
	1216	"""
	1217
	1218	if req.has_key('experimentID') and \
	1219	req['experimentID'].has_key('localname'):
	1220	overwrite = False
	1221	eid = req['experimentID']['localname']
	1222	# If there's an old failed experiment here with the same local name
	1223	# and accessible by this user, we'll overwrite it, otherwise we'll
	1224	# fall through and do the collision avoidance.
	1225	old_expid = self.get_experiment_fedid(eid)
	1226	if old_expid and self.check_experiment_access(fid, old_expid):
	1227	self.state_lock.acquire()
	1228	status = self.state[eid].get('experimentStatus', None)
	1229	if status and status == 'failed':
	1230	# remove the old access attribute
	1231	self.auth.unset_attribute(fid, old_expid)
	1232	overwrite = True
	1233	del self.state[eid]
	1234	del self.state[old_expid]
	1235	self.state_lock.release()
	1236	self.state_lock.acquire()
	1237	while (self.state.has_key(eid) and not overwrite):
	1238	eid += random.choice(string.ascii_letters)
	1239	# Initial state
	1240	self.state[eid] = {
	1241	'experimentID' : \
	1242	[ { 'localname' : eid }, {'fedid': expid } ],
	1243	'experimentStatus': 'starting',
	1244	'experimentAccess': { 'X509' : expcert },
	1245	'owner': fid,
	1246	'log' : [],
	1247	}
	1248	self.state[expid] = self.state[eid]
	1249	if self.state_filename: self.write_state()
	1250	self.state_lock.release()
	1251	else:
	1252	eid = self.exp_stem
	1253	for i in range(0,5):
	1254	eid += random.choice(string.ascii_letters)
	1255	self.state_lock.acquire()
	1256	while (self.state.has_key(eid)):
	1257	eid = self.exp_stem
	1258	for i in range(0,5):
	1259	eid += random.choice(string.ascii_letters)
	1260	# Initial state
	1261	self.state[eid] = {
	1262	'experimentID' : \
	1263	[ { 'localname' : eid }, {'fedid': expid } ],
	1264	'experimentStatus': 'starting',
	1265	'experimentAccess': { 'X509' : expcert },
	1266	'owner': fid,
	1267	'log' : [],
	1268	}
	1269	self.state[expid] = self.state[eid]
	1270	if self.state_filename: self.write_state()
	1271	self.state_lock.release()
	1272
	1273	return eid
	1274
	1275
	1276	def allocate_ips_to_topo(self, top):
	1277	"""
[69692a9]	1278	Add an ip4_address attribute to all the hosts in the topology, based on
[895a133]	1279	the shared substrates on which they sit. An /etc/hosts file is also
[69692a9]	1280	created and returned as a list of hostfiles entries. We also return
	1281	the allocator, because we may need to allocate IPs to portals
	1282	(specifically DRAGON portals).
[895a133]	1283	"""
	1284	subs = sorted(top.substrates,
	1285	cmp=lambda x,y: cmp(len(x.interfaces), len(y.interfaces)),
	1286	reverse=True)
	1287	ips = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
	1288	ifs = { }
	1289	hosts = [ ]
	1290
	1291	for idx, s in enumerate(subs):
	1292	a = ips.allocate(len(s.interfaces)+2)
	1293	if a :
	1294	base, num = a
	1295	if num < len(s.interfaces) +2 :
	1296	raise service_error(service_error.internal,
	1297	"Allocator returned wrong number of IPs??")
	1298	else:
	1299	raise service_error(service_error.req,
	1300	"Cannot allocate IP addresses")
	1301
	1302	base += 1
	1303	for i in s.interfaces:
	1304	i.attribute.append(
	1305	topdl.Attribute('ip4_address',
	1306	"%s" % ip_addr(base)))
	1307	hname = i.element.name[0]
	1308	if ifs.has_key(hname):
	1309	hosts.append("%s\t%s-%s %s-%d" % \
	1310	(ip_addr(base), hname, s.name, hname,
	1311	ifs[hname]))
	1312	else:
	1313	ifs[hname] = 0
	1314	hosts.append("%s\t%s-%s %s-%d %s" % \
	1315	(ip_addr(base), hname, s.name, hname,
	1316	ifs[hname], hname))
	1317
	1318	ifs[hname] += 1
	1319	base += 1
[69692a9]	1320	return hosts, ips
[895a133]	1321
	1322	def get_access_to_testbeds(self, testbeds, user, access_user,
	1323	export_project, master, allocated, tbparams):
	1324	"""
	1325	Request access to the various testbeds required for this instantiation
	1326	(passed in as testbeds). User, access_user, expoert_project and master
	1327	are used to construct the correct requests. Per-testbed parameters are
	1328	returned in tbparams.
	1329	"""
	1330	for tb in testbeds:
	1331	self.get_access(tb, None, user, tbparams, master,
	1332	export_project, access_user)
	1333	allocated[tb] = 1
	1334
	1335	def split_topology(self, top, topo, testbeds, eid, master, tbparams):
	1336	"""
	1337	Create the sub-topologies that are needed for experimetn instantiation.
	1338	Along the way attach startup commands to the computers in the
	1339	subtopologies.
	1340	"""
	1341	for tb in testbeds:
	1342	topo[tb] = top.clone()
	1343	to_delete = [ ]
	1344	for e in topo[tb].elements:
	1345	etb = e.get_attribute('testbed')
	1346	if etb and etb != tb:
	1347	for i in e.interface:
	1348	for s in i.subs:
	1349	try:
	1350	s.interfaces.remove(i)
	1351	except ValueError:
	1352	raise service_error(service_error.internal,
	1353	"Can't remove interface??")
	1354	to_delete.append(e)
	1355	for e in to_delete:
	1356	topo[tb].elements.remove(e)
	1357	topo[tb].make_indices()
	1358
	1359	for e in [ e for e in topo[tb].elements \
	1360	if isinstance(e,topdl.Computer)]:
	1361	if tb == master:
	1362	cmd = 'sudo -H /usr/local/federation/bin/make_hosts /proj/%s/exp/%s/tmp/hosts >& /tmp/federate' % (tbparams[tb].get('project', 'project'), eid)
	1363	else:
	1364	cmd = "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& /tmp/federate"
	1365	scmd = e.get_attribute('startup')
	1366	if scmd:
	1367	cmd = "%s \\$USER '%s'" % (cmd, scmd)
	1368
	1369	e.set_attribute('startup', cmd)
	1370	if self.fedkit: self.add_kit(e, self.fedkit)
	1371
def new_portal_node(self, st, dt, tbparams, master, eid, myname, desthost,
        portal_type, iface_desc=()):
    """
    Build and return the topdl.Computer for a portal in testbed st that
    talks to testbed dt.

    The node is named myname and carries the attributes the portal
    startup script is given: domains, project/experiment names, the peer
    host (desthost) and peer allocation fedid, portal_type and whether
    this end is the active one.  When either end is the master, the
    master end is active; otherwise the end whose name compares greater.
    iface_desc is a sequence of (substrate name, ((attr, value), ...))
    pairs, one per interface to create.
    """
    src_params = tbparams[st]
    dst_params = tbparams[dt]
    master_params = tbparams[master]

    peer_alloc = dst_params['allocID']['fedid']
    if master in (st, dt):
        active = "%s" % (st == master)
    else:
        active = "%s" % (st > dt)

    node_attrs = (
            ('portal', 'true'),
            ('domain', src_params.get('domain', ".example.com")),
            ('masterdomain', master_params.get('domain', '.example.com')),
            ('masterexperiment', "%s/%s" % \
                    (master_params.get('project', 'project'), eid)),
            ('masteruser', master_params.get('user', 'root')),
            ('smbshare', master_params.get('smbshare', 'USERS')),
            ('experiment', "%s/%s" % \
                    (src_params.get('project', 'project'), eid)),
            ('peer', "%s" % desthost),
            ('peer_segment', "%s" % peer_alloc),
            ('scriptdir', "/usr/local/federation/bin"),
            ('active', "%s" % active),
            ('portal_type', portal_type),
            ('startup', 'sudo -H /usr/local/federation/bin/fed-tun.pl >& /tmp/bridge.log'),
        )

    ifaces = [
            topdl.Interface(
                substrate=sub_name,
                attribute=[ topdl.Attribute(attribute=n, value=v)
                    for n, v in if_attrs ])
            for sub_name, if_attrs in iface_desc ]

    return topdl.Computer(
            name=myname,
            attribute=[ topdl.Attribute(attribute=n, value=v)
                for n, v in node_attrs ],
            interface=ifaces,
            )
	1424
	1425	def new_portal_substrate(self, st, dt, eid, tbparams):
	1426	ddomain = tbparams[dt].get('domain', ".example.com")
	1427	dproject = tbparams[dt].get('project', 'project')
	1428	tsubstrate = \
	1429	topdl.Substrate(name='%s-%s' % (st, dt),
	1430	attribute= [
	1431	topdl.Attribute(
	1432	attribute='portal',
	1433	value='true')
	1434	]
	1435	)
	1436	segment_element = topdl.Segment(
	1437	id= tbparams[dt]['allocID'],
	1438	type='emulab',
	1439	uri = self.tbmap.get(dt, None),
	1440	interface=[
	1441	topdl.Interface(
	1442	substrate=tsubstrate.name),
	1443	],
	1444	attribute = [
	1445	topdl.Attribute(attribute=n, value=v)
	1446	for n, v in (\
	1447	('domain', ddomain),
	1448	('experiment', "%s/%s" % \
	1449	(dproject, eid)),)
	1450	],
	1451	)
	1452
	1453	return (tsubstrate, segment_element)
	1454
def new_dragon_topo(self, idx, sub, topo, tbs, tbparams):
    """
    Add a transit topology "dragon<idx>" to topo for substrate sub.

    The topology holds one substrate (a copy of sub's capacity, vlan
    attribute "unassigned<idx>") and one Segment per testbed in tbs,
    each carrying that testbed's 'dragon' endpoint and, when present,
    its 'vlans' parameter.  Raises service_error if sub has no capacity.
    """
    if sub.capacity is None:
        raise service_error(service_error.internal,
                "Cannot DRAGON split substrate w/o capacity")

    topo_name = "dragon%d" % idx
    substr = topdl.Substrate(name=topo_name,
            capacity=sub.capacity.clone(),
            attribute=[ topdl.Attribute(attribute='vlan',
                value='unassigned%d' % idx) ])

    segs = [ ]
    for tb in tbs.keys():
        params = tbparams[tb]
        seg = topdl.Segment(
                id=params['allocID'],
                type='emulab',
                uri=self.tbmap.get(tb, None),
                interface=[ topdl.Interface(substrate=substr.name) ],
                attribute=[ topdl.Attribute(
                    attribute='dragon_endpoint',
                    value=params['dragon']) ]
                )
        if 'vlans' in params:
            seg.set_attribute('vlans', params['vlans'])
        segs.append(seg)

    markers = [ topdl.Attribute(attribute=n, value=v)
            for n, v in (
                ("transit", 'true'),
                ("dynamic", 'true'),
                ("testbed", 'dragon')) ]
    topo[topo_name] = topdl.Topology(substrates=[substr], elements=segs,
            attribute=markers)
	1491
	1492	def create_dragon_substrate(self, sub, topo, tbs, tbparams, master, eid):
	1493	"""
	1494	Add attribiutes to the various elements indicating that they are to be
	1495	dragon connected and create a dragon segment in tops to be
	1496	instantiated.
	1497	"""
	1498
	1499	def get_substrate_from_topo(name, t):
	1500	for s in t.substrates:
	1501	if s.name == name: return s
	1502	else: return None
	1503
	1504	dn = len([x for x in topo.keys() if x.startswith('dragon')])
	1505	elements = [ i.element for i in sub.interfaces ]
	1506	count = { }
	1507	for e in elements:
	1508	tb = e.get_attribute('testbed')
	1509	count[tb] = count.get(tb, 0) + 1
	1510
	1511	for tb in tbs.keys():
	1512	s = get_substrate_from_topo(sub.name, topo[tb])
	1513	if s:
	1514	for i in s.interfaces:
	1515	i.set_attribute('dragon_vlan', 'unassigned%d' % dn)
	1516	if count[tb] > 1: i.set_attribute('dragon_type', 'lan')
	1517	else: i.set_attribute('dragon_type', 'link')
	1518	else:
	1519	raise service_error(service_error.internal,
	1520	"No substrate %s in testbed %s" % (sub.name, tb))
	1521
	1522	self.new_dragon_topo(dn, sub, topo, tbs, tbparams)
	1523
	1524	def insert_internet_portals(self, sub, topo, tbs, tbparams, master, eid,
	1525	segment_substrate, portals):
	1526	# More than one testbed is on this substrate. Insert
	1527	# some portals into the subtopologies. st == source testbed,
	1528	# dt == destination testbed.
	1529	for st in tbs.keys():
	1530	if not segment_substrate.has_key(st):
	1531	segment_substrate[st] = { }
	1532	if not portals.has_key(st):
	1533	portals[st] = { }
	1534	for dt in [ t for t in tbs.keys() if t != st]:
	1535	sproject = tbparams[st].get('project', 'project')
	1536	dproject = tbparams[dt].get('project', 'project')
	1537	mproject = tbparams[master].get('project', 'project')
	1538	sdomain = tbparams[st].get('domain', ".example.com")
	1539	ddomain = tbparams[dt].get('domain', ".example.com")
	1540	mdomain = tbparams[master].get('domain', '.example.com')
	1541	muser = tbparams[master].get('user', 'root')
	1542	smbshare = tbparams[master].get('smbshare', 'USERS')
	1543	aid = tbparams[dt]['allocID']['fedid']
	1544	if st == master or dt == master:
	1545	active = ("%s" % (st == master))
	1546	else:
	1547	active = ("%s" %(st > dt))
	1548	if not segment_substrate[st].has_key(dt):
	1549	# Put a substrate and a segment for the connected
	1550	# testbed in there.
	1551	tsubstrate, segment_element = \
	1552	self.new_portal_substrate(st, dt, eid, tbparams)
	1553	segment_substrate[st][dt] = tsubstrate
	1554	topo[st].substrates.append(tsubstrate)
	1555	topo[st].elements.append(segment_element)
	1556
	1557	new_portal = False
	1558	if portals[st].has_key(dt):
	1559	# There's a portal set up to go to this destination.
	1560	# See if there's room to multiples this connection on
	1561	# it. If so, add an interface to the portal; if not,
	1562	# set up to add a portal below.
	1563	# [This little festival of braces is just a pop of the
	1564	# last element in the list of portals between st and
	1565	# dt.]
	1566	portal = portals[st][dt][-1]
	1567	mux = len([ i for i in portal.interface \
	1568	if not i.get_attribute('portal')])
	1569	if mux == self.muxmax:
	1570	new_portal = True
	1571	portal_type = "experiment"
	1572	myname = "%stunnel%d" % (dt, len(portals[st][dt]))
	1573	desthost = "%stunnel%d" % (st, len(portals[st][dt]))
	1574	else:
	1575	new_i = topdl.Interface(
	1576	substrate=s.name,
	1577	attribute=[
	1578	topdl.Attribute(
	1579	attribute='ip4_address',
	1580	value=tbs[dt]
	1581	)
	1582	])
	1583	portal.interface.append(new_i)
	1584	else:
	1585	# First connection to this testbed, make an empty list
	1586	# and set up to add the new portal below
	1587	new_portal = True
	1588	portals[st][dt] = [ ]
	1589	myname = "%stunnel%d" % (dt, len(portals[st][dt]))
	1590	desthost = "%stunnel%d" % (st, len(portals[st][dt]))
	1591
	1592	if dt == master or st == master: portal_type = "both"
	1593	else: portal_type = "experiment"
	1594
	1595	if new_portal:
	1596	infs = (
	1597	(segment_substrate[st][dt].name,
	1598	(('portal', 'true'),)),
	1599	(sub.name,
	1600	(('ip4_address', tbs[dt]),))
	1601	)
	1602	portal = self.new_portal_node(st, dt, tbparams,
	1603	master, eid, myname, desthost, portal_type,
	1604	infs)
	1605	if self.fedkit:
	1606	self.add_kit(portal, self.fedkit)
	1607	if self.gatewaykit:
	1608	self.add_kit(portal, self.gatewaykit)
	1609
	1610	topo[st].elements.append(portal)
	1611	portals[st][dt].append(portal)
	1612
	1613	def add_control_portal(self, st, dt, master, eid, topo, tbparams):
	1614	# Add to the master testbed
	1615	tsubstrate, segment_element = \
	1616	self.new_portal_substrate(st, dt, eid, tbparams)
	1617	myname = "%stunnel" % dt
	1618	desthost = "%stunnel" % st
	1619
	1620	portal = self.new_portal_node(st, dt, tbparams, master,
	1621	eid, myname, desthost, "control",
	1622	((tsubstrate.name,(('portal','true'),)),))
	1623	if self.fedkit:
	1624	self.add_kit(portal, self.fedkit)
	1625	if self.gatewaykit:
	1626	self.add_kit(portal, self.gatewaykit)
	1627
	1628	topo[st].substrates.append(tsubstrate)
	1629	topo[st].elements.append(segment_element)
	1630	topo[st].elements.append(portal)
	1631
	1632	def new_dragon_portal(self, st, dt, master, eid, dip, idx,
	1633	substrate, tbparams):
	1634	# Add to the master testbed
	1635	myname = "%stunnel" % dt
	1636	desthost = "%s" % ip_addr(dip)
	1637
	1638	portal = self.new_portal_node(st, dt, tbparams, master,
	1639	eid, myname, desthost, "control",
	1640	((substrate.name,(
	1641	('portal','true'),
	1642	('ip4_address', "%s" % ip_addr(dip)),
	1643	('dragon_vlan', 'unassigned%d' % idx),
	1644	('dragon_type', 'link'),)),))
	1645	if self.fedkit:
	1646	self.add_kit(portal, self.fedkit)
	1647	if self.gatewaykit:
	1648	self.add_kit(portal, self.gatewaykit)
	1649
	1650	return portal
	1651
	1652	def add_portals(self, top, topo, eid, master, tbparams, ip_allocator):
[895a133]	1653	"""
	1654	For each substrate in the main topology, find those that
	1655	have nodes on more than one testbed. Insert portal nodes
	1656	into the copies of those substrates on the sub topologies.
	1657	"""
[13e3dd2]	1658	segment_substrate = { }
	1659	portals = { }
[895a133]	1660	for s in top.substrates:
	1661	# tbs will contain an ip address on this subsrate that is in
	1662	# each testbed.
	1663	tbs = { }
	1664	for i in s.interfaces:
	1665	e = i.element
	1666	tb = e.get_attribute('testbed')
	1667	if tb and not tbs.has_key(tb):
	1668	for i in e.interface:
	1669	if s in i.subs:
	1670	tbs[tb]= i.get_attribute('ip4_address')
	1671	if len(tbs) < 2:
	1672	continue
	1673
[69692a9]	1674	# DRAGON will not create multi-site vlans yet
	1675	if len(tbs) == 2 and \
	1676	all([tbparams[x].has_key('dragon') for x in tbs]):
	1677	self.create_dragon_substrate(s, topo, tbs, tbparams,
	1678	master, eid)
	1679	else:
	1680	self.insert_internet_portals(s, topo, tbs, tbparams, master,
	1681	eid, segment_substrate, portals)
[13e3dd2]	1682
	1683	# Make sure that all the slaves have a control portal back to the
	1684	# master.
	1685	for tb in [ t for t in tbparams.keys() if t != master ]:
	1686	if len([e for e in topo[tb].elements \
	1687	if isinstance(e, topdl.Computer) and \
	1688	e.get_attribute('portal') and \
	1689	e.get_attribute('portal_type') == 'both']) == 0:
	1690
[69692a9]	1691	if tbparams[master].has_key('dragon') \
	1692	and tbparams[tb].has_key('dragon'):
	1693
	1694	idx = len([x for x in topo.keys() \
	1695	if x.startswith('dragon')])
	1696	dip, leng = ip_allocator.allocate(4)
	1697	dip += 1
	1698	csub = topdl.Substrate(
	1699	name="dragon-control-%s" % tb,
	1700	capacity=topdl.Capacity(100000.0, 'max'),
	1701	attribute=[
	1702	topdl.Attribute(
	1703	attribute='portal',
	1704	value='true'
	1705	)
	1706	]
	1707	)
	1708	seg = topdl.Segment(
	1709	id= tbparams[master]['allocID'],
	1710	type='emulab',
	1711	uri = self.tbmap.get(master, None),
	1712	interface=[
	1713	topdl.Interface(
	1714	substrate=csub.name),
	1715	],
	1716	attribute = [
	1717	topdl.Attribute(attribute=n, value=v)
	1718	for n, v in (\
	1719	('domain',
	1720	tbparams[master].get('domain',
	1721	".example.com")),
	1722	('experiment', "%s/%s" % \
	1723	(tbparams[master].get(
	1724	'project',
	1725	'project'),
	1726	eid)),)
	1727	],
	1728	)
	1729	topo[tb].substrates.append(csub)
	1730	topo[tb].elements.append(
	1731	self.new_dragon_portal(tb, master, master, eid,
	1732	dip, idx, csub, tbparams))
	1733	topo[tb].elements.append(seg)
	1734
	1735	dip+=1
	1736	mcsub = csub.clone()
	1737	seg = topdl.Segment(
	1738	id= tbparams[tb]['allocID'],
	1739	type='emulab',
	1740	uri = self.tbmap.get(tb, None),
	1741	interface=[
	1742	topdl.Interface(
	1743	substrate=csub.name),
	1744	],
	1745	attribute = [
	1746	topdl.Attribute(attribute=n, value=v)
	1747	for n, v in (\
	1748	('domain',
	1749	tbparams[tb].get('domain',
	1750	".example.com")),
	1751	('experiment', "%s/%s" % \
	1752	(tbparams[tb].get('project',
	1753	'project'),
	1754	eid)),)
	1755	],
	1756	)
	1757	topo[master].substrates.append(mcsub)
	1758	topo[master].elements.append(
	1759	self.new_dragon_portal(master, tb, master, eid,
	1760	dip, idx, mcsub, tbparams))
	1761	topo[master].elements.append(seg)
	1762
	1763	self.create_dragon_substrate(csub, topo,
	1764	{tb: 1, master:1}, tbparams, master, eid)
	1765	else:
	1766	self.add_control_portal(master, tb, master, eid, topo,
	1767	tbparams)
	1768	self.add_control_portal(tb, master, master, eid, topo,
	1769	tbparams)
[13e3dd2]	1770
	1771	# Connect the portal nodes into the topologies and clear out
[895a133]	1772	# substrates that are not in the topologies
	1773	for tb in tbparams.keys():
	1774	topo[tb].incorporate_elements()
	1775	topo[tb].substrates = \
	1776	[s for s in topo[tb].substrates \
	1777	if len(s.interfaces) >0]
	1778
	1779	def wrangle_software(self, expid, top, topo, tbparams):
	1780	"""
	1781	Copy software out to the repository directory, allocate permissions and
	1782	rewrite the segment topologies to look for the software in local
	1783	places.
	1784	"""
	1785
	1786	# Copy the rpms and tarfiles to a distribution directory from
	1787	# which the federants can retrieve them
	1788	linkpath = "%s/software" % expid
	1789	softdir ="%s/%s" % ( self.repodir, linkpath)
	1790	softmap = { }
	1791	# These are in a list of tuples format (each kit). This comprehension
	1792	# unwraps them into a single list of tuples that initilaizes the set of
	1793	# tuples.
	1794	pkgs = set([ t for l in [self.fedkit, self.gatewaykit] \
	1795	for p, t in l ])
	1796	pkgs.update([x.location for e in top.elements \
	1797	for x in e.software])
	1798	try:
	1799	os.makedirs(softdir)
	1800	except IOError, e:
	1801	raise service_error(
	1802	"Cannot create software directory: %s" % e)
	1803	# The actual copying. Everything's converted into a url for copying.
	1804	for pkg in pkgs:
	1805	loc = pkg
	1806
	1807	scheme, host, path = urlparse(loc)[0:3]
	1808	dest = os.path.basename(path)
	1809	if not scheme:
	1810	if not loc.startswith('/'):
	1811	loc = "/%s" % loc
	1812	loc = "file://%s" %loc
	1813	try:
	1814	u = urlopen(loc)
	1815	except Exception, e:
	1816	raise service_error(service_error.req,
	1817	"Cannot open %s: %s" % (loc, e))
	1818	try:
	1819	f = open("%s/%s" % (softdir, dest) , "w")
	1820	self.log.debug("Writing %s/%s" % (softdir,dest) )
	1821	data = u.read(4096)
	1822	while data:
	1823	f.write(data)
	1824	data = u.read(4096)
	1825	f.close()
	1826	u.close()
	1827	except Exception, e:
	1828	raise service_error(service_error.internal,
	1829	"Could not copy %s: %s" % (loc, e))
	1830	path = re.sub("/tmp", "", linkpath)
	1831	# XXX
	1832	softmap[pkg] = \
	1833	"https://users.isi.deterlab.net:23232/%s/%s" %\
	1834	( path, dest)
	1835
	1836	# Allow the individual segments to access the software.
	1837	for tb in tbparams.keys():
	1838	self.auth.set_attribute(tbparams[tb]['allocID']['fedid'],
	1839	"/%s/%s" % ( path, dest))
	1840
	1841	# Convert the software locations in the segments into the local
	1842	# copies on this host
	1843	for soft in [ s for tb in topo.values() \
	1844	for e in tb.elements \
	1845	if getattr(e, 'software', False) \
	1846	for s in e.software ]:
	1847	if softmap.has_key(soft.location):
	1848	soft.location = softmap[soft.location]
	1849
	1850
[e19b75c]	1851	def create_experiment(self, req, fid):
[db6b092]	1852	"""
	1853	The external interface to experiment creation called from the
	1854	dispatcher.
	1855
	1856	Creates a working directory, splits the incoming description using the
	1857	splitter script and parses out the avrious subsections using the
	1858	lcasses above. Once each sub-experiment is created, use pooled threads
	1859	to instantiate them and start it all up.
	1860	"""
	1861	if not self.auth.check_attribute(fid, 'create'):
	1862	raise service_error(service_error.access, "Create access denied")
	1863
	1864	try:
	1865	tmpdir = tempfile.mkdtemp(prefix="split-")
[895a133]	1866	os.mkdir(tmpdir+"/keys")
[db6b092]	1867	except IOError:
	1868	raise service_error(service_error.internal, "Cannot create tmp dir")
	1869
	1870	gw_pubkey_base = "fed.%s.pub" % self.ssh_type
	1871	gw_secretkey_base = "fed.%s" % self.ssh_type
	1872	gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
	1873	gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
	1874	tclfile = tmpdir + "/experiment.tcl"
	1875	tbparams = { }
	1876	try:
	1877	access_user = self.accessdb[fid]
	1878	except KeyError:
	1879	raise service_error(service_error.internal,
	1880	"Access map and authorizer out of sync in " + \
	1881	"create_experiment for fedid %s" % fid)
	1882
	1883	pid = "dummy"
	1884	gid = "dummy"
	1885
	1886	req = req.get('CreateRequestBody', None)
	1887	if not req:
	1888	raise service_error(service_error.req,
	1889	"Bad request format (no CreateRequestBody)")
	1890	# The tcl parser needs to read a file so put the content into that file
	1891	descr=req.get('experimentdescription', None)
	1892	if descr:
	1893	file_content=descr.get('ns2description', None)
	1894	if file_content:
	1895	try:
	1896	f = open(tclfile, 'w')
	1897	f.write(file_content)
	1898	f.close()
	1899	except IOError:
	1900	raise service_error(service_error.internal,
	1901	"Cannot write temp experiment description")
	1902	else:
	1903	raise service_error(service_error.req,
	1904	"Only ns2descriptions supported")
	1905	else:
	1906	raise service_error(service_error.req, "No experiment description")
	1907
	1908	# Generate an ID for the experiment (slice) and a certificate that the
	1909	# allocator can use to prove they own it. We'll ship it back through
	1910	# the encrypted connection.
	1911	(expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)
	1912
[895a133]	1913	eid = self.create_experiment_state(fid, req, expid, expcert)
[db6b092]	1914	try:
	1915	# This catches exceptions to clear the placeholder if necessary
	1916	try:
	1917	self.generate_ssh_keys(gw_secretkey, self.ssh_type)
	1918	except ValueError:
	1919	raise service_error(service_error.server_config,
	1920	"Bad key type (%s)" % self.ssh_type)
	1921
	1922	user = req.get('user', None)
	1923	if user == None:
	1924	raise service_error(service_error.req, "No user")
	1925
	1926	master = req.get('master', None)
	1927	if not master:
	1928	raise service_error(service_error.req,
	1929	"No master testbed label")
	1930	export_project = req.get('exportProject', None)
	1931	if not export_project:
	1932	raise service_error(service_error.req, "No export project")
[895a133]	1933
	1934	# Translate to topdl
[db6b092]	1935	if self.splitter_url:
[895a133]	1936	# XXX: need remote topdl translator
[db6b092]	1937	self.log.debug("Calling remote splitter at %s" % \
	1938	self.splitter_url)
	1939	split_data = self.remote_splitter(self.splitter_url,
	1940	file_content, master)
	1941	else:
	1942	tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
	1943	str(self.muxmax), '-m', master]
	1944
	1945	if self.fedkit:
	1946	tclcmd.append('-k')
	1947
	1948	if self.gatewaykit:
	1949	tclcmd.append('-K')
	1950
	1951	tclcmd.extend([pid, gid, eid, tclfile])
	1952
	1953	self.log.debug("running local splitter %s", " ".join(tclcmd))
	1954	# This is just fantastic. As a side effect the parser copies
	1955	# tb_compat.tcl into the current directory, so that directory
	1956	# must be writable by the fedd user. Doing this in the
	1957	# temporary subdir ensures this is the case.
[70caa72]	1958	tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
[db6b092]	1959	cwd=tmpdir)
[866c983]	1960	split_data = tclparser.stdout
	1961
[cc8d8e9]	1962	top = topdl.topology_from_xml(file=split_data, top="experiment")
[895a133]	1963
[69692a9]	1964	hosts, ip_allocator = self.allocate_ips_to_topo(top)
[895a133]	1965	# Find the testbeds to look up
	1966	testbeds = set([ a.value for e in top.elements \
	1967	for a in e.attribute \
	1968	if a.attribute == 'testbed'] )
	1969
	1970	allocated = { } # Testbeds we can access
	1971	topo ={ } # Sub topologies
	1972	self.get_access_to_testbeds(testbeds, user, access_user,
	1973	export_project, master, allocated, tbparams)
	1974	self.split_topology(top, topo, testbeds, eid, master, tbparams)
	1975
	1976	# Copy configuration files into the remote file store
[6c57fe9]	1977	# The config urlpath
	1978	configpath = "/%s/config" % expid
	1979	# The config file system location
	1980	configdir ="%s%s" % ( self.repodir, configpath)
	1981	try:
	1982	os.makedirs(configdir)
	1983	except IOError, e:
	1984	raise service_error(
	1985	"Cannot create config directory: %s" % e)
	1986	try:
	1987	f = open("%s/hosts" % configdir, "w")
	1988	f.write('\n'.join(hosts))
	1989	f.close()
	1990	except IOError, e:
	1991	raise service_error(service_error.internal,
	1992	"Cannot write hosts file: %s" % e)
	1993	try:
[40dd8c1]	1994	copy_file("%s" % gw_pubkey, "%s/%s" % \
[6c57fe9]	1995	(configdir, gw_pubkey_base))
[40dd8c1]	1996	copy_file("%s" % gw_secretkey, "%s/%s" % \
[6c57fe9]	1997	(configdir, gw_secretkey_base))
	1998	except IOError, e:
	1999	raise service_error(service_error.internal,
	2000	"Cannot copy keyfiles: %s" % e)
[cc8d8e9]	2001
[6c57fe9]	2002	# Allow the individual testbeds to access the configuration files.
	2003	for tb in tbparams.keys():
	2004	asignee = tbparams[tb]['allocID']['fedid']
	2005	for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
	2006	self.auth.set_attribute(asignee, "%s/%s" % (configpath, f))
[cc8d8e9]	2007
[69692a9]	2008	self.add_portals(top, topo, eid, master, tbparams, ip_allocator)
	2009	# Now get access to the dynamic testbeds
	2010	for k, t in topo.items():
	2011	if not t.get_attribute('dynamic'):
	2012	continue
	2013	tb = t.get_attribute('testbed')
	2014	if tb:
	2015	self.get_access(tb, None, user, tbparams, master,
	2016	export_project, access_user)
	2017	tbparams[k] = tbparams[tb]
	2018	del tbparams[tb]
	2019	allocated[k] = 1
	2020	else:
	2021	raise service_error(service_error.internal,
	2022	"Dynamic allocation from no testbed!?")
	2023
[895a133]	2024	self.wrangle_software(expid, top, topo, tbparams)
[cc8d8e9]	2025
	2026	vtopo = topdl.topology_to_vtopo(top)
	2027	vis = self.genviz(vtopo)
[db6b092]	2028
[866c983]	2029	# save federant information
	2030	for k in allocated.keys():
[ecf679e]	2031	tbparams[k]['federant'] = {
	2032	'name': [ { 'localname' : eid} ],
	2033	'allocID' : tbparams[k]['allocID'],
	2034	'master' : k == master,
	2035	'uri': tbparams[k]['uri'],
[866c983]	2036	}
[69692a9]	2037	if tbparams[k].has_key('emulab'):
	2038	tbparams[k]['federant']['emulab'] = \
	2039	tbparams[k]['emulab']
[866c983]	2040
[db6b092]	2041	self.state_lock.acquire()
	2042	self.state[eid]['vtopo'] = vtopo
	2043	self.state[eid]['vis'] = vis
	2044	self.state[expid]['federant'] = \
	2045	[ tbparams[tb]['federant'] for tb in tbparams.keys() \
	2046	if tbparams[tb].has_key('federant') ]
[cc8d8e9]	2047	if self.state_filename:
	2048	self.write_state()
[db6b092]	2049	self.state_lock.release()
[866c983]	2050	except service_error, e:
	2051	# If something goes wrong in the parse (usually an access error)
	2052	# clear the placeholder state. From here on out the code delays
[db6b092]	2053	# exceptions. Failing at this point returns a fault to the remote
	2054	# caller.
[cc8d8e9]	2055
[866c983]	2056	self.state_lock.acquire()
	2057	del self.state[eid]
[bd3e314]	2058	del self.state[expid]
	2059	if self.state_filename: self.write_state()
[866c983]	2060	self.state_lock.release()
	2061	raise e
	2062
	2063
[db6b092]	2064	# Start the background swapper and return the starting state. From
	2065	# here on out, the state will stick around a while.
[866c983]	2066
[db6b092]	2067	# Let users touch the state
[bd3e314]	2068	self.auth.set_attribute(fid, expid)
	2069	self.auth.set_attribute(expid, expid)
[db6b092]	2070	# Override fedids can manipulate state as well
	2071	for o in self.overrides:
	2072	self.auth.set_attribute(o, expid)
	2073
	2074	# Create a logger that logs to the experiment's state object as well as
	2075	# to the main log file.
	2076	alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
[f07fa49]	2077	alloc_collector = self.list_log(self.state[eid]['log'])
	2078	h = logging.StreamHandler(alloc_collector)
[db6b092]	2079	# XXX: there should be a global one of these rather than repeating the
	2080	# code.
	2081	h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
	2082	'%d %b %y %H:%M:%S'))
	2083	alloc_log.addHandler(h)
	2084
[6c57fe9]	2085	# XXX
	2086	url_base = 'https://users.isi.deterlab.net:23232'
	2087	attrs = [
	2088	{
	2089	'attribute': 'ssh_pubkey',
	2090	'value': '%s/%s/config/%s' % \
	2091	(url_base, expid, gw_pubkey_base)
	2092	},
	2093	{
	2094	'attribute': 'ssh_secretkey',
	2095	'value': '%s/%s/config/%s' % \
	2096	(url_base, expid, gw_secretkey_base)
	2097	},
	2098	{
	2099	'attribute': 'hosts',
	2100	'value': '%s/%s/config/hosts' % \
	2101	(url_base, expid)
	2102	},
[ecca6eb]	2103	{
	2104	'attribute': 'experiment_name',
	2105	'value': eid,
	2106	},
[6c57fe9]	2107	]
	2108
[db6b092]	2109	# Start a thread to do the resource allocation
[e19b75c]	2110	t = Thread(target=self.allocate_resources,
[db6b092]	2111	args=(allocated, master, eid, expid, expcert, tbparams,
[f07fa49]	2112	topo, tmpdir, alloc_log, alloc_collector, attrs),
[db6b092]	2113	name=eid)
	2114	t.start()
	2115
	2116	rv = {
	2117	'experimentID': [
	2118	{'localname' : eid }, { 'fedid': copy.copy(expid) }
	2119	],
	2120	'experimentStatus': 'starting',
	2121	'experimentAccess': { 'X509' : expcert }
	2122	}
	2123
	2124	return rv
[9479343]	2125
	2126	def get_experiment_fedid(self, key):
	2127	"""
[db6b092]	2128	find the fedid associated with the localname key in the state database.
[9479343]	2129	"""
	2130
[db6b092]	2131	rv = None
	2132	self.state_lock.acquire()
	2133	if self.state.has_key(key):
	2134	if isinstance(self.state[key], dict):
	2135	try:
	2136	kl = [ f['fedid'] for f in \
	2137	self.state[key]['experimentID']\
	2138	if f.has_key('fedid') ]
	2139	except KeyError:
	2140	self.state_lock.release()
	2141	raise service_error(service_error.internal,
	2142	"No fedid for experiment %s when getting "+\
	2143	"fedid(!?)" % key)
	2144	if len(kl) == 1:
	2145	rv = kl[0]
	2146	else:
	2147	self.state_lock.release()
	2148	raise service_error(service_error.internal,
	2149	"multiple fedids for experiment %s when " +\
	2150	"getting fedid(!?)" % key)
	2151	else:
	2152	self.state_lock.release()
	2153	raise service_error(service_error.internal,
	2154	"Unexpected state for %s" % key)
	2155	self.state_lock.release()
	2156	return rv
[a97394b]	2157
def check_experiment_access(self, fid, key):
    """
    Confirm that fid may access the experiment named by key.

    A request may name the experiment by local name, but the access
    attribute is always the experiment's fedid, so anything that is not
    already a fedid is translated first.

    Returns True on success; raises service_error (access) otherwise.
    """
    attr = key
    if not isinstance(attr, fedid):
        attr = self.get_experiment_fedid(attr)

    if not self.auth.check_attribute(fid, attr):
        raise service_error(service_error.access, "Access Denied")
    return True
[4064742]	2171
	2172
def get_handler(self, path, fid):
    """
    Resolve path to a file under the repository directory for fid.

    Returns a (filename, content type) pair when fid holds the attribute
    named by path, and (None, None) when it does not.
    """
    if not self.auth.check_attribute(fid, path):
        return (None, None)

    filename = "%s/%s" % (self.repodir, path)
    return (filename, "application/binary")
[987aaa1]	2178
	2179	def get_vtopo(self, req, fid):
[866c983]	2180	"""
	2181	Return the stored virtual topology for this experiment
	2182	"""
	2183	rv = None
[db6b092]	2184	state = None
[866c983]	2185
	2186	req = req.get('VtopoRequestBody', None)
	2187	if not req:
	2188	raise service_error(service_error.req,
	2189	"Bad request format (no VtopoRequestBody)")
	2190	exp = req.get('experiment', None)
	2191	if exp:
	2192	if exp.has_key('fedid'):
	2193	key = exp['fedid']
	2194	keytype = "fedid"
	2195	elif exp.has_key('localname'):
	2196	key = exp['localname']
	2197	keytype = "localname"
	2198	else:
	2199	raise service_error(service_error.req, "Unknown lookup type")
	2200	else:
	2201	raise service_error(service_error.req, "No request?")
	2202
	2203	self.check_experiment_access(fid, key)
	2204
	2205	self.state_lock.acquire()
	2206	if self.state.has_key(key):
[db6b092]	2207	if self.state[key].has_key('vtopo'):
	2208	rv = { 'experiment' : {keytype: key },\
	2209	'vtopo': self.state[key]['vtopo'],\
	2210	}
	2211	else:
	2212	state = self.state[key]['experimentStatus']
[866c983]	2213	self.state_lock.release()
	2214
	2215	if rv: return rv
[bd3e314]	2216	else:
[db6b092]	2217	if state:
	2218	raise service_error(service_error.partial,
	2219	"Not ready: %s" % state)
	2220	else:
	2221	raise service_error(service_error.req, "No such experiment")
[987aaa1]	2222
	2223	def get_vis(self, req, fid):
[866c983]	2224	"""
	2225	Return the stored visualization for this experiment
	2226	"""
	2227	rv = None
[db6b092]	2228	state = None
[866c983]	2229
	2230	req = req.get('VisRequestBody', None)
	2231	if not req:
	2232	raise service_error(service_error.req,
	2233	"Bad request format (no VisRequestBody)")
	2234	exp = req.get('experiment', None)
	2235	if exp:
	2236	if exp.has_key('fedid'):
	2237	key = exp['fedid']
	2238	keytype = "fedid"
	2239	elif exp.has_key('localname'):
	2240	key = exp['localname']
	2241	keytype = "localname"
	2242	else:
	2243	raise service_error(service_error.req, "Unknown lookup type")
	2244	else:
	2245	raise service_error(service_error.req, "No request?")
	2246
	2247	self.check_experiment_access(fid, key)
	2248
	2249	self.state_lock.acquire()
	2250	if self.state.has_key(key):
[db6b092]	2251	if self.state[key].has_key('vis'):
	2252	rv = { 'experiment' : {keytype: key },\
	2253	'vis': self.state[key]['vis'],\
	2254	}
	2255	else:
	2256	state = self.state[key]['experimentStatus']
[866c983]	2257	self.state_lock.release()
	2258
	2259	if rv: return rv
[bd3e314]	2260	else:
[db6b092]	2261	if state:
	2262	raise service_error(service_error.partial,
	2263	"Not ready: %s" % state)
	2264	else:
	2265	raise service_error(service_error.req, "No such experiment")
[987aaa1]	2266
def clean_info_response(self, rv):
    """
    Remove the information in the experiment's state object that is not in
    the info response.

    rv is modified in place and also returned:
      - 'owner' is dropped;
      - 'log' (a list of strings) is replaced by the joined string
        'allocationLog' (empty if there was no log);
      - 'federant' is dropped unless the experiment is 'active', in which
        case each federant loses its 'allocID' and 'uri'.

    Raises KeyError if rv has no 'experimentStatus'.
    """
    # Remove the owner info (should always be there, but...)
    rv.pop('owner', None)

    # Convert the log into the allocationLog parameter and remove the
    # log entry (with defensive programming)
    rv['allocationLog'] = "".join(rv.pop('log', []))

    if rv['experimentStatus'] != 'active':
        rv.pop('federant', None)
    else:
        # remove the allocationID and uri info from each federant
        for f in rv.get('federant', []):
            f.pop('allocID', None)
            f.pop('uri', None)
    return rv
[65f3f29]	2291
def get_info(self, req, fid):
    """
    Return all the stored info about this experiment

    req must carry an 'InfoRequestBody' dict whose 'experiment' entry
    names the experiment by 'fedid' or 'localname'.  fid is the requester
    and is checked with check_experiment_access.

    Returns a deep copy of the stored state after it has been passed
    through clean_info_response.

    Raises service_error (req) for a malformed request or an unknown
    experiment.
    """
    rv = None

    req = req.get('InfoRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no InfoRequestBody)")

    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    # The state may be massaged by the service function that called
    # get_info (e.g., encoded for XMLRPC transport) so send a copy of the
    # state.
    self.state_lock.acquire()
    try:
        if key in self.state:
            rv = copy.deepcopy(self.state[key])
    finally:
        # Release even if the copy fails.
        self.state_lock.release()

    if rv:
        return self.clean_info_response(rv)
    raise service_error(service_error.req, "No such experiment")
[7a8d667]	2329
def get_multi_info(self, req, fid):
    """
    Return all the stored info that this fedid can access

    Walks every fedid-keyed entry of the state database and collects a
    cleaned deep copy of each one that fid is allowed to access.
    Experiments that fid cannot access are skipped.  req is not examined.

    Returns { 'info': [ ... ] }.
    """
    rv = { 'info': [ ] }

    self.state_lock.acquire()
    try:
        for key in [k for k in self.state.keys() if isinstance(k, fedid)]:
            try:
                self.check_experiment_access(fid, key)
            except service_error:
                # No access to this one: leave it out rather than failing
                # the whole request while the lock is still held.
                continue

            if key in self.state:
                e = copy.deepcopy(self.state[key])
                e = self.clean_info_response(e)
                rv['info'].append(e)
    finally:
        self.state_lock.release()
    return rv
[65f3f29]	2346
def terminate_experiment(self, req, fid):
    """
    Swap this experiment out on the federants and delete the shared
    information

    req must carry a 'TerminateRequestBody' dict whose 'experiment' entry
    names the experiment by 'fedid' or 'localname'; an optional 'force'
    flag allows terminating an experiment that is still 'starting' or
    'terminating' and suppresses errors from releasing access.  fid is
    the requester and is checked with check_experiment_access.

    Returns a dict with the 'experiment' identifier from the request and
    'deallocationLog', the text logged while terminating.

    Raises service_error: req for a malformed request or no saved state,
    partial if the experiment is still being created or destroyed and
    force is not set, internal if the saved state has no status.  Errors
    from release_access are re-raised unless the experiment had failed or
    force is set.
    """
    tbparams = { }
    req = req.get('TerminateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no TerminateRequestBody)")
    force = req.get('force', False)
    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    dealloc_list = [ ]

    # Create a logger that logs to the dealloc_list as well as to the main
    # log file.
    dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
    h = logging.StreamHandler(self.list_log(dealloc_list))
    # XXX: there should be a global one of these rather than repeating the
    # code.
    h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
                '%d %b %y %H:%M:%S'))
    dealloc_log.addHandler(h)

    try:
        # The first locked section builds a consistent temporary tbparams
        # to deallocate with.  The lock is dropped for the deallocations
        # themselves and retaken to remove the experiment state when the
        # termination is complete.
        self.state_lock.acquire()
        try:
            fed_exp = self.state.get(key, None)
            if not fed_exp:
                raise service_error(service_error.req, "No saved state")

            # First make sure that the experiment creation is complete.
            status = fed_exp.get('experimentStatus', None)
            if not status:
                # No status??? trouble
                raise service_error(service_error.internal,
                        "Experiment has no status!?")

            if status in ('starting', 'terminating'):
                if not force:
                    raise service_error(service_error.partial,
                            'Experiment still being created or destroyed')
                self.log.warning('Experiment in %s state ' % status + \
                        'being terminated by force.')

            # experimentID is a list of dicts that are self-describing
            # identifiers.  This finds all the fedids and localnames - the
            # keys of self.state - and puts them into ids.
            ids = []
            for ident in fed_exp.get('experimentID', []):
                if 'fedid' in ident: ids.append(ident['fedid'])
                if 'localname' in ident: ids.append(ident['localname'])

            # Collect the allocation/segment ids, keyed by testbed uri.
            # Federants missing either field are skipped.
            for fed in fed_exp.get('federant', []):
                try:
                    tb = fed['uri']
                    aid = fed['allocID']
                except KeyError:
                    continue
                tbparams[tb] = aid

            fed_exp['experimentStatus'] = 'terminating'
            if self.state_filename: self.write_state()
        finally:
            # Covers every exit, including the errors raised above and a
            # failing write_state.
            self.state_lock.release()

        # Stop everyone.  NB, wait_for_all waits until a thread starts and
        # then completes, so we can't wait if nothing starts.  So, no
        # tbparams, no start.
        if len(tbparams) > 0:
            thread_pool = self.thread_pool(self.nthreads)
            for tb in tbparams.keys():
                # Create and start a thread to stop the segment.  The
                # tbparams key is the testbed uri.
                thread_pool.wait_for_slot()
                t = self.pooled_thread(
                        target=self.terminate_segment(log=dealloc_log,
                            testbed=tb,
                            cert_file=self.cert_file,
                            cert_pwd=self.cert_pwd,
                            trusted_certs=self.trusted_certs,
                            caller=self.call_TerminateSegment),
                        args=(tb, tbparams[tb]), name=tb,
                        pdata=thread_pool, trace_file=self.trace_file)
                t.start()
            # Wait for completions
            thread_pool.wait_for_all_done()

        # release the allocations (failed experiments have done this
        # already, and starting experiments may be in odd states, so we
        # ignore errors releasing those allocations
        try:
            for tb in tbparams.keys():
                # This releases access by uri
                self.release_access(None, tbparams[tb], uri=tb)
        except service_error:
            if status != 'failed' and not force:
                raise

        # Remove the terminated experiment
        self.state_lock.acquire()
        try:
            for ident in ids:
                if ident in self.state: del self.state[ident]

            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

        return {
                'experiment': exp,
                'deallocationLog': "".join(dealloc_list),
            }
    finally:
        # Loggers are process-wide and persist by name; without this every
        # call would leave another handler attached to the same logger.
        dealloc_log.removeHandler(h)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: