Context Navigation

source: fedd/federation/experiment_control.py @ cc6091c

compt_changes

Last change on this file since cc6091c was cde9b98, checked in by Ted Faber <faber@…>, 13 years ago
More debug logging
Property mode set to `100644`
File size: 92.8 KB

Rev	Line
[6679c122]	1	#!/usr/local/bin/python
	2
	3	import os,sys
	4
	5	import re
	6	import random
	7	import string
	8	import subprocess
	9	import tempfile
	10	import copy
[eee2b2e]	11	import pickle
[c971895]	12	import logging
[79b6596]	13	import signal
	14	import time
[6679c122]	15
[3df9b33]	16	import os.path
	17
[3441fe3]	18	import traceback
[c971895]	19	# For parsing visualization output and splitter output
	20	import xml.parsers.expat
[3441fe3]	21
[6c57fe9]	22	from threading import Lock, Thread, Condition
	23	from subprocess import call, Popen, PIPE
[c573278]	24	from string import join
[6679c122]	25
[db6b092]	26	from urlparse import urlparse
	27	from urllib2 import urlopen
	28
[ec4fb42]	29	from util import *
[6bedbdba]	30	from deter import fedid, generate_fedid
[9460b1e]	31	from remote_service import xmlrpc_handler, soap_handler, service_caller
[c971895]	32	from service_error import service_error
[2761484]	33	from synch_store import synch_store
[73e7f5c]	34	from experiment_partition import experiment_partition
[1d73342]	35	from experiment_control_legacy import experiment_control_legacy
[7206e5a]	36	from authorizer import abac_authorizer
[faea607]	37	from thread_pool import thread_pool, pooled_thread
[ab3d6c5]	38	from experiment_info import experiment_info, allocation_info, federated_service
[22a1a77]	39	from operation_status import operation_status
[6679c122]	40
[6bedbdba]	41	from deter import topdl
[044dd20]	42	from deter import ip_allocator
	43	from deter import ip_addr
[f07fa49]	44	import list_log
[db6b092]	45
[11a08b0]	46
	47	class nullHandler(logging.Handler):
	48	def emit(self, record): pass
	49
	50	fl = logging.getLogger("fedd.experiment_control")
	51	fl.addHandler(nullHandler())
	52
[1d73342]	53	class experiment_control_local(experiment_control_legacy):
[0ea11af]	54	"""
	55	Control of experiments that this system can directly access.
	56
	57	Includes experiment creation, termination and information dissemination.
	58	Thread safe.
	59	"""
[79b6596]	60
	61	class ssh_cmd_timeout(RuntimeError): pass
[6679c122]	62
[f069052]	63	call_RequestAccess = service_caller('RequestAccess')
	64	call_ReleaseAccess = service_caller('ReleaseAccess')
[cc8d8e9]	65	call_StartSegment = service_caller('StartSegment')
[db974ed]	66	call_TerminateSegment = service_caller('TerminateSegment')
[6e33086]	67	call_InfoSegment = service_caller('InfoSegment')
[22a1a77]	68	call_OperationSegment = service_caller('OperationSegment')
[5f6929a]	69	call_Ns2Topdl = service_caller('Ns2Topdl')
[058f58e]	70
def __init__(self, config=None, auth=None):
    """
    Initialize the various attributes, most from the config object.

    config is read through get(section, option[, default]), getboolean()
    and getint() calls; despite its None default it is dereferenced
    unconditionally, so a real configuration object is required.

    auth is the authorizer to use when auth_type is 'legacy'.  If it is
    false a local authorizer is created.  With auth_type 'abac' an
    abac_authorizer is loaded from auth_dir instead.

    Raises service_error if auth_type is neither 'legacy' nor 'abac'.
    Saved experiment state and the synch store are read as a side effect.
    """

    def parse_tarfile_list(tf):
        """
        Parse a tarfile list from the configuration.  This is a set of
        paths and tarfiles separated by spaces.  Returns a list of
        (path, tarfile) tuples; a trailing unpaired word is ignored.
        """
        if tf is None:
            return [ ]
        words = tf.split()
        # zip stops at the shorter slice, which drops an odd trailing word
        return [ (p, t) for p, t in zip(words[0::2], words[1::2]) ]

    self.list_log = list_log.list_log

    # Certificate settings: the experiment_control section wins, globals
    # is the fallback.
    self.cert_file = config.get("experiment_control", "cert_file")
    if self.cert_file:
        self.cert_pwd = config.get("experiment_control", "cert_pwd")
    else:
        self.cert_file = config.get("globals", "cert_file")
        self.cert_pwd = config.get("globals", "cert_pwd")

    self.trusted_certs = config.get("experiment_control", "trusted_certs") \
            or config.get("globals", "trusted_certs")

    self.repodir = config.get("experiment_control", "repodir")
    self.repo_url = config.get("experiment_control", "repo_url",
            "https://users.isi.deterlab.net:23235")

    self.exp_stem = "fed-stem"
    self.log = logging.getLogger("fedd.experiment_control")
    set_log_level(config, "experiment_control", self.log)
    self.muxmax = 2
    self.nthreads = 10
    self.randomize_experiments = False

    self.splitter = None
    self.ssh_keygen = "/usr/bin/ssh-keygen"
    self.ssh_identity_file = None

    self.debug = config.getboolean("experiment_control", "create_debug")
    self.cleanup = not config.getboolean("experiment_control",
            "leave_tmpfiles")
    self.state_filename = config.get("experiment_control",
            "experiment_state")
    self.store_filename = config.get("experiment_control",
            "synch_store")
    self.store_url = config.get("experiment_control", "store_url")
    self.splitter_url = config.get("experiment_control", "ns2topdl_uri")
    self.fedkit = parse_tarfile_list(
            config.get("experiment_control", "fedkit"))
    self.gatewaykit = parse_tarfile_list(
            config.get("experiment_control", "gatewaykit"))

    dt = config.get("experiment_control", "direct_transit")
    self.auth_type = config.get('experiment_control', 'auth_type') \
            or 'legacy'
    self.auth_dir = config.get('experiment_control', 'auth_dir')
    # XXX: document this!
    self.info_cache_limit = \
            config.getint('experiment_control', 'info_cache', 600)
    # direct_transit is a comma separated list of testbed names
    if dt:
        self.direct_transit = [ tb.strip() for tb in dt.split(",") ]
    else:
        self.direct_transit = [ ]
    # NB for internal master/slave ops, not experiment setup
    self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")

    # overrides is a comma separated list of fedids, each optionally
    # prefixed with 'fedid:'
    self.overrides = set([])
    ovr = config.get('experiment_control', 'overrides')
    if ovr:
        for o in ovr.split(","):
            o = o.strip()
            if o.startswith('fedid:'):
                o = o[len('fedid:'):]
            self.overrides.add(fedid(hexstr=o))

    self.state = { }
    self.state_lock = Lock()
    self.tclsh = "/usr/local/bin/otclsh"
    self.tcl_splitter = config.get("ns2topdl", "tcl_splitter") or \
            config.get("experiment_control", "tcl_splitter",
                    "/usr/testbed/lib/ns2ir/parse.tcl")
    mapdb_file = config.get("experiment_control", "mapdb")
    self.trace_file = sys.stderr

    self.def_expstart = \
            "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " + \
            "/tmp/federate"
    self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " + \
            "FEDDIR/hosts"
    self.def_gwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " + \
            "/tmp/bridge.log"
    self.def_mgwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " + \
            "/tmp/bridge.log"
    self.def_gwimage = "FBSD61-TUNNEL2"
    self.def_gwtype = "pc"
    self.local_access = { }

    if self.auth_type == 'legacy':
        if auth:
            self.auth = auth
        else:
            self.log.error("[access]: No authorizer initialized, " +
                    "creating local one.")
            # The new authorizer must be stored on self: read_state()
            # and read_store() below use self.auth in legacy mode.
            # NOTE(review): authorizer is not among the names this file
            # imports explicitly - confirm it is in scope.
            self.auth = authorizer()
        self.get_access = self.legacy_get_access
    elif self.auth_type == 'abac':
        self.auth = abac_authorizer(load=self.auth_dir)
    else:
        raise service_error(service_error.internal,
                "Unknown auth_type: %s" % self.auth_type)

    if mapdb_file:
        self.read_mapdb(mapdb_file)
    else:
        self.log.warning(
                "[experiment_control] No testbed map, using defaults")
        self.tbmap = {
                'deter':'https://users.isi.deterlab.net:23235',
                'emulab':'https://users.isi.deterlab.net:23236',
                'ucb':'https://users.isi.deterlab.net:23237',
                }
        # read_mapdb() always creates tbactive; keep the attribute
        # defined on the default path too.
        self.tbactive = set()

    # Grab saved state.  OK to do this w/o locking because it's read only
    # and only one thread should be in existence that can see self.state at
    # this point.
    if self.state_filename:
        self.read_state()

    if self.store_filename:
        self.read_store()
    else:
        self.log.warning("No saved synch store")
        # An instance, as in read_store(), not the class itself
        self.synch_store = synch_store()

    # Dispatch tables
    self.soap_services = {
        'New': soap_handler('New', self.new_experiment),
        'Create': soap_handler('Create', self.create_experiment),
        'Vtopo': soap_handler('Vtopo', self.get_vtopo),
        'Vis': soap_handler('Vis', self.get_vis),
        'Info': soap_handler('Info', self.get_info),
        'MultiInfo': soap_handler('MultiInfo', self.get_multi_info),
        'Operation': soap_handler('Operation', self.do_operation),
        'Terminate': soap_handler('Terminate',
            self.terminate_experiment),
        'GetValue': soap_handler('GetValue', self.GetValue),
        'SetValue': soap_handler('SetValue', self.SetValue),
    }

    self.xmlrpc_services = {
        'New': xmlrpc_handler('New', self.new_experiment),
        'Create': xmlrpc_handler('Create', self.create_experiment),
        'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
        'Vis': xmlrpc_handler('Vis', self.get_vis),
        'Info': xmlrpc_handler('Info', self.get_info),
        'MultiInfo': xmlrpc_handler('MultiInfo', self.get_multi_info),
        'Terminate': xmlrpc_handler('Terminate',
            self.terminate_experiment),
        'Operation': xmlrpc_handler('Operation', self.do_operation),
        'GetValue': xmlrpc_handler('GetValue', self.GetValue),
        'SetValue': xmlrpc_handler('SetValue', self.SetValue),
    }
[19cc408]	240
[a97394b]	241	# Call while holding self.state_lock
def write_state(self):
    """
    Write a new copy of experiment state after copying the existing state
    to a backup.

    State format is a simple pickling of the state dictionary.  Call
    while holding self.state_lock.  Failures are logged, not raised.
    """
    if os.access(self.state_filename, os.W_OK):
        copy_file(self.state_filename,
                "%s.bak" % self.state_filename)
    try:
        # Binary mode is what pickle expects; close the file even when
        # the dump fails so the descriptor is not leaked.
        f = open(self.state_filename, 'wb')
        try:
            pickle.dump(self.state, f)
        finally:
            f.close()
    except EnvironmentError as e:
        self.log.error("Can't write file %s: %s" % \
                (self.state_filename, e))
    except pickle.PicklingError as e:
        self.log.error("Pickling problem: %s" % e)
    except TypeError as e:
        self.log.error("Pickling problem (TypeError): %s" % e)
[eee2b2e]	262
@staticmethod
def get_alloc_ids(exp):
    """
    Return the allocID of each allocation that exp.get_all_allocations()
    yields, in the same order.  Used by read_store and read_state.
    """
    ids = [ ]
    for alloc in exp.get_all_allocations():
        ids.append(alloc.allocID)
    return ids
	270
[2761484]	271
[a97394b]	272	# Call while holding self.state_lock
def read_state(self):
    """
    Read a new copy of experiment state.  Old state is overwritten.

    State format is a simple pickling of the state dictionary.  Call
    while holding self.state_lock.  If the file cannot be opened or
    unpickled a warning is logged and the current state is kept.  In
    legacy authorization mode, access attributes are then set for each
    experiment in the state.

    NOTE(review): pickle.load executes whatever the file describes, so
    the state file must only be writable by trusted parties.
    """

    try:
        f = open(self.state_filename, "rb")
        try:
            self.state = pickle.load(f)
        finally:
            # Do not leak the descriptor if unpickling fails
            f.close()
        self.log.debug("[read_state]: Read state from %s" % \
                self.state_filename)
    except EnvironmentError as e:
        self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
                % (self.state_filename, e))
    except (pickle.UnpicklingError, EOFError) as e:
        # EOFError is what an empty or truncated file produces
        self.log.warning(("[read_state]: No saved state: " + \
                "Unpickling failed: %s") % e)

    for s in self.state.values():
        try:
            eid = s.fedid
            if eid:
                if self.auth_type == 'legacy':
                    # XXX: legacy
                    # Holders of the eid have rights to the experiment
                    self.auth.set_attribute(eid, eid)
                    # allow overrides to control experiments as well
                    for o in self.overrides:
                        self.auth.set_attribute(o, eid)
                    # Set permissions to allow reading of the software
                    # repo, if any, as well.
                    for a in self.get_alloc_ids(s):
                        self.auth.set_attribute(a, 'repo/%s' % eid)
            else:
                raise KeyError("No experiment id")
        except KeyError as e:
            self.log.warning("[read_state]: State ownership or identity " +\
                    "misformatted in %s: %s" % (self.state_filename, e))
[4064742]	315
def read_mapdb(self, file):
    """
    Read a simple colon separated list of mappings for the
    label-to-testbed-URL mappings.  Clears or creates self.tbmap.
    Also adds testbeds to self.tbactive if they include ", active" after
    their name.

    Lines that are empty or start with '#' are skipped.  A line without
    a colon is logged and ignored.  If the file cannot be read a warning
    is logged and whatever was parsed so far is kept.
    """

    self.tbmap = { }
    self.tbactive = set()
    lineno = 0
    try:
        f = open(file, "r")
        try:
            for line in f:
                lineno += 1
                line = line.strip()
                if line.startswith('#') or len(line) == 0:
                    continue
                try:
                    label, url = line.split(':', 1)
                    if ',' in label:
                        label, act = label.split(',', 1)
                        active = (act.strip() == 'active')
                    else:
                        active = False
                    self.tbmap[label] = url
                    if active: self.tbactive.add(label)
                except ValueError as e:
                    # Format the whole message at once: applying % to
                    # only the second fragment with three arguments
                    # raised a TypeError.
                    self.log.warning(("[read_mapdb] Ignored bad line " + \
                            "(%d) in map db: %s %s") % (lineno, line, e))
        finally:
            # Close on every path, including read errors
            f.close()
    except EnvironmentError as e:
        self.log.warning(("[read_mapdb]: No saved map database: Can't " +\
                "open %s: %s") % (file, e))
[2761484]	351
	352	def read_store(self):
	353	try:
	354	self.synch_store = synch_store()
	355	self.synch_store.load(self.store_filename)
	356	self.log.debug("[read_store]: Read store from %s" % \
	357	self.store_filename)
[d3c8759]	358	except EnvironmentError, e:
[2761484]	359	self.log.warning("[read_store]: No saved store: Can't open %s: %s"\
	360	% (self.state_filename, e))
	361	self.synch_store = synch_store()
	362
	363	# Set the initial permissions on data in the store. XXX: This ad hoc
	364	# authorization attribute initialization is getting out of hand.
[7206e5a]	365	# XXX: legacy
	366	if self.auth_type == 'legacy':
	367	for k in self.synch_store.all_keys():
	368	try:
	369	if k.startswith('fedid:'):
	370	fid = fedid(hexstr=k[6:46])
	371	if self.state.has_key(fid):
	372	for a in self.get_alloc_ids(self.state[fid]):
	373	self.auth.set_attribute(a, k)
	374	except ValueError, e:
	375	self.log.warn("Cannot deduce permissions for %s" % k)
[2761484]	376
	377
	378	def write_store(self):
	379	"""
	380	Write a new copy of synch_store after writing current state
	381	to a backup. We use the internal synch_store pickle method to avoid
	382	incinsistent data.
	383
	384	State format is a simple pickling of the store.
	385	"""
	386	if os.access(self.store_filename, os.W_OK):
	387	copy_file(self.store_filename, \
	388	"%s.bak" % self.store_filename)
	389	try:
	390	self.synch_store.save(self.store_filename)
[d3c8759]	391	except EnvironmentError, e:
[2761484]	392	self.log.error("Can't write file %s: %s" % \
	393	(self.store_filename, e))
	394	except TypeError, e:
	395	self.log.error("Pickling problem (TypeError): %s" % e)
	396
[cf0ff4f]	397
	398	def remove_dirs(self, dir):
	399	"""
	400	Remove the directory tree and all files rooted at dir. Log any errors,
	401	but continue.
	402	"""
	403	self.log.debug("[removedirs]: removing %s" % dir)
	404	try:
	405	for path, dirs, files in os.walk(dir, topdown=False):
	406	for f in files:
	407	os.remove(os.path.join(path, f))
	408	for d in dirs:
	409	os.rmdir(os.path.join(path, d))
	410	os.rmdir(dir)
	411	except EnvironmentError, e:
	412	self.log.error("Error deleting directory tree in %s" % e);
	413
	414	@staticmethod
	415	def make_temp_certfile(expcert, tmpdir):
	416	"""
	417	make a protected copy of the access certificate so the experiment
	418	controller can act as the experiment principal. mkstemp is the most
	419	secure way to do that. The directory should be created by
	420	mkdtemp. Return the filename.
	421	"""
	422	if expcert and tmpdir:
	423	try:
	424	certf, certfn = tempfile.mkstemp(suffix=".pem", dir=tmpdir)
	425	f = os.fdopen(certf, 'w')
	426	print >> f, expcert
	427	f.close()
	428	except EnvironmentError, e:
	429	raise service_error(service_error.internal,
	430	"Cannot create temp cert file?")
	431	return certfn
	432	else:
	433	return None
	434
[866c983]	435
def generate_ssh_keys(self, dest, type="rsa" ):
    """
    Generate a set of keys for the gateways to use to talk.

    Keys are of type type ("rsa" or "dsa", case insensitive) and are
    stored in the required dest file.  The key is created with an empty
    passphrase by running self.ssh_keygen.

    Raises ValueError for any other key type and service_error if
    /dev/null cannot be opened or ssh-keygen exits non-zero.
    """
    valid_types = ("rsa", "dsa")
    t = type.lower()
    if t not in valid_types:
        raise ValueError("Unsupported ssh key type: %s" % type)
    cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]

    try:
        trace = open("/dev/null", "w")
    except EnvironmentError:
        raise service_error(service_error.internal,
                "Cannot open /dev/null??")

    try:
        # call() reports failure through its return code; it raises
        # OSError only if the command cannot be started.
        self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
        rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
    finally:
        # Release the /dev/null descriptor on every path
        trace.close()

    if rv != 0:
        raise service_error(service_error.internal,
                "Cannot generate nonce ssh keys.  %s return code %d" \
                        % (self.ssh_keygen, rv))
[6679c122]	460
def generate_seer_certs(self, destdir):
    '''
    Create a SEER ca cert and a node cert in destdir/ca.pem and
    destdir/node.pem respectively.  These will be distributed throughout
    the federated experiment.  This routine reports errors via
    service_errors.

    node.pem holds the signed node certificate followed by the node key.
    The intermediate key, request and signed files are deleted on
    success.
    '''
    openssl = '/usr/bin/openssl'
    # All the filenames and parameters we need for openssl calls below
    ca_key = os.path.join(destdir, 'ca.key')
    ca_pem = os.path.join(destdir, 'ca.pem')
    node_key = os.path.join(destdir, 'node.key')
    node_pem = os.path.join(destdir, 'node.pem')
    node_req = os.path.join(destdir, 'node.req')
    node_signed = os.path.join(destdir, 'node.signed')
    days = '%s' % (365 * 10)
    serial = '%s' % random.randint(0, 1<<16)

    try:
        # Sequence of calls to create a CA key, create a ca cert, create a
        # node key, node signing request, and finally a signed node
        # certificate.
        # NOTE(review): 1024-bit RSA keys are weak by current standards;
        # confirm whether a larger size is acceptable to SEER.
        sequence = (
                (openssl, 'genrsa', '-out', ca_key, '1024'),
                (openssl, 'req', '-new', '-x509', '-key', ca_key, '-out',
                    ca_pem, '-days', days, '-subj',
                    '/C=US/ST=CA/O=DETER/OU=fedd/CN=CA' ),
                (openssl, 'genrsa', '-out', node_key, '1024'),
                (openssl, 'req', '-new', '-key', node_key, '-out',
                    node_req, '-days', days, '-subj',
                    '/C=US/ST=CA/O=DETER/OU=fedd/CN=node' ),
                (openssl, 'x509', '-CA', ca_pem, '-CAkey', ca_key,
                    '-set_serial', serial, '-req', '-in', node_req,
                    '-out', node_signed, '-days', days),
                )
        # Do all that stuff; bail if there's an error, and push all the
        # output to dev/null.  One handle serves every command and is
        # closed afterwards.
        trace = open("/dev/null", "w")
        try:
            for cmd in sequence:
                rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
                if rv != 0:
                    raise service_error(service_error.internal,
                            "Cannot generate SEER certs.  %s return code %d" \
                                    % (' '.join(cmd), rv))
        finally:
            trace.close()

        # Concatenate the signed certificate and node key into node.pem
        f = open(node_pem, 'w')
        try:
            for comp in (node_signed, node_key):
                g = open(comp, 'r')
                try:
                    f.write(g.read())
                finally:
                    g.close()
        finally:
            f.close()

        # Throw out intermediaries.
        for fn in (ca_key, node_key, node_req, node_signed):
            os.unlink(fn)

    except EnvironmentError as e:
        # Any difficulties with the file system wind up here
        raise service_error(service_error.internal,
                "File error on %s while creating SEER certs: %s" % \
                        (e.filename, e.strerror))
	522
	523
	524
def gentopo(self, str):
    """
    Build the topology data structure from the splitter's XML
    description, passed in as str.

    The topology XML looks like:
        <experiment>
            <nodes>
                <node><vname></vname><ips>ip1:ip2</ips></node>
            </nodes>
            <lans>
                <lan>
                    <vname></vname><vnode></vnode><ip></ip>
                    <bandwidth></bandwidth><member>node:port</member>
                </lan>
            </lans>

    Returns a dict with a 'node' list and a 'lan' list; each entry is a
    dict of that element's subelements.  bandwidth is converted to int
    and delay to float, the rest stay strings.
    """
    class topo_parse:
        """
        Expat callbacks that accumulate the topology data structure.
        """
        def __init__(self):
            # How the text of each known subelement is converted
            keep = lambda text: text
            self.convert = { 'bandwidth': int, 'delay': float }
            for tag in ('vname', 'vnode', 'ips', 'ip', 'member'):
                self.convert[tag] = keep
            # The final data structure
            self.nodes = [ ]
            self.lans = [ ]
            self.topo = { 'node': self.nodes, 'lan': self.lans }
            self.element = { }  # Element currently being filled in
            self.chars = ""     # Text seen since the last subelement

        def end_element(self, name):
            if name in self.topo:
                # A finished node or lan goes onto its list
                self.topo[name].append(self.element)
                self.element = { }
            elif name in self.convert:
                # A finished subelement is stored in the current element
                self.element[name] = self.convert[name](self.chars)
                self.chars = ""

        def found_chars(self, data):
            self.chars += data.rstrip()

    handler = topo_parse()
    xml_parser = xml.parsers.expat.ParserCreate()
    xml_parser.EndElementHandler = handler.end_element
    xml_parser.CharacterDataHandler = handler.found_chars

    xml_parser.Parse(str)

    return handler.topo
	592
[0d830de]	593
	594	def genviz(self, topo):
[866c983]	595	"""
	596	Generate the visualization the virtual topology
	597	"""
	598
	599	neato = "/usr/local/bin/neato"
	600	# These are used to parse neato output and to create the visualization
	601	# file.
[0ac1934]	602	vis_re = re.compile('^\s"?([\w\-]+)"?\s+\[.pos="([\d\.]+),([\d\.]+)"')
[866c983]	603	vis_fmt = "<node><name>%s</name><x>%s</x><y>%s</y><type>" + \
	604	"%s</type></node>"
	605
	606	try:
	607	# Node names
	608	nodes = [ n['vname'] for n in topo['node'] ]
	609	topo_lans = topo['lan']
[cc8d8e9]	610	except KeyError, e:
	611	raise service_error(service_error.internal, "Bad topology: %s" %e)
[866c983]	612
	613	lans = { }
	614	links = { }
	615
	616	# Walk through the virtual topology, organizing the connections into
	617	# 2-node connections (links) and more-than-2-node connections (lans).
	618	# When a lan is created, it's added to the list of nodes (there's a
	619	# node in the visualization for the lan).
	620	for l in topo_lans:
	621	if links.has_key(l['vname']):
	622	if len(links[l['vname']]) < 2:
	623	links[l['vname']].append(l['vnode'])
	624	else:
	625	nodes.append(l['vname'])
	626	lans[l['vname']] = links[l['vname']]
	627	del links[l['vname']]
	628	lans[l['vname']].append(l['vnode'])
	629	elif lans.has_key(l['vname']):
	630	lans[l['vname']].append(l['vnode'])
	631	else:
	632	links[l['vname']] = [ l['vnode'] ]
	633
	634
	635	# Open up a temporary file for dot to turn into a visualization
	636	try:
	637	df, dotname = tempfile.mkstemp()
	638	dotfile = os.fdopen(df, 'w')
[d3c8759]	639	except EnvironmentError:
[866c983]	640	raise service_error(service_error.internal,
	641	"Failed to open file in genviz")
	642
[db6b092]	643	try:
	644	dnull = open('/dev/null', 'w')
[d3c8759]	645	except EnvironmentError:
[db6b092]	646	service_error(service_error.internal,
[886307f]	647	"Failed to open /dev/null in genviz")
	648
[866c983]	649	# Generate a dot/neato input file from the links, nodes and lans
	650	try:
	651	print >>dotfile, "graph G {"
	652	for n in nodes:
	653	print >>dotfile, '\t"%s"' % n
	654	for l in links.keys():
	655	print >>dotfile, '\t"%s" -- "%s"' % tuple(links[l])
	656	for l in lans.keys():
	657	for n in lans[l]:
	658	print >>dotfile, '\t "%s" -- "%s"' % (n,l)
	659	print >>dotfile, "}"
	660	dotfile.close()
	661	except TypeError:
	662	raise service_error(service_error.internal,
	663	"Single endpoint link in vtopo")
[d3c8759]	664	except EnvironmentError:
[866c983]	665	raise service_error(service_error.internal, "Cannot write dot file")
	666
	667	# Use dot to create a visualization
[5954004]	668	try:
	669	dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005',
	670	'-Gmaxiter=2000', '-Gpack=true', dotname], stdout=PIPE,
	671	stderr=dnull, close_fds=True)
	672	except EnvironmentError:
	673	raise service_error(service_error.internal,
	674	"Cannot generate visualization: is graphviz available?")
[db6b092]	675	dnull.close()
[866c983]	676
	677	# Translate dot to vis format
	678	vis_nodes = [ ]
	679	vis = { 'node': vis_nodes }
	680	for line in dot.stdout:
	681	m = vis_re.match(line)
	682	if m:
	683	vn = m.group(1)
	684	vis_node = {'name': vn, \
	685	'x': float(m.group(2)),\
	686	'y' : float(m.group(3)),\
	687	}
	688	if vn in links.keys() or vn in lans.keys():
	689	vis_node['type'] = 'lan'
	690	else:
	691	vis_node['type'] = 'node'
	692	vis_nodes.append(vis_node)
	693	rv = dot.wait()
	694
	695	os.remove(dotname)
[1fed67b]	696	# XXX: graphviz seems to use low return codes for warnings, like
	697	# "couldn't find font"
	698	if rv < 2 : return vis
[866c983]	699	else: return None
[d0ae12d]	700
[db6b092]	701
def release_access(self, tb, aid, tbmap=None, uri=None, cert_file=None,
        cert_pwd=None):
    """
    Release access to testbed tb through fedd for allocation aid.

    The service is contacted at uri; when no uri is given it is looked
    up under tb in tbmap.  A uri found in self.local_access is handled
    by that local object, anything else by a remote ReleaseAccess call.
    Raises service_error if no uri can be determined.
    """

    if not uri and tbmap:
        uri = tbmap.get(tb, None)
    if not uri:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    alloc = { 'allocID': { 'fedid': aid } }
    if uri in self.local_access:
        local = self.local_access[uri]
        resp = local.ReleaseAccess(
                { 'ReleaseAccessRequestBody' : alloc, },
                fedid(file=cert_file))
        resp = { 'ReleaseAccessResponseBody': resp }
    else:
        resp = self.call_ReleaseAccess(uri, alloc,
                cert_file, cert_pwd, self.trusted_certs)

    # better error coding
[db6b092]	725
def remote_ns2topdl(self, uri, desc):
    """
    Ask the Ns2Topdl service at uri to convert the ns2 description desc
    and return the result as a topdl.Topology.

    Raises service_error (protocol) if the response has no
    Ns2TopdlResponseBody, or if that body carries no topdl description.
    """

    req = {
            'description' : { 'ns2description': desc },
        }

    r = self.call_Ns2Topdl(uri, req, self.cert_file, self.cert_pwd,
            self.trusted_certs)

    if 'Ns2TopdlResponseBody' not in r:
        raise service_error(service_error.protocol, "Bad splitter response")

    ed = r['Ns2TopdlResponseBody'].get('experimentdescription', None)
    # ed is None when the body has no experimentdescription; treat that
    # the same as a description without a topdl part.
    if ed is not None and 'topdldescription' in ed:
        return topdl.Topology(**ed['topdldescription'])
    raise service_error(service_error.protocol,
            "Bad splitter response (no output)")
[cc8d8e9]	745
class start_segment:
    """
    Callable that issues a StartSegment request through caller and keeps
    the outcome: the raw response, any proof, and maps of the computers
    (self.node) and substrates (self.subs) in the returned topology,
    keyed by name.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None,
            log_collector=None):
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep the value passed in; it used to be replaced with None, so
        # the caller never received the trusted certificates.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed
        self.log_collector = log_collector
        self.response = None
        self.node = { }
        self.subs = { }
        self.proof = None

    def make_map(self, resp):
        """
        Record the computers and substrates of the topology in resp, a
        response body.  Logs a warning and records nothing if resp holds
        no topdl description.
        """
        if 'segmentdescription' not in resp  or \
                'topdldescription' not in resp['segmentdescription']:
            self.log.warning('No topology returned from startsegment')
            return

        top = topdl.Topology(
                **resp['segmentdescription']['topdldescription'])

        for e in top.elements:
            if isinstance(e, topdl.Computer):
                self.node[e.name] = e
        for s in top.substrates:
            self.subs[s.name] = s

    def __call__(self, uri, aid, topo, masters, attrs=None, connInfo=None):
        """
        Send the StartSegment request for allocation aid and topology
        topo to uri.  masters maps testbed names to lists of services;
        those this testbed imports or exports are added to the request.
        Returns True on success and False if a service_error occurred.
        """
        req = {
                'allocID': { 'fedid' : aid },
                'segmentdescription': {
                    'topdldescription': topo.to_dict(),
                },
            }

        if connInfo:
            req['connection'] = connInfo

        # Services from any testbed that list this testbed as importer
        import_svcs = [ s for m in masters.values() \
                for s in m if self.testbed in s.importers ]

        if import_svcs or self.testbed in masters:
            req['service'] = []

        for s in import_svcs:
            for r in s.reqs:
                sr = copy.deepcopy(r)
                sr['visibility'] = 'import'
                req['service'].append(sr)

        for s in masters.get(self.testbed, []):
            for r in s.reqs:
                sr = copy.deepcopy(r)
                sr['visibility'] = 'export'
                req['service'].append(sr)

        if attrs:
            req['fedAttr'] = attrs

        try:
            self.log.debug("Calling StartSegment at %s " % uri)
            r = self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            if 'StartSegmentResponseBody' in r:
                body = r['StartSegmentResponseBody']
                lval = body.get('allocationLog', None)
                if lval and self.log_collector:
                    # Pass the allocation log on line by line
                    for line in lval.splitlines(True):
                        self.log_collector.write(line)
                self.make_map(body)
                if 'proof' in r: self.proof = r['proof']
                self.response = r
            else:
                raise service_error(service_error.internal,
                        "Bad response!?: %s" %r)
            return True
        except service_error as e:
            self.log.error("Start segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
[cc8d8e9]	831
	832
[5ae3857]	833
class terminate_segment:
    """
    Callable that issues a TerminateSegment request through caller and
    reports whether it succeeded.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None):
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep the value passed in; it used to be replaced with None, so
        # the caller never received the trusted certificates.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed

    def __call__(self, uri, aid ):
        """
        Ask the service at uri to terminate allocation aid.  Returns True
        on success and False if a service_error occurred.
        """
        req = {
                'allocID': {'fedid': aid },
            }
        self.log.info("Calling terminate segment")
        try:
            self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            self.log.info("Terminate segment succeeded")
            return True
        except service_error as e:
            self.log.error("Terminate segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
[db6b092]	859
class info_segment(start_segment):
    """
    Callable that issues an InfoSegment request through caller.  The
    response is recorded the same way start_segment records its own:
    node and substrate maps, proof and raw response.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None,
            log_collector=None):
        experiment_control_local.start_segment.__init__(self, debug,
                log, testbed, cert_file, cert_pwd, trusted_certs,
                caller, log_collector)
        # Make sure the certificates given here are the ones passed to
        # the caller, whatever the base initializer stored.
        self.trusted_certs = trusted_certs

    def __call__(self, uri, aid):
        """
        Ask the service at uri about allocation aid.  Returns True on
        success and False if a service_error occurred.
        """
        req = { 'allocID': { 'fedid' : aid } }

        try:
            self.log.debug("Calling InfoSegment at %s " % uri)
            r = self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            if 'InfoSegmentResponseBody' in r:
                self.make_map(r['InfoSegmentResponseBody'])
                if 'proof' in r: self.proof = r['proof']
                self.response = r
            else:
                raise service_error(service_error.internal,
                        "Bad response!?: %s" %r)
            return True
        except service_error as e:
            self.log.error("Info segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
	887
class operation_segment:
    """
    Callable that issues an OperationSegment request through caller and
    keeps the status list from the response in self.status.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None,
            log_collector=None):
        # log_collector is accepted for signature compatibility with the
        # other segment classes; it is not used here.
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep the value passed in; it used to be replaced with None, so
        # the caller never received the trusted certificates.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed
        self.status = None

    def __call__(self, uri, aid, op, targets, params):
        """
        Ask the service at uri to carry out op on targets within
        allocation aid; params, if any, are sent as 'parameter'.  Returns
        True on success and False if a service_error occurred.
        """
        req = {
                'allocID': { 'fedid' : aid },
                'operation': op,
                'target': targets,
                }
        if params: req['parameter'] = params

        try:
            self.log.debug("Calling OperationSegment at %s " % uri)
            r = self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            if 'OperationSegmentResponseBody' in r:
                r = r['OperationSegmentResponseBody']
                if 'status' in r:
                    self.status = r['status']
            else:
                raise service_error(service_error.internal,
                        "Bad response!?: %s" %r)
            return True
        except service_error as e:
            self.log.error("Operation segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
	926
def annotate_topology(self, top, data):
    """
    Fold the embedding information reported by the segments into top.

    top is the experiment topology; it is modified in place.  data is a
    sequence of objects that each expose a node dict (element name ->
    annotated element) and a subs dict (substrate name -> annotated
    substrate).

    For every topdl.Computer in top, the first entry of data that knows
    the computer's name supplies its local names, status, services,
    operations and operating systems.  For every substrate, each entry of
    data that knows it may supply a capacity and a latency; values that
    are None in the annotation leave the existing value alone.
    """
    # These routines do various parts of the annotation
    def add_new_names(new_names, names):
        """ add any names in new_names to the list in names """
        for n in new_names:
            if n not in names:
                names.append(n)

    def merge_services(ne, e):
        """
        Overwrite the services of e with the same-named services of ne,
        appending services e does not have yet.
        """
        for ns in ne.service:
            # NB: the else is on the for
            for s in e.service:
                if ns.name == s.name:
                    s.importer = ns.importer
                    s.param = ns.param
                    s.description = ns.description
                    s.status = ns.status
                    break
            else:
                e.service.append(ns)

    def merge_oses(ne, e):
        """
        Merge the operating system entries of ne into e
        """
        for new_os in ne.os:
            # NB: the else is on the for
            for cur_os in e.os:
                if new_os.name == cur_os.name:
                    # Each field is copied to its own counterpart.  (All
                    # three values used to be written to .version, so the
                    # distribution fields were never updated.)
                    cur_os.version = new_os.version
                    cur_os.distribution = new_os.distribution
                    cur_os.distributionversion = \
                            new_os.distributionversion
                    for a in new_os.attribute:
                        if cur_os.get_attribute(a.attribute):
                            cur_os.remove_attribute(a.attribute)
                        cur_os.set_attribute(a.attribute, a.value)
                    break
            else:
                # If both nodes have one OS, this is a replacement
                if len(ne.os) == 1 and len(e.os) == 1:
                    e.os = ne.os
                else:
                    e.os.append(new_os)

    # Annotate the topology with embedding info
    for e in top.elements:
        if isinstance(e, topdl.Computer):
            for s in data:
                ne = s.node.get(e.name, None)
                if ne is not None:
                    add_new_names(ne.localname, e.localname)
                    e.status = ne.status
                    merge_services(ne, e)
                    add_new_names(ne.operation, e.operation)
                    if ne.os:
                        merge_oses(ne, e)
                    break

    # Annotate substrates
    for s in top.substrates:
        for d in data:
            ss = d.subs.get(s.name, None)
            if ss is not None:
                if ss.capacity is not None:
                    s.capacity = ss.capacity
                # Test the annotation's latency, not the existing one:
                # otherwise an unset latency is never filled in and a set
                # one can be overwritten with None.
                if ss.latency is not None:
                    s.latency = ss.latency
[6e33086]	989
	990
	991
def allocate_resources(self, allocated, masters, eid, expid,
        tbparams, top, topo, tmpdir, alloc_log=None, log_collector=None,
        attrs=None, connInfo=None, tbmap=None, expcert=None):
    """
    Start a segment on every testbed in allocated and record the result.

    One pooled thread per testbed runs a start_segment object.  If any
    of them fails, the segments that did start are terminated, access is
    released on every testbed in tbparams, the experiment is marked
    'failed' and its repository directory is removed.  If all succeed,
    the startup proofs are collected, top is annotated with the
    embedding information, and the experiment is marked 'active' and
    stored under both eid and expid.  Nothing is returned; the outcome
    is visible in self.state and the log.

    tbparams maps testbed -> allocation info (uri, allocID), topo maps
    testbed -> sub-topology and connInfo maps testbed -> connection
    info.  attrs is a list of attribute dicts copied for each testbed.
    expcert, when given, replaces this controller's certificate (with no
    password) in all calls.  alloc_log falls back to self.log.

    Raises service_error if a testbed has no URI or no allocation id.
    """
    # XXX
    fail_soft = False

    if tbmap is None: tbmap = { }
    if connInfo is None: connInfo = { }

    log = alloc_log or self.log

    tp = thread_pool(self.nthreads)
    threads = [ ]
    starters = [ ]

    if expcert:
        cert = expcert
        pw = None
    else:
        cert = self.cert_file
        pw = self.cert_pwd

    for tb in allocated.keys():
        # Create and start a thread to start the segment, and save it
        # to get the return value later.  Each testbed gets a private
        # copy of the attributes (attrs may be None).
        tb_attrs = list(attrs) if attrs else [ ]
        tp.wait_for_slot()
        uri = tbparams[tb].uri or tbmap.get(testbed_base(tb), None)
        base, suffix = split_testbed(tb)
        if suffix:
            tb_attrs.append({'attribute': 'experiment_name',
                'value': "%s-%s" % (eid, suffix)})
        else:
            tb_attrs.append({'attribute': 'experiment_name', 'value': eid})
        if not uri:
            raise service_error(service_error.internal,
                    "Unknown testbed %s !?" % tb)

        aid = tbparams[tb].allocID
        if not aid:
            raise service_error(service_error.internal,
                    "No alloc id for testbed %s !?" % tb)

        s = self.start_segment(log=log, debug=self.debug,
                testbed=tb, cert_file=cert,
                cert_pwd=pw, trusted_certs=self.trusted_certs,
                caller=self.call_StartSegment,
                log_collector=log_collector)
        starters.append(s)
        t = pooled_thread(
                target=s, name=tb,
                args=(uri, aid, topo[tb], masters, tb_attrs, connInfo[tb]),
                pdata=tp, trace_file=self.trace_file)
        threads.append(t)
        t.start()

    # Wait until all finish (keep pinging the log, though)
    mins = 0
    revoked = False
    while not tp.wait_for_all_done(60.0):
        mins += 1
        # Use log, not alloc_log: alloc_log is optional and may be None.
        log.info("Waiting for sub threads (it has been %d mins)" \
                % mins)
        # "== False" is intentional: a thread whose rv is still None is
        # presumably unfinished and must not count as a failure here.
        if not revoked and any(t.rv == False for t in threads):
            # a testbed has failed.  Revoke this experiment's
            # synchronization values so that sub experiments will not
            # deadlock waiting for synchronization that will never happen
            self.log.info("A subexperiment has failed to swap in, " + \
                    "revoking synch keys")
            var_key = "fedid:%s" % expid
            for k in self.synch_store.all_keys():
                # Previously a hand-counted 46 character prefix compare;
                # identical whenever var_key is 46 characters long.
                if k.startswith(var_key):
                    self.synch_store.revoke_key(k)
            revoked = True

    failed = [ t.getName() for t in threads if not t.rv ]
    succeeded = [tb for tb in allocated.keys() if tb not in failed]

    # If one failed clean up, unless fail_soft is set
    if failed:
        if not fail_soft:
            tp.clear()
            for tb in succeeded:
                # Create and start a thread to stop the segment
                tp.wait_for_slot()
                uri = tbparams[tb].uri
                t = pooled_thread(
                        target=self.terminate_segment(log=log,
                            testbed=tb,
                            cert_file=cert,
                            cert_pwd=pw,
                            trusted_certs=self.trusted_certs,
                            caller=self.call_TerminateSegment),
                        args=(uri, tbparams[tb].allocID),
                        name=tb,
                        pdata=tp, trace_file=self.trace_file)
                t.start()
            # Wait until all finish (if any are being stopped)
            if succeeded:
                tp.wait_for_all_done()

            # release the allocations
            for tb in tbparams.keys():
                try:
                    self.release_access(tb, tbparams[tb].allocID,
                            tbmap=tbmap, uri=tbparams[tb].uri,
                            cert_file=cert, cert_pwd=pw)
                except service_error as e:
                    self.log.warn("Error releasing access: %s" % e.desc)
            # Mark the placeholder as failed.  The finally keeps a
            # failing write_state from leaving the lock held.
            self.state_lock.acquire()
            try:
                self.state[eid].status = 'failed'
                self.state[eid].updated()
                if self.state_filename: self.write_state()
            finally:
                self.state_lock.release()
            # Remove the repo dir
            self.remove_dirs("%s/%s" %(self.repodir, expid))
            # Walk up tmpdir, deleting as we go
            if self.cleanup:
                self.remove_dirs(tmpdir)
            else:
                log.debug("[start_experiment]: not removing %s" % tmpdir)

        log.error("Swap in failed on %s" % ",".join(failed))
        return
    else:
        # Walk through the successes and gather the proofs
        proofs = { }
        for s in starters:
            if s.proof:
                proofs[s.testbed] = s.proof
        self.annotate_topology(top, starters)
        log.info("[start_segment]: Experiment %s active" % eid)

    # Walk up tmpdir, deleting as we go
    if self.cleanup:
        self.remove_dirs(tmpdir)
    else:
        log.debug("[start_experiment]: not removing %s" % tmpdir)

    # Insert the experiment into our state and update the disk copy.
    self.state_lock.acquire()
    try:
        self.state[expid].status = 'active'
        self.state[eid] = self.state[expid]
        self.state[eid].top = top
        self.state[eid].updated()
        # Append startup proofs
        for f in self.state[eid].get_all_allocations():
            if f.tb in proofs:
                f.proof.append(proofs[f.tb])

        if self.state_filename: self.write_state()
    finally:
        self.state_lock.release()
    return
	1152
	1153
def add_kit(self, e, kit):
    """
    Turn kit, a list of (install, location) tuples, into topdl.Software
    objects and attach them to e.  They are appended when e.software is
    already a list; otherwise e.software is replaced by the new list.
    This keeps the old tuple representation of kits usable.
    """
    packages = []
    for install, location in kit:
        packages.append(topdl.Software(install=install, location=location))

    if not isinstance(e.software, list):
        e.software = packages
    else:
        e.software.extend(packages)
	1166
def append_experiment_authorization(self, expid, attrs,
        need_state_lock=True):
    """
    Append the authorization information to system state.

    attrs is an iterable of (principal, attribute) pairs; each is set on
    the authorizer, which is then saved.  The experiment state is
    rewritten to disk if a state file is configured.  Pass
    need_state_lock=False when the caller already holds self.state_lock.
    expid is currently unused.
    """
    for principal, attribute in attrs:
        self.auth.set_attribute(principal, attribute)
    self.auth.save()

    if need_state_lock: self.state_lock.acquire()
    try:
        # XXX: really a no op?
        #self.state[expid]['auth'].update(attrs)
        if self.state_filename: self.write_state()
    finally:
        # Release even when write_state fails; a leaked state lock would
        # block every later state access.
        if need_state_lock: self.state_lock.release()
	1182
def clear_experiment_authorization(self, expid, need_state_lock=True):
    """
    Rewrite the experiment state to disk (if a state file is configured)
    and save the authorizer.

    The per-experiment attribute removal that used to live here is
    commented out, so no attributes are actually unset and expid is
    unused.  Pass need_state_lock=False when the caller already holds
    self.state_lock.
    """
    if need_state_lock: self.state_lock.acquire()
    try:
        # XXX: should be a no-op
        #if expid in self.state and 'auth' in self.state[expid]:
            #for p, a in self.state[expid]['auth']:
                #self.auth.unset_attribute(p, a)
            #self.state[expid]['auth'] = set()
        if self.state_filename: self.write_state()
    finally:
        # Release even when write_state fails so the lock cannot leak.
        if need_state_lock: self.state_lock.release()
    self.auth.save()
[913dc7a]	1198
[895a133]	1199
def create_experiment_state(self, fid, req, expid, expcert,
        state='starting'):
    """
    Create the initial entry in the experiment's state.  The expid and
    expcert are the experiment's fedid and the certificate that
    represents that ID, which are installed in the experiment state.  If
    the request includes a suggested local name that is used if
    possible.  If the local name is already taken by an experiment owned
    by this user that has failed, it is overwritten.  Otherwise new
    letters are added until a valid localname is found.  Without a
    suggestion the name is self.exp_stem plus five random letters.  The
    new entry is stored under both the local name and expid, fid, expid
    and the configured overrides are authorized to touch it, and the
    generated local name is returned.

    NOTE(review): the state argument is not used in this function; the
    initial status is whatever experiment_info assigns.  Confirm whether
    callers expect it to be applied.
    """

    def random_name():
        """ The experiment stem followed by five random letters. """
        name = self.exp_stem
        for i in range(0, 5):
            name += random.choice(string.ascii_letters)
        return name

    if 'experimentID' in req and 'localname' in req['experimentID']:
        overwrite = False
        eid = req['experimentID']['localname']
        # If there's an old failed experiment here with the same local name
        # and accessible by this user, we'll overwrite it, otherwise we'll
        # fall through and do the collision avoidance.
        old_expid = self.get_experiment_fedid(eid)
        if old_expid:
            users_experiment = True
            try:
                self.check_experiment_access(fid, old_expid)
            except service_error as e:
                if e.code == service_error.access:
                    users_experiment = False
                else:
                    # bare raise keeps the original traceback
                    raise
            if users_experiment:
                self.state_lock.acquire()
                try:
                    status = self.state[eid].status
                    if status and status == 'failed':
                        # remove the old access attributes
                        self.clear_experiment_authorization(eid,
                                need_state_lock=False)
                        overwrite = True
                        del self.state[eid]
                        del self.state[old_expid]
                finally:
                    self.state_lock.release()
            else:
                self.log.info('Experiment %s exists, ' % eid + \
                        'but this user cannot access it')
        self.state_lock.acquire()
        try:
            while eid in self.state and not overwrite:
                eid += random.choice(string.ascii_letters)
            # Initial state
            self.state[eid] = experiment_info(fedid=expid, localname=eid,
                    identity=expcert)
            self.state[expid] = self.state[eid]
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()
    else:
        eid = random_name()
        self.state_lock.acquire()
        try:
            while eid in self.state:
                eid = random_name()
            # Initial state
            self.state[eid] = experiment_info(fedid=expid, localname=eid,
                    identity=expcert)
            self.state[expid] = self.state[eid]
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

    # Let users touch the state.  Authorize this fid and the expid itself
    # to touch the experiment, as well as allowing the overrides.
    self.append_experiment_authorization(eid,
            set([(fid, expid), (expid, expid)] + \
                    [ (o, expid) for o in self.overrides]))

    return eid
	1273
	1274
	1275	def allocate_ips_to_topo(self, top):
	1276	"""
[69692a9]	1277	Add an ip4_address attribute to all the hosts in the topology, based on
[895a133]	1278	the shared substrates on which they sit. An /etc/hosts file is also
[69692a9]	1279	created and returned as a list of hostfiles entries. We also return
	1280	the allocator, because we may need to allocate IPs to portals
	1281	(specifically DRAGON portals).
[895a133]	1282	"""
	1283	subs = sorted(top.substrates,
	1284	cmp=lambda x,y: cmp(len(x.interfaces), len(y.interfaces)),
	1285	reverse=True)
	1286	ips = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
	1287	ifs = { }
	1288	hosts = [ ]
	1289
	1290	for idx, s in enumerate(subs):
[289ff7e]	1291	net_size = len(s.interfaces)+2
	1292
	1293	a = ips.allocate(net_size)
[895a133]	1294	if a :
	1295	base, num = a
[289ff7e]	1296	if num < net_size:
[895a133]	1297	raise service_error(service_error.internal,
	1298	"Allocator returned wrong number of IPs??")
	1299	else:
	1300	raise service_error(service_error.req,
	1301	"Cannot allocate IP addresses")
[062b991]	1302	mask = ips.min_alloc
	1303	while mask < net_size:
	1304	mask *= 2
[289ff7e]	1305
[062b991]	1306	netmask = ((2**32-1) ^ (mask-1))
[895a133]	1307
	1308	base += 1
	1309	for i in s.interfaces:
	1310	i.attribute.append(
	1311	topdl.Attribute('ip4_address',
	1312	"%s" % ip_addr(base)))
[289ff7e]	1313	i.attribute.append(
	1314	topdl.Attribute('ip4_netmask',
	1315	"%s" % ip_addr(int(netmask))))
	1316
[1e7f268]	1317	hname = i.element.name
[895a133]	1318	if ifs.has_key(hname):
	1319	hosts.append("%s\t%s-%s %s-%d" % \
	1320	(ip_addr(base), hname, s.name, hname,
	1321	ifs[hname]))
	1322	else:
	1323	ifs[hname] = 0
	1324	hosts.append("%s\t%s-%s %s-%d %s" % \
	1325	(ip_addr(base), hname, s.name, hname,
	1326	ifs[hname], hname))
	1327
	1328	ifs[hname] += 1
	1329	base += 1
[69692a9]	1330	return hosts, ips
[895a133]	1331
def get_access_to_testbeds(self, testbeds, fid, allocated,
        tbparam, masters, tbmap, expid=None, expcert=None):
    """
    Call get_access for each testbed in turn, marking the testbed in
    allocated (with the value 1) once its call has returned.  An
    exception from get_access stops the loop and leaves the remaining
    testbeds unmarked.
    """
    for testbed in testbeds:
        self.get_access(testbed, tbparam, fid, masters, tbmap,
                expid, expcert)
        allocated[testbed] = 1
	1338
def get_access(self, tb, tbparam, fid, masters, tbmap, expid=None,
        expcert=None):
    """
    Get access to testbed through fedd and set the parameters for that tb

    tb is the testbed name, tbmap maps testbed base names to service
    URIs and masters maps testbeds to the federated_service objects they
    export.  The credentials known for fid (and expid, if given) are
    sent with the request.  On success tbparam[tb] is set to a new
    allocation_info, service responses are attached to the matching
    objects in masters, and returned fedAttrs are stored on the
    allocation.  expcert, when given, is the certificate used for the
    remote call (with no password) instead of this controller's own.

    Raises service_error for an unknown testbed, more than one
    project_export service, or a malformed response.
    """
    def get_export_project(svcs):
        """
        Look through the list of federated_service objects for this
        testbed for a project_export service, and extract the project
        parameter.
        """

        pe = [s for s in svcs if s.name=='project_export']
        if len(pe) == 1:
            return pe[0].params.get('project', None)
        elif len(pe) == 0:
            return None
        else:
            raise service_error(service_error.req,
                    "More than one project export is not supported")

    def add_services(svcs, visibility, slist, keys):
        """
        Add the given services to slist.  visibility is import or
        export.  Also add a mapping entry from the assigned id to the
        original service record.
        """
        for i, s in enumerate(svcs):
            idx = '%s%d' % (visibility, i)
            keys[idx] = s
            sr = {'id': idx, 'name': s.name, 'visibility': visibility }
            if s.params:
                sr['fedAttr'] = [ { 'attribute': k, 'value': v } \
                        for k, v in s.params.items()]
            slist.append(sr)

    uri = tbmap.get(testbed_base(tb), None)
    if not uri:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    export_svcs = masters.get(tb,[])
    import_svcs = [ s for m in masters.values() \
            for s in m \
                if tb in s.importers ]

    # The project itself is not used below; the call is kept because it
    # rejects requests with more than one project_export service.
    get_export_project(export_svcs)
    # Compose the credential list so that IDs come before attributes
    creds = set()
    keys = set()
    certs = self.auth.get_creds_for_principal(fid)
    if expid:
        certs.update(self.auth.get_creds_for_principal(expid))
    for c in certs:
        keys.add(c.issuer_cert())
        creds.add(c.attribute_cert())
    creds = list(keys) + list(creds)

    # The password attribute is cert_pwd (as used in allocate_resources);
    # the former self.cert_pw does not match that name.
    if expcert: cert, pw = expcert, None
    else: cert, pw = self.cert_file, self.cert_pwd

    # Request credentials
    req = {
            'abac_credential': creds,
        }
    # Make the service request from the services we're importing and
    # exporting.  Keep track of the export request ids so we can
    # collect the resulting info from the access response.
    e_keys = { }
    if import_svcs or export_svcs:
        slist = []
        add_services(import_svcs, 'import', slist, e_keys)
        add_services(export_svcs, 'export', slist, e_keys)
        req['service'] = slist

    if uri in self.local_access:
        # Local access call
        req = { 'RequestAccessRequestBody' : req }
        r = self.local_access[uri].RequestAccess(req,
                fedid(file=self.cert_file))
        r = { 'RequestAccessResponseBody' : r }
    else:
        r = self.call_RequestAccess(uri, req, cert, pw, self.trusted_certs)

    if 'RequestAccessResponseBody' in r:
        # Through to here we have a valid response, not a fault.
        # Access denied is a fault, so something better or worse than
        # access denied has happened.
        r = r['RequestAccessResponseBody']
        self.log.debug("[get_access] Access granted")
    else:
        raise service_error(service_error.protocol,
                    "Bad proxy response")
    if 'proof' not in r:
        raise service_error(service_error.protocol,
                    "Bad access response (no access proof)")

    tbparam[tb] = allocation_info(allocID=r['allocID'].get('fedid', None),
            tb=tb, uri=uri, proof=[r['proof']],
            services=masters.get(tb, None))

    # Collect the responses corresponding to the services this testbed
    # exports.  These will be the service requests that we will include in
    # the start segment requests (with appropriate visibility values) to
    # import and export the segments.
    for s in r.get('service', []):
        svc_id = s.get('id', None)
        # Note that this attaches the response to the object in the masters
        # data structure.  (The e_keys index disappears when this fcn
        # returns)
        if svc_id and svc_id in e_keys:
            e_keys[svc_id].reqs.append(s)

    # Add attributes to parameter space.  We don't allow attributes to
    # overlay any parameters already installed.
    for a in r.get('fedAttr', []):
        try:
            if a['attribute']:
                tbparam[tb].set_attribute(a['attribute'], a['value'])
        except KeyError:
            self.log.error("Bad attribute in response: %s" % a)
	1460
	1461
def split_topology(self, top, topo, testbeds):
    """
    Fill topo with one sub-topology per testbed.  Each is a clone of top
    from which every element not assigned to that testbed has been
    removed, together with that element's interfaces on the substrates.
    """
    for tb in testbeds:
        sub_top = top.clone()
        topo[tb] = sub_top
        # iterate over a copy so elements can be removed as we go
        for elem in list(sub_top.elements):
            elem_tb = elem.get_attribute('testbed')
            # NB: elements without a testbed attribute won't appear in any
            # sub topologies.
            if elem_tb and elem_tb == tb:
                continue
            for iface in elem.interface:
                for sub in iface.subs:
                    try:
                        sub.interfaces.remove(iface)
                    except ValueError:
                        raise service_error(service_error.internal,
                                "Can't remove interface??")
            sub_top.elements.remove(elem)
        sub_top.make_indices()
	1483
def confirm_software(self, top):
    """
    Make sure that the software to be loaded in the topo is all available
    before we begin making access requests, etc.  This is a subset of
    wrangle_software.

    Every download location in self.fedkit, self.gatewaykit and the
    software lists of top's elements is opened once.  Locations without
    a URL scheme are treated as local file paths.  Returns True if all
    can be opened; raises service_error for the first that cannot.
    """
    pkgs = set([ d for i, d in self.fedkit + self.gatewaykit ])
    # Elements with no software attribute (or an unset one) contribute
    # nothing, matching the getattr guard used in wrangle_software.
    pkgs.update([x.location for e in top.elements \
            for x in (getattr(e, 'software', None) or [])])

    for pkg in pkgs:
        loc = pkg

        scheme = urlparse(loc)[0]
        if not scheme:
            if not loc.startswith('/'):
                loc = "/%s" % loc
            loc = "file://%s" %loc
        # NB: if scheme was found, loc == pkg
        try:
            u = urlopen(loc)
            u.close()
        except Exception as e:
            raise service_error(service_error.req,
                    "Cannot open %s: %s" % (loc, e))
    return True
	1510
def wrangle_software(self, expid, top, topo, tbparams):
    """
    Copy software out to the repository directory, allocate permissions and
    rewrite the segment topologies to look for the software in local
    places.

    The packages are the download locations in self.fedkit and
    self.gatewaykit plus the software locations of top's elements.  Each
    is copied into <repodir>/<expid>/software, every allocation in
    tbparams is authorized to fetch it, and matching software locations
    in the topo sub-topologies are replaced by the repository URL.

    Raises service_error if the directory cannot be created or a package
    cannot be opened or copied.
    """

    # Copy the rpms and tarfiles to a distribution directory from
    # which the federants can retrieve them
    linkpath = "%s/software" % expid
    softdir ="%s/%s" % ( self.repodir, linkpath)
    softmap = { }

    # self.fedkit and self.gateway kit are lists of tuples of
    # (install_location, download_location) this extracts the download
    # locations.
    pkgs = set([ d for i, d in self.fedkit + self.gatewaykit ])
    # Elements without a software list contribute nothing (same guard as
    # the rewrite loop at the end of this function).
    pkgs.update([x.location for e in top.elements \
            for x in (getattr(e, 'software', None) or [])])
    try:
        os.makedirs(softdir)
    except EnvironmentError as e:
        # service_error takes the error code first; the code was missing
        raise service_error(service_error.internal,
                "Cannot create software directory: %s" % e)

    # Repository-relative path of the software directory; the same for
    # every package.
    path = re.sub("/tmp", "", linkpath)

    # The actual copying.  Everything's converted into a url for copying.
    auth_attrs = set()
    for pkg in pkgs:
        loc = pkg

        scheme, host, pkg_path = urlparse(loc)[0:3]
        dest = os.path.basename(pkg_path)
        if not scheme:
            if not loc.startswith('/'):
                loc = "/%s" % loc
            loc = "file://%s" %loc
        # NB: if scheme was found, loc == pkg
        try:
            u = urlopen(loc)
        except Exception as e:
            raise service_error(service_error.req,
                    "Cannot open %s: %s" % (loc, e))
        try:
            try:
                # Packages are binary (rpms, tarfiles): write bytes.
                f = open("%s/%s" % (softdir, dest) , "wb")
                try:
                    self.log.debug("Writing %s/%s" % (softdir,dest) )
                    data = u.read(4096)
                    while data:
                        f.write(data)
                        data = u.read(4096)
                finally:
                    f.close()
            finally:
                u.close()
        except Exception as e:
            raise service_error(service_error.internal,
                    "Could not copy %s: %s" % (loc, e))
        # XXX
        softmap[pkg] = \
                "%s/%s/%s" %\
                ( self.repo_url, path, dest)

        # Allow the individual segments to access the software by assigning
        # an attribute to each testbed allocation that encodes the data to
        # be released.  This expression collects the data for each run of
        # the loop.
        auth_attrs.update([
            (tbparams[tb].allocID, "/%s/%s" % ( path, dest)) \
                    for tb in tbparams.keys()])

    self.append_experiment_authorization(expid, auth_attrs)

    # Convert the software locations in the segments into the local
    # copies on this host
    for soft in [ s for tb in topo.values() \
            for e in tb.elements \
                if getattr(e, 'software', False) \
                    for s in e.software ]:
        if soft.location in softmap:
            soft.location = softmap[soft.location]
	1587
	1588
def new_experiment(self, req, fid):
    """
    The external interface to empty initial experiment creation called from
    the dispatcher.

    Imports any credentials in the request, checks that fid holds the
    'new' attribute, and then creates an empty experiment.  The
    experiment's identity is the X509 certificate supplied in
    experimentAccess, or a freshly generated one.  Returns a dict with
    the experiment's local name and fedid, the status 'empty', the
    access certificate and the access proof.

    Raises service_error for a malformed request, denied access, a
    supplied identity that is already in use, or a failure to create the
    temporary directory.
    """
    self.log.info("New experiment call started for %s" % fid)
    req = req.get('NewRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no NewRequestBody)")

    if self.auth.import_credentials(data_list=req.get('credential', [])):
        self.auth.save()

    try:
        access_ok, proof = self.auth.check_attribute(fid, 'new',
                with_proof=True)
    except service_error:
        self.log.info("New experiment call for %s: access denied" % fid)
        # bare raise keeps the original traceback
        raise

    if not access_ok:
        self.log.info("New experiment call for %s: Access denied" % fid)
        raise service_error(service_error.access, "New access denied",
                proof=[proof])

    try:
        tmpdir = tempfile.mkdtemp(prefix="split-")
    except EnvironmentError:
        raise service_error(service_error.internal, "Cannot create tmp dir")

    # Generate an ID for the experiment (slice) and a certificate that the
    # allocator can use to prove they own it.  We'll ship it back through
    # the encrypted connection.  If the requester supplied one, use it.
    try:
        if 'experimentAccess' in req and 'X509' in req['experimentAccess']:
            expcert = req['experimentAccess']['X509']
            expid = fedid(certstr=expcert)
            self.state_lock.acquire()
            try:
                in_use = expid in self.state
            finally:
                self.state_lock.release()
            if in_use:
                raise service_error(service_error.req,
                        'fedid %s identifies an existing experiment' % expid)
        else:
            (expid, expcert) = generate_fedid("test", dir=tmpdir,
                    log=self.log)
    except Exception:
        # Don't leave the temporary directory behind when the request is
        # rejected.  Best effort only: the original error is what the
        # caller needs to see.
        if self.cleanup:
            try:
                os.rmdir(tmpdir)
            except EnvironmentError as e:
                self.log.debug("[new_experiment]: could not remove %s: %s" \
                        % (tmpdir, e))
        raise

    #now we're done with the tmpdir, and it should be empty
    if self.cleanup:
        self.log.debug("[new_experiment]: removing %s" % tmpdir)
        os.rmdir(tmpdir)
    else:
        self.log.debug("[new_experiment]: not removing %s" % tmpdir)

    eid = self.create_experiment_state(fid, req, expid, expcert,
            state='empty')

    rv = {
            'experimentID': [
                {'localname' : eid }, { 'fedid': copy.copy(expid) }
            ],
            'experimentStatus': 'empty',
            'experimentAccess': { 'X509' : expcert },
            'proof': proof.to_dict(),
        }

    self.log.info("New experiment call succeeded for %s" % fid)
    return rv
	1662
[cf0ff4f]	1663	# create_experiment sub-functions
	1664
@staticmethod
def get_experiment_key(req, field='experimentID'):
    """
    Parse the experiment identifiers out of the request (the request body
    tag has been removed).  Specifically this pulls either the fedid or the
    localname out of the field named by field (experimentID by default).
    A fedid is preferred.  If neither is present or the request does not
    contain the field, service_errors are raised.
    """
    # Get the experiment access
    exp = req.get(field, None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    # Order matters: a fedid wins over a localname.
    for id_type in ('fedid', 'localname'):
        if id_type in exp:
            return exp[id_type]

    raise service_error(service_error.req, "Unknown lookup type")
	1687
	1688	def get_experiment_ids_and_start(self, key, tmpdir):
	1689	"""
	1690	Get the experiment name, id and access certificate from the state, and
	1691	set the experiment state to 'starting'. returns a triple (fedid,
	1692	localname, access_cert_file). The access_cert_file is a copy of the
	1693	contents of the access certificate, created in the tempdir with
	1694	restricted permissions. If things are confused, raise an exception.
	1695	"""
	1696
	1697	expid = eid = None
	1698	self.state_lock.acquire()
[29d5f7c]	1699	if key in self.state:
	1700	exp = self.state[key]
	1701	exp.status = "starting"
[6e33086]	1702	exp.updated()
[29d5f7c]	1703	expid = exp.fedid
	1704	eid = exp.localname
	1705	expcert = exp.identity
[5ecb9a3]	1706	self.state_lock.release()
	1707
	1708	# make a protected copy of the access certificate so the experiment
	1709	# controller can act as the experiment principal.
	1710	if expcert:
	1711	expcert_file = self.make_temp_certfile(expcert, tmpdir)
	1712	if not expcert_file:
	1713	raise service_error(service_error.internal,
	1714	"Cannot create temp cert file?")
	1715	else:
	1716	expcert_file = None
	1717
	1718	return (eid, expid, expcert_file)
	1719
	1720	def get_topology(self, req, tmpdir):
	1721	"""
	1722	Get the ns2 content and put it into a file for parsing. Call the local
	1723	or remote parser and return the topdl.Topology. Errors result in
	1724	exceptions. req is the request and tmpdir is a work directory.
	1725	"""
	1726
	1727	# The tcl parser needs to read a file so put the content into that file
	1728	descr=req.get('experimentdescription', None)
	1729	if descr:
[a7c0bcb]	1730	if 'ns2description' in descr:
	1731	file_content=descr['ns2description']
	1732	elif 'topdldescription' in descr:
	1733	return topdl.Topology(**descr['topdldescription'])
	1734	else:
	1735	raise service_error(service_error.req,
	1736	'Unknown experiment description type')
[5ecb9a3]	1737	else:
	1738	raise service_error(service_error.req, "No experiment description")
	1739
	1740
	1741	if self.splitter_url:
	1742	self.log.debug("Calling remote topdl translator at %s" % \
	1743	self.splitter_url)
	1744	top = self.remote_ns2topdl(self.splitter_url, file_content)
	1745	else:
	1746	tclfile = os.path.join(tmpdir, "experiment.tcl")
	1747	if file_content:
	1748	try:
	1749	f = open(tclfile, 'w')
	1750	f.write(file_content)
	1751	f.close()
	1752	except EnvironmentError:
	1753	raise service_error(service_error.internal,
	1754	"Cannot write temp experiment description")
	1755	else:
	1756	raise service_error(service_error.req,
	1757	"Only ns2descriptions supported")
	1758	pid = "dummy"
	1759	gid = "dummy"
	1760	eid = "dummy"
	1761
	1762	tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
	1763	str(self.muxmax), '-m', 'dummy']
	1764
	1765	tclcmd.extend([pid, gid, eid, tclfile])
	1766
	1767	self.log.debug("running local splitter %s", " ".join(tclcmd))
	1768	# This is just fantastic. As a side effect the parser copies
	1769	# tb_compat.tcl into the current directory, so that directory
	1770	# must be writable by the fedd user. Doing this in the
	1771	# temporary subdir ensures this is the case.
	1772	tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
	1773	cwd=tmpdir)
	1774	split_data = tclparser.stdout
	1775
	1776	top = topdl.topology_from_xml(file=split_data, top="experiment")
	1777	os.remove(tclfile)
	1778
	1779	return top
	1780
def get_testbed_services(self, req, testbeds, tb_hosts=None):
    """
    Parse the services section of the request into two dicts mapping
    testbed to lists of federated_service objects.  The first dict maps all
    exporters of services to those service objects, the second maps
    testbeds to service objects only for services requiring portals.

    req is the request body and testbeds the list of testbed names in the
    experiment.  tb_hosts optionally maps a testbed name to the list of
    host names on it; it supplies the default 'hosts' parameter of a
    hide_hosts service.  When it is omitted that default is empty.

    Service entries with the importall field set are rewritten in place to
    carry an explicit import list.
    """
    # Earlier code read tb_hosts as an undefined name here; an explicit
    # (optional) parameter keeps existing two-argument callers working.
    if tb_hosts is None:
        tb_hosts = { }

    # We construct both dicts here because deriving the second is more
    # complex than it looks - both the keys and lists can differ, and it's
    # much easier to generate both in one pass.
    masters = { }
    pmasters = { }
    for s in req.get('service', []):
        # If this is a service request with the importall field
        # set, fill it out.
        if s.get('importall', False):
            exporters = s.get('export', [])
            s['import'] = [ tb for tb in testbeds if tb not in exporters ]
            del s['importall']

        # Add the service to masters
        for tb in s.get('export', []):
            if not s.get('name', None):
                self.log.error(
                        'Testbed service does not have name and importers')
                continue

            params = { }
            for a in s.get('fedAttr', []):
                params[a.get('attribute', '')] = a.get('value', '')

            fser = federated_service(name=s['name'],
                    exporter=tb, importers=s.get('import', []),
                    params=params)
            if fser.name == 'hide_hosts' and 'hosts' not in fser.params:
                fser.params['hosts'] = \
                        ",".join(tb_hosts.get(fser.exporter, []))

            masters.setdefault(tb, []).append(fser)
            if fser.portal:
                pmasters.setdefault(tb, []).append(fser)
    return masters, pmasters
[5ecb9a3]	1827
def generate_keys_and_hosts(self, tmpdir, expid, hosts, tbparams):
    """
    Create the ssh keys necessary for interconnecting the portal nodes and
    the global hosts file for letting each segment know about the IP
    addresses in play.  Save these into the repo.  Add attributes to the
    authorizer allowing access controllers to download them and return a
    list of attribute/value dicts that inform the segments where to find
    this stuff.

    tmpdir is the work directory (keys are generated under tmpdir/keys),
    expid names the experiment's repo subtree, hosts is a list of hosts
    file lines and tbparams maps testbed names to allocation info with an
    allocID.  May raise service_errors if there are problems.
    """
    gw_pubkey_base = "fed.%s.pub" % self.ssh_type
    gw_secretkey_base = "fed.%s" % self.ssh_type
    keydir = os.path.join(tmpdir, 'keys')
    gw_pubkey = os.path.join(keydir, gw_pubkey_base)
    gw_secretkey = os.path.join(keydir, gw_secretkey_base)

    try:
        self.generate_ssh_keys(gw_secretkey, self.ssh_type)
    except ValueError:
        raise service_error(service_error.server_config,
                "Bad key type (%s)" % self.ssh_type)

    self.generate_seer_certs(keydir)

    # Copy configuration files into the remote file store
    # The config urlpath
    configpath = "/%s/config" % expid
    # The config file system location
    configdir = "%s%s" % (self.repodir, configpath)
    try:
        os.makedirs(configdir)
    except EnvironmentError as e:
        raise service_error(service_error.internal,
                "Cannot create config directory: %s" % e)
    try:
        # The with block closes the file even if the write fails
        with open("%s/hosts" % configdir, "w") as hosts_file:
            hosts_file.write('\n'.join(hosts) + '\n')
    except EnvironmentError as e:
        raise service_error(service_error.internal,
                "Cannot write hosts file: %s" % e)
    try:
        copy_file(gw_pubkey, os.path.join(configdir, gw_pubkey_base))
        copy_file(gw_secretkey, os.path.join(configdir, gw_secretkey_base))
        for pem in ('ca.pem', 'node.pem'):
            copy_file(os.path.join(keydir, pem),
                    os.path.join(configdir, pem))
    except EnvironmentError as e:
        raise service_error(service_error.internal,
                "Cannot copy keyfiles: %s" % e)

    # Allow the individual testbeds to access the configuration files,
    # again by setting an attribute for the relevant pathnames on each
    # allocation principal.
    published = ("hosts", 'ca.pem', 'node.pem',
            gw_secretkey_base, gw_pubkey_base)
    self.append_experiment_authorization(expid, set([
        (tbparams[tb].allocID, "%s/%s" % (configpath, name)) \
                for tb in tbparams.keys() \
                    for name in published]))

    # Attribute name -> file under the experiment's config URL
    locations = (
            ('ssh_pubkey', gw_pubkey_base),
            ('ssh_secretkey', gw_secretkey_base),
            ('hosts', 'hosts'),
            ('seer_ca_pem', 'ca.pem'),
            ('seer_node_pem', 'node.pem'),
        )
    attrs = [
            {
                'attribute': attr,
                'value': '%s/%s/config/%s' % (self.repo_url, expid, name)
            } for attr, name in locations
        ]
    return attrs
	1916
	1917
	1918	def get_vtopo(self, req, fid):
	1919	"""
	1920	Return the stored virtual topology for this experiment
	1921	"""
	1922	rv = None
	1923	state = None
[2bb8b35]	1924	self.log.info("vtopo call started for %s" % fid)
[cf0ff4f]	1925
	1926	req = req.get('VtopoRequestBody', None)
	1927	if not req:
	1928	raise service_error(service_error.req,
	1929	"Bad request format (no VtopoRequestBody)")
	1930	exp = req.get('experiment', None)
	1931	if exp:
	1932	if exp.has_key('fedid'):
	1933	key = exp['fedid']
	1934	keytype = "fedid"
	1935	elif exp.has_key('localname'):
	1936	key = exp['localname']
	1937	keytype = "localname"
	1938	else:
	1939	raise service_error(service_error.req, "Unknown lookup type")
	1940	else:
	1941	raise service_error(service_error.req, "No request?")
	1942
[8cab4c2]	1943	try:
	1944	proof = self.check_experiment_access(fid, key)
	1945	except service_error, e:
	1946	self.log.info("vtopo call failed for %s: access denied" % fid)
	1947	raise e
[cf0ff4f]	1948
	1949	self.state_lock.acquire()
[29d5f7c]	1950	# XXX: this needs to be recalculated
[80b1e82]	1951	if key in self.state:
	1952	if self.state[key].top is not None:
	1953	vtopo = topdl.topology_to_vtopo(self.state[key].top)
[e83f2f2]	1954	rv = { 'experiment' : {keytype: key },
[80b1e82]	1955	'vtopo': vtopo,
[e83f2f2]	1956	'proof': proof.to_dict(),
[cf0ff4f]	1957	}
	1958	else:
[80b1e82]	1959	state = self.state[key].status
[cf0ff4f]	1960	self.state_lock.release()
	1961
[2bb8b35]	1962	if rv:
	1963	self.log.info("vtopo call completed for %s %s " % \
	1964	(key, fid))
	1965	return rv
[cf0ff4f]	1966	else:
	1967	if state:
[2bb8b35]	1968	self.log.info("vtopo call completed for %s %s (Not ready)" % \
	1969	(key, fid))
[cf0ff4f]	1970	raise service_error(service_error.partial,
	1971	"Not ready: %s" % state)
	1972	else:
[2bb8b35]	1973	self.log.info("vtopo call completed for %s %s (No experiment)"\
	1974	% (key, fid))
[cf0ff4f]	1975	raise service_error(service_error.req, "No such experiment")
	1976
	1977	def get_vis(self, req, fid):
	1978	"""
	1979	Return the stored visualization for this experiment
	1980	"""
	1981	rv = None
	1982	state = None
	1983
[2bb8b35]	1984	self.log.info("vis call started for %s" % fid)
[cf0ff4f]	1985	req = req.get('VisRequestBody', None)
	1986	if not req:
	1987	raise service_error(service_error.req,
	1988	"Bad request format (no VisRequestBody)")
	1989	exp = req.get('experiment', None)
	1990	if exp:
	1991	if exp.has_key('fedid'):
	1992	key = exp['fedid']
	1993	keytype = "fedid"
	1994	elif exp.has_key('localname'):
	1995	key = exp['localname']
	1996	keytype = "localname"
	1997	else:
	1998	raise service_error(service_error.req, "Unknown lookup type")
	1999	else:
	2000	raise service_error(service_error.req, "No request?")
	2001
[8cab4c2]	2002	try:
	2003	proof = self.check_experiment_access(fid, key)
	2004	except service_error, e:
	2005	self.log.info("vis call failed for %s: access denied" % fid)
	2006	raise e
[cf0ff4f]	2007
	2008	self.state_lock.acquire()
[80b1e82]	2009	# Generate the visualization
	2010	if key in self.state:
	2011	if self.state[key].top is not None:
	2012	try:
	2013	vis = self.genviz(
[6a50b78]	2014	topdl.topology_to_vtopo(self.state[key].top))
[80b1e82]	2015	except service_error, e:
	2016	self.state_lock.release()
	2017	raise e
	2018	rv = { 'experiment' : {keytype: key },
	2019	'vis': vis,
[e83f2f2]	2020	'proof': proof.to_dict(),
[80b1e82]	2021	}
[cf0ff4f]	2022	else:
[80b1e82]	2023	state = self.state[key].status
[cf0ff4f]	2024	self.state_lock.release()
	2025
[2bb8b35]	2026	if rv:
	2027	self.log.info("vis call completed for %s %s " % \
	2028	(key, fid))
	2029	return rv
[cf0ff4f]	2030	else:
	2031	if state:
[2bb8b35]	2032	self.log.info("vis call completed for %s %s (not ready)" % \
	2033	(key, fid))
[cf0ff4f]	2034	raise service_error(service_error.partial,
	2035	"Not ready: %s" % state)
	2036	else:
[2bb8b35]	2037	self.log.info("vis call completed for %s %s (no experiment)" % \
	2038	(key, fid))
[cf0ff4f]	2039	raise service_error(service_error.req, "No such experiment")
	2040
	2041
def save_federant_information(self, allocated, tbparams, eid, top):
    """
    Store the various data that have changed in the experiment state
    between when it was started and the beginning of resource allocation.
    This is basically the information about each local allocation.  This
    fills in the values of the placeholder allocation in the state.  It
    also collects the access proofs and returns them (deep copies) for a
    response message.

    allocated is keyed by the testbeds that were allocated, tbparams maps
    testbed name to its allocation info, eid is the experiment's key in
    the state and top is the topology, which gains one topdl.Testbed
    element per allocated testbed.
    """
    self.state_lock.acquire()
    try:
        exp = self.state[eid]
        # NOTE(review): the stored copy is taken before the Testbed
        # elements are appended to top below, so it does not include them;
        # that order is preserved here - confirm it is intended.
        exp.top = top.clone()
        # save federant information
        for k in allocated.keys():
            exp.add_allocation(tbparams[k])
            top.elements.append(topdl.Testbed(uri=tbparams[k].uri,
                type="testbed", localname=[k],
                service=[ s.to_topdl() for s in tbparams[k].services]))

        # Access proofs for the response message
        proofs = [copy.deepcopy(p) for k in tbparams.keys()\
                for p in tbparams[k].proof]
        exp.updated()
        if self.state_filename:
            self.write_state()
    finally:
        # Release the lock even when the lookup or the state write fails
        self.state_lock.release()
    return proofs
[cf0ff4f]	2069
	2070	def clear_placeholder(self, eid, expid, tmpdir):
	2071	"""
	2072	Clear the placeholder and remove any allocated temporary dir.
	2073	"""
	2074
	2075	self.state_lock.acquire()
	2076	del self.state[eid]
	2077	del self.state[expid]
	2078	if self.state_filename: self.write_state()
	2079	self.state_lock.release()
	2080	if tmpdir and self.cleanup:
	2081	self.remove_dirs(tmpdir)
	2082
	2083	# end of create_experiment sub-functions
[5ecb9a3]	2084
def create_experiment(self, req, fid):
    """
    The external interface to experiment creation called from the
    dispatcher.

    Checks the caller's access, creates a working directory, obtains the
    topology, splits it per testbed, requests access to each testbed and
    records the allocations in the experiment state.  A background thread
    running allocate_resources is then started to instantiate the
    segments, and the 'starting' status is returned with the access proofs.
    A service_error during setup clears the placeholder state and is
    re-raised to the caller.
    """

    self.log.info("Create experiment call started for %s" % fid)
    req = req.get('CreateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no CreateRequestBody)")
    key = self.get_experiment_key(req)

    # Import information from the requester
    if self.auth.import_credentials(data_list=req.get('credential', [])):
        self.auth.save()
    else:
        self.log.debug("Failed to import delegation credentials(!)")

    try:
        # Make sure that the caller can talk to us
        proof = self.check_experiment_access(fid, key)
    except service_error as e:
        self.log.info("Create experiment call failed for %s: access denied"\
                % fid)
        raise e

    # Install the testbed map entries supplied with the request into a copy
    # of the testbed map.
    tbmap = dict(self.tbmap)
    tbactive = set(self.tbactive)
    for entry in req.get('testbedmap', []):
        if 'testbed' in entry and 'uri' in entry:
            tbmap[entry['testbed']] = entry['uri']
            if 'active' in entry and entry['active']:
                tbactive.add(entry['testbed'])

    # a place to work
    try:
        tmpdir = tempfile.mkdtemp(prefix="split-")
        os.mkdir(tmpdir+"/keys")
    except EnvironmentError:
        raise service_error(service_error.internal, "Cannot create tmp dir")

    tbparams = { }

    eid, expid, expcert_file = \
            self.get_experiment_ids_and_start(key, tmpdir)

    # This catches exceptions to clear the placeholder if necessary
    try:
        if not (eid and expid):
            raise service_error(service_error.internal,
                    "Cannot find local experiment info!?")

        top = self.get_topology(req, tmpdir)
        self.confirm_software(top)
        # Assign the IPs
        hosts, ip_allocator = self.allocate_ips_to_topo(top)
        # Find the testbeds to look up, remembering the hosts on each
        tb_hosts = { }
        testbeds = [ ]
        for elem in top.elements:
            if not isinstance(elem, topdl.Computer):
                continue
            tb_name = elem.get_attribute('testbed') or 'default'
            if tb_name not in tb_hosts:
                tb_hosts[tb_name] = [ ]
                testbeds.append(tb_name)
            tb_hosts[tb_name].append(elem.name)

        masters, pmasters = self.get_testbed_services(req, testbeds)
        allocated = { }         # Testbeds we can access
        topo = { }              # Sub topologies
        connInfo = { }          # Connection information

        self.split_topology(top, topo, testbeds)

        self.get_access_to_testbeds(testbeds, fid, allocated,
                tbparams, masters, tbmap, expid, expcert_file)

        attrs = self.generate_keys_and_hosts(tmpdir, expid, hosts, tbparams)

        part = experiment_partition(self.auth, self.store_url, tbmap,
                self.muxmax, self.direct_transit, tbactive)
        part.add_portals(top, topo, eid, pmasters, tbparams, ip_allocator,
                connInfo, expid)

        auth_attrs = set()
        # Now get access to the dynamic testbeds (those added above).  The
        # list is built first because allocated grows inside the loop.
        dynamic = [ t for t in topo if t not in allocated ]
        for tb in dynamic:
            self.get_access(tb, tbparams, fid, masters, tbmap,
                    expid, expcert_file)
            allocated[tb] = 1
            store_keys = topo[tb].get_attribute('store_keys')
            # Give the testbed access to keys it exports or imports
            if store_keys:
                auth_attrs.update(set([
                    (tbparams[tb].allocID, sk) \
                            for sk in store_keys.split(" ")]))

        if auth_attrs:
            self.append_experiment_authorization(expid, auth_attrs)

        # transit and disconnected testbeds may not have a connInfo entry.
        # Fill in the blanks.
        for t in allocated.keys():
            connInfo.setdefault(t, { })

        self.wrangle_software(expid, top, topo, tbparams)

        proofs = self.save_federant_information(allocated, tbparams,
                eid, top)
    except service_error as e:
        # If something goes wrong in the parse (usually an access error)
        # clear the placeholder state.  From here on out the code delays
        # exceptions.  Failing at this point returns a fault to the remote
        # caller.

        self.log.info("Create experiment call failed for %s %s: %s" %
                (eid, fid, e))
        self.clear_placeholder(eid, expid, tmpdir)
        raise e

    # Start the background swapper and return the starting state.  From
    # here on out, the state will stick around a while.

    # Create a logger that logs to the experiment's state object as well as
    # to the main log file.
    alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
    alloc_collector = self.list_log(self.state[eid].log)
    handler = logging.StreamHandler(alloc_collector)
    # XXX: there should be a global one of these rather than repeating the
    # code.
    handler.setFormatter(
            logging.Formatter("%(asctime)s %(name)s %(message)s",
                '%d %b %y %H:%M:%S'))
    alloc_log.addHandler(handler)

    # Start a thread to do the resource allocation
    worker = Thread(target=self.allocate_resources,
            args=(allocated, masters, eid, expid, tbparams,
                top, topo, tmpdir, alloc_log, alloc_collector, attrs,
                connInfo, tbmap, expcert_file),
            name=eid)
    worker.start()

    rv = {
            'experimentID': [
                {'localname' : eid }, { 'fedid': copy.copy(expid) }
            ],
            'experimentStatus': 'starting',
            'proof': [ proof.to_dict() ] + proofs,
        }
    self.log.info("Create experiment call succeeded for %s %s" % \
            (eid, fid))

    return rv
[9479343]	2248
	2249	def get_experiment_fedid(self, key):
	2250	"""
[db6b092]	2251	find the fedid associated with the localname key in the state database.
[9479343]	2252	"""
	2253
[db6b092]	2254	rv = None
	2255	self.state_lock.acquire()
[29d5f7c]	2256	if key in self.state:
	2257	rv = self.state[key].fedid
[db6b092]	2258	self.state_lock.release()
	2259	return rv
[a97394b]	2260
def check_experiment_access(self, fid, key):
    """
    Confirm that the fid has access to the experiment.  Though a request
    may be made in terms of a local name, the access attribute is always
    the experiment's fedid, so a non-fedid key is first translated through
    the state database.

    Returns the proof from the authorizer on success and raises an access
    service_error carrying the proof otherwise.
    """
    if isinstance(key, fedid):
        exp_fedid = key
    else:
        exp_fedid = self.get_experiment_fedid(key)

    access_ok, proof = self.auth.check_attribute(fid, exp_fedid,
            with_proof=True)

    if not access_ok:
        raise service_error(service_error.access, "Access Denied",
                proof)
    return proof
[4064742]	2277
	2278
def get_handler(self, path, fid):
    """
    Perhaps surprisingly named, this function handles HTTP GET requests to
    this server (SOAP requests are POSTs).

    If the authorizer grants fid the attribute named by path, return the
    file's location under the repo directory with a binary content type;
    otherwise return a pair of Nones.
    """
    self.log.info("Get handler %s %s" % (path, fid))
    # XXX: log proofs?
    if not self.auth.check_attribute(fid, path):
        return (None, None)
    return ("%s/%s" % (self.repodir, path), "application/binary")
[987aaa1]	2290
def update_info(self, key, force=False):
    """
    Refresh the stored topology of the experiment under key with
    information gathered from its segments.

    Nothing happens unless the experiment exists, has a topology and
    either force is set or its state is older than self.info_cache_limit.
    Otherwise each allocation is queried through info_segment, a copy of
    the topology is annotated with the results and the copy is stored back
    into the state.  Raises service_error if the scratch directory cannot
    be created.
    """
    top = None
    info_params = { }
    cert = None
    self.state_lock.acquire()
    try:
        if key in self.state:
            exp = self.state[key]
            if force or exp.older_than(self.info_cache_limit):
                top = exp.top
                if top is not None: top = top.clone()
                d1, info_params, cert, d2 = \
                        self.get_segment_info(exp, need_lock=False)
    finally:
        self.state_lock.release()

    if top is None: return

    try:
        tmpdir = tempfile.mkdtemp(prefix="info-")
    except EnvironmentError:
        raise service_error(service_error.internal,
                "Cannot create tmp dir")

    data = []
    try:
        # Inside the try so that a failure to write the certificate still
        # removes the scratch directory.
        cert_file = self.make_temp_certfile(cert, tmpdir)
        for uri, aid in info_params.values():
            info = self.info_segment(log=self.log, testbed=uri,
                    cert_file=cert_file, cert_pwd=None,
                    trusted_certs=self.trusted_certs,
                    caller=self.call_InfoSegment)
            info(uri, aid)
            data.append(info)
    # Clean up the tmpdir no matter what
    finally:
        self.remove_dirs(tmpdir)

    self.annotate_topology(top, data)
    self.state_lock.acquire()
    try:
        # The experiment may have gone away while the lock was dropped
        if key in self.state:
            self.state[key].top = top
            self.state[key].updated()
            if self.state_filename: self.write_state()
    finally:
        self.state_lock.release()
	2331
[29d5f7c]	2332
def get_info(self, req, fid):
    """
    Return all the stored info about this experiment.

    req carries an InfoRequestBody naming the experiment by fedid or
    localname, plus optional 'legacy' (also return vtopo and vis) and
    'fresh' (force a refresh of the cached segment info) flags.  fid is
    the caller's identity.  Returns the experiment's info dict with the
    access proof added.  Raises service_error for a malformed request,
    denied access or an unknown experiment.
    """
    rv = None
    top = None

    self.log.info("Info call started for %s" % fid)
    req = req.get('InfoRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no InfoRequestBody)")
    exp = req.get('experiment', None)
    legacy = req.get('legacy', False)
    fresh = req.get('fresh', False)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    try:
        proof = self.check_experiment_access(fid, key)
    except service_error:
        self.log.info("Info call failed for %s: access denied" % fid)
        # Propagate the failure: without this an unauthorized caller still
        # triggered the refresh below and proof was left unbound.
        raise

    self.update_info(key, fresh)

    self.state_lock.acquire()
    try:
        if key in self.state:
            rv = self.state[key].get_info()
            # Copy the topo if we need legacy annotations
            if legacy:
                top = self.state[key].top
                if top is not None: top = top.clone()
    finally:
        self.state_lock.release()
    self.log.info("Gathered Info for %s %s" % (key, fid))

    # If the legacy visualization and topology representations are
    # requested, calculate them and add them to the return.
    if legacy and rv is not None:
        self.log.info("Generating legacy Info for %s %s" % (key, fid))
        if top is not None:
            vtopo = topdl.topology_to_vtopo(top)
            if vtopo is not None:
                rv['vtopo'] = vtopo
                try:
                    vis = self.genviz(vtopo)
                except service_error as e:
                    # A missing visualization is not fatal to the call
                    self.log.debug('Problem generating visualization: %s' \
                            % e)
                    vis = None
                if vis is not None:
                    rv['vis'] = vis
    if rv:
        self.log.info("Info succeded for %s %s" % (key, fid))
        rv['proof'] = proof.to_dict()
        return rv
    else:
        self.log.info("Info failed for %s %s: no experiment" % (key, fid))
        raise service_error(service_error.req, "No such experiment")
[7a8d667]	2400
def operate_on_segments(self, op_params, cert, op, testbeds, params,
        results):
    """
    Call OperateSegment on multiple testbeds and gather the results.
    op_params contains the parameters needed to contact that testbed (a
    (uri, allocation id) pair per testbed name), cert is a certificate
    containing the fedid to use, op is the operation, testbeds is a dict
    mapping testbed name to targets in that testbed, params are the
    parameters to include and results is a growing list of the results of
    the calls, which is extended in place.

    Targets on a testbed that cannot be contacted, or whose call yields no
    status, get a 'federant' operation_status each.
    """
    try:
        tmpdir = tempfile.mkdtemp(prefix="info-")
    except EnvironmentError:
        raise service_error(service_error.internal,
                "Cannot create tmp dir")

    try:
        # Inside the try so the scratch directory is removed even when the
        # certificate cannot be written.
        cert_file = self.make_temp_certfile(cert, tmpdir)
        for tb, targets in testbeds.items():
            if tb in op_params:
                uri, aid = op_params[tb]
                operate = self.operation_segment(log=self.log, testbed=uri,
                        cert_file=cert_file, cert_pwd=None,
                        trusted_certs=self.trusted_certs,
                        caller=self.call_OperationSegment)
                if operate(uri, aid, op, targets, params):
                    if operate.status is not None:
                        results.extend(operate.status)
                        continue
            # Something went wrong in a weird way.  Add statuses
            # that reflect that to results
            for t in targets:
                results.append(operation_status(t,
                    operation_status.federant,
                    'Unexpected error on %s' % tb))
    # Clean up the tmpdir no matter what
    finally:
        self.remove_dirs(tmpdir)
	2439
	2440	def do_operation(self, req, fid):
	2441	"""
	2442	Find the testbeds holding each target and ask them to carry out the
	2443	operation. Return the statuses.
	2444	"""
	2445	# Map an element to the testbed containing it
	2446	def element_to_tb(e):
	2447	if isinstance(e, topdl.Computer): return e.get_attribute("testbed")
	2448	elif isinstance(e, topdl.Testbed): return e.name
	2449	else: return None
	2450	# If d is an operation_status object, make it a dict
	2451	def make_dict(d):
	2452	if isinstance(d, dict): return d
	2453	elif isinstance(d, operation_status): return d.to_dict()
	2454	else: return { }
	2455
[b709861]	2456	def element_name(e):
	2457	if isinstance(e, topdl.Computer): return e.name
	2458	elif isinstance(e, topdl.Testbed):
	2459	if e.localname: return e.localname[0]
	2460	else: return None
	2461	else: return None
	2462
[8cab4c2]	2463	self.log.info("Operation call started for %s" % fid)
[22a1a77]	2464	req = req.get('OperationRequestBody', None)
	2465	if not req:
	2466	raise service_error(service_error.req,
	2467	"Bad request format (no OperationRequestBody)")
	2468	exp = req.get('experiment', None)
	2469	op = req.get('operation', None)
[b709861]	2470	targets = set(req.get('target', []))
[22a1a77]	2471	params = req.get('parameter', None)
	2472
	2473	if exp:
	2474	if 'fedid' in exp:
	2475	key = exp['fedid']
	2476	keytype = "fedid"
	2477	elif 'localname' in exp:
	2478	key = exp['localname']
	2479	keytype = "localname"
	2480	else:
	2481	raise service_error(service_error.req, "Unknown lookup type")
	2482	else:
	2483	raise service_error(service_error.req, "No request?")
	2484
[b709861]	2485	if op is None or not targets:
[22a1a77]	2486	raise service_error(service_error.req, "No request?")
	2487
[8cab4c2]	2488	try:
	2489	proof = self.check_experiment_access(fid, key)
	2490	except service_error, e:
	2491	self.log.info("Operation call failed for %s: access denied" % fid)
	2492	raise e
	2493
[22a1a77]	2494	self.state_lock.acquire()
	2495	if key in self.state:
	2496	d1, op_params, cert, d2 = \
[b709861]	2497	self.get_segment_info(self.state[key], need_lock=False,
	2498	key='tb')
[22a1a77]	2499	top = self.state[key].top
	2500	if top is not None:
	2501	top = top.clone()
	2502	self.state_lock.release()
	2503
	2504	if top is None:
[8cab4c2]	2505	self.log.info("Operation call failed for %s: not active" % fid)
[22a1a77]	2506	raise service_error(service_error.partial, "No topology yet",
	2507	proof=proof)
	2508
	2509	testbeds = { }
	2510	results = []
	2511	for e in top.elements:
[b709861]	2512	ename = element_name(e)
	2513	if ename in targets:
[22a1a77]	2514	tb = element_to_tb(e)
[b709861]	2515	targets.remove(ename)
[22a1a77]	2516	if tb is not None:
[b709861]	2517	if tb in testbeds: testbeds[tb].append(ename)
	2518	else: testbeds[tb] = [ ename ]
[22a1a77]	2519	else:
	2520	results.append(operation_status(e.name,
	2521	code=operation_status.no_target,
	2522	description='Cannot map target to testbed'))
	2523
[b709861]	2524	for t in targets:
	2525	results.append(operation_status(t, operation_status.no_target))
	2526
[22a1a77]	2527	self.operate_on_segments(op_params, cert, op, testbeds, params,
	2528	results)
	2529
[8cab4c2]	2530	self.log.info("Operation call succeeded for %s" % fid)
[22a1a77]	2531	return {
	2532	'experiment': exp,
[b709861]	2533	'status': [make_dict(r) for r in results],
[22a1a77]	2534	'proof': proof.to_dict()
	2535	}
	2536
	2537
def get_multi_info(self, req, fid):
    """
    Return all the stored info that this fedid can access.

    Every experiment stored under a fedid key is checked for access by
    fid; those denied are skipped silently.  Returns a dict with an 'info'
    list (one info dict per accessible experiment, each with its proof)
    and a 'proof' list.  A service_error other than an access failure is
    re-raised.
    """
    rv = { 'info': [ ], 'proof': [ ] }

    self.log.info("Multi Info call started for %s" % fid)
    self.state_lock.acquire()
    try:
        for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
            try:
                proof = self.check_experiment_access(fid, key)
            except service_error as e:
                if e.code == service_error.access:
                    continue
                self.log.info("Multi Info call failed for %s: %s" % \
                        (fid, e))
                raise

            if key in self.state:
                info = self.state[key].get_info()
                info['proof'] = proof.to_dict()
                rv['info'].append(info)
                rv['proof'].append(proof.to_dict())
    finally:
        # Also covers exceptions that are not service_errors
        self.state_lock.release()
    self.log.info("Multi Info call succeeded for %s" % fid)
    return rv
[65f3f29]	2566
def check_termination_status(self, fed_exp, force):
    """
    Confirm that the experiment is in a valid state to stop (or force it)
    and return the state - invalid states for deletion and force settings
    cause exceptions.

    fed_exp is the experiment's state object; its status is read under the
    state lock.  An experiment that is 'starting' or 'terminating' is only
    accepted when force is set (a warning is logged).  An experiment with
    no status raises an internal service_error.
    """
    self.state_lock.acquire()
    try:
        status = fed_exp.status
    finally:
        # One release point instead of one per exit path
        self.state_lock.release()

    if not status:
        # No status??? trouble
        raise service_error(service_error.internal,
                "Experiment has no status!?")

    if status in ('starting', 'terminating'):
        if not force:
            raise service_error(service_error.partial,
                    'Experiment still being created or destroyed')
        self.log.warning('Experiment in %s state ' % status + \
                'being terminated by force.')
    return status
	2592
def get_segment_info(self, fed_exp, need_lock=True, key='aid'):
    """
    Collect what is needed to contact the segments of an experiment.

    Returns a tuple of: the experiment's ids (localname and fedid, those
    that are not None), a dict of (uri, allocation id) pairs, the
    experiment's identity and the fedid rendered as a string.  The dict is
    keyed by allocation id when key is 'aid' and by testbed when key is
    'tb'; any other key leaves it empty.  Pass need_lock=False when the
    caller already holds the state lock.
    """
    term_params = { }
    if need_lock: self.state_lock.acquire()
    try:
        ids = [ x for x in (fed_exp.localname, fed_exp.fedid) \
                if x is not None ]
        expcert = fed_exp.identity
        repo = "%s" % fed_exp.fedid

        # Collect the allocation/segment ids into a dict that contains a
        # tuple of uri, aid for each allocation
        for fed in fed_exp.get_all_allocations():
            uri = fed.uri
            aid = fed.allocID
            if key == 'aid': term_params[aid] = (uri, aid)
            elif key == 'tb': term_params[fed.tb] = (uri, aid)
    finally:
        if need_lock: self.state_lock.release()
    return ids, term_params, expcert, repo
	2611
	2612
	2613	def get_termination_info(self, fed_exp):
	2614	self.state_lock.acquire()
	2615	ids, term_params, expcert, repo = self.get_segment_info(fed_exp, False)
[cf0ff4f]	2616	# Change the experiment state
[29d5f7c]	2617	fed_exp.status = 'terminating'
[6e33086]	2618	fed_exp.updated()
[cf0ff4f]	2619	if self.state_filename: self.write_state()
	2620	self.state_lock.release()
	2621
	2622	return ids, term_params, expcert, repo
	2623
	2624
	2625	def deallocate_resources(self, term_params, expcert, status, force,
	2626	dealloc_log):
	2627	tmpdir = None
	2628	# This try block makes sure the tempdir is cleared
	2629	try:
	2630	# If no expcert, try the deallocation as the experiment
	2631	# controller instance.
	2632	if expcert and self.auth_type != 'legacy':
	2633	try:
	2634	tmpdir = tempfile.mkdtemp(prefix="term-")
	2635	except EnvironmentError:
	2636	raise service_error(service_error.internal,
	2637	"Cannot create tmp dir")
	2638	cert_file = self.make_temp_certfile(expcert, tmpdir)
	2639	pw = None
	2640	else:
	2641	cert_file = self.cert_file
	2642	pw = self.cert_pwd
	2643
	2644	# Stop everyone. NB, wait_for_all waits until a thread starts
	2645	# and then completes, so we can't wait if nothing starts. So,
	2646	# no tbparams, no start.
	2647	if len(term_params) > 0:
	2648	tp = thread_pool(self.nthreads)
	2649	for k, (uri, aid) in term_params.items():
	2650	# Create and start a thread to stop the segment
	2651	tp.wait_for_slot()
	2652	t = pooled_thread(\
	2653	target=self.terminate_segment(log=dealloc_log,
	2654	testbed=uri,
	2655	cert_file=cert_file,
	2656	cert_pwd=pw,
	2657	trusted_certs=self.trusted_certs,
	2658	caller=self.call_TerminateSegment),
	2659	args=(uri, aid), name=k,
	2660	pdata=tp, trace_file=self.trace_file)
	2661	t.start()
	2662	# Wait for completions
	2663	tp.wait_for_all_done()
	2664
	2665	# release the allocations (failed experiments have done this
	2666	# already, and starting experiments may be in odd states, so we
	2667	# ignore errors releasing those allocations
	2668	try:
	2669	for k, (uri, aid) in term_params.items():
	2670	self.release_access(None, aid, uri=uri,
	2671	cert_file=cert_file, cert_pwd=pw)
	2672	except service_error, e:
	2673	if status != 'failed' and not force:
	2674	raise e
	2675
	2676	# Clean up the tmpdir no matter what
	2677	finally:
	2678	if tmpdir: self.remove_dirs(tmpdir)
	2679
def terminate_experiment(self, req, fid):
    """
    Swap this experiment out on the federants and delete the shared
    information

    req is the request dict; its 'TerminateRequestBody' entry must carry
    the 'experiment' identifier and may carry a 'force' flag.  fid is the
    identity of the caller and is checked for access to the experiment.

    Returns a dict with the 'experiment' identifier from the request, the
    collected 'deallocationLog' text and the access 'proof'.

    Raises service_error when the request body is missing or when no
    state is stored for the experiment; errors from the access check,
    the status check and the deallocation propagate to the caller.
    """
    self.log.info("Terminate experiment call started for %s" % fid)
    req = req.get('TerminateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no TerminateRequestBody)")

    key = self.get_experiment_key(req, 'experiment')
    try:
        proof = self.check_experiment_access(fid, key)
    except service_error:
        self.log.info(
                "Terminate experiment call failed for %s: access denied" \
                        % fid)
        raise
    exp = req.get('experiment', False)
    force = req.get('force', False)

    dealloc_list = [ ]

    # Create a logger that logs to the dealloc_list as well as to the main
    # log file.
    dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
    dealloc_log.info("Terminating %s " %key)
    h = logging.StreamHandler(self.list_log(dealloc_list))
    # XXX: there should be a global one of these rather than repeating the
    # code.
    h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
                '%d %b %y %H:%M:%S'))
    dealloc_log.addHandler(h)

    # Named loggers are cached by the logging module, so the handler must
    # be detached on every exit path or handlers (and the lists they feed)
    # pile up on the logger across calls.
    try:
        self.state_lock.acquire()
        try:
            fed_exp = self.state.get(key, None)
        finally:
            self.state_lock.release()

        if not fed_exp:
            self.log.info("Terminate experiment failed for %s %s: no state" % \
                    (key, fid))
            raise service_error(service_error.req, "No saved state")

        status = self.check_termination_status(fed_exp, force)
        # get_termination_info updates the experiment state
        ids, term_params, expcert, repo = \
                self.get_termination_info(fed_exp)
        self.deallocate_resources(term_params, expcert, status, force,
                dealloc_log)

        # Remove the terminated experiment
        self.state_lock.acquire()
        try:
            for exp_id in ids:
                self.clear_experiment_authorization(exp_id,
                        need_state_lock=False)
                if exp_id in self.state: del self.state[exp_id]

            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

        # Delete any synch points associated with this experiment.  All
        # synch points begin with the fedid of the experiment.
        fedid_keys = set(["fedid:%s" % f for f in ids \
                if isinstance(f, fedid)])
        for k in self.synch_store.all_keys():
            try:
                # Only the first 46 characters are compared - presumably
                # "fedid:" plus a 40 character id; confirm against fedid.
                if len(k) > 45 and k[0:46] in fedid_keys:
                    self.synch_store.del_value(k)
            except synch_store.BadDeletionError:
                pass
        self.write_store()

        # Remove software and other cached stuff from the filesystem.
        if repo:
            self.remove_dirs("%s/%s" % (self.repodir, repo))

        self.log.info("Terminate experiment succeeded for %s %s" % \
                (key, fid))
        return {
                'experiment': exp ,
                'deallocationLog': ''.join(dealloc_list),
                'proof': [proof.to_dict()],
                }
    finally:
        dealloc_log.removeHandler(h)
[2761484]	2765
	2766
	2767	def GetValue(self, req, fid):
	2768	"""
	2769	Get a value from the synchronized store
	2770	"""
	2771	req = req.get('GetValueRequestBody', None)
	2772	if not req:
	2773	raise service_error(service_error.req,
	2774	"Bad request format (no GetValueRequestBody)")
	2775
[cf0ff4f]	2776	name = req.get('name', None)
	2777	wait = req.get('wait', False)
[2761484]	2778	rv = { 'name': name }
	2779
[e83f2f2]	2780	if not name:
	2781	raise service_error(service_error.req, "No name?")
	2782
	2783	access_ok, proof = self.auth.check_attribute(fid, name, with_proof=True)
	2784
	2785	if access_ok:
[d8442da]	2786	self.log.debug("[GetValue] asking for %s " % name)
[dadc4da]	2787	try:
	2788	v = self.synch_store.get_value(name, wait)
	2789	except synch_store.RevokedKeyError:
	2790	# No more synch on this key
	2791	raise service_error(service_error.federant,
	2792	"Synch key %s revoked" % name)
[2761484]	2793	if v is not None:
	2794	rv['value'] = v
[e83f2f2]	2795	rv['proof'] = proof.to_dict()
[109a32a]	2796	self.log.debug("[GetValue] got %s from %s" % (v, name))
[2761484]	2797	return rv
	2798	else:
[e83f2f2]	2799	raise service_error(service_error.access, "Access Denied",
	2800	proof=proof)
[2761484]	2801
	2802
	2803	def SetValue(self, req, fid):
	2804	"""
	2805	Set a value in the synchronized store
	2806	"""
	2807	req = req.get('SetValueRequestBody', None)
	2808	if not req:
	2809	raise service_error(service_error.req,
	2810	"Bad request format (no SetValueRequestBody)")
	2811
[cf0ff4f]	2812	name = req.get('name', None)
	2813	v = req.get('value', '')
[2761484]	2814
[e83f2f2]	2815	if not name:
	2816	raise service_error(service_error.req, "No name?")
	2817
	2818	access_ok, proof = self.auth.check_attribute(fid, name, with_proof=True)
	2819
	2820	if access_ok:
[2761484]	2821	try:
	2822	self.synch_store.set_value(name, v)
	2823	self.write_store()
[109a32a]	2824	self.log.debug("[SetValue] set %s to %s" % (name, v))
[2761484]	2825	except synch_store.CollisionError:
	2826	# Translate into a service_error
	2827	raise service_error(service_error.req,
	2828	"Value already set: %s" %name)
[dadc4da]	2829	except synch_store.RevokedKeyError:
	2830	# No more synch on this key
	2831	raise service_error(service_error.federant,
	2832	"Synch key %s revoked" % name)
[e83f2f2]	2833	return { 'name': name, 'value': v, 'proof': proof.to_dict() }
[2761484]	2834	else:
[e83f2f2]	2835	raise service_error(service_error.access, "Access Denied",
	2836	proof=proof)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: