Context Navigation

source: fedd/federation/desktop_access.py @ 5dbcc93

Last change on this file since 5dbcc93 was 5dbcc93, checked in by Ted Faber <faber@…>, 11 years ago
Last few tricks. Manage hosts, make whole script sudo
Property mode set to `100644`
File size: 20.2 KB

Line
1	#!/usr/local/bin/python
2
3	import os,sys
4	import re
5	import string
6	import copy
7	import pickle
8	import logging
9	import random
10	import subprocess
11
12	from util import *
13	from deter import fedid, generate_fedid
14	from authorizer import authorizer, abac_authorizer
15	from service_error import service_error
16	from remote_service import xmlrpc_handler, soap_handler, service_caller
17
18	from deter import topdl
19
20	from access import access_base
21
# Make log messages disappear if no one configures a fedd logger.  Attaching
# a do-nothing handler to the "fedd.access" logger means the logger always has
# at least one handler, even when no module above this one has set one up.
# It's an extra belt for the suspenders.
class nullHandler(logging.Handler):
    """Logging handler that silently discards every record it is given."""

    def emit(self, record):
        """Ignore record; nothing is written anywhere."""
        return None


fl = logging.getLogger("fedd.access")
fl.addHandler(nullHandler())
31
32
33	# The plug-in itself.
class access(access_base):
    """
    Access controller plug-in that attaches one desktop machine to a
    federated experiment.

    The plug-in implements RequestAccess, StartSegment and TerminateSegment
    and also registers ReleaseAccess, which is not defined here (it is
    expected to come from access.access_base).  Starting a segment writes
    "connect" and "disconnect" shell scripts into the configured local
    directory and runs the connect script under sudo; that script adds
    routes, brings up an ssh ethernet tunnel (tap0) to the portal and
    appends the experiment's hosts to /etc/hosts.  Terminating a segment
    runs the disconnect script and empties the local directory.

    Only one segment can be active at a time; that is tracked by the
    'running' entry of self.state.
    """

    def __init__(self, config=None, auth=None):
        """
        Initializer.  Lets the base class read the canonical configuration,
        then pulls the desktop-specific parameters out of the 'access'
        section of config, loads the authorizer and access database, and
        registers the SOAP and XMLRPC handlers.

        config -- configuration object offering get(section, option) and
            get(section, option, default).  NOTE(review): the code below
            treats a missing option as None, so config is presumably
            fedd's own parser rather than a stock ConfigParser -- confirm.
        auth -- passed straight to access_base.__init__.

        Raises service_error if neither hostname nor interface_address is
        configured, or if auth_type is not 'abac'.  An EnvironmentError
        from reading the access database is logged and re-raised.
        """

        # Calling the base initializer, which reads canonical configuration
        # information and initializes canonical members.
        access_base.__init__(self, config, auth)

        self.src_addr = config.get('access', 'interface_address')
        self.router = config.get('access', 'gateway')
        self.hostname = config.get('access', 'hostname')
        # Storage for ephemeral ssh keys and host files
        self.localdir = config.get('access', 'localdir')
        # Path of the ssh private key; filled in by StartSegment.
        self.ssh_identity = None

        # hostname is the name of the ssh endpoint for the other side.  That
        # side needs it to set up routing tables.  If hostname is not
        # available, but an IP address is, use that.
        if self.hostname is None:
            if self.src_addr is None:
                raise service_error(service_error.server_config,
                        'Hostname or interface_address must be set in config')
            self.hostname = self.src_addr

        self.ssh_port = config.get('access', 'ssh_port', '22')

        # authorization information
        self.auth_type = config.get('access', 'auth_type') or 'abac'
        self.auth_dir = config.get('access', 'auth_dir')
        accessdb = config.get("access", "accessdb")
        # Initialize the authorization system.  read_access loads the access
        # database that maps authorization information into local
        # information.
        if self.auth_type == 'abac':
            # Load the current authorization state
            self.auth = abac_authorizer(load=self.auth_dir)
            self.access = [ ]
            if accessdb:
                try:
                    self.read_access(accessdb)
                except EnvironmentError as e:
                    self.log.error("Cannot read %s: %s" % (accessdb, e))
                    # Bare raise keeps the original traceback.
                    raise
        else:
            raise service_error(service_error.internal,
                    "Unknown auth_type: %s" % self.auth_type)

        # The superclass has read the state, but if this is the first run
        # ever, we must initialise the running flag.  This plugin only
        # supports one connection, so StartSegment will fail when
        # self.state['running'] is true.
        self.state_lock.acquire()
        try:
            if 'running' not in self.state:
                self.state['running'] = False
        finally:
            self.state_lock.release()

        # These dictionaries register the plug-in's local routines for
        # handling these four messages with the server code above.  There's
        # a version for SOAP and XMLRPC, depending on which interfaces the
        # plugin supports.  A plug-in writer who wanted to disable XMLRPC
        # could leave the self.xmlrpc_services dictionary empty.
        self.soap_services = {
            'RequestAccess': soap_handler("RequestAccess",
                self.RequestAccess),
            'ReleaseAccess': soap_handler("ReleaseAccess",
                self.ReleaseAccess),
            'StartSegment': soap_handler("StartSegment", self.StartSegment),
            'TerminateSegment': soap_handler("TerminateSegment",
                self.TerminateSegment),
        }
        self.xmlrpc_services = {
            'RequestAccess': xmlrpc_handler('RequestAccess',
                self.RequestAccess),
            'ReleaseAccess': xmlrpc_handler('ReleaseAccess',
                self.ReleaseAccess),
            'StartSegment': xmlrpc_handler("StartSegment",
                self.StartSegment),
            'TerminateSegment': xmlrpc_handler('TerminateSegment',
                self.TerminateSegment),
        }
        self.call_SetValue = service_caller('SetValue', log=self.log)
        self.call_GetValue = service_caller('GetValue', log=self.log)

    def _set_running(self, value):
        """
        Set the 'running' flag in the internal state while holding the
        state lock.
        """
        self.state_lock.acquire()
        try:
            self.state['running'] = value
        finally:
            self.state_lock.release()

    # ReleaseAccess comes from the base class; this is a slightly modified
    # RequestAccess from the base that includes a fedAttr to force this side
    # to be active.
    def RequestAccess(self, req, fid):
        """
        Handle an access request.  Success here maps the requester into the
        local access control space and establishes state about that user
        keyed to a new allocation fedid.  The certificate underlying that
        fedid is saved as <certdir>/<allocID>.pem so later calls can use it.

        req -- request message; must contain 'RequestAccessRequestBody'.
        fid -- the caller's identity, passed to lookup_access.

        Returns a dict with the new 'allocID', a 'fedAttr' list that sets
        nat_portals to 'True', and the access 'proof'.

        Raises service_error if the body is missing, if lookup_access
        denies access, or if the certificate cannot be written.
        """

        self.log.info("RequestAccess called by %s" % fid)
        # The dance to get into the request body
        if 'RequestAccessRequestBody' not in req:
            raise service_error(service_error.req, "No request!?")
        req = req['RequestAccessRequestBody']

        # Base class lookup routine.  Per the original author, a failure
        # here throws a service exception denying access.
        found, owners, proof = self.lookup_access(req, fid)
        self.log.info(
                "[RequestAccess] Access granted local creds %s" % found)
        # Make a fedid for this allocation
        allocID, alloc_cert = generate_fedid(subj="alloc", log=self.log)
        aid = unicode(allocID)

        # Store the data about this allocation.  The finally clause makes
        # sure a failure while recording it cannot leave the lock held.
        self.state_lock.acquire()
        try:
            self.state[aid] = {
                'user': found,
                'owners': owners,
                'auth': set(),
            }
            # Authorize the creating fedid and the principal representing
            # the allocation to manipulate it.
            self.append_allocation_authorization(aid,
                    ((fid, allocID), (allocID, allocID)))
            self.write_state()
        finally:
            self.state_lock.release()

        # Stash the allocation's certificate in the certificate directory.
        try:
            with open("%s/%s.pem" % (self.certdir, aid), "w") as f:
                f.write('%s\n' % alloc_cert)
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    "Can't open %s/%s : %s" % (self.certdir, aid, e))
        self.log.debug('[RequestAccess] Returning allocation ID: %s' % allocID)
        return {
            'allocID': { 'fedid': allocID },
            'fedAttr': [{ 'attribute': 'nat_portals', 'value': 'True' }],
            'proof': proof.to_dict()
        }

    def validate_topology(self, top):
        '''
        Validate the topology.  Desktops can only be single connections.
        Though the topology will include a portal and a node, the access
        controller will implement both on one node.

        Checks made: at most two Computer elements, at most one of them a
        portal, and every non-portal computer has exactly one interface on
        exactly one substrate whose other interfaces all belong to
        computers in this topology.

        Returns True when the topology is acceptable and raises
        service_error otherwise.

        As more capabilities are added to the controller the constraints
        here will relax.
        '''

        comps = [e for e in top.elements if isinstance(e, topdl.Computer)]
        if len(comps) > 2:
            raise service_error(service_error.req,
                    "Desktop only supports 1-node subexperiments")

        portals = 0
        for c in comps:
            if c.get_attribute('portal') is not None:
                portals += 1
                continue
            # Reject an unconnected node here rather than failing with an
            # IndexError on the [0] lookups below.
            if not c.interface:
                raise service_error(service_error.req,
                        "Desktop Node has no interface")
            if len(c.interface) > 1:
                raise service_error(service_error.req,
                        "Desktop Node has more than one interface")
            iface = c.interface[0]
            if not iface.subs:
                raise service_error(service_error.req,
                        "Desktop Node interface has no substrate")
            if len(iface.subs) > 1:
                raise service_error(service_error.req,
                        "Desktop Node has more than one substate on interface")
            for other in iface.subs[0].interfaces:
                if other.element not in comps:
                    raise service_error(service_error.req,
                            "Desktop Node connected to non-portal")

        if portals > 1:
            raise service_error(service_error.req,
                    "Desktop segment has more than one portal")
        return True

    def validate_connInfo(self, connInfo):
        '''
        Check that the request describes exactly one connection and that
        it is an ssh connection.

        connInfo -- list of connection dicts from the request.

        Returns True on success; raises service_error otherwise.
        '''
        if not connInfo:
            raise service_error(service_error.req,
                    "Desktop segment requests no connections")
        if len(connInfo) != 1:
            raise service_error(service_error.req,
                    "Desktop segment requests multiple connections")
        # .get() so a connection without a 'type' is reported as a bad
        # request instead of surfacing as a KeyError.
        if connInfo[0].get('type') != 'ssh':
            raise service_error(service_error.req,
                    "Desktop segment requires ssh connecton")
        return True

    def export_store_info(self, certfile, connInfo):
        '''
        Tell the other portal node where to reach this desktop.  The other
        side uses this information to set up routing, though the ssh_port
        is unused as the Desktop always initiates ssh connections.

        For every non-input parameter of every connection, the matching
        local value ('peer' or 'ssh_port') is sent to the parameter's
        store URL with SetValue, authenticated by certfile.  Parameters
        with any other name are logged and skipped.
        '''
        values = { 'peer': self.hostname, 'ssh_port': self.ssh_port }
        for c in connInfo:
            for p in c.get('parameter', []):
                if p.get('type', '') == 'input':
                    continue
                pname = p.get('name', '')
                key = p.get('key', '')
                surl = p.get('store', '')
                if pname not in values:
                    # self.log is a logger object, not a callable.
                    self.log.warning('Unknown export parameter: %s' % pname)
                    continue
                val = values[pname]
                self.log.debug('Setting %s (%s) to %s on %s' % \
                        (pname, key, val, surl))
                self.call_SetValue(surl, { 'name': key, 'value': val },
                        certfile)

    def set_route(self, dest, script, gw=None, src=None):
        '''
        Append to script the platform's shell command for adding a route
        to dest.

        dest -- destination address or prefix.
        script -- open, writable file object for the shell script.
        gw -- optional gateway address.
        src -- optional source address (Linux only).

        Nothing is written when both gw and src are None.  Raises
        service_error if src is given on FreeBSD or if the platform is
        neither FreeBSD nor Linux.
        '''
        if sys.platform.startswith('freebsd'):
            if src is not None:
                raise service_error(service_error.internal,
                        'FreeBSD will not route based on src address')
            if gw is not None:
                script.write('route add %s %s\n' % (dest, gw))
        elif sys.platform.startswith('linux'):
            if src is not None and gw is not None:
                script.write('ip route add %s via %s src %s\n' % \
                        (dest, gw, src))
            elif src is not None:
                script.write('ip route add %s src %s\n' % (dest, src))
            elif gw is not None:
                script.write('ip route add %s via %s\n' % (dest, gw))
        else:
            raise service_error(service_error.internal,
                    'Unknown platform %s' % sys.platform)

    def unset_route(self, dest, script):
        '''
        Append to script the platform's shell command for deleting the
        route to dest.  Nothing is written on platforms other than FreeBSD
        and Linux.
        '''
        if sys.platform.startswith('freebsd'):
            script.write('route delete %s\n' % dest)
        elif sys.platform.startswith('linux'):
            script.write('ip route delete %s\n' % dest)

    def find_a_peer(self, addr):
        '''
        Find another node in the experiment that's on our subnet.  This is
        a hack to handle the problem that we really cannot require the
        desktop to dynamically route.  Will be improved by distributing
        static routes.

        addr -- this node's dotted address.  Everything up to the last dot
            is taken as the subnet prefix.

        Returns the first address in <localdir>/hosts that shares the
        prefix and differs from addr.  Raises service_error if addr has no
        dot, if the hosts file cannot be read, or if no such address
        exists.
        '''

        hosts = os.path.join(self.localdir, 'hosts')
        p = addr.rfind('.')
        if p == -1:
            raise service_error(service_error.req, 'bad address in topology')
        prefix = addr[0:p]
        # Escape the prefix so its dots match literally, and refuse to match
        # in the middle of a longer address (e.g. 10.0.0.5 inside
        # 110.0.0.5).
        addr_re = re.compile(r'(?<![\d.])(%s\.\d+)(?!\d)' % re.escape(prefix))
        try:
            with open(hosts, 'r') as f:
                for line in f:
                    for m in addr_re.finditer(line):
                        if m.group(1) != addr:
                            return m.group(1)
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    'Cannot open %s: %s' % (e.filename, e.strerror))
        raise service_error(service_error.req,
                'No other nodes in this subnet??')

    def _write_connect_script(self, path, peer, port, my_addr, exp_peer,
            local_hosts):
        '''
        Write the shell script that builds the connection to path and make
        it executable.
        '''
        with open(path, 'w') as f:
            f.write('#!/bin/sh\n')
            # This picks the outgoing interface to the experiment using the
            # routing system.
            self.set_route(peer, f, self.router, self.src_addr)
            # Wait until the other end reports that it is configured by
            # placing a file this end can access into its local file
            # system.  Try once a minute.
            f.write('while ! /usr/bin/scp -o "StrictHostKeyChecking no" -i %s %s:/usr/local/federation/etc/prep_done /dev/null; do\n' % (self.ssh_identity, peer))
            f.write('sleep 60; done\n')
            f.write(('ssh -w 0:0 -p %s -o "Tunnel ethernet" ' + \
                    '-o "StrictHostKeyChecking no" -i %s %s perl -I/usr/local/federation/lib /usr/local/federation/bin/setup_bridge.pl --tapno=0 --addr=%s &\n') % \
                    (port, self.ssh_identity, peer, my_addr))
            # This should give the tap a chance to come up
            f.write('sleep 10\n')
            # Add experiment nodes to hosts
            f.write('cp /etc/hosts /etc/hosts.DETER.fedd.hold\n')
            f.write('echo "#--- BEGIN FEDD ADDITIONS ---" >> /etc/hosts\n')
            f.write('cat %s >> /etc/hosts\n' % local_hosts)
            f.write('echo "#--- END FEDD ADDITIONS ---" >> /etc/hosts\n')
            # Assign tap address and route experiment connections through
            # it.
            f.write('ifconfig tap0 %s netmask 255.255.255.0 up\n' % my_addr)
            self.set_route('10.0.0.0/8', f, exp_peer)
        os.chmod(path, 0o755)

    def _write_disconnect_script(self, path, peer):
        '''
        Write the shell script that tears the connection down to path and
        make it executable.
        '''
        with open(path, 'w') as f:
            f.write('#!/bin/sh\n')
            f.write('ifconfig tap0 destroy\n')
            self.unset_route(peer, f)
            self.unset_route('10.0.0.0/8', f)
            f.write('mv /etc/hosts.DETER.fedd.hold /etc/hosts\n')
        os.chmod(path, 0o755)

    def configure_desktop(self, top, connInfo):
        '''
        Build the connection.  Establish routing to the peer if using a
        separate interface, wait until the other end confirms setup,
        establish the ssh layer-two tunnel (tap), assign the in-experiment
        IP address to the tunnel and establish routing to the experiment
        through the tap.

        The work is done by a generated "connect" script that is started
        under sudo in the background with its output sent to
        /tmp/connect.log; a matching "disconnect" script is written for
        TerminateSegment to run.

        top -- the topdl topology of the segment.
        connInfo -- list of connection dicts from the request.

        Returns True once the connect script has been started.  Raises
        service_error if parameters are missing, the scripts cannot be
        written, or the connect script cannot be started.
        '''

        # get the peer and ssh port from the portal and our IP from the
        # other
        peer = None
        port = None
        my_addr = None
        for e in top.elements:
            if not isinstance(e, topdl.Computer):
                continue
            if e.get_attribute('portal') is None:
                # there should be one interface with one IPv4 address
                if len(e.interface) < 1:
                    raise service_error(service_error.internal,
                            'No interface on experiment node!?!?')
                my_addr = e.interface[0].get_attribute('ip4_address')
            else:
                for ci in connInfo:
                    if ci.get('portal', '') != e.name:
                        continue
                    peer = ci.get('peer')
                    port = '22'
                    for a in ci.get('fedAttr', []):
                        if a['attribute'] == 'ssh_port':
                            port = a['value']

        # XXX scan hosts for IP addresses and compose better routing entry

        if not all([peer, port, my_addr]):
            raise service_error(service_error.req,
                    'Cannot find all config parameters %s %s %s' % (peer, port, my_addr))

        exp_peer = self.find_a_peer(my_addr)

        cscript = os.path.join(self.localdir, 'connect')
        dscript = os.path.join(self.localdir, 'disconnect')
        local_hosts = os.path.join(self.localdir, 'hosts')
        # NOTE(review): peer, port and my_addr come from the request and
        # are interpolated unquoted into a script that is run under sudo.
        # They should be validated or shell-quoted before this is exposed
        # to untrusted callers.
        try:
            self._write_connect_script(cscript, peer, port, my_addr,
                    exp_peer, local_hosts)
            self._write_disconnect_script(dscript, peer)
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    'Cannot create connect %s: %s' % (e.filename, e.strerror))

        try:
            script_log = open('/tmp/connect.log', 'w')
            try:
                subprocess.Popen(['sudo', '/bin/sh', cscript],
                        stdout=script_log, stderr=script_log)
            finally:
                # The child has its own copy of the descriptor, so this
                # side's copy can be closed right away.
                script_log.close()
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    'Cannot start %s: %s' % (cscript, e))
        return True

    def StartSegment(self, req, fid):
        """
        Start a segment: validate the request, fetch the hosts file and
        ssh key it names, exchange connection parameters and start the
        script that connects this desktop to the experiment.

        req -- request message; must contain 'StartSegmentRequestBody'
            with a topdl segment description and an allocID.
        fid -- the caller's identity, checked against the allocation.

        Returns the response dict, which is also stored under the
        allocation's 'started' key so that a repeated StartSegment for the
        same allocation replays it instead of redoing the setup.

        Raises service_error for malformed requests, denied access, an
        unknown allocation, a desktop that is already in an experiment, or
        any failure reported by the setup steps.  Whenever setup fails,
        for any reason, the 'running' flag is cleared so the request can
        be retried.
        """
        try:
            req = req['StartSegmentRequestBody']
            # Get the request topology.  If not present, a KeyError is
            # thrown.
            topref = req['segmentdescription']['topdldescription']
            # The fedid of the allocation we're attaching resources to
            auth_attr = req['allocID']['fedid']
        except KeyError:
            raise service_error(service_error.req, "Badly formed request")

        # String version of the allocation ID for keying
        aid = "%s" % auth_attr
        # Authorization check
        access_ok, proof = self.auth.check_attribute(fid, auth_attr,
                with_proof=True)
        if not access_ok:
            raise service_error(service_error.access, "Access denied",
                    proof=proof)

        # See if this is a replay of an earlier succeeded StartSegment -
        # sometimes SSL kills 'em.  If so, replay the response rather than
        # redoing the allocation.
        self.state_lock.acquire()
        try:
            # Look the allocation up before touching the flag so an
            # unknown allocation cannot mark the desktop as busy.
            if aid not in self.state:
                raise service_error(service_error.req,
                        "No such allocation: %s" % aid)
            retval = self.state[aid].get('started', None)
            # Test and set :-)
            running = self.state['running']
            self.state['running'] = True
        finally:
            self.state_lock.release()

        if retval:
            self.log.warning(
                    "[StartSegment] Duplicate StartSegment for %s: " \
                            % aid + \
                    "replaying response")
            return retval
        if running:
            self.log.debug('[StartSegment] already running')
            raise service_error(service_error.federant,
                    'Desktop is already in an experiment')

        certfile = "%s/%s.pem" % (self.certdir, aid)

        # From here on this request owns the 'running' flag.  Give it back
        # on every failure, not just service_errors, or the desktop would
        # refuse all later requests.
        configured = False
        try:
            if not topref:
                raise service_error(service_error.req,
                        "Request missing segmentdescription'")
            # Convert the topology into topdl data structures.
            topo = topdl.Topology(**topref)
            self.validate_topology(topo)

            # The attributes of the request.  The ones we care about are
            # the ssh keys to operate the tunnel and the hosts file.
            for a in req.get('fedAttr', []):
                # Save the hosts and ssh_privkeys to our local dir
                if a['attribute'] in ('hosts', 'ssh_secretkey'):
                    self.log.debug('Getting %s from %s' % \
                            (a['attribute'], a['value']))
                    get_url(a['value'], certfile, self.localdir,
                            log=self.log)
                    base = os.path.basename(a['value'])
                    if a['attribute'] == 'ssh_secretkey':
                        self.ssh_identity = os.path.join(self.localdir,
                                base)
                    os.chmod(os.path.join(self.localdir, base), 0o600)
                else:
                    self.log.debug('Ignoring attribute %s' % a['attribute'])

            # Gather connection information and exchange parameters.
            connInfo = req.get('connection', [])
            self.validate_connInfo(connInfo)
            self.export_store_info(certfile, connInfo)
            self.import_store_info(certfile, connInfo)

            # build it
            self.configure_desktop(topo, connInfo)
            configured = True
        finally:
            if not configured:
                # Something bad happened - clear the "running" flag so we
                # can try again
                self._set_running(False)

        # Save the response.  It's possible that the StartSegment call gets
        # retried (!); if the 'started' key is in the allocation, the
        # replay check above returns it rather than redoing the setup.
        self.state_lock.acquire()
        try:
            self.state[aid]['started'] = {
                'allocID': req['allocID'],
                'allocationLog': "Allocatation complete",
                'segmentdescription': { 'topdldescription': topo.to_dict() },
                'proof': proof.to_dict(),
            }
            retval = copy.deepcopy(self.state[aid]['started'])
            self.write_state()
        finally:
            self.state_lock.release()

        return retval

    def TerminateSegment(self, req, fid):
        """
        Disconnect the desktop from the experiment: run the disconnect
        script under sudo if it exists, remove every file in the local
        directory, forget the ssh identity and clear the 'running' flag.

        req -- request message; must contain 'TerminateSegmentRequestBody'
            with an allocID.
        fid -- the caller's identity, checked against the allocation.

        Returns a dict with the request's 'allocID' and the access
        'proof'.  Raises service_error for a malformed request or denied
        access; problems while disconnecting or cleaning up are only
        logged.
        """
        # Gather the same access information as for Start Segment
        try:
            req = req['TerminateSegmentRequestBody']
            auth_attr = req['allocID']['fedid']
        except KeyError:
            raise service_error(service_error.req, "Badly formed request")

        aid = "%s" % auth_attr

        self.log.debug("Terminate request for %s" % aid)
        # Check authorization
        access_ok, proof = self.auth.check_attribute(fid, auth_attr,
                with_proof=True)
        if not access_ok:
            raise service_error(service_error.access, "Access denied",
                    proof=proof)

        dscript = os.path.join(self.localdir, 'disconnect')
        # Do the work of disconnecting
        if os.path.exists(dscript):
            self.log.debug('calling %s' % dscript)
            rv = subprocess.call(['sudo', '/bin/sh', dscript])
            if rv != 0:
                self.log.warning('%s had an error: %d' % (dscript, rv))
        else:
            self.log.warning('No disconnection script!?')

        # Clean out the local directory.  Each file is handled on its own
        # so that one failure does not leave the rest (which include the
        # ssh private key) behind.
        try:
            leftovers = os.listdir(self.localdir)
        except EnvironmentError as e:
            self.log.warning('Failed to remove %s: %s' % \
                    (e.filename, e.strerror))
            leftovers = []
        for bfn in leftovers:
            fn = os.path.join(self.localdir, bfn)
            self.log.debug('Removing %s' % fn)
            try:
                if os.path.exists(fn):
                    os.remove(fn)
            except EnvironmentError as e:
                self.log.warning('Failed to remove %s: %s' % \
                        (e.filename, e.strerror))

        self.ssh_identity = None

        self._set_running(False)

        return { 'allocID': req['allocID'], 'proof': proof.to_dict() }

Note: See TracBrowser for help on using the repository browser.

Download in other formats: