Context Navigation

source: fedd/federation/desktop_access.py @ 5f5ce09

Last change on this file since 5f5ce09 was 5f5ce09, checked in by Ted Faber <faber@…>, 10 years ago
Quagga needs the pidfile
Property mode set to `100644`
File size: 20.9 KB

Line
1	#!/usr/local/bin/python
2
3	import os,sys
4	import re
5	import string
6	import copy
7	import pickle
8	import logging
9	import random
10	import subprocess
11
12	from util import *
13	from deter import fedid, generate_fedid
14	from authorizer import authorizer, abac_authorizer
15	from service_error import service_error
16	from remote_service import xmlrpc_handler, soap_handler, service_caller
17
18	from deter import topdl
19
20	from access import access_base
21
# Keep log messages quiet when no one has configured a fedd logger.  This is
# something of an incantation: it makes sure a logger object is registered
# under fedd.access even if no module above us has set one up.  It's an extra
# belt for the suspenders.
class nullHandler(logging.Handler):
    """Logging handler that discards every record handed to it."""

    def emit(self, record):
        """Drop the record without writing it anywhere."""
        return None


fl = logging.getLogger("fedd.access")
fl.addHandler(nullHandler())
31
32
33	# The plug-in itself.
class access(access_base):
    """
    Access controller plug-in that attaches this desktop to a federated
    experiment.

    RequestAccess, StartSegment and TerminateSegment are implemented here and
    registered for both SOAP and XMLRPC; ReleaseAccess is inherited from
    access.access_base.  Starting a segment writes connect and disconnect
    shell scripts plus zebra/ospfd configuration files into the configured
    local directory and launches the connect script under sudo.  Only one
    segment may be running at a time; that is tracked by the 'running' flag
    in the persistent state.
    """

    def __init__(self, config=None, auth=None):
        """
        Initializer.  Lets the base class read the canonical configuration,
        then pulls the desktop-specific parameters out of the 'access'
        section of config: interface_address, gateway, hostname, localdir,
        ssh_port, auth_type, auth_dir and accessdb.

        The access database, if one is configured, is read through the base
        class' read_access(), and the 'running' flag is added to the state if
        this is the first run.

        Raises service_error if neither hostname nor interface_address is
        configured or if auth_type is not 'abac'.  An EnvironmentError from
        reading the access database is logged and re-raised.
        """

        # Calling the base initializer, which reads canonical configuration
        # information and initializes canonical members.
        access_base.__init__(self, config, auth)

        self.src_addr = config.get('access', 'interface_address')
        self.router = config.get('access', 'gateway')
        self.hostname = config.get('access', 'hostname')
        # Storage for ephemeral ssh keys and host files
        self.localdir = config.get('access', 'localdir')
        # Set by StartSegment when the request supplies an ssh_secretkey
        self.ssh_identity = None

        # hostname is the name of the ssh endpoint for the other side.  That
        # side needs it to set up routing tables.  If hostname is not
        # available, but an IP address is, use that.
        if self.hostname is None:
            if self.src_addr is None:
                raise service_error(service_error.server_config,
                        'Hostname or interface_address must be set in config')
            self.hostname = self.src_addr

        self.ssh_port = config.get('access', 'ssh_port', '22')

        # authorization information
        self.auth_type = config.get('access', 'auth_type') or 'abac'
        self.auth_dir = config.get('access', 'auth_dir')
        accessdb = config.get("access", "accessdb")
        # Initialize the authorization system.  read_access() loads the
        # access database that maps from authorization information into
        # local information.
        if self.auth_type == 'abac':
            # Load the current authorization state
            self.auth = abac_authorizer(load=self.auth_dir)
            self.access = [ ]
            if accessdb:
                try:
                    self.read_access(accessdb)
                except EnvironmentError as e:
                    self.log.error("Cannot read %s: %s" % (accessdb, e))
                    # Bare raise keeps the original traceback
                    raise
        else:
            raise service_error(service_error.internal,
                    "Unknown auth_type: %s" % self.auth_type)

        # The superclass has read the state, but if this is the first run
        # ever, we must initialise the running flag.  This plugin only
        # supports one connection, so StartSegment will fail when
        # self.state['running'] is true.
        self.state_lock.acquire()
        try:
            if 'running' not in self.state:
                self.state['running'] = False
        finally:
            self.state_lock.release()

        # These dictionaries register the plug-in's local routines for
        # handling these four messages with the server code above.  There's
        # a version for SOAP and XMLRPC, depending on which interfaces the
        # plugin supports.  There's rarely a technical reason not to support
        # one or the other - the plugin code almost never deals with the
        # transport - but if a plug-in writer wanted to disable XMLRPC, they
        # could leave the self.xmlrpc_services dictionary empty.
        self.soap_services = {
            'RequestAccess': soap_handler("RequestAccess",
                self.RequestAccess),
            'ReleaseAccess': soap_handler("ReleaseAccess",
                self.ReleaseAccess),
            'StartSegment': soap_handler("StartSegment", self.StartSegment),
            'TerminateSegment': soap_handler("TerminateSegment",
                self.TerminateSegment),
            }
        self.xmlrpc_services = {
            'RequestAccess': xmlrpc_handler('RequestAccess',
                self.RequestAccess),
            'ReleaseAccess': xmlrpc_handler('ReleaseAccess',
                self.ReleaseAccess),
            'StartSegment': xmlrpc_handler("StartSegment",
                self.StartSegment),
            'TerminateSegment': xmlrpc_handler('TerminateSegment',
                self.TerminateSegment),
            }
        self.call_SetValue = service_caller('SetValue', log=self.log)
        self.call_GetValue = service_caller('GetValue', log=self.log)

    # ReleaseAccess comes from the base class; this is a slightly modified
    # RequestAccess from the base that includes a fedAttr to force this side
    # to be active.
    def RequestAccess(self, req, fid):
        """
        Handle an access request.  Success here maps the requester into the
        local access control space and establishes state about that user
        keyed to a fedid.  We also save a copy of the certificate underlying
        that fedid so this allocation can access configuration information
        and shared parameters on the experiment controller.

        Returns a dict holding the new allocID, a 'nat_portals' fedAttr and
        the authorization proof.  Raises service_error if the request has no
        body or the certificate cannot be written; lookup_access() raises if
        access is denied.
        """

        self.log.info("RequestAccess called by %s" % fid)
        # The dance to get into the request body
        if 'RequestAccessRequestBody' in req:
            req = req['RequestAccessRequestBody']
        else:
            raise service_error(service_error.req, "No request!?")

        # Base class lookup routine.  If this fails, it throws a service
        # exception denying access that triggers a fault response back to
        # the caller.
        found, owners, proof = self.lookup_access(req, fid)
        self.log.info(
                "[RequestAccess] Access granted local creds %s" % found)
        # Make a fedid for this allocation
        allocID, alloc_cert = generate_fedid(subj="alloc", log=self.log)
        aid = unicode(allocID)

        # Store the data about this allocation:
        self.state_lock.acquire()
        try:
            self.state[aid] = {
                'user': found,
                'owners': owners,
                'auth': set(),
                }
            # Authorize the creating fedid and the principal representing
            # the allocation to manipulate it.
            self.append_allocation_authorization(aid,
                    ((fid, allocID), (allocID, allocID)))
            self.write_state()
        finally:
            self.state_lock.release()

        # Stash the allocation's certificate in the certificate directory.
        try:
            with open("%s/%s.pem" % (self.certdir, aid), "w") as f:
                f.write("%s\n" % alloc_cert)
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    "Can't open %s/%s : %s" % (self.certdir, aid, e))
        self.log.debug('[RequestAccess] Returning allocation ID: %s' % allocID)
        return {
                'allocID': { 'fedid': allocID },
                'fedAttr': [{ 'attribute': 'nat_portals', 'value': 'True' }],
                'proof': proof.to_dict()
                }

    def validate_topology(self, top):
        '''
        Validate the topology.  Desktops can only be single connections.
        Though the topology will include a portal and a node, the access
        controller will implement both on one node.

        As more capabilities are added to the controller the constraints
        here will relax.

        Returns True for an acceptable topology and raises service_error
        otherwise.
        '''

        comps = [e for e in top.elements if isinstance(e, topdl.Computer)]
        if len(comps) > 2:
            raise service_error(service_error.req,
                    "Desktop only supports 1-node subexperiments")

        portals = 0
        for c in comps:
            if c.get_attribute('portal') is not None:
                portals += 1
                continue
            # Non-portal node: exactly one interface on exactly one
            # substrate, and everything on that substrate must be in this
            # segment.
            if len(c.interface) > 1:
                raise service_error(service_error.req,
                        "Desktop Node has more than one interface")
            if len(c.interface) < 1:
                raise service_error(service_error.req,
                        "Desktop Node has no interface")
            iface = c.interface[0]
            if len(iface.subs) > 1:
                raise service_error(service_error.req,
                        "Desktop Node has more than one substate on interface")
            if len(iface.subs) < 1:
                raise service_error(service_error.req,
                        "Desktop Node interface has no substrate")
            for other in iface.subs[0].interfaces:
                if other.element not in comps:
                    raise service_error(service_error.req,
                            "Desktop Node connected to non-portal")

        if portals > 1:
            raise service_error(service_error.req,
                    "Desktop segment has more than one portal")
        return True

    def validate_connInfo(self, connInfo):
        '''
        Check that the request asks for exactly one connection and that it
        is an ssh connection.  Returns True or raises service_error.
        '''
        if len(connInfo) != 1:
            raise service_error(service_error.req,
                    "Desktop segment requests multiple connections")
        # A connection with no 'type' at all is rejected the same way as one
        # with the wrong type.
        if connInfo[0].get('type') != 'ssh':
            raise service_error(service_error.req,
                    "Desktop segment requires ssh connecton")
        return True

    def export_store_info(self, certfile, connInfo):
        '''
        Tell the other portal node where to reach this desktop.  The other
        side uses this information to set up routing, though the ssh_port is
        unused as the Desktop always initiates ssh connections.

        Every non-input parameter of every connection is sent with a
        SetValue call to the parameter's store, authenticated with certfile.
        Parameters other than 'peer' and 'ssh_port' are logged and skipped.
        '''
        values = { 'peer': self.hostname, 'ssh_port': self.ssh_port }
        for c in connInfo:
            for p in c.get('parameter', []):
                if p.get('type', '') == 'input': continue
                pname = p.get('name', '')
                key = p.get('key', '')
                surl = p.get('store', '')
                if pname not in values:
                    # self.log is a logger object, not a callable
                    self.log.warning('Unknown export parameter: %s' % pname)
                    continue
                val = values[pname]
                self.log.debug('Setting %s (%s) to %s on %s' % \
                        (pname, key, val, surl))
                self.call_SetValue(surl, { 'name': key, 'value': val },
                        certfile)

    def set_route(self, dest, script, gw=None, src=None):
        '''
        Write the platform's command for adding a route to dest into the
        open file script.  gw is the gateway and src the source address to
        route from; nothing is written if both are None.

        Raises service_error on an unknown platform or when a source address
        is given on FreeBSD.
        '''
        if sys.platform.startswith('freebsd'):
            if src is not None:
                raise service_error(service_error.internal,
                        'FreeBSD will not route based on src address')
            if gw is not None:
                script.write('route add %s %s\n' % (dest, gw))
        elif sys.platform.startswith('linux'):
            if src is not None and gw is not None:
                script.write('ip route add %s via %s src %s\n' % \
                        (dest, gw, src))
            elif src is not None:
                script.write('ip route add %s src %s\n' % (dest, src))
            elif gw is not None:
                script.write('ip route add %s via %s\n' % (dest, gw))
        else:
            raise service_error(service_error.internal,
                    'Unknown platform %s' % sys.platform)

    def unset_route(self, dest, script):
        '''
        Write the platform's command for deleting the route to dest into the
        open file script.  Unlike set_route, nothing is written and nothing
        is raised on a platform that is neither FreeBSD nor Linux.
        '''
        if sys.platform.startswith('freebsd'):
            script.write('route delete %s\n' % dest)
        elif sys.platform.startswith('linux'):
            script.write('ip route delete %s\n' % dest)

    def find_a_peer(self, addr):
        '''
        Find another node in the experiment that's on our subnet.  This is a
        hack to handle the problem that we really cannot require the desktop
        to dynamically route.  Will be improved by distributing static
        routes.

        The hosts file in the local directory is scanned for the first
        address that shares addr's first three octets but is not addr
        itself.  Raises service_error if addr has no dot in it, the hosts
        file cannot be read, or no such address is found.
        '''

        hosts = os.path.join(self.localdir, 'hosts')
        p = addr.rfind('.')
        if p == -1:
            raise service_error(service_error.req, 'bad address in topology')
        prefix = addr[0:p]
        # Escape the prefix so its dots only match dots, and refuse to start
        # in the middle of a longer address (10.0.1 must not match inside
        # 110.0.1.5).
        addr_re = re.compile(r'(?<![\d.])(%s\.\d+)' % re.escape(prefix))
        try:
            with open(hosts, 'r') as f:
                for line in f:
                    m = addr_re.search(line)
                    if m is not None and m.group(1) != addr:
                        return m.group(1)
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    'Cannot open %s: %s' % (e.filename, e.strerror))
        raise service_error(service_error.req,
                'No other nodes in this subnet??')

    def configure_desktop(self, top, connInfo):
        '''
        Build the connection.  Establish routing to the peer if using a
        separate interface, wait until the other end confirms setup,
        establish the ssh layer-two tunnel (tap), assign the in-experiment IP
        address to the tunnel and start zebra and ospfd on the tap.

        The work is done by a 'connect' script written to the local directory
        and launched in the background under sudo, with its output sent to
        /tmp/connect.log.  A matching 'disconnect' script and the zebra and
        ospfd configuration files are written alongside it.

        Returns True once the connect script has been launched.  Raises
        service_error if the topology and connInfo do not supply the peer,
        port and local address, or if the files cannot be written or the
        script cannot be started.
        '''

        # get the peer and ssh port from the portal and our IP from the other
        peer = None
        port = None
        my_addr = None
        my_name = None
        for e in top.elements:
            if not isinstance(e, topdl.Computer): continue
            if e.get_attribute('portal') is None:
                my_name = e.name
                # there should be one interface with one IPv4 address
                if len(e.interface) < 1:
                    raise service_error(service_error.internal,
                            'No interface on experiment node!?!?')
                my_addr = e.interface[0].get_attribute('ip4_address')
            else:
                for ci in connInfo:
                    if ci.get('portal', '') != e.name: continue
                    peer = ci.get('peer')
                    port = '22'
                    for a in ci.get('fedAttr', []):
                        if a['attribute'] == 'ssh_port': port = a['value']

        # XXX scan hosts for IP addresses and compose better routing entry

        if not all([peer, port, my_addr]):
            raise service_error(service_error.req,
                    'Cannot find all config parameters %s %s %s' % (peer, port, my_addr))

        # The peer itself is not used yet (no static route through it is
        # installed), but the lookup still rejects a hosts file with no
        # other node on our subnet.
        self.find_a_peer(my_addr)

        cscript = os.path.join(self.localdir, 'connect')
        dscript = os.path.join(self.localdir, 'disconnect')
        local_hosts = os.path.join(self.localdir, 'hosts')
        zebra_conf = os.path.join(self.localdir, 'zebra.conf')
        ospfd_conf = os.path.join(self.localdir, 'ospfd.conf')
        # NOTE(review): peer, port, my_addr and my_name come from the request
        # and are interpolated unquoted into scripts that run under sudo.
        # They should be validated or shell-quoted.
        try:
            with open(cscript, 'w') as f:
                f.write('#!/bin/sh\n')
                # This picks the outgoing interface to the experiment using
                # the routing system.
                self.set_route(peer, f, self.router, self.src_addr)
                # Wait until the other end reports that it is configured by
                # placing a file this end can access into its local file
                # system.  Try once a minute.
                f.write('while ! /usr/bin/scp -o "StrictHostKeyChecking no" -i %s %s:/usr/local/federation/etc/prep_done /dev/null; do\n' % (self.ssh_identity, peer))
                f.write('sleep 60; done\n')
                f.write(('ssh -w 0:0 -p %s -o "Tunnel ethernet" ' + \
                        '-o "StrictHostKeyChecking no" -i %s %s perl -I/usr/local/federation/lib /usr/local/federation/bin/setup_bridge.pl --tapno=0 --addr=%s &\n') % \
                        (port, self.ssh_identity, peer, my_addr))
                # This should give the tap a chance to come up
                f.write('sleep 10\n')
                # Add experiment nodes to hosts
                f.write('cp /etc/hosts /etc/hosts.DETER.fedd.hold\n')
                f.write('echo "#--- BEGIN FEDD ADDITIONS ---" >> /etc/hosts\n')
                f.write('cat %s >> /etc/hosts\n' % local_hosts)
                f.write('echo "#--- END FEDD ADDITIONS ---" >> /etc/hosts\n')
                # Assign tap address and start the routing daemons on it.
                f.write('ifconfig tap0 %s netmask 255.255.255.0 up\n' % \
                        my_addr)
                f.write('/usr/local/sbin/zebra -d -i /var/run/zebra.pid -f %s\n' % zebra_conf)
                f.write('/usr/local/sbin/ospfd -d -i /var/run/ospfd.pid -f %s\n' % ospfd_conf)
            os.chmod(cscript, 0o755)

            with open(dscript, 'w') as f:
                f.write('#!/bin/sh\n')
                f.write('ifconfig tap0 destroy\n')
                self.unset_route(peer, f)
                f.write('mv /etc/hosts.DETER.fedd.hold /etc/hosts\n')
                f.write('kill `cat /var/run/ospfd.pid`\n')
                f.write('kill `cat /var/run/zebra.pid`\n')
            os.chmod(dscript, 0o755)

            with open(zebra_conf, 'w') as f:
                f.write('hostname %s\n' % my_name)
                f.write('interface tap0\n')
            os.chmod(zebra_conf, 0o644)

            # Closed explicitly so the contents are on disk before ospfd
            # reads them.
            with open(ospfd_conf, 'w') as f:
                f.write('hostname %s\n' % my_name)
                f.write('router ospf\n')
                f.write(' redistribute static\n')
                f.write(' network %s/24 area 0.0.0.2\n' % my_addr)
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    'Cannot create connect %s: %s' % (e.filename, e.strerror))

        # The child keeps its own copy of the log descriptor, so ours can be
        # closed as soon as the script has been started.
        try:
            with open('/tmp/connect.log', 'w') as script_log:
                subprocess.Popen(['sudo', '/bin/sh', cscript],
                        stdout=script_log, stderr=script_log)
        except EnvironmentError as e:
            raise service_error(service_error.internal,
                    'Cannot run %s: %s' % (cscript, e))
        return True

    def StartSegment(self, req, fid):
        """
        Start a segment: connect this desktop to the experiment described in
        the request.

        The request must carry a topdl topology and the fedid of an
        allocation that fid is authorized for.  The 'hosts' and
        'ssh_secretkey' attributes are fetched into the local directory,
        connection parameters are exchanged with the other side and the
        desktop is configured.  The response is saved in the persistent
        state so that a retried StartSegment replays it rather than redoing
        the setup.

        Raises service_error if the request is malformed, access is denied,
        the allocation is unknown, the desktop is already in an experiment or
        setup fails.  Any failure during setup clears the 'running' flag so
        the request can be tried again.
        """
        try:
            req = req['StartSegmentRequestBody']
            # Get the request topology.  If not present, a KeyError is
            # thrown.
            topref = req['segmentdescription']['topdldescription']
            # The fedid of the allocation we're attaching resources to
            auth_attr = req['allocID']['fedid']
        except KeyError:
            raise service_error(service_error.req, "Badly formed request")

        # String version of the allocation ID for keying
        aid = "%s" % auth_attr
        # Authorization check
        access_ok, proof = self.auth.check_attribute(fid, auth_attr,
                with_proof=True)
        if not access_ok:
            raise service_error(service_error.access, "Access denied",
                    proof=proof)

        # See if this is a replay of an earlier succeeded StartSegment -
        # sometimes SSL kills 'em.  If so, replay the response rather than
        # redoing the allocation.
        self.state_lock.acquire()
        try:
            # Checked before the flag is touched so that a stale allocation
            # ID cannot leave the desktop marked as running.
            if aid not in self.state:
                raise service_error(service_error.req,
                        "No such allocation: %s" % aid)
            # Test and set :-)
            running = self.state['running']
            self.state['running'] = True
            retval = self.state[aid].get('started', None)
        finally:
            self.state_lock.release()

        if retval:
            self.log.warning(
                    "[StartSegment] Duplicate StartSegment for %s: " \
                            % aid + \
                    "replaying response")
            return retval
        if running:
            self.log.debug('[StartSegment] already running')
            raise service_error(service_error.federant,
                    'Desktop is already in an experiment')

        certfile = "%s/%s.pem" % (self.certdir, aid)

        # From here on this request owns the 'running' flag.  Whatever goes
        # wrong during setup, service_error or not, the flag is cleared so
        # that StartSegment can be tried again.
        configured = False
        try:
            # Convert the topology into topdl data structures.
            if not topref:
                raise service_error(service_error.req,
                        "Request missing segmentdescription'")
            topo = topdl.Topology(**topref)
            self.validate_topology(topo)

            # The attributes of the request.  The ones we care about are the
            # ssh keys to operate the tunnel.
            for a in req.get('fedAttr', []):
                # Save the hosts and ssh_privkeys to our local dir
                if a['attribute'] in ('hosts', 'ssh_secretkey'):
                    self.log.debug('Getting %s from %s' % \
                            (a['attribute'], a['value']))
                    get_url(a['value'], certfile, self.localdir, log=self.log)
                    base = os.path.basename(a['value'])
                    if a['attribute'] == 'ssh_secretkey':
                        self.ssh_identity = os.path.join(self.localdir, base)
                    os.chmod(os.path.join(self.localdir, base), 0o600)
                else:
                    self.log.debug('Ignoring attribute %s' % a['attribute'])

            # Gather connection information and exchange parameters.
            connInfo = req.get('connection', [])
            self.validate_connInfo(connInfo)
            self.export_store_info(certfile, connInfo)
            self.import_store_info(certfile, connInfo)

            # build it
            self.configure_desktop(topo, connInfo)
            configured = True
        finally:
            if not configured:
                self.state_lock.acquire()
                try:
                    self.state['running'] = False
                finally:
                    self.state_lock.release()

        # Save the response.  It's possible that the StartSegment call gets
        # retried (!).  If the 'started' key is in the allocation, we'll
        # return it rather than redo the setup.
        self.state_lock.acquire()
        try:
            self.state[aid]['started'] = {
                    'allocID': req['allocID'],
                    'allocationLog': "Allocatation complete",
                    'segmentdescription': {
                        'topdldescription': topo.to_dict() },
                    'proof': proof.to_dict(),
                    }
            retval = copy.deepcopy(self.state[aid]['started'])
            self.write_state()
        finally:
            self.state_lock.release()

        return retval

    def TerminateSegment(self, req, fid):
        """
        Disconnect the desktop from the experiment.  Runs the disconnect
        script under sudo if there is one, removes the files in the local
        directory, forgets the ssh identity and clears the 'running' flag.

        Teardown is best effort: a failing script or a file that cannot be
        removed is logged and the rest of the cleanup still happens.

        Returns a dict with the allocID and the authorization proof.  Raises
        service_error if the request is malformed or access is denied.
        """
        # Gather the same access information as for Start Segment
        try:
            req = req['TerminateSegmentRequestBody']
            auth_attr = req['allocID']['fedid']
        except KeyError:
            raise service_error(service_error.req, "Badly formed request")

        aid = "%s" % auth_attr

        self.log.debug("Terminate request for %s" %aid)
        # Check authorization
        access_ok, proof = self.auth.check_attribute(fid, auth_attr,
                with_proof=True)
        if not access_ok:
            raise service_error(service_error.access, "Access denied",
                    proof=proof)

        dscript = os.path.join(self.localdir, 'disconnect')
        # Do the work of disconnecting
        if os.path.exists(dscript):
            self.log.debug('calling %s' % dscript)
            try:
                rv = subprocess.call(['sudo', '/bin/sh', dscript])
                if rv != 0:
                    self.log.warning('%s had an error: %d' % (dscript, rv))
            except EnvironmentError as e:
                # Could not even start the script; carry on cleaning up
                self.log.warning('Cannot run %s: %s' % (dscript, e))
        else:
            self.log.warning('No disconnection script!?')

        try:
            leftovers = os.listdir(self.localdir)
        except EnvironmentError as e:
            self.log.warning('Failed to list %s: %s' % \
                    (e.filename, e.strerror))
            leftovers = []
        for bfn in leftovers:
            fn = os.path.join(self.localdir, bfn)
            self.log.debug('Removing %s' % fn)
            # One stubborn file should not stop the others being removed
            try:
                if os.path.exists(fn):
                    os.remove(fn)
            except EnvironmentError as e:
                self.log.warning('Failed to remove %s: %s' % \
                        (e.filename, e.strerror))

        self.ssh_identity = None

        self.state_lock.acquire()
        try:
            self.state['running'] = False
        finally:
            self.state_lock.release()

        return { 'allocID': req['allocID'], 'proof': proof.to_dict() }

Note: See TracBrowser for help on using the repository browser.

Download in other formats: