Context Navigation

source: fedkit/splitter.pl @ 906c763

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since 906c763 was 22bb7f8, checked in by Ted Faber <faber@…>, 17 years ago
Auto generate the ssh keys used to coordinate gateways.
Property mode set to `100644`
File size: 37.2 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10
11	my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12	my $local_script_dir = ".";
13	my($pid, $gid); # Process and group IDs for calling parse.tcl
14	my $splitter_config; # Configuration file
15	my $debug; # True if thecalled in debug mode
16	my $verbose; # True for extra progress reports
17	my $startem; # If true, start the sub-experiments
18	my $eid; # Experiment ID
19	my $tcl; # The experiment description (topology file)
20	my $master; # Master testbed
21	my $tmpdir; # tmp files
22	my $tb_config; # testbed configurations
23	my $smb_share; # Share to mount from the master
24	my $project_user; # User to mount project dirs as
25	my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26	my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27	# basename)
28	my($keytype); # Type (DSA or RSA) of generated gateway keys
29	my $tcl_splitter; # tcl program to split experiments
30	# (changed during devel)
31	my $tclsh; # tclsh to call directly (changed during devel)
32	my $muxmax; # Maximum number of links/lans over 1 gw pair
33	my @tarfiles; # Tarfiles in use by this experiment
34	my @rpms; # Rpms in use by this experiment
35	my $timeout; # The timeout to use for experiment swap ins
36	my %opts; # Parsed options
37
38	my $tbparams = {}; # Map of the per-testbed parameters from the
39	# testbeds file. It is a reference to a hash
40	# of hashes (because it's passed around a bunch
41	# and it's nicer to have one access pattern
42	# throughout the script, in the main loop and
43	# the subroutines). That access is exemplified
44	# by $tbparams->{'deter'}->{'domain'} which is
45	# the domain parameter of the DETER testbed.
46	my $fail_soft; # Do not swap failed sub-experiments out
47	my $max_children=1; # Maximum number of simultaneous swap-ins
48
49	# Parse the config file. The format is a colon-separated parameter name
50	# followed by the value of that parameter to the end of the line. This parses
51	# that format and puts the parameters into the referenced hash. Parameter
52	# names are mapped to lower case, parameter values are unchanged. Returns 0 on
53	# failure (e.g. file open) and 1 on success.
54	sub parse_config {
55	my($file, $href) = @_;
56	my $fh = new IO::File($file);
57
58	unless ($fh) {
59	warn "Can't open $file: $!\n";
60	return 0;
61	}
62
63	while (<$fh>) {
64	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
65	chomp;
66	/^([^:]+):\s(.)/ && do {
67	my $key = $1;
68
69	$key =~ tr/A-Z/a-z/;
70	$href->{$key} = $2;
71	next;
72	};
73	warn "Unparasble line in $file: $_\n";
74	}
75	$fh->close(); # It will close when it goes out of scope, but...
76	return 1;
77	}
78
79	# Parse an easier-to-read testbeds file (the original was comma-separated
80	# unadorned strings). The format is a testbed scope as [testbed] followed by
81	# the colon-separated attribute-value pairs for the testbed. Right now these
82	# go into a set of global hashes indexed by testbed, but that should probably
83	# change.
84
85	sub parse_testbeds {
86	my($file, $tbparams) = @_; # Testbeds file and parameter hash
87	my $fh = new IO::File($file); # Testbeds filehandle
88	my $tb; # Current testbed
89	# Convert attribute in the file to tbparams hash key
90	my %attr_to_hash = (
91	"opsnode" => "host",
92	"user" => "user",
93	"domain" => "domain",
94	"project" => "project",
95	"connectortype" => "gwtype",
96	"slavenodestartcmd" => "expstart",
97	"slaveconnectorstartcmd" => "gwstart",
98	"masternodestartcmd" => "mexpstart",
99	"masterconnectorstartcmd" => "mgwstart",
100	"connectorimage" => "gwimage",
101	"fileserver" => "fs",
102	"boss" => "boss",
103	"tunnelcfg" => "tun"
104	);
105
106
107	unless ($fh) {
108	warn "Can't open $file: $!\n";
109	return 0;
110	}
111
112	while (<$fh>) {
113	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
114	chomp;
115	/^\s\[(.)\]/ && do {
116	$tb = $1;
117	$tbparams->{$tb} = {} unless $tbparams->{$tb};
118	next;
119	};
120
121	/^([^:]+):\s(.)/ && do {
122	unless ($tb) {
123	warn "Ignored attribute definition before testbed " .
124	"defined in $file: $_\n";
125	next;
126	}
127	my $key = $1;
128	$key =~ tr/A-Z/a-z/;
129	my $var = $attr_to_hash{$key};
130
131	if ($var) { $tbparams->{$tb}->{$var} = $2; }
132	else { warn "Unknown keyword $key in $file\n"; }
133
134	next;
135	};
136	warn "Unparasble line in $file: $_\n";
137	}
138	$fh->close(); # It will close when it goes out of scope, but...
139	return 1;
140	}
141
142	# Generate SSH keys for use by the gateways. The parameters are the type and
143	# the filename for the private key. The pubkey will be stored in a filename
144	# with the same name as the private key but with .pub appended. Type can be
145	# dsa or rsa.
146
147	sub generate_ssh_keys {
148	my($type, $dest) = @_;
149
150	$type =~ tr/A-Z/a-z/;
151	return 0 if $type !~ /(rsa\|dsa)/;
152	system("/usr/bin/ssh-keygen -t $type -N \"\" -f $dest");
153	return $@ ? 0 : 1;
154	}
155
156	# use scp to transfer a file, reporting true if successful and false otherwise.
157	# Parameters are the local file name, the ssh host destination (either hostname
158	# oe user@host), and an optional destination file name or directory. If no
159	# destination is given, the file is transferred to the given user's home
160	# directory. If only a machine is given in the ssh host destination, the
161	# current user is used.
162	sub scp_file {
163	my($file, $user, $host, $dest) = @_;
164
165	# XXX system with a relative pathname is sort of gross
166	system("scp $file $user\@$host:$dest");
167	if ($?) {
168	warn "scp failed $?\n";
169	return 0;
170	}
171	else { return 1; }
172	}
173
174	# use ssh to execute the given command on the machine (and as the user) in
175	# $where. Parameters are the ssh destination directive ($where) and the
176	# command to execute, and a prefix to be placed on a message generated if the
177	# command fails. On failure print a warning if a warning prefix was given and
178	# return false. If timeout is given fork a process and set an alarm of that
179	# many seconds. Timeouts also return 0;
180	sub ssh_cmd {
181	my($user, $host, $cmd, $wname, $timeout) = @_;
182	my $pid; # Child pid
183
184	$timeout = 0 unless $timeout; # Force default timeout
185
186	if ( $pid = fork () ) {
187	# Parent process
188	# The eval acts as a signal catcher. If the alarm goes off inside
189	# the eval, the die will put "alarm\n" into $@, otherwise the
190	# return value of the execution in the child process will be used.
191	my $rv = eval {
192	local $SIG{'ALRM'} = sub{ die "alarm\n"; };
193	my $rv;
194
195	alarm $timeout;
196	$rv = waitpid($pid, 0);
197	alarm 0;
198	$rv;
199	};
200
201	# If the eval succeeded, $@ will be null and we can use $rv, which
202	# is the return code from the subprocess. If the eval timed out,
203	# print a warning and assume the best.
204	if ($@ eq "alarm\n" ) {
205	warn "$wname timed out - pid $pid still live\n";
206	return 1;
207	}
208	else {
209	return $rv;
210	}
211	}
212	else {
213	# Child process
214	exec("ssh $user\@$host $cmd");
215	exit 0;
216	}
217	}
218
219	# Ship local copies of the federation scripts out to the given host. If any of
220	# the script transfers fails, return 0. The scripts to transfer are from the
221	# global @scripts and are found locally in $local_script_dir (another global).
222	sub ship_scripts {
223	my($host, $user, $dest_dir) = @_; # Where, who, where remotely
224	my $s;
225
226	&ssh_cmd($user, $host, "mkdir -p $dest_dir");
227	for $s (@scripts) {
228	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) \|\|
229	return 0;
230	}
231	return 1;
232	}
233
234	# Ship per-testbed configuration generated by this script to the remote /proj
235	# directories on the remote testbeds
236	sub ship_configs {
237	my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely
238	my($d, $f);
239
240	$d = IO::Dir->new($src_dir) \|\| return 0;
241
242	# All directories under $tmpdir are 770 so we can delete them later.
243	&ssh_cmd($user, $host, "mkdir -p $dest_dir") \|\| return 0;
244	&ssh_cmd($user, $host, "chmod 770 $dest_dir") \|\| return 0;
245	while ( $f = $d->read()) {
246	next if $f =~ /^\./;
247	if ( -d "$src_dir/$f" ) {
248	&ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") \|\|
249	return 0;
250	}
251	else {
252	&scp_file("$src_dir/$f", $user, $host, $dest_dir) \|\| return 0;
253	}
254	}
255	return 1;
256	}
257
258	# Start a sub section of the experiment on a given testbed. The testbed and
259	# the user to start the experiment as are pulled from the global per-testbed
260	# hash, passed in as $tbparams, as is the project name on the remote testbed.
261	# Parameters are the testbed and the experiment id. Configuration files are
262	# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
263	# Then the current state of the experiment determined using expinfo. From that
264	# state, the experiment is either created, modified or spapped in. If
265	# everything succeeds, true is returned. If the global verbose is set progress
266	# messages are printed.
267	sub start_segment {
268	my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
269	# per-testbed parameters and remote
270	# swap-in timeout
271	my $host = # Host name of remote ops (FQDN)
272	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
273	my $user = $tbparams->{$tb}->{'user'}; # user to pass to ssh
274	my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
275	# experiment under
276	my $tclfile = "./$eid.$tb.tcl"; # Local tcl file with the
277	# sub-experiment
278	my $proj_dir = "/proj/$pid/exp/$eid/tmp"; # Where to stash federation stuff
279	my $tarfiles_dir = "/proj/$pid/tarfiles/$eid"; # Where to stash tarfiles
280	my $rpms_dir = "/proj/$pid/rpms/$eid"; # Where to stash rpms
281	my $to_hostname = "$proj_dir/hosts"; # remote hostnames file
282	my $state; # State of remote experiment
283	my $status = new IO::Pipe; # The pipe to get status
284
285	# Determine the status of the remote experiment
286	$status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") \|\|
287	die "Can't ssh to $user\@$host:$!\n";
288
289	# XXX: this is simple now. Parsing may become more complex
290	while (<$status>) {
291	/State: (\w+)/ && ($state = $1);
292	/No\s+such\s+experiment/ && ($state = "none");
293	}
294	$status->close();
295	print "$tb: $state\n";
296
297	# Copy the experiment definition data over
298	print "transferring subexperiment to $tb\n" if $verbose;
299	&scp_file("$tmpdir/$tb/$tclfile", $user, $host) \|\| return 0;
300	# Clear out any old experiment data; if not deleted, copies over it by
301	# different users will fail.
302	# (O /bin/csh, how evil thou art. The -c and the escaped single quotes
303	# force the /bin/sh interpretation of the trailing * (which we need to keep
304	# tmp around)) Again, this needs to be done more properly once we have a
305	# non-ssh interface here.)
306	print "clearing experiment subdirs on $tb\n" if $verbose;
307	&ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") \|\|
308	return 0;
309	print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
310	&ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") \|\|
311	return 0;
312	print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
313	&ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") \|\|
314	return 0;
315	print "clearing experiment rpms subdirs on $tb\n" if $verbose;
316	&ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") \|\|
317	return 0;
318	print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
319	&ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") \|\|
320	return 0;
321	# Remote experiment is active. Modify it.
322	if ($state eq "active") {
323	print "Transferring federation support files to $tb\n" if $verbose;
324	# First copy new scripts and hostinfo into the remote /proj
325	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
326	return 0;
327	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
328	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
329
330	if ( -d "$tmpdir/tarfiles") {
331	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
332	return 0;
333	}
334
335	if ( -d "$tmpdir/rpms") {
336	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
337	return 0;
338	}
339
340	print "Modifying $eid in place on $tb\n" if $verbose;
341	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
342	"$eid $tclfile", "modexp", $timeout) \|\| return 0;
343	return 1;
344	}
345
346	# Remote experiment is swapped out, modify it and swap it in.
347	if ($state eq "swapped") {
348	print "Transferring federation support files to $tb\n" if $verbose;
349	# First copy new scripts and hostinfo into the remote /proj (because
350	# the experiment exists, the directory tree should be there.
351	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
352	return 0;
353	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
354	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
355	if ( -d "$tmpdir/tarfiles") {
356	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
357	return 0;
358	}
359
360	if ( -d "$tmpdir/rpms") {
361	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
362	return 0;
363	}
364
365	print "Modifying $eid on $tb\n" if $verbose;
366	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
367	"modexp") \|\| return 0;
368	print "Swapping $eid in on $tb\n" if $verbose;
369	# Now start up
370	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
371	"swapexp", $timeout) \|\| return 0;
372	return 1;
373	}
374
375	# No remote experiment. Create one. We do this in 2 steps so we can put
376	# the configuration files and scripts into the new experiment directories.
377	if ($state eq "none") {
378
379	if ( -d "$tmpdir/tarfiles") {
380	# Tarfiles have to exist for the creation to work
381	print "copying tarfiles to $tb\n";
382	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
383	return 0;
384	}
385
386	if ( -d "$tmpdir/rpms") {
387	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
388	return 0;
389	}
390
391	print "Creating $eid on $tb\n" if $verbose;
392	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
393	"$pid -e $eid $tclfile", "startexp") \|\| return 0;
394	# After startexp succeeds, the per-experiment directories exist on the
395	# remote testbed.
396	print "Transferring federation support files to $tb\n" if $verbose;
397	# First copy new scripts and hostinfo into the remote /proj
398	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
399	return 0;
400	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
401	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
402	# Now start up
403	print "Swapping $eid in on $tb\n" if $verbose;
404	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
405	"swapexp", $timeout) \|\| return 0;
406	return 1;
407	}
408
409	# Every branch for a known state returns. If execution gets here, the
410	# state is unknown.
411	warn "unknown state: $state\n";
412	return 0;
413	}
414
415	# Swap out a sub-experiment - probably because another has failed. Arguments
416	# are testbed and experiment. Most of the control flow is similar to
417	# start_segment, though much simpler.
418	sub stop_segment {
419	my($tb, $eid, $tbparams) = @_; # testbed, experiment ID and
420	# per-testbed parameters
421	my $user = $tbparams->{$tb}->{'user'}; # testbed user
422	my $host = # Ops node
423	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
424	my $pid = $tbparams->{$tb}->{'project'};# testbed project
425
426	print "Stopping $eid on $tb\n" if $verbose;
427	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out",
428	"swapexp (out)") \|\| return 0;
429	return 1;
430	}
431
432
433	$pid = $gid = "dummy"; # Default project and group to pass to
434	# $tcl_splitter above. These are total
435	# dummy arguments; the splitter doesn't
436	# use them at all, but we supply them to
437	# keep our changes to the parser minimal.
438	# Argument processing.
439	getopts('Ft:c:p:f:ndvN', \%opts);
440	$splitter_config = $opts{'c'} \|\| "./splitter.conf";
441	$debug = $opts{'d'};
442	$verbose = $opts{'v'} \|\| $opts{'d'};
443
444	&parse_config("$splitter_config", \%opts) \|\|
445	die "Cannot read config file $splitter_config: $!\n";
446
447	warn "-N does nothing now. Only one testbeds format supported.\n"
448	if $opts{'N'};
449	$fail_soft = $opts{'F'} \|\| $opts{'failsoft'};
450	$startem = $opts{'n'} ? 0 : 1; # If true, start the sub-experiments
451	$timeout = $opts{'t'} \|\| $opts{'timeout'};
452	$eid = $opts{'experiment'}; # Experiment ID
453	$tcl = $opts{'f'} \|\| shift; # The experiment description
454	$master = $opts{'master'}; # Master testbed
455	$tmpdir = $opts{'tmpdir'} \|\| $opts{'tempdir'}\|\| "/tmp"; # tmp files
456	$tb_config = $opts{'testbeds'} \|\| "./testbeds"; # testbed configurations
457	$local_script_dir = $opts{'scriptdir'}; # Local scripts
458	$muxmax = $opts{'muxlimit'} \|\| 3; # Number of connections muxed on one
459	# gateway
460
461	$max_children = $opts{'p'} \|\| $opts{'maxchildren'}
462	if $opts{'p'} \|\| $opts{'maxchildren'};
463
464	$smb_share = $opts{'smbshare'} \|\| # Share to mount from the master
465	die "Must give an SMB share\n";
466	$project_user = $opts{'smbuser'} \|\| # User to mount project dirs as
467	die "Must give an SMB user\n";
468
469	# tcl program to split experiments (changed during devel)
470	$tcl_splitter = $opts{'tclparse'} \|\| "/usr/testbed/lib/ns2ir/parse.tcl";
471	# tclsh to call directly (changed during devel)
472	$tclsh = $opts{'tclsh'} \|\| "/usr/local/bin/otclsh";
473
474	# Prefix to avoid collisions
475	$tmpdir .= "/split$$";
476
477	print "Temp files are in $tmpdir\n" if $verbose;
478	# Create a workspace
479	unless (-d "$tmpdir") {
480	mkdir("$tmpdir") \|\| die "Can't create $tmpdir: $!";
481	}
482
483	# If the keys are given, use them. Otherwise create a set under $tmpdir
484
485	if ( $opts{'gatewatpubkey'} && $opts{'gatewaysecretkey'}) {
486	$gw_pubkey = $opts{'gatewaypubkey'};
487	$gw_secretkey = $opts{'gatewaysecretkey'};
488	}
489	else {
490	$keytype = $opts{'gatewaykeytype'} \|\| "rsa";
491	mkdir("$tmpdir/keys") \|\| die "Can't create temoprary key dir: $!\n";
492	$gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
493	$gw_secretkey = "$tmpdir/keys/fed.$keytype";
494	print "Generating $keytype keys\n" if $verbose;
495	generate_ssh_keys($keytype, $gw_secretkey) \|\|
496	die "Cannot generate kets:$@\n";
497	}
498	# Generate the basenames
499	($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
500	($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
501
502
503
504	# Validate scripts directory
505	for my $s (@scripts) {
506	die "$local_script_dir/$s not in local script directory. Try -d\n"
507	unless -r "$local_script_dir/$s";
508	}
509
510	die "Must supply file, master and experiment" unless $master && $tcl && $eid;
511
512	&parse_testbeds($tb_config, $tbparams) \|\|
513	die "Cannot testbed congfigurations from $tb_config: $!\n";
514
515	# Open a pipe to the splitter program and start it parsing the experiments
516	my $pipe = new IO::Pipe;
517	# NB no more -p call on parse call.
518	$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl") \|\|
519	die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";
520
521	# Parsing variables
522	my $ctb; # Current testbed
523	my %allocated; # If allocated{$tb} > 0, $tb is in use
524	my $destfile; # File that the sub-experiment tcl file is
525	# being written to, or "" if none. Also used
526	# for hostnames file.
527	my $desthandle; # File handle for distfile
528	my $gateways; # when gateway lists are being processed this
529	# is the testbed whose gateways are being
530	# gathered.
531	my $control_gateway; # Control net gateway for the current testbed
532	my %active_end; # If active_end{"a-b"} > 0 then a is the active
533	# end of the a <-> b connector pair.
534
535	# Parse the splitter output. This loop creates the sub experiments, gateway
536	# configurations and hostnames file
537	while (<$pipe>) {
538	# Start of a sub-experiment
539	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
540	$ctb = $1;
541
542	# If we know the testbed, start collecting its sub experiment tcl
543	# description. If not, warn the caller and ignore the configuration of
544	# this testbed.
545	if ($tbparams->{$ctb}) {
546	$allocated{$ctb}++; # Keep track of the testbeds allocated
547
548	unless (-d "$tmpdir/$ctb") {
549	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
550	}
551	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
552
553	$desthandle = new IO::File(">$destfile") \|\|
554	die "Cannot open $destfile:$!\n";
555	}
556	else {
557	warn "No such testbed $ctb\n";
558	$destfile = "";
559	}
560	next;
561	};
562
563	# End of that experiment
564	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
565	# Simple syntax check and close out this experiment's tcl description
566	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
567	$desthandle->close();
568	$destfile = $ctb = "";
569	next;
570	};
571
572	# Beginning of a gateway set
573	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
574	$gateways = $1;
575	# If we've heard of this tb, create the config lines for it one at a
576	# time.
577	if ($allocated{$gateways}) {
578	# Just in case. This directory should already have been created
579	# above.
580	unless (-d "$tmpdir/$gateways") {
581	mkdir("$tmpdir/$gateways") \|\|
582	die "Can't create $tmpdir/$gateways: $!";
583	}
584	}
585	else {
586	warn "Gateways given (and ignored) for testbed not in use: " .
587	"$gateways\n";
588	$gateways = 0;
589	}
590	next;
591	};
592	# End of the gateways section. Output the client config for this testbed
593	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
594	die "Mismatched gateway markers ($1, $gateways)\n"
595	unless !$gateways \|\| $gateways == $1;
596
597	if ($control_gateway ) {
598	# Client config
599	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
600	my $master_project = $tbparams->{$master}->{'project'};
601	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
602	print $cc "ControlGateway: $control_gateway\n";
603	print $cc "SMBShare: $smb_share\n";
604	print $cc "ProjectUser: $project_user\n";
605	print $cc "ProjectName: $master_project\n";
606	$cc->close();
607	}
608	else { warn "No control gateway for $gateways?\n"; }
609
610	$gateways = 0;
611	next;
612	};
613	# Beginning of the hostnames list. Collection is always in the hostnames
614	# file.
615	/^#\s+Begin\s+hostnames/ && do {
616	$destfile = "$tmpdir/hostnames";
617	$desthandle = new IO::File(">$destfile") \|\|
618	die "Can't open $destfile:$!\n";
619	next;
620	};
621	# end of the hostnames list.
622	/^#\s+End\s+hostnames/ && do {
623	$desthandle->close();
624	$destfile = "";
625	next;
626	};
627
628	# Generate gateway configuration info, one file per line
629	$gateways && do {
630	chomp;
631	my($dtb, $myname, $desthost, $type) = split(" ", $_);
632
633	# Many of these are to simplify print statements
634	my $sdomain = # domain for the source
635	$tbparams->{$gateways}->{'domain'};
636	my $ddomain = # domain for the destination
637	$tbparams->{$dtb}->{'domain'};
638	my $sproject = # Project of the destination
639	$tbparams->{$gateways}->{'project'};
640	my $fs = # Master fs node (FQDN)
641	$tbparams->{$master}->{'fs'} . $tbparams->{$master}->{'domain'};
642	my $boss = # Master boss node (FQDN)
643	$tbparams->{$master}->{'boss'} . $tbparams->{$master}->{'domain'};
644	my $remote_script_dir = # Remote fed script location
645	"/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
646	my $active; # Is this the active side of
647	# the connector?
648
649	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
650	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
651
652	my $conf_file = "$myname$sdomain.gw.conf";
653	# translate to lower case so the `hostname` hack for specifying
654	# configuration files works.
655	$conf_file =~ tr/A-Z/a-z/;
656
657	# If either end of this link is in the master side of the testbed, that
658	# side is the active end. Otherwise the first testbed encountered in
659	# the file will be the active end. The $active_end variable keeps
660	# track of those decisions
661	if ( $dtb eq $master ) { $active = "false"; }
662	elsif ($gateways eq $master ) { $active = "true"; }
663	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
664	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
665
666	# This is used to create the client configuration.
667	$control_gateway = "$myname$sdomain"
668	if $type =~ /(control\|both)/;
669
670	# Write out the file
671	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")\|\|
672	die "can't open $tmpdir/$gateways/$conf_file: $!\n";
673
674	print $gwconfig "Active: $active\n";
675	print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
676	print $gwconfig "BossName: $boss\n";
677	print $gwconfig "FsName: $fs\n";
678	print $gwconfig "Type: $type\n";
679	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
680	print $gwconfig "Peer: $desthost$ddomain\n";
681	print $gwconfig "Pubkeys: " .
682	"/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
683	print $gwconfig "Privkeys: " .
684	"/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
685	$gwconfig->close();
686
687	# This testbed has a gateway (most will) so make a copy of the keys it
688	# needs in this testbed's subdirectory. start_segment will transfer
689	# them.
690	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
691	copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") \|\|
692	die "Can't copy pubkeys ($gw_pubkey to " .
693	"$tmpdir/$gateways/$gw_pubkey_base): $!\n";
694	}
695	if ($active eq "true" ) {
696	unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
697	copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") \|\|
698	die "Can't copy secret keys ($gw_secretkey to " .
699	"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
700	}
701	}
702
703	#done processing gateway entry, ready for next line
704	next;
705	};
706	(/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
707	next if /^#/;
708	chomp;
709	push(@tarfiles, $_);
710	next;
711	};
712	(/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
713	next if /^#/;
714	chomp;
715	push(@rpms, $_);
716	next;
717	};
718
719	next unless $destfile; # Unidentified testbed, ignore config
720	# local copies that can be used in the substitutions below
721	my $gwtype = $tbparams->{$ctb}->{'gwtype'};
722	my $gwimage = $tbparams->{$ctb}->{'gwimage'};
723	my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
724	my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
725	my $gwstart = $tbparams->{$ctb}->{'gwstart'};
726	my $expstart = $tbparams->{$ctb}->{'expstart'};
727	my $project = $tbparams->{$ctb}->{'project'};
728
729	# Substitute variables
730	s/GWTYPE/$gwtype/g;
731	s/GWIMAGE/$gwimage/g;
732	if ($ctb eq $master ) {
733	s/GWSTART/$mgwstart/g;
734	s/EXPSTART/$mexpstart/g;
735	}
736	else {
737	s/GWSTART/$gwstart/g;
738	s/EXPSTART/$expstart/g;
739	}
740	# XXX: oh is this bad
741	s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
742	s#PROJDIR#/proj/$project/#g;
743	s#EID#$eid#g;
744	s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
745	print $desthandle $_;
746	}
747	$pipe->close();
748	die "No nodes in master testbed ($master)\n" unless $allocated{$master};
749
750	# Copy tarfiles and rpms needed at remote sites to the staging directories.
751	# Start_segment will distribute them
752	for my $t (@tarfiles) {
753	die "tarfile '$t' unreadable: $!\n" unless -r $t;
754	unless (-d "$tmpdir/tarfiles") {
755	mkdir("$tmpdir/tarfiles") \|\|
756	die "Can't create $tmpdir/tarfiles:$!\n";
757	}
758	copy($t, "$tmpdir/tarfiles") \|\|
759	die "Can't copy $t to $tmpdir/tarfiles:$!\n";
760	}
761
762	for my $r (@rpms) {
763	die "rpm '$r' unreadable: $!\n" unless -r $r;
764	unless (-d "$tmpdir/rpms") {
765	mkdir("$tmpdir/rpms") \|\|
766	die "Can't create $tmpdir/rpms:$!\n";
767	}
768	copy($r, "$tmpdir/rpms") \|\|
769	die "Can't copy $r to $tmpdir/rpms:$!\n";
770	}
771
772	exit(0) unless $startem;
773
774	my %started; # If $started{$tb} then $tb successfully started
775	my %child; # If $child{$pid} then a process with that pid is
776	# working on a starting a segment
777	my $nworking = 0; # Number of children working on swapin
778	my $pid; # Scratch variable for pids
779
780	# Start up the slave sub-experiments first
781	TESTBED:
782	for my $tb (keys %allocated) {
783	if ( $tb ne $master ) {
784	while ( $nworking == $max_children ) {
785	print "Waiting for a child process to complete\n" if $verbose;
786	if (($pid = wait()) != -1 ) {
787	# The $? >> 8 is the exit code of the subprocess, which is
788	# non-zero if the &start_segment routine failed.
789	my $exit_code = ($? >> 8);
790
791	print "Child $pid completed exit code ($exit_code)\n"
792	if $verbose;
793	$nworking--;
794	$started{$child{$pid}}++ unless $exit_code;
795	if ($child{$pid} ) { delete $child{$pid}; }
796	else { warn "Reaped a pid we did not start?? ($pid)\n"; }
797	last TESTBED if $exit_code;
798	}
799	else { warn "wait returned without reaping: $!\n"; }
800	}
801	if ( $pid = fork() ) {
802	# Parent process
803	$nworking ++;
804	$child{$pid} = $tb;
805	print "Started process $pid to start testbed $tb\n"
806	if $verbose;
807	}
808	else {
809	# Child. Note that we reverse the sense of the return code when it
810	# becomes an exit value. Zero exit values indicate success.
811	exit(!&start_segment($tb, $eid, $tbparams, $timeout));
812	}
813	}
814	}
815
816	# Now wait for any still running processes.
817	while ( $nworking ) {
818	print "Waiting for a child process to complete ($nworking running)\n"
819	if $verbose;
820	if (($pid = wait()) != -1 ) {
821	# The $? >> 8 is the exit code of the subprocess, which is
822	# non-zero if the &start_segment routine failed.
823	my $exit_code = ($? >> 8);
824
825	print "Child $pid completed exit code ($exit_code)\n"
826	if $verbose;
827	$nworking--;
828	$started{$child{$pid}}++ unless $exit_code;
829	if ($child{$pid} ) { delete $child{$pid}; }
830	else { warn "Reaped a pid we did not start?? ($pid)\n"; }
831	}
832	else { warn "wait returned without reaping: $!\n"; }
833	}
834
835	# Now the master
836	if (&start_segment($master, $eid, $tbparams, $timeout)) {
837	$started{$master}++;
838	}
839
840	# If any testbed failed, swap the rest out.
841	if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
842	for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
843	print "Error starting experiment\n";
844	exit(1);
845	}
846	print "Experiment started\n";
847	print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
848	system("rm -rf $tmpdir") unless $debug;
849	exit(0); # set the exit value
850
851	=pod
852
853	=head1 NAME
854
855	B<splitter.pl>
856
857	=head1 SYNOPSIS
858
859	B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
860	[B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
861
862	=head1 DESCRIPTION
863
864	B<splitter.pl> invokes the DETER experiment parser to split an annotated
865	experiment into multiple sub-experments and instantiates the sub-experiments on
866	their intended testbeds. Annotation is accomplished using the
867	tb-set-node-testbed command, added to the parser.
868
869	Much of the script's behavior depends on the configuration file, specified with
870	the B<-c> flag and defaulting to F<./splitter.conf>.
871
872	The testbed labels supplied in the B<tb-set-node-testbed> command are
873	meaningful based on their presence in the testbeds file. that file can be
874	specified in the configuration file using the B<Testbeds> directive, and
875	defaults to F<./testbeds>. The syntax is described below.
876
877	Most of the intermediate files are staged in a sub-directory of a temporary
878	files directory and deleted at the end of the script. Specifying the B<-d>
879	flag on the command line avoids the deletion for debbugging. By default the
880	temporary files directory is directory is F</tmp> and can be reset in the
881	configuration file using the B<Tmpdir> directive. Intermediate files are
882	stored under a subdirectory formed by adding the process ID of the splitter
883	process. For example, if the temporary files directory is F</tmp> and the
884	B<splitter.pl> process ID is 2323, the temporary files will be stored in
885	F</tmp/split2323/>.
886
887	The expreriment is split out into one experiment description per testbed in the
888	temporary directory named as F<experiment.testbed.tcl> where the experiment is
889	the experiment ID given in the configuration file, and the testbed is the
890	tb-set-node-testbed parameter for the nodes in the file.
891
892	If the B<-n> option is absent the sub-experiments are then instantiated on
893	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
894	Per-testbed parameters are set in the testbeds file. Sub-experiments on
895	slave testbeds are instantiated in a random order, but the master testbed is
896	currently instantiated last.
897
898	Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
899	and the maximum number of simultaneous processes to start.
900
901	Scripts to start federation (the federation kit) are copied into the local
902	experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
903	taken from the directory given by the B<ScriptDir> directive in the
904	configuration file.
905
906	If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
907	F<splitter.conf>.
908
909	If any sub-experiment fails to instantiate, the other sub-exeriments are
910	swapped out. B<-F> avoids this swap out, which can also be specified as
911	B<SoftFail: true> in F<splitter.conf>
912
913	=head2 Configuration File
914
915	The configuration file is a simple set of colon-separated parameters and
916	values. A configuration file must be present, either specified in the B<-c>
917	flag or the default F<./splitter.conf>. All the parameter names are case
918	insensitive, but should not include any whitespace. Parameter values may
919	include whitespace, but no newlines.
920
921	Possible parameters are:
922
923	=over 5
924
925	=item Experiment
926
927	The name of the experiment on the various testbeds
928
929	=item Master
930
931	The master testbed label from the testbeds file, described below.
932
933	=item Testbeds
934
935	The testbeds file described below, giving per-testbed parameters. If this
936	directive is absent the testbeds file defaults to F<./testbeds>
937
938	=item ScriptDir
939
940	Location of the default federation scripts, i.e. the federation kit.
941
942	=item GatewayPubkey
943
944	=item GatewaySecretKey
945
946	The names of the files containing secret and public keys to use in setting up
947	tunnels between testbeds. If given they are used, otherwise keys are generated.
948
949	=item GatewayKeyType
950
951	This controls the kind of SSH keys generated to configure the geatways. If
952	given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>. The parameter
953	is csase insensitive.
954
955	=item TmpDir
956
957	=item TempDir
958
959	The directory where temporary files are created. These are synonyms, but
960	should both be specified, B<TmpDir> has priority. If neither is specified,
961	F</tmp> is used.
962
963	=item SMBShare
964
965	The SMB share on the master testbed that will be exported to remote clients.
966
967	=item SMBUser
968
969	The experiment user to mount project directories as. This user needs to be a
970	member of the exported experiment - that is one of the users in the project
971	containing this experiment on the master testbed.
972
973	=item Timeout
974
975	Value in seconds after which a swap-in operatioin will be considered a success.
976	Often long swap-ins will hang when there are partial failures. This works
977	around this issue. (This behavior can be requested on the command line by
978	specifying B<-t> I<secs>.)
979
980	=item FailSoft
981
982	If not set, failure of any sub experiment swaps the rest out. Setting this to
983	any value avoids this swap out. (This behavior can be requested on the command
984	line by specifying B<-F>.)
985
986	=item MuxLimit
987
988	The maximum bumber of links/lans carried by one gateway pair
989
990	=item Tclparse
991
992	The pathname to the experiment parsing program. Only developers should set
993	this.
994
995	=item Tclsh
996
997	The pathname to the local oTcl shell. Only developers should set
998	this.
999
1000	=back
1001
1002	=head2 Testbeds file
1003
1004	The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
1005	scoped attribute-value pairs where each attribute is specified on a separate
1006	line of the configuration file. Each testbed's parameters are preceeded by the
1007	testbed label in brackets ([]) on a line by itself. After that the parameters
1008	are specified as parameter: value. This is essentially the same format as the
1009	configuration file. Parameters are:
1010
1011	=over 4
1012
1013	=item User
1014
1015	The user under which to make requests to this testbed. The user running
1016	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
1017	testbed.
1018
1019	=item OpsNode
1020
1021	The host name of the testbed's ops node. The user calling B<splitter.pl> must
1022	be able to execute commands on this host via L<ssh(1)>.
1023
1024	=item Domain
1025
1026	The domain of nodes in this testbed (including the ops host). This parameter
1027	should always start with a period.
1028
1029	=item Project
1030
1031	The project under which to instantiate sub-experiments on this testbed.
1032
1033	=item ConnectorType
1034
1035	The node type for inter-testbed connector nodes on this testbed.
1036
1037	=item SlaveNodeStartCmd
1038
1039	The start command to run on experimental nodes when this testbed is used as a
1040	slave. In all the start commands the following string substitutions are made:
1041
1042	=over 10
1043
1044	=item FEDDIR
1045
1046	The local experiment's federation scripts directory. Each local experiment
1047	will have this replaced by the scripts directory on its local boss.
1048
1049	=item GWCONF
1050
1051	The full pathname of the gateway configuration file. As with FEDDIR, this is
1052	on the local boss.
1053
1054	=item PROJDIR
1055
1056	The project directory on the local boss.
1057
1058	=item EID
1059
1060	The local experiment name.
1061
1062	=back
1063
1064	All startcmds specified in F<testbeds> undergo these expansions.
1065
1066	=item SlaveConnectorStartCmd
1067
1068	The start command to run on gateway nodes when this testbed is used as a slave.
1069	The same string substitutions are made in this command as in SlaveNodeStartCmd.
1070
1071	=item MasterNodeStartCmd
1072
1073	The start command to run on experimental nodes when this testbed is used as a
1074	master. The same string substitutions are made in this command as in
1075	SlaveNodeStartCmd.
1076
1077	=item MasterConnectorStartCmd
1078
1079	The start command to run on gateway nodes when this testbed is used as a
1080	master. The same string substitutions are made in this command as in
1081	SlaveNodeStartCmd.
1082
1083	=item ConnectorImage
1084
1085	The disk image to be loaded on a gateway node on this testbed.
1086
1087	=item FileServer
1088
1089	The node in the master testbed from which filesystems are mounted.
1090
1091	=item Boss
1092
1093	The node in the master testbed that controls the testbed.
1094
1095	=item TunnelCfg
1096
1097	True if the connector needs to do DETER federation. This parameter will
1098	probably be removed.
1099
1100
1101	=back
1102
1103	=head1 ENVIRONMENT
1104
1105	B<splitter.pl> does not directly make use of environment variables, but calls
1106	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1107	environment.
1108
1109	=head1 BUGS
1110
1111	A deprecated B<-N> flag was used to select testbeds file format. Only one
1112	format is supported now, and B<-N> generates a warning, but otherwise does not
1113	affect B<splitter.pl>.
1114
1115	=head1 SEE ALSO
1116
1117	L<sh(1)>, L<ssh(1)>
1118
1119	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: