Context Navigation

source: fedkit/splitter.pl @ 33e3537

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since 33e3537 was 33e3537, checked in by Ted Faber <faber@…>, 16 years ago
events to both federants - start repeaters on both sides of the gateway
Property mode set to `100644`
File size: 38.1 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10
11	my @scripts = ("fed_bootstrap", "federate.sh", "smbmount.pl",
12	"make_hosts", "fed-tun.pl", "fed_evrepeater" );
13	my $local_script_dir = ".";
14	my($pid, $gid); # Process and group IDs for calling parse.tcl
15	my $splitter_config; # Configuration file
16	my $debug; # True if thecalled in debug mode
17	my $verbose; # True for extra progress reports
18	my $startem; # If true, start the sub-experiments
19	my $eid; # Experiment ID
20	my $tcl; # The experiment description (topology file)
21	my $master; # Master testbed
22	my $tmpdir; # tmp files
23	my $tb_config; # testbed configurations
24	my $smb_share; # Share to mount from the master
25	my $project_user; # User to mount project dirs as
26	my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
27	my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
28	# basename)
29	my($keytype); # Type (DSA or RSA) of generated gateway keys
30	my $tcl_splitter; # tcl program to split experiments
31	# (changed during devel)
32	my $tclsh; # tclsh to call directly (changed during devel)
33	my $muxmax; # Maximum number of links/lans over 1 gw pair
34	my @tarfiles; # Tarfiles in use by this experiment
35	my @rpms; # Rpms in use by this experiment
36	my $timeout; # The timeout to use for experiment swap ins
37	my %opts; # Parsed options
38
39	my $tbparams = {}; # Map of the per-testbed parameters from the
40	# testbeds file. It is a reference to a hash
41	# of hashes (because it's passed around a bunch
42	# and it's nicer to have one access pattern
43	# throughout the script, in the main loop and
44	# the subroutines). That access is exemplified
45	# by $tbparams->{'deter'}->{'domain'} which is
46	# the domain parameter of the DETER testbed.
47	my $fail_soft; # Do not swap failed sub-experiments out
48	my $max_children=1; # Maximum number of simultaneous swap-ins
49
50	# Parse the config file. The format is a colon-separated parameter name
51	# followed by the value of that parameter to the end of the line. This parses
52	# that format and puts the parameters into the referenced hash. Parameter
53	# names are mapped to lower case, parameter values are unchanged. Returns 0 on
54	# failure (e.g. file open) and 1 on success.
55	sub parse_config {
56	my($file, $href) = @_;
57	my $fh = new IO::File($file);
58
59	unless ($fh) {
60	warn "Can't open $file: $!\n";
61	return 0;
62	}
63
64	while (<$fh>) {
65	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
66	chomp;
67	/^([^:]+):\s(.)/ && do {
68	my $key = $1;
69
70	$key =~ tr/A-Z/a-z/;
71	$href->{$key} = $2;
72	next;
73	};
74	warn "Unparasble line in $file: $_\n";
75	}
76	$fh->close(); # It will close when it goes out of scope, but...
77	return 1;
78	}
79
80	# Parse an easier-to-read testbeds file (the original was comma-separated
81	# unadorned strings). The format is a testbed scope as [testbed] followed by
82	# the colon-separated attribute-value pairs for the testbed. Right now these
83	# go into a set of global hashes indexed by testbed, but that should probably
84	# change.
85
86	sub parse_testbeds {
87	my($file, $tbparams) = @_; # Testbeds file and parameter hash
88	my $fh = new IO::File($file); # Testbeds filehandle
89	my $tb; # Current testbed
90	# Convert attribute in the file to tbparams hash key
91	my %attr_to_hash = (
92	"opsnode" => "host",
93	"user" => "user",
94	"domain" => "domain",
95	"project" => "project",
96	"connectortype" => "gwtype",
97	"slavenodestartcmd" => "expstart",
98	"slaveconnectorstartcmd" => "gwstart",
99	"masternodestartcmd" => "mexpstart",
100	"masterconnectorstartcmd" => "mgwstart",
101	"connectorimage" => "gwimage",
102	"fileserver" => "fs",
103	"boss" => "boss",
104	"eventserver" => "eventserver",
105	"tunnelcfg" => "tun"
106	);
107
108
109	unless ($fh) {
110	warn "Can't open $file: $!\n";
111	return 0;
112	}
113
114	while (<$fh>) {
115	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
116	chomp;
117	/^\s\[(.)\]/ && do {
118	$tb = $1;
119	$tbparams->{$tb} = {} unless $tbparams->{$tb};
120	next;
121	};
122
123	/^([^:]+):\s(.)/ && do {
124	unless ($tb) {
125	warn "Ignored attribute definition before testbed " .
126	"defined in $file: $_\n";
127	next;
128	}
129	my $key = $1;
130	$key =~ tr/A-Z/a-z/;
131	my $var = $attr_to_hash{$key};
132
133	if ($var) { $tbparams->{$tb}->{$var} = $2; }
134	else { warn "Unknown keyword $key in $file\n"; }
135
136	next;
137	};
138	warn "Unparasble line in $file: $_\n";
139	}
140	$fh->close(); # It will close when it goes out of scope, but...
141	return 1;
142	}
143
144	# Generate SSH keys for use by the gateways. The parameters are the type and
145	# the filename for the private key. The pubkey will be stored in a filename
146	# with the same name as the private key but with .pub appended. Type can be
147	# dsa or rsa.
148
149	sub generate_ssh_keys {
150	my($type, $dest) = @_;
151
152	$type =~ tr/A-Z/a-z/;
153	return 0 if $type !~ /(rsa\|dsa)/;
154	system("/usr/bin/ssh-keygen -t $type -N \"\" -f $dest");
155	return $@ ? 0 : 1;
156	}
157
158	# use scp to transfer a file, reporting true if successful and false otherwise.
159	# Parameters are the local file name, the ssh host destination (either hostname
160	# oe user@host), and an optional destination file name or directory. If no
161	# destination is given, the file is transferred to the given user's home
162	# directory. If only a machine is given in the ssh host destination, the
163	# current user is used.
164	sub scp_file {
165	my($file, $user, $host, $dest) = @_;
166
167	# XXX system with a relative pathname is sort of gross
168	system("scp $file $user\@$host:$dest");
169	if ($?) {
170	warn "scp failed $?\n";
171	return 0;
172	}
173	else { return 1; }
174	}
175
176	# use ssh to execute the given command on the machine (and as the user) in
177	# $where. Parameters are the ssh destination directive ($where) and the
178	# command to execute, and a prefix to be placed on a message generated if the
179	# command fails. On failure print a warning if a warning prefix was given and
180	# return false. If timeout is given fork a process and set an alarm of that
181	# many seconds. Timeouts also return 0;
182	sub ssh_cmd {
183	my($user, $host, $cmd, $wname, $timeout) = @_;
184	my $pid; # Child pid
185
186	$timeout = 0 unless $timeout; # Force default timeout
187
188	if ( $pid = fork () ) {
189	# Parent process
190	# The eval acts as a signal catcher. If the alarm goes off inside
191	# the eval, the die will put "alarm\n" into $@, otherwise the
192	# return value of the execution in the child process will be used.
193	my $rv = eval {
194	local $SIG{'ALRM'} = sub{ die "alarm\n"; };
195	my $rv;
196
197	alarm $timeout;
198	$rv = waitpid($pid, 0);
199	alarm 0;
200	$rv;
201	};
202
203	# If the eval succeeded, $@ will be null and we can use $rv, which
204	# is the return code from the subprocess. If the eval timed out,
205	# print a warning and assume the best.
206	if ($@ eq "alarm\n" ) {
207	warn "$wname timed out - pid $pid still live\n";
208	return 1;
209	}
210	else {
211	return $rv;
212	}
213	}
214	else {
215	# Child process
216	exec("ssh $user\@$host $cmd");
217	exit 0;
218	}
219	}
220
221	# Ship local copies of the federation scripts out to the given host. If any of
222	# the script transfers fails, return 0. The scripts to transfer are from the
223	# global @scripts and are found locally in $local_script_dir (another global).
224	sub ship_scripts {
225	my($host, $user, $dest_dir) = @_; # Where, who, where remotely
226	my $s;
227
228	&ssh_cmd($user, $host, "mkdir -p $dest_dir");
229	for $s (@scripts) {
230	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) \|\|
231	return 0;
232	}
233	return 1;
234	}
235
236	# Ship per-testbed configuration generated by this script to the remote /proj
237	# directories on the remote testbeds
238	sub ship_configs {
239	my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely
240	my($d, $f);
241
242	$d = IO::Dir->new($src_dir) \|\| return 0;
243
244	# All directories under $tmpdir are 770 so we can delete them later.
245	&ssh_cmd($user, $host, "mkdir -p $dest_dir") \|\| return 0;
246	&ssh_cmd($user, $host, "chmod 770 $dest_dir") \|\| return 0;
247	while ( $f = $d->read()) {
248	next if $f =~ /^\./;
249	if ( -d "$src_dir/$f" ) {
250	&ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") \|\|
251	return 0;
252	}
253	else {
254	&scp_file("$src_dir/$f", $user, $host, $dest_dir) \|\| return 0;
255	}
256	}
257	return 1;
258	}
259
260	# Start a sub section of the experiment on a given testbed. The testbed and
261	# the user to start the experiment as are pulled from the global per-testbed
262	# hash, passed in as $tbparams, as is the project name on the remote testbed.
263	# Parameters are the testbed and the experiment id. Configuration files are
264	# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
265	# Then the current state of the experiment determined using expinfo. From that
266	# state, the experiment is either created, modified or spapped in. If
267	# everything succeeds, true is returned. If the global verbose is set progress
268	# messages are printed.
269	sub start_segment {
270	my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
271	# per-testbed parameters and remote
272	# swap-in timeout
273	my $host = # Host name of remote ops (FQDN)
274	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
275	my $user = $tbparams->{$tb}->{'user'}; # user to pass to ssh
276	my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
277	# experiment under
278	my $tclfile = "./$eid.$tb.tcl"; # Local tcl file with the
279	# sub-experiment
280	my $proj_dir = "/proj/$pid/exp/$eid/tmp"; # Where to stash federation stuff
281	my $tarfiles_dir = "/proj/$pid/tarfiles/$eid"; # Where to stash tarfiles
282	my $rpms_dir = "/proj/$pid/rpms/$eid"; # Where to stash rpms
283	my $to_hostname = "$proj_dir/hosts"; # remote hostnames file
284	my $state; # State of remote experiment
285	my $status = new IO::Pipe; # The pipe to get status
286
287	# Determine the status of the remote experiment
288	$status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") \|\|
289	die "Can't ssh to $user\@$host:$!\n";
290
291	# XXX: this is simple now. Parsing may become more complex
292	while (<$status>) {
293	/State: (\w+)/ && ($state = $1);
294	/No\s+such\s+experiment/ && ($state = "none");
295	}
296	$status->close();
297	print "$tb: $state\n";
298
299	# Copy the experiment definition data over
300	print "transferring subexperiment to $tb\n" if $verbose;
301	&scp_file("$tmpdir/$tb/$tclfile", $user, $host) \|\| return 0;
302	# Clear out any old experiment data; if not deleted, copies over it by
303	# different users will fail.
304	# (O /bin/csh, how evil thou art. The -c and the escaped single quotes
305	# force the /bin/sh interpretation of the trailing * (which we need to keep
306	# tmp around)) Again, this needs to be done more properly once we have a
307	# non-ssh interface here.)
308	print "clearing experiment subdirs on $tb\n" if $verbose;
309	&ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") \|\|
310	return 0;
311	print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
312	&ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") \|\|
313	return 0;
314	print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
315	&ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") \|\|
316	return 0;
317	print "clearing experiment rpms subdirs on $tb\n" if $verbose;
318	&ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") \|\|
319	return 0;
320	print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
321	&ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") \|\|
322	return 0;
323	# Remote experiment is active. Modify it.
324	if ($state eq "active") {
325	print "Transferring federation support files to $tb\n" if $verbose;
326	# First copy new scripts and hostinfo into the remote /proj
327	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
328	return 0;
329	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
330	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
331
332	if ( -d "$tmpdir/tarfiles") {
333	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
334	return 0;
335	}
336
337	if ( -d "$tmpdir/rpms") {
338	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
339	return 0;
340	}
341
342	print "Modifying $eid in place on $tb\n" if $verbose;
343	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
344	"$eid $tclfile", "modexp", $timeout) \|\| return 0;
345	return 1;
346	}
347
348	# Remote experiment is swapped out, modify it and swap it in.
349	if ($state eq "swapped") {
350	print "Transferring federation support files to $tb\n" if $verbose;
351	# First copy new scripts and hostinfo into the remote /proj (because
352	# the experiment exists, the directory tree should be there.
353	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
354	return 0;
355	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
356	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
357	if ( -d "$tmpdir/tarfiles") {
358	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
359	return 0;
360	}
361
362	if ( -d "$tmpdir/rpms") {
363	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
364	return 0;
365	}
366
367	print "Modifying $eid on $tb\n" if $verbose;
368	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
369	"modexp") \|\| return 0;
370	print "Swapping $eid in on $tb\n" if $verbose;
371	# Now start up
372	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
373	"swapexp", $timeout) \|\| return 0;
374	return 1;
375	}
376
377	# No remote experiment. Create one. We do this in 2 steps so we can put
378	# the configuration files and scripts into the new experiment directories.
379	if ($state eq "none") {
380
381	if ( -d "$tmpdir/tarfiles") {
382	# Tarfiles have to exist for the creation to work
383	print "copying tarfiles to $tb\n";
384	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
385	return 0;
386	}
387
388	if ( -d "$tmpdir/rpms") {
389	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
390	return 0;
391	}
392
393	print "Creating $eid on $tb\n" if $verbose;
394	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
395	"$pid -e $eid $tclfile", "startexp") \|\| return 0;
396	# After startexp succeeds, the per-experiment directories exist on the
397	# remote testbed.
398	print "Transferring federation support files to $tb\n" if $verbose;
399	# First copy new scripts and hostinfo into the remote /proj
400	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
401	return 0;
402	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
403	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
404	# Now start up
405	print "Swapping $eid in on $tb\n" if $verbose;
406	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
407	"swapexp", $timeout) \|\| return 0;
408	return 1;
409	}
410
411	# Every branch for a known state returns. If execution gets here, the
412	# state is unknown.
413	warn "unknown state: $state\n";
414	return 0;
415	}
416
417	# Swap out a sub-experiment - probably because another has failed. Arguments
418	# are testbed and experiment. Most of the control flow is similar to
419	# start_segment, though much simpler.
420	sub stop_segment {
421	my($tb, $eid, $tbparams) = @_; # testbed, experiment ID and
422	# per-testbed parameters
423	my $user = $tbparams->{$tb}->{'user'}; # testbed user
424	my $host = # Ops node
425	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
426	my $pid = $tbparams->{$tb}->{'project'};# testbed project
427
428	print "Stopping $eid on $tb\n" if $verbose;
429	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out",
430	"swapexp (out)") \|\| return 0;
431	return 1;
432	}
433
434
435	$pid = $gid = "dummy"; # Default project and group to pass to
436	# $tcl_splitter above. These are total
437	# dummy arguments; the splitter doesn't
438	# use them at all, but we supply them to
439	# keep our changes to the parser minimal.
440	# Argument processing.
441	getopts('Ft:c:p:f:ndvN', \%opts);
442	$splitter_config = $opts{'c'} \|\| "./splitter.conf";
443	$debug = $opts{'d'};
444	$verbose = $opts{'v'} \|\| $opts{'d'};
445
446	&parse_config("$splitter_config", \%opts) \|\|
447	die "Cannot read config file $splitter_config: $!\n";
448
449	warn "-N does nothing now. Only one testbeds format supported.\n"
450	if $opts{'N'};
451	$fail_soft = $opts{'F'} \|\| $opts{'failsoft'};
452	$startem = $opts{'n'} ? 0 : 1; # If true, start the sub-experiments
453	$timeout = $opts{'t'} \|\| $opts{'timeout'};
454	$eid = $opts{'experiment'}; # Experiment ID
455	$tcl = $opts{'f'} \|\| shift; # The experiment description
456	$master = $opts{'master'}; # Master testbed
457	$tmpdir = $opts{'tmpdir'} \|\| $opts{'tempdir'}\|\| "/tmp"; # tmp files
458	$tb_config = $opts{'testbeds'} \|\| "./testbeds"; # testbed configurations
459	$local_script_dir = $opts{'scriptdir'}; # Local scripts
460	$muxmax = $opts{'muxlimit'} \|\| 3; # Number of connections muxed on one
461	# gateway
462
463	$max_children = $opts{'p'} \|\| $opts{'maxchildren'}
464	if $opts{'p'} \|\| $opts{'maxchildren'};
465
466	$smb_share = $opts{'smbshare'} \|\| # Share to mount from the master
467	die "Must give an SMB share\n";
468	$project_user = $opts{'smbuser'} \|\| # User to mount project dirs as
469	die "Must give an SMB user\n";
470
471	# tcl program to split experiments (changed during devel)
472	$tcl_splitter = $opts{'tclparse'} \|\| "/usr/testbed/lib/ns2ir/parse.tcl";
473	# tclsh to call directly (changed during devel)
474	$tclsh = $opts{'tclsh'} \|\| "/usr/local/bin/otclsh";
475
476	# Prefix to avoid collisions
477	$tmpdir .= "/split$$";
478
479	print "Temp files are in $tmpdir\n" if $verbose;
480	# Create a workspace
481	unless (-d "$tmpdir") {
482	mkdir("$tmpdir") \|\| die "Can't create $tmpdir: $!";
483	}
484
485	# If the keys are given, use them. Otherwise create a set under $tmpdir
486
487	if ( $opts{'gatewatpubkey'} && $opts{'gatewaysecretkey'}) {
488	$gw_pubkey = $opts{'gatewaypubkey'};
489	$gw_secretkey = $opts{'gatewaysecretkey'};
490	}
491	else {
492	$keytype = $opts{'gatewaykeytype'} \|\| "rsa";
493	mkdir("$tmpdir/keys") \|\| die "Can't create temoprary key dir: $!\n";
494	$gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
495	$gw_secretkey = "$tmpdir/keys/fed.$keytype";
496	print "Generating $keytype keys\n" if $verbose;
497	generate_ssh_keys($keytype, $gw_secretkey) \|\|
498	die "Cannot generate kets:$@\n";
499	}
500	# Generate the basenames
501	($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
502	($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
503
504
505
506	# Validate scripts directory
507	for my $s (@scripts) {
508	die "$local_script_dir/$s not in local script directory. Try -d\n"
509	unless -r "$local_script_dir/$s";
510	}
511
512	die "Must supply file, master and experiment" unless $master && $tcl && $eid;
513
514	&parse_testbeds($tb_config, $tbparams) \|\|
515	die "Cannot testbed congfigurations from $tb_config: $!\n";
516
517	# Open a pipe to the splitter program and start it parsing the experiments
518	my $pipe = new IO::Pipe;
519	# NB no more -p call on parse call.
520	$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl") \|\|
521	die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";
522
523	# Parsing variables
524	my $ctb; # Current testbed
525	my %allocated; # If allocated{$tb} > 0, $tb is in use
526	my $destfile; # File that the sub-experiment tcl file is
527	# being written to, or "" if none. Also used
528	# for hostnames file.
529	my $desthandle; # File handle for distfile
530	my $gateways; # when gateway lists are being processed this
531	# is the testbed whose gateways are being
532	# gathered.
533	my $control_gateway; # Control net gateway for the current testbed
534	my %active_end; # If active_end{"a-b"} > 0 then a is the active
535	# end of the a <-> b connector pair.
536
537	# Parse the splitter output. This loop creates the sub experiments, gateway
538	# configurations and hostnames file
539	while (<$pipe>) {
540	# Start of a sub-experiment
541	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
542	$ctb = $1;
543
544	# If we know the testbed, start collecting its sub experiment tcl
545	# description. If not, warn the caller and ignore the configuration of
546	# this testbed.
547	if ($tbparams->{$ctb}) {
548	$allocated{$ctb}++; # Keep track of the testbeds allocated
549
550	unless (-d "$tmpdir/$ctb") {
551	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
552	}
553	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
554
555	$desthandle = new IO::File(">$destfile") \|\|
556	die "Cannot open $destfile:$!\n";
557	}
558	else {
559	warn "No such testbed $ctb\n";
560	$destfile = "";
561	}
562	next;
563	};
564
565	# End of that experiment
566	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
567	# Simple syntax check and close out this experiment's tcl description
568	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
569	$desthandle->close();
570	$destfile = $ctb = "";
571	next;
572	};
573
574	# Beginning of a gateway set
575	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
576	$gateways = $1;
577	# If we've heard of this tb, create the config lines for it one at a
578	# time.
579	if ($allocated{$gateways}) {
580	# Just in case. This directory should already have been created
581	# above.
582	unless (-d "$tmpdir/$gateways") {
583	mkdir("$tmpdir/$gateways") \|\|
584	die "Can't create $tmpdir/$gateways: $!";
585	}
586	}
587	else {
588	warn "Gateways given (and ignored) for testbed not in use: " .
589	"$gateways\n";
590	$gateways = 0;
591	}
592	next;
593	};
594	# End of the gateways section. Output the client config for this testbed
595	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
596	die "Mismatched gateway markers ($1, $gateways)\n"
597	unless !$gateways \|\| $gateways == $1;
598
599	if ($control_gateway ) {
600	# Client config
601	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
602	my $master_project = $tbparams->{$master}->{'project'};
603	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
604	print $cc "ControlGateway: $control_gateway\n";
605	print $cc "SMBShare: $smb_share\n";
606	print $cc "ProjectUser: $project_user\n";
607	print $cc "ProjectName: $master_project\n";
608	$cc->close();
609	}
610	else { warn "No control gateway for $gateways?\n"; }
611
612	$gateways = 0;
613	next;
614	};
615	# Beginning of the hostnames list. Collection is always in the hostnames
616	# file.
617	/^#\s+Begin\s+hostnames/ && do {
618	$destfile = "$tmpdir/hostnames";
619	$desthandle = new IO::File(">$destfile") \|\|
620	die "Can't open $destfile:$!\n";
621	next;
622	};
623	# end of the hostnames list.
624	/^#\s+End\s+hostnames/ && do {
625	$desthandle->close();
626	$destfile = "";
627	next;
628	};
629
630	# Generate gateway configuration info, one file per line
631	$gateways && do {
632	chomp;
633	my($dtb, $myname, $desthost, $type) = split(" ", $_);
634
635	# Many of these are to simplify print statements
636	my $sdomain = # domain for the source
637	$tbparams->{$gateways}->{'domain'};
638	my $ddomain = # domain for the destination
639	$tbparams->{$dtb}->{'domain'};
640	my $sproject = # Project of the source
641	$tbparams->{$gateways}->{'project'};
642	my $dproject = # Project of the destination
643	$tbparams->{$dtb}->{'project'};
644	my $fs = # Master fs node (FQDN)
645	$tbparams->{$master}->{'fs'} . $tbparams->{$master}->{'domain'};
646	my $boss = # Master boss node (FQDN)
647	$tbparams->{$master}->{'boss'} . $tbparams->{$master}->{'domain'};
648	my $event_server = # Master event-server (FQDN)
649	$tbparams->{$master}->{'eventserver'} .
650	$tbparams->{$master}->{'domain'};
651	my $remote_event_server = # Slave event-server (FQDN)
652	$tbparams->{$dtb}->{'eventserver'} .
653	$tbparams->{$dtb}->{'domain'};
654	my $remote_script_dir = # Remote fed script location
655	"/proj/" . $dproject . "/exp/$eid/tmp";
656	my $local_script_dir = # Local fed script location
657	"/proj/" . $sproject . "/exp/$eid/tmp";
658	my $active; # Is this the active side of
659	# the connector?
660
661	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
662	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
663
664	my $conf_file = "$myname$sdomain.gw.conf";
665	my $remote_conf_file = "$desthost$ddomain.gw.conf";
666	# translate to lower case so the `hostname` hack for specifying
667	# configuration files works.
668	$conf_file =~ tr/A-Z/a-z/;
669	$remote_conf_file =~ tr/A-Z/a-z/;
670
671	# If either end of this link is in the master side of the testbed, that
672	# side is the active end. Otherwise the first testbed encountered in
673	# the file will be the active end. The $active_end variable keeps
674	# track of those decisions
675	if ( $dtb eq $master ) { $active = "false"; }
676	elsif ($gateways eq $master ) { $active = "true"; }
677	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
678	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
679
680	# This is used to create the client configuration.
681	$control_gateway = "$myname$sdomain"
682	if $type =~ /(control\|both)/;
683
684	# Write out the file
685	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")\|\|
686	die "can't open $tmpdir/$gateways/$conf_file: $!\n";
687
688	print $gwconfig "Active: $active\n";
689	print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
690	print $gwconfig "BossName: $boss\n";
691	print $gwconfig "FsName: $fs\n";
692	print $gwconfig "EventServerName: $event_server\n";
693	print $gwconfig "RemoteEventServerName: $remote_event_server\n";
694	print $gwconfig "Type: $type\n";
695	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
696	print $gwconfig "EventRepeater: $local_script_dir/fed_evrepeater\n";
697	print $gwconfig "RemoteExperiment: $dproject/$eid\n";
698	print $gwconfig "LocalExperiment: $sproject/$eid\n";
699	print $gwconfig "RemoteConfigFile: " .
700	"$remote_script_dir/$remote_conf_file\n";
701	print $gwconfig "Peer: $desthost$ddomain\n";
702	print $gwconfig "Pubkeys: " .
703	"/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
704	print $gwconfig "Privkeys: " .
705	"/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
706	$gwconfig->close();
707
708	# This testbed has a gateway (most will) so make a copy of the keys it
709	# needs in this testbed's subdirectory. start_segment will transfer
710	# them.
711	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
712	copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") \|\|
713	die "Can't copy pubkeys ($gw_pubkey to " .
714	"$tmpdir/$gateways/$gw_pubkey_base): $!\n";
715	}
716	if ($active eq "true" ) {
717	unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
718	copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") \|\|
719	die "Can't copy secret keys ($gw_secretkey to " .
720	"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
721	}
722	}
723
724	#done processing gateway entry, ready for next line
725	next;
726	};
727	(/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
728	next if /^#/;
729	chomp;
730	push(@tarfiles, $_);
731	next;
732	};
733	(/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
734	next if /^#/;
735	chomp;
736	push(@rpms, $_);
737	next;
738	};
739
740	next unless $destfile; # Unidentified testbed, ignore config
741	# local copies that can be used in the substitutions below
742	my $gwtype = $tbparams->{$ctb}->{'gwtype'};
743	my $gwimage = $tbparams->{$ctb}->{'gwimage'};
744	my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
745	my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
746	my $gwstart = $tbparams->{$ctb}->{'gwstart'};
747	my $expstart = $tbparams->{$ctb}->{'expstart'};
748	my $project = $tbparams->{$ctb}->{'project'};
749
750	# Substitute variables
751	s/GWTYPE/$gwtype/g;
752	s/GWIMAGE/$gwimage/g;
753	if ($ctb eq $master ) {
754	s/GWSTART/$mgwstart/g;
755	s/EXPSTART/$mexpstart/g;
756	}
757	else {
758	s/GWSTART/$gwstart/g;
759	s/EXPSTART/$expstart/g;
760	}
761	# XXX: oh is this bad
762	s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
763	s#PROJDIR#/proj/$project/#g;
764	s#EID#$eid#g;
765	s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
766	print $desthandle $_;
767	}
768	$pipe->close();
769	die "No nodes in master testbed ($master)\n" unless $allocated{$master};
770
771	# Copy tarfiles and rpms needed at remote sites to the staging directories.
772	# Start_segment will distribute them
773	for my $t (@tarfiles) {
774	die "tarfile '$t' unreadable: $!\n" unless -r $t;
775	unless (-d "$tmpdir/tarfiles") {
776	mkdir("$tmpdir/tarfiles") \|\|
777	die "Can't create $tmpdir/tarfiles:$!\n";
778	}
779	copy($t, "$tmpdir/tarfiles") \|\|
780	die "Can't copy $t to $tmpdir/tarfiles:$!\n";
781	}
782
783	for my $r (@rpms) {
784	die "rpm '$r' unreadable: $!\n" unless -r $r;
785	unless (-d "$tmpdir/rpms") {
786	mkdir("$tmpdir/rpms") \|\|
787	die "Can't create $tmpdir/rpms:$!\n";
788	}
789	copy($r, "$tmpdir/rpms") \|\|
790	die "Can't copy $r to $tmpdir/rpms:$!\n";
791	}
792
793	exit(0) unless $startem;
794
795	my %started; # If $started{$tb} then $tb successfully started
796	my %child; # If $child{$pid} then a process with that pid is
797	# working on a starting a segment
798	my $nworking = 0; # Number of children working on swapin
799	my $pid; # Scratch variable for pids
800
801	# Start up the slave sub-experiments first
802	TESTBED:
803	for my $tb (keys %allocated) {
804	if ( $tb ne $master ) {
805	while ( $nworking == $max_children ) {
806	print "Waiting for a child process to complete\n" if $verbose;
807	if (($pid = wait()) != -1 ) {
808	# The $? >> 8 is the exit code of the subprocess, which is
809	# non-zero if the &start_segment routine failed.
810	my $exit_code = ($? >> 8);
811
812	print "Child $pid completed exit code ($exit_code)\n"
813	if $verbose;
814	$nworking--;
815	$started{$child{$pid}}++ unless $exit_code;
816	if ($child{$pid} ) { delete $child{$pid}; }
817	else { warn "Reaped a pid we did not start?? ($pid)\n"; }
818	last TESTBED if $exit_code;
819	}
820	else { warn "wait returned without reaping: $!\n"; }
821	}
822	if ( $pid = fork() ) {
823	# Parent process
824	$nworking ++;
825	$child{$pid} = $tb;
826	print "Started process $pid to start testbed $tb\n"
827	if $verbose;
828	}
829	else {
830	# Child. Note that we reverse the sense of the return code when it
831	# becomes an exit value. Zero exit values indicate success.
832	exit(!&start_segment($tb, $eid, $tbparams, $timeout));
833	}
834	}
835	}
836
837	# Now wait for any still running processes.
838	while ( $nworking ) {
839	print "Waiting for a child process to complete ($nworking running)\n"
840	if $verbose;
841	if (($pid = wait()) != -1 ) {
842	# The $? >> 8 is the exit code of the subprocess, which is
843	# non-zero if the &start_segment routine failed.
844	my $exit_code = ($? >> 8);
845
846	print "Child $pid completed exit code ($exit_code)\n"
847	if $verbose;
848	$nworking--;
849	$started{$child{$pid}}++ unless $exit_code;
850	if ($child{$pid} ) { delete $child{$pid}; }
851	else { warn "Reaped a pid we did not start?? ($pid)\n"; }
852	}
853	else { warn "wait returned without reaping: $!\n"; }
854	}
855
856	# Now the master
857	if (&start_segment($master, $eid, $tbparams, $timeout)) {
858	$started{$master}++;
859	}
860
861	# If any testbed failed, swap the rest out.
862	if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
863	for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
864	print "Error starting experiment\n";
865	exit(1);
866	}
867	print "Experiment started\n";
868	print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
869	system("rm -rf $tmpdir") unless $debug;
870	exit(0); # set the exit value
871
872	=pod
873
874	=head1 NAME
875
876	B<splitter.pl>
877
878	=head1 SYNOPSIS
879
880	B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
881	[B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
882
883	=head1 DESCRIPTION
884
885	B<splitter.pl> invokes the DETER experiment parser to split an annotated
886	experiment into multiple sub-experments and instantiates the sub-experiments on
887	their intended testbeds. Annotation is accomplished using the
888	tb-set-node-testbed command, added to the parser.
889
890	Much of the script's behavior depends on the configuration file, specified with
891	the B<-c> flag and defaulting to F<./splitter.conf>.
892
893	The testbed labels supplied in the B<tb-set-node-testbed> command are
894	meaningful based on their presence in the testbeds file. that file can be
895	specified in the configuration file using the B<Testbeds> directive, and
896	defaults to F<./testbeds>. The syntax is described below.
897
898	Most of the intermediate files are staged in a sub-directory of a temporary
899	files directory and deleted at the end of the script. Specifying the B<-d>
900	flag on the command line avoids the deletion for debbugging. By default the
901	temporary files directory is directory is F</tmp> and can be reset in the
902	configuration file using the B<Tmpdir> directive. Intermediate files are
903	stored under a subdirectory formed by adding the process ID of the splitter
904	process. For example, if the temporary files directory is F</tmp> and the
905	B<splitter.pl> process ID is 2323, the temporary files will be stored in
906	F</tmp/split2323/>.
907
908	The expreriment is split out into one experiment description per testbed in the
909	temporary directory named as F<experiment.testbed.tcl> where the experiment is
910	the experiment ID given in the configuration file, and the testbed is the
911	tb-set-node-testbed parameter for the nodes in the file.
912
913	If the B<-n> option is absent the sub-experiments are then instantiated on
914	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
915	Per-testbed parameters are set in the testbeds file. Sub-experiments on
916	slave testbeds are instantiated in a random order, but the master testbed is
917	currently instantiated last.
918
919	Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
920	and the maximum number of simultaneous processes to start.
921
922	Scripts to start federation (the federation kit) are copied into the local
923	experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
924	taken from the directory given by the B<ScriptDir> directive in the
925	configuration file.
926
927	If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
928	F<splitter.conf>.
929
930	If any sub-experiment fails to instantiate, the other sub-exeriments are
931	swapped out. B<-F> avoids this swap out, which can also be specified as
932	B<SoftFail: true> in F<splitter.conf>
933
934	=head2 Configuration File
935
936	The configuration file is a simple set of colon-separated parameters and
937	values. A configuration file must be present, either specified in the B<-c>
938	flag or the default F<./splitter.conf>. All the parameter names are case
939	insensitive, but should not include any whitespace. Parameter values may
940	include whitespace, but no newlines.
941
942	Possible parameters are:
943
944	=over 5
945
946	=item Experiment
947
948	The name of the experiment on the various testbeds
949
950	=item Master
951
952	The master testbed label from the testbeds file, described below.
953
954	=item Testbeds
955
956	The testbeds file described below, giving per-testbed parameters. If this
957	directive is absent the testbeds file defaults to F<./testbeds>
958
959	=item ScriptDir
960
961	Location of the default federation scripts, i.e. the federation kit.
962
963	=item GatewayPubkey
964
965	=item GatewaySecretKey
966
967	The names of the files containing secret and public keys to use in setting up
968	tunnels between testbeds. If given they are used, otherwise keys are generated.
969
970	=item GatewayKeyType
971
972	This controls the kind of SSH keys generated to configure the geatways. If
973	given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>. The parameter
974	is csase insensitive.
975
976	=item TmpDir
977
978	=item TempDir
979
980	The directory where temporary files are created. These are synonyms, but
981	should both be specified, B<TmpDir> has priority. If neither is specified,
982	F</tmp> is used.
983
984	=item SMBShare
985
986	The SMB share on the master testbed that will be exported to remote clients.
987
988	=item SMBUser
989
990	The experiment user to mount project directories as. This user needs to be a
991	member of the exported experiment - that is one of the users in the project
992	containing this experiment on the master testbed.
993
994	=item Timeout
995
996	Value in seconds after which a swap-in operatioin will be considered a success.
997	Often long swap-ins will hang when there are partial failures. This works
998	around this issue. (This behavior can be requested on the command line by
999	specifying B<-t> I<secs>.)
1000
1001	=item FailSoft
1002
1003	If not set, failure of any sub experiment swaps the rest out. Setting this to
1004	any value avoids this swap out. (This behavior can be requested on the command
1005	line by specifying B<-F>.)
1006
1007	=item MuxLimit
1008
1009	The maximum bumber of links/lans carried by one gateway pair
1010
1011	=item Tclparse
1012
1013	The pathname to the experiment parsing program. Only developers should set
1014	this.
1015
1016	=item Tclsh
1017
1018	The pathname to the local oTcl shell. Only developers should set
1019	this.
1020
1021	=back
1022
1023	=head2 Testbeds file
1024
1025	The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
1026	scoped attribute-value pairs where each attribute is specified on a separate
1027	line of the configuration file. Each testbed's parameters are preceeded by the
1028	testbed label in brackets ([]) on a line by itself. After that the parameters
1029	are specified as parameter: value. This is essentially the same format as the
1030	configuration file. Parameters are:
1031
1032	=over 4
1033
1034	=item User
1035
1036	The user under which to make requests to this testbed. The user running
1037	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
1038	testbed.
1039
1040	=item OpsNode
1041
1042	The host name of the testbed's ops node. The user calling B<splitter.pl> must
1043	be able to execute commands on this host via L<ssh(1)>.
1044
1045	=item Domain
1046
1047	The domain of nodes in this testbed (including the ops host). This parameter
1048	should always start with a period.
1049
1050	=item Project
1051
1052	The project under which to instantiate sub-experiments on this testbed.
1053
1054	=item ConnectorType
1055
1056	The node type for inter-testbed connector nodes on this testbed.
1057
1058	=item SlaveNodeStartCmd
1059
1060	The start command to run on experimental nodes when this testbed is used as a
1061	slave. In all the start commands the following string substitutions are made:
1062
1063	=over 10
1064
1065	=item FEDDIR
1066
1067	The local experiment's federation scripts directory. Each local experiment
1068	will have this replaced by the scripts directory on its local boss.
1069
1070	=item GWCONF
1071
1072	The full pathname of the gateway configuration file. As with FEDDIR, this is
1073	on the local boss.
1074
1075	=item PROJDIR
1076
1077	The project directory on the local boss.
1078
1079	=item EID
1080
1081	The local experiment name.
1082
1083	=back
1084
1085	All startcmds specified in F<testbeds> undergo these expansions.
1086
1087	=item SlaveConnectorStartCmd
1088
1089	The start command to run on gateway nodes when this testbed is used as a slave.
1090	The same string substitutions are made in this command as in SlaveNodeStartCmd.
1091
1092	=item MasterNodeStartCmd
1093
1094	The start command to run on experimental nodes when this testbed is used as a
1095	master. The same string substitutions are made in this command as in
1096	SlaveNodeStartCmd.
1097
1098	=item MasterConnectorStartCmd
1099
1100	The start command to run on gateway nodes when this testbed is used as a
1101	master. The same string substitutions are made in this command as in
1102	SlaveNodeStartCmd.
1103
1104	=item ConnectorImage
1105
1106	The disk image to be loaded on a gateway node on this testbed.
1107
1108	=item FileServer
1109
1110	The node in the master testbed from which filesystems are mounted.
1111
1112	=item Boss
1113
1114	The node in the master testbed that controls the testbed.
1115
1116	=item TunnelCfg
1117
1118	True if the connector needs to do DETER federation. This parameter will
1119	probably be removed.
1120
1121
1122	=back
1123
1124	=head1 ENVIRONMENT
1125
1126	B<splitter.pl> does not directly make use of environment variables, but calls
1127	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1128	environment.
1129
1130	=head1 BUGS
1131
1132	A deprecated B<-N> flag was used to select testbeds file format. Only one
1133	format is supported now, and B<-N> generates a warning, but otherwise does not
1134	affect B<splitter.pl>.
1135
1136	=head1 SEE ALSO
1137
1138	L<sh(1)>, L<ssh(1)>
1139
1140	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: