Context Navigation

source: fedkit/splitter.pl @ 637adfa

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since 637adfa was 637adfa, checked in by Ted Faber <faber@…>, 17 years ago
docs, fail soft feature
Property mode set to `100644`
File size: 35.3 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10
11	my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12	my $local_script_dir = ".";
13	my($pid, $gid); # Process and group IDs for calling parse.tcl
14	my $splitter_config; # Configuration file
15	my $debug; # True if thecalled in debug mode
16	my $verbose; # True for extra progress reports
17	my $startem; # If true, start the sub-experiments
18	my $eid; # Experiment ID
19	my $tcl; # The experiment description (topology file)
20	my $master; # Master testbed
21	my $tmpdir; # tmp files
22	my $tb_config; # testbed configurations
23	my $smb_share; # Share to mount from the master
24	my $project_user; # User to mount project dirs as
25	my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26	my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27	# basename)
28	my $tcl_splitter; # tcl program to split experiments
29	# (changed during devel)
30	my $tclsh; # tclsh to call directly (changed during devel)
31	my @tarfiles; # Tarfiles in use by this experiment
32	my $timeout; # The timeout to use for experiment swap ins
33	my %opts; # Parsed options
34
35	my $tbparams = {}; # Map of the per-testbed parameters from the
36	# testbeds file. It is a reference to a hash
37	# of hashes (because it's passed around a bunch
38	# and it's nicer to have one access pattern
39	# throughout the script, in the main loop and
40	# the subroutines). That access is exemplified
41	# by $tbparams->{'deter'}->{'domain'} which is
42	# the domain parameter of the DETER testbed.
43	my $fail_soft; # Do not swap failed sub-experiments out
44
45	# Parse the config file. The format is a colon-separated parameter name
46	# followed by the value of that parameter to the end of the line. This parses
47	# that format and puts the parameters into the referenced hash. Parameter
48	# names are mapped to lower case, parameter values are unchanged. Returns 0 on
49	# failure (e.g. file open) and 1 on success.
50	sub parse_config {
51	my($file, $href) = @_;
52	my $fh = new IO::File($file);
53
54	unless ($fh) {
55	warn "Can't open $file: $!\n";
56	return 0;
57	}
58
59	while (<$fh>) {
60	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
61	chomp;
62	/^([^:]+):\s(.)/ && do {
63	my $key = $1;
64
65	$key =~ tr/A-Z/a-z/;
66	$href->{$key} = $2;
67	next;
68	};
69	warn "Unparasble line in $file: $_\n";
70	}
71	$fh->close(); # It will close when it goes out of scope, but...
72	return 1;
73	}
74
75	# Parse an easier-to-read testbeds file (the original was comma-separated
76	# unadorned strings). The format is a testbed scope as [testbed] followed by
77	# the colon-separated attribute-value pairs for the testbed. Right now these
78	# go into a set of global hashes indexed by testbed, but that should probably
79	# change.
80
81	sub parse_testbeds {
82	my($file, $tbparams) = @_; # Testbeds file and parameter hash
83	my $fh = new IO::File($file); # Testbeds filehandle
84	my $tb; # Current testbed
85	# Convert attribute in the file to global variable name. XXX: Again, this
86	# needs to be a 2-level hash
87	my %attr_to_hash = (
88	"opsnode" => "host",
89	"user" => "user",
90	"domain" => "domain",
91	"project" => "project",
92	"connectortype" => "gwtype",
93	"slavenodestartcmd" => "expstart",
94	"slaveconnectorstartcmd" => "gwstart",
95	"masternodestartcmd" => "mexpstart",
96	"masterconnectorstartcmd" => "mgwstart",
97	"connectorimage" => "gwimage",
98	"fileserver" => "fs",
99	"boss" => "boss",
100	"tunnelcfg" => "tun"
101	);
102
103
104	unless ($fh) {
105	warn "Can't open $file: $!\n";
106	return 0;
107	}
108
109	while (<$fh>) {
110	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
111	chomp;
112	/^\s\[(.)\]/ && do {
113	$tb = $1;
114	$tbparams->{$tb} = {} unless $tbparams->{$tb};
115	next;
116	};
117
118	/^([^:]+):\s(.)/ && do {
119	unless ($tb) {
120	warn "Ignored attribute definition before testbed " .
121	"defined in $file: $_\n";
122	next;
123	}
124	my $key = $1;
125	$key =~ tr/A-Z/a-z/;
126	my $var = $attr_to_hash{$key};
127
128	if ($var) { $tbparams->{$tb}->{$var} = $2; }
129	else { warn "Unknown keyword $key in $file\n"; }
130
131	next;
132	};
133	warn "Unparasble line in $file: $_\n";
134	}
135	$fh->close(); # It will close when it goes out of scope, but...
136	return 1;
137	}
138
139
140	# use scp to transfer a file, reporting true if successful and false otherwise.
141	# Parameters are the local file name, the ssh host destination (either hostname
142	# oe user@host), and an optional destination file name or directory. If no
143	# destination is given, the file is transferred to the given user's home
144	# directory. If only a machine is given in the ssh host destination, the
145	# current user is used.
146	sub scp_file {
147	my($file, $user, $host, $dest) = @_;
148
149	# XXX system with a relative pathname is sort of gross
150	system("scp $file $user\@$host:$dest");
151	if ($?) {
152	warn "scp failed $?\n";
153	return 0;
154	}
155	else { return 1; }
156	}
157
158	# use ssh to execute the given command on the machine (and as the user) in
159	# $where. Parameters are the ssh destination directive ($where) and the
160	# command to execute, and a prefix to be placed on a message generated if the
161	# command fails. On failure print a warning if a warning prefix was given and
162	# return false. If timeout is given fork a process and set an alarm of that
163	# many seconds. Timeouts also return 0;
164	sub ssh_cmd {
165	my($user, $host, $cmd, $wname, $timeout) = @_;
166	my $pid; # Child pid
167
168	if ( $timeout ) {
169	if ( $pid = fork () ) {
170	# Parent process
171	# The eval acts as a signal catcher. If the alarm goes off inside
172	# the eval, the die will put "alarm\n" into $@, otherwise the
173	# return value of the execution in the child process will be used.
174	my $rv = eval {
175	local $SIG{'ALRM'} = sub{ die "alarm\n"; };
176	my $rv;
177
178	alarm $timeout;
179	$rv = waitpid($pid, 0);
180	alarm 0;
181	$rv;
182	};
183
184	# If the eval succeeded, $@ will be null and we can use $rv, which
185	# is the return code from the subprocess. If the eval timed out,
186	# print a warning and assume the best.
187	if ($@ eq "alarm\n" ) {
188	warn "$wname timed out - pid $pid still live\n";
189	return 1;
190	}
191	else {
192	return $rv;
193	}
194	}
195	else {
196	# Child process
197	# XXX system with a relative pathname is sort of gross
198	exec("ssh $user\@$host $cmd");
199	exit 0;
200	}
201	}
202	else {
203	# No timeout, do all the work here.
204	# XXX system with a relative pathname is sort of gross
205	system ("ssh $user\@$host $cmd");
206	if ($?) {
207	warn "$wname failed $?\n" if $wname;
208	return 0;
209	}
210	else { return 1; }
211	}
212	}
213
214	# Ship local copies of the federation scripts out to the given host. If any of
215	# the script transfers fails, return 0. The scripts to transfer are from the
216	# global @scripts and are found locally in $local_script_dir (another global).
217	sub ship_scripts {
218	my($host, $user, $dest_dir) = @_; # Where, who, where remotely
219	my $s;
220
221	&ssh_cmd($user, $host, "mkdir -p $dest_dir");
222	for $s (@scripts) {
223	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) \|\|
224	return 0;
225	}
226	return 1;
227	}
228
229	# Ship per-testbed configuration generated by this script to the remote /proj
230	# directories on the remote testbeds
231	sub ship_configs {
232	my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely
233	my($d, $f);
234
235	$d = IO::Dir->new($src_dir) \|\| return 0;
236
237	# All directories under $tmpdir are 770 so we can delete them later.
238	&ssh_cmd($user, $host, "mkdir -p $dest_dir") \|\| return 0;
239	&ssh_cmd($user, $host, "chmod 770 $dest_dir") \|\| return 0;
240	while ( $f = $d->read()) {
241	next if $f =~ /^\./;
242	if ( -d "$src_dir/$f" ) {
243	&ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") \|\|
244	return 0;
245	}
246	else {
247	&scp_file("$src_dir/$f", $user, $host, $dest_dir) \|\| return 0;
248	}
249	}
250	return 1;
251	}
252
253	# Start a sub section of the experiment on a given testbed. The testbed and
254	# the user to start the experiment as are pulled from the global per-testbed
255	# hash, passed in as $tbparams, as is the project name on the remote testbed.
256	# Parameters are the testbed and the experiment id. Configuration files are
257	# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
258	# Then the current state of the experiment determined using expinfo. From that
259	# state, the experiment is either created, modified or spapped in. If
260	# everything succeeds, true is returned. If the global verbose is set progress
261	# messages are printed.
262	sub start_segment {
263	my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
264	# per-testbed parameters and remote
265	# swap-in timeout
266	my $host = # Host name of remote ops (FQDN)
267	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
268	my $user = $tbparams->{$tb}->{'user'}; # user to pass to ssh
269	my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
270	# experiment under
271	my $tclfile = "./$eid.$tb.tcl"; # Local tcl file with the
272	# sub-experiment
273	my $proj_dir = "/proj/$pid/exp/$eid/tmp"; # Where to stash federation stuff
274	my $tarfiles_dir = "/proj/$pid/tarfiles/$eid"; # Where to stash tarfiles
275	my $to_hostname = "$proj_dir/hosts"; # remote hostnames file
276	my $state; # State of remote experiment
277	my $status = new IO::Pipe; # The pipe to get status
278
279	# Determine the status of the remote experiment
280	$status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") \|\|
281	die "Can't ssh to $user\@$host:$!\n";
282
283	# XXX: this is simple now. Parsing may become more complex
284	while (<$status>) {
285	/State: (\w+)/ && ($state = $1);
286	/No\s+such\s+experiment/ && ($state = "none");
287	}
288	$status->close();
289	print "$tb: $state\n";
290
291	# Copy the experiment definition data over
292	print "transferring subexperiment to $tb\n" if $verbose;
293	&scp_file("$tmpdir/$tb/$tclfile", $user, $host) \|\| return 0;
294	# Clear out any old experiment data; if not deleted, copies over it by
295	# different users will fail.
296	# (O /bin/csh, how evil thou art. The -c and the escaped single quotes
297	# force the /bin/sh interpretation of the trailing * (which we need to keep
298	# tmp around)) Again, this needs to be done more properly once we have a
299	# non-ssh interface here.)
300	print "clearing experiment subdirs on $tb\n" if $verbose;
301	&ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") \|\|
302	return 0;
303	print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
304	&ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") \|\|
305	return 0;
306	print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
307	&ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") \|\|
308	return 0;
309	# Remote experiment is active. Modify it.
310	if ($state eq "active") {
311	print "Transferring federation support files to $tb\n" if $verbose;
312	# First copy new scripts and hostinfo into the remote /proj
313	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
314	return 0;
315	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
316	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
317	if ( -d "$tmpdir/tarfiles") {
318	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
319	return 0;
320	}
321
322	print "Modifying $eid in place on $tb\n" if $verbose;
323	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
324	"$eid $tclfile", "modexp", $timeout) \|\| return 0;
325	return 1;
326	}
327
328	# Remote experiment is swapped out, modify it and swap it in.
329	if ($state eq "swapped") {
330	print "Transferring federation support files to $tb\n" if $verbose;
331	# First copy new scripts and hostinfo into the remote /proj (because
332	# the experiment exists, the directory tree should be there.
333	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
334	return 0;
335	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
336	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
337	if ( -d "$tmpdir/tarfiles") {
338	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
339	return 0;
340	}
341
342	print "Modifying $eid on $tb\n" if $verbose;
343	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
344	"modexp") \|\| return 0;
345	print "Swapping $eid in on $tb\n" if $verbose;
346	# Now start up
347	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
348	"swapexp", $timeout) \|\| return 0;
349	return 1;
350	}
351
352	# No remote experiment. Create one. We do this in 2 steps so we can put
353	# the configuration files and scripts into the new experiment directories.
354	if ($state eq "none") {
355
356	if ( -d "$tmpdir/tarfiles") {
357	# Tarfiles have to exist for the creation to work
358	print "copying tarfiles to $tb\n";
359	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
360	return 0;
361	}
362	print "Creating $eid on $tb\n" if $verbose;
363	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
364	"$pid -e $eid $tclfile", "startexp") \|\| return 0;
365	print "Transferring federation support files to $tb\n" if $verbose;
366	# First copy new scripts and hostinfo into the remote /proj
367	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
368	return 0;
369	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
370	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
371	# Now start up
372	print "Swapping $eid in on $tb\n" if $verbose;
373	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
374	"swapexp", $timeout) \|\| return 0;
375	return 1;
376	}
377
378	# Every branch for a known state returns. If execution gets here, the
379	# state is unknown.
380	warn "unknown state: $state\n";
381	return 0;
382	}
383
384	# Swap out a sub-experiment - probably because another has failed. Arguments
385	# are testbed and experiment. Most of the control flow is similar to
386	# start_segment, though much simpler.
387	sub stop_segment {
388	my($tb, $eid, $tbparams) = @_; # testbed, experiment ID and
389	# per-testbed parameters
390	my $user = $tbparams->{$tb}->{'user'}; # testbed user
391	my $host = # Ops node
392	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
393	my $pid = $tbparams->{$tb}->{'project'};# testbed project
394
395	print "Stopping $eid on $tb\n" if $verbose;
396	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out",
397	"swapexp (out)") \|\| return 0;
398	return 1;
399	}
400
401
402	$pid = $gid = "dummy"; # Default project and group to pass to
403	# $tcl_splitter above. These are total
404	# dummy arguments; the splitter doesn't
405	# use them at all, but we supply them to
406	# keep our changes to the parser minimal.
407	# Argument processing.
408	getopts('Ft:c:f:ndvN', \%opts);
409	$splitter_config = $opts{'c'} \|\| "./splitter.conf";
410	$debug = $opts{'d'};
411	$verbose = $opts{'v'} \|\| $opts{'d'};
412
413	&parse_config("$splitter_config", \%opts) \|\|
414	die "Cannot read config file $splitter_config: $!\n";
415	$fail_soft = $opts{'F'} \|\| $opts{'failsoft'};
416
417	$startem = $opts{'n'} ? 0 : 1; # If true, start the sub-experiments
418	$timeout = $opts{'t'} \|\| $opts{'timeout'};
419	$eid = $opts{'experiment'}; # Experiment ID
420	$tcl = $opts{'f'} \|\| shift; # The experiment description
421	$master = $opts{'master'}; # Master testbed
422	$tmpdir = $opts{'tmpdir'} \|\| $opts{'tempdir'}\|\| "/tmp"; # tmp files
423	$tb_config = $opts{'testbeds'} \|\| "./testbeds"; # testbed configurations
424	$local_script_dir = $opts{'scriptdir'}; # Local scripts
425
426	$smb_share = $opts{'smbshare'} \|\| # Share to mount from the master
427	die "Must give an SMB share\n";
428	$project_user = $opts{'smbuser'} \|\| # User to mount project dirs as
429	die "Must give an SMB user\n";
430
431	# For now specify these. We may want to generate them later.
432	$gw_pubkey = $opts{'gatewaypubkey'};
433	($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
434	$gw_secretkey = $opts{'gatewaysecretkey'};
435	($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
436
437	# tcl program to split experiments (changed during devel)
438	$tcl_splitter = $opts{'tclparse'} \|\| "/usr/testbed/lib/ns2ir/parse.tcl";
439	# tclsh to call directly (changed during devel)
440	$tclsh = $opts{'tclsh'} \|\| "/usr/local/bin/otclsh";
441
442	# Prefix to avoid collisions
443	$tmpdir .= "/split$$";
444
445	print "Temp files are in $tmpdir\n" if $verbose;
446	# Create a workspace
447	unless (-d "$tmpdir") {
448	mkdir("$tmpdir") \|\| die "Can't create $tmpdir: $!";
449	}
450
451	# Validate scripts directory
452	for my $s (@scripts) {
453	die "$local_script_dir/$s not in local script directory. Try -d\n"
454	unless -r "$local_script_dir/$s";
455	}
456
457	die "Must supply file, master and experiment" unless $master && $tcl && $eid;
458
459	if ($opts{'N'} ) {
460	&parse_testbeds($tb_config, $tbparams) \|\|
461	die "Cannot testbed congfigurations from $tb_config: $!\n";
462	}
463	else {
464	# Read a hash of per-testbed parameters from the local configurations.
465	my $conf = new IO::File($tb_config) \|\|
466	die "can't read testbed configutions from $tb_config: $!\n";
467	while (<$conf>) {
468	next if /^#/;
469	chomp;
470	my($tb, $h, $d, $u, $p, $es, $gs, $mes, $mgs, $t, $i, $fs, $boss,
471	$tun) = split(":", $_);
472	$tbparams->{$tb}->{'host'} = $h;
473	$tbparams->{$tb}->{'user'} = $u;
474	$tbparams->{$tb}->{'domain'} = $d;
475	$tbparams->{$tb}->{'project'} = $p;
476	$tbparams->{$tb}->{'gwtype'} = $t;
477	$tbparams->{$tb}->{'expstart'} = $es;
478	$tbparams->{$tb}->{'gwstart'} = $gs;
479	$tbparams->{$tb}->{'mexpstart'} = $mes;
480	$tbparams->{$tb}->{'mgwstart'} = $mgs;
481	$tbparams->{$tb}->{'gwimage'} = $i;
482	$tbparams->{$tb}->{'fs'} = $fs;
483	$tbparams->{$tb}->{'boss'} = $boss;
484	$tbparams->{$tb}->{'tun'} = $tun;
485
486	# Make sure the domain starts with a period
487	$tbparams->{$tb}->{'domain'} = "." . $tbparams->{$tb}->{'domain'}
488	unless $tbparams->{$tb}->{'domain'} =~ /^\./;
489	}
490	$conf->close();
491	}
492
493	# Open a pipe to the splitter program and start it parsing the experiments
494	my $pipe = new IO::Pipe;
495	# NB no more -p call on parse call.
496	$pipe->reader("$tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl") \|\|
497	die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
498
499	# Parsing variables
500	my $ctb; # Current testbed
501	my %allocated; # If allocated{$tb} > 0, $tb is in use
502	my $destfile; # File that the sub-experiment tcl file is
503	# being written to, or "" if none. Also used
504	# for hostnames file.
505	my $desthandle; # File handle for distfile
506	my $gateways; # when gateway lists are being processed this
507	# is the testbed whose gateways are being
508	# gathered.
509	my $control_gateway; # Control net gateway for the current testbed
510	my %active_end; # If active_end{"a-b"} > 0 then a is the active
511	# end of the a <-> b connector pair.
512
513	# Parse the splitter output. This loop creates the sub experiments, gateway
514	# configurations and hostnames file
515	while (<$pipe>) {
516	# Start of a sub-experiment
517	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
518	$ctb = $1;
519
520	# If we know the testbed, start collecting its sub experiment tcl
521	# description. If not, warn the caller and ignore the configuration of
522	# this testbed.
523	if ($tbparams->{$ctb}) {
524	$allocated{$ctb}++; # Keep track of the testbeds allocated
525
526	unless (-d "$tmpdir/$ctb") {
527	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
528	}
529	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
530
531	$desthandle = new IO::File(">$destfile") \|\|
532	die "Cannot open $destfile:$!\n";
533	}
534	else {
535	warn "No such testbed $ctb\n";
536	$destfile = "";
537	}
538	next;
539	};
540
541	# End of that experiment
542	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
543	# Simple syntax check and close out this experiment's tcl description
544	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
545	$desthandle->close();
546	$destfile = $ctb = "";
547	next;
548	};
549
550	# Beginning of a gateway set
551	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
552	$gateways = $1;
553	# If we've heard of this tb, create the config lines for it one at a
554	# time.
555	if ($allocated{$gateways}) {
556	# Just in case. This directory should already have been created
557	# above.
558	unless (-d "$tmpdir/$gateways") {
559	mkdir("$tmpdir/$gateways") \|\|
560	die "Can't create $tmpdir/$gateways: $!";
561	}
562	}
563	else {
564	warn "Gateways given (and ignored) for testbed not in use: " .
565	"$gateways\n";
566	$gateways = 0;
567	}
568	next;
569	};
570	# End of the gateways section. Output the client config for this testbed
571	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
572	die "Mismatched gateway markers ($1, $gateways)\n"
573	unless !$gateways \|\| $gateways == $1;
574
575	if ($control_gateway ) {
576	# Client config
577	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
578	my $master_project = $tbparams->{$master}->{'project'};
579	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
580	print $cc "ControlGateway: $control_gateway\n";
581	print $cc "SMBShare: $smb_share\n";
582	print $cc "ProjectUser: $project_user\n";
583	print $cc "ProjectName: $master_project\n";
584	$cc->close();
585	}
586	else { warn "No control gateway for $gateways?\n"; }
587
588	$gateways = 0;
589	next;
590	};
591	# Beginning of the hostnames list. Collection is always in the hostnames
592	# file.
593	/^#\s+Begin\s+hostnames/ && do {
594	$destfile = "$tmpdir/hostnames";
595	$desthandle = new IO::File(">$destfile") \|\|
596	die "Can't open $destfile:$!\n";
597	next;
598	};
599	# end of the hostnames list.
600	/^#\s+End\s+hostnames/ && do {
601	$desthandle->close();
602	$destfile = "";
603	next;
604	};
605
606	# Generate gateway configuration info, one file per line
607	$gateways && do {
608	chomp;
609	my($dtb, $myname, $desthost, $type) = split(" ", $_);
610
611	# Many of these are to simplify print statements
612	my $sdomain = # domain for the source
613	$tbparams->{$gateways}->{'domain'};
614	my $ddomain = # domain for the destination
615	$tbparams->{$dtb}->{'domain'};
616	my $sproject = # Project of the destination
617	$tbparams->{$gateways}->{'project'};
618	my $fs = # Master fs node (FQDN)
619	$tbparams->{$master}->{'fs'} . $tbparams->{$master}->{'domain'};
620	my $boss = # Master boss node (FQDN)
621	$tbparams->{$master}->{'boss'} . $tbparams->{$master}->{'domain'};
622	my $remote_script_dir = # Remote fed script location
623	"/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
624	my $active; # Is this the active side of
625	# the connector?
626
627	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
628	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
629
630	my $conf_file = "$myname$sdomain.gw.conf";
631	# translate to lower case so the `hostname` hack for specifying
632	# configuration files works.
633	$conf_file =~ tr/A-Z/a-z/;
634
635	# If either end of this link is in the master side of the testbed, that
636	# side is the active end. Otherwise the first testbed encountered in
637	# the file will be the active end. The $active_end variable keeps
638	# track of those decisions
639	if ( $dtb eq $master ) { $active = "false"; }
640	elsif ($gateways eq $master ) { $active = "true"; }
641	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
642	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
643
644	# This is used to create the client configuration.
645	$control_gateway = "$myname$sdomain"
646	if $type =~ /(control\|both)/;
647
648	# Write out the file
649	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")\|\|
650	die "can't open $tmpdir/$gateways/$conf_file: $!\n";
651
652	print $gwconfig "Active: $active\n";
653	print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
654	print $gwconfig "BossName: $boss\n";
655	print $gwconfig "FsName: $fs\n";
656	print $gwconfig "Type: $type\n";
657	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
658	print $gwconfig "Peer: $desthost$ddomain\n";
659	print $gwconfig "Pubkeys: " .
660	"/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
661	print $gwconfig "Privkeys: " .
662	"/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
663	$gwconfig->close();
664
665	# This testbed has a gateway (most will) so make a copy of the keys it
666	# needs in this testbed's subdirectory. start_segment will transfer
667	# them.
668	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
669	copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") \|\|
670	die "Can't copy pubkeys ($gw_pubkey to " .
671	"$tmpdir/$gateways/$gw_pubkey_base): $!\n";
672	}
673	if ($active eq "true" ) {
674	unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
675	copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") \|\|
676	die "Can't copy secret keys ($gw_secretkey to " .
677	"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
678	}
679	}
680
681	#done processing gateway entry, ready for next line
682	next;
683	};
684	(/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
685	next if /^#/;
686	chomp;
687	push(@tarfiles, $_);
688	next;
689	};
690
691	next unless $destfile; # Unidentified testbed, ignore config
692	# local copies that can be used in the substitutions below
693	my $gwtype = $tbparams->{$ctb}->{'gwtype'};
694	my $gwimage = $tbparams->{$ctb}->{'gwimage'};
695	my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
696	my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
697	my $gwstart = $tbparams->{$ctb}->{'gwstart'};
698	my $expstart = $tbparams->{$ctb}->{'expstart'};
699	my $project = $tbparams->{$ctb}->{'project'};
700
701	# Substitute variables
702	s/GWTYPE/$gwtype/g;
703	s/GWIMAGE/$gwimage/g;
704	if ($ctb eq $master ) {
705	s/GWSTART/$mgwstart/g;
706	s/EXPSTART/$mexpstart/g;
707	}
708	else {
709	s/GWSTART/$gwstart/g;
710	s/EXPSTART/$expstart/g;
711	}
712	# XXX: oh is this bad
713	s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
714	s#PROJDIR#/proj/$project/#g;
715	s#EID#$eid#g;
716	s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
717	print $desthandle $_;
718	}
719	$pipe->close();
720	die "No nodes in master testbed ($master)\n" unless $allocated{$master};
721
722	for my $t (@tarfiles) {
723	die "tarfile '$t' unreadable: $!\n" unless -r $t;
724	unless (-d "$tmpdir/tarfiles") {
725	mkdir("$tmpdir/tarfiles") \|\|
726	die "Can't create $tmpdir/tarfiles:$!\n";
727	}
728	copy($t, "$tmpdir/tarfiles") \|\|
729	die "Can't copy $t to $tmpdir/tarfiles:$!\n";
730	}
731
732	exit(0) unless $startem;
733
734	my %started; # If $started{$tb} then $tb successfully started
735
736	# Start up the slave sub-experiments first
737	TESTBED:
738	for my $tb (keys %allocated) {
739	if ($tb ne $master) {
740	if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
741	else { last TESTBED; }
742	}
743	}
744
745	# Now the master
746	if (&start_segment($master, $eid, $tbparams, $timeout)) {
747	$started{$master}++;
748	}
749
750	# If any testbed failed, swap the rest out.
751	if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
752	for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
753	print "Error starting experiment\n";
754	exit(1);
755	}
756	print "Experiment started\n";
757	print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
758	system("rm -rf $tmpdir") unless $debug;
759	exit(0); # set the exit value
760
761	=pod
762
763	=head1 NAME
764
765	B<splitter.pl>
766
767	=head1 SYNOPSIS
768
769	B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
770	[B<-f> F<experiment_tcl>] [F<experiment_tcl>]
771
772	=head1 DESCRIPTION
773
774	B<splitter.pl> invokes the DETER experiment parser to split an annotated
775	experiment into multiple sub-experments and instantiates the sub-experiments on
776	their intended testbeds. Annotation is accomplished using the
777	tb-set-node-testbed command, added to the parser.
778
779	Much of the script's behavior depends on the configuration file, specified with
780	the B<-c> flag and defaulting to F<./splitter.conf>.
781
782	The testbed labels supplied in the B<tb-set-node-testbed> command are
783	meaningful based on their presence in the testbeds file. that file can be
784	specified in the configuration file using the B<Testbeds> directive, and
785	defaults to F<./testbeds>. The syntax is described below.
786
787	Most of the intermediate files are staged in a sub-directory of a temporary
788	files directory and deleted at the end of the script. Specifying the B<-d>
789	flag on the command line avoids the deletion for debbugging. By default the
790	temporary files directory is directory is F</tmp> and can be reset in the
791	configuration file using the B<Tmpdir> directive. Intermediate files are
792	stored under a subdirectory formed by adding the process ID of the splitter
793	process. For example, if the temporary files directory is F</tmp> and the
794	B<splitter.pl> process ID is 2323, the temporary files will be stored in
795	F</tmp/split2323/>.
796
797	The expreriment is split out into one experiment description per testbed in the
798	temporary directory named as F<experiment.testbed.tcl> where the experiment is
799	the experiment ID given in the configuration file, and the testbed is the
800	tb-set-node-testbed parameter for the nodes in the file.
801
802	If the B<-n> option is absent the sub-experiments are then instantiated on
803	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
804	Per-testbed parameters are set in the testbeds file. Sub-experiments on
805	slave testbeds are instantiated in a random order, but the master testbed is
806	currently instantiated last.
807
808	Scripts to start federation (the federation kit) are copied into the local
809	experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
810	taken from the directory given by the B<ScriptDir> directive in the
811	configuration file.
812
813	If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
814	F<splitter.conf>.
815
816	If any sub-experiment fails to instantiate, the other sub-exeriments are
817	swapped out. B<-F> avoids this swap out, which can also be specified as
818	B<SoftFail: true> in F<splitter.conf>
819
820	=head2 Configuration File
821
822	The configuration file is a simple set of colon-separated parameters and
823	values. A configuration file must be present, either specified in the B<-c>
824	flag or the default F<./splitter.conf>. All the parameter names are case
825	insensitive, but should not include any whitespace. Parameter values may
826	include whitespace, but no newlines.
827
828	Possible parameters are:
829
830	=over 5
831
832	=item Experiment
833
834	The name of the experiment on the various testbeds
835
836	=item Master
837
838	The master testbed label from the testbeds file, described below.
839
840	=item Testbeds
841
842	The testbeds file described below, giving per-testbed parameters. If this
843	directive is absent the testbeds file defaults to F<./testbeds>
844
845	=item ScriptDir
846
847	Location of the default federation scripts, i.e. the federation kit.
848
849	=item GatewayPubkey
850
851	=item GatewaySecretKey
852
853	The names of the files containing secret and public keys to use in setting up
854	tunnels between testbeds. These will eventually be automatically generated.
855
856	=item TmpDir
857
858	=item TempDir
859
860	The directory where temporary files are created. These are synonyms, but
861	should both be specified, B<TmpDir> has priority. If neither is specified,
862	F</tmp> is used.
863
864	=item SMBShare
865
866	The SMB share on the master testbed that will be exported to remote clients.
867
868	=item SMBUser
869
870	The experiment user to mount project directories as. This user needs to be a
871	member of the exported experiment - that is one of the users in the project
872	containing this experiment on the master testbed.
873
874	=item Tclparse
875
876	The pathname to the experiment parsing program. Only developers should set
877	this.
878
879	=item Tclsh
880
881	The pathname to the local oTcl shell. Only developers should set
882	this.
883
884	=item Timeout
885
886	Value in seconds after which a swap-in operatioin will be considered a success.
887	Often long swap-ins will hang when there are partial failures. This works
888	around this issue.
889
890	=item FailSoft
891
892	If not set, failure of any sub experiment swaps the rest out. Setting this to any value avoids this swap out. (Also B<-F>.)
893
894	=back
895
896	=head2 Testbeds file
897
898	The configuration file (F<./testbeds> unless overridden by B<-c>) is a
899	colon-separated set of parameters keyed by testbed name. The fields, in order,
900	are:
901
902	=over 5
903
904	=item name
905
906	The testbed to which this line of parameters applies.
907
908	=item user
909
910	The user under which to make requests to this testbed. The user running
911	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
912	testbed.
913
914	=item host
915
916	The host name of the testbed's ops node. The user calling B<splitter.pl> must
917	be able to execute commands on this host via L<ssh(1)>.
918
919	=item domain
920
921	The domain of nodes in this testbed (including the ops host). This parameter
922	should always start with a period.
923
924	=item project
925
926	The project under which to instantiate sub-experiments on this testbed.
927
928	=item gateway type
929
930	The node type for inter-testbed gateway nodes on this testbed.
931
932	=item experiment start (slave)
933
934	The start command to run on experimental nodes when this testbed is used as a
935	slave. In all the start commands the string FEDDIR will be replaced by the
936	local experiment's federation scripts directory and the string GWCONF replaced
937	by the gatway configuration file.
938
939	=item gateway start (slave)
940
941	The start command to run on gateway nodes when this testbed is used as a slave.
942	The same string substitutions are made in this command as in experiment start.
943
944	=item experiment start (master)
945
946	The start command to run on experimental nodes when this testbed is used as a
947	master. The same string substitutions are made in this command as in
948	experiment start.
949
950	=item gateway start (master)
951
952	The start command to run on gateway nodes when this testbed is used as a
953	master. The same string substitutions are made in this command as in
954	experiment start.
955
956	=item gateway image
957
958	The disk image to be loaded on a gateway node on this testbed.
959
960	=item filesystem node
961
962	The node in the master testbed from which filesystems are mounted.
963
964	=item boss node
965
966	The node in the master testbed that controls the testbed.
967
968	=item tunnel configuration
969
970	True if the connector needs to do DETER federation. This parameter will
971	probably be removed.
972
973	=back
974
975	The parsing of the testbeds is extremely simple. Colons separate each
976	field and there is n provision for escaping them at this time.
977
978	=head2 New Testbeds File Format
979
980	The testbeds file has ben revamped to make it more human readable. The
981	parameters are now named and specified on separate lines of the configuration
982	file. Each testbed's parameters are preceeded by the testbed label in brackets
983	([]) on a line by itself. After that the parameters are specified as
984	parameter: value. This is essentially the same format as the configuration
985	file. Parameters are:
986
987	=over 4
988
989	=item User
990
991	The user under which to make requests to this testbed. The user running
992	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
993	testbed.
994
995	=item OpsNode
996
997	The host name of the testbed's ops node. The user calling B<splitter.pl> must
998	be able to execute commands on this host via L<ssh(1)>.
999
1000	=item Domain
1001
1002	The domain of nodes in this testbed (including the ops host). This parameter
1003	should always start with a period.
1004
1005	=item Project
1006
1007	The project under which to instantiate sub-experiments on this testbed.
1008
1009	=item ConnectorType
1010
1011	The node type for inter-testbed connector nodes on this testbed.
1012
1013	=item SlaveNodeStartCmd
1014
1015	The start command to run on experimental nodes when this testbed is used as a
1016	slave. In all the start commands the string FEDDIR will be replaced by the
1017	local experiment's federation scripts directory and the string GWCONF replaced
1018	by the gatway configuration file.
1019
1020	=item SlaveConnectorStartCmd
1021
1022	The start command to run on gateway nodes when this testbed is used as a slave.
1023	The same string substitutions are made in this command as in experiment start.
1024
1025	=item MasterNodeStartCmd
1026
1027	The start command to run on experimental nodes when this testbed is used as a
1028	master. The same string substitutions are made in this command as in
1029	experiment start.
1030
1031	=item MasterConnectorStartCmd
1032
1033	The start command to run on gateway nodes when this testbed is used as a
1034	master. The same string substitutions are made in this command as in
1035	experiment start.
1036
1037	=item ConnectorImage
1038
1039	The disk image to be loaded on a gateway node on this testbed.
1040
1041	=item FileServer
1042
1043	The node in the master testbed from which filesystems are mounted.
1044
1045	=item Boss
1046
1047	The node in the master testbed that controls the testbed.
1048
1049	=item TunnelCfg
1050
1051	True if the connector needs to do DETER federation. This parameter will
1052	probably be removed.
1053
1054
1055	=back
1056
1057
1058	=head1 ENVIRONMENT
1059
1060	B<splitter.pl> does not directly make use of environment variables, but calls
1061	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1062	environment.
1063
1064	=head1 SEE ALSO
1065
1066	L<sh(1)>, L<ssh(1)>
1067
1068	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: