Context Navigation

source: fedkit/splitter.pl @ 33548e1

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since 33548e1 was 33548e1, checked in by Ted Faber <faber@…>, 17 years ago
Remove -N : all testbeds files use the "new" format now Update docs for new textbeds format Neaten up the timeout code in ssh_cmd
Property mode set to `100644`
File size: 33.1 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10
11	my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12	my $local_script_dir = ".";
13	my($pid, $gid); # Process and group IDs for calling parse.tcl
14	my $splitter_config; # Configuration file
15	my $debug; # True if thecalled in debug mode
16	my $verbose; # True for extra progress reports
17	my $startem; # If true, start the sub-experiments
18	my $eid; # Experiment ID
19	my $tcl; # The experiment description (topology file)
20	my $master; # Master testbed
21	my $tmpdir; # tmp files
22	my $tb_config; # testbed configurations
23	my $smb_share; # Share to mount from the master
24	my $project_user; # User to mount project dirs as
25	my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26	my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27	# basename)
28	my $tcl_splitter; # tcl program to split experiments
29	# (changed during devel)
30	my $tclsh; # tclsh to call directly (changed during devel)
31	my @tarfiles; # Tarfiles in use by this experiment
32	my @rpms; # Rpms in use by this experiment
33	my $timeout; # The timeout to use for experiment swap ins
34	my %opts; # Parsed options
35
36	my $tbparams = {}; # Map of the per-testbed parameters from the
37	# testbeds file. It is a reference to a hash
38	# of hashes (because it's passed around a bunch
39	# and it's nicer to have one access pattern
40	# throughout the script, in the main loop and
41	# the subroutines). That access is exemplified
42	# by $tbparams->{'deter'}->{'domain'} which is
43	# the domain parameter of the DETER testbed.
44	my $fail_soft; # Do not swap failed sub-experiments out
45
46	# Parse the config file. The format is a colon-separated parameter name
47	# followed by the value of that parameter to the end of the line. This parses
48	# that format and puts the parameters into the referenced hash. Parameter
49	# names are mapped to lower case, parameter values are unchanged. Returns 0 on
50	# failure (e.g. file open) and 1 on success.
51	sub parse_config {
52	my($file, $href) = @_;
53	my $fh = new IO::File($file);
54
55	unless ($fh) {
56	warn "Can't open $file: $!\n";
57	return 0;
58	}
59
60	while (<$fh>) {
61	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
62	chomp;
63	/^([^:]+):\s(.)/ && do {
64	my $key = $1;
65
66	$key =~ tr/A-Z/a-z/;
67	$href->{$key} = $2;
68	next;
69	};
70	warn "Unparasble line in $file: $_\n";
71	}
72	$fh->close(); # It will close when it goes out of scope, but...
73	return 1;
74	}
75
76	# Parse an easier-to-read testbeds file (the original was comma-separated
77	# unadorned strings). The format is a testbed scope as [testbed] followed by
78	# the colon-separated attribute-value pairs for the testbed. Right now these
79	# go into a set of global hashes indexed by testbed, but that should probably
80	# change.
81
82	sub parse_testbeds {
83	my($file, $tbparams) = @_; # Testbeds file and parameter hash
84	my $fh = new IO::File($file); # Testbeds filehandle
85	my $tb; # Current testbed
86	# Convert attribute in the file to tbparams hash key
87	my %attr_to_hash = (
88	"opsnode" => "host",
89	"user" => "user",
90	"domain" => "domain",
91	"project" => "project",
92	"connectortype" => "gwtype",
93	"slavenodestartcmd" => "expstart",
94	"slaveconnectorstartcmd" => "gwstart",
95	"masternodestartcmd" => "mexpstart",
96	"masterconnectorstartcmd" => "mgwstart",
97	"connectorimage" => "gwimage",
98	"fileserver" => "fs",
99	"boss" => "boss",
100	"tunnelcfg" => "tun"
101	);
102
103
104	unless ($fh) {
105	warn "Can't open $file: $!\n";
106	return 0;
107	}
108
109	while (<$fh>) {
110	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
111	chomp;
112	/^\s\[(.)\]/ && do {
113	$tb = $1;
114	$tbparams->{$tb} = {} unless $tbparams->{$tb};
115	next;
116	};
117
118	/^([^:]+):\s(.)/ && do {
119	unless ($tb) {
120	warn "Ignored attribute definition before testbed " .
121	"defined in $file: $_\n";
122	next;
123	}
124	my $key = $1;
125	$key =~ tr/A-Z/a-z/;
126	my $var = $attr_to_hash{$key};
127
128	if ($var) { $tbparams->{$tb}->{$var} = $2; }
129	else { warn "Unknown keyword $key in $file\n"; }
130
131	next;
132	};
133	warn "Unparasble line in $file: $_\n";
134	}
135	$fh->close(); # It will close when it goes out of scope, but...
136	return 1;
137	}
138
139
140	# use scp to transfer a file, reporting true if successful and false otherwise.
141	# Parameters are the local file name, the ssh host destination (either hostname
142	# oe user@host), and an optional destination file name or directory. If no
143	# destination is given, the file is transferred to the given user's home
144	# directory. If only a machine is given in the ssh host destination, the
145	# current user is used.
146	sub scp_file {
147	my($file, $user, $host, $dest) = @_;
148
149	# XXX system with a relative pathname is sort of gross
150	system("scp $file $user\@$host:$dest");
151	if ($?) {
152	warn "scp failed $?\n";
153	return 0;
154	}
155	else { return 1; }
156	}
157
158	# use ssh to execute the given command on the machine (and as the user) in
159	# $where. Parameters are the ssh destination directive ($where) and the
160	# command to execute, and a prefix to be placed on a message generated if the
161	# command fails. On failure print a warning if a warning prefix was given and
162	# return false. If timeout is given fork a process and set an alarm of that
163	# many seconds. Timeouts also return 0;
164	sub ssh_cmd {
165	my($user, $host, $cmd, $wname, $timeout) = @_;
166	my $pid; # Child pid
167
168	$timeout = 0 unless $timeout; # Force default timeout
169
170	if ( $pid = fork () ) {
171	# Parent process
172	# The eval acts as a signal catcher. If the alarm goes off inside
173	# the eval, the die will put "alarm\n" into $@, otherwise the
174	# return value of the execution in the child process will be used.
175	my $rv = eval {
176	local $SIG{'ALRM'} = sub{ die "alarm\n"; };
177	my $rv;
178
179	alarm $timeout;
180	$rv = waitpid($pid, 0);
181	alarm 0;
182	$rv;
183	};
184
185	# If the eval succeeded, $@ will be null and we can use $rv, which
186	# is the return code from the subprocess. If the eval timed out,
187	# print a warning and assume the best.
188	if ($@ eq "alarm\n" ) {
189	warn "$wname timed out - pid $pid still live\n";
190	return 1;
191	}
192	else {
193	return $rv;
194	}
195	}
196	else {
197	# Child process
198	exec("ssh $user\@$host $cmd");
199	exit 0;
200	}
201	}
202
203	# Ship local copies of the federation scripts out to the given host. If any of
204	# the script transfers fails, return 0. The scripts to transfer are from the
205	# global @scripts and are found locally in $local_script_dir (another global).
206	sub ship_scripts {
207	my($host, $user, $dest_dir) = @_; # Where, who, where remotely
208	my $s;
209
210	&ssh_cmd($user, $host, "mkdir -p $dest_dir");
211	for $s (@scripts) {
212	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) \|\|
213	return 0;
214	}
215	return 1;
216	}
217
218	# Ship per-testbed configuration generated by this script to the remote /proj
219	# directories on the remote testbeds
220	sub ship_configs {
221	my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely
222	my($d, $f);
223
224	$d = IO::Dir->new($src_dir) \|\| return 0;
225
226	# All directories under $tmpdir are 770 so we can delete them later.
227	&ssh_cmd($user, $host, "mkdir -p $dest_dir") \|\| return 0;
228	&ssh_cmd($user, $host, "chmod 770 $dest_dir") \|\| return 0;
229	while ( $f = $d->read()) {
230	next if $f =~ /^\./;
231	if ( -d "$src_dir/$f" ) {
232	&ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") \|\|
233	return 0;
234	}
235	else {
236	&scp_file("$src_dir/$f", $user, $host, $dest_dir) \|\| return 0;
237	}
238	}
239	return 1;
240	}
241
242	# Start a sub section of the experiment on a given testbed. The testbed and
243	# the user to start the experiment as are pulled from the global per-testbed
244	# hash, passed in as $tbparams, as is the project name on the remote testbed.
245	# Parameters are the testbed and the experiment id. Configuration files are
246	# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
247	# Then the current state of the experiment determined using expinfo. From that
248	# state, the experiment is either created, modified or spapped in. If
249	# everything succeeds, true is returned. If the global verbose is set progress
250	# messages are printed.
251	sub start_segment {
252	my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
253	# per-testbed parameters and remote
254	# swap-in timeout
255	my $host = # Host name of remote ops (FQDN)
256	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
257	my $user = $tbparams->{$tb}->{'user'}; # user to pass to ssh
258	my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
259	# experiment under
260	my $tclfile = "./$eid.$tb.tcl"; # Local tcl file with the
261	# sub-experiment
262	my $proj_dir = "/proj/$pid/exp/$eid/tmp"; # Where to stash federation stuff
263	my $tarfiles_dir = "/proj/$pid/tarfiles/$eid"; # Where to stash tarfiles
264	my $rpms_dir = "/proj/$pid/rpms/$eid"; # Where to stash rpms
265	my $to_hostname = "$proj_dir/hosts"; # remote hostnames file
266	my $state; # State of remote experiment
267	my $status = new IO::Pipe; # The pipe to get status
268
269	# Determine the status of the remote experiment
270	$status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") \|\|
271	die "Can't ssh to $user\@$host:$!\n";
272
273	# XXX: this is simple now. Parsing may become more complex
274	while (<$status>) {
275	/State: (\w+)/ && ($state = $1);
276	/No\s+such\s+experiment/ && ($state = "none");
277	}
278	$status->close();
279	print "$tb: $state\n";
280
281	# Copy the experiment definition data over
282	print "transferring subexperiment to $tb\n" if $verbose;
283	&scp_file("$tmpdir/$tb/$tclfile", $user, $host) \|\| return 0;
284	# Clear out any old experiment data; if not deleted, copies over it by
285	# different users will fail.
286	# (O /bin/csh, how evil thou art. The -c and the escaped single quotes
287	# force the /bin/sh interpretation of the trailing * (which we need to keep
288	# tmp around)) Again, this needs to be done more properly once we have a
289	# non-ssh interface here.)
290	print "clearing experiment subdirs on $tb\n" if $verbose;
291	&ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") \|\|
292	return 0;
293	print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
294	&ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") \|\|
295	return 0;
296	print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
297	&ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") \|\|
298	return 0;
299	print "clearing experiment rpms subdirs on $tb\n" if $verbose;
300	&ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") \|\|
301	return 0;
302	print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
303	&ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") \|\|
304	return 0;
305	# Remote experiment is active. Modify it.
306	if ($state eq "active") {
307	print "Transferring federation support files to $tb\n" if $verbose;
308	# First copy new scripts and hostinfo into the remote /proj
309	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
310	return 0;
311	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
312	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
313
314	if ( -d "$tmpdir/tarfiles") {
315	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
316	return 0;
317	}
318
319	if ( -d "$tmpdir/rpms") {
320	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
321	return 0;
322	}
323
324	print "Modifying $eid in place on $tb\n" if $verbose;
325	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
326	"$eid $tclfile", "modexp", $timeout) \|\| return 0;
327	return 1;
328	}
329
330	# Remote experiment is swapped out, modify it and swap it in.
331	if ($state eq "swapped") {
332	print "Transferring federation support files to $tb\n" if $verbose;
333	# First copy new scripts and hostinfo into the remote /proj (because
334	# the experiment exists, the directory tree should be there.
335	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
336	return 0;
337	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
338	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
339	if ( -d "$tmpdir/tarfiles") {
340	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
341	return 0;
342	}
343
344	if ( -d "$tmpdir/rpms") {
345	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
346	return 0;
347	}
348
349	print "Modifying $eid on $tb\n" if $verbose;
350	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
351	"modexp") \|\| return 0;
352	print "Swapping $eid in on $tb\n" if $verbose;
353	# Now start up
354	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
355	"swapexp", $timeout) \|\| return 0;
356	return 1;
357	}
358
359	# No remote experiment. Create one. We do this in 2 steps so we can put
360	# the configuration files and scripts into the new experiment directories.
361	if ($state eq "none") {
362
363	if ( -d "$tmpdir/tarfiles") {
364	# Tarfiles have to exist for the creation to work
365	print "copying tarfiles to $tb\n";
366	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
367	return 0;
368	}
369
370	if ( -d "$tmpdir/rpms") {
371	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
372	return 0;
373	}
374
375	print "Creating $eid on $tb\n" if $verbose;
376	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
377	"$pid -e $eid $tclfile", "startexp") \|\| return 0;
378	# After startexp succeeds, the per-experiment directories exist on the
379	# remote testbed.
380	print "Transferring federation support files to $tb\n" if $verbose;
381	# First copy new scripts and hostinfo into the remote /proj
382	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
383	return 0;
384	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
385	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
386	# Now start up
387	print "Swapping $eid in on $tb\n" if $verbose;
388	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
389	"swapexp", $timeout) \|\| return 0;
390	return 1;
391	}
392
393	# Every branch for a known state returns. If execution gets here, the
394	# state is unknown.
395	warn "unknown state: $state\n";
396	return 0;
397	}
398
399	# Swap out a sub-experiment - probably because another has failed. Arguments
400	# are testbed and experiment. Most of the control flow is similar to
401	# start_segment, though much simpler.
402	sub stop_segment {
403	my($tb, $eid, $tbparams) = @_; # testbed, experiment ID and
404	# per-testbed parameters
405	my $user = $tbparams->{$tb}->{'user'}; # testbed user
406	my $host = # Ops node
407	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
408	my $pid = $tbparams->{$tb}->{'project'};# testbed project
409
410	print "Stopping $eid on $tb\n" if $verbose;
411	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out",
412	"swapexp (out)") \|\| return 0;
413	return 1;
414	}
415
416
417	$pid = $gid = "dummy"; # Default project and group to pass to
418	# $tcl_splitter above. These are total
419	# dummy arguments; the splitter doesn't
420	# use them at all, but we supply them to
421	# keep our changes to the parser minimal.
422	# Argument processing.
423	getopts('Ft:c:f:ndvN', \%opts);
424	$splitter_config = $opts{'c'} \|\| "./splitter.conf";
425	$debug = $opts{'d'};
426	$verbose = $opts{'v'} \|\| $opts{'d'};
427
428	&parse_config("$splitter_config", \%opts) \|\|
429	die "Cannot read config file $splitter_config: $!\n";
430	$fail_soft = $opts{'F'} \|\| $opts{'failsoft'};
431
432	warn "-N does nothing now. Only one testbeds format supported.\n"
433	if $opts{'N'};
434	$startem = $opts{'n'} ? 0 : 1; # If true, start the sub-experiments
435	$timeout = $opts{'t'} \|\| $opts{'timeout'};
436	$eid = $opts{'experiment'}; # Experiment ID
437	$tcl = $opts{'f'} \|\| shift; # The experiment description
438	$master = $opts{'master'}; # Master testbed
439	$tmpdir = $opts{'tmpdir'} \|\| $opts{'tempdir'}\|\| "/tmp"; # tmp files
440	$tb_config = $opts{'testbeds'} \|\| "./testbeds"; # testbed configurations
441	$local_script_dir = $opts{'scriptdir'}; # Local scripts
442
443	$smb_share = $opts{'smbshare'} \|\| # Share to mount from the master
444	die "Must give an SMB share\n";
445	$project_user = $opts{'smbuser'} \|\| # User to mount project dirs as
446	die "Must give an SMB user\n";
447
448	# For now specify these. We may want to generate them later.
449	$gw_pubkey = $opts{'gatewaypubkey'};
450	($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
451	$gw_secretkey = $opts{'gatewaysecretkey'};
452	($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
453
454	# tcl program to split experiments (changed during devel)
455	$tcl_splitter = $opts{'tclparse'} \|\| "/usr/testbed/lib/ns2ir/parse.tcl";
456	# tclsh to call directly (changed during devel)
457	$tclsh = $opts{'tclsh'} \|\| "/usr/local/bin/otclsh";
458
459	# Prefix to avoid collisions
460	$tmpdir .= "/split$$";
461
462	print "Temp files are in $tmpdir\n" if $verbose;
463	# Create a workspace
464	unless (-d "$tmpdir") {
465	mkdir("$tmpdir") \|\| die "Can't create $tmpdir: $!";
466	}
467
468	# Validate scripts directory
469	for my $s (@scripts) {
470	die "$local_script_dir/$s not in local script directory. Try -d\n"
471	unless -r "$local_script_dir/$s";
472	}
473
474	die "Must supply file, master and experiment" unless $master && $tcl && $eid;
475
476	&parse_testbeds($tb_config, $tbparams) \|\|
477	die "Cannot testbed congfigurations from $tb_config: $!\n";
478
479	# Open a pipe to the splitter program and start it parsing the experiments
480	my $pipe = new IO::Pipe;
481	# NB no more -p call on parse call.
482	$pipe->reader("$tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl") \|\|
483	die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
484
485	# Parsing variables
486	my $ctb; # Current testbed
487	my %allocated; # If allocated{$tb} > 0, $tb is in use
488	my $destfile; # File that the sub-experiment tcl file is
489	# being written to, or "" if none. Also used
490	# for hostnames file.
491	my $desthandle; # File handle for distfile
492	my $gateways; # when gateway lists are being processed this
493	# is the testbed whose gateways are being
494	# gathered.
495	my $control_gateway; # Control net gateway for the current testbed
496	my %active_end; # If active_end{"a-b"} > 0 then a is the active
497	# end of the a <-> b connector pair.
498
499	# Parse the splitter output. This loop creates the sub experiments, gateway
500	# configurations and hostnames file
501	while (<$pipe>) {
502	# Start of a sub-experiment
503	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
504	$ctb = $1;
505
506	# If we know the testbed, start collecting its sub experiment tcl
507	# description. If not, warn the caller and ignore the configuration of
508	# this testbed.
509	if ($tbparams->{$ctb}) {
510	$allocated{$ctb}++; # Keep track of the testbeds allocated
511
512	unless (-d "$tmpdir/$ctb") {
513	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
514	}
515	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
516
517	$desthandle = new IO::File(">$destfile") \|\|
518	die "Cannot open $destfile:$!\n";
519	}
520	else {
521	warn "No such testbed $ctb\n";
522	$destfile = "";
523	}
524	next;
525	};
526
527	# End of that experiment
528	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
529	# Simple syntax check and close out this experiment's tcl description
530	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
531	$desthandle->close();
532	$destfile = $ctb = "";
533	next;
534	};
535
536	# Beginning of a gateway set
537	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
538	$gateways = $1;
539	# If we've heard of this tb, create the config lines for it one at a
540	# time.
541	if ($allocated{$gateways}) {
542	# Just in case. This directory should already have been created
543	# above.
544	unless (-d "$tmpdir/$gateways") {
545	mkdir("$tmpdir/$gateways") \|\|
546	die "Can't create $tmpdir/$gateways: $!";
547	}
548	}
549	else {
550	warn "Gateways given (and ignored) for testbed not in use: " .
551	"$gateways\n";
552	$gateways = 0;
553	}
554	next;
555	};
556	# End of the gateways section. Output the client config for this testbed
557	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
558	die "Mismatched gateway markers ($1, $gateways)\n"
559	unless !$gateways \|\| $gateways == $1;
560
561	if ($control_gateway ) {
562	# Client config
563	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
564	my $master_project = $tbparams->{$master}->{'project'};
565	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
566	print $cc "ControlGateway: $control_gateway\n";
567	print $cc "SMBShare: $smb_share\n";
568	print $cc "ProjectUser: $project_user\n";
569	print $cc "ProjectName: $master_project\n";
570	$cc->close();
571	}
572	else { warn "No control gateway for $gateways?\n"; }
573
574	$gateways = 0;
575	next;
576	};
577	# Beginning of the hostnames list. Collection is always in the hostnames
578	# file.
579	/^#\s+Begin\s+hostnames/ && do {
580	$destfile = "$tmpdir/hostnames";
581	$desthandle = new IO::File(">$destfile") \|\|
582	die "Can't open $destfile:$!\n";
583	next;
584	};
585	# end of the hostnames list.
586	/^#\s+End\s+hostnames/ && do {
587	$desthandle->close();
588	$destfile = "";
589	next;
590	};
591
592	# Generate gateway configuration info, one file per line
593	$gateways && do {
594	chomp;
595	my($dtb, $myname, $desthost, $type) = split(" ", $_);
596
597	# Many of these are to simplify print statements
598	my $sdomain = # domain for the source
599	$tbparams->{$gateways}->{'domain'};
600	my $ddomain = # domain for the destination
601	$tbparams->{$dtb}->{'domain'};
602	my $sproject = # Project of the destination
603	$tbparams->{$gateways}->{'project'};
604	my $fs = # Master fs node (FQDN)
605	$tbparams->{$master}->{'fs'} . $tbparams->{$master}->{'domain'};
606	my $boss = # Master boss node (FQDN)
607	$tbparams->{$master}->{'boss'} . $tbparams->{$master}->{'domain'};
608	my $remote_script_dir = # Remote fed script location
609	"/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
610	my $active; # Is this the active side of
611	# the connector?
612
613	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
614	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
615
616	my $conf_file = "$myname$sdomain.gw.conf";
617	# translate to lower case so the `hostname` hack for specifying
618	# configuration files works.
619	$conf_file =~ tr/A-Z/a-z/;
620
621	# If either end of this link is in the master side of the testbed, that
622	# side is the active end. Otherwise the first testbed encountered in
623	# the file will be the active end. The $active_end variable keeps
624	# track of those decisions
625	if ( $dtb eq $master ) { $active = "false"; }
626	elsif ($gateways eq $master ) { $active = "true"; }
627	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
628	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
629
630	# This is used to create the client configuration.
631	$control_gateway = "$myname$sdomain"
632	if $type =~ /(control\|both)/;
633
634	# Write out the file
635	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")\|\|
636	die "can't open $tmpdir/$gateways/$conf_file: $!\n";
637
638	print $gwconfig "Active: $active\n";
639	print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
640	print $gwconfig "BossName: $boss\n";
641	print $gwconfig "FsName: $fs\n";
642	print $gwconfig "Type: $type\n";
643	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
644	print $gwconfig "Peer: $desthost$ddomain\n";
645	print $gwconfig "Pubkeys: " .
646	"/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
647	print $gwconfig "Privkeys: " .
648	"/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
649	$gwconfig->close();
650
651	# This testbed has a gateway (most will) so make a copy of the keys it
652	# needs in this testbed's subdirectory. start_segment will transfer
653	# them.
654	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
655	copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") \|\|
656	die "Can't copy pubkeys ($gw_pubkey to " .
657	"$tmpdir/$gateways/$gw_pubkey_base): $!\n";
658	}
659	if ($active eq "true" ) {
660	unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
661	copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") \|\|
662	die "Can't copy secret keys ($gw_secretkey to " .
663	"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
664	}
665	}
666
667	#done processing gateway entry, ready for next line
668	next;
669	};
670	(/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
671	next if /^#/;
672	chomp;
673	push(@tarfiles, $_);
674	next;
675	};
676	(/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
677	next if /^#/;
678	chomp;
679	push(@rpms, $_);
680	next;
681	};
682
683	next unless $destfile; # Unidentified testbed, ignore config
684	# local copies that can be used in the substitutions below
685	my $gwtype = $tbparams->{$ctb}->{'gwtype'};
686	my $gwimage = $tbparams->{$ctb}->{'gwimage'};
687	my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
688	my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
689	my $gwstart = $tbparams->{$ctb}->{'gwstart'};
690	my $expstart = $tbparams->{$ctb}->{'expstart'};
691	my $project = $tbparams->{$ctb}->{'project'};
692
693	# Substitute variables
694	s/GWTYPE/$gwtype/g;
695	s/GWIMAGE/$gwimage/g;
696	if ($ctb eq $master ) {
697	s/GWSTART/$mgwstart/g;
698	s/EXPSTART/$mexpstart/g;
699	}
700	else {
701	s/GWSTART/$gwstart/g;
702	s/EXPSTART/$expstart/g;
703	}
704	# XXX: oh is this bad
705	s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
706	s#PROJDIR#/proj/$project/#g;
707	s#EID#$eid#g;
708	s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
709	print $desthandle $_;
710	}
711	$pipe->close();
712	die "No nodes in master testbed ($master)\n" unless $allocated{$master};
713
714	# Copy tarfiles and rpms needed at remote sites to the staging directories.
715	# Start_segment will distribute them
716	for my $t (@tarfiles) {
717	die "tarfile '$t' unreadable: $!\n" unless -r $t;
718	unless (-d "$tmpdir/tarfiles") {
719	mkdir("$tmpdir/tarfiles") \|\|
720	die "Can't create $tmpdir/tarfiles:$!\n";
721	}
722	copy($t, "$tmpdir/tarfiles") \|\|
723	die "Can't copy $t to $tmpdir/tarfiles:$!\n";
724	}
725
726	for my $r (@rpms) {
727	die "rpm '$r' unreadable: $!\n" unless -r $r;
728	unless (-d "$tmpdir/rpms") {
729	mkdir("$tmpdir/rpms") \|\|
730	die "Can't create $tmpdir/rpms:$!\n";
731	}
732	copy($r, "$tmpdir/rpms") \|\|
733	die "Can't copy $r to $tmpdir/rpms:$!\n";
734	}
735
736	exit(0) unless $startem;
737
738	my %started; # If $started{$tb} then $tb successfully started
739
740	# Start up the slave sub-experiments first
741	TESTBED:
742	for my $tb (keys %allocated) {
743	if ($tb ne $master) {
744	if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
745	else { last TESTBED; }
746	}
747	}
748
749	# Now the master
750	if (&start_segment($master, $eid, $tbparams, $timeout)) {
751	$started{$master}++;
752	}
753
754	# If any testbed failed, swap the rest out.
755	if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
756	for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
757	print "Error starting experiment\n";
758	exit(1);
759	}
760	print "Experiment started\n";
761	print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
762	system("rm -rf $tmpdir") unless $debug;
763	exit(0); # set the exit value
764
765	=pod
766
767	=head1 NAME
768
769	B<splitter.pl>
770
771	=head1 SYNOPSIS
772
773	B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
774	[B<-f> F<experiment_tcl>] [F<experiment_tcl>]
775
776	=head1 DESCRIPTION
777
778	B<splitter.pl> invokes the DETER experiment parser to split an annotated
779	experiment into multiple sub-experments and instantiates the sub-experiments on
780	their intended testbeds. Annotation is accomplished using the
781	tb-set-node-testbed command, added to the parser.
782
783	Much of the script's behavior depends on the configuration file, specified with
784	the B<-c> flag and defaulting to F<./splitter.conf>.
785
786	The testbed labels supplied in the B<tb-set-node-testbed> command are
787	meaningful based on their presence in the testbeds file. that file can be
788	specified in the configuration file using the B<Testbeds> directive, and
789	defaults to F<./testbeds>. The syntax is described below.
790
791	Most of the intermediate files are staged in a sub-directory of a temporary
792	files directory and deleted at the end of the script. Specifying the B<-d>
793	flag on the command line avoids the deletion for debbugging. By default the
794	temporary files directory is directory is F</tmp> and can be reset in the
795	configuration file using the B<Tmpdir> directive. Intermediate files are
796	stored under a subdirectory formed by adding the process ID of the splitter
797	process. For example, if the temporary files directory is F</tmp> and the
798	B<splitter.pl> process ID is 2323, the temporary files will be stored in
799	F</tmp/split2323/>.
800
801	The expreriment is split out into one experiment description per testbed in the
802	temporary directory named as F<experiment.testbed.tcl> where the experiment is
803	the experiment ID given in the configuration file, and the testbed is the
804	tb-set-node-testbed parameter for the nodes in the file.
805
806	If the B<-n> option is absent the sub-experiments are then instantiated on
807	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
808	Per-testbed parameters are set in the testbeds file. Sub-experiments on
809	slave testbeds are instantiated in a random order, but the master testbed is
810	currently instantiated last.
811
812	Scripts to start federation (the federation kit) are copied into the local
813	experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
814	taken from the directory given by the B<ScriptDir> directive in the
815	configuration file.
816
817	If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
818	F<splitter.conf>.
819
820	If any sub-experiment fails to instantiate, the other sub-exeriments are
821	swapped out. B<-F> avoids this swap out, which can also be specified as
822	B<SoftFail: true> in F<splitter.conf>
823
824	=head2 Configuration File
825
826	The configuration file is a simple set of colon-separated parameters and
827	values. A configuration file must be present, either specified in the B<-c>
828	flag or the default F<./splitter.conf>. All the parameter names are case
829	insensitive, but should not include any whitespace. Parameter values may
830	include whitespace, but no newlines.
831
832	Possible parameters are:
833
834	=over 5
835
836	=item Experiment
837
838	The name of the experiment on the various testbeds
839
840	=item Master
841
842	The master testbed label from the testbeds file, described below.
843
844	=item Testbeds
845
846	The testbeds file described below, giving per-testbed parameters. If this
847	directive is absent the testbeds file defaults to F<./testbeds>
848
849	=item ScriptDir
850
851	Location of the default federation scripts, i.e. the federation kit.
852
853	=item GatewayPubkey
854
855	=item GatewaySecretKey
856
857	The names of the files containing secret and public keys to use in setting up
858	tunnels between testbeds. These will eventually be automatically generated.
859
860	=item TmpDir
861
862	=item TempDir
863
864	The directory where temporary files are created. These are synonyms, but
865	should both be specified, B<TmpDir> has priority. If neither is specified,
866	F</tmp> is used.
867
868	=item SMBShare
869
870	The SMB share on the master testbed that will be exported to remote clients.
871
872	=item SMBUser
873
874	The experiment user to mount project directories as. This user needs to be a
875	member of the exported experiment - that is one of the users in the project
876	containing this experiment on the master testbed.
877
878	=item Tclparse
879
880	The pathname to the experiment parsing program. Only developers should set
881	this.
882
883	=item Tclsh
884
885	The pathname to the local oTcl shell. Only developers should set
886	this.
887
888	=item Timeout
889
890	Value in seconds after which a swap-in operatioin will be considered a success.
891	Often long swap-ins will hang when there are partial failures. This works
892	around this issue.
893
894	=item FailSoft
895
896	If not set, failure of any sub experiment swaps the rest out. Setting this to any value avoids this swap out. (Also B<-F>.)
897
898	=back
899
900	=head2 Testbeds file
901
902	The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
903	scoped attribute-value pairs where each attribute is specified on a separate
904	line of the configuration file. Each testbed's parameters are preceeded by the
905	testbed label in brackets ([]) on a line by itself. After that the parameters
906	are specified as parameter: value. This is essentially the same format as the
907	configuration file. Parameters are:
908
909	=over 4
910
911	=item User
912
913	The user under which to make requests to this testbed. The user running
914	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
915	testbed.
916
917	=item OpsNode
918
919	The host name of the testbed's ops node. The user calling B<splitter.pl> must
920	be able to execute commands on this host via L<ssh(1)>.
921
922	=item Domain
923
924	The domain of nodes in this testbed (including the ops host). This parameter
925	should always start with a period.
926
927	=item Project
928
929	The project under which to instantiate sub-experiments on this testbed.
930
931	=item ConnectorType
932
933	The node type for inter-testbed connector nodes on this testbed.
934
935	=item SlaveNodeStartCmd
936
937	The start command to run on experimental nodes when this testbed is used as a
938	slave. In all the start commands the string FEDDIR will be replaced by the
939	local experiment's federation scripts directory and the string GWCONF replaced
940	by the gatway configuration file.
941
942	=item SlaveConnectorStartCmd
943
944	The start command to run on gateway nodes when this testbed is used as a slave.
945	The same string substitutions are made in this command as in experiment start.
946
947	=item MasterNodeStartCmd
948
949	The start command to run on experimental nodes when this testbed is used as a
950	master. The same string substitutions are made in this command as in
951	experiment start.
952
953	=item MasterConnectorStartCmd
954
955	The start command to run on gateway nodes when this testbed is used as a
956	master. The same string substitutions are made in this command as in
957	experiment start.
958
959	=item ConnectorImage
960
961	The disk image to be loaded on a gateway node on this testbed.
962
963	=item FileServer
964
965	The node in the master testbed from which filesystems are mounted.
966
967	=item Boss
968
969	The node in the master testbed that controls the testbed.
970
971	=item TunnelCfg
972
973	True if the connector needs to do DETER federation. This parameter will
974	probably be removed.
975
976
977	=back
978
979
980	=head1 ENVIRONMENT
981
982	B<splitter.pl> does not directly make use of environment variables, but calls
983	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
984	environment.
985
986	=head1 SEE ALSO
987
988	L<sh(1)>, L<ssh(1)>
989
990	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: