Context Navigation

source: fedkit/splitter.pl @ e5fee75

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since e5fee75 was e5fee75, checked in by Ted Faber <faber@…>, 17 years ago
multiplexed gateway support
Property mode set to `100644`
File size: 36.0 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10
11	my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12	my $local_script_dir = ".";
13	my($pid, $gid); # Process and group IDs for calling parse.tcl
14	my $splitter_config; # Configuration file
15	my $debug; # True if thecalled in debug mode
16	my $verbose; # True for extra progress reports
17	my $startem; # If true, start the sub-experiments
18	my $eid; # Experiment ID
19	my $tcl; # The experiment description (topology file)
20	my $master; # Master testbed
21	my $tmpdir; # tmp files
22	my $tb_config; # testbed configurations
23	my $smb_share; # Share to mount from the master
24	my $project_user; # User to mount project dirs as
25	my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26	my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27	# basename)
28	my $tcl_splitter; # tcl program to split experiments
29	# (changed during devel)
30	my $tclsh; # tclsh to call directly (changed during devel)
31	my $muxmax; # Maximum number of links/lans over 1 gw pair
32	my @tarfiles; # Tarfiles in use by this experiment
33	my @rpms; # Rpms in use by this experiment
34	my $timeout; # The timeout to use for experiment swap ins
35	my %opts; # Parsed options
36
37	my $tbparams = {}; # Map of the per-testbed parameters from the
38	# testbeds file. It is a reference to a hash
39	# of hashes (because it's passed around a bunch
40	# and it's nicer to have one access pattern
41	# throughout the script, in the main loop and
42	# the subroutines). That access is exemplified
43	# by $tbparams->{'deter'}->{'domain'} which is
44	# the domain parameter of the DETER testbed.
45	my $fail_soft; # Do not swap failed sub-experiments out
46	my $max_children=1; # Maximum number of simultaneous swap-ins
47
48	# Parse the config file. The format is a colon-separated parameter name
49	# followed by the value of that parameter to the end of the line. This parses
50	# that format and puts the parameters into the referenced hash. Parameter
51	# names are mapped to lower case, parameter values are unchanged. Returns 0 on
52	# failure (e.g. file open) and 1 on success.
53	sub parse_config {
54	my($file, $href) = @_;
55	my $fh = new IO::File($file);
56
57	unless ($fh) {
58	warn "Can't open $file: $!\n";
59	return 0;
60	}
61
62	while (<$fh>) {
63	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
64	chomp;
65	/^([^:]+):\s(.)/ && do {
66	my $key = $1;
67
68	$key =~ tr/A-Z/a-z/;
69	$href->{$key} = $2;
70	next;
71	};
72	warn "Unparasble line in $file: $_\n";
73	}
74	$fh->close(); # It will close when it goes out of scope, but...
75	return 1;
76	}
77
78	# Parse an easier-to-read testbeds file (the original was comma-separated
79	# unadorned strings). The format is a testbed scope as [testbed] followed by
80	# the colon-separated attribute-value pairs for the testbed. Right now these
81	# go into a set of global hashes indexed by testbed, but that should probably
82	# change.
83
84	sub parse_testbeds {
85	my($file, $tbparams) = @_; # Testbeds file and parameter hash
86	my $fh = new IO::File($file); # Testbeds filehandle
87	my $tb; # Current testbed
88	# Convert attribute in the file to tbparams hash key
89	my %attr_to_hash = (
90	"opsnode" => "host",
91	"user" => "user",
92	"domain" => "domain",
93	"project" => "project",
94	"connectortype" => "gwtype",
95	"slavenodestartcmd" => "expstart",
96	"slaveconnectorstartcmd" => "gwstart",
97	"masternodestartcmd" => "mexpstart",
98	"masterconnectorstartcmd" => "mgwstart",
99	"connectorimage" => "gwimage",
100	"fileserver" => "fs",
101	"boss" => "boss",
102	"tunnelcfg" => "tun"
103	);
104
105
106	unless ($fh) {
107	warn "Can't open $file: $!\n";
108	return 0;
109	}
110
111	while (<$fh>) {
112	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
113	chomp;
114	/^\s\[(.)\]/ && do {
115	$tb = $1;
116	$tbparams->{$tb} = {} unless $tbparams->{$tb};
117	next;
118	};
119
120	/^([^:]+):\s(.)/ && do {
121	unless ($tb) {
122	warn "Ignored attribute definition before testbed " .
123	"defined in $file: $_\n";
124	next;
125	}
126	my $key = $1;
127	$key =~ tr/A-Z/a-z/;
128	my $var = $attr_to_hash{$key};
129
130	if ($var) { $tbparams->{$tb}->{$var} = $2; }
131	else { warn "Unknown keyword $key in $file\n"; }
132
133	next;
134	};
135	warn "Unparasble line in $file: $_\n";
136	}
137	$fh->close(); # It will close when it goes out of scope, but...
138	return 1;
139	}
140
141
142	# use scp to transfer a file, reporting true if successful and false otherwise.
143	# Parameters are the local file name, the ssh host destination (either hostname
144	# oe user@host), and an optional destination file name or directory. If no
145	# destination is given, the file is transferred to the given user's home
146	# directory. If only a machine is given in the ssh host destination, the
147	# current user is used.
148	sub scp_file {
149	my($file, $user, $host, $dest) = @_;
150
151	# XXX system with a relative pathname is sort of gross
152	system("scp $file $user\@$host:$dest");
153	if ($?) {
154	warn "scp failed $?\n";
155	return 0;
156	}
157	else { return 1; }
158	}
159
160	# use ssh to execute the given command on the machine (and as the user) in
161	# $where. Parameters are the ssh destination directive ($where) and the
162	# command to execute, and a prefix to be placed on a message generated if the
163	# command fails. On failure print a warning if a warning prefix was given and
164	# return false. If timeout is given fork a process and set an alarm of that
165	# many seconds. Timeouts also return 0;
166	sub ssh_cmd {
167	my($user, $host, $cmd, $wname, $timeout) = @_;
168	my $pid; # Child pid
169
170	$timeout = 0 unless $timeout; # Force default timeout
171
172	if ( $pid = fork () ) {
173	# Parent process
174	# The eval acts as a signal catcher. If the alarm goes off inside
175	# the eval, the die will put "alarm\n" into $@, otherwise the
176	# return value of the execution in the child process will be used.
177	my $rv = eval {
178	local $SIG{'ALRM'} = sub{ die "alarm\n"; };
179	my $rv;
180
181	alarm $timeout;
182	$rv = waitpid($pid, 0);
183	alarm 0;
184	$rv;
185	};
186
187	# If the eval succeeded, $@ will be null and we can use $rv, which
188	# is the return code from the subprocess. If the eval timed out,
189	# print a warning and assume the best.
190	if ($@ eq "alarm\n" ) {
191	warn "$wname timed out - pid $pid still live\n";
192	return 1;
193	}
194	else {
195	return $rv;
196	}
197	}
198	else {
199	# Child process
200	exec("ssh $user\@$host $cmd");
201	exit 0;
202	}
203	}
204
205	# Ship local copies of the federation scripts out to the given host. If any of
206	# the script transfers fails, return 0. The scripts to transfer are from the
207	# global @scripts and are found locally in $local_script_dir (another global).
208	sub ship_scripts {
209	my($host, $user, $dest_dir) = @_; # Where, who, where remotely
210	my $s;
211
212	&ssh_cmd($user, $host, "mkdir -p $dest_dir");
213	for $s (@scripts) {
214	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) \|\|
215	return 0;
216	}
217	return 1;
218	}
219
220	# Ship per-testbed configuration generated by this script to the remote /proj
221	# directories on the remote testbeds
222	sub ship_configs {
223	my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely
224	my($d, $f);
225
226	$d = IO::Dir->new($src_dir) \|\| return 0;
227
228	# All directories under $tmpdir are 770 so we can delete them later.
229	&ssh_cmd($user, $host, "mkdir -p $dest_dir") \|\| return 0;
230	&ssh_cmd($user, $host, "chmod 770 $dest_dir") \|\| return 0;
231	while ( $f = $d->read()) {
232	next if $f =~ /^\./;
233	if ( -d "$src_dir/$f" ) {
234	&ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") \|\|
235	return 0;
236	}
237	else {
238	&scp_file("$src_dir/$f", $user, $host, $dest_dir) \|\| return 0;
239	}
240	}
241	return 1;
242	}
243
244	# Start a sub section of the experiment on a given testbed. The testbed and
245	# the user to start the experiment as are pulled from the global per-testbed
246	# hash, passed in as $tbparams, as is the project name on the remote testbed.
247	# Parameters are the testbed and the experiment id. Configuration files are
248	# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
249	# Then the current state of the experiment determined using expinfo. From that
250	# state, the experiment is either created, modified or spapped in. If
251	# everything succeeds, true is returned. If the global verbose is set progress
252	# messages are printed.
253	sub start_segment {
254	my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
255	# per-testbed parameters and remote
256	# swap-in timeout
257	my $host = # Host name of remote ops (FQDN)
258	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
259	my $user = $tbparams->{$tb}->{'user'}; # user to pass to ssh
260	my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
261	# experiment under
262	my $tclfile = "./$eid.$tb.tcl"; # Local tcl file with the
263	# sub-experiment
264	my $proj_dir = "/proj/$pid/exp/$eid/tmp"; # Where to stash federation stuff
265	my $tarfiles_dir = "/proj/$pid/tarfiles/$eid"; # Where to stash tarfiles
266	my $rpms_dir = "/proj/$pid/rpms/$eid"; # Where to stash rpms
267	my $to_hostname = "$proj_dir/hosts"; # remote hostnames file
268	my $state; # State of remote experiment
269	my $status = new IO::Pipe; # The pipe to get status
270
271	# Determine the status of the remote experiment
272	$status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") \|\|
273	die "Can't ssh to $user\@$host:$!\n";
274
275	# XXX: this is simple now. Parsing may become more complex
276	while (<$status>) {
277	/State: (\w+)/ && ($state = $1);
278	/No\s+such\s+experiment/ && ($state = "none");
279	}
280	$status->close();
281	print "$tb: $state\n";
282
283	# Copy the experiment definition data over
284	print "transferring subexperiment to $tb\n" if $verbose;
285	&scp_file("$tmpdir/$tb/$tclfile", $user, $host) \|\| return 0;
286	# Clear out any old experiment data; if not deleted, copies over it by
287	# different users will fail.
288	# (O /bin/csh, how evil thou art. The -c and the escaped single quotes
289	# force the /bin/sh interpretation of the trailing * (which we need to keep
290	# tmp around)) Again, this needs to be done more properly once we have a
291	# non-ssh interface here.)
292	print "clearing experiment subdirs on $tb\n" if $verbose;
293	&ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") \|\|
294	return 0;
295	print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
296	&ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") \|\|
297	return 0;
298	print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
299	&ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") \|\|
300	return 0;
301	print "clearing experiment rpms subdirs on $tb\n" if $verbose;
302	&ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") \|\|
303	return 0;
304	print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
305	&ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") \|\|
306	return 0;
307	# Remote experiment is active. Modify it.
308	if ($state eq "active") {
309	print "Transferring federation support files to $tb\n" if $verbose;
310	# First copy new scripts and hostinfo into the remote /proj
311	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
312	return 0;
313	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
314	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
315
316	if ( -d "$tmpdir/tarfiles") {
317	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
318	return 0;
319	}
320
321	if ( -d "$tmpdir/rpms") {
322	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
323	return 0;
324	}
325
326	print "Modifying $eid in place on $tb\n" if $verbose;
327	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
328	"$eid $tclfile", "modexp", $timeout) \|\| return 0;
329	return 1;
330	}
331
332	# Remote experiment is swapped out, modify it and swap it in.
333	if ($state eq "swapped") {
334	print "Transferring federation support files to $tb\n" if $verbose;
335	# First copy new scripts and hostinfo into the remote /proj (because
336	# the experiment exists, the directory tree should be there.
337	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
338	return 0;
339	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
340	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
341	if ( -d "$tmpdir/tarfiles") {
342	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
343	return 0;
344	}
345
346	if ( -d "$tmpdir/rpms") {
347	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
348	return 0;
349	}
350
351	print "Modifying $eid on $tb\n" if $verbose;
352	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
353	"modexp") \|\| return 0;
354	print "Swapping $eid in on $tb\n" if $verbose;
355	# Now start up
356	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
357	"swapexp", $timeout) \|\| return 0;
358	return 1;
359	}
360
361	# No remote experiment. Create one. We do this in 2 steps so we can put
362	# the configuration files and scripts into the new experiment directories.
363	if ($state eq "none") {
364
365	if ( -d "$tmpdir/tarfiles") {
366	# Tarfiles have to exist for the creation to work
367	print "copying tarfiles to $tb\n";
368	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
369	return 0;
370	}
371
372	if ( -d "$tmpdir/rpms") {
373	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
374	return 0;
375	}
376
377	print "Creating $eid on $tb\n" if $verbose;
378	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
379	"$pid -e $eid $tclfile", "startexp") \|\| return 0;
380	# After startexp succeeds, the per-experiment directories exist on the
381	# remote testbed.
382	print "Transferring federation support files to $tb\n" if $verbose;
383	# First copy new scripts and hostinfo into the remote /proj
384	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
385	return 0;
386	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
387	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
388	# Now start up
389	print "Swapping $eid in on $tb\n" if $verbose;
390	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
391	"swapexp", $timeout) \|\| return 0;
392	return 1;
393	}
394
395	# Every branch for a known state returns. If execution gets here, the
396	# state is unknown.
397	warn "unknown state: $state\n";
398	return 0;
399	}
400
401	# Swap out a sub-experiment - probably because another has failed. Arguments
402	# are testbed and experiment. Most of the control flow is similar to
403	# start_segment, though much simpler.
404	sub stop_segment {
405	my($tb, $eid, $tbparams) = @_; # testbed, experiment ID and
406	# per-testbed parameters
407	my $user = $tbparams->{$tb}->{'user'}; # testbed user
408	my $host = # Ops node
409	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
410	my $pid = $tbparams->{$tb}->{'project'};# testbed project
411
412	print "Stopping $eid on $tb\n" if $verbose;
413	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out",
414	"swapexp (out)") \|\| return 0;
415	return 1;
416	}
417
418
419	$pid = $gid = "dummy"; # Default project and group to pass to
420	# $tcl_splitter above. These are total
421	# dummy arguments; the splitter doesn't
422	# use them at all, but we supply them to
423	# keep our changes to the parser minimal.
424	# Argument processing.
425	getopts('Ft:c:p:f:ndvN', \%opts);
426	$splitter_config = $opts{'c'} \|\| "./splitter.conf";
427	$debug = $opts{'d'};
428	$verbose = $opts{'v'} \|\| $opts{'d'};
429
430	&parse_config("$splitter_config", \%opts) \|\|
431	die "Cannot read config file $splitter_config: $!\n";
432
433	warn "-N does nothing now. Only one testbeds format supported.\n"
434	if $opts{'N'};
435	$fail_soft = $opts{'F'} \|\| $opts{'failsoft'};
436	$startem = $opts{'n'} ? 0 : 1; # If true, start the sub-experiments
437	$timeout = $opts{'t'} \|\| $opts{'timeout'};
438	$eid = $opts{'experiment'}; # Experiment ID
439	$tcl = $opts{'f'} \|\| shift; # The experiment description
440	$master = $opts{'master'}; # Master testbed
441	$tmpdir = $opts{'tmpdir'} \|\| $opts{'tempdir'}\|\| "/tmp"; # tmp files
442	$tb_config = $opts{'testbeds'} \|\| "./testbeds"; # testbed configurations
443	$local_script_dir = $opts{'scriptdir'}; # Local scripts
444	$muxmax = $opts{'muxlimit'} \|\| 3; # Number of connections muxed on one
445	# gateway
446
447	$max_children = $opts{'p'} \|\| $opts{'maxchildren'}
448	if $opts{'p'} \|\| $opts{'maxchildren'};
449
450	$smb_share = $opts{'smbshare'} \|\| # Share to mount from the master
451	die "Must give an SMB share\n";
452	$project_user = $opts{'smbuser'} \|\| # User to mount project dirs as
453	die "Must give an SMB user\n";
454
455	# For now specify these. We may want to generate them later.
456	$gw_pubkey = $opts{'gatewaypubkey'};
457	($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
458	$gw_secretkey = $opts{'gatewaysecretkey'};
459	($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
460
461	# tcl program to split experiments (changed during devel)
462	$tcl_splitter = $opts{'tclparse'} \|\| "/usr/testbed/lib/ns2ir/parse.tcl";
463	# tclsh to call directly (changed during devel)
464	$tclsh = $opts{'tclsh'} \|\| "/usr/local/bin/otclsh";
465
466	# Prefix to avoid collisions
467	$tmpdir .= "/split$$";
468
469	print "Temp files are in $tmpdir\n" if $verbose;
470	# Create a workspace
471	unless (-d "$tmpdir") {
472	mkdir("$tmpdir") \|\| die "Can't create $tmpdir: $!";
473	}
474
475	# Validate scripts directory
476	for my $s (@scripts) {
477	die "$local_script_dir/$s not in local script directory. Try -d\n"
478	unless -r "$local_script_dir/$s";
479	}
480
481	die "Must supply file, master and experiment" unless $master && $tcl && $eid;
482
483	&parse_testbeds($tb_config, $tbparams) \|\|
484	die "Cannot testbed congfigurations from $tb_config: $!\n";
485
486	# Open a pipe to the splitter program and start it parsing the experiments
487	my $pipe = new IO::Pipe;
488	# NB no more -p call on parse call.
489	$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl") \|\|
490	die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";
491
492	# Parsing variables
493	my $ctb; # Current testbed
494	my %allocated; # If allocated{$tb} > 0, $tb is in use
495	my $destfile; # File that the sub-experiment tcl file is
496	# being written to, or "" if none. Also used
497	# for hostnames file.
498	my $desthandle; # File handle for distfile
499	my $gateways; # when gateway lists are being processed this
500	# is the testbed whose gateways are being
501	# gathered.
502	my $control_gateway; # Control net gateway for the current testbed
503	my %active_end; # If active_end{"a-b"} > 0 then a is the active
504	# end of the a <-> b connector pair.
505
506	# Parse the splitter output. This loop creates the sub experiments, gateway
507	# configurations and hostnames file
508	while (<$pipe>) {
509	# Start of a sub-experiment
510	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
511	$ctb = $1;
512
513	# If we know the testbed, start collecting its sub experiment tcl
514	# description. If not, warn the caller and ignore the configuration of
515	# this testbed.
516	if ($tbparams->{$ctb}) {
517	$allocated{$ctb}++; # Keep track of the testbeds allocated
518
519	unless (-d "$tmpdir/$ctb") {
520	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
521	}
522	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
523
524	$desthandle = new IO::File(">$destfile") \|\|
525	die "Cannot open $destfile:$!\n";
526	}
527	else {
528	warn "No such testbed $ctb\n";
529	$destfile = "";
530	}
531	next;
532	};
533
534	# End of that experiment
535	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
536	# Simple syntax check and close out this experiment's tcl description
537	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
538	$desthandle->close();
539	$destfile = $ctb = "";
540	next;
541	};
542
543	# Beginning of a gateway set
544	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
545	$gateways = $1;
546	# If we've heard of this tb, create the config lines for it one at a
547	# time.
548	if ($allocated{$gateways}) {
549	# Just in case. This directory should already have been created
550	# above.
551	unless (-d "$tmpdir/$gateways") {
552	mkdir("$tmpdir/$gateways") \|\|
553	die "Can't create $tmpdir/$gateways: $!";
554	}
555	}
556	else {
557	warn "Gateways given (and ignored) for testbed not in use: " .
558	"$gateways\n";
559	$gateways = 0;
560	}
561	next;
562	};
563	# End of the gateways section. Output the client config for this testbed
564	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
565	die "Mismatched gateway markers ($1, $gateways)\n"
566	unless !$gateways \|\| $gateways == $1;
567
568	if ($control_gateway ) {
569	# Client config
570	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
571	my $master_project = $tbparams->{$master}->{'project'};
572	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
573	print $cc "ControlGateway: $control_gateway\n";
574	print $cc "SMBShare: $smb_share\n";
575	print $cc "ProjectUser: $project_user\n";
576	print $cc "ProjectName: $master_project\n";
577	$cc->close();
578	}
579	else { warn "No control gateway for $gateways?\n"; }
580
581	$gateways = 0;
582	next;
583	};
584	# Beginning of the hostnames list. Collection is always in the hostnames
585	# file.
586	/^#\s+Begin\s+hostnames/ && do {
587	$destfile = "$tmpdir/hostnames";
588	$desthandle = new IO::File(">$destfile") \|\|
589	die "Can't open $destfile:$!\n";
590	next;
591	};
592	# end of the hostnames list.
593	/^#\s+End\s+hostnames/ && do {
594	$desthandle->close();
595	$destfile = "";
596	next;
597	};
598
599	# Generate gateway configuration info, one file per line
600	$gateways && do {
601	chomp;
602	my($dtb, $myname, $desthost, $type) = split(" ", $_);
603
604	# Many of these are to simplify print statements
605	my $sdomain = # domain for the source
606	$tbparams->{$gateways}->{'domain'};
607	my $ddomain = # domain for the destination
608	$tbparams->{$dtb}->{'domain'};
609	my $sproject = # Project of the destination
610	$tbparams->{$gateways}->{'project'};
611	my $fs = # Master fs node (FQDN)
612	$tbparams->{$master}->{'fs'} . $tbparams->{$master}->{'domain'};
613	my $boss = # Master boss node (FQDN)
614	$tbparams->{$master}->{'boss'} . $tbparams->{$master}->{'domain'};
615	my $remote_script_dir = # Remote fed script location
616	"/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
617	my $active; # Is this the active side of
618	# the connector?
619
620	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
621	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
622
623	my $conf_file = "$myname$sdomain.gw.conf";
624	# translate to lower case so the `hostname` hack for specifying
625	# configuration files works.
626	$conf_file =~ tr/A-Z/a-z/;
627
628	# If either end of this link is in the master side of the testbed, that
629	# side is the active end. Otherwise the first testbed encountered in
630	# the file will be the active end. The $active_end variable keeps
631	# track of those decisions
632	if ( $dtb eq $master ) { $active = "false"; }
633	elsif ($gateways eq $master ) { $active = "true"; }
634	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
635	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
636
637	# This is used to create the client configuration.
638	$control_gateway = "$myname$sdomain"
639	if $type =~ /(control\|both)/;
640
641	# Write out the file
642	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")\|\|
643	die "can't open $tmpdir/$gateways/$conf_file: $!\n";
644
645	print $gwconfig "Active: $active\n";
646	print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
647	print $gwconfig "BossName: $boss\n";
648	print $gwconfig "FsName: $fs\n";
649	print $gwconfig "Type: $type\n";
650	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
651	print $gwconfig "Peer: $desthost$ddomain\n";
652	print $gwconfig "Pubkeys: " .
653	"/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
654	print $gwconfig "Privkeys: " .
655	"/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
656	$gwconfig->close();
657
658	# This testbed has a gateway (most will) so make a copy of the keys it
659	# needs in this testbed's subdirectory. start_segment will transfer
660	# them.
661	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
662	copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") \|\|
663	die "Can't copy pubkeys ($gw_pubkey to " .
664	"$tmpdir/$gateways/$gw_pubkey_base): $!\n";
665	}
666	if ($active eq "true" ) {
667	unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
668	copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") \|\|
669	die "Can't copy secret keys ($gw_secretkey to " .
670	"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
671	}
672	}
673
674	#done processing gateway entry, ready for next line
675	next;
676	};
677	(/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
678	next if /^#/;
679	chomp;
680	push(@tarfiles, $_);
681	next;
682	};
683	(/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
684	next if /^#/;
685	chomp;
686	push(@rpms, $_);
687	next;
688	};
689
690	next unless $destfile; # Unidentified testbed, ignore config
691	# local copies that can be used in the substitutions below
692	my $gwtype = $tbparams->{$ctb}->{'gwtype'};
693	my $gwimage = $tbparams->{$ctb}->{'gwimage'};
694	my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
695	my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
696	my $gwstart = $tbparams->{$ctb}->{'gwstart'};
697	my $expstart = $tbparams->{$ctb}->{'expstart'};
698	my $project = $tbparams->{$ctb}->{'project'};
699
700	# Substitute variables
701	s/GWTYPE/$gwtype/g;
702	s/GWIMAGE/$gwimage/g;
703	if ($ctb eq $master ) {
704	s/GWSTART/$mgwstart/g;
705	s/EXPSTART/$mexpstart/g;
706	}
707	else {
708	s/GWSTART/$gwstart/g;
709	s/EXPSTART/$expstart/g;
710	}
711	# XXX: oh is this bad
712	s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
713	s#PROJDIR#/proj/$project/#g;
714	s#EID#$eid#g;
715	s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
716	print $desthandle $_;
717	}
718	$pipe->close();
719	die "No nodes in master testbed ($master)\n" unless $allocated{$master};
720
721	# Copy tarfiles and rpms needed at remote sites to the staging directories.
722	# Start_segment will distribute them
723	for my $t (@tarfiles) {
724	die "tarfile '$t' unreadable: $!\n" unless -r $t;
725	unless (-d "$tmpdir/tarfiles") {
726	mkdir("$tmpdir/tarfiles") \|\|
727	die "Can't create $tmpdir/tarfiles:$!\n";
728	}
729	copy($t, "$tmpdir/tarfiles") \|\|
730	die "Can't copy $t to $tmpdir/tarfiles:$!\n";
731	}
732
733	for my $r (@rpms) {
734	die "rpm '$r' unreadable: $!\n" unless -r $r;
735	unless (-d "$tmpdir/rpms") {
736	mkdir("$tmpdir/rpms") \|\|
737	die "Can't create $tmpdir/rpms:$!\n";
738	}
739	copy($r, "$tmpdir/rpms") \|\|
740	die "Can't copy $r to $tmpdir/rpms:$!\n";
741	}
742
743	exit(0) unless $startem;
744
745	my %started; # If $started{$tb} then $tb successfully started
746	my %child; # If $child{$pid} then a process with that pid is
747	# working on a starting a segment
748	my $nworking = 0; # Number of children working on swapin
749	my $pid; # Scratch variable for pids
750
751	# Start up the slave sub-experiments first
752	TESTBED:
753	for my $tb (keys %allocated) {
754	if ( $tb ne $master ) {
755	while ( $nworking == $max_children ) {
756	print "Waiting for a child process to complete\n" if $verbose;
757	if (($pid = wait()) != -1 ) {
758	# The $? >> 8 is the exit code of the subprocess, which is
759	# non-zero if the &start_segment routine failed.
760	my $exit_code = ($? >> 8);
761
762	print "Child $pid completed exit code ($exit_code)\n"
763	if $verbose;
764	$nworking--;
765	$started{$child{$pid}}++ unless $exit_code;
766	if ($child{$pid} ) { delete $child{$pid}; }
767	else { warn "Reaped a pid we did not start?? ($pid)\n"; }
768	last TESTBED if $exit_code;
769	}
770	else { warn "wait returned without reaping: $!\n"; }
771	}
772	if ( $pid = fork() ) {
773	# Parent process
774	$nworking ++;
775	$child{$pid} = $tb;
776	print "Started process $pid to start testbed $tb\n"
777	if $verbose;
778	}
779	else {
780	# Child. Note that we reverse the sense of the return code when it
781	# becomes an exit value. Zero exit values indicate success.
782	exit(!&start_segment($tb, $eid, $tbparams, $timeout));
783	}
784	}
785	}
786
787	# Now wait for any still running processes.
788	while ( $nworking ) {
789	print "Waiting for a child process to complete ($nworking running)\n"
790	if $verbose;
791	if (($pid = wait()) != -1 ) {
792	# The $? >> 8 is the exit code of the subprocess, which is
793	# non-zero if the &start_segment routine failed.
794	my $exit_code = ($? >> 8);
795
796	print "Child $pid completed exit code ($exit_code)\n"
797	if $verbose;
798	$nworking--;
799	$started{$child{$pid}}++ unless $exit_code;
800	if ($child{$pid} ) { delete $child{$pid}; }
801	else { warn "Reaped a pid we did not start?? ($pid)\n"; }
802	}
803	else { warn "wait returned without reaping: $!\n"; }
804	}
805
806	# Now the master
807	if (&start_segment($master, $eid, $tbparams, $timeout)) {
808	$started{$master}++;
809	}
810
811	# If any testbed failed, swap the rest out.
812	if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
813	for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
814	print "Error starting experiment\n";
815	exit(1);
816	}
817	print "Experiment started\n";
818	print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
819	system("rm -rf $tmpdir") unless $debug;
820	exit(0); # set the exit value
821
822	=pod
823
824	=head1 NAME
825
826	B<splitter.pl>
827
828	=head1 SYNOPSIS
829
830	B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
831	[B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
832
833	=head1 DESCRIPTION
834
835	B<splitter.pl> invokes the DETER experiment parser to split an annotated
836	experiment into multiple sub-experments and instantiates the sub-experiments on
837	their intended testbeds. Annotation is accomplished using the
838	tb-set-node-testbed command, added to the parser.
839
840	Much of the script's behavior depends on the configuration file, specified with
841	the B<-c> flag and defaulting to F<./splitter.conf>.
842
843	The testbed labels supplied in the B<tb-set-node-testbed> command are
844	meaningful based on their presence in the testbeds file. that file can be
845	specified in the configuration file using the B<Testbeds> directive, and
846	defaults to F<./testbeds>. The syntax is described below.
847
848	Most of the intermediate files are staged in a sub-directory of a temporary
849	files directory and deleted at the end of the script. Specifying the B<-d>
850	flag on the command line avoids the deletion for debbugging. By default the
851	temporary files directory is directory is F</tmp> and can be reset in the
852	configuration file using the B<Tmpdir> directive. Intermediate files are
853	stored under a subdirectory formed by adding the process ID of the splitter
854	process. For example, if the temporary files directory is F</tmp> and the
855	B<splitter.pl> process ID is 2323, the temporary files will be stored in
856	F</tmp/split2323/>.
857
858	The expreriment is split out into one experiment description per testbed in the
859	temporary directory named as F<experiment.testbed.tcl> where the experiment is
860	the experiment ID given in the configuration file, and the testbed is the
861	tb-set-node-testbed parameter for the nodes in the file.
862
863	If the B<-n> option is absent the sub-experiments are then instantiated on
864	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
865	Per-testbed parameters are set in the testbeds file. Sub-experiments on
866	slave testbeds are instantiated in a random order, but the master testbed is
867	currently instantiated last.
868
869	Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
870	and the maximum number of simultaneous processes to start.
871
872	Scripts to start federation (the federation kit) are copied into the local
873	experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
874	taken from the directory given by the B<ScriptDir> directive in the
875	configuration file.
876
877	If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
878	F<splitter.conf>.
879
880	If any sub-experiment fails to instantiate, the other sub-exeriments are
881	swapped out. B<-F> avoids this swap out, which can also be specified as
882	B<SoftFail: true> in F<splitter.conf>
883
884	=head2 Configuration File
885
886	The configuration file is a simple set of colon-separated parameters and
887	values. A configuration file must be present, either specified in the B<-c>
888	flag or the default F<./splitter.conf>. All the parameter names are case
889	insensitive, but should not include any whitespace. Parameter values may
890	include whitespace, but no newlines.
891
892	Possible parameters are:
893
894	=over 5
895
896	=item Experiment
897
898	The name of the experiment on the various testbeds
899
900	=item Master
901
902	The master testbed label from the testbeds file, described below.
903
904	=item Testbeds
905
906	The testbeds file described below, giving per-testbed parameters. If this
907	directive is absent the testbeds file defaults to F<./testbeds>
908
909	=item ScriptDir
910
911	Location of the default federation scripts, i.e. the federation kit.
912
913	=item GatewayPubkey
914
915	=item GatewaySecretKey
916
917	The names of the files containing secret and public keys to use in setting up
918	tunnels between testbeds. These will eventually be automatically generated.
919
920	=item TmpDir
921
922	=item TempDir
923
924	The directory where temporary files are created. These are synonyms, but
925	should both be specified, B<TmpDir> has priority. If neither is specified,
926	F</tmp> is used.
927
928	=item SMBShare
929
930	The SMB share on the master testbed that will be exported to remote clients.
931
932	=item SMBUser
933
934	The experiment user to mount project directories as. This user needs to be a
935	member of the exported experiment - that is one of the users in the project
936	containing this experiment on the master testbed.
937
938	=item Timeout
939
940	Value in seconds after which a swap-in operatioin will be considered a success.
941	Often long swap-ins will hang when there are partial failures. This works
942	around this issue. (This behavior can be requested on the command line by
943	specifying B<-t> I<secs>.)
944
945	=item FailSoft
946
947	If not set, failure of any sub experiment swaps the rest out. Setting this to
948	any value avoids this swap out. (This behavior can be requested on the command
949	line by specifying B<-F>.)
950
951	=item MuxLimit
952
953	The maximum bumber of links/lans carried by one gateway pair
954
955	=item Tclparse
956
957	The pathname to the experiment parsing program. Only developers should set
958	this.
959
960	=item Tclsh
961
962	The pathname to the local oTcl shell. Only developers should set
963	this.
964
965	=back
966
967	=head2 Testbeds file
968
969	The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
970	scoped attribute-value pairs where each attribute is specified on a separate
971	line of the configuration file. Each testbed's parameters are preceeded by the
972	testbed label in brackets ([]) on a line by itself. After that the parameters
973	are specified as parameter: value. This is essentially the same format as the
974	configuration file. Parameters are:
975
976	=over 4
977
978	=item User
979
980	The user under which to make requests to this testbed. The user running
981	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
982	testbed.
983
984	=item OpsNode
985
986	The host name of the testbed's ops node. The user calling B<splitter.pl> must
987	be able to execute commands on this host via L<ssh(1)>.
988
989	=item Domain
990
991	The domain of nodes in this testbed (including the ops host). This parameter
992	should always start with a period.
993
994	=item Project
995
996	The project under which to instantiate sub-experiments on this testbed.
997
998	=item ConnectorType
999
1000	The node type for inter-testbed connector nodes on this testbed.
1001
1002	=item SlaveNodeStartCmd
1003
1004	The start command to run on experimental nodes when this testbed is used as a
1005	slave. In all the start commands the following string substitutions are made:
1006
1007	=over 10
1008
1009	=item FEDDIR
1010
1011	The local experiment's federation scripts directory. Each local experiment
1012	will have this replaced by the scripts directory on its local boss.
1013
1014	=item GWCONF
1015
1016	The full pathname of the gateway configuration file. As with FEDDIR, this is
1017	on the local boss.
1018
1019	=item PROJDIR
1020
1021	The project directory on the local boss.
1022
1023	=item EID
1024
1025	The local experiment name.
1026
1027	=back
1028
1029	All startcmds specified in F<testbeds> undergo these expansions.
1030
1031	=item SlaveConnectorStartCmd
1032
1033	The start command to run on gateway nodes when this testbed is used as a slave.
1034	The same string substitutions are made in this command as in SlaveNodeStartCmd.
1035
1036	=item MasterNodeStartCmd
1037
1038	The start command to run on experimental nodes when this testbed is used as a
1039	master. The same string substitutions are made in this command as in
1040	SlaveNodeStartCmd.
1041
1042	=item MasterConnectorStartCmd
1043
1044	The start command to run on gateway nodes when this testbed is used as a
1045	master. The same string substitutions are made in this command as in
1046	SlaveNodeStartCmd.
1047
1048	=item ConnectorImage
1049
1050	The disk image to be loaded on a gateway node on this testbed.
1051
1052	=item FileServer
1053
1054	The node in the master testbed from which filesystems are mounted.
1055
1056	=item Boss
1057
1058	The node in the master testbed that controls the testbed.
1059
1060	=item TunnelCfg
1061
1062	True if the connector needs to do DETER federation. This parameter will
1063	probably be removed.
1064
1065
1066	=back
1067
1068	=head1 ENVIRONMENT
1069
1070	B<splitter.pl> does not directly make use of environment variables, but calls
1071	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1072	environment.
1073
1074	=head1 BUGS
1075
1076	A deprecated B<-N> flag was used to select testbeds file format. Only one
1077	format is supported now, and B<-N> generates a warning, but otherwise does not
1078	affect B<splitter.pl>.
1079
1080	=head1 SEE ALSO
1081
1082	L<sh(1)>, L<ssh(1)>
1083
1084	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: