Context Navigation

source: fedkit/splitter.pl @ 5479c80

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since 5479c80 was 5479c80, checked in by Ted Faber <faber@…>, 17 years ago
Timeout critical sshes in setting up experiments. The commands don't seem to terminate when partial swap-ins are accomplished, or perhaps after long swap-ins. This provides a way to move on.
Property mode set to `100644`
File size: 34.6 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10
11	my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12	my $local_script_dir = ".";
13	my($pid, $gid); # Process and group IDs for calling parse.tcl
14	my $splitter_config; # Configuration file
15	my $debug; # True if thecalled in debug mode
16	my $verbose; # True for extra progress reports
17	my $startem; # If true, start the sub-experiments
18	my $eid; # Experiment ID
19	my $tcl; # The experiment description (topology file)
20	my $master; # Master testbed
21	my $tmpdir; # tmp files
22	my $tb_config; # testbed configurations
23	my $smb_share; # Share to mount from the master
24	my $project_user; # User to mount project dirs as
25	my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26	my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27	# basename)
28	my $tcl_splitter; # tcl program to split experiments
29	# (changed during devel)
30	my $tclsh; # tclsh to call directly (changed during devel)
31	my @tarfiles; # Tarfiles in use by this experiment
32	my $timeout; # The timeout to use for experiment swap ins
33	my %opts; # Parsed options
34
35	my $tbparams = {}; # Map of the per-testbed parameters from the
36	# testbeds file. It is a reference to a hash
37	# of hashes (because it's passed around a bunch
38	# and it's nicer to have one access pattern
39	# throughout the script, in the main loop and
40	# the subroutines). That access is exemplified
41	# by $tbparams->{'deter'}->{'domain'} which is
42	# the domain parameter of the DETER testbed.
43
44	# Parse the config file. The format is a colon-separated parameter name
45	# followed by the value of that parameter to the end of the line. This parses
46	# that format and puts the parameters into the referenced hash. Parameter
47	# names are mapped to lower case, parameter values are unchanged. Returns 0 on
48	# failure (e.g. file open) and 1 on success.
49	sub parse_config {
50	my($file, $href) = @_;
51	my $fh = new IO::File($file);
52
53	unless ($fh) {
54	warn "Can't open $file: $!\n";
55	return 0;
56	}
57
58	while (<$fh>) {
59	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
60	chomp;
61	/^([^:]+):\s(.)/ && do {
62	my $key = $1;
63
64	$key =~ tr/A-Z/a-z/;
65	$href->{$key} = $2;
66	next;
67	};
68	warn "Unparasble line in $file: $_\n";
69	}
70	$fh->close(); # It will close when it goes out of scope, but...
71	return 1;
72	}
73
74	# Parse an easier-to-read testbeds file (the original was comma-separated
75	# unadorned strings). The format is a testbed scope as [testbed] followed by
76	# the colon-separated attribute-value pairs for the testbed. Right now these
77	# go into a set of global hashes indexed by testbed, but that should probably
78	# change.
79
80	sub parse_testbeds {
81	my($file, $tbparams) = @_; # Testbeds file and parameter hash
82	my $fh = new IO::File($file); # Testbeds filehandle
83	my $tb; # Current testbed
84	# Convert attribute in the file to global variable name. XXX: Again, this
85	# needs to be a 2-level hash
86	my %attr_to_hash = (
87	"opsnode" => "host",
88	"user" => "user",
89	"domain" => "domain",
90	"project" => "project",
91	"connectortype" => "gwtype",
92	"slavenodestartcmd" => "expstart",
93	"slaveconnectorstartcmd" => "gwstart",
94	"masternodestartcmd" => "mexpstart",
95	"masterconnectorstartcmd" => "mgwstart",
96	"connectorimage" => "gwimage",
97	"fileserver" => "fs",
98	"boss" => "boss",
99	"tunnelcfg" => "tun"
100	);
101
102
103	unless ($fh) {
104	warn "Can't open $file: $!\n";
105	return 0;
106	}
107
108	while (<$fh>) {
109	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
110	chomp;
111	/^\s\[(.)\]/ && do {
112	$tb = $1;
113	$tbparams->{$tb} = {} unless $tbparams->{$tb};
114	next;
115	};
116
117	/^([^:]+):\s(.)/ && do {
118	unless ($tb) {
119	warn "Ignored attribute definition before testbed " .
120	"defined in $file: $_\n";
121	next;
122	}
123	my $key = $1;
124	$key =~ tr/A-Z/a-z/;
125	my $var = $attr_to_hash{$key};
126
127	if ($var) { $tbparams->{$tb}->{$var} = $2; }
128	else { warn "Unknown keyword $key in $file\n"; }
129
130	next;
131	};
132	warn "Unparasble line in $file: $_\n";
133	}
134	$fh->close(); # It will close when it goes out of scope, but...
135	return 1;
136	}
137
138
139	# use scp to transfer a file, reporting true if successful and false otherwise.
140	# Parameters are the local file name, the ssh host destination (either hostname
141	# oe user@host), and an optional destination file name or directory. If no
142	# destination is given, the file is transferred to the given user's home
143	# directory. If only a machine is given in the ssh host destination, the
144	# current user is used.
145	sub scp_file {
146	my($file, $user, $host, $dest) = @_;
147
148	# XXX system with a relative pathname is sort of gross
149	system("scp $file $user\@$host:$dest");
150	if ($?) {
151	warn "scp failed $?\n";
152	return 0;
153	}
154	else { return 1; }
155	}
156
157	# use ssh to execute the given command on the machine (and as the user) in
158	# $where. Parameters are the ssh destination directive ($where) and the
159	# command to execute, and a prefix to be placed on a message generated if the
160	# command fails. On failure print a warning if a warning prefix was given and
161	# return false. If timeout is given fork a process and set an alarm of that
162	# many seconds. Timeouts also return 0;
163	sub ssh_cmd {
164	my($user, $host, $cmd, $wname, $timeout) = @_;
165	my $pid; # Child pid
166
167	if ( $timeout ) {
168	if ( $pid = fork () ) {
169	# Parent process
170	# The eval acts as a signal catcher. If the alarm goes off inside
171	# the eval, the die will put "alarm\n" into $@, otherwise the
172	# return value of the execution in the child process will be used.
173	my $rv = eval {
174	local $SIG{'ALRM'} = sub{ die "alarm\n"; };
175	my $rv;
176
177	alarm $timeout;
178	$rv = waitpid($pid, 0);
179	alarm 0;
180	$rv;
181	};
182
183	# If the eval succeeded, $@ will be null and we can use $rv, which
184	# is the return code from the subprocess. If the eval timed out,
185	# print a warning and assume the best.
186	if ($@ eq "alarm\n" ) {
187	warn "$wname timed out - pid $pid still live\n";
188	return 1;
189	}
190	else {
191	return $rv;
192	}
193	}
194	else {
195	# Child process
196	# XXX system with a relative pathname is sort of gross
197	exec("ssh $user\@$host $cmd");
198	exit 0;
199	}
200	}
201	else {
202	# No timeout, do all the work here.
203	# XXX system with a relative pathname is sort of gross
204	system ("ssh $user\@$host $cmd");
205	if ($?) {
206	warn "$wname failed $?\n" if $wname;
207	return 0;
208	}
209	else { return 1; }
210	}
211	}
212
213	# Ship local copies of the federation scripts out to the given host. If any of
214	# the script transfers fails, return 0. The scripts to transfer are from the
215	# global @scripts and are found locally in $local_script_dir (another global).
216	sub ship_scripts {
217	my($host, $user, $dest_dir) = @_; # Where, who, where remotely
218	my $s;
219
220	&ssh_cmd($user, $host, "mkdir -p $dest_dir");
221	for $s (@scripts) {
222	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) \|\|
223	return 0;
224	}
225	return 1;
226	}
227
228	# Ship per-testbed configuration generated by this script to the remote /proj
229	# directories on the remote testbeds
230	sub ship_configs {
231	my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely
232	my($d, $f);
233
234	$d = IO::Dir->new($src_dir) \|\| return 0;
235
236	# All directories under $tmpdir are 770 so we can delete them later.
237	&ssh_cmd($user, $host, "mkdir -p $dest_dir") \|\| return 0;
238	&ssh_cmd($user, $host, "chmod 770 $dest_dir") \|\| return 0;
239	while ( $f = $d->read()) {
240	next if $f =~ /^\./;
241	if ( -d "$src_dir/$f" ) {
242	&ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") \|\|
243	return 0;
244	}
245	else {
246	&scp_file("$src_dir/$f", $user, $host, $dest_dir) \|\| return 0;
247	}
248	}
249	return 1;
250	}
251
252	# Start a sub section of the experiment on a given testbed. The testbed and
253	# the user to start the experiment as are pulled from the global per-testbed
254	# hash, passed in as $tbparams, as is the project name on the remote testbed.
255	# Parameters are the testbed and the experiment id. Configuration files are
256	# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
257	# Then the current state of the experiment determined using expinfo. From that
258	# state, the experiment is either created, modified or spapped in. If
259	# everything succeeds, true is returned. If the global verbose is set progress
260	# messages are printed.
261	sub start_segment {
262	my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
263	# per-testbed parameters and remote
264	# swap-in timeout
265	my $host = # Host name of remote ops (FQDN)
266	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
267	my $user = $tbparams->{$tb}->{'user'}; # user to pass to ssh
268	my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
269	# experiment under
270	my $tclfile = "./$eid.$tb.tcl"; # Local tcl file with the
271	# sub-experiment
272	my $proj_dir = "/proj/$pid/exp/$eid/tmp"; # Where to stash federation stuff
273	my $tarfiles_dir = "/proj/$pid/tarfiles/$eid"; # Where to stash tarfiles
274	my $to_hostname = "$proj_dir/hosts"; # remote hostnames file
275	my $state; # State of remote experiment
276	my $status = new IO::Pipe; # The pipe to get status
277
278	# Determine the status of the remote experiment
279	$status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") \|\|
280	die "Can't ssh to $user\@$host:$!\n";
281
282	# XXX: this is simple now. Parsing may become more complex
283	while (<$status>) {
284	/State: (\w+)/ && ($state = $1);
285	/No\s+such\s+experiment/ && ($state = "none");
286	}
287	$status->close();
288	print "$tb: $state\n";
289
290	# Copy the experiment definition data over
291	print "transferring subexperiment to $tb\n" if $verbose;
292	&scp_file("$tmpdir/$tb/$tclfile", $user, $host) \|\| return 0;
293	# Clear out any old experiment data; if not deleted, copies over it by
294	# different users will fail.
295	# (O /bin/csh, how evil thou art. The -c and the escaped single quotes
296	# force the /bin/sh interpretation of the trailing * (which we need to keep
297	# tmp around)) Again, this needs to be done more properly once we have a
298	# non-ssh interface here.)
299	print "clearing experiment subdirs on $tb\n" if $verbose;
300	&ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") \|\|
301	return 0;
302	print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
303	&ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") \|\|
304	return 0;
305	print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
306	&ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") \|\|
307	return 0;
308	# Remote experiment is active. Modify it.
309	if ($state eq "active") {
310	print "Transferring federation support files to $tb\n" if $verbose;
311	# First copy new scripts and hostinfo into the remote /proj
312	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
313	return 0;
314	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
315	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
316	if ( -d "$tmpdir/tarfiles") {
317	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
318	return 0;
319	}
320
321	print "Modifying $eid in place on $tb\n" if $verbose;
322	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
323	"$eid $tclfile", "modexp", $timeout) \|\| return 0;
324	return 1;
325	}
326
327	# Remote experiment is swapped out, modify it and swap it in.
328	if ($state eq "swapped") {
329	print "Transferring federation support files to $tb\n" if $verbose;
330	# First copy new scripts and hostinfo into the remote /proj (because
331	# the experiment exists, the directory tree should be there.
332	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
333	return 0;
334	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
335	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
336	if ( -d "$tmpdir/tarfiles") {
337	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
338	return 0;
339	}
340
341	print "Modifying $eid on $tb\n" if $verbose;
342	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
343	"modexp") \|\| return 0;
344	print "Swapping $eid in on $tb\n" if $verbose;
345	# Now start up
346	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
347	"swapexp", $timeout) \|\| return 0;
348	return 1;
349	}
350
351	# No remote experiment. Create one. We do this in 2 steps so we can put
352	# the configuration files and scripts into the new experiment directories.
353	if ($state eq "none") {
354
355	if ( -d "$tmpdir/tarfiles") {
356	# Tarfiles have to exist for the creation to work
357	print "copying tarfiles to $tb\n";
358	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
359	return 0;
360	}
361	print "Creating $eid on $tb\n" if $verbose;
362	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
363	"$pid -e $eid $tclfile", "startexp") \|\| return 0;
364	print "Transferring federation support files to $tb\n" if $verbose;
365	# First copy new scripts and hostinfo into the remote /proj
366	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
367	return 0;
368	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
369	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
370	# Now start up
371	print "Swapping $eid in on $tb\n" if $verbose;
372	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
373	"swapexp", $timeout) \|\| return 0;
374	return 1;
375	}
376
377	# Every branch for a known state returns. If execution gets here, the
378	# state is unknown.
379	warn "unknown state: $state\n";
380	return 0;
381	}
382
383	# Swap out a sub-experiment - probably because another has failed. Arguments
384	# are testbed and experiment. Most of the control flow is similar to
385	# start_segment, though much simpler.
386	sub stop_segment {
387	my($tb, $eid, $tbparams) = @_; # testbed, experiment ID and
388	# per-testbed parameters
389	my $user = $tbparams->{$tb}->{'user'}; # testbed user
390	my $host = # Ops node
391	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
392	my $pid = $tbparams->{$tb}->{'project'};# testbed project
393
394	print "Stopping $eid on $tb\n" if $verbose;
395	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out",
396	"swapexp (out)") \|\| return 0;
397	return 1;
398	}
399
400
401	$pid = $gid = "dummy"; # Default project and group to pass to
402	# $tcl_splitter above. These are total
403	# dummy arguments; the splitter doesn't
404	# use them at all, but we supply them to
405	# keep our changes to the parser minimal.
406	# Argument processing.
407	getopts('t:c:f:ndvN', \%opts);
408	$splitter_config = $opts{'c'} \|\| "./splitter.conf";
409	$debug = $opts{'d'};
410	$verbose = $opts{'v'} \|\| $opts{'d'};
411
412	&parse_config("$splitter_config", \%opts) \|\|
413	die "Cannot read config file $splitter_config: $!\n";
414
415
416	$startem = $opts{'n'} ? 0 : 1; # If true, start the sub-experiments
417	$timeout = $opts{'t'} \|\| $opts{'timeout'};
418	$eid = $opts{'experiment'}; # Experiment ID
419	$tcl = $opts{'f'} \|\| shift; # The experiment description
420	$master = $opts{'master'}; # Master testbed
421	$tmpdir = $opts{'tmpdir'} \|\| $opts{'tempdir'}\|\| "/tmp"; # tmp files
422	$tb_config = $opts{'testbeds'} \|\| "./testbeds"; # testbed configurations
423	$local_script_dir = $opts{'scriptdir'}; # Local scripts
424
425	$smb_share = $opts{'smbshare'} \|\| # Share to mount from the master
426	die "Must give an SMB share\n";
427	$project_user = $opts{'smbuser'} \|\| # User to mount project dirs as
428	die "Must give an SMB user\n";
429
430	# For now specify these. We may want to generate them later.
431	$gw_pubkey = $opts{'gatewaypubkey'};
432	($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
433	$gw_secretkey = $opts{'gatewaysecretkey'};
434	($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
435
436	# tcl program to split experiments (changed during devel)
437	$tcl_splitter = $opts{'tclparse'} \|\| "/usr/testbed/lib/ns2ir/parse.tcl";
438	# tclsh to call directly (changed during devel)
439	$tclsh = $opts{'tclsh'} \|\| "/usr/local/bin/otclsh";
440
441	# Prefix to avoid collisions
442	$tmpdir .= "/split$$";
443
444	print "Temp files are in $tmpdir\n" if $verbose;
445	# Create a workspace
446	unless (-d "$tmpdir") {
447	mkdir("$tmpdir") \|\| die "Can't create $tmpdir: $!";
448	}
449
450	# Validate scripts directory
451	for my $s (@scripts) {
452	die "$local_script_dir/$s not in local script directory. Try -d\n"
453	unless -r "$local_script_dir/$s";
454	}
455
456	die "Must supply file, master and experiment" unless $master && $tcl && $eid;
457
458	if ($opts{'N'} ) {
459	&parse_testbeds($tb_config, $tbparams) \|\|
460	die "Cannot testbed congfigurations from $tb_config: $!\n";
461	}
462	else {
463	# Read a hash of per-testbed parameters from the local configurations.
464	my $conf = new IO::File($tb_config) \|\|
465	die "can't read testbed configutions from $tb_config: $!\n";
466	while (<$conf>) {
467	next if /^#/;
468	chomp;
469	my($tb, $h, $d, $u, $p, $es, $gs, $mes, $mgs, $t, $i, $fs, $boss,
470	$tun) = split(":", $_);
471	$tbparams->{$tb}->{'host'} = $h;
472	$tbparams->{$tb}->{'user'} = $u;
473	$tbparams->{$tb}->{'domain'} = $d;
474	$tbparams->{$tb}->{'project'} = $p;
475	$tbparams->{$tb}->{'gwtype'} = $t;
476	$tbparams->{$tb}->{'expstart'} = $es;
477	$tbparams->{$tb}->{'gwstart'} = $gs;
478	$tbparams->{$tb}->{'mexpstart'} = $mes;
479	$tbparams->{$tb}->{'mgwstart'} = $mgs;
480	$tbparams->{$tb}->{'gwimage'} = $i;
481	$tbparams->{$tb}->{'fs'} = $fs;
482	$tbparams->{$tb}->{'boss'} = $boss;
483	$tbparams->{$tb}->{'tun'} = $tun;
484
485	# Make sure the domain starts with a period
486	$tbparams->{$tb}->{'domain'} = "." . $tbparams->{$tb}->{'domain'}
487	unless $tbparams->{$tb}->{'domain'} =~ /^\./;
488	}
489	$conf->close();
490	}
491
492	# Open a pipe to the splitter program and start it parsing the experiments
493	my $pipe = new IO::Pipe;
494	# NB no more -p call on parse call.
495	$pipe->reader("$tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl") \|\|
496	die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
497
498	# Parsing variables
499	my $ctb; # Current testbed
500	my %allocated; # If allocated{$tb} > 0, $tb is in use
501	my $destfile; # File that the sub-experiment tcl file is
502	# being written to, or "" if none. Also used
503	# for hostnames file.
504	my $desthandle; # File handle for distfile
505	my $gateways; # when gateway lists are being processed this
506	# is the testbed whose gateways are being
507	# gathered.
508	my $control_gateway; # Control net gateway for the current testbed
509	my %active_end; # If active_end{"a-b"} > 0 then a is the active
510	# end of the a <-> b connector pair.
511
512	# Parse the splitter output. This loop creates the sub experiments, gateway
513	# configurations and hostnames file
514	while (<$pipe>) {
515	# Start of a sub-experiment
516	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
517	$ctb = $1;
518
519	# If we know the testbed, start collecting its sub experiment tcl
520	# description. If not, warn the caller and ignore the configuration of
521	# this testbed.
522	if ($tbparams->{$ctb}) {
523	$allocated{$ctb}++; # Keep track of the testbeds allocated
524
525	unless (-d "$tmpdir/$ctb") {
526	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
527	}
528	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
529
530	$desthandle = new IO::File(">$destfile") \|\|
531	die "Cannot open $destfile:$!\n";
532	}
533	else {
534	warn "No such testbed $ctb\n";
535	$destfile = "";
536	}
537	next;
538	};
539
540	# End of that experiment
541	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
542	# Simple syntax check and close out this experiment's tcl description
543	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
544	$desthandle->close();
545	$destfile = $ctb = "";
546	next;
547	};
548
549	# Beginning of a gateway set
550	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
551	$gateways = $1;
552	# If we've heard of this tb, create the config lines for it one at a
553	# time.
554	if ($allocated{$gateways}) {
555	# Just in case. This directory should already have been created
556	# above.
557	unless (-d "$tmpdir/$gateways") {
558	mkdir("$tmpdir/$gateways") \|\|
559	die "Can't create $tmpdir/$gateways: $!";
560	}
561	}
562	else {
563	warn "Gateways given (and ignored) for testbed not in use: " .
564	"$gateways\n";
565	$gateways = 0;
566	}
567	next;
568	};
569	# End of the gateways section. Output the client config for this testbed
570	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
571	die "Mismatched gateway markers ($1, $gateways)\n"
572	unless !$gateways \|\| $gateways == $1;
573
574	if ($control_gateway ) {
575	# Client config
576	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
577	my $master_project = $tbparams->{$master}->{'project'};
578	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
579	print $cc "ControlGateway: $control_gateway\n";
580	print $cc "SMBShare: $smb_share\n";
581	print $cc "ProjectUser: $project_user\n";
582	print $cc "ProjectName: $master_project\n";
583	$cc->close();
584	}
585	else { warn "No control gateway for $gateways?\n"; }
586
587	$gateways = 0;
588	next;
589	};
590	# Beginning of the hostnames list. Collection is always in the hostnames
591	# file.
592	/^#\s+Begin\s+hostnames/ && do {
593	$destfile = "$tmpdir/hostnames";
594	$desthandle = new IO::File(">$destfile") \|\|
595	die "Can't open $destfile:$!\n";
596	next;
597	};
598	# end of the hostnames list.
599	/^#\s+End\s+hostnames/ && do {
600	$desthandle->close();
601	$destfile = "";
602	next;
603	};
604
605	# Generate gateway configuration info, one file per line
606	$gateways && do {
607	chomp;
608	my($dtb, $myname, $desthost, $type) = split(" ", $_);
609
610	# Many of these are to simplify print statements
611	my $sdomain = # domain for the source
612	$tbparams->{$gateways}->{'domain'};
613	my $ddomain = # domain for the destination
614	$tbparams->{$dtb}->{'domain'};
615	my $sproject = # Project of the destination
616	$tbparams->{$gateways}->{'project'};
617	my $fs = # Master fs node (FQDN)
618	$tbparams->{$master}->{'fs'} . $tbparams->{$master}->{'domain'};
619	my $boss = # Master boss node (FQDN)
620	$tbparams->{$master}->{'boss'} . $tbparams->{$master}->{'domain'};
621	my $remote_script_dir = # Remote fed script location
622	"/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
623	my $active; # Is this the active side of
624	# the connector?
625
626	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
627	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
628
629	my $conf_file = "$myname$sdomain.gw.conf";
630	# translate to lower case so the `hostname` hack for specifying
631	# configuration files works.
632	$conf_file =~ tr/A-Z/a-z/;
633
634	# If either end of this link is in the master side of the testbed, that
635	# side is the active end. Otherwise the first testbed encountered in
636	# the file will be the active end. The $active_end variable keeps
637	# track of those decisions
638	if ( $dtb eq $master ) { $active = "false"; }
639	elsif ($gateways eq $master ) { $active = "true"; }
640	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
641	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
642
643	# This is used to create the client configuration.
644	$control_gateway = "$myname$sdomain"
645	if $type =~ /(control\|both)/;
646
647	# Write out the file
648	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")\|\|
649	die "can't open $tmpdir/$gateways/$conf_file: $!\n";
650
651	print $gwconfig "Active: $active\n";
652	print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
653	print $gwconfig "BossName: $boss\n";
654	print $gwconfig "FsName: $fs\n";
655	print $gwconfig "Type: $type\n";
656	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
657	print $gwconfig "Peer: $desthost$ddomain\n";
658	print $gwconfig "Pubkeys: " .
659	"/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
660	print $gwconfig "Privkeys: " .
661	"/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
662	$gwconfig->close();
663
664	# This testbed has a gateway (most will) so make a copy of the keys it
665	# needs in this testbed's subdirectory. start_segment will transfer
666	# them.
667	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
668	copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") \|\|
669	die "Can't copy pubkeys ($gw_pubkey to " .
670	"$tmpdir/$gateways/$gw_pubkey_base): $!\n";
671	}
672	if ($active eq "true" ) {
673	unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
674	copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") \|\|
675	die "Can't copy secret keys ($gw_secretkey to " .
676	"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
677	}
678	}
679
680	#done processing gateway entry, ready for next line
681	next;
682	};
683	(/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
684	next if /^#/;
685	chomp;
686	push(@tarfiles, $_);
687	next;
688	};
689
690	next unless $destfile; # Unidentified testbed, ignore config
691	# local copies that can be used in the substitutions below
692	my $gwtype = $tbparams->{$ctb}->{'gwtype'};
693	my $gwimage = $tbparams->{$ctb}->{'gwimage'};
694	my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
695	my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
696	my $gwstart = $tbparams->{$ctb}->{'gwstart'};
697	my $expstart = $tbparams->{$ctb}->{'expstart'};
698	my $project = $tbparams->{$ctb}->{'project'};
699
700	# Substitute variables
701	s/GWTYPE/$gwtype/g;
702	s/GWIMAGE/$gwimage/g;
703	if ($ctb eq $master ) {
704	s/GWSTART/$mgwstart/g;
705	s/EXPSTART/$mexpstart/g;
706	}
707	else {
708	s/GWSTART/$gwstart/g;
709	s/EXPSTART/$expstart/g;
710	}
711	# XXX: oh is this bad
712	s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
713	s#PROJDIR#/proj/$project/#g;
714	s#EID#$eid#g;
715	s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
716	print $desthandle $_;
717	}
718	$pipe->close();
719	die "No nodes in master testbed ($master)\n" unless $allocated{$master};
720
721	for my $t (@tarfiles) {
722	die "tarfile '$t' unreadable: $!\n" unless -r $t;
723	unless (-d "$tmpdir/tarfiles") {
724	mkdir("$tmpdir/tarfiles") \|\|
725	die "Can't create $tmpdir/tarfiles:$!\n";
726	}
727	copy($t, "$tmpdir/tarfiles") \|\|
728	die "Can't copy $t to $tmpdir/tarfiles:$!\n";
729	}
730
731	exit(0) unless $startem;
732
733	my %started; # If $started{$tb} then $tb successfully started
734
735	# Start up the slave sub-experiments first
736	TESTBED:
737	for my $tb (keys %allocated) {
738	if ($tb ne $master) {
739	if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
740	else { last TESTBED; }
741	}
742	}
743
744	# Now the master
745	if (&start_segment($master, $eid, $tbparams, $timeout)) {
746	$started{$master}++;
747	}
748
749	# If any testbed failed, swap the rest out.
750	if ( scalar(keys %started) != scalar(keys %allocated)) {
751	for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
752	print "Error starting experiment\n";
753	exit(1);
754	}
755	print "Experiment started\n";
756	print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
757	system("rm -rf $tmpdir") unless $debug;
758	exit(0); # set the exit value
759
760	=pod
761
762	=head1 NAME
763
764	B<splitter.pl>
765
766	=head1 SYNOPSIS
767
768	B<splitter.pl> [B<-nd>] [B<-c> F<config_file>] [B<-f> F<experiment_tcl>]
769	[F<experiment_tcl>]
770
771	=head1 DESCRIPTION
772
773	B<splitter.pl> invokes the DETER experiment parser to split an annotated
774	experiment into multiple sub-experments and instantiates the sub-experiments on
775	their intended testbeds. Annotation is accomplished using the
776	tb-set-node-testbed command, added to the parser.
777
778	Much of the script's behavior depends on the configuration file, specified with
779	the B<-c> flag and defaulting to F<./splitter.conf>.
780
781	The testbed labels supplied in the B<tb-set-node-testbed> command are
782	meaningful based on their presence in the testbeds file. that file can be
783	specified in the configuration file using the B<Testbeds> directive, and
784	defaults to F<./testbeds>. The syntax is described below.
785
786	Most of the intermediate files are staged in a sub-directory of a temporary
787	files directory and deleted at the end of the script. Specifying the B<-d>
788	flag on the command line avoids the deletion for debbugging. By default the
789	temporary files directory is directory is F</tmp> and can be reset in the
790	configuration file using the B<Tmpdir> directive. Intermediate files are
791	stored under a subdirectory formed by adding the process ID of the splitter
792	process. For example, if the temporary files directory is F</tmp> and the
793	B<splitter.pl> process ID is 2323, the temporary files will be stored in
794	F</tmp/split2323/>.
795
796	The expreriment is split out into one experiment description per testbed in the
797	temporary directory named as F<experiment.testbed.tcl> where the experiment is
798	the experiment ID given in the configuration file, and the testbed is the
799	tb-set-node-testbed parameter for the nodes in the file.
800
801	If the B<-n> option is absent the sub-experiments are then instantiated on
802	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
803	Per-testbed parameters are set in the testbeds file. Sub-experiments on
804	slave testbeds are instantiated in a random order, but the master testbed is
805	currently instantiated last.
806
807	Scripts to start federation (the federation kit) are copied into the local
808	experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
809	taken from the directory given by the B<ScriptDir> directive in the
810	configuration file.
811
812	If any sub-experiment fails to instantiate, the other sub-exeriments are
813	swapped out.
814
815	=head2 Configuration File
816
817	The configuration file is a simple set of colon-separated parameters and
818	values. A configuration file must be present, either specified in the B<-c>
819	flag or the default F<./splitter.conf>. All the parameter names are case
820	insensitive, but should not include any whitespace. Parameter values may
821	include whitespace, but no newlines.
822
823	Possible parameters are:
824
825	=over 5
826
827	=item Experiment
828
829	The name of the experiment on the various testbeds
830
831	=item Master
832
833	The master testbed label from the testbeds file, described below.
834
835	=item Testbeds
836
837	The testbeds file described below, giving per-testbed parameters. If this
838	directive is absent the testbeds file defaults to F<./testbeds>
839
840	=item ScriptDir
841
842	Location of the default federation scripts, i.e. the federation kit.
843
844	=item GatewayPubkey
845
846	=item GatewaySecretKey
847
848	The names of the files containing secret and public keys to use in setting up
849	tunnels between testbeds. These will eventually be automatically generated.
850
851	=item TmpDir
852
853	=item TempDir
854
855	The directory where temporary files are created. These are synonyms, but
856	should both be specified, B<TmpDir> has priority. If neither is specified,
857	F</tmp> is used.
858
859	=item SMBShare
860
861	The SMB share on the master testbed that will be exported to remote clients.
862
863	=item SMBUser
864
865	The experiment user to mount project directories as. This user needs to be a
866	member of the exported experiment - that is one of the users in the project
867	containing this experiment on the master testbed.
868
869	=item Tclparse
870
871	The pathname to the experiment parsing program. Only developers should set
872	this.
873
874	=item Tclsh
875
876	The pathname to the local oTcl shell. Only developers should set
877	this.
878
879	=back
880
881	=head2 Testbeds file
882
883	The configuration file (F<./testbeds> unless overridden by B<-c>) is a
884	colon-separated set of parameters keyed by testbed name. The fields, in order,
885	are:
886
887	=over 5
888
889	=item name
890
891	The testbed to which this line of parameters applies.
892
893	=item user
894
895	The user under which to make requests to this testbed. The user running
896	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
897	testbed.
898
899	=item host
900
901	The host name of the testbed's ops node. The user calling B<splitter.pl> must
902	be able to execute commands on this host via L<ssh(1)>.
903
904	=item domain
905
906	The domain of nodes in this testbed (including the ops host). This parameter
907	should always start with a period.
908
909	=item project
910
911	The project under which to instantiate sub-experiments on this testbed.
912
913	=item gateway type
914
915	The node type for inter-testbed gateway nodes on this testbed.
916
917	=item experiment start (slave)
918
919	The start command to run on experimental nodes when this testbed is used as a
920	slave. In all the start commands the string FEDDIR will be replaced by the
921	local experiment's federation scripts directory and the string GWCONF replaced
922	by the gatway configuration file.
923
924	=item gateway start (slave)
925
926	The start command to run on gateway nodes when this testbed is used as a slave.
927	The same string substitutions are made in this command as in experiment start.
928
929	=item experiment start (master)
930
931	The start command to run on experimental nodes when this testbed is used as a
932	master. The same string substitutions are made in this command as in
933	experiment start.
934
935	=item gateway start (master)
936
937	The start command to run on gateway nodes when this testbed is used as a
938	master. The same string substitutions are made in this command as in
939	experiment start.
940
941	=item gateway image
942
943	The disk image to be loaded on a gateway node on this testbed.
944
945	=item filesystem node
946
947	The node in the master testbed from which filesystems are mounted.
948
949	=item boss node
950
951	The node in the master testbed that controls the testbed.
952
953	=item tunnel configuration
954
955	True if the connector needs to do DETER federation. This parameter will
956	probably be removed.
957
958	=back
959
960	The parsing of the testbeds is extremely simple. Colons separate each
961	field and there is n provision for escaping them at this time.
962
963	=head2 New Testbeds File Format
964
965	The testbeds file has ben revamped to make it more human readable. The
966	parameters are now named and specified on separate lines of the configuration
967	file. Each testbed's parameters are preceeded by the testbed label in brackets
968	([]) on a line by itself. After that the parameters are specified as
969	parameter: value. This is essentially the same format as the configuration
970	file. Parameters are:
971
972	=over 4
973
974	=item User
975
976	The user under which to make requests to this testbed. The user running
977	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
978	testbed.
979
980	=item OpsNode
981
982	The host name of the testbed's ops node. The user calling B<splitter.pl> must
983	be able to execute commands on this host via L<ssh(1)>.
984
985	=item Domain
986
987	The domain of nodes in this testbed (including the ops host). This parameter
988	should always start with a period.
989
990	=item Project
991
992	The project under which to instantiate sub-experiments on this testbed.
993
994	=item ConnectorType
995
996	The node type for inter-testbed connector nodes on this testbed.
997
998	=item SlaveNodeStartCmd
999
1000	The start command to run on experimental nodes when this testbed is used as a
1001	slave. In all the start commands the string FEDDIR will be replaced by the
1002	local experiment's federation scripts directory and the string GWCONF replaced
1003	by the gatway configuration file.
1004
1005	=item SlaveConnectorStartCmd
1006
1007	The start command to run on gateway nodes when this testbed is used as a slave.
1008	The same string substitutions are made in this command as in experiment start.
1009
1010	=item MasterNodeStartCmd
1011
1012	The start command to run on experimental nodes when this testbed is used as a
1013	master. The same string substitutions are made in this command as in
1014	experiment start.
1015
1016	=item MasterConnectorStartCmd
1017
1018	The start command to run on gateway nodes when this testbed is used as a
1019	master. The same string substitutions are made in this command as in
1020	experiment start.
1021
1022	=item ConnectorImage
1023
1024	The disk image to be loaded on a gateway node on this testbed.
1025
1026	=item FileServer
1027
1028	The node in the master testbed from which filesystems are mounted.
1029
1030	=item Boss
1031
1032	The node in the master testbed that controls the testbed.
1033
1034	=item TunnelCfg
1035
1036	True if the connector needs to do DETER federation. This parameter will
1037	probably be removed.
1038
1039
1040	=back
1041
1042
1043	=head1 ENVIRONMENT
1044
1045	B<splitter.pl> does not directly make use of environment variables, but calls
1046	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1047	environment.
1048
1049	=head1 SEE ALSO
1050
1051	L<sh(1)>, L<ssh(1)>
1052
1053	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: