Context Navigation

source: fedkit/splitter.pl @ e2a71ebe

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02

Last change on this file since e2a71ebe was e2a71ebe, checked in by Ted Faber <faber@…>, 17 years ago
remote rpms
Property mode set to `100644`
File size: 36.5 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10
11	my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12	my $local_script_dir = ".";
13	my($pid, $gid); # Process and group IDs for calling parse.tcl
14	my $splitter_config; # Configuration file
15	my $debug; # True if thecalled in debug mode
16	my $verbose; # True for extra progress reports
17	my $startem; # If true, start the sub-experiments
18	my $eid; # Experiment ID
19	my $tcl; # The experiment description (topology file)
20	my $master; # Master testbed
21	my $tmpdir; # tmp files
22	my $tb_config; # testbed configurations
23	my $smb_share; # Share to mount from the master
24	my $project_user; # User to mount project dirs as
25	my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26	my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27	# basename)
28	my $tcl_splitter; # tcl program to split experiments
29	# (changed during devel)
30	my $tclsh; # tclsh to call directly (changed during devel)
31	my @tarfiles; # Tarfiles in use by this experiment
32	my @rpms; # Rpms in use by this experiment
33	my $timeout; # The timeout to use for experiment swap ins
34	my %opts; # Parsed options
35
36	my $tbparams = {}; # Map of the per-testbed parameters from the
37	# testbeds file. It is a reference to a hash
38	# of hashes (because it's passed around a bunch
39	# and it's nicer to have one access pattern
40	# throughout the script, in the main loop and
41	# the subroutines). That access is exemplified
42	# by $tbparams->{'deter'}->{'domain'} which is
43	# the domain parameter of the DETER testbed.
44	my $fail_soft; # Do not swap failed sub-experiments out
45
46	# Parse the config file. The format is a colon-separated parameter name
47	# followed by the value of that parameter to the end of the line. This parses
48	# that format and puts the parameters into the referenced hash. Parameter
49	# names are mapped to lower case, parameter values are unchanged. Returns 0 on
50	# failure (e.g. file open) and 1 on success.
51	sub parse_config {
52	my($file, $href) = @_;
53	my $fh = new IO::File($file);
54
55	unless ($fh) {
56	warn "Can't open $file: $!\n";
57	return 0;
58	}
59
60	while (<$fh>) {
61	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
62	chomp;
63	/^([^:]+):\s(.)/ && do {
64	my $key = $1;
65
66	$key =~ tr/A-Z/a-z/;
67	$href->{$key} = $2;
68	next;
69	};
70	warn "Unparasble line in $file: $_\n";
71	}
72	$fh->close(); # It will close when it goes out of scope, but...
73	return 1;
74	}
75
76	# Parse an easier-to-read testbeds file (the original was comma-separated
77	# unadorned strings). The format is a testbed scope as [testbed] followed by
78	# the colon-separated attribute-value pairs for the testbed. Right now these
79	# go into a set of global hashes indexed by testbed, but that should probably
80	# change.
81
82	sub parse_testbeds {
83	my($file, $tbparams) = @_; # Testbeds file and parameter hash
84	my $fh = new IO::File($file); # Testbeds filehandle
85	my $tb; # Current testbed
86	# Convert attribute in the file to tbparams hash key
87	my %attr_to_hash = (
88	"opsnode" => "host",
89	"user" => "user",
90	"domain" => "domain",
91	"project" => "project",
92	"connectortype" => "gwtype",
93	"slavenodestartcmd" => "expstart",
94	"slaveconnectorstartcmd" => "gwstart",
95	"masternodestartcmd" => "mexpstart",
96	"masterconnectorstartcmd" => "mgwstart",
97	"connectorimage" => "gwimage",
98	"fileserver" => "fs",
99	"boss" => "boss",
100	"tunnelcfg" => "tun"
101	);
102
103
104	unless ($fh) {
105	warn "Can't open $file: $!\n";
106	return 0;
107	}
108
109	while (<$fh>) {
110	next if /^\s#/ \|\| /^\s$/; # Skip comments & blanks
111	chomp;
112	/^\s\[(.)\]/ && do {
113	$tb = $1;
114	$tbparams->{$tb} = {} unless $tbparams->{$tb};
115	next;
116	};
117
118	/^([^:]+):\s(.)/ && do {
119	unless ($tb) {
120	warn "Ignored attribute definition before testbed " .
121	"defined in $file: $_\n";
122	next;
123	}
124	my $key = $1;
125	$key =~ tr/A-Z/a-z/;
126	my $var = $attr_to_hash{$key};
127
128	if ($var) { $tbparams->{$tb}->{$var} = $2; }
129	else { warn "Unknown keyword $key in $file\n"; }
130
131	next;
132	};
133	warn "Unparasble line in $file: $_\n";
134	}
135	$fh->close(); # It will close when it goes out of scope, but...
136	return 1;
137	}
138
139
140	# use scp to transfer a file, reporting true if successful and false otherwise.
141	# Parameters are the local file name, the ssh host destination (either hostname
142	# oe user@host), and an optional destination file name or directory. If no
143	# destination is given, the file is transferred to the given user's home
144	# directory. If only a machine is given in the ssh host destination, the
145	# current user is used.
146	sub scp_file {
147	my($file, $user, $host, $dest) = @_;
148
149	# XXX system with a relative pathname is sort of gross
150	system("scp $file $user\@$host:$dest");
151	if ($?) {
152	warn "scp failed $?\n";
153	return 0;
154	}
155	else { return 1; }
156	}
157
158	# use ssh to execute the given command on the machine (and as the user) in
159	# $where. Parameters are the ssh destination directive ($where) and the
160	# command to execute, and a prefix to be placed on a message generated if the
161	# command fails. On failure print a warning if a warning prefix was given and
162	# return false. If timeout is given fork a process and set an alarm of that
163	# many seconds. Timeouts also return 0;
164	sub ssh_cmd {
165	my($user, $host, $cmd, $wname, $timeout) = @_;
166	my $pid; # Child pid
167
168	if ( $timeout ) {
169	if ( $pid = fork () ) {
170	# Parent process
171	# The eval acts as a signal catcher. If the alarm goes off inside
172	# the eval, the die will put "alarm\n" into $@, otherwise the
173	# return value of the execution in the child process will be used.
174	my $rv = eval {
175	local $SIG{'ALRM'} = sub{ die "alarm\n"; };
176	my $rv;
177
178	alarm $timeout;
179	$rv = waitpid($pid, 0);
180	alarm 0;
181	$rv;
182	};
183
184	# If the eval succeeded, $@ will be null and we can use $rv, which
185	# is the return code from the subprocess. If the eval timed out,
186	# print a warning and assume the best.
187	if ($@ eq "alarm\n" ) {
188	warn "$wname timed out - pid $pid still live\n";
189	return 1;
190	}
191	else {
192	return $rv;
193	}
194	}
195	else {
196	# Child process
197	# XXX system with a relative pathname is sort of gross
198	exec("ssh $user\@$host $cmd");
199	exit 0;
200	}
201	}
202	else {
203	# No timeout, do all the work here.
204	# XXX system with a relative pathname is sort of gross
205	system ("ssh $user\@$host $cmd");
206	if ($?) {
207	warn "$wname failed $?\n" if $wname;
208	return 0;
209	}
210	else { return 1; }
211	}
212	}
213
214	# Ship local copies of the federation scripts out to the given host. If any of
215	# the script transfers fails, return 0. The scripts to transfer are from the
216	# global @scripts and are found locally in $local_script_dir (another global).
217	sub ship_scripts {
218	my($host, $user, $dest_dir) = @_; # Where, who, where remotely
219	my $s;
220
221	&ssh_cmd($user, $host, "mkdir -p $dest_dir");
222	for $s (@scripts) {
223	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) \|\|
224	return 0;
225	}
226	return 1;
227	}
228
229	# Ship per-testbed configuration generated by this script to the remote /proj
230	# directories on the remote testbeds
231	sub ship_configs {
232	my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely
233	my($d, $f);
234
235	$d = IO::Dir->new($src_dir) \|\| return 0;
236
237	# All directories under $tmpdir are 770 so we can delete them later.
238	&ssh_cmd($user, $host, "mkdir -p $dest_dir") \|\| return 0;
239	&ssh_cmd($user, $host, "chmod 770 $dest_dir") \|\| return 0;
240	while ( $f = $d->read()) {
241	next if $f =~ /^\./;
242	if ( -d "$src_dir/$f" ) {
243	&ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") \|\|
244	return 0;
245	}
246	else {
247	&scp_file("$src_dir/$f", $user, $host, $dest_dir) \|\| return 0;
248	}
249	}
250	return 1;
251	}
252
253	# Start a sub section of the experiment on a given testbed. The testbed and
254	# the user to start the experiment as are pulled from the global per-testbed
255	# hash, passed in as $tbparams, as is the project name on the remote testbed.
256	# Parameters are the testbed and the experiment id. Configuration files are
257	# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
258	# Then the current state of the experiment determined using expinfo. From that
259	# state, the experiment is either created, modified or spapped in. If
260	# everything succeeds, true is returned. If the global verbose is set progress
261	# messages are printed.
262	sub start_segment {
263	my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
264	# per-testbed parameters and remote
265	# swap-in timeout
266	my $host = # Host name of remote ops (FQDN)
267	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
268	my $user = $tbparams->{$tb}->{'user'}; # user to pass to ssh
269	my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
270	# experiment under
271	my $tclfile = "./$eid.$tb.tcl"; # Local tcl file with the
272	# sub-experiment
273	my $proj_dir = "/proj/$pid/exp/$eid/tmp"; # Where to stash federation stuff
274	my $tarfiles_dir = "/proj/$pid/tarfiles/$eid"; # Where to stash tarfiles
275	my $rpms_dir = "/proj/$pid/rpms/$eid"; # Where to stash rpms
276	my $to_hostname = "$proj_dir/hosts"; # remote hostnames file
277	my $state; # State of remote experiment
278	my $status = new IO::Pipe; # The pipe to get status
279
280	# Determine the status of the remote experiment
281	$status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") \|\|
282	die "Can't ssh to $user\@$host:$!\n";
283
284	# XXX: this is simple now. Parsing may become more complex
285	while (<$status>) {
286	/State: (\w+)/ && ($state = $1);
287	/No\s+such\s+experiment/ && ($state = "none");
288	}
289	$status->close();
290	print "$tb: $state\n";
291
292	# Copy the experiment definition data over
293	print "transferring subexperiment to $tb\n" if $verbose;
294	&scp_file("$tmpdir/$tb/$tclfile", $user, $host) \|\| return 0;
295	# Clear out any old experiment data; if not deleted, copies over it by
296	# different users will fail.
297	# (O /bin/csh, how evil thou art. The -c and the escaped single quotes
298	# force the /bin/sh interpretation of the trailing * (which we need to keep
299	# tmp around)) Again, this needs to be done more properly once we have a
300	# non-ssh interface here.)
301	print "clearing experiment subdirs on $tb\n" if $verbose;
302	&ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") \|\|
303	return 0;
304	print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
305	&ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") \|\|
306	return 0;
307	print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
308	&ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") \|\|
309	return 0;
310	print "clearing experiment rpms subdirs on $tb\n" if $verbose;
311	&ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") \|\|
312	return 0;
313	print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
314	&ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") \|\|
315	return 0;
316	# Remote experiment is active. Modify it.
317	if ($state eq "active") {
318	print "Transferring federation support files to $tb\n" if $verbose;
319	# First copy new scripts and hostinfo into the remote /proj
320	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
321	return 0;
322	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
323	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
324
325	if ( -d "$tmpdir/tarfiles") {
326	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
327	return 0;
328	}
329
330	if ( -d "$tmpdir/rpms") {
331	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
332	return 0;
333	}
334
335	print "Modifying $eid in place on $tb\n" if $verbose;
336	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
337	"$eid $tclfile", "modexp", $timeout) \|\| return 0;
338	return 1;
339	}
340
341	# Remote experiment is swapped out, modify it and swap it in.
342	if ($state eq "swapped") {
343	print "Transferring federation support files to $tb\n" if $verbose;
344	# First copy new scripts and hostinfo into the remote /proj (because
345	# the experiment exists, the directory tree should be there.
346	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
347	return 0;
348	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
349	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
350	if ( -d "$tmpdir/tarfiles") {
351	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
352	return 0;
353	}
354
355	if ( -d "$tmpdir/rpms") {
356	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
357	return 0;
358	}
359
360	print "Modifying $eid on $tb\n" if $verbose;
361	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
362	"modexp") \|\| return 0;
363	print "Swapping $eid in on $tb\n" if $verbose;
364	# Now start up
365	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
366	"swapexp", $timeout) \|\| return 0;
367	return 1;
368	}
369
370	# No remote experiment. Create one. We do this in 2 steps so we can put
371	# the configuration files and scripts into the new experiment directories.
372	if ($state eq "none") {
373
374	if ( -d "$tmpdir/tarfiles") {
375	# Tarfiles have to exist for the creation to work
376	print "copying tarfiles to $tb\n";
377	&ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) \|\|
378	return 0;
379	}
380
381	if ( -d "$tmpdir/rpms") {
382	&ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) \|\|
383	return 0;
384	}
385
386	print "Creating $eid on $tb\n" if $verbose;
387	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
388	"$pid -e $eid $tclfile", "startexp") \|\| return 0;
389	# After startexp succeeds, the per-experiment directories exist on the
390	# remote testbed.
391	print "Transferring federation support files to $tb\n" if $verbose;
392	# First copy new scripts and hostinfo into the remote /proj
393	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) \|\|
394	return 0;
395	&ship_scripts($host, $user, $proj_dir) \|\| return 0;
396	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) \|\| return 0;
397	# Now start up
398	print "Swapping $eid in on $tb\n" if $verbose;
399	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
400	"swapexp", $timeout) \|\| return 0;
401	return 1;
402	}
403
404	# Every branch for a known state returns. If execution gets here, the
405	# state is unknown.
406	warn "unknown state: $state\n";
407	return 0;
408	}
409
410	# Swap out a sub-experiment - probably because another has failed. Arguments
411	# are testbed and experiment. Most of the control flow is similar to
412	# start_segment, though much simpler.
413	sub stop_segment {
414	my($tb, $eid, $tbparams) = @_; # testbed, experiment ID and
415	# per-testbed parameters
416	my $user = $tbparams->{$tb}->{'user'}; # testbed user
417	my $host = # Ops node
418	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
419	my $pid = $tbparams->{$tb}->{'project'};# testbed project
420
421	print "Stopping $eid on $tb\n" if $verbose;
422	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out",
423	"swapexp (out)") \|\| return 0;
424	return 1;
425	}
426
427
428	$pid = $gid = "dummy"; # Default project and group to pass to
429	# $tcl_splitter above. These are total
430	# dummy arguments; the splitter doesn't
431	# use them at all, but we supply them to
432	# keep our changes to the parser minimal.
433	# Argument processing.
434	getopts('Ft:c:f:ndvN', \%opts);
435	$splitter_config = $opts{'c'} \|\| "./splitter.conf";
436	$debug = $opts{'d'};
437	$verbose = $opts{'v'} \|\| $opts{'d'};
438
439	&parse_config("$splitter_config", \%opts) \|\|
440	die "Cannot read config file $splitter_config: $!\n";
441	$fail_soft = $opts{'F'} \|\| $opts{'failsoft'};
442
443	$startem = $opts{'n'} ? 0 : 1; # If true, start the sub-experiments
444	$timeout = $opts{'t'} \|\| $opts{'timeout'};
445	$eid = $opts{'experiment'}; # Experiment ID
446	$tcl = $opts{'f'} \|\| shift; # The experiment description
447	$master = $opts{'master'}; # Master testbed
448	$tmpdir = $opts{'tmpdir'} \|\| $opts{'tempdir'}\|\| "/tmp"; # tmp files
449	$tb_config = $opts{'testbeds'} \|\| "./testbeds"; # testbed configurations
450	$local_script_dir = $opts{'scriptdir'}; # Local scripts
451
452	$smb_share = $opts{'smbshare'} \|\| # Share to mount from the master
453	die "Must give an SMB share\n";
454	$project_user = $opts{'smbuser'} \|\| # User to mount project dirs as
455	die "Must give an SMB user\n";
456
457	# For now specify these. We may want to generate them later.
458	$gw_pubkey = $opts{'gatewaypubkey'};
459	($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
460	$gw_secretkey = $opts{'gatewaysecretkey'};
461	($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
462
463	# tcl program to split experiments (changed during devel)
464	$tcl_splitter = $opts{'tclparse'} \|\| "/usr/testbed/lib/ns2ir/parse.tcl";
465	# tclsh to call directly (changed during devel)
466	$tclsh = $opts{'tclsh'} \|\| "/usr/local/bin/otclsh";
467
468	# Prefix to avoid collisions
469	$tmpdir .= "/split$$";
470
471	print "Temp files are in $tmpdir\n" if $verbose;
472	# Create a workspace
473	unless (-d "$tmpdir") {
474	mkdir("$tmpdir") \|\| die "Can't create $tmpdir: $!";
475	}
476
477	# Validate scripts directory
478	for my $s (@scripts) {
479	die "$local_script_dir/$s not in local script directory. Try -d\n"
480	unless -r "$local_script_dir/$s";
481	}
482
483	die "Must supply file, master and experiment" unless $master && $tcl && $eid;
484
485	if ($opts{'N'} ) {
486	&parse_testbeds($tb_config, $tbparams) \|\|
487	die "Cannot testbed congfigurations from $tb_config: $!\n";
488	}
489	else {
490	# Read a hash of per-testbed parameters from the local configurations.
491	my $conf = new IO::File($tb_config) \|\|
492	die "can't read testbed configutions from $tb_config: $!\n";
493	while (<$conf>) {
494	next if /^#/;
495	chomp;
496	my($tb, $h, $d, $u, $p, $es, $gs, $mes, $mgs, $t, $i, $fs, $boss,
497	$tun) = split(":", $_);
498	$tbparams->{$tb}->{'host'} = $h;
499	$tbparams->{$tb}->{'user'} = $u;
500	$tbparams->{$tb}->{'domain'} = $d;
501	$tbparams->{$tb}->{'project'} = $p;
502	$tbparams->{$tb}->{'gwtype'} = $t;
503	$tbparams->{$tb}->{'expstart'} = $es;
504	$tbparams->{$tb}->{'gwstart'} = $gs;
505	$tbparams->{$tb}->{'mexpstart'} = $mes;
506	$tbparams->{$tb}->{'mgwstart'} = $mgs;
507	$tbparams->{$tb}->{'gwimage'} = $i;
508	$tbparams->{$tb}->{'fs'} = $fs;
509	$tbparams->{$tb}->{'boss'} = $boss;
510	$tbparams->{$tb}->{'tun'} = $tun;
511
512	# Make sure the domain starts with a period
513	$tbparams->{$tb}->{'domain'} = "." . $tbparams->{$tb}->{'domain'}
514	unless $tbparams->{$tb}->{'domain'} =~ /^\./;
515	}
516	$conf->close();
517	}
518
519	# Open a pipe to the splitter program and start it parsing the experiments
520	my $pipe = new IO::Pipe;
521	# NB no more -p call on parse call.
522	$pipe->reader("$tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl") \|\|
523	die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
524
525	# Parsing variables
526	my $ctb; # Current testbed
527	my %allocated; # If allocated{$tb} > 0, $tb is in use
528	my $destfile; # File that the sub-experiment tcl file is
529	# being written to, or "" if none. Also used
530	# for hostnames file.
531	my $desthandle; # File handle for distfile
532	my $gateways; # when gateway lists are being processed this
533	# is the testbed whose gateways are being
534	# gathered.
535	my $control_gateway; # Control net gateway for the current testbed
536	my %active_end; # If active_end{"a-b"} > 0 then a is the active
537	# end of the a <-> b connector pair.
538
539	# Parse the splitter output. This loop creates the sub experiments, gateway
540	# configurations and hostnames file
541	while (<$pipe>) {
542	# Start of a sub-experiment
543	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
544	$ctb = $1;
545
546	# If we know the testbed, start collecting its sub experiment tcl
547	# description. If not, warn the caller and ignore the configuration of
548	# this testbed.
549	if ($tbparams->{$ctb}) {
550	$allocated{$ctb}++; # Keep track of the testbeds allocated
551
552	unless (-d "$tmpdir/$ctb") {
553	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
554	}
555	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
556
557	$desthandle = new IO::File(">$destfile") \|\|
558	die "Cannot open $destfile:$!\n";
559	}
560	else {
561	warn "No such testbed $ctb\n";
562	$destfile = "";
563	}
564	next;
565	};
566
567	# End of that experiment
568	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
569	# Simple syntax check and close out this experiment's tcl description
570	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
571	$desthandle->close();
572	$destfile = $ctb = "";
573	next;
574	};
575
576	# Beginning of a gateway set
577	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
578	$gateways = $1;
579	# If we've heard of this tb, create the config lines for it one at a
580	# time.
581	if ($allocated{$gateways}) {
582	# Just in case. This directory should already have been created
583	# above.
584	unless (-d "$tmpdir/$gateways") {
585	mkdir("$tmpdir/$gateways") \|\|
586	die "Can't create $tmpdir/$gateways: $!";
587	}
588	}
589	else {
590	warn "Gateways given (and ignored) for testbed not in use: " .
591	"$gateways\n";
592	$gateways = 0;
593	}
594	next;
595	};
596	# End of the gateways section. Output the client config for this testbed
597	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
598	die "Mismatched gateway markers ($1, $gateways)\n"
599	unless !$gateways \|\| $gateways == $1;
600
601	if ($control_gateway ) {
602	# Client config
603	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
604	my $master_project = $tbparams->{$master}->{'project'};
605	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
606	print $cc "ControlGateway: $control_gateway\n";
607	print $cc "SMBShare: $smb_share\n";
608	print $cc "ProjectUser: $project_user\n";
609	print $cc "ProjectName: $master_project\n";
610	$cc->close();
611	}
612	else { warn "No control gateway for $gateways?\n"; }
613
614	$gateways = 0;
615	next;
616	};
617	# Beginning of the hostnames list. Collection is always in the hostnames
618	# file.
619	/^#\s+Begin\s+hostnames/ && do {
620	$destfile = "$tmpdir/hostnames";
621	$desthandle = new IO::File(">$destfile") \|\|
622	die "Can't open $destfile:$!\n";
623	next;
624	};
625	# end of the hostnames list.
626	/^#\s+End\s+hostnames/ && do {
627	$desthandle->close();
628	$destfile = "";
629	next;
630	};
631
632	# Generate gateway configuration info, one file per line
633	$gateways && do {
634	chomp;
635	my($dtb, $myname, $desthost, $type) = split(" ", $_);
636
637	# Many of these are to simplify print statements
638	my $sdomain = # domain for the source
639	$tbparams->{$gateways}->{'domain'};
640	my $ddomain = # domain for the destination
641	$tbparams->{$dtb}->{'domain'};
642	my $sproject = # Project of the destination
643	$tbparams->{$gateways}->{'project'};
644	my $fs = # Master fs node (FQDN)
645	$tbparams->{$master}->{'fs'} . $tbparams->{$master}->{'domain'};
646	my $boss = # Master boss node (FQDN)
647	$tbparams->{$master}->{'boss'} . $tbparams->{$master}->{'domain'};
648	my $remote_script_dir = # Remote fed script location
649	"/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
650	my $active; # Is this the active side of
651	# the connector?
652
653	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
654	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
655
656	my $conf_file = "$myname$sdomain.gw.conf";
657	# translate to lower case so the `hostname` hack for specifying
658	# configuration files works.
659	$conf_file =~ tr/A-Z/a-z/;
660
661	# If either end of this link is in the master side of the testbed, that
662	# side is the active end. Otherwise the first testbed encountered in
663	# the file will be the active end. The $active_end variable keeps
664	# track of those decisions
665	if ( $dtb eq $master ) { $active = "false"; }
666	elsif ($gateways eq $master ) { $active = "true"; }
667	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
668	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
669
670	# This is used to create the client configuration.
671	$control_gateway = "$myname$sdomain"
672	if $type =~ /(control\|both)/;
673
674	# Write out the file
675	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")\|\|
676	die "can't open $tmpdir/$gateways/$conf_file: $!\n";
677
678	print $gwconfig "Active: $active\n";
679	print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
680	print $gwconfig "BossName: $boss\n";
681	print $gwconfig "FsName: $fs\n";
682	print $gwconfig "Type: $type\n";
683	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
684	print $gwconfig "Peer: $desthost$ddomain\n";
685	print $gwconfig "Pubkeys: " .
686	"/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
687	print $gwconfig "Privkeys: " .
688	"/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
689	$gwconfig->close();
690
691	# This testbed has a gateway (most will) so make a copy of the keys it
692	# needs in this testbed's subdirectory. start_segment will transfer
693	# them.
694	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
695	copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") \|\|
696	die "Can't copy pubkeys ($gw_pubkey to " .
697	"$tmpdir/$gateways/$gw_pubkey_base): $!\n";
698	}
699	if ($active eq "true" ) {
700	unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
701	copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") \|\|
702	die "Can't copy secret keys ($gw_secretkey to " .
703	"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
704	}
705	}
706
707	#done processing gateway entry, ready for next line
708	next;
709	};
710	(/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
711	next if /^#/;
712	chomp;
713	push(@tarfiles, $_);
714	next;
715	};
716	(/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
717	next if /^#/;
718	chomp;
719	push(@rpms, $_);
720	next;
721	};
722
723	next unless $destfile; # Unidentified testbed, ignore config
724	# local copies that can be used in the substitutions below
725	my $gwtype = $tbparams->{$ctb}->{'gwtype'};
726	my $gwimage = $tbparams->{$ctb}->{'gwimage'};
727	my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
728	my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
729	my $gwstart = $tbparams->{$ctb}->{'gwstart'};
730	my $expstart = $tbparams->{$ctb}->{'expstart'};
731	my $project = $tbparams->{$ctb}->{'project'};
732
733	# Substitute variables
734	s/GWTYPE/$gwtype/g;
735	s/GWIMAGE/$gwimage/g;
736	if ($ctb eq $master ) {
737	s/GWSTART/$mgwstart/g;
738	s/EXPSTART/$mexpstart/g;
739	}
740	else {
741	s/GWSTART/$gwstart/g;
742	s/EXPSTART/$expstart/g;
743	}
744	# XXX: oh is this bad
745	s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
746	s#PROJDIR#/proj/$project/#g;
747	s#EID#$eid#g;
748	s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
749	print $desthandle $_;
750	}
751	$pipe->close();
752	die "No nodes in master testbed ($master)\n" unless $allocated{$master};
753
754	# Copy tarfiles and rpms needed at remote sites to the staging directories.
755	# Start_segment will distribute them
756	for my $t (@tarfiles) {
757	die "tarfile '$t' unreadable: $!\n" unless -r $t;
758	unless (-d "$tmpdir/tarfiles") {
759	mkdir("$tmpdir/tarfiles") \|\|
760	die "Can't create $tmpdir/tarfiles:$!\n";
761	}
762	copy($t, "$tmpdir/tarfiles") \|\|
763	die "Can't copy $t to $tmpdir/tarfiles:$!\n";
764	}
765
766	for my $r (@rpms) {
767	die "rpm '$r' unreadable: $!\n" unless -r $r;
768	unless (-d "$tmpdir/rpms") {
769	mkdir("$tmpdir/rpms") \|\|
770	die "Can't create $tmpdir/rpms:$!\n";
771	}
772	copy($r, "$tmpdir/rpms") \|\|
773	die "Can't copy $r to $tmpdir/rpms:$!\n";
774	}
775
776	exit(0) unless $startem;
777
778	my %started; # If $started{$tb} then $tb successfully started
779
780	# Start up the slave sub-experiments first
781	TESTBED:
782	for my $tb (keys %allocated) {
783	if ($tb ne $master) {
784	if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
785	else { last TESTBED; }
786	}
787	}
788
789	# Now the master
790	if (&start_segment($master, $eid, $tbparams, $timeout)) {
791	$started{$master}++;
792	}
793
794	# If any testbed failed, swap the rest out.
795	if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
796	for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
797	print "Error starting experiment\n";
798	exit(1);
799	}
800	print "Experiment started\n";
801	print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
802	system("rm -rf $tmpdir") unless $debug;
803	exit(0); # set the exit value
804
805	=pod
806
807	=head1 NAME
808
809	B<splitter.pl>
810
811	=head1 SYNOPSIS
812
813	B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
814	[B<-f> F<experiment_tcl>] [F<experiment_tcl>]
815
816	=head1 DESCRIPTION
817
818	B<splitter.pl> invokes the DETER experiment parser to split an annotated
819	experiment into multiple sub-experments and instantiates the sub-experiments on
820	their intended testbeds. Annotation is accomplished using the
821	tb-set-node-testbed command, added to the parser.
822
823	Much of the script's behavior depends on the configuration file, specified with
824	the B<-c> flag and defaulting to F<./splitter.conf>.
825
826	The testbed labels supplied in the B<tb-set-node-testbed> command are
827	meaningful based on their presence in the testbeds file. that file can be
828	specified in the configuration file using the B<Testbeds> directive, and
829	defaults to F<./testbeds>. The syntax is described below.
830
831	Most of the intermediate files are staged in a sub-directory of a temporary
832	files directory and deleted at the end of the script. Specifying the B<-d>
833	flag on the command line avoids the deletion for debbugging. By default the
834	temporary files directory is directory is F</tmp> and can be reset in the
835	configuration file using the B<Tmpdir> directive. Intermediate files are
836	stored under a subdirectory formed by adding the process ID of the splitter
837	process. For example, if the temporary files directory is F</tmp> and the
838	B<splitter.pl> process ID is 2323, the temporary files will be stored in
839	F</tmp/split2323/>.
840
841	The expreriment is split out into one experiment description per testbed in the
842	temporary directory named as F<experiment.testbed.tcl> where the experiment is
843	the experiment ID given in the configuration file, and the testbed is the
844	tb-set-node-testbed parameter for the nodes in the file.
845
846	If the B<-n> option is absent the sub-experiments are then instantiated on
847	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
848	Per-testbed parameters are set in the testbeds file. Sub-experiments on
849	slave testbeds are instantiated in a random order, but the master testbed is
850	currently instantiated last.
851
852	Scripts to start federation (the federation kit) are copied into the local
853	experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
854	taken from the directory given by the B<ScriptDir> directive in the
855	configuration file.
856
857	If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
858	F<splitter.conf>.
859
860	If any sub-experiment fails to instantiate, the other sub-exeriments are
861	swapped out. B<-F> avoids this swap out, which can also be specified as
862	B<SoftFail: true> in F<splitter.conf>
863
864	=head2 Configuration File
865
866	The configuration file is a simple set of colon-separated parameters and
867	values. A configuration file must be present, either specified in the B<-c>
868	flag or the default F<./splitter.conf>. All the parameter names are case
869	insensitive, but should not include any whitespace. Parameter values may
870	include whitespace, but no newlines.
871
872	Possible parameters are:
873
874	=over 5
875
876	=item Experiment
877
878	The name of the experiment on the various testbeds
879
880	=item Master
881
882	The master testbed label from the testbeds file, described below.
883
884	=item Testbeds
885
886	The testbeds file described below, giving per-testbed parameters. If this
887	directive is absent the testbeds file defaults to F<./testbeds>
888
889	=item ScriptDir
890
891	Location of the default federation scripts, i.e. the federation kit.
892
893	=item GatewayPubkey
894
895	=item GatewaySecretKey
896
897	The names of the files containing secret and public keys to use in setting up
898	tunnels between testbeds. These will eventually be automatically generated.
899
900	=item TmpDir
901
902	=item TempDir
903
904	The directory where temporary files are created. These are synonyms, but
905	should both be specified, B<TmpDir> has priority. If neither is specified,
906	F</tmp> is used.
907
908	=item SMBShare
909
910	The SMB share on the master testbed that will be exported to remote clients.
911
912	=item SMBUser
913
914	The experiment user to mount project directories as. This user needs to be a
915	member of the exported experiment - that is one of the users in the project
916	containing this experiment on the master testbed.
917
918	=item Tclparse
919
920	The pathname to the experiment parsing program. Only developers should set
921	this.
922
923	=item Tclsh
924
925	The pathname to the local oTcl shell. Only developers should set
926	this.
927
928	=item Timeout
929
930	Value in seconds after which a swap-in operatioin will be considered a success.
931	Often long swap-ins will hang when there are partial failures. This works
932	around this issue.
933
934	=item FailSoft
935
936	If not set, failure of any sub experiment swaps the rest out. Setting this to any value avoids this swap out. (Also B<-F>.)
937
938	=back
939
940	=head2 Testbeds file
941
942	The configuration file (F<./testbeds> unless overridden by B<-c>) is a
943	colon-separated set of parameters keyed by testbed name. The fields, in order,
944	are:
945
946	=over 5
947
948	=item name
949
950	The testbed to which this line of parameters applies.
951
952	=item user
953
954	The user under which to make requests to this testbed. The user running
955	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
956	testbed.
957
958	=item host
959
960	The host name of the testbed's ops node. The user calling B<splitter.pl> must
961	be able to execute commands on this host via L<ssh(1)>.
962
963	=item domain
964
965	The domain of nodes in this testbed (including the ops host). This parameter
966	should always start with a period.
967
968	=item project
969
970	The project under which to instantiate sub-experiments on this testbed.
971
972	=item gateway type
973
974	The node type for inter-testbed gateway nodes on this testbed.
975
976	=item experiment start (slave)
977
978	The start command to run on experimental nodes when this testbed is used as a
979	slave. In all the start commands the string FEDDIR will be replaced by the
980	local experiment's federation scripts directory and the string GWCONF replaced
981	by the gatway configuration file.
982
983	=item gateway start (slave)
984
985	The start command to run on gateway nodes when this testbed is used as a slave.
986	The same string substitutions are made in this command as in experiment start.
987
988	=item experiment start (master)
989
990	The start command to run on experimental nodes when this testbed is used as a
991	master. The same string substitutions are made in this command as in
992	experiment start.
993
994	=item gateway start (master)
995
996	The start command to run on gateway nodes when this testbed is used as a
997	master. The same string substitutions are made in this command as in
998	experiment start.
999
1000	=item gateway image
1001
1002	The disk image to be loaded on a gateway node on this testbed.
1003
1004	=item filesystem node
1005
1006	The node in the master testbed from which filesystems are mounted.
1007
1008	=item boss node
1009
1010	The node in the master testbed that controls the testbed.
1011
1012	=item tunnel configuration
1013
1014	True if the connector needs to do DETER federation. This parameter will
1015	probably be removed.
1016
1017	=back
1018
1019	The parsing of the testbeds is extremely simple. Colons separate each
1020	field and there is n provision for escaping them at this time.
1021
1022	=head2 New Testbeds File Format
1023
1024	The testbeds file has ben revamped to make it more human readable. The
1025	parameters are now named and specified on separate lines of the configuration
1026	file. Each testbed's parameters are preceeded by the testbed label in brackets
1027	([]) on a line by itself. After that the parameters are specified as
1028	parameter: value. This is essentially the same format as the configuration
1029	file. Parameters are:
1030
1031	=over 4
1032
1033	=item User
1034
1035	The user under which to make requests to this testbed. The user running
1036	B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
1037	testbed.
1038
1039	=item OpsNode
1040
1041	The host name of the testbed's ops node. The user calling B<splitter.pl> must
1042	be able to execute commands on this host via L<ssh(1)>.
1043
1044	=item Domain
1045
1046	The domain of nodes in this testbed (including the ops host). This parameter
1047	should always start with a period.
1048
1049	=item Project
1050
1051	The project under which to instantiate sub-experiments on this testbed.
1052
1053	=item ConnectorType
1054
1055	The node type for inter-testbed connector nodes on this testbed.
1056
1057	=item SlaveNodeStartCmd
1058
1059	The start command to run on experimental nodes when this testbed is used as a
1060	slave. In all the start commands the string FEDDIR will be replaced by the
1061	local experiment's federation scripts directory and the string GWCONF replaced
1062	by the gatway configuration file.
1063
1064	=item SlaveConnectorStartCmd
1065
1066	The start command to run on gateway nodes when this testbed is used as a slave.
1067	The same string substitutions are made in this command as in experiment start.
1068
1069	=item MasterNodeStartCmd
1070
1071	The start command to run on experimental nodes when this testbed is used as a
1072	master. The same string substitutions are made in this command as in
1073	experiment start.
1074
1075	=item MasterConnectorStartCmd
1076
1077	The start command to run on gateway nodes when this testbed is used as a
1078	master. The same string substitutions are made in this command as in
1079	experiment start.
1080
1081	=item ConnectorImage
1082
1083	The disk image to be loaded on a gateway node on this testbed.
1084
1085	=item FileServer
1086
1087	The node in the master testbed from which filesystems are mounted.
1088
1089	=item Boss
1090
1091	The node in the master testbed that controls the testbed.
1092
1093	=item TunnelCfg
1094
1095	True if the connector needs to do DETER federation. This parameter will
1096	probably be removed.
1097
1098
1099	=back
1100
1101
1102	=head1 ENVIRONMENT
1103
1104	B<splitter.pl> does not directly make use of environment variables, but calls
1105	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1106	environment.
1107
1108	=head1 SEE ALSO
1109
1110	L<sh(1)>, L<ssh(1)>
1111
1112	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: