Context Navigation

source: fedkit/splitter.pl @ 5e71d34

Last change on this file since 5e71d34 was 7c3008e, checked in by Ted Faber <faber@…>, 17 years ago
checkpoint
Property mode set to `100644`
File size: 46.3 KB

Line
1	#!/usr/bin/perl
2
3	use strict;
4
5	use Getopt::Std;
6	use IO::File;
7	use IO::Dir;
8	use IO::Pipe;
9	use File::Copy;
10	use XML::Parser;
11
# Federation support scripts that are copied to each testbed (see
# &ship_scripts) and the local directory they are read from.
my @scripts = qw(
    fed_bootstrap
    federate.sh
    smbmount.FreeBSD.pl
    smbmount.Linux.pl
    make_hosts
    fed-tun.pl
    fed_evrepeater
    rc.accounts.patch
);
my $local_script_dir = ".";

# Project and group IDs handed to parse.tcl (dummy values, set below).
my $pid;
my $gid;

my $splitter_config;    # Configuration file
my $debug;              # True if called in debug mode
my $verbose;            # True for extra progress reports
my $startem;            # If true, start the sub-experiments
my $eid;                # Experiment ID
my $tcl;                # The experiment description (topology file)
my $master;             # Master testbed
my $tmpdir;             # tmp files
my $tb_config;          # testbed configurations
my $smb_share;          # Share to mount from the master
my $project_user;       # User to mount project dirs as
my $auth_proj;          # Local project for resource access

# Connector public key: full path and basename
my $gw_pubkey;
my $gw_pubkey_base;
# Connector secret key: full path and basename
my $gw_secretkey;
my $gw_secretkey_base;
my $keytype;            # Type (DSA or RSA) of generated gateway keys

my $tcl_splitter;       # tcl program to split experiments
                        # (changed during devel)
my $tclsh;              # tclsh to call directly (changed during devel)
my $fedd_client;        # Program to call for testbed access params
my $muxmax;             # Maximum number of links/lans over 1 gw pair
my @tarfiles;           # Tarfiles in use by this experiment
my @rpms;               # Rpms in use by this experiment
my $timeout;            # The timeout to use for experiment swap ins
my %opts;               # Parsed options

# Map of the per-testbed parameters from the testbeds file.  It is a
# reference to a hash of hashes (because it's passed around a bunch and it's
# nicer to have one access pattern throughout the script, in the main loop
# and the subroutines).  That access is exemplified by
# $tbparams->{'deter'}->{'domain'}, which is the domain parameter of the
# DETER testbed.
my $tbparams = {};
my $fail_soft;          # Do not swap failed sub-experiments out
my $max_children = 1;   # Maximum number of simultaneous swap-ins

# Default commands for starting experiment and gateway nodes.  Testbeds can
# override these.  (The 'm' prefixed commands are for operating as the master
# testbed.)
my $def_expstart  = "sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate";
my $def_mexpstart = "sudo -H FEDDIR/make_hosts FEDDIR/hosts";
my $def_gwstart   = "sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log";
my $def_mgwstart  = "sudo -H FEDDIR/fed-tun.pl -f GWCONF >& /tmp/bridge.log";
my $def_gwimage   = "FBSD61-TUNNEL2";
my $def_gwtype    = "pc";
63
# Parse the config file.  The format is a colon-separated parameter name
# followed by the value of that parameter to the end of the line.  Lines that
# are blank or whose first non-blank character is '#' are skipped.
#
# Parameters:
#   $file - name of the configuration file to read
#   $href - reference to the hash that receives the parameters.  Parameter
#           names are mapped to lower case, values are stored unchanged
#           (minus leading whitespace after the colon).
#
# Returns 0 on failure (the file cannot be opened) and 1 on success.  Lines
# that do not match the format produce a warning and are otherwise ignored.
sub parse_config {
    my($file, $href) = @_;
    my $fh = IO::File->new($file);

    unless ($fh) {
        warn "Can't open $file: $!\n";
        return 0;
    }

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        next if $line =~ /^\s*#/ || $line =~ /^\s*$/;  # Skip comments & blanks
        chomp $line;
        if ($line =~ /^([^:]+):\s*(.*)/) {
            # Only the first colon separates key from value, so values may
            # themselves contain colons (e.g. URIs).
            my($key, $value) = ($1, $2);

            $key =~ tr/A-Z/a-z/;
            $href->{$key} = $value;
            next;
        }
        warn "Unparsable line in $file: $line\n";
    }
    $fh->close();   # It will close when it goes out of scope, but...
    return 1;
}
93
# Parse an easier-to-read testbeds file (the original was comma-separated
# unadorned strings).  The format is a testbed scope written as [testbed]
# followed by the colon-separated attribute-value pairs for that testbed.
# Blank lines and lines whose first non-blank character is '#' are skipped.
#
# Parameters:
#   $fh       - an open, readable handle (&parse_testbeds_filename opens a
#               file and calls this; pipes work as well, e.g. fedd_client
#               output)
#   $tbparams - reference to the hash of per-testbed hashes to fill in.
#               Attribute names from the file are translated to the hash
#               keys listed in %attr_to_hash below.
#
# Unknown attributes, attributes given before any [testbed] line and lines
# that match neither form produce a warning.  Every line read is echoed to
# STDERR.  Always returns 1.
sub parse_testbeds {
    my($fh, $tbparams) = @_;    # Testbeds file and parameter hash
    my $tb;                     # Current testbed
    # Convert attribute in the file to tbparams hash key
    my %attr_to_hash = (
        "opsnode" => "host",
        "user" => "user",
        "domain" => "domain",
        "project" => "project",
        "connectortype" => "gwtype",
        "slavenodestartcmd" => "expstart",
        "slaveconnectorstartcmd" => "gwstart",
        "masternodestartcmd" => "mexpstart",
        "masterconnectorstartcmd" => "mgwstart",
        "connectorimage" => "gwimage",
        "fileserver" => "fs",
        "boss" => "boss",
        "eventserver" => "eventserver",
        "tunnelcfg" => "tun",
        "uri" => "uri",
        "access" => "access"
    );

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        next if $line =~ /^\s*#/ || $line =~ /^\s*$/;  # Skip comments & blanks
        print STDERR "testbeds: $line";
        chomp $line;

        # [testbed] opens a new scope; existing parameters are kept.
        if ($line =~ /^\s*\[(.*)\]/) {
            $tb = $1;
            $tbparams->{$tb} = {} unless $tbparams->{$tb};
            next;
        }

        if ($line =~ /^([^:]+):\s*(.*)/) {
            my($key, $value) = ($1, $2);

            unless ($tb) {
                warn "Ignored attribute definition before testbed: $line\n";
                next;
            }
            $key =~ tr/A-Z/a-z/;
            my $var = $attr_to_hash{$key};

            if ($var) { $tbparams->{$tb}->{$var} = $value; }
            else { warn "Unknown keyword $key\n"; }

            next;
        }
        warn "Unparsable line: $line\n";
    }
    return 1;
}
152
153
# Convenience wrapper around &parse_testbeds: open the named testbeds file,
# parse it into the $tbparams hash and close it again.  Returns whatever
# &parse_testbeds returned, or 0 (after a warning) if the file cannot be
# opened.
sub parse_testbeds_filename {
    my($file, $tbparams) = @_;          # Testbeds file and parameter hash
    my $handle = IO::File->new($file);  # Testbeds filehandle

    unless ($handle) {
        warn "Can't open $file: $!\n";
        return 0;
    }

    my $parsed = &parse_testbeds($handle, $tbparams);
    $handle->close();   # Explicit, though scope exit would close it too
    return $parsed;
}
170
# Generate SSH keys for use by the gateways by running /usr/bin/ssh-keygen
# with an empty passphrase.
#
# Parameters:
#   $type - key type, "rsa" or "dsa" (case-insensitive)
#   $dest - filename for the private key.  The pubkey will be stored in a
#           file with the same name but with .pub appended.
#
# Returns 1 if ssh-keygen ran and exited successfully, 0 if the type is not
# one of the two supported values, a parameter is missing, or ssh-keygen
# failed.
sub generate_ssh_keys {
    my($type, $dest) = @_;

    return 0 unless defined $type && defined $dest;
    $type =~ tr/A-Z/a-z/;
    # Anchored so that only the two documented types are accepted.
    return 0 unless $type =~ /^(?:rsa|dsa)$/;

    # List form: no shell is involved, so $dest needs no quoting.
    system("/usr/bin/ssh-keygen", "-t", $type, "-N", "", "-f", $dest);

    # system() reports the child's status in $?, not in $@.
    return $? == 0 ? 1 : 0;
}
184
# Use scp to transfer a file, reporting true if successful and false
# otherwise.
#
# Parameters:
#   $file - local file name
#   $user - remote user name
#   $host - remote host name
#   $dest - optional destination file name or directory on the remote host.
#           If none is given, the file lands in the remote user's home
#           directory.
#
# Returns 1 on success; on failure warns with the wait status and returns 0.
sub scp_file {
    my($file, $user, $host, $dest) = @_;
    my $target = "$user\@$host:" . (defined $dest ? $dest : "");

    # List form bypasses the local shell, so file names containing spaces or
    # shell metacharacters are passed to scp unchanged.
    # XXX scp is still located through $PATH
    system("scp", $file, $target);
    if ($?) {
        warn "scp failed $?\n";
        return 0;
    }
    return 1;
}
202
# Use ssh to execute a command on a remote machine.
#
# Parameters:
#   $user    - remote user
#   $host    - remote host
#   $cmd     - command line to execute remotely (passed through as given)
#   $wname   - optional prefix for warning messages
#   $timeout - optional number of seconds to wait for the command; 0 or
#              undef waits indefinitely
#
# The command runs in a forked child.  Returns 1 if the child exited with
# status 0 and 0 if it failed (a warning is printed when $wname was given) or
# if the fork itself failed.  If the timeout expires a warning is printed,
# the child is left running and 1 is returned (assume the best).
sub ssh_cmd {
    my($user, $host, $cmd, $wname, $timeout) = @_;

    $timeout = 0 unless $timeout;       # Force default timeout

    my $pid = fork();

    # fork() returns undef on failure; that must not be mistaken for the
    # child (which sees 0), or the exec below would replace this process.
    unless (defined $pid) {
        warn(($wname ? "$wname: " : "") . "cannot fork: $!\n");
        return 0;
    }

    if ($pid == 0) {
        # Child process
        exec("ssh $user\@$host $cmd");
        # Only reached if exec failed; report that as a failure.
        warn "Cannot exec ssh: $!\n";
        exit 1;
    }

    # Parent process
    # The eval acts as a signal catcher.  If the alarm goes off inside the
    # eval, the die will put "alarm\n" into $@, otherwise the eval yields
    # the wait status of the child.
    my $status = eval {
        local $SIG{'ALRM'} = sub { die "alarm\n"; };

        alarm $timeout;
        my $reaped = waitpid($pid, 0);
        alarm 0;
        # If the child was reaped elsewhere its status is unknown; assume
        # the best, as for timeouts.
        $reaped == $pid ? $? : 0;
    };

    if ($@) {
        if ($@ eq "alarm\n") {
            warn "$wname timed out - pid $pid still live\n";
            return 1;
        }
        warn(($wname ? "$wname: " : "") . $@);
        return 0;
    }

    if ($status != 0) {
        warn "$wname failed: $status\n" if $wname;
        return 0;
    }
    return 1;
}
247
# Ship local copies of the federation scripts out to the given host.  The
# scripts to transfer are listed in the global @scripts and are found locally
# in the global $local_script_dir.
#
# Parameters:
#   $host     - remote host
#   $user     - remote user
#   $dest_dir - remote directory to copy into (created with mkdir -p)
#
# Returns 1 if the directory was created and every script was transferred,
# 0 as soon as any step fails.
sub ship_scripts {
    my($host, $user, $dest_dir) = @_;   # Where, who, where remotely

    # Check the mkdir the same way &ship_configs does.
    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
    for my $s (@scripts) {
        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) ||
            return 0;
    }
    return 1;
}
262
# Ship per-testbed configuration generated by this script to a directory on
# a remote testbed, recursing into subdirectories.
#
# Parameters:
#   $host     - remote host
#   $user     - remote user
#   $src_dir  - local directory to copy from
#   $dest_dir - remote directory to copy into; created with mkdir -p and
#               set to mode 770
#
# Directory entries whose names start with a dot are skipped.  Returns 1 if
# everything was transferred, 0 if the local directory cannot be read or any
# remote command or copy fails.
sub ship_configs {
    my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, where remotely

    my $d = IO::Dir->new($src_dir) || return 0;

    # All directories under $tmpdir are 770 so we can delete them later.
    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;

    # Test for definedness: an entry named "0" is false but still a file.
    while (defined(my $f = $d->read())) {
        next if $f =~ /^\./;

        my $path = "$src_dir/$f";
        if ( -d $path ) {
            &ship_configs($host, $user, $path, "$dest_dir/$f") || return 0;
        }
        else {
            &scp_file($path, $user, $host, $dest_dir) || return 0;
        }
    }
    $d->close();
    return 1;
}
286
# Helper for &start_segment: copy the hostnames file, the virtual topology
# and visualization XML, the federation scripts and the per-testbed
# configuration from the global $tmpdir into $proj_dir on the remote testbed.
# Returns 1 on success and 0 as soon as any transfer fails.
sub _ship_federation_files {
    my($tb, $host, $user, $proj_dir) = @_;

    print "Transferring federation support files to $tb\n" if $verbose;
    # First copy new scripts and hostinfo into the remote /proj
    &scp_file("$tmpdir/hostnames", $user, $host, "$proj_dir/hosts") ||
        return 0;
    # Copy the virtual topology out as well
    &scp_file("$tmpdir/vtopo.xml", $user, $host, "$proj_dir/vtopo.xml") ||
        return 0;
    &scp_file("$tmpdir/viz.xml", $user, $host, "$proj_dir/viz.xml") ||
        return 0;
    &ship_scripts($host, $user, $proj_dir) || return 0;
    &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
    return 1;
}

# Helper for &start_segment: copy the local tarfiles and rpms directories
# under the global $tmpdir (each only if it exists) to the given remote
# directories.  Returns 1 on success and 0 as soon as any transfer fails.
sub _ship_tarfiles_and_rpms {
    my($host, $user, $tarfiles_dir, $rpms_dir) = @_;

    if ( -d "$tmpdir/tarfiles") {
        &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) ||
            return 0;
    }
    if ( -d "$tmpdir/rpms") {
        &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) ||
            return 0;
    }
    return 1;
}

# Start a sub section of the experiment on a given testbed.
#
# Parameters:
#   $tb       - testbed name
#   $eid      - experiment ID
#   $tbparams - per-testbed parameter hash; the ops host and domain, the
#               user to ssh as and the remote project are read from it
#   $timeout  - timeout (seconds) for the remote swap-in / in-place modify
#
# The sub-experiment tcl file is scp-ed over from the global $tmpdir/$tb
# directory and the remote per-experiment tmp, tarfiles and rpms directories
# are cleared.  Then, depending on the state of the remote experiment as
# reported by expinfo, the experiment is modified in place ("active"),
# modified and swapped in ("swapped") or created and swapped in ("none").
# If everything succeeds, true is returned; any failed step or an unknown
# state returns 0.  If the global $verbose is set, progress messages are
# printed.  Dies if the status pipe to ssh cannot be started.
sub start_segment {
    my($tb, $eid, $tbparams, $timeout) = @_;    # testbed, experiment ID,
                                                # per-testbed parameters and
                                                # remote swap-in timeout
    my $host =                          # Host name of remote ops (FQDN)
        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
    my $user = $tbparams->{$tb}->{'user'};      # user to pass to ssh
    my $pid = $tbparams->{$tb}->{'project'};    # remote project to start the
                                                # experiment under
    my $tclfile = "./$eid.$tb.tcl";     # Local tcl file with the
                                        # sub-experiment
    my $proj_dir = "/proj/$pid/exp/$eid/tmp";   # Where to stash federation
                                                # stuff
    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
    my $rpms_dir = "/proj/$pid/rpms/$eid";      # Where to stash rpms
    my $state;                          # State of remote experiment
    my $status = IO::Pipe->new();       # The pipe to get status

    # Determine the status of the remote experiment
    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") ||
        die "Can't ssh to $user\@$host:$!\n";
    # XXX: this is simple now.  Parsing may become more complex
    while (my $line = <$status>) {
        $state = $1 if $line =~ /State: (\w+)/;
        $state = "none" if $line =~ /No\s+such\s+experiment/;
    }
    $status->close();
    print "$tb: $state\n";

    # Copy the experiment definition data over
    print "transferring subexperiment to $tb\n" if $verbose;
    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
    # Clear out any old experiment data; if not deleted, copies over it by
    # different users will fail.
    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
    # force the /bin/sh interpretation of the trailing * (which we need to
    # keep tmp around))  Again, this needs to be done more properly once we
    # have a non-ssh interface here.)
    print "clearing experiment subdirs on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") ||
        return 0;
    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") ||
        return 0;
    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") ||
        return 0;
    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") ||
        return 0;
    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") ||
        return 0;

    # Remote experiment is active.  Modify it.
    if ($state eq "active") {
        &_ship_federation_files($tb, $host, $user, $proj_dir) || return 0;
        &_ship_tarfiles_and_rpms($host, $user, $tarfiles_dir, $rpms_dir) ||
            return 0;

        print "Modifying $eid in place on $tb\n" if $verbose;
        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
            "$eid $tclfile", "modexp", $timeout) || return 0;
        return 1;
    }

    # Remote experiment is swapped out, modify it and swap it in.  Because
    # the experiment exists, the remote directory tree should be there.
    if ($state eq "swapped") {
        &_ship_federation_files($tb, $host, $user, $proj_dir) || return 0;
        &_ship_tarfiles_and_rpms($host, $user, $tarfiles_dir, $rpms_dir) ||
            return 0;

        print "Modifying $eid on $tb\n" if $verbose;
        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
            "modexp") || return 0;
        print "Swapping $eid in on $tb\n" if $verbose;
        # Now start up
        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
            "swapexp", $timeout) || return 0;
        return 1;
    }

    # No remote experiment.  Create one.  We do this in 2 steps so we can put
    # the configuration files and scripts into the new experiment directories.
    if ($state eq "none") {
        # Tarfiles have to exist for the creation to work
        print "copying tarfiles to $tb\n" if -d "$tmpdir/tarfiles";
        &_ship_tarfiles_and_rpms($host, $user, $tarfiles_dir, $rpms_dir) ||
            return 0;

        print "Creating $eid on $tb\n" if $verbose;
        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
            "$pid -e $eid $tclfile", "startexp") || return 0;
        # After startexp succeeds, the per-experiment directories exist on
        # the remote testbed.
        &_ship_federation_files($tb, $host, $user, $proj_dir) || return 0;
        # Now start up
        print "Swapping $eid in on $tb\n" if $verbose;
        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
            "swapexp", $timeout) || return 0;
        return 1;
    }

    # Every branch for a known state returns.  If execution gets here, the
    # state is unknown.
    warn "unknown state: $state\n";
    return 0;
}
459
# Swap a sub-experiment out on one testbed - probably because another
# sub-experiment has failed.  Takes the testbed name, the experiment ID and
# the per-testbed parameter hash (which supplies the ssh user, the ops host
# and domain, and the remote project).  Returns 1 if the remote swapexp
# command was reported successful by &ssh_cmd and 0 otherwise.  A progress
# message is printed when the global $verbose is set.
sub stop_segment {
    my($tb, $eid, $tbparams) = @_;

    my $pid = $tbparams->{$tb}->{'project'};    # testbed project
    my $user = $tbparams->{$tb}->{'user'};      # testbed user
    # Ops node (FQDN): host name with the domain appended
    my $host = join("",
        $tbparams->{$tb}->{'host'}, $tbparams->{$tb}->{'domain'});

    print "Stopping $eid on $tb\n" if $verbose;

    my $swapped_out = &ssh_cmd($user, $host,
        "/usr/testbed/bin/swapexp -w $pid $eid out", "swapexp (out)");
    if ($swapped_out) {
        return 1;
    }
    return 0;
}
476
# Fill the global $tbparams with results from a fedd call.
#
# Parameters:
#   $cmd - the complete shell command to run; its standard output is parsed
#          with &parse_testbeds_filename and its standard error is captured
#
# Returns undef on success, otherwise a string describing the error: the
# captured standard error if the command exited non-zero, or a message if
# its output could not be read or the scratch files could not be created.
sub fedd_access_request {
    my($cmd) = @_;
    my $rv = undef;
    my($out_file, $err_file);

    # File::Temp creates the scratch files safely with unpredictable names;
    # fixed names built from the pid invite clashes and symlink tricks in a
    # shared /tmp.
    require File::Temp;
    eval {
        my($out_fh, $err_fh);

        ($out_fh, $out_file) =
            File::Temp::tempfile("splitter.XXXXXX", DIR => "/tmp");
        ($err_fh, $err_file) =
            File::Temp::tempfile("splitter.err.XXXXXX", DIR => "/tmp");
        close($out_fh);
        close($err_fh);
    };
    if ($@) {
        unlink(grep { defined $_ } ($out_file, $err_file));
        return "Cannot create temporary files: $@";
    }

    system("$cmd 2> $err_file > $out_file");

    if ( ! $? ) {
        &parse_testbeds_filename($out_file, $tbparams) ||
            ($rv = "Error reading fedd output: $!\n");
    }
    else {
        $rv = "Fedd_client error:\n";
        # The error text is best effort: report the failure even if the
        # captured output cannot be read back.
        if (my $f = IO::File->new($err_file)) {
            while (my $line = <$f>) { $rv .= $line; }
            $f->close();
        }
    }
    unlink($out_file, $err_file);
    return $rv;
}
498
# Generate visualization info from the topo file.
#
# Parameters:
#   $file    - XML virtual topology file to read
#   $outfile - file to write the visualization XML to
#
# The topology is parsed for <node> and <lan> elements.  A lan name seen
# with one or two member nodes is treated as a link; once a third member
# shows up it becomes a lan, which is drawn as a node of its own connected
# to each member.  The resulting graph is written to a scratch dot file and
# laid out by neato; each node position matched in neato's output is written
# to $outfile as a <node> entry inside <vis>.
#
# Returns nothing if the scratch file or pipe cannot be created, dies if
# $outfile cannot be opened, and returns 1 otherwise.
sub genviz {
    my($file, $outfile) = @_;
    my %nodes;          # name -> "node" or "lan"
    my %links;          # link name -> list of (up to 2) member nodes
    my %lans;           # lan name -> list of member nodes
    my $chars = "";     # character data of the element being read
    my $in_node = 0;    # true inside a <node> element
    my $in_lan = 0;     # true inside a <lan> element
    my $lan = {};       # vname/vnode of the <lan> element being read
    my $dotfile = "/tmp/split$$.dot";
    my $neato = "/usr/local/bin/neato";
    my $g = IO::File->new(">$dotfile") || return;
    my $p = IO::Pipe->new() || return;
    my $out = IO::File->new(">$outfile") || die "open $!\n";

    # The handlers are anonymous subs on purpose: named subs nested in here
    # would bind to the variables of the first call to genviz only.
    my $start_element = sub {
        my($expat, $element) = @_;

        $chars = "";
        $in_node++ if $element eq "node";
        if ( $element eq "lan" ) {
            $in_lan++;
            $lan = {};
        }
    };

    my $end_element = sub {
        my($expat, $element) = @_;

        $in_node = 0 if $element eq "node";
        $nodes{$chars} = "node" if $in_node && $element eq "vname";
        return unless $in_lan;

        if ( $element ne "lan") {
            $lan->{$element} = $chars if $element =~ /(vname|vnode)/;
            return;
        }

        # End of a <lan> element: file its member under the right heading.
        $in_lan = 0;
        my $vname = $lan->{'vname'};
        my $vnode = $lan->{'vnode'};
        $lan = {};

        if ( $links{$vname} && @{$links{$vname}} == 2 ) {
            # Third member: this link needs to be a lan
            $nodes{$vname} = "lan";
            $lans{$vname} = [ @{$links{$vname}}, $vnode ];
            delete $links{$vname};
        }
        elsif ( $lans{$vname} && @{$lans{$vname}} ) {
            push(@{$lans{$vname}}, $vnode);
        }
        else {
            push(@{$links{$vname}}, $vnode);
        }
    };

    # The parser may deliver the text of one element in several pieces, so
    # collect them; $start_element resets the buffer.
    my $found_chars = sub {
        my($expat, $data) = @_;

        $chars .= $data;
    };

    my $parser = XML::Parser->new(Handlers => {
            Start => $start_element,
            End => $end_element,
            Char => $found_chars
        });

    print "$file\n";
    $parser->parsefile($file);

    print $g "graph G {\n";
    foreach my $n (keys %nodes) {
        print $g "\t\"$n\"\n";
    }
    foreach my $l (keys %links) {
        # Quote the endpoints like every other name in the file; unquoted
        # names containing e.g. '-' are not valid in the dot language.
        print $g "\t", join(" -- ", map { "\"$_\"" } @{$links{$l}}), "\n";
    }
    foreach my $l (keys %lans) {
        foreach my $n (@{$lans{$l}}) {
            print $g "\t \"$n\" -- \"$l\"\n";
        }
    }
    print $g "}\n";
    $g->close();

    $p->reader("$neato -Gstart=rand -Gepsilon=0.005 -Gmaxiter=20000 " .
        "-Gpack=true $dotfile");
    print $out "<vis>\n";
    while (my $line = <$p>) {
        # Pick the name and integer position out of lines that carry a
        # pos="x,y" attribute
        if ($line =~ /^\s*"?([\w\-]+)"?\s+\[.*pos=\"(\d+),(\d+)\"/) {
            my($n, $x, $y) = ($1, $2, $3);

            print $out "<node><name>$n</name><x>$x</x><y>$y</y><type>" .
                "$nodes{$n}</type></node>\n" if $nodes{$n};
        }
    }
    print $out "</vis>\n";
    $out->close();
    $p->close();
    unlink("$dotfile");
    return 1;
}
604
# Default project and group to pass to $tcl_splitter.  These are total dummy
# arguments; the splitter doesn't use them at all, but we supply them to keep
# our changes to the parser minimal.
$pid = $gid = "dummy";

# Argument processing.  The configuration file is read into the same hash as
# the command line options, so the settings below draw on both.
getopts('Ft:c:p:f:ndvNP:', \%opts);
$splitter_config = $opts{'c'} || "./splitter.conf";
$debug = $opts{'d'};
$verbose = $opts{'v'} || $opts{'d'};

&parse_config("$splitter_config", \%opts) ||
    die "Cannot read config file $splitter_config: $!\n";

warn "-N does nothing now. Only one testbeds format supported.\n"
    if $opts{'N'};
$fail_soft = $opts{'F'} || $opts{'failsoft'};
$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
$timeout = $opts{'t'} || $opts{'timeout'};
$eid = $opts{'experiment'};             # Experiment ID
$tcl = $opts{'f'} || shift;             # The experiment description
$master = $opts{'master'};              # Master testbed
$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'} || "/tmp";    # tmp files
$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
# Local scripts.  Keep the built-in default when no scriptdir is configured
# rather than replacing it with undef.
$local_script_dir = $opts{'scriptdir'} || $local_script_dir;
$muxmax = $opts{'muxlimit'} || 3;       # Number of connections muxed on one
                                        # gateway

$max_children = $opts{'p'} || $opts{'maxchildren'}
    if $opts{'p'} || $opts{'maxchildren'};

$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
    die "Must give an SMB share\n";
$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
    die "Must give an SMB user\n";
$auth_proj = $opts{'P'};

# tcl program to split experiments (changed during devel)
$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
# tclsh to call directly (changed during devel)
$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
# fedd_client to get testbed access parameters
$fedd_client = $opts{'feddclient'} || "fedd_client";

# Prefix to avoid collisions
$tmpdir .= "/split$$";

print "Temp files are in $tmpdir\n" if $verbose;
# Create a workspace
unless (-d "$tmpdir") {
    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
}

# If the keys are given, use them.  Otherwise create a set under $tmpdir

if ( $opts{'gatewaypubkey'} && $opts{'gatewaysecretkey'}) {
    $gw_pubkey = $opts{'gatewaypubkey'};
    $gw_secretkey = $opts{'gatewaysecretkey'};
}
else {
    $keytype = $opts{'gatewaykeytype'} || "rsa";
    mkdir("$tmpdir/keys") || die "Can't create temporary key dir: $!\n";
    $gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
    $gw_secretkey = "$tmpdir/keys/fed.$keytype";
    print "Generating $keytype keys\n" if $verbose;
    generate_ssh_keys($keytype, $gw_secretkey) ||
        die "Cannot generate keys\n";
}
# Generate the basenames
($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;

# Validate scripts directory
for my $s (@scripts) {
    die "$local_script_dir/$s not in local script directory. Try -d\n"
        unless -r "$local_script_dir/$s";
}

die "Must supply file, master and experiment" unless $master && $tcl && $eid;

&parse_testbeds_filename($tb_config, $tbparams) ||
    die "Cannot read testbed configurations from $tb_config: $!\n";

# Open a pipe to the splitter program and start it parsing the experiments
my $pipe = IO::Pipe->new();
# NB no more -p call on parse call.
my $split_cmd =
    "$tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl";
$pipe->reader($split_cmd) || die "Cannot execute $split_cmd:$!\n";

# Parsing variables
my $ctb;                # Current testbed
my %allocated;          # If allocated{$tb} > 0, $tb is in use
my $destfile;           # File that the sub-experiment tcl file is
                        # being written to, or "" if none.  Also used
                        # for hostnames file.
my $desthandle;         # File handle for destfile
my $gateways;           # when gateway lists are being processed this
                        # is the testbed whose gateways are being
                        # gathered.
my $control_gateway;    # Control net gateway for the current testbed
my %active_end;         # If active_end{"a-b"} > 0 then a is the active
                        # end of the a <-> b connector pair.
my $vtopo;              # IO::File for virtual topology info
710
711	# Parse the splitter output. This loop creates the sub experiments, gateway
712	# configurations and hostnames file
713	while (<$pipe>) {
714	# Vtopo is virtual topology about the entire experiment. Copy it to the
715	# $tmpdir for distribution far and wide.
716	(/^#\s+Begin\s+Vtopo/../^#\s+End\s+Vtopo/) && do {
717	/^#\s+Begin/ && do {
718	$vtopo = new IO::File(">$tmpdir/vtopo.xml");
719	warn "Can't open $tmpdir/vtopo.xml:$!\n" unless $vtopo;
720	next;
721	};
722	/^#\s+End/ && do {
723	$vtopo->close() if $vtopo;
724	undef $vtopo;
725	genviz("$tmpdir/vtopo.xml", "$tmpdir/viz.xml");
726	next;
727	};
728	print $vtopo $_ if $vtopo;
729	next;
730	};
731	# Allbeds lists all the testbeds that this experiment accesses. This code
732	# acquires access to them and pulls in their access parameters from fedd.
733	(/^#\s+Begin\s+Allbeds/../^#\s+End\s+Allbeds/) && do {
734	next if /^#/;
735	chomp;
736
737	my $tb; # Current testbed
738	my @nodes; # Current testbed node requests
739
740	# The Allbeds line has the testbed name first, followed by the node
741	# requirements of that testbed. A node requirement is separated from
742	# the testbed name and other node requirements by a vertical bar (|).
743	# This pulls the testbed off the front (which must be present) and
744	# splits the node descriptors out by the vertical bar. The first
745	# vertical bar (the one after the testbed) is removed by the initial
746	# regular expression to avoid a null entry in @nodes. The node
747	# requests are of the form image:type:count and can be passed directly
748	# to fedd_client as parameters.
749	/([^\|]+)\\|?(.*)/ && do {
750	my $n; # Scratch
751
752	($tb , $n) = ($1, $2);
753	@nodes = split(/\\|/, $n);
754	};
755
756	# If this testbed has not had its access parameters read from fedd, try
757	# to read them, if we have a way to talk to fedd
758	unless ($tbparams->{$tb}->{'access'} \|\| !$fedd_client) {
759	my $access_pipe = new IO::Pipe \|\|
760	die "Can't open pipe to fedd:$!\n";
761	my $proj = $auth_proj ? " -p $auth_proj " : "";
762	my @cmds;
763	my $rv;
764
765	print("Checking access to $tb using " . $tbparams->{$tb}->{'uri'}
766	. "\n") if $verbose;
767
768	# First access command, implicitly uses localhost fedd
769	push(@cmds,"$fedd_client -t " .
770	$tbparams->{$tb}->{'uri'} . " -T $ENV{HOME}/cacert.pem ".
771	"-l $tb $proj" . (@nodes ? " -n " : " ") .
772	join(" -n ", @nodes));
773	# Second try access command, implicitly directly contact testbed
774	push(@cmds,"$fedd_client -t " .
775	$tbparams->{$tb}->{'uri'} . " -u " .
776	$tbparams->{$tb}->{'uri'} . " -T $ENV{HOME}/cacert.pem ".
777	"-l $tb $proj" . (@nodes ? " -n " : " ") .
778	join(" -n ", @nodes));
779	# Third try access command, implicitly directly contact testbed
780	# using only federated id.
781	push(@cmds,"$fedd_client -f -a -t " .
782	$tbparams->{$tb}->{'uri'} . " -u " .
783	$tbparams->{$tb}->{'uri'} . " -T $ENV{HOME}/cacert.pem ".
784	"-l $tb $proj" . (@nodes ? " -n " : " ") .
785	join(" -n ", @nodes));
786
787	foreach my $c (@cmds) {
788	print "$c\n" if $verbose;
789	$rv = &fedd_access_request($c);
790	warn($rv) if $rv;
791
792	last if $rv eq undef;
793	}
794	die "Cannot get access to $tb\n" if $rv;
795	}
796	next;
797	};
798
799	# Start of a sub-experiment
800	/^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
801	$ctb = $1;
802
803	# If we know the testbed, start collecting its sub experiment tcl
804	# description. If not, warn the user.
805	if ($tbparams->{$ctb}->{'access'}) {
806	$allocated{$ctb}++; # Keep track of the testbeds allocated
807
808	unless (-d "$tmpdir/$ctb") {
809	mkdir("$tmpdir/$ctb") \|\| die "Can't create $tmpdir/$ctb: $!";
810	}
811	$destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
812
813	$desthandle = new IO::File(">$destfile") \|\|
814	die "Cannot open $destfile:$!\n";
815	}
816	else{
817	warn "No such testbed $ctb\n";
818	$destfile = "";
819	}
820	next;
821	};
822
823	# End of that experiment
824	/^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
825	# Simple syntax check and close out this experiment's tcl description
826	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
827	$desthandle->close() if $desthandle;
828	$destfile = $ctb = "";
829	next;
830	};
831
832	# Beginning of a gateway set
833	/^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
834	$gateways = $1;
835	# If we've heard of this tb, create the config lines for it one at a
836	# time.
837	if ($allocated{$gateways}) {
838	# Just in case. This directory should already have been created
839	# above.
840	unless (-d "$tmpdir/$gateways") {
841	mkdir("$tmpdir/$gateways") \|\|
842	die "Can't create $tmpdir/$gateways: $!";
843	}
844	}
845	else {
846	warn "Gateways given (and ignored) for testbed not in use: " .
847	"$gateways\n";
848	$gateways = 0;
849	}
850	next;
851	};
852	# End of the gateways section. Output the client config for this testbed
853	/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
854	die "Mismatched gateway markers ($1, $gateways)\n"
855	unless !$gateways \|\| $gateways == $1;
856
857	if ($control_gateway ) {
858	# Client config
859	my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
860	my $master_project = $tbparams->{$master}->{'project'};
861	die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
862	print $cc "ControlGateway: $control_gateway\n";
863	print $cc "SMBShare: $smb_share\n";
864	print $cc "ProjectUser: $project_user\n";
865	print $cc "ProjectName: $master_project\n";
866	$cc->close();
867	}
868	else { warn "No control gateway for $gateways?\n"; }
869
870	$gateways = 0;
871	next;
872	};
873	# Beginning of the hostnames list. Collection is always in the hostnames
874	# file.
875	/^#\s+Begin\s+hostnames/ && do {
876	$destfile = "$tmpdir/hostnames";
877	$desthandle = new IO::File(">$destfile") \|\|
878	die "Can't open $destfile:$!\n";
879	next;
880	};
881	# end of the hostnames list.
882	/^#\s+End\s+hostnames/ && do {
883	$desthandle->close();
884	$destfile = "";
885	next;
886	};
887
# While a gateway section is open, every input line describes one gateway:
#   <peer testbed> <gateway name> <peer host> <type>
# One configuration file is written per line into the testbed's staging
# directory, and the keys that gateway needs are copied alongside it.
if ($gateways) {
    chomp;
    my ($peer_tb, $gw_name, $peer_host, $gw_role) = split(" ", $_);

    # Projects at the local (source) and remote (destination) ends
    my $src_project = $tbparams->{$gateways}->{'project'};
    my $dst_project = $tbparams->{$peer_tb}->{'project'};

    # Experiment-qualified domain suffixes: .<eid>.<project><domain>
    my $src_suffix = ".$eid." . $src_project .
        $tbparams->{$gateways}->{'domain'};
    my $dst_suffix = ".$eid." . $dst_project .
        $tbparams->{$peer_tb}->{'domain'};

    # Master-side service nodes (FQDNs) and the peer's event server (FQDN)
    my $master_domain = $tbparams->{$master}->{'domain'};
    my $fs_fqdn       = $tbparams->{$master}->{'fs'} . $master_domain;
    my $boss_fqdn     = $tbparams->{$master}->{'boss'} . $master_domain;
    my $evserver_fqdn = $tbparams->{$master}->{'eventserver'} . $master_domain;
    my $peer_evserver_fqdn = $tbparams->{$peer_tb}->{'eventserver'} .
        $tbparams->{$peer_tb}->{'domain'};

    # Where the federation scripts live at each end
    my $peer_script_dir = "/proj/" . $dst_project . "/exp/$eid/tmp";
    my $src_script_dir  = "/proj/" . $src_project . "/exp/$eid/tmp";

    # Use DETER's config stuff?
    my $tunnel_cfg = $tbparams->{$gateways}->{'tun'} || "false";

    # Config file names are lower-cased so the `hostname` hack for
    # specifying configuration files works.
    (my $conf_file = "$gw_name$src_suffix.gw.conf") =~ tr/A-Z/a-z/;
    (my $peer_conf_file = "$peer_host$dst_suffix.gw.conf") =~ tr/A-Z/a-z/;

    # Pick the active end of the connector.  A gateway facing the master is
    # passive and a gateway on the master is active; between two other
    # testbeds the first one seen in the input is active, which %active_end
    # records.
    my $is_active;
    if    ($peer_tb eq $master)                 { $is_active = 0; }
    elsif ($gateways eq $master)                { $is_active = 1; }
    elsif ($active_end{"$peer_tb-$gateways"})   { $is_active = 0; }
    else  { $active_end{"$gateways-$peer_tb"}++;  $is_active = 1; }
    my $active = $is_active ? "true" : "false";

    # This is used to create the client configuration.
    if ($gw_role =~ /(control|both)/) {
        $control_gateway = "$gw_name$src_suffix";
    }

    # Write out the file, one item per print so the output does not depend
    # on the list separator.
    my $conf_path = "$tmpdir/$gateways/$conf_file";
    my $gwconfig = IO::File->new(">$conf_path")
        or die "can't open $conf_path: $!\n";

    my @conf_lines = (
        "Active: $active\n",
        "TunnelCfg: $tunnel_cfg\n",
        "BossName: $boss_fqdn\n",
        "FsName: $fs_fqdn\n",
        "EventServerName: $evserver_fqdn\n",
        "RemoteEventServerName: $peer_evserver_fqdn\n",
        "Type: $gw_role\n",
        "RemoteScriptDir: $peer_script_dir\n",
        "EventRepeater: $src_script_dir/fed_evrepeater\n",
        "RemoteExperiment: $dst_project/$eid\n",
        "LocalExperiment: $src_project/$eid\n",
        "RemoteConfigFile: " . "$peer_script_dir/$peer_conf_file\n",
        "Peer: $peer_host$dst_suffix\n",
        "Pubkeys: " . "/proj/$src_project/exp/$eid/tmp/$gw_pubkey_base\n",
        "Privkeys: " . "/proj/$src_project/exp/$eid/tmp/$gw_secretkey_base\n",
    );
    for my $conf_line (@conf_lines) {
        print {$gwconfig} $conf_line;
    }
    $gwconfig->close();

    # This testbed has a gateway (most will), so stage the keys it needs in
    # the testbed's subdirectory.  The public key is always staged; the
    # secret key only for the active end.
    my $staged_pubkey = "$tmpdir/$gateways/$gw_pubkey_base";
    if (!-r $staged_pubkey) {
        copy($gw_pubkey, $staged_pubkey)
            or die "Can't copy pubkeys ($gw_pubkey to " .
                "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
    }
    my $staged_secretkey = "$tmpdir/$gateways/$gw_secretkey_base";
    if ($is_active && !-r $staged_secretkey) {
        copy($gw_secretkey, $staged_secretkey)
            or die "Can't copy secret keys ($gw_secretkey to " .
                "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
    }

    # Done processing this gateway entry, ready for the next line
    next;
}
# Between "# Begin tarfiles" and "# End tarfiles" every non-comment line is a
# tarfile path; remember it in @tarfiles.  The markers themselves (and any
# other comment line in the range) are consumed without being recorded.
if (/^#\s+Begin\s+tarfiles/ .. /^#\s+End\s+tarfiles/) {
    unless (/^#/) {
        chomp;
        push(@tarfiles, $_);
    }
    next;
}
# Same treatment for the rpm list, collected in @rpms.
if (/^#\s+Begin\s+rpms/ .. /^#\s+End\s+rpms/) {
    unless (/^#/) {
        chomp;
        push(@rpms, $_);
    }
    next;
}
1000
# Anything left is a line of experiment description.  With no destination
# file open it belongs to an unidentified testbed, so its config is ignored.
next if !$destfile;

# Per-testbed settings for the current testbed, falling back to the defaults.
my $tb_gwtype  = $tbparams->{$ctb}->{'gwtype'} || $def_gwtype;
my $tb_gwimage = $tbparams->{$ctb}->{'gwimage'} || $def_gwimage;
my $tb_project = $tbparams->{$ctb}->{'project'};

# The master testbed uses the master start commands, every other testbed the
# slave ones.
my ($gw_startcmd, $exp_startcmd) = ($ctb eq $master)
    ? ($tbparams->{$ctb}->{'mgwstart'} || $def_mgwstart,
       $tbparams->{$ctb}->{'mexpstart'} || $def_mexpstart)
    : ($tbparams->{$ctb}->{'gwstart'} || $def_gwstart,
       $tbparams->{$ctb}->{'expstart'} || $def_expstart);

# Substitute variables.  The order matters: GWCONF expands to text that
# contains FEDDIR, so FEDDIR has to be replaced last.
s/GWTYPE/$tb_gwtype/g;
s/GWIMAGE/$tb_gwimage/g;
s/GWSTART/$gw_startcmd/g;
s/EXPSTART/$exp_startcmd/g;
# XXX: oh is this bad
s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
s#PROJDIR#/proj/$tb_project/#g;
s#EID#$eid#g;
s#FEDDIR#/proj/$tb_project/exp/$eid/tmp/#g;
print {$desthandle} $_;
1028	}
# The split description has been read completely.
$pipe->close();
if (!$allocated{$master}) {
    die "No nodes in master testbed ($master)\n";
}

# Copy tarfiles and rpms needed at remote sites to the staging directories.
# Start_segment will distribute them.  Each staging directory is created the
# first time a file needs it.
foreach my $tarball (@tarfiles) {
    -r $tarball or die "tarfile '$tarball' unreadable: $!\n";
    if (!-d "$tmpdir/tarfiles") {
        mkdir("$tmpdir/tarfiles")
            or die "Can't create $tmpdir/tarfiles:$!\n";
    }
    copy($tarball, "$tmpdir/tarfiles")
        or die "Can't copy $tarball to $tmpdir/tarfiles:$!\n";
}

foreach my $package (@rpms) {
    -r $package or die "rpm '$package' unreadable: $!\n";
    if (!-d "$tmpdir/rpms") {
        mkdir("$tmpdir/rpms")
            or die "Can't create $tmpdir/rpms:$!\n";
    }
    copy($package, "$tmpdir/rpms")
        or die "Can't copy $package to $tmpdir/rpms:$!\n";
}

# Without the start flag the job ends once everything is staged.
exit(0) if !$startem;
1055
my %started;         # If $started{$tb} then $tb successfully started
my %child;           # If $child{$pid} then a process with that pid is
                     # working on starting a segment
my $nworking = 0;    # Number of children working on swapin
my $pid;             # Scratch variable for pids

# Wait for one child and record its result.  Returns true when something
# went wrong: the child reported failure, it was killed by a signal, or
# wait() had no child to hand back.  Returns false for a successful child
# and for a reaped pid that is not one of ours.
my $reap_child = sub {
    my $reaped = wait();
    if ($reaped == -1) {
        # There is nothing left to wait for, so the bookkeeping is stale.
        # Clear it, otherwise the caller would wait forever.
        warn "wait returned without reaping: $!\n";
        $nworking = 0;
        %child = ();
        return 1;
    }

    # The $? >> 8 is the exit code of the subprocess, which is non-zero if
    # the &start_segment routine failed.  The low bits of $? are set when
    # the child died from a signal; that is a failure as well, even though
    # the exit code is then 0.
    my $status    = $?;
    my $exit_code = $status >> 8;
    print "Child $reaped completed exit code ($exit_code)\n" if $verbose;
    warn "Child $reaped was killed by signal " . ($status & 127) . "\n"
        if $status & 127;

    my $finished_tb = delete $child{$reaped};
    unless (defined $finished_tb) {
        # Not a segment starter: leave the counters and %started alone.
        warn "Reaped a pid we did not start?? ($reaped)\n";
        return 0;
    }
    $nworking--;
    $started{$finished_tb}++ unless $status;
    return $status != 0;
};

# Start up the slave sub-experiments first
TESTBED:
for my $tb (keys %allocated) {
    next TESTBED if $tb eq $master;

    # Stay within the limit on simultaneous children.  The test on
    # $nworking keeps this from spinning when the limit is unset or zero.
    while ($nworking && $nworking >= $max_children) {
        print "Waiting for a child process to complete\n" if $verbose;
        # Stop starting testbeds as soon as one is known to have failed.
        last TESTBED if $reap_child->();
    }

    $pid = fork();
    unless (defined $pid) {
        # fork() failed and we are still the parent.  Do not fall into the
        # child branch; the testbed is left unstarted, which the check
        # after the master start reports as a failure.
        warn "Can't fork to start testbed $tb: $!\n";
        last TESTBED;
    }
    if ($pid) {
        # Parent process
        $nworking++;
        $child{$pid} = $tb;
        print "Started process $pid to start testbed $tb\n"
            if $verbose;
    }
    else {
        # Child.  Note that we reverse the sense of the return code when it
        # becomes an exit value.  Zero exit values indicate success.
        exit(!&start_segment($tb, $eid, $tbparams, $timeout));
    }
}

# Now wait for any still running processes.
while ($nworking) {
    print "Waiting for a child process to complete ($nworking running)\n"
        if $verbose;
    $reap_child->();
}

# Now the master
if (&start_segment($master, $eid, $tbparams, $timeout)) {
    $started{$master}++;
}

# If any testbed failed, swap the rest out.
if (!$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
    print "Error starting experiment\n";
    exit(1);
}
print "Experiment started\n";
print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
# List form: the path goes straight to rm without being parsed by a shell.
system("rm", "-rf", $tmpdir) unless $debug;
exit(0);    # set the exit value
1132
1133	=pod
1134
1135	=head1 NAME
1136
1137	B<splitter.pl>
1138
1139	=head1 SYNOPSIS
1140
1141	B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
1142	[B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
1143
1144	=head1 DESCRIPTION
1145
1146	B<splitter.pl> invokes the DETER experiment parser to split an annotated
1147	experiment into multiple sub-experiments and instantiates the sub-experiments on
1148	their intended testbeds. Annotation is accomplished using the
1149	tb-set-node-testbed command, added to the parser.
1150
1151	Much of the script's behavior depends on the configuration file, specified with
1152	the B<-c> flag and defaulting to F<./splitter.conf>.
1153
1154	The testbed labels supplied in the B<tb-set-node-testbed> command are
1155	meaningful based on their presence in the testbeds file. That file can be
1156	specified in the configuration file using the B<Testbeds> directive, and
1157	defaults to F<./testbeds>. The syntax is described below.
1158
1159	Most of the intermediate files are staged in a sub-directory of a temporary
1160	files directory and deleted at the end of the script. Specifying the B<-d>
1161	flag on the command line avoids the deletion for debugging. By default the
1162	temporary files directory is F</tmp> and can be reset in the
1163	configuration file using the B<Tmpdir> directive. Intermediate files are
1164	stored under a subdirectory formed by adding the process ID of the splitter
1165	process. For example, if the temporary files directory is F</tmp> and the
1166	B<splitter.pl> process ID is 2323, the temporary files will be stored in
1167	F</tmp/split2323/>.
1168
1169	The experiment is split out into one experiment description per testbed in the
1170	temporary directory named as F<experiment.testbed.tcl> where the experiment is
1171	the experiment ID given in the configuration file, and the testbed is the
1172	tb-set-node-testbed parameter for the nodes in the file.
1173
1174	If the B<-n> option is absent the sub-experiments are then instantiated on
1175	their testbeds. (Here B<-n> is analogous to its use in L<make(1)>).
1176	Per-testbed parameters are set in the testbeds file. Sub-experiments on
1177	slave testbeds are instantiated in a random order, but the master testbed is
1178	currently instantiated last.
1179
1180	Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
1181	and the maximum number of simultaneous processes to start.
1182
1183	Scripts to start federation (the federation kit) are copied into the local
1184	experiment's tmp directory - e.g., F</proj/DETER/exp/simple-split/tmp>. These are
1185	taken from the directory given by the B<ScriptDir> directive in the
1186	configuration file.
1187
1188	If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
1189	F<splitter.conf>.
1190
1191	If any sub-experiment fails to instantiate, the other sub-experiments are
1192	swapped out. B<-F> avoids this swap out, which can also be specified as
1193	B<FailSoft: true> in F<splitter.conf>.
1194
1195	=head2 Configuration File
1196
1197	The configuration file is a simple set of colon-separated parameters and
1198	values. A configuration file must be present, either specified in the B<-c>
1199	flag or the default F<./splitter.conf>. All the parameter names are case
1200	insensitive, but should not include any whitespace. Parameter values may
1201	include whitespace, but no newlines.
1202
1203	Possible parameters are:
1204
1205	=over 5
1206
1207	=item Experiment
1208
1209	The name of the experiment on the various testbeds
1210
1211	=item Master
1212
1213	The master testbed label from the testbeds file, described below.
1214
1215	=item Testbeds
1216
1217	The testbeds file described below, giving per-testbed parameters. If this
1218	directive is absent the testbeds file defaults to F<./testbeds>
1219
1220	=item ScriptDir
1221
1222	Location of the default federation scripts, i.e. the federation kit.
1223
1224	=item GatewayPubkey
1225
1226	=item GatewaySecretKey
1227
1228	The names of the files containing secret and public keys to use in setting up
1229	tunnels between testbeds. If given they are used, otherwise keys are generated.
1230
1231	=item GatewayKeyType
1232
1233	This controls the kind of SSH keys generated to configure the gateways. If
1234	given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>. The parameter
1235	is case insensitive.
1236
1237	=item TmpDir
1238
1239	=item TempDir
1240
1241	The directory where temporary files are created. These are synonyms; if
1242	both are specified, B<TmpDir> has priority. If neither is specified,
1243	F</tmp> is used.
1244
1245	=item SMBShare
1246
1247	The SMB share on the master testbed that will be exported to remote clients.
1248
1249	=item SMBUser
1250
1251	The experiment user to mount project directories as. This user needs to be a
1252	member of the exported experiment - that is one of the users in the project
1253	containing this experiment on the master testbed.
1254
1255	=item Timeout
1256
1257	Value in seconds after which a swap-in operation will be considered a success.
1258	Often long swap-ins will hang when there are partial failures. This works
1259	around this issue. (This behavior can be requested on the command line by
1260	specifying B<-t> I<secs>.)
1261
1262	=item FailSoft
1263
1264	If not set, failure of any sub experiment swaps the rest out. Setting this to
1265	any value avoids this swap out. (This behavior can be requested on the command
1266	line by specifying B<-F>.)
1267
1268	=item MuxLimit
1269
1270	The maximum number of links/lans carried by one gateway pair.
1271
1272	=item Tclparse
1273
1274	The pathname to the experiment parsing program. Only developers should set
1275	this.
1276
1277	=item Tclsh
1278
1279	The pathname to the local oTcl shell. Only developers should set
1280	this.
1281
1282	=back
1283
1284	=head2 Testbeds file
1285
1286	The testbeds file (F<./testbeds> unless overridden by the B<Testbeds> directive) is a file of
1287	scoped attribute-value pairs where each attribute is specified on a separate
1288	line of the configuration file. Each testbed's parameters are preceded by the
1289	testbed label in brackets ([]) on a line by itself. After that the parameters
1290	are specified as parameter: value. This is essentially the same format as the
1291	configuration file. Parameters are:
1292
1293	=over 4
1294
1295	=item User
1296
1297	The user under which to make requests to this testbed. The user running
1298	B<splitter.pl> must be able to authenticate as this user under L<ssh(1)> to this
1299	testbed.
1300
1301	=item OpsNode
1302
1303	The host name of the testbed's ops node. The user calling B<splitter.pl> must
1304	be able to execute commands on this host via L<ssh(1)>.
1305
1306	=item Domain
1307
1308	The domain of nodes in this testbed (including the ops host). This parameter
1309	should always start with a period.
1310
1311	=item Project
1312
1313	The project under which to instantiate sub-experiments on this testbed.
1314
1315	=item ConnectorType
1316
1317	The node type for inter-testbed connector nodes on this testbed.
1318
1319	=item SlaveNodeStartCmd
1320
1321	The start command to run on experimental nodes when this testbed is used as a
1322	slave. In all the start commands the following string substitutions are made:
1323
1324	=over 10
1325
1326	=item FEDDIR
1327
1328	The local experiment's federation scripts directory. Each local experiment
1329	will have this replaced by the scripts directory on its local boss.
1330
1331	=item GWCONF
1332
1333	The full pathname of the gateway configuration file. As with FEDDIR, this is
1334	on the local boss.
1335
1336	=item PROJDIR
1337
1338	The project directory on the local boss.
1339
1340	=item EID
1341
1342	The local experiment name.
1343
1344	=back
1345
1346	All startcmds specified in F<testbeds> undergo these expansions.
1347
1348	=item SlaveConnectorStartCmd
1349
1350	The start command to run on gateway nodes when this testbed is used as a slave.
1351	The same string substitutions are made in this command as in SlaveNodeStartCmd.
1352
1353	=item MasterNodeStartCmd
1354
1355	The start command to run on experimental nodes when this testbed is used as a
1356	master. The same string substitutions are made in this command as in
1357	SlaveNodeStartCmd.
1358
1359	=item MasterConnectorStartCmd
1360
1361	The start command to run on gateway nodes when this testbed is used as a
1362	master. The same string substitutions are made in this command as in
1363	SlaveNodeStartCmd.
1364
1365	=item ConnectorImage
1366
1367	The disk image to be loaded on a gateway node on this testbed.
1368
1369	=item FileServer
1370
1371	The node in the master testbed from which filesystems are mounted.
1372
1373	=item Boss
1374
1375	The node in the master testbed that controls the testbed.
1376
1377	=item TunnelCfg
1378
1379	True if the connector needs to do DETER federation. This parameter will
1380	probably be removed.
1381
1382
1383	=back
1384
1385	=head1 ENVIRONMENT
1386
1387	B<splitter.pl> does not directly make use of environment variables, but calls
1388	out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1389	environment.
1390
1391	=head1 BUGS
1392
1393	A deprecated B<-N> flag was used to select testbeds file format. Only one
1394	format is supported now, and B<-N> generates a warning, but otherwise does not
1395	affect B<splitter.pl>.
1396
1397	=head1 SEE ALSO
1398
1399	L<sh(1)>, L<ssh(1)>
1400
1401	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: