source: fedkit/splitter.pl @ e2a71ebe

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since e2a71ebe was e2a71ebe, checked in by Ted Faber <faber@…>, 17 years ago

remote rpms

  • Property mode set to 100644
File size: 36.5 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12my $local_script_dir = ".";
13my($pid, $gid);                 # Process and group IDs for calling parse.tcl
14my $splitter_config;            # Configuration file
15my $debug;                      # True if thecalled in debug mode
16my $verbose;                    # True for extra progress reports
17my $startem;                    # If true, start the sub-experiments
18my $eid;                        # Experiment ID
19my $tcl;                        # The experiment description (topology file)
20my $master;                     # Master testbed
21my $tmpdir;                     # tmp files
22my $tb_config;                  # testbed configurations
23my $smb_share;                  # Share to mount from the master
24my $project_user;               # User to mount project dirs as
25my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27                                # basename)
28my $tcl_splitter;               # tcl program to split experiments
29                                # (changed during devel)
30my $tclsh;                      # tclsh to call directly (changed during devel)
31my @tarfiles;                   # Tarfiles in use by this experiment
32my @rpms;                       # Rpms in use by this experiment
33my $timeout;                    # The timeout to use for experiment swap ins
34my %opts;                       # Parsed options
35
36my $tbparams = {};              # Map of the per-testbed parameters from the
37                                # testbeds file.  It is a reference to a hash
38                                # of hashes (because it's passed around a bunch
39                                # and it's nicer to have one access pattern
40                                # throughout the script, in the main loop and
41                                # the subroutines).  That access is exemplified
42                                # by  $tbparams->{'deter'}->{'domain'} which is
43                                # the domain parameter of the DETER testbed. 
44my $fail_soft;                  # Do not swap failed sub-experiments out
45
46# Parse the config file.  The format is a colon-separated parameter name
47# followed by the value of that parameter to the end of the line.  This parses
48# that format and puts the parameters into the referenced hash.  Parameter
49# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
50# failure (e.g. file open) and 1 on success.
51sub parse_config {
52    my($file, $href) = @_;
53    my $fh = new IO::File($file);
54       
55    unless ($fh) {
56        warn "Can't open $file: $!\n";
57        return 0;
58    }
59
60    while (<$fh>) {
61        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
62        chomp;
63        /^([^:]+):\s*(.*)/ && do {
64            my $key = $1; 
65
66            $key =~ tr/A-Z/a-z/;
67            $href->{$key} = $2;
68            next;
69        };
70        warn "Unparasble line in $file: $_\n";
71    }
72    $fh->close();   # It will close when it goes out of scope, but...
73    return 1;
74}
75
76# Parse an easier-to-read testbeds file (the original was comma-separated
77# unadorned strings).  The format is a testbed scope as [testbed] followed by
78# the colon-separated attribute-value pairs for the testbed.  Right now these
79# go into a set of global hashes indexed by testbed, but that should probably
80# change.
81
82sub parse_testbeds {
83    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
84    my $fh = new IO::File($file);  # Testbeds filehandle
85    my $tb;                         # Current testbed
86    # Convert attribute in the file to tbparams hash key
87    my %attr_to_hash = (
88        "opsnode" => "host",
89        "user" => "user",
90        "domain" => "domain",
91        "project" => "project",
92        "connectortype" => "gwtype",
93        "slavenodestartcmd" => "expstart",
94        "slaveconnectorstartcmd" => "gwstart",
95        "masternodestartcmd" => "mexpstart",
96        "masterconnectorstartcmd" => "mgwstart",
97        "connectorimage" => "gwimage",
98        "fileserver" => "fs",
99        "boss" => "boss",
100        "tunnelcfg" => "tun"
101    );
102
103
104    unless ($fh) {
105        warn "Can't open $file: $!\n";
106        return 0;
107    }
108
109    while (<$fh>) {
110        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
111        chomp;
112        /^\s*\[(.*)\]/ && do {
113            $tb = $1;
114            $tbparams->{$tb} = {} unless $tbparams->{$tb};
115            next;
116        };
117
118        /^([^:]+):\s*(.*)/ && do {
119            unless ($tb) {
120                warn "Ignored attribute definition before testbed " .
121                    "defined in $file: $_\n";
122                next;
123            }
124            my $key = $1; 
125            $key =~ tr/A-Z/a-z/;
126            my $var = $attr_to_hash{$key};
127
128            if ($var) { $tbparams->{$tb}->{$var} = $2; }
129            else { warn "Unknown keyword $key in $file\n"; }
130
131            next;
132        };
133        warn "Unparasble line in $file: $_\n";
134    }
135    $fh->close();   # It will close when it goes out of scope, but...
136    return 1;
137}
138
139
140# use scp to transfer a file, reporting true if successful and false otherwise.
141# Parameters are the local file name, the ssh host destination (either hostname
142# oe user@host), and an optional destination file name or directory.  If no
143# destination is given, the file is transferred to the given user's home
144# directory.  If only a machine is given in the ssh host destination, the
145# current user is used.
146sub scp_file {
147    my($file, $user, $host, $dest) = @_;
148
149    # XXX system with a relative pathname is sort of gross
150    system("scp $file $user\@$host:$dest");
151    if ($?) {
152        warn "scp failed $?\n";
153        return 0;
154    }
155    else { return 1; }
156}
157
158# use ssh to execute the given command on the machine (and as the user) in
159# $where.  Parameters are the ssh destination directive ($where) and the
160# command to execute, and a prefix to be placed on a message generated if the
161# command fails.   On failure print a warning if a warning prefix was given and
162# return false.  If timeout is given fork a process and set an alarm of that
163# many seconds.  Timeouts also return 0;
164sub ssh_cmd {
165    my($user, $host, $cmd, $wname, $timeout) = @_;
166    my $pid;                # Child pid
167
168    if ( $timeout ) {
169        if ( $pid = fork () ) {
170            # Parent process
171            # The eval acts as a signal catcher.  If the alarm goes off inside
172            # the eval, the die will put "alarm\n" into $@, otherwise the
173            # return value of the execution in the child process will be used.
174            my $rv = eval {
175                local $SIG{'ALRM'} = sub{ die "alarm\n"; };
176                my $rv;
177
178                alarm $timeout;
179                $rv = waitpid($pid, 0);
180                alarm 0;
181                $rv;
182            };
183
184            # If the eval succeeded, $@ will be null and we can use $rv, which
185            # is the return code from the subprocess.  If the eval timed out,
186            # print a warning and assume the best.
187            if ($@ eq "alarm\n" ) {
188                warn "$wname timed out - pid $pid still live\n";
189                return 1;
190            }
191            else {
192                return $rv;
193            }
194        }
195        else {
196            # Child process
197            # XXX system with a relative pathname is sort of gross
198            exec("ssh $user\@$host $cmd");
199            exit 0;
200        }
201    }
202    else {
203        # No timeout, do all the work here.
204        # XXX system with a relative pathname is sort of gross
205        system ("ssh $user\@$host $cmd");
206        if ($?) {
207            warn "$wname failed $?\n" if $wname;
208            return 0;
209        }
210        else { return 1; }
211    }
212}
213
214# Ship local copies of the federation scripts out to the given host.  If any of
215# the script transfers fails, return 0.  The scripts to transfer are from the
216# global @scripts and are found locally in $local_script_dir (another global).
217sub ship_scripts {
218    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
219    my $s;
220
221    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
222    for $s (@scripts) {
223        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
224            return 0;
225    }
226    return 1;
227}
228
229# Ship per-testbed configuration generated by this script to the remote /proj
230# directories on the remote testbeds
231sub ship_configs {
232    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
233    my($d, $f);
234
235    $d = IO::Dir->new($src_dir) || return 0;
236
237    # All directories under $tmpdir are 770 so we can delete them later.
238    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
239    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
240    while ( $f = $d->read()) {
241        next if $f =~ /^\./;
242        if ( -d "$src_dir/$f" ) {
243            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
244                return 0;
245        }
246        else {
247            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
248        }
249    }
250    return 1;
251}
252
253# Start a sub section of the experiment on a given testbed.  The testbed and
254# the user to start the experiment as are pulled from the global per-testbed
255# hash, passed in as $tbparams, as is the project name on the remote testbed.
256# Parameters are the testbed and the experiment id.  Configuration files are
257# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
258# Then the current state of the experiment determined using expinfo.  From that
259# state, the experiment is either created, modified or spapped in.  If
260# everything succeeds, true is returned.  If the global verbose is set progress
261# messages are printed.
262sub start_segment {
263    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
264                                            # per-testbed parameters and remote
265                                            # swap-in timeout
266    my $host =                              # Host name of remote ops (FQDN)
267        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
268    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
269    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
270                                            # experiment under
271    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
272                                            # sub-experiment
273    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
274    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
275    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
276    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
277    my $state;                              # State of remote experiment
278    my $status = new IO::Pipe;              # The pipe to get status
279
280    # Determine the status of the remote experiment
281    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
282        die "Can't ssh to $user\@$host:$!\n";
283
284    # XXX: this is simple now.  Parsing may become more complex
285    while (<$status>) {
286        /State: (\w+)/ && ($state = $1);
287        /No\s+such\s+experiment/ && ($state = "none");
288    }
289    $status->close();
290    print "$tb: $state\n";
291
292    # Copy the experiment definition data over
293    print "transferring subexperiment to $tb\n" if $verbose;
294    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
295    # Clear out any old experiment data; if not deleted, copies over it by
296    # different users will fail.
297    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
298    # force the /bin/sh interpretation of the trailing * (which we need to keep
299    # tmp around))  Again, this needs to be done more properly once we have a
300    # non-ssh interface here.)
301    print "clearing experiment subdirs on $tb\n" if $verbose;
302    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
303        return 0;
304    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
305    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
306        return 0;
307    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
308    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
309        return 0;
310    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
311    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
312        return 0;
313    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
314    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
315        return 0;
316    # Remote experiment is active.  Modify it.
317    if ($state eq "active") {
318        print "Transferring federation support files to $tb\n" if $verbose;
319        # First copy new scripts and hostinfo into the remote /proj
320        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
321            return 0;
322        &ship_scripts($host, $user, $proj_dir) || return 0;
323        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
324
325        if ( -d "$tmpdir/tarfiles") {
326            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
327                return 0;
328        }
329
330        if ( -d "$tmpdir/rpms") {
331            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
332                return 0;
333        }
334
335        print "Modifying $eid in place on $tb\n" if $verbose;
336        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
337            "$eid $tclfile", "modexp", $timeout) || return 0;
338        return 1;
339    }
340
341    # Remote experiment is swapped out, modify it and swap it in.
342    if ($state eq "swapped") {
343        print "Transferring federation support files to $tb\n" if $verbose;
344        # First copy new scripts and hostinfo into the remote /proj (because
345        # the experiment exists, the directory tree should be there.
346        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
347            return 0;
348        &ship_scripts($host, $user, $proj_dir) || return 0;
349        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
350        if ( -d "$tmpdir/tarfiles") {
351            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
352                return 0;
353        }
354
355        if ( -d "$tmpdir/rpms") {
356            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
357                return 0;
358        }
359
360        print "Modifying $eid on $tb\n" if $verbose;
361        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
362            "modexp") || return 0;
363        print "Swapping $eid in on $tb\n" if $verbose;
364        # Now start up
365        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
366            "swapexp", $timeout) || return 0;
367        return 1;
368    }
369
370    # No remote experiment.  Create one.  We do this in 2 steps so we can put
371    # the configuration files and scripts into the new experiment directories.
372    if ($state eq "none") {
373
374        if ( -d "$tmpdir/tarfiles") {
375            # Tarfiles have to exist for the creation to work
376            print "copying tarfiles to $tb\n";
377            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
378                return 0;
379        }
380
381        if ( -d "$tmpdir/rpms") {
382            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
383                return 0;
384        }
385
386        print "Creating $eid on $tb\n" if $verbose;
387        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
388            "$pid -e $eid $tclfile", "startexp") || return 0;
389        # After startexp succeeds, the per-experiment directories exist on the
390        # remote testbed.
391        print "Transferring federation support files to $tb\n" if $verbose;
392        # First copy new scripts and hostinfo into the remote /proj
393        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
394            return 0;
395        &ship_scripts($host, $user, $proj_dir) || return 0;
396        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
397        # Now start up
398        print "Swapping $eid in on $tb\n" if $verbose;
399        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
400            "swapexp", $timeout) || return 0;
401        return 1;
402    }
403
404    # Every branch for a known state returns.  If execution gets here, the
405    # state is unknown.
406    warn "unknown state: $state\n";
407    return 0;
408}
409
410# Swap out a sub-experiment - probably because another has failed.  Arguments
411# are testbed and experiment.  Most of the control flow is similar to
412# start_segment, though much simpler.
413sub stop_segment {
414    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
415                                            # per-testbed parameters
416    my $user = $tbparams->{$tb}->{'user'};  # testbed user
417    my $host =                              # Ops node
418        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
419    my $pid = $tbparams->{$tb}->{'project'};# testbed project
420
421    print "Stopping $eid on $tb\n" if $verbose;
422    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
423        "swapexp (out)") || return 0;
424    return 1;
425}
426
427
428$pid = $gid = "dummy";              # Default project and group to pass to
429                                    # $tcl_splitter above.  These are total
430                                    # dummy arguments;  the splitter doesn't
431                                    # use them at all, but we supply them to
432                                    # keep our changes to the parser minimal.
433# Argument processing.
434getopts('Ft:c:f:ndvN', \%opts);
435$splitter_config = $opts{'c'} || "./splitter.conf";
436$debug = $opts{'d'};
437$verbose = $opts{'v'} || $opts{'d'};
438
439&parse_config("$splitter_config", \%opts) || 
440    die "Cannot read config file $splitter_config: $!\n";
441$fail_soft = $opts{'F'} || $opts{'failsoft'};
442
443$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
444$timeout = $opts{'t'} || $opts{'timeout'};
445$eid = $opts{'experiment'};             # Experiment ID
446$tcl = $opts{'f'} || shift;             # The experiment description
447$master = $opts{'master'};              # Master testbed
448$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
449$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
450$local_script_dir = $opts{'scriptdir'}; # Local scripts
451
452$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
453    die "Must give an SMB share\n";
454$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
455    die "Must give an SMB user\n";
456
457# For now specify these.  We may want to generate them later.
458$gw_pubkey = $opts{'gatewaypubkey'};
459($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
460$gw_secretkey = $opts{'gatewaysecretkey'};
461($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
462
463# tcl program to split experiments (changed during devel)
464$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
465# tclsh to call directly (changed during devel)
466$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
467
468# Prefix to avoid collisions
469$tmpdir .= "/split$$";
470
471print "Temp files are in $tmpdir\n" if $verbose;
472# Create a workspace
473unless (-d "$tmpdir") {
474    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
475}
476
477# Validate scripts directory
478for my $s (@scripts) {
479    die "$local_script_dir/$s not in local script directory. Try -d\n"
480        unless -r "$local_script_dir/$s";
481}
482
483die "Must supply file, master and experiment" unless $master && $tcl && $eid;
484
485if ($opts{'N'} ) {
486    &parse_testbeds($tb_config, $tbparams) ||
487        die "Cannot testbed congfigurations from $tb_config: $!\n";
488}
489else {
490    # Read a hash of per-testbed parameters from the local configurations.
491    my $conf = new IO::File($tb_config) || 
492        die "can't read testbed configutions from $tb_config: $!\n";
493    while (<$conf>) {
494        next if /^#/;
495        chomp;
496        my($tb, $h, $d, $u, $p, $es, $gs, $mes, $mgs, $t, $i, $fs, $boss,
497            $tun) = split(":", $_);
498        $tbparams->{$tb}->{'host'} = $h;
499        $tbparams->{$tb}->{'user'} = $u;
500        $tbparams->{$tb}->{'domain'} = $d;
501        $tbparams->{$tb}->{'project'} = $p;
502        $tbparams->{$tb}->{'gwtype'} = $t;
503        $tbparams->{$tb}->{'expstart'} = $es;
504        $tbparams->{$tb}->{'gwstart'} = $gs;
505        $tbparams->{$tb}->{'mexpstart'} = $mes;
506        $tbparams->{$tb}->{'mgwstart'} = $mgs;
507        $tbparams->{$tb}->{'gwimage'} = $i;
508        $tbparams->{$tb}->{'fs'} = $fs;
509        $tbparams->{$tb}->{'boss'} = $boss;
510        $tbparams->{$tb}->{'tun'} = $tun;
511
512        # Make sure the domain starts with a period
513        $tbparams->{$tb}->{'domain'} = "." . $tbparams->{$tb}->{'domain'}
514            unless $tbparams->{$tb}->{'domain'} =~ /^\./;
515    }
516    $conf->close();
517}
518
519# Open a pipe to the splitter program and start it parsing the experiments
520my $pipe = new IO::Pipe;
521# NB no more -p call on parse call.
522$pipe->reader("$tclsh $tcl_splitter -s -m $master  $pid $gid $eid $tcl") || 
523    die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
524
525# Parsing variables
526my $ctb;                        # Current testbed
527my %allocated;                  # If allocated{$tb} > 0, $tb is in use
528my $destfile;                   # File that the sub-experiment tcl file is
529                                # being written to, or "" if none.  Also used
530                                # for hostnames file.
531my $desthandle;                 # File handle for distfile
532my $gateways;                   # when gateway lists are being processed this
533                                # is the testbed whose gateways are being
534                                # gathered.
535my $control_gateway;            # Control net gateway for the current testbed
536my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
537                                # end of the a <-> b connector pair.
538
539# Parse the splitter output.  This loop creates the sub experiments, gateway
540# configurations and hostnames file
541while (<$pipe>) {
542    # Start of a sub-experiment
543    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
544        $ctb = $1;
545
546        # If we know the testbed, start collecting its sub experiment tcl
547        # description.  If not, warn the caller and ignore the configuration of
548        # this testbed.
549        if ($tbparams->{$ctb}) {
550            $allocated{$ctb}++; # Keep track of the testbeds allocated
551
552            unless (-d "$tmpdir/$ctb") {
553                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
554            }
555            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
556
557            $desthandle = new IO::File(">$destfile") || 
558                die "Cannot open $destfile:$!\n";
559        }
560        else { 
561            warn "No such testbed $ctb\n";
562            $destfile = "";
563        }
564        next;
565    };
566
567    # End of that experiment
568    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
569        # Simple syntax check and close out this experiment's tcl description
570        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
571        $desthandle->close();
572        $destfile = $ctb = "";
573        next;
574    };
575
576    # Beginning of a gateway set
577    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
578        $gateways = $1;
579        # If we've heard of this tb, create the config lines for it one at a
580        # time.
581        if ($allocated{$gateways}) {
582            # Just in case.  This directory should already have been created
583            # above.
584            unless (-d "$tmpdir/$gateways") {
585                mkdir("$tmpdir/$gateways") || 
586                    die "Can't create $tmpdir/$gateways: $!";
587            }
588        }
589        else {
590            warn "Gateways given (and ignored) for testbed not in use: " .
591                "$gateways\n";
592            $gateways = 0;
593        }
594        next;
595    };
596    # End of the gateways section.  Output the client config for this testbed
597    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
598        die "Mismatched gateway markers ($1, $gateways)\n" 
599            unless !$gateways || $gateways == $1;
600
601        if ($control_gateway ) {
602            # Client config
603            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
604            my $master_project = $tbparams->{$master}->{'project'};
605            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
606            print $cc "ControlGateway: $control_gateway\n";
607            print $cc "SMBShare: $smb_share\n";
608            print $cc "ProjectUser: $project_user\n";
609            print $cc "ProjectName: $master_project\n";
610            $cc->close();
611        }
612        else { warn "No control gateway for $gateways?\n"; }
613           
614        $gateways = 0;
615        next;
616    };
617    # Beginning of the hostnames list.  Collection is always in the hostnames
618    # file.
619    /^#\s+Begin\s+hostnames/ && do {
620        $destfile = "$tmpdir/hostnames";
621        $desthandle = new IO::File(">$destfile") || 
622            die "Can't open $destfile:$!\n";
623        next;
624    };
625    # end of the hostnames list.
626    /^#\s+End\s+hostnames/ && do {
627        $desthandle->close();
628        $destfile = "";
629        next;
630    };
631
632    # Generate gateway configuration info, one file per line
633    $gateways && do {
634        chomp;
635        my($dtb, $myname, $desthost, $type) = split(" ", $_);
636
637        # Many of these are to simplify print statements
638        my $sdomain =                           # domain for the source
639            $tbparams->{$gateways}->{'domain'};
640        my $ddomain =                           # domain for the destination
641            $tbparams->{$dtb}->{'domain'};
642        my $sproject =                          # Project of the destination
643            $tbparams->{$gateways}->{'project'};
644        my $fs =                                # Master fs node (FQDN)
645            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
646        my $boss =                              # Master boss node (FQDN)
647            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
648        my $remote_script_dir =                 # Remote fed script location
649            "/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
650        my $active;                             # Is this the active side of
651                                                # the connector?
652
653        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
654        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
655
656        my $conf_file = "$myname$sdomain.gw.conf";
657        # translate to lower case so the `hostname` hack for specifying
658        # configuration files works.
659        $conf_file =~ tr/A-Z/a-z/;
660
661        # If either end of this link is in the master side of the testbed, that
662        # side is the active end. Otherwise the first testbed encountered in
663        # the file will be the active end.  The $active_end variable keeps
664        # track of those decisions
665        if ( $dtb eq $master ) { $active = "false"; }
666        elsif ($gateways eq $master ) { $active = "true"; }
667        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
668        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
669
670        # This is used to create the client configuration.
671        $control_gateway = "$myname$sdomain"
672            if $type =~ /(control|both)/;
673
674        # Write out the file
675        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
676            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
677
678        print $gwconfig "Active: $active\n";
679        print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
680        print $gwconfig "BossName: $boss\n";
681        print $gwconfig "FsName: $fs\n";
682        print $gwconfig "Type: $type\n";
683        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
684        print $gwconfig "Peer: $desthost$ddomain\n";
685        print $gwconfig "Pubkeys: " . 
686            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
687        print $gwconfig "Privkeys: " .
688            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
689        $gwconfig->close();
690
691        # This testbed has a gateway (most will) so make a copy of the keys it
692        # needs in this testbed's subdirectory.  start_segment will transfer
693        # them.
694        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
695            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
696                die "Can't copy pubkeys ($gw_pubkey to " . 
697                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
698        }
699        if ($active eq "true" ) {
700            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
701                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
702                    die "Can't copy secret keys ($gw_secretkey to " . 
703                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
704            }
705        }
706
707        #done processing gateway entry, ready for next line
708        next; 
709    };
710    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
711        next if /^#/;
712        chomp;
713        push(@tarfiles, $_);
714        next;
715    };
716    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
717        next if /^#/;
718        chomp;
719        push(@rpms, $_);
720        next;
721    };
722
723    next unless $destfile;  # Unidentified testbed, ignore config
724    # local copies that can be used in the substitutions below
725    my $gwtype = $tbparams->{$ctb}->{'gwtype'};
726    my $gwimage = $tbparams->{$ctb}->{'gwimage'};
727    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
728    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
729    my $gwstart = $tbparams->{$ctb}->{'gwstart'};
730    my $expstart = $tbparams->{$ctb}->{'expstart'};
731    my $project = $tbparams->{$ctb}->{'project'};
732
733    # Substitute variables
734    s/GWTYPE/$gwtype/g;
735    s/GWIMAGE/$gwimage/g;
736    if ($ctb eq $master ) {
737        s/GWSTART/$mgwstart/g;
738        s/EXPSTART/$mexpstart/g;
739    }
740    else {
741        s/GWSTART/$gwstart/g;
742        s/EXPSTART/$expstart/g;
743    }
744    # XXX: oh is this bad
745    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
746    s#PROJDIR#/proj/$project/#g;
747    s#EID#$eid#g;
748    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
749    print $desthandle $_;
750}
751$pipe->close();
752die "No nodes in master testbed ($master)\n" unless $allocated{$master};
753
754# Copy tarfiles and rpms needed at remote sites to the staging directories.
755# Start_segment will distribute them
756for my $t  (@tarfiles) {
757    die "tarfile '$t' unreadable: $!\n" unless -r $t;
758    unless (-d "$tmpdir/tarfiles") {
759        mkdir("$tmpdir/tarfiles") || 
760            die "Can't create $tmpdir/tarfiles:$!\n";
761    }
762    copy($t, "$tmpdir/tarfiles") || 
763        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
764}
765
766for my $r  (@rpms) {
767    die "rpm '$r' unreadable: $!\n" unless -r $r;
768    unless (-d "$tmpdir/rpms") {
769        mkdir("$tmpdir/rpms") || 
770            die "Can't create $tmpdir/rpms:$!\n";
771    }
772    copy($r, "$tmpdir/rpms") || 
773        die "Can't copy $r to  $tmpdir/rpms:$!\n";
774}
775
776exit(0) unless $startem;
777
778my %started;                # If $started{$tb} then $tb successfully started
779
780# Start up the slave sub-experiments first
781TESTBED:
782for my $tb  (keys %allocated) {
783    if ($tb ne $master) {
784        if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
785        else { last TESTBED; }
786    }
787}
788
789# Now the master
790if (&start_segment($master, $eid, $tbparams, $timeout)) { 
791    $started{$master}++;
792}
793
794# If any testbed failed, swap the rest out.
795if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
796    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
797    print "Error starting experiment\n";
798    exit(1);
799}
800print "Experiment started\n";
801print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
802system("rm -rf $tmpdir") unless $debug;
803exit(0);    # set the exit value
804
805=pod
806
807=head1 NAME
808
809B<splitter.pl>
810
811=head1 SYNOPSIS
812
813B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
814    [B<-f> F<experiment_tcl>] [F<experiment_tcl>]
815
816=head1 DESCRIPTION
817
818B<splitter.pl> invokes the DETER experiment parser to split an annotated
819experiment into multiple sub-experments and instantiates the sub-experiments on
820their intended testbeds.  Annotation is accomplished using the
821tb-set-node-testbed command, added to the parser.
822
823Much of the script's behavior depends on the configuration file, specified with
824the B<-c> flag and defaulting to F<./splitter.conf>.
825
826The testbed labels supplied in the B<tb-set-node-testbed> command are
827meaningful based on their presence in the testbeds file.  that file can be
828specified in the configuration file using the B<Testbeds> directive, and
829defaults to F<./testbeds>.  The syntax is described below.
830
831Most of the intermediate files are staged in a sub-directory of a temporary
832files directory and deleted at the end of the script.  Specifying the B<-d>
833flag on the command line avoids the deletion for debbugging.  By default the
834temporary files directory is directory is F</tmp> and can be reset in the
835configuration file using the B<Tmpdir> directive.  Intermediate files are
836stored under a subdirectory formed by adding the process ID of the splitter
837process.  For example, if the temporary files directory is F</tmp> and the
838B<splitter.pl> process ID is 2323, the temporary files will be stored in
839F</tmp/split2323/>.
840
841The expreriment is split out into one experiment description per testbed in the
842temporary directory named as F<experiment.testbed.tcl> where the experiment is
843the experiment ID given in the configuration file, and the testbed is the
844tb-set-node-testbed parameter for the nodes in the file.
845
846If the B<-n> option is absent the sub-experiments are then instantiated on
847their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
848Per-testbed parameters are set in the testbeds file.  Sub-experiments on
849slave testbeds are instantiated in a random order, but the master testbed is
850currently instantiated last.
851
852Scripts to start federation (the federation kit) are copied into the local
853experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
854taken from the directory given by the B<ScriptDir> directive in the
855configuration file.
856
857If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
858F<splitter.conf>.
859
860If any sub-experiment fails to instantiate, the other sub-exeriments are
861swapped out.  B<-F> avoids this swap out, which can also be specified as
862B<SoftFail: true> in F<splitter.conf>
863
864=head2 Configuration File
865
866The configuration file is a simple set of colon-separated parameters and
867values.  A configuration file must be present, either specified in the B<-c>
868flag or the default F<./splitter.conf>.  All the parameter names are case
869insensitive, but should not include any whitespace.  Parameter values may
870include whitespace, but no newlines.
871
872Possible parameters are:
873
874=over 5
875
876=item Experiment
877
878The name of the experiment on the various testbeds
879
880=item Master
881
882The master testbed label from the testbeds file, described below.
883
884=item Testbeds
885
886The testbeds file described below, giving per-testbed parameters.  If this
887directive is absent the testbeds file defaults to F<./testbeds>
888
889=item ScriptDir
890
891Location of the default federation scripts, i.e. the federation kit.
892
893=item GatewayPubkey
894
895=item GatewaySecretKey
896
897The names of the files containing secret and public keys to use in setting up
898tunnels between testbeds.  These will eventually be automatically generated.
899
900=item TmpDir
901
902=item TempDir
903
904The directory where temporary files are created.  These are synonyms, but
905should both be specified, B<TmpDir> has priority.  If neither is specified,
906F</tmp> is used.
907
908=item SMBShare
909
910The SMB share on the master testbed that will be exported to remote clients.
911
912=item SMBUser
913
914The experiment user to mount project directories as.  This user needs to be a
915member of the exported experiment - that is one of the users in the project
916containing this experiment on the master testbed.
917
918=item Tclparse
919
920The pathname to the experiment parsing program.  Only developers should set
921this.
922
923=item Tclsh
924
925The pathname to the local oTcl shell.  Only developers should set
926this.
927
928=item Timeout
929
930Value in seconds after which a swap-in operatioin will be considered a success.
931Often long swap-ins will hang when there are partial failures.  This works
932around this issue.
933
934=item FailSoft
935
936If not set, failure of any sub experiment swaps the rest out.  Setting this to any value avoids this swap out.  (Also B<-F>.)
937
938=back
939
940=head2 Testbeds file
941
942The configuration file (F<./testbeds> unless overridden by B<-c>) is a
943colon-separated set of parameters keyed by testbed name.  The fields, in order,
944are:
945
946=over 5
947
948=item name
949
950The testbed to which this line of parameters applies.
951
952=item user
953
954The user under which to make requests to this testbed.  The user running
955B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
956testbed.
957
958=item host
959
960The host name of the testbed's ops node.  The user calling B<splitter.pl> must
961be able to execute commands on this host via L<ssh(1)>.
962
963=item domain
964
965The domain of nodes in this testbed (including the ops host).  This parameter
966should always start with a period.
967
968=item project
969
970The project under which to instantiate sub-experiments on this testbed.
971
972=item gateway type
973
974The node type for inter-testbed gateway nodes on this testbed.
975
976=item experiment start (slave)
977
978The start command to run on experimental nodes when this testbed is used as a
979slave.  In all the start commands the string FEDDIR will be replaced by the
980local experiment's federation scripts directory and the string GWCONF replaced
981by the gatway configuration file.
982
983=item gateway start (slave)
984
985The start command to run on gateway nodes when this testbed is used as a slave.
986The same string substitutions are made in this command as in experiment start.
987
988=item experiment start (master)
989
990The start command to run on experimental nodes when this testbed is used as a
991master.  The same string substitutions are made in this command as in
992experiment start.
993
994=item gateway start (master)
995
996The start command to run on gateway nodes when this testbed is used as a
997master.  The same string substitutions are made in this command as in
998experiment start.
999
1000=item gateway image
1001
1002The disk image to be loaded on a gateway node on this testbed.
1003
1004=item filesystem node
1005
1006The node in the master testbed from which filesystems are mounted.
1007
1008=item boss node
1009
1010The node in the master testbed that controls the testbed.
1011
1012=item tunnel configuration
1013
1014True if the connector needs to do DETER federation.  This parameter will
1015probably be removed.
1016
1017=back
1018
1019The parsing of the testbeds is extremely simple.  Colons separate each
1020field and there is n provision for escaping them at this time.
1021
1022=head2 New Testbeds File Format
1023
1024The testbeds file has ben revamped to make it more human readable.  The
1025parameters are now named and specified on separate lines of the configuration
1026file.  Each testbed's parameters are preceeded by the testbed label in brackets
1027([]) on a line by itself.  After that the parameters are specified as
1028parameter: value.  This is essentially the same format as the configuration
1029file.  Parameters are:
1030
1031=over 4
1032
1033=item User
1034
1035The user under which to make requests to this testbed.  The user running
1036B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
1037testbed.
1038
1039=item OpsNode
1040
1041The host name of the testbed's ops node.  The user calling B<splitter.pl> must
1042be able to execute commands on this host via L<ssh(1)>.
1043
1044=item Domain
1045
1046The domain of nodes in this testbed (including the ops host).  This parameter
1047should always start with a period.
1048
1049=item Project
1050
1051The project under which to instantiate sub-experiments on this testbed.
1052
1053=item ConnectorType
1054
1055The node type for inter-testbed connector nodes on this testbed.
1056
1057=item SlaveNodeStartCmd
1058
1059The start command to run on experimental nodes when this testbed is used as a
1060slave.  In all the start commands the string FEDDIR will be replaced by the
1061local experiment's federation scripts directory and the string GWCONF replaced
1062by the gatway configuration file.
1063
1064=item SlaveConnectorStartCmd
1065
1066The start command to run on gateway nodes when this testbed is used as a slave.
1067The same string substitutions are made in this command as in experiment start.
1068
1069=item MasterNodeStartCmd
1070
1071The start command to run on experimental nodes when this testbed is used as a
1072master.  The same string substitutions are made in this command as in
1073experiment start.
1074
1075=item MasterConnectorStartCmd
1076
1077The start command to run on gateway nodes when this testbed is used as a
1078master.  The same string substitutions are made in this command as in
1079experiment start.
1080
1081=item ConnectorImage
1082
1083The disk image to be loaded on a gateway node on this testbed.
1084
1085=item FileServer
1086
1087The node in the master testbed from which filesystems are mounted.
1088
1089=item Boss
1090
1091The node in the master testbed that controls the testbed.
1092
1093=item TunnelCfg
1094
1095True if the connector needs to do DETER federation.  This parameter will
1096probably be removed.
1097
1098
1099=back
1100
1101
1102=head1 ENVIRONMENT
1103
1104B<splitter.pl> does not directly make use of environment variables, but calls
1105out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1106environment.
1107
1108=head1 SEE ALSO
1109
1110L<sh(1)>, L<ssh(1)>
1111
1112=cut
Note: See TracBrowser for help on using the repository browser.