source: fedkit/splitter.pl @ 2396559e

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since 2396559e was 2396559e, checked in by Ted Faber <faber@…>, 16 years ago

fedd integration

  • Property mode set to 100644
File size: 40.5 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("fed_bootstrap", "federate.sh", "smbmount.pl", 
12    "make_hosts", "fed-tun.pl", "fed_evrepeater" );
13my $local_script_dir = ".";
14my($pid, $gid);                 # Process and group IDs for calling parse.tcl
15my $splitter_config;            # Configuration file
16my $debug;                      # True if thecalled in debug mode
17my $verbose;                    # True for extra progress reports
18my $startem;                    # If true, start the sub-experiments
19my $eid;                        # Experiment ID
20my $tcl;                        # The experiment description (topology file)
21my $master;                     # Master testbed
22my $tmpdir;                     # tmp files
23my $tb_config;                  # testbed configurations
24my $smb_share;                  # Share to mount from the master
25my $project_user;               # User to mount project dirs as
26my $auth_proj;                  # Local project for resource access
27my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
28my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
29                                # basename)
30my($keytype);                   # Type (DSA or RSA) of generated gateway keys
31my $tcl_splitter;               # tcl program to split experiments
32                                # (changed during devel)
33my $tclsh;                      # tclsh to call directly (changed during devel)
34my $fedd_client;                # Program to call for testbed access params
35my $muxmax;                     # Maximum number of links/lans over 1 gw pair
36my @tarfiles;                   # Tarfiles in use by this experiment
37my @rpms;                       # Rpms in use by this experiment
38my $timeout;                    # The timeout to use for experiment swap ins
39my %opts;                       # Parsed options
40
41my $tbparams = {};              # Map of the per-testbed parameters from the
42                                # testbeds file.  It is a reference to a hash
43                                # of hashes (because it's passed around a bunch
44                                # and it's nicer to have one access pattern
45                                # throughout the script, in the main loop and
46                                # the subroutines).  That access is exemplified
47                                # by  $tbparams->{'deter'}->{'domain'} which is
48                                # the domain parameter of the DETER testbed. 
49my $fail_soft;                  # Do not swap failed sub-experiments out
50my $max_children=1;             # Maximum number of simultaneous swap-ins
51
52# Default commands for starting experiment and gateway nodes.  Testbeds can
53# override these.  (The 'm' prefixed commands are for operating as the master
54# testbed.)
55my $def_expstart = "sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate";
56my $def_mexpstart = "sudo -H FEDDIR/make_hosts FEDDIR/hosts";
57my $def_gwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log";
58my $def_mgwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF >& /tmp/bridge.log";
59my $def_gwimage = "FBSD61-TUNNEL2";
60my $def_gwtype = "pc";
61
62# Parse the config file.  The format is a colon-separated parameter name
63# followed by the value of that parameter to the end of the line.  This parses
64# that format and puts the parameters into the referenced hash.  Parameter
65# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
66# failure (e.g. file open) and 1 on success.
67sub parse_config {
68    my($file, $href) = @_;
69    my $fh = new IO::File($file);
70       
71    unless ($fh) {
72        warn "Can't open $file: $!\n";
73        return 0;
74    }
75
76    while (<$fh>) {
77        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
78        chomp;
79        /^([^:]+):\s*(.*)/ && do {
80            my $key = $1; 
81
82            $key =~ tr/A-Z/a-z/;
83            $href->{$key} = $2;
84            next;
85        };
86        warn "Unparasble line in $file: $_\n";
87    }
88    $fh->close();   # It will close when it goes out of scope, but...
89    return 1;
90}
91
92# Parse an easier-to-read testbeds file (the original was comma-separated
93# unadorned strings).  The format is a testbed scope as [testbed] followed by
94# the colon-separated attribute-value pairs for the testbed.  Right now these
95# go into a set of global hashes indexed by testbed, but that should probably
96# change.  The file parameter is an open IO::Handle.  &parse_testbeds_filename
97# opens the file and calls this.  Parse_testbeds can be used on pipes as well,
98# e.g. fedd_client output.
99sub parse_testbeds {
100    my($fh, $tbparams) = @_;        # Testbeds file and parameter hash
101    my $tb;                         # Current testbed
102    # Convert attribute in the file to tbparams hash key
103    my %attr_to_hash = (
104        "opsnode" => "host",
105        "user" => "user",
106        "domain" => "domain",
107        "project" => "project",
108        "connectortype" => "gwtype",
109        "slavenodestartcmd" => "expstart",
110        "slaveconnectorstartcmd" => "gwstart",
111        "masternodestartcmd" => "mexpstart",
112        "masterconnectorstartcmd" => "mgwstart",
113        "connectorimage" => "gwimage",
114        "fileserver" => "fs",
115        "boss" => "boss",
116        "eventserver" => "eventserver",
117        "tunnelcfg" => "tun",
118        "uri" => "uri",
119        "access" => "access"
120    );
121
122    while (<$fh>) {
123        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
124        print STDERR "testbeds: $_";
125        chomp;
126        /^\s*\[(.*)\]/ && do {
127            $tb = $1;
128            $tbparams->{$tb} = {} unless $tbparams->{$tb};
129            next;
130        };
131
132        /^([^:]+):\s*(.*)/ && do {
133            unless ($tb) {
134                warn "Ignored attribute definition before testbed: $_\n ";
135                next;
136            }
137            my $key = $1; 
138            $key =~ tr/A-Z/a-z/;
139            my $var = $attr_to_hash{$key};
140
141            if ($var) { $tbparams->{$tb}->{$var} = $2; }
142            else { warn "Unknown keyword $key\n"; }
143
144            next;
145        };
146        warn "Unparasble line: $_\n";
147    }
148    return 1;
149}
150
151
152# Open the given file name and parse the testbeds file it contains by calling
153# &parse_testbeds.
154sub parse_testbeds_filename {
155    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
156    my $fh = new IO::File($file);   # Testbeds filehandle
157
158    if ($fh) {
159        my $rv = &parse_testbeds($fh, $tbparams);
160        $fh->close();   # It will close when it goes out of scope, but...
161        $rv;
162    }
163    else {
164        warn "Can't open $file: $!\n";
165        return 0;
166    }
167}
168
169# Generate SSH keys for use by the gateways.  The parameters are the type and
170# the filename for the private key.  The pubkey will be stored in a filename
171# with the same name as the private key but with .pub appended.  Type can be
172# dsa or rsa.
173
174sub generate_ssh_keys {
175    my($type, $dest) = @_;
176
177    $type =~ tr/A-Z/a-z/;
178    return 0 if $type !~ /(rsa|dsa)/;
179    system("/usr/bin/ssh-keygen -t $type -N \"\" -f $dest");
180    return $@ ? 0 : 1;
181}
182
183# use scp to transfer a file, reporting true if successful and false otherwise.
184# Parameters are the local file name, the ssh host destination (either hostname
185# oe user@host), and an optional destination file name or directory.  If no
186# destination is given, the file is transferred to the given user's home
187# directory.  If only a machine is given in the ssh host destination, the
188# current user is used.
189sub scp_file {
190    my($file, $user, $host, $dest) = @_;
191
192    # XXX system with a relative pathname is sort of gross
193    system("scp $file $user\@$host:$dest");
194    if ($?) {
195        warn "scp failed $?\n";
196        return 0;
197    }
198    else { return 1; }
199}
200
201# use ssh to execute the given command on the machine (and as the user) in
202# $where.  Parameters are the ssh destination directive ($where) and the
203# command to execute, and a prefix to be placed on a message generated if the
204# command fails.   On failure print a warning if a warning prefix was given and
205# return false.  If timeout is given fork a process and set an alarm of that
206# many seconds.  Timeouts also return 0;
207sub ssh_cmd {
208    my($user, $host, $cmd, $wname, $timeout) = @_;
209    my $pid;                # Child pid
210
211    $timeout = 0 unless $timeout;   # Force default timeout
212
213    if ( $pid = fork () ) {
214        # Parent process
215        # The eval acts as a signal catcher.  If the alarm goes off inside
216        # the eval, the die will put "alarm\n" into $@, otherwise the
217        # return value of the execution in the child process will be used.
218        my $rv = eval {
219            local $SIG{'ALRM'} = sub{ die "alarm\n"; };
220            my $rv;
221
222            alarm $timeout;
223            $rv = waitpid($pid, 0);
224            alarm 0;
225            $rv;
226        };
227
228        # If the eval succeeded, $@ will be null and we can use $rv, which
229        # is the return code from the subprocess.  If the eval timed out,
230        # print a warning and assume the best.
231        if ($@ eq "alarm\n" ) {
232            warn "$wname timed out - pid $pid still live\n";
233            return 1;
234        }
235        else {
236            return $rv;
237        }
238    }
239    else {
240        # Child process
241        exec("ssh $user\@$host $cmd");
242        exit 0;
243    }
244}
245
246# Ship local copies of the federation scripts out to the given host.  If any of
247# the script transfers fails, return 0.  The scripts to transfer are from the
248# global @scripts and are found locally in $local_script_dir (another global).
249sub ship_scripts {
250    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
251    my $s;
252
253    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
254    for $s (@scripts) {
255        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
256            return 0;
257    }
258    return 1;
259}
260
261# Ship per-testbed configuration generated by this script to the remote /proj
262# directories on the remote testbeds
263sub ship_configs {
264    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
265    my($d, $f);
266
267    $d = IO::Dir->new($src_dir) || return 0;
268
269    # All directories under $tmpdir are 770 so we can delete them later.
270    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
271    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
272    while ( $f = $d->read()) {
273        next if $f =~ /^\./;
274        if ( -d "$src_dir/$f" ) {
275            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
276                return 0;
277        }
278        else {
279            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
280        }
281    }
282    return 1;
283}
284
285# Start a sub section of the experiment on a given testbed.  The testbed and
286# the user to start the experiment as are pulled from the global per-testbed
287# hash, passed in as $tbparams, as is the project name on the remote testbed.
288# Parameters are the testbed and the experiment id.  Configuration files are
289# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
290# Then the current state of the experiment determined using expinfo.  From that
291# state, the experiment is either created, modified or spapped in.  If
292# everything succeeds, true is returned.  If the global verbose is set progress
293# messages are printed.
294sub start_segment {
295    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
296                                            # per-testbed parameters and remote
297                                            # swap-in timeout
298    my $host =                              # Host name of remote ops (FQDN)
299        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
300    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
301    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
302                                            # experiment under
303    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
304                                            # sub-experiment
305    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
306    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
307    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
308    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
309    my $state;                              # State of remote experiment
310    my $status = new IO::Pipe;              # The pipe to get status
311
312    # Determine the status of the remote experiment
313    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
314        die "Can't ssh to $user\@$host:$!\n";
315    # XXX: this is simple now.  Parsing may become more complex
316    while (<$status>) {
317        /State: (\w+)/ && ($state = $1);
318        /No\s+such\s+experiment/ && ($state = "none");
319    }
320    $status->close();
321    print "$tb: $state\n";
322
323    # Copy the experiment definition data over
324    print "transferring subexperiment to $tb\n" if $verbose;
325    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
326    # Clear out any old experiment data; if not deleted, copies over it by
327    # different users will fail.
328    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
329    # force the /bin/sh interpretation of the trailing * (which we need to keep
330    # tmp around))  Again, this needs to be done more properly once we have a
331    # non-ssh interface here.)
332    print "clearing experiment subdirs on $tb\n" if $verbose;
333    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
334        return 0;
335    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
336    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
337        return 0;
338    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
339    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
340        return 0;
341    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
342    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
343        return 0;
344    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
345    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
346        return 0;
347    # Remote experiment is active.  Modify it.
348    if ($state eq "active") {
349        print "Transferring federation support files to $tb\n" if $verbose;
350        # First copy new scripts and hostinfo into the remote /proj
351        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
352            return 0;
353        &ship_scripts($host, $user, $proj_dir) || return 0;
354        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
355
356        if ( -d "$tmpdir/tarfiles") {
357            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
358                return 0;
359        }
360
361        if ( -d "$tmpdir/rpms") {
362            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
363                return 0;
364        }
365
366        print "Modifying $eid in place on $tb\n" if $verbose;
367        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
368            "$eid $tclfile", "modexp", $timeout) || return 0;
369        return 1;
370    }
371
372    # Remote experiment is swapped out, modify it and swap it in.
373    if ($state eq "swapped") {
374        print "Transferring federation support files to $tb\n" if $verbose;
375        # First copy new scripts and hostinfo into the remote /proj (because
376        # the experiment exists, the directory tree should be there.
377        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
378            return 0;
379        &ship_scripts($host, $user, $proj_dir) || return 0;
380        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
381        if ( -d "$tmpdir/tarfiles") {
382            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
383                return 0;
384        }
385
386        if ( -d "$tmpdir/rpms") {
387            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
388                return 0;
389        }
390
391        print "Modifying $eid on $tb\n" if $verbose;
392        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
393            "modexp") || return 0;
394        print "Swapping $eid in on $tb\n" if $verbose;
395        # Now start up
396        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
397            "swapexp", $timeout) || return 0;
398        return 1;
399    }
400
401    # No remote experiment.  Create one.  We do this in 2 steps so we can put
402    # the configuration files and scripts into the new experiment directories.
403    if ($state eq "none") {
404
405        if ( -d "$tmpdir/tarfiles") {
406            # Tarfiles have to exist for the creation to work
407            print "copying tarfiles to $tb\n";
408            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
409                return 0;
410        }
411
412        if ( -d "$tmpdir/rpms") {
413            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
414                return 0;
415        }
416
417        print "Creating $eid on $tb\n" if $verbose;
418        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
419            "$pid -e $eid $tclfile", "startexp") || return 0;
420        # After startexp succeeds, the per-experiment directories exist on the
421        # remote testbed.
422        print "Transferring federation support files to $tb\n" if $verbose;
423        # First copy new scripts and hostinfo into the remote /proj
424        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
425            return 0;
426        &ship_scripts($host, $user, $proj_dir) || return 0;
427        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
428        # Now start up
429        print "Swapping $eid in on $tb\n" if $verbose;
430        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
431            "swapexp", $timeout) || return 0;
432        return 1;
433    }
434
435    # Every branch for a known state returns.  If execution gets here, the
436    # state is unknown.
437    warn "unknown state: $state\n";
438    return 0;
439}
440
441# Swap out a sub-experiment - probably because another has failed.  Arguments
442# are testbed and experiment.  Most of the control flow is similar to
443# start_segment, though much simpler.
444sub stop_segment {
445    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
446                                            # per-testbed parameters
447    my $user = $tbparams->{$tb}->{'user'};  # testbed user
448    my $host =                              # Ops node
449        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
450    my $pid = $tbparams->{$tb}->{'project'};# testbed project
451
452    print "Stopping $eid on $tb\n" if $verbose;
453    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
454        "swapexp (out)") || return 0;
455    return 1;
456}
457
458
459$pid = $gid = "dummy";              # Default project and group to pass to
460                                    # $tcl_splitter above.  These are total
461                                    # dummy arguments;  the splitter doesn't
462                                    # use them at all, but we supply them to
463                                    # keep our changes to the parser minimal.
464# Argument processing.
465getopts('Ft:c:p:f:ndvNP:', \%opts);
466$splitter_config = $opts{'c'} || "./splitter.conf";
467$debug = $opts{'d'};
468$verbose = $opts{'v'} || $opts{'d'};
469
470&parse_config("$splitter_config", \%opts) || 
471    die "Cannot read config file $splitter_config: $!\n";
472
473warn "-N does nothing now.  Only one testbeds format supported.\n"
474    if $opts{'N'};
475$fail_soft = $opts{'F'} || $opts{'failsoft'};
476$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
477$timeout = $opts{'t'} || $opts{'timeout'};
478$eid = $opts{'experiment'};             # Experiment ID
479$tcl = $opts{'f'} || shift;             # The experiment description
480$master = $opts{'master'};              # Master testbed
481$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
482$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
483$local_script_dir = $opts{'scriptdir'}; # Local scripts
484$muxmax  = $opts{'muxlimit'} || 3;      # Number of connections muxed on one
485                                        # gateway
486
487$max_children = $opts{'p'} || $opts{'maxchildren'} 
488    if $opts{'p'} || $opts{'maxchildren'};
489
490$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
491    die "Must give an SMB share\n";
492$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
493    die "Must give an SMB user\n";
494$auth_proj = $opts{'P'} || die "Must give an authentication project (-P)\n";
495
496# tcl program to split experiments (changed during devel)
497$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
498# tclsh to call directly (changed during devel)
499$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
500# fedd_client to get testbed access parameters
501$fedd_client = $opts{'feddclient'} || "fedd_client";
502
503# Prefix to avoid collisions
504$tmpdir .= "/split$$";
505
506print "Temp files are in $tmpdir\n" if $verbose;
507# Create a workspace
508unless (-d "$tmpdir") {
509    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
510}
511
512# If the keys are given, use them.  Otherwise create a set under $tmpdir
513
514if ( $opts{'gatewatpubkey'} && $opts{'gatewaysecretkey'}) {
515    $gw_pubkey = $opts{'gatewaypubkey'};
516    $gw_secretkey = $opts{'gatewaysecretkey'};
517}
518else {
519    $keytype = $opts{'gatewaykeytype'} || "rsa";
520    mkdir("$tmpdir/keys") || die "Can't create temoprary key dir: $!\n";
521    $gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
522    $gw_secretkey = "$tmpdir/keys/fed.$keytype";
523    print "Generating $keytype keys\n" if $verbose;
524    generate_ssh_keys($keytype, $gw_secretkey) || 
525        die "Cannot generate kets:$@\n";
526}
527# Generate the basenames
528($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
529($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
530
531
532
533# Validate scripts directory
534for my $s (@scripts) {
535    die "$local_script_dir/$s not in local script directory. Try -d\n"
536        unless -r "$local_script_dir/$s";
537}
538
539die "Must supply file, master and experiment" unless $master && $tcl && $eid;
540
541&parse_testbeds_filename($tb_config, $tbparams) ||
542    die "Cannot testbed congfigurations from $tb_config: $!\n";
543
544# Open a pipe to the splitter program and start it parsing the experiments
545my $pipe = new IO::Pipe;
546# NB no more -p call on parse call.
547$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master  $pid $gid $eid $tcl") || 
548    die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";
549
550# Parsing variables
551my $ctb;                        # Current testbed
552my %allocated;                  # If allocated{$tb} > 0, $tb is in use
553my $destfile;                   # File that the sub-experiment tcl file is
554                                # being written to, or "" if none.  Also used
555                                # for hostnames file.
556my $desthandle;                 # File handle for distfile
557my $gateways;                   # when gateway lists are being processed this
558                                # is the testbed whose gateways are being
559                                # gathered.
560my $control_gateway;            # Control net gateway for the current testbed
561my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
562                                # end of the a <-> b connector pair.
563
564# Parse the splitter output.  This loop creates the sub experiments, gateway
565# configurations and hostnames file
566while (<$pipe>) {
567    # Allbeds lists all the testbeds that this experiment accesses.  This code
568    # acquires access to them and pulls in their access parameters from fedd.
569    (/^#\s+Begin\s+Allbeds/../^#\s+End\s+Allbeds/) && do {
570        next if /^#/;
571        chomp;
572
573        my $tb = $_;            # Current testbed
574        # If this testbed has not had its access parameters read from fedd, try
575        # to read them, if we have a way to talk to fedd
576        unless ($tbparams->{$tb}->{'access'} && $fedd_client) { 
577            my $access_pipe = new IO::Pipe || 
578                die "Can't open pipe to fedd:$!\n";
579
580            print("Checking access to $tb using " . $tbparams->{$tb}->{'uri'}
581                . "\n") if $verbose;
582
583            my $cmd = "$fedd_client -t " . 
584                $tbparams->{$tb}->{'uri'} .  " -T $ENV{HOME}/cacert.pem ".
585                "-l $tb -p $auth_proj | /usr/bin/tee fedd.$tb ";
586            print "$cmd\n" if $verbose;
587
588            $access_pipe->reader($cmd) || 
589                    die "Can't exec fedd_client: $!\n";
590
591            &parse_testbeds($access_pipe, $tbparams) || 
592                warn("Error reading fedd output: $!\n");
593            $access_pipe->close();
594        }
595        next;
596    };
597
598    # Start of a sub-experiment
599    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
600        $ctb = $1;
601
602        # If we know the testbed, start collecting its sub experiment tcl
603        # description.  If not, warn the user.
604        if ($tbparams->{$ctb}->{'access'}) {
605            $allocated{$ctb}++; # Keep track of the testbeds allocated
606
607            unless (-d "$tmpdir/$ctb") {
608                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
609            }
610            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
611
612            $desthandle = new IO::File(">$destfile") || 
613                die "Cannot open $destfile:$!\n";
614        }
615        else{
616            warn "No such testbed $ctb\n";
617            $destfile = "";
618        }
619        next;
620    };
621
622    # End of that experiment
623    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
624        # Simple syntax check and close out this experiment's tcl description
625        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
626        $desthandle->close() if $desthandle;
627        $destfile = $ctb = "";
628        next;
629    };
630
631    # Beginning of a gateway set
632    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
633        $gateways = $1;
634        # If we've heard of this tb, create the config lines for it one at a
635        # time.
636        if ($allocated{$gateways}) {
637            # Just in case.  This directory should already have been created
638            # above.
639            unless (-d "$tmpdir/$gateways") {
640                mkdir("$tmpdir/$gateways") || 
641                    die "Can't create $tmpdir/$gateways: $!";
642            }
643        }
644        else {
645            warn "Gateways given (and ignored) for testbed not in use: " .
646                "$gateways\n";
647            $gateways = 0;
648        }
649        next;
650    };
651    # End of the gateways section.  Output the client config for this testbed
652    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
653        die "Mismatched gateway markers ($1, $gateways)\n" 
654            unless !$gateways || $gateways == $1;
655
656        if ($control_gateway ) {
657            # Client config
658            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
659            my $master_project = $tbparams->{$master}->{'project'};
660            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
661            print $cc "ControlGateway: $control_gateway\n";
662            print $cc "SMBShare: $smb_share\n";
663            print $cc "ProjectUser: $project_user\n";
664            print $cc "ProjectName: $master_project\n";
665            $cc->close();
666        }
667        else { warn "No control gateway for $gateways?\n"; }
668           
669        $gateways = 0;
670        next;
671    };
672    # Beginning of the hostnames list.  Collection is always in the hostnames
673    # file.
674    /^#\s+Begin\s+hostnames/ && do {
675        $destfile = "$tmpdir/hostnames";
676        $desthandle = new IO::File(">$destfile") || 
677            die "Can't open $destfile:$!\n";
678        next;
679    };
680    # end of the hostnames list.
681    /^#\s+End\s+hostnames/ && do {
682        $desthandle->close();
683        $destfile = "";
684        next;
685    };
686
687    # Generate gateway configuration info, one file per line
688    $gateways && do {
689        chomp;
690        my($dtb, $myname, $desthost, $type) = split(" ", $_);
691
692        # Many of these are to simplify print statements
693        my $sdomain =                           # domain for the source
694            $tbparams->{$gateways}->{'domain'};
695        my $ddomain =                           # domain for the destination
696            $tbparams->{$dtb}->{'domain'};
697        my $sproject =                          # Project of the source
698            $tbparams->{$gateways}->{'project'};
699        my $dproject =                          # Project of the destination
700            $tbparams->{$dtb}->{'project'};
701        my $fs =                                # Master fs node (FQDN)
702            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
703        my $boss =                              # Master boss node (FQDN)
704            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
705        my $event_server =                      # Master event-server (FQDN)
706            $tbparams->{$master}->{'eventserver'} . 
707            $tbparams->{$master}->{'domain'};
708        my $remote_event_server =               # Slave event-server (FQDN)
709            $tbparams->{$dtb}->{'eventserver'} . 
710            $tbparams->{$dtb}->{'domain'};
711        my $remote_script_dir =                 # Remote fed script location
712            "/proj/" . $dproject . "/exp/$eid/tmp";
713        my $local_script_dir =                  # Local fed script location
714            "/proj/" . $sproject . "/exp/$eid/tmp";
715        my $active;                             # Is this the active side of
716                                                # the connector?
717        my $tunnel_cfg =                        # Use DETER's config stuff
718            $tbparams->{$gateways}->{'tun'} || "false";
719                                   
720
721        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
722        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
723
724        my $conf_file = "$myname$sdomain.gw.conf";
725        my $remote_conf_file = "$desthost$ddomain.gw.conf";
726        # translate to lower case so the `hostname` hack for specifying
727        # configuration files works.
728        $conf_file =~ tr/A-Z/a-z/;
729        $remote_conf_file =~ tr/A-Z/a-z/;
730
731        # If either end of this link is in the master side of the testbed, that
732        # side is the active end. Otherwise the first testbed encountered in
733        # the file will be the active end.  The $active_end variable keeps
734        # track of those decisions
735        if ( $dtb eq $master ) { $active = "false"; }
736        elsif ($gateways eq $master ) { $active = "true"; }
737        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
738        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
739
740        # This is used to create the client configuration.
741        $control_gateway = "$myname$sdomain"
742            if $type =~ /(control|both)/;
743
744        # Write out the file
745        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
746            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
747
748        print $gwconfig "Active: $active\n";
749        print $gwconfig "TunnelCfg: $tunnel_cfg\n";
750        print $gwconfig "BossName: $boss\n";
751        print $gwconfig "FsName: $fs\n";
752        print $gwconfig "EventServerName: $event_server\n";
753        print $gwconfig "RemoteEventServerName: $remote_event_server\n";
754        print $gwconfig "Type: $type\n";
755        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
756        print $gwconfig "EventRepeater: $local_script_dir/fed_evrepeater\n";
757        print $gwconfig "RemoteExperiment: $dproject/$eid\n";
758        print $gwconfig "LocalExperiment: $sproject/$eid\n";
759        print $gwconfig "RemoteConfigFile: " . 
760            "$remote_script_dir/$remote_conf_file\n";
761        print $gwconfig "Peer: $desthost$ddomain\n";
762        print $gwconfig "Pubkeys: " . 
763            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
764        print $gwconfig "Privkeys: " .
765            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
766        $gwconfig->close();
767
768        # This testbed has a gateway (most will) so make a copy of the keys it
769        # needs in this testbed's subdirectory.  start_segment will transfer
770        # them.
771        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
772            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
773                die "Can't copy pubkeys ($gw_pubkey to " . 
774                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
775        }
776        if ($active eq "true" ) {
777            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
778                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
779                    die "Can't copy secret keys ($gw_secretkey to " . 
780                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
781            }
782        }
783
784        #done processing gateway entry, ready for next line
785        next; 
786    };
787    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
788        next if /^#/;
789        chomp;
790        push(@tarfiles, $_);
791        next;
792    };
793    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
794        next if /^#/;
795        chomp;
796        push(@rpms, $_);
797        next;
798    };
799
800    next unless $destfile;  # Unidentified testbed, ignore config
801    # local copies that can be used in the substitutions below
802    my $gwtype = $tbparams->{$ctb}->{'gwtype'} || $def_gwtype;
803    my $gwimage = $tbparams->{$ctb}->{'gwimage'} || $def_gwimage;
804    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'} || $def_mgwstart;
805    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'} || $def_mexpstart;
806    my $gwstart = $tbparams->{$ctb}->{'gwstart'} || $def_gwstart;
807    my $expstart = $tbparams->{$ctb}->{'expstart'} || $def_expstart;
808    my $project = $tbparams->{$ctb}->{'project'};
809
810    # Substitute variables
811    s/GWTYPE/$gwtype/g;
812    s/GWIMAGE/$gwimage/g;
813    if ($ctb eq $master ) {
814        s/GWSTART/$mgwstart/g;
815        s/EXPSTART/$mexpstart/g;
816    }
817    else {
818        s/GWSTART/$gwstart/g;
819        s/EXPSTART/$expstart/g;
820    }
821    # XXX: oh is this bad
822    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
823    s#PROJDIR#/proj/$project/#g;
824    s#EID#$eid#g;
825    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
826    print $desthandle $_;
827}
828$pipe->close();
829die "No nodes in master testbed ($master)\n" unless $allocated{$master};
830
831# Copy tarfiles and rpms needed at remote sites to the staging directories.
832# Start_segment will distribute them
833for my $t  (@tarfiles) {
834    die "tarfile '$t' unreadable: $!\n" unless -r $t;
835    unless (-d "$tmpdir/tarfiles") {
836        mkdir("$tmpdir/tarfiles") || 
837            die "Can't create $tmpdir/tarfiles:$!\n";
838    }
839    copy($t, "$tmpdir/tarfiles") || 
840        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
841}
842
843for my $r  (@rpms) {
844    die "rpm '$r' unreadable: $!\n" unless -r $r;
845    unless (-d "$tmpdir/rpms") {
846        mkdir("$tmpdir/rpms") || 
847            die "Can't create $tmpdir/rpms:$!\n";
848    }
849    copy($r, "$tmpdir/rpms") || 
850        die "Can't copy $r to  $tmpdir/rpms:$!\n";
851}
852
853exit(0) unless $startem;
854
855my %started;                # If $started{$tb} then $tb successfully started
856my %child;                  # If $child{$pid} then a process with that pid is
857                            # working on a starting a segment
858my $nworking = 0;           # Number of children working on swapin
859my $pid;                    # Scratch variable for pids
860
861# Start up the slave sub-experiments first
862TESTBED:
863for my $tb  (keys %allocated) {
864    if ( $tb ne $master ) {
865        while ( $nworking == $max_children ) {
866            print "Waiting for a child process to complete\n" if $verbose;
867            if (($pid = wait()) != -1 ) {
868                # The $? >> 8 is the exit code of the subprocess, which is
869                # non-zero if the &start_segment routine failed.
870                my $exit_code = ($? >> 8);
871
872                print "Child $pid completed exit code ($exit_code)\n"
873                    if $verbose;
874                $nworking--;
875                $started{$child{$pid}}++ unless $exit_code;
876                if ($child{$pid} ) { delete $child{$pid}; }
877                else { warn "Reaped a pid we did not start?? ($pid)\n"; }
878                last TESTBED if $exit_code;
879            }
880            else { warn "wait returned without reaping: $!\n"; }
881        }
882        if ( $pid = fork() ) {
883            # Parent process
884            $nworking ++;
885            $child{$pid} = $tb;
886            print "Started process $pid to start testbed $tb\n"
887                if $verbose;
888        }
889        else {
890            # Child.  Note that we reverse the sense of the return code when it
891            # becomes an exit value.  Zero exit values indicate success.
892            exit(!&start_segment($tb, $eid, $tbparams, $timeout));
893        }
894    }
895}
896
897# Now wait for any still running processes.
898while ( $nworking ) {
899    print "Waiting for a child process to complete ($nworking running)\n" 
900        if $verbose;
901    if (($pid = wait()) != -1 ) {
902        # The $? >> 8 is the exit code of the subprocess, which is
903        # non-zero if the &start_segment routine failed.
904        my $exit_code = ($? >> 8);
905
906        print "Child $pid completed exit code ($exit_code)\n"
907            if $verbose;
908        $nworking--;
909        $started{$child{$pid}}++ unless $exit_code;
910        if ($child{$pid} ) { delete $child{$pid}; }
911        else { warn "Reaped a pid we did not start?? ($pid)\n"; }
912    }
913    else { warn "wait returned without reaping: $!\n"; }
914}
915
916# Now the master
917if (&start_segment($master, $eid, $tbparams, $timeout)) { 
918    $started{$master}++;
919}
920
921# If any testbed failed, swap the rest out.
922if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
923    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
924    print "Error starting experiment\n";
925    exit(1);
926}
927print "Experiment started\n";
928print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
929system("rm -rf $tmpdir") unless $debug;
930exit(0);    # set the exit value
931
932=pod
933
934=head1 NAME
935
936B<splitter.pl>
937
938=head1 SYNOPSIS
939
940B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
941    [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
942
943=head1 DESCRIPTION
944
945B<splitter.pl> invokes the DETER experiment parser to split an annotated
946experiment into multiple sub-experments and instantiates the sub-experiments on
947their intended testbeds.  Annotation is accomplished using the
948tb-set-node-testbed command, added to the parser.
949
950Much of the script's behavior depends on the configuration file, specified with
951the B<-c> flag and defaulting to F<./splitter.conf>.
952
953The testbed labels supplied in the B<tb-set-node-testbed> command are
954meaningful based on their presence in the testbeds file.  that file can be
955specified in the configuration file using the B<Testbeds> directive, and
956defaults to F<./testbeds>.  The syntax is described below.
957
958Most of the intermediate files are staged in a sub-directory of a temporary
959files directory and deleted at the end of the script.  Specifying the B<-d>
960flag on the command line avoids the deletion for debbugging.  By default the
961temporary files directory is directory is F</tmp> and can be reset in the
962configuration file using the B<Tmpdir> directive.  Intermediate files are
963stored under a subdirectory formed by adding the process ID of the splitter
964process.  For example, if the temporary files directory is F</tmp> and the
965B<splitter.pl> process ID is 2323, the temporary files will be stored in
966F</tmp/split2323/>.
967
968The expreriment is split out into one experiment description per testbed in the
969temporary directory named as F<experiment.testbed.tcl> where the experiment is
970the experiment ID given in the configuration file, and the testbed is the
971tb-set-node-testbed parameter for the nodes in the file.
972
973If the B<-n> option is absent the sub-experiments are then instantiated on
974their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
975Per-testbed parameters are set in the testbeds file.  Sub-experiments on
976slave testbeds are instantiated in a random order, but the master testbed is
977currently instantiated last.
978
979Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
980and the maximum number of simultaneous processes to start.
981
982Scripts to start federation (the federation kit) are copied into the local
983experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
984taken from the directory given by the B<ScriptDir> directive in the
985configuration file.
986
987If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
988F<splitter.conf>.
989
990If any sub-experiment fails to instantiate, the other sub-exeriments are
991swapped out.  B<-F> avoids this swap out, which can also be specified as
992B<SoftFail: true> in F<splitter.conf>
993
994=head2 Configuration File
995
996The configuration file is a simple set of colon-separated parameters and
997values.  A configuration file must be present, either specified in the B<-c>
998flag or the default F<./splitter.conf>.  All the parameter names are case
999insensitive, but should not include any whitespace.  Parameter values may
1000include whitespace, but no newlines.
1001
1002Possible parameters are:
1003
1004=over 5
1005
1006=item Experiment
1007
1008The name of the experiment on the various testbeds
1009
1010=item Master
1011
1012The master testbed label from the testbeds file, described below.
1013
1014=item Testbeds
1015
1016The testbeds file described below, giving per-testbed parameters.  If this
1017directive is absent the testbeds file defaults to F<./testbeds>
1018
1019=item ScriptDir
1020
1021Location of the default federation scripts, i.e. the federation kit.
1022
1023=item GatewayPubkey
1024
1025=item GatewaySecretKey
1026
1027The names of the files containing secret and public keys to use in setting up
1028tunnels between testbeds.  If given they are used, otherwise keys are generated.
1029
1030=item GatewayKeyType
1031
1032This controls the kind of SSH keys generated to configure the geatways.  If
1033given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>.  The parameter
1034is csase insensitive.
1035
1036=item TmpDir
1037
1038=item TempDir
1039
1040The directory where temporary files are created.  These are synonyms, but
1041should both be specified, B<TmpDir> has priority.  If neither is specified,
1042F</tmp> is used.
1043
1044=item SMBShare
1045
1046The SMB share on the master testbed that will be exported to remote clients.
1047
1048=item SMBUser
1049
1050The experiment user to mount project directories as.  This user needs to be a
1051member of the exported experiment - that is one of the users in the project
1052containing this experiment on the master testbed.
1053
1054=item Timeout
1055
1056Value in seconds after which a swap-in operatioin will be considered a success.
1057Often long swap-ins will hang when there are partial failures.  This works
1058around this issue.  (This behavior can be requested on the command line by
1059specifying B<-t> I<secs>.)
1060
1061=item FailSoft
1062
1063If not set, failure of any sub experiment swaps the rest out.  Setting this to
1064any value avoids this swap out.  (This behavior can be requested on the command
1065line by specifying B<-F>.)
1066
1067=item MuxLimit
1068
1069The maximum bumber of links/lans carried by one gateway pair
1070
1071=item Tclparse
1072
1073The pathname to the experiment parsing program.  Only developers should set
1074this.
1075
1076=item Tclsh
1077
1078The pathname to the local oTcl shell.  Only developers should set
1079this.
1080
1081=back
1082
1083=head2 Testbeds file
1084
1085The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
1086scoped attribute-value pairs where each attribute is specified on a separate
1087line of the configuration file.  Each testbed's parameters are preceeded by the
1088testbed label in brackets ([]) on a line by itself.  After that the parameters
1089are specified as parameter: value.  This is essentially the same format as the
1090configuration file.  Parameters are:
1091
1092=over 4
1093
1094=item User
1095
1096The user under which to make requests to this testbed.  The user running
1097B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
1098testbed.
1099
1100=item OpsNode
1101
1102The host name of the testbed's ops node.  The user calling B<splitter.pl> must
1103be able to execute commands on this host via L<ssh(1)>.
1104
1105=item Domain
1106
1107The domain of nodes in this testbed (including the ops host).  This parameter
1108should always start with a period.
1109
1110=item Project
1111
1112The project under which to instantiate sub-experiments on this testbed.
1113
1114=item ConnectorType
1115
1116The node type for inter-testbed connector nodes on this testbed.
1117
1118=item SlaveNodeStartCmd
1119
1120The start command to run on experimental nodes when this testbed is used as a
1121slave.  In all the start commands the following string substitutions are made:
1122
1123=over 10
1124
1125=item FEDDIR
1126
1127The local experiment's federation scripts directory.  Each local experiment
1128will have this replaced by the scripts directory on its local boss.
1129
1130=item GWCONF
1131
1132The full pathname of the gateway configuration file.  As with FEDDIR, this is
1133on the local boss.
1134
1135=item PROJDIR
1136
1137The project directory on the local boss.
1138
1139=item EID
1140
1141The local experiment name.
1142
1143=back
1144
1145All startcmds specified in F<testbeds> undergo these expansions.
1146
1147=item SlaveConnectorStartCmd
1148
1149The start command to run on gateway nodes when this testbed is used as a slave.
1150The same string substitutions are made in this command as in SlaveNodeStartCmd.
1151
1152=item MasterNodeStartCmd
1153
1154The start command to run on experimental nodes when this testbed is used as a
1155master.  The same string substitutions are made in this command as in
1156SlaveNodeStartCmd.
1157
1158=item MasterConnectorStartCmd
1159
1160The start command to run on gateway nodes when this testbed is used as a
1161master.  The same string substitutions are made in this command as in
1162SlaveNodeStartCmd.
1163
1164=item ConnectorImage
1165
1166The disk image to be loaded on a gateway node on this testbed.
1167
1168=item FileServer
1169
1170The node in the master testbed from which filesystems are mounted.
1171
1172=item Boss
1173
1174The node in the master testbed that controls the testbed.
1175
1176=item TunnelCfg
1177
1178True if the connector needs to do DETER federation.  This parameter will
1179probably be removed.
1180
1181
1182=back
1183
1184=head1 ENVIRONMENT
1185
1186B<splitter.pl> does not directly make use of environment variables, but calls
1187out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1188environment.
1189
1190=head1 BUGS
1191
1192A deprecated B<-N> flag was used to select testbeds file format.  Only one
1193format is supported now, and B<-N> generates a warning, but otherwise does not
1194affect B<splitter.pl>.
1195
1196=head1 SEE ALSO
1197
1198L<sh(1)>, L<ssh(1)>
1199
1200=cut
Note: See TracBrowser for help on using the repository browser.