Changeset e863014


Ignore:
Timestamp:
Oct 8, 2007 11:30:28 AM (17 years ago)
Author:
Ted Faber <faber@…>
Branches:
axis_example, compt_changes, info-ops, master, version-1.30, version-2.00, version-3.01, version-3.02
Children:
fd7a59b
Parents:
0404558
Message:

Allow slave testbeds to be swapped in concurrently.

-p <max_procs> sets the maximum number of slave testbeds that can be swapped in at once.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • fedkit/splitter.pl

    r0404558 re863014  
    4343                                # the domain parameter of the DETER testbed. 
    4444my $fail_soft;                  # Do not swap failed sub-experiments out
     45my $max_children=1;             # Maximum number of simultaneous swap-ins
    4546
    4647# Parse the config file.  The format is a colon-separated parameter name
     
    421422                                    # keep our changes to the parser minimal.
    422423# Argument processing.
    423 getopts('Ft:c:f:ndvN', \%opts);
     424getopts('Ft:c:p:f:ndvN', \%opts);
    424425$splitter_config = $opts{'c'} || "./splitter.conf";
    425426$debug = $opts{'d'};
     
    428429&parse_config("$splitter_config", \%opts) ||
    429430    die "Cannot read config file $splitter_config: $!\n";
    430 $fail_soft = $opts{'F'} || $opts{'failsoft'};
    431431
    432432warn "-N does nothing now.  Only one testbeds format supported.\n"
    433433    if $opts{'N'};
     434$fail_soft = $opts{'F'} || $opts{'failsoft'};
    434435$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
    435436$timeout = $opts{'t'} || $opts{'timeout'};
     
    440441$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
    441442$local_script_dir = $opts{'scriptdir'}; # Local scripts
     443
     444$max_children = $opts{'p'} || $opts{'maxchildren'}
     445    if $opts{'p'} || $opts{'maxchildren'};
    442446
    443447$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
     
    737741
    738742my %started;                # If $started{$tb} then $tb successfully started
     743my %child;                  # If $child{$pid} then a process with that pid is
     744                            # working on starting a segment
     745my $nworking = 0;           # Number of children working on swapin
     746my $pid;                    # Scratch variable for pids
    739747
    740748# Start up the slave sub-experiments first
    741749TESTBED:
    742750for my $tb  (keys %allocated) {
    743     if ($tb ne $master) {
    744         if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
    745         else { last TESTBED; }
    746     }
     751    if ( $tb ne $master ) {
     752        while ( $nworking == $max_children ) {
     753            print "Waiting for a child process to complete\n" if $verbose;
     754            if (($pid = wait()) != -1 ) {
     755                # The $? >> 8 is the exit code of the subprocess, which is
     756                # non-zero if the &start_segment routine failed.
     757                my $exit_code = ($? >> 8);
     758
     759                print "Child $pid completed exit code ($exit_code)\n"
     760                    if $verbose;
     761                $nworking--;
     762                $started{$child{$pid}}++ unless $exit_code;
     763                if ($child{$pid} ) { delete $child{$pid}; }
     764                else { warn "Reaped a pid we did not start?? ($pid)\n"; }
     765                last TESTBED if $exit_code;
     766            }
     767            else { warn "wait returned without reaping: $!\n"; }
     768        }
     769        if ( $pid = fork() ) {
     770            # Parent process
     771            $nworking ++;
     772            $child{$pid} = $tb;
     773            print "Started process $pid to start testbed $tb\n"
     774                if $verbose;
     775        }
     776        else {
     777            # Child.  Note that we reverse the sense of the return code when it
     778            # becomes an exit value.  Zero exit values indicate success.
     779            exit(!&start_segment($tb, $eid, $tbparams, $timeout));
     780        }
     781    }
     782}
     783
     784# Now wait for any still running processes.
     785while ( $nworking ) {
     786    print "Waiting for a child process to complete ($nworking running)\n"
     787        if $verbose;
     788    if (($pid = wait()) != -1 ) {
     789        # The $? >> 8 is the exit code of the subprocess, which is
     790        # non-zero if the &start_segment routine failed.
     791        my $exit_code = ($? >> 8);
     792
     793        print "Child $pid completed exit code ($exit_code)\n"
     794            if $verbose;
     795        $nworking--;
     796        $started{$child{$pid}}++ unless $exit_code;
     797        if ($child{$pid} ) { delete $child{$pid}; }
     798        else { warn "Reaped a pid we did not start?? ($pid)\n"; }
     799    }
     800    else { warn "wait returned without reaping: $!\n"; }
    747801}
    748802
     
    772826
    773827B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
    774     [B<-f> F<experiment_tcl>] [F<experiment_tcl>]
     828    [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
    775829
    776830=head1 DESCRIPTION
     
    810864currently instantiated last.
    811865
     866Slave testbeds can be swapped in concurrently by giving the B<-p> option the
     867maximum number of simultaneous swap-in processes to start (the default is 1).
     868
    812869Scripts to start federation (the federation kit) are copied into the local
    813870experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
Note: See TracChangeset for help on using the changeset viewer.