Show
Ignore:
Timestamp:
10/08/07 11:30:28 (5 years ago)
Author:
Ted Faber <faber@…>
Children:
fd7a59b1f60b5ee0919cd3fdb74b62388b7c3155
Parents:
04045586ff25f1c7baf2e744523dc7ca3cb5374b
git-committer:
Ted Faber <faber@isi.edu> / 2007-10-08T18:30:28Z+0000
Message:

Allow slave testbeds to be swapped in concurrently.

-p <max_procs> sets the maximum number of subnets we can do at once.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • fedkit/splitter.pl

    r8412883 re863014  
    4343                                # the domain parameter of the DETER testbed.   
    4444my $fail_soft;                  # Do not swap failed sub-experiments out 
     45my $max_children=1;             # Maximum number of simultaneous swap-ins 
    4546 
    4647# Parse the config file.  The format is a colon-separated parameter name 
     
    421422                                    # keep our changes to the parser minimal. 
    422423# Argument processing. 
    423 getopts('Ft:c:f:ndvN', \%opts); 
     424getopts('Ft:c:p:f:ndvN', \%opts); 
    424425$splitter_config = $opts{'c'} || "./splitter.conf"; 
    425426$debug = $opts{'d'}; 
     
    428429&parse_config("$splitter_config", \%opts) ||  
    429430    die "Cannot read config file $splitter_config: $!\n"; 
    430 $fail_soft = $opts{'F'} || $opts{'failsoft'}; 
    431431 
    432432warn "-N does nothing now.  Only one testbeds format supported.\n" 
    433433    if $opts{'N'}; 
     434$fail_soft = $opts{'F'} || $opts{'failsoft'}; 
    434435$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments 
    435436$timeout = $opts{'t'} || $opts{'timeout'}; 
     
    440441$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations 
    441442$local_script_dir = $opts{'scriptdir'}; # Local scripts 
     443 
     444$max_children = $opts{'p'} || $opts{'maxchildren'}  
     445    if $opts{'p'} || $opts{'maxchildren'}; 
    442446 
    443447$smb_share = $opts{'smbshare'} ||       # Share to mount from the master 
     
    737741 
    738742my %started;                # If $started{$tb} then $tb successfully started 
     743my %child;                  # If $child{$pid} then a process with that pid is 
     744                            # working on starting a segment 
     745my $nworking = 0;           # Number of children working on swapin 
     746my $pid;                    # Scratch variable for pids 
    739747 
    740748# Start up the slave sub-experiments first 
    741749TESTBED: 
    742750for my $tb  (keys %allocated) { 
    743     if ($tb ne $master) { 
    744         if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; } 
    745         else { last TESTBED; } 
    746     } 
     751    if ( $tb ne $master ) { 
     752        while ( $nworking == $max_children ) { 
     753            print "Waiting for a child process to complete\n" if $verbose; 
     754            if (($pid = wait()) != -1 ) { 
     755                # The $? >> 8 is the exit code of the subprocess, which is 
     756                # non-zero if the &start_segment routine failed. 
     757                my $exit_code = ($? >> 8); 
     758 
     759                print "Child $pid completed exit code ($exit_code)\n" 
     760                    if $verbose; 
     761                $nworking--; 
     762                $started{$child{$pid}}++ unless $exit_code; 
     763                if ($child{$pid} ) { delete $child{$pid}; } 
     764                else { warn "Reaped a pid we did not start?? ($pid)\n"; } 
     765                last TESTBED if $exit_code; 
     766            } 
     767            else { warn "wait returned without reaping: $!\n"; } 
     768        } 
     769        if ( $pid = fork() ) { 
     770            # Parent process 
     771            $nworking ++; 
     772            $child{$pid} = $tb; 
     773            print "Started process $pid to start testbed $tb\n" 
     774                if $verbose; 
     775        } 
     776        else { 
     777            # Child.  Note that we reverse the sense of the return code when it 
     778            # becomes an exit value.  Zero exit values indicate success. 
     779            exit(!&start_segment($tb, $eid, $tbparams, $timeout)); 
     780        } 
     781    } 
     782} 
     783 
     784# Now wait for any still running processes. 
     785while ( $nworking ) { 
     786    print "Waiting for a child process to complete ($nworking running)\n"  
     787        if $verbose; 
     788    if (($pid = wait()) != -1 ) { 
     789        # The $? >> 8 is the exit code of the subprocess, which is 
     790        # non-zero if the &start_segment routine failed. 
     791        my $exit_code = ($? >> 8); 
     792 
     793        print "Child $pid completed exit code ($exit_code)\n" 
     794            if $verbose; 
     795        $nworking--; 
     796        $started{$child{$pid}}++ unless $exit_code; 
     797        if ($child{$pid} ) { delete $child{$pid}; } 
     798        else { warn "Reaped a pid we did not start?? ($pid)\n"; } 
     799    } 
     800    else { warn "wait returned without reaping: $!\n"; } 
    747801} 
    748802 
     
    772826 
    773827B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>] 
    774     [B<-f> F<experiment_tcl>] [F<experiment_tcl>] 
     828    [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>] 
    775829 
    776830=head1 DESCRIPTION 
     
    810864currently instantiated last. 
    811865 
     866Slave testbeds can be swapped in concurrently by specifying the B<-p> parameter 
     867with the maximum number of simultaneous swap-in processes to start. 
     868 
    812869Scripts to start federation (the federation kit) are copied into the local 
    813870experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are