Changeset 33548e1 for fedkit


Ignore:
Timestamp:
Oct 5, 2007 10:14:02 AM (17 years ago)
Author:
Ted Faber <faber@…>
Branches:
axis_example, compt_changes, info-ops, master, version-1.30, version-2.00, version-3.01, version-3.02
Children:
8412883
Parents:
e2a71ebe
Message:

Remove -N : all testbeds files use the "new" format now
Update docs for new testbeds format
Neaten up the timeout code in ssh_cmd

File:
1 edited

Legend:

Unmodified
Added
Removed
  • fedkit/splitter.pl

    re2a71ebe r33548e1  
    166166    my $pid;                # Child pid
    167167
    168     if ( $timeout ) {
    169         if ( $pid = fork () ) {
    170             # Parent process
    171             # The eval acts as a signal catcher.  If the alarm goes off inside
    172             # the eval, the die will put "alarm\n" into $@, otherwise the
    173             # return value of the execution in the child process will be used.
    174             my $rv = eval {
    175                 local $SIG{'ALRM'} = sub{ die "alarm\n"; };
    176                 my $rv;
    177 
    178                 alarm $timeout;
    179                 $rv = waitpid($pid, 0);
    180                 alarm 0;
    181                 $rv;
    182             };
    183 
    184             # If the eval succeeded, $@ will be null and we can use $rv, which
    185             # is the return code from the subprocess.  If the eval timed out,
    186             # print a warning and assume the best.
    187             if ($@ eq "alarm\n" ) {
    188                 warn "$wname timed out - pid $pid still live\n";
    189                 return 1;
    190             }
    191             else {
    192                 return $rv;
    193             }
     168    $timeout = 0 unless $timeout;   # Force default timeout
     169
     170    if ( $pid = fork () ) {
     171        # Parent process
     172        # The eval acts as a signal catcher.  If the alarm goes off inside
     173        # the eval, the die will put "alarm\n" into $@, otherwise the
     174        # return value of the execution in the child process will be used.
     175        my $rv = eval {
     176            local $SIG{'ALRM'} = sub{ die "alarm\n"; };
     177            my $rv;
     178
     179            alarm $timeout;
     180            $rv = waitpid($pid, 0);
     181            alarm 0;
     182            $rv;
     183        };
     184
     185        # If the eval succeeded, $@ will be null and we can use $rv, which
     186        # is the return code from the subprocess.  If the eval timed out,
     187        # print a warning and assume the best.
     188        if ($@ eq "alarm\n" ) {
     189            warn "$wname timed out - pid $pid still live\n";
     190            return 1;
    194191        }
    195192        else {
    196             # Child process
    197             # XXX system with a relative pathname is sort of gross
    198             exec("ssh $user\@$host $cmd");
    199             exit 0;
     193            return $rv;
    200194        }
    201195    }
    202196    else {
    203         # No timeout, do all the work here.
    204         # XXX system with a relative pathname is sort of gross
    205         system ("ssh $user\@$host $cmd");
    206         if ($?) {
    207             warn "$wname failed $?\n" if $wname;
    208             return 0;
    209         }
    210         else { return 1; }
     197        # Child process
     198        exec("ssh $user\@$host $cmd");
     199        exit 0;
    211200    }
    212201}
     
    441430$fail_soft = $opts{'F'} || $opts{'failsoft'};
    442431
     432warn "-N does nothing now.  Only one testbeds format supported.\n"
     433    if $opts{'N'};
    443434$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
    444435$timeout = $opts{'t'} || $opts{'timeout'};
     
    483474die "Must supply file, master and experiment" unless $master && $tcl && $eid;
    484475
    485 if ($opts{'N'} ) {
    486     &parse_testbeds($tb_config, $tbparams) ||
    487         die "Cannot testbed congfigurations from $tb_config: $!\n";
    488 }
    489 else {
    490     # Read a hash of per-testbed parameters from the local configurations.
    491     my $conf = new IO::File($tb_config) ||
    492         die "can't read testbed configutions from $tb_config: $!\n";
    493     while (<$conf>) {
    494         next if /^#/;
    495         chomp;
    496         my($tb, $h, $d, $u, $p, $es, $gs, $mes, $mgs, $t, $i, $fs, $boss,
    497             $tun) = split(":", $_);
    498         $tbparams->{$tb}->{'host'} = $h;
    499         $tbparams->{$tb}->{'user'} = $u;
    500         $tbparams->{$tb}->{'domain'} = $d;
    501         $tbparams->{$tb}->{'project'} = $p;
    502         $tbparams->{$tb}->{'gwtype'} = $t;
    503         $tbparams->{$tb}->{'expstart'} = $es;
    504         $tbparams->{$tb}->{'gwstart'} = $gs;
    505         $tbparams->{$tb}->{'mexpstart'} = $mes;
    506         $tbparams->{$tb}->{'mgwstart'} = $mgs;
    507         $tbparams->{$tb}->{'gwimage'} = $i;
    508         $tbparams->{$tb}->{'fs'} = $fs;
    509         $tbparams->{$tb}->{'boss'} = $boss;
    510         $tbparams->{$tb}->{'tun'} = $tun;
    511 
    512         # Make sure the domain starts with a period
    513         $tbparams->{$tb}->{'domain'} = "." . $tbparams->{$tb}->{'domain'}
    514             unless $tbparams->{$tb}->{'domain'} =~ /^\./;
    515     }
    516     $conf->close();
    517 }
     476&parse_testbeds($tb_config, $tbparams) ||
     477    die "Cannot testbed congfigurations from $tb_config: $!\n";
    518478
    519479# Open a pipe to the splitter program and start it parsing the experiments
     
    940900=head2 Testbeds file
    941901
    942 The configuration file (F<./testbeds> unless overridden by B<-c>) is a
    943 colon-separated set of parameters keyed by testbed name.  The fields, in order,
    944 are:
    945 
    946 =over 5
    947 
    948 =item name
    949 
    950 The testbed to which this line of parameters applies.
    951 
    952 =item user
     902The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
     903scoped attribute-value pairs where each attribute is specified on a separate
     904line of the configuration file.  Each testbed's parameters are preceded by the
     905testbed label in brackets ([]) on a line by itself.  After that the parameters
     906are specified as parameter: value.  This is essentially the same format as the
     907configuration file.  Parameters are:
     908
     909=over 4
     910
     911=item User
    953912
    954913The user under which to make requests to this testbed.  The user running
     
    956915testbed.
    957916
    958 =item host
     917=item OpsNode
    959918
    960919The host name of the testbed's ops node.  The user calling B<splitter.pl> must
    961920be able to execute commands on this host via L<ssh(1)>.
    962921
    963 =item domain
     922=item Domain
    964923
    965924The domain of nodes in this testbed (including the ops host).  This parameter
    966925should always start with a period.
    967926
    968 =item project
     927=item Project
    969928
    970929The project under which to instantiate sub-experiments on this testbed.
    971930
    972 =item gateway type
    973 
    974 The node type for inter-testbed gateway nodes on this testbed.
    975 
    976 =item experiment start (slave)
     931=item ConnectorType
     932
     933The node type for inter-testbed connector nodes on this testbed.
     934
     935=item SlaveNodeStartCmd
    977936
    978937The start command to run on experimental nodes when this testbed is used as a
     
    981940by the gateway configuration file.
    982941
    983 =item gateway start (slave)
     942=item SlaveConnectorStartCmd
    984943
    985944The start command to run on gateway nodes when this testbed is used as a slave.
    986945The same string substitutions are made in this command as in experiment start.
    987946
    988 =item experiment start (master)
     947=item MasterNodeStartCmd
    989948
    990949The start command to run on experimental nodes when this testbed is used as a
     
    992951experiment start.
    993952
    994 =item gateway start (master)
     953=item MasterConnectorStartCmd
    995954
    996955The start command to run on gateway nodes when this testbed is used as a
     
    998957experiment start.
    999958
    1000 =item gateway image
    1001 
    1002 The disk image to be loaded on a gateway node on this testbed.
    1003 
    1004 =item filesystem node
    1005 
    1006 The node in the master testbed from which filesystems are mounted.
    1007 
    1008 =item boss node
    1009 
    1010 The node in the master testbed that controls the testbed.
    1011 
    1012 =item tunnel configuration
    1013 
    1014 True if the connector needs to do DETER federation.  This parameter will
    1015 probably be removed.
    1016 
    1017 =back
    1018 
    1019 The parsing of the testbeds is extremely simple.  Colons separate each
    1020 field and there is no provision for escaping them at this time.
    1021 
    1022 =head2 New Testbeds File Format
    1023 
    1024 The testbeds file has been revamped to make it more human readable.  The
    1025 parameters are now named and specified on separate lines of the configuration
    1026 file.  Each testbed's parameters are preceded by the testbed label in brackets
    1027 ([]) on a line by itself.  After that the parameters are specified as
    1028 parameter: value.  This is essentially the same format as the configuration
    1029 file.  Parameters are:
    1030 
    1031 =over 4
    1032 
    1033 =item User
    1034 
    1035 The user under which to make requests to this testbed.  The user running
    1036 B<splitter.pl> must be able to authenticate as this user under L<ssh(1)> to this
    1037 testbed.
    1038 
    1039 =item OpsNode
    1040 
    1041 The host name of the testbed's ops node.  The user calling B<splitter.pl> must
    1042 be able to execute commands on this host via L<ssh(1)>.
    1043 
    1044 =item Domain
    1045 
    1046 The domain of nodes in this testbed (including the ops host).  This parameter
    1047 should always start with a period.
    1048 
    1049 =item Project
    1050 
    1051 The project under which to instantiate sub-experiments on this testbed.
    1052 
    1053 =item ConnectorType
    1054 
    1055 The node type for inter-testbed connector nodes on this testbed.
    1056 
    1057 =item SlaveNodeStartCmd
    1058 
    1059 The start command to run on experimental nodes when this testbed is used as a
    1060 slave.  In all the start commands the string FEDDIR will be replaced by the
    1061 local experiment's federation scripts directory and the string GWCONF replaced
    1062 by the gateway configuration file.
    1063 
    1064 =item SlaveConnectorStartCmd
    1065 
    1066 The start command to run on gateway nodes when this testbed is used as a slave.
    1067 The same string substitutions are made in this command as in experiment start.
    1068 
    1069 =item MasterNodeStartCmd
    1070 
    1071 The start command to run on experimental nodes when this testbed is used as a
    1072 master.  The same string substitutions are made in this command as in
    1073 experiment start.
    1074 
    1075 =item MasterConnectorStartCmd
    1076 
    1077 The start command to run on gateway nodes when this testbed is used as a
    1078 master.  The same string substitutions are made in this command as in
    1079 experiment start.
    1080 
    1081959=item ConnectorImage
    1082960
Note: See TracChangeset for help on using the changeset viewer.