Slurm_installation_1.7

#!/bin/bash
################################################################################
# Copyright (C) 2019-2024 NI SP GmbH
# All Rights Reserved
#
# [email protected] / www.ni-sp.com
#
# We provide the information on an as is basis.
# We provide no warranties, express or implied, related to the
# accuracy, completeness, timeliness, useability, and/or merchantability
# of the data and are not liable for any loss, damage, claim, liability,
# expense, or penalty, or for any direct, indirect, special, secondary,
# incidental, consequential, or exemplary damages or lost profit
# deriving from the use or misuse of this information.
################################################################################
# Version v1.7 including accounting support

# ANSI escape sequences used to colour terminal output; they are interpreted
# by `echo -e`. NC is the reset sequence that ends a coloured span.
# (A first, wrong BLUE definition using the \034 byte was overridden anyway
# and has been dropped.)
RED='\033[0;31m'; GREEN='\033[0;32m'; GREY='\033[0;37m'; NC='\033[0m'
ORANGE='\033[0;33m'; BLUE='\033[0;34m'; WHITE='\033[0;97m'; UNLIN='\033[0;4m'
# Print the three-line welcome banner (coloured with GREEN, reset with NC),
# then pause briefly before the installation starts.
printf '%b\n' \
    "${GREEN}###################################################" \
    "Welcome to the SLURM Installation Script" \
    "###################################################${NC}"
sleep 1.2

# Run the complete CentOS installation workflow and terminate the script.
#
# The steps run strictly in the order listed. checkCentosVersion has to come
# first because it sets OSVERSION, which disableSElinux and several later
# steps branch on.
# Globals:  OSVERSION (written by checkCentosVersion, read by later steps)
# Returns:  does not return; ends the script with exit status 0
main_centos()
{
    checkCentosVersion
    disableSElinux
    createRequiredUsers
    setupRequiredCentosRepositories
    installMariaDBforCentos
    installMungeForCentos
    setupRngToolsForCentos
    setupMungeForCentos
    buildSlurmForCentos
    setupSlurmForCentos
    createRequiredFiles
    fixingPermissions
    enableSystemdServices
    executeFirstSlurmCommands
    exit 0
}

# Install the Slurm RPMs built by rpmbuild and write the Slurm configuration.
#
# Writes /etc/slurm/slurm.conf and /etc/slurm/cgroup.conf. When accounting
# support is enabled it additionally creates the accounting database (via
# createMysqlDatabase), writes /etc/slurm/slurmdbd.conf, switches slurm.conf
# to accounting_storage/slurmdbd and drops a MariaDB tuning file.
# Globals:  slurm_accounting_support (read), HOST (written),
#           StorageType, DbdHost, StorageHost, StorageLoc, StorageUser,
#           SlurmUser, StoragePass, StoragePort, random_mysql_password
#           (written when accounting is enabled)
# Returns:  exits the script with status 1 if the RPM directory is missing
setupSlurmForCentos()
{
    # rpmbuild leaves the binary packages here; without the directory there
    # is nothing to install, so stop instead of running yum in the wrong place.
    cd ~/rpmbuild/RPMS/x86_64/ || {
        echo "Cannot enter ~/rpmbuild/RPMS/x86_64/ - was Slurm built? ... Exiting" >&2
        exit 1
    }

    # skipping slurm-openlava and slurm-torque because of missing perl-Switch
    local -a slurm_rpms=(
        slurm-[0-9]*.el*.x86_64.rpm
        slurm-contribs-*.el*.x86_64.rpm
        slurm-devel-*.el*.x86_64.rpm
        slurm-example-configs-*.el*.x86_64.rpm
        slurm-libpmi-*.el*.x86_64.rpm
        slurm-pam_slurm-*.el*.x86_64.rpm
        slurm-perlapi-*.el*.x86_64.rpm
        slurm-slurmctld-*.el*.x86_64.rpm
        slurm-slurmd-*.el*.x86_64.rpm
        slurm-slurmdbd-*.el*.x86_64.rpm
    )
    sudo yum --nogpgcheck localinstall "${slurm_rpms[@]}" -y

    # create the SLURM default configuration with
    # compute nodes called "NodeName=linux[1-32]"
    # in a cluster called "cluster"
    # and a partition name called "test"
    # Feel free to adapt to your needs
    HOST=$(hostname)

    # -p keeps a second run of the script from failing on the existing directory
    sudo mkdir -p /etc/slurm/
    cat << EOF | sudo tee /etc/slurm/slurm.conf

# slurm.conf file generated by configurator easy.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
SlurmctldHost=localhost
#
#MailProg=/bin/mail
MpiDefault=none
#MpiParams=ports=#-#
ProctrackType=proctrack/cgroup
ReturnToService=2
SlurmctldPidFile=/var/run/slurmctld.pid
#SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
#SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurm/slurmd
SlurmUser=slurm
#SlurmdUser=root
StateSaveLocation=/var/spool/slurm/
SwitchType=switch/none
TaskPlugin=task/affinity
#
#
# TIMERS
#KillWait=30
#MinJobAge=300
#SlurmctldTimeout=120
#SlurmdTimeout=300
#
#
# SCHEDULING
SchedulerType=sched/backfill
SelectType=select/cons_tres
SelectTypeParameters=CR_Core
#
#
# LOGGING AND ACCOUNTING
AccountingStorageType=accounting_storage/none
ClusterName=cluster
#JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
#SlurmctldDebug=info
SlurmctldLogFile=/var/log/slurmctld.log
#SlurmdDebug=info
SlurmdLogFile=/var/log/slurmd.log
#
#
# COMPUTE NODES
NodeName=$HOST State=idle Feature=dcv2,other
# NodeName=linux[1-32] CPUs=1 State=UNKNOWN
# NodeName=linux1 NodeAddr=128.197.115.158 CPUs=4 State=UNKNOWN
# NodeName=linux2 NodeAddr=128.197.115.7 CPUs=4 State=UNKNOWN

PartitionName=test Nodes=$HOST Default=YES MaxTime=INFINITE State=UP
# PartitionName=test Nodes=$HOST,linux[1-32] Default=YES MaxTime=INFINITE State=UP

# DefMemPerNode=1000
# MaxMemPerNode=1000
# DefMemPerCPU=4000 
# MaxMemPerCPU=4096

EOF

    if [ "$slurm_accounting_support" == "1" ]
    then
        StorageType=accounting_storage/mysql
        DbdHost=localhost
        StorageHost=$DbdHost
        StorageLoc=slurm_acct_db
        StorageUser=slurm
        SlurmUser=$StorageUser
        # 20 random characters taken from [0-9a-zA-Z@]
        random_mysql_password=$(tr -dc '0-9a-zA-Z@' < /dev/urandom | head -c 20)
        StoragePass=$random_mysql_password
        StoragePort=3306

        createMysqlDatabase "$StorageLoc" "$StorageUser" "$StoragePass"

        # point slurm.conf at slurmdbd instead of "no accounting"
        sudo sed -i 's/AccountingStorageType=accounting_storage\/none/AccountingStorageType=accounting_storage\/slurmdbd/' /etc/slurm/slurm.conf

        # tee's copy of the file goes to /dev/null so the database password
        # is not printed on the terminal
        cat <<EOF | sudo tee /etc/slurm/slurmdbd.conf > /dev/null
StorageType=$StorageType
DbdHost=$DbdHost
StorageHost=$StorageHost
StorageLoc=$StorageLoc
StorageUser=$StorageUser
SlurmUser=$SlurmUser
StoragePass=$StoragePass
StoragePort=$StoragePort
LogFile=/var/log/slurmdbd.log
EOF
    fi

    # cgroup.conf is written in both modes: slurm.conf above selects
    # ProctrackType=proctrack/cgroup regardless of accounting support.
    cat << EOF | sudo tee /etc/slurm/cgroup.conf
###
#
# Slurm cgroup support configuration file
#
# See man slurm.conf and man cgroup.conf for further
# information on cgroup configuration parameters
#--
CgroupPlugin=cgroup/v1
# CgroupAutomount=yes

ConstrainCores=no
ConstrainRAMSpace=no
EOF

    # MariaDB is only installed (and only needed) for accounting, so the
    # tuning file and the service restart are limited to that mode.
    if [ "$slurm_accounting_support" == "1" ] && [ ! -f /etc/my.cnf.d/slurm.cnf ]
    then
        local total_memory innodb_buffer_percent innodb_buffer_pool_size
        # total RAM in MiB as reported by free; the buffer pool gets 50% of it
        total_memory=$(free -m | awk '/^Mem:/{print $2}')
        innodb_buffer_percent=50
        innodb_buffer_pool_size=$((total_memory * innodb_buffer_percent / 100))
        cat <<EOF | sudo tee /etc/my.cnf.d/slurm.cnf
[mariadb]
innodb_lock_wait_timeout=900
innodb_log_file_size=128M
max_allowed_packet=32M
innodb_buffer_pool_size=${innodb_buffer_pool_size}M
EOF

        sudo systemctl restart mariadb
    fi
}

# Install the build dependencies, download the Slurm source tarball and
# build RPM packages from it with rpmbuild.
#
# Globals:  OSVERSION (read; selects extra packages and rpmbuild options),
#           VER (read; exported as 22.05.9 when empty)
# Returns:  exits the script with status 1 when the working directory cannot
#           be entered, the download fails or rpmbuild fails
buildSlurmForCentos()
{
    # build and install SLURM
    sudo yum install python3 gcc openssl openssl-devel pam-devel numactl numactl-devel hwloc lua readline-devel ncurses-devel man2html libibmad libibumad rpm-build  perl-ExtUtils-MakeMaker.noarch -y
    if [ "$OSVERSION" == "7" ]
    then
        sudo yum install rrdtool-devel lua-devel hwloc-devel -y
    fi
    if [ "$OSVERSION" == "8" ]
    then
        sudo yum install rpm-build make -y
        # dnf --enablerepo=PowerTools install rrdtool-devel lua-devel hwloc-devel -y
        sudo dnf --enablerepo=powertools install rrdtool-devel lua-devel hwloc-devel rpm-build -y
        # dnf group install "Development Tools"
    fi
    if [ "$OSVERSION" == "9" ]
    then
        sudo yum install rpm-build make -y
        # dnf --enablerepo=PowerTools install rrdtool-devel lua-devel hwloc-devel -y
        sudo dnf --enablerepo=crb install rrdtool-devel lua-devel hwloc-devel -y
        # dnf group install "Development Tools"
    fi

    mkdir -p slurm-tmp
    cd slurm-tmp || {
        echo "Cannot enter the build directory slurm-tmp ... Exiting" >&2
        exit 1
    }

    if [ "$VER" == "" ]; then
        # Default release. Other values that have been used here:
        # 20.02-latest, 20.11.3, 20.11-latest, 20.11.9, 23.02.2
        export VER=22.05.9
    fi

    local tarball="slurm-$VER.tar.bz2"
    # https://download.schedmd.com/slurm/slurm-20.02.3.tar.bz2
    local url="https://download.schedmd.com/slurm/$tarball"

    if ! wget "$url"
    then
        echo "Problem downloading $url ... Exiting" >&2
        # explicit status: a bare "exit" here would report success (the status of echo)
        exit 1
    fi

    local -a rpmbuild_args=(-ta "$tarball")
    if [ "$OSVERSION" == "9" ] ; then
        # fix LTO issue on 9
        # https://bugs.schedmd.com/show_bug.cgi?id=14565
        rpmbuild_args+=(--define '_lto_cflags %{nil}' --with mysql)
    fi

    # and wait a few minutes until SLURM has been compiled
    if ! rpmbuild "${rpmbuild_args[@]}"
    then
        echo "Problem building the Slurm RPMs from $tarball ... Exiting" >&2
        exit 1
    fi

    rm -- "$tarball"
    cd ..
    rmdir slurm-tmp

    # get perl-Switch
    # sudo yum install cpan -y
}

# Create the MUNGE key, restrict access to it and bring the munge service up.
# The key produced by create-munge-key is subsequently overwritten with
# 1024 bytes read from /dev/urandom.
setupMungeForCentos()
{
    local key_file=/etc/munge/munge.key
    local action

    sudo /usr/sbin/create-munge-key -r -f
    sudo sh -c  "dd if=/dev/urandom bs=1 count=1024 > ${key_file}"

    # key must belong to munge and be read-only for its owner
    sudo chown munge: "${key_file}"
    sudo chmod 400 "${key_file}"

    for action in enable start
    do
        sudo systemctl "${action}" munge
    done
}

# Install rng-tools through yum and run rngd with /dev/urandom as its
# random source.
setupRngToolsForCentos()
{
    local -a rng_packages=(rng-tools)
    local random_source=/dev/urandom

    sudo yum install "${rng_packages[@]}" -y
    sudo rngd -r "${random_source}"
}

# Turn SELinux off, but only for an installation with accounting support on
# release 9: runs "setenforce 0" and rewrites /etc/selinux/config with
# SELINUX=disabled. In every other case the function does nothing.
# Globals:  slurm_accounting_support (read), OSVERSION (read)
disableSElinux()
{
    # SLURM accounting support
    [ "$slurm_accounting_support" == "1" ] || return 0
    [ "$OSVERSION" == "9" ] || return 0

    sudo setenforce 0
    printf '%s\n' \
        'SELINUX=disabled' \
        'SELINUXTYPE=targeted' | sudo tee /etc/selinux/config
}

# Install MUNGE and its development files for the detected release.
# Release 7 gets all three packages from one yum call; on releases 8 and 9
# munge-devel is installed separately with an additional repository enabled
# (powertools on 8, crb on 9).
# Globals:  OSVERSION (read)
installMungeForCentos()
{
    local release_repo

    if [ "$OSVERSION" == "7" ]
    then
        sudo yum install munge munge-libs munge-devel -y
    fi

    # entries are "<release>:<repository that is enabled for munge-devel>"
    for release_repo in 8:powertools 9:crb
    do
        if [ "$OSVERSION" == "${release_repo%%:*}" ]
        then
            sudo yum install munge munge-libs  -y
            sudo dnf --enablerepo="${release_repo#*:}" install munge-devel -y
        fi
    done
}

# Install and start MariaDB when Slurm accounting support was requested and
# no mariadb-server package is installed yet.
# The same packages are installed on every release, so there is no
# per-release branching.
# Globals:  slurm_accounting_support (read)
installMariaDBforCentos()
{
    # SLURM accounting support
    [ "$slurm_accounting_support" == "1" ] || return 0

    # Case-insensitive match: the package installed below is spelled
    # "MariaDB-server". grep -E replaces the obsolescent egrep.
    if rpm -qa | grep -Eiq mariadb-server
    then
        return 0
    fi

    sudo yum install MariaDB-server MariaDB-devel dnf -y
    sudo systemctl enable --now mariadb
}

# Derive the major release number from /etc/os-release.
# OSVERSION becomes "8" or "9" when VERSION starts with that digit and
# stays at the default "7" otherwise.
# Globals:  OSVERSION (written); every variable defined in /etc/os-release
#           is also set in the current shell because the file is sourced
checkCentosVersion()
{
    OSVERSION="7"
    # [ "`hostnamectl | grep Kernel | grep el8`" != "" ] && OSVERSION="8"
    . /etc/os-release

    case $VERSION in
        8*)
            OSVERSION="8"
            # in case of repo access issues uncomment the following lines
            # sudo sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-*
            # sudo sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
            ;;
        9*)
            OSVERSION="9"
            ;;
    esac
}

# Enable the package repositories the installation needs: epel-release from
# the distribution and, for releases 7, 8 and 9, the MariaDB repository
# (set up by piping the downloaded mariadb_repo_setup script into bash) plus
# the matching EPEL release package from dl.fedoraproject.org.
# Globals:  OSVERSION (read)
setupRequiredCentosRepositories()
{
    local release

    sudo yum install epel-release -y

    for release in 7 8 9
    do
        if [ "$OSVERSION" == "$release" ]
        then
            sudo curl -LsS https://r.mariadb.com/downloads/mariadb_repo_setup | sudo bash
            sudo yum install "https://dl.fedoraproject.org/pub/epel/epel-release-latest-${release}.noarch.rpm" -y
        fi
    done
}
# Detect the Linux distribution and store it in OSDISTRO.
#
# lsb_release is preferred when it is available. Otherwise, or when it
# prints nothing, /etc/issue and then /etc/os-release are searched
# case-insensitively for "centos" and "ubuntu". OSDISTRO is never left
# empty: it falls back to "Unknown".
# Globals:  OSDISTRO (written)
# Outputs:  one line naming the detected distribution
checkLinuxOsDistro()
{
    local probe_file

    OSDISTRO=""

    # Check for lsb_release command
    if command -v lsb_release > /dev/null 2>&1
    then
        OSDISTRO=$(lsb_release -si)
    fi

    if [ -z "$OSDISTRO" ]
    then
        for probe_file in /etc/issue /etc/os-release
        do
            [ -r "$probe_file" ] || continue
            if grep -Eiq "centos" "$probe_file"
            then
                OSDISTRO="centos"
                break
            elif grep -Eiq "ubuntu" "$probe_file"
            then
                OSDISTRO="ubuntu"
                break
            fi
        done
    fi

    OSDISTRO=${OSDISTRO:-Unknown}
    echo "The current Linux distribution is: $OSDISTRO"
}

# Create the Slurm accounting database and its database user.
#
# Prompts for the MySQL/MariaDB root password until a connection as root
# succeeds, then creates the database and the user if they are missing, sets
# the user's password to the given value and grants it all privileges on the
# database. All statements are safe to repeat, so an already existing
# database no longer keeps the prompt loop running forever.
# Arguments: $1 - database name, $2 - database user, $3 - password for $2
# Globals:   StorageLoc, StorageUser, StoragePass (written from $1..$3);
#            MYSQL_PWD (exported while mysql runs, unset before returning)
# Returns:   0 when all statements succeeded; 1 when a statement failed or
#            the server could not be reached and no further input is available
createMysqlDatabase()
{
    StorageLoc=$1
    StorageUser=$2
    StoragePass=$3

    local mysql_root_password input_exhausted statement
    local rc=0
    # sudo resets the environment by default; MYSQL_PWD has to be preserved
    # explicitly or the typed password never reaches mysql.
    local -a mysql_as_root=(sudo --preserve-env=MYSQL_PWD mysql -u root)

    while true
    do
        echo "If you already have mysql/mariadb installed, please type the password. Leave empty (just press enter) if this server is fresh (without mysql/mariadb) or if there is no password or the password is configured under .my.cnf file."
        input_exhausted=false
        # -r keeps backslashes literal, -s keeps the password off the screen
        if ! read -r -s mysql_root_password
        then
            input_exhausted=true
        fi
        echo
        export MYSQL_PWD=$mysql_root_password

        if "${mysql_as_root[@]}" -e "SELECT 1" &> /dev/null
        then
            for statement in \
                "CREATE DATABASE IF NOT EXISTS $StorageLoc;" \
                "CREATE USER IF NOT EXISTS '$StorageUser'@'localhost' IDENTIFIED BY '$StoragePass';" \
                "ALTER USER '$StorageUser'@'localhost' IDENTIFIED BY '$StoragePass';" \
                "GRANT ALL PRIVILEGES ON $StorageLoc.* TO '$StorageUser'@'localhost';" \
                "FLUSH PRIVILEGES;"
            do
                "${mysql_as_root[@]}" -e "$statement" || rc=1
            done
            unset MYSQL_PWD
            return "$rc"
        fi

        unset MYSQL_PWD
        echo "Was not possible to connect with MySQL or MariaDB server. Please type the correct password.."

        # stdin is at end-of-file (e.g. non-interactive run): asking again
        # can never succeed, so give up instead of looping forever
        if $input_exhausted
        then
            echo "No further input available, giving up on the database setup." >&2
            return 1
        fi
    done
}


# Print an empty line and a header naming the command, then run the command.
# Arguments: the command and its arguments
_reportSlurmCommand()
{
    echo
    echo "Output from: \"$*\""
    "$@"
}

# Show cluster, partition and node state and submit a trivial first job.
_runSlurmSmokeChecks()
{
    # show cluster
    _reportSlurmCommand sinfo

    # sinfo -Nle
    _reportSlurmCommand scontrol show partition

    # show host info as slurm sees it
    _reportSlurmCommand slurmd -C

    # in case host is in drain status
    # scontrol update nodename=$HOST state=idle

    _reportSlurmCommand scontrol show nodes

    # If jobs are running on the node:
    # scontrol update nodename=$HOST state=resume

    # lets run our first job
    _reportSlurmCommand srun hostname
}

# Wait for the freshly started daemons, then run the status commands and a
# test job twice: once after 5 seconds and again after a further 2 seconds.
executeFirstSlurmCommands()
{
    echo "Sleep for a few seconds for slurmctld to come up ..."
    sleep 5
    _runSlurmSmokeChecks

    echo "Sleep for a few seconds for slurmd to come up ..."
    sleep 2
    _runSlurmSmokeChecks
}

# Enable and start the Slurm daemons through systemd.
#
# slurmdbd is only handled when accounting support was requested; without it
# no slurmdbd.conf is written, so starting slurmdbd and the waits around it
# would be pointless.
# Globals:  slurm_accounting_support (read)
enableSystemdServices()
{
    # slurmdbd needs to connect with slurmctld and vice-versa, causing a race condition.
    # the best option for now is start slurmdbd, sleep, start slurmctld, another sleep to wait the registration and then restart slurmdbd
    # this is not ideal, but will work. the slurm dev need to be contacted to fix this problem
    sudo systemctl daemon-reload

    if [ "$slurm_accounting_support" == "1" ]
    then
        sudo systemctl enable --now slurmdbd
        sleep 5
    fi

    sudo systemctl enable --now slurmctld

    if [ "$slurm_accounting_support" == "1" ]
    then
        sleep 10
        sudo systemctl restart slurmdbd
    fi

    sudo systemctl enable --now slurmd
}

# Create the spool directories and log files referenced by the generated
# Slurm configuration.
# mkdir -p makes the step repeatable: running the script a second time no
# longer fails on directories that already exist.
createRequiredFiles()
{
    sudo mkdir -p /var/spool/slurm /var/spool/slurm/slurmctld /var/spool/slurm/cluster_state
    sudo touch /var/log/slurmctld.log
    sudo touch /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log
}

# Hand the Slurm configuration, spool directories and log files over to the
# slurm user and set their access modes.
fixingPermissions()
{
    sudo chown -R slurm:slurm /etc/slurm

    # slurmdbd.conf only exists when accounting support was configured;
    # it holds the database password, hence owner-only access
    if [ -f /etc/slurm/slurmdbd.conf ]
    then
        sudo chmod 600 /etc/slurm/slurmdbd.conf
    fi

    sudo chown slurm:slurm /var/spool/slurm
    sudo chmod 755 /var/spool/slurm
    sudo chown slurm:slurm /var/spool/slurm/slurmctld
    sudo chmod 755 /var/spool/slurm/slurmctld
    sudo chown slurm:slurm /var/spool/slurm/cluster_state
    sudo chown slurm:slurm /var/log/slurmctld.log
    sudo chown slurm: /var/log/slurm_jobacct.log /var/log/slurm_jobcomp.log

    # NOTE(review): this makes all of /var/spool world-writable. Kept because
    # the author needed it, but it should be replaced by a narrower fix.
    sudo chmod 777 /var/spool   # hack for now as otherwise slurmctld is complaining
}

# Create the munge and slurm system groups and users with fixed IDs
# (966 for munge, 967 for slurm), skipping whatever already exists.
# The munge user check now tests the munge account itself; it used to test
# "$USERNAME", a variable this script never sets.
# Globals:  MUNGEUSER, SLURMUSER (written and exported)
createRequiredUsers()
{
    export MUNGEUSER=966

    if ! getent group munge &> /dev/null
    then
        sudo groupadd -g "$MUNGEUSER" munge
    fi

    if ! id munge &> /dev/null
    then
        sudo useradd  -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u "$MUNGEUSER" -g munge  -s /sbin/nologin munge
    fi

    export SLURMUSER=967

    if ! getent group slurm &> /dev/null
    then
        sudo groupadd -g "$SLURMUSER" slurm
    fi

    if ! id slurm &> /dev/null
    then
        sudo useradd  -m -c "SLURM workload manager" -d /var/lib/slurm -u "$SLURMUSER" -g slurm  -s /bin/bash slurm
    fi
}

# Ask whether Slurm accounting support should be installed.
#
# Accepts y/yes/n/no in any letter case and repeats the question on any
# other answer. When no answer can be read at all (stdin at end-of-file,
# e.g. a non-interactive run) accounting stays disabled and the function
# returns instead of asking forever.
# Globals:  slurm_accounting_support (written: 1 = enabled, 0 = disabled),
#           GREEN, NC (read)
askSlurmAccountingSupport()
{
    local answer

    slurm_accounting_support=0
    while true
    do
        echo -e "${GREEN}##########################################################################"
        echo "Do you want to enable Slurm accounting support? Possible answers: [yes/no]"
        echo -e  "##########################################################################${NC}"

        # read fails at end-of-file; a last line without a trailing newline
        # is still delivered in $answer and handled below
        if ! read -r answer && [ -z "$answer" ]
        then
            echo "No answer could be read, continuing without Slurm accounting support."
            return 0
        fi

        case "$answer" in
            [yY]|[yY][eE][sS])
                slurm_accounting_support=1
                return 0
                ;;
            [nN]|[nN][oO])
                slurm_accounting_support=0
                return 0
                ;;
            *)
                echo "Invalid input!"
                ;;
        esac
    done
}
# Run the complete Ubuntu installation workflow and terminate the script.
# The steps are executed one after another in the order listed below.
# Returns:  does not return; ends the script with exit status 0
main_ubuntu()
{
    local step
    local -a workflow=(
        checkUbuntuVersion
        createRequiredUsers
        setupRequiredUbuntuRepositories
        installMariaDBforUbuntu
        installMungeForUbuntu
        setupRngToolsForUbuntu
        setupMungeForUbuntu
        buildSlurmForUbuntu
        setupSlurmForUbuntu
        createRequiredFiles
        fixingPermissions
        setupSystemdForUbuntu
        enableSystemdServices
        executeFirstSlurmCommands
    )

    for step in "${workflow[@]}"
    do
        "$step"
    done
    exit 0
}

# Write the systemd unit files for slurmctld, slurmdbd and slurmd to
# /etc/systemd/system.
#
# The here-documents use a quoted delimiter so that $SLURMCTLD_OPTIONS,
# $SLURMDBD_OPTIONS, $SLURMD_OPTIONS and $MAINPID reach the unit files
# literally. With an unquoted delimiter the shell replaced the *_OPTIONS
# variables with an empty string while generating the files, so options set
# through the EnvironmentFile could never take effect.
setupSystemdForUbuntu()
{
    cat <<'EOF'  | sudo tee /etc/systemd/system/slurmctld.service
[Unit]
Description=Slurm controller daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf

[Service]
Type=forking
EnvironmentFile=-/etc/sysconfig/slurmctld
ExecStart=/usr/sbin/slurmctld $SLURMCTLD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmctld.pid

[Install]
WantedBy=multi-user.target
EOF

    cat <<'EOF' | sudo tee /etc/systemd/system/slurmdbd.service
[Unit]
Description=Slurm DBD accounting daemon
Wants=network.target munge.service slurmctld.service
After=network.target munge.service slurmctld.service
ConditionPathExists=/etc/slurm/slurmdbd.conf

[Service]
Type=forking
EnvironmentFile=-/etc/sysconfig/slurmdbd
ExecStart=/usr/sbin/slurmdbd $SLURMDBD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmdbd.pid

[Install]
WantedBy=multi-user.target
EOF

    cat  <<'EOF'  | sudo tee /etc/systemd/system/slurmd.service
[Unit]
Description=Slurm node daemon
After=network.target munge.service
ConditionPathExists=/etc/slurm/slurm.conf

[Service]
Type=forking
EnvironmentFile=-/etc/sysconfig/slurmd
ExecStart=/usr/sbin/slurmd -d /usr/sbin/slurmstepd $SLURMD_OPTIONS
ExecReload=/bin/kill -HUP $MAINPID
PIDFile=/var/run/slurmd.pid
KillMode=process
LimitNOFILE=51200
LimitMEMLOCK=infinity
LimitSTACK=infinity

[Install]
WantedBy=multi-user.target
EOF
}

# Write the Slurm configuration for Ubuntu.
#
# Writes /etc/slurm/slurm.conf and /etc/slurm/cgroup.conf. When accounting
# support is enabled it additionally creates the accounting database and
# /etc/slurm/slurmdbd.conf (unless that file already exists), switches
# slurm.conf to accounting_storage/slurmdbd and drops a MariaDB tuning file.
# Globals:  slurm_accounting_support (read), VERSION_ID (read),
#           HOST (written),
#           StorageType, DbdHost, StorageHost, StorageLoc, StorageUser,
#           SlurmUser, StoragePass, StoragePort, random_mysql_password
#           (written when slurmdbd.conf is generated)
setupSlurmForUbuntu()
{
    local proctrack_plugin

    # create the SLURM default configuration with
    # compute nodes called "NodeName=linux[1-32]"
    # in a cluster called "cluster"
    # and a partition name called "test"
    # Feel free to adapt to your needs
    HOST=$(hostname)

    # -p keeps a second run of the script from failing on the existing directory
    sudo mkdir -p /etc/slurm/

    # VERSION_ID >= 22.04 holds exactly when 22.04 comes first in version
    # order. sort -V replaces bc, which is not installed on every system.
    if [ "$(printf '%s\n' "22.04" "$VERSION_ID" | sort -V | head -n 1)" == "22.04" ]
    then
        proctrack_plugin="linuxproc"
    else
        proctrack_plugin="cgroup"
    fi

    cat << EOF | sudo tee /etc/slurm/slurm.conf

# slurm.conf file generated by configurator easy.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
SlurmctldHost=localhost
#
#MailProg=/bin/mail
MpiDefault=none
#MpiParams=ports=#-#
ProctrackType=proctrack/${proctrack_plugin}
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
#SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
#SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurm/slurmd
SlurmUser=slurm
#SlurmdUser=root
StateSaveLocation=/var/spool/slurm
SwitchType=switch/none
TaskPlugin=task/affinity
#
#
# TIMERS
#KillWait=30
#MinJobAge=300
#SlurmctldTimeout=120
#SlurmdTimeout=300
#
#
# SCHEDULING
SchedulerType=sched/backfill
SelectType=select/cons_tres
SelectTypeParameters=CR_Core
#
#
# LOGGING AND ACCOUNTING
AccountingStorageType=accounting_storage/none
ClusterName=cluster
#JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
#SlurmctldDebug=info
#SlurmctldLogFile=
#SlurmdDebug=info
#SlurmdLogFile=
#
#
# COMPUTE NODES
NodeName=$HOST State=idle Feature=dcv2,other
# NodeName=linux[1-32] CPUs=1 State=UNKNOWN
# NodeName=linux1 NodeAddr=128.197.115.158 CPUs=4 State=UNKNOWN
# NodeName=linux2 NodeAddr=128.197.115.7 CPUs=4 State=UNKNOWN

PartitionName=test Nodes=$HOST Default=YES MaxTime=INFINITE State=UP
# PartitionName=test Nodes=$HOST,linux[1-32] Default=YES MaxTime=INFINITE State=UP

# DefMemPerNode=1000
# MaxMemPerNode=1000
# DefMemPerCPU=4000
# MaxMemPerCPU=4096

EOF

    if [ "$slurm_accounting_support" == "1" ]
    then
        # an existing slurmdbd.conf (and the database behind it) is kept as is
        if [ ! -f /etc/slurm/slurmdbd.conf ]
        then
            StorageType=accounting_storage/mysql
            DbdHost=localhost
            StorageHost=$DbdHost
            StorageLoc=slurm_acct_db
            StorageUser=slurm
            SlurmUser=$StorageUser
            # 20 random characters taken from [0-9a-zA-Z@]
            random_mysql_password=$(tr -dc '0-9a-zA-Z@' < /dev/urandom | head -c 20)
            StoragePass=$random_mysql_password
            StoragePort=3306

            createMysqlDatabase "$StorageLoc" "$StorageUser" "$StoragePass"

            # tee's copy of the file goes to /dev/null so the database
            # password is not printed on the terminal
            cat <<EOF | sudo tee /etc/slurm/slurmdbd.conf > /dev/null
StorageType=$StorageType
DbdAddr=$DbdHost
DbdHost=$DbdHost
StorageHost=$StorageHost
StorageLoc=$StorageLoc
StorageUser=$StorageUser
SlurmUser=$SlurmUser
StoragePass=$StoragePass
StoragePort=$StoragePort
LogFile=/var/log/slurmdbd.log
EOF
        fi
        # point slurm.conf at slurmdbd instead of "no accounting"
        sudo sed -i 's/AccountingStorageType=accounting_storage\/none/AccountingStorageType=accounting_storage\/slurmdbd/' /etc/slurm/slurm.conf
    fi

    cat << EOF | sudo tee /etc/slurm/cgroup.conf
###
#
# Slurm cgroup support configuration file
#
# See man slurm.conf and man cgroup.conf for further
# information on cgroup configuration parameters
#--
CgroupPlugin=cgroup/v1
# CgroupAutomount=yes

ConstrainCores=no
ConstrainRAMSpace=no
EOF

    # MariaDB is only installed (and only needed) for accounting, so the
    # tuning file and the service restart are limited to that mode.
    if [ "$slurm_accounting_support" == "1" ] && [ ! -f /etc/mysql/mariadb.conf.d/99-slurm.cnf  ]
    then
        local total_memory innodb_buffer_percent innodb_buffer_pool_size
        # total RAM in MiB as reported by free; the buffer pool gets 50% of it
        total_memory=$(free -m | awk '/^Mem:/{print $2}')
        innodb_buffer_percent=50
        innodb_buffer_pool_size=$((total_memory * innodb_buffer_percent / 100))
        cat <<EOF | sudo tee /etc/mysql/mariadb.conf.d/99-slurm.cnf
[mariadb]
innodb_lock_wait_timeout=900
innodb_log_file_size=128M
max_allowed_packet=32M
innodb_buffer_pool_size=${innodb_buffer_pool_size}M
EOF

        sudo systemctl restart mariadb
    fi
}

# Install the build dependencies, download the Slurm source tarball and
# build and install Slurm from source (configure, make, make install).
#
# Globals:  VER (read; exported as 22.05.9 when empty); every variable
#           defined in /etc/os-release is set because the file is sourced
# Returns:  exits the script with status 1 when the download, the unpacking
#           or any build step fails
buildSlurmForUbuntu()
{
    sudo apt update
    sudo DEBIAN_FRONTEND=noninteractive apt -y upgrade
    . /etc/os-release

    sudo DEBIAN_FRONTEND=noninteractive apt -y install python3 gcc openssl numactl hwloc lua5.3 man2html make ruby ruby-dev libmunge-dev libpam0g-dev
    sudo /usr/bin/gem install fpm

    mkdir -p slurm-tmp
    cd slurm-tmp || {
        echo "Cannot enter the build directory slurm-tmp ... Exiting" >&2
        exit 1
    }

    if [ "$VER" == "" ]
    then
        # Default release. Other values that have been used here:
        # 20.02-latest, 20.11.3, 20.11-latest, 20.11.9, 23.02.2
        export VER=22.05.9
    fi

    local tarball="slurm-$VER.tar.bz2"
    local url="https://download.schedmd.com/slurm/$tarball"

    if ! wget "$url"
    then
        echo "Problem downloading $url ... Exiting" >&2
        # explicit status: a bare "exit" here would report success (the status of echo)
        exit 1
    fi

    if ! tar jxvf "$tarball"
    then
        echo "Problem unpacking $tarball ... Exiting" >&2
        exit 1
    fi

    # the unpacked directory carries the full version number, which is not
    # known in advance for the "-latest" tarballs, hence the glob
    cd  slurm-[0-9]*.[0-9] || {
        echo "Cannot enter the unpacked Slurm source directory ... Exiting" >&2
        exit 1
    }

    # stop at the first failing build step instead of installing a broken build
    if ! { ./configure --prefix=/usr --sysconfdir=/etc/slurm --enable-pam --with-pam_dir=/lib/x86_64-linux-gnu/security/ --without-shared-libslurm \
        && make \
        && make contrib \
        && sudo make install; }
    then
        echo "Problem building Slurm $VER ... Exiting" >&2
        exit 1
    fi

    cd ../../
    rm -rf slurm-tmp
}

# Create the MUNGE key, restrict access to it and (re)start the service.
#
# The key generator is chosen by which tool is installed rather than by an
# exact release match, so releases other than 22.04 that ship mungekey work
# as well. The generated key is subsequently overwritten with 1024 bytes
# read from /dev/urandom. The service is enabled and restarted afterwards so
# that the running daemon uses the key that is on disk, in line with the
# CentOS variant, which also enables and starts munge after writing the key.
setupMungeForUbuntu()
{
    local key_file=/etc/munge/munge.key

    if [ -x /usr/sbin/mungekey ]
    then
        sudo /usr/sbin/mungekey -f
    else
        sudo /usr/sbin/create-munge-key -r -f
    fi

    sudo sh -c  "dd if=/dev/urandom bs=1 count=1024 > ${key_file}"
    sudo chown munge: "${key_file}"
    sudo chmod 400 "${key_file}"

    sudo systemctl enable munge
    sudo systemctl restart munge
}

# Install rng-tools through apt (non-interactively) and run rngd with
# /dev/urandom as its random source.
setupRngToolsForUbuntu()
{
    local -a rng_packages=(rng-tools)
    local random_source=/dev/urandom

    sudo DEBIAN_FRONTEND=noninteractive apt -y install "${rng_packages[@]}"
    sudo rngd -r "${random_source}"
}

# Install MUNGE together with its library and development packages through
# apt, without interactive prompts.
installMungeForUbuntu()
{
    local -a munge_packages=(
        munge
        libmunge-dev
        libmunge2
    )

    sudo DEBIAN_FRONTEND=noninteractive apt -y install "${munge_packages[@]}"
}

# Install and start MariaDB when Slurm accounting support was requested and
# no mariadb-server package is currently installed.
# Globals:  slurm_accounting_support (read)
installMariaDBforUbuntu()
{
    [ "$slurm_accounting_support" == "1" ] || return 0

    # Only rows whose status column ends in "i" (installed, optionally on
    # hold) count. dpkg -l also lists removed packages whose configuration
    # files are left behind (status "rc"), which must not suppress the
    # installation.
    if dpkg -l | grep -Eiq '^[hi]i[[:space:]]+mariadb-server'
    then
        return 0
    fi

    sudo DEBIAN_FRONTEND=noninteractive apt -y install mariadb-server libmariadbd-dev libmariadb3
    sudo systemctl enable --now mariadb
}

# Check that the Ubuntu release is supported (18.04 or newer).
#
# The release number comes from lsb_release or, when that is unavailable or
# prints nothing, from VERSION_ID in /etc/os-release. Versions are compared
# with sort -V instead of bc, so a system without bc is no longer rejected
# as "not compatible".
# Globals:  VERSION_ID, OSVERSION (written)
# Returns:  exits the script with status 1 when the release is too old or
#           cannot be determined
checkUbuntuVersion()
{
    local min_version="18.04"
    local ubuntu_version=""

    # check if Ubuntu version is compatible
    if command -v lsb_release > /dev/null 2>&1
    then
        ubuntu_version=$(lsb_release -rs)
    fi
    if [ -z "$ubuntu_version" ] && [ -r /etc/os-release ]
    then
        # sourced in a subshell so only VERSION_ID is taken over
        ubuntu_version=$(. /etc/os-release && printf '%s' "$VERSION_ID")
    fi
    VERSION_ID=$ubuntu_version

    # the release is new enough exactly when min_version sorts first
    if [ "$(printf '%s\n' "$min_version" "$ubuntu_version" | sort -V | head -n 1)" != "$min_version" ]
    then
        echo "The Ubuntu >>> $ubuntu_version <<< is not compatible. The minimal version supported is >>> $min_version <<<. Aborting..."
        exit 1
    fi

    OSVERSION=$ubuntu_version
}

# Refresh the apt package index and, unless a MariaDB sources list is already
# present, set up the MariaDB repository (by piping the downloaded
# mariadb_repo_setup script into bash) and refresh the index again.
setupRequiredUbuntuRepositories()
{
    local mariadb_sources=/etc/apt/sources.list.d/mariadb.list

    sudo apt update

    # repository already configured: nothing more to do
    [ -f "${mariadb_sources}" ] && return

    sudo curl -LsS https://r.mariadb.com/downloads/mariadb_repo_setup | sudo bash
    sudo apt update
}

# global vars
# slurm_accounting_support: 1 when accounting support was requested, else 0
# OSVERSION / OSDISTRO:     start empty and are filled in by the detection functions
# SUPPORTED_DISTROS:        text shown when the distribution is not recognised
slurm_accounting_support=0
OSVERSION=
OSDISTRO=
SUPPORTED_DISTROS='Centos 7, Centos 8, Centos 9, Ubuntu 18.04, Ubuntu 20.04 and Ubuntu 22.04'

# Detect the distribution, ask about accounting support and hand over to the
# matching installation workflow.
#
# OSDISTRO is matched case-insensitively for "centos" or "ubuntu" anywhere in
# its value, using shell patterns instead of an unquoted echo piped into the
# obsolescent egrep.
# Globals:  OSDISTRO (read), SUPPORTED_DISTROS (read)
# Returns:  exits with status 2 for an unsupported distribution; the
#           workflows called here terminate the script themselves
main()
{
    checkLinuxOsDistro
    askSlurmAccountingSupport

    case "$OSDISTRO" in
        *[Cc][Ee][Nn][Tt][Oo][Ss]*)
            main_centos
            ;;
        *[Uu][Bb][Uu][Nn][Tt][Uu]*)
            main_ubuntu
            ;;
        *)
            echo "Unknown Linux OS Distro. The supported distros are: $SUPPORTED_DISTROS"
            echo "Aborting..."
            exit 2
            ;;
    esac
}

# Entry point: run the installer.
main

# unknown error
# Only reached if main returns: main_centos, main_ubuntu and the
# unsupported-distribution branch of main all call exit themselves.
exit 255

参考链接如下:
https://www.ni-sp.com/slurm-build-script-and-container-commercial-support/#h-automatic-slurm-build-script-for-rh-centos-7-8-and-9-as-well-as-ubuntu-18-20-and-22-including-accounting


转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。