User Tools

Site Tools


acelab:pbspro
Get software from Altair customer webportal

1. License Installation

[root@sched ~]# ./altair_licensing_13.0.0.linux_x64.bin -i console
Default Install Folder: /usr/local/altair/licensing13.0
License file ......

2. Copy the license file and restart the license server service

[root@sched licensing13.0]# cp ~/altair_lic.dat /usr/local/altair/licensing13.0/
[root@sched licensing13.0]# /etc/init.d/altairlmxd start

3. PBSPro 13 Installation - pick the defaults and point to the license file

[root@sched ~]# tar -xvf PBSPro_13.0.2-CentOS6_x86_64.tar.gz
[root@sched PBSPro_13.0.2]# ls
INSTALL  linux26_x86_64  PBS_License.txt  PBS_VERSION.txt
[root@sched PBSPro_13.0.2]# ./INSTALL

4. Start pbs service [if not started already]

[root@sched ~]# cat /etc/pbs.conf
PBS_EXEC=/opt/pbs/default
PBS_HOME=/pbsshared/PBS
PBS_START_SERVER=1
PBS_START_MOM=0
PBS_START_SCHED=1
PBS_START_COMM=1
PBS_SERVER=sched
PBS_CORE_LIMIT=unlimited
[root@sched PBSPro_13.0.2]# /etc/init.d/pbs start
Starting PBS
/opt/pbs/default/sbin/pbs_comm ready (pid=9829), Proxy Name:sched.cm.cluster:17001, Threads:4
PBS comm
PBS sched
Connecting to PBS dataservice....connected to PBS dataservice@sched
Using license server at 6200@sched
PBS server
[root@sched PBSPro_13.0.2]#

5. Update $PATH, $MANPATH and $LD_LIBRARY_PATH

[root@sched ~]# cat /etc/profile.d/pbs.sh 
# pathadd DIR
#
# Prepend DIR to PATH when DIR is an existing directory that is not already
# one of the colon-separated entries of PATH. Only the first argument is
# used; any further arguments are ignored.
pathadd() {
    if [ -d "$1" ] && [[ ":$PATH:" != *":$1:"* ]]; then
        # Emit the ':' separator only when PATH already has content. A
        # trailing ':' would create an empty PATH entry, which command
        # lookup treats as the current directory.
        PATH="$1${PATH:+:$PATH}"
    fi
}

# manpathadd DIR
#
# Append DIR to MANPATH when DIR is an existing directory that is not already
# one of the colon-separated entries of MANPATH. Only the first argument is
# used; any further arguments are ignored.
manpathadd() {
    if [ -d "$1" ] && [[ ":$MANPATH:" != *":$1:"* ]]; then
        # When MANPATH is empty this yields ":DIR". The leading ':' asks man
        # to keep its configured default search list and add DIR after it;
        # a bare "DIR" would replace the defaults and hide the system pages.
        MANPATH="${MANPATH}:$1"
    fi
}

# libpathadd DIR
#
# Append DIR to LD_LIBRARY_PATH unless DIR is not a directory or is already
# one of its colon-separated entries. Only the first argument is used.
libpathadd() {
    # Nothing to do for a path that is not an existing directory.
    [ -d "$1" ] || return 0

    # Nothing to do when DIR is already listed.
    case ":$LD_LIBRARY_PATH:" in
        *":$1:"*) return 0 ;;
    esac

    # Join with ':' only when there is something to join to, so no empty
    # entry is ever created.
    if [ -n "$LD_LIBRARY_PATH" ]; then
        LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$1"
    else
        LD_LIBRARY_PATH="$1"
    fi
}

# Register the PBS Pro directories under /opt/pbs/default on the command,
# man page and shared library search paths, then export all three variables
# so child processes inherit them. The helpers read only their first
# argument; the trailing variable name is a label for the reader.
pathadd /opt/pbs/default/bin/ PATH
pathadd /opt/pbs/default/sbin/ PATH
manpathadd /opt/pbs/default/man/ MANPATH
libpathadd /opt/pbs/default/lib/ LD_LIBRARY_PATH

export PATH MANPATH LD_LIBRARY_PATH
[root@sched ~]#
[root@sched ~]# source /etc/profile.d/pbs.sh

6. Grab the image with the new PBSPro Installation

 
[bright1->device[sched]]% grabimage -w
[bright1->device[sched]]% 
Mon Sep 18 18:48:08 2017 [notice] bright1: Provisioning started: sending sched:/ to bright1:/cm/images/sched-image, mode GRAB, dry run = no
[bright1->device[sched]]% 
Mon Sep 18 18:48:38 2017 [notice] bright1: Provisioning completed: sent sched:/ to bright1:/cm/images/sched-image, mode GRAB, dry run = no
grabimage -w [ COMPLETED ]
[bright1->device[sched]]%

7. Install PBS Commands only on login01

[root@login01 ~]# /cm/shared/PBSPro_13.0.2/INSTALL

8. Grab login01 Image and reset category “login”

[bright1->device[login01]]% grabimage -w
[bright1->device[cnode01]]% 
Mon Sep 18 19:00:10 2017 [notice] bright1: Provisioning started: sending cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no
[bright1->device[cnode01]]% 
Mon Sep 18 19:00:14 2017 [notice] bright1: Provisioning completed: sent cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no
grabimage -w [ COMPLETED ]
[bright1->device[login01]]%
[bright1->device]% power -c login status

9. Install PBS Execution only on compute node “cnode01”

[root@cnode01 PBSPro_13.0.2]# ./INSTALL

10. Grab cnode01 image and reset category “compute”

[bright1->device[cnode01]]% grabimage -w
[bright1->device[cnode01]]% 
Mon Sep 18 19:20:10 2017 [notice] bright1: Provisioning started: sending cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no
[bright1->device[cnode01]]% 
Mon Sep 18 19:20:14 2017 [notice] bright1: Provisioning completed: sent cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no
grabimage -w [ COMPLETED ]
[bright1->device[cnode01]]%
[bright1->device]% power -c compute status

11. Configure the nodes and resources

[root@sched ~]# for i in {01..16}; do qmgr -c "create node cnode$i";done;
[root@sched ~]# qmgr -c "create resource nodetype type=string,flag=h"
[root@sched ~]# qmgr -c "create resource jobtype type=string,flag=h"
[root@sched ~]# qmgr -c "create resource ostype type=string,flag=h"
[root@sched ~]# for i in {02..16}; do qmgr -c "set node cnode$i resources_available.ostype = centos7";done;
[root@sched ~]# for i in {02..16}; do qmgr -c "set node cnode$i resources_available.nodetype = regular";done;

[root@sched ~]# qmgr -c "print node @default"   #to see them

12. Configure Queues

[root@sched scripts]# cat create_q.sh 
#!/bin/bash
#
# create_q.sh - create a PBS execution queue with this site's defaults.
#
# Usage: create_q.sh QUEUE_NAME
#
# Creates QUEUE_NAME, sets its type to Execution, makes its chunks default
# to nodetype=regular, then enables and starts the queue.

# Without a queue name every qmgr command below would be issued with an
# empty name, so stop early with a usage message instead.
if [ -z "$1" ]; then
    echo "Usage: $0 QUEUE_NAME" >&2
    exit 1
fi

queue=$1

qmgr -c "create queue $queue"
qmgr -c "set queue $queue queue_type = Execution"
qmgr -c "set queue $queue default_chunk.nodetype = regular"
qmgr -c "set queue $queue enabled = True"
qmgr -c "set queue $queue started = True"
[root@sched scripts]# sh create_q.sh smp
[root@sched scripts]# sh create_q.sh normal
[root@sched scripts]# sh create_q.sh large
[root@sched scripts]# sh create_q.sh special
[root@sched scripts]# sh create_q.sh internal
[root@sched scripts]# qmgr -c "print queue @default"

#SMP QUEUE
[root@sched ~]# qmgr -c "set queue smp max_queued = [u:PBS_GENERIC=20]"
[root@sched ~]# qmgr -c "set queue smp resources_max.ncpus = 12"
[root@sched ~]# qmgr -c "set queue smp resources_max.walltime = 96:00:00"
[root@sched ~]# qmgr -c "set queue smp max_run = [u:PBS_GENERIC=10]"

#NORMAL QUEUE
[root@sched ~]# qmgr -c "set queue normal max_queued = [u:PBS_GENERIC=20]"
[root@sched ~]# qmgr -c "set queue normal resources_max.walltime = 48:00:00"
[root@sched ~]# qmgr -c "set queue normal max_run = [u:PBS_GENERIC=10]"
[root@sched ~]# qmgr -c "set queue normal resources_min.ncpus = 24"
[root@sched ~]# qmgr -c "set queue normal resources_max.ncpus = 48"

#LARGE QUEUE
[root@sched ~]# qmgr -c "set queue large Priority = 100"
[root@sched ~]# qmgr -c "set queue large resources_min.ncpus = 48"
[root@sched ~]# qmgr -c "set queue large resources_max.ncpus = 72"
[root@sched ~]# qmgr -c "set queue large max_queued = [u:PBS_GENERIC=10]"
[root@sched ~]# qmgr -c "set queue large resources_max.walltime = 96:00:00"
[root@sched ~]# qmgr -c "set queue large resources_default.preempt_targets = queue=empty"
[root@sched ~]# qmgr -c "set queue large acl_group_enable = True"
[root@sched ~]# qmgr -c "set queue large acl_groups = largeq"
[root@sched ~]# qmgr -c "set queue large max_run = [u:PBS_GENERIC=2]"
[root@sched ~]# qmgr -c "set queue large max_run_res.ncpus = [u:PBS_GENERIC=144]"

13. Server Configuration

[root@sched ~]# qmgr -c "set server scheduling = True"
[root@sched ~]# qmgr -c "set server managers = root@sched.cm.cluster"
[root@sched ~]# qmgr -c "set server operators = smasoka@*"
[root@sched ~]# qmgr -c "set server default_queue = smp"
[root@sched ~]# qmgr -c "set server query_other_jobs = True"
[root@sched ~]# qmgr -c "set server scheduler_iteration = 60"
[root@sched ~]# qmgr -c "set server flatuid = True"
[root@sched ~]# qmgr -c "set server resv_enable = True"
[root@sched ~]# qmgr -c 'set server default_qsub_arguments = "-W umask=022"'
[root@sched ~]# qmgr -c "set server eligible_time_enable = True"
[root@sched ~]# qmgr -c "set server job_history_enable = True"
[root@sched ~]# qmgr -c "set server max_concurrent_provision = 5"
[root@sched ~]# qmgr -c "set server backfill_depth = 10"
[root@sched ~]# qmgr -c "set server est_start_time_freq = 02:00:00"
/var/www/wiki/data/pages/acelab/pbspro.txt · Last modified: 2018/07/27 20:11 by smasoka