1. License Installation
[root@sched ~]# ./altair_licensing_13.0.0.linux_x64.bin -i console Default Install Folder: /usr/local/altair/licensing13.0 License file ......
2. Copy the license file and restart the license server service
[root@sched licensing13.0]# cp ~/altair_lic.dat /usr/local/altair/licensing13.0/ [root@sched licensing13.0]# /etc/init.d/altairlmxd start
3. PBSPro 13 Installation - pick the defaults and point to the license file
[root@sched ~]# tar -xvf PBSPro_13.0.2-CentOS6_x86_64.tar.gz [root@sched PBSPro_13.0.2]# ls INSTALL linux26_x86_64 PBS_License.txt PBS_VERSION.txt [root@sched PBSPro_13.0.2]# ./INSTALL
4. Start pbs service [if not started already]
[root@sched ~]# cat /etc/pbs.conf PBS_EXEC=/opt/pbs/default PBS_HOME=/pbsshared/PBS PBS_START_SERVER=1 PBS_START_MOM=0 PBS_START_SCHED=1 PBS_START_COMM=1 PBS_SERVER=sched PBS_CORE_LIMIT=unlimited [root@sched PBSPro_13.0.2]# /etc/init.d/pbs start Starting PBS /opt/pbs/default/sbin/pbs_comm ready (pid=9829), Proxy Name:sched.cm.cluster:17001, Threads:4 PBS comm PBS sched Connecting to PBS dataservice....connected to PBS dataservice@sched Using license server at 6200@sched PBS server [root@sched PBSPro_13.0.2]#
5. Update $PATH and $LD_LIBRARY_PATH
[root@sched ~]# cat /etc/profile.d/pbs.sh pathadd() { if [ -d "$1" ] && [[ ":$PATH:" != *":$1:"* ]]; then PATH="$1:${PATH:+"$PATH"}" fi } manpathadd() { if [ -d "$1" ] && [[ ":$MANPATH:" != *":$1:"* ]]; then MANPATH="${MANPATH:+"$MANPATH:"}$1" fi } libpathadd() { if [ -d "$1" ] && [[ ":$LD_LIBRARY_PATH:" != *":$1:"* ]]; then LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+"$LD_LIBRARY_PATH:"}$1" fi } pathadd /opt/pbs/default/bin/ PATH pathadd /opt/pbs/default/sbin/ PATH export PATH manpathadd /opt/pbs/default/man/ MANPATH export MANPATH libpathadd /opt/pbs/default/lib/ LD_LIBRARY_PATH export LD_LIBRARY_PATH [root@sched ~]# [root@sched ~]# source /etc/profile.d/pbs.sh
6. Grab the image with the new PBSPro Installation
[bright1->device[sched]]% grabimage -w [bright1->device[sched]]% Mon Sep 18 18:48:08 2017 [notice] bright1: Provisioning started: sending sched:/ to bright1:/cm/images/sched-image, mode GRAB, dry run = no [bright1->device[sched]]% Mon Sep 18 18:48:38 2017 [notice] bright1: Provisioning completed: sent sched:/ to bright1:/cm/images/sched-image, mode GRAB, dry run = no grabimage -w [ COMPLETED ] [bright1->device[sched]]%
7. Install PBS Commands only on login01
[root@login01 ~]# /cm/shared/PBSPro_13.0.2/INSTALL
8. Grab login01 Image and reset category “login”
[bright1->device[login01]]% grabimage -w [bright1->device[cnode01]]% Mon Sep 18 19:00:10 2017 [notice] bright1: Provisioning started: sending cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no [bright1->device[cnode01]]% Mon Sep 18 19:00:14 2017 [notice] bright1: Provisioning completed: sent cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no grabimage -w [ COMPLETED ] [bright1->device[login01]]% [bright1->device]% power -c login status
9. Install PBS Execution only on compute node “cnode01”
[root@cnode01 PBSPro_13.0.2]# ./INSTALL
10. Grab cnode01 image and reset category “compute”
[bright1->device[cnode01]]% grabimage -w [bright1->device[cnode01]]% Mon Sep 18 19:20:10 2017 [notice] bright1: Provisioning started: sending cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no [bright1->device[cnode01]]% Mon Sep 18 19:20:14 2017 [notice] bright1: Provisioning completed: sent cnode01:/ to bright1:/cm/images/compute-image, mode GRAB, dry run = no grabimage -w [ COMPLETED ] [bright1->device[cnode01]]% [bright1->device]% power -c compute status
11. Configure the nodes and resources
[root@sched ~]# for i in {01..16}; do qmgr -c "create node cnode$i";done; [root@sched ~]# qmgr -c "create resource nodetype type=string,flag=h" [root@sched ~]# qmgr -c "create resource jobtype type=string,flag=h" [root@sched ~]# qmgr -c "create resource ostype type=string,flag=h" [root@sched ~]# for i in {02..16}; do qmgr -c "set node cnode$i resources_available.ostype = centos7";done; [root@sched ~]# for i in {02..16}; do qmgr -c "set node cnode$i resources_available.nodetype = regular";done; [root@sched ~]# qmgr -c "print node @default" #to see them
12. Configure Queues
[root@sched scripts]# cat create_q.sh #!/bin/bash qmgr -c "create queue $1" qmgr -c "set queue $1 queue_type = Execution" qmgr -c "set queue $1 default_chunk.nodetype = regular" qmgr -c "set queue $1 enabled = True" qmgr -c "set queue $1 started = True" [root@sched scripts]# sh create_q.sh smp [root@sched scripts]# sh create_q.sh normal [root@sched scripts]# sh create_q.sh large [root@sched scripts]# sh create_q.sh special [root@sched scripts]# sh create_q.sh internal [root@sched scripts]# qmgr -c "print queue @default" #SMP QUEUE [root@sched ~]# qmgr -c "set queue smp max_queued = [u:PBS_GENERIC=20]" [root@sched ~]# qmgr -c "set queue smp resources_max.ncpus = 12" [root@sched ~]# qmgr -c "set queue smp resources_max.walltime = 96:00:00" [root@sched ~]# qmgr -c "set queue smp max_run = [u:PBS_GENERIC=10]" #NORMAL QUEUE [root@sched ~]# qmgr -c "set queue normal max_queued = [u:PBS_GENERIC=20]" [root@sched ~]# qmgr -c "set queue normal resources_max.walltime = 48:00:00" [root@sched ~]# qmgr -c "set queue normal max_run = [u:PBS_GENERIC=10]" [root@sched ~]# qmgr -c "set queue normal resources_min.ncpus = 24" [root@sched ~]# qmgr -c "set queue normal resources_max.ncpus = 48" #LARGE QUEUE [root@sched ~]# qmgr -c "set queue large Priority = 100" [root@sched ~]# qmgr -c "set queue large resources_min.ncpus = 48" [root@sched ~]# qmgr -c "set queue large resources_max.ncpus = 72" [root@sched ~]# qmgr -c "set queue large max_queued = [u:PBS_GENERIC=10]" [root@sched ~]# qmgr -c "set queue large resources_max.walltime = 96:00:00" [root@sched ~]# qmgr -c "set queue large resources_default.preempt_targets = queue=empty" [root@sched ~]# qmgr -c "set queue large acl_group_enable = True" [root@sched ~]# qmgr -c "set queue large acl_groups = largeq" [root@sched ~]# qmgr -c "set queue large max_run = [u:PBS_GENERIC=2]" [root@sched ~]# qmgr -c "set queue large max_run_res.ncpus = [u:PBS_GENERIC=144]"
13. Server Configuration
[root@sched ~]# qmgr -c "set server scheduling = True" [root@sched ~]# qmgr -c "set server managers = root@sched.cm.cluster" [root@sched ~]# qmgr -c "set server operators = smasoka@*" [root@sched ~]# qmgr -c "set server default_queue = smp" [root@sched ~]# qmgr -c "set server query_other_jobs = True" [root@sched ~]# qmgr -c "set server scheduler_iteration = 60" [root@sched ~]# qmgr -c "set server flatuid = True" [root@sched ~]# qmgr -c "set server resv_enable = True" [root@sched ~]# qmgr -c 'set server default_qsub_arguments = "-W umask=022"' [root@sched ~]# qmgr -c "set server eligible_time_enable = True" [root@sched ~]# qmgr -c "set server job_history_enable = True" [root@sched ~]# qmgr -c "set server max_concurrent_provision = 5" [root@sched ~]# qmgr -c "set server backfill_depth = 10" [root@sched ~]# qmgr -c "set server est_start_time_freq = 02:00:00"