#!/bin/bash
#
# Split a gzipped FASTA file into one file per record, run blastx on every
# piece as a PBS array job, then submit a dependent job that archives the
# XML results once the whole array has finished successfully.
#
# Usage: edit the configuration section below, then run this script on a
# host where `qsub` is available.

set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
WORKDIR="/lustre/users/${USER}/blast_proj"
INPUT_FASTA="${WORKDIR}/data_set.fa.gz"
BLAST_E_VAL="1e-3"
BLAST_DB="/mnt/lustre/bsp/NCBI/BLAST/nr"
THREADS=24
BLAST_HOURS=0
BLAST_MINUTES=30
# Suffix format handed to csplit. The array job rebuilds each file name from
# the bare PBS_ARRAY_INDEX, so this format must not zero-pad the number.
ID_FMT="%01d"
SPLIT_PREFIX="sub_set"
MAIL_ADDRESS="youremail@somewhere.ac.za"

[[ -d "${WORKDIR}" ]] || die "working directory not found: ${WORKDIR}"
[[ -r "${INPUT_FASTA}" ]] || die "input FASTA not readable: ${INPUT_FASTA}"

# ---------------------------------------------------------------------------
# Split the input at every FASTA header line ('>'); -z drops empty pieces.
# ---------------------------------------------------------------------------
zcat "${INPUT_FASTA}" \
  | csplit -z -f "${WORKDIR}/${SPLIT_PREFIX}" -b "${ID_FMT}.split.fasta" \
      - '/^>/' '{*}'

# Count the pieces where csplit actually wrote them (WORKDIR, not the current
# directory), using a glob rather than parsing `ls` output.
shopt -s nullglob
split_files=("${WORKDIR}/${SPLIT_PREFIX}"*.split.fasta)
shopt -u nullglob
NUM_PARTS=${#split_files[@]}
(( NUM_PARTS > 0 )) || die "no sequences found in ${INPUT_FASTA}"

START=0
END=$(( NUM_PARTS - 1 ))
# NOTE(review): some PBS versions may reject a single-element array range
# (START == END) -- confirm against the local scheduler.

# Job scripts are generated in a private temp file that is removed on any
# exit path; qsub is given the job name explicitly via -N.
TMPSCRIPT=$(mktemp) || die "mktemp failed"
trap 'rm -f -- "${TMPSCRIPT}"' EXIT

# ---------------------------------------------------------------------------
# BLAST array job
#
# Note: the heredoc delimiter is unquoted, so variables of THIS script
# (e.g. WORKDIR) are expanded now, while the job is being generated.
# Variables that must be evaluated when the job runs (e.g. INDEX,
# PBS_ARRAY_INDEX) are escaped with a backslash.
# ---------------------------------------------------------------------------
cat >"${TMPSCRIPT}" << PBS_SCRIPT
#!/bin/bash
#PBS -l select=1:ncpus=${THREADS}
#PBS -l place=excl:group=nodetype
#PBS -l walltime=${BLAST_HOURS}:${BLAST_MINUTES}:00
#PBS -q normal
#PBS -m ae
#PBS -M ${MAIL_ADDRESS}

. /etc/profile.d/modules.sh
module add chpc/BIOMODULES
module add ncbi-blast/2.6.0

INDEX="${WORKDIR}/${SPLIT_PREFIX}\${PBS_ARRAY_INDEX}"
INFILE="\${INDEX}.split.fasta"
OUTFILE="\${INDEX}.blastx.xml"

cd "${WORKDIR}" || exit 1
blastx -num_threads ${THREADS} -evalue ${BLAST_E_VAL} -db "${BLAST_DB}" -outfmt 5 -query "\${INFILE}" -out "\${OUTFILE}"
PBS_SCRIPT

# Keep only the part of the job identifier before the first '.'; it is used
# below as the dependency target.
BLAST_JOBID=$(qsub -N sunblast -J "${START}-${END}" "${TMPSCRIPT}" | cut -d. -f1)
[[ -n "${BLAST_JOBID}" ]] || die "qsub did not return a job id for the BLAST array"
echo "submitted: ${BLAST_JOBID}"

# ---------------------------------------------------------------------------
# Archive job: runs only after the BLAST array job has finished successfully.
# ---------------------------------------------------------------------------
cat >"${TMPSCRIPT}" << PBS_SCRIPT
#!/bin/bash
#PBS -l select=1:ncpus=1
#PBS -l place=free
#PBS -l walltime=1:00:00
#PBS -q workq
#PBS -m ae
#PBS -M ${MAIL_ADDRESS}
#PBS -W depend=afterok:${BLAST_JOBID}

cd "${WORKDIR}" || exit 1
tar jcf blast-xml-output.tar.bz2 *.blastx.xml
PBS_SCRIPT

qsub -N tarblast "${TMPSCRIPT}"