Split text-string
Ubuntu / Linux command line
Shell variables: extract sub-string
Split the filename stored in variable ${FILE} at the separator '_' (shell parameter expansion)
# filename example: three fields separated by '_'
FILE=SRR01234_mapped_ecoli.txt
# %%_* removes the longest suffix matching '_*'; quoting prevents word-splitting/globbing
echo "${FILE%%_*}" # get all characters before first appearance of separator '_'
SRR01234
echo "${FILE%_*}" # get all before last separator '_' (% removes the shortest suffix matching '_*')
SRR01234_mapped
echo "${FILE##*_}" # get all after last separator '_' (## removes the longest prefix matching '*_')
ecoli.txt
echo "${FILE#*_}" # get all after first separator '_' (# removes the shortest prefix matching '*_')
mapped_ecoli.txt
Remove the leading directory path (e.g. /path/to/sample/) using basename
# get filename without path prefix
F=/path/to/sample/SRR01234_mapped_ecoli.txt
# basename drops everything up to and including the last '/'.
# Quoting keeps paths containing spaces intact; '--' protects names starting with '-'.
FILE=$(basename -- "${F}")
echo "${FILE}"
SRR01234_mapped_ecoli.txt
# get filename without path prefix and without extension
F=/path/to/sample/SRR01234_mapped_ecoli.txt
# Strip the directory first, then the extension: applying %%.* to the full
# path would cut at the first '.' anywhere in it (e.g. './dir/file.txt' or
# '/data.v2/file.txt') and lose the filename.
FILENAME=$(basename -- "${F}")
FILENAME=${FILENAME%%.*} # get all before first dot '.' (remove file ending)
echo "${FILENAME}"
SRR01234_mapped_ecoli
# get sample-ID
F=/path/to/sample/SRR01234_mapped_ecoli.txt
# Strip the directory before the extension so that a '.' in a directory name
# (e.g. './sample/...') cannot truncate the path.
FILENAME=$(basename -- "${F}") # get filename
FILENAME=${FILENAME%%.*} # remove file ending (all from first '.')
SAMPLE=${FILENAME%%_*} # get all before first '_'
echo "${SAMPLE}"
SRR01234
# get speciesname "ecoli"
F=/path/to/sample/SRR01234_mapped_ecoli.txt
# Strip the directory before the extension so that a '.' in a directory name
# (e.g. './sample/...') cannot truncate the path.
FILENAME=$(basename -- "${F}") # get filename
FILENAME=${FILENAME%%.*} # get all before first '.' (remove file ending)
SPECIES=${FILENAME##*_} # get all after last '_'
echo "${SPECIES}"
ecoli
Alternatively use: cut
# get first word (-f 1) from text-string separated by delimiter '_'
FILE=SRR01234_mapped_ecoli.txt
# the here-string (<<<) feeds the value of FILE to cut on stdin ('-')
SAMPLE=$(cut -d '_' -f 1 - <<< "${FILE}")
echo "${SAMPLE}"
SRR01234