Split a text string

Ubuntu / Linux command line

Shell variables: extract a substring

Split the text of the filename variable ${FILE} at the separator '_'

# filename example

FILE=SRR01234_mapped_ecoli.txt

echo ${FILE%%_*} # get everything before the first separator '_'

SRR01234

echo ${FILE%_*} # get all before last separator '_'

SRR01234_mapped

echo ${FILE##*_} # get all after last separator '_'

ecoli.txt

echo ${FILE#*_} # get all after first separator '_'

mapped_ecoli.txt

Remove the path prefix (/path/to/file/) using basename

# get filename without path prefix

F=/path/to/sample/SRR01234_mapped_ecoli.txt

FILE=`basename ${F}`

echo ${FILE}

SRR01234_mapped_ecoli.txt

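# get filename without path prefix and without extension
# (note: %%.* cuts at the first '.' anywhere in the path, so this assumes no directory name contains a dot)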

F=/path/to/sample/SRR01234_mapped_ecoli.txt

FILENAME=`basename ${F%%.*}` # get all before first dot '.' (remove file ending)

echo ${FILENAME}

SRR01234_mapped_ecoli

# get sample-ID

F=/path/to/sample/SRR01234_mapped_ecoli.txt

FILENAME=`basename ${F%%.*}` # get filename

SAMPLE=${FILENAME%%_*}; # get all before first '_'

echo ${SAMPLE}

SRR01234

# get species name "ecoli"

F=/path/to/sample/SRR01234_mapped_ecoli.txt

FILENAME=`basename ${F%%.*}` # get all before first '.' (remove file ending)

SPECIES=${FILENAME##*_} # get all after last '_'

echo ${SPECIES}

ecoli

Alternatively, use cut

# get the first field (-f 1) of a text string split at the delimiter '_'

FILE=SRR01234_mapped_ecoli.txt

SAMPLE=$( cut -d '_' -f 1 - <<< "${FILE}" )

echo ${SAMPLE}

SRR01234