Saturday, December 12, 2015

Shell (5): sed (1) .......

sed (stream editor) is a great tool for sculpting text files, e.g. file spacing, line numbering, selective printing, substitutions, etc.
sed commands used below: n, p, h, H, g, G, x
#######################
EXTRACT LINES, ROWS
#Find the 2nd row
sed '2!d' file
#Extract row 3
sed -n '3p' < file
sed -n '3 p' file
sed -n "3"p file
#Delete line number 1 to 3 in the given file
sed -e '1,3d' < file
#Extract all rows between line 2 and 5, including both
sed -n '2,5p' < file
#Print all lines
sed -n 'p' file
# Print first 10 lines of file
sed 10q
# Print first line of file
sed q
#Print the last 10 lines of a file
sed -e :a -e '$q;N;11,$D;ba'
# Print the last 2 lines of a file
sed '$!N;$!D'
# Print the last line of a file
sed '$!d'                    # method 1
sed -n '$p'                 # method 2
# Print section of file between 2 patterns (inclusive)
sed -n '/pattern1/,/pattern2/p'             # case sensitive
# Print all of file except section between 2 patterns
 sed '/pattern1/,/pattern2/d'
#Print only lines which match a pattern
 sed -n '/regexp/p'           # method 1
 sed '/regexp/!d'             # method 2

 # Print only lines which do not match a pattern
 sed -n '/regexp/!p'          # method 1, corresponds to above
 sed '/regexp/d'              # method 2, simpler syntax

 # Print the line immediately before a regexp, but not the line containing the regexp
 sed -n '/regexp/{g;1!p;};h'

 # Print the line immediately after a regexp, but not the line containing the regexp
 sed -n '/regexp/{n;p;}'

 # Search for AAA and BBB and CCC
 sed '/AAA.*BBB.*CCC/!d'           # in that order
 sed '/AAA/!d; /BBB/!d; /CCC/!d'   # in any order

 # Search for AAA or BBB or CCC
 sed -e '/AAA/b' -e '/BBB/b' -e '/CCC/b' -e d    # most seds
 gsed '/AAA\|BBB\|CCC/!d'                        # GNU sed only

 #Print paragraph if it contains AAA (blank lines separate paragraphs)
 sed -e '/./{H;$!d;}' -e 'x;/AAA/!d;'

 # Print paragraph if it contains AAA and BBB and CCC (in any order)
 sed -e '/./{H;$!d;}' -e 'x;/AAA/!d;/BBB/!d;/CCC/!d'

 # Print paragraph if it contains AAA or BBB or CCC
 sed -e '/./{H;$!d;}' -e 'x;/AAA/b' -e '/BBB/b' -e '/CCC/b' -e d
 gsed '/./{H;$!d;};x;/AAA\|BBB\|CCC/b;d'         # GNU sed only

 # Print only lines of 65 characters or longer
 sed -n '/^.\{65\}/p'

 # Print only lines of less than 65 characters
 sed -n '/^.\{65\}/!p'        # method 1, corresponds to above
 sed '/^.\{65\}/d'            # method 2, simpler syntax

 # Print section of file from regular expression to end of file
 sed -n '/regexp/,$p'

 # Print section of file based on line numbers (lines 8-12, inclusive)
 sed -n '8,12p'               # method 1
 sed '8,12!d'                 # method 2

 # Print line number 52
 sed -n '52p'                 # method 1
 sed '52!d'                   # method 2
 sed '52q;d'                  # method 3, efficient on large files
 # Beginning at line 3, print every 7th line
 gsed -n '3~7p'               # GNU sed only
 sed -n '3,${p;n;n;n;n;n;n;}' # other seds
#Prints every other line
sed -n '1~2p'  file
sed -n '2~2p'  file
#Print line 1 and 2 more
sed -n '1,+2p' file
#Print line 2 and 2 more
sed -n '2,+2p' file
#print line next to the line with pattern
sed -n '/island/{n;p}' file
#print line next to next of the line with pattern
sed -n '/island/{n;n;p}' file
#print 3rd line after line with pattern
sed -n '/island/{n;n;n;p}' file
#To extract the line just after the given pattern
sed -n '/b/{n;p}' file
#Extract fasta seq with ID ABC
sed -n '/ABC/, />/p' file | sed '$d'
#Add 'gene|' after the '>' symbol
sed 's/>/>gene|/' file
#Prints line with either 1 or both patterns
sed -e '/tyu/b' -e '/uju/b' -e d file
#Find and print all the lines in a file, that match multiple patterns 
sed '/tyu.*uju/!d' file # in that order
sed '/tyu/!d; /uju/!d' file # in any order
#Find and print all the lines, that do not match a pattern.
sed -n '/tyu/!p' file
#Add characters at the beginning and at the end of each line 
sed "s/.*/s&p/" file
#Add characters at the beginning of each line 
sed 's/^/seema/' file
#Add characters at the end of each line
sed 's/$/seema/' file
SUBSTITUTION (replace, swap)
#Replace the 2nd character of each line with '-' (flag 2), or every character (flag g)
sed 's/./-/2' file
sed 's/./-/g' file
# Replaces only 3rd instance in a line
sed 's/autumn/spring/3' file
# Replaces all instance in a line
sed 's/autumn/spring/g' file
#Globally replace all required symbols (# here) with null
sed 's/#//g' file
#replace words (<span style="background-color: white;") and (<\/span>) with null. Used backslash escape character for the latter
sed 's/<span style="background-color: white;">//g' file1
sed 's/<\/span>//g' file1 > file2
#Put the first letter of each word in parentheses (1st command: uppercase initials; 2nd command: lowercase initials)
echo "Autumnal Trees Are Spectacular." | sed 's/\(\b[A-Z]\)/\(\1\)/g'
echo "Autumnal Trees Are Spectacular." | sed 's/\(\b[a-z]\)/\(\1\)/g'
#Swaps the first two words of each line
sed 's/\([a-zA-Z0-9][a-zA-Z0-9]*\) \([a-zA-Z0-9][a-zA-Z0-9]*\)/\2 \1/' file
sed 's/\([^ ][^ ]*\) \([^ ][^ ]*\)/\2 \1/' file
#Replace by ignoring alphabet case
sed 's/OAK/Maple/i' file
#Replace only second occurrence of the word
sed 's/oak/aspen/2'  file
#Replace everything from the start of the line up to and including the last 'milk' with 'curd'
sed 's/^.*milk/curd/' file
# Substitute (find and replace) "foo" with "bar" on each line
 sed 's/foo/bar/'             # replaces only 1st instance in a line
 sed 's/foo/bar/4'            # replaces only 4th instance in a line
 sed 's/foo/bar/g'            # replaces ALL instances in a line
 sed 's/\(.*\)foo\(.*foo\)/\1bar\2/' # replace the next-to-last case
 sed 's/\(.*\)foo/\1bar/'            # replace only the last case

 # Substitute "foo" with "bar" only for lines which contain "baz"
 sed '/baz/s/foo/bar/g'

 # Substitute "foo" with "bar" except for lines which contain "baz"
 sed '/baz/!s/foo/bar/g'

 # Change "scarlet" or "ruby" or "emerald" to "opal"
 sed 's/scarlet/opal/g;s/ruby/opal/g;s/emerald/opal/g'   # most seds
 gsed 's/scarlet\|ruby\|emerald/opal/g'                # GNU sed only

#Convert RNA to DNA
sed '/^[^>]/ y/uU/tT/' seq_rna  > seq_dna
REMOVE (delete header, blank lines)
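#Remove the pattern .fasta from the name of files in the file_list
sed 's/\.fasta//' file
#Delete whole lines that contain .fasta (note: the unescaped '.' matches any character)
sed '/.fasta/d' file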
#Remove the prefix 'pattern' from the 4th row
sed  '4 s/^\pattern//' file
#Remove last four characters of the string
sed -e 's/....$//' file
#Remove header of a file (the -i option will do the change in the file itself)
sed '1 d' file
sed -i '1 d' file
#Remove blank lines from a file
sed '/^$/d' file
#Deletes every other line
sed '1~2d' file
sed '2~2d'  file
#Remove lines starting with a pattern 'gnl'
 sed '/^gnl/d' file> truncated_file

SPACING
# Double space the file
sed G file
# Triple space the file
sed 'G;G' file
# Double space a file which already has blank lines in it. Output file  should contain no more than one blank line between lines of text.
 sed '/^$/d;G' file
# Inserts a blank line above every line which matches "regex"
 sed '/coffee/{x;p;x;}' file
# Inserts a blank line below every line which matches "regex"
 sed '/coffee/G' file
#Insert a blank line above and below every line which matches "regex"
sed '/coffee/{x;p;x;G;}' file
#Undo double-spacing, makes the paragraph compact
sed 'n;d' file
# Delete leading whitespace
 sed 's/^[ \t]*//'  file              
# Delete trailing whitespace
 sed 's/[ \t]*$//' file
# Delete both leading and trailing whitespaces
sed 's/^[ \t]*//;s/[ \t]*$//' file
# Insert 5 blank spaces at beginning of each row
sed 's/^/     /' file
Numbering
# Number each row
sed = file | sed 'N;s/\n/\t/'
sed = file | sed 'N; s/^/     /; s/ *\(.\{6,\}\)\n/\1  /'
# Number each line, but only print the number if the line is not blank
sed '/./=' file | sed '/./N; s/\n/ /'
# Count the number of lines
sed -n '$=' file

# Delete duplicate, consecutive lines from a file (emulates "uniq").
 sed '$!N; /^\(.*\)\n\1$/!P; D'
#Delete the first 10 lines of a file
 sed '1,10d'

# Delete the last line of a file
 sed '$d'
 # Delete the last 2 lines of a file
 sed 'N;$!P;$!D;$d'

 # Delete the last 10 lines of a file
 sed -e :a -e '$d;N;2,10ba' -e 'P;D'   # method 1
 sed -n -e :a -e '1,10!{P;N;D;};N;ba'  # method 2

 # Delete every 8th line
gsed '0~8d'                           # GNU sed only
sed 'n;n;n;n;n;n;n;d;'                # other seds

# Delete all blank lines from a file
sed '/^$/d'                           # method 1
sed '/./!d'                           # method 2

# Delete all consecutive blank lines from file except the first; also  deletes all blank lines from top and end of file
sed '/./,/^$/!d'          # method 1, allows 0 blanks at top, 1 at EOF
sed '/^$/N;/\n$/D'        # method 2, allows 1 blank at top, 0 at EOF

# Delete all consecutive blank lines from file except the first 2
sed '/^$/N;/\n$/N;//D'

# Delete the last line of each paragraph
sed -n '/^$/{p;h;};/./{x;/./p;}'

#To delete 5 lines after a pattern (excluding the line with the pattern)
sed -e '/pattern/{n;N;N;N;N;d}' file

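#To find the length of the specified line (wc prints line, word and byte counts; wc -c prints only the byte count, including the trailing newline)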
sed -n '3 p' file | wc
sed -n '3 p' file | wc -c
REVERSE
# Reverse order of lines (emulates "tac")
sed '1!G;h;$!d'               # method 1
sed -n '1!G;h;$p'             # method 2
# Reverse each character on the line (emulates "rev")
sed '/\n/!G;s/\(.\)\(.*\n\)/&\2\1/;//D;s/.//'
Others
#add comma at the end of each line

sed "s/$/,/g"  file
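#Commify numbers: insert a comma before the last three digits (GNU sed; wrap the command as ':a;s/.../.../g;ta' to also commify millions and above)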
sed 's/\(^\|[^0-9.]\)\([0-9]\+\)\([0-9]\{3\}\)/\1\2,\3/g' file
#Put parentheses around the matched word
sed 's/^.*milk/(&)/' file
#Use of '&' with proper escape characters. Use of piping
sed 's/and/\&/' file | sed 's/oak/sycamore/'
sed -e 's/and/\&/' -e  's/oak/sycamore/' file
#Separating commands with ;
sed 's/and/\&/;s/oak/sycamore/;s/Carl/John/' file
 #Inserts a line-number on a new line between each existing line.
 sed '=' file
#Joining individual sentences
sed -n '1~2h;2~2{H;g;s/\n/ /;p}' file
sed -n 'N;s/\n/ /p' file
 #Join pairs of lines side-by-side
 sed '$!N;s/\n/ /'

No comments:

Post a Comment