Tuesday, January 26, 2016

Shell (6): Linux/bash commands for day-to-day activity.......

#Move to another directory
cd Desktop

# Move to directory up in hierarchy
cd ..

#List directory
ls

#List files in long format, including their sizes (useful for spotting empty files to delete)
ls -l

#Count the number of files in a directory
ls | wc  -l
ls -1 | wc -l

#Create a new file
touch file
touch a b c d

#Copy the folder and its contents (-R = recursive) from the host to a local PC folder
cp -R full_source_path full_destination_path

#e.g.
cp -R /share/projects/data/genomes /home/pseema/Desktop/new_hypothetical_IS
cp -R annotation/ /home/pseema/Desktop

#Create directory
mkdir dir_name
mkdir dir1 dir2 dir3

#Remove directory
rm -r dir_name

#Remove file
rm file
rm a b c

#Rename file
mv file1 file2

#Rename directory
mv dir1 dir2

#Move file to  a new directory
mv file ~/dir
mv *.txt ~/dir

#Check history of commands used
history

#Find files with size 0 (-type f restricts the match to regular files)
find . -type f -size 0

#Delete files with size 0 (empty files). This command is destructive: change to the
#specified directory first, and run the delete only if the cd succeeded
#(otherwise the delete would run in whatever directory you are currently in)
cd dir_name && find . -type f -size 0 -delete
#Or name the directory explicitly instead of changing into it
find path_to_directory -type f -size 0 -delete

#Find the number of files in a directory
#(use ls -1, one name per line; ls -l prints an extra leading "total" line that wc -l would count)
ls -1 | wc -l

# Find the files whose name matches a particular pattern; pipe the result to wc -l to count the matches, or pass it to cat to print the contents of the matching files
find path_name/pattern_*
e.g. find /home/pseema/hypothetical_analysis/result_files/*.only_header
find pattern_* |  wc -l
cat `find pattern_*`

#Concatenate files one after another (the contents are appended, not placed side by side)
cat file1 file2 > file_merged
cat *.fasta > file_merged

#Remove duplicate lines (keeps the first occurrence of each line; blank lines are always kept)
 awk '!NF || !seen[$0]++' file

#Print isolate names n times (times the number of protein)
printf 'pattern\n%.0s' {1..5}
for i in `seq 5`; do echo "pattern";done

#Merge two files side by side, line by line (e.g. to add the isolate names next to the protein header names)
pr -m -t file1 file2 > combined_file

#Now grep your pattern
grep 'pattern' file

#Sort file alphabetically and drop duplicate lines (-u)
sort -u file > sorted_length_file

#Sort the file to find lines in the order of maximum frequency
sort file  | uniq -c | sort -n -r > outfile

#comm compares two files line by line; both files must already be sorted (e.g. with sort)
#Shows items common to file1 and file2
comm -12 file1 file2 > common_items

#Find rows only in file1, comparing on the first two columns
#(file2 is read first to collect its keys, then file1 is filtered against them)
awk 'FNR == NR { h[$1,$2]; next }; !($1 SUBSEP $2 in h)' file2 file1

#These items occur only in file1 (accessory in 1)
comm -23 file1 file2 > only_in_file1

#These items occur only in file2 (accessory in 2)
comm -13 file1 file2 > only_in_file2

#Count lines (emulates "wc -l")
 awk 'END{print NR}' file1

#Prints all the lines with the given pattern
grep  'pattern' file

#Renaming file names: build a table of corrected isolate IDs next to the consensus file names
#(no spaces are allowed around "=" in a shell assignment)
path_dir="/home/pseema/dir"
gene_name="inh"
resistant_list="resistant_list.txt"
#Creates a list and outputs into a txt file
ls /share/apps/pacbio/consensus_files > "$path_dir/consensus_ls.txt"
#remove filenames with .fasta (the dot is escaped so it matches a literal ".")
sed '/\.fasta/d' "$path_dir/consensus_ls.txt" > "$path_dir/consensus_ls_nofasta.txt"
#keep only first 6 characters of each line
cut -c 1-6 "$path_dir/consensus_ls_nofasta.txt" > "$path_dir/isolate_id.txt"
#replace each dot with a hyphen in each line
sed 's/\./-/g' "$path_dir/isolate_id.txt" > "$path_dir/id_corrected.txt"
#concatenation: corrected ID and original file name side by side, tab-separated
paste -d"\t" "$path_dir/id_corrected.txt" "$path_dir/consensus_ls_nofasta.txt" > "$path_dir/merged.txt"

#keep only the first line for each isolate ($1 is isolate name)
awk '!seen[$1]++' "$path_dir/merged.txt" > "$path_dir/unique_merged.txt"

No comments:

Post a Comment