# --- Navigating and managing files and directories ---
# Placeholder names (file, dir_name, full_source_path, ...) stand in for real paths.

# Move to another directory
cd Desktop
# Move one directory up in the hierarchy
cd ..
# List the directory contents
ls
# Long listing: shows each entry's size in bytes (handy for spotting empty files)
ls -l
# Count the entries in a directory (when piped, ls writes one name per line)
ls | wc -l
ls -1 | wc -l
# Create new empty files (touch only updates the timestamp if the file exists)
touch file
touch a b c d
# Copy a folder and its contents recursively, e.g. from the host to a local PC folder
cp -R full_source_path full_destination_path
# e.g. cp -R /share/projects/data/genomes /home/pseema/Desktop/new_hypothetical_IS
cp -R annotation/ /home/pseema/Desktop
# Create one or more directories
mkdir dir_name
mkdir dir1 dir2 dir3
# Remove a directory and everything inside it
rm -r dir_name
# Remove files
rm file
rm a b c
# Rename a file
mv file1 file2
# Rename a directory (if dir2 already exists, dir1 is moved inside it instead)
mv dir1 dir2
# Move files into a directory; the trailing slash makes mv fail instead of
# silently renaming the file to "dir" when ~/dir does not exist
mv file ~/dir/
mv *.txt ~/dir/
# Show the history of commands used
history
# Find empty (zero-size) regular files under the current directory
find . -type f -size 0
# Delete empty files. This is destructive: "&&" makes sure the delete only runs
# when the cd succeeded, so it can never sweep the wrong directory
cd dir_name && find . -type f -size 0 -delete
# Same, naming the directory explicitly instead of changing into it
find path_to_directory -type f -size 0 -delete
# Count the entries in a directory from a long listing
# (tail skips the leading "total" line of ls -l, which would inflate the count by one)
ls -l | tail -n +2 | wc -l
# List files whose names start with a given prefix (the shell expands the glob
# before find runs), count them, then print their contents
find path_name/pattern_*
# e.g. find /home/pseema/hypothetical_analysis/result_files/*.only_header
find pattern_* | wc -l
# -exec hands the found paths straight to cat, so names containing spaces
# survive and cat is not left waiting on stdin when nothing is found
find pattern_* -exec cat {} +
# Concatenate files end to end (contents one after another, not side by side)
cat file1 file2 > file_merged
cat *.fasta > file_merged
# Drop duplicate lines, keeping the first occurrence of each (blank lines are always kept)
awk '!NF || !seen[$0]++' file
# Print a line n times (here 5), e.g. an isolate name once per protein;
# "%.0s" consumes each brace-expansion argument without printing it
printf 'pattern\n%.0s' {1..5}
for i in {1..5}; do echo "pattern"; done
# Merge two files side by side, one file per column, without page headers
# (pr truncates lines that do not fit its column width; "paste file1 file2" does not)
pr -m -t file1 file2 > combined_file
# Now grep your pattern
grep 'pattern' file
# Sort file alphabetically (-u also drops duplicate lines)
sort -u file > sorted_length_file
# Count how often each line occurs and list the most frequent first
sort file | uniq -c | sort -n -r > outfile
# comm compares two files line by line; both inputs must already be sorted.
# Lines common to file1 and file2
comm -12 file1 file2 > common_items
# Rows of file1 whose first two fields do not appear together in file2
# (file2 is read first to build the lookup table; file1 is then filtered against it)
awk 'FNR == NR { h[$1,$2]; next }; !($1 SUBSEP $2 in h)' file2 file1
# Lines that occur only in file1 (accessory in 1)
comm -23 file1 file2 > only_in_file1
# Lines that occur only in file2 (accessory in 2)
comm -13 file1 file2 > only_in_file2
# Count lines (emulates "wc -l")
awk 'END{print NR}' file1
# Print all the lines that contain the given pattern
grep 'pattern' file
# Renaming file names: build a de-duplicated, tab-separated table of
# corrected isolate IDs and the consensus file names they were cut from.
# NOTE: no space is allowed around "=" in an assignment; "var= value" would
# try to run "value" as a command instead of storing it.
path_dir=/home/pseema/dir
gene_name="inh"                      # not used in this section
resistant_list="resistant_list.txt"  # not used in this section
# List the consensus files into a text file
ls /share/apps/pacbio/consensus_files > "${path_dir}/consensus_ls.txt"
# Remove file names that contain ".fasta" (dot escaped so it matches literally)
sed '/\.fasta/d' "${path_dir}/consensus_ls.txt" > "${path_dir}/consensus_ls_nofasta.txt"
# Keep only the first 6 characters of each line
cut -c 1-6 "${path_dir}/consensus_ls_nofasta.txt" > "${path_dir}/isolate_id.txt"
# Replace every dot with a hyphen in each line
# (an unescaped "." would match any character, not just a dot)
sed 's/\./-/g' "${path_dir}/isolate_id.txt" > "${path_dir}/id_corrected.txt"
# Join the corrected ID and the original file name as two tab-separated columns
paste -d '\t' "${path_dir}/id_corrected.txt" "${path_dir}/consensus_ls_nofasta.txt" > "${path_dir}/merged.txt"
# Keep only the first line seen for each isolate ($1 is the isolate name)
awk '!seen[$1]++' "${path_dir}/merged.txt" > "${path_dir}/unique_merged.txt"
# --- Navigating and managing files and directories ---
# Placeholder names (file, dir_name, full_source_path, ...) stand in for real paths.

# Move to another directory
cd Desktop
# Move one directory up in the hierarchy
cd ..
# List the directory contents
ls
# Long listing: shows each entry's size in bytes (handy for spotting empty files)
ls -l
# Count the entries in a directory (when piped, ls writes one name per line)
ls | wc -l
ls -1 | wc -l
# Create new empty files (touch only updates the timestamp if the file exists)
touch file
touch a b c d
# Copy a folder and its contents recursively, e.g. from the host to a local PC folder
cp -R full_source_path full_destination_path
# e.g. cp -R /share/projects/data/genomes /home/pseema/Desktop/new_hypothetical_IS
cp -R annotation/ /home/pseema/Desktop
# Create one or more directories
mkdir dir_name
mkdir dir1 dir2 dir3
# Remove a directory and everything inside it
rm -r dir_name
# Remove files
rm file
rm a b c
# Rename a file
mv file1 file2
# Rename a directory (if dir2 already exists, dir1 is moved inside it instead)
mv dir1 dir2
# Move files into a directory; the trailing slash makes mv fail instead of
# silently renaming the file to "dir" when ~/dir does not exist
mv file ~/dir/
mv *.txt ~/dir/
# Show the history of commands used
history
# Find empty (zero-size) regular files under the current directory
find . -type f -size 0
# Delete empty files. This is destructive: "&&" makes sure the delete only runs
# when the cd succeeded, so it can never sweep the wrong directory
cd dir_name && find . -type f -size 0 -delete
# Same, naming the directory explicitly instead of changing into it
find path_to_directory -type f -size 0 -delete
# Count the entries in a directory from a long listing
# (tail skips the leading "total" line of ls -l, which would inflate the count by one)
ls -l | tail -n +2 | wc -l
# List files whose names start with a given prefix (the shell expands the glob
# before find runs), count them, then print their contents
find path_name/pattern_*
# e.g. find /home/pseema/hypothetical_analysis/result_files/*.only_header
find pattern_* | wc -l
# -exec hands the found paths straight to cat, so names containing spaces
# survive and cat is not left waiting on stdin when nothing is found
find pattern_* -exec cat {} +
# Concatenate files end to end (contents one after another, not side by side)
cat file1 file2 > file_merged
cat *.fasta > file_merged
# Drop duplicate lines, keeping the first occurrence of each (blank lines are always kept)
awk '!NF || !seen[$0]++' file
# Print a line n times (here 5), e.g. an isolate name once per protein;
# "%.0s" consumes each brace-expansion argument without printing it
printf 'pattern\n%.0s' {1..5}
for i in {1..5}; do echo "pattern"; done
# Merge two files side by side, one file per column, without page headers
# (pr truncates lines that do not fit its column width; "paste file1 file2" does not)
pr -m -t file1 file2 > combined_file
# Now grep your pattern
grep 'pattern' file
# Sort file alphabetically (-u also drops duplicate lines)
sort -u file > sorted_length_file
# Count how often each line occurs and list the most frequent first
sort file | uniq -c | sort -n -r > outfile
# comm compares two files line by line; both inputs must already be sorted.
# Lines common to file1 and file2
comm -12 file1 file2 > common_items
# Rows of file1 whose first two fields do not appear together in file2
# (file2 is read first to build the lookup table; file1 is then filtered against it)
awk 'FNR == NR { h[$1,$2]; next }; !($1 SUBSEP $2 in h)' file2 file1
# Lines that occur only in file1 (accessory in 1)
comm -23 file1 file2 > only_in_file1
# Lines that occur only in file2 (accessory in 2)
comm -13 file1 file2 > only_in_file2
# Count lines (emulates "wc -l")
awk 'END{print NR}' file1
# Print all the lines that contain the given pattern
grep 'pattern' file
# Renaming file names: build a de-duplicated, tab-separated table of
# corrected isolate IDs and the consensus file names they were cut from.
# NOTE: no space is allowed around "=" in an assignment; "var= value" would
# try to run "value" as a command instead of storing it.
path_dir=/home/pseema/dir
gene_name="inh"                      # not used in this section
resistant_list="resistant_list.txt"  # not used in this section
# List the consensus files into a text file
ls /share/apps/pacbio/consensus_files > "${path_dir}/consensus_ls.txt"
# Remove file names that contain ".fasta" (dot escaped so it matches literally)
sed '/\.fasta/d' "${path_dir}/consensus_ls.txt" > "${path_dir}/consensus_ls_nofasta.txt"
# Keep only the first 6 characters of each line
cut -c 1-6 "${path_dir}/consensus_ls_nofasta.txt" > "${path_dir}/isolate_id.txt"
# Replace every dot with a hyphen in each line
# (an unescaped "." would match any character, not just a dot)
sed 's/\./-/g' "${path_dir}/isolate_id.txt" > "${path_dir}/id_corrected.txt"
# Join the corrected ID and the original file name as two tab-separated columns
paste -d '\t' "${path_dir}/id_corrected.txt" "${path_dir}/consensus_ls_nofasta.txt" > "${path_dir}/merged.txt"
# Keep only the first line seen for each isolate ($1 is the isolate name)
awk '!seen[$1]++' "${path_dir}/merged.txt" > "${path_dir}/unique_merged.txt"
# No comments:
# Post a Comment