2013年8月9日星期五

some useful awk lines

# - SAM files - #
# Count the reads aligned to each contig/chromosome (column 3 of the SAM
# record) and print, per reference: name, read count, percentage of all
# alignment records, and a literal "%".
# Header lines (those starting with "@") are skipped: otherwise each one is
# tallied as a bogus reference and inflates the denominator, so the
# percentage is computed over the record count n rather than over NR.
# Records whose column 3 is "*" (no reference assigned) are still tallied,
# under "*".
awk '!/^@/ { c[$3]++; n++ }
     END   { for (j in c) print j, c[j], (c[j] / n * 100), "%" }' Aligned.sam

# - Blast files - #
# Column usage below: 1 = query ID, 2 = subject ID, 3 = percent identity,
# 4 = alignment length (presumably standard BLAST tabular output; confirm
# against the -outfmt used to produce blast.tab).

# Remove self hits (query ID equal to subject ID).
awk '$1 != $2' blast_all_vs_all/blast.tab > blast_all_vs_all/blast_no_self.tab

# How many matches are 200 bp or longer?
# ">=" so that hits of exactly 200 bp are included; "wc -l" so that only the
# number of matching lines is reported.
awk '$4 >= 200' blast_all_vs_all/blast_no_self.tab | wc -l

# Of those (length >= 200 bp), how many also have at least 80% identity?
# Both conditions are tested together; filtering on identity alone would
# also count hits shorter than 200 bp.
awk '$4 >= 200 && $3 >= 80' blast_all_vs_all/blast_no_self.tab | wc -l

# Miscellaneous awk
# Page through the rows of myfile.tab where column 6 minus column 5 is
# greater than 0.3. The test is one-directional: rows where column 5 is the
# larger value never match.
# less -S chops long lines instead of wrapping them.
awk '($6 - $5) > 0.3 { print $0 }' myfile.tab | less -S

没有评论:

发表评论