BigHorn Sheep (5CM) Bioinformatics
# Request a Slurm allocation under the "microbiome" account with a time
# limit of 0 days, 5 hours, 0 minutes.
salloc --account=microbiome -t 0-05:00
# Load the software modules for this session.
module load swset/2018.05 gcc/7.3.0 usearch/10.0.240 perl-txt-levenshtein-xs

# Work in a scratch directory; -p makes the mkdir safe to repeat.
mkdir -p /gscratch/grandol1/5CM/rawdata
cd /gscratch/grandol1/5CM/rawdata

# Decompress each read file to stdout and split it into pieces of
# 13,000,000 lines named 5CM_R1_000.fastq, 5CM_R1_001.fastq, ... (and the
# same for R2). 13,000,000 is a multiple of 4, so 4-line FASTQ records are
# not cut across pieces (assumes standard 4-line records -- TODO confirm).
unpigz --to-stdout /project/microbiome/data_queue/seq/5CM/rawdata/5CM_S1_L001_R1_001.fastq |
  split -l 13000000 -d --suffix-length=3 --additional-suffix=.fastq - 5CM_R1_
unpigz --to-stdout /project/microbiome/data_queue/seq/5CM/rawdata/5CM_S1_L001_R2_001.fastq |
  split -l 13000000 -d --suffix-length=3 --additional-suffix=.fastq - 5CM_R2_

# Run the parse/count driver from the project directory while still in the
# scratch directory (single leading slash; the original had "//project").
/project/microbiome/data_queue/seq/5CM/rawdata/run_parse_count_onSplitInput.pl
# Forward- and reverse-read split scripts, run from the scratch directory.
./run_splitFastq_fwd.sh
./run_splitFastq_rev.sh

# Aggregation step is run from the project rawdata directory.
cd /project/microbiome/data_queue/seq/5CM/rawdata
./run_aggregate.sh

# Read-merging step is run from the tfmergedreads directory.
cd /project/microbiome/data_queue/seq/5CM/tfmergedreads
./run_slurm_mergereads.pl
# NOTE(review): the next line is a bare directory path, not a command; if
# executed it fails with "Is a directory". Presumably it records where the
# 16S merge output lives, or a leading "cd" is missing -- confirm.
/project/microbiome/data_queue/seq/5CM/tfmergedreads/16S/5CM
# (R) Read the filter/merge statistics table. The file has no header row,
# so the columns are named V1, V2, ... by read.csv.
tfm <- read.csv("/Users/gregg/Desktop/untitled folder/filtermergestats.csv", header=FALSE)

# Column roles as labelled on the plot axes below:
#   V2      = reads per sample
#   V2 - V4 = reads removed in merging and filtering
# V1 is searched for "16S" to pick out the 16S rows.
reads_in <- tfm$V2
reads_removed <- reads_in - tfm$V4
rows_16S <- grep("16S", tfm$V1)

# Numeric summary of reads removed, over all rows.
summary(reads_removed)

# Uncomment to write the figure to a PDF instead of the screen device.
#pdf(file="read_counts.pdf", height=6, width=6)

# Empty frame (type="n") scaled to all rows; points are added afterwards.
plot(reads_in, reads_removed,
     type="n",
     xlab="Reads per sample",
     ylab="Reads removed in merging and filtering",
     main="MacGlover BigHorn Sheep 1")

# Vertical lines at the 2.5%, 50% and 97.5% quantiles of 16S read counts.
abline(v=quantile(reads_in[rows_16S], probs=c(0.025, 0.5, 0.975)), col="purple")

# One purple point per 16S row.
points(reads_in[rows_16S], reads_removed[rows_16S], col="purple")
# Move to the 5CM project directory, create the otu/ directory there, and
# run the mkotu driver script (presumably the OTU-generation step, per its
# name). The original "cd/project/..." lacked the space after cd, so the
# directory change never happened and the next two commands ran in the
# wrong place.
cd /project/microbiome/data_queue/seq/5CM
# -p: do not fail if otu/ already exists from an earlier run.
mkdir -p otu
./run_slurm_mkotu.pl