BigHorn Sheep (5CM) Bioinformatics

# Request an interactive SLURM allocation on the microbiome account
# (time limit 0 days, 5 hours).
salloc --account=microbiome -t 0-05:00

# Load the toolchain used by the steps that follow.
module load swset/2018.05 gcc/7.3.0 usearch/10.0.240 perl-txt-levenshtein-xs

# Stream each raw read file (R1, then R2) through unpigz and cut it into
# 13,000,000-line pieces in scratch space. Pieces get 3-digit numeric suffixes,
# e.g. 5CM_R1_000.fastq, 5CM_R1_001.fastq, ...
# NOTE(review): 13,000,000 is a multiple of 4, so records are not cut in half
# assuming standard 4-line FASTQ records - confirm.
mkdir -p /gscratch/grandol1/5CM/rawdata
cd /gscratch/grandol1/5CM/rawdata
for read in R1 R2; do
    unpigz --to-stdout "/project/microbiome/data_queue/seq/5CM/rawdata/5CM_S1_L001_${read}_001.fastq" |
        split -l 13000000 -d --suffix-length=3 --additional-suffix=.fastq - "5CM_${read}_"
done

# Run the parse/count driver that lives alongside the raw data.
# (Path normalised from a leading "//", which POSIX leaves implementation-defined.)
# NOTE(review): presumably this operates on the split pieces produced in the
# current (scratch) directory - confirm against the script.
/project/microbiome/data_queue/seq/5CM/rawdata/run_parse_count_onSplitInput.pl

# Forward- and reverse-read splitting helpers, invoked from the current directory.
# NOTE(review): these are expected to exist in the working directory at this
# point - verify where they are generated or copied from.
./run_splitFastq_fwd.sh

./run_splitFastq_rev.sh

# Return to the project rawdata directory and run the aggregation script there.
cd /project/microbiome/data_queue/seq/5CM/rawdata

./run_aggregate.sh

# Move to the merged-reads directory and launch the read-merging driver script.
# NOTE(review): judging by its name the script submits SLURM jobs - confirm.
cd /project/microbiome/data_queue/seq/5CM/tfmergedreads

./run_slurm_mergereads.pl

# NOTE(review): the next line is a bare directory path with no command; it looks
# like a note of where the 16S output for 5CM lives rather than something to
# execute - confirm, or prefix it with the intended command (cd / ls).
/project/microbiome/data_queue/seq/5CM/tfmergedreads/16S/5CM

 

# Read-count QC (R): how many reads did each sample lose in merging/filtering?
# NOTE(review): assumes V1 = sample/file label, V2 = reads per sample before
# merging, V4 = reads remaining after merging and filtering - confirm against
# the layout of filtermergestats.csv.
tfm <- read.csv(
  "/Users/gregg/Desktop/untitled folder/filtermergestats.csv",
  header = FALSE
)
reads_removed <- tfm$V2 - tfm$V4
summary(reads_removed)

# Uncomment this line (and the dev.off() at the end) to write the figure to a PDF.
# pdf(file = "read_counts.pdf", height = 6, width = 6)

# Empty frame sized to all samples; only the 16S samples are drawn below.
plot(
  tfm$V2, reads_removed,
  type = "n",
  xlab = "Reads per sample",
  ylab = "Reads removed in merging and filtering",
  main = "MacGlover BigHorn Sheep 1"
)

# Rows whose label contains "16S".
is_16s <- grepl("16S", tfm$V1)

# Vertical guides at the 2.5%, 50% and 97.5% quantiles of 16S reads per sample.
abline(
  v = quantile(tfm$V2[is_16s], probs = c(0.025, 0.5, 0.975)),
  col = "purple"
)
points(tfm$V2[is_16s], reads_removed[is_16s], col = "purple")

# dev.off()

# Move to the run's top-level directory (the original was missing the space after cd).
cd /project/microbiome/data_queue/seq/5CM

# Create the OTU output directory; -p keeps this step re-runnable.
mkdir -p otu

# Launch the OTU-generation driver script from the run directory.
# NOTE(review): judging by its name the script submits SLURM jobs - confirm.
./run_slurm_mkotu.pl

 

 

 
