Code Block |
---|
/project/microbiome/data_queue/seq/LowReadII/rawdata |
salloc --account=microbiome -t 0-06:00 |
mkdir -p /gscratch/grandol1/LowReadII/rawdata |
cd /gscratch/grandol1/LowReadII/rawdata |
unpigz --to-stdout /project/microbiome/data_queue/seq/LowReadII/rawdata/Low-Read-II_S1_L001_R1_001.fastq | split -l 1000000 -d --suffix-length=3 --additional-suffix=.fastq - LowReadII_R1_ ; |
unpigz --to-stdout /project/microbiome/data_queue/seq/LowReadII/rawdata/Low-Read-II_S1_L001_R2_001.fastq | split -l 1000000 -d --suffix-length=3 --additional-suffix=.fastq - LowReadII_R2_ |
/project/microbiome/data_queue/seq/LowReadII/rawdata/run_parse_count_onSplitInput.pl |
cd /project/microbiome/data_queue/seq/LowReadII/rawdata |
./run_splitFastq_fwd.sh |
./run_splitFastq_rev.sh |
./run_aggregate.sh |
cd /project/microbiome/data_queue/seq/LowReadII/tfmergedreads |
./run_slurm_mergereads.pl |
cd /project/microbiome/data_queue/seq/LowReadII/otu |
./run_slurm_mkotu.pl |
vsearch -sintax OTUFILE -db REFERENCEDATABASE -tabbedout OUTPUT -sintax_cutoff 0.8 -strand both -threads 32
Assign taxonomy:
Code Block |
---|
salloc --account=microbiome -t 0-02:00 --mem=500G |
...
module load swset/2018.05 gcc/7.3.0 |
...
module load vsearch/2.15.1 |
...
vsearch --sintax zotus.fa --db /project/microbiome/users/grandol1/ref_db/gg_16s_13.5.fa -tabbedout LRII.sintax -sintax_cutoff 0.8 |
Output:
Reading file /project/microbiome/users/grandol1/ref_db/gg_16s_13.5.fa 100%
...
Classified 4038 of 4042 sequences (99.90%)
Convert into useful form:
Code Block |
---|
awk -F "\t" '{OFS=","} NR==1 {print "OTU_ID","SEQS","SIZE","DOMAIN","KINGDOM","PHYLUM","CLASS","ORDER","FAMILY","GENUS","SPECIES"} {gsub(";", ","); gsub("centroid=", ""); gsub("seqs=", ""); gsub("size=", ""); match($4, /d:[^,]+/, d); match($4, /k:[^,]+/, k); match($4, /p:[^,]+/, p); match($4, /c:[^,]+/, c); match($4, /o:[^,]+/, o); match($4, /f:[^,]+/, f); match($4, /g:[^,]+/, g); match($4, /s:[^,]+/, s); print $1, d[0]=="" ? "NA" : d[0], k[0]=="" ? "NA" : k[0], p[0]=="" ? "NA" : p[0], c[0]=="" ? "NA" : c[0], o[0]=="" ? "NA" : o[0], f[0]=="" ? "NA" : f[0], g[0]=="" ? "NA" : g[0], s[0]=="" ? "NA" : s[0] }' LRII.sintax > LRIItaxonomy.csv |