# Request a Slurm allocation under the "microbiome" account with a 5-hour
# time limit (format is days-hours:minutes).
# NOTE(review): these read as interactive session notes; salloc starts a new
# shell by default, so the commands below are presumably typed inside it.
salloc --account=microbiome -t 0-05:00

# Scratch working directory that will hold the split FASTQ chunks.
mkdir -p /gscratch/grandol1/TRNL_Test/rawdata
cd /gscratch/grandol1/TRNL_Test/rawdata

# Stream each raw read file through unpigz and cut it into 1,000,000-line
# chunks named TRNL_Test_R{1,2}_000.fastq, TRNL_Test_R{1,2}_001.fastq, ...
# 1,000,000 is a multiple of 4, so four-line FASTQ records are never split
# across two chunk files.
# NOTE(review): the input names carry no .gz suffix although they are passed
# to unpigz -- confirm the on-disk file names.
unpigz --to-stdout /project/microbiome/data_queue/seq/TRNL_Test/rawdata/TRNL_Test_S1_L001_R1_001.fastq | split -l 1000000 -d --suffix-length=3 --additional-suffix=.fastq - TRNL_Test_R1_
unpigz --to-stdout /project/microbiome/data_queue/seq/TRNL_Test/rawdata/TRNL_Test_S1_L001_R2_001.fastq | split -l 1000000 -d --suffix-length=3 --additional-suffix=.fastq - TRNL_Test_R2_

# Launch the parse/count driver from the scratch directory (cwd is still
# /gscratch/.../rawdata at this point).
# presumably it picks up the split chunks from the cwd -- verify in the script.
/project/microbiome/data_queue/seq/TRNL_Test/rawdata/run_parse_count_onSplitInput.pl

# Run the forward/reverse split steps and the aggregation step from the
# project rawdata directory, in this order.
cd /project/microbiome/data_queue/seq/TRNL_Test/rawdata
./run_splitFastq_fwd.sh
./run_splitFastq_rev.sh
./run_aggregate.sh

# Launch the read-merging driver from the tfmergedreads directory.
cd /project/microbiome/data_queue/seq/TRNL_Test/tfmergedreads
./run_slurm_mergereads.pl
# Analyze only the trimmed R1 reads, to avoid the merge bias suspected with 2 x 150 sequencing:
# Go to the directory holding the trimmed reads. In the original notes this
# path was wrapped onto two lines, so its second half ran as a command.
# NOTE(review): confirm the joined path is the intended one.
cd /project/microbiome/data/seq/gtl_tests/TRNL_Test/tfmergedreads/16S/TRNL1/trimmed

# Copy only the trimmed forward (R1) reads out for the R1-only analysis.
cp ./*R1.fq /project/microbiome/data_queue/seq/TRNL_Test/

# NOTE(review): the copy above targets .../data_queue/seq/TRNL_Test/ but the
# cd below enters .../data/seq/TRNL_Test/ -- confirm both name the same place.
cd /project/microbiome/data/seq/TRNL_Test/

# FASTQ -> FASTA: from every 4-line record keep the header line (leading '@'
# rewritten to '>') and the sequence line. The first~step addresses are a
# GNU sed extension.
sed -n '1~4s/^@/>/p;2~4p' ./*.fq > ./TrnlTest16S.fa

# Prefix used for the dereplication side outputs and the relabelled headers.
# The original command referenced an unset $s here; the FASTA base name is
# used instead -- TODO confirm this is the label wanted downstream.
sample=TrnlTest16S

# Dereplicate full-length sequences on the plus strand, writing abundance
# annotations (--sizeout) and unwrapped FASTA lines (--fasta_width 0).
# The output stays derep.fa because the denoising step below reads it.
vsearch --derep_fulllength "${sample}.fa" \
  --strand plus \
  --output derep.fa \
  --sizeout \
  --uc "${sample}.derep.uc" \
  --relabel "${sample}." \
  --fasta_width 0

# Denoise the dereplicated reads with vsearch's UNOISE clustering; centroid
# sequences are written to zotus_vsearch.fa with abundances carried through.
vsearch --cluster_unoise derep.fa --centroids zotus_vsearch.fa --sizein --sizeout