library(dplyr)
library(ggplot2) # attached by the original script; nothing visible here uses it

# Builds a per-reaction read-percentage report: reads a parse report of
# counts per MID pair and locus, computes each reaction's share of its locus
# total, joins the result to the sample sheet by MID pair, and writes a CSV.

# Input / output locations ----
parse_report_path <- "/Volumes/Macintosh HD/Users/gregg/Desktop/QC_check/original_parsereport_QcCheck1stepPcr_S1_L001_R1_001.fastq"
demux_path <- "/Volumes/Macintosh HD/Users/gregg/Desktop/QC_check/QcCheck1step_Demux.csv"
report_path <- "./JorgensonReport_6-29-21.csv"

# Layout of the joined table once it is sorted by locus, then samplename.
# The script relies on these positions: 192 wells from the sample sheet,
# 96 rows per locus, and within each locus the first 72 rows are controls
# while the last 24 are the KJ samples.
n_wells <- 192L
rows_16s <- 1:96
rows_its <- 97:192
kj_rows_16s <- 73:96
kj_rows_its <- 169:192

# Read in expected parse report ----
# The first three lines of the file are skipped and there is no header row.
KJexpected <- read.csv(parse_report_path, skip = 3, header = FALSE)

# Add appropriate column names.
names(KJexpected) <- c("forwardmid", "reversemid", "count", "locus")

# Create the matching key from both MIDs.
KJexpected$pair <- paste0(KJexpected$forwardmid, "_", KJexpected$reversemid)

# Separate loci ----
Jorgenson16S <- arrange(subset(KJexpected, locus == "16S"), count)
JorgensonITS <- arrange(subset(KJexpected, locus == "ITS"), count)

# Total read counts by locus ----
ReadTotals16S <- as.numeric(sum(Jorgenson16S$count))
ReadTotalsITS <- as.numeric(sum(JorgensonITS$count))
Total <- ReadTotals16S + ReadTotalsITS

# Read percentage per reaction for the full run ----
Jorgenson16S$readpercent <- (Jorgenson16S$count / ReadTotals16S) * 100
JorgensonITS$readpercent <- (JorgensonITS$count / ReadTotalsITS) * 100

# Recombine the loci.
KJexpected <- rbind(Jorgenson16S, JorgensonITS)

# Read in sample names ----
KJsamples <- read.csv(demux_path)

# Remove empty wells: keep only the first 192 rows of the sheet. head() does
# not pad with NA rows when the sheet is shorter than that.
KJsamples <- head(KJsamples, n_wells)

# Build the same MID-pair key, upper-cased.
# NOTE(review): the parse-report key is not upper-cased; presumably its MIDs
# are already upper case -- confirm.
KJsamples$pair <- toupper(
  paste0(KJsamples$forwardmid, "_", KJsamples$reversemid)
)

# Subset to needed columns.
KJsamples <- subset(KJsamples, select = c(samplename, wellposition, pair))

# Join the datasets on the MID pair, then sort by locus and sample name.
KJsamples <- arrange(
  inner_join(KJsamples, KJexpected, by = "pair"),
  locus, samplename
)

# The KJ-only step below indexes rows by position, so stop rather than write
# a mislabelled report if the joined table does not have the expected layout.
if (nrow(KJsamples) != n_wells ||
    !all(KJsamples$locus[rows_16s] == "16S") ||
    !all(KJsamples$locus[rows_its] == "ITS")) {
  stop(
    "Joined table does not have 96 16S rows followed by 96 ITS rows; ",
    "the positional control/KJ row ranges would be wrong.",
    call. = FALSE
  )
}

# Read percentage per reaction for KJ samples only ----
# The denominator is summed over exactly the rows that are reported as KJ
# samples. Previously it was the sum of the 24 lowest-count rows of each
# locus, which matches only if the KJ samples happen to be those rows.
KJonly16S <- as.numeric(sum(KJsamples$count[kj_rows_16s]))
KJonlyITS <- as.numeric(sum(KJsamples$count[kj_rows_its]))

# Controls are excluded from the KJ-only assessment, so they stay NA.
KJsamples$readpercentKJonly <- NA_real_
KJsamples$readpercentKJonly[kj_rows_16s] <-
  (KJsamples$count[kj_rows_16s] / KJonly16S) * 100
KJsamples$readpercentKJonly[kj_rows_its] <-
  (KJsamples$count[kj_rows_its] / KJonlyITS) * 100

# Create output ----
write.csv(KJsamples, report_path, quote = FALSE, row.names = FALSE)