# Build the NovaSeq4 demultiplexing sheet.
#
# Reads the sample list and the master MID key, reshapes the sample list so
# that each (sample, MID plate) pair is one row, looks up the forward and
# reverse MIDs for every row by its "<MID plate>_<well>" key, and writes the
# result to ./NovaSeq4_Demux.csv.

# readxl is not used by any statement below; the load is kept (non-fatal) so
# that anything relying on it being attached keeps working.
require(readxl)
# dplyr provides semi_join(), select() and %>%; fail immediately if missing.
library(dplyr)

# Set working directory
setwd("/Users/gregg/Desktop/NovaSeq4")

# Read in files. With na.strings = "", empty cells are read as NA.
MasterKey <- read.csv(
  file = "./combined01_29_2021_16SandITS_MIDkey.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = ""
)
NS4 <- read.csv(
  file = "./NovaSeq4SampleList.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = ""
)

# Separate and recombine samples into a longer table: one copy of the shared
# sample columns per MID-plate column, with that column renamed to "midplate",
# stacked in the order listed here.
mid_columns <- c("ITS1", "ITS2", "X16S2", "X16S1")
shared_columns <- c(
  "Sample", "Well", "Researcher", "Alternate.Plate.Name", "Substrate", "Project"
)
NS4 <- do.call(
  rbind,
  lapply(mid_columns, function(mid_column) {
    piece <- NS4[, c(shared_columns, mid_column), drop = FALSE]
    names(piece)[names(piece) == mid_column] <- "midplate"
    piece
  })
)

# Keep only rows that actually have a MID plate (drops NA and empty strings).
NS4 <- NS4[!is.na(NS4$midplate) & NS4$midplate != "", , drop = FALSE]

# Rename columns to the names used in the output file.
new_names <- c(
  Well = "wellposition",
  Researcher = "client_name",
  Project = "project",
  Sample = "samplename",
  Alternate.Plate.Name = "plate",
  Substrate = "substrate"
)
to_rename <- names(NS4) %in% names(new_names)
names(NS4)[to_rename] <- unname(new_names[names(NS4)[to_rename]])

# Add unique key element: concatenate MID plate and well.
MasterKey$Key <- paste(MasterKey$PlateCode, MasterKey$Position, sep = "_")
NS4$Key <- paste(NS4$midplate, NS4$wellposition, sep = "_")

# Reduce the master key to the keys present in the sample list.
MasterKey <- semi_join(MasterKey, NS4, by = "Key")

# Reorder both tables by key (this fixes the row order of the output file).
MasterKey <- MasterKey[order(MasterKey$Key), ]
NS4 <- NS4[order(NS4$Key), ]

# A key that occurs more than once in the master key cannot be resolved to a
# single MID pair, so refuse to guess.
duplicated_keys <- unique(MasterKey$Key[duplicated(MasterKey$Key)])
if (length(duplicated_keys) > 0) {
  stop(
    "Master key has more than one row for key(s): ",
    paste(duplicated_keys, collapse = ", "),
    call. = FALSE
  )
}

# Fill in forwardmid and reversemid by looking each sample key up in the
# master key. Copying the columns across by row position is only correct when
# the two tables match exactly one-to-one; otherwise MIDs end up on the wrong
# samples or the assignment fails on a length mismatch.
key_rows <- match(NS4$Key, MasterKey$Key)
unmatched_keys <- unique(NS4$Key[is.na(key_rows)])
if (length(unmatched_keys) > 0) {
  warning(
    "No master key entry for key(s): ",
    paste(unmatched_keys, collapse = ", "),
    "; forwardmid/reversemid are NA for those rows",
    call. = FALSE
  )
}
NS4$forwardmid <- MasterKey$forward_mid[key_rows]
NS4$reversemid <- MasterKey$reverse_mid[key_rows]

# Rearrange column order (this also drops the helper Key column).
NS4 <- NS4 %>%
  select(
    forwardmid, reversemid, samplename, project, wellposition,
    plate, midplate, substrate, client_name
  )

# Output a csv file.
# NOTE(review): write.csv() writes row names as an unnamed first column by
# default; confirm whether the consumer of this file expects that column.
write.csv(NS4, "./NovaSeq4_Demux.csv")