I'm trying to input a CSV file, but I get the following error:
associatedata <- read.csv("AssociatedSpeciesID_1.csv", header=TRUE, fileEncoding = 'UTF-8-BOM') %>% mutate_all(na_if, "")
Error in read.table(file = file, header = header, sep = sep, quote = quote, :
more columns than column names
Here's the CSV below: I can't find where the number of columns doesn't matched up. I've tried common solutions to other questions, but nothing's worked.
ObjectID,GlobalID,AssociatedSpeciesKnown,Associates,NewAssociate,UnknownSpecies_Description,AssociatedSpeciesAbundance,Coflowering,ParentGlobalID,CreationDate,Creator,EditDate,Editor
1,54e33e7c-1ff1-464f-8872-df027fcfe8ec,known,Amelanchier utahensis,,,Few,no,9fc6b840-8584-4045-b69f-f0e9488a1f06,1/7/2022 3:55:46 PM,ejob_BLM,1/7/2022 3:55:46 PM,ejob,,
2,68420bc9-d6c6-4d7f-a149-7306399ce5c1,known,NewSpecies,Genus species,,Occasional,yes,9fc6b840-8584-4045-b69f-f0e9488a1f06,1/7/2022 3:55:46 PM,ejob_BLM,1/7/2022 3:55:46 PM,ejob,,
3,88a6807b-b00c-4e58-84bb-4e8cb61409ae,unknown,,,ritiidiwjjviern bg,Common,no,9fc6b840-8584-4045-b69f-f0e9488a1f06,1/7/2022 3:55:46 PM,ejob_BLM,1/7/2022 3:55:46 PM,ejob,,
4,9fc8ea4a-e197-42cc-bd75-614d5b106364,known,Artemisia nova,,,Common,no,ea9eb086-89c2-4aa5-a2f6-95519cd35a58,1/7/2022 3:56:26 PM,ejob_BLM,1/7/2022 3:56:26 PM,ejob,,
CodePudding user response:
The header has 13 fields and all other records have 15 and examining it we see that there are two trailing commas on the end of each data line.
count.fields("abc.csv", sep = ",")
## [1] 13 15 15 15 15
1) If we remove the two trailing commas then it works. (You may not need the strip.white but it was added because the code in the Note at the end is indented 4 spaces to satisfy SO. It won't hurt.)
L <- "abc.csv" |>
readLines() |>
sub(pattern = ",,$", replacement = "")
DF <- read.csv(text = L, strip.white = TRUE)
giving
> str(DF)
'data.frame': 4 obs. of 13 variables:
$ ObjectID : int 1 2 3 4
$ GlobalID : chr "54e33e7c-1ff1-464f-8872-df027fcfe8ec" "68420bc9-d6c6-4d7f-a149-7306399ce5c1" "88a6807b-b00c-4e58-84bb-4e8cb61409ae" "9fc8ea4a-e197-42cc-bd75-614d5b106364"
$ AssociatedSpeciesKnown : chr "known" "known" "unknown" "known"
$ Associates : chr "Amelanchier utahensis" "NewSpecies" "" "Artemisia nova"
$ NewAssociate : chr "" "Genus species" "" ""
$ UnknownSpecies_Description: chr "" "" "ritiidiwjjviern bg" ""
$ AssociatedSpeciesAbundance: chr "Few" "Occasional" "Common" "Common"
$ Coflowering : chr "no" "yes" "no" "no"
$ ParentGlobalID : chr "9fc6b840-8584-4045-b69f-f0e9488a1f06" "9fc6b840-8584-4045-b69f-f0e9488a1f06" "9fc6b840-8584-4045-b69f-f0e9488a1f06" "ea9eb086-89c2-4aa5-a2f6-95519cd35a58"
$ CreationDate : chr "1/7/2022 3:55:46 PM" "1/7/2022 3:55:46 PM" "1/7/2022 3:55:46 PM" "1/7/2022 3:56:26 PM"
$ Creator : chr "ejob_BLM" "ejob_BLM" "ejob_BLM" "ejob_BLM"
$ EditDate : chr "1/7/2022 3:55:46 PM" "1/7/2022 3:55:46 PM" "1/7/2022 3:55:46 PM" "1/7/2022 3:56:26 PM"
$ Editor : chr "ejob" "ejob" "ejob" "ejob"
2) Alternately if sed is on your path then:
read.csv(pipe("sed -e s/,,$// abc.csv"), strip.white = TRUE)
3) This would also work.
DF <- read.csv("abc.csv", header = FALSE, skip = 1, strip.white = TRUE)[1:13]
names(DF) <- read.table("abc.csv", sep = ",", strip.white = TRUE, nrows = 1)
Note
Generate file from question.
Lines <- "ObjectID,GlobalID,AssociatedSpeciesKnown,Associates,NewAssociate,UnknownSpecies_Description,AssociatedSpeciesAbundance,Coflowering,ParentGlobalID,CreationDate,Creator,EditDate,Editor
1,54e33e7c-1ff1-464f-8872-df027fcfe8ec,known,Amelanchier utahensis,,,Few,no,9fc6b840-8584-4045-b69f-f0e9488a1f06,1/7/2022 3:55:46 PM,ejob_BLM,1/7/2022 3:55:46 PM,ejob,,
2,68420bc9-d6c6-4d7f-a149-7306399ce5c1,known,NewSpecies,Genus species,,Occasional,yes,9fc6b840-8584-4045-b69f-f0e9488a1f06,1/7/2022 3:55:46 PM,ejob_BLM,1/7/2022 3:55:46 PM,ejob,,
3,88a6807b-b00c-4e58-84bb-4e8cb61409ae,unknown,,,ritiidiwjjviern bg,Common,no,9fc6b840-8584-4045-b69f-f0e9488a1f06,1/7/2022 3:55:46 PM,ejob_BLM,1/7/2022 3:55:46 PM,ejob,,
4,9fc8ea4a-e197-42cc-bd75-614d5b106364,known,Artemisia nova,,,Common,no,ea9eb086-89c2-4aa5-a2f6-95519cd35a58,1/7/2022 3:56:26 PM,ejob_BLM,1/7/2022 3:56:26 PM,ejob,,
"
cat(Lines, file = "abc.csv")
