I'm trying to find the letter count of the longest word in each sentence. My code is here
# Find the longest word, and its letter count, in each of 30 sampled sentences.

# Draw 30 sentences at random from the stringr example corpus. Functions are
# namespace-qualified so the script runs without any prior library() call.
new_data <- dplyr::sample_n(data.frame(stringr::sentences), 30)
new_data

# Strip full stops and commas in a single pass, then split every sentence on
# single spaces. split_data ends up as a list holding one character vector of
# words per sentence.
split_data <- stringr::str_remove_all(new_data$stringr..sentences, "[.,]")
split_data <- strsplit(split_data, " ")
split_data

# Longest word of each sentence. Iterating over split_data itself (instead of
# a hand-maintained counter) keeps the result in step with the sample size.
# order() is stable, so when several words share the maximum length the last
# of them is the one kept.
longest <- vapply(
  split_data,
  function(words) tail(words[order(nchar(words))], 1),
  character(1)
)
longest

# Letter count of the longest word in each sentence.
longest_length <- nchar(longest)
longest_length
CodePudding user response:
Assuming the only non-letter characters you need to clean from your corpus are . and , you could use the following. You can also pull the actual words by subsetting each sentence with which.max(). Here you have to be careful of ties though.
library(tidyverse)
set.seed(1)

# Draw five example sentences from the stringr corpus.
corpus <- sample(stringr::sentences, 5)
corpus
#> [1] "No doubt about the way the wind blows."
#> [2] "Feel the heat of the weak dying flame."
#> [3] "Take shelter in this tent, but keep still."
#> [4] "The kite flew wildly in the high wind."
#> [5] "The barrel of beer was a brew of malt and hops."

# Drop full stops and commas, then break every sentence into its words.
# Both results below are derived from this one list of word vectors.
words <- str_split(str_remove_all(corpus, "[.,]"), " ")

# length of longest words
map_int(words, function(w) max(nchar(w)))
#> [1] 5 5 7 6 6

# pull actual longest words (which.max() keeps the first word on a tie)
map_chr(words, function(w) w[which.max(nchar(w))])
#> [1] "doubt" "dying" "shelter" "wildly" "barrel"
Created on 2022-02-02 by the reprex package (v2.0.1)
