# wordlength by novel

# July 31, 2013
# counting word lengths
# goal: for a character vector, with each entry containing a word, we will count the number of letters in a word
# for instance, if the vector is c("today", "is", "wednesday"), we want our function to return the vector c(5,2,9)

# Approach: we'll use the function strsplit(). We need to convert the character vector to a list, with one word per entry in the list.

# Count the number of characters in each word of a character vector.
#
# Args:
#   charVec: a character vector (or a list of single strings) with one word
#     per entry. Remove white space before calling wordLength(): every
#     character of an entry is counted, white space included.
#
# Returns:
#   An integer vector with one count per entry of charVec (names of charVec,
#   if any, are kept); integer(0) when charVec is empty.
wordLength <- function(charVec) {
  # vapply() pins the result to exactly one integer per word, so empty input
  # yields integer(0) rather than the empty list sapply() would hand back.
  vapply(
    X = as.list(charVec),
    # Splitting on "" breaks a word into its individual characters.
    FUN = function(word) length(strsplit(word, split = "")[[1L]]),
    FUN.VALUE = integer(1)
  )
}

#### EXAMPLE USAGE
#cc <- c("to", "the", "lighthouse")
#wordLength(cc)
#[1] 2 3 10

##########################################

# Read a text file and return its words in order of appearance.
#
# Args:
#   filename: the file to read; passed straight to scan().
#
# Returns:
#   A character vector holding the non-empty pieces left after splitting each
#   line on non-word characters ("\\W"); character(0) if there are none.
getNovelWords <- function(filename) {
  # sep = "\n" gives one element per line. na.strings is emptied so that a
  # line consisting only of "NA" stays text instead of being read as a
  # missing value and then lost in the filter below.
  text <- scan(filename, what = "character", sep = "\n",
               na.strings = character(0))
  # unlist() returns NULL when there is nothing to flatten; as.character()
  # turns that into character(0) so the return type is always character.
  words <- as.character(unlist(strsplit(text, "\\W")))
  # Adjacent separators (e.g. ", ") leave empty strings behind; drop them.
  words[!is.na(words) & words != ""]
}

### EXAMPLE:
#texts <- dir("texts/1800-1809", pattern = "\\.txt$")
#fn <- paste0("texts/1800-1809/", texts[1])
#tw<- getNovelWords(fn)
#hist(wordLength(tw), freq=FALSE, main = fn)