# CS150 Spring 2020 Lab 11
#
# Name:
# Section:
#
# Creativity:
#

# Import required R packages (suppressing conflict messages)
library(plyr, warn.conflicts=FALSE)
library(stringr, warn.conflicts=FALSE)
library(ggplot2, warn.conflicts=FALSE)

# Split strings on word boundaries, removing any punctuation
# Args:
#   strings: A vector of strings
# Returns a vector of strings (the words)
split_and_strip <- function(strings) {
  # str_subset removes any empty strings, or strings that contain only whitespace.
  # \S is the character class for non-whitespace characters
  unlist(str_split(str_subset(strings, "\\S+"), boundary("word")))
}

# Read file into a vector of cleaned and normalized words
# Args:
#   filename: Filename to analyze as a string
# Returns a vector of cleaned and normalized words
file_to_words <- function(filename) {
  # TODO: Fill in
}

# Create a ranked data frame of words and their counts
# Args:
#   words: Vector of cleaned words
# Returns a data.frame of words and counts in descending order of count
count_and_rank <- function(words) {
  # TODO: Fill in
}

# Prompt the user for a file name and construct the ranks data.frame
filename <- readline(prompt="Enter a filename: ")
words <- file_to_words(filename)
counts <- count_and_rank(words)

# Print the 10 most common words and generate a log-log plot of count vs. rank
# TODO: Your code here
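
# --- Reference sketch (one possible approach, not the required solution) -----
# The sketch below shows one way the TODOs above could be filled in, assuming
# the input file is plain text read with readLines(), that "normalized" means
# lowercased, and that plyr::count() is used for tallying. These choices are
# assumptions, not part of the lab handout; your implementation may differ.
# The two function definitions would replace the TODO stubs above (they must
# appear before the prompt/plot code for the script to run top to bottom).

# Read file into a vector of cleaned and normalized words (sketch)
# file_to_words_sketch <- function(filename) {
#   lines <- readLines(filename, warn = FALSE)
#   # Split on word boundaries, drop punctuation, then lowercase to normalize
#   tolower(split_and_strip(lines))
# }

# Create a ranked data frame of words and their counts (sketch)
# count_and_rank_sketch <- function(words) {
#   # plyr::count() on a vector returns a data.frame with columns 'x' and 'freq'
#   counts <- count(words)
#   # Order by descending frequency and attach a 1-based rank column
#   counts <- counts[order(-counts$freq), ]
#   counts$rank <- seq_len(nrow(counts))
#   counts
# }

# Print the 10 most common words and draw a log-log plot of count vs. rank
# (a roughly straight line on log-log axes is the Zipf-like pattern to look for)
# print(head(counts, 10))
# p <- ggplot(counts, aes(x = rank, y = freq)) +
#   geom_point() +
#   scale_x_log10() +
#   scale_y_log10() +
#   labs(x = "Rank (log scale)", y = "Count (log scale)")
# print(p)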