""" CS101 example of scraping a webpage for email addresses """

import urllib.request


def get_emails(URL):
    """ Return the email addresses linked from the webpage at URL.

    The page is read line by line.  Every occurrence of 'mailto:' on a
    line contributes one entry: the text that follows it, up to (but not
    including) the next double-quote character.  If the line has no
    closing double-quote, the rest of the line is used with surrounding
    whitespace removed.

    URL -- address of the page to scan; anything urllib.request.urlopen
           accepts.

    Returns a list of strings in the order they appear on the page (an
    empty list if the page contains no 'mailto:' links).  Errors raised
    by urlopen while fetching the page propagate to the caller.
    """
    search_string = "mailto:"
    emails = []

    # The with-statement closes the connection even if reading fails.
    with urllib.request.urlopen(URL) as webpage:
        for raw_line in webpage:
            # A stray non-UTF-8 byte elsewhere on the page should not
            # abort the scan, so undecodable bytes are replaced.
            line = raw_line.decode('utf-8', errors='replace')

            # A single line may hold several links; walk through all of them.
            match_index = line.find(search_string)
            while match_index != -1:
                begin_index = match_index + len(search_string)
                end_index = line.find('"', begin_index)
                if end_index == -1:
                    # No closing quote: slicing with -1 would silently drop
                    # the last character, so take the remainder instead.
                    emails.append(line[begin_index:].strip())
                    break
                emails.append(line[begin_index:end_index])
                match_index = line.find(search_string, end_index)

    return emails


# sample call to get_emails() sending in a sample URL:
# get_emails("http://www.middlebury.edu/academics/cs/faculty/")