# CS 150 class example

"""
Extract data from a web page.
"""

import urllib.request
import sys

SOURCE_PAGE = "http://www.cs.middlebury.edu/~cm2/tutors/pages/cs101_150_tutoring.html"
PREFIX = "zoom.us/j/"

def get_data(url, prefix=None):
    """
    Extract addresses from webpage

    Extract all addresses specified in a link, e.g.
    with prefix "mailto:" finds <a href="mailto:joe@example.com">
    with prefix "zoom.us/j/" finds <a href = "https://middlebury.zoom.us/j/5225528930">

    An address is the text between the prefix and the next double quote.
    Every occurrence on a line is collected, not only the first one. If no
    closing quote follows the prefix on that line, the rest of the line
    (with surrounding whitespace removed) is taken as the address.

    Args:
        url: URL of page to scrape
        prefix: Text that immediately precedes each address. When omitted
            (None), the module-level PREFIX is used.

    Returns:
        List of scraped addresses, in the order they appear on the page
    """
    if prefix is None:
        prefix = PREFIX  # Fall back to the module-wide setting

    scraped = []

    with urllib.request.urlopen(url) as webpage:
        # Iterate through each line of webpage, just like a file
        for line in webpage:
            line = line.decode('utf-8', 'ignore')  # Obtain a string from the raw bytes

            # Walk through every instance of prefix in the line
            begin_index = line.find(prefix)
            while begin_index != -1:
                begin_index += len(prefix)  # Advance to first character in address
                end_index = line.find('"', begin_index)  # Find ending quote
                if end_index == -1:
                    # No closing quote: slicing with -1 would silently drop the
                    # last character, so take the remainder of the line instead
                    scraped.append(line[begin_index:].strip())
                    break
                scraped.append(line[begin_index:end_index])  # Extract address
                # Resume after the quote; the +1 guarantees forward progress
                # even if prefix is an empty string
                begin_index = line.find(prefix, end_index + 1)

    return scraped
            

def write_list_to_file(data, output_file, header):
    """
    Save a header followed by a list of strings to a text file

    The file is opened in write mode, so any existing content at
    output_file is replaced. The header goes on the first line and each
    item of data on its own line after it.

    Args:
        data: list of strings
        output_file: path of output file
        header: string to write as header of output file
    """
    with open(output_file, "w") as out:
        out.write(header + "\n")
        # Terminate every entry with a newline so each lands on its own line
        out.writelines(entry + "\n" for entry in data)


# Script driver: scrape SOURCE_PAGE and save the results, one per line,
# beneath a header line in a file in the current working directory.
header = "Scraped data"
outfile = "scraped.txt"
data = get_data(SOURCE_PAGE)
write_list_to_file(data=data, output_file=outfile, header=header)