"""Compare the speed of the ``in`` operator on lists versus sets.

Random integer collections of increasing size are generated, a fixed number
of random membership queries is timed against each, and the timings can be
printed as a table or plotted.
"""

# import SciPy tools
import datascience as ds
import matplotlib
# The backend is selected before pyplot is imported, as in the original file.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

import random
import time


def generate_set(set_size):
    """Return a set of ``set_size`` distinct random integers.

    Values are drawn from ``[0, set_size * 10]`` until the set holds
    ``set_size`` distinct members.
    """
    data = set()
    # Duplicate draws are absorbed by the set, so keep drawing until full.
    while len(data) < set_size:
        data.add(random.randint(0, set_size * 10))
    return data


def generate_list(list_size):
    """Return a list of ``list_size`` random integers (duplicates allowed).

    Values are drawn from ``[0, list_size * 10]``.
    """
    return [random.randint(0, list_size * 10) for _ in range(list_size)]


def query_data(data, num_queries, largest):
    """Time random membership queries against a collection.

    Args:
        data: collection supporting the ``in`` operator
        num_queries: number of queries run in the test
        largest: inclusive upper bound of the random values queried

    Returns:
        The elapsed time in seconds for all queries, including the cost of
        generating the random query values.
    """
    # The hit count is never reported; it only consumes each lookup's result.
    hits = 0
    # perf_counter is the clock intended for measuring short intervals.
    begin = time.perf_counter()
    for _ in range(num_queries):
        # just check to see if a random number is in there
        if random.randint(0, largest) in data:
            hits += 1
    return time.perf_counter() - begin


# Revised function speed_data just generates and returns the data
def speed_data(num_queries, size_min, size_max, step_size):
    """Run speed tests over a range of collection sizes.

    For every size in ``range(size_min, size_max, step_size)`` a random list
    and a random set of that size are generated and ``num_queries`` random
    membership queries are timed against each.

    Returns:
        A tuple ``(set_sizes, list_times, set_times)`` of three parallel
        lists: the sizes tested, the list runtimes and the set runtimes
        (both in seconds).
    """
    set_sizes = []
    list_times = []
    set_times = []

    for set_size in range(size_min, size_max, step_size):
        # Generate random list
        list_data = generate_list(set_size)
        # Generate random set
        set_data = generate_set(set_size)

        # Both generators draw values from [0, set_size * 10], so the
        # queries are drawn from the same interval.
        # Time the list querying
        list_elapsed = query_data(list_data, num_queries, set_size * 10)
        # Time the set querying
        set_elapsed = query_data(set_data, num_queries, set_size * 10)

        set_sizes.append(set_size)
        list_times.append(list_elapsed)
        set_times.append(set_elapsed)

    return (set_sizes, list_times, set_times)


def print_speed_data(set_sizes, list_times, set_times):
    """Print the timings as a tab-separated table with a header row.

    One row is printed per entry of the three parallel sequences; if their
    lengths differ, rows stop at the shortest one.
    """
    print("set_size", "list", "set", sep="\t")
    for size, list_time, set_time in zip(set_sizes, list_times, set_times):
        print(size, list_time, set_time, sep="\t")


def plot_speed_data(set_sizes, list_times, set_times):
    """Plot the timings using Matplotlib and show the figure.

    List timings are drawn in red and set timings in blue, both against the
    collection size.
    """
    plt.plot(set_sizes, list_times, "r", set_sizes, set_times, "b")
    plt.xlabel("Collection size")
    plt.ylabel("Time elapsed (seconds)")
    plt.title("Timing 'in' for list vs. set")
    plt.legend(('list', 'set'), loc='upper left')
    plt.show()


def run_experiment():
    """Execute the performance tests and plot them with Matplotlib.

    Runs 100 queries per collection for sizes 1000 up to (not including)
    10000 in steps of 500.
    """
    sizes, list_times, set_times = speed_data(100, 1000, 10000, 500)
    plot_speed_data(sizes, list_times, set_times)


def run_experiment_ds():
    """Execute the performance tests and plot them using datascience.

    Uses the same test parameters as ``run_experiment``; the results are
    placed in a ``ds.Table`` and plotted against its "sizes" column.
    """
    sizes, list_times, set_times = speed_data(100, 1000, 10000, 500)
    perf = ds.Table().with_columns(
        "sizes", sizes,
        "list", list_times,
        "set", set_times
    )
    perf.plot("sizes")
    plt.xlabel("Collection size")
    plt.ylabel("Time elapsed (seconds)")
    plt.title("Timing 'in' for list vs. set")
    plt.show()


# Example usage (left disabled so importing this module runs nothing):
#sizes, list_times, set_times = speed_data(100, 1000, 10000, 500)
#print_speed_data(sizes, list_times, set_times)
#run_experiment()
#run_experiment_ds()