{
 "cells": [
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["Searching, Sorting, and Timing\n", "==============================\n", "\n"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["## Agenda\n", "\n"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["1. Timing\n", "2. Prelude: Timing list indexing\n", "3. Linear search\n", "4. Binary search\n", "5. Insertion sort\n", "\n"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["## 1. Timing\n", "\n"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import time\n", "time.time()"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["t1 = time.time()\n", "time.sleep(1)\n", "t2 = time.time()\n", "t2 - t1"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["## 2. Prelude: Timing list indexing\n", "\n"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["lst = [0] * 10**5"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import timeit\n", "timeit.timeit(stmt='lst[0]', globals=globals())"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["timeit.timeit(stmt='lst[10**5-1]', globals=globals())"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["times = [timeit.timeit(stmt='lst[{}]'.format(i),\n", "                       globals=globals(),\n", "                       number=100)\n", "         for i in range(10**5)]"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["times[:10]"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "plt.plot(times, 'ro')\n", "plt.show()"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["Observation: accessing an element in a list by index takes a constant\n", "amount of time, regardless of position.\n", "\n", "How? **A Python list uses an array as its underlying data storage\n", "mechanism.** To access an element in an array, the interpreter:\n", "\n", "1. Computes an *offset* into the array by multiplying the element's\n", "   index by the size of each array entry (entries are uniformly sized,\n", "   since they are merely *references* to the actual elements)\n", "2. Adds the offset to the *base address* of the array\n", "\n"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["## 3. Linear search\n", "\n"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["Task: to locate an element with a given value in a list (array).\n", "\n"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["def index(lst, x):\n", "    \"\"\"Locate the value x in lst by linear search (stub: not yet implemented).\"\"\"\n", "    pass"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["lst = list(range(100))\n", "index(lst, 10)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["index(lst, 99)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["index(lst, -1)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import timeit\n", "lst = list(range(1000))\n", "times = [timeit.timeit(stmt='index(lst, {})'.format(x),\n", "                       globals=globals(),\n", "                       number=100)\n", "         for x in range(1000)]"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import matplotlib.pyplot as plt\n", "plt.plot(times, 'ro')\n", "plt.show()"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["## 4. Binary search\n", "\n"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["Task: to locate an element with a given value in a list (array) whose\n", "contents are *sorted in ascending order*.\n", "\n"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["def index(lst, x):\n", "    \"\"\"Locate the value x in lst by binary search (stub: not yet implemented).\"\"\"\n", "    # assume that lst is sorted!!!\n", "    pass"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["lst = list(range(1000))\n", "index(lst, 10)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["index(lst, 999)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["index(lst, -1)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import timeit\n", "lst = list(range(1000))\n", "times = [timeit.timeit(stmt='index(lst, {})'.format(x),\n", "                       globals=globals(),\n", "                       number=1000)\n", "         for x in range(1000)]"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import matplotlib.pyplot as plt\n", "plt.plot(times, 'ro')\n", "plt.show()"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import timeit\n", "times = []\n", "for size in range(1000, 100000, 100):\n", "    lst = list(range(size))\n", "    times.append(timeit.timeit(stmt='index(lst, -1)',\n", "                               globals=globals(),\n", "                               number=1000))"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import matplotlib.pyplot as plt\n", "plt.plot(times, 'ro')\n", "plt.show()"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import timeit\n", "times = []\n", "for e in range(5, 20):\n", "    lst = list(range(2**e))\n", "    times.append(timeit.timeit(stmt='index(lst, -1)',\n", "                               globals=globals(),\n", "                               number=100000))"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import matplotlib.pyplot as plt\n", "plt.plot(times, 'ro')\n", "plt.show()"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["## 5. Insertion sort\n", "\n"]},
  {"cell_type": "markdown", "metadata": {"collapsed": false}, "source": ["Task: to sort the values in a given list (array) in ascending order.\n", "\n"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import random\n", "lst = list(range(1000))\n", "random.shuffle(lst)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["plt.plot(lst, 'ro')\n", "plt.show()"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["def insertion_sort(lst):\n", "    \"\"\"Sort the values of lst in ascending order (stub: not yet implemented).\"\"\"\n", "    pass"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["insertion_sort(lst)"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["plt.plot(lst, 'ro')\n", "plt.show()"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["import timeit\n", "import random\n", "times = [timeit.timeit(stmt='insertion_sort(lst)',\n", "                       setup='lst=list(range({})); random.shuffle(lst)'.format(size),\n", "                       globals=globals(),\n", "                       number=1)\n", "         for size in range(100, 5000, 250)]"]},
  {"cell_type": "code", "execution_count": null, "metadata": {"collapsed": false}, "outputs": [], "source": ["plt.plot(times, 'ro')\n", "plt.show()"]}
 ],
 "metadata": {
  "kernelspec": {"argv": ["python", "-m", "ipykernel_launcher", "-f", "{connection_file}"], "display_name": "Python 3", "env": null, "interrupt_mode": "signal", "language": "python", "metadata": null, "name": "python3"},
  "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2"},
  "name": "searching-sorting-timing.ipynb",
  "org": null
 },
 "nbformat": 4,
 "nbformat_minor": 0
}