#knapsack problem
import numpy as np
import tpot2
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster


class SubsetSelector(tpot2.BaseIndividual):
    """Individual whose genome is a subset of a fixed pool of items.

    The pool is stored in ``self.values`` and the currently selected items in
    ``self.subsets``. Mutation adds or removes up to ``k`` items; crossover
    redistributes the items on which two individuals disagree.
    """

    def __init__(   self,
                    values,
                    initial_set = None,
                    k=1, #step size for shuffling
                ):
        """Build an individual.

        Parameters
        ----------
        values : int or iterable
            Either the number of items (the pool becomes ``range(values)``)
            or an iterable holding the items themselves.
        initial_set : iterable, optional
            Items selected at start. When omitted, up to ``k`` distinct items
            are drawn at random from the pool.
        k : int
            Maximum number of items added or removed by a single mutation.
        """
        # Accept NumPy integers as well: set(np.int64(5)) would raise.
        if isinstance(values, (int, np.integer)):
            self.values = set(range(0, int(values)))
        else:
            self.values = set(values)

        if initial_set is None:
            # Draw from the normalised pool rather than the raw argument so
            # that the integer form of `values` works, and cap k so an empty
            # or small pool does not raise.
            pool = list(self.values)
            self.subsets = set(random.sample(pool, k=min(k, len(pool))))
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self,):
        """Try the mutation operators in random order.

        Returns True as soon as one operator reports that it changed the
        individual, False if none of them could.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2):
        """Try the crossover operators in random order against ``ind2``.

        Returns True as soon as one operator reports success, else False.
        Both individuals may be modified in place.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self,):
        """Add up to ``k`` not-yet-selected items; False if none are left."""
        not_included = list(self.values.difference(self.subsets))
        # A single remaining item is still addable (the previous `> 1` check
        # made the full set unreachable).
        if len(not_included) > 0:
            self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
            return True
        else:
            return False

    def _mutate_remove(self,):
        """Remove up to ``k`` selected items, always keeping at least one.

        Returns True when items were removed, False when the subset has one
        item or fewer and nothing can be removed.
        """
        if len(self.subsets) > 1:
            n_remove = min(self.k, len(self.subsets) - 1)
            self.subsets = self.subsets - set(random.sample(list(self.subsets), k=n_remove))
            # Report the change so mutate() stops instead of applying a
            # second operator or returning False after a real mutation.
            return True
        else:
            return False

    def _crossover_swap(self, ss2):
        """Randomly reassign every item held by exactly one of the two parents.

        Each such item is removed from both individuals and then given to one
        of them, chosen uniformly at random. Returns False (and changes
        nothing) when both individuals hold identical subsets.
        """
        # symmetric_difference returns a new set, so mutating the parents
        # inside the loop is safe.
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return a string built from the sorted selected items."""
        return str(tuple(sorted(self.subsets)))


# Problem definition: one value and one weight per item.
N_ITEMS = 100
values = np.random.randint(200, size=N_ITEMS)
# Same length as `values` (the original allocated 200 weights for 100 items,
# leaving half of them unused).
weights = np.random.random(N_ITEMS)*10
max_weight = 50


def individual_generator():
    """Yield an endless stream of fresh SubsetSelector individuals.

    Each individual selects among the indices ``0 .. len(values) - 1``.
    """
    while True:
        yield SubsetSelector(values=np.arange(len(values)))


def simple_objective(ind, **kwargs):
    """Score a knapsack candidate.

    Parameters
    ----------
    ind : SubsetSelector
        Individual whose ``subsets`` holds the selected item indices.
    **kwargs
        Accepted and ignored, so optional arguments passed by the evolver
        (e.g. step, budget, generations) do not cause an error.

    Returns
    -------
    (total_value, total_weight)
        ``(0, 0)`` for an empty selection. The value is forced to 0 when the
        total weight exceeds ``max_weight``.
    """
    subset = np.array(list(ind.subsets))
    if len(subset) == 0:
        return 0, 0

    total_weight = np.sum(weights[subset])
    total_value = np.sum(values[subset])

    if total_weight > max_weight:
        total_value = 0

    return total_value, total_weight


objective_names = ["Value", "Weight"]
objective_function_weights = [1,-1]


evolver = tpot2.BaseEvolver(   individual_generator=individual_generator(),
                                objective_functions=[simple_objective],
                                objective_function_weights = objective_function_weights,
                                bigger_is_better = True,
                                population_size= 100,
                                objective_names = objective_names,
                                generations= 100,
                                n_jobs=1,
                                verbose = 1,

)

evolver.optimize()
| \n", " | Selected Index | \n", "Value | \n", "Weight | \n", "Parents | \n", "Variation_Function | \n", "Individual | \n", "Generation | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "(12,) | \n", "111.0 | \n", "2.311217 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x7f8f6ab1b... | \n", "0.0 | \n", "
| 1 | \n", "(4,) | \n", "53.0 | \n", "9.250839 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x7f8f679b8... | \n", "0.0 | \n", "
| 2 | \n", "(49,) | \n", "142.0 | \n", "1.245016 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x7f8f6ab1a... | \n", "0.0 | \n", "
| 3 | \n", "(41,) | \n", "165.0 | \n", "6.678665 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x7f8f6ab1a... | \n", "0.0 | \n", "
| 4 | \n", "(52,) | \n", "149.0 | \n", "4.166724 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x7f8f6ab1a... | \n", "0.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 9995 | \n", "(0, 1, 8, 12, 16, 17, 27, 33, 39, 40, 48, 49, ... | \n", "0.0 | \n", "56.274519 | \n", "((0, 1, 8, 12, 16, 17, 27, 33, 39, 40, 49, 60,... | \n", "mutate | \n", "<__main__.SubsetSelector object at 0x7f8f67c93... | \n", "99.0 | \n", "
| 9996 | \n", "(8, 27, 30, 33, 65, 80, 83, 94) | \n", "871.0 | \n", "11.654615 | \n", "((8, 27, 30, 33, 80, 83, 94),) | \n", "mutate | \n", "<__main__.SubsetSelector object at 0x7f8f67c93... | \n", "99.0 | \n", "
| 9997 | \n", "(27, 48, 60, 80, 83, 94) | \n", "764.0 | \n", "10.725417 | \n", "((27, 33, 60, 80, 83, 94),) | \n", "mutate | \n", "<__main__.SubsetSelector object at 0x7f8f67c93... | \n", "99.0 | \n", "
| 9998 | \n", "(8, 27, 30, 33, 58, 65, 80, 83, 94) | \n", "924.0 | \n", "20.673691 | \n", "((8, 27, 30, 33, 80, 83, 94),) | \n", "mutate | \n", "<__main__.SubsetSelector object at 0x7f8f67c93... | \n", "99.0 | \n", "
| 9999 | \n", "(8, 17, 27, 29, 33, 68, 72, 80, 97) | \n", "1098.0 | \n", "10.477494 | \n", "((8, 17, 27, 29, 33, 68, 72, 80),) | \n", "mutate | \n", "<__main__.SubsetSelector object at 0x7f8f67c93... | \n", "99.0 | \n", "
10000 rows × 7 columns
\n", "