{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Objective functions can optionally take in step, budget, and generations.\n", "\n", "step - The same objective function will be run for #evaluation_early_stop_steps; the current step will be passed into the function as an integer. (This is useful for getting a single fold of cross validation, for example.)\n", "\n", "budget - A parameter that varies over the course of the generations. Gets passed into the objective function as a float between 0 and 1. If the budget of the previous evaluation is less than the current budget, it will get re-evaluated. Useful for using smaller datasets earlier in training.\n", "\n", "generations - an int corresponding to the current generation number.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/anaconda3/envs/tpotenv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "Generation: 100%|██████████| 100/100 [01:43<00:00, 1.03s/it]\n" ] } ], "source": [ "#knapsack problem\n", "import numpy as np\n", "import tpot\n", "import random\n", "import matplotlib.pyplot as plt\n", "from dask.distributed import Client, LocalCluster\n", "\n", "class SubsetSelector(tpot.individual.BaseIndividual):\n", " def __init__( self,\n", " values,\n", " initial_set = None,\n", " k=1, #step size for shuffling\n", " ):\n", "\n", " if isinstance(values, int):\n", " self.values = set(range(0,values))\n", " else:\n", " self.values = set(values)\n", "\n", "\n", " if initial_set is None:\n", " self.subsets = set(random.choices(values, k=k))\n", " else:\n", " self.subsets = set(initial_set)\n", "\n", " self.k = k\n", "\n", " self.mutation_list = [self._mutate_add, self._mutate_remove]\n", " self.crossover_list = [self._crossover_swap]\n", " \n", "\n", " def mutate(self, rng=None):\n", " mutation_list_copy = self.mutation_list.copy()\n", " random.shuffle(mutation_list_copy)\n", " for func in mutation_list_copy:\n", " if func():\n", " return True\n", " return False\n", "\n", " def crossover(self, ind2, rng=None):\n", " crossover_list_copy = self.crossover_list.copy()\n", " random.shuffle(crossover_list_copy)\n", " for func in crossover_list_copy:\n", " if func(ind2):\n", " return True\n", " return False\n", "\n", " def _mutate_add(self,):\n", " not_included = list(self.values.difference(self.subsets))\n", " if len(not_included) > 1:\n", " self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))\n", " return True\n", " else:\n", " return False\n", "\n", " def _mutate_remove(self,):\n", " if len(self.subsets) > 1:\n", " self.subsets = self.subsets - set(random.sample(list(self.subsets), k=min(self.k, len(self.subsets)-1) ))\n", "\n", " def _crossover_swap(self, ss2):\n", " diffs = 
self.subsets.symmetric_difference(ss2.subsets)\n", "\n", " if len(diffs) == 0:\n", " return False\n", " for v in diffs:\n", " self.subsets.discard(v)\n", " ss2.subsets.discard(v)\n", " random.choice([self.subsets, ss2.subsets]).add(v)\n", " \n", " return True\n", "\n", " def unique_id(self):\n", " return str(tuple(sorted(self.subsets)))\n", "\n", "def individual_generator():\n", " while True:\n", " yield SubsetSelector(values=np.arange(len(values)))\n", "\n", "\n", "values = np.random.randint(200,size=100)\n", "weights = np.random.random(200)*10\n", "max_weight = 50\n", "\n", "def simple_objective(ind, **kwargs):\n", " subset = np.array(list(ind.subsets))\n", " if len(subset) == 0:\n", " return 0, 0\n", "\n", " total_weight = np.sum(weights[subset])\n", " total_value = np.sum(values[subset])\n", "\n", " if total_weight > max_weight:\n", " total_value = 0\n", "\n", " return total_value, total_weight\n", "\n", "objective_names = [\"Value\", \"Weight\"]\n", "objective_function_weights = [1,-1]\n", "\n", "\n", "\n", "evolver = tpot.evolvers.BaseEvolver( individual_generator=individual_generator(), \n", " objective_functions=[simple_objective],\n", " objective_function_weights = objective_function_weights,\n", " bigger_is_better = True,\n", " population_size= 100,\n", " objective_names = objective_names,\n", " generations= 100,\n", " n_jobs=32,\n", " verbose = 1,\n", "\n", ")\n", "\n", "evolver.optimize()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "best subset {1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 42, 43, 48, 50, 61, 62, 68, 80, 89, 91, 97, 98}\n", "Best value 3070.0, weight 49.01985602703945\n", "\n", "All results\n" ] }, { "data": { "text/html": [ "
| \n", " | Selected Index | \n", "Value | \n", "Weight | \n", "Parents | \n", "Variation_Function | \n", "Individual | \n", "Generation | \n", "Submitted Timestamp | \n", "Completed Timestamp | \n", "Eval Error | \n", "Pareto_Front | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "(40,) | \n", "89.0 | \n", "9.883465 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x32aa80eb0> | \n", "0.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| 1 | \n", "(45,) | \n", "116.0 | \n", "6.643557 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x32aa83b50> | \n", "0.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| 2 | \n", "(52,) | \n", "172.0 | \n", "9.273163 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x32aa81210> | \n", "0.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| 3 | \n", "(33,) | \n", "112.0 | \n", "1.594347 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x32aa838e0> | \n", "0.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| 4 | \n", "(37,) | \n", "90.0 | \n", "3.273826 | \n", "NaN | \n", "NaN | \n", "<__main__.SubsetSelector object at 0x32aa83e50> | \n", "0.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 9995 | \n", "(1, 9, 16, 23, 24, 31, 77, 79) | \n", "998.0 | \n", "11.622582 | \n", "((1, 9, 16, 17, 23, 24, 31, 77), (1, 9, 16, 17... | \n", "ind_mutate | \n", "<__main__.SubsetSelector object at 0x3a739b010> | \n", "99.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| 9996 | \n", "(1, 8, 9, 16, 22, 23, 24, 28, 29, 31, 48, 49, ... | \n", "0.0 | \n", "51.400433 | \n", "((1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 48,... | \n", "ind_mutate | \n", "<__main__.SubsetSelector object at 0x3af9a4460> | \n", "99.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| 9997 | \n", "(1, 4, 8, 9, 16, 17, 23, 24, 31, 49, 68, 77, 8... | \n", "1728.0 | \n", "15.997430 | \n", "((1, 4, 8, 9, 16, 17, 23, 24, 31, 68, 77, 88, ... | \n", "ind_mutate | \n", "<__main__.SubsetSelector object at 0x3aa303430> | \n", "99.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "1.0 | \n", "
| 9998 | \n", "(8, 9, 17, 23, 24, 25, 31, 51, 77) | \n", "972.0 | \n", "11.991547 | \n", "((8, 9, 17, 23, 24, 31, 77, 88), (8, 9, 17, 23... | \n", "ind_mutate | \n", "<__main__.SubsetSelector object at 0x3a7399600> | \n", "99.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
| 9999 | \n", "(8, 23, 24, 73, 79) | \n", "648.0 | \n", "12.109013 | \n", "((8, 16, 17, 23, 24), (8, 16, 17, 23, 24)) | \n", "ind_mutate | \n", "<__main__.SubsetSelector object at 0x3a88d4430> | \n", "99.0 | \n", "1.740209e+09 | \n", "1.740209e+09 | \n", "None | \n", "NaN | \n", "
10000 rows × 11 columns
\n", "