2017-09-03 14:52:23 -07:00
|
|
|
#include <errno.h>
|
|
|
|
|
#include <fcntl.h>
|
2017-08-31 10:58:19 -07:00
|
|
|
#include <stdbool.h>
|
2019-06-21 21:13:27 -07:00
|
|
|
#include <stdint.h>
|
2017-08-31 10:58:19 -07:00
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
2017-09-03 14:52:23 -07:00
|
|
|
#include <unistd.h>
|
2017-08-31 10:58:19 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef struct {
|
2017-08-31 10:58:19 -07:00
|
|
|
char* buffer;
|
|
|
|
|
size_t buffer_length;
|
|
|
|
|
ssize_t input_length;
|
2019-06-21 21:11:21 -07:00
|
|
|
} InputBuffer;
|
2017-08-31 10:58:19 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef enum {
|
2017-10-01 16:02:19 -07:00
|
|
|
EXECUTE_SUCCESS,
|
|
|
|
|
EXECUTE_DUPLICATE_KEY,
|
2019-06-21 21:11:21 -07:00
|
|
|
} ExecuteResult;
|
2017-08-31 20:36:14 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef enum {
|
2017-08-31 17:23:52 -07:00
|
|
|
META_COMMAND_SUCCESS,
|
|
|
|
|
META_COMMAND_UNRECOGNIZED_COMMAND
|
2019-06-21 21:11:21 -07:00
|
|
|
} MetaCommandResult;
|
2017-08-31 17:23:52 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef enum {
|
2017-08-31 20:36:14 -07:00
|
|
|
PREPARE_SUCCESS,
|
2017-09-02 14:07:08 -07:00
|
|
|
PREPARE_NEGATIVE_ID,
|
|
|
|
|
PREPARE_STRING_TOO_LONG,
|
2017-08-31 20:36:14 -07:00
|
|
|
PREPARE_SYNTAX_ERROR,
|
|
|
|
|
PREPARE_UNRECOGNIZED_STATEMENT
|
2019-06-21 21:11:21 -07:00
|
|
|
} PrepareResult;
|
2017-08-31 17:23:52 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType;
|
2017-08-31 17:23:52 -07:00
|
|
|
|
2019-04-16 16:34:16 +03:00
|
|
|
#define COLUMN_USERNAME_SIZE 32
|
|
|
|
|
#define COLUMN_EMAIL_SIZE 255
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef struct {
|
2017-08-31 20:36:14 -07:00
|
|
|
uint32_t id;
|
2017-09-02 14:07:08 -07:00
|
|
|
char username[COLUMN_USERNAME_SIZE + 1];
|
|
|
|
|
char email[COLUMN_EMAIL_SIZE + 1];
|
2019-06-21 21:11:21 -07:00
|
|
|
} Row;
|
2017-08-31 20:36:14 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef struct {
|
2017-08-31 17:23:52 -07:00
|
|
|
StatementType type;
|
2017-08-31 20:36:14 -07:00
|
|
|
Row row_to_insert; // only used by insert statement
|
2019-06-21 21:11:21 -07:00
|
|
|
} Statement;
|
2017-08-31 17:23:52 -07:00
|
|
|
|
2017-09-01 17:16:21 -07:00
|
|
|
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)
|
|
|
|
|
|
|
|
|
|
const uint32_t ID_SIZE = size_of_attribute(Row, id);
|
|
|
|
|
const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
|
|
|
|
|
const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
|
2017-08-31 20:36:14 -07:00
|
|
|
const uint32_t ID_OFFSET = 0;
|
|
|
|
|
const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
|
|
|
|
|
const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
|
|
|
|
|
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
|
|
|
|
|
|
|
|
|
|
const uint32_t PAGE_SIZE = 4096;
|
2023-05-19 21:47:39 -07:00
|
|
|
#define TABLE_MAX_PAGES 400
|
|
|
|
|
|
|
|
|
|
#define INVALID_PAGE_NUM UINT32_MAX
|
2017-08-31 20:36:14 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef struct {
|
2017-09-03 14:52:23 -07:00
|
|
|
int file_descriptor;
|
|
|
|
|
uint32_t file_length;
|
2017-09-17 22:35:27 -07:00
|
|
|
uint32_t num_pages;
|
2017-08-31 20:36:14 -07:00
|
|
|
void* pages[TABLE_MAX_PAGES];
|
2019-06-21 21:11:21 -07:00
|
|
|
} Pager;
|
2017-09-03 14:52:23 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef struct {
|
2017-09-03 14:52:23 -07:00
|
|
|
Pager* pager;
|
2017-09-17 22:35:27 -07:00
|
|
|
uint32_t root_page_num;
|
2019-06-21 21:11:21 -07:00
|
|
|
} Table;
|
2017-08-31 20:36:14 -07:00
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef struct {
|
2017-09-10 20:34:18 -07:00
|
|
|
Table* table;
|
2017-09-17 22:35:27 -07:00
|
|
|
uint32_t page_num;
|
|
|
|
|
uint32_t cell_num;
|
2017-09-10 20:34:18 -07:00
|
|
|
bool end_of_table; // Indicates a position one past the last element
|
2019-06-21 21:11:21 -07:00
|
|
|
} Cursor;
|
2017-09-10 20:34:18 -07:00
|
|
|
|
2017-08-31 20:36:14 -07:00
|
|
|
void print_row(Row* row) {
|
|
|
|
|
printf("(%d, %s, %s)\n", row->id, row->username, row->email);
|
|
|
|
|
}
|
|
|
|
|
|
2019-06-21 21:11:21 -07:00
|
|
|
typedef enum { NODE_INTERNAL, NODE_LEAF } NodeType;
|
2017-09-17 22:35:27 -07:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Common Node Header Layout
|
|
|
|
|
*/
|
|
|
|
|
const uint32_t NODE_TYPE_SIZE = sizeof(uint8_t);
|
|
|
|
|
const uint32_t NODE_TYPE_OFFSET = 0;
|
|
|
|
|
const uint32_t IS_ROOT_SIZE = sizeof(uint8_t);
|
|
|
|
|
const uint32_t IS_ROOT_OFFSET = NODE_TYPE_SIZE;
|
2017-11-25 10:15:53 -08:00
|
|
|
const uint32_t PARENT_POINTER_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
|
2017-09-17 22:35:27 -07:00
|
|
|
const uint8_t COMMON_NODE_HEADER_SIZE =
|
2017-11-25 10:15:53 -08:00
|
|
|
NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE;
|
2017-09-17 22:35:27 -07:00
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
/*
|
|
|
|
|
* Internal Node Header Layout
|
|
|
|
|
*/
|
|
|
|
|
const uint32_t INTERNAL_NODE_NUM_KEYS_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t INTERNAL_NODE_NUM_KEYS_OFFSET = COMMON_NODE_HEADER_SIZE;
|
|
|
|
|
const uint32_t INTERNAL_NODE_RIGHT_CHILD_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t INTERNAL_NODE_RIGHT_CHILD_OFFSET =
|
|
|
|
|
INTERNAL_NODE_NUM_KEYS_OFFSET + INTERNAL_NODE_NUM_KEYS_SIZE;
|
|
|
|
|
const uint32_t INTERNAL_NODE_HEADER_SIZE = COMMON_NODE_HEADER_SIZE +
|
|
|
|
|
INTERNAL_NODE_NUM_KEYS_SIZE +
|
|
|
|
|
INTERNAL_NODE_RIGHT_CHILD_SIZE;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Internal Node Body Layout
|
|
|
|
|
*/
|
|
|
|
|
const uint32_t INTERNAL_NODE_KEY_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t INTERNAL_NODE_CHILD_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t INTERNAL_NODE_CELL_SIZE =
|
|
|
|
|
INTERNAL_NODE_CHILD_SIZE + INTERNAL_NODE_KEY_SIZE;
|
2017-11-25 14:59:26 -08:00
|
|
|
/* Keep this small for testing */
|
2023-05-19 21:47:39 -07:00
|
|
|
const uint32_t INTERNAL_NODE_MAX_KEYS = 3;
|
2017-10-09 16:51:34 -07:00
|
|
|
|
2017-09-17 22:35:27 -07:00
|
|
|
/*
|
|
|
|
|
* Leaf Node Header Layout
|
|
|
|
|
*/
|
|
|
|
|
const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;
|
2017-11-11 14:19:12 -08:00
|
|
|
const uint32_t LEAF_NODE_NEXT_LEAF_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t LEAF_NODE_NEXT_LEAF_OFFSET =
|
|
|
|
|
LEAF_NODE_NUM_CELLS_OFFSET + LEAF_NODE_NUM_CELLS_SIZE;
|
|
|
|
|
const uint32_t LEAF_NODE_HEADER_SIZE = COMMON_NODE_HEADER_SIZE +
|
|
|
|
|
LEAF_NODE_NUM_CELLS_SIZE +
|
|
|
|
|
LEAF_NODE_NEXT_LEAF_SIZE;
|
2017-09-17 22:35:27 -07:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Leaf Node Body Layout
|
|
|
|
|
*/
|
|
|
|
|
const uint32_t LEAF_NODE_KEY_SIZE = sizeof(uint32_t);
|
|
|
|
|
const uint32_t LEAF_NODE_KEY_OFFSET = 0;
|
|
|
|
|
const uint32_t LEAF_NODE_VALUE_SIZE = ROW_SIZE;
|
|
|
|
|
const uint32_t LEAF_NODE_VALUE_OFFSET =
|
|
|
|
|
LEAF_NODE_KEY_OFFSET + LEAF_NODE_KEY_SIZE;
|
|
|
|
|
const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
|
|
|
|
|
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
|
|
|
|
|
const uint32_t LEAF_NODE_MAX_CELLS =
|
|
|
|
|
LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;
|
2017-10-09 16:51:34 -07:00
|
|
|
const uint32_t LEAF_NODE_RIGHT_SPLIT_COUNT = (LEAF_NODE_MAX_CELLS + 1) / 2;
|
|
|
|
|
const uint32_t LEAF_NODE_LEFT_SPLIT_COUNT =
|
|
|
|
|
(LEAF_NODE_MAX_CELLS + 1) - LEAF_NODE_RIGHT_SPLIT_COUNT;
|
2017-09-17 22:35:27 -07:00
|
|
|
|
2017-10-01 16:02:19 -07:00
|
|
|
NodeType get_node_type(void* node) {
|
2017-10-05 20:43:03 -07:00
|
|
|
uint8_t value = *((uint8_t*)(node + NODE_TYPE_OFFSET));
|
2017-10-01 16:02:19 -07:00
|
|
|
return (NodeType)value;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void set_node_type(void* node, NodeType type) {
|
|
|
|
|
uint8_t value = type;
|
2017-10-05 20:43:03 -07:00
|
|
|
*((uint8_t*)(node + NODE_TYPE_OFFSET)) = value;
|
2017-10-01 16:02:19 -07:00
|
|
|
}
|
|
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
bool is_node_root(void* node) {
|
|
|
|
|
uint8_t value = *((uint8_t*)(node + IS_ROOT_OFFSET));
|
|
|
|
|
return (bool)value;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void set_node_root(void* node, bool is_root) {
|
|
|
|
|
uint8_t value = is_root;
|
|
|
|
|
*((uint8_t*)(node + IS_ROOT_OFFSET)) = value;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-25 14:59:26 -08:00
|
|
|
uint32_t* node_parent(void* node) { return node + PARENT_POINTER_OFFSET; }
|
|
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
uint32_t* internal_node_num_keys(void* node) {
|
|
|
|
|
return node + INTERNAL_NODE_NUM_KEYS_OFFSET;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t* internal_node_right_child(void* node) {
|
|
|
|
|
return node + INTERNAL_NODE_RIGHT_CHILD_OFFSET;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t* internal_node_cell(void* node, uint32_t cell_num) {
|
|
|
|
|
return node + INTERNAL_NODE_HEADER_SIZE + cell_num * INTERNAL_NODE_CELL_SIZE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t* internal_node_child(void* node, uint32_t child_num) {
|
|
|
|
|
uint32_t num_keys = *internal_node_num_keys(node);
|
|
|
|
|
if (child_num > num_keys) {
|
|
|
|
|
printf("Tried to access child_num %d > num_keys %d\n", child_num, num_keys);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
} else if (child_num == num_keys) {
|
2023-05-19 21:47:39 -07:00
|
|
|
uint32_t* right_child = internal_node_right_child(node);
|
|
|
|
|
if (*right_child == INVALID_PAGE_NUM) {
|
|
|
|
|
printf("Tried to access right child of node, but was invalid page\n");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
return right_child;
|
2017-10-09 16:51:34 -07:00
|
|
|
} else {
|
2023-05-19 21:47:39 -07:00
|
|
|
uint32_t* child = internal_node_cell(node, child_num);
|
|
|
|
|
if (*child == INVALID_PAGE_NUM) {
|
|
|
|
|
printf("Tried to access child %d of node, but was invalid page\n", child_num);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
return child;
|
2017-10-09 16:51:34 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t* internal_node_key(void* node, uint32_t key_num) {
|
2017-11-25 14:59:26 -08:00
|
|
|
return (void*)internal_node_cell(node, key_num) + INTERNAL_NODE_CHILD_SIZE;
|
2017-10-09 16:51:34 -07:00
|
|
|
}
|
|
|
|
|
|
2017-09-17 22:35:27 -07:00
|
|
|
uint32_t* leaf_node_num_cells(void* node) {
|
|
|
|
|
return node + LEAF_NODE_NUM_CELLS_OFFSET;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-11 14:19:12 -08:00
|
|
|
uint32_t* leaf_node_next_leaf(void* node) {
|
|
|
|
|
return node + LEAF_NODE_NEXT_LEAF_OFFSET;
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-25 21:00:15 -07:00
|
|
|
void* leaf_node_cell(void* node, uint32_t cell_num) {
|
2017-09-17 22:35:27 -07:00
|
|
|
return node + LEAF_NODE_HEADER_SIZE + cell_num * LEAF_NODE_CELL_SIZE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t* leaf_node_key(void* node, uint32_t cell_num) {
|
|
|
|
|
return leaf_node_cell(node, cell_num);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void* leaf_node_value(void* node, uint32_t cell_num) {
|
|
|
|
|
return leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE;
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-03 14:52:23 -07:00
|
|
|
void* get_page(Pager* pager, uint32_t page_num) {
|
|
|
|
|
if (page_num > TABLE_MAX_PAGES) {
|
|
|
|
|
printf("Tried to fetch page number out of bounds. %d > %d\n", page_num,
|
|
|
|
|
TABLE_MAX_PAGES);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pager->pages[page_num] == NULL) {
|
|
|
|
|
// Cache miss. Allocate memory and load from file.
|
|
|
|
|
void* page = malloc(PAGE_SIZE);
|
|
|
|
|
uint32_t num_pages = pager->file_length / PAGE_SIZE;
|
|
|
|
|
|
|
|
|
|
// We might save a partial page at the end of the file
|
|
|
|
|
if (pager->file_length % PAGE_SIZE) {
|
|
|
|
|
num_pages += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (page_num <= num_pages) {
|
|
|
|
|
lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
|
|
|
|
|
ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
|
|
|
|
|
if (bytes_read == -1) {
|
|
|
|
|
printf("Error reading file: %d\n", errno);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pager->pages[page_num] = page;
|
2017-09-17 22:35:27 -07:00
|
|
|
|
|
|
|
|
if (page_num >= pager->num_pages) {
|
|
|
|
|
pager->num_pages = page_num + 1;
|
|
|
|
|
}
|
2017-09-03 14:52:23 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return pager->pages[page_num];
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-19 21:47:39 -07:00
|
|
|
uint32_t get_node_max_key(Pager* pager, void* node) {
|
|
|
|
|
if (get_node_type(node) == NODE_LEAF) {
|
|
|
|
|
return *leaf_node_key(node, *leaf_node_num_cells(node) - 1);
|
|
|
|
|
}
|
|
|
|
|
void* right_child = get_page(pager,*internal_node_right_child(node));
|
|
|
|
|
return get_node_max_key(pager, right_child);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void print_constants() {
|
|
|
|
|
printf("ROW_SIZE: %d\n", ROW_SIZE);
|
|
|
|
|
printf("COMMON_NODE_HEADER_SIZE: %d\n", COMMON_NODE_HEADER_SIZE);
|
|
|
|
|
printf("LEAF_NODE_HEADER_SIZE: %d\n", LEAF_NODE_HEADER_SIZE);
|
|
|
|
|
printf("LEAF_NODE_CELL_SIZE: %d\n", LEAF_NODE_CELL_SIZE);
|
|
|
|
|
printf("LEAF_NODE_SPACE_FOR_CELLS: %d\n", LEAF_NODE_SPACE_FOR_CELLS);
|
|
|
|
|
printf("LEAF_NODE_MAX_CELLS: %d\n", LEAF_NODE_MAX_CELLS);
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
void indent(uint32_t level) {
|
|
|
|
|
for (uint32_t i = 0; i < level; i++) {
|
|
|
|
|
printf(" ");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void print_tree(Pager* pager, uint32_t page_num, uint32_t indentation_level) {
|
|
|
|
|
void* node = get_page(pager, page_num);
|
|
|
|
|
uint32_t num_keys, child;
|
|
|
|
|
|
|
|
|
|
switch (get_node_type(node)) {
|
|
|
|
|
case (NODE_LEAF):
|
|
|
|
|
num_keys = *leaf_node_num_cells(node);
|
|
|
|
|
indent(indentation_level);
|
|
|
|
|
printf("- leaf (size %d)\n", num_keys);
|
|
|
|
|
for (uint32_t i = 0; i < num_keys; i++) {
|
|
|
|
|
indent(indentation_level + 1);
|
|
|
|
|
printf("- %d\n", *leaf_node_key(node, i));
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case (NODE_INTERNAL):
|
|
|
|
|
num_keys = *internal_node_num_keys(node);
|
|
|
|
|
indent(indentation_level);
|
|
|
|
|
printf("- internal (size %d)\n", num_keys);
|
2023-05-19 21:47:39 -07:00
|
|
|
if (num_keys > 0) {
|
|
|
|
|
for (uint32_t i = 0; i < num_keys; i++) {
|
|
|
|
|
child = *internal_node_child(node, i);
|
|
|
|
|
print_tree(pager, child, indentation_level + 1);
|
|
|
|
|
|
|
|
|
|
indent(indentation_level + 1);
|
|
|
|
|
printf("- key %d\n", *internal_node_key(node, i));
|
|
|
|
|
}
|
|
|
|
|
child = *internal_node_right_child(node);
|
2017-10-09 16:51:34 -07:00
|
|
|
print_tree(pager, child, indentation_level + 1);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void serialize_row(Row* source, void* destination) {
|
|
|
|
|
memcpy(destination + ID_OFFSET, &(source->id), ID_SIZE);
|
|
|
|
|
memcpy(destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
|
|
|
|
|
memcpy(destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void deserialize_row(void* source, Row* destination) {
|
|
|
|
|
memcpy(&(destination->id), source + ID_OFFSET, ID_SIZE);
|
|
|
|
|
memcpy(&(destination->username), source + USERNAME_OFFSET, USERNAME_SIZE);
|
|
|
|
|
memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void initialize_leaf_node(void* node) {
|
|
|
|
|
set_node_type(node, NODE_LEAF);
|
|
|
|
|
set_node_root(node, false);
|
|
|
|
|
*leaf_node_num_cells(node) = 0;
|
2017-11-11 14:19:12 -08:00
|
|
|
*leaf_node_next_leaf(node) = 0; // 0 represents no sibling
|
2017-10-09 16:51:34 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void initialize_internal_node(void* node) {
|
|
|
|
|
set_node_type(node, NODE_INTERNAL);
|
|
|
|
|
set_node_root(node, false);
|
|
|
|
|
*internal_node_num_keys(node) = 0;
|
2023-05-19 21:47:39 -07:00
|
|
|
/*
|
|
|
|
|
Necessary because the root page number is 0; by not initializing an internal
|
|
|
|
|
node's right child to an invalid page number when initializing the node, we may
|
|
|
|
|
end up with 0 as the node's right child, which makes the node a parent of the root
|
|
|
|
|
*/
|
|
|
|
|
*internal_node_right_child(node) = INVALID_PAGE_NUM;
|
2017-10-09 16:51:34 -07:00
|
|
|
}
|
|
|
|
|
|
2017-10-01 16:02:19 -07:00
|
|
|
Cursor* leaf_node_find(Table* table, uint32_t page_num, uint32_t key) {
|
|
|
|
|
void* node = get_page(table->pager, page_num);
|
|
|
|
|
uint32_t num_cells = *leaf_node_num_cells(node);
|
|
|
|
|
|
2017-09-10 20:34:18 -07:00
|
|
|
Cursor* cursor = malloc(sizeof(Cursor));
|
|
|
|
|
cursor->table = table;
|
2017-10-01 16:02:19 -07:00
|
|
|
cursor->page_num = page_num;
|
2018-08-09 12:58:31 +07:00
|
|
|
cursor->end_of_table = false;
|
2017-10-01 16:02:19 -07:00
|
|
|
|
|
|
|
|
// Binary search
|
|
|
|
|
uint32_t min_index = 0;
|
|
|
|
|
uint32_t one_past_max_index = num_cells;
|
|
|
|
|
while (one_past_max_index != min_index) {
|
|
|
|
|
uint32_t index = (min_index + one_past_max_index) / 2;
|
|
|
|
|
uint32_t key_at_index = *leaf_node_key(node, index);
|
|
|
|
|
if (key == key_at_index) {
|
|
|
|
|
cursor->cell_num = index;
|
|
|
|
|
return cursor;
|
|
|
|
|
}
|
|
|
|
|
if (key < key_at_index) {
|
|
|
|
|
one_past_max_index = index;
|
|
|
|
|
} else {
|
|
|
|
|
min_index = index + 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-09-10 20:34:18 -07:00
|
|
|
|
2017-10-01 16:02:19 -07:00
|
|
|
cursor->cell_num = min_index;
|
2017-09-10 20:34:18 -07:00
|
|
|
return cursor;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-25 14:59:26 -08:00
|
|
|
uint32_t internal_node_find_child(void* node, uint32_t key) {
|
|
|
|
|
/*
|
|
|
|
|
Return the index of the child which should contain
|
|
|
|
|
the given key.
|
|
|
|
|
*/
|
|
|
|
|
|
2017-10-22 18:33:18 -07:00
|
|
|
uint32_t num_keys = *internal_node_num_keys(node);
|
|
|
|
|
|
2017-11-25 14:59:26 -08:00
|
|
|
/* Binary search */
|
2017-10-22 18:33:18 -07:00
|
|
|
uint32_t min_index = 0;
|
|
|
|
|
uint32_t max_index = num_keys; /* there is one more child than key */
|
|
|
|
|
|
|
|
|
|
while (min_index != max_index) {
|
|
|
|
|
uint32_t index = (min_index + max_index) / 2;
|
|
|
|
|
uint32_t key_to_right = *internal_node_key(node, index);
|
|
|
|
|
if (key_to_right >= key) {
|
|
|
|
|
max_index = index;
|
|
|
|
|
} else {
|
|
|
|
|
min_index = index + 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-25 14:59:26 -08:00
|
|
|
return min_index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Cursor* internal_node_find(Table* table, uint32_t page_num, uint32_t key) {
|
|
|
|
|
void* node = get_page(table->pager, page_num);
|
|
|
|
|
|
|
|
|
|
uint32_t child_index = internal_node_find_child(node, key);
|
|
|
|
|
uint32_t child_num = *internal_node_child(node, child_index);
|
2017-10-22 18:33:18 -07:00
|
|
|
void* child = get_page(table->pager, child_num);
|
|
|
|
|
switch (get_node_type(child)) {
|
|
|
|
|
case NODE_LEAF:
|
|
|
|
|
return leaf_node_find(table, child_num, key);
|
|
|
|
|
case NODE_INTERNAL:
|
|
|
|
|
return internal_node_find(table, child_num, key);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-01 16:02:19 -07:00
|
|
|
/*
|
|
|
|
|
Return the position of the given key.
|
|
|
|
|
If the key is not present, return the position
|
|
|
|
|
where it should be inserted
|
|
|
|
|
*/
|
|
|
|
|
Cursor* table_find(Table* table, uint32_t key) {
|
|
|
|
|
uint32_t root_page_num = table->root_page_num;
|
|
|
|
|
void* root_node = get_page(table->pager, root_page_num);
|
|
|
|
|
|
|
|
|
|
if (get_node_type(root_node) == NODE_LEAF) {
|
|
|
|
|
return leaf_node_find(table, root_page_num, key);
|
|
|
|
|
} else {
|
2017-10-22 18:33:18 -07:00
|
|
|
return internal_node_find(table, root_page_num, key);
|
2017-10-01 16:02:19 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-11 14:19:12 -08:00
|
|
|
Cursor* table_start(Table* table) {
|
|
|
|
|
Cursor* cursor = table_find(table, 0);
|
|
|
|
|
|
|
|
|
|
void* node = get_page(table->pager, cursor->page_num);
|
|
|
|
|
uint32_t num_cells = *leaf_node_num_cells(node);
|
|
|
|
|
cursor->end_of_table = (num_cells == 0);
|
|
|
|
|
|
|
|
|
|
return cursor;
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-10 20:34:18 -07:00
|
|
|
void* cursor_value(Cursor* cursor) {
|
2017-09-17 22:35:27 -07:00
|
|
|
uint32_t page_num = cursor->page_num;
|
2017-09-10 20:34:18 -07:00
|
|
|
void* page = get_page(cursor->table->pager, page_num);
|
2017-09-17 22:35:27 -07:00
|
|
|
return leaf_node_value(page, cursor->cell_num);
|
2017-08-31 20:36:14 -07:00
|
|
|
}
|
|
|
|
|
|
2017-09-10 20:34:18 -07:00
|
|
|
void cursor_advance(Cursor* cursor) {
|
2017-09-17 22:35:27 -07:00
|
|
|
uint32_t page_num = cursor->page_num;
|
|
|
|
|
void* node = get_page(cursor->table->pager, page_num);
|
|
|
|
|
|
|
|
|
|
cursor->cell_num += 1;
|
|
|
|
|
if (cursor->cell_num >= (*leaf_node_num_cells(node))) {
|
2017-11-11 14:19:12 -08:00
|
|
|
/* Advance to next leaf node */
|
|
|
|
|
uint32_t next_page_num = *leaf_node_next_leaf(node);
|
|
|
|
|
if (next_page_num == 0) {
|
|
|
|
|
/* This was rightmost leaf */
|
|
|
|
|
cursor->end_of_table = true;
|
|
|
|
|
} else {
|
|
|
|
|
cursor->page_num = next_page_num;
|
|
|
|
|
cursor->cell_num = 0;
|
|
|
|
|
}
|
2017-09-10 20:34:18 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-03 14:52:23 -07:00
|
|
|
Pager* pager_open(const char* filename) {
|
|
|
|
|
int fd = open(filename,
|
|
|
|
|
O_RDWR | // Read/Write mode
|
|
|
|
|
O_CREAT, // Create file if it does not exist
|
|
|
|
|
S_IWUSR | // User write permission
|
|
|
|
|
S_IRUSR // User read permission
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
if (fd == -1) {
|
|
|
|
|
printf("Unable to open file\n");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
off_t file_length = lseek(fd, 0, SEEK_END);
|
|
|
|
|
|
|
|
|
|
Pager* pager = malloc(sizeof(Pager));
|
|
|
|
|
pager->file_descriptor = fd;
|
|
|
|
|
pager->file_length = file_length;
|
2017-09-17 22:35:27 -07:00
|
|
|
pager->num_pages = (file_length / PAGE_SIZE);
|
|
|
|
|
|
|
|
|
|
if (file_length % PAGE_SIZE != 0) {
|
|
|
|
|
printf("Db file is not a whole number of pages. Corrupt file.\n");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2017-09-03 14:52:23 -07:00
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
|
|
|
|
|
pager->pages[i] = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return pager;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Table* db_open(const char* filename) {
|
|
|
|
|
Pager* pager = pager_open(filename);
|
|
|
|
|
|
2017-08-31 20:36:14 -07:00
|
|
|
Table* table = malloc(sizeof(Table));
|
2017-09-03 14:52:23 -07:00
|
|
|
table->pager = pager;
|
2018-04-17 18:28:44 +09:00
|
|
|
table->root_page_num = 0;
|
2017-09-17 22:35:27 -07:00
|
|
|
|
|
|
|
|
if (pager->num_pages == 0) {
|
2017-10-01 12:48:16 -07:00
|
|
|
// New database file. Initialize page 0 as leaf node.
|
2017-09-17 22:35:27 -07:00
|
|
|
void* root_node = get_page(pager, 0);
|
|
|
|
|
initialize_leaf_node(root_node);
|
2017-10-09 16:51:34 -07:00
|
|
|
set_node_root(root_node, true);
|
2017-09-17 22:35:27 -07:00
|
|
|
}
|
2017-08-31 20:36:14 -07:00
|
|
|
|
|
|
|
|
return table;
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-31 10:58:19 -07:00
|
|
|
InputBuffer* new_input_buffer() {
|
|
|
|
|
InputBuffer* input_buffer = malloc(sizeof(InputBuffer));
|
|
|
|
|
input_buffer->buffer = NULL;
|
|
|
|
|
input_buffer->buffer_length = 0;
|
|
|
|
|
input_buffer->input_length = 0;
|
|
|
|
|
|
|
|
|
|
return input_buffer;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void print_prompt() { printf("db > "); }
|
|
|
|
|
|
|
|
|
|
void read_input(InputBuffer* input_buffer) {
|
|
|
|
|
ssize_t bytes_read =
|
|
|
|
|
getline(&(input_buffer->buffer), &(input_buffer->buffer_length), stdin);
|
|
|
|
|
|
|
|
|
|
if (bytes_read <= 0) {
|
|
|
|
|
printf("Error reading input\n");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Ignore trailing newline
|
|
|
|
|
input_buffer->input_length = bytes_read - 1;
|
|
|
|
|
input_buffer->buffer[bytes_read - 1] = 0;
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-16 16:34:16 +03:00
|
|
|
void close_input_buffer(InputBuffer* input_buffer) {
|
2019-06-21 21:13:27 -07:00
|
|
|
free(input_buffer->buffer);
|
|
|
|
|
free(input_buffer);
|
2019-04-16 16:34:16 +03:00
|
|
|
}
|
|
|
|
|
|
2017-09-17 22:35:27 -07:00
|
|
|
void pager_flush(Pager* pager, uint32_t page_num) {
|
2017-09-03 14:52:23 -07:00
|
|
|
if (pager->pages[page_num] == NULL) {
|
|
|
|
|
printf("Tried to flush null page\n");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
off_t offset = lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
|
|
|
|
|
|
|
|
|
|
if (offset == -1) {
|
|
|
|
|
printf("Error seeking: %d\n", errno);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ssize_t bytes_written =
|
2017-09-17 22:35:27 -07:00
|
|
|
write(pager->file_descriptor, pager->pages[page_num], PAGE_SIZE);
|
2017-09-03 14:52:23 -07:00
|
|
|
|
|
|
|
|
if (bytes_written == -1) {
|
|
|
|
|
printf("Error writing: %d\n", errno);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void db_close(Table* table) {
|
|
|
|
|
Pager* pager = table->pager;
|
|
|
|
|
|
2017-09-17 22:35:27 -07:00
|
|
|
for (uint32_t i = 0; i < pager->num_pages; i++) {
|
2017-09-03 14:52:23 -07:00
|
|
|
if (pager->pages[i] == NULL) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2017-09-17 22:35:27 -07:00
|
|
|
pager_flush(pager, i);
|
2017-09-03 14:52:23 -07:00
|
|
|
free(pager->pages[i]);
|
|
|
|
|
pager->pages[i] = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int result = close(pager->file_descriptor);
|
|
|
|
|
if (result == -1) {
|
|
|
|
|
printf("Error closing db file.\n");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
|
|
|
|
|
void* page = pager->pages[i];
|
|
|
|
|
if (page) {
|
|
|
|
|
free(page);
|
|
|
|
|
pager->pages[i] = NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
free(pager);
|
2019-04-16 16:34:16 +03:00
|
|
|
free(table);
|
2017-09-03 14:52:23 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
|
2017-08-31 17:23:52 -07:00
|
|
|
if (strcmp(input_buffer->buffer, ".exit") == 0) {
|
2019-04-16 16:34:16 +03:00
|
|
|
close_input_buffer(input_buffer);
|
2017-09-03 14:52:23 -07:00
|
|
|
db_close(table);
|
2017-08-31 17:23:52 -07:00
|
|
|
exit(EXIT_SUCCESS);
|
2017-09-17 22:35:27 -07:00
|
|
|
} else if (strcmp(input_buffer->buffer, ".btree") == 0) {
|
|
|
|
|
printf("Tree:\n");
|
2017-10-09 16:51:34 -07:00
|
|
|
print_tree(table->pager, 0, 0);
|
2017-09-17 22:35:27 -07:00
|
|
|
return META_COMMAND_SUCCESS;
|
|
|
|
|
} else if (strcmp(input_buffer->buffer, ".constants") == 0) {
|
|
|
|
|
printf("Constants:\n");
|
|
|
|
|
print_constants();
|
|
|
|
|
return META_COMMAND_SUCCESS;
|
2017-08-31 17:23:52 -07:00
|
|
|
} else {
|
|
|
|
|
return META_COMMAND_UNRECOGNIZED_COMMAND;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-02 14:07:08 -07:00
|
|
|
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
|
|
|
|
|
statement->type = STATEMENT_INSERT;
|
|
|
|
|
|
|
|
|
|
char* keyword = strtok(input_buffer->buffer, " ");
|
|
|
|
|
char* id_string = strtok(NULL, " ");
|
|
|
|
|
char* username = strtok(NULL, " ");
|
|
|
|
|
char* email = strtok(NULL, " ");
|
|
|
|
|
|
|
|
|
|
if (id_string == NULL || username == NULL || email == NULL) {
|
|
|
|
|
return PREPARE_SYNTAX_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int id = atoi(id_string);
|
|
|
|
|
if (id < 0) {
|
|
|
|
|
return PREPARE_NEGATIVE_ID;
|
|
|
|
|
}
|
|
|
|
|
if (strlen(username) > COLUMN_USERNAME_SIZE) {
|
|
|
|
|
return PREPARE_STRING_TOO_LONG;
|
|
|
|
|
}
|
|
|
|
|
if (strlen(email) > COLUMN_EMAIL_SIZE) {
|
|
|
|
|
return PREPARE_STRING_TOO_LONG;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
statement->row_to_insert.id = id;
|
|
|
|
|
strcpy(statement->row_to_insert.username, username);
|
|
|
|
|
strcpy(statement->row_to_insert.email, email);
|
|
|
|
|
|
|
|
|
|
return PREPARE_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-31 17:23:52 -07:00
|
|
|
PrepareResult prepare_statement(InputBuffer* input_buffer,
|
|
|
|
|
Statement* statement) {
|
|
|
|
|
if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
|
2017-09-02 14:07:08 -07:00
|
|
|
return prepare_insert(input_buffer, statement);
|
2017-08-31 17:23:52 -07:00
|
|
|
}
|
|
|
|
|
if (strcmp(input_buffer->buffer, "select") == 0) {
|
|
|
|
|
statement->type = STATEMENT_SELECT;
|
|
|
|
|
return PREPARE_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return PREPARE_UNRECOGNIZED_STATEMENT;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
/*
|
|
|
|
|
Until we start recycling free pages, new pages will always
|
|
|
|
|
go onto the end of the database file
|
|
|
|
|
*/
|
|
|
|
|
uint32_t get_unused_page_num(Pager* pager) { return pager->num_pages; }
|
|
|
|
|
|
|
|
|
|
void create_new_root(Table* table, uint32_t right_child_page_num) {
|
|
|
|
|
/*
|
|
|
|
|
Handle splitting the root.
|
|
|
|
|
Old root copied to new page, becomes left child.
|
|
|
|
|
Address of right child passed in.
|
|
|
|
|
Re-initialize root page to contain the new root node.
|
|
|
|
|
New root node points to two children.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
void* root = get_page(table->pager, table->root_page_num);
|
|
|
|
|
void* right_child = get_page(table->pager, right_child_page_num);
|
|
|
|
|
uint32_t left_child_page_num = get_unused_page_num(table->pager);
|
|
|
|
|
void* left_child = get_page(table->pager, left_child_page_num);
|
|
|
|
|
|
2023-05-19 21:47:39 -07:00
|
|
|
if (get_node_type(root) == NODE_INTERNAL) {
|
|
|
|
|
initialize_internal_node(right_child);
|
|
|
|
|
initialize_internal_node(left_child);
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
/* Left child has data copied from old root */
|
|
|
|
|
memcpy(left_child, root, PAGE_SIZE);
|
|
|
|
|
set_node_root(left_child, false);
|
|
|
|
|
|
2023-05-19 21:47:39 -07:00
|
|
|
if (get_node_type(left_child) == NODE_INTERNAL) {
|
|
|
|
|
void* child;
|
|
|
|
|
for (int i = 0; i < *internal_node_num_keys(left_child); i++) {
|
|
|
|
|
child = get_page(table->pager, *internal_node_child(left_child,i));
|
|
|
|
|
*node_parent(child) = left_child_page_num;
|
|
|
|
|
}
|
|
|
|
|
child = get_page(table->pager, *internal_node_right_child(left_child));
|
|
|
|
|
*node_parent(child) = left_child_page_num;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
/* Root node is a new internal node with one key and two children */
|
|
|
|
|
initialize_internal_node(root);
|
|
|
|
|
set_node_root(root, true);
|
|
|
|
|
*internal_node_num_keys(root) = 1;
|
|
|
|
|
*internal_node_child(root, 0) = left_child_page_num;
|
2023-05-19 21:47:39 -07:00
|
|
|
uint32_t left_child_max_key = get_node_max_key(table->pager, left_child);
|
2017-10-09 16:51:34 -07:00
|
|
|
*internal_node_key(root, 0) = left_child_max_key;
|
|
|
|
|
*internal_node_right_child(root) = right_child_page_num;
|
2017-11-25 14:59:26 -08:00
|
|
|
*node_parent(left_child) = table->root_page_num;
|
|
|
|
|
*node_parent(right_child) = table->root_page_num;
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-19 21:47:39 -07:00
|
|
|
void internal_node_split_and_insert(Table* table, uint32_t parent_page_num,
|
|
|
|
|
uint32_t child_page_num);
|
|
|
|
|
|
2017-11-25 14:59:26 -08:00
|
|
|
void internal_node_insert(Table* table, uint32_t parent_page_num,
|
|
|
|
|
uint32_t child_page_num) {
|
|
|
|
|
/*
|
|
|
|
|
Add a new child/key pair to parent that corresponds to child
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
void* parent = get_page(table->pager, parent_page_num);
|
|
|
|
|
void* child = get_page(table->pager, child_page_num);
|
2023-05-19 21:47:39 -07:00
|
|
|
uint32_t child_max_key = get_node_max_key(table->pager, child);
|
2017-11-25 14:59:26 -08:00
|
|
|
uint32_t index = internal_node_find_child(parent, child_max_key);
|
|
|
|
|
|
|
|
|
|
uint32_t original_num_keys = *internal_node_num_keys(parent);
|
|
|
|
|
|
2023-05-19 21:47:39 -07:00
|
|
|
if (original_num_keys >= INTERNAL_NODE_MAX_KEYS) {
|
|
|
|
|
internal_node_split_and_insert(table, parent_page_num, child_page_num);
|
|
|
|
|
return;
|
2017-11-25 14:59:26 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t right_child_page_num = *internal_node_right_child(parent);
|
2023-05-19 21:47:39 -07:00
|
|
|
/*
|
|
|
|
|
An internal node with a right child of INVALID_PAGE_NUM is empty
|
|
|
|
|
*/
|
|
|
|
|
if (right_child_page_num == INVALID_PAGE_NUM) {
|
|
|
|
|
*internal_node_right_child(parent) = child_page_num;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-25 14:59:26 -08:00
|
|
|
void* right_child = get_page(table->pager, right_child_page_num);
|
2023-05-19 21:47:39 -07:00
|
|
|
/*
|
|
|
|
|
If we are already at the max number of cells for a node, we cannot increment
|
|
|
|
|
before splitting. Incrementing without inserting a new key/child pair
|
|
|
|
|
and immediately calling internal_node_split_and_insert has the effect
|
|
|
|
|
of creating a new key at (max_cells + 1) with an uninitialized value
|
|
|
|
|
*/
|
|
|
|
|
*internal_node_num_keys(parent) = original_num_keys + 1;
|
2017-11-26 19:18:10 -08:00
|
|
|
|
2023-05-19 21:47:39 -07:00
|
|
|
if (child_max_key > get_node_max_key(table->pager, right_child)) {
|
2017-11-25 14:59:26 -08:00
|
|
|
/* Replace right child */
|
|
|
|
|
*internal_node_child(parent, original_num_keys) = right_child_page_num;
|
|
|
|
|
*internal_node_key(parent, original_num_keys) =
|
2023-05-19 21:47:39 -07:00
|
|
|
get_node_max_key(table->pager, right_child);
|
2017-11-25 14:59:26 -08:00
|
|
|
*internal_node_right_child(parent) = child_page_num;
|
|
|
|
|
} else {
|
|
|
|
|
/* Make room for the new cell */
|
|
|
|
|
for (uint32_t i = original_num_keys; i > index; i--) {
|
|
|
|
|
void* destination = internal_node_cell(parent, i);
|
|
|
|
|
void* source = internal_node_cell(parent, i - 1);
|
|
|
|
|
memcpy(destination, source, INTERNAL_NODE_CELL_SIZE);
|
|
|
|
|
}
|
|
|
|
|
*internal_node_child(parent, index) = child_page_num;
|
|
|
|
|
*internal_node_key(parent, index) = child_max_key;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void update_internal_node_key(void* node, uint32_t old_key, uint32_t new_key) {
|
|
|
|
|
uint32_t old_child_index = internal_node_find_child(node, old_key);
|
|
|
|
|
*internal_node_key(node, old_child_index) = new_key;
|
2017-10-09 16:51:34 -07:00
|
|
|
}
|
|
|
|
|
|
2023-05-19 21:47:39 -07:00
|
|
|
void internal_node_split_and_insert(Table* table, uint32_t parent_page_num,
|
|
|
|
|
uint32_t child_page_num) {
|
|
|
|
|
uint32_t old_page_num = parent_page_num;
|
|
|
|
|
void* old_node = get_page(table->pager,parent_page_num);
|
|
|
|
|
uint32_t old_max = get_node_max_key(table->pager, old_node);
|
|
|
|
|
|
|
|
|
|
void* child = get_page(table->pager, child_page_num);
|
|
|
|
|
uint32_t child_max = get_node_max_key(table->pager, child);
|
|
|
|
|
|
|
|
|
|
uint32_t new_page_num = get_unused_page_num(table->pager);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Declaring a flag before updating pointers which
|
|
|
|
|
records whether this operation involves splitting the root -
|
|
|
|
|
if it does, we will insert our newly created node during
|
|
|
|
|
the step where the table's new root is created. If it does
|
|
|
|
|
not, we have to insert the newly created node into its parent
|
|
|
|
|
after the old node's keys have been transferred over. We are not
|
|
|
|
|
able to do this if the newly created node's parent is not a newly
|
|
|
|
|
initialized root node, because in that case its parent may have existing
|
|
|
|
|
keys aside from our old node which we are splitting. If that is true, we
|
|
|
|
|
need to find a place for our newly created node in its parent, and we
|
|
|
|
|
cannot insert it at the correct index if it does not yet have any keys
|
|
|
|
|
*/
|
|
|
|
|
uint32_t splitting_root = is_node_root(old_node);
|
|
|
|
|
|
|
|
|
|
void* parent;
|
|
|
|
|
void* new_node;
|
|
|
|
|
if (splitting_root) {
|
|
|
|
|
create_new_root(table, new_page_num);
|
|
|
|
|
parent = get_page(table->pager,table->root_page_num);
|
|
|
|
|
/*
|
|
|
|
|
If we are splitting the root, we need to update old_node to point
|
|
|
|
|
to the new root's left child, new_page_num will already point to
|
|
|
|
|
the new root's right child
|
|
|
|
|
*/
|
|
|
|
|
old_page_num = *internal_node_child(parent,0);
|
|
|
|
|
old_node = get_page(table->pager, old_page_num);
|
|
|
|
|
} else {
|
|
|
|
|
parent = get_page(table->pager,*node_parent(old_node));
|
|
|
|
|
new_node = get_page(table->pager, new_page_num);
|
|
|
|
|
initialize_internal_node(new_node);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t* old_num_keys = internal_node_num_keys(old_node);
|
|
|
|
|
|
|
|
|
|
uint32_t cur_page_num = *internal_node_right_child(old_node);
|
|
|
|
|
void* cur = get_page(table->pager, cur_page_num);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
First put right child into new node and set right child of old node to invalid page number
|
|
|
|
|
*/
|
|
|
|
|
internal_node_insert(table, new_page_num, cur_page_num);
|
|
|
|
|
*node_parent(cur) = new_page_num;
|
|
|
|
|
*internal_node_right_child(old_node) = INVALID_PAGE_NUM;
|
|
|
|
|
/*
|
|
|
|
|
For each key until you get to the middle key, move the key and the child to the new node
|
|
|
|
|
*/
|
|
|
|
|
for (int i = INTERNAL_NODE_MAX_KEYS - 1; i > INTERNAL_NODE_MAX_KEYS / 2; i--) {
|
|
|
|
|
cur_page_num = *internal_node_child(old_node, i);
|
|
|
|
|
cur = get_page(table->pager, cur_page_num);
|
|
|
|
|
|
|
|
|
|
internal_node_insert(table, new_page_num, cur_page_num);
|
|
|
|
|
*node_parent(cur) = new_page_num;
|
|
|
|
|
|
|
|
|
|
(*old_num_keys)--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Set child before middle key, which is now the highest key, to be node's right child,
|
|
|
|
|
and decrement number of keys
|
|
|
|
|
*/
|
|
|
|
|
*internal_node_right_child(old_node) = *internal_node_child(old_node,*old_num_keys - 1);
|
|
|
|
|
(*old_num_keys)--;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Determine which of the two nodes after the split should contain the child to be inserted,
|
|
|
|
|
and insert the child
|
|
|
|
|
*/
|
|
|
|
|
uint32_t max_after_split = get_node_max_key(table->pager, old_node);
|
|
|
|
|
|
|
|
|
|
uint32_t destination_page_num = child_max < max_after_split ? old_page_num : new_page_num;
|
|
|
|
|
|
|
|
|
|
internal_node_insert(table, destination_page_num, child_page_num);
|
|
|
|
|
*node_parent(child) = destination_page_num;
|
|
|
|
|
|
|
|
|
|
update_internal_node_key(parent, old_max, get_node_max_key(table->pager, old_node));
|
|
|
|
|
|
|
|
|
|
if (!splitting_root) {
|
|
|
|
|
internal_node_insert(table,*node_parent(old_node),new_page_num);
|
|
|
|
|
*node_parent(new_node) = *node_parent(old_node);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-09 16:51:34 -07:00
|
|
|
void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {
|
|
|
|
|
/*
|
|
|
|
|
Create a new node and move half the cells over.
|
|
|
|
|
Insert the new value in one of the two nodes.
|
|
|
|
|
Update parent or create a new parent.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
void* old_node = get_page(cursor->table->pager, cursor->page_num);
|
2023-05-19 21:47:39 -07:00
|
|
|
uint32_t old_max = get_node_max_key(cursor->table->pager, old_node);
|
2017-10-09 16:51:34 -07:00
|
|
|
uint32_t new_page_num = get_unused_page_num(cursor->table->pager);
|
|
|
|
|
void* new_node = get_page(cursor->table->pager, new_page_num);
|
|
|
|
|
initialize_leaf_node(new_node);
|
2017-11-25 14:59:26 -08:00
|
|
|
*node_parent(new_node) = *node_parent(old_node);
|
2017-11-11 14:19:12 -08:00
|
|
|
*leaf_node_next_leaf(new_node) = *leaf_node_next_leaf(old_node);
|
|
|
|
|
*leaf_node_next_leaf(old_node) = new_page_num;
|
2017-10-09 16:51:34 -07:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
All existing keys plus new key should should be divided
|
|
|
|
|
evenly between old (left) and new (right) nodes.
|
|
|
|
|
Starting from the right, move each key to correct position.
|
|
|
|
|
*/
|
|
|
|
|
for (int32_t i = LEAF_NODE_MAX_CELLS; i >= 0; i--) {
|
|
|
|
|
void* destination_node;
|
|
|
|
|
if (i >= LEAF_NODE_LEFT_SPLIT_COUNT) {
|
|
|
|
|
destination_node = new_node;
|
|
|
|
|
} else {
|
|
|
|
|
destination_node = old_node;
|
|
|
|
|
}
|
|
|
|
|
uint32_t index_within_node = i % LEAF_NODE_LEFT_SPLIT_COUNT;
|
|
|
|
|
void* destination = leaf_node_cell(destination_node, index_within_node);
|
|
|
|
|
|
|
|
|
|
if (i == cursor->cell_num) {
|
2017-11-11 14:19:12 -08:00
|
|
|
serialize_row(value,
|
|
|
|
|
leaf_node_value(destination_node, index_within_node));
|
|
|
|
|
*leaf_node_key(destination_node, index_within_node) = key;
|
2017-10-09 16:51:34 -07:00
|
|
|
} else if (i > cursor->cell_num) {
|
|
|
|
|
memcpy(destination, leaf_node_cell(old_node, i - 1), LEAF_NODE_CELL_SIZE);
|
|
|
|
|
} else {
|
|
|
|
|
memcpy(destination, leaf_node_cell(old_node, i), LEAF_NODE_CELL_SIZE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Update cell count on both leaf nodes */
|
|
|
|
|
*(leaf_node_num_cells(old_node)) = LEAF_NODE_LEFT_SPLIT_COUNT;
|
|
|
|
|
*(leaf_node_num_cells(new_node)) = LEAF_NODE_RIGHT_SPLIT_COUNT;
|
|
|
|
|
|
|
|
|
|
if (is_node_root(old_node)) {
|
|
|
|
|
return create_new_root(cursor->table, new_page_num);
|
|
|
|
|
} else {
|
2017-11-25 14:59:26 -08:00
|
|
|
uint32_t parent_page_num = *node_parent(old_node);
|
2023-05-19 21:47:39 -07:00
|
|
|
uint32_t new_max = get_node_max_key(cursor->table->pager, old_node);
|
2017-11-25 14:59:26 -08:00
|
|
|
void* parent = get_page(cursor->table->pager, parent_page_num);
|
|
|
|
|
|
|
|
|
|
update_internal_node_key(parent, old_max, new_max);
|
|
|
|
|
internal_node_insert(cursor->table, parent_page_num, new_page_num);
|
|
|
|
|
return;
|
2017-10-09 16:51:34 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-17 22:35:27 -07:00
|
|
|
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
|
|
|
|
|
void* node = get_page(cursor->table->pager, cursor->page_num);
|
|
|
|
|
|
|
|
|
|
uint32_t num_cells = *leaf_node_num_cells(node);
|
|
|
|
|
if (num_cells >= LEAF_NODE_MAX_CELLS) {
|
|
|
|
|
// Node full
|
2017-10-09 16:51:34 -07:00
|
|
|
leaf_node_split_and_insert(cursor, key, value);
|
|
|
|
|
return;
|
2017-09-17 22:35:27 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (cursor->cell_num < num_cells) {
|
|
|
|
|
// Make room for new cell
|
|
|
|
|
for (uint32_t i = num_cells; i > cursor->cell_num; i--) {
|
|
|
|
|
memcpy(leaf_node_cell(node, i), leaf_node_cell(node, i - 1),
|
|
|
|
|
LEAF_NODE_CELL_SIZE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*(leaf_node_num_cells(node)) += 1;
|
|
|
|
|
*(leaf_node_key(node, cursor->cell_num)) = key;
|
|
|
|
|
serialize_row(value, leaf_node_value(node, cursor->cell_num));
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-31 20:36:14 -07:00
|
|
|
ExecuteResult execute_insert(Statement* statement, Table* table) {
|
|
|
|
|
Row* row_to_insert = &(statement->row_to_insert);
|
2017-10-01 16:02:19 -07:00
|
|
|
uint32_t key_to_insert = row_to_insert->id;
|
|
|
|
|
Cursor* cursor = table_find(table, key_to_insert);
|
|
|
|
|
|
2019-06-21 21:13:27 -07:00
|
|
|
void* node = get_page(table->pager, cursor->page_num);
|
2018-08-07 14:14:08 +07:00
|
|
|
uint32_t num_cells = *leaf_node_num_cells(node);
|
|
|
|
|
|
2017-10-01 16:02:19 -07:00
|
|
|
if (cursor->cell_num < num_cells) {
|
|
|
|
|
uint32_t key_at_index = *leaf_node_key(node, cursor->cell_num);
|
|
|
|
|
if (key_at_index == key_to_insert) {
|
|
|
|
|
return EXECUTE_DUPLICATE_KEY;
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-08-31 20:36:14 -07:00
|
|
|
|
2017-09-17 22:35:27 -07:00
|
|
|
leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
|
2017-08-31 20:36:14 -07:00
|
|
|
|
2017-09-10 20:34:18 -07:00
|
|
|
free(cursor);
|
|
|
|
|
|
2017-08-31 20:36:14 -07:00
|
|
|
return EXECUTE_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ExecuteResult execute_select(Statement* statement, Table* table) {
|
2017-09-10 20:34:18 -07:00
|
|
|
Cursor* cursor = table_start(table);
|
|
|
|
|
|
2017-08-31 20:36:14 -07:00
|
|
|
Row row;
|
2017-09-10 20:34:18 -07:00
|
|
|
while (!(cursor->end_of_table)) {
|
|
|
|
|
deserialize_row(cursor_value(cursor), &row);
|
2017-08-31 20:36:14 -07:00
|
|
|
print_row(&row);
|
2017-09-10 20:34:18 -07:00
|
|
|
cursor_advance(cursor);
|
2017-08-31 20:36:14 -07:00
|
|
|
}
|
2017-09-10 20:34:18 -07:00
|
|
|
|
|
|
|
|
free(cursor);
|
|
|
|
|
|
2017-08-31 20:36:14 -07:00
|
|
|
return EXECUTE_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ExecuteResult execute_statement(Statement* statement, Table* table) {
|
2017-08-31 17:23:52 -07:00
|
|
|
switch (statement->type) {
|
|
|
|
|
case (STATEMENT_INSERT):
|
2017-08-31 20:36:14 -07:00
|
|
|
return execute_insert(statement, table);
|
2017-08-31 17:23:52 -07:00
|
|
|
case (STATEMENT_SELECT):
|
2017-08-31 20:36:14 -07:00
|
|
|
return execute_select(statement, table);
|
2017-08-31 17:23:52 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-31 10:58:19 -07:00
|
|
|
int main(int argc, char* argv[]) {
|
2017-09-03 14:52:23 -07:00
|
|
|
if (argc < 2) {
|
|
|
|
|
printf("Must supply a database filename.\n");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char* filename = argv[1];
|
|
|
|
|
Table* table = db_open(filename);
|
|
|
|
|
|
2017-08-31 10:58:19 -07:00
|
|
|
InputBuffer* input_buffer = new_input_buffer();
|
|
|
|
|
while (true) {
|
|
|
|
|
print_prompt();
|
|
|
|
|
read_input(input_buffer);
|
|
|
|
|
|
2017-08-31 17:23:52 -07:00
|
|
|
if (input_buffer->buffer[0] == '.') {
|
2017-09-03 14:52:23 -07:00
|
|
|
switch (do_meta_command(input_buffer, table)) {
|
2017-08-31 17:23:52 -07:00
|
|
|
case (META_COMMAND_SUCCESS):
|
|
|
|
|
continue;
|
|
|
|
|
case (META_COMMAND_UNRECOGNIZED_COMMAND):
|
|
|
|
|
printf("Unrecognized command '%s'\n", input_buffer->buffer);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2017-08-31 10:58:19 -07:00
|
|
|
}
|
2017-08-31 17:23:52 -07:00
|
|
|
|
|
|
|
|
Statement statement;
|
|
|
|
|
switch (prepare_statement(input_buffer, &statement)) {
|
|
|
|
|
case (PREPARE_SUCCESS):
|
|
|
|
|
break;
|
2017-09-02 14:07:08 -07:00
|
|
|
case (PREPARE_NEGATIVE_ID):
|
|
|
|
|
printf("ID must be positive.\n");
|
|
|
|
|
continue;
|
|
|
|
|
case (PREPARE_STRING_TOO_LONG):
|
|
|
|
|
printf("String is too long.\n");
|
|
|
|
|
continue;
|
2017-08-31 20:36:14 -07:00
|
|
|
case (PREPARE_SYNTAX_ERROR):
|
|
|
|
|
printf("Syntax error. Could not parse statement.\n");
|
|
|
|
|
continue;
|
2017-08-31 17:23:52 -07:00
|
|
|
case (PREPARE_UNRECOGNIZED_STATEMENT):
|
|
|
|
|
printf("Unrecognized keyword at start of '%s'.\n",
|
|
|
|
|
input_buffer->buffer);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-31 20:36:14 -07:00
|
|
|
switch (execute_statement(&statement, table)) {
|
2017-09-02 17:59:31 -07:00
|
|
|
case (EXECUTE_SUCCESS):
|
2017-08-31 20:36:14 -07:00
|
|
|
printf("Executed.\n");
|
|
|
|
|
break;
|
2017-10-01 16:02:19 -07:00
|
|
|
case (EXECUTE_DUPLICATE_KEY):
|
|
|
|
|
printf("Error: Duplicate key.\n");
|
|
|
|
|
break;
|
2017-08-31 20:36:14 -07:00
|
|
|
}
|
2017-08-31 10:58:19 -07:00
|
|
|
}
|
|
|
|
|
}
|