Blame: _parts/part4.md - cstack/db_tutorial

Writing a sqlite clone from scratch in C

0 0 0 C

-												First draft of part 4.

											
										
										
											2017-09-02 17:11:08 -07:00
+								---
 								title: Part 4 - Our First Tests (and Bugs)
-												Add date to part 4.

											
										
										
											2017-09-04 14:54:03 -07:00
+								date: 2017-09-03
-												First draft of part 4.

											
										
										
											2017-09-02 17:11:08 -07:00
+								---
 								We've got the ability to insert rows into our database and to print out all rows. Let's take a moment to test what we've got so far.
-												Part 4 edits.

											
										
										
											2017-09-03 12:55:42 -07:00
+								I'm going to use [rspec](http://rspec.info/) to write my tests because I'm familiar with it, and the syntax is fairly readable.
-												First draft of part 4.

											
										
										
											2017-09-02 17:11:08 -07:00
-												Part 4 edits.

											
										
										
											2017-09-03 12:55:42 -07:00
+								I'll define a short helper to send a list of commands to our database program then make assertions about the output:
-												First draft of part 4.

											
										
										
											2017-09-02 17:11:08 -07:00
 								```ruby
 								describe 'database' do
 								  def run_script(commands)
 								    raw_output = nil
 								    IO.popen("./db", "r+") do |pipe|
 								      commands.each do |command|
 								        pipe.puts command
 								      end
 								      pipe.close_write
 								      # Read entire output
 								      raw_output = pipe.gets(nil)
 								    end
 								    raw_output.split("\n")
 								  end
 								  it 'inserts and retreives a row' do
 								    result = run_script([
 								      "insert 1 user1 person1@example.com",
 								      "select",
 								      ".exit",
 								    ])
 								    expect(result).to eq([
 								      "db > Executed.",
 								      "db > (1, user1, person1@example.com)",
 								      "Executed.",
 								      "db > ",
 								    ])
 								  end
 								end
 								```
 								This simple test makes sure we get back what we put in. And indeed it passes:
 								```command-line
 								bundle exec rspec
 								.
 								Finished in 0.00871 seconds (files took 0.09506 seconds to load)
 example, 0 failures
 								```
 								Now it's feasible to test inserting a large number of rows into the database:
 								```ruby
 								it 'prints error message when table is full' do
 								  script = (1..1401).map do |i|
 								    "insert #{i} user#{i} person#{i}@example.com"
 								  end
 								  script << ".exit"
 								  result = run_script(script)
 								  expect(result[-2]).to eq('db > Error: Table full.')
 								end
 								```
 								Running tests again...
 								```command-line
 								bundle exec rspec
 								..
 								Finished in 0.01553 seconds (files took 0.08156 seconds to load)
 examples, 0 failures
 								```
 								Sweet, it works! Our db can hold 1400 rows right now because we set the maximum number of pages to 100, and 14 rows can fit in a page.
 								Reading through the code we have so far, I realized we might not handle storing text fields correctly. Easy to test with this example:
 								```ruby
 								it 'allows inserting strings that are the maximum length' do
 								  long_username = "a"*32
 								  long_email = "a"*255
 								  script = [
 								    "insert 1 #{long_username} #{long_email}",
 								    "select",
 								    ".exit",
 								  ]
 								  result = run_script(script)
 								  expect(result).to eq([
 								    "db > Executed.",
 								    "db > (1, #{long_username}, #{long_email})",
 								    "Executed.",
 								    "db > ",
 								  ])
 								end
 								```
 								And the test fails!
 								```ruby
 								Failures:
 ) database allows inserting strings that are the maximum length
 								     Failure/Error: raw_output.split("\n")
 								     ArgumentError:
 								       invalid byte sequence in UTF-8
 								     # ./spec/main_spec.rb:14:in `split'
 								     # ./spec/main_spec.rb:14:in `run_script'
 								     # ./spec/main_spec.rb:48:in `block (2 levels) in <top (required)>'
 								```
 								If we try it ourselves, we'll see that there's some weird characters when we try to print out the row. (I'm abbreviating the long strings):
 								```command-line
 								db > insert 1 aaaaa... aaaaa...
 								Executed.
 								db > select
 								(1, aaaaa...aaa\<5C>, aaaaa...aaa\<5C>)
 								Executed.
 								db >
 								```
 								What's going on? If you take a look at our definition of a Row, we allocate exactly 32 bytes for username and exactly 255 bytes for email. But [C strings](http://www.cprogramming.com/tutorial/c/lesson9.html) are supposed to end with a null character, which we didn't allocate space for. The solution is to allocate one additional byte:
 								```diff
 								 const uint32_t COLUMN_EMAIL_SIZE = 255;
 								 struct Row_t {
 								   uint32_t id;
 								-  char username[COLUMN_USERNAME_SIZE];
 								-  char email[COLUMN_EMAIL_SIZE];
 								+  char username[COLUMN_USERNAME_SIZE + 1];
 								+  char email[COLUMN_EMAIL_SIZE + 1];
 								 };
 								 typedef struct Row_t Row;
 								 ```
 								 And indeed that fixes it:
 								 ```ruby
 								 bundle exec rspec
 								...
 								Finished in 0.0188 seconds (files took 0.08516 seconds to load)
 examples, 0 failures
 								```
 								We should not allow inserting usernames or emails that are longer than column size. The spec for that looks like this:
 								```ruby
 								it 'prints error message if strings are too long' do
 								  long_username = "a"*33
 								  long_email = "a"*256
 								  script = [
 								    "insert 1 #{long_username} #{long_email}",
 								    "select",
 								    ".exit",
 								  ]
 								  result = run_script(script)
 								  expect(result).to eq([
 								    "db > String is too long.",
 								    "db > Executed.",
 								    "db > ",
 								  ])
 								end
 								```
-												Part 4 edits.

											
										
										
											2017-09-03 12:55:42 -07:00
+								In order to do this we need to upgrade our parser. As a reminder, we're currently using [scanf()](https://linux.die.net/man/3/scanf):
-												First draft of part 4.

											
										
										
											2017-09-02 17:11:08 -07:00
+								```c
 								if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
 								  statement->type = STATEMENT_INSERT;
 								  int args_assigned = sscanf(
 								      input_buffer->buffer, "insert %d %s %s", &(statement->row_to_insert.id),
 								      statement->row_to_insert.username, statement->row_to_insert.email);
 								  if (args_assigned < 3) {
 								    return PREPARE_SYNTAX_ERROR;
 								  }
 								  return PREPARE_SUCCESS;
 								}
 								```
 								But [scanf has some disadvantages](https://stackoverflow.com/questions/2430303/disadvantages-of-scanf). If the string it's reading is larger than the buffer it's reading into, it will cause a buffer overflow and start writing into unexpected places. We want to check the length of each string before we copy it into a `Row` structure. And to do that, we need to divide the input by spaces.
 								I'm going to use [strtok()](http://www.cplusplus.com/reference/cstring/strtok/) to do that. I think it's easiest to understand if you see it in action:
 								```diff
 								+PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
 								+  statement->type = STATEMENT_INSERT;
 								+
 								+  char* keyword = strtok(input_buffer->buffer, " ");
 								+  char* id_string = strtok(NULL, " ");
 								+  char* username = strtok(NULL, " ");
 								+  char* email = strtok(NULL, " ");
 								+
 								+  if (id_string == NULL || username == NULL || email == NULL) {
 								+    return PREPARE_SYNTAX_ERROR;
 								+  }
 								+
 								+  int id = atoi(id_string);
 								+  if (strlen(username) > COLUMN_USERNAME_SIZE) {
 								+    return PREPARE_STRING_TOO_LONG;
 								+  }
 								+  if (strlen(email) > COLUMN_EMAIL_SIZE) {
 								+    return PREPARE_STRING_TOO_LONG;
 								+  }
 								+
 								+  statement->row_to_insert.id = id;
 								+  strcpy(statement->row_to_insert.username, username);
 								+  strcpy(statement->row_to_insert.email, email);
 								+
 								+  return PREPARE_SUCCESS;
 								+}
 								+
 								 PrepareResult prepare_statement(InputBuffer* input_buffer,
 								                                 Statement* statement) {
 								   if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
 								+    return prepare_insert(input_buffer, statement);
 								-    statement->type = STATEMENT_INSERT;
 								-    int args_assigned = sscanf(
 								-        input_buffer->buffer, "insert %d %s %s", &(statement->row_to_insert.id),
 								-        statement->row_to_insert.username, statement->row_to_insert.email);
 								-    if (args_assigned < 3) {
 								-      return PREPARE_SYNTAX_ERROR;
 								-    }
 								-    return PREPARE_SUCCESS;
 								   }
 								```
 								Calling `strtok` successively on the the input buffer breaks it into substrings by inserting a null character whenever it reaches a delimiter (space, in our case). It returns a pointer to the start of the substring.
 								We can call [strlen()](http://www.cplusplus.com/reference/cstring/strlen/) on each text value to see if it's too long.
 								We can handle the error like we do any other error code:
 								```diff
 								 enum PrepareResult_t {
 								   PREPARE_SUCCESS,
 								+  PREPARE_STRING_TOO_LONG,
 								   PREPARE_SYNTAX_ERROR,
 								   PREPARE_UNRECOGNIZED_STATEMENT
 								 };
 								```
 								```diff
 								 switch (prepare_statement(input_buffer, &statement)) {
 								   case (PREPARE_SUCCESS):
 								     break;
 								+  case (PREPARE_STRING_TOO_LONG):
 								+    printf("String is too long.\n");
 								+    continue;
 								   case (PREPARE_SYNTAX_ERROR):
 								     printf("Syntax error. Could not parse statement.\n");
 								     continue;
 								```
 								Which makes our test pass
 								```command-line
 								bundle exec rspec
 								....
 								Finished in 0.02284 seconds (files took 0.116 seconds to load)
 examples, 0 failures
 								```
 								While we're here, we might as well handle one more error case:
 								```ruby
 								it 'prints an error message if id is negative' do
 								  script = [
 								    "insert -1 cstack foo@bar.com",
 								    "select",
 								    ".exit",
 								  ]
 								  result = run_script(script)
 								  expect(result).to eq([
 								    "db > ID must be positive.",
 								    "db > Executed.",
 								    "db > ",
 								  ])
 								end
 								```
 								```diff
 								 enum PrepareResult_t {
 								   PREPARE_SUCCESS,
 								+  PREPARE_NEGATIVE_ID,
 								   PREPARE_STRING_TOO_LONG,
 								   PREPARE_SYNTAX_ERROR,
 								   PREPARE_UNRECOGNIZED_STATEMENT
@@ -148,9 +147,6 @@ PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
 								   }
 								   int id = atoi(id_string);
 								+  if (id < 0) {
 								+    return PREPARE_NEGATIVE_ID;
 								+  }
 								   if (strlen(username) > COLUMN_USERNAME_SIZE) {
 								     return PREPARE_STRING_TOO_LONG;
 								   }
@@ -230,9 +226,6 @@ int main(int argc, char* argv[]) {
 								     switch (prepare_statement(input_buffer, &statement)) {
 								       case (PREPARE_SUCCESS):
 								         break;
 								+      case (PREPARE_NEGATIVE_ID):
 								+        printf("ID must be positive.\n");
 								+        continue;
 								       case (PREPARE_STRING_TOO_LONG):
 								         printf("String is too long.\n");
 								         continue;
 								```
-												Spellcheck

											
										
										
											2017-10-01 12:56:58 -07:00
+								Alright, that's enough testing for now. Next is a very important feature: persistence! We're going to save our database to a file and read it back out again.
-												First draft of part 4.

											
										
										
											2017-09-02 17:11:08 -07:00
 								It's gonna be great.
 								Here's the complete diff for this part:
 								```diff
 								 enum PrepareResult_t {
 								   PREPARE_SUCCESS,
 								+  PREPARE_NEGATIVE_ID,
 								+  PREPARE_STRING_TOO_LONG,
 								   PREPARE_SYNTAX_ERROR,
 								   PREPARE_UNRECOGNIZED_STATEMENT
 								 };
@@ -33,8 +35,8 @@ const uint32_t COLUMN_USERNAME_SIZE = 32;
 								 const uint32_t COLUMN_EMAIL_SIZE = 255;
 								 struct Row_t {
 								   uint32_t id;
 								-  char username[COLUMN_USERNAME_SIZE];
 								-  char email[COLUMN_EMAIL_SIZE];
 								+  char username[COLUMN_USERNAME_SIZE + 1];
 								+  char email[COLUMN_EMAIL_SIZE + 1];
 								 };
 								 typedef struct Row_t Row;
@@ -133,17 +135,40 @@ MetaCommandResult do_meta_command(InputBuffer* input_buffer) {
 								   }
 								 }
 								+PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
 								+  statement->type = STATEMENT_INSERT;
 								+
 								+  char* keyword = strtok(input_buffer->buffer, " ");
 								+  char* id_string = strtok(NULL, " ");
 								+  char* username = strtok(NULL, " ");
 								+  char* email = strtok(NULL, " ");
 								+
 								+  if (id_string == NULL || username == NULL || email == NULL) {
 								+    return PREPARE_SYNTAX_ERROR;
 								+  }
 								+
 								+  int id = atoi(id_string);
 								+  if (id < 0) {
 								+    return PREPARE_NEGATIVE_ID;
 								+  }
 								+  if (strlen(username) > COLUMN_USERNAME_SIZE) {
 								+    return PREPARE_STRING_TOO_LONG;
 								+  }
 								+  if (strlen(email) > COLUMN_EMAIL_SIZE) {
 								+    return PREPARE_STRING_TOO_LONG;
 								+  }
 								+
 								+  statement->row_to_insert.id = id;
 								+  strcpy(statement->row_to_insert.username, username);
 								+  strcpy(statement->row_to_insert.email, email);
 								+
 								+  return PREPARE_SUCCESS;
 								+}
 								+
 								 PrepareResult prepare_statement(InputBuffer* input_buffer,
 								                                 Statement* statement) {
 								   if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
 								-    statement->type = STATEMENT_INSERT;
 								-    int args_assigned = sscanf(
 								-        input_buffer->buffer, "insert %d %s %s", &(statement->row_to_insert.id),
 								-        statement->row_to_insert.username, statement->row_to_insert.email);
 								-    if (args_assigned < 3) {
 								-      return PREPARE_SYNTAX_ERROR;
 								-    }
 								-    return PREPARE_SUCCESS;
 								+    return prepare_insert(input_buffer, statement);
 								   }
 								   if (strcmp(input_buffer->buffer, "select") == 0) {
 								     statement->type = STATEMENT_SELECT;
@@ -205,6 +230,12 @@ int main(int argc, char* argv[]) {
 								     switch (prepare_statement(input_buffer, &statement)) {
 								       case (PREPARE_SUCCESS):
 								         break;
 								+      case (PREPARE_NEGATIVE_ID):
 								+        printf("ID must be positive.\n");
 								+        continue;
 								+      case (PREPARE_STRING_TOO_LONG):
 								+        printf("String is too long.\n");
 								+        continue;
 								       case (PREPARE_SYNTAX_ERROR):
 								         printf("Syntax error. Could not parse statement.\n");
 								         continue;
 								```
 								And we added tests:
 								```diff
 								+describe 'database' do
 								+  def run_script(commands)
 								+    raw_output = nil
 								+    IO.popen("./db", "r+") do |pipe|
 								+      commands.each do |command|
 								+        pipe.puts command
 								+      end
 								+
 								+      pipe.close_write
 								+
 								+      # Read entire output
 								+      raw_output = pipe.gets(nil)
 								+    end
 								+    raw_output.split("\n")
 								+  end
 								+
 								+  it 'inserts and retreives a row' do
 								+    result = run_script([
 								+      "insert 1 user1 person1@example.com",
 								+      "select",
 								+      ".exit",
 								+    ])
 								+    expect(result).to eq([
 								+      "db > Executed.",
 								+      "db > (1, user1, person1@example.com)",
 								+      "Executed.",
 								+      "db > ",
 								+    ])
 								+  end
 								+
 								+  it 'prints error message when table is full' do
 								+    script = (1..1401).map do |i|
 								+      "insert #{i} user#{i} person#{i}@example.com"
 								+    end
 								+    script << ".exit"
 								+    result = run_script(script)
 								+    expect(result[-2]).to eq('db > Error: Table full.')
 								+  end
 								+
 								+  it 'allows inserting strings that are the maximum length' do
 								+    long_username = "a"*32
 								+    long_email = "a"*255
 								+    script = [
 								+      "insert 1 #{long_username} #{long_email}",
 								+      "select",
 								+      ".exit",
 								+    ]
 								+    result = run_script(script)
 								+    expect(result).to eq([
 								+      "db > Executed.",
 								+      "db > (1, #{long_username}, #{long_email})",
 								+      "Executed.",
 								+      "db > ",
 								+    ])
 								+  end
 								+
 								+  it 'prints error message if strings are too long' do
 								+    long_username = "a"*33
 								+    long_email = "a"*256
 								+    script = [
 								+      "insert 1 #{long_username} #{long_email}",
 								+      "select",
 								+      ".exit",
 								+    ]
 								+    result = run_script(script)
 								+    expect(result).to eq([
 								+      "db > String is too long.",
 								+      "db > Executed.",
 								+      "db > ",
 								+    ])
 								+  end
 								+
 								+  it 'prints an error message if id is negative' do
 								+    script = [
 								+      "insert -1 cstack foo@bar.com",
 								+      "select",
 								+      ".exit",
 								+    ]
 								+    result = run_script(script)
 								+    expect(result).to eq([
 								+      "db > ID must be positive.",
 								+      "db > Executed.",
 								+      "db > ",
 								+    ])
 								+  end
 								+end
 								```