Part2 - 世上最简单的SQL编译器和虚拟机
我们正在复现sqlite。sqlite的前端(front-end)是一个SQL编译器,它解析一个字符串并输出一个字节码。
字节码被传递给虚拟机来处理它(Part1中有提到)。
再看一下这个结构图,我们可以看到SQL Command Processor(SQL命令处理器)处理完数据后将字节码传递给Virtual Machine(虚拟机)。
将事情分为两个步骤(指SQL编译器和虚拟机)有许多好处:
- 减少每一部分的复杂性(例如:虚拟机不需要担心语法错误,SQL编译器已经处理了)
- 允许对常用查询只编译一次并缓存其字节码(即相同的查询无需重复编译),从而提高效率。
于是,我们来重构main函数,并在此过程中支持两个新的关键字。
 int main(int argc, char* argv[]) {
   InputBuffer* input_buffer = new_input_buffer();
   while (true) {
     print_prompt();
     read_input(input_buffer);
-    if (strcmp(input_buffer->buffer, ".exit") == 0) {
-      exit(EXIT_SUCCESS);
-    } else {
-      printf("Unrecognized command '%s'.\n", input_buffer->buffer);
+    // SQL编译器
+    // 检查是否为元命令
+    if (input_buffer->buffer[0] == '.') {
+      switch (do_meta_command(input_buffer)) {
+        case (META_COMMAND_SUCCESS):
+          continue;
+        case (META_COMMAND_UNRECOGNIZED_COMMAND):
+          printf("Unrecognized command '%s'\n", input_buffer->buffer);
+          continue;
+      }
+    }
+    // 转换为字节码
+    Statement statement;
+    switch (prepare_statement(input_buffer, &statement)) {
+      case (PREPARE_SUCCESS):
+        break;
+      case (PREPARE_UNRECOGNIZED_STATEMENT):
+        printf("Unrecognized keyword at start of '%s'.\n",
+               input_buffer->buffer);
+        continue;
+    }
+    // 将字节码传递给虚拟机
+    execute_statement(&statement);
+    printf("Executed.\n");
   }
 }
像 “.exit” 这样的非sql语句称为 “元命令” (meta-commands)。它们都以一个点开始,所以我们检查它们并在一个单独的函数中处理它们。
接下来,我们添加一个步骤,将输入转换为我们所需要的字节码。
最后,我们将准备好的字节码传递给execute函数,这个函数就是我们的虚拟机。
注意我们还需要定义两个步骤中成功和失败的枚举(ENUM)。
/* Result of handling a meta-command, i.e. an input line starting with '.'. */
typedef enum {
  META_COMMAND_SUCCESS,             /* the meta-command was handled */
  META_COMMAND_UNRECOGNIZED_COMMAND /* the meta-command is not known */
} MetaCommandResult;

/* Result of converting an input line into a Statement. */
typedef enum {
  PREPARE_SUCCESS,               /* the line was recognized as a statement */
  PREPARE_UNRECOGNIZED_STATEMENT /* the line starts with an unknown keyword */
} PrepareResult;
C语言不支持异常,所以我在任何可能出错的地方都使用枚举(enum)结果码。如果switch语句漏掉了enum的某个成员,C编译器会发出警告(例如GCC/Clang的-Wswitch),这样我们就能更有把握地确认函数的每一种结果都得到了处理。
元命令中还是只有 “.exit” 。
/*
 * Handle a meta-command held in input_buffer->buffer.
 *
 * If the buffer is exactly ".exit", the process terminates with
 * EXIT_SUCCESS and this function does not return. Any other text yields
 * META_COMMAND_UNRECOGNIZED_COMMAND.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer) {
  const int is_exit_command = (strcmp(input_buffer->buffer, ".exit") == 0);

  if (!is_exit_command) {
    return META_COMMAND_UNRECOGNIZED_COMMAND;
  }

  exit(EXIT_SUCCESS);
}
我们的 “prepared statement”(预编译语句)目前只包含一个枚举,它有两个可能的值。
/* The kinds of statement that can currently be prepared. */
typedef enum {
  STATEMENT_INSERT, /* line began with "insert" */
  STATEMENT_SELECT  /* line was "select" */
} StatementType;

/* A prepared statement; for now it only records which kind it is. */
typedef struct {
  StatementType type;
} Statement;
prepare_statement(我们的“SQL编译器”)现在还不理解SQL,实际上它只认识两个单词。
/*
 * Classify the text in input_buffer->buffer and record the result in
 * *statement.
 *
 * - Text whose first 6 characters are "insert" sets STATEMENT_INSERT
 *   (prefix match, so anything may follow the keyword).
 * - Text that is exactly "select" sets STATEMENT_SELECT.
 *
 * Returns PREPARE_SUCCESS in those two cases. Otherwise returns
 * PREPARE_UNRECOGNIZED_STATEMENT and leaves *statement untouched.
 */
PrepareResult prepare_statement(InputBuffer* input_buffer,
                                Statement* statement) {
  const char* text = input_buffer->buffer;
  PrepareResult outcome = PREPARE_SUCCESS;

  if (strncmp(text, "insert", 6) == 0) {
    statement->type = STATEMENT_INSERT;
  } else if (strcmp(text, "select") == 0) {
    statement->type = STATEMENT_SELECT;
  } else {
    outcome = PREPARE_UNRECOGNIZED_STATEMENT;
  }

  return outcome;
}
注意,我们对insert使用strncmp(比较前n个字符是否相同),因为insert关键字后面会跟着数据。
最后,execute_statement函数目前只包含一些存根(stub,即仅作占位、尚未实现真正逻辑的代码),还没有实现真正的执行。
/*
 * Execute a prepared statement. Currently a stub: it only prints a
 * placeholder line for INSERT or SELECT and prints nothing for any other
 * type value.
 */
void execute_statement(Statement* statement) {
  const char* placeholder = NULL;

  if (statement->type == STATEMENT_INSERT) {
    placeholder = "This is where we would do an insert.\n";
  } else if (statement->type == STATEMENT_SELECT) {
    placeholder = "This is where we would do a select.\n";
  }

  if (placeholder != NULL) {
    printf("%s", placeholder);
  }
}
它不会返回任何错误代码,因为还没有可能出错的代码。
通过这些重构,我们现在可以识别两个新的关键字(insert和select)
~ ./db
db > insert foo bar
This is where we would do an insert.
Executed.
db > delete foo
Unrecognized keyword at start of 'delete foo'.
db > select
This is where we would do a select.
Executed.
db > .tables
Unrecognized command '.tables'
db > .exit
~
实践结果:
我们的数据库框架正在形成……如果它能存储数据岂不是更好?在下一部分中,我们将实现insert和select。
以下是修改的代码部分:
@@ -10,6 +10,23 @@ struct InputBuffer_t {
} InputBuffer;

+typedef enum {
+ META_COMMAND_SUCCESS,
+ META_COMMAND_UNRECOGNIZED_COMMAND
+} MetaCommandResult;
+
+typedef enum { PREPARE_SUCCESS, PREPARE_UNRECOGNIZED_STATEMENT } PrepareResult;
+
+typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType;
+
+typedef struct {
+ StatementType type;
+} Statement;
+
InputBuffer* new_input_buffer() {
InputBuffer* input_buffer = malloc(sizeof(InputBuffer));
input_buffer->buffer = NULL;
@@ -40,17 +57,67 @@ void close_input_buffer(InputBuffer* input_buffer) {
free(input_buffer);
}

+MetaCommandResult do_meta_command(InputBuffer* input_buffer) {
+ if (strcmp(input_buffer->buffer, ".exit") == 0) {
+ close_input_buffer(input_buffer);
+ exit(EXIT_SUCCESS);
+ } else {
+ return META_COMMAND_UNRECOGNIZED_COMMAND;
+ }
+}
+
+PrepareResult prepare_statement(InputBuffer* input_buffer,
+ Statement* statement) {
+ if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
+ statement->type = STATEMENT_INSERT;
+ return PREPARE_SUCCESS;
+ }
+ if (strcmp(input_buffer->buffer, "select") == 0) {
+ statement->type = STATEMENT_SELECT;
+ return PREPARE_SUCCESS;
+ }
+
+ return PREPARE_UNRECOGNIZED_STATEMENT;
+}
+
+void execute_statement(Statement* statement) {
+ switch (statement->type) {
+ case (STATEMENT_INSERT):
+ printf("This is where we would do an insert.\n");
+ break;
+ case (STATEMENT_SELECT):
+ printf("This is where we would do a select.\n");
+ break;
+ }
+}
+
int main(int argc, char* argv[]) {
InputBuffer* input_buffer = new_input_buffer();
while (true) {
print_prompt();
read_input(input_buffer);

- if (strcmp(input_buffer->buffer, ".exit") == 0) {
- close_input_buffer(input_buffer);
- exit(EXIT_SUCCESS);
- } else {
- printf("Unrecognized command '%s'.\n", input_buffer->buffer);
+ if (input_buffer->buffer[0] == '.') {
+ switch (do_meta_command(input_buffer)) {
+ case (META_COMMAND_SUCCESS):
+ continue;
+ case (META_COMMAND_UNRECOGNIZED_COMMAND):
+ printf("Unrecognized command '%s'\n", input_buffer->buffer);
+ continue;
+ }
+ }
+
+ Statement statement;
+ switch (prepare_statement(input_buffer, &statement)) {
+ case (PREPARE_SUCCESS):
+ break;
+ case (PREPARE_UNRECOGNIZED_STATEMENT):
+ printf("Unrecognized keyword at start of '%s'.\n",
+ input_buffer->buffer);
+ continue;
+ }
+
+ execute_statement(&statement);
+ printf("Executed.\n");
}
}