Add named character entity &copy; (©). Add states HTML_STATE_AFTER_DOCTYPE_NAME, HTML_STATE_BOGUS_DOCTYPE, and HTML_STATE_SELF_CLOSING_START_TAG.

This commit is contained in:
Alec Murphy
2022-05-15 13:18:43 -04:00
parent f2b106fd3b
commit e1d66093d5
+72
View File
@@ -153,6 +153,11 @@ U0 @replace_temp_buffer_with_named_character_reference(Tokenizer *t) {
@recalculate_temp_buffer_size(t);
return;
}
if (!StrICmp(t->tempBuffer.data, "©")) {
StrCpy(t->tempBuffer.data, "(c)");
@recalculate_temp_buffer_size(t);
return;
}
if (!StrICmp(t->tempBuffer.data, " ")) {
StrCpy(t->tempBuffer.data, " ");
@recalculate_temp_buffer_size(t);
@@ -499,6 +504,46 @@ U0 @tokenizer_html_state_doctype_name(Tokenizer *t) {
}
}
// Handler for HTML_STATE_AFTER_DOCTYPE_NAME.
//
// Consumes one input character and selects the next tokenizer state:
//   '\n', '\r', '\t', ' '  -> character is skipped, state is left unchanged
//   'A' .. 'Z'             -> HTML_STATE_DOCTYPE_NAME
//   '>'                    -> HTML_STATE_DATA
//   anything else          -> input position is stepped back by one so the
//                             character is read again, then
//                             HTML_STATE_BOGUS_DOCTYPE
//
// Apart from the call to @consume_next_input_char, this function only writes
// t->state and t->inputBuffer.pos; it does not create or emit a DOCTYPE token.
// NOTE(review): the 'A'..'Z' branch looks like it was carried over from the
// "before DOCTYPE name" handling -- confirm it is wanted in this state.
U0 @tokenizer_html_state_after_doctype_name(Tokenizer *t) {
  @consume_next_input_char(t);

  // Whitespace between the DOCTYPE name and whatever follows is skipped.
  if (t->currentInputChar == '\n' || t->currentInputChar == '\r' ||
      t->currentInputChar == '\t' || t->currentInputChar == ' ') {
    return;
  }

  // An upper-case ASCII letter sends the tokenizer to the DOCTYPE name state.
  if (t->currentInputChar >= 'A' && t->currentInputChar <= 'Z') {
    t->state = HTML_STATE_DOCTYPE_NAME;
    return;
  }

  // End of the DOCTYPE declaration: continue with ordinary data.
  if (t->currentInputChar == '>') {
    t->state = HTML_STATE_DATA;
    return;
  }

  // Any other character: un-read it and let the bogus DOCTYPE state see it.
  t->inputBuffer.pos--;
  t->state = HTML_STATE_BOGUS_DOCTYPE;
}
// Handler for HTML_STATE_BOGUS_DOCTYPE.
//
// Consumes one input character. A '>' ends the malformed DOCTYPE and moves
// the tokenizer to HTML_STATE_DATA; every other character is dropped and the
// state is left unchanged, so the declaration is skipped one character per
// call. No token is emitted by this function.
U0 @tokenizer_html_state_bogus_doctype(Tokenizer *t) {
  @consume_next_input_char(t);

  if (t->currentInputChar == '>') {
    t->state = HTML_STATE_DATA;
  }
}
U0 @tokenizer_html_state_tag_name(Tokenizer *t) {
@consume_next_input_char(t);
switch (t->currentInputChar) {
@@ -775,6 +820,24 @@ U0 @tokenizer_html_state_after_attribute_name(Tokenizer *t) {
}
}
// Handler for HTML_STATE_SELF_CLOSING_START_TAG (entered after a '/' inside
// a tag, per the state name).
//
// Consumes one input character:
//   '>'            -> calls @emit_current_node, then switches to
//                     HTML_STATE_DATA
//   anything else  -> treated as a stray solidus: the input position is
//                     stepped back by one so the character is read again in
//                     HTML_STATE_BEFORE_ATTRIBUTE_NAME
//
// NOTE(review): nothing in this function records a self-closing flag on the
// tag; if one is needed it would have to be handled by @emit_current_node or
// elsewhere -- confirm.
U0 @tokenizer_html_state_self_closing_start_tag(Tokenizer *t) {
  @consume_next_input_char(t);

  if (t->currentInputChar != '>') {
    // Not the end of the tag: un-read the character and resume attribute
    // parsing.
    t->inputBuffer.pos--;
    t->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME;
    return;
  }

  // Tag is complete: emit first, then return to the data state.
  @emit_current_node(t);
  t->state = HTML_STATE_DATA;
}
U0 @tokenizer_html_state_character_reference(Tokenizer *t) {
// Set the temporary buffer to the empty string.
@empty_temp_buffer(t);
@@ -929,6 +992,15 @@ Node *@html_tokenize_and_create_node_tree(U8 *buffer, I64 size,
case HTML_STATE_NUMERIC_CHARACTER_REFERENCE:
@tokenizer_html_state_numeric_character_reference(&t);
break;
case HTML_STATE_AFTER_DOCTYPE_NAME:
@tokenizer_html_state_after_doctype_name(&t);
break;
case HTML_STATE_BOGUS_DOCTYPE:
@tokenizer_html_state_bogus_doctype(&t);
break;
case HTML_STATE_SELF_CLOSING_START_TAG:
@tokenizer_html_state_self_closing_start_tag(&t);
break;
case HTML_STATE_INVALID:
default:
@debug("\n$FG,0$HTML Tokenization error: Invalid or unimplemented "