mirror of
https://git.checksum.fail/alec/Web.git
synced 2026-05-26 19:15:49 +00:00
Add named character entity &copy; (©). Add states HTML_STATE_AFTER_DOCTYPE_NAME, HTML_STATE_BOGUS_DOCTYPE, HTML_STATE_SELF_CLOSING_START_TAG.
This commit is contained in:
@@ -153,6 +153,11 @@ U0 @replace_temp_buffer_with_named_character_reference(Tokenizer *t) {
|
||||
@recalculate_temp_buffer_size(t);
|
||||
return;
|
||||
}
|
||||
if (!StrICmp(t->tempBuffer.data, "©")) {
|
||||
StrCpy(t->tempBuffer.data, "(c)");
|
||||
@recalculate_temp_buffer_size(t);
|
||||
return;
|
||||
}
|
||||
if (!StrICmp(t->tempBuffer.data, " ")) {
|
||||
StrCpy(t->tempBuffer.data, " ");
|
||||
@recalculate_temp_buffer_size(t);
|
||||
@@ -499,6 +504,46 @@ U0 @tokenizer_html_state_doctype_name(Tokenizer *t) {
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenizer handler for the "after DOCTYPE name" state.
//
// Reads one character via @consume_next_input_char and then inspects
// t->currentInputChar:
//   - '\n', '\r', '\t', ' ' : ignored; the state is left unchanged.
//   - '>'                   : t->state becomes HTML_STATE_DATA.
//   - anything else         : the character is handed back by decrementing
//                             t->inputBuffer.pos and t->state becomes
//                             HTML_STATE_BOGUS_DOCTYPE.
//
// There is deliberately no uppercase-letter branch here. Creating a DOCTYPE
// token from 'A'..'Z' belongs to the *before* DOCTYPE name state; in this
// state it dropped the letter and re-entered the DOCTYPE name state, so
// "PUBLIC"/"SYSTEM" were handled differently from "public"/"system".
// PUBLIC/SYSTEM identifiers are not parsed by this function; they take the
// default path like every other character.
U0 @tokenizer_html_state_after_doctype_name(Tokenizer *t) {
  @consume_next_input_char(t);
  switch (t->currentInputChar) {
  case '\n':
  case '\r':
  case '\t':
  case ' ':
    // Ignore the character.
    break;
  case '>':
    // Switch to the data state.
    // NOTE(review): the tokenizer spec also emits the DOCTYPE token here;
    // nothing is emitted by this function - confirm that is intended.
    t->state = HTML_STATE_DATA;
    break;
  default:
    // Reconsume in the bogus DOCTYPE state: step the input position back so
    // the next state sees this same character again.
    t->inputBuffer.pos--;
    t->state = HTML_STATE_BOGUS_DOCTYPE;
    break;
  }
}
|
||||
|
||||
// Tokenizer handler for the "bogus DOCTYPE" state.
//
// Reads one character via @consume_next_input_char. Any character other
// than '>' is ignored and the state is left as it is; a '>' sets t->state
// to HTML_STATE_DATA.
U0 @tokenizer_html_state_bogus_doctype(Tokenizer *t) {
  @consume_next_input_char(t);

  // Ignore the character unless it closes the DOCTYPE.
  if (t->currentInputChar != '>') {
    return;
  }

  // Switch to the data state.
  // NOTE(review): the original comment also mentions emitting the DOCTYPE
  // token; no emit call is made in this function.
  t->state = HTML_STATE_DATA;
}
|
||||
|
||||
U0 @tokenizer_html_state_tag_name(Tokenizer *t) {
|
||||
@consume_next_input_char(t);
|
||||
switch (t->currentInputChar) {
|
||||
@@ -775,6 +820,24 @@ U0 @tokenizer_html_state_after_attribute_name(Tokenizer *t) {
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenizer handler for the "self-closing start tag" state.
//
// Reads one character via @consume_next_input_char, then:
//   - '>'           : calls @emit_current_node and sets t->state to
//                     HTML_STATE_DATA.
//   - anything else : hands the character back by decrementing
//                     t->inputBuffer.pos and sets t->state to
//                     HTML_STATE_BEFORE_ATTRIBUTE_NAME.
U0 @tokenizer_html_state_self_closing_start_tag(Tokenizer *t) {
  @consume_next_input_char(t);

  if (t->currentInputChar == '>') {
    // Emit the current tag token, then switch to the data state.
    // NOTE(review): no self-closing flag is set in this function.
    @emit_current_node(t);
    t->state = HTML_STATE_DATA;
    return;
  }

  // Unexpected-solidus-in-tag parse error: reconsume the character in the
  // before attribute name state.
  t->inputBuffer.pos--;
  t->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME;
}
|
||||
|
||||
U0 @tokenizer_html_state_character_reference(Tokenizer *t) {
|
||||
// Set the temporary buffer to the empty string.
|
||||
@empty_temp_buffer(t);
|
||||
@@ -929,6 +992,15 @@ Node *@html_tokenize_and_create_node_tree(U8 *buffer, I64 size,
|
||||
case HTML_STATE_NUMERIC_CHARACTER_REFERENCE:
|
||||
@tokenizer_html_state_numeric_character_reference(&t);
|
||||
break;
|
||||
case HTML_STATE_AFTER_DOCTYPE_NAME:
|
||||
@tokenizer_html_state_after_doctype_name(&t);
|
||||
break;
|
||||
case HTML_STATE_BOGUS_DOCTYPE:
|
||||
@tokenizer_html_state_bogus_doctype(&t);
|
||||
break;
|
||||
case HTML_STATE_SELF_CLOSING_START_TAG:
|
||||
@tokenizer_html_state_self_closing_start_tag(&t);
|
||||
break;
|
||||
case HTML_STATE_INVALID:
|
||||
default:
|
||||
@debug("\n$FG,0$HTML Tokenization error: Invalid or unimplemented "
|
||||
|
||||
Reference in New Issue
Block a user