mirror of
https://github.com/minexew/Shrine.git
synced 2026-05-26 15:22:22 +00:00
1048 lines
27 KiB
HolyC
1048 lines
27 KiB
HolyC
// TCP, see https://tools.ietf.org/html/rfc793

// Connection states.
// See https://en.wikipedia.org/wiki/File:Tcp_state_diagram_fixed_new.svg
#define TCP_STATE_CLOSED        0
#define TCP_STATE_LISTEN        1
#define TCP_STATE_SYN_SENT      2
#define TCP_STATE_SYN_RECEIVED  3
#define TCP_STATE_ESTABLISHED   4
#define TCP_STATE_FIN_WAIT_1    5
#define TCP_STATE_FIN_WAIT_2    6
#define TCP_STATE_CLOSE_WAIT    7
#define TCP_STATE_CLOSING       8
#define TCP_STATE_LAST_ACK      9
#define TCP_STATE_TIME_WAIT     10

// How long connect()/accept() wait for the handshake, in milliseconds
#define TCP_CONNECT_TIMEOUT     10000

// Initial maximum segment size assigned to every socket
#define TCP_DEFAULT_MSS         536

// Size of the per-socket receive buffer and initial receive window, in bytes
#define TCP_WINDOW_SIZE         8192

// Bits of CTcpHeader.flags
#define TCP_FLAG_FIN            0x01
#define TCP_FLAG_SYN            0x02
#define TCP_FLAG_RST            0x04
#define TCP_FLAG_PSH            0x08
#define TCP_FLAG_ACK            0x10
#define TCP_FLAG_URG            0x20

// Retransmission timing: srtt = ALPHA*srtt + (1-ALPHA)*rtt,
// rto = BETA*srtt clamped to [RTO_MIN, RTO_MAX]
#define TCP_SRTT_ALPHA          0.9
#define TCP_RTO_MIN             0.2
#define TCP_RTO_MAX             10
#define TCP_RTO_BETA            2
|
|
|
|
// On-the-wire TCP header without options.
// Multi-byte fields are converted with htons/htonl/ntohs/ntohl by the code in this file.
class CTcpHeader {
  U16 source_port;
  U16 dest_port;
  U32 seq;
  U32 ack;
  U8  data_offset;     // upper 4 bits: header length in 32-bit words
  U8  flags;           // TCP_FLAG_*
  U16 window_size;
  U16 checksum;
  U16 urgent_pointer;
};
|
|
|
|
// Bookkeeping for one sent-but-unacknowledged segment.
// The saved segment bytes are stored in the same allocation, right after this header.
class CTcpSendBufHeader {
  CTcpSendBufHeader* next;   // next entry in the socket's send chain

  F64 time_sent;             // tS at the time of the last (re)transmission
  U32 length;                // sizeof(CTcpHeader) + payload length
  U32 retries;               // initialised to 0 when the entry is created
  U32 seq_start;             // sequence number of the segment
  U32 seq_end;               // snd_nxt after the segment; filled in by TcpSendData2
};
|
|
|
|
// Per-connection (or per-listener) TCP state.
class CTcpSocket {
  CSocket sock;                       // generic socket part; holds the operation callbacks set by TcpSocket()

  I64 state;                          // one of TCP_STATE_*

  U32 local_addr;
  U16 local_port;

  U32 remote_addr;
  U32 remote_port;

  U32 snd_una;                        // seq number of first unacknowledged octet
  U32 snd_nxt;                        // seq number of next octet to send
  U32 snd_wnd;                        // allowed number of unacknowledged outgoing octets
  U32 mss;                            // maximum segment size

  U32 rcv_nxt;                        // seq number of next octet to receive
  U32 rcv_wnd;                        // allowed number of unacknowledged incoming octets

  F64 conntime;                       // tS when the handshake segment was sent
  F64 srtt;                           // smoothed round-trip time

  // Receive ring buffer; positions are wrapped with (recv_buf_size - 1) as a mask
  I64 recv_buf_size;
  U8* recv_buf;
  I64 recv_buf_read_pos;
  I64 recv_buf_write_pos;

  // Accept backlog: a listening socket owns first/last, queued sockets are linked by backlog_next
  CTcpSocket* backlog_next;
  CTcpSocket* backlog_first;
  CTcpSocket* backlog_last;
  I64 backlog_remaining;              // free backlog slots left on a listening socket

  // Chain of segments awaiting acknowledgement
  CTcpSendBufHeader* send_buf_first;
  CTcpSendBufHeader* send_buf_last;

  //I64 rcvtimeo_ms;
  //I64 recv_maxtime;
};
|
|
|
|
// Pseudo header that is summed into the TCP checksum ahead of the real header
// (see TcpPacketFinish); it is never transmitted.
class CTcpPseudoHeader {
  U32 source_addr;
  U32 dest_addr;
  U8  zeros;
  U8  protocol;      // IP_PROTO_TCP
  U16 tcp_length;    // TCP header + payload length
};
|
|
|
|
// Node of a doubly linked list of sockets.
class CTcpSocketListItem {
  CTcpSocketListItem *prev;
  CTcpSocketListItem *next;
  CTcpSocket *sock;
};

// One list per local port. Each array slot points at a head node allocated by TcpInit;
// real entries start at head->next.
static CTcpSocketListItem** tcp_socket_list;
|
|
|
|
// Look up the socket listed under the packet's destination port whose remote
// address and port match the packet's source. Returns NULL when none matches.
static CTcpSocket* GetTcpSocketFromList(CIPv4Packet* packet, CTcpHeader* hdr) {
  U16 peer_port = ntohs(hdr->source_port);
  CTcpSocketListItem* cur;

  for (cur = tcp_socket_list[ntohs(hdr->dest_port)]->next; cur; cur = cur->next) {
    if (cur->sock->remote_addr == packet->source_ip && cur->sock->remote_port == peer_port)
      return cur->sock;
  }

  return NULL;
}
|
|
|
|
// Append s to the end of the socket list that belongs to s->local_port.
U0 AddTcpSocketToList(CTcpSocket* s) {
  CTcpSocketListItem* tail = tcp_socket_list[s->local_port];
  CTcpSocketListItem* node = CAlloc(sizeof(CTcpSocketListItem));  // zeroed, so node->next is NULL

  // Walk to the current last node (possibly the head node itself)
  while (tail->next)
    tail = tail->next;

  node->sock = s;
  node->prev = tail;
  tail->next = node;
}
|
|
|
|
// Unlink s from the socket list of s->local_port and release its list node.
// Returns s when it was found in the list, NULL otherwise.
CTcpSocket* RemoveTcpSocketFromList(CTcpSocket* s) {
  CTcpSocketListItem* prev = NULL;
  CTcpSocketListItem* next = NULL;
  CTcpSocketListItem* item = tcp_socket_list[s->local_port]->next;

  while (item) {
    if (item->sock == s) {
      prev = item->prev;
      next = item->next;

      if (prev)
        prev->next = next;

      if (next)
        next->prev = prev;

      // The node was allocated in AddTcpSocketToList and is no longer
      // reachable once unlinked, so it must be released here.
      Free(item);
      return s;
    }

    item = item->next;
  }

  return NULL;
}
|
|
|
|
// Socket bound to each local port, indexed by port number (NULL when the port is free).
// TODO: this takes up half a meg, change it to a binary tree or something
static CTcpSocket** tcp_bound_sockets;

// Counter used by TcpSocketConnect to pick the next local port; starts at a random value.
static U16 tcp_next_source_port = RandU16();
|
|
|
|
// TRUE for the states ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT,
// CLOSING, LAST_ACK and TIME_WAIT; FALSE for every other value.
static Bool TcpIsSynchronizedState(I64 state) {
  switch (state) {
    case TCP_STATE_ESTABLISHED:
    case TCP_STATE_FIN_WAIT_1:
    case TCP_STATE_FIN_WAIT_2:
    case TCP_STATE_CLOSE_WAIT:
    case TCP_STATE_CLOSING:
    case TCP_STATE_LAST_ACK:
    case TCP_STATE_TIME_WAIT:
      return TRUE;
  }

  return FALSE;
}
|
|
|
|
// Add the 16-bit words of header[0..length) to a running checksum.
//   sum    - running sum so far (0 to start a new checksum)
//   header - bytes to add; a trailing odd byte is ignored
//   length - number of bytes
// Returns the un-folded running sum. The result is 32 bits wide on purpose:
// the carries above bit 15 must survive until TcpFinalChecksum folds them in,
// so the return type must not be U16.
static U32 TcpPartialChecksum(U32 sum, U8* header, I64 length) {
  I64 nleft = length;
  U16* w = header;

  while (nleft > 1) {
    sum += *(w++);
    nleft -= 2;
  }

  return sum;
}
|
|
|
|
// Finish an Internet checksum.
//   sum    - running sum from TcpPartialChecksum (or 0)
//   header - bytes to add
//   length - number of bytes; may be odd
// Returns the one's complement of the folded 16-bit sum.
static U16 TcpFinalChecksum(U32 sum, U8* header, I64 length) {
  I64 nleft = length;
  U16* w = header;

  while (nleft > 1) {
    sum += *(w++);
    nleft -= 2;
  }

  // mop up an odd byte, if necessary
  if (nleft == 1) {
    // Read exactly one byte; a 16-bit load here would touch the byte
    // after the end of the buffer.
    U8* last = w;
    sum += *last;
  }

  // add back carry outs from top 16 bits to low 16 bits
  sum = (sum >> 16) + (sum & 0xffff);   // add hi 16 to low 16
  sum += (sum >> 16);                   // add carry
  return (~sum) & 0xffff;
}
|
|
|
|
// Allocate an IPv4 packet with room for a TCP header plus `length` payload bytes
// and fill in the TCP header (checksum left at 0).
// On success *frame_out points at the payload area, just past the TCP header,
// and the value returned by IPv4PacketAlloc is passed through.
// When IPv4PacketAlloc returns a negative value it is returned unchanged and
// *frame_out is not written.
I64 TcpPacketAlloc(U8** frame_out, U32 source_ip, U16 source_port, U32 dest_ip, U16 dest_port,
    U32 seq, U32 ack, U8 flags, I64 length) {
  U8* segment;
  CTcpHeader* hdr;
  I64 index = IPv4PacketAlloc(&segment, IP_PROTO_TCP, source_ip, dest_ip,
      sizeof(CTcpHeader) + length);

  if (index >= 0) {
    hdr = segment;

    hdr->source_port    = htons(source_port);
    hdr->dest_port      = htons(dest_port);
    hdr->seq            = htonl(seq);
    hdr->ack            = htonl(ack);
    hdr->data_offset    = (sizeof(CTcpHeader) / 4) << 4;
    hdr->flags          = flags;
    hdr->window_size    = htons(TCP_WINDOW_SIZE / 2);    // FIXME
    hdr->checksum       = 0;
    hdr->urgent_pointer = 0;

    *frame_out = segment + sizeof(CTcpHeader);
  }

  return index;
}
|
|
|
|
// Compute the TCP checksum of a segment prepared by TcpPacketAlloc and hand it
// to the IPv4 layer.
//   index        - packet index returned by TcpPacketAlloc
//   source_ip,
//   dest_ip      - addresses for the checksum pseudo header
//   frame        - payload pointer returned by TcpPacketAlloc (the TCP header sits right before it)
//   length       - payload length in bytes
//   send_buf_out - when non-NULL, receives a newly allocated copy of the whole
//                  segment (TCP header + payload) for retransmission; the caller
//                  must set its seq_end and owns the allocation
// Returns the result of IPv4PacketFinish.
I64 TcpPacketFinish(I64 index, U32 source_ip, U32 dest_ip, U8* frame, I64 length,
    CTcpSendBufHeader** send_buf_out) {
  CTcpHeader* hdr = frame - sizeof(CTcpHeader);

  CTcpPseudoHeader pseudo;
  pseudo.source_addr = htonl(source_ip);
  pseudo.dest_addr = htonl(dest_ip);
  pseudo.zeros = 0;
  pseudo.protocol = IP_PROTO_TCP;
  pseudo.tcp_length = htons(sizeof(CTcpHeader) + length);

  U32 sum = TcpPartialChecksum(0, &pseudo, sizeof(CTcpPseudoHeader));
  hdr->checksum = TcpFinalChecksum(sum, hdr, sizeof(CTcpHeader) + length);

  if (send_buf_out) {
    CTcpSendBufHeader* sb = MAlloc(sizeof(CTcpSendBufHeader) + sizeof(CTcpHeader) + length);
    sb->next = NULL;
    sb->time_sent = tS;
    sb->length = sizeof(CTcpHeader) + length;
    sb->retries = 0;
    sb->seq_start = ntohl(hdr->seq);
    sb->seq_end = 0;    // NEEDS TO BE SET UPSTREAM

    // Save the segment starting at the TCP header: the retransmit path copies
    // sb->length bytes straight into a fresh IPv4 payload, so the copy must
    // begin with the header, not with the payload that `frame` points at.
    MemCpy((sb(U8*)) + sizeof(CTcpSendBufHeader), hdr, sizeof(CTcpHeader) + length);
    *send_buf_out = sb;
  }

  return IPv4PacketFinish(index);
}
|
|
|
|
// Send a header-only TCP segment (no payload) that is not tied to a socket.
// Returns a negative value from TcpPacketAlloc on allocation failure,
// otherwise the result of TcpPacketFinish.
I64 TcpSend(U32 local_addr, U16 local_port, U32 remote_addr, U16 remote_port, U32 seq, U32 ack, U8 flags) {
  U8* payload;
  I64 index = TcpPacketAlloc(&payload, local_addr, local_port, remote_addr, remote_port,
      seq, ack, flags, 0);

  if (index >= 0)
    return TcpPacketFinish(index, local_addr, remote_addr, payload, 0, NULL);

  return index;
}
|
|
|
|
// Send a header-only TCP segment on socket s, using s->snd_nxt as the sequence
// number and s->rcv_nxt as the acknowledgement number.
// s->snd_nxt is advanced by one for SYN and by one for FIN.
// Returns a negative value from TcpPacketAlloc on allocation failure (snd_nxt
// untouched), otherwise the result of TcpPacketFinish.
I64 TcpSend2(CTcpSocket* s, U8 flags) {
  U8* payload;
  I64 seq_used = 0;
  I64 index = TcpPacketAlloc(&payload,
      s->local_addr, s->local_port, s->remote_addr, s->remote_port,
      s->snd_nxt, s->rcv_nxt, flags, 0);

  if (index < 0)
    return index;

  if (flags & TCP_FLAG_SYN)
    seq_used++;

  if (flags & TCP_FLAG_FIN)
    seq_used++;

  s->snd_nxt += seq_used;

  //"Sent #%d, to %08X, err = %d\n", s->seq, s->remote_addr, error;
  // FIXME: If the packet is SYN or FIN, we also need to queue for retransmit!
  return TcpPacketFinish(index, s->local_addr, s->remote_addr, payload, 0, NULL);
}
|
|
|
|
// Send a TCP segment with flags and `length` bytes of data on socket s, and
// queue a copy of it on the socket's send chain for retransmission.
// s->snd_nxt is advanced by the payload length, plus one each for SYN and FIN.
// Returns a negative value when no packet buffer could be allocated (nothing
// is sent or queued, snd_nxt untouched), and 0 once the segment has been queued.
I64 TcpSendData2(CTcpSocket* s, U8 flags, U8* data, I64 length) {
  U8* frame;
  I64 index = TcpPacketAlloc(&frame,
      s->local_addr, s->local_port, s->remote_addr, s->remote_port,
      s->snd_nxt, s->rcv_nxt, flags, length);

  if (index < 0)
    return index;

  if (length)
    MemCpy(frame, data, length);

  if (flags & TCP_FLAG_SYN)
    s->snd_nxt++;

  s->snd_nxt += length;

  if (flags & TCP_FLAG_FIN)
    s->snd_nxt++;

  //"Sent #%d, to %08X, err = %d\n", s->seq, s->remote_addr, error;

  CTcpSendBufHeader* sb;
  TcpPacketFinish(index, s->local_addr, s->remote_addr, frame, length, &sb);
  sb->seq_end = s->snd_nxt;

  // Append to SendBuf chain
  if (s->send_buf_first)
    s->send_buf_last->next = sb;
  else
    s->send_buf_first = sb;

  s->send_buf_last = sb;

  // The segment is queued and snd_nxt has moved on, so report success even if
  // the IPv4 layer failed to transmit it: the retransmit logic will resend it,
  // whereas a negative result would make the caller send the same data again
  // under new sequence numbers.
  return 0;
}
|
|
|
|
// Locate the TCP header and payload inside an IPv4 packet.
//   header_out - receives a pointer to the TCP header
//   data_out   - receives a pointer to the first payload byte
//   length_out - receives the payload length in bytes (never negative)
//   packet     - incoming IPv4 packet
// Returns 0 on success. Returns -1, without writing the outputs, when the
// packet is not TCP, is shorter than a TCP header, or carries a data offset
// that is smaller than the fixed header or larger than the packet.
I64 TcpParsePacket(CTcpHeader** header_out, U8** data_out, I64* length_out, CIPv4Packet* packet) {
  if (packet->proto != IP_PROTO_TCP)
    return -1;

  // FIXME: checksum

  // The fixed header must be present before any of its fields are read
  if (packet->length < sizeof(CTcpHeader))
    return -1;

  CTcpHeader* hdr = packet->data;
  I64 header_length = (hdr->data_offset >> 4) * 4;

  // A bogus data offset would otherwise yield a negative payload length
  // or a payload pointer outside of the packet
  if (header_length < sizeof(CTcpHeader) || header_length > packet->length)
    return -1;

  //"TCP: in hdr %d, flags %02Xh, seq %d, ack %d, len %d, chksum %d\n",
  //    header_length, hdr->flags, ntohl(hdr->seq), ntohl(hdr->ack),
  //    packet->length - header_length, ntohs(hdr->checksum);

  *header_out = hdr;
  *data_out = packet->data + header_length;
  *length_out = packet->length - header_length;
  return 0;
}
|
|
|
|
/*
|
|
class CTcpSendBufHeader {
|
|
CTcpSendBufHeader* next;
|
|
|
|
F64 time_sent;
|
|
U32 length;
|
|
U32 retries;
|
|
U32 seq_start;
|
|
U32 seq_end;
|
|
};
|
|
*/
|
|
|
|
// Drop segments from the front of s's send chain that are covered by the
// acknowledgement number seg_ack, updating the smoothed RTT for each one.
// Stops at the first segment that is not covered.
static U0 TcpSocketAckSendBufs(CTcpSocket* s, U32 seg_ack) {
  F64 t_now = tS;
  F64 rtt;
  I64 ack_off;
  I64 nxt_off;
  CTcpSendBufHeader* sb = s->send_buf_first;

  while (sb) {
    // Sequence numbers wrap, so "less than" has no meaning on its own.
    // Both values are measured as offsets from sb->seq_end instead; the
    // segment is acknowledged when
    //      sb->seq_end <= seg_ack <= s->snd_nxt
    // which holds for all meaningful ACKs.
    ack_off = (seg_ack - sb->seq_end) & 0xffffffff;
    nxt_off = (s->snd_nxt - sb->seq_end) & 0xffffffff;

    if (ack_off > nxt_off)
      break;

    // Update smoothed RTT
    rtt = t_now - sb->time_sent;
    s->srtt = (s->srtt * TCP_SRTT_ALPHA) + ((1.0 - TCP_SRTT_ALPHA) * rtt);
    //"ACK'd %d->%d (RTT %f ms)", sb->seq_start, sb->seq_end, rtt * 1000;

    // Unlink from the chain and release
    s->send_buf_first = sb->next;

    if (!s->send_buf_first)
      s->send_buf_last = NULL;

    Free(sb);
    sb = s->send_buf_first;
  }
}
|
|
|
|
// Retransmit segments at the front of s's send chain whose retransmission
// timeout has expired. The timeout is TCP_RTO_BETA * srtt, clamped to
// [TCP_RTO_MIN, TCP_RTO_MAX]. A retransmitted segment gets a fresh time stamp
// and is moved to the end of the chain. Processing stops at the first segment
// that has not timed out, or when no packet buffer can be allocated.
static U0 TcpSocketCheckSendBufs(CTcpSocket* s) {
  F64 t_now = tS;
  F64 rto = TCP_RTO_BETA * s->srtt;
  CTcpSendBufHeader* sb;
  U8* frame;
  I64 index;

  if (rto < TCP_RTO_MIN) rto = TCP_RTO_MIN;
  if (rto > TCP_RTO_MAX) rto = TCP_RTO_MAX;

  sb = s->send_buf_first;

  while (sb && t_now > sb->time_sent + rto) {
    // Retransmit
    "Retransmit %d->%d (%f ms)!\n", sb->seq_start, sb->seq_end, (t_now - sb->time_sent) * 1000;

    index = IPv4PacketAlloc(&frame, IP_PROTO_TCP, s->local_addr, s->remote_addr, sb->length);

    if (index < 0)
      return;     // retry later I guess

    MemCpy(frame, (sb(U8*)) + sizeof(CTcpSendBufHeader), sb->length);
    IPv4PacketFinish(index);

    sb->time_sent = tS;

    // Move to the end of the chain
    s->send_buf_first = sb->next;
    sb->next = NULL;

    if (s->send_buf_first)
      s->send_buf_last->next = sb;
    else
      s->send_buf_first = sb;

    s->send_buf_last = sb;

    sb = s->send_buf_first;
  }
}
|
|
|
|
// accept() for a listening socket: block until a connection is queued in the
// backlog, take it out, then wait up to TCP_CONNECT_TIMEOUT ms for it to reach
// ESTABLISHED.
// Returns the new socket, or -1 when s is not listening or the new connection
// did not become established (it is closed in that case).
// addr and addrlen are currently unused.
I64 TcpSocketAccept(CTcpSocket* s, sockaddr* addr, I64 addrlen) {
  CTcpSocket* conn;
  I64 deadline;

  no_warn addr;   // FIXME
  no_warn addrlen;

  if (s->state != TCP_STATE_LISTEN)
    return -1;

  // TODO: Thread safe?
  while (!s->backlog_first)
    Yield;

  conn = s->backlog_first;
  "Retr %p\n", conn;

  // Pop the connection off the backlog and give its slot back
  s->backlog_first = s->backlog_first->backlog_next;
  if (!s->backlog_first)
    s->backlog_last = NULL;

  s->backlog_remaining++;

  // TODO: this should be done in a way that doesn't block on accept()
  deadline = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000;

  while (cnts.jiffies < deadline
      && conn->state != TCP_STATE_ESTABLISHED && conn->state != TCP_STATE_CLOSED)
    Yield;

  if (conn->state != TCP_STATE_ESTABLISHED) {
    close(conn);
    return -1;
  }

  return conn;
}
|
|
|
|
// bind() for a closed socket: claim the local port given in addr (a sockaddr_in).
// Returns 0 on success, -1 when addrlen is too small, the socket is not in the
// CLOSED state, or the port is already taken.
I64 TcpSocketBind(CTcpSocket* s, sockaddr* addr, I64 addrlen) {
  sockaddr_in* in;
  U16 port;

  if (addrlen < sizeof(sockaddr_in) || s->state != TCP_STATE_CLOSED)
    return -1;

  in = addr;
  port = ntohs(in->sin_port);

  // TODO: address & stuff
  if (tcp_bound_sockets[port] != NULL)
    return -1;

  tcp_bound_sockets[port] = s;
  s->local_port = port;
  s->local_addr = IPv4GetAddress();

  return 0;
}
|
|
|
|
// close() for a TCP socket: shut the connection down (if there is one),
// unregister the socket and release all memory it owns, including s itself.
// Always returns 0.
//
// The FIN handshake is attempted only for sockets in ESTABLISHED,
// SYN_RECEIVED or CLOSE_WAIT. Sockets that never had a connection (CLOSED,
// LISTEN, SYN_SENT) are torn down directly. Every wait is bounded by
// TCP_CONNECT_TIMEOUT; a connection that is still synchronized afterwards
// is reset.
I64 TcpSocketClose(CTcpSocket* s) {
  /* https://tools.ietf.org/html/rfc793#section-3.5
    Case 1: Local user initiates the close
      A FIN segment is sent and the socket enters FIN-WAIT-1. Once the peer
      has acknowledged the FIN the socket is in FIN-WAIT-2 and waits for the
      peer's own FIN.
    Case 2: TCP receives a FIN from the network
      The socket is in CLOSE-WAIT; when the local user closes, a FIN is sent
      and the socket enters LAST-ACK until that FIN is acknowledged.
  */
  I64 maxtime;

  if (s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_SYN_RECEIVED
      || s->state == TCP_STATE_CLOSE_WAIT) {
    Bool peer_closed_first = (s->state == TCP_STATE_CLOSE_WAIT);

    // RFC 793 receivers drop segments without ACK in synchronized states,
    // so the FIN carries ACK as well.
    TcpSend2(s, TCP_FLAG_FIN | TCP_FLAG_ACK);

    if (peer_closed_first)
      s->state = TCP_STATE_LAST_ACK;
    else
      s->state = TCP_STATE_FIN_WAIT_1;

    // Block until all outgoing data including our FIN have been acknowledged
    // (una == nxt). The FIN itself is not queued for retransmission, hence
    // the time limit.
    maxtime = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000;

    while ((s->state == TCP_STATE_FIN_WAIT_1 || s->state == TCP_STATE_LAST_ACK)
        && s->snd_una != s->snd_nxt && cnts.jiffies < maxtime) {
      TcpSocketCheckSendBufs(s);
      Yield;
    }

    // Advance only when our FIN really was acknowledged, and never overwrite
    // a state the packet handler has moved us to in the meantime
    // (e.g. TIME_WAIT or CLOSED).
    if (s->snd_una == s->snd_nxt) {
      if (s->state == TCP_STATE_FIN_WAIT_1)
        s->state = TCP_STATE_FIN_WAIT_2;
      else if (s->state == TCP_STATE_LAST_ACK)
        s->state = TCP_STATE_CLOSED;
    }

    // Now wait for the other side's FIN; the packet handler acknowledges it
    maxtime = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000;

    while (s->state == TCP_STATE_FIN_WAIT_2 && cnts.jiffies < maxtime)
      Yield;
  }

  // Still connected? RST it!
  if (TcpIsSynchronizedState(s->state)) {
    TcpSend2(s, TCP_FLAG_RST);
  }

  // Free backlog
  CTcpSocket* backlog = s->backlog_first;
  CTcpSocket* backlog2;

  while (backlog) {
    backlog2 = backlog->backlog_next;
    close(backlog);
    backlog = backlog2;
  }

  // A socket is registered either in the per-port connection list or as the
  // bound socket of its port
  if (s->local_port)
    if (!RemoveTcpSocketFromList(s))
      tcp_bound_sockets[s->local_port] = NULL;

  // Release segments that were never acknowledged
  CTcpSendBufHeader* sb = s->send_buf_first;
  CTcpSendBufHeader* sb_next;

  while (sb) {
    sb_next = sb->next;
    Free(sb);
    sb = sb_next;
  }

  Free(s->recv_buf);
  Free(s);
  return 0;
}
|
|
|
|
// connect() for a closed socket: pick a local port in 0x8000..0xffff, send a
// SYN to the address in addr (a sockaddr_in) and wait up to
// TCP_CONNECT_TIMEOUT ms for the connection to become ESTABLISHED.
// Returns 0 on success, -1 when addrlen is too small, the socket is not
// CLOSED, the chosen local port is taken, or the connection was not
// established in time.
I64 TcpSocketConnect(CTcpSocket* s, sockaddr* addr, I64 addrlen) {
  sockaddr_in* in;
  U16 port;
  I64 deadline;

  if (addrlen < sizeof(sockaddr_in) || s->state != TCP_STATE_CLOSED)
    return -1;

  in = addr;

  port = 0x8000 + (tcp_next_source_port & 0x7fff);
  tcp_next_source_port++;

  // TODO: address & stuff
  if (tcp_bound_sockets[port] != NULL)
    return -1;

  tcp_bound_sockets[port] = s;

  // Endpoints
  s->local_addr  = IPv4GetAddress();
  s->local_port  = port;
  s->remote_addr = ntohl(in->sin_addr.s_addr);
  s->remote_port = ntohs(in->sin_port);

  // Send side
  s->snd_una = 0;
  s->snd_nxt = 0;
  s->snd_wnd = 0;
  s->mss     = TCP_DEFAULT_MSS;

  // Receive side
  s->rcv_nxt = 0;
  s->rcv_wnd = TCP_WINDOW_SIZE;

  s->conntime = tS;

  TcpSend2(s, TCP_FLAG_SYN);
  s->state = TCP_STATE_SYN_SENT;

  // TODO: TcpSetTimeout
  deadline = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000;

  while (cnts.jiffies < deadline
      && s->state != TCP_STATE_ESTABLISHED && s->state != TCP_STATE_CLOSED)
    Yield;

  if (s->state == TCP_STATE_ESTABLISHED)
    return 0;

  return -1;
}
|
|
|
|
// listen() for a closed socket: switch it to the LISTEN state with room for
// `backlog` pending connections. Incoming SYN packets are then handled by
// TcpHandler, which opens the connection and queues the new socket in this
// socket's accept backlog.
// Returns 0 on success, -1 when the socket is not in the CLOSED state.
I64 TcpSocketListen(CTcpSocket* s, I64 backlog) {
  if (s->state == TCP_STATE_CLOSED) {
    s->state = TCP_STATE_LISTEN;
    s->backlog_remaining = backlog;
    return 0;
  }

  return -1;
}
|
|
|
|
// recvfrom() for a TCP socket: copy up to len bytes from the socket's receive
// ring buffer into buf. Blocks while the connection is able to receive
// (ESTABLISHED or FIN_WAIT_1) and the buffer is empty.
// Returns the number of bytes copied; 0 when len is 0 or when the buffer is
// empty and the connection can no longer receive.
// flags, src_addr and addrlen are currently unused.
I64 TcpSocketRecvfrom(CTcpSocket* s, U8* buf, I64 len, I64 flags, sockaddr* src_addr, I64 addrlen) {
  no_warn flags;
  no_warn src_addr;   // FIXME
  no_warn addrlen;
  //"TcpSocketRecvfrom\n";

  // If we are ready to receive data, but there is none currently, block until we receive some.
  // TODO: checking for FIN-WAIT-1 here is not so useful, since it only exists while we are in Close()
  while ((s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_FIN_WAIT_1)
      && s->recv_buf_read_pos == s->recv_buf_write_pos) {
    TcpSocketCheckSendBufs(s);
    Yield;
  }

  // Nothing buffered and the connection left the receiving states: report
  // end of stream.
  // TODO: this works for now, but we should be still able to receive data
  //       in connection-closing states
  if ((s->state != TCP_STATE_ESTABLISHED && s->state != TCP_STATE_FIN_WAIT_1
          && s->recv_buf_read_pos == s->recv_buf_write_pos)
      || len == 0)
    return 0;

  I64 read_pos = s->recv_buf_read_pos;
  I64 write_pos = s->recv_buf_write_pos;

  I64 read_total = 0;
  I64 step;

  if (write_pos < read_pos) {
    // The data wraps around; first read up to the end of the buffer
    step = s->recv_buf_size - read_pos;

    if (step > len)
      step = len;

    //"Read %d from %d..end\n", step, read_pos;
    MemCpy(buf, s->recv_buf + read_pos, step);
    buf += step;
    len -= step;
    read_pos = (read_pos + step) & (s->recv_buf_size - 1);
    read_total += step;

    // at this point, (len == 0 || read_pos == 0) must be true
  }

  if (len) {
    // Contiguous part between read_pos and write_pos
    step = write_pos - read_pos;

    if (step > len)
      step = len;

    //"Read %d from start+%d..\n", step, read_pos;
    MemCpy(buf, s->recv_buf + read_pos, step);
    buf += step;
    len -= step;
    read_pos += step;
    read_total += step;
  }

  s->recv_buf_read_pos = read_pos;
  return read_total;
}
|
|
|
|
// sendto() for a TCP socket.
// Blocks until at least some data has been sent; after that it returns as
// soon as the transmission window or the outgoing buffers are full.
// Data is only sent while the socket is ESTABLISHED or in CLOSE_WAIT.
// Returns the number of bytes from buf that were sent and queued for
// acknowledgement (0 when the socket is not in a sending state or len is 0).
// flags, dest_addr and addrlen are currently unused.
I64 TcpSocketSendto(CTcpSocket* s, U8* buf, I64 len, I64 flags, sockaddr_in* dest_addr, I64 addrlen) {
  no_warn dest_addr;  // TODO: should be validated instead, no?
  no_warn addrlen;
  no_warn flags;

  I64 sent_total = 0;

  while ((s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_CLOSE_WAIT) && len) {
    // Room left in the peer's window
    I64 can_send = (s->snd_una + s->snd_wnd - s->snd_nxt) & 0xffffffff;

    // TODO: Keep trying
    // Must be tied to a timeout; see RFC793/Managing-the-Window
    //if (s->snd_wnd == 0)
    //  can_send = 1;

    if (can_send == 0) {
      if (sent_total > 0)
        break;
      else {
        // Check unacknowledged outgoing packets, re-transmit as needed
        TcpSocketCheckSendBufs(s);
        Yield;
      }
    }
    else {
      if (can_send > len)
        can_send = len;

      if (can_send > s->mss)
        can_send = s->mss;

      if (TcpSendData2(s, TCP_FLAG_ACK, buf, can_send) < 0) {
        // No out-buffers available! Handle in the same way as full window:
        // stall until some of the outgoing data is acknowledged.
        if (sent_total > 0)
          break;
        else {
          // Check unacknowledged outgoing packets, re-transmit as needed
          TcpSocketCheckSendBufs(s);
          Yield;
        }
      }
      else {
        buf += can_send;
        len -= can_send;
        // Account for the bytes just sent; without this the function would
        // always report 0 and never take the early-exit paths above.
        sent_total += can_send;
      }
    }
  }

  return sent_total;
}
|
|
|
|
// setsockopt() for a TCP socket. No options are implemented (a receive
// time-out option, SO_RCVTIMEO_MS, was sketched but is disabled), so every
// call fails with -1 and all arguments are ignored.
I64 TcpSocketSetsockopt(CTcpSocket* s, I64 level, I64 optname, U8* optval, I64 optlen) {
  no_warn s;
  no_warn level;
  no_warn optname;
  no_warn optval;
  no_warn optlen;

  return -1;
}
|
|
|
|
// Create a new TCP socket in the CLOSED state with an empty receive buffer of
// TCP_WINDOW_SIZE bytes.
// Returns NULL unless domain is AF_INET and type is SOCK_STREAM.
CTcpSocket* TcpSocket(U16 domain, U16 type) {
  if (domain != AF_INET || type != SOCK_STREAM)
    return NULL;

  // Zero-filled allocation: fields that are only assigned later (addresses,
  // ports, sequence numbers, srtt, ...) must not hold garbage, because e.g.
  // TcpSocketClose reads local_port even on a socket that was never bound.
  CTcpSocket* s = CAlloc(sizeof(CTcpSocket));

  // Operations of the generic socket interface
  s->sock.accept = &TcpSocketAccept;
  s->sock.bind = &TcpSocketBind;
  s->sock.close = &TcpSocketClose;
  s->sock.connect = &TcpSocketConnect;
  s->sock.listen = &TcpSocketListen;
  s->sock.recvfrom = &TcpSocketRecvfrom;
  s->sock.sendto = &TcpSocketSendto;
  s->sock.setsockopt = &TcpSocketSetsockopt;

  s->state = TCP_STATE_CLOSED;

  s->send_buf_first = NULL;
  s->send_buf_last = NULL;

  s->recv_buf_size = TCP_WINDOW_SIZE;
  s->recv_buf = MAlloc(s->recv_buf_size);
  s->recv_buf_read_pos = 0;
  s->recv_buf_write_pos = 0;

  s->backlog_next = NULL;
  s->backlog_first = NULL;
  s->backlog_last = NULL;
  s->backlog_remaining = 0;

  return s;
}
|
|
|
|
// Process one incoming TCP segment for socket s.
//   s      - socket the segment was dispatched to
//   packet - IPv4 packet the segment arrived in
//   hdr    - TCP header inside the packet
//   data   - first payload byte
//   length - payload length in bytes
//
// A listening socket answers a SYN by creating a new socket in SYN_RECEIVED
// and queueing it in its backlog, and resets anything else. For other sockets
// the segment's SYN, sequence number, ACK, RST, payload and FIN are processed
// in that order and an ACK is sent back when required.
U0 TcpSocketHandle(CTcpSocket* s, CIPv4Packet* packet, CTcpHeader* hdr, U8* data, I64 length) {
  // Sequence space used by the segment: payload plus one each for SYN and FIN
  U32 seg_len = length;

  if (hdr->flags & TCP_FLAG_FIN) seg_len++;
  if (hdr->flags & TCP_FLAG_SYN) seg_len++;

  U32 seg_seq = ntohl(hdr->seq);

  if (s->state == TCP_STATE_LISTEN) {
    // A new connection is being opened.

    if ((hdr->flags & TCP_FLAG_SYN) && s->backlog_remaining > 0) {
      //"SYN in from %08X:%d => %08X:%d.\n", packet->source_ip, ntohs(hdr->source_port),
      //    packet->dest_ip, ntohs(hdr->dest_port);
      CTcpSocket* new_socket = TcpSocket(AF_INET, SOCK_STREAM);

      new_socket->local_addr = IPv4GetAddress();
      new_socket->local_port = s->local_port;
      new_socket->remote_addr = packet->source_ip;
      new_socket->remote_port = ntohs(hdr->source_port);

      new_socket->snd_una = 0;
      new_socket->snd_nxt = 0;
      new_socket->snd_wnd = 0;
      new_socket->mss = TCP_DEFAULT_MSS;

      new_socket->rcv_nxt = ++seg_seq;
      new_socket->rcv_wnd = TCP_WINDOW_SIZE;

      new_socket->conntime = tS;

      TcpSend2(new_socket, TCP_FLAG_SYN | TCP_FLAG_ACK);
      new_socket->state = TCP_STATE_SYN_RECEIVED;

      AddTcpSocketToList(new_socket);

      // Append to the listener's accept backlog
      if (s->backlog_last)
        s->backlog_last->backlog_next = new_socket;
      else
        s->backlog_first = new_socket;

      s->backlog_last = new_socket;
      s->backlog_remaining--;
    }
    else {
      //"REJ %08X:%d (as %08X:%d)\n", packet->source_ip, ntohs(hdr->source_port),
      //    packet->dest_ip, ntohs(hdr->dest_port);
      TcpSend(packet->dest_ip, ntohs(hdr->dest_port), packet->source_ip, ntohs(hdr->source_port),
          seg_seq + 1, seg_seq + 1, TCP_FLAG_ACK | TCP_FLAG_RST);
    }

    return;
  }

  if (s->state == TCP_STATE_CLOSED)
    return;

  Bool must_ack = FALSE;

  // Process SYN
  if (hdr->flags & TCP_FLAG_SYN) {
    s->rcv_nxt = ++seg_seq;
    //"Reset ACK to %d\n", s->ack;

    must_ack = TRUE;
  }

  // Validate SEQ
  Bool valid_seq;

  if (seg_len == 0 && s->rcv_wnd == 0) {
    valid_seq = (seg_seq == s->rcv_nxt);
  }
  else {
    // At least one of these must be true:
    //  RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
    //  RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
    I64 rel_seq = ((seg_seq - s->rcv_nxt) & 0xffffffff);
    I64 rel_seq_end = ((seg_seq + seg_len - 1 - s->rcv_nxt) & 0xffffffff);

    if (rel_seq < s->rcv_wnd || rel_seq_end < s->rcv_wnd)
      valid_seq = TRUE;
    else
      valid_seq = FALSE;
  }

  if (!valid_seq)
    "SEQ error: seg_seq %d, seg_len %d, rcv_nxt %d, rcv_wnd %d\n", seg_seq, seg_len, s->rcv_nxt, s->rcv_wnd;

  // Process ACK
  if (hdr->flags & TCP_FLAG_ACK) {
    U32 seg_ack = ntohl(hdr->ack);
    // ACK is acceptable iff SND.UNA < SEG.ACK =< SND.NXT

    I64 rel_ack = ((seg_ack - s->snd_una) & 0xffffffff);
    I64 rel_nxt = ((s->snd_nxt - s->snd_una) & 0xffffffff);

    // RFC 793 is poorly worded in this regard, unacceptable ACK
    // is not the opposite of an acceptable (= new) ACK!
    // TODO: Instead of zero, we should compare rel_ack to some NEGATIVE_CONSTANT,
    //       so that we don't unnecessarily try to correct every slightly delayed ACK
    if (/*0 < rel_ack &&*/ rel_ack <= rel_nxt) {
      TcpSocketAckSendBufs(s, seg_ack);

      // Accept ACK
      s->snd_una = seg_ack;

      if (s->state == TCP_STATE_SYN_SENT && (hdr->flags & TCP_FLAG_SYN)) {
        s->state = TCP_STATE_ESTABLISHED;
        s->srtt = tS - s->conntime;
        //"Initial RTT: %f ms", s->srtt * 1000;
      }
      else if (s->state == TCP_STATE_SYN_RECEIVED) {
        //"Connection established.\n";
        s->state = TCP_STATE_ESTABLISHED;
        s->srtt = tS - s->conntime;
        //"Initial RTT: %f ms", s->srtt * 1000;
      }
    }
    else {
      // Unacceptable ACK
      "Bad ACK; state %d, seg_ack %d, snd_nxt %d\n", s->state, seg_ack, s->snd_nxt;

      if (s->state == TCP_STATE_LISTEN || s->state == TCP_STATE_SYN_SENT
          || s->state == TCP_STATE_SYN_RECEIVED) {
        // Reset
        TcpSend(packet->dest_ip, ntohs(hdr->dest_port), packet->source_ip, ntohs(hdr->source_port),
            seg_ack, seg_seq + seg_len, TCP_FLAG_ACK | TCP_FLAG_RST);
      }
      else if (TcpIsSynchronizedState(s->state)) {
        // Send a 'corrective' ACK
        must_ack = TRUE;
      }
    }
  }

  // Process RST
  if (hdr->flags & TCP_FLAG_RST) {
    if ((s->state == TCP_STATE_SYN_SENT)) {
      // If acknowledged
      if (s->snd_una == s->snd_nxt) {
        "Connection refused\n";
        s->state = TCP_STATE_CLOSED;
        return;
      }
    }
    else {
      if (valid_seq) {
        "Connection reset by peer\n";
        s->state = TCP_STATE_CLOSED;
        return;
      }
    }

    "Spurious RST\n";
  }

  // FIXME check remote addr & port

  // Process data
  if (valid_seq) {
    // The header field is in network byte order like every other 16-bit field
    s->snd_wnd = ntohs(hdr->window_size);

    // FIN_WAIT_2 still receives: our side has finished sending, but the peer
    // may send more data and must be able to deliver its FIN.
    if (s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_FIN_WAIT_1
        || s->state == TCP_STATE_FIN_WAIT_2) {
      I64 write_pos = s->recv_buf_write_pos;
      //"%d in @ %d", length, write_pos;

      // Skip retransmitted bytes
      while (length && seg_seq != s->rcv_nxt) {
        seg_seq = (seg_seq + 1) & 0xffffffff;
        data++;
        length--;
      }

      // TRUE when what remains of the segment starts exactly at the next
      // expected octet (FALSE e.g. for a segment from further ahead, whose
      // bytes were all skipped above)
      Bool in_order = (seg_seq == s->rcv_nxt);

      // Copy into the ring buffer byte by byte, stopping when it is full
      I64 i = 0;
      for (i = 0; i < length; i++) {
        I64 next_pos = (write_pos + 1) & (s->recv_buf_size - 1);

        if (next_pos == s->recv_buf_read_pos)
          break;

        s->recv_buf[write_pos] = data[i];
        write_pos = next_pos;
      }

      s->recv_buf_write_pos = write_pos;
      s->rcv_nxt += i;
      //"; %d saved\n", i;

      if (i > 0)
        must_ack = TRUE;

      if (hdr->flags & TCP_FLAG_FIN) {
        // Always answer a FIN so the peer learns where we stand
        must_ack = TRUE;

        // Consume the FIN only when everything before it has been received:
        // the segment was in order and all of its data fit into the buffer.
        // Otherwise the peer has to retransmit and the FIN is handled then.
        if (in_order && i == length) {
          s->rcv_nxt++;

          if (s->state == TCP_STATE_ESTABLISHED) {
            s->state = TCP_STATE_CLOSE_WAIT;
          }
          else if (s->state == TCP_STATE_FIN_WAIT_1 || s->state == TCP_STATE_FIN_WAIT_2) {
            s->state = TCP_STATE_TIME_WAIT;
          }
          //else { ?? }
        }
      }
    }
  }

  if (must_ack) {
    TcpSend2(s, TCP_FLAG_ACK);
  }
}
|
|
|
|
// Layer-4 entry point for incoming IPv4 packets carrying TCP.
// Parses the segment and hands it to the matching connection socket or,
// failing that, to the socket bound to the destination port. Segments for
// which no socket exists are silently dropped.
// Returns the (negative) error from TcpParsePacket, or its success value.
I64 TcpHandler(CIPv4Packet* packet) {
  CTcpHeader* hdr;
  U8* payload;
  I64 payload_len;
  I64 error = TcpParsePacket(&hdr, &payload, &payload_len, packet);

  if (error < 0)
    return error;

  U16 dest_port = ntohs(hdr->dest_port);
  //"%u => %p\n", dest_port, tcp_bound_sockets[dest_port];

  // Prefer an existing connection; fall back to the bound (e.g. listening) socket
  CTcpSocket* target = GetTcpSocketFromList(packet, hdr);
  if (target == NULL)
    target = tcp_bound_sockets[dest_port];

  // FIXME: should also check that bound address is INADDR_ANY,
  //        OR packet dest IP matches bound address
  // TODO: when there is no socket, send RST as per RFC793/Reset-Generation
  if (target)
    TcpSocketHandle(target, packet, hdr, payload, payload_len);

  return error;
}
|
|
|
|
// Allocate the per-port lookup tables: a zeroed table of bound sockets and,
// for every port, an empty connection list consisting of a zeroed head node.
U0 TcpInit() {
  I64 port;

  tcp_bound_sockets = CAlloc(65536 * sizeof(CTcpSocket*));
  tcp_socket_list = MAlloc(65536 * sizeof(CTcpSocketListItem*));

  for (port = 0; port < 65536; port++)
    tcp_socket_list[port] = CAlloc(sizeof(CTcpSocketListItem));
}
|
|
|
|
// Executed when this file is loaded: set up the tables, then hook TCP into
// the IPv4 layer and the socket layer.
TcpInit;
RegisterL4Protocol(IP_PROTO_TCP, &TcpHandler);
RegisterSocketClass(AF_INET, SOCK_STREAM, &TcpSocket);
|