diff --git a/Net/Arp.HC b/Net/Arp.HC new file mode 100644 index 0000000..83a5086 --- /dev/null +++ b/Net/Arp.HC @@ -0,0 +1,128 @@ +// Not a Network Layer protocol, but it is encapsulated in L2 frames, which +// makes it L3 for our purposes + +#define ARP_REQUEST 0x01 +#define ARP_REPLY 0x02 + +class CArpHeader { + U16 htype; + U16 ptype; + U8 hlen; + U8 plen; + U16 oper; + U8 sha[6]; + U32 spa; + U8 tha[6]; + U32 tpa; +}; + +class CArpCacheEntry { + CArpCacheEntry *next; + U32 ip; + U8 mac[6]; +}; + +// Stored in network order +static U32 arp_my_ipv4_n = 0; + +// TODO: use a Hash table +static CArpCacheEntry *arp_cache = NULL; + +// IPs are in network order +I64 ArpSend(U16 oper, U8 *dest_mac, U8 *sender_mac, U32 sender_ip_n, + U8 *target_mac, U32 target_ip_n) { + U8 *frame; + + I64 index = EthernetFrameAlloc(&frame, sender_mac, dest_mac, ETHERTYPE_ARP, + sizeof(CArpHeader), 0); + + if (index < 0) + return index; + + CArpHeader *hdr = frame; + hdr->htype = htons(1); + hdr->ptype = htons(ETHERTYPE_IPV4); + hdr->hlen = 6; + hdr->plen = 4; + hdr->oper = htons(oper); + MemCpy(hdr->sha, sender_mac, 6); + hdr->spa = sender_ip_n; + MemCpy(hdr->tha, target_mac, 6); + hdr->tpa = target_ip_n; + + return EthernetFrameFinish(index); +} + +U0 ArpSetIPv4Address(U32 addr) { + arp_my_ipv4_n = htonl(addr); + + // Broadcast our new address + ArpSend(ARP_REPLY, eth_broadcast, EthernetGetAddress(), arp_my_ipv4_n, + eth_null, arp_my_ipv4_n); +} + +CArpCacheEntry *ArpCacheFindByIP(U32 ip) { + CArpCacheEntry *e = arp_cache; + + while (e) { + if (e->ip == ip) + return e; + e = e->next; + } + + return e; +} + +// Insert the ip -> mac mapping, or refresh the MAC of an entry that already +// exists for ip; returns the cache entry +CArpCacheEntry *ArpCachePut(U32 ip, U8 *mac) { + CArpCacheEntry *e = ArpCacheFindByIP(ip); + + if (!e) { + //"ARP: add entry for %08X\n", ip; + e = MAlloc(sizeof(CArpCacheEntry)); + e->next = arp_cache; + e->ip = ip; + MemCpy(e->mac, mac, 6); + arp_cache = e; + } else { + // Known IP: overwrite the stored MAC so that a host whose hardware + // address changed is not left with a stale mapping + MemCpy(e->mac, mac, 6); + } 
+ + return e; +} + +I64 ArpHandler(CEthFrame *eth_frame) { + if (eth_frame->ethertype != ETHERTYPE_ARP) + return -1; + + // FIXME[obecebo]: this blocks responding to ARP_REQUEST? [2019/08/05] + if (eth_frame->length < sizeof(CArpHeader)) + return -1; + + CArpHeader *hdr = eth_frame->data; + U16 oper = ntohs(hdr->oper); + + //"ARP: htype %d, ptype %d, hlen %d, plen %d, oper %d\n", + // ntohs(hdr->htype), ntohs(hdr->ptype), hdr->hlen, hdr->plen, oper; + //" spa %08X, tpa %08X\n", ntohl(hdr->spa), ntohl(hdr->tpa); + + if (ntohs(hdr->htype) != 1 || ntohs(hdr->ptype) != ETHERTYPE_IPV4 || + hdr->hlen != 6 || hdr->plen != 4) + return -1; + + if (oper == ARP_REQUEST) { + // Not too sure about this line, but it seems necessary in WiFi networks, + // because the wireless device won't hear our Ethernet broadcast when we + // Request + // ArpCachePut(ntohl(hdr->spa), hdr->sha); + + if (hdr->tpa == arp_my_ipv4_n) { + ArpSend(ARP_REPLY, hdr->sha, EthernetGetAddress(), arp_my_ipv4_n, + hdr->sha, hdr->spa); + } + } else if (oper == ARP_REPLY) { + ArpCachePut(ntohl(hdr->spa), hdr->sha); + } + + return 0; +} + +RegisterL3Protocol(ETHERTYPE_ARP, &ArpHandler); diff --git a/Net/Dhcp.HC b/Net/Dhcp.HC new file mode 100644 index 0000000..46af956 --- /dev/null +++ b/Net/Dhcp.HC @@ -0,0 +1,281 @@ + +#define BOOTREQUEST 0x01 +#define BOOTREPLY 0x02 + +#define HTYPE_ETHERNET 0x01 + +#define HLEN_ETHERNET 6 + +#define DHCP_OPTION_SUBNET_MASK 1 +#define DHCP_OPTION_ROUTER 3 +#define DHCP_OPTION_DNS 6 +#define DHCP_OPTION_DOMAIN_NAME 15 +#define DHCP_OPTION_REQUESTED_IP 50 +#define DHCP_OPTION_MSGTYPE 53 +#define DHCP_OPTION_SERVER_ID 54 +#define DHCP_OPTION_PARAMLIST 55 + +#define DHCP_COOKIE 0x63825363 +#define DHCP_MSGTYPE_DISCOVER 0x01 +#define DHCP_MSGTYPE_OFFER 0x02 +#define DHCP_MSGTYPE_REQUEST 0x03 +#define DHCP_MSGTYPE_ACK 0x05 + +class CDhcpHeader { + U8 op; + U8 htype; + U8 hlen; + U8 hops; + U32 xid; + U16 secs; + U16 flags; + U32 ciaddr; + U32 yiaddr; + U32 siaddr; + U32 giaddr; + 
U8 chaddr[16]; + U8 sname[64]; + U8 file[128]; +}; + +class CDhcpDiscoverOptions { + U32 cookie; + // DHCP Message Type + U8 dmt_type; + U8 dmt_length; + U8 dmt; + // DHCP Parameter Request List + U8 prl_type; + U8 prl_length; + U8 prl[4]; + + U8 end; +}; + +class CDhcpRequestOptions { + U32 cookie; + // DHCP Message Type + U8 dmt_type; + U8 dmt_length; + U8 dmt; + // DHCP Requested IP + U8 requested_ip_type; + U8 requested_ip_length; + U32 requested_ip; + // DHCP Server Identifier + U8 server_id_type; + U8 server_id_length; + U32 server_id; + + U8 end; +}; + +U32 DhcpBeginTransaction() { return RandU32(); } + +I64 DhcpSendDiscover(U32 xid) { + U8 *frame; + I64 index = + UdpPacketAlloc(&frame, 0x00000000, 68, 0xffffffff, 67, + sizeof(CDhcpHeader) + sizeof(CDhcpDiscoverOptions)); + + if (index < 0) + return index; + + CDhcpHeader *dhcp = frame; + MemSet(dhcp, 0, sizeof(CDhcpHeader)); + dhcp->op = BOOTREQUEST; + dhcp->htype = HTYPE_ETHERNET; + dhcp->hlen = HLEN_ETHERNET; + dhcp->hops = 0; + dhcp->xid = htonl(xid); + dhcp->secs = 0; + dhcp->flags = htons(0x8000); + dhcp->ciaddr = 0; + dhcp->yiaddr = 0; + dhcp->siaddr = 0; + dhcp->giaddr = 0; + MemCpy(dhcp->chaddr, EthernetGetAddress(), 6); + + CDhcpDiscoverOptions *opts = frame + sizeof(CDhcpHeader); + opts->cookie = htonl(DHCP_COOKIE); + opts->dmt_type = DHCP_OPTION_MSGTYPE; + opts->dmt_length = 1; + opts->dmt = DHCP_MSGTYPE_DISCOVER; + opts->prl_type = DHCP_OPTION_PARAMLIST; + opts->prl_length = 4; + opts->prl[0] = DHCP_OPTION_SUBNET_MASK; + opts->prl[1] = DHCP_OPTION_ROUTER; + opts->prl[2] = DHCP_OPTION_DNS; + opts->prl[3] = DHCP_OPTION_DOMAIN_NAME; + opts->end = 0xff; + + return UdpPacketFinish(index); +} + +I64 DhcpSendRequest(U32 xid, U32 requested_ip, U32 siaddr) { + U8 *frame; + I64 index = UdpPacketAlloc(&frame, 0x00000000, 68, 0xffffffff, 67, + sizeof(CDhcpHeader) + sizeof(CDhcpRequestOptions)); + + if (index < 0) + return index; + + CDhcpHeader *dhcp = frame; + MemSet(dhcp, 0, sizeof(CDhcpHeader)); + 
dhcp->op = BOOTREQUEST; + dhcp->htype = HTYPE_ETHERNET; + dhcp->hlen = HLEN_ETHERNET; + dhcp->hops = 0; + dhcp->xid = htonl(xid); + dhcp->secs = 0; + dhcp->flags = htons(0x0000); + dhcp->ciaddr = 0; + dhcp->yiaddr = 0; + dhcp->siaddr = htonl(siaddr); + dhcp->giaddr = 0; + MemCpy(dhcp->chaddr, EthernetGetAddress(), 6); + + CDhcpRequestOptions *opts = frame + sizeof(CDhcpHeader); + opts->cookie = htonl(DHCP_COOKIE); + opts->dmt_type = DHCP_OPTION_MSGTYPE; + opts->dmt_length = 1; + opts->dmt = DHCP_MSGTYPE_REQUEST; + opts->requested_ip_type = DHCP_OPTION_REQUESTED_IP; + opts->requested_ip_length = 4; + opts->requested_ip = htonl(requested_ip); + opts->server_id_type = DHCP_OPTION_SERVER_ID; + opts->server_id_length = 4; + opts->server_id = htonl(siaddr); + opts->end = 0xff; + + return UdpPacketFinish(index); +} + +I64 DhcpParseBegin(U8 **data_inout, I64 *length_inout, CDhcpHeader **hdr_out) { + U8 *data = *data_inout; + I64 length = *length_inout; + + if (length < sizeof(CDhcpHeader) + 4) { + //"DhcpParseBegin: too short\n"; + return -1; + } + + U32 *p_cookie = data + sizeof(CDhcpHeader); + + if (ntohl(*p_cookie) != DHCP_COOKIE) { + //"DhcpParseBegin: cookie %08Xh != %08Xh\n", ntohl(*p_cookie), DHCP_COOKIE; + return -1; + } + + *hdr_out = data; + *data_inout = data + (sizeof(CDhcpHeader) + 4); + *length_inout = length - (sizeof(CDhcpHeader) + 4); + return 0; +} + +// Parse one option at *data_inout and advance the cursor past it. +// Returns the option type (> 0) with type/length/value filled in, 0 when the +// END option or the end of the data is reached, -1 when the option is cut short +I64 DhcpParseOption(U8 **data_inout, I64 *length_inout, U8 *type_out, + U8 *value_length_out, U8 **value_out) { + U8 *data = *data_inout; + I64 length = *length_inout; + + // PAD (0) is a single octet with no length field, so step over it instead + // of reading the next octet as its length + while (length > 0 && data[0] == 0) { + data++; + length--; + } + + // END (0xff) is a single octet too: test for it before demanding a length + // octet, otherwise an END that is the very last byte is reported as an error + if (length < 1 || data[0] == 0xff) + return 0; + + if (length < 2 || length < 2 + data[1]) { + //"DhcpParseOption: too short\n"; + return -1; + } + + *type_out = data[0]; + *value_length_out = data[1]; + *value_out = data + 2; + + *data_inout = data + (2 + *value_length_out); + *length_inout = length - (2 + *value_length_out); + return data[0]; +} + +I64 DhcpParseOffer(U32 xid, U8 *data, I64 length, U32 *yiaddr_out, + U32 *dns_ip_out, U32 
*router_ip_out, U32 *subnet_mask_out) { + CDhcpHeader *hdr; + I64 error = DhcpParseBegin(&data, &length, &hdr); + if (error < 0) + return error; + + if (ntohl(hdr->xid) != xid) + return -1; + + Bool have_type = FALSE; + Bool have_dns = FALSE; + Bool have_router = FALSE; + Bool have_subnet = FALSE; + + while (length) { + U8 type, value_length; + U8 *value; + + error = DhcpParseOption(&data, &length, &type, &value_length, &value); + //"%d, %02Xh, %d, %02Xh...\n", error, type, value_length, value[0]; + if (error < 0) + return error; + if (error == 0) + break; + + if (type == DHCP_OPTION_MSGTYPE && value_length == 1 && + value[0] == DHCP_MSGTYPE_OFFER) + have_type = TRUE; + + if (type == DHCP_OPTION_DNS && value_length == 4) { + *dns_ip_out = ntohl(*(value(U32 *))); + have_dns = TRUE; + } + + if (type == DHCP_OPTION_ROUTER && value_length == 4) { + *router_ip_out = ntohl(*(value(U32 *))); + have_router = TRUE; + } + + if (type == DHCP_OPTION_SUBNET_MASK && value_length == 4) { + *subnet_mask_out = ntohl(*(value(U32 *))); + have_subnet = TRUE; + } + } + + //"DhcpParseOffer: end %d %d %d %d\n", have_type, have_dns, have_subnet, + // have_router; + + // VirtualBox host network doesn't provide DNS or ROUTER, so this has to do + if (have_type && have_subnet) { + *yiaddr_out = ntohl(hdr->yiaddr); + return 0; + } else + return -1; +} + +I64 DhcpParseAck(U32 xid, U8 *data, I64 length) { + CDhcpHeader *hdr; + I64 error = DhcpParseBegin(&data, &length, &hdr); + if (error < 0) + return error; + + if (ntohl(hdr->xid) != xid) + return -1; + + while (length) { + U8 type, value_length; + U8 *value; + + error = DhcpParseOption(&data, &length, &type, &value_length, &value); + //"%d, %02Xh, %d, %02Xh...\n", error, type, value_length, value[0]; + if (error < 0) + return error; + if (error == 0) + break; + + if (type == DHCP_OPTION_MSGTYPE && value_length == 1 && + value[0] == DHCP_MSGTYPE_ACK) + return 0; + } + + return -1; +} diff --git a/Net/Dns.HC b/Net/Dns.HC new file mode 100644 
index 0000000..b8003f1 --- /dev/null +++ b/Net/Dns.HC @@ -0,0 +1,539 @@ +#define DNS_RCODE_NO_ERROR 0 +#define DNS_RCODE_FORMAT_ERROR 1 +#define DNS_RCODE_SERVER_FAILURE 2 +#define DNS_RCODE_NAME_ERROR 3 +#define DNS_RCODE_NOT_IMPLEMENTED 5 +#define DNS_RCODE_REFUSED 6 + +#define DNS_FLAG_RA 0x0080 +#define DNS_FLAG_RD 0x0100 +#define DNS_FLAG_TC 0x0200 +#define DNS_FLAG_AA 0x0400 + +#define DNS_OP_QUERY 0 +#define DNS_OP_IQUERY 1 +#define DNS_OP_STATUS 2 + +#define DNS_FLAG_QR 0x8000 + +// http://www.freesoft.org/CIE/RFC/1035/14.htm +#define DNS_TYPE_A 1 +#define DNS_TYPE_NS 2 +#define DNS_TYPE_CNAME 5 +#define DNS_TYPE_PTR 12 +#define DNS_TYPE_MX 15 +#define DNS_TYPE_TXT 16 + +// http://www.freesoft.org/CIE/RFC/1035/16.htm +#define DNS_CLASS_IN 1 + +#define DNS_TIMEOUT 5000 +#define DNS_MAX_RETRIES 3 + +class CDnsCacheEntry { + CDnsCacheEntry *next; + U8 *hostname; + addrinfo info; + // TODO: honor TTL +}; + +class CDnsHeader { + U16 id; + U16 flags; + U16 qdcount; + U16 ancount; + U16 nscount; + U16 arcount; +}; + +class CDnsDomainName { + U8 **labels; + I64 num_labels; +} + +class CDnsQuestion { + CDnsQuestion *next; + + CDnsDomainName qname; + U16 qtype; + U16 qclass; +}; + +class CDnsRR { + CDnsRR *next; + + CDnsDomainName name; + U16 type; + U16 class_; + U32 ttl; + U16 rdlength; + U8 *rdata; +}; + +// TODO: use a Hash table +static CDnsCacheEntry *dns_cache = NULL; + +static U32 dns_ip = 0; + +static CDnsCacheEntry *DnsCacheFind(U8 *hostname) { + CDnsCacheEntry *e = dns_cache; + + while (e) { + if (!StrCmp(e->hostname, hostname)) + return e; + + e = e->next; + } + + return e; +} + +static CDnsCacheEntry *DnsCachePut(U8 *hostname, addrinfo *info) { + CDnsCacheEntry *e = DnsCacheFind(hostname); + + if (!e) { + e = MAlloc(sizeof(CDnsCacheEntry)); + e->next = dns_cache; + e->hostname = StrNew(hostname); + AddrInfoCopy(&e->info, info); + + dns_cache = e; + } + + return e; +} + +static I64 DnsCalcQuestionSize(CDnsQuestion *question) { + I64 size = 0; + I64 i; + 
for (i = 0; i < question->qname.num_labels; i++) { + size += 1 + StrLen(question->qname.labels[i]); + } + return size + 1 + 4; +} + +static U0 DnsSerializeQuestion(U8 *buf, CDnsQuestion *question) { + I64 i; + + for (i = 0; i < question->qname.num_labels; i++) { + U8 *label = question->qname.labels[i]; + *(buf++) = StrLen(label); + + while (*label) + *(buf++) = *(label++); + } + + *(buf++) = 0; + *(buf++) = (question->qtype >> 8); + *(buf++) = (question->qtype & 0xff); + *(buf++) = (question->qclass >> 8); + *(buf++) = (question->qclass & 0xff); +} + +static I64 DnsSendQuestion(U16 id, U16 local_port, CDnsQuestion *question) { + if (!dns_ip) + return -1; + + U8 *frame; + I64 index = + UdpPacketAlloc(&frame, IPv4GetAddress(), local_port, dns_ip, 53, + sizeof(CDnsHeader) + DnsCalcQuestionSize(question)); + + if (index < 0) + return index; + + U16 flags = (DNS_OP_QUERY << 11) | DNS_FLAG_RD; + + CDnsHeader *hdr = frame; + hdr->id = htons(id); + hdr->flags = htons(flags); + hdr->qdcount = htons(1); + hdr->ancount = 0; + hdr->nscount = 0; + hdr->arcount = 0; + + DnsSerializeQuestion(frame + sizeof(CDnsHeader), question); + + return UdpPacketFinish(index); +} + 
+// Decode a (possibly compressed) domain name starting at *data_inout. +// packet_data/packet_length describe the whole message so that compression +// pointers can be followed. On success returns 0, fills name_out and moves +// the cursor past the name as it appears in the record. On a malformed name +// returns -1; if buffers had been allocated they are freed and +// name_out->labels is set to NULL +static I64 DnsParseDomainName(U8 *packet_data, I64 packet_length, + U8 **data_inout, I64 *length_inout, + CDnsDomainName *name_out) { + U8 *data = *data_inout; + I64 length = *length_inout; + Bool jump_taken = FALSE; + Bool ok = TRUE; + I64 jumps = 0; + I64 used = 0; + + if (length < 1) { + //"DnsParseDomainName: EOF\n"; + return -1; + } + + name_out->labels = MAlloc(16 * sizeof(U8 *)); + name_out->num_labels = 0; + + U8 *name_buf = MAlloc(256); + name_out->labels[0] = name_buf; + + while (length > 0) { + I64 label_len = *(data++); + length--; + + if (label_len == 0) { + break; + } else if (label_len >= 192) { + // Compression pointer: the low 14 bits are an offset into the message + I64 offset; + + // The second offset octet must exist, and the number of jumps is capped + // so that a pointer loop in a hostile reply cannot spin forever + if (length < 1 || ++jumps > 32) { + ok = FALSE; + break; + } + + offset = ((label_len & 0x3f) << 8) | *data; + + // A target outside the packet would make every later read out of bounds + if (offset >= packet_length) { + ok = FALSE; + break; + } + + if (!jump_taken) { + *data_inout = data + 1; + *length_inout = length - 1; + jump_taken = TRUE; + } + + //"jmp %d\n", offset; + + data = packet_data + offset; + length = packet_length - offset; + } else { + // labels[] has 16 slots and the text buffer 256 bytes: reject names that + // would overflow either one, as well as labels cut short by the packet + if (length < label_len || name_out->num_labels >= 16 || + used + label_len + 1 > 256) { + ok = FALSE; + break; + } + + MemCpy(name_buf, data, label_len); + data += label_len; + length -= label_len; + + name_buf[label_len] = 0; + //"%d bytes => %s\n", label_len, name_buf; + name_out->labels[name_out->num_labels++] = name_buf; + + name_buf += label_len + 1; + used += label_len + 1; + } + } + + if (!ok) { + // labels[0] still points at the start of the text buffer; release both + // allocations and clear the pointer so nothing is left dangling + Free(name_out->labels[0]); + Free(name_out->labels); + name_out->labels = NULL; + name_out->num_labels = 0; + return -1; + } + + if (!jump_taken) { + *data_inout = data; + *length_inout = length; + } + + return 0; +} + +static I64 DnsParseQuestion(U8 *packet_data, I64 packet_length, U8 **data_inout, + I64 *length_inout, CDnsQuestion *question_out) { + I64 error = DnsParseDomainName(packet_data, packet_length, data_inout, + length_inout, &question_out->qname); + + if (error < 0) + return error; + + U8 *data = *data_inout; + I64 length = *length_inout; + + if (length < 4) + return -1; + + question_out->next = NULL; + question_out->qtype = (data[1] << 8) | data[0]; + question_out->qclass = (data[3] << 8) | data[2]; + + //"DnsParseQuestion: qtype %d, qclass %d\n", ntohs(question_out->qtype), + // ntohs(question_out->qclass); + + *data_inout = data + 4; + *length_inout = length - 4; + return 0; +} + +static I64 DnsParseRR(U8 *packet_data, I64 packet_length, U8 **data_inout, + I64 *length_inout, CDnsRR *rr_out) { + I64 error = DnsParseDomainName(packet_data, packet_length, data_inout, + length_inout, &rr_out->name); + + if (error < 0) + return error; + + U8 *data = *data_inout; + I64 length = *length_inout; + + if (length < 10) + return -1; + + rr_out->next = NULL; + MemCpy(&rr_out->type, data, 10); + + I64 record_length = 10 + ntohs(rr_out->rdlength); + + if (length < record_length) + return -1; + + rr_out->rdata = data + 10; + + //"DnsParseRR: type %d, class %d\n, ttl %d, rdlength %d\n", + // ntohs(rr_out->type), ntohs(rr_out->class_), ntohl(rr_out->ttl), + // ntohs(rr_out->rdlength); + + *data_inout = data + record_length; + *length_inout = length - record_length; + return 0; +} + +static I64 DnsParseResponse(U16 id, U8 *data, I64 length, CDnsHeader **hdr_out, + CDnsQuestion 
**questions_out, + CDnsRR **answers_out) { + U8 *packet_data = data; + I64 packet_length = length; + + if (length < sizeof(CDnsHeader)) { + //"DnsParseResponse: too short\n"; + return -1; + } + + CDnsHeader *hdr = data; + data += sizeof(CDnsHeader); + // Keep the remaining-byte count in step with data; without this the record + // parsers believe sizeof(CDnsHeader) more bytes exist than the packet holds + length -= sizeof(CDnsHeader); + + if (id != 0 && ntohs(hdr->id) != id) { + //"DnsParseResponse: id %04Xh != %04Xh\n", ntohs(hdr->id), id; + return -1; + } + + I64 i; + + for (i = 0; i < htons(hdr->qdcount); i++) { + // CAlloc so that qname.labels is NULL until the name parser allocates it + CDnsQuestion *question = CAlloc(sizeof(CDnsQuestion)); + if (DnsParseQuestion(packet_data, packet_length, &data, &length, question) < + 0) { + // Do not leak the half-built node or the name buffers it may own + if (question->qname.labels) { + Free(question->qname.labels[0]); + Free(question->qname.labels); + } + Free(question); + return -1; + } + + question->next = *questions_out; + *questions_out = question; + } + + for (i = 0; i < htons(hdr->ancount); i++) { + // CAlloc for the same reason as above: name.labels starts out NULL + CDnsRR *answer = CAlloc(sizeof(CDnsRR)); + if (DnsParseRR(packet_data, packet_length, &data, &length, answer) < 0) { + if (answer->name.labels) { + Free(answer->name.labels[0]); + Free(answer->name.labels); + } + Free(answer); + return -1; + } + + answer->next = *answers_out; + *answers_out = answer; + } + + *hdr_out = hdr; + return 0; +} + +static U0 DnsBuildQuestion(CDnsQuestion *question, U8 *name) { + question->next = NULL; + question->qname.labels = MAlloc(16 * sizeof(U8 *)); + question->qname.labels[0] = 0; + question->qname.num_labels = 0; + question->qtype = DNS_TYPE_A; + question->qclass = DNS_CLASS_IN; + + U8 *copy = StrNew(name); + + while (*copy) { + question->qname.labels[question->qname.num_labels++] = copy; + U8 *dot = StrFirstOcc(copy, "."); + + if (dot) { + *dot = 0; + copy = dot + 1; + } else + break; + } +} + +static U0 DnsFreeQuestion(CDnsQuestion *question) { + // labels[0] is the start of the one text buffer every label points into; + // the pointer array is a separate allocation and must be released as well + Free(question->qname.labels[0]); + Free(question->qname.labels); +} + +static U0 DnsFreeRR(CDnsRR *rr) { + // Same layout as a question name: text buffer first, then the pointer array + Free(rr->name.labels[0]); + Free(rr->name.labels); +} + +static U0 DnsFreeQuestionChain(CDnsQuestion *questions) { + while (questions) { + CDnsQuestion *next = questions->next; + DnsFreeQuestion(questions); + Free(questions); + questions = next; + } +} + +static U0 DnsFreeRRChain(CDnsRR *rrs) { + while (rrs) { + // The chain links CDnsRR nodes, so the saved pointer is a CDnsRR too + CDnsRR *next = rrs->next; + DnsFreeRR(rrs); + Free(rrs); + rrs = next; + } +} + +static I64 DnsRunQuery(I64 sock, U8 *name, U16 port, addrinfo 
**res_out) { + I64 retries = 0; + I64 timeout = DNS_TIMEOUT; + + if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO_MS, &timeout, sizeof(timeout)) < + 0) { + "$FG,6$DnsRunQuery: setsockopt failed\n$FG$"; + } + + U16 local_port = RandU16(); + + sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(local_port); + addr.sin_addr.s_addr = INADDR_ANY; + + if (bind(sock, &addr, sizeof(addr)) < 0) { + "$FG,4$DnsRunQuery: failed to bind\n$FG$"; + return -1; + } + + U8 buffer[2048]; + + I64 count; + sockaddr_in addr_in; + + U16 id = RandU16(); + I64 error = 0; + + CDnsQuestion question; + DnsBuildQuestion(&question, name); + + while (1) { + error = DnsSendQuestion(id, local_port, &question); + // On a send failure leave through the common exit after the loop so the + // question buffers are released instead of returning with them allocated + if (error < 0) + break; + + count = + recvfrom(sock, buffer, sizeof(buffer), 0, &addr_in, sizeof(addr_in)); + + if (count > 0) { + //"Try parse response\n"; + CDnsHeader *hdr = NULL; + CDnsQuestion *questions = NULL; + CDnsRR *answers = NULL; + + error = DnsParseResponse(id, buffer, count, &hdr, &questions, &answers); + + if (error >= 0) { + Bool have = FALSE; + + // Look for a suitable A-record in the answer + CDnsRR *answer = answers; + while (answer) { + // TODO: if there are multiple acceptable answers, + // we should pick one at random -- not just the first one + if (htons(answer->type) == DNS_TYPE_A && + htons(answer->class_) == DNS_CLASS_IN && + htons(answer->rdlength) == 4) { + addrinfo *res = MAlloc(sizeof(addrinfo)); + res->ai_flags = 0; + res->ai_family = AF_INET; + res->ai_socktype = 0; + res->ai_protocol = 0; + res->ai_addrlen = sizeof(sockaddr_in); + res->ai_addr = MAlloc(sizeof(sockaddr_in)); + res->ai_canonname = NULL; + res->ai_next = NULL; + + sockaddr_in *sa = res->ai_addr; + sa->sin_family = AF_INET; + sa->sin_port = port; + // Copy the address of the record that matched the test above, not of + // the list head (which may be a CNAME or some other record type) + MemCpy(&sa->sin_addr.s_addr, answer->rdata, 4); + + DnsCachePut(name, res); + *res_out = res; + have = TRUE; + break; + } + + answer = answer->next; + } + + DnsFreeQuestionChain(questions); + DnsFreeRRChain(answers); + + if 
(have) + break; + + // At this point we could try iterative resolution, + // but all end-user DNS servers would have tried that already + + "$FG,6$DnsParseResponse: no suitable answer in reply\n$FG$"; + error = -1; + } else { + "$FG,6$DnsParseResponse: error %d\n$FG$", error; + } + } + + if (++retries == DNS_MAX_RETRIES) { + "$FG,4$DnsRunQuery: max retries reached\n$FG$"; + error = -1; + break; + } + } + + DnsFreeQuestion(&question); + return error; +} + +I64 DnsGetaddrinfo(U8 *node, U8 *service, addrinfo *hints, addrinfo **res) { + no_warn service; + no_warn hints; + + CDnsCacheEntry *cached = DnsCacheFind(node); + + if (cached) { + *res = MAlloc(sizeof(addrinfo)); + AddrInfoCopy(*res, &cached->info); + (*res)->ai_flags |= AI_CACHED; + return 0; + } + + I64 sock = socket(AF_INET, SOCK_DGRAM); + I64 error = 0; + + if (sock >= 0) { + // TODO: service should be parsed as int, specifying port number + error = DnsRunQuery(sock, node, 0, res); + + close(sock); + } else + error = -1; + + return error; +} + +U0 DnsSetResolverIPv4(U32 ip) { dns_ip = ip; } + +public +U0 Host(U8 *hostname) { + addrinfo *res = NULL; + I64 error = getaddrinfo(hostname, NULL, NULL, &res); + + if (error < 0) { + "$FG,4$getaddrinfo: error %d\n", error; + } else { + addrinfo *curr = res; + while (curr) { + U8 buffer[INET_ADDRSTRLEN]; + "flags %04Xh, family %d, socktype %d, proto %d, addrlen %d, addr %s\n", + curr->ai_flags, curr->ai_family, curr->ai_socktype, curr->ai_protocol, + curr->ai_addrlen, + inet_ntop(AF_INET, &(curr->ai_addr(sockaddr_in *))->sin_addr, buffer, + sizeof(buffer)); + curr = curr->ai_next; + } + } + + freeaddrinfo(res); +} + +U0 DnsInit() { + static CAddrResolver dns_addr_resolver; + dns_addr_resolver.getaddrinfo = &DnsGetaddrinfo; + + socket_addr_resolver = &dns_addr_resolver; +} + +DnsInit; diff --git a/Net/Ethernet.HC b/Net/Ethernet.HC new file mode 100644 index 0000000..475607e --- /dev/null +++ b/Net/Ethernet.HC @@ -0,0 +1,57 @@ +class CEthFrame { + U8 source_addr[6]; + 
U8 padding[2]; + U8 dest_addr[6]; + U16 ethertype; + + U8 *data; + I64 length; +}; + +class CL3Protocol { + CL3Protocol *next; + + U16 ethertype; + U8 padding[6]; + + I64 (*handler)(CEthFrame *frame); +}; + +static CL3Protocol *l3_protocols = NULL; + +U8 eth_null[6] = {0, 0, 0, 0, 0, 0}; +U8 eth_broadcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +I64 EthernetFrameParse(CEthFrame *frame_out, U8 *frame, U16 length) { + // FIXME: check length + // TODO: MemCpy has high overhead, get rid of it + MemCpy(frame_out->dest_addr, frame, 6); + MemCpy(frame_out->source_addr, frame + 6, 6); + frame_out->ethertype = frame[13] | (frame[12] << 8); + + /*"Rx dst: %02X:%02X:%02X:%02X:%02X:%02X\n", + frame_out->dest_addr[0], frame_out->dest_addr[1], + frame_out->dest_addr[2], frame_out->dest_addr[3], frame_out->dest_addr[4], + frame_out->dest_addr[5]; + + "Rx src: %02X:%02X:%02X:%02X:%02X:%02X\n", + frame_out->source_addr[0], frame_out->source_addr[1], + frame_out->source_addr[2], frame_out->source_addr[3], + frame_out->source_addr[4], frame_out->source_addr[5]; + + "Rx ethertype: %02X\n", frame_out->ethertype;*/ + + frame_out->data = frame + 14; + frame_out->length = length - 14 - 4; // ?? 
+ return 0; +} + +U0 RegisterL3Protocol(U16 ethertype, I64 (*handler)(CEthFrame *frame)) { + CL3Protocol *p = MAlloc(sizeof(CL3Protocol)); + + p->next = l3_protocols; + p->ethertype = ethertype; + p->handler = handler; + + l3_protocols = p; +} diff --git a/Net/IPv4.HC b/Net/IPv4.HC new file mode 100644 index 0000000..490197f --- /dev/null +++ b/Net/IPv4.HC @@ -0,0 +1,259 @@ +#define IP_PROTO_ICMP 0x01 +#define IP_PROTO_TCP 0x06 +#define IP_PROTO_UDP 0x11 + +#define IPV4_EADDR_INVALID (-200001) +#define IPV4_EHOST_UNREACHABLE (-200002) + +#define IPV4_TTL 64 + +class CIPv4Packet { + CEthFrame *l2_frame; + + U32 source_ip; + U32 dest_ip; + U8 proto; + U8 padding[7]; + + U8 *data; + I64 length; + I64 ttl; +}; + +class CIPv4Header { + U8 version_ihl; + U8 dscp_ecn; + U16 total_length; + U16 ident; + U16 flags_fragoff; + U8 ttl; + U8 proto; + U16 header_checksum; + U32 source_ip; + U32 dest_ip; +}; + +class CL4Protocol { + CL4Protocol *next; + + U8 proto; + U8 padding[7]; + + U0 (*handler)(CIPv4Packet *packet); +}; + +// *_n = stored in network order +static U32 my_ip = 0; +static U32 my_ip_n = 0; + +static U32 ipv4_router_addr = 0; +static U32 ipv4_subnet_mask = 0; + +static CL4Protocol *l4_protocols = NULL; + +// http://stackoverflow.com/q/26774761/2524350 +static U16 IPv4Checksum(U8 *header, I64 length) { + I64 nleft = length; + U16 *w = header; + I64 sum = 0; + + while (nleft > 1) { + sum += *(w++); + nleft -= 2; + } + + // mop up an odd byte, if necessary + if (nleft == 1) { + sum += ((*w) & 0x00ff); + } + + // add back carry outs from top 16 bits to low 16 bits + sum = (sum >> 16) + (sum & 0xffff); // add hi 16 to low 16 + sum += (sum >> 16); // add carry + return (~sum) & 0xffff; +} + +static I64 GetEthernetAddressForIP(U32 ip, U8 **mac_out) { + // invalid + if (ip == 0) { + return IPV4_EADDR_INVALID; + } + // broadcast + else if (ip == 0xffffffff) { + *mac_out = eth_broadcast; + return 0; + } + // outside this subnet; needs routing + else if ((ip & 
ipv4_subnet_mask) != (my_ip & ipv4_subnet_mask)) { + // no gateway + if (ipv4_router_addr == 0) { + return IPV4_EADDR_INVALID; + } + + // FIXME: infinite loop if mis-configured + + return GetEthernetAddressForIP(ipv4_router_addr, mac_out); + } + // local network + else { + // FIXME: this can stall NetHandlerTask, we might need a flag to bail early + + CArpCacheEntry *e = ArpCacheFindByIP(ip); + + if (e) { + *mac_out = e->mac; + return 0; + } + + //"Not in cache, requesting\n"; + + // Up to 4 retries, 500 ms each + I64 retries = 4; + + while (retries) { + ArpSend(ARP_REQUEST, eth_broadcast, EthernetGetAddress(), my_ip_n, + eth_null, htonl(ip)); + + I64 try_ = 0; + + for (try_ = 0; try_ < 50; try_++) { + Sleep(10); + + e = ArpCacheFindByIP(ip); + if (e) + break; + } + + if (e) { + *mac_out = e->mac; + return 0; + } + + retries--; + } + + in_addr in; + in.s_addr = htonl(ip); + U8 buffer[INET_ADDRSTRLEN]; + "$FG,6$IPv4: Failed to resolve address %s\n$FG$", + inet_ntop(AF_INET, &in.s_addr, buffer, sizeof(buffer)); + return IPV4_EHOST_UNREACHABLE; + } +} + +I64 IPv4PacketAlloc(U8 **frame_out, U8 proto, U32 source_ip, U32 dest_ip, + I64 length) { + U8 *frame; + U8 *dest_mac; + + I64 error = GetEthernetAddressForIP(dest_ip, &dest_mac); + + if (error < 0) + return error; + + I64 index = + EthernetFrameAlloc(&frame, EthernetGetAddress(), dest_mac, ETHERTYPE_IPV4, + sizeof(CIPv4Header) + length, 0); + + if (index < 0) + return index; + + I64 internet_header_length = 5; + + CIPv4Header *hdr = frame; + hdr->version_ihl = internet_header_length | (4 << 4); + hdr->dscp_ecn = 0; + hdr->total_length = htons(internet_header_length * 4 + length); + hdr->ident = 0; + hdr->flags_fragoff = 0; + hdr->ttl = IPV4_TTL; + hdr->proto = proto; + hdr->header_checksum = 0; + hdr->source_ip = htonl(source_ip); + hdr->dest_ip = htonl(dest_ip); + + hdr->header_checksum = IPv4Checksum(hdr, internet_header_length * 4); + + *frame_out = frame + sizeof(CIPv4Header); + return index; +} + +I64 
IPv4PacketFinish(I64 index) { return EthernetFrameFinish(index); } + +U32 IPv4GetAddress() { return my_ip; } + +U0 IPv4SetAddress(U32 addr) { + my_ip = addr; + my_ip_n = htonl(addr); + + ArpSetIPv4Address(addr); +} + +U0 IPv4SetSubnet(U32 router_addr, U32 subnet_mask) { + ipv4_router_addr = router_addr; + ipv4_subnet_mask = subnet_mask; +} + +// Decode the IPv4 header carried in eth_frame into packet_out (addresses in +// host order). Returns 0 on success, -1 if the frame is not IPv4 or its +// header fields are inconsistent +I64 IPv4ParsePacket(CIPv4Packet *packet_out, CEthFrame *eth_frame) { + if (eth_frame->ethertype != ETHERTYPE_IPV4) + return -1; + + // The fixed header must be present before any of its fields are read + if (eth_frame->length < sizeof(CIPv4Header)) + return -1; + + CIPv4Header *hdr = eth_frame->data; + I64 header_length = (hdr->version_ihl & 0x0f) * 4; + //"IPv4: hdr %d, proto %02X, source %08X, dest %08X, len %d\n", + // header_length, hdr->proto, ntohl(hdr->source_ip), ntohl(hdr->dest_ip), + // eth_frame->length - header_length; + + // A header shorter than the fixed part would put the payload pointer inside + // the header itself; anything but version 4 is not ours to parse + if ((hdr->version_ihl >> 4) != 4 || header_length < sizeof(CIPv4Header)) + return -1; + + U16 total_length = ntohs(hdr->total_length); + + // Otherwise the payload length computed below would be negative + if (total_length < header_length) + return -1; + + packet_out->l2_frame = eth_frame; + packet_out->source_ip = ntohl(hdr->source_ip); + packet_out->dest_ip = ntohl(hdr->dest_ip); + packet_out->proto = hdr->proto; + + packet_out->data = eth_frame->data + header_length; + packet_out->length = total_length - header_length; + packet_out->ttl = hdr->ttl; + + return 0; +} + +U0 RegisterL4Protocol(U8 proto, I64 (*handler)(CIPv4Packet *frame)) { + CL4Protocol *p = MAlloc(sizeof(CL4Protocol)); + + p->next = l4_protocols; + p->proto = proto; + p->handler = handler; + + l4_protocols = p; +} + +I64 IPv4Handler(CEthFrame *eth_frame) { + CIPv4Packet packet; + + I64 error = IPv4ParsePacket(&packet, eth_frame); + + if (error < 0) + return error; + + // This seems necessary to receive connections under VBox NAT, + // but is also pretty slow, so should be optimized to use a better + // struct than linked list. 
+ ArpCachePut(packet.source_ip, eth_frame->source_addr); + + CL4Protocol *l4 = l4_protocols; + + while (l4) { + if (l4->proto == packet.proto) { + l4->handler(&packet); + break; + } + l4 = l4->next; + } + + return error; +} + +RegisterL3Protocol(ETHERTYPE_IPV4, &IPv4Handler); diff --git a/Net/Icmp.HC b/Net/Icmp.HC new file mode 100644 index 0000000..0deab1f --- /dev/null +++ b/Net/Icmp.HC @@ -0,0 +1,101 @@ +#define ICMP_TYPE_ECHO_REPLY 0 +#define ICMP_TYPE_ECHO_REQUEST 8 + +class CIcmpHeader { + U8 type; + U8 code; + U16 checksum; + U16 identifier; + U16 seq_number; +}; + +U64 *icmp_reply = CAlloc(sizeof(U64) * 65536); + +// Ones'-complement checksum over size bytes at buf, summed as 16-bit words in +// memory order; a trailing odd byte is added on its own +U16 IcmpComputeChecksum(U8 *buf, I64 size) { + I64 i; + U64 sum = 0; + + // Only consume complete 16-bit words here: with "i < size" an odd size read + // one byte past the buffer and the odd-byte branch below was never reached + for (i = 0; i + 1 < size; i += 2) { + sum += *buf(U16 *); + buf += 2; + } + if (size - i > 0) { + sum += *buf; + } + + while ((sum >> 16) != 0) { + sum = (sum & 0xFFFF) + (sum >> 16); + } + + return ~sum(U16); +} + +I64 IcmpSendReply(U32 dest_ip, U16 identifier, U16 seq_number, + U16 request_checksum, U8 *payload, I64 length) { + U8 *frame; + I64 index = IPv4PacketAlloc(&frame, IP_PROTO_ICMP, IPv4GetAddress(), dest_ip, + sizeof(CIcmpHeader) + length); + + if (index < 0) + return index; + + CIcmpHeader *hdr = frame; + hdr->type = ICMP_TYPE_ECHO_REPLY; + hdr->code = 0; + // The reply differs from the request only in the type octet (8 -> 0), so + // the checksum grows by 0x0800 in ones'-complement arithmetic; the carry out + // of bit 15 has to be wrapped back in or the result is off by one + I64 reply_sum = ntohs(request_checksum) + 0x0800; + hdr->checksum = htons((reply_sum & 0xffff) + (reply_sum >> 16)); 
+ hdr->identifier = identifier; + hdr->seq_number = seq_number; + + MemCpy(frame + sizeof(CIcmpHeader), payload, length); + return IPv4PacketFinish(index); +} + +// Send an ICMP echo request carrying length bytes of payload to dest_ip. +// request_checksum is unused. Returns the result of IPv4PacketFinish, or the +// negative error from IPv4PacketAlloc +I64 IcmpSendRequest(U32 dest_ip, U16 identifier, U16 seq_number, + U16 request_checksum, U8 *payload, I64 length) { + no_warn request_checksum; + U8 *frame; + I64 index = IPv4PacketAlloc(&frame, IP_PROTO_ICMP, IPv4GetAddress(), dest_ip, + sizeof(CIcmpHeader) + length); + + if (index < 0) + return index; + + CIcmpHeader *hdr = frame; + hdr->type = ICMP_TYPE_ECHO_REQUEST; + hdr->code = 0; + hdr->checksum = 0; + hdr->identifier = identifier; + hdr->seq_number = seq_number; + + // The ICMP checksum covers the header and the data that follows it, so the + // payload has to be in place before the sum is taken + MemCpy(frame + sizeof(CIcmpHeader), payload, length); + hdr->checksum = IcmpComputeChecksum(hdr, sizeof(CIcmpHeader) + length); + + return IPv4PacketFinish(index); +} + +I64 IcmpHandler(CIPv4Packet *packet) { + if (packet->proto != IP_PROTO_ICMP) + return -1; + + if (packet->length < sizeof(CIcmpHeader)) + return -1; + + CIcmpHeader *hdr = packet->data; + + if (hdr->type == ICMP_TYPE_ECHO_REPLY && hdr->code == 0) { + // NOTE(review): packet is the address of IPv4Handler's local CIPv4Packet, + // so this stored pointer looks stale once the handler returns -- confirm + // how readers of icmp_reply use it + icmp_reply[hdr->identifier] = packet; + } + + if (hdr->type == ICMP_TYPE_ECHO_REQUEST && hdr->code == 0) { + // This also makes sure that we don't stall NetHandlerTask + ArpCachePut(packet->source_ip, packet->l2_frame->source_addr); + + IcmpSendReply(packet->source_ip, hdr->identifier, hdr->seq_number, + hdr->checksum, packet->data + sizeof(CIcmpHeader), + packet->length - sizeof(CIcmpHeader)); + } + + return 0; +} + +RegisterL4Protocol(IP_PROTO_ICMP, &IcmpHandler); diff --git a/Net/NativeSocket.HC b/Net/NativeSocket.HC new file mode 100644 index 0000000..f2517b2 --- /dev/null +++ b/Net/NativeSocket.HC @@ -0,0 +1,327 @@ +#define SOCK_STREAM 1 +#define SOCK_DGRAM 2 +#define SOCK_RAW 3 + +#define AF_UNSPEC 0 +#define AF_INET 2 +#define AF_INET6 10 + +#define INADDR_ANY 0 + +#define INET_ADDRSTRLEN 16 + +#define NS_INADDRSZ 4 + +#define SOL_SOCKET 1 + +// optval = I64* +#define SO_RCVTIMEO_MS 1 + +#define AI_CACHED 0x8000 + +class 
in_addr { + U32 s_addr; +}; + +class sockaddr { + U16 sa_family; + U8 sa_data[14]; +}; + +class sockaddr_in { + I16 sin_family; + U16 sin_port; + in_addr sin_addr; + U8 sin_zero[8]; +}; + +class addrinfo { + I32 ai_flags; + I32 ai_family; + I32 ai_socktype; + I32 ai_protocol; + I64 ai_addrlen; + sockaddr *ai_addr; + U8 *ai_canonname; + addrinfo *ai_next; +}; + +I64 inet_pton(I64 af, U8 *src, U8 *dst) { + I64 saw_digit, octets, ch; + U8 tmp[NS_INADDRSZ], *tp; + + if (af != AF_INET) { + return -1; + } + + saw_digit = 0; + octets = 0; + *(tp = tmp) = 0; + while (*src) { + ch = *src++; + if (ch >= '0' && ch <= '9') { + U64 new = *tp * 10 + (ch - '0'); + if (saw_digit && *tp == 0) + return 0; + if (new > 255) + return 0; + *tp = new; + if (!saw_digit) { + if (++octets > 4) + return 0; + saw_digit = 1; + } + } else if (ch == '.' && saw_digit) { + if (octets == 4) + return 0; + *++tp = 0; + saw_digit = 0; + } else + return 0; + } + if (octets < 4) + return 0; + MemCpy(dst, tmp, NS_INADDRSZ); + return 1; +} + +U8 *inet_ntop(I64 af, U8 *src, U8 *dst, I64 size) { + if (af == AF_INET && size >= INET_ADDRSTRLEN) { + StrPrint(dst, "%d.%d.%d.%d", src[0], src[1], src[2], src[3]); + return dst; + } else { + return 0; + } +} + +class CSocket { + I64 (*accept)(CSocket *s, sockaddr *src_addr, I64 addrlen); + I64 (*bind)(CSocket *s, sockaddr *addr, I64 addrlen); + I64 (*close)(CSocket *s); + I64 (*connect)(CSocket *s, sockaddr *addr, I64 addrlen); + I64 (*listen)(CSocket *s, I64 backlog); + I64(*recvfrom) + (CSocket *s, U8 *buf, I64 len, I64 flags, sockaddr *src_addr, I64 addrlen); + I64(*sendto) + (CSocket *s, U8 *buf, I64 len, I64 flags, sockaddr *dest_addr, I64 addrlen); + I64 (*setsockopt)(CSocket *s, I64 level, I64 optname, U8 *optval, I64 optlen); +}; + +class CSocketClass { + CSocketClass *next; + + U16 domain; + U16 type; + U8 padding[4]; + + CSocket *(*socket)(U16 domain, U16 type); +}; + +class CAddrResolver { + // TODO: allow different resolvers for different socket domains 
+ + I64 (*getaddrinfo)(U8 *node, U8 *service, addrinfo *hints, addrinfo **res); +}; + +static CSocketClass *socket_classes = NULL; +static CAddrResolver *socket_addr_resolver = NULL; + +static CSocketClass *FindSocketClass(U16 domain, U16 type) { + CSocketClass *cls = socket_classes; + + while (cls) { + if (cls->domain == domain && cls->type == type) + return cls; + + cls = cls->next; + } + + return NULL; +} + +I64 SocketInit() { return 0; } + +I64 socket(I64 domain, I64 type) { + CSocketClass *cls = FindSocketClass(domain, type); + + if (cls) + return cls->socket(domain, type)(I64); + else + return -1; +} + +I64 accept(I64 sockfd, sockaddr *addr, I64 addrlen) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->accept(sock, addr, addrlen); + else + return -1; +} + +I64 close(I64 sockfd) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->close(sock); + else + return -1; +} + +I64 bind(I64 sockfd, sockaddr *addr, I64 addrlen) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->bind(sock, addr, addrlen); + else + return -1; +} + +I64 connect(I64 sockfd, sockaddr *addr, I64 addrlen) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->connect(sock, addr, addrlen); + else + return -1; +} + +I64 listen(I64 sockfd, I64 backlog) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->listen(sock, backlog); + else + return -1; +} + +I64 recv(I64 sockfd, U8 *buf, I64 len, I64 flags) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->recvfrom(sock, buf, len, flags, NULL, 0); + else + return -1; +} + +I64 recvfrom(I64 sockfd, U8 *buf, I64 len, I64 flags, sockaddr *src_addr, + I64 addrlen) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->recvfrom(sock, buf, len, flags, src_addr, addrlen); + else + return -1; +} + +I64 send(I64 sockfd, U8 *buf, I64 len, I64 flags) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return 
sock->sendto(sock, buf, len, flags, NULL, 0); + else + return -1; +} + +I64 sendto(I64 sockfd, U8 *buf, I64 len, I64 flags, sockaddr *dest_addr, + I64 addrlen) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->sendto(sock, buf, len, flags, dest_addr, addrlen); + else + return -1; +} + +I64 setsockopt(I64 sockfd, I64 level, I64 optname, U8 *optval, I64 optlen) { + CSocket *sock = sockfd(CSocket *); + if (sockfd > 0) + return sock->setsockopt(sock, level, optname, optval, optlen); + else + return -1; +} + +I64 getaddrinfo(U8 *node, U8 *service, addrinfo *hints, addrinfo **res) { + if (socket_addr_resolver) + return socket_addr_resolver->getaddrinfo(node, service, hints, res); + else + return -1; +} + +U0 freeaddrinfo(addrinfo *res) { + while (res) { + addrinfo *next = res->ai_next; + Free(res->ai_addr); + Free(res->ai_canonname); + Free(res); + res = next; + } +} + +U0 AddrInfoCopy(addrinfo *ai_out, addrinfo *ai_in) { + MemCpy(ai_out, ai_in, sizeof(addrinfo)); + + if (ai_in->ai_addr) { + ai_out->ai_addr = MAlloc(ai_in->ai_addrlen); + MemCpy(ai_out->ai_addr, ai_in->ai_addr, ai_in->ai_addrlen); + } + + if (ai_in->ai_canonname) { + ai_out->ai_canonname = StrNew(ai_in->ai_canonname); + } +} + +U8 *gai_strerror(I64 errcode) { + no_warn errcode; + return "Unspecified error"; +} + +// Inspired by +// https://docs.python.org/3.7/library/socket.html#socket.create_connection +I64 create_connection(U8 *hostname, U16 port) { + sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = 0; + + addrinfo *res; + I64 error = getaddrinfo(hostname, NULL, NULL, &res); + + if (error < 0) { + "$FG,4$getaddrinfo: error %d\n$FG$", error; + } else { + addrinfo *curr = res; + + while (curr) { + if (curr->ai_family == AF_INET && + (curr->ai_socktype == 0 || curr->ai_socktype == SOCK_STREAM)) { + addr.sin_addr.s_addr = (curr->ai_addr(sockaddr_in *))->sin_addr.s_addr; + freeaddrinfo(res); + + I64 sockfd = socket(AF_INET, 
SOCK_STREAM); + + if (sockfd < 0) + return sockfd; + + error = connect(sockfd, &addr, sizeof(addr)); + + if (error < 0) { + close(sockfd); + return error; + } + + return sockfd; + } + + curr = curr->ai_next; + } + + "$FG,4$create_connection: no suitable address\n$FG$"; + } + + freeaddrinfo(res); + return -1; +} + +U0 RegisterSocketClass(U16 domain, U16 type, + CSocket *(*socket)(U16 domain, U16 type)) { + CSocketClass *cls = MAlloc(sizeof(CSocketClass)); + + cls->next = socket_classes; + cls->domain = domain; + cls->type = type; + cls->socket = socket; + + socket_classes = cls; +} diff --git a/Net/NetFifo.HC b/Net/NetFifo.HC new file mode 100644 index 0000000..b380e86 --- /dev/null +++ b/Net/NetFifo.HC @@ -0,0 +1,77 @@ +// Warning: terrible code ahead. this still needs a lot of work + +// In the future we'll probably have 2 FIFOs (pending frames & empty buffers) +// TODO: check if FIFO implementation is suitable for high throughput + +#define NET_FIFO_DEPTH 1024 + +#define ETHERNET_FRAME_SIZE 1548 + +#define ETHERTYPE_IPV4 0x0800 +#define ETHERTYPE_ARP 0x0806 + +class CNetFifoEntry { + I64 length; + U8 frame[ETHERNET_FRAME_SIZE]; +}; + +static CFifoI64 *netfifo; + +static CNetFifoEntry *entries; +static I64 next_entry = 0; + +CTask *netfifo_handler_task = NULL; + +// TODO: asm optimization? or perhaps use EndianU*? 
+// These don't belong here in the first place,
+// but it's convenient for Ethernet drivers
+// We'll probably split it off along with ETHERTYPE_* constants
+
+// Byte-swap a 16-bit value: host order to network order.
+U16 htons(U16 h) { return ((h >> 8) | (h << 8)) & 0xffff; }
+
+// Byte-swap a 16-bit value: network order to host order (same operation).
+U16 ntohs(U16 h) { return ((h >> 8) | (h << 8)) & 0xffff; }
+
+// Reverse the four bytes of a 32-bit value: host order to network order.
+// The final mask discards bits shifted above bit 31.
+U32 htonl(U32 h) {
+  return ((h >> 24) | ((h & 0x00ff0000) >> 8) | ((h & 0x0000ff00) << 8) |
+          (h << 24)) &
+         0xffffffff;
+}
+
+// Reverse the four bytes of a 32-bit value: network order to host order.
+U32 ntohl(U32 h) {
+  return ((h >> 24) | ((h & 0x00ff0000) >> 8) | ((h & 0x0000ff00) << 8) |
+          (h << 24)) &
+         0xffffffff;
+}
+
+// Remove the oldest queued entry from netfifo.
+// Returns the entry, or NULL when FifoI64Rem reports nothing to remove.
+CNetFifoEntry *NetFifoPull() {
+  CNetFifoEntry *entry;
+
+  if (FifoI64Rem(netfifo, &entry))
+    return entry;
+  else
+    return NULL;
+}
+
+// Copy a received frame into the next slot of the entries ring and queue a
+// pointer to that slot in netfifo.
+// Returns 0 on success, -1 if length does not fit a CNetFifoEntry or if
+// FifoI64Ins reports failure.
+I64 NetFifoPushCopy(U8 *data, I64 length) {
+  // entry->frame is a fixed ETHERNET_FRAME_SIZE array. The handler task
+  // passes a device-reported length minus 10, which can come out negative
+  // or oversized, so reject it before MemCpy can overrun the slot.
+  if (length < 0 || length > ETHERNET_FRAME_SIZE)
+    return -1;
+
+  CNetFifoEntry *entry = &entries[next_entry];
+  // NET_FIFO_DEPTH is a power of two, so the mask wraps the ring index.
+  next_entry = (next_entry + 1) & (NET_FIFO_DEPTH - 1);
+
+  entry->length = length;
+  MemCpy(entry->frame, data, length);
+
+  if (!FifoI64Ins(netfifo, entry))
+    return -1;
+
+  // Wake up Handler Task
+  if (netfifo_handler_task)
+    LBtr(&netfifo_handler_task->task_flags, TASKf_IDLE);
+
+  return 0;
+}
+
+// Allocate the pointer FIFO and the ring of frame slots it points into.
+U0 NetFifoInit() {
+  netfifo = FifoI64New(NET_FIFO_DEPTH);
+  entries = MAlloc(NET_FIFO_DEPTH * sizeof(CNetFifoEntry));
+}
+
+NetFifoInit;
diff --git a/Net/NetHandler.HC b/Net/NetHandler.HC
new file mode 100644
index 0000000..a7c7575
--- /dev/null
+++ b/Net/NetHandler.HC
@@ -0,0 +1,58 @@
+U0 @virtio_net_handle_net_fifo_entry(CNetFifoEntry *e) {
+  CEthFrame l2_frame;
+
+  if (EthernetFrameParse(&l2_frame, e->frame, e->length) < 0)
+    return;
+
+  CL3Protocol *l3 = l3_protocols;
+
+  while (l3) {
+    if (l3->ethertype == l2_frame.ethertype) {
+      l3->handler(&l2_frame);
+      break;
+    }
+    l3 = l3->next;
+  }
+}
+
+U0 @virtio_net_handler_task() {
+  I64 idx_used, idx_rec;
+  I64 i, j;
+  @virtio_used_item *item;
+  U8 *buffer;
+  I64 length;
+  while (1) {
+    idx_rec = VirtioNet.rq_index;
+    idx_used = VirtioNet.rq->used.index;
+
+    if (idx_used < idx_rec) {
+      idx_used += 0x10000;
+    }
+
+    if (idx_rec != idx_used && idx_used) {
+
+      j = 0;
+ for (i = idx_rec; i < idx_used; i++) { + item = VirtioNet.rq->used.ring; + buffer = VirtioNet.rq->buffers[item[i % 256].index + 1]; + length = item[i % 256].length; + NetFifoPushCopy(buffer, length - 10); + j++; + VirtioNet.rx_packets++; + VirtioNet.rx_bytes += length - 10; + } + VirtioNet.rq_index = idx_used % 0x10000; + VirtioNet.rq->available.index += j; + OutU16(VirtioNet.port + VIRTIO_PCI_QUEUE_NOTIFY, 0); + } + CNetFifoEntry *e = NetFifoPull; + if (e) { + @virtio_net_handle_net_fifo_entry(e); + } + Busy(200); + } +} + +Spawn(&@virtio_net_handler_task, NULL, "NetHandlerTask", 2); + +"[OK] NetHandler \n"; \ No newline at end of file diff --git a/Net/Netcfg.HC b/Net/Netcfg.HC new file mode 100644 index 0000000..9c43c68 --- /dev/null +++ b/Net/Netcfg.HC @@ -0,0 +1,142 @@ + +#define CLIENT_START 0 +#define CLIENT_DISCOVER 1 +#define CLIENT_REQUEST 2 +#define CLIENT_REQUEST_ACCEPTED 3 + +#define DHCP_TIMEOUT 3000 +#define MAX_RETRIES 3 + +I64 DhcpConfigureInner(I64 sock, U32 *yiaddr_out, U32 *dns_ip_out, + U32 *router_ip_out, U32 *subnet_mask_out) { + I64 state = CLIENT_START; + I64 retries = 0; + + I64 timeout = DHCP_TIMEOUT; + + if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO_MS, &timeout, sizeof(timeout)) < + 0) { + "$FG,6$DhcpConfigure: setsockopt failed\n$FG$"; + } + + sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(68); + addr.sin_addr.s_addr = INADDR_ANY; + + if (bind(sock, &addr, sizeof(addr)) < 0) { + "$FG,4$DhcpConfigure: failed to bind\n$FG$"; + return -1; + } + + U32 xid = DhcpBeginTransaction(); + + I64 error = 0; + + U32 dhcp_addr; + U8 buffer[2048]; + + I64 count; + sockaddr_in addr_in; + + while (state != CLIENT_REQUEST_ACCEPTED) { + if (state == CLIENT_START) { + state = CLIENT_DISCOVER; + retries = 0; + } else if (state == CLIENT_DISCOVER) { + error = DhcpSendDiscover(xid); + if (error < 0) + return error; + + count = + recvfrom(sock, buffer, sizeof(buffer), 0, &addr_in, sizeof(addr_in)); + + if (count > 0) { + //"Try parse 
Offer\n";
+        error = DhcpParseOffer(xid, buffer, count, yiaddr_out, dns_ip_out,
+                               router_ip_out, subnet_mask_out);
+
+        if (error < 0) {
+          "$FG,6$DhcpParseOffer1: error %d\n$FG$", error;
+        }
+      }
+
+      if (count > 0 && error >= 0) {
+        dhcp_addr = ntohl(addr_in.sin_addr.s_addr);
+        //"DHCP Offer from %08X: YIAddr %08X,\n\tDNS %08X, Router %08X, Subnet
+        //%08X\n",
+        //  dhcp_addr, *yiaddr_out, dns_ip, router_ip, subnet_mask;
+
+        state = CLIENT_REQUEST;
+        retries = 0;
+      } else if (++retries == MAX_RETRIES) {
+        "$FG,4$DhcpConfigure: max retries for DISCOVER\n$FG$";
+        return -1;
+      }
+    } else if (state == CLIENT_REQUEST) {
+      error = DhcpSendRequest(xid, *yiaddr_out, dhcp_addr);
+      if (error < 0)
+        return error;
+
+      count =
+          recvfrom(sock, buffer, sizeof(buffer), 0, &addr_in, sizeof(addr_in));
+
+      if (count > 0) {
+        //"Try parse Ack\n";
+        error = DhcpParseAck(xid, buffer, count);
+
+        if (error < 0) {
+          "$FG,6$DhcpParseAck: error %d\n$FG$", error;
+        }
+      }
+
+      if (count > 0 && error >= 0) {
+        dhcp_addr = ntohl(addr_in.sin_addr.s_addr);
+        //"DHCP Ack from %08X\n", dhcp_addr;
+
+        state = CLIENT_REQUEST_ACCEPTED;
+      } else if (++retries == MAX_RETRIES) {
+        "$FG,4$DhcpConfigure: max retries for REQUEST\n$FG$";
+        return -1;
+      }
+    }
+  }
+
+  return state;
+}
+
+I64 DhcpConfigure() {
+  I64 sock = socket(AF_INET, SOCK_DGRAM);
+
+  if (sock < 0)
+    return -1;
+
+  U32 yiaddr, dns_ip, router_ip, subnet_mask;
+  I64 state =
+      DhcpConfigureInner(sock, &yiaddr, &dns_ip, &router_ip, &subnet_mask);
+
+  close(sock);
+
+  if (state == CLIENT_REQUEST_ACCEPTED) {
+    in_addr in;
+    in.s_addr = htonl(yiaddr);
+    U8 buffer[INET_ADDRSTRLEN];
+    "$FG,2$Obtained IP address %s\n$FG$",
+        inet_ntop(AF_INET, &in.s_addr, buffer, sizeof(buffer));
+    IPv4SetAddress(yiaddr);
+    IPv4SetSubnet(router_ip, subnet_mask);
+    DnsSetResolverIPv4(dns_ip);
+    return 0;
+  } else
+    return -1;
+}
+
+U0 Netcfg() {
+  SocketInit();
+
+  "$FG,7$Netcfg: Configuring network...\n$FG$";
+
+  I64 error = DhcpConfigure();
+  if (error < 0)
+    "$FG,4$DhcpConfigure: error %d\n$FG$", error;
+}
diff --git a/Net/Socket.HC b/Net/Socket.HC
new file mode 100644
index 0000000..aa23a05
--- /dev/null
+++ b/Net/Socket.HC
@@ -0,0 +1,58 @@
+#exe {
+if (SNAILNET_NATIVE_DRIVER == NULL) {
+  StreamPrint("#include \"::/Adam/Net/SnailLib\"");
+}
+}
+
+// Higher-level, utility functions
+
+// Read one line from sock into buffer, one byte per recv() call.
+// Stops at '\n' (not stored); '\r' bytes are dropped. At most size - 1
+// bytes are stored and the result is always NUL-terminated.
+// Returns the number of bytes stored, or -1 if size leaves no room for
+// the terminator, the peer closes the connection, or recv() fails.
+I64 recvLine(I64 sock, U8 *buffer, I64 size, I64 flags) {
+  I64 got = 0;
+
+  // buffer[got] = 0 below needs at least one byte of space.
+  if (size < 1)
+    return -1;
+
+  while (got + 1 < size) {
+    // recv() reports errors as -1 as well as end-of-stream as 0; testing
+    // only for 0 would go on to inspect a byte that was never written.
+    if (recv(sock, buffer + got, 1, flags) <= 0)
+      return -1;
+
+    if (buffer[got] == '\n')
+      break;
+    else if (buffer[got] != '\r')
+      got++;
+  }
+  // FIXME: safe but incorrect behavior on overflow
+  buffer[got] = 0;
+  return got;
+}
+
+// Keep calling send() until len bytes are written or send() stops making
+// progress. Returns the number of bytes actually sent (may be < len).
+I64 sendall(I64 sockfd, U8 *buf, I64 len, I64 flags) {
+  I64 total = 0;
+
+  while (len) {
+    I64 sent = send(sockfd, buf, len, flags);
+    if (sent > 0) {
+      buf += sent;
+      total += sent;
+      len -= sent;
+    } else
+      break;
+  }
+
+  return total;
+}
+
+// Send a NUL-terminated string (terminator excluded) via sendall().
+I64 sendString(I64 sockfd, U8 *str, I64 flags) {
+  return sendall(sockfd, str, StrLen(str), flags);
+}
diff --git a/Net/Tcp.HC b/Net/Tcp.HC
new file mode 100644
index 0000000..9378f6f
--- /dev/null
+++ b/Net/Tcp.HC
@@ -0,0 +1,1108 @@
+// https://tools.ietf.org/html/rfc793
+
+// See https://en.wikipedia.org/wiki/File:Tcp_state_diagram_fixed_new.svg
+#define TCP_STATE_CLOSED 0
+#define TCP_STATE_LISTEN 1
+#define TCP_STATE_SYN_SENT 2
+#define TCP_STATE_SYN_RECEIVED 3
+#define TCP_STATE_ESTABLISHED 4
+#define TCP_STATE_FIN_WAIT_1 5
+#define TCP_STATE_FIN_WAIT_2 6
+#define TCP_STATE_CLOSE_WAIT 7
+#define TCP_STATE_CLOSING 8
+#define TCP_STATE_LAST_ACK 9
+#define TCP_STATE_TIME_WAIT 10
+
+#define TCP_CONNECT_TIMEOUT 10000
+
+//#define TCP_DEFAULT_MSS 536
+
+#define TCP_DEFAULT_MSS 1500
+
+#define TCP_WINDOW_SIZE 65536
+
+#define TCP_FLAG_FIN 0x01
+#define TCP_FLAG_SYN 0x02
+#define TCP_FLAG_RST 0x04
+#define TCP_FLAG_PSH 0x08
+#define TCP_FLAG_ACK 0x10
+#define TCP_FLAG_URG 0x20
+
+#define TCP_SRTT_ALPHA 0.9
+#define TCP_RTO_MIN 0.2
+#define TCP_RTO_MAX 10000
+#define
TCP_RTO_BETA 2 + +class CTcpHeader { + U16 source_port; + U16 dest_port; + U32 seq; + U32 ack; + U8 data_offset; + U8 flags; + U16 window_size; + U16 checksum; + U16 urgent_pointer; +}; + +class CTcpSendBufHeader { + CTcpSendBufHeader *next; + + F64 time_sent; + U32 length; + U32 retries; + U32 seq_start; + U32 seq_end; +}; + +class CTcpSocket { + CSocket sock; + + I64 state; + + U32 local_addr; + U16 local_port; + + U32 remote_addr; + U32 remote_port; + + U32 snd_una; // seq number of first unacknowledged octet + U32 snd_nxt; // seq number of next octet to send + U32 snd_wnd; // allowed number of unacknowledged outgoing octets + U32 mss; // maximum segment size + + U32 rcv_nxt; // seq number of next octet to receive + U32 rcv_wnd; // allowed number of unacknowledged incoming octets + + F64 conntime; + F64 srtt; + + I64 recv_buf_size; + U8 *recv_buf; + I64 recv_buf_read_pos; + I64 recv_buf_write_pos; + + CTcpSocket *backlog_next; + CTcpSocket *backlog_first; + CTcpSocket *backlog_last; + I64 backlog_remaining; + + CTcpSendBufHeader *send_buf_first; + CTcpSendBufHeader *send_buf_last; + + // I64 rcvtimeo_ms; + // I64 recv_maxtime; +}; + +class CTcpPseudoHeader { + U32 source_addr; + U32 dest_addr; + U8 zeros; + U8 protocol; + U16 tcp_length; +}; + +class CTcpSocketListItem { + CTcpSocketListItem *prev; + CTcpSocketListItem *next; + CTcpSocket *sock; +}; + +static CTcpSocketListItem **tcp_socket_list; + +static CTcpSocket *GetTcpSocketFromList(CIPv4Packet *packet, CTcpHeader *hdr) { + CTcpSocketListItem *item = tcp_socket_list[ntohs(hdr->dest_port)]->next; + while (item) { + if (item->sock->remote_addr == packet->source_ip && + item->sock->remote_port == ntohs(hdr->source_port)) { + return item->sock; + } + item = item->next; + } + return NULL; +} + +U0 AddTcpSocketToList(CTcpSocket *s) { + CTcpSocketListItem *prev = tcp_socket_list[s->local_port]; + CTcpSocketListItem *new = CAlloc(sizeof(CTcpSocketListItem)); + while (prev->next) { + prev = prev->next; + } + 
new->prev = prev;
+  new->sock = s;
+  prev->next = new;
+}
+
+// Unlink the list item that refers to s from the per-port socket list and
+// release it. Returns s if an item was found, NULL otherwise.
+CTcpSocket *RemoveTcpSocketFromList(CTcpSocket *s) {
+  CTcpSocketListItem *prev = NULL;
+  CTcpSocketListItem *next = NULL;
+  CTcpSocketListItem *item = tcp_socket_list[s->local_port]->next;
+  while (item) {
+    if (item->sock == s) {
+      prev = item->prev;
+      next = item->next;
+      if (prev) {
+        prev->next = next;
+      }
+      if (next) {
+        next->prev = prev;
+      }
+      // The item was CAlloc'ed by AddTcpSocketToList and is unreachable
+      // once unlinked, so free it here instead of leaking it.
+      Free(item);
+      return s;
+    }
+    item = item->next;
+  }
+  return NULL;
+}
+
+// TODO: this takes up half a meg, change it to a binary tree or something
+static CTcpSocket **tcp_bound_sockets;
+
+static U16 tcp_next_source_port = RandU16();
+
+// TRUE for every state from ESTABLISHED through TIME-WAIT, i.e. all states
+// other than CLOSED, LISTEN, SYN-SENT and SYN-RECEIVED.
+static Bool TcpIsSynchronizedState(I64 state) {
+  return state == TCP_STATE_ESTABLISHED || state == TCP_STATE_FIN_WAIT_1 ||
+         state == TCP_STATE_FIN_WAIT_2 || state == TCP_STATE_CLOSE_WAIT ||
+         state == TCP_STATE_CLOSING || state == TCP_STATE_LAST_ACK ||
+         state == TCP_STATE_TIME_WAIT;
+}
+
+// Add the 16-bit words of header[0..length) to a running one's-complement
+// sum and return the intermediate value for TcpFinalChecksum.
+// A trailing odd byte is ignored here; callers pass even-sized blocks.
+static U16 TcpPartialChecksum(U32 sum, U8 *header, I64 length) {
+  I64 nleft = length;
+  U16 *w = header;
+
+  while (nleft > 1) {
+    sum += *(w++);
+    nleft -= 2;
+  }
+
+  // The result is handed back as 16 bits, so fold the carries in
+  // (end-around carry) rather than letting the truncation drop them,
+  // which would corrupt the final checksum.
+  sum = (sum >> 16) + (sum & 0xffff);
+  sum += (sum >> 16);
+  return sum & 0xffff;
+}
+
+// Finish a one's-complement checksum: add the words of header[0..length)
+// (plus a trailing odd byte, if any) to sum, fold the carries and return
+// the complemented 16-bit result.
+static U16 TcpFinalChecksum(U32 sum, U8 *header, I64 length) {
+  I64 nleft = length;
+  U16 *w = header;
+
+  while (nleft > 1) {
+    sum += *(w++);
+    nleft -= 2;
+  }
+
+  // mop up an odd byte, if necessary
+  if (nleft == 1) {
+    sum += ((*w) & 0x00ff);
+  }
+
+  // add back carry outs from top 16 bits to low 16 bits
+  sum = (sum >> 16) + (sum & 0xffff); // add hi 16 to low 16
+  sum += (sum >> 16);                 // add carry
+  return (~sum) & 0xffff;
+}
+
+I64 TcpPacketAlloc(U8 **frame_out, U32 source_ip, U16 source_port, U32 dest_ip,
+                   U16 dest_port, U32 seq, U32 ack, U8 flags, I64 length) {
+  U8 *frame;
+  I64 index = IPv4PacketAlloc(&frame, IP_PROTO_TCP, source_ip, dest_ip,
+                              sizeof(CTcpHeader) + length);
+
+  if (index < 0)
+    return index;
+
+  CTcpHeader *hdr = frame;
+  hdr->source_port = htons(source_port);
+  hdr->dest_port = htons(dest_port);
+  hdr->seq = htonl(seq);
+  hdr->ack =
htonl(ack);
+  hdr->data_offset = (sizeof(CTcpHeader) / 4) << 4;
+  hdr->flags = flags;
+  hdr->window_size = htons(TCP_WINDOW_SIZE / 2); // FIXME
+  hdr->checksum = 0;
+  hdr->urgent_pointer = 0;
+
+  *frame_out = frame + sizeof(CTcpHeader);
+  return index;
+}
+
+// Compute the TCP checksum of a segment built by TcpPacketAlloc and hand
+// the packet to IPv4PacketFinish.
+//   index        - packet handle returned by TcpPacketAlloc
+//   frame        - payload pointer returned by TcpPacketAlloc; the
+//                  CTcpHeader sits immediately before it
+//   length       - payload length in bytes
+//   send_buf_out - if non-NULL, receives a freshly MAlloc'ed
+//                  CTcpSendBufHeader followed by a copy of the whole segment
+//                  (header + payload); the caller must set seq_end and owns
+//                  the buffer
+// Returns whatever IPv4PacketFinish returns.
+I64 TcpPacketFinish(I64 index, U32 source_ip, U32 dest_ip, U8 *frame,
+                    I64 length, CTcpSendBufHeader **send_buf_out) {
+  CTcpHeader *hdr = frame - sizeof(CTcpHeader);
+
+  CTcpPseudoHeader pseudo;
+  pseudo.source_addr = htonl(source_ip);
+  pseudo.dest_addr = htonl(dest_ip);
+  pseudo.zeros = 0;
+  pseudo.protocol = IP_PROTO_TCP;
+  pseudo.tcp_length = htons(sizeof(CTcpHeader) + length);
+
+  U32 sum = TcpPartialChecksum(0, &pseudo, sizeof(CTcpPseudoHeader));
+  hdr->checksum = TcpFinalChecksum(sum, hdr, sizeof(CTcpHeader) + length);
+
+  if (send_buf_out) {
+    CTcpSendBufHeader *sb =
+        MAlloc(sizeof(CTcpSendBufHeader) + sizeof(CTcpHeader) + length);
+    sb->next = NULL;
+    sb->time_sent = tS;
+    sb->length = sizeof(CTcpHeader) + length;
+    sb->retries = 0;
+    sb->seq_start = ntohl(hdr->seq);
+    sb->seq_end = 0; // NEEDS TO BE SET UPSTREAM
+
+    // Save the segment starting at its TCP header: the retransmit path
+    // replays sb->length bytes as a complete segment. Copying from frame
+    // (the payload) would drop the header and read past the payload end.
+    MemCpy((sb(U8 *)) + sizeof(CTcpSendBufHeader), hdr,
+           sizeof(CTcpHeader) + length);
+    *send_buf_out = sb;
+  }
+
+  return IPv4PacketFinish(index);
+}
+
+// Send a flags-only TCP segment (no payload) with explicit addressing and
+// sequence numbers. Not tied to a socket and not queued for retransmission.
+// Returns a negative value if the packet could not be allocated, otherwise
+// the result of TcpPacketFinish.
+I64 TcpSend(U32 local_addr, U16 local_port, U32 remote_addr, U16 remote_port,
+            U32 seq, U32 ack, U8 flags) {
+  U8 *frame;
+  I64 index = TcpPacketAlloc(&frame, local_addr, local_port, remote_addr,
+                             remote_port, seq, ack, flags, 0);
+
+  if (index < 0)
+    return index;
+
+  return TcpPacketFinish(index, local_addr, remote_addr, frame, 0, NULL);
+}
+
+// Send a TCP frame with flags only, no data
+I64 TcpSend2(CTcpSocket *s, U8 flags) {
+  U8 *frame;
+  I64 index =
+      TcpPacketAlloc(&frame, s->local_addr, s->local_port, s->remote_addr,
+                     s->remote_port, s->snd_nxt, s->rcv_nxt, flags, 0);
+
+  if (index < 0)
+    return index;
+
+  if (flags & TCP_FLAG_SYN)
+    s->snd_nxt++;
+
+  if (flags & TCP_FLAG_FIN)
+
s->snd_nxt++;
+
+  //"Sent #%d, to %08X, err = %d\n", s->seq, s->remote_addr, error;
+  if (flags & (TCP_FLAG_SYN | TCP_FLAG_FIN)) {
+    CTcpSendBufHeader *sb;
+    TcpPacketFinish(index, s->local_addr, s->remote_addr, frame, 0, &sb);
+    sb->seq_end = s->snd_nxt;
+
+    // Append to SendBuf chain
+    if (s->send_buf_first)
+      s->send_buf_last->next = sb;
+    else
+      s->send_buf_first = sb;
+
+    s->send_buf_last = sb;
+
+    // snd_nxt has advanced and the segment is on the SendBuf chain, so it
+    // is committed. Report success explicitly: callers loop while the
+    // result is < 0, and this branch used to fall off the end of the
+    // function without returning a value.
+    return 0;
+  } else {
+    return TcpPacketFinish(index, s->local_addr, s->remote_addr, frame, 0,
+                           NULL);
+  }
+}
+
+// Send a TCP frame with flags and data
+// Copies length bytes from data into a new segment, advances s->snd_nxt by
+// the sequence space consumed (payload plus one each for SYN and FIN) and
+// appends a copy of the segment to the socket's SendBuf chain.
+// Returns a negative value if no packet could be allocated (nothing is
+// consumed in that case), 0 once the segment has been queued.
+I64 TcpSendData2(CTcpSocket *s, U8 flags, U8 *data, I64 length) {
+  U8 *frame;
+  I64 index =
+      TcpPacketAlloc(&frame, s->local_addr, s->local_port, s->remote_addr,
+                     s->remote_port, s->snd_nxt, s->rcv_nxt, flags, length);
+
+  if (index < 0)
+    return index;
+
+  if (length)
+    MemCpy(frame, data, length);
+
+  if (flags & TCP_FLAG_SYN)
+    s->snd_nxt++;
+
+  s->snd_nxt += length;
+
+  if (flags & TCP_FLAG_FIN)
+    s->snd_nxt++;
+
+  //"Sent #%d, to %08X, err = %d\n", s->seq, s->remote_addr, error;
+
+  CTcpSendBufHeader *sb;
+  TcpPacketFinish(index, s->local_addr, s->remote_addr, frame, length, &sb);
+  sb->seq_end = s->snd_nxt;
+
+  // Append to SendBuf chain
+  if (s->send_buf_first)
+    s->send_buf_last->next = sb;
+  else
+    s->send_buf_first = sb;
+
+  s->send_buf_last = sb;
+
+  // The data now owns sequence space and sits on the SendBuf chain, so the
+  // caller must not submit it again: report success. Previously the
+  // function ended without a return statement although the caller tests
+  // the result for < 0.
+  return 0;
+}
+
+// Locate the TCP header and payload inside an IPv4 packet.
+// On success stores the header pointer, payload pointer and payload length
+// through the out parameters and returns 0. Returns -1 if the packet is
+// not TCP, is shorter than a CTcpHeader, or carries a data offset that is
+// smaller than the fixed header or larger than the packet.
+I64 TcpParsePacket(CTcpHeader **header_out, U8 **data_out, I64 *length_out,
+                   CIPv4Packet *packet) {
+  if (packet->proto != IP_PROTO_TCP)
+    return -1;
+
+  // The fixed header must be present before any of its fields are read.
+  if (packet->length < sizeof(CTcpHeader))
+    return -1;
+
+  // FIXME: checksum
+
+  CTcpHeader *hdr = packet->data;
+  // Upper nibble of data_offset is the header size in 32-bit words.
+  I64 header_length = (hdr->data_offset >> 4) * 4;
+
+  // A bogus offset would give a negative payload length or a payload
+  // pointer outside the packet.
+  if (header_length < sizeof(CTcpHeader) || header_length > packet->length)
+    return -1;
+
+  //"TCP: in hdr %d, flags %02Xh, seq %d, ack %d, len %d, chksum %d\n",
+  //  header_length, hdr->flags, ntohl(hdr->seq), ntohl(hdr->ack),
+  //  packet->length - header_length, ntohs(hdr->checksum);
+
+  *header_out = hdr;
+  *data_out = packet->data + header_length;
+  *length_out = packet->length - header_length;
+  return 0;
+}
+
+/*
+class CTcpSendBufHeader {
+
CTcpSendBufHeader* next; + + F64 time_sent; + U32 length; + U32 retries; + U32 seq_start; + U32 seq_end; +}; +*/ + +static U0 TcpSocketAckSendBufs(CTcpSocket *s, U32 seg_ack) { + F64 time = tS; + + while (s->send_buf_first) { + CTcpSendBufHeader *sb = s->send_buf_first; + + // There's no notion of smaller/greater than in modular arithemtic, + // we can only check if a number lies within some range. + // Here we check that + // sb->seq_end <= seg_ack <= s->snd_nxt + // because that will work for all meaningful ACKs. + I64 seg_ack_rel = (seg_ack - sb->seq_end) & 0xffffffff; + I64 snd_nxt_rel = (s->snd_nxt - sb->seq_end) & 0xffffffff; + + if (seg_ack_rel <= snd_nxt_rel) { + // Update smoothed RTT + F64 rtt = time - sb->time_sent; + s->srtt = (s->srtt * TCP_SRTT_ALPHA) + ((1.0 - TCP_SRTT_ALPHA) * rtt); + //"ACK'd %d->%d (RTT %f ms)", sb->seq_start, sb->seq_end, rtt * 1000; + + // Remove SendBuf from chain + s->send_buf_first = sb->next; + + if (s->send_buf_first == NULL) + s->send_buf_last = NULL; + + Free(sb); + } else + break; + } +} + +// Check unacknowledged outgoing packets and retransmit if needed +static U0 TcpSocketCheckSendBufs(CTcpSocket *s) { + F64 time = tS; + + F64 rto = TCP_RTO_BETA * s->srtt; + + if (rto < TCP_RTO_MIN) + rto = TCP_RTO_MIN; + if (rto > TCP_RTO_MAX) + rto = TCP_RTO_MAX; + + while (s->send_buf_first) { + CTcpSendBufHeader *sb = s->send_buf_first; + + if (time > sb->time_sent + rto) { + break; + + // Retransmit + "Retransmit %d->%d (%f ms)!\n", sb->seq_start, sb->seq_end, + (time - sb->time_sent) * 1000; + + U8 *frame; + I64 index = IPv4PacketAlloc(&frame, IP_PROTO_TCP, s->local_addr, + s->remote_addr, sb->length); + + if (index < 0) { + return; // retry later I guess + } + + MemCpy(frame, (sb(U8 *)) + sizeof(CTcpSendBufHeader), sb->length); + IPv4PacketFinish(index); + + sb->time_sent = tS; + + // Move to the end of the chain + s->send_buf_first = sb->next; + sb->next = NULL; + + if (s->send_buf_first) + s->send_buf_last->next = sb; + else 
+ s->send_buf_first = sb; + + s->send_buf_last = sb; + } else + break; + } +} + +I64 TcpSocketAccept(CTcpSocket *s, sockaddr *addr, I64 addrlen) { + if (s->state != TCP_STATE_LISTEN) + return -1; + + while (1) { + // TODO: Thread safe? + if (s->backlog_first) { + CTcpSocket *new_socket = s->backlog_first; + // "Retr %p\n", new_socket; + + s->backlog_first = s->backlog_first->backlog_next; + if (!s->backlog_first) + s->backlog_last = NULL; + + s->backlog_remaining++; + + // TODO: this should be done in a way that doesn't block on accept() + I64 maxtime = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000; + + while (cnts.jiffies < maxtime) { + if (new_socket->state == TCP_STATE_ESTABLISHED || + new_socket->state == TCP_STATE_CLOSED) + break; + else + Yield; + } + + if (new_socket->state != TCP_STATE_ESTABLISHED) { + close(new_socket); + return -1; + } + + return new_socket; + } else + Yield; + } + + no_warn addr; // FIXME + no_warn addrlen; + return -1; +} + +I64 TcpSocketBind(CTcpSocket *s, sockaddr *addr, I64 addrlen) { + if (addrlen < sizeof(sockaddr_in)) + return -1; + + if (s->state != TCP_STATE_CLOSED) + return -1; + + sockaddr_in *addr_in = addr; + + U16 local_port = ntohs(addr_in->sin_port); + + // TODO: address & stuff + if (tcp_bound_sockets[local_port] != NULL) + return -1; + + tcp_bound_sockets[local_port] = s; + + s->local_addr = IPv4GetAddress(); + s->local_port = local_port; + + return 0; +} + +I64 TcpSocketClose(CTcpSocket *s) { + /* https://tools.ietf.org/html/rfc793#section-3.5 + Case 1: Local user initiates the close + + In this case, a FIN segment can be constructed and placed on the + outgoing segment queue. No further SENDs from the user will be + accepted by the TCP, and it enters the FIN-WAIT-1 state. RECEIVEs + are allowed in this state. All segments preceding and including FIN + will be retransmitted until acknowledged. When the other TCP has + both acknowledged the FIN and sent a FIN of its own, the first TCP + can ACK this FIN. 
Note that a TCP receiving a FIN will ACK but not + send its own FIN until its user has CLOSED the connection also. + */ + + // Send FIN & wait for acknowledge + if (s->state == TCP_STATE_ESTABLISHED) { + while (TcpSend2(s, TCP_FLAG_FIN | TCP_FLAG_ACK) < 0) { + TcpSocketCheckSendBufs(s); + Yield; + } + + s->state = TCP_STATE_FIN_WAIT_1; + // "FIN-WAIT-1\n"; + + // Block until all outgoing data including our FIN have been acknowledged + // (una == nxt) + // + // TODO: what other states are permissible here? + // TODO: this can block for ever if our receive buffer fills up, but the + // other side + // insists on pushing more data before closing the connection + while ((s->state == TCP_STATE_FIN_WAIT_1) && s->snd_una != s->snd_nxt) { + TcpSocketCheckSendBufs(s); + Yield; + } + + if (s->state == TCP_STATE_FIN_WAIT_1) { + s->state = TCP_STATE_FIN_WAIT_2; + // "FIN-WAIT-2 (%d/%d)\n", s->snd_una, s->snd_nxt; + } + + // Now we should wait for the other side's FIN and acknowledge it + // TODO: time-out + while (s->state == TCP_STATE_FIN_WAIT_2) { + Yield; + } + } else if (s->state == TCP_STATE_CLOSE_WAIT) { + while (TcpSend2(s, TCP_FLAG_FIN | TCP_FLAG_ACK) < 0) { + TcpSocketCheckSendBufs(s); + Yield; + } + + if (s->state == TCP_STATE_CLOSE_WAIT) { + s->state = TCP_STATE_LAST_ACK; + // "LAST-ACK (%d/%d)\n", s->snd_una, s->snd_nxt; + } + + // Block until all outgoing data including our FIN have been acknowledged + // (una == nxt) + while (s->state == TCP_STATE_LAST_ACK && s->snd_una != s->snd_nxt) { + TcpSocketCheckSendBufs(s); + Yield; + } + } + + // Still connected? RST it! 
+ if (TcpIsSynchronizedState(s->state)) { + TcpSend2(s, TCP_FLAG_RST); + } + + // Free backlog + CTcpSocket *backlog = s->backlog_first; + CTcpSocket *backlog2; + + while (backlog) { + backlog2 = backlog->backlog_next; + close(backlog); + backlog = backlog2; + } + + if (s->local_port) + if (!RemoveTcpSocketFromList(s)) + tcp_bound_sockets[s->local_port] = NULL; + + Free(s->recv_buf); + Free(s); + return 0; +} + +I64 TcpSocketConnect(CTcpSocket *s, sockaddr *addr, I64 addrlen) { + if (addrlen < sizeof(sockaddr_in)) + return -1; + + if (s->state != TCP_STATE_CLOSED) + return -1; + + sockaddr_in *addr_in = addr; + + U16 local_port = 0x8000 + (tcp_next_source_port & 0x7fff); + tcp_next_source_port++; + + // TODO: address & stuff + if (tcp_bound_sockets[local_port] != NULL) + return -1; + + tcp_bound_sockets[local_port] = s; + + s->local_addr = IPv4GetAddress(); + s->local_port = local_port; + s->remote_addr = ntohl(addr_in->sin_addr.s_addr); + s->remote_port = ntohs(addr_in->sin_port); + + s->snd_una = 0; + s->snd_nxt = 0; + s->snd_wnd = 0; + s->mss = TCP_DEFAULT_MSS; + + s->rcv_nxt = 0; + s->rcv_wnd = TCP_WINDOW_SIZE; + + s->conntime = tS; + + TcpSend2(s, TCP_FLAG_SYN); + s->state = TCP_STATE_SYN_SENT; + + // TODO: TcpSetTimeout + I64 maxtime = cnts.jiffies + TCP_CONNECT_TIMEOUT * JIFFY_FREQ / 1000; + + while (cnts.jiffies < maxtime) { + if (s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_CLOSED) + break; + else + Yield; + } + + if (s->state != TCP_STATE_ESTABLISHED) + return -1; + + return 0; +} + +I64 TcpSocketListen(CTcpSocket *s, I64 backlog) { + if (s->state != TCP_STATE_CLOSED) + return -1; + + // Enter listen state. If a SYN packet arrives, it will be processed by + // TcpHandler, which opens the connection and puts the new socket into the + // listening socket's accept backlog. 
+ s->state = TCP_STATE_LISTEN; + s->backlog_remaining = backlog; + + return 0; +} + +I64 TcpSocketRecvfrom(CTcpSocket *s, U8 *buf, I64 len, I64 flags, + sockaddr *src_addr, I64 addrlen) { + no_warn flags; + no_warn src_addr; // FIXME + no_warn addrlen; + //"TcpSocketRecvfrom\n"; + // If we are ready to receive data, but there is none currently, block until + // we receive is some. + // TODO: checking for FIN-WAIT-1 here is not so useful, since it only exists + // while we are in Close() + while ( + (s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_FIN_WAIT_1) && + s->recv_buf_read_pos == s->recv_buf_write_pos) { + TcpSocketCheckSendBufs(s); + Yield; + } + + // TODO: this works for now, but we should be still able to receive data + // in connection-closing states + if (((s->state != TCP_STATE_ESTABLISHED || + s->state == TCP_STATE_FIN_WAIT_1) && + s->recv_buf_read_pos == s->recv_buf_write_pos) || + len == 0) + return 0; + + I64 read_pos = s->recv_buf_read_pos; + I64 write_pos = s->recv_buf_write_pos; + + // I64 avail = (write_pos - read_pos) & (s->recv_buf_size); + I64 read_total = 0; + I64 step; + + if (write_pos < read_pos) { + // We can read up to the end of the buffer + step = s->recv_buf_size - read_pos; + + if (step > len) + step = len; + + //"Read %d from %d..end\n", step, read_pos; + MemCpy(buf, s->recv_buf + read_pos, step); + buf += step; + len -= step; + read_pos = (read_pos + step) & (s->recv_buf_size - 1); + read_total += step; + + // at this point, (len == 0 || read_pos == 0) must be true + } + + if (len) { + step = write_pos - read_pos; + + if (step > len) + step = len; + + //"Read %d from start+%d..\n", step, read_pos; + MemCpy(buf, s->recv_buf + read_pos, step); + buf += step; + len -= step; + read_pos += step; + read_total += step; + } + + s->recv_buf_read_pos = read_pos; + return read_total; +} + +// This function blocks until at least some data is sent. +// Then it returns if the transmission window or outgoing buffers are full. 
+// Returns the number of bytes handed to TcpSendData2, which may be less than
+// len. Returns 0 if nothing was sent, e.g. because the socket is in neither
+// ESTABLISHED nor CLOSE-WAIT. flags, dest_addr and addrlen are ignored.
+I64 TcpSocketSendto(CTcpSocket *s, U8 *buf, I64 len, I64 flags,
+                    sockaddr_in *dest_addr, I64 addrlen) {
+  no_warn dest_addr; // TODO: should be validated instead, no?
+  no_warn addrlen;
+  no_warn flags;
+
+  I64 sent_total = 0;
+  I64 can_send;
+  Bool stalled;
+
+  while (
+      (s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_CLOSE_WAIT) &&
+      len) {
+    // Room left in the peer's advertised window, modulo 2^32
+    can_send = (s->snd_una + s->snd_wnd - s->snd_nxt) & 0xffffffff;
+    stalled = FALSE;
+
+    // TODO: Keep trying
+    //       Must be tied to a timeout; see RFC793/Managing-the-Window
+    // if (s->snd_wnd == 0)
+    //  can_send = 1;
+
+    if (can_send == 0) {
+      stalled = TRUE;
+    } else {
+      if (can_send > len)
+        can_send = len;
+
+      if (can_send > s->mss)
+        can_send = s->mss;
+
+      if (TcpSendData2(s, TCP_FLAG_ACK, buf, can_send) < 0) {
+        // No out-buffers available! Handle in the same way as full window:
+        // stall until some of the outgoing data is acknowledged.
+        stalled = TRUE;
+      } else {
+        buf += can_send;
+        len -= can_send;
+        // Account for the bytes just queued. Without this the function
+        // always returned 0 and the partial-send exit below never fired.
+        sent_total += can_send;
+      }
+    }
+
+    if (stalled) {
+      // Something already went out: report the partial count to the caller
+      if (sent_total > 0)
+        break;
+
+      // Check unacknowledged outgoing packets, re-transmit as needed
+      TcpSocketCheckSendBufs(s);
+      Yield;
+    }
+  }
+
+  return sent_total;
+}
+
+// Socket options are not implemented for TCP sockets; always returns -1.
+I64 TcpSocketSetsockopt(CTcpSocket *s, I64 level, I64 optname, U8 *optval,
+                        I64 optlen) {
+  /*if (level == SOL_SOCKET && optname == SO_RCVTIMEO_MS && optlen == 8) {
+      s->rcvtimeo_ms = *(optval(I64*));
+      return 0;
+  }*/
+
+  no_warn s;
+  no_warn level;
+  no_warn optname;
+  no_warn optval;
+  no_warn optlen;
+
+  return -1;
+}
+
+// Socket-class constructor for (AF_INET, SOCK_STREAM).
+// Allocates a CLOSED socket with its method table filled in and an empty
+// receive ring of TCP_WINDOW_SIZE bytes. Returns NULL for any other
+// domain/type pair.
+CTcpSocket *TcpSocket(U16 domain, U16 type) {
+  if (domain != AF_INET || type != SOCK_STREAM)
+    return NULL;
+
+  // Zero-filled allocation: fields that are not assigned below (for example
+  // local_port, which TcpSocketClose tests) must not hold heap garbage when
+  // the socket is closed without ever being bound or connected.
+  CTcpSocket *s = CAlloc(sizeof(CTcpSocket));
+  s->sock.accept = &TcpSocketAccept;
+  s->sock.bind = &TcpSocketBind;
+  s->sock.close = &TcpSocketClose;
+  s->sock.connect = &TcpSocketConnect;
+  s->sock.listen = &TcpSocketListen;
+  s->sock.recvfrom = &TcpSocketRecvfrom;
+  s->sock.sendto = &TcpSocketSendto;
+  s->sock.setsockopt = &TcpSocketSetsockopt;
+
+  s->state = TCP_STATE_CLOSED;
+
+  s->send_buf_first = NULL;
+  s->send_buf_last = NULL;
+
+  s->recv_buf_size = TCP_WINDOW_SIZE;
+  s->recv_buf = MAlloc(s->recv_buf_size);
+  s->recv_buf_read_pos = 0;
+  s->recv_buf_write_pos = 0;
+
+  s->backlog_next = NULL;
+  s->backlog_first = NULL;
+  s->backlog_last = NULL;
+  s->backlog_remaining = 0;
+
+  /*s->rcvtimeo_ms = 0;
+  s->recv_maxtime = 0;
+
+  s->recv_buf = NULL;
+  s->recv_len = 0;
+  s->recv_addr.sin_family = AF_INET;
+  s->bound_to = 0;*/
+  return s;
+}
+
+// Runs one received segment through the state machine of socket s.
+//   packet - the enclosing IPv4 packet (source_ip/dest_ip are read)
+//   hdr    - the TCP header of the segment
+//   data   - the segment payload, `length` bytes long
+// A LISTEN socket either spawns a SYN-RECEIVED child on its backlog or answers
+// with RST. For any other non-CLOSED socket the SYN, sequence check, ACK, RST
+// and payload/FIN are processed in that order and an ACK is sent if needed.
+U0 TcpSocketHandle(CTcpSocket *s, CIPv4Packet *packet, CTcpHeader *hdr,
+                   U8 *data, I64 length) {
+  // Segment length in sequence space: payload plus one each for SYN and FIN
+  U32 seg_len = length;
+
+  if (hdr->flags & TCP_FLAG_FIN)
+    seg_len++;
+  if (hdr->flags & TCP_FLAG_SYN)
+    seg_len++;
+
+  U32 seg_seq = ntohl(hdr->seq);
+
+  if (s->state == TCP_STATE_LISTEN) {
+    // A new connection is being opened.
+
+    if ((hdr->flags & TCP_FLAG_SYN) && s->backlog_remaining > 0) {
+      //"SYN in from %08X:%d => %08X:%d.\n", packet->source_ip,
+      // ntohs(hdr->source_port),
+      //    packet->dest_ip, ntohs(hdr->dest_port);
+      CTcpSocket *new_socket = TcpSocket(AF_INET, SOCK_STREAM);
+
+      new_socket->local_addr = IPv4GetAddress();
+      new_socket->local_port = s->local_port;
+      new_socket->remote_addr = packet->source_ip;
+      new_socket->remote_port = ntohs(hdr->source_port);
+
+      new_socket->snd_una = 0;
+      new_socket->snd_nxt = 0;
+      new_socket->snd_wnd = 0;
+      new_socket->mss = TCP_DEFAULT_MSS;
+
+      // The peer's SYN occupies one sequence number
+      new_socket->rcv_nxt = ++seg_seq;
+      new_socket->rcv_wnd = TCP_WINDOW_SIZE;
+
+      new_socket->conntime = tS;
+
+      TcpSend2(new_socket, TCP_FLAG_SYN | TCP_FLAG_ACK);
+      new_socket->state = TCP_STATE_SYN_RECEIVED;
+
+      AddTcpSocketToList(new_socket);
+
+      // Append to the listener's backlog list
+      if (s->backlog_last)
+        s->backlog_last->backlog_next = new_socket;
+      else
+        s->backlog_first = new_socket;
+
+      s->backlog_last = new_socket;
+      s->backlog_remaining--;
+    } else {
+      //"REJ %08X:%d (as %08X:%d)\n", packet->source_ip,
+      // ntohs(hdr->source_port),
+      //    packet->dest_ip, ntohs(hdr->dest_port);
+      // NOTE(review): this also answers an incoming RST with a RST -- confirm
+      // whether that is intended.
+      TcpSend(packet->dest_ip, ntohs(hdr->dest_port), packet->source_ip,
+              ntohs(hdr->source_port), seg_seq + 1, seg_seq + 1,
+              TCP_FLAG_ACK | TCP_FLAG_RST);
+    }
+
+    return;
+  }
+
+  if (s->state == TCP_STATE_CLOSED)
+    return;
+
+  Bool must_ack = FALSE;
+
+  // Process SYN
+  if (hdr->flags & TCP_FLAG_SYN) {
+    s->rcv_nxt = ++seg_seq;
+    //"Reset ACK to %d\n", s->ack;
+
+    must_ack = TRUE;
+  }
+
+  // Validate SEQ
+  Bool valid_seq;
+
+  if (seg_len == 0 && s->rcv_wnd == 0) {
+    valid_seq = (seg_seq == s->rcv_nxt);
+  } else {
+    // At least one of these must be true:
+    //   RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+    //   RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
+    I64 rel_seq = ((seg_seq - s->rcv_nxt) & 0xffffffff);
+    I64 rel_seq_end = ((seg_seq + seg_len - 1 - s->rcv_nxt) & 0xffffffff);
+
+    if (rel_seq < s->rcv_wnd || rel_seq_end < s->rcv_wnd)
+      valid_seq = TRUE;
+    else
+      valid_seq = FALSE;
+  }
+
+  if (!valid_seq)
+    "SEQ error: seg_seq %d, seg_len %d, rcv_nxt %d, rcv_wnd %d\n", seg_seq,
+        seg_len, s->rcv_nxt, s->rcv_wnd;
+
+  // Process ACK
+  if (hdr->flags & TCP_FLAG_ACK) {
+    U32 seg_ack = ntohl(hdr->ack);
+    // ACK is acceptable iff SND.UNA < SEG.ACK =< SND.NXT
+
+    I64 rel_ack = ((seg_ack - s->snd_una) & 0xffffffff);
+    I64 rel_nxt = ((s->snd_nxt - s->snd_una) & 0xffffffff);
+
+    // RFC 793 is poorly worded in this regard, unacceptable ACK
+    // is not the opposite of an acceptable (= new) ACK!
+    // TODO: Instead of zero, we should compare rel_ack to some
+    // NEGATIVE_CONSTANT, so that we don't unnecessarily try to correct every
+    // slightly delayed ACK
+    if (/*0 < rel_ack &&*/ rel_ack <= rel_nxt) {
+      TcpSocketAckSendBufs(s, seg_ack);
+
+      // Accept ACK
+      s->snd_una = seg_ack;
+
+      if (s->state == TCP_STATE_SYN_SENT && (hdr->flags & TCP_FLAG_SYN)) {
+        // Our SYN was answered with SYN+ACK: the active open is complete
+        s->state = TCP_STATE_ESTABLISHED;
+        s->srtt = tS - s->conntime;
+        //"Initial RTT: %f ms", s->srtt * 1000;
+      } else if (s->state == TCP_STATE_SYN_RECEIVED) {
+        //"Connection established.\n";
+        s->state = TCP_STATE_ESTABLISHED;
+        s->srtt = tS - s->conntime;
+        //"Initial RTT: %f ms", s->srtt * 1000;
+      }
+    } else {
+      // Unacceptable ACK
+      "Bad ACK; state %d, seg_ack %d, snd_nxt %d\n", s->state, seg_ack,
+          s->snd_nxt;
+
+      if (s->state == TCP_STATE_LISTEN || s->state == TCP_STATE_SYN_SENT ||
+          s->state == TCP_STATE_SYN_RECEIVED) {
+        // Reset
+        TcpSend(packet->dest_ip, ntohs(hdr->dest_port), packet->source_ip,
+                ntohs(hdr->source_port), seg_ack, seg_seq + seg_len,
+                TCP_FLAG_ACK | TCP_FLAG_RST);
+      } else if (TcpIsSynchronizedState(s->state)) {
+        // Send a 'corrective' ACK
+        must_ack = TRUE;
+      }
+    }
+  }
+
+  // Process RST
+  if (hdr->flags & TCP_FLAG_RST) {
+    if ((s->state == TCP_STATE_SYN_SENT)) {
+      // If acknowledged
+      if (s->snd_una == s->snd_nxt) {
+        "Connection refused\n";
+        s->state = TCP_STATE_CLOSED;
+        return;
+      }
+    } else {
+      if (valid_seq) {
+        "Connection reset by peer\n";
+        s->state = TCP_STATE_CLOSED;
+        return;
+      }
+    }
+
+    "Spurious RST\n";
+  }
+
+  // FIXME check remote addr & port
+
+  // Process data
+  if (valid_seq) {
+    // NOTE(review): seq, ack and the ports are byte-swapped with
+    // ntohl/ntohs, but window_size is stored as read -- confirm whether an
+    // ntohs is missing here.
+    s->snd_wnd = hdr->window_size;
+
+    if (s->state == TCP_STATE_ESTABLISHED || s->state == TCP_STATE_FIN_WAIT_1) {
+      I64 write_pos = s->recv_buf_write_pos;
+      //"%d in @ %d", length, write_pos;
+
+      // Skip retransmitted bytes
+      while (length && seg_seq != s->rcv_nxt) {
+        seg_seq = (seg_seq + 1) & 0xffffffff;
+        data++;
+        length--;
+      }
+
+      // Byte-wise copy into the receive ring; stops early when the ring
+      // is full (one slot is always left unused).
+      I64 i = 0;
+      for (i = 0; i < length; i++) {
+        I64 next_pos = (write_pos + 1) & (s->recv_buf_size - 1);
+
+        if (next_pos == s->recv_buf_read_pos)
+          break;
+
+        s->recv_buf[write_pos] = data[i];
+        write_pos = next_pos;
+      }
+
+      s->recv_buf_write_pos = write_pos;
+      s->rcv_nxt += i;
+      //"; %d saved\n", i;
+
+      if (i > 0)
+        must_ack = TRUE;
+
+      // NOTE(review): the FIN is consumed even when the loop above stored
+      // fewer than `length` bytes -- confirm that this is acceptable.
+      if (hdr->flags & TCP_FLAG_FIN) {
+        must_ack = TRUE;
+        s->rcv_nxt++;
+
+        // NOTE(review): the enclosing branch only admits ESTABLISHED and
+        // FIN_WAIT_1, so the FIN_WAIT_2 test below cannot be true here.
+        if (s->state == TCP_STATE_ESTABLISHED) {
+          s->state = TCP_STATE_CLOSE_WAIT;
+        } else if (s->state == TCP_STATE_FIN_WAIT_1 ||
+                   s->state == TCP_STATE_FIN_WAIT_2) {
+          s->state = TCP_STATE_TIME_WAIT;
+        }
+        // else { ?? }
+      }
+    }
+  }
+
+  if (must_ack) {
+    TcpSend2(s, TCP_FLAG_ACK);
+  }
+}
+
+// Layer-4 entry point for TCP: parses the segment out of `packet`, finds the
+// socket it belongs to (connection list first, then the socket bound to the
+// destination port) and feeds it to TcpSocketHandle.
+// Returns the result of TcpParsePacket (negative on a parse error).
+I64 TcpHandler(CIPv4Packet *packet) {
+  CTcpHeader *hdr;
+  U8 *data;
+  I64 length;
+
+  I64 error = TcpParsePacket(&hdr, &data, &length, packet);
+
+  if (error < 0)
+    return error;
+
+  U16 dest_port = ntohs(hdr->dest_port);
+  //"%u => %p\n", dest_port, tcp_bound_sockets[dest_port];
+
+  CTcpSocket *s = GetTcpSocketFromList(packet, hdr);
+  if (!s)
+    s = tcp_bound_sockets[dest_port];
+
+  // FIXME: should also check that bound address is INADDR_ANY,
+  //        OR packet dest IP matches bound address
+  if (s != NULL) {
+    TcpSocketHandle(s, packet, hdr, data, length);
+  } else {
+    // TODO: Send RST as per RFC793/Reset-Generation
+  }
+
+  return error;
+}
+
+// Allocates the per-port lookup tables: a zeroed array of 65536 bound-socket
+// pointers and an array of 65536 zero-initialised CTcpSocketListItem entries.
+U0 TcpInit() {
+  I64 i;
+  tcp_bound_sockets = MAlloc(65536 * sizeof(CTcpSocket *));
+  MemSet(tcp_bound_sockets, 0, 65536 * sizeof(CTcpSocket *));
+  tcp_socket_list = MAlloc(65536 * sizeof(CTcpSocketListItem *));
+  for (i = 0; i < 65536; i++) {
+    tcp_socket_list[i] = CAlloc(sizeof(CTcpSocketListItem));
+  }
+}
+
+TcpInit;
+RegisterL4Protocol(IP_PROTO_TCP, &TcpHandler);
+RegisterSocketClass(AF_INET, SOCK_STREAM, &TcpSocket);
diff --git a/Net/Udp.HC b/Net/Udp.HC
new file mode 100644
index 0000000..74e19f8
--- /dev/null
+++ b/Net/Udp.HC
@@ -0,0 +1,247 @@
+class CUdpHeader {
+  U16 source_port; // converted with htons/ntohs wherever it is accessed
+  U16 dest_port;   // likewise
+  U16 length;      // written as htons(header size + payload size)
+  U16 checksum;    // written as 0 on send
+};
+
+// A UDP socket. At most one receive can be pending at a time: recvfrom()
+// parks the caller's buffer in recv_buf/recv_len and UdpHandler fills it.
+class CUdpSocket {
+  CSocket sock;
+
+  I64 rcvtimeo_ms;  // receive timeout in milliseconds, 0 = wait forever
+  I64 recv_maxtime; // deadline of the pending receive, in jiffies
+
+  U8 *recv_buf; // caller's buffer while a receive is pending, else NULL
+  I64 recv_len; // in: capacity of recv_buf; out: bytes received or -1
+
+  sockaddr_in recv_addr; // sender of the last datagram (network byte order)
+  U16 bound_to;          // local port, 0 while unbound
+};
+
+// TODO: this takes up half a meg, change it to a binary tree or something
+static CUdpSocket **udp_bound_sockets;
+
+// Allocates an IPv4 packet carrying a UDP header followed by `length` payload
+// bytes and fills in the header (checksum is left at 0).
+// On success *frame_out points at the payload area and the returned index is
+// to be passed to UdpPacketFinish. A negative index from IPv4PacketAlloc is
+// returned unchanged.
+I64 UdpPacketAlloc(U8 **frame_out, U32 source_ip, U16 source_port, U32 dest_ip,
+                   U16 dest_port, I64 length) {
+  U8 *frame;
+  I64 index = IPv4PacketAlloc(&frame, IP_PROTO_UDP, source_ip, dest_ip,
+                              sizeof(CUdpHeader) + length);
+
+  if (index < 0)
+    return index;
+
+  CUdpHeader *hdr = frame;
+  hdr->source_port = htons(source_port);
+  hdr->dest_port = htons(dest_port);
+  hdr->length = htons(sizeof(CUdpHeader) + length);
+  hdr->checksum = 0;
+
+  *frame_out = frame + sizeof(CUdpHeader);
+  return index;
+}
+
+// Hands a packet obtained from UdpPacketAlloc to the IPv4 layer.
+I64 UdpPacketFinish(I64 index) { return IPv4PacketFinish(index); }
+
+// Splits a received IPv4 packet into UDP ports (host byte order) and payload.
+// Returns 0 on success, -1 if the packet is not UDP or is too short to hold a
+// UDP header.
+I64 UdpParsePacket(U16 *source_port_out, U16 *dest_port_out, U8 **data_out,
+                   I64 *length_out, CIPv4Packet *packet) {
+  if (packet->proto != IP_PROTO_UDP)
+    return -1;
+
+  // A truncated packet would otherwise produce a negative payload length
+  if (packet->length < sizeof(CUdpHeader))
+    return -1;
+
+  CUdpHeader *hdr = packet->data;
+  //"UDP: from %d, to %d, len %d, chksum %d\n",
+  //    ntohs(hdr->source_port), ntohs(hdr->dest_port), ntohs(hdr->length),
+  //    ntohs(hdr->checksum);
+
+  // FIXME: also validate hdr->length against packet->length
+
+  *source_port_out = ntohs(hdr->source_port);
+  *dest_port_out = ntohs(hdr->dest_port);
+  // ntohs(hdr->length)
+  // ntohs(hdr->checksum)
+
+  *data_out = packet->data + sizeof(CUdpHeader);
+  *length_out = packet->length - sizeof(CUdpHeader);
+
+  return 0;
+}
+
+// accept() has no meaning for UDP; always returns -1.
+I64 UdpSocketAccept(CUdpSocket *s, sockaddr *addr, I64 addrlen) {
+  no_warn s;
+  no_warn addr;
+  no_warn addrlen;
+  return -1;
+}
+
+// Binds the socket to the port given in addr (read as a sockaddr_in).
+// Returns 0 on success, -1 if addrlen is too short, the socket is already
+// bound, or another socket owns the port.
+I64 UdpSocketBind(CUdpSocket *s, sockaddr *addr, I64 addrlen) {
+  if (addrlen < sizeof(sockaddr_in))
+    return -1;
+
+  if (s->bound_to)
+    return -1;
+
+  sockaddr_in *addr_in = addr;
+  U16 port = ntohs(addr_in->sin_port);
+
+  // TODO: address & stuff
+  if (udp_bound_sockets[port] != NULL)
+    return -1;
+
+  udp_bound_sockets[port] = s;
+  s->bound_to = port;
+  return 0;
+}
+
+// Releases the socket's port binding (if any) and frees the socket.
+I64 UdpSocketClose(CUdpSocket *s) {
+  if (s->bound_to)
+    udp_bound_sockets[s->bound_to] = NULL;
+
+  Free(s);
+  return 0;
+}
+
+// Not implemented; always returns -1.
+I64 UdpSocketConnect(CUdpSocket *s, sockaddr *addr, I64 addrlen) {
+  // FIXME: implement
+  no_warn s;
+  no_warn addr;
+  no_warn addrlen;
+  return -1;
+}
+
+// listen() has no meaning for UDP; always returns -1.
+I64 UdpSocketListen(CUdpSocket *s, I64 backlog) {
+  no_warn s;
+  no_warn backlog;
+  return -1;
+}
+
+// Waits (yielding) for one datagram addressed to this socket and copies up to
+// len bytes of it into buf. If a receive timeout is set and expires first, the
+// receive is cancelled and -1 is returned.
+// If src_addr is not NULL, at most sizeof(sockaddr_in) bytes of the recorded
+// sender address are copied into it.
+// Returns the number of bytes received, or -1 on timeout.
+I64 UdpSocketRecvfrom(CUdpSocket *s, U8 *buf, I64 len, I64 flags,
+                      sockaddr *src_addr, I64 addrlen) {
+  no_warn flags;
+
+  if (s->rcvtimeo_ms != 0)
+    s->recv_maxtime = cnts.jiffies + s->rcvtimeo_ms * JIFFY_FREQ / 1000;
+
+  // Arm the receive. recv_buf is written last because a non-NULL recv_buf is
+  // what tells UdpHandler that recv_len is valid.
+  s->recv_len = len;
+  s->recv_buf = buf;
+
+  // UdpHandler clears recv_buf once it has delivered a datagram
+  while (s->recv_buf != NULL) {
+    // Check for timeout
+    if (s->rcvtimeo_ms != 0 && cnts.jiffies > s->recv_maxtime) {
+      // TODO: seterror(EWOULDBLOCK)
+      // Disarm, so that a datagram arriving after we return is not copied
+      // into a buffer the caller no longer expects us to touch.
+      s->recv_buf = NULL;
+      s->recv_len = -1;
+      break;
+    }
+
+    Yield;
+  }
+
+  // TODO: addrlen
+  if (src_addr) {
+    // Never read past the end of recv_addr, whatever size the caller claims
+    I64 copy_len = addrlen;
+
+    if (copy_len > sizeof(sockaddr_in))
+      copy_len = sizeof(sockaddr_in);
+
+    if (copy_len > 0)
+      MemCpy((src_addr(sockaddr_in *)), &s->recv_addr, copy_len);
+  }
+
+  return s->recv_len;
+}
+
+// Sends `len` bytes from buf as one datagram to dest_addr.
+// Returns the result of UdpPacketFinish, or -1 if addrlen is too short or no
+// packet could be allocated.
+// NOTE(review): the source port is always 0, even for a bound socket --
+// confirm that callers rely on this before changing it.
+I64 UdpSocketSendto(CSocket *s, U8 *buf, I64 len, I64 flags,
+                    sockaddr_in *dest_addr, I64 addrlen) {
+  no_warn s;
+  no_warn flags;
+
+  if (addrlen < sizeof(sockaddr_in))
+    return -1;
+
+  U8 *frame;
+
+  I64 index = UdpPacketAlloc(&frame, IPv4GetAddress(), 0,
+                             ntohl(dest_addr->sin_addr.s_addr),
+                             ntohs(dest_addr->sin_port), len);
+
+  if (index < 0)
+    return -1;
+
+  MemCpy(frame, buf, len);
+  return UdpPacketFinish(index);
+}
+
+// Supports only SOL_SOCKET / SO_RCVTIMEO_MS with an 8-byte value, which sets
+// the receive timeout in milliseconds. Returns 0 on success, -1 otherwise.
+I64 UdpSocketSetsockopt(CUdpSocket *s, I64 level, I64 optname, U8 *optval,
+                        I64 optlen) {
+  if (level == SOL_SOCKET && optname == SO_RCVTIMEO_MS && optlen == 8) {
+    s->rcvtimeo_ms = *(optval(I64 *));
+    return 0;
+  }
+
+  return -1;
+}
+
+// Socket-class constructor for (AF_INET, SOCK_DGRAM).
+// Returns a new unbound socket with no timeout and no pending receive, or
+// NULL for any other domain/type pair.
+CUdpSocket *UdpSocket(U16 domain, U16 type) {
+  if (domain != AF_INET || type != SOCK_DGRAM)
+    return NULL;
+
+  CUdpSocket *s = MAlloc(sizeof(CUdpSocket));
+  s->sock.accept = &UdpSocketAccept;
+  s->sock.bind = &UdpSocketBind;
+  s->sock.close = &UdpSocketClose;
+  s->sock.connect = &UdpSocketConnect;
+  s->sock.listen = &UdpSocketListen;
+  s->sock.recvfrom = &UdpSocketRecvfrom;
+  s->sock.sendto = &UdpSocketSendto;
+  s->sock.setsockopt = &UdpSocketSetsockopt;
+
+  s->rcvtimeo_ms = 0;
+  s->recv_maxtime = 0;
+
+  s->recv_buf = NULL;
+  s->recv_len = 0;
+  s->recv_addr.sin_family = AF_INET;
+  s->bound_to = 0;
+  return s;
+}
+
+// Layer-4 entry point for UDP. If a socket is bound to the destination port
+// and has a receive pending, the payload (truncated to the caller's buffer
+// size) and the sender's address are delivered to it; otherwise the datagram
+// is dropped. Returns the result of UdpParsePacket.
+I64 UdpHandler(CIPv4Packet *packet) {
+  U16 source_port;
+  U16 dest_port;
+  U8 *data;
+  I64 length;
+
+  I64 error = UdpParsePacket(&source_port, &dest_port, &data, &length, packet);
+
+  if (error < 0)
+    return error;
+
+  //"%u => %p\n", dest_port, udp_bound_sockets[dest_port];
+
+  CUdpSocket *s = udp_bound_sockets[dest_port];
+
+  // FIXME: should also check that bound address is INADDR_ANY,
+  //        OR packet dest IP matches bound address
+  if (s != NULL) {
+    if (s->recv_buf) {
+      I64 num_recv = s->recv_len;
+
+      if (num_recv > length)
+        num_recv = length;
+
+      MemCpy(s->recv_buf, data, num_recv);
+
+      // Publish the results first ...
+      // TODO: we keep converting n>h>n, which is wasteful
+      s->recv_addr.sin_port = htons(source_port);
+      s->recv_addr.sin_addr.s_addr = htonl(packet->source_ip);
+      s->recv_len = num_recv;
+
+      // ... and only then signal that we received something, so the waiter
+      // in UdpSocketRecvfrom never observes a half-updated socket.
+      s->recv_buf = NULL;
+    }
+  }
+
+  return error;
+}
+
+// Allocates the zeroed port -> socket lookup table (65536 entries).
+U0 UdpInit() {
+  udp_bound_sockets = MAlloc(65536 * sizeof(CUdpSocket *));
+  MemSet(udp_bound_sockets, 0, 65536 * sizeof(CUdpSocket *));
+}
+
+UdpInit;
+RegisterL4Protocol(IP_PROTO_UDP, &UdpHandler);
+RegisterSocketClass(AF_INET, SOCK_DGRAM, &UdpSocket);
diff --git a/Net/Virtio-net.HC b/Net/Virtio-net.HC
new file mode 100644
index 0000000..31066c8
--- /dev/null
+++ b/Net/Virtio-net.HC
@@ -0,0 +1,185 @@
+/* NOTE: This driver uses SnailNet naming conventions for required Ethernet
+   function calls.
+*/
+
+extern U16 htons(U16 h);
+extern I64 NetFifoPushCopy(U8 *data, I64 length);
+// Size of one Tx/Rx frame buffer in bytes
+#define ETHERNET_FRAME_SIZE 1548
+U8 *SNAILNET_NATIVE_DRIVER = "Virtio-net";
+
+// Current Rx/Tx buffer
+I64 rx_buffer_ptr = 0;
+I64 tx_buffer_ptr = 0;
+
+I64 rx_buffer_count = 256;
+I64 tx_buffer_count = 256;
+
+// Backing store for 256 frame buffers in each direction
+U64 rx_buffers = MAlloc(ETHERNET_FRAME_SIZE * 256);
+U64 tx_buffers = MAlloc(ETHERNET_FRAME_SIZE * 256);
+
+// Driver state for the single virtio-net device
+class @virtio_net {
+  U16 port;  // I/O port base, taken from PCI config offset 0x10
+  U8 mac[6]; // MAC address read from the device configuration block
+  @virtio_queue *rq; // receive queue (queue 0)
+  @virtio_queue *sq; // send queue (queue 1)
+  I64 rq_size;
+  I64 rq_index;
+  I64 sq_size;
+  I64 sq_index;
+  I64 rx_packets;
+  I64 rx_bytes;
+  I64 tx_packets;
+  I64 tx_bytes;
+};
+
+// Header that precedes every frame exchanged with the device
+class @virtio_net_header {
+  U8 flags;
+  U8 gso_type;
+  U16 header_length;
+  U16 gso_size;
+  U16 checksum_start;
+  U16 checksum_offset;
+};
+
+@virtio_net VirtioNet;
+MemSet(&VirtioNet, 0, sizeof(@virtio_net));
+
+// All-zero header shared by every transmitted frame
+@virtio_net_header *def_pkt_hdr = CAlloc(sizeof(@virtio_net_header));
+
+// Reserves the next Tx frame buffer and queues a two-descriptor chain
+// (shared zero header + frame buffer) on the send queue.
+//   buffer_out - receives the address of the frame buffer to fill
+//   length     - total frame length in bytes (Ethernet header included)
+//   flags      - unused
+// Returns the Tx buffer index, or -1 if `length` does not fit in a buffer.
+static I64 @virtio_net_alloc_tx_packet(U8 **buffer_out, I64 length, I64 flags) {
+  flags = flags;
+
+  // Each Tx buffer is ETHERNET_FRAME_SIZE bytes; refuse anything larger so
+  // the caller cannot write past the end of its slot.
+  if (length < 0 || length > ETHERNET_FRAME_SIZE)
+    return -1;
+
+  I64 sq_idx = VirtioNet.sq->available.index % 256;
+  I64 sq_idx2 = sq_idx % 128;
+  I64 index = tx_buffer_ptr;
+  tx_buffer_ptr = (tx_buffer_ptr + 1) & (tx_buffer_count - 1);
+  *buffer_out = tx_buffers + index * ETHERNET_FRAME_SIZE;
+
+  // First descriptor: the virtio-net header, chained to the frame
+  VirtioNet.sq->buffers[sq_idx2 * 2].address = def_pkt_hdr;
+  VirtioNet.sq->buffers[sq_idx2 * 2].length = sizeof(@virtio_net_header);
+  VirtioNet.sq->buffers[sq_idx2 * 2].flags = VRING_DESC_F_NEXT;
+  VirtioNet.sq->buffers[sq_idx2 * 2].next = (sq_idx2 * 2) + 1;
+  // Second descriptor: the frame itself, end of chain
+  VirtioNet.sq->buffers[(sq_idx2 * 2) + 1].address = *buffer_out;
+  VirtioNet.sq->buffers[(sq_idx2 * 2) + 1].length = length;
+  VirtioNet.sq->buffers[(sq_idx2 * 2) + 1].flags = 0;
+  VirtioNet.sq->buffers[(sq_idx2 * 2) + 1].next = 0;
+  VirtioNet.sq->available.ring[sq_idx] = sq_idx2 * 2;
+
+  VirtioNet.sq->available.index++;
+
+  VirtioNet.tx_packets++;
+  VirtioNet.tx_bytes += length;
+
+  return index;
+}
+
+// Notifies the device that queue 1 (send) has new buffers. Always returns 0.
+static I64 @virtio_net_finish_tx_packet(I64) {
+  OutU16(VirtioNet.port + VIRTIO_PCI_QUEUE_NOTIFY, 1);
+  return 0;
+}
+
+// Scratch buffer for frames addressed to our own MAC; loopback_length is the
+// number of valid bytes in it (0 = nothing pending).
+U8 *loopback_frame = MAlloc(ETHERNET_FRAME_SIZE);
+I64 loopback_length = 0;
+
+// Starts an outgoing Ethernet frame.
+//   buffer_out - receives a pointer to the payload area (after the 14-byte
+//                Ethernet header, which is filled in here)
+//   length     - payload length; raised to 46 if smaller
+// Frames addressed to our own MAC are built in the loopback buffer and get
+// the pseudo-index I64_MAX.
+// Returns an index for EthernetFrameFinish, or a negative value if the frame
+// does not fit in a buffer or no Tx packet could be allocated.
+I64 EthernetFrameAlloc(U8 **buffer_out, U8 *src_addr, U8 *dst_addr,
+                       U16 ethertype, I64 length, I64 flags) {
+
+  U8 *frame;
+
+  // APAD_XMT doesn't seem to work in VirtualBox, so we have to pad the frame
+  // ourselves
+  if (length < 46)
+    length = 46;
+
+  // Header plus payload must fit in one frame buffer (this also protects
+  // loopback_frame, which has the same size).
+  if (14 + length > ETHERNET_FRAME_SIZE)
+    return -1;
+
+  I64 index;
+
+  if (!MemCmp(dst_addr, &VirtioNet.mac, 6)) {
+    frame = loopback_frame;
+    // The 14-byte header written below is part of the looped-back frame,
+    // exactly as it is part of the length given to the Tx path.
+    loopback_length = 14 + length;
+    index = I64_MAX;
+  } else {
+    index = @virtio_net_alloc_tx_packet(&frame, 14 + length, flags);
+    if (index < 0)
+      return index;
+  }
+
+  // Ethernet header: destination, source, big-endian ethertype
+  MemCpy(frame + 0, dst_addr, 6);
+  MemCpy(frame + 6, src_addr, 6);
+  frame[12] = (ethertype >> 8);
+  frame[13] = (ethertype & 0xff);
+
+  *buffer_out = frame + 14;
+  return index;
+}
+
+// Completes a frame started with EthernetFrameAlloc: a pending loopback frame
+// is pushed onto the receive FIFO, anything else is handed to the device.
+I64 EthernetFrameFinish(I64 index) {
+  if (index == I64_MAX && loopback_frame && loopback_length) {
+    NetFifoPushCopy(loopback_frame, loopback_length);
+    loopback_length = 0;
+    return 0;
+  }
+  return @virtio_net_finish_tx_packet(index);
+}
+
+// Returns a pointer to the 6-byte MAC address of the device.
+U8 *EthernetGetAddress() { return &VirtioNet.mac; }
+
+// Locates the network controller on the PCI bus, reads its MAC address,
+// resets it and sets up the receive (0) and send (1) queues.
+// Returns 0 on success, -1 if no device was found.
+I64 @virtio_net_init() {
+  I64 i, j;
+
+  // Scan for device
+  // NOTE(review): this matches PCI class 0x020000 only; vendor/device IDs
+  // are not checked here -- confirm that is sufficient.
+  j = PCIClassFind(0x020000, 0);
+  if (j < 0) {
+    "\nVirtio-net device not found.\n";
+    return -1;
+  }
+  VirtioNet.port = PCIReadU32(j.u8[2], j.u8[1], j.u8[0], 0x10) & 0xFFFFFFFC;
+  for (i = 0; i < 6; i++) {
+    VirtioNet.mac[i] = InU8(VirtioNet.port + VIRTIO_PCI_CONFIG + i);
+  }
+
+  // Reset Device
+  OutU8(VirtioNet.port + VIRTIO_PCI_STATUS, 0);
+
+  // Found Driver
+  OutU8(VirtioNet.port + VIRTIO_PCI_STATUS,
+        InU8(VirtioNet.port + VIRTIO_PCI_STATUS) | VIRTIO_CONFIG_S_ACKNOWLEDGE |
+            VIRTIO_CONFIG_S_DRIVER);
+
+  // Set up receive queue
+  OutU16(VirtioNet.port + VIRTIO_PCI_QUEUE_SEL, 0);
+  VirtioNet.rq_size = InU16(VirtioNet.port + VIRTIO_PCI_QUEUE_SIZE); // 256
+  VirtioNet.rq = CAllocAligned(sizeof(@virtio_queue), 4096, Fs->code_heap);
+  OutU32(VirtioNet.port + VIRTIO_PCI_QUEUE_PFN, VirtioNet.rq / 4096);
+
+  // Set up send queue
+  OutU16(VirtioNet.port + VIRTIO_PCI_QUEUE_SEL, 1);
+  VirtioNet.sq_size = InU16(VirtioNet.port + VIRTIO_PCI_QUEUE_SIZE); // 256
+  VirtioNet.sq = CAllocAligned(sizeof(@virtio_queue), 4096, Fs->code_heap);
+  OutU32(VirtioNet.port + VIRTIO_PCI_QUEUE_PFN, VirtioNet.sq / 4096);
+
+  // Pre-populate the receive queue with 128 two-descriptor chains
+  // (device-writable header + device-writable frame buffer)
+  for (i = 0; i < 128; i++) {
+    VirtioNet.rq->buffers[i * 2].address = CAlloc(sizeof(@virtio_net_header));
+    VirtioNet.rq->buffers[i * 2].length = sizeof(@virtio_net_header);
+    VirtioNet.rq->buffers[i * 2].flags = VRING_DESC_F_NEXT | VRING_DESC_F_WRITE;
+    VirtioNet.rq->buffers[i * 2].next = (i * 2) + 1;
+    VirtioNet.rq->buffers[(i * 2) + 1].address = CAlloc(ETHERNET_FRAME_SIZE);
+    VirtioNet.rq->buffers[(i * 2) + 1].length = ETHERNET_FRAME_SIZE;
+    VirtioNet.rq->buffers[(i * 2) + 1].flags = VRING_DESC_F_WRITE;
+    VirtioNet.rq->buffers[(i * 2) + 1].next = 0;
+    VirtioNet.rq->available.ring[i] = i * 2;
+    VirtioNet.rq->available.ring[i + 128] = i * 2;
+  }
+  VirtioNet.rq->available.index = 1;
+
+  // Init OK
+  OutU8(VirtioNet.port + VIRTIO_PCI_STATUS,
+        InU8(VirtioNet.port + VIRTIO_PCI_STATUS) | VIRTIO_CONFIG_S_DRIVER_OK);
+  OutU16(VirtioNet.port + VIRTIO_PCI_QUEUE_NOTIFY, 0);
+  "[Virtio-net] device detected, MAC address "
+  "%02x:%02x:%02x:%02x:%02x:%02x\n",
+      VirtioNet.mac[0], VirtioNet.mac[1], VirtioNet.mac[2], VirtioNet.mac[3],
+      VirtioNet.mac[4], VirtioNet.mac[5];
+
+  // The function is declared I64; report success explicitly
+  return 0;
+}
+
+@virtio_net_init;
+
+"[OK] virtio-net \n";
\ No newline at end of file
diff --git a/Net/Virtio.HC b/Net/Virtio.HC
new file mode 100644
index 0000000..862bd3a
--- /dev/null
+++ b/Net/Virtio.HC
@@ -0,0 +1,77 @@
+//
+// PCI virtio I/O registers.
+//
+
+// Offsets are relative to the device's I/O port base.
+#define VIRTIO_PCI_HOST_FEATURES 0 // Features supported by the host
+#define VIRTIO_PCI_GUEST_FEATURES 4 // Features activated by the guest
+#define VIRTIO_PCI_QUEUE_PFN 8 // PFN for the currently selected queue
+#define VIRTIO_PCI_QUEUE_SIZE 12 // Queue size for the currently selected queue
+#define VIRTIO_PCI_QUEUE_SEL 14 // Queue selector
+#define VIRTIO_PCI_QUEUE_NOTIFY 16 // Queue notifier
+#define VIRTIO_PCI_STATUS 18 // Device status register
+#define VIRTIO_PCI_ISR 19 // Interrupt status register
+#define VIRTIO_PCI_CONFIG 20 // Configuration data block
+
+//
+// PCI virtio status register bits
+//
+
+#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1
+#define VIRTIO_CONFIG_S_DRIVER 2
+#define VIRTIO_CONFIG_S_DRIVER_OK 4
+#define VIRTIO_CONFIG_S_FAILED 0x80
+
+//
+// Ring descriptor flags
+//
+
+#define VRING_DESC_F_NEXT 1 // Buffer continues via the next field
+#define VRING_DESC_F_WRITE 2 // Buffer is write-only (otherwise read-only)
+#define VRING_DESC_F_INDIRECT 4 // Buffer contains a list of buffer descriptors
+
+// One ring descriptor: a buffer address and length, VRING_DESC_F_* flags and
+// the index of the next descriptor in the chain.
+class @virtio_queue_buf {
+  U64 address;
+  U32 length;
+  U16 flags;
+  U16 next;
+};
+// "Available" ring: `ring` holds head-descriptor indices offered to the
+// device, `index` is the running count of entries added.
+class @virtio_avail {
+  U16 flags;
+  U16 index;
+  U16 ring[256];
+  U16 int_index;
+};
+// One entry of the "used" ring.
+class @virtio_used_item {
+  U32 index;
+  U32 length;
+};
+// "Used" ring.
+// NOTE(review): presumably written by the device as it consumes buffers --
+// confirm against the virtio specification.
+class @virtio_used {
+  U16 flags;
+  U16 index;
+  @virtio_used_item ring[256];
+  U16 int_index;
+};
+// A complete queue of 256 descriptors.
+// NOTE(review): if members are laid out without padding, buffers (4096 bytes)
+// + available (518 bytes) + padding (3578 bytes) = 8192, which would start
+// `used` on a 4096-byte boundary -- confirm the layout assumption.
+class @virtio_queue {
+  @virtio_queue_buf buffers[256];
+  @virtio_avail available;
+  U8 padding[3578];
+  @virtio_used used;
+};
+
+class @virtio_avail_buf {
+  U32 index;
+  U64 address;
+  U32 length;
+};
+
+class @virtio_buf_info {
+  U8 *buffer;
+  U64 size;
+  U8 flags;
+
+  // If the user wants to keep the same buffer as passed in this struct, use
+  // "true". Otherwise, the supplied buffer will be copied into the queue's
+  // buffer.
+  Bool copy;
+};
+
+"[OK] virtio \n";
\ No newline at end of file