I'm trying to build an http server using c++. and so among the conditions based in which i decide how to extract the body entity, is if there's a content length present? , here's a minimal code on how i extract body using Content-Length :
req_t *Webserver::_recv(int client_fd, bool *closed)
{
string req;
static string rest;
// string extracted_req;
char buff[1024];
// while (true) {
// std::cout << "client_fd: " << client_fd << std::endl;
int n = recv(client_fd, buff, 1024, 0);
// std::cout << "n: " << n << std::endl;
if (n == -1)
{
_set_error_code("500", "Internal Server Error");
return NULL;
}
if (n == 0)
{
*closed = true;
return NULL;
}
buff[n] = '\0';
req += buff;
req_t *extracted_req = _extract_req(client_fd, req, rest, closed);
return extracted_req;
}
...
else if (headers.find("Content-Length") != string::npos) {
string body = extract_body_len(client_fd, rest_of_req, content_length);
}
req_t is a simple struct that contains three strings status_line, headers, body.
req_t *Webserver::_extract_req(int client_fd, const string &req, string &rest, bool *closed)
{
req_t *ret;
try
{
ret = new req_t;
}
catch (std::bad_alloc &e)
{
std::cerr << "\033[1;31mError:\033[0m " << e.what() << std::endl;
exit(1);
}
string status_line = req.substr(0, req.find("\r\n"));
string headers = req.substr(req.find("\r\n") + 2, req.find("\r\n\r\n") - req.find("\r\n") - 2);
rest = req.substr(req.find("\r\n\r\n") + 4, req.size() - req.find("\r\n\r\n") - 4);
ret->status_line = status_line;
ret->headers = headers;
// if method is get request body is empty
// if the header contains a content-length, extract number of buytes for body;
if (headers.find("Content-Length") != string::npos)
{
long long content_length = _get_content_len(headers);
if (content_length == -1)
{
_set_error_code("400", "Bad Request");
return NULL;
}
// substracting the length of the body from the length of the request
ret->body = _extract_body_len(client_fd, rest, content_length, closed);
// if body is not complete, return an error
...
string extract_body_len(int client_fd, string& rest, unsigned long long len) {
string body;
unsigned long long total = 0;
body = rest;
// starting total with first bytes of body
total += rest.size();
// if we have it all that's it
if (total >= len) {
body = rest.substr(0, len);
rest = rest.substr(len);
return body;
}
else
{
while (total < len)
{
char buf[1024];
int ret = recv(client_fd, buf, 1024, 0);
// after a lot of debugging , i've noticed that recv starts to read less than 1024 only when total is closer to len, so i added this condition naively.
if (ret != 1024)
{
if ((total + ret) >= len)
{
body += string(buf).substr(0, len - total);
rest = string(buf).substr(len - total);
break;
}
}
if (ret == 0)
{
if (total == len)
{
rest = "";
break;
}
// client closed connection and it's still incomplete: 400
else
{
res->status_code = "400";
res->status_message = "Bad Request";
return NULL;
}
}
else if (ret == -1)
{
res->status_code = "500";
res->status_message = "Internal Server Error";
return body;
}
total += ret;
body += string(buf, ret);
}
}
return body;
}
Now, The problem is i've tested requests with varying sized body entities(8MB, 1.9MB, 31 MB) and all the time i never receive the whole body (as per content-length), the pattern is like the following:
recvkeeps reading all 1024 bytes untiltotalgets closer tolenthen it starts reading smaller numbers. until the difference betweentotalandlenis around 400...600 bytes thenrecvblocks at some point (there's nothing more to read) beforetotal==len.
That really confused me, i tried with different api clients (postman, insonomia) but the same results, i doubted maybe Content-Length isn't that accurate but it obviously should be, what do you think is the problem , why am i receiving or reading less than Content-Length ?