HTTP/1.0 Web Server

I wrote this program around April 2006 for inet4021 - Network Programming. The requirement was to create a web server adhering to RFC 1945 (the HTTP/1.0 specification) and achieving concurrency. I finished early, so I decided to add thread-safe logging (which, interestingly, led me to find out that my web server, running on my home connection on port 5555, got crawled by msnbot within a few hours of going online) and a chrooted environment (when spawned as root, the server chroots and then also setuid’s to ‘nobody’).

The full project, including a README, makefile, and sample public_html directory is available here, including a bzipped tarball of all files. The source code is also provided below (reformatted from the tar’d copy for better 80-column compliance).

/* David R. Hedges
 * 2836226
 * hedg0029
 * 2006-04-18
 * inet4021
 */

#define _REENTRANT
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <netdb.h>
#include <pthread.h>
#include <sched.h>
#include <unistd.h>
 
#define QUEUE_LENGTH 50	  /* The request 'queue' is a fixed-size array
                             rather than a linked list; this is the number
                             of slots in that array (reqBuf and addrBuf
                             are both dimensioned with it).
                          */

#define NUM_THREADS 10    // how many worker threads main() spawns



static int reqBuf[QUEUE_LENGTH];        /* the 'queue' holding the socket fds of
                                           accepted clients; filled by
                                           readerThread, drained by workerThread
                                        */
static char addrBuf[QUEUE_LENGTH][16];  /* dotted-quad IP of the client whose fd
                                           sits in the same slot of reqBuf
                                        */
static int bufHead = 0;                 /* the 'front' of the queue: index of the
                                           next reqBuf slot a worker reads; reset
                                           to 0 when it reaches QUEUE_LENGTH
                                        */
static int bufTail = 0;                 /* the 'back' of the queue: index of the
                                           next reqBuf slot the reader writes
                                           (post-incremented); reset to 0 when it
                                           reaches QUEUE_LENGTH
                                        */
static int log_fd;                      // file descriptor of the logfile
static int threadCounter = 0;           /* incremented by each worker at startup
                                           to give itself a thread id; updated
                                           while holding logAccess
                                        */
int debug = 0;                          /* when non-zero, debugging messages are
                                           printed to stdout; set from the
                                           result of checkVerbose() (-v flag)
                                        */
int sockfd;                             /* the listening socket: created in
                                           main(), accept()ed on in readerThread
                                        */
int done = 0;                           /* set to 1 by readerThread when it is
                                           finished; workers exit once this is
                                           set and the queue is empty
                                        */

/* Response body sent with a 404 status. */
char *errorpage = "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
                  "<html><head>\n<title>404 Not Found</title>\n</head><body>\n"
                  "<h1>Not Found</h1>\n"
                  "<p>The requested URL was not found on this server.</p>\n"
                  "<hr>\n<address>p14serv/0.1</address>\n</body></html>";
/* Value of the Server: response header; also written to the startup log line. */
char *serverName = "p14serv/0.1";
/* Response body sent with a 400 status. */
char *errorpage400 = "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
                     "<html><head>\n"
                     "<title>400 Bad Request</title>\n"
                     "</head><body>\n<h1>Bad Request</h1>\n"
                     "<p>The method you used to request this page is not "
                     "supported by this server. Please restrict your reqeuests "
                     "to GET or HEAD request-methods.</p>\n<hr>\n"
                     "<address>p14serv/0.1</address>\n</body></html>";

/* bufAccess  - held while touching reqBuf, addrBuf, bufHead and bufTail; it is
 *              also the mutex paired with bufAvail in pthread_cond_wait().
 * logAccess  - held around write() calls on log_fd from the threads, and
 *              around the threadCounter increment.
 * doneAccess - held by readerThread while setting 'done'.
 *              NOTE(review): workers read 'done' under bufAccess, not under
 *              doneAccess.
 * bufAvail   - signalled by readerThread after it queues a connection and
 *              broadcast when it sets 'done'.
 */
static pthread_mutex_t bufAccess = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t logAccess = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t doneAccess = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t bufAvail = PTHREAD_COND_INITIALIZER;

//prototypes:
int checkVerbose(int, char**);
int checkPort(int, char**);
int checkPath(int, char**);
void *readerThread (void *arg);
void *workerThread (void *arg);
 
/* Program entry point.
 *
 * Usage: ./httpserv [-v] [port] [path]
 *
 * Opens the log file, parses the command line (checkPath() also changes into
 * the document root), binds and listens on the requested port (default 80),
 * drops root privileges, then starts one reader thread and NUM_THREADS worker
 * threads and waits for them to finish.
 *
 * Returns 0 on a clean exit or after printing usage, -1 on a fatal error.
 */
int main(int argc, char **argv) {
  int i;
  int rc;
  int ret_val;
  int port_number = 80;             // default when no port argument is given
  char *errmsg;
  pthread_t reader;
  pthread_t workers[NUM_THREADS];
  int workerStarted[NUM_THREADS];   // 1 for every worker that really exists
  struct sockaddr_in serv_addr;
  char writeMsg[1024];

  //append to the log file (open it before checkPath() changes directory)
  if ((log_fd = open("httpserv_log", O_WRONLY|O_CREAT|O_APPEND, 0666)) < 0) {
    errmsg = strerror(errno);
    printf("(%s:%d): Unable to create httpserv_log: %s.\n",
            __FILE__, __LINE__, errmsg);
    //deliberately not fatal: the server keeps running, it just cannot log
  }

  if (argc > 1) {
    //don't know whether each argument is -v, a port, or a path
    if (strlen(argv[1]) >= 6 && strncmp(argv[1], "--help", 6) == 0) {
      printf("Usage is: ./httpserv [-v] [port] [path]\n");
      return 0;
    }
    debug = checkVerbose(argc, argv);
    port_number = checkPort(argc, argv);
    ret_val = checkPath(argc, argv);
    if (ret_val == -1) {
      printf("Usage is: ./httpserv [-v] [port] [path]\n");
      return -1;
    }
    if (debug == 0 && port_number == 0 && ret_val == 0) {
      printf("(%s:%d): Argument '%s' not understood.\n"
              "Usage is: ./httpserv [-v] [port] [path]\n",
              __FILE__, __LINE__, argv[1]);
      return 0;
    }

    if (port_number > 65535) {
      printf("Port number must be in the range [1-65535].\n");
      return -1;
    }

    if (port_number == 0) {
      port_number = 80;
    }
  }

  //set up socket to listen; will be used by reader thread
  sockfd = socket(AF_INET, SOCK_STREAM, 0);
  if (sockfd < 0) {
    errmsg = strerror(errno);
    printf("(%s:%d): Unable to create initial socket: %s.\n",
            __FILE__, __LINE__, errmsg);
    return -1;
  }

  if (debug) {
    printf("socket() call completed\n");
  }

  //zero the whole address structure (numeric 0, not the character '0')
  memset(&serv_addr, 0, sizeof(serv_addr));
  serv_addr.sin_family = AF_INET;
  serv_addr.sin_addr.s_addr = INADDR_ANY;
  serv_addr.sin_port = htons(port_number);
  if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
    errmsg = strerror(errno);
    printf("(%s:%d): Unable to bind socket: %s.\n",
            __FILE__, __LINE__, errmsg);
    return -1;
  }
  if (listen(sockfd,10) < 0) {
    errmsg = strerror(errno);
    printf("(%s:%d): Unable to listen() on socket: %s.\n",
            __FILE__, __LINE__, errmsg);
    return -1;
  }

  //if running as root, change to nobody now that the socket is bound and
  //checkPath() has had its chance to chroot
  if (debug) {
    printf("(%s:%d#m): getuid() was %d, ", __FILE__, __LINE__, (int) getuid());
  }
  if (getuid() == 0 && setuid(65534) != 0) {
    //never keep serving requests as root if the privilege drop failed
    errmsg = strerror(errno);
    printf("(%s:%d): Unable to drop root privileges: %s.\n",
            __FILE__, __LINE__, errmsg);
    return -1;
  }
  if (debug) {
    printf("now it's %d.\n", (int) getuid());
  }

  //display our info:
  char hostname[128];
  if (gethostname(hostname, sizeof(hostname)) != 0) {
    snprintf(hostname, sizeof(hostname), "localhost");
  }
  hostname[sizeof(hostname) - 1] = '\0';  //gethostname may truncate silently
  if (port_number == 80) {
    printf("\nWeb server running at:\thttp://%s/\n\n"
            "Press ctrl-c to terminate.\n", hostname);
  }
  else {
    printf("\nWeb server running at:\thttp://%s:%d/\n\n"
            "Press ctrl-c to terminate.\n", hostname, port_number);
  }

  //log startup
  time_t curtime = time(NULL);
  char timestamp[25];
  strftime(timestamp, sizeof(timestamp), "%Y-%m-%d %H:%M:%S%z",
           localtime(&curtime));

  snprintf(writeMsg, sizeof(writeMsg), "%s - %s started, listening on %s:%d\n",
           timestamp, serverName, hostname, port_number);

  //no other threads yet, so can log without taking the log mutex:
  write(log_fd, writeMsg, strlen(writeMsg));

  //spawn reader/listener thread
  //(pthread_* functions return the error number; they do not set errno)
  rc = pthread_create(&reader, NULL, readerThread, NULL);
  if (rc != 0) {
    printf("(%s:%d): Unable to spawn readerThread: %s.\n",
            __FILE__, __LINE__, strerror(rc));
    return -1;
  }

  for (i=0; i<NUM_THREADS; i++) {
    rc = pthread_create(&workers[i], NULL, workerThread, NULL);
    workerStarted[i] = (rc == 0);
    if (rc != 0) {
      printf("(%s:%d#%d): Error creating workerThread(%d): %s.\n",
              __FILE__, __LINE__, i, i, strerror(rc));
    }
    else if(debug) {
      printf("(%s:%d#m): Created workerThread(%d).\n", __FILE__, __LINE__, i);
    }
  }

  //la de da, threads do things, now let's join them

  //join the reader thread
  rc = pthread_join(reader, NULL);
  if (rc != 0) {
    printf("(%s:%d): Error re-joining reader/listener thread: %s.\n",
            __FILE__, __LINE__, strerror(rc));
    return -1;
  }

  for (i=0; i<NUM_THREADS; i++) {
    if (!workerStarted[i]) {
      continue;   //its pthread_t was never initialised, so it cannot be joined
    }
    rc = pthread_join(workers[i], NULL);
    if (rc != 0) {
      printf("(%s:%d): Error re-joining worker thread %d: %s.\n",
              __FILE__, __LINE__, i, strerror(rc));
      return -1;
    }
  }

  if (close(log_fd) != 0) {
    perror("Error closing log file");
    return -1;
  }

  return 0;
}
 
// usage is: ./httpserv [-v] [port] [directory]
 
/* Look for the verbose flag on the command line.
 *
 * argv[0] is skipped. Any argument whose first two characters are "-v"
 * counts (so "-verbose" matches as well).
 *
 * Returns 1 if such an argument is present, 0 otherwise.
 */
int checkVerbose(int argc, char **argv) {
    int idx = 1;

    while (idx < argc) {
      const char *candidate = argv[idx];
      /* the short-circuit keeps us from reading past a 0- or 1-char string */
      if (candidate[0] == '-' && candidate[1] == 'v') {
        return 1;
      }
      idx++;
    }
    return 0;
}
 
/* Find the port number on the command line.
 *
 * argv[0] is skipped. The first argument whose leading characters parse as a
 * positive decimal integer is taken as the port. Values too large for an int
 * are clamped to INT_MAX so that the caller's range check still rejects them
 * (atoi() has undefined behaviour on such input, strtol() does not).
 *
 * Returns the port number, or 0 if no argument looks like one.
 */
int checkPort(int argc, char **argv) {
    int i;
    for(i=1; i<argc; i++) {
      long value = strtol(argv[i], NULL, 10);
      if (value > 0) {
        return (value > INT_MAX) ? INT_MAX : (int) value;
      }
    }
    return 0;
}
 
/* Find the document-root path on the command line and move into it.
 *
 * argv[0] is skipped. The first argument that is at least two characters
 * long, does not parse as a non-zero number and does not start with "-v" is
 * treated as the path.
 *
 * The process first chdir()s into the path and then chroot()s to ".", so the
 * working directory is guaranteed to be inside the new root and absolute
 * paths work too. chroot() only succeeds for root; its failure is reported
 * with perror() but is not fatal.
 *
 * Returns 1 if a path was found and entered, 0 if no path argument was
 * given, -1 if the path could not be entered.
 */
int checkPath(int argc, char **argv) {
  int i;
  for(i=1; i<argc; i++) {
    const char *arg = argv[i];
    #ifdef DEBUG
      printf("atoi(%s) = %ld, strlen(%s) = %lu, strncmp(%s, \"-v\", 2) = %d)\n",
              arg, strtol(arg, NULL, 10), arg, (unsigned long) strlen(arg),
              arg, strncmp(arg, "-v", 2));
    #endif
    if (strtol(arg, NULL, 10) != 0 || strlen(arg) < 2
        || strncmp(arg, "-v", 2) == 0) {
      continue;   //a port number, a one-character argument, or the -v flag
    }

    //path declaration found, now check that we can enter it
    if (chdir(arg) != 0) {
      //path not found
      char *errmsg = strerror(errno);
      printf("(%s:%d): Specificed path '%s' not found: %s.\n",
              __FILE__, __LINE__, arg, errmsg);
      return -1;
    }

    //we are standing in the document root now; make it the filesystem root
    if (chroot(".") < 0) {
      perror("Sad chrooting");
    }
    #ifdef DEBUG
      printf("(%s:%d): entered '%s', getuid = %d\n",
              __FILE__, __LINE__, arg, (int) getuid());
    #endif
    return 1;
  }
  return 0;
}
 
 
/* Listener thread: accepts connections on the global listening socket and
 * queues each accepted socket, together with the client's IP address, for
 * the worker threads.
 *
 * arg is unused. Returns NULL.
 *
 * NOTE(review): nothing here checks whether the queue is full, so more than
 * QUEUE_LENGTH pending connections would overwrite unprocessed slots.
 * NOTE(review): the accept loop has no exit, so the shutdown code after it is
 * currently unreachable.
 */
void *readerThread (void *arg) {
  struct sockaddr_in cli_addr;
  int tempsockfd;
  char *errmsg;
  char errbuf[256];     //scratch for strerror_r, kept separate from writeMsg
  char writeMsg[1024];
  socklen_t clilen;

  (void) arg;

  while (1) {
    //accept() overwrites clilen with the real length, so reset it every time
    clilen = sizeof(cli_addr);
    tempsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);
    if (tempsockfd < 0) {
      //NOTE(review): like the rest of this file, this assumes the GNU
      //strerror_r() that returns a char *
      errmsg = strerror_r(errno, errbuf, sizeof(errbuf));
      snprintf(writeMsg, sizeof(writeMsg),
                "[reader]: Error accepting connection '%s'\n", errmsg);
      pthread_mutex_lock(&logAccess);
      write(log_fd, writeMsg, strlen(writeMsg));
      pthread_mutex_unlock(&logAccess);
    }
    else {
      if (debug) {
        printf("(%s:%d#r): Accepted connection %d from %s:%d\n",
                __FILE__, __LINE__, tempsockfd,
                inet_ntoa(cli_addr.sin_addr), ntohs(cli_addr.sin_port));
        #ifdef DEBUG
          struct hostent *george;
          george = gethostbyaddr((void *) &cli_addr.sin_addr,
                                 sizeof(cli_addr.sin_addr), AF_INET);
          //the lookup itself can fail, not just the name field
          if(george != NULL && george->h_name != NULL) {
            printf("hostname is '%s'\n", (char *) george->h_name);
          }
          else {
            printf("h_addr is null?\n");
          }
        #endif
      }
      //put the temp sockfd into the shared buffer for a worker thread to pick up
      pthread_mutex_lock(&bufAccess);
      if (bufTail == QUEUE_LENGTH) bufTail = 0;		//wrap around to beginning
      snprintf(addrBuf[bufTail], sizeof(addrBuf[bufTail]), "%s",
               inet_ntoa(cli_addr.sin_addr));
      reqBuf[bufTail++] = tempsockfd;
      pthread_mutex_unlock(&bufAccess);
      pthread_cond_signal(&bufAvail);
      if(debug) {
        printf("(%s:%d#r): Signaled bufAvail.\n", __FILE__, __LINE__);
      }
    }
  }

  if(debug) {
    printf("(%s:%d#r): Exiting reader thread.\n", __FILE__, __LINE__);
  }

  //"signal" (not exactly) the other threads that we're done
  pthread_mutex_lock(&doneAccess);
  done = 1;
  pthread_mutex_unlock(&doneAccess);
  //broadcast this, so all the threads wake up and finish
  //what they have to do, then exit
  pthread_cond_broadcast(&bufAvail);

  if (debug) {
    printf("(%s:%d#r): Returning from reader thread.\n", __FILE__, __LINE__);
  }

  return NULL;
}
 
void *workerThread (void *arg) {
  pthread_mutex_lock(&logAccess);
  int thread_id = threadCounter++;
  pthread_mutex_unlock(&logAccess);

  int ret_val, status_code;
  int sockfd, filefd, filesize;

  char *errmsg, *stringpos, *tok;
  char writemsg[1024], readmsg[5120];
  char cli_addr[16];
  char timestamp[25];
  char method[5];			//will contain HEAD or GET (ideally)
  char filename[1024];		//will not contain leading /
  char content_type[1024];
  char response_header[5120];
  char user_agent[1024] = "-";
  char request_line[1024];

  if(debug) {
    printf("(%s:%d#%d): Spawning worker thread %d.\n",
           __FILE__, __LINE__, thread_id, thread_id);
  }

  while(1) {
    //hack to give another thread a chance to come and get the reqest
    sched_yield();
    pthread_mutex_lock(&bufAccess);
    while (bufHead == bufTail && !done) {// bufEmpty()
      //sleep until there's something in the buffer
      pthread_cond_wait(&bufAvail, &bufAccess);
    }
    if(debug) {
      printf("(%s:%d#%d): Received bufAvail cond_signal.\n",
             __FILE__, __LINE__, thread_id);
    }

    if(bufHead == bufTail) {
      if(done) {
        pthread_mutex_unlock(&bufAccess);
        pthread_exit(NULL);
      }
    }

    //get our sockfd
    if (bufHead == QUEUE_LENGTH) bufHead = 0;
    strncpy(cli_addr, addrBuf[bufHead], 16);
    sockfd = reqBuf[bufHead++];
    pthread_mutex_unlock(&bufAccess);

    if(debug) {
      printf("(%s:%d#%d): Picked up socket %d.\n",
              __FILE__, __LINE__, thread_id, sockfd);
    }

    memset(readmsg, '\0', 5120);
    stringpos = readmsg;
    while (strstr(readmsg, "\r\n\r\n") == NULL) {
      ret_val = read(sockfd, stringpos, 5120-(stringpos-readmsg));
      if (ret_val <= 0) {
        errmsg = strerror_r(errno, writemsg, 1024);
        printf("(%s:%d#%d): No data read: %s.\n",
                __FILE__, __LINE__, thread_id, errmsg);
        close(sockfd);
      }
      stringpos += ret_val;
    }

    char tempString[5120];
    strncpy(tempString, readmsg, 5120);

    time_t curtime = time(NULL);
    strftime(timestamp, 28, "%Y-%m-%d %H:%M:%S%z", localtime(&curtime));

    tok = strtok(tempString, "\r\n");
    strncpy(request_line, tok, 1024);

    if(debug && tok != NULL) {
      printf("(%s:%d#%d): [%s] Request is: \"%s\"\n",
             __FILE__, __LINE__, thread_id, cli_addr, tok);
    }

    #ifdef DEBUG
      snprintf(writemsg, 1024, "%s - [%s] - \"%s\"\n", timestamp, cli_addr, tok);

      pthread_mutex_lock(&logAccess);
      write(log_fd, writemsg, strlen(writemsg));
      pthread_mutex_unlock(&logAccess);
    #endif

    //copy the request line to tempstring, and only parse that
    strncpy(tempString, tok, 1024);
    tok = strtok(tempString, " ");

    //copy GET or HEAD to method
    if (tok != NULL && (strlen(tok) >=3)) {
      strncpy(method, tok, 5);
    }
    else {
      snprintf(response_header, 5120, "HTTP/1.0 400 Bad Request\r\n"
               "Server: %s\r\n"
               "Allow: GET,HEAD\r\n"
               "Content-Length: %d\r\n"
               "Content-Type: %s\r\n\r\n",
               serverName, strlen(errorpage400), content_type);
      write(sockfd, response_header, strlen(response_header));
      write(sockfd, errorpage400, strlen(errorpage400));
      close(sockfd);
      snprintf(writemsg, 1024, "%s - [%s] - \"%s\" 400 %d \"-\" \"-\"\n",
               timestamp, cli_addr, request_line, strlen(errorpage400));
      pthread_mutex_lock(&logAccess);
      write(log_fd, writemsg, strlen(writemsg));
      pthread_mutex_unlock(&logAccess);
      if (debug) {
        //kill newline from log writemsg
        writemsg[strlen(writemsg)-1] = '\0';
        printf("(%s:%d#%d): Wrote to logfile: '%s'\n",
               __FILE__, __LINE__, thread_id, writemsg);
      }
      continue;
    }

    //should be filename or full url
    tok = strtok(NULL, " ");
    if (strncasecmp(tok, "http://", 7) == 0) {
      stringpos = index(tok+7, '/');		//skips the http://, and finds a pointer to the next /
      if (stringpos == NULL) {
        strncpy(filename, "index.html", 11);
      }
      else {
        strncpy(filename, stringpos+1, 1024);
      }
    }
    else {
      if (tok[0] == '/') {
        strncpy(filename, tok+1, 1024);
      }
      else {
        strncpy(filename, tok, 1024);
      }
    }
    if (debug) {
      printf("(%s:%d#%d): Filename is '%s'\n",
             __FILE__, __LINE__, thread_id, filename);
    }

    if ((filefd = open(filename, O_RDONLY)) <= 0) {
      errmsg = strerror_r(errno, writemsg, 1024);
      if (debug) {
        printf("(%s:%d#%d): Error opening file '%s'\n",
               __FILE__, __LINE__, thread_id, filename);
      }
      status_code = 404;
    }
    else {
      filesize = lseek(filefd, 0, SEEK_END);
      lseek(filefd, 0, SEEK_SET);
      status_code = 200;
    }

    //get user-agent:
    strncpy(tempString, readmsg, 5120);
    tok = strtok(tempString, "\r\n");
    while (strncasecmp(tok, "user-agent", 10) != 0 && tok != NULL) {
      //printf("tok is '%s'\n", tok);
      tok = strtok(NULL, "\r\n");
    }

    if (strncasecmp(tok, "user-agent", 10) == 0) {
      //user-agent found
      strncpy(user_agent, index(tok, ' ')+1, 1024);
    }

    if (status_code == 200) {
      //determine file type for use in content-type
      if (strncasecmp(rindex(filename, '.'), ".htm", 4) == 0) {
        strncpy(content_type, "text/html", 10);
        if(debug) {
          printf("(%s:%d#%d): content-type is text/html\n",
                 __FILE__, __LINE__, thread_id);
        }
      }
      else if (strncasecmp(rindex(filename, '.'), ".jpg", 4) == 0) {
        strncpy(content_type, "image/jpeg", 11);
      }
      else if (strncasecmp(rindex(filename, '.'), ".gif", 4) == 0) {
        strncpy(content_type, "image/gif", 10);
      }
      else {
        strncpy(content_type, "text/plain", 11);
      }
    }

    else {
      strncpy(content_type, "text/html", 10);
    }

    if(status_code == 404) {
      snprintf(response_header, 5120, "HTTP/1.0 404 Not Found\r\n"
               "Server: %s\r\n"
               "Content-Length: %d\r\n"
               "Content-Type: %s\r\n\r\n",
               serverName, strlen(errorpage), content_type);
      write(sockfd, response_header, strlen(response_header));
      if (strncmp(method, "GET", 4) == 0) {
        write(sockfd, errorpage, strlen(errorpage));
      }
      filesize = strlen(errorpage);
    }

    else if(status_code == 200) {
      snprintf(response_header, 5120, "HTTP/1.0 200 OK\r\n"
               "Server: %s\r\n"
               "Content-Length: %d\r\n"
               "Content-Type: %s\r\n\r\n",
               serverName, filesize, content_type);
      write(sockfd, response_header, strlen(response_header));
      if (strncmp(method, "GET", 4) == 0) {
        ssize_t readbytes, writebytes;
        while ((readbytes = read(filefd, writemsg, 1024)) > 0) {
        //currently assuming that we'll only get <= 0 when EOF arrives, not an error
          stringpos = writemsg;
          while ((writebytes = write(sockfd, stringpos, readbytes)) < readbytes) {
            if (debug) {
              printf("(%s:%d#%d): Writebytes was %d.\n",
                     __FILE__, __LINE__, thread_id, writebytes);
            }
            //we didn't get to write the whole buffer to the new file, so we need to write the rest
            readbytes -= writebytes;
            stringpos += writebytes;
          }// done writing the buffer to the socket

          if (debug) {
            printf("(%s:%d#%d): Wrote %d bytes to sockfd.\n",
                   __FILE__, __LINE__, thread_id, writebytes);
          }
        }
      }
      close(filefd);
    }

    //log the request
    curtime = time(NULL);
    strftime(timestamp, 28, "%Y-%m-%d %H:%M:%S%z", localtime(&curtime));
 
    snprintf(writemsg, 1024, "%s - [%s] - \"%s\" %d %d \"-\" \"%s\"\n",
             timestamp, cli_addr, request_line, status_code, filesize, user_agent);

    pthread_mutex_lock(&logAccess);
    write(log_fd, writemsg, strlen(writemsg));
    pthread_mutex_unlock(&logAccess);
    if (debug) {
      //kill the newline in the write msg
      writemsg[strlen(writemsg)-1] = '\0';
      printf("(%s:%d#%d): Wrote to logfile: '%s'\n",
             __FILE__, __LINE__, thread_id, writemsg);
    }
 
    close(sockfd);
  }

  return NULL;
}

Download this code: 20060419-1746-drh-httpserv/httpserv.c