/* File: url-to-string.c * Author: James Mayfield * Purpose: Convert a URL to a string that contains the text * of the document identified by that URL. * Method: Cheat. Make lynx do the work. */ #include #include #include #include "url-to-string.h" #define LYNX "/usr/local/bin/lynx" /* The location of the `lynx' program */ #define RUN_LYNX "%s -source %s" /* The shell command for invoking lynx */ #define MAX_URL_LEN 200 /* Maximum size we'll allow for a URL */ #define MAX_COMMAND_LEN (MAX_URL_LEN + 50) /* Enough space to build the lynx command */ #define INITIAL_BUFFER_SIZE 120 /* Assume that the text of the document is this long; will be increased dynamically if necessary. */ /* This is the function to be called by our users. It is responsible for fetching the document specified by the given URL, allocating space for the text of that document, and returning the resulting string to the user. It always returns a string, even on error. */ char * url_to_string(char *url) { char cmd_buf[MAX_COMMAND_LEN]; FILE *infile; int size = INITIAL_BUFFER_SIZE; if (strlen(url) > MAX_URL_LEN) { fprintf(stderr, "Error in url_to_string: URL is too long.\n"); return(""); } /* Build the command to run lynx. */ sprintf(cmd_buf, RUN_LYNX, LYNX, url); /* Use the popen library routine to run lynx, giving a stream from which the contents of the URL can be read. */ if ((infile = popen(cmd_buf, "r")) != NULL) { char *result = malloc(size + 1); int count = 0; int c; if (result == NULL) { fprintf(stderr, "Error in url_to_string: No space left.\n"); return(""); } while ((c = getc(infile)) != EOF) { /* If there's no more room in this buffer, expand it by calling realloc. New size is twice the old size. */ if (count == size) { size *= 2; result = realloc(result, size + 1); if (result == NULL) { fprintf(stderr, "Error in url_to_string: No space left.\n"); return(""); } } result[count++] = c; } result[count] = '\0'; pclose(infile); return(result); /* Space considerations argue for another realloc before the return to give back the unused space in the buffer. Speed considerations argue against it. Vote for speed. */ } else { /* popen failed, so give up. */ fprintf(stderr, "Error in url_to_string: Could not execute `%s'\n", cmd_buf); return(""); } }