#include <stdio.h>

extern char *name[];        // imaginary array of pointers to page URLs
#define debug 0
extern int totalPageCount;  // how many pages this Pentium knows.
// NOTE(review): this shares its name with POSIX dup(2); including <unistd.h>
// in this translation unit would produce a conflicting declaration.
void dup(int);              // Accepts reports of pages with both words.
char *fwc(char *);          // Find page data for a given word.

/*
 * ftwp - report every page that contains both w1 and w2.
 *
 * This code searches for pages that contain both of the words identified by
 * w1 and w2. It is used when each word is so common that it occurs in 0.6%
 * of all pages.
 *
 * fwc() yields, for each word, a byte stream: the first byte is the first
 * page code and every following byte is an increment added to the running
 * code. The two streams are walked in lock step like a sorted-list merge;
 * whenever both running codes are equal the page is handed to dup().
 *
 * Codes whose low eight bits are zero are never reported (presumably they are
 * filler entries in the encoding -- confirm against the producer of the data).
 *
 * The loop ends only when both running codes equal totalPageCount, so both
 * streams are assumed to reach that value; nothing else bounds the reads.
 *
 * The bytes are read as unsigned char on purpose: plain char may be signed,
 * in which case an increment of 128..255 would be added as a negative number
 * and move the running code backwards.
 *
 * w1, w2: the two words to look up; passed to fwc() and used in debug output.
 */
void ftwp(char *w1, char *w2)
{
    const unsigned char *cp = (const unsigned char *)fwc(w1);
    const unsigned char *dp = (const unsigned char *)fwc(w2);
    int c = *cp;
    int d = *dp;

    while (1) {
        // At this point c and d each hold the dense code for pages
        // that contain respectively the two sought words.
        if (debug) {
            if (c & 255) {
                printf("\"%s\" occurs in the page at %s\n", w1, name[c]);
            }
            if (d & 255) {
                printf("\"%s\" occurs in the page at %s\n", w2, name[d]);
            }
        }

        if (d < c) {
            // Second list is behind: step it forward by its next increment.
            d += *++dp;
        } else if (c < d) {
            // First list is behind: step it forward by its next increment.
            c += *++cp;
        } else {
            // c == d and thus we are either at the end of the world
            // or we have a page with both words.
            if (c == totalPageCount) {
                return;  // No more interesting pages in this neighborhood.
            }
            if (c & 255) {
                dup(c);
            }
            d += *++dp;
            c += *++cp;
        }
    }
}

// This data structure requires perhaps 10 TB of RAM for the web.
// At $.10 per megabyte this comes to one M$ not including power, processors,
// other data structures etc.