/*
 * File prelude: includes, size limits, typedefs and file-scope state.
 *
 * NOTE(review): this copy is damaged.  All statements sit on one physical
 * line (so each `//` comment swallows the code after it), the header names
 * after every #include are missing, and the text between `for(i=0; i` and
 * `next; free(segp); segp=x;}}` is gone, so open_p() and what is presumably
 * close_p() (called from ex() and main()) cannot be read in full here.
 * Restore from a pristine copy before compiling.
 *
 * What the visible text establishes:
 *   Nsize / Ncsize   size of the path buffer `name` / of the per-level
 *                    index arrays (Nsize/10).
 *   out              output stream (main sets it to stdout).
 *   MaxMem           voluntary RAM limit, default 50000000.
 *   C                shorthand macro for `const`.
 *   zot, hh          hash chain node holding an inode number, and the
 *                    hhsize-bucket table of such chains.
 *   segm, segp       batches of hlbat zot nodes, linked through `next`;
 *                    segp is the head of that list.
 *   pass             pass number (main runs pass 0, then pass 1).
 *   OutCat, f        default output-category letters and the 128-entry
 *                    flag table indexed by category character.
 *   Cstr, Cp         large static heap for composing output strings and
 *                    its allocation cursor; never garbage-collected.
 *   BLnames, BLc,    black-list name table, per-name lengths, and count.
 *   xndx
 *   tv, tt           total character count and total tag size.
 *
 * NOTE(review): Ce is defined as (Cp+Cstrsiz), i.e. relative to the moving
 * cursor rather than to Cstr, so the bound advances with every append --
 * presumably (Cstr+Cstrsiz) was intended; confirm.
 */
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #define Nsize 1000 #define Ncsize (Nsize/10) FILE * out; uint MaxMem = 50000000; // Don't use more than thus much RAM. (default) typedef int bi; typedef unsigned char uchar; #define C const #define le 10 typedef unsigned short b16; typedef struct mgg{uint ino; struct mgg * next;} zot; #define hhsize 1024 #define hlbat 2000 static zot * hh[hhsize]; typedef struct mgh{struct mgh * next; zot a[hlbat];} segm; static segm * segp = 0; static int pass; uint MaxBc; char C * OutCat = "NS"; // default output categories uchar f[128]; uchar name[Nsize]; static void open_p(){int i; for(i=0; inext; free(segp); segp=x;}} #define Cstrsiz 100000000 char Cstr[Cstrsiz]; // strategic use of virtual addresses // The above is a heap for C strings. // It is not garbage-collected. // The current code uses it only for composing output messages. char * Cp = Cstr; #define Ce (Cp+Cstrsiz) #define BLC 30 char C * BLnames[BLC]; int BLc[BLC]; int xndx = 0; static long long tv = 0; // total character count. static int tt = 0; // total tag size. 
/*
 * Counters, fatal-exit helpers, string quoting fragment, allocator wrapper,
 * hard-link filter and the name-tree storage.
 *
 * fc, Lc, eLc   file count, link count, external link count.
 *
 * ex(d)         calls close_p(), flushes `out`, then exit(-3).  The argument
 *               is not used; callers write ex(printf(...)) so the message is
 *               printed before the exit.
 * bitch(s)      prints "<s>: Bye." to stderr and leaves through ex().
 *
 * NOTE(review): the text between `if(Cp` and `Ce) bitch("Excessive
 * output!")` is missing in this copy.  It took with it the rest of the
 * `#if 0` helper app(), the matching #endif, and the header of the routine
 * whose tail is still visible (other code calls it as qs()).  The surviving
 * tail copies characters strictly between ' ' and '~' inclusive of '~',
 * except those in "\"\\%#?", through W(); every other byte is written as
 * '%' followed by two digits taken from hd, then a 0 terminator is
 * appended and `b` is returned.
 * NOTE(review): the second digit is hd[x&7], which drops bit 3 of the low
 * nibble (0x2f would come out as "%27") -- presumably x&15 was intended;
 * confirm against the pristine source.
 *
 * core(m, who)  malloc(m); on failure prints "<who> exhausted memory!" and
 *               exit(-1); otherwise returns the pointer.
 *
 * New(i)        Returns 0 at once when `pass` is non-zero.  Otherwise hashes
 *               inode number i into hh, walks the chain, and returns 1 if i
 *               was absent (after inserting it) or 0 if already present.
 *               Nodes come from the current segm batch; when hwat reaches
 *               hlbat a new batch is obtained from core() and pushed on
 *               segp.  hwat starts at hlbat so the first insert allocates.
 *               NOTE(review): the 'H' message prints the literal text
 *               "hlbat" -- no value is formatted into it.
 *
 * namec         per-depth indexes into the name tree.
 * tnx, tn, curs backing store for the name tree (tns+3 bytes), its aligned
 *               origin, and the construction cursor used in pass 0.
 */
static int fc = 0; // file count static int Lc = 0; // link count static int eLc = 0; // external link count static void ex(int C d){close_p(); fflush(out); exit(-3);} static void bitch(char C * C s){ex(fprintf(stderr, "%s: Bye.\n", s));} #if 0 static char * app(char C * x){char * r = Cp; while(*x) if(Cp Ce) bitch("Excessive output!");} uchar C * C hd = "0123456789abcdef"; {while(*w) {uchar C x = *(w++); if(' ' < x && x <= '~' && !strchr("\"\\%#?", x)) W(x); else {W('%'); W(hd[x>>4]); W(hd[x&7]);}} W(0); return b;}} static void * core(size_t m, char * who){void * C p = malloc(m); if(!p) {printf("%s exhausted memory!\n", who); exit(-1);} return p;} static int New(uint C i){ static int hwat=hlbat; b16 h = ((i ^ i>>16)>>5)&(hhsize-1); if(pass) return 0; { zot * * p = &hh[h]; while(*p && (*p)->ino != i) p = &(*p)->next; if(!*p){if(hwat==hlbat) {segm * ns = (segm *)core(sizeof(segm), "link hash"); if(f['H']) printf("H:hlbat hard linked files.\n"); ns->next = segp; segp = ns; hwat=0;} {zot * zp = segp->a+hwat++; zp->ino=i; zp->next = *p; *p = zp;} return 1;} else return 0;}} int namec[Ncsize]; // Indexes into (file) name tree. #define tns 100000 uchar tnx[tns+3]; // tnx is space for file name tree. uchar * tn; // Origin of name tree int curs = 0; // Cursor over name tree used in its construction in pass 0. // An entry in the name tree for a file is a one byte name length, // followed by the name, followed by a one byte flag. // If length byte is 0 then previous name is that of a directory and the // next aligned int is index of first name tree entry beyond this directory. // following that index are the entries for this directory. // tn[-1] is flag for the toplevel directory and tn[0] is 0 which is the // beginning of the info for that directory. 
/*
 * Name-tree construction and lookup, the HTML link scanner, the recursive
 * directory walker, and main().  These definitions straddle the physical
 * lines of this copy and therefore have to be read as one unit.
 *
 * NOTE(review): this copy is damaged in at least three places:
 *   - inside tatle(), at `// for(q=tn+x+1; q MaxMem)`: the rest of tatle()
 *     and the header and opening of the scanner routine are missing (rd()
 *     calls that routine as CheckFile(n+nl, l); idFile, warn, in, Ss, eof,
 *     cs, tagstart, L, p and pp come from the lost text);
 *   - in the keyword loop, at `if(a') goto moreTags;}`;
 *   - in main()'s option loop, at `while(ac argc)`.
 * Restore from a pristine copy; do not reconstruct by guesswork.
 *
 * acrete(q, nl)   Appends one entry at tn+curs: length byte nl, nl name
 *                 bytes from q, and a zeroed flag byte; advances curs by
 *                 nl+2 and returns the index of the flag byte.  Exits via
 *                 bitch() when nl>255 or when the tree would reach tns.
 *
 * sl(x, p, q, mrk) Looks up the name p..q in the directory whose marker is
 *                 at index x.  Returns -1 when tn[x] is non-zero (not a
 *                 directory marker), 0 when no entry matches, otherwise the
 *                 index just past the matching entry's flag byte, after
 *                 OR-ing mrk into that flag.  The directory's end index is
 *                 read as an int from the next 4-aligned offset after x.
 *
 * tatle(j, pr, td) Requires tn[j]==0 (else bitch()).  Iterates the entries
 *                 of that directory; main() invokes it under the heading
 *                 "Unreferenced files:".  Only the start of the body is
 *                 visible.
 *
 * Scanner (CheckFile, header lost)
 *                 Works on a mapping released with munmap().  For each '<'
 *                 it reads the tag name; for "a" (tagcode 1) and
 *                 "img"/"embed" (tagcode 2) it scans keywords and passes
 *                 each value to the nested validate().  validate() acts
 *                 only on href in tagcode 1 and src in tagcode 2: it counts
 *                 the link in Lc; a value containing ':' is counted in eLc;
 *                 otherwise the path is resolved component by component
 *                 through sl() using a local copy (tnc) of namec, with
 *                 ".." popping a level and "." ignored, warning when a
 *                 component is missing, when the path climbs above the
 *                 root, or when depth reaches Ncsize.  A "base" tag ends
 *                 the scan of the file.
 *                 NOTE(review): the invariant comment speaks of array
 *                 namec, but the code in that block updates tnc.
 *                 NOTE(review): `tt -= (int)p` / `tt += (int)p` convert a
 *                 pointer to int, which loses bits where pointers are wider
 *                 than int; a pointer difference would avoid that.
 *                 NOTE(review): `while( *x != ':' && x < q)` reads *x
 *                 before testing the bound.
 *
 * rd(q, l, n, CRS) Recursive walk.  q is the entry name, l the depth (0 for
 *                 the command-line argument, which is stat()ed; deeper
 *                 levels use lstat()), n the offset in `name` where q is
 *                 placed, CRS the name-tree index used in pass 1.  Names
 *                 starting with '.' are skipped below the top level.  In
 *                 pass 0 each name below the top is added with acrete() and
 *                 each directory writes its marker, reserves the aligned
 *                 end-index slot and fills it after its children.  df is -1
 *                 at the top level, so `*(tn+df) |= 4` there addresses
 *                 tn[-1], the top-level flag.  The index test accepts the
 *                 first 9 or 10 bytes of "index.html".  In pass 1 a regular
 *                 file is scanned when its link count is 1 or New() accepts
 *                 its inode, unless name+n begins with a black-listed name.
 *                 NOTE(review): `case EACCES:` has no break and so runs
 *                 into `default:`, which exits through ex() -- confirm that
 *                 an unreadable directory is meant to be fatal.
 *                 NOTE(review): acrete() runs before the "has become too
 *                 long" check, so an over-long name is still recorded.
 *
 * main            Prints the RLIMIT_DATA limits, parses options, sets f[]
 *                 from OutCat, aligns tn inside tnx leaving room for
 *                 tn[-1], runs rd() for pass 0 and again for pass 1,
 *                 optionally dumps the tree to the file "acretion" when
 *                 f['a'] is set, calls tatle() and prints the totals.
 *                 NOTE(review): `(bi)tnx` converts a pointer to int (bi is
 *                 int), which loses bits where pointers are wider than int.
 *                 NOTE(review): `while(*op) if(*op < 128) f[*(op++)] = 1;`
 *                 never advances past a byte >= 128.
 *                 NOTE(review): after `if(!out) printf("Barf\n");` the code
 *                 still calls putc() and fclose() on the null stream.
 */
static int acrete(uchar C * C q, int C nl){ int C df = curs+nl+1; // index of flag if(nl>255) bitch("Name too long"); if(nl+curs+2 >= tns) bitch("Excessive file name mass"); *(tn+curs) = nl; memcpy(tn+curs+1, q, nl); *(tn+df) = 0; // Initialize flags. // in flags: 1 means some file down deeper is referenced. // 2 means this file or directory is referenced. // 4 means this directory has an index file. curs+= nl+2; return df;} static int sl(int x, uchar C * C p, uchar C * C q, char mrk) { // x is index into directory tree, to a directory lest sl return -1. // characters at p thru q are from a file reference // to be found in this directory, lest sl return 0. // Takes and returns an offset into block at tn. // but 0 means not found and -1 means treating file as directory. if (*(tn+x)) return -1; {int C bp = (x&~3)+4; int end = *(int*)(tn+bp); x = bp+4; while(x < end) { if(q - p == *(tn+x) && !memcmp(tn+x+1, p, q - p)) { *(tn+x+(q-p)+1) |= mrk; // Note change to flag. return x+*(tn+x)+2;} x += 2 + *(tn+x); if(!*(tn+x)) x = *(int*)(tn+(x&~3)+4); } return 0;}} static int tatle(int C j, void pr(), char C td){ // j is index of "length code" whose zeroness means this is a directory. // pr is a routine to print pathname of this directory. // td (Transparent Directory) means that this directory may be // listed by the server. 
if(*(tn+j)) bitch("Supposed to be a directory descriptor"); {int C bp = (j&~3)+4; int C end = *(int*)(tn+bp); int x = bp+4; char C df = *(tn+j-1); void ps(char C * C b, char C * C e){ char fn[e-b+1]; memcpy(fn, b, e-b), fn[e-b]=0; printf("%s", qs(fn));} while(x != end) {int C NL = *(tn+x); // Length of file or directory name if(x > end) bitch ("death"); if(!*(tn+x+NL+2)) { // A directory uchar C flags = *(tn+x+NL+1); void cn(){ pr(); ps(tn+x+1,tn+x+NL+1); // for(q=tn+x+1; q MaxMem) { printf("Ram limit at %d: ", MaxMem); bitch("Exceeded voluntary RAM limit");} if(pp == MAP_FAILED){ printf( "mmap failed with errno = %d, fineno = %d, name=%s\n", errno, fileno(in), name); perror("Damn"); bitch("damn");} #define bump ({uchar C * q = p; ++p; if(q>=eof) goto end; q;}) while(1){ // find and consider each tag. cs=0; while(*p != '<') bump; tt -= (int)p; tagstart = p; cs=1; bump; while(isspace(*p)) bump; {uchar tagname[10]; uchar * a = tagname; int tagcode = 0; void validate(uchar C * pp, uchar C * q){ // Characters at pp thru q locate keyword value. // Routine validate looks at each keyword value within an html tag // whose semantics designates links that we are concerned with. // It considers both the tag value, "a", "img" or "embed", and the keyword // field name, "href", "src", and decides how to act. while(isspace(*pp)) ++pp; while(isspace(*(q-1))) --q; {uchar C * p = pp; void Warn(char C * C g){ idFile(); if (0) printf( " %s", g); else warn(g, p); {while (pp < q) putc(*(pp++), out); printf("\n");}} if((a == tagname+4 && !memcmp("href", tagname, 4) && tagcode==1) || (a == tagname+3 && !memcmp("src", tagname, 3) && tagcode==2)) { uchar C * x = p; ++Lc; while( *x != ':' && x < q) ++x; if (x == q) { int tnc[Ncsize]; int m = L; {int j = L; while(j--) tnc[j] = namec[j];} // An invariant henceforth in this block: // Elements 0 thru m-1 of array namec locate nametree snapshot for that // portion of pathname from p upon call, to p now. 
if(*p == '/') {++p; m = 1;} while(1){ // process each part of path name. uchar C * r = p; while (*r != '/' && r < q && !(*r == '#' || *r == '?')) ++r; {char C ec = r == q || (*r == '#' || *r == '?'); if(r==p+2 && !memcmp("..", p, 2)) { --m; if(!m) {Warn("This is above sever root: "); return;}} else if(r==p+1 && *p == '.') {} else if (r>p) { int C k = sl(tnc[m-1], p, r, ec+1); if(k<=0) {Warn("No such file as "); return;} if(m == Ncsize) {Warn("too many levels: "); return;} tnc[m++] = k;} if(ec) break;} p = r+1; }} else ++eLc;}}} while(isalnum(*p)) {if(a==tagname+10) goto slip; *(a++) = tolower(*bump);} // Note assignment statements amidst the booleans in the next line. // See other references to "tagcode" and think unification ala Prolog. // See for comments on this style. if((a == tagname+1 && !memcmp("a", tagname, 1) && (tagcode=1)) || ((a == tagname+3 && !memcmp("img", tagname, 3) || a == tagname+5 && !memcmp("embed", tagname, 5)) && (tagcode=2))) { while(isspace(*p)) bump; while(1){ // find and consider each keyword in this tag. 
if(*p == '>') goto moreTags; a = tagname; while(isalpha(*p)) {*a = tolower(*bump); if(a') goto moreTags;} warn("Garbage in tag\n", p); goto slip;} while(isspace(*p)) bump; if (*p == '=') {++p; while(isspace(*p)) bump; if (*p == '\"') {uchar C * C q = ++p; while(*p != '\"') bump; validate(q, p++);} else {uchar C * C q = p; while(!isspace(*p) && *p != '>') bump; validate(q, p);} while(isspace(*p)) bump;}}} else if(a == tagname+4 && !memcmp("base", tagname, 4)) {tt += (int)p; goto xx;} slip: // Skip to end of tag while(1){while(*p != '\"' && *p != '>') bump; if (*p == '>') break; do bump; while(*p != '\"'); bump;}} moreTags: tt += (int)p;} goto xx; end: if(cs) {warn("File ends in mid tag.\n", tagstart); tt += (int)p;} xx: munmap((void *)pp, Ss.st_size);} else printf("File %s is empty\n", name);} fclose(in);}} static void rd(char C * C q, int C l, int C n, int CRS){ int C F = f['F']; if(F) {int x=l; putc('F', out); while(x--) putc('|', out);} if(F) printf("Name is: %s, ", qs(q)); errno=0; if (q[0] != '.' || !l) {int C nl = strlen(q); int C df = !pass && l /*....*/ ? 
acrete(q, nl) : -1; if(nl+n>Nsize-2) printf ("N:The name, %s/%s, has become too long.\n", qs(name), qs(q)); else {struct stat Ss; memcpy(name+n, q, nl+1); if((l?lstat:stat)(name, &Ss)) {if(errno == 2) ex(printf("No such directory : %s\n", qs(name))); else printf ("S:Access denied to lstat for file: %s.\n" "errno = %d\n", qs(name), errno);} else { if(S_ISDIR(Ss.st_mode)){ DIR * C nf = opendir(name); if(!nf) switch(errno){ case ENOTDIR: bitch("lstat says this is a directory but opendir says not!\n"); case ENOENT: ex(printf("which opendir can't find!%s\n", qs(name))); case EMFILE: case ENFILE: bitch(" but too deep for opendir logic.\n"); case EACCES: {if(F) printf("which I cannot access.\n"); else printf ("A:Access to directory %s denied.\n", qs(name));} default: ex(printf("error = %d on file:%s\n", errno, qs(name)));} else {if(F) printf("a directory.\n"); name[n+nl] = '/'; {struct dirent C * d; int dc=0; int bp; if(!pass) {*(tn+curs)=0; bp = (curs&~3)+4; curs = bp+4;} else namec[l] = CRS; while((d = readdir(nf))) if(d->d_name[0] != '.'){ int C L = strlen(d->d_name); rd(d->d_name, l+1, n+nl+1, pass?sl(CRS, d->d_name, d->d_name + L, 0):0); if(!pass && (L == 10 || L == 9) && !memcmp("index.html", d->d_name, L)) {*(tn+df) |= 4; *(tn+curs-1) |= 2;} ++dc;} if(!pass){*(int*)(tn+bp) = curs;} closedir(nf);}}} else { if(!l) ex(printf("You gave me a file, not a directory!\n")); if(!(++fc&63) && f['n']) printf("n:%d %s\n", fc, qs(name)); if(S_ISREG(Ss.st_mode) && (Ss.st_nlink==1 || New(Ss.st_ino))){ if(pass) {{int j = xndx; while(j--) if(!memcmp(name+n, BLnames[j], BLc[j])) goto skip;} CheckFile(n+nl, l); skip:;}}} }}} if(F) printf("\n");} int main(int C argc, char C * C args[]){out = stdout; /* prepare .... 
*/ {struct rlimit rl; getrlimit(RLIMIT_DATA, &rl); printf("Maximum RAM limits are %08lx, %08lx.\n", (long)rl.rlim_max, (long)rl.rlim_cur);} {int ac=1; while(ac argc) bitch("\"N\" option without name"); if (xndx*sizeof(char *) > sizeof(BLnames)) bitch("Too many black list names");} else printf("Invalid option: %s\n", args[ac]); ++ac;} {{int j = 128; while(j--) f[j]=0;} {uchar C * op = (uchar *)OutCat; while(*op) if(*op < 128) f[*(op++)] = 1;}} if(ac >= argc) {printf("ac=%d, argc = %d\n", (int)ac, (int)argc); bitch("No file name included.\n");} if(argc > ac+1) ex(printf("Just one directory name at end of arguments, please\n")); tn = (uchar *)((((bi)tnx-1)&~3)+8); // The above makes tn a multiple of 4 // and also reserves space for tn[-1] which // serves as flag for the top level directory. // tn is henceforth constant. if(xndx) {int j = xndx; printf("Neglecting files and directories named:\n"); while(j--) printf("%s\n", BLnames[j]); printf("\n");} open_p(); pass=0; rd(args[ac], 0, 0, 0); close_p(); fc = 0; if(f['n']) printf("Second pass:\n"); open_p(); pass=1; rd(args[ac], 0, 0, 0); close_p(); *(tn+curs++) = -1; // above is to keep the last file in the top directory from looking like a directory. } if(f['a']) {FILE* out = fopen("acretion", "w"); int j = 0; if(!out) printf("Barf\n"); while(j< curs) putc(tn[j++], out); fclose(out);} if(curs){printf("\nUnreferenced files:\n"); {void xx(){}; tatle(0, xx, !(tn[-1]&4));}} printf("\n%d files with %lld characters of which %d are in tags,\n" "%d links of which %d are external.\n", fc, tv, tt, Lc, eLc); return 0;}