// Reads standard input, encoded as UTF-8, and yields integer code points.
// Each call to g() returns one code point as an int; back() pushes the most
// recently returned code point back so that the next g() returns it again.
// Diagnostics are printed on standard output, prefixed by loc().
#include <stdio.h>
#include <stdlib.h>

typedef unsigned char uchar;

enum {
    LF = 0x0A,                  // line feed byte
    CR = 0x0D,                  // carriage return byte
    REPLACEMENT_CHAR = 0xFFFD   // returned for a byte that cannot start a sequence
};

static int cil = 1;          // 1-based index, within the line, of the code point being read
static int lc = 1;           // 1-based current line number
static int bc = 0;           // number of getchar() calls made so far (byte offset in input)
static int ad = 0, oldc;     // ad: push-back pending; oldc: last code point returned by g()
static int last_was_cr = 0;  // previous byte was CR, so a following LF is the same line end

// Print the current input position as a prefix for a diagnostic message.
void loc(void)
{
    printf("at character %d of line %d," "(or 0x%x in file):\n",
           cil, lc, (unsigned)bc);
}

// Read one raw byte from stdin and maintain the byte and line counters.
// Returns the byte as a value in 0..255. Does not return at end of input:
// it prints a message and terminates the process instead.
static int gc(void)
{
    // getchar() returns int so that EOF stays distinct from every byte value;
    // narrowing it to char would make EOF and the byte 0xFF indistinguishable.
    int c = getchar();
    ++bc;
    if (c == EOF) {
        // Covers both end of file and read errors. The exit status is
        // printf's return value (the message length), as it always was.
        exit(printf("end of file at char: %d in line %d\n", cil, lc));
    }
    if (c == LF || c == CR) {
        cil = 1;
        // The LF of a CR LF pair belongs to the line end already counted.
        if (!(c == LF && last_was_cr)) {
            ++lc;
        }
    }
    last_was_cr = (c == CR);
    return c;
}

// Read one byte that is expected to be a UTF-8 continuation byte (10xxxxxx).
// A byte of any other form is reported but still consumed and returned.
static int gx(void)
{
    int c = gc();
    if ((c & 0xc0) != 0x80) {
        loc();
        printf("Bad utf8 extension byte: %02X\n", (unsigned)c);
    }
    return c;
}

// Decode one code point from stdin.
// Returns the code point, or REPLACEMENT_CHAR (after printing a diagnostic)
// when the first byte is not a valid lead byte. Overlong encodings and
// surrogate values are not rejected.
static int gp(void)
{
    int lead = gc();
    if (lead < 0x80) {
        return lead;                    // single-byte (ASCII) sequence
    }

    int extra;                          // number of continuation bytes to read
    if ((lead & 0xe0) == 0xc0) {
        extra = 1;                      // 110xxxxx
    } else if ((lead & 0xf0) == 0xe0) {
        extra = 2;                      // 1110xxxx
    } else if ((lead & 0xf8) == 0xf0) {
        extra = 3;                      // 11110xxx
    } else {
        // 10xxxxxx (stray continuation byte) or 11111xxx: neither can start
        // a sequence. Reject explicitly rather than derive a shift count
        // from the bit pattern, which for 0xF8..0xFF would exceed int width.
        loc();
        printf("Bad utf8 lead byte: %02X\n", (unsigned)lead);
        return REPLACEMENT_CHAR;
    }

    // The lead byte contributes its low (6 - extra) bits; each continuation
    // byte contributes six more.
    int v = lead & (0x3f >> extra);
    for (int i = 0; i < extra; ++i) {
        v = (v << 6) | (gx() & 0x3f);
    }
    return v;
}

// Arrange for the next call to g() to return the same code point again.
// Only one level of push-back is kept.
void back(void)
{
    ad = 1;
}

// Return the next code point: either the one pushed back by back(), or a
// freshly decoded one. Terminates the process at end of input (see gc).
int g(void)
{
    if (ad) {
        ad = 0;                         // replay; the position was already counted
    } else {
        oldc = gp();
        // Line-end bytes reset cil inside gc(); any other code point
        // advances the column by one.
        if (oldc != LF && oldc != CR) {
            ++cil;
        }
    }
    return oldc;
}

// Example driver:
// int main(){while(1) printf(" %x", g());}