. , , ,

,,,

,

Лабораторная работа 1

Задание:

Построить лексический анализатор (сканер), который будет в

дальнейшем использоваться при разборе HTML-документов.

Текст программы:

#include

#include

#include

#include

#include "parserhtm_cnst.h"

#include "parserhtm_glob.h"

#include "parserhtm_tokn.h"

#include "parserhtm_err.h"

#include "parse.h"

#pragma hdrstop

/* Semantic value shared with the parser; yylex() stores a malloc'd copy of */
/* the token text in yylval.pchar.                                          */
extern YYSTYPE yylval;

/* Lexer states held in `state` / `old_state`. */
enum {TEXT=0, PRE, KEYWORD, ATTR, AVALUE, IN_QUOTE};

/* Printable names of the six lexer states, in enum order. */
char *states [ 6] = {"TEXT", "PRE", "KEYWORD", "ATTR", "AVALUE", "IN_QUOTE"};

/* Printable names of ten token types.                                      */
/* NOTE(review): the numeric token values come from a header not shown      */
/* here - confirm that this order matches them.                             */
char *tktypes[10] = {"_OpenTag", "_CloseTag", "_EndTag", "_C_KEYWORD",

"_S_KEYWORD", "_A_KEYWORD", "_V_KEYWORD", "_NUM",

"_IDENTIFIER", "_QUOTED_ATTR"};

/* State meanings: text, preformatted text, HTML KEYWORD, attribute KEYWORD, */

/* attribute value KEYWORD (IN_QUOTE is not described by the author) */

/* Set by nexttok(): TRUE after "</" was returned, FALSE after "<". */
int in_close = FALSE;

/* Set by nexttok(): TRUE after "<" was returned, FALSE after "</". */
int in_open = FALSE;

/* Maintained by yylex(): non-zero while it considers a paragraph open. */
int opened_par = FALSE;

/* Current lexer state (one of the enum values above). */
int state = TEXT;

/* State saved by nexttok() when a tag starts; also adjusted for PRE tags. */
int old_state = TEXT;

/* Not referenced by the code visible in this listing. */
int cUKSZ = 0;

/* TRUE while yylex() still has queued tokens in lex_buff to hand out. */
int USE_BUFFER = FALSE;

/* Number of tokens currently queued in lex_buff. */
int lex_buff_size = 0;

/* One queued token: its type and its text. */
typedef struct {

int tktyp;

char tkval[NMSZ];

} tbuff;

/* Token queue used by yylex(); it queues at most 5 tokens at a time and */
/* pops them from the highest index down.                                 */
tbuff lex_buff[5];

/* Lookahead character; nexttok() keeps it one character ahead. */
int c;

/* Not referenced by the code visible in this listing. */
int lineno;

/* Incremented by nexttok() after every nextchar() call. */
long charno;

/* Not referenced by the code visible in this listing. */
long f_size;

/* Not referenced by the code visible in this listing. */
int icm;

/* !!!!!! */

/* Not referenced by the lexer code visible in this listing. */
char comment[CMSZ];

/* !!!!!! */

/* One entry of a keyword table searched with bsearch(). */
typedef struct {

/* Keyword text, compared with strcmp(). */
char name[TKSZ];

/* Token type returned for this keyword. */
int kw_token;

/* Flag read by yylex(); presumably "may appear inside a paragraph" - */
/* TODO confirm, the lines that test it are truncated in the listing.  */
int in_paragraph;

} kw_table;

/********** functions declarations ************/

void fixfile(FILE *, char*);

int nextchar(FILE *, FILE *);

void nlproc (FILE *);

int bsearch (char *, kw_table *, int);

int nexttok (char *);

void lexinit();

int yylex();

/**********************************************/

/* Possible KEYWORDS - directives */

kw_table keyword_table[KWSZ] = {


- 2 -

{"A", _C_KEYWORD, TRUE },

// ...

{"WBR", _S_KEYWORD, TRUE }

};

/* Possible KEYWORDS - attributes */
/* Searched by nexttok() with bsearch() when a word is not a tag keyword,  */
/* so entries must stay in ascending strcmp() order.  The third field      */
/* (in_paragraph) is omitted in every entry and is therefore zero.         */

kw_table attr_table[ATSZ] = {

{"ALIGN", _A_KEYWORD },

// ...

{"WRAP", _A_KEYWORD }

};

/* Possible KEYWORDS - attribute's values */
/* The third field (in_paragraph) is omitted in every entry and is zero.   */
/* NOTE(review): no code visible in this listing searches this table, and  */
/* "_top" is lower case while nexttok() upper-cases keywords - confirm     */
/* how values are matched before relying on it.                            */

kw_table aval_table[AVSZ] = {

{"ABSBOTTOM", _V_KEYWORD },

// ...

{"_top", _V_KEYWORD }

};

/*

* Functions

*/

/*
 * Binary search for a name in a name table.  Returns the index of the
 * matching element in the array, or -1 if nothing was found.
 * The array consists of records with the fields: a NUL-terminated ASCII
 * string holding an HTML keyword, and the int token type of that keyword.
 * The table has to be ordered so that strcmp() on the names is ascending.
 */
int bsearch( char word[], kw_table word_table[], int tbsize)
{
    int first = 0;
    int last = tbsize - 1;

    for (;;) {
        int probe;
        int order;

        /* Empty interval: the word is not in the table. */
        if (first > last)
            return (-1);

        probe = (first + last) / 2;
        order = strcmp(word, word_table[probe].name);

        if (order == 0)
            return (probe);

        if (order < 0)
            last = probe - 1;      /* continue in the lower half */
        else
            first = probe + 1;     /* continue in the upper half */
    }
}

/*
 * nexttok - read the next raw token from the input.
 *
 * Works on the global one-character lookahead `c` and the global lexer
 * `state`.  The token text is copied into `val` (the caller must provide at
 * least NMSZ bytes) and the token type is returned:
 *
 *   - in PRE state every whitespace character is returned on its own as
 *     _IDENTIFIER; in all other states whitespace is skipped;
 *   - "<" / "</" seen in TEXT or PRE state yields _OpenTag / _CloseTag and
 *     switches to KEYWORD state;
 *   - in KEYWORD state "!..." up to '>' is skipped as a comment (an empty
 *     _IDENTIFIER is returned); otherwise an upper-cased alphanumeric word
 *     is looked up in keyword_table, then attr_table, and returned with the
 *     table's token type, or as _IDENTIFIER when unknown;
 *   - in IN_QUOTE state either the closing '"' or the text up to it
 *     (_QUOTED_ATTR) is returned.
 *
 * Any other situation returns 0 with `val` untouched.
 *
 * Changes against the printed listing: the character constants that had
 * lost their backslash ('\n', '\0') are restored, page-number lines that
 * had slipped into the body are removed, words and quoted values longer
 * than NMSZ-1 characters are truncated instead of overrunning the buffer,
 * and an unterminated quoted value stops at EOF instead of looping forever.
 */
int nexttok(char *val)
{
    char tokenvalue[NMSZ];
    int i = 0;
    int c1;
    int tokentype;

    while (isspace(c)) { /* c is always one char ahead */
        if (c == '\n')
            nlproc(listfp);
        if (state == PRE) {
            /* Whitespace is significant in preformatted text. */
            tokenvalue[0] = c;
            tokenvalue[1] = '\0';
            c = nextchar(infp, listfp); charno++;
            strcpy(val, tokenvalue);
            return (_IDENTIFIER);
        }
        c = nextchar(infp, listfp); charno++;
    }

    if ((state == TEXT || state == PRE) && c == '<') {
        /* Peek at the character after '<' without consuming it. */
        ungetc(c1 = getc(infp), infp);
        old_state = state;
        state = KEYWORD;
        if (c1 == '/') {
            /* Consume both '<' and '/'. */
            c = nextchar(infp, listfp); charno++;
            c = nextchar(infp, listfp); charno++;
            in_close = TRUE;
            in_open = FALSE;
            strcpy(val, "</");
            return (_CloseTag);
        }
        c = nextchar(infp, listfp); charno++;
        in_close = FALSE;
        in_open = TRUE;
        strcpy(val, "<");
        return (_OpenTag);
    }

    if (state == KEYWORD) {
        if (c == '!') { /* Comments! */
            /* Skip to '>' (left in c for the next call) or to EOF. */
            while ((c != '>') && (c != EOF)) {
                c = nextchar(infp, listfp); charno++;
            }
            state = ATTR;
            val[0] = '\0';
            return (_IDENTIFIER);
        }

        while (isalnum(c)) {
            /* Keep room for the terminator; drop the excess of long words. */
            if (i < NMSZ - 1)
                tokenvalue[i++] = toupper(c);
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';

        if ((i = bsearch(tokenvalue, keyword_table, KWSZ)) >= 0) {
            tokentype = keyword_table[i].kw_token;
            state = ATTR;
            if (strcmp("PRE", tokenvalue) == 0) {
                /* <PRE> enters preformatted mode, </PRE> leaves it. */
                old_state = in_close ? TEXT : PRE;
            }
        } else if ((i = bsearch(tokenvalue, attr_table, ATSZ)) >= 0) {
            tokentype = attr_table[i].kw_token;
            state = AVALUE;
        } else {
            /* Unknown attribute. Actually, it's much easier to just */
            /* ignore it in YACC than to try to skip it here.        */
            tokentype = _IDENTIFIER;
            state = AVALUE;
        }
        strcpy(val, tokenvalue);
        return (tokentype);
    }

    if (state == IN_QUOTE) {
        if (c == '"') {
            c = nextchar(infp, listfp); charno++;
            state = ATTR;
            strcpy(val, "\"");
            return ('"');
        }

        /* maybe URL, maybe rain, maybe snow... */
        while ((c != '"') && (c != EOF)) {
            if (i < NMSZ - 1)
                tokenvalue[i++] = c;
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';
        /* The closing quotation mark stays in c for the next call. */
        strcpy(val, tokenvalue);
        return (_QUOTED_ATTR);
    } /* end if for (state == IN_QUOTE) */

    return 0; /* This should never happen... */
}

/*
 * yylex - token source for the parser, layered on top of nexttok().
 *
 * When no tokens are queued (USE_BUFFER is FALSE) it reads a token with
 * nexttok() and, depending on its type and on opened_par, queues between 2
 * and 5 tokens in lex_buff.  The queue is popped from the highest index
 * down, so entries are stored in reverse order of delivery.  Each call
 * returns one token type and stores a malloc'd copy of the token text in
 * yylval.pchar.
 *
 * NOTE(review): many lines below are cut off ("...") in the listing this
 * code was taken from, so the text copied into each lex_buff entry and the
 * table field tested after bsearch() are not visible here.
 * NOTE(review): bsearch() returns -1 for unknown words; the two
 * keyword_table[bsearch(...)] expressions would then index element -1.
 * NOTE(review): the result of malloc() is used without a NULL check.
 */
int yylex()

{

int tktyp;

char tkval[NMSZ];

if (!USE_BUFFER) {

tktyp = nexttok(tkval);

switch (tktyp) {

case _OpenTag: {

/* Read the word following "<"; tktyp/tkval now describe that word. */
tktyp = nexttok(tkval);

if (opened_par) {

/* NOTE(review): when the word is neither _C_KEYWORD nor _S_KEYWORD   */
/* nothing is queued and only the word itself is returned - confirm   */
/* that dropping the "<" token here is intended.                      */
if (tktyp == _C_KEYWORD || tktyp == _S_KEYWORD) {

if (!keyword_table[bsearch(tkval, keyword_table, KW...

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[...

lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...

lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...

lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...

lex_buff_size = 5; USE_BUFFER = TRUE;

/* A paragraph stays open only if the new tag is itself "P". */
opened_par = (strcmp(tkval, "P")==0);

} else {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[...

lex_buff_size = 2; USE_BUFFER = TRUE;

}

}

} else { // i.e. opened_par == FALSE

opened_par = (strcmp(tkval, "P")==0);

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[1].tkv...

lex_buff_size = 2; USE_BUFFER = TRUE;

}

break;

}

case _CloseTag: {

/* Read the word following "</". */
tktyp = nexttok(tkval);

if (opened_par) {

if (keyword_table[bsearch(tkval, keyword_table, KWSZ)]...

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0]....

lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1]....

lex_buff_size = 2; USE_BUFFER = TRUE;

} else {

if (strcmp(tkval, "P")==0) {

/* Explicit "</P": pass it through and mark the paragraph closed. */
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...

lex_buff_size = 2; USE_BUFFER = TRUE;

opened_par = FALSE;

} else {

/* Five tokens are queued: entries 4..2 are delivered before the */
/* closing tag that was actually read (entries 1..0).            */
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...

lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...

lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...

lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...

lex_buff_size = 5; USE_BUFFER = TRUE;

opened_par = FALSE;

}

}

} else {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkv...


/* NOTE(review): the "- 5 -" line below is a page number from the printed */
/* listing, not program text.                                             */
- 5 -

lex_buff_size = 2; USE_BUFFER = TRUE;

}

break;

}

case _IDENTIFIER: {

/* An identifier outside any tag while no paragraph is open: three */
/* tokens are queued in front of it and the paragraph is marked    */
/* open.                                                           */
if ( !(in_open || in_close) && (!opened_par)) {

lex_buff[0].tktyp = _IDENTIFIER; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...

lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...

lex_buff[3].tktyp = _OpenTag ; strcpy(lex_buff[3].tkv...

lex_buff_size = 4; USE_BUFFER = TRUE;

opened_par = TRUE;

}

break;

}

case 0: { // EOF

if (opened_par) {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...

lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...

lex_buff[3].tktyp = _CloseTag ; strcpy(lex_buff[3].tkv...

lex_buff_size = 4; USE_BUFFER = TRUE;

/* NOTE(review): a _CloseTag sequence was just queued, yet the flag  */
/* is set to TRUE; FALSE looks intended - confirm.                   */
opened_par = TRUE;

}

}

}

}

if (USE_BUFFER) {

/* Pop the most recently stored entry; the queue is emptied from the top. */
tktyp = lex_buff[--lex_buff_size].tktyp;

strcpy(tkval, lex_buff[ lex_buff_size].tkval);

if (lex_buff_size == 0) USE_BUFFER = FALSE;

}

/* Hand the token text to the parser in freshly allocated memory. */
yylval.pchar = (char *) malloc(1 + strlen(tkval));

strcpy(yylval.pchar, tkval);

return(tktyp);

}

Примечание: распечатки файлов htm_cnst.h, htm_glob.h, htm_tokn.h и

htm_err.h см. в приложении (appendix) 4.4.


- 6 -

Лабораторная работа 2

Задание:

Построить форматизатор C-файлов. Необходимо реализовать

следующие функции:

- `{` - всегда с новой строки без отступа

- `{` - никогда не переносится

- `{` - всегда с новой строки с отступом

- 0 - вставлять символ TAB при отступе

- 1..8 вставлять x пробелов

- форматировать комментарии с xx по yy позиции

- несколько команд на строке

- `=` выделять пробелами

Текст программы:

#include

#include

#include

#include

/* Token classes returned by getNext(). */
#define ERROR 0

#define IDENT 1

#define KEYWORD 2

#define BRACKETS 3

#define OTHER 4

/* Single characters that format() dispatches on. */
#define BEGIN '{'

#define END '}'

#define COMMA ','

#define SEMI ';'

/* Line break.  The printed listing shows 'n' here because the backslash */
/* of the escape sequence was lost; the constant is the newline.         */
#define LB '\n'

/* Text of the current token; filled by getNext(), written by outVal(). */
char val[100];

/* Pending comment text; outCR() and format() test its first character. */
char comment[200];

/* Current output column, advanced by outVal() and outCR(). */
int pos=0;

/* Last token type read ahead by format(). */
int undo;

/* Set by outCR(), cleared by outVal(): one more indent unit is pending. */
int backspace;

/* Number of entries in keywords[]. */
#define N_KW 8

/* C control-flow keywords recognised by the formatter. */
char keywords[N_KW][20]={

"for",

"while",

"do",

"if",

"switch",

"else",

"case",

"default"};

// -----------------------------------------------------------------

// Copies one quoted literal (character or string) from f to the buffer at s.
//
// The first character read from f is taken as the opening quote.  Characters
// are then copied up to and including the matching closing quote; a
// backslash copies the following character verbatim, so an escaped quote
// does not end the literal.  s is a reference and is left pointing just past
// the last character stored; no terminating NUL is written (the caller does
// that).  Copying also stops at end of file.
//
// Fixes against the printed listing:
//   - the character constants that had lost their backslash are restored;
//   - the character is kept in an int, so EOF is detected instead of
//     looping forever on an unterminated literal;
//   - an escaped quote no longer terminates the literal;
//   - only the same kind of quote that opened the literal closes it
//     (so "it's" is read as one literal).
//
// The function cannot see the size of the destination buffer; the caller
// must make sure the literal fits.
void blockQuote (char *(&s),FILE *f)
{
    int quote = fgetc(f);
    if (quote == EOF)
        return;
    *(s++) = (char)quote;

    for (;;) {
        int c = fgetc(f);
        if (c == EOF)
            break;
        *(s++) = (char)c;
        if (c == quote)
            break;
        if (c == '\\') {
            // Copy the escaped character without interpreting it.
            c = fgetc(f);
            if (c == EOF)
                break;
            *(s++) = (char)c;
        }
    }
}

// -----------------------------------------------------------------

// Reads the next token from f into the global buffer val and returns its
// class:
//   - a run starting with a letter, digit or '_' (continuing over letters,
//     digits, '_' and '.') returns KEYWORD or IDENT; a KEYWORD gets one
//     trailing space appended to val;
//   - a quote character starts a literal that is copied by blockQuote() and
//     returned as IDENT;
//   - '=' is stored surrounded by spaces and returned as OTHER when E is
//     non-zero and the previous token was not a single symbol;
//   - any other character is stored alone and returned as OTHER.
// The static flag symbol records whether the previous token was such a
// single character.
//
// NOTE(review): this listing is damaged.  The "for (int i=0;i" line has lost
// the rest of its header and its body (presumably a search of keywords[]
// that leaves i == N_KW when nothing matches - confirm against the original
// source), the quote test has lost a backslash, and "- 7 -" is a page number
// from the printed listing.
// NOTE(review): buf and startPos are assigned but not otherwise used in the
// visible code; E is declared outside this listing.
// NOTE(review): c is a char, so a comparison with EOF is not reliable.
int getNext (FILE *f)

{

char *s=val,c;

int buf=undo;

static int symbol=0;

static int startPos=1;

c=fgetc(f);


- 7 -

startPos=0;

// Identifier, keyword or number.
if (isalnum(c) || c=='_') {

symbol=0;

*(s++)=c;

// Copies one character too many; it is pushed back and erased below.
while ((isalnum(c) || c=='_' || c=='.') && !feof(f) && !isspace(c))

c=*(s++)=fgetc(f);

ungetc (c,f);

*(--s)=0;

// Truncated in the listing, see the note at the top of the function.
for (int i=0;i

if (i!=N_KW) *(s++)=' ';

*s=0;

if (i!=N_KW) return KEYWORD;

return IDENT;

}

if (c==''' || c=='"') {

symbol=0;

ungetc(c,f);

blockQuote (s,f);

*s=0;

return IDENT;

}

if (c=='=' && E && !symbol)

{

*(s++)=' ';

*(s++)=c;

*(s++)=' ';

*s=0;

return OTHER;

}

*s=c, *(s+1)=0;

symbol=1;

return OTHER;

}

// Writes the current token text (global val) to f and advances pos by one
// for every character written.
//
// If backspace is set and back is zero, one indent unit is written first:
// a TAB character (code 9) when T is zero, otherwise a run of spaces.
// backspace is cleared in every case.
//
// NOTE(review): the "for (int j=0;j" header is cut off in the listing;
// presumably it counts up to T spaces - confirm against the original source.
// T and F3 are declared outside this listing.
void outVal (FILE *f,int back=0)

{

char *s=val;

// The else below belongs to the inner "if (!T)".
if (backspace && !back)

if (!T) {

fputc (9,f);

pos+=F3;

}

else {

for (int j=0;j

{

fputc (' ',f);

pos++;

}

}

backspace=0;

while (*s) {

fputc (*(s++),f);

pos++;

}

}

// Starts a new output line (unless newLine is non-zero) and writes the
// indentation for it.
//
// A pending comment (first character of the global comment is non-zero) is
// written first through outComment().  When newLine is zero a line break is
// written and pos is reset to 0.  Indent units are then written - a TAB
// character (code 9) when T is zero, otherwise spaces - and backspace is set
// when tab is non-zero and newLine is zero, which makes the next outVal()
// call write one more indent unit.
//
// NOTE(review): this listing is damaged.  Both "for (int ...;" headers are
// cut off (the outer loop presumably runs up to limit, the inner one up to
// T, and the inner one has also lost its opening brace), 'n' has lost the
// backslash of '\n', and "- 8 -" is a page number from the printed listing.
// T, F3 and outComment() are declared outside this listing.
void outCR (FILE *f,int tab,int newLine=0)

{

// Number of indent units written here; the last one is left to outVal().
int limit=tab-1+newLine;

// Out Comments

if (*comment)

outComment (f);

if (!newLine) {

fputc ('n',f);

pos=0;


- 8 -

}

for (int i=0;i

{

if (!T) {

fputc (9,f);

pos+=F3;

}

else {

for (int j=0;j

fputc (' ',f);

pos++;

}

}

}

if (tab && !newLine)

backspace=1;

}

// -----------------------------------------------------------------

// Main loop of the formatter: reads tokens from f_in with getNext() and
// writes the re-formatted text to f_out through outVal() and outCR().
// Always returns 1.
//
// tab is the nesting depth passed to outCR(); lb is 1 right after a line
// break has been written by outCR() and 0 after a token has been written by
// outVal().  N is the brace-placement option declared outside this listing;
// only the values 2 and 3 are tested here.
//
// Changes against the printed listing:
//   - `used` in the LB case is initialised; it was read uninitialised
//     whenever there was no pending comment;
//   - a page-number line ("- 9 -") that had slipped into the switch is
//     removed;
//   - the locals tab1 and tab2, which were only ever written, are removed.
//
// NOTE(review): tab is incremented for BEGIN but never decremented for END,
// and the END case writes no token; part of the original code may be
// missing from the listing - confirm against the original source.
int format (FILE *f_in,FILE *f_out)
{
    int tab=0,lb=0;
    int lex;

    while (!feof(f_in))
    {
        lex=getNext(f_in);
        switch (lex) {
        case KEYWORD: {
            lb=0;
            outVal (f_out);
            lex=getNext(f_in);
            break;
        }
        case BEGIN: {
            if (!lb && N!=2) outCR (f_out,tab);
            if (N==3) outCR (f_out,1,1);
            outVal(f_out);
            outCR (f_out,++tab);
            lb=1;
            break;
        }
        case END: {
            if (!lb) outCR (f_out,tab);
            lex=getNext (f_in);
            // Swallow one line break directly after the brace.
            if (lex==LB) lex=getNext(f_in);
            undo=lex;
            outCR (f_out,tab);
            lb=1;
            break;
        }
        case SEMI:
        case IDENT: {
            // Both cases write the token and read one token ahead.
            lb=0;
            outVal (f_out);
            lex=getNext (f_in);
            undo=lex;
            break;
        }
        case LB: {
            int used=0;   // 1 once the line break has been written
            if (*comment) {
                outCR (f_out,tab);
                used=1;
            }
            lex=getNext (f_in);
            if (N!=2 || lex!=BEGIN) {
                if (!used) outCR (f_out,tab);
                lb=1;
            }
            undo=lex;
            break;
        }
        case EOF:
            return 1;
        default: {
            lb=0;
            outVal(f_out);
        }
        }
    }
    return 1;
}

Примечание: необходимые функции были реализованы в полном объеме

и при сдаче нареканий не вызвали. (Тестовый пример

приведен в приложении (appendix) 4.5)


- 10 -

Лабораторная работа 3

Задание:

Проанализировать хф и выбрать лучшую для случайного распределения ид-ов

Обобщенная формула вычисления хф:

h0 = 0;

h(i) = Alf* h(i-1) [+] C(i), i=1..k

k - длина строки. [+] - некоторая произвольная операция

(+, -, <<, _rotl, ^, |, &)

Проанализировать эти и любые другие хф на количество коллизий (конфликтов)

для некоторых случайных последовательностей. Например:

1) id'ы языка C (до 50).

2) ---- "" ---- (до 100).

3) ---- "" ---- (до 1000).

4) Внешние имена стандартной библиотеки BC++.

5) Внешние имена графической библиотеки BC++.

6) Случайно генерируемые имена (~600)

7) Английские слова с префиксами и/или суффиксами (xxx) - около 200

8) 300 имен вида: w000, w001, w002, etc

Данные статистики свести в таблицу/график

Текст программы анализатора:

#include

#include

/*
 * Shift-and-add hash: for every character of the NUL-terminated string s
 * the running value is doubled and the character is added.
 * Returns 0 for the empty string.
 */
unsigned hash_shift(char *s)
{
    unsigned acc = 0;
    const char *p;

    for (p = s; *p != '\0'; ++p)
        acc = acc + acc + *p;   /* same as (acc << 1) + *p */

    return acc;
}

/*
 * Rotate-and-xor hash: for every character of the NUL-terminated string s
 * the running value is rotated left by one bit and xor-ed with the
 * character.  Returns 0 for the empty string.
 *
 * The original called _rotl(), which exists only in some DOS/Windows
 * compilers.  The rotation is written out here so that the function builds
 * with any C compiler and gives the same result as _rotl(hash, 1).
 */
unsigned hash_rotl(char *s)
{
    unsigned hash = 0;

    while (*s) {
        /* (~0u >> 1) has every bit set except the top one, so the      */
        /* comparison yields 1 exactly when the top bit of hash is set; */
        /* that bit is the one that wraps around to position 0.         */
        unsigned carry = hash > (~0u >> 1);

        hash = ((hash << 1) | carry) ^ *s++;
    }
    return hash;
}

/*
 * PJW-style hash of the NUL-terminated string s: the running value is
 * shifted left by four bits, the character is added, and whenever one of
 * the top four bits of the 32-bit value becomes set those bits are folded
 * back into the low part and cleared.  The result is always in the range
 * 0 .. 0x0FFFFFFF; the empty string gives 0.
 *
 * Fixes:
 *   - characters are read as unsigned char; with a signed char type a byte
 *     above 0x7F used to be sign-extended and corrupted the high bits;
 *   - the running value is kept to 32 bits, so the result is the same with
 *     a 64-bit long and can never come out negative (the caller uses it
 *     as an array index).
 */
long hash_pgw(char *s)
{
    const unsigned char *p;
    unsigned long h = 0;
    unsigned long g;

    for (p = (const unsigned char *)s; *p != '\0'; p++)
    {
        h = ((h << 4) + *p) & 0xFFFFFFFFUL;
        g = h & 0xF0000000UL;
        if (g != 0)
        {
            h ^= g >> 24;
            h ^= g;        /* clears the four top bits again */
        }
    }
    return (long)h;
}

/*
 * Multiplicative hash: for every character of the NUL-terminated string s
 * the running value is multiplied by D (5) and the character is added.
 * Returns 0 for the empty string.
 *
 * Fixes: D now has an explicit type (`const D=5;` relied on implicit int,
 * which current C does not accept), and a page-number line ("- 11 -") that
 * had slipped into the body is removed.
 */
unsigned hash_4 (char *s)
{
    unsigned h = 0;
    const unsigned D = 5;

    while (*s)
        h = D*h + *s++;

    return h;
}

/*
 * Hash-function comparison driver.
 *
 * Usage: HASH.COM <n> <file_name>
 *   n         - number of slots in each hash table (at least 10)
 *   file_name - text file with whitespace-separated identifiers
 *
 * Every identifier is hashed with hash_shift, hash_rotl, hash_pgw and
 * hash_4 and counted in a table of n slots per function.  For each function
 * the program prints the average number of identifiers per occupied slot
 * and the largest slot count.  Always returns 0.
 *
 * Restored from the damaged listing: the "i<n;i++" part of the statistics
 * loop, the four "maxK<tableK[i]" updates, the "\n" escapes inside the
 * messages and the "<n> <file_name>" part of the usage line had been
 * stripped, and a page-number line ("- 12 -") sat inside the loop.
 * NOTE(review): the usage line is rebuilt from the two lines that describe
 * n and file_name - confirm against the original source.  The message texts
 * are otherwise kept exactly as they appear in the listing.
 *
 * Fixes: explicit int return type; identifiers longer than 99 characters no
 * longer overflow buf; the read loop stops on the first failed read instead
 * of testing feof(); the hash_pgw value is converted to unsigned before the
 * modulo so that it can never give a negative index; an empty input file no
 * longer prints the result of 0/0; the tables are freed and the file is
 * closed on every path.
 */
int main (int argc,char **argv)
{
    int n=0,total=0;
    int placed1=0,placed2=0,placed3=0,placed4=0;
    int max1=0,max2=0,max3=0,max4=0;
    int *table1,*table2,*table3,*table4;
    FILE *f;
    char buf[100];

    puts (" ¡à âà ï à ¡â  3. ˆáᥢ ¨¥ åíè-äãªæ¨¨.");
    puts ("ï¯ã¢ ˆ.. -61");

    if (argc<3)
    {
        puts ("\n HASH.COM <n> <file_name>");
        puts (" n - ç¨á í¥¥â¢ ¢ â ¡¨æ¥");
        puts (" file_name - ¨ï ä   á ¨¥â¨ä¨ª âà ¨ \n");
        return 0;
    }

    n=atoi(argv[1]);
    if (n<10)
    {
        puts ("\n ¨èª  ¥ìª ï â ¡¨æ  \n");
        return 0;
    }

    table1=(int*)calloc(n,sizeof(int));
    table2=(int*)calloc(n,sizeof(int));
    table3=(int*)calloc(n,sizeof(int));
    table4=(int*)calloc(n,sizeof(int));

    if (!table1 || !table2 || !table3 || !table4)
    {
        puts ("\n¥ 墠⠥⠯ ï⨠ï â ¡¨æ\n");
        free (table1); free (table2); free (table3); free (table4);
        return 0;
    }

    if ((f=fopen(argv[2],"r"))==NULL)
    {
        puts ("\n¥ £ã âªàëâì 㪠 ë ä \n");
        free (table1); free (table2); free (table3); free (table4);
        return 0;
    }

    /* %99s leaves room for the terminator in buf[100]. */
    while (fscanf (f,"%99s",buf)==1)
    {
        table1[hash_shift(buf)%n]++;
        table2[hash_rotl(buf)%n]++;
        table3[(unsigned long)hash_pgw(buf)%(unsigned long)n]++;
        table4[hash_4(buf)%n]++;
        puts (buf);
        total++;
    }

    puts ("â â¨á⨪  :");

    /* placedK counts occupied slots, maxK is the fullest slot. */
    for (int i=0;i<n;i++)
    {
        if (table1[i]) {
            if (max1<table1[i]) max1=table1[i];
            placed1++;
        }
        if (table2[i]) {
            if (max2<table2[i]) max2=table2[i];
            placed2++;
        }
        if (table3[i]) {
            if (max3<table3[i]) max3=table3[i];
            placed3++;
        }
        if (table4[i]) {
            if (max4<table4[i]) max4=table4[i];
            placed4++;
        }
    }

    /* With no input at all every placedK is 0; print 0 instead of 0/0. */
    printf (" HASH_SHIFT : %f max=%d\n",placed1 ? (float)total/placed1 : 0.0f,max1);
    printf (" HASH_ROTL : %f max=%d\n",placed2 ? (float)total/placed2 : 0.0f,max2);
    printf (" HASH_PGW : %f max=%d\n",placed3 ? (float)total/placed3 : 0.0f,max3);
    printf (" HASH_4 : %f max=%d\n",placed4 ? (float)total/placed4 : 0.0f,max4);

    fclose (f);
    free (table1); free (table2); free (table3); free (table4);
    return 0;
}

&iexcl;&agrave; &acirc;&agrave; &iuml; &agrave; &iexcl;&acirc; 1 &uml;&yen;: &#143;&aacute;&acirc;&agrave;&uml;&acirc;&igrave; &yen;&ordf;&aacute;&uml;&ccedil;&yen;&aacute;&ordf;&uml; &uml; &

 

 

 

! , , , .
. , :