,,,
Лабораторная работа 1
Задание:
Построить лексический анализатор (сканер), который будет в
дальнейшем использоваться при разборе HTML-документов.
Текст программы:
#include
#include
#include
#include
#include "parserhtm_cnst.h"
#include "parserhtm_glob.h"
#include "parserhtm_tokn.h"
#include "parserhtm_err.h"
#include "parse.h"
#pragma hdrstop
/* Semantic value shared with the parser; yylex() stores a copy of the */
/* token text in yylval.pchar.                                          */
extern YYSTYPE yylval;
/* Scanner states; nexttok() chooses its branch by the current state. */
enum {TEXT=0, PRE, KEYWORD, ATTR, AVALUE, IN_QUOTE};
/* Printable state names, listed in the same order as the enum above. */
char *states [ 6] = {"TEXT", "PRE", "KEYWORD", "ATTR", "AVALUE", "IN_QUOTE"};
/* Printable token-type names. NOTE(review): presumably indexed by an   */
/* offset from the first token value in the token header - confirm.     */
char *tktypes[10] = {"_OpenTag", "_CloseTag", "_EndTag", "_C_KEYWORD",
"_S_KEYWORD", "_A_KEYWORD", "_V_KEYWORD", "_NUM",
"_IDENTIFIER", "_QUOTED_ATTR"};
/* text, preformatted text, HTML KEYWORD, attribute KEYWORD, */
/* attribute value KEYWORD */
/* in_close / in_open: set by nexttok() when it returns "</" or "<",    */
/* read by yylex() when it decides whether text needs an implied tag.   */
int in_close = FALSE;
int in_open = FALSE;
/* Maintained by yylex(): non-zero while a paragraph is considered open. */
int opened_par = FALSE;
/* Current scanner state and the state saved when a tag is entered.     */
/* old_state is only written, never read, by the functions shown here.  */
int state = TEXT;
int old_state = TEXT;
/* Not referenced by the functions visible in this listing. */
int cUKSZ = 0;
/* yylex() look-aside stack: USE_BUFFER says that queued tokens are      */
/* pending, lex_buff_size is the number of entries still queued.        */
int USE_BUFFER = FALSE;
int lex_buff_size = 0;
/* One queued token: its type and its text. */
typedef struct {
int tktyp;
char tkval[NMSZ];
} tbuff;
/* yylex() fills entries from index 0 upwards and pops from the top. */
tbuff lex_buff[5];
/* Look-ahead character: nexttok() keeps c one character ahead of the   */
/* token it is returning.                                               */
int c;
/* Not referenced by the functions visible in this listing. */
int lineno;
/* Incremented by nexttok() after every nextchar() call. */
long charno;
/* f_size and icm are not referenced by the functions visible here. */
long f_size;
int icm;
/* !!!!!! */
/* Comment buffer; not referenced by the functions visible here. */
char comment[CMSZ];
/* !!!!!! */
/* One entry of a keyword table searched by bsearch(): the spelling,    */
/* the token type returned for it, and a flag set only in               */
/* keyword_table. NOTE(review): in_paragraph presumably marks tags that */
/* may appear inside a paragraph - its readers are truncated in yylex(). */
typedef struct {
char name[TKSZ];
int kw_token;
int in_paragraph;
} kw_table;
/********** functions declarations ************/
/* Only bsearch(), nexttok() and yylex() are defined in this listing;   */
/* the others are defined elsewhere.                                    */
void fixfile(FILE *, char*);
int nextchar(FILE *, FILE *);
void nlproc (FILE *);
int bsearch (char *, kw_table *, int);
int nexttok (char *);
void lexinit();
int yylex();
/**********************************************/
/* Possible KEYWORDS - directives */
kw_table keyword_table[KWSZ] = {
- 2 -
{"A", _C_KEYWORD, TRUE },
// ...
{"WBR", _S_KEYWORD, TRUE }
};
/* Possible KEYWORDS - attributes */
kw_table attr_table[ATSZ] = {
{"ALIGN", _A_KEYWORD },
// ...
{"WRAP", _A_KEYWORD }
};
/* Possible KEYWORDS - attribute's values */
kw_table aval_table[AVSZ] = {
{"ABSBOTTOM", _V_KEYWORD },
// ...
{"_top", _V_KEYWORD }
};
/*
 * Functions
 */

/*
 * bsearch - binary search for a name in a keyword table.
 *
 * word        NUL-terminated string to look up; compared with strcmp(),
 *             so the match is case-sensitive.
 * word_table  array of kw_table records (keyword spelling plus its int
 *             token type); must be sorted in ascending strcmp() order of
 *             .name, otherwise entries can be missed.
 * tbsize      number of records in word_table; 0 means an empty table.
 *
 * Returns the index of the matching record, or -1 if nothing was found.
 */
int bsearch( char word[], kw_table word_table[], int tbsize)
{
    int low = 0;
    int high = tbsize - 1;

    while (low <= high) {
        /* low + (high - low) / 2 cannot overflow, unlike (low + high) / 2. */
        int middle = low + (high - low) / 2;
        int cmp = strcmp(word, word_table[middle].name);

        if (cmp < 0) {
            high = middle - 1;
        } else if (cmp > 0) {
            low = middle + 1;
        } else {
            return middle;
        }
    }
    return -1;
}
/*
 * nexttok - read the next raw token from infp.
 *
 * The token text is copied into val as a NUL-terminated string (val must
 * have room for NMSZ bytes) and the token type is returned.  The global c
 * always holds one character of look-ahead; charno is advanced for every
 * character fetched through nextchar().  Names and quoted values longer
 * than NMSZ - 1 characters are truncated instead of overrunning the buffer.
 *
 * Depending on the scanner state the function returns
 *   _OpenTag / _CloseTag  for "<" / "</" met in TEXT or PRE state;
 *   a keyword_table or attr_table token, or _IDENTIFIER for an unknown
 *                         name, in KEYWORD state;
 *   '"' or _QUOTED_ATTR   in IN_QUOTE state;
 *   _IDENTIFIER           for a single white-space character in PRE state
 *                         and, with empty text, for a "<!...>" comment;
 *   0                     when none of the above applies (val is left
 *                         untouched in that case).
 */
int nexttok(char *val)
{
    char tokenvalue[NMSZ];
    const int limit = (int)sizeof tokenvalue - 1;  /* room left for the NUL */
    int i = 0;
    int idx, c1, tokentype;

    /* Skip white space; c is always one char ahead. */
    while (isspace(c)) {
        if (c == '\n') nlproc(listfp);
        if (state == PRE) {
            /* In preformatted text every white-space character is a token. */
            tokenvalue[0] = c; tokenvalue[1] = '\0';
            c = nextchar(infp, listfp); charno++;
            strcpy(val, tokenvalue);
            return _IDENTIFIER;
        }
        c = nextchar(infp, listfp); charno++;
    }

    if ((state == TEXT || state == PRE) && c == '<') {
        /* Peek at the character after '<' without consuming it. */
        ungetc(c1 = getc(infp), infp);
        old_state = state; state = KEYWORD;
        if (c1 == '/') {
            c = nextchar(infp, listfp); charno++;   /* now c is '/' */
            c = nextchar(infp, listfp); charno++;   /* first char of the name */
            in_close = TRUE; in_open = FALSE;
            strcpy(val, "</");
            return _CloseTag;
        }
        c = nextchar(infp, listfp); charno++;
        in_close = FALSE; in_open = TRUE;
        strcpy(val, "<");
        return _OpenTag;
    }

    if (state == KEYWORD) {
        if (c == '!') { /* Comments! */
            /* Skip up to (not including) the closing '>' or end of file. */
            while ((c != '>') && (c != EOF)) {
                c = nextchar(infp, listfp); charno++;
            }
            state = ATTR;
            val[0] = '\0';
            return _IDENTIFIER;
        }
        while (isalnum(c)) {
            if (i < limit) tokenvalue[i++] = toupper(c);
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';

        idx = bsearch(tokenvalue, keyword_table, KWSZ);
        if (idx >= 0) {
            tokentype = keyword_table[idx].kw_token;
            state = ATTR;
            if (strcmp("PRE", tokenvalue) == 0) {
                /* <PRE> switches to preformatted text, </PRE> back to text. */
                old_state = in_close ? TEXT : PRE;
            }
        } else {
            idx = bsearch(tokenvalue, attr_table, ATSZ);
            /* Unknown attribute. Actually, it's much easier just to ignore */
            /* it in YACC than to try to skip it here.                      */
            tokentype = (idx >= 0) ? attr_table[idx].kw_token : _IDENTIFIER;
            state = AVALUE;
        }
        strcpy(val, tokenvalue);
        return tokentype;
    }

    if (state == IN_QUOTE) {
        if (c == '"') {
            c = nextchar(infp, listfp); charno++;
            state = ATTR;
            strcpy(val, "\"");
            return '"';
        }
        /* maybe URL, maybe rain, maybe snow... */
        while ((c != '"') && (c != EOF)) {
            if (i < limit) tokenvalue[i++] = c;
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';
        /* The closing quotation mark is left in c for the next call.  An   */
        /* unterminated value ends at EOF; leave the quote state so that    */
        /* the next call reports end of input instead of an endless stream  */
        /* of empty values.                                                 */
        if (c == EOF) state = ATTR;
        strcpy(val, tokenvalue);
        return _QUOTED_ATTR;
    } /* end if for (state == IN_QUOTE) */

    return 0; /* should never happen... */
}
/*
 * yylex - token source for the parser.
 *
 * Returns the type of the next token and stores a freshly malloc'ed copy
 * of its text in yylval.pchar.  Tokens come from nexttok(); when extra
 * tokens have to be inserted around a tag or a piece of text they are
 * placed in lex_buff and handed out on the following calls.  lex_buff is
 * popped from the highest index down, so entry [0] is returned last.
 *
 * NOTE(review): many lines of this listing are cut off at the right
 * margin ("..."), so the strings copied into lex_buff are not visible;
 * remarks about what the queued sequences mean are therefore hedged.
 * The line "- 5 -" further down is a page marker, not code.
 */
int yylex()
{
int tktyp;
char tkval[NMSZ];
if (!USE_BUFFER) {
/* Nothing queued: fetch a fresh token and see whether it needs company. */
tktyp = nexttok(tkval);
switch (tktyp) {
case _OpenTag: {
/* Read the token that follows "<". */
tktyp = nexttok(tkval);
if (opened_par) {
/* NOTE(review): when a paragraph is open and this token is not a    */
/* _C_KEYWORD/_S_KEYWORD nothing is queued, so the _OpenTag itself   */
/* is never returned - confirm this is intended.                     */
if (tktyp == _C_KEYWORD || tktyp == _S_KEYWORD) {
if (!keyword_table[bsearch(tkval, keyword_table, KW...
/* Queued types, in the order they are returned: _CloseTag,          */
/* _C_KEYWORD, _EndTag, _OpenTag, then the tag just read -           */
/* presumably an implied paragraph close before the new tag.         */
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[...
lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...
lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...
lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 5; USE_BUFFER = TRUE;
opened_par = (strcmp(tkval, "P")==0);
} else {
/* Returned order: _OpenTag, then the tag just read. */
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[...
lex_buff_size = 2; USE_BUFFER = TRUE;
}
}
} else { // i.e. opened_par == FALSE
/* A paragraph becomes open only if the tag just read is "P". */
opened_par = (strcmp(tkval, "P")==0);
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _OpenTag ; strcpy(lex_buff[1].tkv...
lex_buff_size = 2; USE_BUFFER = TRUE;
}
break;
}
case _CloseTag: {
/* Read the token that follows "</". */
tktyp = nexttok(tkval);
if (opened_par) {
/* NOTE(review): the bsearch() result is used as an index without a  */
/* check; nexttok() can return a name that is not in keyword_table,  */
/* in which case bsearch() returns -1 - confirm and guard.           */
if (keyword_table[bsearch(tkval, keyword_table, KWSZ)]...
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0]....
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1]....
lex_buff_size = 2; USE_BUFFER = TRUE;
} else {
if (strcmp(tkval, "P")==0) {
/* Explicit paragraph close. */
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 2; USE_BUFFER = TRUE;
opened_par = FALSE;
} else {
/* Returned order: _CloseTag, _C_KEYWORD, _EndTag, _CloseTag, then   */
/* the tag just read - presumably an implied paragraph close first.  */
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...
lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...
lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 5; USE_BUFFER = TRUE;
opened_par = FALSE;
}
}
} else {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkv...
- 5 -
lex_buff_size = 2; USE_BUFFER = TRUE;
}
break;
}
case _IDENTIFIER: {
/* Text outside any tag while no paragraph is open.  Returned order: */
/* _OpenTag, _C_KEYWORD, _EndTag, then the text itself - presumably  */
/* an implied paragraph open.                                        */
if ( !(in_open || in_close) && (!opened_par)) {
lex_buff[0].tktyp = _IDENTIFIER; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...
lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...
lex_buff[3].tktyp = _OpenTag ; strcpy(lex_buff[3].tkv...
lex_buff_size = 4; USE_BUFFER = TRUE;
opened_par = TRUE;
}
break;
}
case 0: { // EOF
/* Returned order: _CloseTag, _C_KEYWORD, _EndTag, then the 0 token. */
if (opened_par) {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...
lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...
lex_buff[3].tktyp = _CloseTag ; strcpy(lex_buff[3].tkv...
lex_buff_size = 4; USE_BUFFER = TRUE;
/* NOTE(review): every other branch that queues a closing sequence   */
/* clears opened_par; setting it to TRUE here looks like a slip -    */
/* confirm.                                                          */
opened_par = TRUE;
}
}
}
}
if (USE_BUFFER) {
/* Pop the most recently queued entry. */
tktyp = lex_buff[--lex_buff_size].tktyp;
strcpy(tkval, lex_buff[ lex_buff_size].tkval);
if (lex_buff_size == 0) USE_BUFFER = FALSE;
}
/* NOTE(review): the malloc() result is not checked, and when nexttok()  */
/* returned 0 through its final fall-through it did not write tkval, so  */
/* strlen() may read an uninitialized buffer here - confirm.             */
yylval.pchar = (char *) malloc(1 + strlen(tkval));
strcpy(yylval.pchar, tkval);
return(tktyp);
}
Примечание: распечатки файлов htm_cnst.h, htm_glob.h, htm_tokn.h и
htm_err.h см. в приложении (appendix) 4.4.
- 6 -
Лабораторная работа 2
Задание:
Построить форматизатор C-файлов. Необходимо реализовать
следующие функции:
- `{` - всегда с новой строки без отступа
- `{` - никогда не переносится
- `{` - всегда с новой строки с отступом
- 0 - вставлять символ TAB при отступе
- 1..8 вставлять x пробелов
- форматировать комментарии c xx по yy позиции
- несколько команд на строке
- `=` выделять пробелами
Текст программы:
#include
#include
#include
#include
/* Lexeme classes returned by getNext(). */
#define ERROR 0
#define IDENT 1
#define KEYWORD 2
#define BRACKETS 3
#define OTHER 4
/* Single-character lexemes; format() switches on these character codes. */
#define BEGIN '{'
#define END '}'
#define COMMA ','
#define SEMI ';'
/* Line break.  The listing had 'n' here: the backslash of the '\n' escape */
/* was lost, which made format() treat the letter n as a line break.       */
#define LB '\n'
/* Text of the current lexeme: written by getNext(), printed by outVal(). */
char val[100];
/* Pending comment text; outCR() flushes it when it is not empty. */
char comment[200];
/* Current output column: advanced by the output helpers, reset after a newline. */
int pos=0;
/* Lexeme that format() read ahead and hands back to getNext(). */
int undo;
/* Set by outCR(), consumed by outVal(). */
int backspace;
/* Words that getNext() reports as KEYWORD; N_KW is the number of entries. */
#define N_KW 8
char keywords[N_KW][20]={
"for",
"while",
"do",
"if",
"switch",
"else",
"case",
"default"};
// -----------------------------------------------------------------
// Copies a quoted literal from f into the buffer s points at.
//
// The first character read from f is taken as the opening quote and is
// copied as well; copying stops after the matching closing quote.  A
// backslash copies the following character verbatim, so an escaped quote
// does not end the literal, and the other kind of quote inside the literal
// is ordinary text.  End of file also ends the copy instead of looping
// forever.  If the first character is not a quote at all, either kind of
// quote ends the copy, as before.
//
// s is advanced past the last character stored; no terminating NUL is
// written and the buffer size is not known here, so the caller must
// provide enough room.
void blockQuote (char *(&s),FILE *f)
{
    const int open=fgetc(f);
    if (open==EOF) return;
    *(s++)=(char)open;
    const bool isQuote=(open=='\'' || open=='"');
    for (;;) {
        int c=fgetc(f);
        if (c==EOF) return;            // unterminated literal
        *(s++)=(char)c;
        if (c=='\\') {
            c=fgetc(f);
            if (c==EOF) return;
            *(s++)=(char)c;
            continue;                  // an escaped character never closes
        }
        if (isQuote ? c==open : (c=='\'' || c=='"')) return;
    }
}
// -----------------------------------------------------------------
// Lexer and output helpers of the formatter: getNext(), outVal(), outCR()
// and format().
//
// NOTE(review): this part of the listing is damaged and does not compile
// as shown:
//   * the text between a '<' and the following '>' is missing (for
//     example the loops "for (int i=0;i ..." have lost their condition
//     and part of their body), and several functions are run together on
//     the same physical lines;
//   * "- 7 -", "- 8 -" and "- 9 -" are page markers, not code;
//   * the newline escape lost its backslash (fputc ('n',f)) and the
//     apostrophe literal appears as ''' ;
//   * E, T, N and F3 are not defined anywhere in the listing - presumably
//     the formatting options of the assignment; confirm.
//
// getNext(f): reads one lexeme from f into the global val.  In the code
//   that survives, a run of letters, digits, '_' and '.' is returned as
//   KEYWORD or IDENT, a quoted literal is copied with blockQuote() and
//   returned as IDENT, and any other character is stored alone and
//   returned as OTHER ('=' gets a space on each side when E is set and the
//   previous lexeme was not a symbol).  The local buf takes the value of
//   undo but is not used in the visible code.
// outVal(f, back): writes val to f, advancing pos for every character; when
//   backspace is set and back is 0 it first emits indentation, then clears
//   backspace.
// outCR(f, tab, newLine): flushes a pending comment through outComment()
//   (not defined in this listing), starts a new output line unless newLine
//   is set, and emits indentation.
// format(f_in, f_out): reads lexemes until end of file and dispatches on
//   the lexeme; returns 1 on both exit paths.
//   NOTE(review): tab is incremented for BEGIN but no decrement for END is
//   visible; in the LB case "used" is read without being initialized when
//   no comment is pending; tab2 is never used - confirm against the
//   original source.
int getNext (FILE *f)
{
char *s=val,c;
int buf=undo;
static int symbol=0;
static int startPos=1;
c=fgetc(f);
- 7 -
startPos=0;
if (isalnum(c) || c=='_') {
symbol=0;
*(s++)=c;
while ((isalnum(c) || c=='_' || c=='.') && !feof(f) && !isspace(c))
c=*(s++)=fgetc(f);
ungetc (c,f);
*(--s)=0;
for (int i=0;i if (i!=N_KW) *(s++)=' '; *s=0; if (i!=N_KW) return KEYWORD; return IDENT; } if (c==''' || c=='"') { symbol=0; ungetc(c,f); blockQuote (s,f); *s=0; return IDENT; } if (c=='=' && E && !symbol) { *(s++)=' '; *(s++)=c; *(s++)=' '; *s=0; return OTHER; } *s=c, *(s+1)=0; symbol=1; return OTHER; } void outVal (FILE
*f,int back=0) { char *s=val; if (backspace && !back) if (!T) { fputc (9,f); pos+=F3; } else { for (int j=0;j { fputc (' ',f); pos++; } } backspace=0; while (*s) { fputc (*(s++),f); pos++; } } void outCR (FILE
*f,int tab,int newLine=0) { int limit=tab-1+newLine; // Out Comments if (*comment) outComment (f); if (!newLine) { fputc ('n',f); pos=0; - 8 - } for (int i=0;i { if (!T) { fputc (9,f); pos+=F3; } else { for
(int j=0;j
fputc (' ',f);
pos++; } } } if (tab && !newLine) backspace=1; } //
----------------------------------------------------------------- int format (FILE
*f_in,FILE *f_out) { int tab=0,tab1=0,tab2=0,lb=0; int lex; while (!feof(f_in)) { lex=getNext(f_in); switch (lex) { case KEYWORD: {
lb=0;
outVal (f_out);
lex=getNext(f_in);
break; } case BEGIN: {
tab1=0;
if (!lb && N!=2) outCR (f_out,tab);
if (N==3) outCR (f_out,1,1);
outVal(f_out);
outCR (f_out,++tab);
lb=1;
break; } case END: {
if (!lb) outCR (f_out,tab);
lb=1;
tab1=0;
lex=getNext (f_in);
if (lex==LB) lex=getNext(f_in);
undo=lex;
outCR (f_out,tab);
lb=1;
break; } case SEMI: {
lb=0;
tab1=0;
outVal (f_out);
lex=getNext(f_in);
undo=lex;
break; } case IDENT: {
lb=0;
outVal (f_out);
lex=getNext (f_in);
undo=lex;
break; } case LB: { - 9 -
int used;
tab1=0;
if (*comment) {
outCR (f_out,tab);
used=1;
}
lex=getNext (f_in);
if (N!=2 || lex!=BEGIN) {
if (!used) outCR (f_out,tab);
lb=1;
}
undo=lex;
break; } case EOF:
return 1; default: {
lb=0;
outVal(f_out); } } } return 1; }
Примечание:
Необходимые функции были реализованы в полном объеме и при сдаче
нареканий не вызвали.
(Тестовый пример приведен в приложении (appendix) 4.5)
- 10 -
Лабораторная работа 3
Задание:
Проанализировать хф и выбрать наилучшую для случайного распределения
ид-ов. Обобщенная формула вычисления хф:
h0 = 0; h(i) = Alf * h(i-1) [+] C(i), i = 1..k
k - длина строки. [+] - некоторая произвольная операция (+, -, <<, _rotl, ^, |, &)
Проанализировать эти и любые другие хф на количество коллизий
(конфликтов) для некоторых случайных последовательностей.
Например:
1) id'ы языка C (на 50).
2) ---- "" ---- (на 100).
3) ---- "" ---- (на 1000).
4) Внешние имена стандартной библиотеки BC++.
5) Внешние имена графической библиотеки BC++.
6) Случайно генерируемые имена (~600)
7) Английские слова с префиксами и/или суффиксами (xxx) - около 200
8) 300 имен вида: w000, w001, w002, etc
и статистики свести в таблицу/график
Текст программы анализатора:
#include
#include
// Four string hash functions compared by the program.  Each one walks the
// NUL-terminated string s and folds its characters into an accumulator
// that starts at 0.
// NOTE(review): the functions are run together on shared lines in this
// listing, "- 11 -" inside hash_4 is a page marker, and the trailing
// "main (int" is the start of the next definition.
// hash_shift: h = (h << 1) + c for every character c.
unsigned
hash_shift(char *s) { unsigned hash = 0; while (*s) hash = (hash << 1) + *s++; return hash; } unsigned
// hash_rotl: h = _rotl(h, 1) ^ c.  _rotl is not part of standard C;
// presumably the compiler's rotate-left intrinsic - confirm its header.
hash_rotl(char *s) { unsigned hash = 0; while (*s) hash = _rotl(hash,1) ^ *s++; return hash; } long
// hash_pgw: h = (h << 4) + c; whenever any of bits 28..31 (mask
// 0xF0000000) is set, those bits are XOR-ed back in shifted right by 24
// and then cleared.  Looks like the classic PJW string hash - confirm.
hash_pgw(char *s) { char *p; unsigned long h = 0, g; for(p=s;*p;p++) { h = (h << 4) + *p; if
(g = h & 0xF0000000) { h =
h^(g >>24); h =
h^g; } } return h; } unsigned hash_4
// hash_4: h = D * h + c with D = 5.
// NOTE(review): "const D=5;" has no type specifier (implicit int), which
// current C and C++ compilers reject.
(char *s) { unsigned h = 0; const D=5; - 11 - while (*s) h = D*h + *s++; return h; } main (int
argc,char **argv) { int n=0,total=0; int placed1=0,placed2=0,placed3=0,placed4=0; int max1=0,max2=0,max3=0,max4=0; int *table1,*table2,*table3,*table4; FILE *f; char buf[100]; puts
(" ¡à âà ï
à ¡â 3.
ˆáᥢ ¨¥
åíè-äãªæ¨¨."); puts ("ï¯ã¢
ˆ.. -61"); if (argc<3) { puts ("n HASH.COM puts (" n -
ç¨á í¥¥â¢ ¢
â ¡¨æ¥"); puts (" file_name -
¨ï ä á
¨¥â¨ä¨ª âà ¨ n"); return 0; } n=atoi(argv[1]); if (n<10) { puts ("n ¨èª
¥ìª ï
â ¡¨æ n"); return 0; } table1=(int*)calloc(n,sizeof(int)); table2=(int*)calloc(n,sizeof(int)); table3=(int*)calloc(n,sizeof(int)); table4=(int*)calloc(n,sizeof(int)); if (!table1 || !table2 || !table3 ||
!table4) { puts ("n¥
å¢ â ¥â ¯ ïâ¨
ï â ¡¨æn"); return 0; } if ((f=fopen(argv[2],"r"))==NULL) { puts ("n¥
£ã âªàëâì
㪠ë ä n"); return 0; } while (!feof(f)) if (fscanf (f,"%s",buf)==1) { table1[hash_shift(buf)%n]++; table2[hash_rotl(buf)%n]++; table3[hash_pgw(buf)%n]++; table4[hash_4(buf)%n]++; puts (buf); total++; } puts
("â â¨á⨪ :"); for (int i=0;i { if (table1[i]) { if
(max1 placed1++; } if (table2[i]) { if
(max2 placed2++; } if (table3[i]) { if
(max3 placed3++; } - 12 - if (table4[i]) { if
(max4 placed4++; } } printf (" HASH_SHIFT : %f max=%dn",(float)total/placed1,max1); printf (" HASH_ROTL : %f
max=%dn",(float)total/placed2,max2); printf (" HASH_PGW : %f
max=%dn",(float)total/placed3,max3); printf (" HASH_4 : %f
max=%dn",(float)total/placed4,max4); fclose (f); return 0; }
Copyright (c) 2025 Stud-Baza.ru , , , .