2013年5月8日水曜日
単語切り出しプログラムOCP.C(その1)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size in bytes of the fixed text buffers (struct line.text, quo1d, quo2d). */
#define AMAX 4096
/* Byte value 0xff, intended as an end-of-input sentinel. */
#define EEOF 0xff
/* Banner string printed at startup and written at the top of the report file. */
#define VER "Token Count Program (OCP) Ver.130425a"
/*
 * One token record in a singly linked list.
 * main() fills `text` with the token, stores the source line in `gyo` and
 * the column in `pos`, and prints `count` next to the token in the report.
 */
struct line {
    char text[AMAX];      /* NUL-terminated token text */
    long count;           /* printed as "(count)" in the report */
    long gyo;             /* source line number */
    long pos;             /* position within the source line */
    struct line *next;    /* following token record, or NULL */
    struct line2 *data2;  /* list of occurrences printed after the token */
};
/*
 * One occurrence record attached to a token (struct line.data2).
 * The report prints each record as ",L<gyo>(S<pos>)".
 */
struct line2 {
    long count;            /* not read by the visible code */
    long gyo;              /* source line number of the occurrence */
    long pos;              /* position within that line */
    struct line2 *next2;   /* following occurrence record, or NULL */
};
/* ---- Token list state ---- */
struct line *stt;      /* list returned by shojun(); walked when printing the report */
struct line *start;    /* first node of the list built while reading the input */
struct line *last;     /* not referenced by main() */
struct line *gcdata;   /* node whose text buffer is currently being filled */
struct line *olddata;  /* node consulted for the previous token's text */
struct line *info;     /* cursor used while sorting and printing */

/* ---- Scratch nodes carrying the line/position of a pending token ---- */
struct line *fwe;      /* ordinary word */
struct line *ftuu;     /* '.', ':', '?' and the 0x0d marker */
struct line *gte;      /* '>' family */
struct line *lte;      /* '<' family */
struct line *note;     /* '!' family */
struct line *eqe;      /* '=' family */
struct line *elsee;    /* "else" / "else if" */
struct line *quo1e;    /* double-quoted literal */
struct line *quo2e;    /* single-quoted literal */

/* ---- Occurrence list state ---- */
struct line2 *gcdata2; /* not referenced by main() */
struct line2 *info2;   /* cursor over a token's occurrence list while printing */
struct line2 *old2;    /* not referenced by main() */

/* ---- Files ---- */
FILE *fp;              /* input source file */
FILE *fs;              /* output report file */

/* ---- Buffers and counters ---- */
char quo1d[AMAX];      /* text of the double-quoted literal being collected */
char quo2d[AMAX];      /* text of the single-quoted literal being collected */
long x30[30];          /* counts of '0'..'9', indexed by (c - 0x30) */
long x41[30];          /* counts of 'A'..'Z', indexed by (c - 0x41) */
long x61[30];          /* counts of 'a'..'z', indexed by (c - 0x61) */

/* ---- Helpers defined elsewhere in the file ---- */
struct line *shojun(struct line *temp);
struct line *ryaku(struct line *temp);
struct line *node_create();
struct line2 *node2_create();
void omake(int a, int b,int c, long *d);
/*
 * Entry point of OCP.
 *
 * Reads the file named by argv[1] one byte at a time, cuts it into tokens
 * (words, quoted literals and a handful of operators), hands every token to
 * ryaku() together with its line/position, then prints the resulting list
 * and per-character statistics both to stdout and to a report file.
 *
 * File names:
 *   - input : argv[1] (at most 40 characters); ".c" is appended when the
 *             argument contains no '.'.
 *   - output: the part of argv[1] before its first '.', followed by ".ocp".
 *     NOTE(review): an argument such as "./foo.c" therefore yields ".ocp";
 *     this historical behavior is kept unchanged.
 *
 * Returns 0 on success; exits with status 1 when the argument is missing or
 * a file cannot be opened.
 */
int main(int argc,char *argv[]){
    enum { NAMEMAX = 40 };   /* maximum number of argument characters used */
    /*
     * Characters counted individually; the index of a character in this
     * table is its slot in cn[] (0:'\n' 1:0x0d 2:' ' 3:';' ... 34:'\t').
     */
    static const char cnsym[]="\n\r ;(){}[]!?\"'\\&|<>#%+-*/=:,.@_^~`\t";
    const char *hit;
    /* Room for NAMEMAX characters plus the appended extension and NUL. */
    char fname[NAMEMAX+8]="",fold[NAMEMAX+8]="",*arx;
    int p;                   /* int, so that EOF is distinct from every byte */
    long pos=1,gyo=1,si,cn[50]={0};
    int i,dotflg=0,fwflg=0,ltflg=0,gtflg=0,notflg=0;
    int eqflg=0,wp=0,elflg=0,quo1f=0,quo2f=0,q1=0,q2=0;

    printf("%s\n",VER);
    if(argc<2){
        printf("ファイルがありません。\n\n");
        printf("./ocp [ファイル名]\n");
        exit(1);
    }

    /* Derive the input name (fold) and the output base name (fname). */
    si=0; dotflg=0;
    arx=argv[1];
    while(*arx != '\0' && *arx != ' ' && si<NAMEMAX){
        if(*arx == '.') dotflg=1;
        if(dotflg==0){ fname[si]=*arx; fname[si+1]='\0'; }
        fold[si] = *arx;
        arx++;
        si++;
    }
    fold[si]='\0';
    if(dotflg==0) strcat(fold,".c");
    strcat(fname,".ocp");

    for(i=0;i<30;i++){
        x30[i]=0;
        x41[i]=0;
        x61[i]=0;
    }

    fp=fopen(fold,"r");
    if(fp==NULL){
        printf("cannot open file %s \n",fold);
        exit(1);
    }
    printf("Input file name: %s\n",fold);

    /* Working node plus one scratch node per token family. */
    gcdata=olddata=start=node_create();
    fwe  =node_create();
    ftuu =node_create();
    gte  =node_create();
    lte  =node_create();
    note =node_create();
    eqe  =node_create();
    elsee=node_create();
    quo1e=node_create();
    quo2e=node_create();
    fwe->pos=pos;
    fwe->gyo=gyo;

    while( (p=fgetc(fp)) != EOF){
        if(p=='\n'){
            gyo++; pos=0;
            /* A literal still open at end of line is emitted as it stands. */
            if(quo1f==1){
                strcpy(gcdata->text,quo1d);
                ryaku(quo1e);
            }
            if(quo2f==1){
                strcpy(gcdata->text,quo2d);
                ryaku(quo2e);
            }
            quo1f=quo2f=0;
            gtflg=ltflg=eqflg=notflg=0;
        }

        /* Per-character statistics. */
        hit=memchr(cnsym,p,sizeof cnsym-1);
        if(hit) cn[hit-cnsym]++;
        if(0x30<=p && p<=0x39) x30[p-0x30]++;
        if(0x41<=p && p<=0x5a) x41[p-0x41]++;
        if(0x61<=p && p<=0x7a) x61[p-0x61]++;

        if((p=='\n'|| p=='\0' || p==' '|| p=='!' ||
            p=='(' || p==')' || p==0x0d || p=='\t' ||
            p==',' || p=='.' || p==':'|| p==';' ||
            p=='<' || p=='=' || p=='>'|| p=='?' ||
            p=='{'|| p=='}') && quo1f==0 && quo2f==0){
            /* Separator outside any literal: flush the pending word. */
            if(fwflg==1){ ryaku(fwe); }
            fwflg=wp=0;

            /* Recognize "else" followed by "if" as the single token "else if". */
            if(strcmp(olddata->text,"else")==0){
                elflg=1;
                elsee->gyo=gyo;
                elsee->pos=pos;
            }
            else if(strcmp(olddata->text,"if")==0){
                if(elflg==1){
                    elflg=0;
                    strcpy(gcdata->text,"else if");
                    ryaku(elsee);
                }
            }
            else if(p!=' ') elflg=0;

            /*
             * Punctuation recorded as one-character tokens.
             * Optional: '(' ')' '{' '}' can be recorded the same way by
             * adding them to this condition.
             */
            if(p=='.' || p==':' || p=='?'){
                ftuu->pos=pos;
                ftuu->gyo=gyo;
                gcdata->text[0]=(char)p;
                gcdata->text[1]='\0';
                ryaku(ftuu);
            }

            if(p=='<'){
                gtflg=notflg=eqflg=0;
                if(ltflg==0){
                    ltflg=1;
                    lte->pos=pos;
                    lte->gyo=gyo;
                    strcpy(gcdata->text,"<");
                    ryaku(lte);
                }
                else{
                    ltflg=0;
                    strcpy(gcdata->text,"<<");
                    ryaku(lte);
                }
            }

            if(p=='>'){
                /* Turn a previous token ending in '-' into "...->". */
                i=(int)strlen(olddata->text);
                if(i>0 && i+1<AMAX && olddata->text[i-1]=='-'){
                    olddata->text[i]='>';
                    olddata->text[i+1]='\0';
                }
                ltflg=notflg=eqflg=0;
                if(gtflg==0){
                    gtflg=1;
                    gte->pos=pos;
                    gte->gyo=gyo;
                    strcpy(gcdata->text,">");
                    ryaku(gte);
                }
                else{
                    gtflg=0;
                    strcpy(gcdata->text,">>");
                    ryaku(gte);
                }
            }

            if(p=='!'){
                gtflg=ltflg=eqflg=0;
                notflg=1;
                note->pos=pos;
                note->gyo=gyo;
                strcpy(gcdata->text,"!");
                ryaku(note);
            }

            if(p=='='){
                /* Turn a previous token ending in - + * / % into "...<op>=". */
                i=(int)strlen(olddata->text);
                if(i>0 && i+1<AMAX){
                    char prev=olddata->text[i-1];
                    if(prev=='-' || prev=='+' || prev=='*' ||
                       prev=='/' || prev=='%'){
                        olddata->text[i]='=';
                        olddata->text[i+1]='\0';
                    }
                }
                if(eqflg==0){
                    eqflg=1;
                    eqe->pos=pos;
                    eqe->gyo=gyo;
                    /* A lone '=' is not recorded because it produces too
                       much output; enable the two lines below if wanted.
                    strcpy(gcdata->text,"=");
                    ryaku(eqe);
                    */
                }
                else{
                    eqflg=0;
                    strcpy(gcdata->text,"==");
                    ryaku(eqe);
                }
                if(ltflg==1){
                    ltflg=0;
                    strcpy(gcdata->text,"<=");
                    ryaku(lte);
                }
                if(gtflg==1){
                    gtflg=0;
                    strcpy(gcdata->text,">=");
                    ryaku(gte);
                }
                if(notflg==1){
                    notflg=0;
                    strcpy(gcdata->text,"!=");
                    ryaku(note);
                }
            }

            /* Carriage returns are reported as an explicit marker token. */
            if(p==0x0d){
                ftuu->pos=pos;
                ftuu->gyo=gyo;
                strcpy(gcdata->text," ASCII CODE : 0x0d");
                ryaku(ftuu);
            }
            pos++;
        }
        /* A quote preceded by a backslash in the collected literal is data. */
        else if(p=='\"' && quo2f==0 && (q1==0 || quo1d[q1-1] != 0x5c)){
            if(quo1f==0){
                /* Opening quote: flush the pending word, start collecting. */
                if(fwflg==1){ ryaku(fwe); }
                fwflg=wp=0;
                quo1f=1;
                quo1e->gyo=gyo;
                quo1e->pos=pos;
                q1=0;
                quo1d[q1++]='\"';
                quo1d[q1]='\0';
            }
            else{
                /* Closing quote: emit the whole literal as one token. */
                quo1f=0;
                quo1d[q1++]='\"';
                quo1d[q1]='\0';
                strcpy(gcdata->text,quo1d);
                ryaku(quo1e);
            }
            pos++;
        }
        else if(p=='\'' && quo1f==0 && (q2==0 || quo2d[q2-1] != 0x5c)){
            if(quo2f==0){
                if(fwflg==1){ ryaku(fwe); }
                fwflg=wp=0;
                quo2f=1;
                quo2e->gyo=gyo;
                quo2e->pos=pos;
                q2=0;
                quo2d[q2++]='\'';
                quo2d[q2]='\0';
            }
            else{
                quo2f=0;
                quo2d[q2++]='\'';
                quo2d[q2]='\0';
                strcpy(gcdata->text,quo2d);
                ryaku(quo2e);
            }
            pos++;
        }
        else{
            /* Ordinary character: append to the current word (and literal). */
            if(wp>=AMAX-1){
                /* No room left for this character plus the terminator. */
                fprintf(stderr,"token too long at line %ld; stopping\n",gyo);
                break;
            }
            gtflg=ltflg=eqflg=notflg=0;
            if(fwflg==0){
                fwflg=1;
                fwe->pos=pos;
                fwe->gyo=gyo;
            }
            pos++;
            gcdata->text[wp++]=(char)p;
            gcdata->text[wp]='\0';
            /* Keep two bytes free for the closing quote and the NUL. */
            if(quo1f==1 && quo2f==0 && q1<AMAX-2){
                quo1d[q1++]=(char)p;
                quo1d[q1]='\0';
            }
            if(quo1f==0 && quo2f==1 && q2<AMAX-2){
                quo2d[q2++]=(char)p;
                quo2d[q2]='\0';
            }
        }
    }

    /* Finish the node that was being filled when the input ended. */
    gcdata->text[wp]='\0';
    gcdata->count =1;
    if(fwflg==1){
        gcdata->pos=fwe->pos;
        gcdata->gyo=fwe->gyo;
    }
    fclose(fp);

    /* Detach every node and pass it to shojun(); keep the list it returns. */
    stt=info=start;
    while(info){
        start=info->next;
        info->next=NULL;
        stt  =shojun(info);
        info=start;
    }

    fs=fopen(fname,"w");
    if(fs==NULL){
        printf("cannot open file %s \n",fname);
        exit(1);
    }
    printf("Output file name: %s\n\n",fname);
    fprintf(fs,"%s\n",VER);
    fprintf(fs,"Input file name: %s\n",fold);
    fprintf(fs,"Output file name: %s\n\n",fname);

    /* The first node of the returned list is skipped, as before. */
    info=stt;
    if(info) info=info->next;
    while(info){
        printf(    "[%s] (%ld)",info->text,info->count);
        fprintf(fs,"[%s] (%ld)",info->text,info->count);
        info2=info->data2;
        while(info2){
            printf(    ",L%ld(S%ld)",info2->gyo,info2->pos);
            fprintf(fs,",L%ld(S%ld)",info2->gyo,info2->pos);
            info2=info2->next2;
        }
        printf("\n");
        fprintf(fs,"\n");
        info=info->next;
    }

    /* Per-character statistics, mirrored to stdout and the report file. */
    printf(    "\n[\\n]=%ld,[0x0d]=%ld,[space]=%ld,[;]=%ld\n",cn[0],cn[1],cn[2],cn[3]);
    fprintf(fs,"\n[\\n]=%ld,[0x0d]=%ld,[space]=%ld,[;]=%ld\n",cn[0],cn[1],cn[2],cn[3]);
    printf(    "[(]=%ld,[)]=%ld,[{]=%ld,[}]=%ld\n",cn[4],cn[5],cn[6],cn[7]);
    fprintf(fs,"[(]=%ld,[)]=%ld,[{]=%ld,[}]=%ld\n",cn[4],cn[5],cn[6],cn[7]);
    printf(    "[!]=%ld,[?]=%ld,[\"]=%ld,[\']=%ld,[\\]=%ld\n",cn[10],cn[11],cn[12],cn[13],cn[14]);
    fprintf(fs,"[!]=%ld,[?]=%ld,[\"]=%ld,[\']=%ld,[\\]=%ld\n",cn[10],cn[11],cn[12],cn[13],cn[14]);
    printf(    "[&]=%ld,[|]=%ld,[<]=%ld,[>]=%ld,[=]=%ld\n",cn[15],cn[16],cn[17],cn[18],cn[25]);
    fprintf(fs,"[&]=%ld,[|]=%ld,[<]=%ld,[>]=%ld,[=]=%ld\n",cn[15],cn[16],cn[17],cn[18],cn[25]);
    printf(    "[#]=%ld,[%%]=%ld,[+]=%ld,[-]=%ld,[*]=%ld,[/]=%ld\n",cn[19],cn[20],cn[21],cn[22],cn[23],cn[24]);
    fprintf(fs,"[#]=%ld,[%%]=%ld,[+]=%ld,[-]=%ld,[*]=%ld,[/]=%ld\n",cn[19],cn[20],cn[21],cn[22],cn[23],cn[24]);
    printf(    "[:]=%ld,[,]=%ld,[.]=%ld,[@]=%ld,[_]=%ld,[^]=%ld\n",cn[26],cn[27],cn[28],cn[29],cn[30],cn[31]);
    fprintf(fs,"[:]=%ld,[,]=%ld,[.]=%ld,[@]=%ld,[_]=%ld,[^]=%ld\n",cn[26],cn[27],cn[28],cn[29],cn[30],cn[31]);
    printf(    " [ =%ld, ] =%ld,[~]=%ld,[`]=%ld,[\\t]=%ld\n",cn[8],cn[9],cn[32],cn[33],cn[34]);
    fprintf(fs," [ =%ld, ] =%ld,[~]=%ld,[`]=%ld,[\\t]=%ld\n",cn[8],cn[9],cn[32],cn[33],cn[34]);

    /* Digit and letter tallies; omake() is defined elsewhere in the file. */
    omake( 0, 5,'0',x30);
    omake( 5,10,'0',x30);
    omake( 0, 7,'a',x61);
    omake( 7,14,'a',x61);
    omake(14,21,'a',x61);
    omake(21,26,'a',x61);
    omake( 0, 7,'A',x41);
    omake( 7,14,'A',x41);
    omake(14,21,'A',x41);
    omake(21,26,'A',x41);
    fclose(fs);
    return 0;
}
登録:
コメントの投稿 (Atom)
0 件のコメント:
コメントを投稿