円周率計算ソースファイル gu002c.c

ひとつ前に戻る

gu002c.c
/*
 calculation of PI.
 
 Example compilation:
 bcc32 gu002c.c fftsg_h.c
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>

void cal(int);
void fmul1604(short int *,short int *,short int *,int,double *);
void add1604(short int *,short int *,short int *,int);
void sub1604(short int *,short int *,short int *,int);
void xmul1604(short int *,int,int);
void div4(short int *,int);
void div2h(short int *,short int *,int);
void result1604_file(short int *,int);
/* fft */
void fft_dataset(short int *,int,double *,int);
void fft_fuku_xx(double *,int);
void fft_fuku   (double *,double *,int);
void fft_dataput(short int *,int,double *,int);
/* sqrt */
void gksqrt(short int *,short int *,short int *,int,double *);
void gksqrt_maincalc(short int *,short int *,short int *,int,double *);
void gksqrt_x3sub2d(short int *,short int *,int);
/* gyaku */
void gyaku(short int *,short int *,short int *,int,double *);
void gyaku_maincalc(short int *,short int *,short int *,int,double *);
void gyaku_x2sub(short int *,short int *,int);
/* sqrt & gyaku */
void h_add(short int *,short int *,int);
/* fftsg_h.c */
void rdft(int, int, double *);

void main(void)
{
    int bx,snum;
    
    printf("length of pi =? ");
    scanf("%d",&snum);
    for(bx=1;bx<snum;bx*=2);
    cal(bx);
}

/*
    a=1.0;
    b=1.0/2.0;
    t=1.0/2.0;
    x=2;
    for(i=0;i<n;i++){
        y=(a+b)/4.0;
        b=sqrt(a*b);
        a=y+b/2.0;
        t=t-x*(a-b);
        x*=2;
    }
    pi=(a+b)/t);
*/

void cal(int bx)
{
    int i,x,kt,log2_bx;
    short int *a,*b,*t,*y,*z;
    double *cp;
    time_t stime,rtime;
    
    stime=clock();
    
    kt=bx/4;
    for(log2_bx = 1; (1 << log2_bx) < bx; log2_bx++);
    
    a=(short int *)malloc((kt+4)*sizeof(short int));
    b=(short int *)malloc((kt+4)*sizeof(short int));
    t=(short int *)malloc((kt+4)*sizeof(short int));
    y=(short int *)malloc((kt+4)*sizeof(short int));
    z=(short int *)malloc((kt+4)*sizeof(short int));
    cp=(double   *)malloc( kt   *sizeof(double)*2*2);
    if(cp == NULL){
        printf("Memory Allocation Failure!\n");
        exit(1);
    }
    printf("bx = %d  ",bx);
    printf("use memory = %d\n",(kt+4)*sizeof(short int)*5+kt*sizeof(double)*2*2);
    
    rtime=clock();
    x=2;
    a[0]=10000;for(i=1;i<kt+4;i++) a[i]=0;
    b[0]= 5000;for(i=1;i<kt+4;i++) b[i]=0;
    t[0]= 5000;for(i=1;i<kt+4;i++) t[i]=0;
    printf("-- %dmsec\n",clock()-rtime);
    
    for(i=1;i<log2_bx;i++){
        rtime=clock();
        add1604(a,b,y,kt);
        div4(y,kt);
        fmul1604(a,b,a,kt,cp);
        gksqrt(a,b,z,kt,cp);
        fmul1604(a,b,b,kt,cp);
        xmul1604(b,100,kt);
        div2h(b,a,kt);
        add1604(a,y,a,kt);
        sub1604(a,b,y,kt);
        xmul1604(y,x,kt);
        sub1604(t,y,t,kt);
        x*=2;
        printf("%2d %dmsec\n",i,clock()-rtime);
    }
    
    rtime=clock();
    add1604(a,b,a,kt);
    gyaku(t,b,z,kt,cp);
    fmul1604(a,b,a,kt,cp);
    result1604_file(a+1,kt-1);
    printf("-- %dmsec\n",clock()-rtime);
    
    free(a);
    free(b);
    free(t);
    free(y);
    free(z);
    free(cp);
    
    printf("Total time : %d msec.\n",clock()-stime);
}

void gksqrt(short int *a,short int *x,short int *t,int kt,double *cp)
{
    int i,bp;
    double gy;
    
    for(i=0;i<kt+4;i++) x[i]=0;
    
    gy=0.0;
    for(i=0;i<5;i++){
        gy*=10000.0;
        gy+=a[i];
    }
    gy=1.0/sqrt(gy);
    
    while((int)gy==0) gy*=10000.0;
    for(i=0;i<5;i++){
        x[i]=gy;
        gy-=x[i];
        gy*=10000.0;
    }
    
    gksqrt_maincalc(a,x,t,16,cp);
    gksqrt_maincalc(a,x,t,16,cp);
    
    bp=8;
    while(bp<=kt){
        /* x=(3x-Axxx)/2 */
        gksqrt_maincalc(a,x,t,bp,cp);
        bp*=2;
    }
}

void gksqrt_maincalc(short int *a,short int *b,short int *t,int kt,double *acp)
{
    /*
      x=(3x-Axxx)/2
      
      t=b*b;
      t=t*b;
      t=a*t;
      b=3*x-t;
    */
    
    int i,shr;
    short int h1[4],h2[4];
    double *bcp;
    
    shr=kt*2;
    bcp=acp+shr;
    
    /* step1 x*x */
    fft_dataset(b   ,4,acp,4*2);
    fft_dataset(b+kt,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h1,4,acp,4*2);
    
    fft_dataset(b,kt,acp,shr);
    rdft(shr, 1, acp);
    fft_fuku_xx(acp,shr);
    rdft(shr,-1, acp);
    fft_dataput(t,kt+4,acp,shr);
    
    h_add(t+kt,h1,4);
    h_add(t+kt,h1,4);
    
    if(t[0]==0){
        for(i=0;i<kt+3;i++){
            t[i]=t[i+1];
        }
        t[kt+3]=0;
    }
    
    /* step2 x*x*x */
    fft_dataset(b   ,4,acp,4*2);
    fft_dataset(t+kt,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h1,4,acp,4*2);
    
    fft_dataset(b+kt,4,acp,4*2);
    fft_dataset(t   ,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h2,4,acp,4*2);
    
    fft_dataset(b,kt,acp,shr);
    fft_dataset(t,kt,bcp,shr);
    rdft(shr, 1, acp);
    rdft(shr, 1, bcp);
    fft_fuku(acp,bcp,shr);
    rdft(shr,-1, acp);
    fft_dataput(t,kt+4,acp,shr);
    
    h_add(t+kt,h1,4);
    h_add(t+kt,h2,4);
    
    if(t[0]==0){
        for(i=0;i<kt+3;i++){
            t[i]=t[i+1];
        }
        t[kt+3]=0;
    }
    
    /* step3 (x*x*x)*a */
    fft_dataset(a   ,4,acp,4*2);
    fft_dataset(t+kt,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h1,4,acp,4*2);
    
    fft_dataset(a+kt,4,acp,4*2);
    fft_dataset(t   ,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h2,4,acp,4*2);
    
    fft_dataset(a,kt,acp,shr);
    fft_dataset(t,kt,bcp,shr);
    rdft(shr, 1, acp);
    rdft(shr, 1, bcp);
    fft_fuku(acp,bcp,shr);
    rdft(shr,-1, acp);
    fft_dataput(t,kt+4,acp,shr);
    
    h_add(t+kt,h1,4);
    h_add(t+kt,h2,4);
    
    if(t[0]==0){
        for(i=0;i<kt+3;i++){
            t[i]=t[i+1];
        }
        t[kt+3]=0;
    }
    
    /* step4 (3*x-x*x*x*a)/2 */
    gksqrt_x3sub2d(b,t,kt+4);
}

void fmul1604(short int *a,short int *b,short int *out,int kt,double *acp)
{
    int shr;
    double *bcp;
    
    shr=kt*2;
    bcp=acp+shr;
    
    fft_dataset(a,kt,acp,shr);
    fft_dataset(b,kt,bcp,shr);
    rdft(shr, 1, acp);
    rdft(shr, 1, bcp);
    fft_fuku(acp,bcp,shr);
    rdft(shr,-1, acp);
    fft_dataput(out,kt+4,acp,shr);
}

void fft_dataput(short int *c,int out_len,double *acp,int shr)
{
    int i,deep;
    double scale,rgs;
    __int64 cy;
    
    scale=2.0/shr;
    
    deep=shr-out_len;
    if(deep>3) deep=3;
    
    cy=0;
    for(i=out_len-1+deep;i>out_len-1;i--){
        rgs=acp[i-1]*scale+0.5+cy;
        cy=rgs*0.0001;
    }
    for(i=out_len-1;i>0;i--){
        rgs=acp[i-1]*scale+0.5+cy;
        cy=rgs*0.0001;
        c[i]=rgs-(double)cy*10000.0;
    }
    c[0]=cy;
}

void fft_fuku(double *acp,double *bcp,int shr)
{
    int i;
    double ra,rb,rc,rd;
    
    acp[0]=acp[0]*bcp[0];
    acp[1]=acp[1]*bcp[1];
    for(i=2;i<shr;i+=2){
        ra=acp[i  ];
        rb=acp[i+1];
        rc=bcp[i  ];
        rd=bcp[i+1];
        acp[i  ]=ra*rc-rb*rd;
        acp[i+1]=rb*rc+ra*rd;
    }
}

void fft_dataset(short int *a,int kt,double *acp,int shr)
{
    int i;
    
    for(i=0;i<kt;i++) acp[i]=a[i];
    for(i=kt;i<shr;i++) acp[i]=0.0;
}

void fft_fuku_xx(double *acp,int shr)
{
    int i;
    double ra,rb;
    
    acp[0]=acp[0]*acp[0];
    acp[1]=acp[1]*acp[1];
    for(i=2;i<shr;i+=2){
        ra=acp[i  ];
        rb=acp[i+1];
        acp[i  ]=ra*ra-rb*rb;
        acp[i+1]=ra*rb*2;
    }
}

void gksqrt_x3sub2d(short int *a,short int *b,int kt)
{
    int i,brrw,carry,d,rem;
    
    carry=0;
    for(i=kt-1;i>=0;i--){
        a[i]=a[i]*3+carry;
        if(a[i]>=20000){
            carry=2;
            a[i]-=20000;
        }
        else if(a[i]>=10000){
            carry=1;
            a[i]-=10000;
        }
        else{
            carry=0;
        }
    }
    a[0]+=carry*10000;
    
    brrw=0;
    for(i=kt-1;i>=0;i--){
        a[i]=a[i]-b[i]-brrw;
        if(a[i]>=0){
            brrw=0;
        }
        else{
            a[i]+=10000;
            brrw=1;
        }
    }
    
    rem=0;
    for(i=0;i<kt;i++){
        d=a[i]+rem;
        a[i]=d>>1;
        rem=(d-a[i]*2)*10000;
    }
}

void h_add(short int *a,short int *b,int kt)
{
    int i,carry;
    
    carry=0;
    for(i=kt-1;i>=0;i--){
        a[i]=a[i]+b[i]+carry;
        if(a[i]>=10000){
            carry=1;
            a[i]-=10000;
        }
        else{
            carry=0;
        }
    }
    a[-1]+=carry;
}

void add1604(short int *a,short int *b,short int *c,int kt)
{
    int i,carry;
    
    carry=0;
    for(i=kt-1;i>=0;i--){
        c[i]=a[i]+b[i]+carry;
        if(c[i]>=10000){
            carry=1;
            c[i]-=10000;
        }
        else{
            carry=0;
        }
    }
    c[0]+=carry*10000;
}

void sub1604(short int *a,short int *b,short int *c,int kt)
{
    int i,brrw;
    
    brrw=0;
    for(i=kt-1;i>=0;i--){
        c[i]=a[i]-b[i]-brrw;
        if(c[i]>=0){
            brrw=0;
        }
        else{
            c[i]+=10000;
            brrw=1;
        }
    }
}

void xmul1604(short int *a,int x,int kt)
{
    int i;
    double d;
    __int64 cy;
    
    cy=0;
    for(i=kt-1;i>=0;i--){
        d=(double)a[i]*x+cy;
        cy=d*0.0001;
        a[i]=d-(double)cy*10000.0;
    }
}

void div4(short int *a,int kt)
{
    int i,d,rem;
    
    rem=0;
    for(i=0;i<kt;i++){
        d=a[i]+rem;
        a[i]=d>>2;
        rem=(d-a[i]*4)*10000;
    }
}

void div2h(short int *a,short int *b,int kt)
{
    int i,d,rem;
    
    rem=0;
    for(i=0;i<kt;i++){
        d=a[i]+rem;
        b[i]=d>>1;
        rem=(d-b[i]*2)*10000;
    }
}

void gyaku(short int *a,short int *x,short int *t,int kt,double *cp)
{
    int i,bp;
    double gy;
    
    for(i=0;i<kt+4;i++) x[i]=0;
    
    gy=0.0;
    for(i=0;i<5;i++){
        gy*=10000.0;
        gy+=a[i];
    }
    gy=1.0/gy;
    
    while((int)gy==0) gy*=10000.0;
    for(i=0;i<5;i++){
        x[i]=gy;
        gy-=x[i];
        gy*=10000.0;
    }
    
    gyaku_maincalc(a,x,t,16,cp);
    gyaku_maincalc(a,x,t,16,cp);
    
    bp=8;
    while(bp<=kt){
        /* x=2x-Axx */
        gyaku_maincalc(a,x,t,bp,cp);
        bp*=2;
    }
}

void gyaku_maincalc(short int *a,short int *b,short int *t,int kt,double *acp)
{
    /* x=2*x-a*x*x */
    
    int i,shr;
    short int h1[4],h2[4];
    double *bcp;
    
    shr=kt*2;
    bcp=acp+shr;
    
    /* step1 x*x */
    fft_dataset(b   ,4,acp,4*2);
    fft_dataset(b+kt,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h1,4,acp,4*2);
    
    fft_dataset(b,kt,acp,shr);
    rdft(shr, 1, acp);
    fft_fuku_xx(acp,shr);
    rdft(shr,-1, acp);
    fft_dataput(t,kt+4,acp,shr);
    
    h_add(t+kt,h1,4);
    h_add(t+kt,h1,4);
    
    if(t[0]==0){
        for(i=0;i<kt+3;i++){
            t[i]=t[i+1];
        }
        t[kt+3]=0;
    }
    
    /* step2 (x*x)*a */
    fft_dataset(a   ,4,acp,4*2);
    fft_dataset(t+kt,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h1,4,acp,4*2);
    
    fft_dataset(a+kt,4,acp,4*2);
    fft_dataset(t   ,4,bcp,4*2);
    rdft(4*2, 1, acp);
    rdft(4*2, 1, bcp);
    fft_fuku(acp,bcp,4*2);
    rdft(4*2,-1, acp);
    fft_dataput(h2,4,acp,4*2);
    
    fft_dataset(a,kt,acp,shr);
    fft_dataset(t,kt,bcp,shr);
    rdft(shr, 1, acp);
    rdft(shr, 1, bcp);
    fft_fuku(acp,bcp,shr);
    rdft(shr,-1, acp);
    fft_dataput(t,kt+4,acp,shr);
    
    h_add(t+kt,h1,4);
    h_add(t+kt,h2,4);
    
    if(t[0]==0){
        for(i=0;i<kt+3;i++){
            t[i]=t[i+1];
        }
        t[kt+3]=0;
    }
    
    /* step3 2*x-x*x*a */
    gyaku_x2sub(b,t,kt+4);
}

void gyaku_x2sub(short int *a,short int *b,int kt)
{
    int i,brrw,carry;
    
    carry=0;
    for(i=kt-1;i>=0;i--){
        a[i]=a[i]*2+carry;
        if(a[i]>=10000){
            carry=1;
            a[i]-=10000;
        }
        else{
            carry=0;
        }
    }
    a[0]+=carry*10000;
    
    brrw=0;
    for(i=kt-1;i>=0;i--){
        a[i]=a[i]-b[i]-brrw;
        if(a[i]>=0){
            brrw=0;
        }
        else{
            a[i]+=10000;
            brrw=1;
        }
    }
}

void result1604_file(short int *c,int out_len)
{
    FILE *fp;
    int i,fe,count,p01;
    char file_name[13];
    
    sprintf(file_name,"pi.txt");
    
    if((fp=fopen(file_name,"w"))==NULL){
        printf("file open error!\n");
        exit(1);
    }
    
    fprintf(fp,"PI=3.\n\n");
    
    fe=out_len/25*25;
    count=0;
    for(i=0;i<fe;i+=25){
        
        fprintf(fp,"%04d%04d%02d ",c[i  ],c[i+1],p01=c[i+2]/100);
        fprintf(fp,"%02d%04d%04d ",c[i+2]-p01*100,c[i+3],c[i+4]);
        fprintf(fp,"%04d%04d%02d ",c[i+5],c[i+6],p01=c[i+7]/100);
        fprintf(fp,"%02d%04d%04d ",c[i+7]-p01*100,c[i+8],c[i+9]);
        fprintf(fp,"%04d%04d%02d\n",c[i+10],c[i+11],p01=c[i+12]/100);
        
        fprintf(fp,"%02d%04d%04d ",c[i+12]-p01*100,c[i+13],c[i+14]);
        fprintf(fp,"%04d%04d%02d ",c[i+15],c[i+16],p01=c[i+17]/100);
        fprintf(fp,"%02d%04d%04d ",c[i+17]-p01*100,c[i+18],c[i+19]);
        fprintf(fp,"%04d%04d%02d ",c[i+20],c[i+21],p01=c[i+22]/100);
        fprintf(fp,"%02d%04d%04d\n",c[i+22]-p01*100,c[i+23],c[i+24]);
        
        count+=100;
        if(count==1000){
            count=0;
            fprintf(fp,"\n");
        }
    }
    
    fe=out_len;
    count=0;
    for(;i<fe;i++){
        fprintf(fp,"%02d",p01=c[i]/100);
        count+=2;
        if(count%50==0) fprintf(fp,"\n"); else if(count%10==0) fprintf(fp," ");
        
        fprintf(fp,"%02d",c[i]-p01*100);
        count+=2;
        if(count%50==0) fprintf(fp,"\n"); else if(count%10==0) fprintf(fp," ");
    }
    fprintf(fp,"\n");
    
    fclose(fp);
}

人気ベスト20(ページ内)
トップページに戻る
作成日2008/04/11,最終更新日2008/04/11