/*
calculation of PI.
Example compilation:
bcc32 gu002c.c fftsg_h.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
void cal(int);
void fmul1604(short int *,short int *,short int *,int,double *);
void add1604(short int *,short int *,short int *,int);
void sub1604(short int *,short int *,short int *,int);
void xmul1604(short int *,int,int);
void div4(short int *,int);
void div2h(short int *,short int *,int);
void result1604_file(short int *,int);
/* fft */
void fft_dataset(short int *,int,double *,int);
void fft_fuku_xx(double *,int);
void fft_fuku (double *,double *,int);
void fft_dataput(short int *,int,double *,int);
/* sqrt */
void gksqrt(short int *,short int *,short int *,int,double *);
void gksqrt_maincalc(short int *,short int *,short int *,int,double *);
void gksqrt_x3sub2d(short int *,short int *,int);
/* gyaku */
void gyaku(short int *,short int *,short int *,int,double *);
void gyaku_maincalc(short int *,short int *,short int *,int,double *);
void gyaku_x2sub(short int *,short int *,int);
/* sqrt & gyaku */
void h_add(short int *,short int *,int);
/* fftsg_h.c */
void rdft(int, int, double *);
void main(void)
{
int bx,snum;
printf("length of pi =? ");
scanf("%d",&snum);
for(bx=1;bx<snum;bx*=2);
cal(bx);
}
/*
a=1.0;
b=1.0/2.0;
t=1.0/2.0;
x=2;
for(i=0;i<n;i++){
y=(a+b)/4.0;
b=sqrt(a*b);
a=y+b/2.0;
t=t-x*(a-b);
x*=2;
}
pi=(a+b)/t);
*/
void cal(int bx)
{
int i,x,kt,log2_bx;
short int *a,*b,*t,*y,*z;
double *cp;
time_t stime,rtime;
stime=clock();
kt=bx/4;
for(log2_bx = 1; (1 << log2_bx) < bx; log2_bx++);
a=(short int *)malloc((kt+4)*sizeof(short int));
b=(short int *)malloc((kt+4)*sizeof(short int));
t=(short int *)malloc((kt+4)*sizeof(short int));
y=(short int *)malloc((kt+4)*sizeof(short int));
z=(short int *)malloc((kt+4)*sizeof(short int));
cp=(double *)malloc( kt *sizeof(double)*2*2);
if(cp == NULL){
printf("Memory Allocation Failure!\n");
exit(1);
}
printf("bx = %d ",bx);
printf("use memory = %d\n",(kt+4)*sizeof(short int)*5+kt*sizeof(double)*2*2);
rtime=clock();
x=2;
a[0]=10000;for(i=1;i<kt+4;i++) a[i]=0;
b[0]= 5000;for(i=1;i<kt+4;i++) b[i]=0;
t[0]= 5000;for(i=1;i<kt+4;i++) t[i]=0;
printf("-- %dmsec\n",clock()-rtime);
for(i=1;i<log2_bx;i++){
rtime=clock();
add1604(a,b,y,kt);
div4(y,kt);
fmul1604(a,b,a,kt,cp);
gksqrt(a,b,z,kt,cp);
fmul1604(a,b,b,kt,cp);
xmul1604(b,100,kt);
div2h(b,a,kt);
add1604(a,y,a,kt);
sub1604(a,b,y,kt);
xmul1604(y,x,kt);
sub1604(t,y,t,kt);
x*=2;
printf("%2d %dmsec\n",i,clock()-rtime);
}
rtime=clock();
add1604(a,b,a,kt);
gyaku(t,b,z,kt,cp);
fmul1604(a,b,a,kt,cp);
result1604_file(a+1,kt-1);
printf("-- %dmsec\n",clock()-rtime);
free(a);
free(b);
free(t);
free(y);
free(z);
free(cp);
printf("Total time : %d msec.\n",clock()-stime);
}
void gksqrt(short int *a,short int *x,short int *t,int kt,double *cp)
{
int i,bp;
double gy;
for(i=0;i<kt+4;i++) x[i]=0;
gy=0.0;
for(i=0;i<5;i++){
gy*=10000.0;
gy+=a[i];
}
gy=1.0/sqrt(gy);
while((int)gy==0) gy*=10000.0;
for(i=0;i<5;i++){
x[i]=gy;
gy-=x[i];
gy*=10000.0;
}
gksqrt_maincalc(a,x,t,16,cp);
gksqrt_maincalc(a,x,t,16,cp);
bp=8;
while(bp<=kt){
/* x=(3x-Axxx)/2 */
gksqrt_maincalc(a,x,t,bp,cp);
bp*=2;
}
}
void gksqrt_maincalc(short int *a,short int *b,short int *t,int kt,double *acp)
{
/*
x=(3x-Axxx)/2
t=b*b;
t=t*b;
t=a*t;
b=3*x-t;
*/
int i,shr;
short int h1[4],h2[4];
double *bcp;
shr=kt*2;
bcp=acp+shr;
/* step1 x*x */
fft_dataset(b ,4,acp,4*2);
fft_dataset(b+kt,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h1,4,acp,4*2);
fft_dataset(b,kt,acp,shr);
rdft(shr, 1, acp);
fft_fuku_xx(acp,shr);
rdft(shr,-1, acp);
fft_dataput(t,kt+4,acp,shr);
h_add(t+kt,h1,4);
h_add(t+kt,h1,4);
if(t[0]==0){
for(i=0;i<kt+3;i++){
t[i]=t[i+1];
}
t[kt+3]=0;
}
/* step2 x*x*x */
fft_dataset(b ,4,acp,4*2);
fft_dataset(t+kt,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h1,4,acp,4*2);
fft_dataset(b+kt,4,acp,4*2);
fft_dataset(t ,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h2,4,acp,4*2);
fft_dataset(b,kt,acp,shr);
fft_dataset(t,kt,bcp,shr);
rdft(shr, 1, acp);
rdft(shr, 1, bcp);
fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
fft_dataput(t,kt+4,acp,shr);
h_add(t+kt,h1,4);
h_add(t+kt,h2,4);
if(t[0]==0){
for(i=0;i<kt+3;i++){
t[i]=t[i+1];
}
t[kt+3]=0;
}
/* step3 (x*x*x)*a */
fft_dataset(a ,4,acp,4*2);
fft_dataset(t+kt,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h1,4,acp,4*2);
fft_dataset(a+kt,4,acp,4*2);
fft_dataset(t ,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h2,4,acp,4*2);
fft_dataset(a,kt,acp,shr);
fft_dataset(t,kt,bcp,shr);
rdft(shr, 1, acp);
rdft(shr, 1, bcp);
fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
fft_dataput(t,kt+4,acp,shr);
h_add(t+kt,h1,4);
h_add(t+kt,h2,4);
if(t[0]==0){
for(i=0;i<kt+3;i++){
t[i]=t[i+1];
}
t[kt+3]=0;
}
/* step4 (3*x-x*x*x*a)/2 */
gksqrt_x3sub2d(b,t,kt+4);
}
void fmul1604(short int *a,short int *b,short int *out,int kt,double *acp)
{
int shr;
double *bcp;
shr=kt*2;
bcp=acp+shr;
fft_dataset(a,kt,acp,shr);
fft_dataset(b,kt,bcp,shr);
rdft(shr, 1, acp);
rdft(shr, 1, bcp);
fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
fft_dataput(out,kt+4,acp,shr);
}
void fft_dataput(short int *c,int out_len,double *acp,int shr)
{
int i,deep;
double scale,rgs;
__int64 cy;
scale=2.0/shr;
deep=shr-out_len;
if(deep>3) deep=3;
cy=0;
for(i=out_len-1+deep;i>out_len-1;i--){
rgs=acp[i-1]*scale+0.5+cy;
cy=rgs*0.0001;
}
for(i=out_len-1;i>0;i--){
rgs=acp[i-1]*scale+0.5+cy;
cy=rgs*0.0001;
c[i]=rgs-(double)cy*10000.0;
}
c[0]=cy;
}
void fft_fuku(double *acp,double *bcp,int shr)
{
int i;
double ra,rb,rc,rd;
acp[0]=acp[0]*bcp[0];
acp[1]=acp[1]*bcp[1];
for(i=2;i<shr;i+=2){
ra=acp[i ];
rb=acp[i+1];
rc=bcp[i ];
rd=bcp[i+1];
acp[i ]=ra*rc-rb*rd;
acp[i+1]=rb*rc+ra*rd;
}
}
void fft_dataset(short int *a,int kt,double *acp,int shr)
{
int i;
for(i=0;i<kt;i++) acp[i]=a[i];
for(i=kt;i<shr;i++) acp[i]=0.0;
}
void fft_fuku_xx(double *acp,int shr)
{
int i;
double ra,rb;
acp[0]=acp[0]*acp[0];
acp[1]=acp[1]*acp[1];
for(i=2;i<shr;i+=2){
ra=acp[i ];
rb=acp[i+1];
acp[i ]=ra*ra-rb*rb;
acp[i+1]=ra*rb*2;
}
}
void gksqrt_x3sub2d(short int *a,short int *b,int kt)
{
int i,brrw,carry,d,rem;
carry=0;
for(i=kt-1;i>=0;i--){
a[i]=a[i]*3+carry;
if(a[i]>=20000){
carry=2;
a[i]-=20000;
}
else if(a[i]>=10000){
carry=1;
a[i]-=10000;
}
else{
carry=0;
}
}
a[0]+=carry*10000;
brrw=0;
for(i=kt-1;i>=0;i--){
a[i]=a[i]-b[i]-brrw;
if(a[i]>=0){
brrw=0;
}
else{
a[i]+=10000;
brrw=1;
}
}
rem=0;
for(i=0;i<kt;i++){
d=a[i]+rem;
a[i]=d>>1;
rem=(d-a[i]*2)*10000;
}
}
void h_add(short int *a,short int *b,int kt)
{
int i,carry;
carry=0;
for(i=kt-1;i>=0;i--){
a[i]=a[i]+b[i]+carry;
if(a[i]>=10000){
carry=1;
a[i]-=10000;
}
else{
carry=0;
}
}
a[-1]+=carry;
}
void add1604(short int *a,short int *b,short int *c,int kt)
{
int i,carry;
carry=0;
for(i=kt-1;i>=0;i--){
c[i]=a[i]+b[i]+carry;
if(c[i]>=10000){
carry=1;
c[i]-=10000;
}
else{
carry=0;
}
}
c[0]+=carry*10000;
}
void sub1604(short int *a,short int *b,short int *c,int kt)
{
int i,brrw;
brrw=0;
for(i=kt-1;i>=0;i--){
c[i]=a[i]-b[i]-brrw;
if(c[i]>=0){
brrw=0;
}
else{
c[i]+=10000;
brrw=1;
}
}
}
void xmul1604(short int *a,int x,int kt)
{
int i;
double d;
__int64 cy;
cy=0;
for(i=kt-1;i>=0;i--){
d=(double)a[i]*x+cy;
cy=d*0.0001;
a[i]=d-(double)cy*10000.0;
}
}
void div4(short int *a,int kt)
{
int i,d,rem;
rem=0;
for(i=0;i<kt;i++){
d=a[i]+rem;
a[i]=d>>2;
rem=(d-a[i]*4)*10000;
}
}
void div2h(short int *a,short int *b,int kt)
{
int i,d,rem;
rem=0;
for(i=0;i<kt;i++){
d=a[i]+rem;
b[i]=d>>1;
rem=(d-b[i]*2)*10000;
}
}
void gyaku(short int *a,short int *x,short int *t,int kt,double *cp)
{
int i,bp;
double gy;
for(i=0;i<kt+4;i++) x[i]=0;
gy=0.0;
for(i=0;i<5;i++){
gy*=10000.0;
gy+=a[i];
}
gy=1.0/gy;
while((int)gy==0) gy*=10000.0;
for(i=0;i<5;i++){
x[i]=gy;
gy-=x[i];
gy*=10000.0;
}
gyaku_maincalc(a,x,t,16,cp);
gyaku_maincalc(a,x,t,16,cp);
bp=8;
while(bp<=kt){
/* x=2x-Axx */
gyaku_maincalc(a,x,t,bp,cp);
bp*=2;
}
}
void gyaku_maincalc(short int *a,short int *b,short int *t,int kt,double *acp)
{
/* x=2*x-a*x*x */
int i,shr;
short int h1[4],h2[4];
double *bcp;
shr=kt*2;
bcp=acp+shr;
/* step1 x*x */
fft_dataset(b ,4,acp,4*2);
fft_dataset(b+kt,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h1,4,acp,4*2);
fft_dataset(b,kt,acp,shr);
rdft(shr, 1, acp);
fft_fuku_xx(acp,shr);
rdft(shr,-1, acp);
fft_dataput(t,kt+4,acp,shr);
h_add(t+kt,h1,4);
h_add(t+kt,h1,4);
if(t[0]==0){
for(i=0;i<kt+3;i++){
t[i]=t[i+1];
}
t[kt+3]=0;
}
/* step2 (x*x)*a */
fft_dataset(a ,4,acp,4*2);
fft_dataset(t+kt,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h1,4,acp,4*2);
fft_dataset(a+kt,4,acp,4*2);
fft_dataset(t ,4,bcp,4*2);
rdft(4*2, 1, acp);
rdft(4*2, 1, bcp);
fft_fuku(acp,bcp,4*2);
rdft(4*2,-1, acp);
fft_dataput(h2,4,acp,4*2);
fft_dataset(a,kt,acp,shr);
fft_dataset(t,kt,bcp,shr);
rdft(shr, 1, acp);
rdft(shr, 1, bcp);
fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
fft_dataput(t,kt+4,acp,shr);
h_add(t+kt,h1,4);
h_add(t+kt,h2,4);
if(t[0]==0){
for(i=0;i<kt+3;i++){
t[i]=t[i+1];
}
t[kt+3]=0;
}
/* step3 2*x-x*x*a */
gyaku_x2sub(b,t,kt+4);
}
void gyaku_x2sub(short int *a,short int *b,int kt)
{
int i,brrw,carry;
carry=0;
for(i=kt-1;i>=0;i--){
a[i]=a[i]*2+carry;
if(a[i]>=10000){
carry=1;
a[i]-=10000;
}
else{
carry=0;
}
}
a[0]+=carry*10000;
brrw=0;
for(i=kt-1;i>=0;i--){
a[i]=a[i]-b[i]-brrw;
if(a[i]>=0){
brrw=0;
}
else{
a[i]+=10000;
brrw=1;
}
}
}
void result1604_file(short int *c,int out_len)
{
FILE *fp;
int i,fe,count,p01;
char file_name[13];
sprintf(file_name,"pi.txt");
if((fp=fopen(file_name,"w"))==NULL){
printf("file open error!\n");
exit(1);
}
fprintf(fp,"PI=3.\n\n");
fe=out_len/25*25;
count=0;
for(i=0;i<fe;i+=25){
fprintf(fp,"%04d%04d%02d ",c[i ],c[i+1],p01=c[i+2]/100);
fprintf(fp,"%02d%04d%04d ",c[i+2]-p01*100,c[i+3],c[i+4]);
fprintf(fp,"%04d%04d%02d ",c[i+5],c[i+6],p01=c[i+7]/100);
fprintf(fp,"%02d%04d%04d ",c[i+7]-p01*100,c[i+8],c[i+9]);
fprintf(fp,"%04d%04d%02d\n",c[i+10],c[i+11],p01=c[i+12]/100);
fprintf(fp,"%02d%04d%04d ",c[i+12]-p01*100,c[i+13],c[i+14]);
fprintf(fp,"%04d%04d%02d ",c[i+15],c[i+16],p01=c[i+17]/100);
fprintf(fp,"%02d%04d%04d ",c[i+17]-p01*100,c[i+18],c[i+19]);
fprintf(fp,"%04d%04d%02d ",c[i+20],c[i+21],p01=c[i+22]/100);
fprintf(fp,"%02d%04d%04d\n",c[i+22]-p01*100,c[i+23],c[i+24]);
count+=100;
if(count==1000){
count=0;
fprintf(fp,"\n");
}
}
fe=out_len;
count=0;
for(;i<fe;i++){
fprintf(fp,"%02d",p01=c[i]/100);
count+=2;
if(count%50==0) fprintf(fp,"\n"); else if(count%10==0) fprintf(fp," ");
fprintf(fp,"%02d",c[i]-p01*100);
count+=2;
if(count%50==0) fprintf(fp,"\n"); else if(count%10==0) fprintf(fp," ");
}
fprintf(fp,"\n");
fclose(fp);
}
|