#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
void main_cal(int,int);
void mul_fft_fuku(double *,double *,int);
void mul_fft_dataset(short int *,int,double *,int);
void mul_fft_dataput_type3(short int *,int,int,double *,int);
void mul_fft_dataput_type2(short int *,int,int,double *,int);
void lmulmt_herf(short int *,short int *,short int *,int,int,int,double *,int);
void mul_harf_hosei(short int *,short int *,short int *,short int *,short int *,int,int,int,int,int);
void gyaku_herf(short int *,short int *,int,double *);
void gmul_xa(short int *,short int *,int,double *);
void gmul_precalc(short int *,short int *,int,double *);
void rdft(int, int, double *);
void main(void)
{
main_cal(1024,1);
}
void main_cal(int bx,int seed)
{
short int *stsu,*st1,*su1;
int i,mr;
double *cp;
long t_1, t_2;
stsu=(short int *)malloc((bx/4+4+2)*sizeof(short int)*2+10);
cp=(double *)malloc((bx/4+4)*sizeof(double)*2+(bx/4+2)*sizeof(short int));
if(cp == NULL){
printf("Memory Allocation Failure!\n");
exit(1);
}
printf("use memory = %d\n",(bx/4+4)*sizeof(short int)*2+2+(bx/4+4)*sizeof(double)*2+(bx/4+2)*sizeof(short int));
for(i=0;i<(bx/4+4)*2;i++) stsu[i]=rand()%10000;
srand(seed);
mr=bx/4-1;
st1=stsu;
su1=st1+mr+2+3;
*((int *)st1)=mr;
for(i=2;i<2+mr+3;i++){
st1[i]=rand()%10000;
}
if(st1[2]==0) st1[2]=rand()%10000;
t_1=clock();
st1[2+mr+0]=0;
st1[2+mr+1]=0;
st1[2+mr+2]=0;
gyaku_herf(st1,su1,mr,cp);
t_2=clock();
lmulmt_herf(st1,su1,st1,mr,mr,mr,cp,mr);
for(i=0;i<mr+2;i++) printf("%5d",st1[i]); printf("\n");
if( st1[mr+2-2]!=9999 && st1[mr+2-2]!=0 ){printf("error seed=%d\n",seed);exit(1);}
printf("\n%d msec.\n",t_2-t_1);
free(stsu);
free(cp);
}
void gyaku_herf(short int *a,short int *x,int kt,double *cp)
{
int i,bp;
double gy=0.0;
for(i=2;i<2+4+2;i++){
gy+=a[i];
gy*=10000.0;
}
gy/=10000.0;
gy=1.0/gy;
*((int *)x)=4+2-*((int *)a);
while((int)(gy*=10000.0)==0){
(*((int *)x))--;
}
for(i=2;i<2+4+2;i++){
x[i]=gy;
gy-=x[i];
gy*=10000.0;
}
for(i=2+6;i<2+15;i++) x[i]=0;
gmul_precalc(a+2,x+2,15,cp);
gmul_precalc(a+2,x+2,15,cp);
bp=8;
while(bp<(kt+1)){
//x=2*x-A*x*x
gmul_xa(a+2,x+2,bp-1,cp);
bp*=2;
}
}
void gmul_precalc(short int *a,short int *b,int kt,double *acp)
{
int i,shr;
double *bcp;
shr=(kt+1)*2;
bcp=acp+shr+4;
mul_fft_dataset(a,kt,acp,shr);
rdft(shr, 1, acp);
mul_fft_dataset(b,kt,bcp,shr);
rdft(shr, 1, bcp);
mul_fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
mul_fft_dataput_type3(b,kt+kt,kt,acp,shr);
if(b[0]==1) b[0]=0; else b[0]=1;
for(i=kt-1;i>0;i--){
b[i]=9999-b[i];
}
mul_fft_dataset(b,kt,acp,shr);
rdft(shr, 1, acp);
mul_fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
mul_fft_dataput_type3(b,kt+kt-1,kt,acp,shr);
}
void gmul_xa(short int *a,short int *b,int kt,double *acp)
{
int i,shr,b1,b2,b3,b4,be1,be2,be3,over_chk;
double *bcp,und1,und2,und3;
shr=(kt+1)*2;
bcp=acp+shr+4;
b1=b[0];
b2=b[1];
b3=b[2];
b4=b[3];
be1=b[kt+0];
be2=b[kt+1];
be3=b[kt+2];
mul_fft_dataset(a,kt,acp,shr);
rdft(shr, 1, acp);
mul_fft_dataset(b,kt,bcp,shr);
rdft(shr, 1, bcp);
mul_fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
mul_fft_dataput_type3(b+kt,kt,kt,acp,shr);
mul_fft_dataset(a+kt,kt,acp,shr);
rdft(shr, 1, acp);
mul_fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
und1=(double)be1*(shr/2.0);
und2=(double)be2*(shr/2.0);
und3=(double)be3*(shr/2.0);
for(i=0;i<kt+4;i++){
acp[i+2]+=a[i]*und1;
acp[i+3]+=a[i]*und2;
acp[i+4]+=a[i]*und3;
}
acp[kt+2]+=a[kt*2 ]*b1*(shr/2.0);
acp[kt+3]+=a[kt*2 ]*b2*(shr/2.0);
acp[kt+4]+=a[kt*2 ]*b3*(shr/2.0);
acp[kt+5]+=a[kt*2 ]*b4*(shr/2.0);
acp[kt+3]+=a[kt*2+1]*b1*(shr/2.0);
acp[kt+4]+=a[kt*2+1]*b2*(shr/2.0);
acp[kt+5]+=a[kt*2+1]*b3*(shr/2.0);
acp[kt+6]+=a[kt*2+1]*b4*(shr/2.0);
acp[kt+4]+=a[kt*2+2]*b1*(shr/2.0);
acp[kt+5]+=a[kt*2+2]*b2*(shr/2.0);
acp[kt+6]+=a[kt*2+2]*b3*(shr/2.0);
acp[kt+7]+=a[kt*2+2]*b4*(shr/2.0);
acp[kt+5]+=a[kt*2+3]*b1*(shr/2.0);
acp[kt+6]+=a[kt*2+3]*b2*(shr/2.0);
acp[kt+7]+=a[kt*2+3]*b3*(shr/2.0);
acp[kt+8]+=a[kt*2+3]*b4*(shr/2.0);
b[kt*2+0]=0;
b[kt*2+1]=0;
b[kt*2+2]=0;
b[kt*2+3]=0;
mul_fft_dataput_type2(b+kt,kt+kt,kt+4,acp,shr);
over_chk=0;
if(b[kt]==10000){
over_chk=1;
}
else{
for(i=kt*2+3;i>=kt;i--){
b[i]=9999-b[i];
}
}
mul_fft_dataset(b+kt+2,kt,acp,shr);
rdft(shr, 1, acp);
mul_fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
acp[kt+2]+=b[kt+2]*be1*(shr/2.0);
acp[kt+3]+=b[kt+2]*be2*(shr/2.0);
acp[kt+4]+=b[kt+2]*be3*(shr/2.0);
acp[kt+3]+=b[kt+3]*be1*(shr/2.0);
acp[kt+4]+=b[kt+3]*be2*(shr/2.0);
acp[kt+5]+=b[kt+3]*be3*(shr/2.0);
acp[kt+4]+=b[kt+4]*be1*(shr/2.0);
acp[kt+5]+=b[kt+4]*be2*(shr/2.0);
acp[kt+6]+=b[kt+4]*be3*(shr/2.0);
acp[kt+2]+=b[kt*2+2]*b1*(shr/2.0);
acp[kt+3]+=b[kt*2+2]*b2*(shr/2.0);
acp[kt+4]+=b[kt*2+2]*b3*(shr/2.0);
acp[kt+3]+=b[kt*2+3]*b1*(shr/2.0);
acp[kt+4]+=b[kt*2+3]*b2*(shr/2.0);
acp[kt+5]+=b[kt*2+3]*b3*(shr/2.0);
mul_fft_dataput_type3(b+kt+1,kt+kt,kt+3,acp,shr);
if(over_chk==1){
for(i=kt*2+3;i>=kt;i--){
b[i]=9999-b[i];
}
}
b[kt+2]+=be3;
b[kt+1]+=be2;
b[kt+0]+=be1;
if(b[kt+2]>9999){b[kt+2]-=10000; b[kt+1]++;}
if(b[kt+1]>9999){b[kt+1]-=10000; b[kt+0]++;}
if(b[kt+0]>9999){b[kt+0]-=10000; }
}
void lmulmt_herf(short int *a,short int *b,short int *out,int kta,int ktb,int rkt,double *acp,int limit)
{
int i,hlim,d,cy,shr,sis,ktachk,ktbchk,new_rkt,new_kta,new_ktb;
short int *a1,*a2,*b1,*b2,*tmp,*t1,*t2;
double *bcp;
hlim=limit/2;
shr=2;
while(hlim*2+1>shr){
shr*=2;
}
bcp=acp+shr+4;
tmp=(short int *)(bcp+shr+4);
a1=a+2;
a2=a1+hlim;
b1=b+2;
b2=b1+hlim;
t1=tmp;
t2=tmp+hlim;
ktachk=0;
ktbchk=0;
new_rkt=hlim*2;
if(kta>new_rkt) { new_kta=hlim; ktachk=1;} else new_kta=kta-hlim;
if(ktb>new_rkt) { new_ktb=hlim; ktbchk=1;} else new_ktb=ktb-hlim;
for(i=0;i<new_rkt+3;i++) tmp[i]=0;
mul_harf_hosei( a1, a2, b1, b2, tmp, new_kta, new_ktb, new_rkt, ktachk, ktbchk);
mul_fft_dataset(b2,new_ktb,acp,shr);
rdft(shr, 1, acp);
mul_fft_dataset(a1,hlim,bcp,shr);
rdft(shr, 1, bcp);
mul_fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
mul_fft_dataput_type2(t2,new_ktb+hlim,hlim+3,acp,shr);
mul_fft_dataset(b1,hlim,acp,shr);
rdft(shr, 1, acp);
mul_fft_fuku(bcp,acp,shr);
rdft(shr,-1, bcp);
mul_fft_dataput_type2(t1,hlim+hlim,new_rkt,bcp,shr);
mul_fft_dataset(a2,new_kta,bcp,shr);
rdft(shr, 1, bcp);
mul_fft_fuku(acp,bcp,shr);
rdft(shr,-1, acp);
mul_fft_dataput_type2(t2,hlim+new_kta,hlim+3,acp,shr);
cy=0;
for(i=hlim;i>=0;i--){
d=tmp[i]+cy;
cy=d*0.0001;
tmp[i]=d-cy*10000;
if(cy==0) break;
}
sis=*((int *)a)+*((int *)b);
if(tmp[0]!=0){
for(i=rkt-1;i>=0;i--){
out[i+2]=tmp[i];
}
}
else{
for(i=rkt-1;i>=0;i--){
out[i+2]=tmp[i+1];
}
sis--;
}
*((int *)out)=sis;
}
void mul_harf_hosei(short int *a1,short int *a2,short int *b1,short int *b2,short int *tmp,int kta,int ktb,int rkt,int ktachk,int ktbchk)
{
int tmp1,tmp2,tmp3;
long double gs;
if(kta<2 && ktb<2){
gs=(long double)(a2[0]*10000)*(b2[0]*10000);
}
else if(kta<2){
gs=(long double)(a2[0]*10000)*(b2[0]*10000+b2[1]);
}
else if(ktb<2){
gs=(long double)(a2[0]*10000+a2[1])*(b2[0]*10000);
}
else{
gs=(long double)(a2[0]*10000+a2[1])*(b2[0]*10000+b2[1]);
}
tmp1=gs*0.000000000001;
tmp2=(gs-(long double)tmp1*1000000000000)*0.00000001;
tmp3=(gs-(long double)tmp1*1000000000000-(long double)tmp2*100000000)*0.0001;
tmp[rkt ]+=tmp1;
tmp[rkt+1]+=tmp2;
tmp[rkt+2]+=tmp3;
gs=0.0;
if(ktbchk==1) gs+=(long double)(a1[0]*100000000.0+a1[1]*10000.0+a2[2])*b1[rkt];
if(ktachk==1) gs+=(long double)(b1[0]*100000000.0+b1[1]*10000.0+b2[2])*a1[rkt];
tmp1=gs*0.000000000001;
tmp2=(gs-(long double)tmp1*1000000000000.0)*0.00000001;
tmp3=(gs-(long double)tmp1*1000000000000.0-(long double)tmp2*100000000.0)*0.0001;
tmp[rkt ]+=tmp1;
tmp[rkt+1]+=tmp2;
tmp[rkt+2]+=tmp3;
tmp[rkt+1]+=tmp[rkt+2]/10000;
tmp[rkt+2]%=10000;
tmp[rkt ]+=tmp[rkt+1]/10000;
tmp[rkt+1]%=10000;
tmp[rkt-1]+=tmp[rkt ]/10000;
tmp[rkt ]%=10000;
}
void mul_fft_dataput_type3(short int *c,int loc,int out_len,double *acp,int shr)
{
int i,start,deep;
double scale,rgs;
__int64 cy;
scale=2.0/shr;
start=shr-(loc+1);
deep=3;
while( deep > -1 ){
if( start + out_len + deep < shr+4 ) break;
deep--;
}
cy=0;
for(i=out_len+deep;i>out_len-1;i--){
rgs=acp[i+start]*scale+0.5+cy;
cy=rgs*0.0001;
}
for(i=out_len-1;i>=0;i--){
rgs=acp[i+start]*scale+0.5+cy;
cy=rgs*0.0001;
c[i]=rgs-(double)cy*10000.0;
}
}
void mul_fft_dataput_type2(short int *c,int loc,int out_len,double *acp,int shr)
{
int i,start,deep;
double scale,rgs;
__int64 cy;
scale=2.0/shr;
start=shr-(loc+1);
deep=3;
while( deep > -1 ){
if( start + out_len + deep < shr+4 ) break;
deep--;
}
cy=0;
for(i=out_len+deep;i>out_len-1;i--){
rgs=acp[i+start]*scale+0.5+cy;
cy=rgs*0.0001;
}
for(i=out_len-1;i>=0;i--){
rgs=acp[i+start]*scale+0.5+cy+c[i];
cy=rgs*0.0001;
c[i]=rgs-(double)cy*10000.0;
}
if((int)cy==1) c[0]+=10000;
}
void mul_fft_dataset(short int *a,int kta,double *acp,int shr)
{
int i,start;
start=shr-kta;
for(i=0;i<start;i++) acp[i]=0.0;
for(i=0;i<kta;i++) acp[start+i]=a[i];
acp[shr]=0.0;acp[shr+1]=0.0;acp[shr+2]=0.0;acp[shr+3]=0.0;
}
void mul_fft_fuku(double *acp,double *bcp,int shr)
{
int i;
double ra,rb,rc,rd;
acp[0]=acp[0]*bcp[0];
acp[1]=acp[1]*bcp[1];
for(i=2;i<shr;i+=2){
ra=acp[i ];
rb=acp[i+1];
rc=bcp[i ];
rd=bcp[i+1];
acp[i ]=ra*rc-rb*rd;
acp[i+1]=rb*rc+ra*rd;
}
}
–ß‚é