/*
   This file belongs to Aeneas. Aeneas is a GNU package released under GPL 3 license.
   This code is a simulator for Submicron 3D Semiconductor Devices. 
   It implements the Monte Carlo transport in 3D tetrahedra meshes
   for the simulation of the semiclassical Boltzmann equation for both electrons.
   It also includes all the relevant quantum effects for nanodevices.

   Copyright (C) 2007 Jean Michel Sellier <sellier@dmi.unict.it>
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

// Created on : 07 june 2007, Siracusa, Jean Michel Sellier
// Last modified : 16 august 2007, Siracusa, Jean Michel Sellier

// Solves A.x = b for x[1..n], given b[1..n], by the iterative biconjugate
// gradient method. On input, x[1..n] should be set to an initial guess of
// the solution (or all zeros).
// itol is 1, 2, 3 or 4, specifying which convergence test is applied (see below)
// itmax is the maximum number of allowed iterations
// tol is the desired convergence tolerance
// On output, x[1..n] is reset to the improved solution
// iter is the number of iterations actually taken (kept in a local variable,
// it is not returned to the caller)
// err is the estimated error (kept in a local variable, it is not returned
// to the caller)
// The matrix A is referenced only through the user-supplied routines atimes,
// which computes the product of either A or its transpose with a vector,
// and asolve.

// linbcg : biconjugate gradient iteration for A.x = b.
//   n     : number of unknowns; every vector is indexed 1..n
//   b     : right-hand side b[1..n]
//   x     : initial guess on input, last iterate on output
//   itol  : convergence test selector, must be 1, 2, 3 or 4
//   tol   : tolerance compared against the error estimate err
//   itmax : maximum number of iterations performed
// The work vectors p, pp, r, rr, z, zz, the constant EPS and the routines
// atimes, asolve and snrm are defined outside this function.
// An illegal itol is fatal: a message is written to stderr and the program
// exits with EXIT_FAILURE. A breakdown of the recurrence (a vanishing inner
// product) or running out of iterations only produces a warning on stderr;
// x then holds the last iterate that was computed.
void linbcg(int n,double b[],double x[],int itol,double tol,int itmax)
{
 int j;
 int iter=0;
 int converged=0;
 int broke_down=0;
 int zero_residual=1;
 double ak,akden,bk,bknum,bnrm,dxnrm,xnrm,zminrm;
 double bkden=1.0; // overwritten with bknum before its first real use
 double znrm=0.0;  // only meaningful for itol 3 and 4
 double err=1.e12;

// reject an unknown convergence test before doing any work
 if(itol<1 || itol>4){
   fprintf(stderr,"linbcg error : illegal itol\n");
   exit(EXIT_FAILURE);
 }

// reset the work arrays
 for(j=1;j<=n;j++){
   p[j]=0.0;
   r[j]=0.0;
   z[j]=0.0;
   pp[j]=0.0;
   rr[j]=0.0;
   zz[j]=0.0;
 }

// calculate the initial residual r = b - (result of atimes on x)
// and start the second residual rr from the same values
 atimes(n,x,r,0);
 for(j=1;j<=n;j++){
   r[j]=b[j]-r[j];
   rr[j]=r[j];
   if(r[j]!=0.0) zero_residual=0;
 }

// an exactly zero residual means x already satisfies the system;
// iterating would only evaluate 0/0 and overwrite x with NaN
 if(zero_residual) return;

// uncomment this line to get the "minimum residual" variant of the algorithm
// atimes(n,r,rr,0);

// reference norm bnrm and first z, according to the selected test
 if(itol==1){
   bnrm=snrm(n,b,itol);
   asolve(n,r,z,0);
 }
 else{
   asolve(n,b,z,0);
   bnrm=snrm(n,z,itol);
   asolve(n,r,z,0);
   if(itol>=3) znrm=snrm(n,z,itol);
 }

// a zero reference norm would turn every relative error into inf or NaN,
// so fall back to an absolute error measure
 if(bnrm==0.0) bnrm=1.0;

 while(iter<itmax){ // <--- main loop, at most itmax passes
   ++iter;
   asolve(n,rr,zz,1); // final 1 indicates use of transpose matrix A tilde
   bknum=0.0;
   for(j=1;j<=n;j++) bknum+=z[j]*rr[j];

// a vanishing bknum makes the next bk a division by zero : stop here
   if(bknum==0.0){
     broke_down=1;
     break;
   }

// calculate coefficient bk and direction vectors p and pp
   if(iter==1){
     for(j=1;j<=n;j++){
       p[j]=z[j];
       pp[j]=zz[j];
     }
   }
   else{
     bk=bknum/bkden; // bkden is the (non zero) bknum of the previous pass
     for(j=1;j<=n;j++){
       p[j]=bk*p[j]+z[j];
       pp[j]=bk*pp[j]+zz[j];
     }
   }

// calculate coefficient ak, new iterate x, and new residuals r and rr
   bkden=bknum;
   atimes(n,p,z,0);
   akden=0.0;
   for(j=1;j<=n;j++) akden+=z[j]*pp[j];

// a vanishing akden would make ak infinite or NaN : keep x as it is
   if(akden==0.0){
     broke_down=1;
     break;
   }
   ak=bknum/akden;
   atimes(n,pp,zz,1);
   for(j=1;j<=n;j++){
     x[j] += ak*p[j];
     r[j] -= ak*z[j];
     rr[j]-= ak*zz[j];
   }

// solve A tilde . z = r and check stopping criterion
   asolve(n,r,z,0);
   if(itol==1) err=snrm(n,r,itol)/bnrm;
   else if(itol==2) err=snrm(n,z,itol)/bnrm;
   else{
     zminrm=znrm;
     znrm=snrm(n,z,itol);
     if(fabs(zminrm-znrm)>EPS*znrm){
       dxnrm=fabs(ak)*snrm(n,p,itol);
       err=znrm/fabs(zminrm-znrm)*dxnrm;
     }
     else{
       err=znrm/bnrm; // error may not be accurate, so loop again
       continue;
     }
     xnrm=snrm(n,x,itol);
     if(err<=0.5*xnrm) err/=xnrm;
     else{
       err=znrm/bnrm; // error may not be accurate, so loop again
       continue;
     }
   }

   if(err<=tol){
     printf("\nerr = %g < tol = %g\n",err,tol);
     converged=1;
     break;
   }
 }

// report the two ways the loop can end without meeting the tolerance
 if(broke_down){
   fprintf(stderr,"linbcg warning : breakdown at iteration %d, keeping last iterate\n",iter);
 }
 else if(!converged){
   fprintf(stderr,"linbcg warning : no convergence after %d iterations (err = %g, tol = %g)\n",iter,err,tol);
 }
}
