TUnfold: TUnfold.h Source File

00001 // Author: Stefan Schmitt
00002 // DESY, 13/10/08
00003 
00004 //  Version 17.6,  updated doxygen-style comments, add one argument for scanLCurve
00005 //
00006 //  History:
00007 //    Version 17.5, fix memory leak and other bugs
00008 //    Version 17.4, in parallel to changes in TUnfoldBinning
00009 //    Version 17.3, in parallel to changes in TUnfoldBinning
00010 //    Version 17.2, in parallel to changes in TUnfoldBinning
00011 //    Version 17.1, bug fixes in GetFoldedOutput, GetOutput
00012 //    Version 17.0, error matrix with SetInput, store fL not fLSquared
00013 //    Version 16.2, in parallel to bug-fix in TUnfoldSys
00014 //    Version 16.1, in parallel to bug-fix in TUnfold.C
00015 //    Version 16.0, some cleanup, more getter functions, query version number
00016 //    Version 15, simplified L-curve scan, new tau definition, new eror calc.
00017 //    Version 14, with changes in TUnfoldSys.cxx
00018 //    Version 13, new methods for derived classes
00019 //    Version 12, with support for preconditioned matrix inversion
00020 //    Version 11, regularisation methods have return values
00021 //    Version 10, with bug-fix in TUnfold.cxx
00022 //    Version 9, implements method for optimized inversion of sparse matrix
00023 //    Version 8, replace all TMatrixSparse matrix operations by private code
00024 //    Version 7, fix problem with TMatrixDSparse,TMatrixD multiplication
00025 //    Version 6, completely remove definition of class XY
00026 //    Version 5, move definition of class XY from TUnfold.C to this file
00027 //    Version 4, with bug-fix in TUnfold.C
00028 //    Version 3, with bug-fix in TUnfold.C
00029 //    Version 2, with changed ScanLcurve() arguments
00030 //    Version 1, added ScanLcurve() method
00031 //    Version 0, stable version of basic unfolding algorithm
00032 
00033 
00034 #ifndef ROOT_TUnfold
00035 #define ROOT_TUnfold
00036 
00037 //////////////////////////////////////////////////////////////////////////
00038 //                                                                      //
00039 //                                                                      //
00040 //  TUnfold provides functionality to correct data                      //
00041 //   for migration effects.                                             //
00042 //                                                                      //
00043 //  Citation: S.Schmitt, JINST 7 (2012) T10003 [arXiv:1205.6201]        //
00044 //                                                                      //
00045 //                                                                      //
00046 //  TUnfold solves the inverse problem                                  //
00047 //                                                                      //
00048 //                   T   -1            2          T                     //
00049 //    chi**2 = (y-Ax) Vyy  (y-Ax) + tau  (L(x-x0)) L(x-x0)              //
00050 //                                                                      //
00051 //  Monte Carlo input                                                   //
00052 //    y: vector of measured quantities  (dimension ny)                  //
00053 //    Vyy: covariance matrix for y (dimension ny x ny)                  //
00054 //    A: migration matrix               (dimension ny x nx)             //
00055 //    x: unknown underlying distribution (dimension nx)                 //
00056 //  Regularisation                                                      //
00057 //    tau: parameter, defining the regularisation strength              //
00058 //    L: matrix of regularisation conditions (dimension nl x nx)        //
00059 //    x0: underlying distribution bias                                  //
00060 //                                                                      //
00061 //  where chi**2 is minimized as a function of x                        //
00062 //                                                                      //
00063 //  The algorithm is based on "standard" matrix inversion, with the     //
00064 //  known limitations in numerical accuracy and computing cost for      //
00065 //  matrices with large dimensions.                                     //
00066 //                                                                      //
00067 //  Thus the algorithm should not used for large dimensions of x and y  //
00068 //    dim(x) should not exceed O(100)                                   //    
00069 //    dim(y) should not exceed O(500)                                   //
00070 //                                                                      //
00071 //////////////////////////////////////////////////////////////////////////
00072 
00073 /*
00074   This file is part of TUnfold.
00075 
00076   TUnfold is free software: you can redistribute it and/or modify
00077   it under the terms of the GNU General Public License as published by
00078   the Free Software Foundation, either version 3 of the License, or
00079   (at your option) any later version.
00080 
00081   TUnfold is distributed in the hope that it will be useful,
00082   but WITHOUT ANY WARRANTY; without even the implied warranty of
00083   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00084   GNU General Public License for more details.
00085 
00086   You should have received a copy of the GNU General Public License
00087   along with TUnfold.  If not, see <http://www.gnu.org/licenses/>.
00088 */
00089 
00090 #include <TH1D.h>
00091 #include <TH2D.h>
00092 #include <TObject.h>
00093 #include <TArrayI.h>
00094 #include <TSpline.h>
00095 #include <TMatrixDSparse.h>
00096 #include <TMatrixD.h>
00097 #include <TObjArray.h>
00098 #include <TString.h>
00099 
00100 #define TUnfold_VERSION "V17.6"
00101 #define TUnfold_CLASS_VERSION 17
00102 
00103 #define TUnfold TUnfoldV17
00104 
00105 class TUnfoldV17 : public TObject {
00106  private:
00107    void InitTUnfold(void);     // initialize all data members
00108  public:
00109 
00110    /// type of extra constraint
00111    enum EConstraint {
00112 
00113       /// use no extra constraint
00114       kEConstraintNone =0,
00115 
00116       /// enforce preservation of the area
00117       kEConstraintArea =1
00118    };
00119 
00120    /// choice of regularisation scheme
00121    enum ERegMode {
00122 
00123       /// no regularisation, or defined later by RegularizeXXX() methods
00124       kRegModeNone = 0,
00125 
00126       /// regularise the amplitude of the output distribution
00127       kRegModeSize = 1,
00128 
00129       /// regularize the 1st derivative of the output distribution
00130       kRegModeDerivative = 2,
00131 
00132       /// regularize the 2nd derivative of the output distribution
00133       kRegModeCurvature = 3,
00134 
00135 
00136       /// mixed regularisation pattern
00137       kRegModeMixed = 4
00138    };
00139 
00140    /// arrangement of axes for the response matrix (TH2 histogram)
00141    enum EHistMap {
00142 
00143       /// truth level on x-axis of the response matrix
00144       kHistMapOutputHoriz = 0,
00145 
00146       /// truth level on y-axis of the response matrix
00147       kHistMapOutputVert = 1
00148    };
00149 
00150  protected:
00151    /// response matrix A
00152    TMatrixDSparse * fA;
00153    /// regularisation conditions L
00154    TMatrixDSparse *fL;
00155    /// input (measured) data y
00156    TMatrixD *fY;
00157    /// covariance matrix Vyy corresponding to y
00158    TMatrixDSparse *fVyy;
00159    /// scale factor for the bias
00160    Double_t fBiasScale;
00161    /// bias vector x0
00162    TMatrixD *fX0;
00163    /// regularisation parameter tau squared
00164    Double_t fTauSquared;
00165    /// mapping of matrix indices to histogram bins
00166    TArrayI fXToHist;
00167    /// mapping of histogram bins to matrix indices
00168    TArrayI fHistToX;
00169    /// truth vector calculated from the non-normalized response matrix
00170    TArrayD fSumOverY;
00171    /// type of constraint to use for the unfolding
00172    EConstraint fConstraint;
00173    /// type of regularisation
00174    ERegMode fRegMode;
00175  private:
00176    /// number of input bins which are dropped because they have error=0
00177    Int_t fIgnoredBins;
00178    /// machine accuracy used to determine matrix rank after eigenvalue analysis
00179    Double_t fEpsMatrix;
00180    /// unfolding result x
00181    TMatrixD *fX;
00182    /// covariance matrix Vxx
00183    TMatrixDSparse *fVxx;
00184    /// inverse of covariance matrix Vxx<sup>-1</sub>
00185    TMatrixDSparse *fVxxInv;
00186    /// inverse of the input covariance matrix Vyy<sup>-1</sub>
00187    TMatrixDSparse *fVyyInv;
00188    /// result x folded back A*x
00189    TMatrixDSparse *fAx;
00190    /// chi**2 contribution from (y-Ax)Vyy<sup>-1</sub>(y-Ax)
00191    Double_t fChi2A;
00192    /// chi**2 contribution from (x-s*x0)<sup>T</sub>L<sup>T</sub>L(x-s*x0)
00193    Double_t fLXsquared;
00194    /// maximum global correlation coefficient
00195    Double_t fRhoMax;
00196    /// average global correlation coefficient
00197    Double_t fRhoAvg;
00198    /// number of degrees of freedom
00199    Int_t fNdf;
00200    /// matrix contribution to the of derivative dx_k/dA_ij
00201    TMatrixDSparse *fDXDAM[2];
00202    /// vector contribution to the of derivative dx_k/dA_ij
00203    TMatrixDSparse *fDXDAZ[2];
00204    /// derivative of the result wrt tau squared
00205    TMatrixDSparse *fDXDtauSquared;
00206    /// derivative of the result wrt dx/dy
00207    TMatrixDSparse *fDXDY;
00208    /// matrix E^(-1)
00209    TMatrixDSparse *fEinv;
00210    /// matrix E
00211    TMatrixDSparse *fE;
00212  protected:
00213    // Int_t IsNotSymmetric(TMatrixDSparse const &m) const;
00214    virtual Double_t DoUnfold(void);     // the unfolding algorithm
00215    virtual void ClearResults(void);     // clear all results
00216    void ClearHistogram(TH1 *h,Double_t x=0.) const;
00217    virtual TString GetOutputBinName(Int_t iBinX) const; // name a bin
00218    TMatrixDSparse *MultiplyMSparseM(const TMatrixDSparse *a,const TMatrixD *b) const; // multiply sparse and non-sparse matrix
00219    TMatrixDSparse *MultiplyMSparseMSparse(const TMatrixDSparse *a,const TMatrixDSparse *b) const; // multiply sparse and sparse matrix
00220    TMatrixDSparse *MultiplyMSparseTranspMSparse(const TMatrixDSparse *a,const TMatrixDSparse *b) const; // multiply transposed sparse and sparse matrix
00221    TMatrixDSparse *MultiplyMSparseMSparseTranspVector
00222       (const TMatrixDSparse *m1,const TMatrixDSparse *m2,
00223        const TMatrixTBase<Double_t> *v) const; // calculate M_ij = sum_k [m1_ik*m2_jk*v[k] ]. the pointer v may be zero (means no scaling).
00224    TMatrixDSparse *InvertMSparseSymmPos(const TMatrixDSparse *A,Int_t *rank) const; // invert symmetric (semi-)positive sparse matrix
00225    void AddMSparse(TMatrixDSparse *dest,Double_t f,const TMatrixDSparse *src) const; // replacement for dest += f*src
00226    TMatrixDSparse *CreateSparseMatrix(Int_t nrow,Int_t ncol,Int_t nele,Int_t *row,Int_t *col,Double_t *data) const; // create a TMatrixDSparse from an array
00227    /// returns internal number of output (truth) matrix rows
00228    inline Int_t GetNx(void) const {
00229       return fA->GetNcols();
00230    }
00231    /// converts truth histogram bin number to matrix row
00232    inline Int_t GetRowFromBin(int ix) const { return fHistToX[ix]; }
00233    /// converts matrix row to truth histogram bin number
00234    inline Int_t GetBinFromRow(int ix) const { return fXToHist[ix]; }
00235    /// returns the number of measurement bins
00236    inline Int_t GetNy(void) const {
00237       return fA->GetNrows();
00238    }
00239    /// vector of the unfolding result
00240    inline const TMatrixD *GetX(void) const { return fX; }
00241    /// covariance matrix of the result
00242    inline const TMatrixDSparse *GetVxx(void) const { return fVxx; }
00243    /// inverse of covariance matrix of the result
00244    inline const TMatrixDSparse *GetVxxInv(void) const { return fVxxInv; }
00245    /// vector of folded-back result
00246    inline const TMatrixDSparse *GetAx(void) const { return fAx; }
00247    /// matrix of derivatives dx/dy
00248    inline const TMatrixDSparse *GetDXDY(void) const { return fDXDY; }
00249    /// matrix contributions of the derivative dx/dA
00250    inline const TMatrixDSparse *GetDXDAM(int i) const { return fDXDAM[i]; }
00251    /// vector contributions of the derivative dx/dA
00252    inline const TMatrixDSparse *GetDXDAZ(int i) const { return fDXDAZ[i]; }
00253    /// matrix E<sup>-1</sup>, using internal bin counting
00254    inline const TMatrixDSparse *GetEinv(void) const { return fEinv; }
00255    /// matrix E, using internal bin counting
00256    inline const TMatrixDSparse *GetE(void) const { return fE; }
00257    /// inverse of covariance matrix of the data y
00258    inline const TMatrixDSparse *GetVyyInv(void) const { return fVyyInv; }
00259 
00260    void ErrorMatrixToHist(TH2 *ematrix,const TMatrixDSparse *emat,const Int_t *binMap,Bool_t doClear) const; // return an error matrix as histogram
00261    Double_t GetRhoIFromMatrix(TH1 *rhoi,const TMatrixDSparse *eOrig,const Int_t *binMap,TH2 *invEmat) const; // return global correlation coefficients
00262    /// vector of derivative dx/dtauSquared, using internal bin counting
00263    inline const TMatrixDSparse *GetDXDtauSquared(void) const { return fDXDtauSquared; }
00264    /// delete matrix and invalidate pointer
00265    static void DeleteMatrix(TMatrixD **m);
00266    /// delete sparse matrix and invalidate pointer
00267    static void DeleteMatrix(TMatrixDSparse **m);
00268 
00269    Bool_t AddRegularisationCondition(Int_t i0,Double_t f0,Int_t i1=-1,Double_t f1=0.,Int_t i2=-1,Double_t f2=0.); // add regularisation condition for a triplet of output bins
00270    Bool_t AddRegularisationCondition(Int_t nEle,const Int_t *indices,const Double_t *rowData); // add a regularisation condition
00271 public:
00272    static const char*GetTUnfoldVersion(void);
00273    // Set up response matrix and regularisation scheme
00274    TUnfoldV17(const TH2 *hist_A, EHistMap histmap,
00275            ERegMode regmode = kRegModeSize,
00276            EConstraint constraint=kEConstraintArea);
00277    // for root streamer and derived classes
00278    TUnfoldV17(void);
00279    virtual ~TUnfoldV17(void);
00280    // define input distribution
00281    virtual Int_t SetInput(const TH1 *hist_y, Double_t scaleBias=0.0,Double_t oneOverZeroError=0.0,const TH2 *hist_vyy=0,const TH2 *hist_vyy_inv=0);
00282    // Unfold with given choice of tau and input 
00283    virtual Double_t DoUnfold(Double_t tau);
00284    Double_t DoUnfold(Double_t tau,const TH1 *hist_y, Double_t scaleBias=0.0);
00285    // scan the L curve using successive calls to DoUnfold(Double_t) at various tau
00286    virtual Int_t ScanLcurve(Int_t nPoint,Double_t tauMin,
00287                             Double_t tauMax,TGraph **lCurve,
00288              TSpline **logTauX=0,TSpline **logTauY=0,
00289                             TSpline **logTauCurvature=0);
00290 
00291    // access unfolding results
00292    Double_t GetTau(void) const;
00293    void GetOutput(TH1 *output,const Int_t *binMap=0) const;
00294    void GetEmatrix(TH2 *ematrix,const Int_t *binMap=0) const;
00295    void GetRhoIJ(TH2 *rhoij,const Int_t *binMap=0) const;
00296    Double_t GetRhoI(TH1 *rhoi,const Int_t *binMap=0,TH2 *invEmat=0) const;
00297    void GetFoldedOutput(TH1 *folded,const Int_t *binMap=0) const;
00298 
00299    // access input parameters
00300    void GetProbabilityMatrix(TH2 *A,EHistMap histmap) const;
00301    void GetNormalisationVector(TH1 *s,const Int_t *binMap=0) const; // get the vector of normalisation factors, equivalent to the initial bias vector
00302    void GetInput(TH1 *inputData,const Int_t *binMap=0) const; // get input data
00303    void GetInputInverseEmatrix(TH2 *ematrix);   // get input data inverse of error matrix
00304    void GetBias(TH1 *bias,const Int_t *binMap=0) const; // get bias (includind biasScale)
00305    Int_t GetNr(void) const; // number of regularisation conditions
00306    void GetL(TH2 *l) const; // get matrix of regularisation conditions
00307    void GetLsquared(TH2 *lsquared) const;
00308 
00309    // access various properties of the result
00310    /// get maximum global correlation determined in recent unfolding
00311    inline Double_t GetRhoMax(void) const { return fRhoMax; }
00312    /// get average global correlation determined in recent unfolding
00313    inline Double_t GetRhoAvg(void) const { return fRhoAvg; }
00314    /// get &chi;<sup>2</sup><sub>A</sub> contribution determined in recent unfolding
00315    inline Double_t GetChi2A(void) const { return fChi2A; }
00316 
00317    Double_t GetChi2L(void) const; // get &chi;<sup>2</sup><sub>L</sub> contribution determined in recent unfolding
00318    virtual Double_t GetLcurveX(void) const;        // get value on x axis of L curve
00319    virtual Double_t GetLcurveY(void) const;        // get value on y axis of L curve
00320    /// get number of degrees of freedom determined in recent unfolding
00321    ///
00322    /// This returns the number of valid measurements minus the number
00323    /// of unfolded truth bins. If the area constraint is active, one
00324    /// further degree of freedom is subtracted
00325    inline Int_t GetNdf(void) const { return fNdf; }
00326    Int_t GetNpar(void) const;  // get number of parameters
00327 
00328    // advanced features
00329    void SetBias(const TH1 *bias);       // set alternative bias
00330    void SetConstraint(EConstraint constraint); // set type of constraint for the next unfolding
00331    Int_t RegularizeSize(int bin, Double_t scale = 1.0);   // regularise the size of one output bin
00332    Int_t RegularizeDerivative(int left_bin, int right_bin, Double_t scale = 1.0); // regularize difference of two output bins (1st derivative)
00333    Int_t RegularizeCurvature(int left_bin, int center_bin, int right_bin, Double_t scale_left = 1.0, Double_t scale_right = 1.0);  // regularize curvature of three output bins (2nd derivative)
00334    Int_t RegularizeBins(int start, int step, int nbin, ERegMode regmode);        // regularize a 1-dimensional curve
00335    Int_t RegularizeBins2D(int start_bin, int step1, int nbin1, int step2, int nbin2, ERegMode regmode);  // regularize a 2-dimensional grid
00336    /// get numerical accuracy for Eigenvalue analysis when inverting
00337    /// matrices with rank problems
00338    inline Double_t GetEpsMatrix(void) const { return  fEpsMatrix; } 
00339    /// set numerical accuracy for Eigenvalue analysis when inverting
00340    /// matrices with rank problems
00341    void SetEpsMatrix(Double_t eps); // set accuracy for eigenvalue analysis
00342 
00343    ClassDef(TUnfoldV17, TUnfold_CLASS_VERSION) //Unfolding with support for L-curve analysis
00344 };
00345 
00346 #endif