MVA_example.py
import os
import ROOT
from ROOT import TFile, gDirectory, TChain, TObject, TMVA, TCut, TCanvas, THStack, TH1F
from array import array
from copy import deepcopy
from math import sqrt
import CMS_lumi, tdrstyle
from HHPlotterTools import *
ROOT.gROOT.SetBatch(ROOT.kTRUE) # run in batch mode, don't open plots
 
# GLOBAL VARIABLES
Methods = [ ]
methods = [ ]
 
# FILES & TREES
MVA_DIR = "/shome/ineuteli/CMSSW_5_3_24/src/MVA/"
fileS = TFile(MVA_DIR+"exoticProcess.root")
fileB = TFile(MVA_DIR+"boringProcess.root")
treeS1 = fileS.Get("preselection1")
treeB1 = fileB.Get("preselection1")
 
# CMS LAY OUT
CMS_lumi.cmsText = "CMS"
CMS_lumi.extraText = "Preliminary"
CMS_lumi.cmsTextSize = 0.65
CMS_lumi.outOfFrame = True
tdrstyle.setTDRStyle()
 
 
 
# CONFIGURATION CLASS
class configuration(object):
    """Class defining a set of variables for specified signal and background trees."""
 
    def __init__(self, name, varNames, treeS, treeB):
        self.name = name
        self.varNames = varNames
        self.treeS = treeS
        self.treeB = treeB
 
 
 
# TRAIN
def train(config):
    """Train for a given configuration with a list of methods are defined in the main function."""
    print "\n>>> train with configuration "+config.name
 
    if not os.path.exists(MVA_DIR+"trees"):
        os.makedirs(MVA_DIR+"trees")
    weightsdir = MVA_DIR+"weights/"+config.name
    if not os.path.exists(weightsdir):
        os.makedirs(weightsdir)
        print ">>> made directory "+weightsdir
 
    f_out = TFile(MVA_DIR+"trees/MVA_"+config.name+".root","RECREATE")
    TMVA.Tools.Instance()
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weightsdir
 
    factory = TMVA.Factory( "TMVAClassification", f_out,
                            ":".join([ "!V",
                            "!Silent",
                            "Color",
                            "DrawProgressBar",
                            "Transformations=I;D;P;G,D",
                            "AnalysisType=Classification" ]) )
 
    for name in config.varNames:
        if name[0] == "N":
            factory.AddVariable(name,'I') # integer
        else:
            factory.AddVariable(name,'F') # float
 
    factory.AddSignalTree(config.treeS)
    factory.AddBackgroundTree(config.treeB)
 
    cut_S = TCut("")
    cut_B = TCut("")
    factory.PrepareTrainingAndTestTree( cut_S, cut_B,
                                        ":".join([ "!V",
                                                   "nTrain_Signal=0",
                                                   "nTrain_Background=0",
                                                   "SplitMode=Random",
                                                   "NormMode=NumEvents" ]) )
 
    if "BDT" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDT", "!H:!V" )
 
    # my default parameters settings
    NTrees = 2000
    MaxDepth = 4
    AdaBoostBeta = 0.10
    nCuts = 90
 
    if "BDTTuned" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTTuned",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % AdaBoostBeta,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTBoost10" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTBoost10",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % 0.10,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTBoost15" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTBoost15",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % 0.15,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTBoost30" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTBoost30",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % 0.30,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTMaxDepth" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTMaxDepth",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % 5,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % AdaBoostBeta,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "MLPTanh" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPTanh",
                            ":".join([ "!H","!V",
                                       "LearningRate=0.01",
#                                       "NCycles=200",
                                       "NeuronType=tanh",
                                       "VarTransform=N",
                                       "HiddenLayers=N,N",
                                       "UseRegulator"
                                      ]) )
 
    if "MLPLearningrate" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPLearningRate",
                            ":".join([ "!H","!V",
                                       "LearningRate=0.1",
#                                       "NCycles=200",
                                       "NeuronType=sigmoid",
                                       "VarTransform=N",
                                       "HiddenLayers=N,N",
                                       "UseRegulator"
                                      ]) )
 
    if "MLPNodes" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPNodes",
                            ":".join([ "!H","!V",
                                       "LearningRate=0.01",
#                                       "NCycles=200",
                                       "NeuronType=tanh",
                                       "VarTransform=N",
                                       "HiddenLayers=N+4,N",
                                       "UseRegulator"
                                      ]) )
 
    if "MLPSigmoid" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPSigmoid",
                            ":".join([ "!H","!V",
                                      "LearningRate=0.01",
#                                      "NCycles=200",
                                      "NeuronType=sigmoid",
                                      "VarTransform=N",
                                      "HiddenLayers=N,N",
                                      "UseRegulator"
                                     ]) )
 
    factory.TrainAllMethods()
    factory.TestAllMethods()
    print ">>> factory.EvaluateAllMethods()"
    factory.EvaluateAllMethods()
    print ">>> f_out.Close()"
    f_out.Close()
 
 
 
# APPLY
def apply(config):
    """Apply MVA weights for a given configuration and save ROOT file with MVA output."""
    print "\n>>> apply with configuration "+config.name+" and add MVA response histograms to MVA root file"
    # https://root.cern.ch/root/html/tutorials/tree/htest.C.html
 
    treeS = config.treeS
    treeB = config.treeB
    reader = TMVA.Reader()
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root",'UPDATE')
    dir = f.GetDirectory("TotalSample")
    if not dir:
        dir = f.mkdir("TotalSample")
    dir.cd()
 
    vars = [ ]
    for name in config.varNames:
        vars.append(array('f',[0]))
        reader.AddVariable(name,vars[-1])
 
    for Method, method in Methods:
        reader.BookMVA(method,MVA_DIR+"weights/"+config.name+"/TMVAClassification_"+method+".weights.xml")
        if Method == "MLP":
            histS = TH1F("histS_"+method, "", nBins, -0.4, 1.4)
            histB = TH1F("histB_"+method, "", nBins, -0.4, 1.4)
        else:
            histS = TH1F("histS_"+method, "", nBins, -1.0, 1.0)
            histB = TH1F("histB_"+method, "", nBins, -1.0, 1.0)
 
        # fill histograms
        print ">>> looping over trees with weights for " + method
        treeS.ResetBranchAddresses()
        for i in range(len(config.varNames)):
            treeS.SetBranchAddress(config.varNames[i],vars[i])
        for evt in range(treeS.GetEntries()):
            treeS.GetEntry(evt)
            histS.Fill( reader.EvaluateMVA(method) )
        treeB.ResetBranchAddresses()
        for i in range(len(config.varNames)):
            treeB.SetBranchAddress(config.varNames[i],vars[i])
        for evt in range(treeB.GetEntries()):
            treeB.GetEntry(evt)
            histB.Fill( reader.EvaluateMVA(method) )
        f.Write("",TObject.kOverwrite)
        gDirectory.Delete("histS_"+method)
        gDirectory.Delete("histB_"+method)
 
    f.Close()
 
 
 
# HISTOGRAMS: TMVA output for training and test sample
def plotTrainAndTest(config):
    """Plot MVA output and efficiencies of a given configuration for training and test samples."""
    print "\n>>> plot training with training and test sample for configuration "+config.name
 
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root")
 
    for Method, method in Methods:
        for sample in ["train","test"]:
 
            if sample == "train":
                tree = gDirectory.Get("TrainTree")
            elif sample == "test":
                tree = gDirectory.Get("TestTree")
 
            if Method == "MLP":
                histS = TH1F("histS", "", nBins, -0.4, 1.4)
                histB = TH1F("histB", "", nBins, -0.4, 1.4)
            else:
                histS = TH1F("histS", "", nBins, -1.0, 1.0)
                histB = TH1F("histB", "", nBins, -1.0, 1.0)
 
            tree.Draw(method+">>histS","classID == 0","goff")
            tree.Draw(method+">>histB","classID == 1", "goff")
 
            c = TCanvas("c","c",100,100,800,600)
            histB.Draw()
            histS.Draw("same")
            # axes, legend, ...
            CMS_lumi.CMS_lumi(c,14,33)
            c.SaveAs(MVA_DIR+method+"_"+config.name+".png")
            c.Close()
            gDirectory.Delete("histS")
            gDirectory.Delete("histB")
 
    f.Close()
 
 
 
# HISTOGRAMS: TMVA output for total sample
def plotApplication(config):
    """Plot MVA output and efficiencies of a given configuration for a total sample."""
    print "\n>>> examine training with application to total sample for configuration "+config.name
 
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root")
 
    for Method, method in Methods:        
        histS = f.Get("TotalSample/histS_"+method)
        histB = f.Get("TotalSample/histB_"+method)
 
        c = TCanvas("c","c",100,100,800,600)
        histB.Draw()
        histS.Draw("same")
        histB.GetYaxis().SetRangeUser(0,1.2)
        # axes, legend, ...
        CMS_lumi.CMS_lumi(c,14,33)
        c.SaveAs(MVA_DIR+method+"_"+config.name+"_Appl.png")
        c.Close()
 
    f.Close()
 
 
 
# 2D COLOR HISTOGRAM: Correlation matrices
def correlation(config):
    """Plot correlation of variables."""
    print "\n>>> make correlation matrix plots"
    ROOT.gStyle.SetPalette(1) # for rainbow colors
 
    reader = TMVA.Reader()
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root")
    TestTree = gDirectory.Get("TestTree")
 
    c = TCanvas("c","c",100,100,800,800)
    histS = f.Get("CorrelationMatrixS")
    histS.Draw("colz")
    histS.SetLabelSize(0.030,"x")
    histS.SetLabelSize(0.030,"y")
    CMS_lumi.CMS_lumi(c,14,33)
    c.SaveAs(MVA_DIR+"CorrelationMatrixS_"+config.name+".png")
    c.Close()
 
    c = TCanvas("c","c",100,100,800,800)
    histB = f.Get("CorrelationMatrixB")
    histB.Draw("colz")
    histS.SetLabelSize(0.030,"x")
    histS.SetLabelSize(0.030,"y")
    CMS_lumi.CMS_lumi(c,14,33)
    c.SaveAs(MVA_DIR+"CorrelationMatrixB_"+config.name+".png")
    c.Close()
 
    f.Close()
 
 
 
# MAIN
def main():
    """Main function."""
    global Methods, methods, nBins
 
    allVars1 = [  "jet1Pt", "jet2Pt", "jet1Eta", "jet2Eta",
                 "bjet1Pt", "bjet2Pt", "bjet1Eta", "bjet2Eta", "M_bb", ]
    allVars2 = [  "jet1Pt", "jet2Pt", "bjet1Pt", "bjet2Pt", "M_bb", ]
 
    configs = [ configuration("allVars1", allVars1, treeS1, treeB1),
                configuration("allVars2", allVars2, treeS1, treeB1), ]
 
    Methods = [
                ("BDT","BDT"),
                ("BDT","BDTTuned"),
#                 ("BDT","BDTNTrees"),
#                 ("BDT","BDTMaxDepth"),
#                 ("BDT","BDTBoost10"),
                ("BDT","BDTBoost15"),
                ("BDT","BDTBoost30"),
#                 ("BDT","BDTNodeSize"),
                ("MLP","MLPTanh"),
#                 ("MLP","MLPLearningRate"),
#                 ("MLP","MLPNodes"),
#                 ("MLP","MLPSigmoid")
              ]
    methods = [ method[1] for method in Methods ]
 
    for config in configs:
        train(config)
        apply(config)
 
 
 
if __name__ == '__main__':
    main()
    print "\n>>> done"