User Tools

Site Tools


mva:mvaexample

This is an old revision of the document!


MVA_example.python
import os
import ROOT
from ROOT import TFile, gDirectory, TChain, TObject, TMVA, TCut, TCanvas, THStack, TH1F
from array import array
from copy import deepcopy
from math import sqrt
import CMS_lumi, tdrstyle
from HHPlotterTools import *
ROOT.gROOT.SetBatch(ROOT.kTRUE) # run in batch mode, don't open plots
 
# GLOBAL VARIABLES
Methods = [ ]
methods = [ ]
 
# FILES & TREES
MVA_DIR = "/shome/ineuteli/CMSSW_5_3_24/src/MVA/"
fileS = TFile(MVA_DIR+"exoticProcess.root")
fileB = TFile(MVA_DIR+"boringProcess.root")
treeS1 = fileS.Get("preselection1")
treeB1 = fileB.Get("preselection1")
 
# CMS LAY OUT
CMS_lumi.cmsText = "CMS"
CMS_lumi.extraText = "Preliminary"
CMS_lumi.cmsTextSize = 0.65
CMS_lumi.outOfFrame = True
tdrstyle.setTDRStyle()
 
 
 
# CONFIGURATION CLASS
class configuration(object):
    """Class defining a set of variables for specified signal and background trees."""
 
    def __init__(self, name, varNames, treeS, treeB):
        self.name = name
        self.varNames = varNames
        self.treeS = treeS
        self.treeB = treeB
 
 
 
# TRAIN
def train(config):
    """Train for a given configuration with a list of methods are defined in the main function."""
    print "\n>>> train with configuration "+config.name
 
    if not os.path.exists(MVA_DIR+"trees"):
        os.makedirs(MVA_DIR+"trees")
    weightsdir = MVA_DIR+"weights/"+config.name
    if not os.path.exists(weightsdir):
        os.makedirs(weightsdir)
        print ">>> made directory "+weightsdir
 
    f_out = TFile(MVA_DIR+"trees/MVA_"+config.name+".root","RECREATE")
    TMVA.Tools.Instance()
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weightsdir
 
    factory = TMVA.Factory( "TMVAClassification", f_out,
                            ":".join([ "!V",
                            "!Silent",
                            "Color",
                            "DrawProgressBar",
                            "Transformations=I;D;P;G,D",
                            "AnalysisType=Classification" ]) )
 
    for name in config.varNames:
        if name[0] == "N":
            factory.AddVariable(name,'I') # integer
        else:
            factory.AddVariable(name,'F') # float
 
    factory.AddSignalTree(config.treeS)
    factory.AddBackgroundTree(config.treeB)
 
    cut_S = TCut("")
    cut_B = TCut("")
    factory.PrepareTrainingAndTestTree( cut_S, cut_B,
                                        ":".join([ "!V",
                                                   "nTrain_Signal=0",
                                                   "nTrain_Background=0",
                                                   "SplitMode=Random",
                                                   "NormMode=NumEvents" ]) )
 
    if "BDT" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDT", "!H:!V" )
 
    # my default parameters settings
    NTrees = 2000
    MaxDepth = 4
    AdaBoostBeta = 0.10
    nCuts = 90
 
    if "BDTTuned" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTTuned",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % AdaBoostBeta,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTBoost10" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTBoost10",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % 0.10,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTBoost15" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTBoost15",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % 0.15,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTBoost30" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTBoost30",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % MaxDepth,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % 0.30,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "BDTMaxDepth" in methods:
        factory.BookMethod(TMVA.Types.kBDT, "BDTMaxDepth",
                           ":".join([ "!H","!V",
                                      "NTrees=%s" % NTrees,
                                      "MaxDepth=%s" % 5,
                                      "BoostType=AdaBoost",
                                      "AdaBoostBeta=%s" % AdaBoostBeta,
                                      "SeparationType=GiniIndex",
                                      "nCuts=%s" % nCuts
                                     ]) )
 
    if "MLPTanh" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPTanh",
                            ":".join([ "!H","!V",
                                       "LearningRate=0.01",
#                                       "NCycles=200",
                                       "NeuronType=tanh",
                                       "VarTransform=N",
                                       "HiddenLayers=N,N",
                                       "UseRegulator"
                                      ]) )
 
    if "MLPLearningrate" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPLearningRate",
                            ":".join([ "!H","!V",
                                       "LearningRate=0.1",
#                                       "NCycles=200",
                                       "NeuronType=sigmoid",
                                       "VarTransform=N",
                                       "HiddenLayers=N,N",
                                       "UseRegulator"
                                      ]) )
 
    if "MLPNodes" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPNodes",
                            ":".join([ "!H","!V",
                                       "LearningRate=0.01",
#                                       "NCycles=200",
                                       "NeuronType=tanh",
                                       "VarTransform=N",
                                       "HiddenLayers=N+4,N",
                                       "UseRegulator"
                                      ]) )
 
    if "MLPSigmoid" in methods:
        factory.BookMethod( TMVA.Types.kMLP, "MLPSigmoid",
                            ":".join([ "!H","!V",
                                      "LearningRate=0.01",
#                                      "NCycles=200",
                                      "NeuronType=sigmoid",
                                      "VarTransform=N",
                                      "HiddenLayers=N,N",
                                      "UseRegulator"
                                     ]) )
 
    factory.TrainAllMethods()
    factory.TestAllMethods()
    print ">>> factory.EvaluateAllMethods()"
    factory.EvaluateAllMethods()
    print ">>> f_out.Close()"
    f_out.Close()
 
 
 
# APPLY
def apply(config):
    """Apply MVA weights for a given configuration and save ROOT file with MVA output."""
    print "\n>>> apply with configuration "+config.name+" and add MVA response histograms to MVA root file"
    # https://root.cern.ch/root/html/tutorials/tree/htest.C.html
 
    treeS = config.treeS
    treeB = config.treeB
    reader = TMVA.Reader()
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root",'UPDATE')
    dir = f.GetDirectory("TotalSample")
    if not dir:
        dir = f.mkdir("TotalSample")
    dir.cd()
 
    vars = [ ]
    for name in config.varNames:
        vars.append(array('f',[0]))
        reader.AddVariable(name,vars[-1])
 
    for Method, method in Methods:
        reader.BookMVA(method,MVA_DIR+"weights/"+config.name+"/TMVAClassification_"+method+".weights.xml")
        if Method == "MLP":
            histS = TH1F("histS_"+method, "", nBins, -0.4, 1.4)
            histB = TH1F("histB_"+method, "", nBins, -0.4, 1.4)
        else:
            histS = TH1F("histS_"+method, "", nBins, -1.0, 1.0)
            histB = TH1F("histB_"+method, "", nBins, -1.0, 1.0)
 
        # fill histograms
        print ">>> looping over trees with weights for " + method
        treeS.ResetBranchAddresses()
        for i in range(len(config.varNames)):
            treeS.SetBranchAddress(config.varNames[i],vars[i])
        for evt in range(treeS.GetEntries()):
            treeS.GetEntry(evt)
            histS.Fill( reader.EvaluateMVA(method) )
        treeB.ResetBranchAddresses()
        for i in range(len(config.varNames)):
            treeB.SetBranchAddress(config.varNames[i],vars[i])
        for evt in range(treeB.GetEntries()):
            treeB.GetEntry(evt)
            histB.Fill( reader.EvaluateMVA(method) )
        f.Write("",TObject.kOverwrite)
        gDirectory.Delete("histS_"+method)
        gDirectory.Delete("histB_"+method)
 
    f.Close()
 
 
 
# HISTOGRAMS: TMVA output for training and test sample
def plotTrainAndTest(config):
    """Plot MVA output and efficiencies of a given configuration for training and test samples."""
    print "\n>>> plot training with training and test sample for configuration "+config.name
 
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root")
 
    for Method, method in Methods:
        for sample in ["train","test"]:
 
            if sample == "train":
                tree = gDirectory.Get("TrainTree")
            elif sample == "test":
                tree = gDirectory.Get("TestTree")
 
            if Method == "MLP":
                histS = TH1F("histS", "", nBins, -0.4, 1.4)
                histB = TH1F("histB", "", nBins, -0.4, 1.4)
            else:
                histS = TH1F("histS", "", nBins, -1.0, 1.0)
                histB = TH1F("histB", "", nBins, -1.0, 1.0)
 
            tree.Draw(method+">>histS","classID == 0","goff")
            tree.Draw(method+">>histB","classID == 1", "goff")
 
            c = TCanvas("c","c",100,100,800,600)
            histB.Draw()
            histS.Draw("same")
            # axes, legend, ...
            CMS_lumi.CMS_lumi(c,14,33)
            c.SaveAs(MVA_DIR+method+"_"+config.name+".png")
            c.Close()
            gDirectory.Delete("histS")
            gDirectory.Delete("histB")
 
    f.Close()
 
 
 
# HISTOGRAMS: TMVA output for total sample
def plotApplication(config):
    """Plot MVA output and efficiencies of a given configuration for a total sample."""
    print "\n>>> examine training with application to total sample for configuration "+config.name
 
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root")
 
    for Method, method in Methods:        
        histS = f.Get("TotalSample/histS_"+method)
        histB = f.Get("TotalSample/histB_"+method)
 
        c = TCanvas("c","c",100,100,800,600)
        histB.Draw()
        histS.Draw("same")
        histB.GetYaxis().SetRangeUser(0,1.2)
        # axes, legend, ...
        CMS_lumi.CMS_lumi(c,14,33)
        c.SaveAs(MVA_DIR+method+"_"+config.name+"_Appl.png")
        c.Close()
 
    f.Close()
 
 
 
# 2D COLOR HISTOGRAM: Correlation matrices
def correlation(config):
    """Plot correlation of variables."""
    print "\n>>> make correlation matrix plots"
    ROOT.gStyle.SetPalette(1) # for rainbow colors
 
    reader = TMVA.Reader()
    f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root")
    TestTree = gDirectory.Get("TestTree")
 
    c = TCanvas("c","c",100,100,800,800)
    histS = f.Get("CorrelationMatrixS")
    histS.Draw("colz")
    histS.SetLabelSize(0.030,"x")
    histS.SetLabelSize(0.030,"y")
    CMS_lumi.CMS_lumi(c,14,33)
    c.SaveAs(MVA_DIR+"CorrelationMatrixS_"+config.name+".png")
    c.Close()
 
    c = TCanvas("c","c",100,100,800,800)
    histB = f.Get("CorrelationMatrixB")
    histB.Draw("colz")
    histS.SetLabelSize(0.030,"x")
    histS.SetLabelSize(0.030,"y")
    CMS_lumi.CMS_lumi(c,14,33)
    c.SaveAs(MVA_DIR+"CorrelationMatrixB_"+config.name+".png")
    c.Close()
 
    f.Close()
 
 
 
# MAIN
def main():
    """Main function."""
    global Methods, methods, nBins
 
    allVars1 = [  "jet1Pt", "jet2Pt", "jet1Eta", "jet2Eta",
                 "bjet1Pt", "bjet2Pt", "bjet1Eta", "bjet2Eta", "M_bb", ]
    allVars2 = [  "jet1Pt", "jet2Pt", "bjet1Pt", "bjet2Pt", "M_bb", ]
 
    configs = [ configuration("allVars1", allVars1, treeS1, treeB1),
                configuration("allVars2", allVars2, treeS1, treeB1), ]
 
    Methods = [
                ("BDT","BDT"),
                ("BDT","BDTTuned"),
#                 ("BDT","BDTNTrees"),
#                 ("BDT","BDTMaxDepth"),
#                 ("BDT","BDTBoost10"),
                ("BDT","BDTBoost15"),
                ("BDT","BDTBoost30"),
#                 ("BDT","BDTNodeSize"),
                ("MLP","MLPTanh"),
#                 ("MLP","MLPLearningRate"),
#                 ("MLP","MLPNodes"),
#                 ("MLP","MLPSigmoid")
              ]
    methods = [ method[1] for method in Methods ]
 
    for config in configs:
        train(config)
        apply(config)
 
 
 
if __name__ == '__main__':
    main()
    print "\n>>> done"
mva/mvaexample.1470477999.txt.gz · Last modified: 2016/08/06 12:06 by iwn