This is an old revision of the document!
import os import ROOT from ROOT import TFile, gDirectory, TChain, TObject, TMVA, TCut, TCanvas, THStack, TH1F from array import array from copy import deepcopy from math import sqrt import CMS_lumi, tdrstyle from HHPlotterTools import * ROOT.gROOT.SetBatch(ROOT.kTRUE) # run in batch mode, don't open plots # GLOBAL VARIABLES Methods = [ ] methods = [ ] # FILES & TREES MVA_DIR = "/shome/ineuteli/CMSSW_5_3_24/src/MVA/" fileS = TFile(MVA_DIR+"exoticProcess.root") fileB = TFile(MVA_DIR+"boringProcess.root") treeS1 = fileS.Get("preselection1") treeB1 = fileB.Get("preselection1") # CMS LAY OUT CMS_lumi.cmsText = "CMS" CMS_lumi.extraText = "Preliminary" CMS_lumi.cmsTextSize = 0.65 CMS_lumi.outOfFrame = True tdrstyle.setTDRStyle() # CONFIGURATION CLASS class configuration(object): """Class defining a set of variables for specified signal and background trees.""" def __init__(self, name, varNames, treeS, treeB): self.name = name self.varNames = varNames self.treeS = treeS self.treeB = treeB # TRAIN def train(config): """Train for a given configuration with a list of methods are defined in the main function.""" print "\n>>> train with configuration "+config.name if not os.path.exists(MVA_DIR+"trees"): os.makedirs(MVA_DIR+"trees") weightsdir = MVA_DIR+"weights/"+config.name if not os.path.exists(weightsdir): os.makedirs(weightsdir) print ">>> made directory "+weightsdir f_out = TFile(MVA_DIR+"trees/MVA_"+config.name+".root","RECREATE") TMVA.Tools.Instance() (TMVA.gConfig().GetIONames()).fWeightFileDir = weightsdir factory = TMVA.Factory( "TMVAClassification", f_out, ":".join([ "!V", "!Silent", "Color", "DrawProgressBar", "Transformations=I;D;P;G,D", "AnalysisType=Classification" ]) ) for name in config.varNames: if name[0] == "N": factory.AddVariable(name,'I') # integer else: factory.AddVariable(name,'F') # float factory.AddSignalTree(config.treeS) factory.AddBackgroundTree(config.treeB) cut_S = TCut("") cut_B = TCut("") factory.PrepareTrainingAndTestTree( cut_S, cut_B, ":".join([ "!V", "nTrain_Signal=0", "nTrain_Background=0", "SplitMode=Random", "NormMode=NumEvents" ]) ) if "BDT" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDT", "!H:!V" ) # my default parameters settings NTrees = 2000 MaxDepth = 4 AdaBoostBeta = 0.10 nCuts = 90 if "BDTTuned" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTTuned", ":".join([ "!H","!V", "NTrees=%s" % NTrees, # "MinNodeSize=2.%", # "nEventsMin=200", "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % AdaBoostBeta, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTBoost10" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTBoost10", ":".join([ "!H","!V", "NTrees=%s" % NTrees, # "MinNodeSize=2.%", # "nEventsMin=200", "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % 0.10, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTBoost15" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTBoost15", ":".join([ "!H","!V", "NTrees=%s" % NTrees, # "MinNodeSize=2.%", # "nEventsMin=200", "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % 0.15, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTBoost30" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTBoost30", ":".join([ "!H","!V", "NTrees=%s" % NTrees, # "MinNodeSize=2.%", # "nEventsMin=200", "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % 0.30, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTMaxDepth" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTMaxDepth", ":".join([ "!H","!V", "NTrees=%s" % NTrees, # "MinNodeSize=2.%", # "nEventsMin=200", "MaxDepth=%s" % 5, "BoostType=AdaBoost", "AdaBoostBeta=%s" % AdaBoostBeta, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "MLPTanh" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPTanh", ":".join([ "!H","!V", "LearningRate=0.01", # "NCycles=200", "NeuronType=tanh", "VarTransform=N", "HiddenLayers=N,N", "UseRegulator" ]) ) if "MLPLearningrate" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPLearningRate", ":".join([ "!H","!V", "LearningRate=0.1", # "NCycles=200", "NeuronType=sigmoid", "VarTransform=N", "HiddenLayers=N,N", "UseRegulator" ]) ) if "MLPNodes" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPNodes", ":".join([ "!H","!V", "LearningRate=0.01", # "NCycles=200", "NeuronType=tanh", "VarTransform=N", "HiddenLayers=N+4,N", "UseRegulator" ]) ) if "MLPSigmoid" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPSigmoid", ":".join([ "!H","!V", "LearningRate=0.01", # "NCycles=200", "NeuronType=sigmoid", "VarTransform=N", "HiddenLayers=N,N", "UseRegulator" ]) ) factory.TrainAllMethods() factory.TestAllMethods() print ">>> factory.EvaluateAllMethods()" factory.EvaluateAllMethods() print ">>> f_out.Close()" f_out.Close() # APPLY def apply(config): """Apply MVA weights for a given configuration and save ROOT file with MVA output.""" print "\n>>> apply with configuration "+config.name+" and add MVA response histograms to MVA root file" # https://root.cern.ch/root/html/tutorials/tree/htest.C.html treeS = config.treeS treeB = config.treeB reader = TMVA.Reader() f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root",'UPDATE') dir = f.GetDirectory("TotalSample") if not dir: dir = f.mkdir("TotalSample") dir.cd() vars = [ ] for name in config.varNames: vars.append(array('f',[0])) reader.AddVariable(name,vars[-1]) for Method, method in Methods: reader.BookMVA(method,MVA_DIR+"weights/"+config.name+"/TMVAClassification_"+method+".weights.xml") if Method == "MLP": histS = TH1F("histS_"+method, "", nBins, -0.4, 1.4) histB = TH1F("histB_"+method, "", nBins, -0.4, 1.4) else: histS = TH1F("histS_"+method, "", nBins, -1.0, 1.0) histB = TH1F("histB_"+method, "", nBins, -1.0, 1.0) # fill histograms print ">>> looping over trees with weights for " + method treeS.ResetBranchAddresses() for i in range(len(config.varNames)): treeS.SetBranchAddress(config.varNames[i],vars[i]) for evt in range(treeS.GetEntries()): treeS.GetEntry(evt) histS.Fill( reader.EvaluateMVA(method) ) treeB.ResetBranchAddresses() for i in range(len(config.varNames)): treeB.SetBranchAddress(config.varNames[i],vars[i]) for evt in range(treeB.GetEntries()): treeB.GetEntry(evt) histB.Fill( reader.EvaluateMVA(method) ) f.Write("",TObject.kOverwrite) gDirectory.Delete("histS_"+method) gDirectory.Delete("histB_"+method) f.Close() # HISTOGRAMS: TMVA output for training and test sample def plotTrainAndTest(config): """Plot MVA output and efficiencies of a given configuration for training and test samples.""" print "\n>>> plot training with training and test sample for configuration "+config.name f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root") for Method, method in Methods: for sample in ["train","test"]: if sample == "train": tree = gDirectory.Get("TrainTree") elif sample == "test": tree = gDirectory.Get("TestTree") if Method == "MLP": histS = TH1F("histS", "", nBins, -0.4, 1.4) histB = TH1F("histB", "", nBins, -0.4, 1.4) else: histS = TH1F("histS", "", nBins, -1.0, 1.0) histB = TH1F("histB", "", nBins, -1.0, 1.0) tree.Draw(method+">>histS","classID == 0","goff") tree.Draw(method+">>histB","classID == 1", "goff") c = TCanvas("c","c",100,100,800,600) histB.Draw() histS.Draw("same") # axes, legend, ... CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+method+"_"+config.name+".png") c.Close() gDirectory.Delete("histS") gDirectory.Delete("histB") f.Close() # HISTOGRAMS: TMVA output for total sample def plotApplication(config): """Plot MVA output and efficiencies of a given configuration for a total sample.""" print "\n>>> examine training with application to total sample for configuration "+config.name f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root") for Method, method in Methods: histS = f.Get("TotalSample/histS_"+method) histB = f.Get("TotalSample/histB_"+method) c = TCanvas("c","c",100,100,800,600) histB.Draw() histS.Draw("same") histB.GetYaxis().SetRangeUser(0,1.2) # axes, legend, ... CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+method+"_"+config.name+"_Appl.png") c.Close() f.Close() # 2D COLOR HISTOGRAM: Correlation matrices def correlation(config): """Plot correlation of variables.""" print "\n>>> make correlation matrix plots" ROOT.gStyle.SetPalette(1) # for rainbow colors reader = TMVA.Reader() f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root") TestTree = gDirectory.Get("TestTree") c = TCanvas("c","c",100,100,800,800) histS = f.Get("CorrelationMatrixS") histS.Draw("colz") histS.SetLabelSize(0.030,"x") histS.SetLabelSize(0.030,"y") CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+"CorrelationMatrixS_"+config.name+".png") c.Close() c = TCanvas("c","c",100,100,800,800) histB = f.Get("CorrelationMatrixB") histB.Draw("colz") histS.SetLabelSize(0.030,"x") histS.SetLabelSize(0.030,"y") CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+"CorrelationMatrixB_"+config.name+".png") c.Close() f.Close() # MAIN def main(): """Main function.""" global Methods, methods, nBins allVars1 = [ "jet1Pt", "jet2Pt", "jet1Eta", "jet2Eta", "bjet1Pt", "bjet2Pt", "bjet1Eta", "bjet2Eta", "M_bb", ] allVars2 = [ "jet1Pt", "jet2Pt", "bjet1Pt", "bjet2Pt", "M_bb", ] configs = [ configuration("allVars1", allVars1, treeS1, treeB1), configuration("allVars2", allVars2, treeS1, treeB1), ] Methods = [ ("BDT","BDT"), ("BDT","BDTTuned"), # ("BDT","BDTNTrees"), # ("BDT","BDTMaxDepth"), # ("BDT","BDTBoost10"), ("BDT","BDTBoost15"), ("BDT","BDTBoost30"), # ("BDT","BDTNodeSize"), ("MLP","MLPTanh"), # ("MLP","MLPLearningRate"), # ("MLP","MLPNodes"), # ("MLP","MLPSigmoid") ] methods = [ method[1] for method in Methods ] for config in configs: train(config) apply(config) if __name__ == '__main__': main() print "\n>>> done"