import os import ROOT from ROOT import TFile, gDirectory, TChain, TObject, TMVA, TCut, TCanvas, THStack, TH1F from array import array from copy import deepcopy from math import sqrt import CMS_lumi, tdrstyle from HHPlotterTools import * ROOT.gROOT.SetBatch(ROOT.kTRUE) # run in batch mode, don't open plots # GLOBAL VARIABLES Methods = [ ] methods = [ ] # FILES & TREES MVA_DIR = "/shome/ineuteli/CMSSW_5_3_24/src/MVA/" fileS = TFile(MVA_DIR+"exoticProcess.root") fileB = TFile(MVA_DIR+"boringProcess.root") treeS1 = fileS.Get("preselection1") treeB1 = fileB.Get("preselection1") # CMS LAY OUT CMS_lumi.cmsText = "CMS" CMS_lumi.extraText = "Preliminary" CMS_lumi.cmsTextSize = 0.65 CMS_lumi.outOfFrame = True tdrstyle.setTDRStyle() # CONFIGURATION CLASS class configuration(object): """Class defining a set of variables for specified signal and background trees.""" def __init__(self, name, varNames, treeS, treeB): self.name = name self.varNames = varNames self.treeS = treeS self.treeB = treeB # TRAIN def train(config): """Train for a given configuration with a list of methods are defined in the main function.""" print "\n>>> train with configuration "+config.name if not os.path.exists(MVA_DIR+"trees"): os.makedirs(MVA_DIR+"trees") weightsdir = MVA_DIR+"weights/"+config.name if not os.path.exists(weightsdir): os.makedirs(weightsdir) print ">>> made directory "+weightsdir f_out = TFile(MVA_DIR+"trees/MVA_"+config.name+".root","RECREATE") TMVA.Tools.Instance() (TMVA.gConfig().GetIONames()).fWeightFileDir = weightsdir factory = TMVA.Factory( "TMVAClassification", f_out, ":".join([ "!V", "!Silent", "Color", "DrawProgressBar", "Transformations=I;D;P;G,D", "AnalysisType=Classification" ]) ) for name in config.varNames: if name[0] == "N": factory.AddVariable(name,'I') # integer else: factory.AddVariable(name,'F') # float factory.AddSignalTree(config.treeS) factory.AddBackgroundTree(config.treeB) cut_S = TCut("") cut_B = TCut("") factory.PrepareTrainingAndTestTree( cut_S, cut_B, ":".join([ "!V", "nTrain_Signal=0", "nTrain_Background=0", "SplitMode=Random", "NormMode=NumEvents" ]) ) if "BDT" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDT", "!H:!V" ) # my default parameters settings NTrees = 2000 MaxDepth = 4 AdaBoostBeta = 0.10 nCuts = 90 if "BDTTuned" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTTuned", ":".join([ "!H","!V", "NTrees=%s" % NTrees, "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % AdaBoostBeta, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTBoost10" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTBoost10", ":".join([ "!H","!V", "NTrees=%s" % NTrees, "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % 0.10, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTBoost15" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTBoost15", ":".join([ "!H","!V", "NTrees=%s" % NTrees, "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % 0.15, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTBoost30" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTBoost30", ":".join([ "!H","!V", "NTrees=%s" % NTrees, "MaxDepth=%s" % MaxDepth, "BoostType=AdaBoost", "AdaBoostBeta=%s" % 0.30, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "BDTMaxDepth" in methods: factory.BookMethod(TMVA.Types.kBDT, "BDTMaxDepth", ":".join([ "!H","!V", "NTrees=%s" % NTrees, "MaxDepth=%s" % 5, "BoostType=AdaBoost", "AdaBoostBeta=%s" % AdaBoostBeta, "SeparationType=GiniIndex", "nCuts=%s" % nCuts ]) ) if "MLPTanh" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPTanh", ":".join([ "!H","!V", "LearningRate=0.01", # "NCycles=200", "NeuronType=tanh", "VarTransform=N", "HiddenLayers=N,N", "UseRegulator" ]) ) if "MLPLearningrate" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPLearningRate", ":".join([ "!H","!V", "LearningRate=0.1", # "NCycles=200", "NeuronType=sigmoid", "VarTransform=N", "HiddenLayers=N,N", "UseRegulator" ]) ) if "MLPNodes" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPNodes", ":".join([ "!H","!V", "LearningRate=0.01", # "NCycles=200", "NeuronType=tanh", "VarTransform=N", "HiddenLayers=N+4,N", "UseRegulator" ]) ) if "MLPSigmoid" in methods: factory.BookMethod( TMVA.Types.kMLP, "MLPSigmoid", ":".join([ "!H","!V", "LearningRate=0.01", # "NCycles=200", "NeuronType=sigmoid", "VarTransform=N", "HiddenLayers=N,N", "UseRegulator" ]) ) factory.TrainAllMethods() factory.TestAllMethods() print ">>> factory.EvaluateAllMethods()" factory.EvaluateAllMethods() print ">>> f_out.Close()" f_out.Close() # APPLY def apply(config): """Apply MVA weights for a given configuration and save ROOT file with MVA output.""" print "\n>>> apply with configuration "+config.name+" and add MVA response histograms to MVA root file" # https://root.cern.ch/root/html/tutorials/tree/htest.C.html treeS = config.treeS treeB = config.treeB reader = TMVA.Reader() f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root",'UPDATE') dir = f.GetDirectory("TotalSample") if not dir: dir = f.mkdir("TotalSample") dir.cd() vars = [ ] for name in config.varNames: vars.append(array('f',[0])) reader.AddVariable(name,vars[-1]) for Method, method in Methods: reader.BookMVA(method,MVA_DIR+"weights/"+config.name+"/TMVAClassification_"+method+".weights.xml") if Method == "MLP": histS = TH1F("histS_"+method, "", nBins, -0.4, 1.4) histB = TH1F("histB_"+method, "", nBins, -0.4, 1.4) else: histS = TH1F("histS_"+method, "", nBins, -1.0, 1.0) histB = TH1F("histB_"+method, "", nBins, -1.0, 1.0) # fill histograms print ">>> looping over trees with weights for " + method treeS.ResetBranchAddresses() for i in range(len(config.varNames)): treeS.SetBranchAddress(config.varNames[i],vars[i]) for evt in range(treeS.GetEntries()): treeS.GetEntry(evt) histS.Fill( reader.EvaluateMVA(method) ) treeB.ResetBranchAddresses() for i in range(len(config.varNames)): treeB.SetBranchAddress(config.varNames[i],vars[i]) for evt in range(treeB.GetEntries()): treeB.GetEntry(evt) histB.Fill( reader.EvaluateMVA(method) ) f.Write("",TObject.kOverwrite) gDirectory.Delete("histS_"+method) gDirectory.Delete("histB_"+method) f.Close() # HISTOGRAMS: TMVA output for training and test sample def plotTrainAndTest(config): """Plot MVA output and efficiencies of a given configuration for training and test samples.""" print "\n>>> plot training with training and test sample for configuration "+config.name f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root") for Method, method in Methods: for sample in ["train","test"]: if sample == "train": tree = gDirectory.Get("TrainTree") elif sample == "test": tree = gDirectory.Get("TestTree") if Method == "MLP": histS = TH1F("histS", "", nBins, -0.4, 1.4) histB = TH1F("histB", "", nBins, -0.4, 1.4) else: histS = TH1F("histS", "", nBins, -1.0, 1.0) histB = TH1F("histB", "", nBins, -1.0, 1.0) tree.Draw(method+">>histS","classID == 0","goff") tree.Draw(method+">>histB","classID == 1", "goff") c = TCanvas("c","c",100,100,800,600) histB.Draw() histS.Draw("same") # axes, legend, ... CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+method+"_"+config.name+".png") c.Close() gDirectory.Delete("histS") gDirectory.Delete("histB") f.Close() # HISTOGRAMS: TMVA output for total sample def plotApplication(config): """Plot MVA output and efficiencies of a given configuration for a total sample.""" print "\n>>> examine training with application to total sample for configuration "+config.name f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root") for Method, method in Methods: histS = f.Get("TotalSample/histS_"+method) histB = f.Get("TotalSample/histB_"+method) c = TCanvas("c","c",100,100,800,600) histB.Draw() histS.Draw("same") histB.GetYaxis().SetRangeUser(0,1.2) # axes, legend, ... CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+method+"_"+config.name+"_Appl.png") c.Close() f.Close() # 2D COLOR HISTOGRAM: Correlation matrices def correlation(config): """Plot correlation of variables.""" print "\n>>> make correlation matrix plots" ROOT.gStyle.SetPalette(1) # for rainbow colors reader = TMVA.Reader() f = TFile(MVA_DIR+"trees/MVA_"+config.name+".root") TestTree = gDirectory.Get("TestTree") c = TCanvas("c","c",100,100,800,800) histS = f.Get("CorrelationMatrixS") histS.Draw("colz") histS.SetLabelSize(0.030,"x") histS.SetLabelSize(0.030,"y") CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+"CorrelationMatrixS_"+config.name+".png") c.Close() c = TCanvas("c","c",100,100,800,800) histB = f.Get("CorrelationMatrixB") histB.Draw("colz") histS.SetLabelSize(0.030,"x") histS.SetLabelSize(0.030,"y") CMS_lumi.CMS_lumi(c,14,33) c.SaveAs(MVA_DIR+"CorrelationMatrixB_"+config.name+".png") c.Close() f.Close() # MAIN def main(): """Main function.""" global Methods, methods, nBins allVars1 = [ "jet1Pt", "jet2Pt", "jet1Eta", "jet2Eta", "bjet1Pt", "bjet2Pt", "bjet1Eta", "bjet2Eta", "M_bb", ] allVars2 = [ "jet1Pt", "jet2Pt", "bjet1Pt", "bjet2Pt", "M_bb", ] configs = [ configuration("allVars1", allVars1, treeS1, treeB1), configuration("allVars2", allVars2, treeS1, treeB1), ] Methods = [ ("BDT","BDT"), ("BDT","BDTTuned"), # ("BDT","BDTNTrees"), # ("BDT","BDTMaxDepth"), # ("BDT","BDTBoost10"), ("BDT","BDTBoost15"), ("BDT","BDTBoost30"), # ("BDT","BDTNodeSize"), ("MLP","MLPTanh"), # ("MLP","MLPLearningRate"), # ("MLP","MLPNodes"), # ("MLP","MLPSigmoid") ] methods = [ method[1] for method in Methods ] for config in configs: train(config) apply(config) if __name__ == '__main__': main() print "\n>>> done"