Source code for paramonte._TabularFileContents

####################################################################################################################################
####################################################################################################################################
####
####   MIT License
####
####   ParaMonte: plain powerful parallel Monte Carlo library.
####
####   Copyright (C) 2012-present, The Computational Data Science Lab
####
####   This file is part of the ParaMonte library.
####
####   Permission is hereby granted, free of charge, to any person obtaining a
####   copy of this software and associated documentation files (the "Software"),
####   to deal in the Software without restriction, including without limitation
####   the rights to use, copy, modify, merge, publish, distribute, sublicense,
####   and/or sell copies of the Software, and to permit persons to whom the
####   Software is furnished to do so, subject to the following conditions:
####
####   The above copyright notice and this permission notice shall be
####   included in all copies or substantial portions of the Software.
####
####   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
####   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
####   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
####   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
####   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
####   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
####   OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
####
####   ACKNOWLEDGMENT
####
####   ParaMonte is an honor-ware and its currency is acknowledgment and citations.
####   As per the ParaMonte library license agreement terms, if you use any parts of
####   this library for any purposes, kindly acknowledge the use of ParaMonte in your
####   work (education/research/industry/development/...) by citing the ParaMonte
####   library as described on this page:
####
####       https://github.com/cdslaborg/paramonte/blob/3548c097f2a25dfc0613061800656d27d0e0ddbe/ACKNOWLEDGMENT.md
####
####################################################################################################################################
####################################################################################################################################

import numpy as np
import _paramonte as pm
import _CorCovMat as ccm
from _AutoCorr import AutoCorr
from _dfutils import getMaxLogFunc
from _OutputFileContents import OutputFileContents
from paramonte.vis.LineScatterPlot import LineScatterPlot
from paramonte.vis.DensityPlot import DensityPlot
from paramonte.vis.GridPlot import GridPlot

Struct = pm.Struct
newline = pm.newline

####################################################################################################################################
#### TabularFileContents class
####################################################################################################################################

class TabularFileContents(OutputFileContents):
    """

    This is the **TabularFileContents** class for generating instances
    of the ParaMonte tabular output contents. This class is NOT meant to
    be directly accessed by the ParaMonte library users. It is internally
    used by the ParaMonte library to parse the tabular contents of the
    output files generated by the ParaMonte sampler routines. For example,
    the ParaDRAM sampler class makes calls to this class via its
    ``readSample()`` or ``readChain()`` or ``readMarkovChain()`` or
    ``readProgress()`` methods to return a list of objects of class
    ``TabularFileContents``.

        **Parameters**

            file

                The full path to the file containing the sample/chain.

            fileType

                A string containing the type of the file to be parsed.
                Current options include but are not limited to:
                ``sample``, ``chain``, ``markovChain``, ``progress``

            delimiter

                The delimiter used in the sample/chain file, which
                must be provided by the user.

            methodName

                A string representing the name of the ParaMonte sampler used
                to call the constructor of the ``TabularFileContents`` class.

            parseContents

                If set to ``True``, the contents of the file will be parsed and
                stored in a component of the object named ``contents``.
                The default value is ``True``.

            reportEnabled

                A logical input parameter indicating whether the ParaMonte
                automatic guidelines to the standard output should be provided
                or not. The default value is ``True``.

        **Attributes**

            file

                The full path to the file containing the sample/chain.

            delimiter

                The delimiter used in the sample/chain file, which
                must be provided by the user.

            ndim

                The number of dimensions of the domain of the objective
                function from which the sample has been drawn.

            count

                The number of points (states) in the sample/chain file.
                This is essentially, the number of rows in the file
                minus one (representing the header line).

            plot

                A structure containing the graphics tools for the
                visualization of the contents of the file.

            df

                If the input file contents is structured in a format that
                could be read as a dataframe, then the contents of the file
                will be stored in the form of a pandas-library DataFrame
                in this property (hence called ``df``).

            contents

                If the input file contents is structured in the form of
                columns, then a property named ``contents`` is also added
                to the object. Each component of contents will be named via
                the header of the file and will contain data from the
                corresponding column of the file.

        **Returns**

            tabularFileContents

                An object of class ``TabularFileContents``.

    ----------------------------------------------------------------------
    """

    def __init__(
        self,
        file,
        fileType,
        delimiter,
        methodName,
        parseContents = True,
        reportEnabled = True,
    ):
        """
        Read the tabular file, compute the summary statistics (for
        non-progress files), and attach the plotting tools. See the
        class documentation for the description of the arguments.
        """

        super().__init__(file, methodName, reportEnabled)

        markovChainRequested = fileType == "markovChain"
        self._isProgressFile = fileType == "progress"
        # Progress files carry no log-function column, hence the empty name.
        self._sampleLogFuncColName = "" if self._isProgressFile else "SampleLogFunc"

        if fileType != "sample" and fileType != "chain" and not (self._isProgressFile or markovChainRequested):
            pm.abort(
                msg = "Internal error occurred. The input fileType is not recognized.\n"
                    + "Please report this error at:\n\n"
                    + " " + pm.website.github.issues.url,
                methodName = self._methodName,
                marginTop = 1,
                marginBot = 1,
            )

        ############################################################################################################################
        #### data
        ############################################################################################################################

        self.delimiter = delimiter

        import pandas as pd
        self.df = pd.read_csv(self.file, delimiter = self.delimiter, header = 0)

        if self._isProgressFile:
            self._offset = -1
        else:
            # index of the first variable: the column right after the log-function column
            self._offset = list(self.df.columns).index(self._sampleLogFuncColName) + 1
            self.ndim = len(self.df.columns) - self._offset

        # Take the table shape from the index/columns rather than from ``iloc[:,1]`` / ``iloc[1,:]``,
        # which raise IndexError for a table with a single column or a single row.
        self.count = len(self.df.index)
        self.ncol = len(self.df.columns)

        if markovChainRequested:

            # The weight column sits two columns before the first variable.
            CumSumWeight = np.cumsum(self.df.iloc[:, self._offset - 2].values, dtype = np.int32)
            if CumSumWeight[-1] != self.count:  # it is indeed a compact chain
                # Expand the compact chain: repeat each row as many times as its weight.
                dfMarkov = np.zeros((CumSumWeight[-1], self.ndim + self._offset))
                istart = 0
                for i in range(self.count):
                    iend = CumSumWeight[i]
                    dfMarkov[istart:iend, :] = self.df.iloc[i].values
                    istart = iend
                columns = self.df.columns
                self.df = pd.DataFrame(dfMarkov)
                self.count = len(self.df.index)
                self.df.columns = columns

        self._progress.note()
        if not self._isProgressFile:
            self._progress.note( msg = "ndim = " + str(self.ndim) + ", count = " + str(self.count), end = newline, pre = True )

        # set dynamic properties: one attribute of ``contents`` per column of the file

        if parseContents:
            self._progress.note( msg = "parsing file contents... ", end = newline, pre = True )
            self.contents = Struct()
            for colName in self.df.columns:
                setattr(self.contents, colName, self.df[colName])

        ############################################################################################################################
        #### statistics
        ############################################################################################################################

        if not self._isProgressFile:

            self.stats = Struct()
            variableColumns = range(self._offset, self._offset + self.ndim)

            #### add chain cormat

            self._progress.note( msg = "computing the sample correlation matrix... ", end = newline, pre = True )
            self.stats.cormat = ccm.CorMat(
                dataFrame = self.df,
                columns = variableColumns,
                methodName = self._methodName,
                reportEnabled = self._reportEnabled,
                method = "pearson",
            )
            self.stats.cormat()

            #### add chain covmat

            self._progress.note( msg = "computing the sample covariance matrix... ", end = newline, pre = True )
            self.stats.covmat = ccm.CovMat(
                dataFrame = self.df,
                columns = variableColumns,
                methodName = self._methodName,
                reportEnabled = self._reportEnabled,
            )
            self.stats.covmat()

            #### add chain autocorrelation (the range also covers the log-function column)

            self._progress.note( msg = "computing the sample autocorrelations... ", end = newline, pre = True )
            self.stats.autocorr = AutoCorr(
                dataFrame = self.df,
                columns = range(self._offset - 1, self._offset + self.ndim),
                methodName = self._methodName,
                reportEnabled = self._reportEnabled,
            )
            self.stats.autocorr()

            #### add chain maxLogFunc

            self.stats.maxLogFunc = getMaxLogFunc(dataFrame = self.df)

        ############################################################################################################################
        #### graphics
        ############################################################################################################################

        self._plotTypeList = [
            "line",
            "scatter",
            "lineScatter",
        ]

        if not self._isProgressFile:
            self._plotTypeList += [
                "line3",
                "scatter3",
                "lineScatter3",
                "jointplot",
                "histplot",
                "kdeplot1",
                "kdeplot2",
                "contour3",
                "contourf",
                "contour",
                "grid",
            ]

        self._progress.note( msg = "adding the graphics tools... ", end = newline, pre = True )
        self.plot = Struct()
        self._resetPlot(resetType = "hard")
        self.plot.reset = self._resetPlot

    ################################################################################################################################
    #### _resetPlot
    ################################################################################################################################

    def _resetPlot(
        self,
        resetType = "soft",
        plotNames = "all",
    ):
        """

        Reset the properties of the plot to the original default settings.
        Use this method when you change many attributes of the plot and
        you want to clean up and go back to the default settings.

            **Parameters**

                resetType (optional)

                    An optional string with possible value of ``"hard"``.
                    If provided, the plot object will be regenerated from scratch.
                    This includes reading the original data frame again and
                    resetting everything. If not provided, then only the plot
                    settings will be reset without resetting the dataFrame.

                plotNames (optional)

                    An optional string value or list of string values
                    representing the names of plots to reset. If no value
                    is provided, then all plots will be reset.

            **Returns**

                None

            **Example**

            .. code-block:: python

                reset("hard")                   # regenerate all plots from scratch
                reset("hard","line3")           # regenerate line3 plot from scratch
                reset("hard",["line","line3"])  # regenerate line and line3 plots

        """

        # Resolve ``plotNames`` into the list of plots to reset.
        requestedPlotTypeList = []
        if isinstance(plotNames, str):
            if plotNames.lower() == "all":
                requestedPlotTypeList = self._plotTypeList
            elif plotNames in self._plotTypeList:
                requestedPlotTypeList = [plotNames]
            else:
                self._reportWrongPlotName(plotNames)
        elif isinstance(plotNames, list):
            # Collect every recognized name so that the requested plots are actually reset;
            # unrecognized names are reported.
            for plotName in plotNames:
                if plotName in self._plotTypeList:
                    requestedPlotTypeList.append(plotName)
                else:
                    self._reportWrongPlotName(plotName)
        else:
            self._reportWrongPlotName("a none-string none-list object.")

        resetTypeIsHard = None
        if isinstance(resetType, str):
            resetTypeIsHard = resetType.lower() == "hard"
        else:
            pm.abort(
                msg = "The input argument resetType must be a string representing" + newline
                    + "the type of the reset to be performed on the plots." + newline
                    + "A list of possible plots includes: \"hard\", \"soft\"" + newline
                    + "Here is the help for the ``reset()`` method: " + newline
                    + newline
                    + self._resetPlot.__doc__,
                marginTop = 1,
                marginBot = 1,
                methodName = self._methodName,
            )

        ############################################################################################################################
        #### reset plots
        ############################################################################################################################

        for requestedPlotType in requestedPlotTypeList:

            plotObject = None
            requestedPlotTypeLower = requestedPlotType.lower()

            # Classify the requested plot by the substrings of its name.
            is3d = "3" in requestedPlotTypeLower
            isLine = "line" in requestedPlotTypeLower
            isScatter = "scatter" in requestedPlotTypeLower
            isJointplot = "jointplot" in requestedPlotTypeLower
            isHistplot = "histplot" in requestedPlotTypeLower
            isKdeplot1 = "kdeplot1" in requestedPlotTypeLower
            isKdeplot2 = "kdeplot2" in requestedPlotTypeLower
            isContourf = "contourf" in requestedPlotTypeLower
            isContour3 = "contour3" in requestedPlotTypeLower
            isContour = "contour" in requestedPlotTypeLower and not (isContourf or isContour3)
            isGridPlot = "grid" in requestedPlotTypeLower

            isLineScatterPlot = isLine or isScatter
            isDensityPlot = isJointplot or isHistplot or isKdeplot1 or isKdeplot2 or isContourf or isContour3 or isContour

            # Soft reset: reuse the existing plot object and only reset its settings.
            if not resetTypeIsHard:
                plotComponent = getattr(self, "plot")
                plotObject = getattr(plotComponent, requestedPlotType)
                plotObject._reset()

            ########################################################################################################################
            #### reset line / scatter
            ########################################################################################################################

            if isLineScatterPlot:

                if resetTypeIsHard:
                    plotObject = LineScatterPlot(
                        plotType = requestedPlotType,
                        dataFrame = self.df,
                        methodName = self._methodName,
                        reportEnabled = self._reportEnabled,
                        resetPlot = self._resetPlot,
                    )

                plotObject.ycolumns = self.df.columns[self._offset]
                plotObject.ccolumns = self._sampleLogFuncColName
                plotObject.colorbar.kws.extend = "neither"
                plotObject.colorbar.kws.orientation = "vertical"
                plotObject.colorbar.kws.spacing = "uniform"

                if is3d:
                    plotObject.zcolumns = self._sampleLogFuncColName
                    if self.ndim > 1:
                        plotObject.xcolumns = self.df.columns[self._offset]
                        plotObject.ycolumns = self.df.columns[self._offset + 1]

                if isLine:
                    if isScatter:
                        # lineScatter: faint grey connecting line underneath the scatter points
                        plotObject.lineCollection.enabled = False
                        plotObject.plot.enabled = True
                        plotObject.plot.kws.alpha = 0.2
                        plotObject.plot.kws.color = "grey"
                        plotObject.plot.kws.linewidth = 0.75
                    else:
                        plotObject.lineCollection.enabled = True
                        plotObject.plot.enabled = False

            ########################################################################################################################
            #### reset density plots: kdeplot / histplot / jointplot / contour / contourf / contour3
            ########################################################################################################################

            if isDensityPlot:

                if resetTypeIsHard:
                    plotObject = DensityPlot(
                        plotType = requestedPlotType,
                        dataFrame = self.df,
                        methodName = self._methodName,
                        reportEnabled = self._reportEnabled,
                        resetPlot = self._resetPlot,
                    )

                plotObject.xcolumns = self.df.columns[self._offset]
                if not (isHistplot or isKdeplot1):
                    if self.ndim == 1:
                        # Only one variable: pair it with the column preceding it (the log-function column).
                        plotObject.xcolumns = self.df.columns[self._offset - 1]
                        plotObject.ycolumns = self.df.columns[self._offset]
                    else:
                        plotObject.ycolumns = self.df.columns[self._offset + 1]

            ########################################################################################################################
            #### reset GridPlot
            ########################################################################################################################

            if isGridPlot:

                if resetTypeIsHard:
                    plotObject = GridPlot(
                        plotType = requestedPlotType,
                        dataFrame = self.df,
                        methodName = self._methodName,
                        reportEnabled = self._reportEnabled,
                        resetPlot = self._resetPlot,
                    )

                # Show the log-function column plus at most the first three variables.
                endColindex = np.min([self._offset + 3, self._offset + self.ndim])
                plotObject.columns = self.df.columns[self._offset - 1:endColindex]
                plotObject.ccolumn = self._sampleLogFuncColName

            ########################################################################################################################
            #### reset target component
            ########################################################################################################################

            if (isLineScatterPlot or isDensityPlot) and not (plotObject._type.is3d or self._isProgressFile):

                xtarget = 0  # dummy
                if isDensityPlot:
                    xtarget = self.df[plotObject.xcolumns].values.flatten()[self.stats.maxLogFunc.idrow]

                if plotObject._type.is1d:
                    plotObject.target.value = [xtarget, 0]

                if plotObject._type.is2d:
                    ytarget = self.df[plotObject.ycolumns].values.flatten()[self.stats.maxLogFunc.idrow]
                    plotObject.target.value = [xtarget, ytarget]

                if isDensityPlot and plotObject._type.is1d:
                    plotObject.target.axhline.enabled = False
                if isLine or isScatter:
                    plotObject.target.axvline.enabled = False
                plotObject.target.label = "maxLogFunc"

            ########################################################################################################################

            if plotObject is not None:
                setattr(self.plot, requestedPlotType, plotObject)

    ################################################################################################################################
    #### _reportWrongPlotName
    ################################################################################################################################

    def _reportWrongPlotName(
        self,
        plotNames,
    ):
        """
        Abort via ``pm.abort()`` with a message that echoes the offending
        ``plotNames`` value, lists the plot names available to this object,
        and appends the help text of the ``reset()`` method.
        """
        pm.abort(
            msg = "The input argument ``plotNames`` must be a string representing" + newline
                + "the name of a plot belonging to the TabularFileContents class or," + newline
                # ``str()`` keeps the report working when the offending value is not a string.
                + "a list of such plot names. You have entered: " + str(plotNames) + newline
                + "Possible plots are: " + newline
                + newline
                + newline.join(self._plotTypeList) + newline
                + newline
                + "Here is the help for the ``reset()`` method: " + newline
                + newline
                + self._resetPlot.__doc__,
            marginTop = 1,
            marginBot = 1,
            methodName = self._methodName,
        )
################################################################################################################################