Source code for prm.localdistribution

'''
The model parameters in a ProbReM project are the `conditional probability distributions` (CPDs) defined for each probabilistic attribute in the model. They are also referred to as `local distributions`; the two terms are used interchangeably.

.. inheritance-diagram:: prm.localdistribution
'''

#import prm.PRM

#from pylab import * 
import numpy as N

from itertools import izip,count 

from analytics.performance import time_analysis



class CPD(object):
    '''
    A conditional probability distribution CPD is defined for an attribute. This is an abstract
    version of a CPD that defines a set of methods all CPD implementations must provide.

    Every method except the constructor raises :class:`NotImplementedError` (a subclass of
    :class:`Exception`) until a concrete implementation overrides it.
    '''

    def __init__(self, attr):
        '''
        :arg attr: The attribute this CPD is associated with
        '''
        self.attr = attr
        """The :class:`.Attribute` that the CPD is associated with
        """

    def sample(self, paAssignment):
        """
        :arg paAssignment: List of parent values
        :returns: Randomly drawn sample of the CPD given the `paAssignment`
        :raises NotImplementedError: Always, unless overridden by a subclass
        """
        raise NotImplementedError('Sampling not implemented for %s' % (self.__class__.__name__))

    def logLikelihood(self, fullAssignment):
        """
        :arg fullAssignment: List of values ordered such that [attributeValue,ParentValue1,ParentValue2,....]
        :returns: Loglikelihood of `fullAssignment`
        :raises NotImplementedError: Always, unless overridden by a subclass
        """
        raise NotImplementedError('logLikelihood not implemented for %s' % (self.__class__.__name__))

    def save(self):
        """Saves the CPD to disk

        :raises NotImplementedError: Always, unless overridden by a subclass
        """
        raise NotImplementedError('saving of CPD not supported for %s' % (self.__class__.__name__))
    
    
class CPDTabular(CPD):
    r"""
    The tabular representation of a CPD for discrete variables. A matrix of dimensions `m x n`, where

    * `m` is the number of possible parent assignments :math:`\prod_{pa \in Parents} |V(pa)|`
    * `n` is the cardinality of the attribute domain :math:`|V(attr)|`

    This matrix grows exponentially with the number of parents, thus not suited for large V-Structures.

    .. todo::

        The rows of the `CPDTabular.cpdMatrix` are the possible parent assignments. Naturally the ordering of indexing matters, and it depends on the order of the attributes in the `attr.parents` list (it is set in :meth:`.PRMparser.start_element`). The order in which the dependencies in the model specification are defined sets the order of the `attr.parents`. The problem is that when the CPDs are loaded from file, but the specification of the dependencies changed, it is possible that the CPD is incorrect.
    """

    def __init__(self, attr):
        '''
        The CPD will be stored, inefficiently, in a matrix of dimension [  product of domain size of all parents  ,  size of domain of attribute]

        :arg attr: The attribute this CPD belongs to; its `parents` and `cardinality` are read here
        '''
        CPD.__init__(self, attr)

        # Both values are (re)computed by initCPD() from `attr.parents`.
        self.parentAssignments = 1
        # Per-parent multiplier used to find the row of `cpdMatrix` for a parent assignment
        self.indexingMultiplier = [1] * len(self.attr.parents)
        self.initCPD()

        # initialize the probability matrix
        self.cpdMatrixDim = [self.parentAssignments, attr.cardinality]
        """Dimension of `cpdMatrix`
        """
        self.cpdMatrix = N.zeros(self.cpdMatrixDim)
        """The CPD matrix of type `numpy.array`. The rows represent different parent assignments, the columns of a row define the distribution over the attribute.
        """
        self.cpdLogMatrix = None
        """Log values of `cpdMatrix`
        """

        # the cumulative distribution for sampling
        self.cumMatrix = N.zeros(self.cpdMatrixDim)
        """Cumulative `cpdMatrix`. Computed by :meth:`.computeCumulativeDist`
        """
        self.cumLogMatrix = None
        """Log values of `cumMatrix`
        """

    def sample(self, paAssignment):
        '''
        Samples a random value using `cumMatrix`

        The first column whose cumulative value is at least the uniform draw is selected.
        If no column qualifies although the row carries positive mass (the cumulative sum
        fell slightly short of the draw), the last column is used instead of silently
        returning `None`. A row with no mass at all still yields `None`.

        :arg paAssignment: List of parent values
        :returns: Randomly drawn sample of the CPD given the `paAssignment`
        '''
        ri = self.indexRow(paAssignment)
        u = N.random.uniform()

        cumRow = self.cumMatrix[ri, :]
        for i, cumprob in enumerate(cumRow):
            if u <= cumprob:
                return self.attr.domain[i]

        # Fall-through: guard against a cumulative sum that ends just below the draw.
        if len(cumRow) > 0 and cumRow[-1] > 0:
            return self.attr.domain[len(cumRow) - 1]
        return None

    def logLikelihood(self, fullAssignment):
        '''
        :arg fullAssignment: List of values ordered such that [`attributeValue`,`parentValue1`,`parentValue2`,....]
        :returns: Loglikelihood of `fullAssignment` using `cpdLogMatrix`
        '''
        # NOTE(review): the lookup goes through `self.attr.CPD` rather than `self`;
        # presumably both are the same object -- confirm before simplifying.
        cpd = self.attr.CPD
        # compute the matrix index for the attribute values
        row, column = cpd.indexingCPD(fullAssignment)
        # the log prob of the instance that we have seen
        return cpd.cpdLogMatrix[row, column]

    def indexingCPD(self, currentRow):
        '''
        Returns the row and column indices for a full assignment to the attribute `attr`. `indexRow` is the
        index of the row of the cpd matrix that corresponds to the assignment of the
        parent attributes. The parents attribute values are ordered the same way as in `attr.parents`.
        `indexColumn` is the index of the column that corresponds to the assignment of the attribute value
        itself.

        :arg currentRow: List containing a full assignment, [`attributeValue`,`parentValue1`,`parentValue2`,....]
        :returns: List [`indexRow`,`indexColumn`]
        '''
        return [self.indexRow(currentRow[1:]), self.indexColumn(currentRow[0])]

    #@time_analysis
    def indexRow(self, parentAssignment):
        '''
        See :meth:`.indexingCPD`

        :arg parentAssignment: List of parent values, ordered like `attr.parents`
        :returns: Row index of `cpdMatrix` for that parent assignment (0 without parents)
        '''
        index = 0
        for parent, mult, value in zip(self.attr.parents, self.indexingMultiplier, parentAssignment):
            index += mult * parent.indexingValue(value)

        return int(index)

    def conditionalDist(self, gbnV):
        '''
        Returns the conditional probability distribution of the `gbnV` given its parent values.

        The previous body was a copy of :meth:`.indexRow` that read an undefined name and
        therefore failed with a `NameError` on every call. How the parent values are obtained
        from `gbnV` is not defined in this module, so the method now fails explicitly.

        :arg gbnV: :class:`.GBN` instance
        :raises NotImplementedError: Always
        '''
        # TODO(review): derive the parent assignment from `gbnV` and return
        # self.cpdMatrix[self.indexRow(parentAssignment), :] once that API is settled.
        raise NotImplementedError('conditionalDist not implemented for %s' % (self.__class__.__name__))

    def reverseIndexRow(self, index):
        '''
        Computes the parent assignment given a row index of `cpdMatrix`

        :arg index: Row index of `cpdMatrix`
        :returns: Parent assignment associated with `index`
        '''
        parentAssignment = [None] * len(self.attr.parents)
        for i, m in enumerate(self.indexingMultiplier):
            # divmod keeps the quotient an integer on Python 2 and 3 alike
            position, index = divmod(index, m)
            parentAssignment[i] = self.attr.parents[i].domain[position]

        return parentAssignment

    def indexColumn(self, attrValue):
        '''
        See :meth:`.indexingCPD`

        :arg attrValue: Value of the attribute itself
        :returns: Column index of `cpdMatrix` for `attrValue`
        '''
        return self.attr.indexingValue(attrValue)

    def computeLogDists(self):
        '''
        Calculates the log probability distribution `cpdLogMatrix` and cumulative log probability distribution `cumLogMatrix`

        `cumMatrix` is refreshed as a side effect through :meth:`.computeCumulativeDist`.
        '''
        self.cpdLogMatrix = N.log(self.cpdMatrix)
        self.computeCumulativeDist()
        self.cumLogMatrix = N.log(self.cumMatrix)

    def computeCumulativeDist(self):
        '''
        Calculates the cumulative distribution of the tabular CPD
        by incrementally summing the columns
        '''
        # atleast_2d keeps the result row-indexable even if `cpdMatrix` is a flat vector
        self.cumMatrix = N.atleast_2d(self.cpdMatrix).cumsum(axis=1)

    def __repr__(self):
        return 'TabularCPD, Dim=%s' % (self.cpdMatrixDim)

    def initCPD(self):
        '''
        Computes the number of possible parent assignments and the index multipliers needed
        to compute the row index of a given parent assignment, see :meth:`.indexingCPD`.

        The multiplier of parent `i` is the product of the cardinalities of all parents
        after it in `attr.parents`. Both results are recomputed from scratch, so calling
        the method again does not compound the previous values.
        '''
        parents = self.attr.parents
        multipliers = [1] * len(parents)

        # Walk the parents back to front, carrying the running product of cardinalities.
        stride = 1
        for i in range(len(parents) - 1, -1, -1):
            multipliers[i] = stride
            stride *= parents[i].cardinality

        self.indexingMultiplier = multipliers
        # total number of all possible combinations of parent assignments
        self.parentAssignments = stride

    def save(self, relPath='./localdistributions'):
        """
        Saves `cpdMatrix` to disk using `numpy.save` and outputs the XML specification that can be added to the PRM specification.

        Two files are written, `<name>.npy` and `<name>.xml`, where `<name>` is the attribute
        name followed by the concatenated parent names. The tag referencing the XML file is
        printed to standard output.

        :arg relPath: Relative path to the local distribution files, starting from the directory where the model is instantiated from.
        """
        fname = self.attr.name
        if len(self.attr.parents) != 0:
            fname = '%s_%s' % (fname, ''.join([pa.name for pa in self.attr.parents]))
        locDistPath = '%s/%s' % (relPath, fname)

        N.save(locDistPath, self.cpdMatrix)

        locDistXML = "<?xml version='1.0' standalone='no' ?><LocalDistribution attribute='%s'><TabularCPD file='%s.npy'/></LocalDistribution>" % (self.attr.fullname, locDistPath)
        # the context manager closes the file even if the write fails
        with open('%s.xml' % (locDistPath), 'w') as xmlFile:
            xmlFile.write(locDistXML)

        print("<LocalDistribution attribute='%s' file='%s.xml'/>" % (self.attr.fullname, locDistPath))
        
        
class CPDTree(CPD):
    """Future implementation for a CPD based on a decision tree. No need so far.

    Only the constructor is provided; sampling, likelihood and saving are inherited
    from :class:`CPD` unchanged.
    """

    def __init__(self, attr=None):
        """
        :arg attr: The attribute this CPD is associated with. Optional, so that the
            previous zero-argument construction keeps working.
        """
        # Delegate to the base class so that `self.attr` always exists on the instance.
        CPD.__init__(self, attr)
Navigation

Quick search

Source code for prm.localdistribution

Navigation