# Source code for statsmodels.sandbox.regression.try_treewalker

'''Trying out tree structure for nested logit

The sums are a stand-in for the likelihood calculations.

The walk should collect and aggregate likelihood contributions bottom-up.

'''
from __future__ import print_function
from statsmodels.compat.python import iteritems, itervalues, lrange, zip
import numpy as np

# Nested-list tree: a list of ints is a final branch holding choice indices,
# any other list holds sub-branches. A singleton/degenerate branch still has
# to be a list (see [7]).
tree = [
    [0, 1],
    [[2, 3], [4, 5, 6]],
    [7],
]

# One value per choice index 0..7; branch() indexes it with the leaf lists.
xb = np.arange(8) * 2
# Truthy: branch() sums xb over the leaves; 0: it sums the leaf indices.
testxb = 1

def branch(tree):
    '''Walk a nested-list tree bottom-up and return the aggregated sum.

    A list whose first element is an ``int`` is treated as a final branch
    holding choice indices. Any other list is treated as a list of
    sub-branches, which are walked recursively and whose results are added.

    Parameters
    ----------
    tree : list
        Either a list of ints (final branch) or a list of such trees. An
        empty list is a degenerate branch and contributes 0.

    Returns
    -------
    total
        If the module-level flag ``testxb`` is truthy, the sum of the entries
        of the module-level ``xb`` selected by the leaf indices; otherwise
        the sum of the leaf indices themselves.

    Notes
    -----
    Progress is printed for every non-empty branch visited.
    '''
    if len(tree) == 0:
        # Empty branch: nothing to aggregate, and ``tree[0]`` below would
        # raise IndexError.
        return 0

    if isinstance(tree[0], int):   # assumes leaves are int for choice index
        print(tree)
        print('final branch with', tree, sum(tree))
        if testxb:
            # xb is indexed with the whole list of leaf indices at once
            return sum(xb[tree])
        return sum(tree)

    branchsum = 0
    for b in tree:
        branchsum += branch(b)
    print('working on branch', tree, branchsum)
    return branchsum

# Demo run on the integer tree; testxb is 1 at this point, so branch() sums
# the xb values of all leaves.
print(branch(tree))



# New version that also keeps track of the branch name and allows V_j for a
#   branch as in Greene. V_j + lambda * IV doesn't look the same as including
#   the explanatory variables in leaf X_j: V_j is linear in X, IV is the
#   logsumexp of X.


# Switch the flag off for the branch2 demo below: a falsy value makes branch2
# concatenate the names, a truthy value makes it sum the data2 values.
testxb = 0  # 1 # 0
def branch2(tree):
    '''Walk a named tree bottom-up, aggregating branch and leaf contributions.

    Parameters
    ----------
    tree : tuple or list
        A branch is a ``(name, subtrees)`` tuple, where ``subtrees`` is a list
        of further trees. Anything that is not a tuple is treated as a final
        branch: a list of leaf names that are keys of the module-level
        ``data2`` dict.

    Returns
    -------
    total
        If the module-level flag ``testxb`` is truthy, the sum of the
        ``data2`` values of all branch and leaf names in the tree. Otherwise
        the concatenation of all names in the order they are visited.

    Notes
    -----
    Progress is printed for every branch visited. The ``data2`` values are
    looked up (and printed) even when ``testxb`` is falsy, so every name in
    the tree has to be a key of ``data2``.
    '''
    if not isinstance(tree, tuple):
        # final branch: a list of leaf names
        leavessum = sum(data2[bi] for bi in tree)
        leafnames = ''.join(tree)
        print('final branch with', tree, leafnames, leavessum)
        return leavessum if testxb else leafnames

    name, subtree = tree
    print(name, data2[name])
    print('subtree', subtree)
    # the branch's own contribution comes first, then its sub-branches
    branchsum = data2[name] if testxb else name
    for b in subtree:
        # plain ``+`` rather than ``+=``: branchsum may be the very object
        # stored in data2, and ``+=`` would modify a mutable value in place
        branchsum = branchsum + branch2(b)

    print('working on branch', tree, branchsum)
    return branchsum

# Same integer tree as used for branch() above.
tree = [[0, 1], [[2, 3], [4, 5, 6]], [7]]

# Named tree for branch2(): every branch is a (name, subtrees) tuple and
# every final branch is a list of leaf names.
tree2 = ('top',
         [('B1', ['a', 'b']),
          ('B2',
           [('B21', ['c', 'd']),
            ('B22', ['e', 'f', 'g']),
            ]
           ),
          ('B3', ['h']),
          ]
         )

# Leaves 'a'..'h' get the values 0..7, then the branch names are added with
# their own values.
data2 = dict(zip('abcdefgh', range(8)))
data2.update({'top': 1000, 'B1': 100, 'B2': 200, 'B21': 21, 'B22': 22,
              'B3': 300})
# alternative branch values tried earlier:
# {'top':1000, 'B1':100, 'B2':200, 'B21':300,'B22':400, 'B3':400}

# resulting data2:
# {'a': 0, 'c': 2, 'b': 1, 'e': 4, 'd': 3, 'g': 6, 'f': 5, 'h': 7,
#  'top': 1000, 'B22': 22, 'B21': 21, 'B1': 100, 'B2': 200, 'B3': 300}

# Demo run of branch2 on the named tree. The result depends on the
# module-level testxb flag: names are concatenated when it is falsy, data2
# values are summed when it is truthy.
print('\n tree with dictionary data')
print(branch2(tree2))  # results look correct for testxb=0 and 1


# Parameters/coefficients: map each leaf and branch name to the names of the
# coefficients it uses. The names are then translated into lists of indices
# into a single 1d params array, one list for each leaf and branch.

# Note: dict loses ordering (on Python versions before 3.7)
paramsind = {
    'B1': [],
    'a': ['consta', 'p'],
    'b': ['constb', 'p'],
    'B2': ['const2', 'x2'],
    'B21': [],
    'c': ['consta', 'p', 'time'],
    'd': ['consta', 'p', 'time'],
    'B22': ['x22'],
    'e': ['conste', 'p', 'hince'],
    'f': ['constt', 'p', 'hincf'],
    'g': ['p', 'hincg'],
    'B3': [],
    'h': ['consth', 'p', 'h'],
    'top': [],
}

# unique coefficient names of the parameter array,
# sorted alphabetically; the order is/should be internal only
paramsnames = sorted(set(coef
                         for coefs in paramsind.values()
                         for coef in coefs))

# mapping coefficient names to their index in the unique parameter array
paramsidx = dict((name, idx) for idx, name in enumerate(paramsnames))

# mapping branch and leaf names to indices into the parameter array
inddict = dict((key, [paramsidx[coef] for coef in coefs])
               for key, coefs in paramsind.items())

'''
>>> paramsnames
['const2', 'consta', 'constb', 'conste', 'consth', 'constt', 'h', 'hince',
 'hincf', 'hincg', 'p', 'time', 'x2', 'x22']
>>> paramsidx
{'conste': 3, 'consta': 1, 'constb': 2, 'h': 6, 'time': 11, 'consth': 4,
 'p': 10, 'constt': 5, 'const2': 0, 'x2': 12, 'x22': 13, 'hince': 7,
 'hincg': 9, 'hincf': 8}
>>> inddict
{'a': [1, 10], 'c': [1, 10, 11], 'b': [2, 10], 'e': [3, 10, 7],
 'd': [1, 10, 11], 'g': [10, 9], 'f': [5, 10, 8], 'h': [4, 10, 6],
 'top': [], 'B22': [13], 'B21': [], 'B1': [], 'B2': [0, 12], 'B3': []}
>>> paramsind
{'a': ['consta', 'p'], 'c': ['consta', 'p', 'time'], 'b': ['constb', 'p'],
 'e': ['conste', 'p', 'hince'], 'd': ['consta', 'p', 'time'],
 'g': ['p', 'hincg'], 'f': ['constt', 'p', 'hincf'], 'h': ['consth', 'p', 'h'],
 'top': [], 'B22': ['x22'], 'B21': [], 'B1': [], 'B2': ['const2', 'x2'],
 'B3': []}
'''