Source code for gensim.models.normmodel

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2012 Radim Rehurek
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

import logging

from gensim import interfaces, matutils

logger = logging.getLogger(__name__)

class NormModel(interfaces.TransformationABC):
    """
    Objects of this class realize the explicit normalization of vectors.
    Supported norms are 'l1' and 'l2', with 'l2' being the default.

    The main methods are:

    1. Constructor which normalizes the terms in the given corpus document-wise.
    2. The normalize() method which normalizes a simple count representation.
    3. The [] transformation which internally calls the self.normalize() method.

    >>> norm_l2 = NormModel(corpus)
    >>> print(norm_l2[some_doc])
    >>> norm_l2.save('/tmp/foo.tfidf_model')

    Model persistency is achieved via its load/save methods.
    """

    def __init__(self, corpus=None, norm='l2'):
        """
        Compute the 'l1' or 'l2' normalization by normalizing separately
        for each doc in a corpus.

        Formula for 'l1' norm for term 'i' in document 'j' in a corpus of 'D' documents is::

          norml1_{i, j} = (i / sum(absolute(values in j)))

        Formula for 'l2' norm for term 'i' in document 'j' in a corpus of 'D' documents is::

          norml2_{i, j} = (i / sqrt(sum(square(values in j))))

        `corpus` is an optional iterable of documents; when given, it is
        processed immediately via `calc_norm`. `norm` is the norm identifier
        that is passed unchanged to `matutils.unitvec`.
        """
        self.norm = norm
        # Start with empty statistics so that __str__ works even when the
        # model is created without a corpus (previously an AttributeError).
        self.num_docs = 0
        self.num_nnz = 0
        self.norms = []
        if corpus is not None:
            self.calc_norm(corpus)

    def __str__(self):
        """Return a short summary: document count, non-zero count and norm."""
        return "NormModel(num_docs=%s, num_nnz=%s, norm=%s)" % (self.num_docs, self.num_nnz, self.norm)

    def calc_norm(self, corpus):
        """
        Calculates the norm by calling matutils.unitvec with the norm parameter.

        Iterates over `corpus` once; every document must support `len()`.
        Stores the per-document results in `self.norms`, the number of
        documents in `self.num_docs` and the summed document lengths in
        `self.num_nnz`.
        """
        # Lazy logger arguments: the message is only formatted when emitted.
        logger.info("Performing %s normalization...", self.norm)
        norms = []
        num_nnz = 0
        for bow in corpus:
            num_nnz += len(bow)
            norms.append(matutils.unitvec(bow, self.norm))
        # One entry is appended per document, so the list length is the count.
        self.num_docs = len(norms)
        self.num_nnz = num_nnz
        self.norms = norms

    def normalize(self, bow):
        """
        Normalize a single document `bow` with the configured norm.

        Returns whatever `matutils.unitvec(bow, self.norm)` returns.
        """
        return matutils.unitvec(bow, self.norm)

    def __getitem__(self, bow):
        """Apply the transformation: `model[bow]` is `model.normalize(bow)`."""
        return self.normalize(bow)
#endclass NormModel