Source code for stop_words

import json
import os

# Version tuple; get_version() renders it as a dot-separated string.
__VERSION__ = (2015, 2, 23, 1)

# Directory containing this module (symlinks resolved by realpath).
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
# Data directory holding languages.json and the per-language "<name>.txt" files.
STOP_WORDS_DIR = os.path.join(CURRENT_DIR, 'stop-words')
# Stop-word lists already loaded by get_stop_words(), keyed by language name.
STOP_WORDS_CACHE = {}

# LANGUAGE_MAPPING is the decoded content of languages.json; get_stop_words()
# uses its keys as accepted aliases and its values as the language names.
with open(os.path.join(STOP_WORDS_DIR, 'languages.json'), 'rb') as map_file:
    # JSON text is UTF-8 by specification. Decoding as UTF-8 accepts every
    # file the former ASCII decoding accepted, and no longer fails at import
    # time if the mapping ever contains a non-ASCII character.
    LANGUAGE_MAPPING = json.loads(map_file.read().decode('utf-8'))

# Language names that can be requested directly, without going through a key
# of LANGUAGE_MAPPING.
AVAILABLE_LANGUAGES = list(LANGUAGE_MAPPING.values())


def get_version():
    """
    Return the module version as a dot-separated string.

    Every component of ``__VERSION__`` is converted with ``str`` and the
    pieces are joined with ``"."``.

    :rtype: basestring
    """
    components = map(str, __VERSION__)
    return ".".join(components)
class StopWordError(Exception):
    """Raised when a stop-word list is unavailable or cannot be read."""
def get_stop_words(language, cache=True):
    """
    Return the stop words for ``language``.

    ``language`` is first looked up as a key of ``LANGUAGE_MAPPING``; when it
    is not a key there, it must already be one of ``AVAILABLE_LANGUAGES``.
    The words are read from ``<STOP_WORDS_DIR>/<language>.txt`` (UTF-8, one
    entry per line, surrounding whitespace stripped) and then passed through
    ``apply_filters``.

    :type language: basestring
    :param cache: when true, reuse a list loaded by an earlier call and
        remember the list loaded by this call. Cached lists are returned
        as-is, so filters registered after a list was cached are not applied
        to it.
    :rtype: list
    :raises StopWordError: if the language is unknown or its file cannot be
        read.
    """
    try:
        language = LANGUAGE_MAPPING[language]
    except KeyError:
        if language not in AVAILABLE_LANGUAGES:
            raise StopWordError(
                '"{0}" language is unavailable.'.format(language)
            )

    if cache and language in STOP_WORDS_CACHE:
        # Return a copy so that a caller mutating the result cannot corrupt
        # the cached list used by later calls.
        return list(STOP_WORDS_CACHE[language])

    language_filename = os.path.join(STOP_WORDS_DIR, language + '.txt')
    try:
        with open(language_filename, 'rb') as language_file:
            stop_words = [
                line.decode('utf-8').strip() for line in language_file
            ]
    except IOError:
        raise StopWordError(
            '"{0}" file is unreadable, check your installation.'.format(
                language_filename
            )
        )

    # Filters run outside the try block: an IOError raised by a user filter
    # must not be reported as an unreadable stop-word file.
    stop_words = list(apply_filters(stop_words, language))

    if cache:
        # Keep a private copy in the cache for the same reason a copy is
        # returned on a cache hit.
        STOP_WORDS_CACHE[language] = list(stop_words)
    return stop_words
# Filter registry: language -> list of callables. The ``None`` entry holds
# the filters that are applied whatever the language is.
_filters = {None: []}


def apply_filters(stopwords, language):
    """
    Run the registered filters over a list of stop words.

    Filters registered under ``language`` run first and are called as
    ``func(stopwords)``. Filters registered under ``None`` run afterwards and
    are called as ``func(stopwords, language)``. Each filter receives the
    result of the previous one.

    :param stopwords: list
    :param language: string
    :return: filtered stopwords
    """
    filtered = stopwords
    for language_filter in _filters.get(language, ()):
        filtered = language_filter(filtered)
    for global_filter in _filters[None]:
        filtered = global_filter(filtered, language)
    return filtered
def add_filter(func, language=None):
    """
    Register a filter, either for one language or for all of them.

    With ``language=None`` the filter is stored under the ``None`` key, whose
    filters apply to every language. Registering a filter does not affect
    stop words that are already in the cache.

    :param func: callable
    :param language: string|None
    :return: None
    """
    _filters.setdefault(language, []).append(func)
def remove_filter(func, language=None):
    """
    Unregister a filter previously added for ``language``.

    Only the first matching registration is removed.

    :param func: the callable to remove
    :param language: the language it was registered for, or None for the
        filters registered for all languages
    :return: True if the filter was found and removed, False otherwise
    """
    registered = _filters.get(language)
    if registered is None or func not in registered:
        return False
    registered.remove(func)
    return True
def safe_get_stop_words(language, cache=True):
    """
    Return the stop words for ``language``, or an empty list on failure.

    Same as ``get_stop_words`` except that a ``StopWordError`` is swallowed
    and reported as ``[]``. Any other exception still propagates.

    :type language: basestring
    :param cache: forwarded to ``get_stop_words``; defaults to True, which
        is what ``get_stop_words`` uses when the argument is omitted.
    :rtype: list
    """
    try:
        return get_stop_words(language, cache=cache)
    except StopWordError:
        return []