Source code for nltk.stem.snowball

# -*- coding: utf-8 -*-
#
# Natural Language Toolkit: Snowball Stemmer
#
# Copyright (C) 2001-2015 NLTK Project
# Author: Peter Michael Stahl <pemistahl@gmail.com>
#         Peter Ljunglof <peter.ljunglof@heatherleaf.se> (revisions)
# Algorithms: Dr Martin Porter <martin@tartarus.org>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

"""
Snowball stemmers

This module provides a port of the Snowball stemmers
developed by Martin Porter.

There is also a demo function: `snowball.demo()`.

"""
from __future__ import unicode_literals, print_function

from nltk import compat
from nltk.corpus import stopwords
from nltk.stem import porter
from nltk.stem.util import suffix_replace

from nltk.stem.api import StemmerI


[docs]class SnowballStemmer(StemmerI): """ Snowball Stemmer The following languages are supported: Danish, Dutch, English, Finnish, French, German, Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, Spanish and Swedish. The algorithm for English is documented here: Porter, M. \"An algorithm for suffix stripping.\" Program 14.3 (1980): 130-137. The algorithms have been developed by Martin Porter. These stemmers are called Snowball, because Porter created a programming language with this name for creating new stemming algorithms. There is more information available at http://snowball.tartarus.org/ The stemmer is invoked as shown below: >>> from nltk.stem import SnowballStemmer >>> print(" ".join(SnowballStemmer.languages)) # See which languages are supported danish dutch english finnish french german hungarian italian norwegian porter portuguese romanian russian spanish swedish >>> stemmer = SnowballStemmer("german") # Choose a language >>> stemmer.stem("Autobahnen") # Stem a word 'autobahn' Invoking the stemmers that way is useful if you do not know the language to be stemmed at runtime. Alternatively, if you already know the language, then you can invoke the language specific stemmer directly: >>> from nltk.stem.snowball import GermanStemmer >>> stemmer = GermanStemmer() >>> stemmer.stem("Autobahnen") 'autobahn' :param language: The language whose subclass is instantiated. :type language: str or unicode :param ignore_stopwords: If set to True, stopwords are not stemmed and returned unchanged. Set to False by default. :type ignore_stopwords: bool :raise ValueError: If there is no stemmer for the specified language, a ValueError is raised. """ languages = ("danish", "dutch", "english", "finnish", "french", "german", "hungarian", "italian", "norwegian", "porter", "portuguese", "romanian", "russian", "spanish", "swedish")
[docs] def __init__(self, language, ignore_stopwords=False): if language not in self.languages: raise ValueError("The language '%s' is not supported." % language) stemmerclass = globals()[language.capitalize() + "Stemmer"] self.stemmer = stemmerclass(ignore_stopwords) self.stem = self.stemmer.stem self.stopwords = self.stemmer.stopwords
@compat.python_2_unicode_compatible class _LanguageSpecificStemmer(StemmerI): """ This helper subclass offers the possibility to invoke a specific stemmer directly. This is useful if you already know the language to be stemmed at runtime. Create an instance of the Snowball stemmer. :param ignore_stopwords: If set to True, stopwords are not stemmed and returned unchanged. Set to False by default. :type ignore_stopwords: bool """ def __init__(self, ignore_stopwords=False): # The language is the name of the class, minus the final "Stemmer". language = type(self).__name__.lower() if language.endswith("stemmer"): language = language[:-7] self.stopwords = set() if ignore_stopwords: try: for word in stopwords.words(language): self.stopwords.add(word) except IOError: raise ValueError("%r has no list of stopwords. Please set" " 'ignore_stopwords' to 'False'." % self) def __repr__(self): """ Print out the string representation of the respective class. """ return "<%s>" % type(self).__name__ class PorterStemmer(_LanguageSpecificStemmer, porter.PorterStemmer): """ A word stemmer based on the original Porter stemming algorithm. Porter, M. \"An algorithm for suffix stripping.\" Program 14.3 (1980): 130-137. A few minor modifications have been made to Porter's basic algorithm. See the source code of the module nltk.stem.porter for more information. """ def __init__(self, ignore_stopwords=False): _LanguageSpecificStemmer.__init__(self, ignore_stopwords) porter.PorterStemmer.__init__(self) class _ScandinavianStemmer(_LanguageSpecificStemmer): """ This subclass encapsulates a method for defining the string region R1. It is used by the Danish, Norwegian, and Swedish stemmer. """ def _r1_scandinavian(self, word, vowels): """ Return the region R1 that is used by the Scandinavian stemmers. R1 is the region after the first non-vowel following a vowel, or is the null region at the end of the word if there is no such non-vowel. But then R1 is adjusted so that the region before it contains at least three letters. :param word: The word whose region R1 is determined. :type word: str or unicode :param vowels: The vowels of the respective language that are used to determine the region R1. :type vowels: unicode :return: the region R1 for the respective word. :rtype: unicode :note: This helper method is invoked by the respective stem method of the subclasses DanishStemmer, NorwegianStemmer, and SwedishStemmer. It is not to be invoked directly! """ r1 = "" for i in range(1, len(word)): if word[i] not in vowels and word[i-1] in vowels: if len(word[:i+1]) < 3 and len(word[:i+1]) > 0: r1 = word[3:] elif len(word[:i+1]) >= 3: r1 = word[i+1:] else: return word break return r1 class _StandardStemmer(_LanguageSpecificStemmer): """ This subclass encapsulates two methods for defining the standard versions of the string regions R1, R2, and RV. """ def _r1r2_standard(self, word, vowels): """ Return the standard interpretations of the string regions R1 and R2. R1 is the region after the first non-vowel following a vowel, or is the null region at the end of the word if there is no such non-vowel. R2 is the region after the first non-vowel following a vowel in R1, or is the null region at the end of the word if there is no such non-vowel. :param word: The word whose regions R1 and R2 are determined. :type word: str or unicode :param vowels: The vowels of the respective language that are used to determine the regions R1 and R2. :type vowels: unicode :return: (r1,r2), the regions R1 and R2 for the respective word. :rtype: tuple :note: This helper method is invoked by the respective stem method of the subclasses DutchStemmer, FinnishStemmer, FrenchStemmer, GermanStemmer, ItalianStemmer, PortugueseStemmer, RomanianStemmer, and SpanishStemmer. It is not to be invoked directly! :note: A detailed description of how to define R1 and R2 can be found at http://snowball.tartarus.org/texts/r1r2.html """ r1 = "" r2 = "" for i in range(1, len(word)): if word[i] not in vowels and word[i-1] in vowels: r1 = word[i+1:] break for i in range(1, len(r1)): if r1[i] not in vowels and r1[i-1] in vowels: r2 = r1[i+1:] break return (r1, r2) def _rv_standard(self, word, vowels): """ Return the standard interpretation of the string region RV. If the second letter is a consonant, RV is the region after the next following vowel. If the first two letters are vowels, RV is the region after the next following consonant. Otherwise, RV is the region after the third letter. :param word: The word whose region RV is determined. :type word: str or unicode :param vowels: The vowels of the respective language that are used to determine the region RV. :type vowels: unicode :return: the region RV for the respective word. :rtype: unicode :note: This helper method is invoked by the respective stem method of the subclasses ItalianStemmer, PortugueseStemmer, RomanianStemmer, and SpanishStemmer. It is not to be invoked directly! """ rv = "" if len(word) >= 2: if word[1] not in vowels: for i in range(2, len(word)): if word[i] in vowels: rv = word[i+1:] break elif word[0] in vowels and word[1] in vowels: for i in range(2, len(word)): if word[i] not in vowels: rv = word[i+1:] break else: rv = word[3:] return rv class DanishStemmer(_ScandinavianStemmer): """ The Danish Snowball stemmer. :cvar __vowels: The Danish vowels. :type __vowels: unicode :cvar __consonants: The Danish consonants. :type __consonants: unicode :cvar __double_consonants: The Danish double consonants. :type __double_consonants: tuple :cvar __s_ending: Letters that may directly appear before a word final 's'. :type __s_ending: unicode :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. :type __step1_suffixes: tuple :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. :type __step2_suffixes: tuple :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. :type __step3_suffixes: tuple :note: A detailed description of the Danish stemming algorithm can be found under http://snowball.tartarus.org/algorithms/danish/stemmer.html """ # The language's vowels and other important characters are defined. __vowels = "aeiouy\xE6\xE5\xF8" __consonants = "bcdfghjklmnpqrstvwxz" __double_consonants = ("bb", "cc", "dd", "ff", "gg", "hh", "jj", "kk", "ll", "mm", "nn", "pp", "qq", "rr", "ss", "tt", "vv", "ww", "xx", "zz") __s_ending = "abcdfghjklmnoprtvyz\xE5" # The different suffixes, divided into the algorithm's steps # and organized by length, are listed in tuples. __step1_suffixes = ("erendes", "erende", "hedens", "ethed", "erede", "heden", "heder", "endes", "ernes", "erens", "erets", "ered", "ende", "erne", "eren", "erer", "heds", "enes", "eres", "eret", "hed", "ene", "ere", "ens", "ers", "ets", "en", "er", "es", "et", "e", "s") __step2_suffixes = ("gd", "dt", "gt", "kt") __step3_suffixes = ("elig", "l\xF8st", "lig", "els", "ig") def stem(self, word): """ Stem a Danish word and return the stemmed form. :param word: The word that is stemmed. :type word: str or unicode :return: The stemmed form. :rtype: unicode """ # Every word is put into lower case for normalization. word = word.lower() if word in self.stopwords: return word # After this, the required regions are generated # by the respective helper method. r1 = self._r1_scandinavian(word, self.__vowels) # Then the actual stemming process starts. # Every new step is explicitly indicated # according to the descriptions on the Snowball website. # STEP 1 for suffix in self.__step1_suffixes: if r1.endswith(suffix): if suffix == "s": if word[-2] in self.__s_ending: word = word[:-1] r1 = r1[:-1] else: word = word[:-len(suffix)] r1 = r1[:-len(suffix)] break # STEP 2 for suffix in self.__step2_suffixes: if r1.endswith(suffix): word = word[:-1] r1 = r1[:-1] break # STEP 3 if r1.endswith("igst"): word = word[:-2] r1 = r1[:-2] for suffix in self.__step3_suffixes: if r1.endswith(suffix): if suffix == "l\xF8st": word = word[:-1] r1 = r1[:-1] else: word = word[:-len(suffix)] r1 = r1[:-len(suffix)] if r1.endswith(self.__step2_suffixes): word = word[:-1] r1 = r1[:-1] break # STEP 4: Undouble for double_cons in self.__double_consonants: if word.endswith(double_cons) and len(word) > 3: word = word[:-1] break return word class DutchStemmer(_StandardStemmer): """ The Dutch Snowball stemmer. :cvar __vowels: The Dutch vowels. :type __vowels: unicode :cvar __step1_suffixes: Suffixes to be deleted in step 1 of the algorithm. :type __step1_suffixes: tuple :cvar __step3b_suffixes: Suffixes to be deleted in step 3b of the algorithm. :type __step3b_suffixes: tuple :note: A detailed description of the Dutch stemming algorithm can be found under http://snowball.tartarus.org/algorithms/dutch/stemmer.html """ __vowels = "aeiouy\xE8" __step1_suffixes = ("heden", "ene", "en", "se", "s") __step3b_suffixes = ("baar", "lijk", "bar", "end", "ing", "ig") def stem(self, word): """ Stem a Dutch word and return the stemmed form. :param word: The word that is stemmed. :type word: str or unicode :return: The stemmed form. :rtype: unicode """ word = word.lower() if word in self.stopwords: return word step2_success = False # Vowel accents are removed. word = (word.replace("\xE4", "a").replace("\xE1", "a") .replace("\xEB", "e").replace("\xE9", "e") .replace("\xED", "i").replace("\xEF", "i") .replace("\xF6", "o").replace("\xF3", "o") .replace("\xFC", "u").replace("\xFA", "u")) # An initial 'y', a 'y' after a vowel, # and an 'i' between self.__vowels is put into upper case. # As from now these are treated as consonants. if word.startswith("y"): word = "".join(("Y", word[1:])) for i in range(1, len(word)): if word[i-1] in self.__vowels and word[i] == "y": word = "".join((word[:i], "Y", word[i+1:])) for i in range(1, len(word)-1): if (word[i-1] in self.__vowels and word[i] == "i" and word[i+1] in self.__vowels): word = "".join((word[:i], "I", word[i+1:])) r1, r2 = self._r1r2_standard(word, self.__vowels) # R1 is adjusted so that the region before it # contains at least 3 letters. for i in range(1, len(word)): if word[i] not in self.__vowels and word[i-1] in self.__vowels: if len(word[:i+1]) < 3 and len(word[:i+1]) > 0: r1 = word[3:] elif len(word[:i+1]) == 0: return word break # STEP 1 for suffix in self.__step1_suffixes: if r1.endswith(suffix): if suffix == "heden": word = suffix_replace(word, suffix, "heid") r1 = suffix_replace(r1, suffix, "heid") if r2.endswith("heden"): r2 = suffix_replace(r2, suffix, "heid") elif (suffix in ("ene", "en") and not word.endswith("heden") and word[-len(suffix)-1] not in self.__vowels and word[-len(suffix)-3:-len(suffix)] != "gem"): word = word[:-len(suffix)] r1 = r1[:-len(suffix)] r2 = r2[:-len(suffix)] if word.endswith(("kk", "dd", "tt")): word = word[:-1] r1 = r1[:-1] r2 = r2[:-1] elif (suffix in ("se", "s") and word[-len(suffix)-1] not in self.__vowels and word[-len(suffix)-1] != "j"): word = word[:-len(suffix)] r1 = r1[:-len(suffix)] r2 = r2[:-len(suffix)] break # STEP 2 if r1.endswith("e") and word[-2] not in self.__vowels: step2_success = True word = word[:-1] r1 = r1[:-1] r2 = r2[:-1] if word.endswith(("kk", "dd", "tt")): word = word[:-1] r1 = r1[:-1] r2 = r2[:-1] # STEP 3a if r2.endswith("heid") and word[-5] != "c": word = word[:-4] r1 = r1[:-4] r2 = r2[:-4] if (r1.endswith("en") and word[-3] not in self.__vowels and word[-5:-2] != "gem"): word = word[:-2] r1 = r1[:-2] r2 = r2[:-2] if word.endswith(("kk", "dd", "tt")): word = word[:-1] r1 = r1[:-1] r2 = r2[:-1] # STEP 3b: Derivational suffixes for suffix in self.__step3b_suffixes: if r2.endswith(suffix): if suffix in ("end", "ing"): word = word[:-3] r2 = r2[:-3] if r2.endswith("ig") and word[-3] != "e": word = word[:-2] else: if word.endswith(("kk", "dd", "tt")): word = word[:-1] elif suffix == "ig" and word[-3] != "e": word = word[:-2] elif suffix == "lijk": word = word[:-4] r1 = r1[:-4] if r1.endswith("e") and word[-2] not in self.__vowels: word = word[:-1] if word.endswith(("kk", "dd", "tt")): word = word[:-1] elif suffix == "baar": word = word[:-4] elif suffix == "bar" and step2_success: word = word[:-3] break # STEP 4: Undouble vowel if len(word) >= 4: if word[-1] not in self.__vowels and word[-1] != "I": if word[-3:-1] in ("aa", "ee", "oo", "uu"): if word[-4] not in self.__vowels: word = "".join((word[:-3], word[-3], word[-1])) # All occurrences of 'I' and 'Y' are put back into lower case. word = word.replace("I", "i").replace("Y", "y") return word class EnglishStemmer(_StandardStemmer): """ The English Snowball stemmer. :cvar __vowels: The English vowels. :type __vowels: unicode :cvar __double_consonants: The English double consonants. :type __double_consonants: tuple :cvar __li_ending: Letters that may directly appear before a word final 'li'. :type __li_ending: unicode :cvar __step0_suffixes: Suffixes to be deleted in step 0 of the algorithm. :type __step0_suffixes: tuple :cvar __step1a_suffixes: Suffixes to be deleted in step 1a of the algorithm. :type __step1a_suffixes: tuple :cvar __step1b_suffixes: Suffixes to be deleted in step 1b of the algorithm. :type __step1b_suffixes: tuple :cvar __step2_suffixes: Suffixes to be deleted in step 2 of the algorithm. :type __step2_suffixes: tuple :cvar __step3_suffixes: Suffixes to be deleted in step 3 of the algorithm. :type __step3_suffixes: tuple :cvar __step4_suffixes: Suffixes to be deleted in step 4 of the algorithm. :type __step4_suffixes: tuple :cvar __step5_suffixes: Suffixes to be deleted in step 5 of the algorithm. :type __step5_suffixes: tuple :cvar __special_words: A dictionary containing words which have to be stemmed specially. :type __special_words: dict :note: A detailed description of the English stemming algorithm can be found under http://snowball.tartarus.org/algorithms/english/stemmer.html """ __vowels = "aeiouy" __double_consonants = ("bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt") __li_ending = "cdeghkmnrt" __step0_suffixes = ("'s'", "'s", "'") __step1a_suffixes = ("sses", "ied", "ies", "us", "ss", "s") __step1b_suffixes = ("eedly", "ingly", "edly", "eed", "ing", "ed") __step2_suffixes = ('ization', 'ational', 'fulness', 'ousness', 'iveness', 'tional', 'biliti', 'lessli', 'entli', 'ation', 'alism', 'aliti', 'ousli', 'iviti', 'fulli', 'enci', 'anci', 'abli', 'izer', 'ator', 'alli', 'bli', 'ogi', 'li') __step3_suffixes = ('ational', 'tional', 'alize', 'icate', 'iciti', 'ative', 'ical', 'ness', 'ful') __step4_suffixes = ('ement', 'ance', 'ence', 'able', 'ible', 'ment', 'ant', 'ent', 'ism', 'ate', 'iti', 'ous', 'ive', 'ize', 'ion', 'al', 'er', 'ic') __step5_suffixes = ("e", "l") __special_words = {"skis" : "ski", "skies" : "sky", "dying" : "die", "lying" : "lie", "tying" : "tie", "idly" : "idl", "gently" : "gentl", "ugly" : "ugli", "early" : "earli", "only" : "onli", "singly" : "singl", "sky" : "sky", "news" : "news", "howe" : "howe", "atlas" : "atlas", "cosmos" : "cosmos", "bias" : "bias", "andes" : "andes", "inning" : "inning", "innings" : "inning", "outing" : "outing", "outings" : "outing", "canning" : "canning", "cannings" : "canning", "herring" : "herring", "herrings" : "herring", "earring" : "earring", "earrings" : "earring", "proceed" : "proceed", "proceeds" : "proceed", "proceeded" : "proceed", "proceeding" : "proceed", "exceed" : "exceed", "exceeds" : "exceed", "exceeded" : "exceed", "exceeding" : "exceed", "succeed" : "succeed", "succeeds" : "succeed", "succeeded" : "succeed", "succeeding" : "succeed"} def stem(self, word): """ Stem an English word and return the stemmed form. :param word: The word that is stemmed. :type word: str or unicode :return: The stemmed form. :rtype: unicode """ word = word.lower() if word in self.stopwords or len(word) <= 2: return word elif word in self.__special_words: return self.__special_words[word] # Map the different apostrophe characters to a single consistent one word = (word.replace("\u2019", "\x27") .replace("\u2018", "\x27") .replace("\u201B", "\x27")) if word.startswith("\x27"): word = word[1:] if word.startswith("y"): word = "".join(("Y", word[1:])) for i in range(1, len(word)): if word[i-1] in self.__vowels and word[i] == "y": word = "".join((word[:i], "Y", word[i+1:])) step1a_vowel_found = False step1b_vowel_found = False r1 = "" r2 = "" if word.startswith(("gener", "commun", "arsen")): if word.startswith(("gener", "arsen")): r1 = word[5:] else: r1 = word[6:] for i in range(1, len(r1)): if r1[i] not in self.__vowels and r1[i-1] in self.__vowels: r2 = r1[i+1:] break else: r1, r2 = self._r1r2_standard(word, self.__vowels) # STEP 0 for suffix in self.__step0_suffixes: if word.endswith(suffix): word = word[:-len(suffix)] r1 = r1[:-len(suffix)] r2 = r2[:-len(suffix)] break # STEP 1a for suffix in self.__step1a_suffixes: if word.endswith(suffix): if suffix == "sses": word = word[:-2] r1 = r1[:-2] r2 = r2[:-2] elif suffix in ("ied", "ies"): if len(word[:-len(suffix)]) > 1: word = word[:-2] r1 = r1[:-2] r2 = r2[:-2] else: word = word[:-1] r1 = r1[:-1] r2 = r2[:-1] elif suffix == "s": for letter in word[:-2]: if letter in self.__vowels: step1a_vowel_found = True break if step1a_vowel_found: word = word[:-1] r1 = r1[:-1] r2 = r2[:-1] break # STEP 1b for suffix in self.__step1b_suffixes: if word.endswith(suffix): if suffix in ("eed", "eedly"): if r1.endswith(suffix): word = suffix_replace(word, suffix, "ee") if len(r1) >= len(suffix): r1 = suffix_replace(r1, suffix, "ee") else: r1 = "" if len(r2) >= len(suffix): r2 = suffix_replace(r2, suffix, "ee") else: r2 = "" else: for letter in word[:-len(suffix)]: if letter in self.__vowels: step1b_vowel_found = True break if step1b_vowel_found: word = word[:-len(suffix)] r1 = r1[:-len(suffix)] r2 = r2[:-len(suffix)] if word.endswith(("at", "bl", "iz")): word = "".join((word, "e")) r1 = "".join((r1, "e")) if len(word) > 5 or len(r1) >=3: r2 = "".join((r2, "e")) elif word.endswith(self.__double_consonants): word = word[:-1] r1 = r1[:-1] r2 = r2[:-1] elif ((r1 == "" and len(word) >= 3 and word[-1] not in self.__vowels and word[-1] not in "wxY" and word[-2] in self.__vowels and word[-3] not in self.__vowels) or (r1 == "" and len(word) == 2 and word[0] in self.__vowels and word[1] not in self.__vowels)): word = "".join((word, "e")) if len(r1) > 0: r1 = "".join((r1, "e")) if len(r2) > 0: r2 = "".join((r2, "e")) break # STEP 1c if len(word) > 2 and word[-1] in "yY" and word[-2] not in self.__vowels: word = "".join((word[:-1], "i")) if len(r1) >= 1: r1 = "".join((r1[:-1], "i")) else: r1 = "" if len(r2) >= 1: r2 = "".join((r2[:-1], "i")) else: r2 = "" # STEP 2 for suffix in self.__step2_suffixes: if word.endswith(suffix): if r1.endswith(suffix): if suffix == "tional": word = word[:-2] r1 = r1[:-2] r2 = r2[:-2] elif suffix in ("enci", "anci", "abli"): word = "".join((word[:-1], "e")) if len(r1) >= 1: r1 = "".join((r1[:-1], "e")) else: r1 = "" if len(r2) >= 1: r2 = "".join((r2[:-1], "e")) else: r2 = "" elif suffix == "entli": word = word[:-2] r1 = r1[:-2] r2 = r2[:-2] elif suffix in ("izer", "ization"): word = suffix_replace(word, suffix, "ize") if len(r1) >= len(suffix): r1 = suffix_replace(r1, suffix, "ize") else: r1 = "" if len(r2) >= len(suffix): r2 = suffix_replace(r2, suffix, "ize") else: r2 = "" elif suffix in ("ational", "ation", "ator"): word = suffix_replace(word, suffix, "ate") if len(r1) >= len(suffix): r1 = suffix_replace(r1, suffix, "ate") else: r1 = "" if len(r2) >= len(suffix): r2 = suffix_replace(r2, suffix, "ate") else: r2 = "e" elif suffix in ("alism", "aliti", "alli"): word = suffix_replace(word, suffix, "al") if len(r1) >= len(suffix): r1 = suffix_replace(r1, suffix, "al") else: r1 = "" if len(r2) >= len(suffix): r2 = suffix_replace(r2, suffix, "al") else: r2 = "" elif suffix == "fulness": word = word[:-4] r1 = r1[:-4] r2 = r2[:-4] elif suffix in ("ousli", "ousness"): word = suffix_replace(word, suffix, "ous") if len(r1) >= len(suffix): r1 = suffix_replace(r1, suffix, "ous") else: r1 = "" if len(r2) >= len(suffix): r2 = suffix_replace(r2, suffix, "ous") else: r2 = "" elif suffix in ("iveness", "iviti"): word = suffix_replace(word, suffix, "ive") if len(r1) >= len(suffix): r1 = suffix_replace(r1, suffix, "ive") else: r1 = "" if len(r2) >= len(suffix): r2 = suffix_replace(r2, suffix, "ive") else