001package squidpony;
002
003import squidpony.annotation.Beta;
004import squidpony.squidmath.ProbabilityTable;
005import squidpony.squidmath.RNG;
006
007import java.util.ArrayList;
008import java.util.HashMap;
009import java.util.Set;
010import java.util.TreeMap;
011
012/**
013 * Based on work by Nolithius available at the following two sites
014 * https://github.com/Nolithius/weighted-letter-namegen
015 * http://code.google.com/p/weighted-letter-namegen/
016 *
017 * @author Eben Howard - http://squidpony.com - howard@squidpony.com
018 */
019@Beta
020public class WeightedLetterNamegen {
021//<editor-fold defaultstate="collapsed" desc="Viking Style static name list">
022
023    public static final String[] VIKING_STYLE_NAMES = new String[]{
024        "Andor",
025        "Baatar",
026        "Beowulf",
027        "Drogo",
028        "Freya",
029        "Grog",
030        "Gruumsh",
031        "Grunt",
032        "Hodor",
033        "Hrothgar",
034        "Hrun",
035        "Korg",
036        "Lothar",
037        "Odin",
038        "Theodrin",
039        "Thor",
040        "Yngvar",
041        "Xandor"
042    };
043//</editor-fold>
044//<editor-fold defaultstate="collapsed" desc="Star Wars Style static name list">
045    public static final String[] STAR_WARS_STYLE_NAMES = new String[]{
046        "Lutoif Vap",
047        "Nasoi Seert",
048        "Jitpai",
049        "Sose",
050        "Vainau",
051        "Jairkau",
052        "Tirka Kist",
053        "Boush",
054        "Wofe",
055        "Voxin Voges",
056        "Koux Boiti",
057        "Loim",
058        "Gaungu",
059        "Mut Tep",
060        "Foimo Saispi",
061        "Toneeg Vaiba",
062        "Nix Nast",
063        "Gup Dangisp",
064        "Distark Toonausp",
065        "Tex Brinki",
066        "Kat Tosha",
067        "Tauna Foip",
068        "Frip Cex",
069        "Fexa Lun",
070        "Tafa",
071        "Zeesheerk",
072        "Cremoim Kixoop",
073        "Tago",
074        "Kesha Diplo"
075    };
076//</editor-fold>
077//<editor-fold defaultstate="collapsed" desc="USA male names static name list">
078    public static final String[] COMMON_USA_MALE_NAMES = new String[]{
079        "James",
080        "John",
081        "Robert",
082        "Michael",
083        "William",
084        "David",
085        "Richard",
086        "Charles",
087        "Joseph",
088        "Tomas",
089        "Christopher",
090        "Daniel",
091        "Paul",
092        "Mark",
093        "Donald",
094        "George",
095        "Kenneth",
096        "Steven",
097        "Edward",
098        "Brian",
099        "Ronald",
100        "Anthony",
101        "Kevin",
102        "Jason",
103        "Matthew",
104        "Gary",
105        "Timothy",
106        "Jose",
107        "Larry",
108        "Jeffrey",
109        "Frank",
110        "Scott",
111        "Eric",
112        "Stephen",
113        "Andrew",
114        "Raymond",
115        "Gregory",
116        "Joshua",
117        "Jerry",
118        "Dennis",
119        "Walter",
120        "Patrick",
121        "Peter",
122        "Harold",
123        "Douglas",
124        "Henry",
125        "Carl",
126        "Arthur",
127        "Ryan",
128        "Roger"
129    };
130//</editor-fold>
131//<editor-fold defaultstate="collapsed" desc="USA female names static name list">
132    public static final String[] COMMON_USA_FEMALE_NAMES = new String[]{
133        "Mary",
134        "Patricia",
135        "Linda",
136        "Barbara",
137        "Elizabeth",
138        "Jennifer",
139        "Maria",
140        "Susan",
141        "Margaret",
142        "Dorothy",
143        "Lisa",
144        "Nancy",
145        "Karen",
146        "Betty",
147        "Helen",
148        "Sandra",
149        "Donna",
150        "Carol",
151        "Ruth",
152        "Sharon",
153        "Michelle",
154        "Laura",
155        "Sarah",
156        "Kimberly",
157        "Deborah",
158        "Jessica",
159        "Shirley",
160        "Cynthia",
161        "Angela",
162        "Melissa",
163        "Brenda",
164        "Amy",
165        "Anna",
166        "Crystal",
167        "Virginia",
168        "Kathleen",
169        "Pamela",
170        "Martha",
171        "Becky",
172        "Amanda",
173        "Stephanie",
174        "Carolyn",
175        "Christine",
176        "Marie",
177        "Janet",
178        "Catherine",
179        "Frances",
180        "Ann",
181        "Joyce",
182        "Diane"
183    };
184//</editor-fold>
185//<editor-fold defaultstate="collapsed" desc="USA last names static name list">
186    public static final String[] COMMON_USA_LAST_NAMES = new String[]{
187        "Smith",
188        "Johnson",
189        "Williams",
190        "Brown",
191        "Jones",
192        "Miller",
193        "Davis",
194        "Wilson",
195        "Anderson",
196        "Taylor",
197        "Thomas",
198        "Moore",
199        "Martin",
200        "Jackson",
201        "Thompson",
202        "White",
203        "Clark",
204        "Lewis",
205        "Robinson",
206        "Walker"
207};
208//</editor-fold>
209
210//<editor-fold defaultstate="collapsed" desc="Lovecraft Mythos style static name list">
211    public static final String[] LOVECRAFT_MYTHOS_NAMES = new String[]{
212        "Koth",
213        "Ghlatelilt",
214        "Siarlut",
215        "Nyogongogg",
216        "Nyialan",
217        "Nyithiark",
218        "Lyun",
219        "Kethoshigr",
220        "Shobik",
221        "Tekogr",
222        "Hru-yn",
223        "Lya-ehibos",
224        "Hruna-oma-ult",
225        "Shabo'en",
226        "Shrashangal",
227        "Shukhaniark",
228        "Thaghum",
229        "Shrilang",
230        "Lukhungu'ith",
231        "Nyun",
232        "Nyia-ongin",
233        "Shogia-usun",
234        "Lyu-yl",
235        "Liathiagragr",
236        "Lyathagg",
237        "Hri'osurkut",
238        "Shothegh",
239        "No-orleshigh",
240        "Zvriangekh",
241        "Nyesashiv",
242        "Lyarkio",
243        "Le'akh",
244        "Liashi-en",
245        "Shurkano'um",
246        "Hrakhanoth",
247        "Ghlotsuban",
248        "Cthitughias",
249        "Ftanugh"
250    };
251//</editor-fold>
252
253    private static final Character[] vowels = new Character[]{'a', 'e', 'i', 'o'};//not using y because it looks strange as a vowel in names
254    private static final int LAST_LETTER_CANDIDATES_MAX = 52;
255
256    private RNG rng;
257    private String[] names;
258    private int consonantLimit;
259    private ArrayList<Integer> sizes;
260
261    private TreeMap<Character, HashMap<Character, ProbabilityTable<Character>>> letters;
262    private ArrayList<Character> firstLetterSamples;
263    private ArrayList<Character> lastLetterSamples;
264    private DamerauLevenshteinAlgorithm dla = new DamerauLevenshteinAlgorithm(1, 1, 1, 1);
265
266    /**
267     * Creates the generator by seeding the provided list of names.
268     *
269     * @param names an array of Strings that are typical names to be emulated
270     */
271    public WeightedLetterNamegen(String[] names) {
272        this(names, 2);
273    }
274
275    /**
276     * Creates the generator by seeding the provided list of names.
277     *
278     * @param names an array of Strings that are typical names to be emulated
279     * @param consonantLimit the maximum allowed consonants in a row
280     */
281    public WeightedLetterNamegen(String[] names, int consonantLimit) {
282        this(names, consonantLimit, new RNG());
283    }
284
285    /**
286     * Creates the generator by seeding the provided list of names.
287     *
288     * @param names an array of Strings that are typical names to be emulated
289     * @param consonantLimit the maximum allowed consonants in a row
290     * @param rng the source of randomness to be used
291     */
292    public WeightedLetterNamegen(String[] names, int consonantLimit, RNG rng) {
293        this.names = names;
294        this.consonantLimit = consonantLimit;
295        this.rng = rng;
296        init();
297    }
298
299    /**
300     * Initialization, statistically measures letter likelihood.
301     */
302    private void init() {
303        sizes = new ArrayList<>();
304        letters = new TreeMap<>();
305        firstLetterSamples = new ArrayList<>();
306        lastLetterSamples = new ArrayList<>();
307
308        for (int i = 0; i < names.length - 1; i++) {
309            String name = names[i];
310            if (name == null || name.length() < 1) {
311                continue;
312            }
313
314            // (1) Insert size
315            sizes.add(name.length());
316
317            // (2) Grab first letter
318            firstLetterSamples.add(name.charAt(0));
319
320            // (3) Grab last letter
321            lastLetterSamples.add(name.charAt(name.length() - 1));
322
323            // (4) Process all letters
324            for (int n = 0; n < name.length() - 1; n++) {
325                char letter = name.charAt(n);
326                char nextLetter = name.charAt(n + 1);
327
328                // Create letter if it doesn't exist
329                HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letter);
330                if (wl == null) {
331                    wl = new HashMap<>();
332                    letters.put(letter, wl);
333                }
334                ProbabilityTable<Character> wlg = wl.get(letter);
335                if (wlg == null) {
336                    wlg = new ProbabilityTable<>();
337                    wl.put(letter, wlg);
338                }
339                wlg.add(nextLetter, 1);
340
341                // If letter was uppercase (beginning of name), also add a lowercase entry
342                if (Character.isUpperCase(letter)) {
343                    letter = Character.toLowerCase(letter);
344
345                    wlg = wl.get(letter);
346                    if (wlg == null) {
347                        wlg = new ProbabilityTable<>();
348                        wl.put(letter, wlg);
349                    }
350                    wlg.add(nextLetter, 1);
351                }
352            }
353        }
354    }
355
356    public String[] generate() {
357        return generate(1);
358    }
359
360    public String[] generate(int amountToGenerate) {
361        ArrayList<String> result = new ArrayList<>();
362
363        int nameCount = 0;
364        while (nameCount < amountToGenerate) {
365            String name = "";
366
367            // Pick size
368            int size = rng.getRandomElement(sizes);
369
370            // Pick first letter
371            char firstLetter = rng.getRandomElement(firstLetterSamples);
372
373            name += firstLetter;
374
375            for (int i = 1; i < size - 2; i++) {
376                name += getRandomNextLetter(name.charAt(name.length() - 1));
377            }
378
379            // Attempt to find a last letter
380            for (int lastLetterFits = 0; lastLetterFits < LAST_LETTER_CANDIDATES_MAX; lastLetterFits++) {
381                char lastLetter = rng.getRandomElement(lastLetterSamples);
382                char intermediateLetterCandidate = getIntermediateLetter(name.charAt(name.length() - 1), lastLetter);
383
384                // Only attach last letter if the candidate is valid (if no candidate, the antepenultimate letter always occurs at the end)
385                if (Character.isLetter(intermediateLetterCandidate)) {
386                    name += intermediateLetterCandidate;
387                    name += lastLetter;
388                    break;
389                }
390            }
391
392            String nameString = name;
393
394            // Check that the word has no triple letter sequences, and that the Levenshtein distance is kosher
395            if (validateGrouping(name) && checkLevenshtein(nameString)) {
396                result.add(nameString);
397
398                // Only increase the counter if we've successfully added a name
399                nameCount++;
400            }
401        }
402
403        return result.toArray(new String[result.size()]);
404    }
405
406    /**
407     * Searches for the best fit letter between the letter before and the letter
408     * after (non-random). Used to determine penultimate letters in names.
409     *
410     * @param   letterBefore    The letter before the desired letter.
411     * @param   letterAfter     The letter after the desired letter.
412     * @return  The best fit letter between the provided letters.
413     */
414    private char getIntermediateLetter(char letterBefore, char letterAfter) {
415        if (Character.isLetter(letterBefore) && Character.isLetter(letterAfter)) {
416            // First grab all letters that come after the 'letterBefore'
417            HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letterBefore);
418            if (wl == null) {
419                return getRandomNextLetter(letterBefore);
420            }
421            Set<Character> letterCandidates = wl.get(letterBefore).items();
422
423            char bestFitLetter = '\'';
424            int bestFitScore = 0;
425
426            // Step through candidates, and return best scoring letter
427            for (char letter : letterCandidates) {
428                wl = letters.get(letter);
429                if (wl == null) {
430                    continue;
431                }
432                ProbabilityTable<Character> weightedLetterGroup = wl.get(letterBefore);
433                if (weightedLetterGroup != null) {
434                    Integer letterCounter = weightedLetterGroup.weight(letterAfter);
435                    if (letterCounter > bestFitScore) {
436                        bestFitLetter = letter;
437                        bestFitScore = letterCounter;
438                    }
439                }
440            }
441
442            return bestFitLetter;
443        } else {
444            return '-';
445        }
446    }
447
448    /**
449     * Checks that no three letters happen in succession.
450     *
451     * @param   name    The name array (easier to iterate)
452     * @return  True if no triple letter sequence is found.
453     */
454    private boolean validateGrouping(String name) {
455        for (int i = 2; i < name.length(); i++) {
456            if (name.charAt(i) == name.charAt(i - 1) && name.charAt(i) == name.charAt(i - 2)) {
457                return false;
458            }
459        }
460        int consonants = 0;
461        for (int i = 0; i < name.length(); i++) {
462            if (isVowel(name.charAt(i))) {
463                consonants = 0;
464            } else {
465                consonants++;
466            }
467
468            if (consonants > consonantLimit) {
469                return false;
470            }
471        }
472        return true;
473    }
474
475    private boolean isVowel(char c) {
476        for (char v : vowels) {
477            if (c == v) {
478                return true;
479            }
480        }
481        return false;
482    }
483
484    /**
485     * Checks that the Damerau-Levenshtein distance of this name is within a
486     * given bias from a name on the master list.
487     *
488     * @param   name    The name string.
489     * @return  True if a name is found that is within the bias.
490     */
491    private boolean checkLevenshtein(String name) {
492        int levenshteinBias = name.length() / 2;
493
494        for (String name1 : names) {
495            int levenshteinDistance = dla.execute(name, name1);
496            if (levenshteinDistance <= levenshteinBias) {
497                return true;
498            }
499        }
500
501        return false;
502    }
503
504    private char getRandomNextLetter(char letter) {
505        if (letters.containsKey(letter)) {
506            return letters.get(letter).get(letter).random();
507        } else {
508            return rng.getRandomElement(vowels);
509        }
510    }
511}