001package squidpony; 002 003import squidpony.annotation.Beta; 004import squidpony.squidmath.ProbabilityTable; 005import squidpony.squidmath.RNG; 006 007import java.util.ArrayList; 008import java.util.HashMap; 009import java.util.Set; 010import java.util.TreeMap; 011 012/** 013 * Based on work by Nolithius available at the following two sites 014 * https://github.com/Nolithius/weighted-letter-namegen 015 * http://code.google.com/p/weighted-letter-namegen/ 016 * 017 * @author Eben Howard - http://squidpony.com - howard@squidpony.com 018 */ 019@Beta 020public class WeightedLetterNamegen { 021//<editor-fold defaultstate="collapsed" desc="Viking Style static name list"> 022 023 public static final String[] VIKING_STYLE_NAMES = new String[]{ 024 "Andor", 025 "Baatar", 026 "Beowulf", 027 "Drogo", 028 "Freya", 029 "Grog", 030 "Gruumsh", 031 "Grunt", 032 "Hodor", 033 "Hrothgar", 034 "Hrun", 035 "Korg", 036 "Lothar", 037 "Odin", 038 "Theodrin", 039 "Thor", 040 "Yngvar", 041 "Xandor" 042 }; 043//</editor-fold> 044//<editor-fold defaultstate="collapsed" desc="Star Wars Style static name list"> 045 public static final String[] STAR_WARS_STYLE_NAMES = new String[]{ 046 "Lutoif Vap", 047 "Nasoi Seert", 048 "Jitpai", 049 "Sose", 050 "Vainau", 051 "Jairkau", 052 "Tirka Kist", 053 "Boush", 054 "Wofe", 055 "Voxin Voges", 056 "Koux Boiti", 057 "Loim", 058 "Gaungu", 059 "Mut Tep", 060 "Foimo Saispi", 061 "Toneeg Vaiba", 062 "Nix Nast", 063 "Gup Dangisp", 064 "Distark Toonausp", 065 "Tex Brinki", 066 "Kat Tosha", 067 "Tauna Foip", 068 "Frip Cex", 069 "Fexa Lun", 070 "Tafa", 071 "Zeesheerk", 072 "Cremoim Kixoop", 073 "Tago", 074 "Kesha Diplo" 075 }; 076//</editor-fold> 077//<editor-fold defaultstate="collapsed" desc="USA male names static name list"> 078 public static final String[] COMMON_USA_MALE_NAMES = new String[]{ 079 "James", 080 "John", 081 "Robert", 082 "Michael", 083 "William", 084 "David", 085 "Richard", 086 "Charles", 087 "Joseph", 088 "Tomas", 089 "Christopher", 090 "Daniel", 091 "Paul", 092 "Mark", 093 "Donald", 094 "George", 095 "Kenneth", 096 "Steven", 097 "Edward", 098 "Brian", 099 "Ronald", 100 "Anthony", 101 "Kevin", 102 "Jason", 103 "Matthew", 104 "Gary", 105 "Timothy", 106 "Jose", 107 "Larry", 108 "Jeffrey", 109 "Frank", 110 "Scott", 111 "Eric", 112 "Stephen", 113 "Andrew", 114 "Raymond", 115 "Gregory", 116 "Joshua", 117 "Jerry", 118 "Dennis", 119 "Walter", 120 "Patrick", 121 "Peter", 122 "Harold", 123 "Douglas", 124 "Henry", 125 "Carl", 126 "Arthur", 127 "Ryan", 128 "Roger" 129 }; 130//</editor-fold> 131//<editor-fold defaultstate="collapsed" desc="USA female names static name list"> 132 public static final String[] COMMON_USA_FEMALE_NAMES = new String[]{ 133 "Mary", 134 "Patricia", 135 "Linda", 136 "Barbara", 137 "Elizabeth", 138 "Jennifer", 139 "Maria", 140 "Susan", 141 "Margaret", 142 "Dorothy", 143 "Lisa", 144 "Nancy", 145 "Karen", 146 "Betty", 147 "Helen", 148 "Sandra", 149 "Donna", 150 "Carol", 151 "Ruth", 152 "Sharon", 153 "Michelle", 154 "Laura", 155 "Sarah", 156 "Kimberly", 157 "Deborah", 158 "Jessica", 159 "Shirley", 160 "Cynthia", 161 "Angela", 162 "Melissa", 163 "Brenda", 164 "Amy", 165 "Anna", 166 "Crystal", 167 "Virginia", 168 "Kathleen", 169 "Pamela", 170 "Martha", 171 "Becky", 172 "Amanda", 173 "Stephanie", 174 "Carolyn", 175 "Christine", 176 "Marie", 177 "Janet", 178 "Catherine", 179 "Frances", 180 "Ann", 181 "Joyce", 182 "Diane" 183 }; 184//</editor-fold> 185//<editor-fold defaultstate="collapsed" desc="USA last names static name list"> 186 public static final String[] COMMON_USA_LAST_NAMES = new String[]{ 187 "Smith", 188 "Johnson", 189 "Williams", 190 "Brown", 191 "Jones", 192 "Miller", 193 "Davis", 194 "Wilson", 195 "Anderson", 196 "Taylor", 197 "Thomas", 198 "Moore", 199 "Martin", 200 "Jackson", 201 "Thompson", 202 "White", 203 "Clark", 204 "Lewis", 205 "Robinson", 206 "Walker" 207}; 208//</editor-fold> 209 210//<editor-fold defaultstate="collapsed" desc="Lovecraft Mythos style static name list"> 211 public static final String[] LOVECRAFT_MYTHOS_NAMES = new String[]{ 212 "Koth", 213 "Ghlatelilt", 214 "Siarlut", 215 "Nyogongogg", 216 "Nyialan", 217 "Nyithiark", 218 "Lyun", 219 "Kethoshigr", 220 "Shobik", 221 "Tekogr", 222 "Hru-yn", 223 "Lya-ehibos", 224 "Hruna-oma-ult", 225 "Shabo'en", 226 "Shrashangal", 227 "Shukhaniark", 228 "Thaghum", 229 "Shrilang", 230 "Lukhungu'ith", 231 "Nyun", 232 "Nyia-ongin", 233 "Shogia-usun", 234 "Lyu-yl", 235 "Liathiagragr", 236 "Lyathagg", 237 "Hri'osurkut", 238 "Shothegh", 239 "No-orleshigh", 240 "Zvriangekh", 241 "Nyesashiv", 242 "Lyarkio", 243 "Le'akh", 244 "Liashi-en", 245 "Shurkano'um", 246 "Hrakhanoth", 247 "Ghlotsuban", 248 "Cthitughias", 249 "Ftanugh" 250 }; 251//</editor-fold> 252 253 private static final Character[] vowels = new Character[]{'a', 'e', 'i', 'o'};//not using y because it looks strange as a vowel in names 254 private static final int LAST_LETTER_CANDIDATES_MAX = 52; 255 256 private RNG rng; 257 private String[] names; 258 private int consonantLimit; 259 private ArrayList<Integer> sizes; 260 261 private TreeMap<Character, HashMap<Character, ProbabilityTable<Character>>> letters; 262 private ArrayList<Character> firstLetterSamples; 263 private ArrayList<Character> lastLetterSamples; 264 private DamerauLevenshteinAlgorithm dla = new DamerauLevenshteinAlgorithm(1, 1, 1, 1); 265 266 /** 267 * Creates the generator by seeding the provided list of names. 268 * 269 * @param names an array of Strings that are typical names to be emulated 270 */ 271 public WeightedLetterNamegen(String[] names) { 272 this(names, 2); 273 } 274 275 /** 276 * Creates the generator by seeding the provided list of names. 277 * 278 * @param names an array of Strings that are typical names to be emulated 279 * @param consonantLimit the maximum allowed consonants in a row 280 */ 281 public WeightedLetterNamegen(String[] names, int consonantLimit) { 282 this(names, consonantLimit, new RNG()); 283 } 284 285 /** 286 * Creates the generator by seeding the provided list of names. 287 * 288 * @param names an array of Strings that are typical names to be emulated 289 * @param consonantLimit the maximum allowed consonants in a row 290 * @param rng the source of randomness to be used 291 */ 292 public WeightedLetterNamegen(String[] names, int consonantLimit, RNG rng) { 293 this.names = names; 294 this.consonantLimit = consonantLimit; 295 this.rng = rng; 296 init(); 297 } 298 299 /** 300 * Initialization, statistically measures letter likelihood. 301 */ 302 private void init() { 303 sizes = new ArrayList<>(); 304 letters = new TreeMap<>(); 305 firstLetterSamples = new ArrayList<>(); 306 lastLetterSamples = new ArrayList<>(); 307 308 for (int i = 0; i < names.length - 1; i++) { 309 String name = names[i]; 310 if (name == null || name.length() < 1) { 311 continue; 312 } 313 314 // (1) Insert size 315 sizes.add(name.length()); 316 317 // (2) Grab first letter 318 firstLetterSamples.add(name.charAt(0)); 319 320 // (3) Grab last letter 321 lastLetterSamples.add(name.charAt(name.length() - 1)); 322 323 // (4) Process all letters 324 for (int n = 0; n < name.length() - 1; n++) { 325 char letter = name.charAt(n); 326 char nextLetter = name.charAt(n + 1); 327 328 // Create letter if it doesn't exist 329 HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letter); 330 if (wl == null) { 331 wl = new HashMap<>(); 332 letters.put(letter, wl); 333 } 334 ProbabilityTable<Character> wlg = wl.get(letter); 335 if (wlg == null) { 336 wlg = new ProbabilityTable<>(); 337 wl.put(letter, wlg); 338 } 339 wlg.add(nextLetter, 1); 340 341 // If letter was uppercase (beginning of name), also add a lowercase entry 342 if (Character.isUpperCase(letter)) { 343 letter = Character.toLowerCase(letter); 344 345 wlg = wl.get(letter); 346 if (wlg == null) { 347 wlg = new ProbabilityTable<>(); 348 wl.put(letter, wlg); 349 } 350 wlg.add(nextLetter, 1); 351 } 352 } 353 } 354 } 355 356 public String[] generate() { 357 return generate(1); 358 } 359 360 public String[] generate(int amountToGenerate) { 361 ArrayList<String> result = new ArrayList<>(); 362 363 int nameCount = 0; 364 while (nameCount < amountToGenerate) { 365 String name = ""; 366 367 // Pick size 368 int size = rng.getRandomElement(sizes); 369 370 // Pick first letter 371 char firstLetter = rng.getRandomElement(firstLetterSamples); 372 373 name += firstLetter; 374 375 for (int i = 1; i < size - 2; i++) { 376 name += getRandomNextLetter(name.charAt(name.length() - 1)); 377 } 378 379 // Attempt to find a last letter 380 for (int lastLetterFits = 0; lastLetterFits < LAST_LETTER_CANDIDATES_MAX; lastLetterFits++) { 381 char lastLetter = rng.getRandomElement(lastLetterSamples); 382 char intermediateLetterCandidate = getIntermediateLetter(name.charAt(name.length() - 1), lastLetter); 383 384 // Only attach last letter if the candidate is valid (if no candidate, the antepenultimate letter always occurs at the end) 385 if (Character.isLetter(intermediateLetterCandidate)) { 386 name += intermediateLetterCandidate; 387 name += lastLetter; 388 break; 389 } 390 } 391 392 String nameString = name; 393 394 // Check that the word has no triple letter sequences, and that the Levenshtein distance is kosher 395 if (validateGrouping(name) && checkLevenshtein(nameString)) { 396 result.add(nameString); 397 398 // Only increase the counter if we've successfully added a name 399 nameCount++; 400 } 401 } 402 403 return result.toArray(new String[result.size()]); 404 } 405 406 /** 407 * Searches for the best fit letter between the letter before and the letter 408 * after (non-random). Used to determine penultimate letters in names. 409 * 410 * @param letterBefore The letter before the desired letter. 411 * @param letterAfter The letter after the desired letter. 412 * @return The best fit letter between the provided letters. 413 */ 414 private char getIntermediateLetter(char letterBefore, char letterAfter) { 415 if (Character.isLetter(letterBefore) && Character.isLetter(letterAfter)) { 416 // First grab all letters that come after the 'letterBefore' 417 HashMap<Character, ProbabilityTable<Character>> wl = letters.get(letterBefore); 418 if (wl == null) { 419 return getRandomNextLetter(letterBefore); 420 } 421 Set<Character> letterCandidates = wl.get(letterBefore).items(); 422 423 char bestFitLetter = '\''; 424 int bestFitScore = 0; 425 426 // Step through candidates, and return best scoring letter 427 for (char letter : letterCandidates) { 428 wl = letters.get(letter); 429 if (wl == null) { 430 continue; 431 } 432 ProbabilityTable<Character> weightedLetterGroup = wl.get(letterBefore); 433 if (weightedLetterGroup != null) { 434 Integer letterCounter = weightedLetterGroup.weight(letterAfter); 435 if (letterCounter > bestFitScore) { 436 bestFitLetter = letter; 437 bestFitScore = letterCounter; 438 } 439 } 440 } 441 442 return bestFitLetter; 443 } else { 444 return '-'; 445 } 446 } 447 448 /** 449 * Checks that no three letters happen in succession. 450 * 451 * @param name The name array (easier to iterate) 452 * @return True if no triple letter sequence is found. 453 */ 454 private boolean validateGrouping(String name) { 455 for (int i = 2; i < name.length(); i++) { 456 if (name.charAt(i) == name.charAt(i - 1) && name.charAt(i) == name.charAt(i - 2)) { 457 return false; 458 } 459 } 460 int consonants = 0; 461 for (int i = 0; i < name.length(); i++) { 462 if (isVowel(name.charAt(i))) { 463 consonants = 0; 464 } else { 465 consonants++; 466 } 467 468 if (consonants > consonantLimit) { 469 return false; 470 } 471 } 472 return true; 473 } 474 475 private boolean isVowel(char c) { 476 for (char v : vowels) { 477 if (c == v) { 478 return true; 479 } 480 } 481 return false; 482 } 483 484 /** 485 * Checks that the Damerau-Levenshtein distance of this name is within a 486 * given bias from a name on the master list. 487 * 488 * @param name The name string. 489 * @return True if a name is found that is within the bias. 490 */ 491 private boolean checkLevenshtein(String name) { 492 int levenshteinBias = name.length() / 2; 493 494 for (String name1 : names) { 495 int levenshteinDistance = dla.execute(name, name1); 496 if (levenshteinDistance <= levenshteinBias) { 497 return true; 498 } 499 } 500 501 return false; 502 } 503 504 private char getRandomNextLetter(char letter) { 505 if (letters.containsKey(letter)) { 506 return letters.get(letter).get(letter).random(); 507 } else { 508 return rng.getRandomElement(vowels); 509 } 510 } 511}