package org.languagetool.language.identifier;

import com.optimaize.langdetect.text.TextFilter;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.DetectedLanguage;
import org.languagetool.Language;
import org.languagetool.language.identifier.detector.CommonWordsDetector;
import org.languagetool.language.identifier.detector.UnicodeBasedDetector;

/* loaded from: input_file:org/languagetool/language/identifier/LanguageIdentifier.class */
public abstract class LanguageIdentifier {
    /**
     * Score threshold shared with subclasses; it is not read anywhere in this class.
     * NOTE(review): presumably the minimum confidence a detection needs to be accepted - confirm in the implementations.
     */
    protected static final float SCORE_THRESHOLD = 0.85f;
    /**
     * Threshold shared with subclasses; it is not read anywhere in this class.
     * NOTE(review): judging by the name, a text length below which only preferred languages are considered - confirm.
     */
    protected static final int CONSIDER_ONLY_PREFERRED_THRESHOLD = 50;
    /**
     * Shared common-words detector. Assigned exactly once by the static initializer of this class,
     * which rethrows an {@code IOException} from the detector's constructor as a {@code RuntimeException}.
     */
    protected static final CommonWordsDetector COMMON_WORDS_LANG_IDENTIFIER;
    /**
     * Maximum number of characters of the input that {@code cleanAndShortenText} keeps.
     * Set by the constructor, which rejects values below 10.
     */
    protected int maxLength;
    /** Matches {@code http://} and {@code https://} URLs built from the listed URL characters; used by {@code REMOVE_URL_FILTER}. */
    private static final Pattern URL_REGEX = Pattern.compile("https?://[-_.?&~;+=/#%0-9A-Za-z]+");
    /** Matches e-mail-like tokens (local part, an at-sign, then a host part); used by {@code REMOVE_URL_FILTER}. */
    private static final Pattern MAIL_REGEX = Pattern.compile("[-_.0-9A-Za-z]+@[-_0-9A-Za-z]+[-_.0-9A-Za-z]+");
    private static final Pattern SIGNATURE = Pattern.compile("\n--[  ]\n.*", 32);
    /** Matches an at-sign followed by one or more ASCII letters, digits or underscores; used by {@code REMOVE_MENTION_FILTER}. */
    private static final Pattern MENTION = Pattern.compile("@[A-Za-z0-9_]+");
    /**
     * Matches runs of the characters U+FEFF and U+2063.
     * {@code cleanAndShortenText} replaces every such run with a single space.
     */
    private static final Pattern NBSP_INVIS_SEPARATOR = Pattern.compile("[\ufeff\u2063]+");
    /**
     * Language codes shared with subclasses; the list is not read anywhere in this class.
     * NOTE(review): per the name, these are languages written in non-Latin scripts - confirm usage in the implementations.
     * The list comes from {@code Arrays.asList}, so elements cannot be added or removed.
     */
    protected static final List<String> NON_LATIN_CHARS_LANGUAGES = Arrays.asList("ar", "fa", "ru", "uk", "be", "zh", "ja", "km", "ta", "el", "hi", "mr", "th", "he", "ko");
    /**
     * Deletes the first match of {@code SIGNATURE}. That pattern ends in {@code .*} and is compiled
     * with flag value 32 (DOTALL), so the match runs from the signature delimiter to the end of the text.
     */
    protected static final TextFilter REMOVE_EMAIL_SIGNATURE_FILTER =
            text -> SIGNATURE.matcher(text).replaceFirst("");

    /**
     * Deletes the first match of {@code MENTION} only; later mentions are left in place.
     * NOTE(review): whether removing just the first mention is intentional cannot be told from this class.
     */
    protected static final TextFilter REMOVE_MENTION_FILTER =
            text -> MENTION.matcher(text).replaceFirst("");

    /** Replaces every no-break space (U+00A0, decimal 160) with an ordinary space. */
    protected static final TextFilter REMOVE_NON_BREAKING_SPACES_FILTER =
            text -> text.toString().replace('\u00a0', ' ');

    /** Replaces every URL match, and then every e-mail match, with a single space. */
    protected static final TextFilter REMOVE_URL_FILTER = text -> {
        String withoutUrls = URL_REGEX.matcher(text).replaceAll(" ");
        return MAIL_REGEX.matcher(withoutUrls).replaceAll(" ");
    };

    /** Shared Unicode-based detector; {@code prepareDetectLanguage} asks it for the dominant language codes of a text. */
    protected static final UnicodeBasedDetector UNICODE_BASED_LANG_IDENTIFIER = new UnicodeBasedDetector();

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/languagetool/language/identifier/LanguageIdentifier$ParsedLanguageLists.class */
    /**
     * Holder for the two language-code lists produced by {@code prepareDetectLanguage}.
     *
     * <p>The constructor copies both input lists, so later changes to the caller's lists do not
     * affect this object. The getters, however, return the internal lists themselves (not copies),
     * so changes made through a returned list are visible to every other holder of it.
     */
    public static class ParsedLanguageLists {
        private final List<String> additionalLangs;
        private final List<String> preferredLangs;

        /**
         * @param list  additional language codes; copied
         * @param list2 preferred language codes; copied
         * @throws NullPointerException if either list is {@code null}
         */
        public ParsedLanguageLists(List<String> list, List<String> list2) {
            // Typed copy construction instead of raw ArrayList + addAll.
            this.additionalLangs = new ArrayList<>(list);
            this.preferredLangs = new ArrayList<>(list2);
        }

        /** @return the internal, mutable list of additional language codes */
        public List<String> getAdditionalLangs() {
            return this.additionalLangs;
        }

        /** @return the internal, mutable list of preferred language codes */
        public List<String> getPreferredLangs() {
            return this.preferredLangs;
        }
    }

    /**
     * Stores the maximum text length that {@code cleanAndShortenText} will keep.
     *
     * @param i maximum number of characters to keep; must be at least 10
     * @throws IllegalArgumentException if {@code i} is smaller than 10
     */
    public LanguageIdentifier(int i) {
        boolean tooSmall = i < 10;
        if (tooSmall) {
            String message = "maxLength must be >= 10 (but values > 100 are recommended): " + i;
            throw new IllegalArgumentException(message);
        }
        maxLength = i;
    }

    /**
     * Detects the language of the given text.
     *
     * <p>NOTE(review): the roles of the two lists are not visible in this class. By analogy with
     * {@code prepareDetectLanguage}, the first is presumably the additional language codes and the
     * second the preferred language codes - confirm against the implementations.
     *
     * @return the detection result; may be {@code null} (the method is annotated {@code @Nullable})
     */
    @Nullable
    public abstract DetectedLanguage detectLanguage(String str, List<String> list, List<String> list2);

    /**
     * Variant of the three-argument {@code detectLanguage} with an extra boolean switch.
     *
     * <p>NOTE(review): the meaning of {@code z} is not visible in this class - confirm against the implementations.
     *
     * @return the detection result; may be {@code null} (the method is annotated {@code @Nullable})
     */
    @Nullable
    public abstract DetectedLanguage detectLanguage(String str, List<String> list, List<String> list2, boolean z);

    /**
     * Returns scored detection candidates for the given text.
     *
     * <p>NOTE(review): the meaning of {@code z} and {@code i} is not visible in this class;
     * {@code i} is presumably a limit on the number of results - confirm against the implementations.
     *
     * @return the candidate list; never {@code null} according to the {@code @NotNull} annotation
     */
    @NotNull
    public abstract List<DetectedLanguage> getDetectedLanguageScores(String str, List<String> list, List<String> list2, boolean z, int i);

    /**
     * Detects the language of the given text without any caller-supplied language lists.
     *
     * @return the detected language; may be {@code null} (the method is annotated {@code @Nullable})
     */
    @Nullable
    public abstract Language detectLanguage(String str);

    /**
     * Truncates the text to {@code maxLength} characters and then strips noise from it.
     *
     * <p>Steps, in order: cut to {@code maxLength}; replace runs matched by
     * {@code NBSP_INVIS_SEPARATOR} with a space; apply the URL/e-mail filter, the e-mail signature
     * filter, the mention filter and finally the no-break-space filter. Because truncation happens
     * first, a URL or mention straddling the cut is only seen in part by the filters.
     *
     * @param str text to clean; must not be {@code null}
     * @return the shortened and cleaned text
     */
    public String cleanAndShortenText(String str) {
        String text = str;
        if (text.length() > maxLength) {
            text = text.substring(0, maxLength);
        }
        text = NBSP_INVIS_SEPARATOR.matcher(text).replaceAll(" ");
        text = REMOVE_URL_FILTER.filter(text);
        text = REMOVE_EMAIL_SIGNATURE_FILTER.filter(text);
        text = REMOVE_MENTION_FILTER.filter(text);
        return REMOVE_NON_BREAKING_SPACES_FILTER.filter(text);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Normalizes the caller-supplied language lists and pre-screens the text with the
     * Unicode-based detector.
     *
     * <p>Both lists are copied with the code {@code "nb"} replaced by {@code "no"}. If the dominant
     * language codes reported by {@code UNICODE_BASED_LANG_IDENTIFIER}, joined with commas, are
     * exactly {@code "th"}, {@code "he"}, {@code "ko"} or {@code "hi,mr"}, the method returns
     * {@code null}. NOTE(review): presumably callers then rely on the Unicode-based result
     * directly - confirm. Otherwise, unless the preferred list already contains one of
     * {@code ru, uk, be, zh, hi, mr}, the dominant codes are appended to both lists.
     *
     * @param str   text to inspect
     * @param list  additional language codes; not modified
     * @param list2 preferred language codes, without variants; not modified
     * @return the prepared lists, or {@code null} in the case described above
     * @throws NullPointerException     if {@code list} or {@code list2} is {@code null}
     * @throws IllegalArgumentException if a preferred code contains {@code '-'}
     */
    public ParsedLanguageLists prepareDetectLanguage(String str, List<String> list, List<String> list2) {
        Objects.requireNonNull(list);
        Objects.requireNonNull(list2);
        List<String> additionalLangs = copyWithNorwegianNormalized(list);
        List<String> preferredLangs = copyWithNorwegianNormalized(list2);
        if (preferredLangs.stream().anyMatch(code -> code.contains("-"))) {
            throw new IllegalArgumentException("preferredLanguages may only contain language codes without variants (e.g. 'en', but not 'en-US'): " + preferredLangs + ". Use 'preferredVariants' to specify variants.");
        }
        List<String> dominantLangCodes = UNICODE_BASED_LANG_IDENTIFIER.getDominantLangCodes(str);
        String joinedCodes = String.join(",", dominantLangCodes);
        if (joinedCodes.equals("th") || joinedCodes.equals("he") || joinedCodes.equals("ko") || joinedCodes.equals("hi,mr")) {
            return null;
        }
        // Only extend the lists when the caller did not already prefer one of these languages.
        if (Collections.disjoint(preferredLangs, Arrays.asList("ru", "uk", "be", "zh", "hi", "mr"))) {
            preferredLangs.addAll(dominantLangCodes);
            additionalLangs.addAll(dominantLangCodes);
        }
        return new ParsedLanguageLists(additionalLangs, preferredLangs);
    }

    /**
     * Copies the codes into a new {@code ArrayList}, mapping {@code "nb"} to {@code "no"}.
     * An explicit {@code ArrayList} is used because the caller appends to the result and
     * {@code Collectors.toList()} makes no promise that its result is mutable.
     */
    private static List<String> copyWithNorwegianNormalized(List<String> codes) {
        return codes.stream()
                .map(code -> code.equals("nb") ? "no" : code)
                .collect(Collectors.toCollection(ArrayList::new));
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Finds the entry with the highest score.
     *
     * <p>Only scores strictly greater than {@code -1.0} can win. For an empty map, or one whose
     * scores are all {@code <= -1.0} or NaN, the result has a {@code null} key and the value
     * {@code -1.0}. On ties the entry met first in the map's iteration order wins.
     *
     * @param map scores by key; values must not be {@code null}
     * @return an immutable entry holding the best key and its score
     */
    public Map.Entry<String, Double> getHighestScoringResult(Map<String, Double> map) {
        String bestKey = null;
        double bestScore = -1.0d;
        for (Map.Entry<String, Double> candidate : map.entrySet()) {
            // Unbox once per entry instead of once for the test and once for the assignment.
            double score = candidate.getValue();
            if (score > bestScore) {
                bestScore = score;
                bestKey = candidate.getKey();
            }
        }
        return new AbstractMap.SimpleImmutableEntry<>(bestKey, bestScore);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the top-scoring entries in descending score order.
     *
     * <p>The sort is stable, so entries with equal scores keep the relative order in which the
     * input map iterates them. The input map is not modified.
     *
     * @param map scores by key
     * @param i   maximum number of entries to return; zero or a negative value yields an empty map
     * @return a new insertion-ordered map with at most {@code i} entries, best score first
     */
    public Map<String, Double> getOrderedScores(Map<String, Double> map, int i) {
        List<Map.Entry<String, Double>> entries = new ArrayList<>(map.entrySet());
        entries.sort(Map.Entry.<String, Double>comparingByValue(Collections.reverseOrder()));
        Map<String, Double> ordered = new LinkedHashMap<>();
        for (Map.Entry<String, Double> entry : entries) {
            // Keys are unique, so the result size equals the number of entries copied so far.
            if (ordered.size() >= i) {
                break;
            }
            ordered.put(entry.getKey(), entry.getValue());
        }
        return ordered;
    }

    // Builds the shared common-words detector once, at class-initialization time.
    // An IOException from its constructor is rethrown unchecked, because a static
    // initializer cannot propagate checked exceptions.
    static {
        CommonWordsDetector detector;
        try {
            detector = new CommonWordsDetector();
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
        COMMON_WORDS_LANG_IDENTIFIER = detector;
    }
}
