package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util;

import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.stringOperations.StringOperations;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/util/StringProcessing.class */
/**
 * Static helpers that normalize labels and identifiers into comparable tokens
 * (camel-case splitting, lower-casing, URL decoding, punctuation removal).
 */
public class StringProcessing {
    /**
     * Zero-width position in front of an upper-case ASCII letter that is followed by a
     * lower-case ASCII letter, excluding the start of the string and positions directly
     * after whitespace (e.g. between "camel" and "Case", or "HTTP" and "Server").
     */
    private static final Pattern CAMEL_CASE = Pattern.compile("(?<!^)(?<!\\s)(?=[A-Z][a-z])");
    /** Any character that is not an ASCII letter, a digit, whitespace, a colon or an underscore. */
    private static final Pattern NON_ALPHA = Pattern.compile("[^a-zA-Z\\d\\s:_]");
    /** The literal sequence {@code 's}. */
    private static final Pattern ENGLISH_GENITIVE_S = Pattern.compile("'s");
    /** One or more consecutive underscores. */
    private static final Pattern MULTIPLE_UNDERSCORES = Pattern.compile("_+");
    /** One or more consecutive space characters. */
    private static final Pattern MULTIPLE_WHITESPACE = Pattern.compile(" +");

    /**
     * Splits the given text into normalized, lower-cased tokens.
     *
     * @param str the text to normalize; may be {@code null}
     * @return a new mutable list of tokens; empty when {@code str} is {@code null}
     */
    public static List<String> normalize(String str) {
        return new ArrayList<>(Arrays.asList(normalizeToStringArray(str)));
    }

    /**
     * Splits the given text into normalized, lower-cased tokens and passes them through
     * {@link StringOperations#clearArrayFromStopwords}.
     *
     * @param str the text to normalize; may be {@code null}
     * @return a new mutable list holding the tokens returned by the stopword filter
     */
    public static List<String> normalizeAndRemoveStopwords(String str) {
        String[] tokens = normalizeToStringArray(str);
        return new ArrayList<>(Arrays.asList(StringOperations.clearArrayFromStopwords(tokens)));
    }

    /**
     * Tokenizes the text: trims it, marks camel-case boundaries and spaces with underscores,
     * lower-cases, URL-decodes, replaces every character matched by {@link #NON_ALPHA} with
     * an underscore, collapses underscore runs and finally splits on underscores.
     *
     * <p>The result is exactly what {@link String#split(String)} yields, so a text that
     * starts with a replaced character produces a leading empty token and an empty text
     * produces a single empty token.
     *
     * @param str the text to tokenize; may be {@code null}
     * @return the tokens; an empty array when {@code str} is {@code null}
     */
    private static String[] normalizeToStringArray(String str) {
        if (str == null) {
            return new String[0];
        }
        String text = CAMEL_CASE.matcher(str.trim()).replaceAll("_");
        // Locale.ROOT keeps the result independent of the JVM default locale; with a Turkish
        // default locale 'I' would become the dotless 'ı', which NON_ALPHA then wipes out.
        text = text.replace(' ', '_').toLowerCase(Locale.ROOT);
        try {
            text = URLDecoder.decode(text, "UTF-8");
        } catch (UnsupportedEncodingException | IllegalArgumentException ignored) {
            // Best effort: text with malformed percent escapes (e.g. a stray '%') is not
            // URL-encoded in the first place, so it is processed undecoded.
        }
        text = NON_ALPHA.matcher(text).replaceAll("_");
        // NOTE(review): NON_ALPHA has already turned every apostrophe into '_', so this
        // pattern cannot match here any more. Kept in place to leave token output unchanged;
        // moving it in front of NON_ALPHA would alter the tokens for texts containing "'s".
        text = ENGLISH_GENITIVE_S.matcher(text).replaceAll("");
        text = MULTIPLE_UNDERSCORES.matcher(text).replaceAll("_");
        return text.split("_");
    }

    /**
     * Tells whether at least half of the non-whitespace characters are ASCII digits
     * ({@code '0'} to {@code '9'}). Whitespace, as defined by
     * {@link Character#isWhitespace(char)}, is ignored. A text without any non-whitespace
     * character yields {@code true}.
     *
     * @param str the text to inspect; must not be {@code null}
     * @return {@code true} if there are at least as many digits as other non-whitespace characters
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static boolean containsMostlyNumbers(String str) {
        int digits = 0;
        int others = 0;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c >= '0' && c <= '9') {
                digits++;
            } else if (!Character.isWhitespace(c)) {
                others++;
            }
        }
        // Comparing the two counts directly avoids the truncating "total / 2", which rounded
        // the threshold down and accepted e.g. one digit out of three characters.
        return digits >= others;
    }

    /**
     * Turns camel-case boundaries and underscores into single spaces without changing the
     * case of any character. Leading and trailing whitespace is trimmed first and runs of
     * spaces are collapsed into one.
     *
     * @param str the text to normalize; may be {@code null}
     * @return the normalized text; the empty string when {@code str} is {@code null}
     */
    public static String normalizeOnlyCamelCaseAndUnderscore(String str) {
        if (str == null) {
            return "";
        }
        String spaced = CAMEL_CASE.matcher(str.trim()).replaceAll(" ").replace('_', ' ');
        return MULTIPLE_WHITESPACE.matcher(spaced).replaceAll(" ");
    }
}
