package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.webIsAlod.xl;

import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.nGramTokenizers.MaxGramLeftToRightTokenizer;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.stringOperations.StringOperations;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.webIsAlod.WebIsAlodSPARQLservice;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/external/webIsAlod/xl/WebIsAlodXLLinker.class */
/**
 * {@link LabelToConceptLinker} that resolves labels to concept URIs by querying the
 * ALOD XL endpoint through {@link WebIsAlodSPARQLservice}.
 *
 * <p>A label is looked up in up to three increasingly normalized forms; see
 * {@link #linkToSingleConcept(String)}. Multi-word labels that cannot be linked as a
 * whole can be linked piecewise via {@link #linkToPotentiallyMultipleConcepts(String)}.
 */
public class WebIsAlodXLLinker implements LabelToConceptLinker {

    /** URI prefix that {@link #unstripUriXl(String)} removes from concept URIs. */
    private static final String CONCEPT_URI_PREFIX = "http://webisa.webdatacommons.org/concept/";

    /** Display name of this linker; mutable through {@link #setNameOfLinker(String)}. */
    private String nameOfLinker = "ALOD XL Linker";

    /** Service used for all label lookups, bound to the ALOD XL (no proxy) endpoint. */
    private final WebIsAlodSPARQLservice sparqlService =
            WebIsAlodSPARQLservice.getInstance(WebIsAlodSPARQLservice.WebIsAlodEndpoint.ALOD_XL_NO_PROXY);

    /**
     * Links a label to a single concept URI.
     *
     * <p>Lookups are attempted in this order, stopping at the first non-null result:
     * <ol>
     *   <li>the label cleaned with {@link #cleanLabelForLabelLookup(String)};</li>
     *   <li>the label normalized with {@link #normalizeForAlodXLLookupWithoutTokenization(String)};</li>
     *   <li>the label normalized with {@link #normalizeForAlodXLLookupWithTokenization(String)}.</li>
     * </ol>
     *
     * @param str the label to link; may be {@code null}
     * @return the URI returned by the service, or {@code null} if {@code str} is {@code null}
     *         or no lookup yielded a result
     */
    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker
    public String linkToSingleConcept(String str) {
        if (str == null) {
            // Nothing to look up; report "not linkable" instead of failing with a NullPointerException.
            return null;
        }
        String uri = sparqlService.getUriUsingLabel(cleanLabelForLabelLookup(str));
        if (uri != null) {
            return uri;
        }
        uri = sparqlService.getUriUsingLabel(normalizeForAlodXLLookupWithoutTokenization(str));
        if (uri != null) {
            return uri;
        }
        return sparqlService.getUriUsingLabel(normalizeForAlodXLLookupWithTokenization(str));
    }

    /**
     * Links a label to one or more concept URIs by linking its token n-grams individually.
     *
     * <p>The result is only accepted if the linked concepts together contain at least as many
     * non-stopword tokens as the label itself, i.e. if the label is covered by the links.
     *
     * @param str the label to link; may be {@code null}
     * @return the set of linked URIs, or {@code null} if {@code str} is {@code null} or the
     *         label is not sufficiently covered by the linked concepts
     */
    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker
    public HashSet<String> linkToPotentiallyMultipleConcepts(String str) {
        if (str == null) {
            return null;
        }
        HashSet<String> links = linkLabelToTokensLeftToRight(str);
        int tokensInLabel = countNonStopwordTokens(str);
        int tokensInLinks = 0;
        for (String uri : links) {
            tokensInLinks += countNonStopwordTokens(unstripUriXl(uri));
        }
        // null signals "not linkable" to the caller (same convention as before).
        return tokensInLabel <= tokensInLinks ? links : null;
    }

    /** Counts the tokens of {@code text} that remain after stopword removal. */
    private static int countNonStopwordTokens(String text) {
        return StringOperations.clearArrayFromStopwords(StringOperations.tokenizeBestGuess(text)).length;
    }

    /**
     * Walks the n-grams produced by a {@link MaxGramLeftToRightTokenizer} over the label's
     * tokens and collects the URI of every n-gram that can be linked.
     *
     * @param str the label to tokenize and link
     * @return the URIs of all linked n-grams; empty if none could be linked
     */
    private HashSet<String> linkLabelToTokensLeftToRight(String str) {
        MaxGramLeftToRightTokenizer tokenizer =
                new MaxGramLeftToRightTokenizer(StringOperations.tokenizeBestGuess(str), " ");
        HashSet<String> links = new HashSet<>();
        String token = tokenizer.getInitialToken();
        // The tokenizer returns null once there is no further n-gram to try.
        while (token != null) {
            String uri = linkToSingleConcept(token);
            if (uri == null || uri.isEmpty()) {
                token = tokenizer.getNextTokenNotSuccessful();
            } else {
                links.add(uri);
                token = tokenizer.getNextTokenSuccessful();
            }
        }
        return links;
    }

    /**
     * Prepares a label for use in a label lookup: removes double quotes, colons and curly
     * braces, turns hyphens and line feeds into spaces, and doubles every backslash.
     *
     * @param str the raw label; must not be {@code null}
     * @return the cleaned label
     */
    public static String cleanLabelForLabelLookup(String str) {
        return str.replace("\"", "")
                .replace(":", "")
                .replace("{", "")
                .replace("}", "")
                .replace("-", " ")
                .replace("\\", "\\\\")
                .replace("\n", " ");
    }

    /**
     * Normalizes a label for lookup, additionally splitting camel case: a space is inserted
     * before every uppercase letter that is followed by a lowercase letter, unless it is at
     * the start of the string or already preceded by whitespace. Underscores become spaces,
     * runs of spaces are collapsed, the result is lower-cased and finally cleaned with
     * {@link #cleanLabelForLabelLookup(String)}.
     *
     * @param str the raw label; must not be {@code null}
     * @return the normalized label
     */
    public static String normalizeForAlodXLLookupWithTokenization(String str) {
        String spaced = str.replaceAll("(?<!^)(?<!\\s)(?=[A-Z][a-z])", " ")
                .replace("_", " ")
                .replaceAll("( ){1,}", " ");
        // Locale.ROOT keeps lower-casing independent of the JVM default locale (e.g. Turkish 'I').
        return cleanLabelForLabelLookup(spaced.toLowerCase(Locale.ROOT));
    }

    /**
     * Normalizes a label for lookup without camel-case splitting: underscores become spaces,
     * runs of spaces are collapsed, the result is lower-cased and finally cleaned with
     * {@link #cleanLabelForLabelLookup(String)}.
     *
     * @param str the raw label; must not be {@code null}
     * @return the normalized label
     */
    public static String normalizeForAlodXLLookupWithoutTokenization(String str) {
        String spaced = str.replace("_", " ").replaceAll("( ){1,}", " ");
        // Locale.ROOT keeps lower-casing independent of the JVM default locale (e.g. Turkish 'I').
        return cleanLabelForLabelLookup(spaced.toLowerCase(Locale.ROOT));
    }

    /**
     * Turns a concept URI back into label-like text by decoding {@code %20} to a space and
     * removing the concept URI prefix.
     *
     * @param str the concept URI; must not be {@code null}
     * @return the URI without prefix and with {@code %20} replaced by spaces
     */
    public static String unstripUriXl(String str) {
        // Literal replacement: the prefix contains '.', which replaceAll would treat as a regex wildcard.
        return str.replace("%20", " ").replace(CONCEPT_URI_PREFIX, "");
    }

    /**
     * Returns the display name of this linker.
     *
     * @return the current linker name
     */
    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker
    public String getNameOfLinker() {
        return nameOfLinker;
    }

    /**
     * Sets the display name of this linker.
     *
     * @param str the new linker name
     */
    @Override // de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker
    public void setNameOfLinker(String str) {
        this.nameOfLinker = str;
    }
}
