package de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.dbpedia;

import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.LabelToConceptLinker;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.Language;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.MultiConceptLinker;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.nGramTokenizers.MaxGramLeftToRightTokenizer;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.StringModifier;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.TokenizeConcatSpaceCapitalizeFirstLetterLowercaseRestModifier;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.TokenizeConcatSpaceCapitalizeModifier;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.TokenizeConcatSpaceLowercaseModifier;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.TokenizeConcatSpaceLowercaseModifierDropPlural;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.TokenizeConcatSpaceModifier;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.TokenizeConcatSpaceModifierDropPlural;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.labelToConcept.stringModifiers.TokenizeConcatSpaceOnlyCapitalizeFirstLetterModifierDropPlural;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.persistence.PersistenceService;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.sparql.SparqlServices;
import de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.external.services.stringOperations.StringOperations;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.ResultSet;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/external/dbpedia/DBpediaLinker.class */
/**
 * Links textual labels to DBpedia concepts by issuing SPARQL queries either against the public
 * endpoint ({@link #getEndpointUrl()}) or against a local TDB dataset supplied by the
 * {@link DBpediaKnowledgeSource}.
 *
 * <p>A successful link is not a DBpedia URI but a "multi concept link": a key starting with
 * {@link #MULTI_CONCEPT_PREFIX} under which the set of matching URIs is stored. Use
 * {@link #getUris(String)} to resolve such a key.
 *
 * <p>NOTE: the link store is a {@code static} field that is (re)assigned by every constructor call
 * and by {@link #setDiskBufferEnabled(boolean)}; it is therefore shared by all instances of this
 * class.
 */
public class DBpediaLinker implements LabelToConceptLinker, MultiConceptLinker {
    private static final Logger LOGGER = LoggerFactory.getLogger(DBpediaLinker.class);
    private static final String ENDPOINT_URL = "https://dbpedia.org/sparql";

    /** Prefix of every multi concept link key created by this linker. */
    public static final String MULTI_CONCEPT_PREFIX = "#ML_";

    /** Predicates that are matched against the label variants, in the order they appear in the query. */
    private static final String[] LINK_PREDICATES = {
        "http://www.w3.org/2000/01/rdf-schema#label",
        "http://xmlns.com/foaf/0.1/name",
        "http://dbpedia.org/property/name",
        "http://dbpedia.org/property/otherNames",
        "http://dbpedia.org/ontology/alias"
    };

    PersistenceService persistenceService;

    /** Maps a multi concept link key to the URIs found for it. Shared by all instances. */
    private static ConcurrentMap<String, Set<String>> multiLinkStore;

    private boolean isDiskBufferEnabled;
    private boolean isUseTdb;
    private Dataset tdbDataset;
    private String nameOfLinker = "DBpedia Linker";

    /** Modifiers that produce the label variants which are looked up in DBpedia. */
    Set<StringModifier> stringModificationSet = new HashSet<>();

    /**
     * @return the URL of the public SPARQL endpoint used when no TDB dataset is configured.
     */
    public static String getEndpointUrl() {
        return ENDPOINT_URL;
    }

    /**
     * Creates a linker configured from the given knowledge source.
     *
     * @param dBpediaKnowledgeSource source of the disk-buffer flag, the TDB flag and, if TDB is
     *                               used, the TDB dataset.
     */
    public DBpediaLinker(DBpediaKnowledgeSource dBpediaKnowledgeSource) {
        this.isDiskBufferEnabled = dBpediaKnowledgeSource.isDiskBufferEnabled();
        initializeBuffers();

        this.stringModificationSet.add(new TokenizeConcatSpaceModifier());
        this.stringModificationSet.add(new TokenizeConcatSpaceCapitalizeModifier());
        this.stringModificationSet.add(new TokenizeConcatSpaceLowercaseModifier());
        this.stringModificationSet.add(new TokenizeConcatSpaceModifierDropPlural());
        this.stringModificationSet.add(new TokenizeConcatSpaceLowercaseModifierDropPlural());
        this.stringModificationSet.add(new TokenizeConcatSpaceCapitalizeFirstLetterLowercaseRestModifier());
        this.stringModificationSet.add(new TokenizeConcatSpaceOnlyCapitalizeFirstLetterModifierDropPlural());

        this.isUseTdb = dBpediaKnowledgeSource.isUseTdb();
        if (this.isUseTdb) {
            this.tdbDataset = dBpediaKnowledgeSource.getTdbDataset();
        }
    }

    /**
     * Resolves a link to the URIs it stands for.
     *
     * @param str a multi concept link key or any other string; may be {@code null}.
     * @return for a key starting with {@link #MULTI_CONCEPT_PREFIX}: the stored URI set, or an
     *         empty set if the key is unknown. For {@code null}: an empty set. For any other
     *         string: a set containing just that string (a warning is logged).
     */
    @Override
    @NotNull
    public Set<String> getUris(String str) {
        Set<String> result = new HashSet<>();
        if (str == null) {
            return result;
        }
        if (str.startsWith(MULTI_CONCEPT_PREFIX)) {
            // Single lookup instead of containsKey + get: the store may be modified concurrently.
            Set<String> stored = multiLinkStore.get(str);
            return stored != null ? stored : result;
        }
        LOGGER.warn("The given link does not start with a prefix. Returning the link");
        result.add(str);
        return result;
    }

    /**
     * @param str the link to check; may be {@code null}.
     * @return {@code true} if the link starts with {@link #MULTI_CONCEPT_PREFIX}; {@code false}
     *         otherwise, including for {@code null}.
     */
    @Override
    public boolean isMultiConceptLink(String str) {
        return str != null && str.startsWith(MULTI_CONCEPT_PREFIX);
    }

    /**
     * Resolves several links at once. Multi concept links are expanded via
     * {@link #getUris(String)}; all other entries are copied to the result unchanged.
     *
     * @param set the links to resolve; may be {@code null}. {@code null} entries are skipped.
     * @return the union of all resolved URIs, never {@code null}.
     */
    public Set<String> getUris(Set<String> set) {
        Set<String> result = new HashSet<>();
        if (set == null) {
            return result;
        }
        for (String link : set) {
            if (link == null) {
                continue;
            }
            if (link.startsWith(MULTI_CONCEPT_PREFIX)) {
                result.addAll(getUris(link));
            } else {
                result.add(link);
            }
        }
        return result;
    }

    /**
     * (Re)creates the shared link store: the persisted map of the persistence service when the
     * disk buffer is enabled, a fresh in-memory map otherwise.
     */
    private void initializeBuffers() {
        this.persistenceService = PersistenceService.getService();
        if (this.isDiskBufferEnabled) {
            multiLinkStore = this.persistenceService.getMapDatabase(PersistenceService.PreconfiguredPersistences.DBPEDIA_LABEL_LINK_BUFFER);
        } else {
            multiLinkStore = new ConcurrentHashMap<>();
        }
    }

    /**
     * Links the label using {@link Language#ENGLISH}.
     *
     * @see #linkToSingleConcept(String, Language)
     */
    @Override
    public String linkToSingleConcept(String str) {
        return linkToSingleConcept(str, Language.ENGLISH);
    }

    /**
     * Links a label to DBpedia concepts whose label/name/alias equals one of the label's variants
     * (as produced by the configured string modifiers) in the given language. Concepts reachable
     * from a hit via {@code dbo:wikiPageDisambiguates} are added as well. The outcome, including
     * an empty one, is stored in the link store so that later calls are answered without a query.
     *
     * @param str      the label to link.
     * @param language the language tag to use for the literal comparison.
     * @return the multi concept link key, or {@code null} if the label is {@code null}/blank, the
     *         language is {@code null}, or nothing was found.
     */
    public String linkToSingleConcept(String str, Language language) {
        if (str == null || language == null || str.trim().equals("")) {
            return null;
        }
        String key = MULTI_CONCEPT_PREFIX + str + "_" + language.toSparqlChar2();

        Set<String> buffered = multiLinkStore.get(key);
        if (buffered != null) {
            LOGGER.debug("Found in buffer: {}", key);
            // An empty entry records that an earlier lookup found nothing.
            return buffered.isEmpty() ? null : key;
        }

        Set<String> variants = new HashSet<>();
        for (StringModifier modifier : this.stringModificationSet) {
            variants.add(modifier.modifyString(str));
        }

        Set<String> uris = selectConceptUris(getLinkerQueryString(variants, language));

        // Collect into a separate set first; 'uris' must not be modified while it is iterated.
        Set<String> disambiguated = new HashSet<>();
        for (String uri : uris) {
            disambiguated.addAll(getDisambiguationUris(uri));
        }
        uris.addAll(disambiguated);

        multiLinkStore.put(key, uris);
        commit();
        return uris.isEmpty() ? null : key;
    }

    /** Commits the persisted link store; does nothing when the disk buffer is disabled. */
    private void commit() {
        if (this.isDiskBufferEnabled) {
            this.persistenceService.commit(PersistenceService.PreconfiguredPersistences.DBPEDIA_LABEL_LINK_BUFFER);
        }
    }

    /**
     * Looks up the objects of {@code dbo:wikiPageDisambiguates} for the given subject.
     *
     * @param str the subject URI; it is inserted into the query verbatim between angle brackets,
     *            so it must be a syntactically valid IRI.
     * @return the URIs found; empty if {@code str} is {@code null} or empty. Never {@code null}.
     */
    @NotNull
    public Set<String> getDisambiguationUris(String str) {
        if (str == null || str.equals("")) {
            return new HashSet<>();
        }
        return selectConceptUris("SELECT DISTINCT ?c WHERE {<" + str + "> <http://dbpedia.org/ontology/wikiPageDisambiguates> ?c}");
    }

    /**
     * Runs a SELECT query that binds {@code ?c} (against TDB or the public endpoint, depending on
     * the configuration) and collects the URIs of the bound resources. The query execution is
     * always closed, also when executing or iterating fails.
     *
     * @param queryString the SPARQL query.
     * @return a new mutable set with the URIs; resources without a URI are skipped.
     */
    private Set<String> selectConceptUris(String queryString) {
        QueryExecution execution = this.isUseTdb
                ? QueryExecutionFactory.create(queryString, this.tdbDataset)
                : QueryExecutionFactory.sparqlService(ENDPOINT_URL, queryString);
        Set<String> uris = new HashSet<>();
        try {
            ResultSet results = SparqlServices.safeExecution(execution);
            while (results.hasNext()) {
                String uri = results.next().getResource("c").getURI();
                if (uri != null) {
                    uris.add(uri);
                }
            }
        } finally {
            execution.close();
        }
        return uris;
    }

    /**
     * Builds the linking query: a UNION over every combination of label variant and link
     * predicate, each comparing against the variant as a literal tagged with the language.
     *
     * @param set      the label variants.
     * @param language the language whose tag is attached to every literal.
     * @return the SPARQL SELECT query binding {@code ?c}.
     */
    static String getLinkerQueryString(Set<String> set, Language language) {
        StringBuilder query = new StringBuilder();
        query.append("SELECT DISTINCT ?c WHERE {\n");
        boolean isFirst = true;
        for (String variant : set) {
            for (String predicate : LINK_PREDICATES) {
                if (isFirst) {
                    isFirst = false;
                } else {
                    query.append("UNION ");
                }
                query.append(getPredicateQueryLine(predicate, variant, language));
            }
        }
        query.append("}");
        return query.toString();
    }

    /**
     * Builds one group graph pattern of the linking query.
     *
     * @param str      the predicate IRI (without angle brackets).
     * @param str2     the literal value; characters that are not allowed inside a double-quoted
     *                 SPARQL string are escaped.
     * @param language the language whose tag is attached to the literal.
     * @return the pattern <code>{?c &lt;predicate&gt; "value"@lang}</code> followed by a newline.
     */
    static StringBuilder getPredicateQueryLine(String str, String str2, Language language) {
        StringBuilder line = new StringBuilder();
        line.append("{?c <").append(str).append("> \"").append(escapeSparqlLiteral(str2)).append("\"@").append(language.toSparqlChar2()).append("}\n");
        return line;
    }

    /**
     * Escapes the characters that must not appear unescaped in a double-quoted SPARQL string
     * literal: backslash, double quote, line feed and carriage return.
     *
     * @param literal the raw value; {@code null} is rendered as the text {@code null}, which is
     *                what appending a {@code null} string to a {@link StringBuilder} produces.
     * @return the escaped value.
     */
    private static String escapeSparqlLiteral(String literal) {
        String raw = String.valueOf(literal);
        StringBuilder escaped = new StringBuilder(raw.length() + 8);
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            switch (c) {
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Links the label using {@link Language#ENGLISH}.
     *
     * @see #linkToPotentiallyMultipleConcepts(String, Language)
     */
    @Override
    public Set<String> linkToPotentiallyMultipleConcepts(String str) {
        return linkToPotentiallyMultipleConcepts(str, Language.ENGLISH);
    }

    /**
     * Links parts of the label individually and returns the links only if they account for the
     * whole label: the number of non-stopword tokens of the label must not exceed the summed
     * number of non-stopword tokens of the returned links.
     *
     * @param str      the label to link.
     * @param language the language used for every single lookup.
     * @return the set of multi concept link keys, or {@code null} if {@code str} is {@code null}
     *         or the label could not be covered.
     */
    public HashSet<String> linkToPotentiallyMultipleConcepts(String str, Language language) {
        if (str == null) {
            return null;
        }
        HashSet<String> links = linkLabelToTokensLeftToRight(str, language);
        int labelTokenCount = StringOperations.clearArrayFromStopwords(StringOperations.tokenizeBestGuess(str)).length;
        int linkedTokenCount = 0;
        for (String link : links) {
            linkedTokenCount += StringOperations.clearArrayFromStopwords(StringOperations.tokenizeBestGuess(link)).length;
        }
        return labelTokenCount <= linkedTokenCount ? links : null;
    }

    /**
     * Feeds the candidates produced by a {@link MaxGramLeftToRightTokenizer} to
     * {@link #linkToSingleConcept(String, Language)} and reports every outcome back to the
     * tokenizer, which then decides on the next candidate. Stops when the tokenizer returns
     * {@code null}.
     *
     * @param str      the label to split and link.
     * @param language the language used for every single lookup.
     * @return the links of all candidates that could be linked; possibly empty.
     */
    private HashSet<String> linkLabelToTokensLeftToRight(String str, Language language) {
        MaxGramLeftToRightTokenizer tokenizer = new MaxGramLeftToRightTokenizer(StringOperations.tokenizeBestGuess(str), " ");
        HashSet<String> links = new HashSet<>();
        String candidate = tokenizer.getInitialToken();
        while (candidate != null) {
            String link = linkToSingleConcept(candidate, language);
            if (link == null || link.length() == 0) {
                candidate = tokenizer.getNextTokenNotSuccessful();
            } else {
                links.add(link);
                candidate = tokenizer.getNextTokenSuccessful();
            }
        }
        return links;
    }

    /**
     * @return the display name of this linker.
     */
    @Override
    public String getNameOfLinker() {
        return this.nameOfLinker;
    }

    /**
     * @param str the new display name of this linker.
     */
    @Override
    public void setNameOfLinker(String str) {
        this.nameOfLinker = str;
    }

    /**
     * @return {@code true} if the link store is backed by the persistence service.
     */
    public boolean isDiskBufferEnabled() {
        return this.isDiskBufferEnabled;
    }

    /**
     * Switches between the persisted and the in-memory link store. The store is re-initialized
     * only when the flag actually changes; switching replaces the shared store for all instances.
     *
     * @param z {@code true} to use the persisted store, {@code false} for a fresh in-memory map.
     */
    public void setDiskBufferEnabled(boolean z) {
        if (z == this.isDiskBufferEnabled) {
            return;
        }
        this.isDiskBufferEnabled = z;
        initializeBuffers();
    }
}
