package org.molgenis.ontology.roc;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.UncheckedExecutionException;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.QueryRule;
import org.molgenis.data.semanticsearch.string.NGramDistanceAlgorithm;
import org.molgenis.data.semanticsearch.string.Stemmer;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.ontology.core.meta.OntologyMetaData;
import org.molgenis.ontology.core.meta.OntologyTermMetaData;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:WEB-INF/lib/molgenis-ontology-3.0.1.jar:org/molgenis/ontology/roc/InformationContentService.class */
public class InformationContentService {
    private static final String NON_WORD_SEPARATOR = "[^a-zA-Z0-9]";
    private static final String SINGLE_WHITESPACE = " ";
    private final LoadingCache<String, Long> CACHED_TOTAL_WORD_COUNT = CacheBuilder.newBuilder().maximumSize(2147483647L).expireAfterWrite(1, TimeUnit.DAYS).build(new CacheLoader<String, Long>() { // from class: org.molgenis.ontology.roc.InformationContentService.1
        @Override // com.google.common.cache.CacheLoader
        public Long load(String str) {
            Entity findOne = InformationContentService.this.dataService.findOne(OntologyMetaData.ONTOLOGY, new QueryImpl().eq(OntologyMetaData.ONTOLOGY_IRI, str));
            if (findOne != null) {
                return Long.valueOf(InformationContentService.this.dataService.count(OntologyTermMetaData.ONTOLOGY_TERM, new QueryImpl().eq(OntologyTermMetaData.ONTOLOGY, findOne)));
            }
            return 0L;
        }
    });
    private final LoadingCache<OntologyWord, Double> CACHED_INVERSE_DOCUMENT_FREQ = CacheBuilder.newBuilder().maximumSize(2147483647L).expireAfterWrite(1, TimeUnit.DAYS).build(new CacheLoader<OntologyWord, Double>() { // from class: org.molgenis.ontology.roc.InformationContentService.2
        @Override // com.google.common.cache.CacheLoader
        public Double load(OntologyWord ontologyWord) throws ExecutionException {
            String ontologyIri = ontologyWord.getOntologyIri();
            Entity findOne = InformationContentService.this.dataService.findOne(OntologyMetaData.ONTOLOGY, new QueryImpl().eq(OntologyMetaData.ONTOLOGY_IRI, ontologyIri));
            if (findOne == null) {
                return Double.valueOf(CMAESOptimizer.DEFAULT_STOPFITNESS);
            }
            QueryRule queryRule = new QueryRule((List<QueryRule>) Arrays.asList(new QueryRule("ontologyTermSynonym", QueryRule.Operator.FUZZY_MATCH, ontologyWord.getWord())));
            queryRule.setOperator(QueryRule.Operator.DIS_MAX);
            long count = InformationContentService.this.dataService.count(OntologyTermMetaData.ONTOLOGY_TERM, new QueryImpl(new QueryRule((List<QueryRule>) Arrays.asList(new QueryRule(OntologyTermMetaData.ONTOLOGY, QueryRule.Operator.EQUALS, findOne), new QueryRule(QueryRule.Operator.AND), queryRule))));
            return Double.valueOf(new BigDecimal(((Long) InformationContentService.this.CACHED_TOTAL_WORD_COUNT.get(ontologyIri)) == null ? CMAESOptimizer.DEFAULT_STOPFITNESS : 1.0d + Math.log(r0.longValue() / (count + 1))).doubleValue());
        }
    });
    private final DataService dataService;

    @Autowired
    public InformationContentService(DataService dataService) {
        this.dataService = (DataService) Objects.requireNonNull(dataService);
    }

    public Map<String, Double> redistributedNGramScore(String str, String str2) {
        Map<String, Double> createWordIDF = createWordIDF(str, str2);
        HashMap hashMap = new HashMap();
        if (createWordIDF.size() > 0) {
            double asDouble = createWordIDF.values().stream().mapToDouble((v0) -> {
                return v0.doubleValue();
            }).average().getAsDouble();
            double length = StringUtils.join(createStemmedWordSet(str), " ").trim().length();
            double d = 0.0d;
            double d2 = 0.0d;
            for (Map.Entry<String, Double> entry : createWordIDF.entrySet()) {
                double doubleValue = entry.getValue().doubleValue() - asDouble;
                if (doubleValue < CMAESOptimizer.DEFAULT_STOPFITNESS) {
                    Double valueOf = Double.valueOf((entry.getKey().length() / length) * 100.0d * (doubleValue / asDouble));
                    d += Math.abs(valueOf.doubleValue());
                    hashMap.put(entry.getKey(), valueOf);
                } else {
                    d2 += doubleValue;
                }
            }
            for (Map.Entry<String, Double> entry2 : createWordIDF.entrySet()) {
                double doubleValue2 = entry2.getValue().doubleValue() - asDouble;
                if (doubleValue2 > CMAESOptimizer.DEFAULT_STOPFITNESS) {
                    hashMap.put(entry2.getKey(), Double.valueOf((doubleValue2 / d2) * d));
                }
            }
        }
        return hashMap;
    }

    Map<String, Double> createWordIDF(String str, String str2) {
        HashMap hashMap = new HashMap();
        createStemmedWordSet(str).stream().forEach(str3 -> {
            try {
                Double d = this.CACHED_INVERSE_DOCUMENT_FREQ.get(new OntologyWord(str2, str3));
                if (d == null || d.doubleValue() == CMAESOptimizer.DEFAULT_STOPFITNESS) {
                    return;
                }
                hashMap.put(str3, d);
            } catch (ExecutionException e) {
                throw new UncheckedExecutionException(e);
            }
        });
        return hashMap;
    }

    public Set<String> createStemmedWordSet(String str) {
        return Sets.newHashSet((Set) Sets.newHashSet(str.toLowerCase().trim().split(NON_WORD_SEPARATOR)).stream().filter(str2 -> {
            return !NGramDistanceAlgorithm.STOPWORDSLIST.contains(str2);
        }).map(Stemmer::stem).filter((v0) -> {
            return StringUtils.isNotBlank(v0);
        }).collect(Collectors.toSet()));
    }
}
