package org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.terms;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.function.LongConsumer;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.TokenStream;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.miscellaneous.DeDuplicatingTokenFilter;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.miscellaneous.DuplicateByteSequenceSpotter;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.graylog.shaded.opensearch2.org.apache.lucene.index.LeafReaderContext;
import org.graylog.shaded.opensearch2.org.apache.lucene.util.BytesRef;
import org.graylog.shaded.opensearch2.org.apache.lucene.util.BytesRefBuilder;
import org.graylog.shaded.opensearch2.org.opensearch.common.lease.Releasables;
import org.graylog.shaded.opensearch2.org.opensearch.common.util.BigArrays;
import org.graylog.shaded.opensearch2.org.opensearch.common.util.BytesRefHash;
import org.graylog.shaded.opensearch2.org.opensearch.common.util.ObjectArray;
import org.graylog.shaded.opensearch2.org.opensearch.index.analysis.NamedAnalyzer;
import org.graylog.shaded.opensearch2.org.opensearch.index.mapper.MappedFieldType;
import org.graylog.shaded.opensearch2.org.opensearch.index.query.QueryBuilder;
import org.graylog.shaded.opensearch2.org.opensearch.index.query.QueryShardContext;
import org.graylog.shaded.opensearch2.org.opensearch.search.DocValueFormat;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.Aggregator;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.AggregatorFactories;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.AggregatorFactory;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.CardinalityUpperBound;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.LeafBucketCollector;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.LeafBucketCollectorBase;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.BucketUtils;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.terms.MapStringTermsAggregator;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.terms.TermsAggregator;
import org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic;
import org.graylog.shaded.opensearch2.org.opensearch.search.internal.SearchContext;
import org.graylog.shaded.opensearch2.org.opensearch.search.lookup.SourceLookup;

/* loaded from: input_file:org/graylog/shaded/opensearch2/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.class */
/**
 * Factory for the aggregator behind the significant-text aggregation.
 *
 * <p>The aggregator it creates is a {@link MapStringTermsAggregator} whose terms are produced by
 * {@link SignificantTextCollectorSource}: for each collected document the configured source fields
 * are read through a {@link SourceLookup}, run through the field's index analyzer, and the
 * resulting tokens are handed to the aggregator as candidate terms.
 */
public class SignificantTextAggregatorFactory extends AggregatorFactory {
    /**
     * Growth (in bytes) of a duplicate-sequence spotter that must be exceeded before the growth is
     * reported while a token stream is still being consumed.
     */
    private static final int MEMORY_GROWTH_REPORTING_INTERVAL_BYTES = 5000;

    /** Optional include/exclude rules; {@code null} means no term filtering. */
    private final IncludeExclude includeExclude;
    /** Name of the mapped field, or the requested field name when no mapping was found. */
    private final String indexedFieldName;
    /** Mapping of the requested field as resolved by the shard context; may be {@code null}. */
    private final MappedFieldType fieldType;
    /** Source fields the text is read from; defaults to {@link #indexedFieldName}. */
    private final String[] sourceFieldNames;
    /** Background filter handed to the {@code SignificanceLookup}. */
    private final QueryBuilder backgroundFilter;
    /** Thresholds as configured on the request; copied for every aggregator that is created. */
    private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
    /** Heuristic handed to the significant-terms result strategy. */
    private final SignificanceHeuristic significanceHeuristic;
    /** Whether a duplicate-sequence spotter is kept per bucket to drop repeated token sequences. */
    private final boolean filterDuplicateText;

    /**
     * Produces terms for {@link MapStringTermsAggregator} by analyzing text taken from the
     * document source instead of reading them from an index structure.
     */
    public static class SignificantTextCollectorSource implements MapStringTermsAggregator.CollectorSource {
        private final SourceLookup sourceLookup;
        private final BigArrays bigArrays;
        private final MappedFieldType fieldType;
        private final String[] sourceFieldNames;
        /** One spotter per bucket ordinal, grown on demand; {@code null} when de-duplication is off. */
        private ObjectArray<DuplicateByteSequenceSpotter> dupSequenceSpotters;

        /**
         * @param sourceLookup    lookup used to load the source of each collected document
         * @param bigArrays       allocator for the spotter array and the per-document term hash
         * @param mappedFieldType field whose index analyzer and name are used for tokenizing
         * @param strArr          names of the source fields to read text from
         * @param z               {@code true} to filter duplicate token sequences
         */
        SignificantTextCollectorSource(SourceLookup sourceLookup, BigArrays bigArrays, MappedFieldType mappedFieldType, String[] strArr, boolean z) {
            this.sourceLookup = sourceLookup;
            this.bigArrays = bigArrays;
            this.fieldType = mappedFieldType;
            this.sourceFieldNames = strArr;
            this.dupSequenceSpotters = z ? bigArrays.newObjectArray(1L) : null;
        }

        /** Scores are never needed by this source. */
        @Override // org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.terms.MapStringTermsAggregator.CollectorSource
        public boolean needsScores() {
            return false;
        }

        /**
         * Creates the per-segment collector.
         *
         * @param stringFilter        filter applied to every token; {@code null} accepts all tokens
         * @param leafReaderContext   segment the collected documents belong to
         * @param leafBucketCollector sub-collector, also passed on to {@code collectConsumer}
         * @param longConsumer        receives the number of bytes by which a duplicate-sequence
         *                            spotter has grown
         * @param collectConsumer     receives every accepted term that was newly added to the
         *                            per-document hash
         * @return a collector that tokenizes the source text of each collected document
         * @throws IOException declared by the overridden method
         */
        @Override // org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.bucket.terms.MapStringTermsAggregator.CollectorSource
        public LeafBucketCollector getLeafCollector(final IncludeExclude.StringFilter stringFilter, final LeafReaderContext leafReaderContext, final LeafBucketCollector leafBucketCollector, final LongConsumer longConsumer, final MapStringTermsAggregator.CollectConsumer collectConsumer) throws IOException {
            return new LeafBucketCollectorBase(leafBucketCollector, null) {
                /** Reused buffer holding the bytes of the current token. */
                private final BytesRefBuilder scratch = new BytesRefBuilder();

                @Override // org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.LeafBucketCollectorBase, org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.LeafBucketCollector
                public void collect(int doc, long owningBucketOrd) throws IOException {
                    if (SignificantTextCollectorSource.this.dupSequenceSpotters == null) {
                        // De-duplication disabled: tokenize without a spotter.
                        collectFromSource(doc, owningBucketOrd, null);
                        return;
                    }
                    // Make sure the array has a slot for this ordinal, then lazily create its spotter.
                    SignificantTextCollectorSource.this.dupSequenceSpotters = SignificantTextCollectorSource.this.bigArrays.grow(SignificantTextCollectorSource.this.dupSequenceSpotters, owningBucketOrd + 1);
                    DuplicateByteSequenceSpotter spotter = SignificantTextCollectorSource.this.dupSequenceSpotters.get(owningBucketOrd);
                    if (spotter == null) {
                        spotter = new DuplicateByteSequenceSpotter();
                        SignificantTextCollectorSource.this.dupSequenceSpotters.set(owningBucketOrd, spotter);
                    }
                    collectFromSource(doc, owningBucketOrd, spotter);
                    spotter.startNewSequence();
                }

                /**
                 * Loads the document source and tokenizes every value of every configured source
                 * field. A fresh term hash is used per document and released when done.
                 */
                private void collectFromSource(int doc, long owningBucketOrd, DuplicateByteSequenceSpotter spotter) throws IOException {
                    SignificantTextCollectorSource.this.sourceLookup.setSegmentAndDocument(leafReaderContext, doc);
                    BytesRefHash seenTerms = new BytesRefHash(256L, SignificantTextCollectorSource.this.bigArrays);
                    try {
                        for (String sourceField : SignificantTextCollectorSource.this.sourceFieldNames) {
                            NamedAnalyzer analyzer = SignificantTextCollectorSource.this.fieldType.indexAnalyzer();
                            for (Object rawValue : SignificantTextCollectorSource.this.sourceLookup.extractRawValues(sourceField)) {
                                if (rawValue == null) {
                                    // A null value has no text to tokenize; do not hand a null string to the analyzer.
                                    continue;
                                }
                                String text = rawValue instanceof BytesRef
                                    ? SignificantTextCollectorSource.this.fieldType.valueForDisplay(rawValue).toString()
                                    : rawValue.toString();
                                processTokenStream(doc, owningBucketOrd, analyzer.tokenStream(SignificantTextCollectorSource.this.fieldType.name(), text), seenTerms, spotter);
                            }
                        }
                    } finally {
                        Releasables.close(seenTerms);
                    }
                }

                /**
                 * Consumes {@code tokenStream}, forwarding each accepted, not yet seen term to the
                 * collect consumer, and reports spotter growth to the long consumer. The stream is
                 * always closed, including when resetting or advancing it fails.
                 */
                private void processTokenStream(int doc, long owningBucketOrd, TokenStream tokenStream, BytesRefHash seenTerms, DuplicateByteSequenceSpotter spotter) throws IOException {
                    long reportedSize = 0;
                    TokenStream stream = tokenStream;
                    try {
                        if (spotter != null) {
                            // Baseline so that only growth caused by this stream is reported.
                            reportedSize = spotter.getEstimatedSizeInBytes();
                            stream = new DeDuplicatingTokenFilter(tokenStream, spotter);
                        }
                        CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
                        stream.reset();
                        while (stream.incrementToken()) {
                            if (spotter != null) {
                                // Report in chunks rather than on every token.
                                long currentSize = spotter.getEstimatedSizeInBytes();
                                long growth = currentSize - reportedSize;
                                if (growth > SignificantTextAggregatorFactory.MEMORY_GROWTH_REPORTING_INTERVAL_BYTES) {
                                    longConsumer.accept(growth);
                                    reportedSize = currentSize;
                                }
                            }
                            this.scratch.clear();
                            this.scratch.copyChars(termAttribute);
                            BytesRef term = this.scratch.get();
                            if (stringFilter != null && !stringFilter.accept(term)) {
                                continue;
                            }
                            // Only terms newly added to the per-document hash are forwarded.
                            if (seenTerms.add(term) >= 0) {
                                collectConsumer.accept(leafBucketCollector, doc, owningBucketOrd, term);
                            }
                        }
                    } finally {
                        stream.close();
                    }
                    if (spotter != null) {
                        // Report whatever growth stayed below the chunk threshold.
                        long remainingGrowth = spotter.getEstimatedSizeInBytes() - reportedSize;
                        if (remainingGrowth > 0) {
                            longConsumer.accept(remainingGrowth);
                        }
                    }
                }
            };
        }

        /** Releases the spotter array. */
        @Override // org.graylog.shaded.opensearch2.org.opensearch.common.lease.Releasable, java.io.Closeable, java.lang.AutoCloseable
        public void close() {
            Releasables.close(this.dupSequenceSpotters);
        }
    }

    /**
     * Resolves the field mapping and stores the aggregation settings.
     *
     * @param str                   aggregation name, passed to the super constructor
     * @param includeExclude        optional include/exclude rules, may be {@code null}
     * @param queryBuilder          background filter
     * @param bucketCountThresholds bucket count thresholds from the request
     * @param significanceHeuristic heuristic used by the result strategy
     * @param queryShardContext     shard context used to resolve the field mapping
     * @param aggregatorFactory     parent factory, passed to the super constructor
     * @param builder               sub-aggregation factories, passed to the super constructor
     * @param str2                  name of the field to aggregate on
     * @param strArr                source fields to read text from; {@code null} selects the
     *                              resolved field name
     * @param z                     whether duplicate token sequences are filtered out
     * @param map                   aggregation metadata, passed to the super constructor
     * @throws IllegalArgumentException if the field is mapped but has no index analyzer
     * @throws IOException declared by the super constructor
     */
    public SignificantTextAggregatorFactory(String str, IncludeExclude includeExclude, QueryBuilder queryBuilder, TermsAggregator.BucketCountThresholds bucketCountThresholds, SignificanceHeuristic significanceHeuristic, QueryShardContext queryShardContext, AggregatorFactory aggregatorFactory, AggregatorFactories.Builder builder, String str2, String[] strArr, boolean z, Map<String, Object> map) throws IOException {
        super(str, queryShardContext, aggregatorFactory, builder, map);
        final MappedFieldType resolvedFieldType = queryShardContext.fieldMapper(str2);
        final String resolvedFieldName;
        if (resolvedFieldType == null) {
            // Unmapped field: fall back to the name given in the request.
            resolvedFieldName = str2;
        } else {
            if (resolvedFieldType.indexAnalyzer() == null) {
                throw new IllegalArgumentException("Field [" + resolvedFieldType.name() + "] has no analyzer, but SignificantText requires an analyzed field");
            }
            resolvedFieldName = resolvedFieldType.name();
        }
        this.fieldType = resolvedFieldType;
        this.indexedFieldName = resolvedFieldName;
        this.sourceFieldNames = strArr != null ? strArr : new String[] {resolvedFieldName};
        this.includeExclude = includeExclude;
        this.backgroundFilter = queryBuilder;
        this.filterDuplicateText = z;
        this.bucketCountThresholds = bucketCountThresholds;
        this.significanceHeuristic = significanceHeuristic;
    }

    /**
     * Builds the {@link MapStringTermsAggregator} for this aggregation.
     *
     * <p>Works on a copy of the configured thresholds. When the shard size still equals the
     * builder's default it is replaced by twice the shard-side queue size suggested for the
     * required size.
     *
     * @param searchContext         search context, also the source of the maximum regex length
     * @param aggregator            parent aggregator
     * @param cardinalityUpperBound upper bound on the number of owning buckets
     * @param map                   aggregation metadata
     * @return the aggregator, collecting sub-aggregations breadth first
     * @throws IOException declared by the overridden method
     */
    @Override // org.graylog.shaded.opensearch2.org.opensearch.search.aggregations.AggregatorFactory
    protected Aggregator createInternal(SearchContext searchContext, Aggregator aggregator, CardinalityUpperBound cardinalityUpperBound, Map<String, Object> map) throws IOException {
        final TermsAggregator.BucketCountThresholds thresholds = new TermsAggregator.BucketCountThresholds(this.bucketCountThresholds);
        if (thresholds.getShardSize() == SignificantTextAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
            thresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(thresholds.getRequiredSize()));
        }
        final IncludeExclude.StringFilter termFilter = this.includeExclude == null
            ? null
            : this.includeExclude.convertToStringFilter(DocValueFormat.RAW, searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength());
        final SignificantTextCollectorSource collectorSource = new SignificantTextCollectorSource(this.queryShardContext.lookup().source(), this.queryShardContext.bigArrays(), this.fieldType, this.sourceFieldNames, this.filterDuplicateText);
        final SignificanceLookup lookup = new SignificanceLookup(this.queryShardContext, this.fieldType, DocValueFormat.RAW, this.backgroundFilter);
        // The result strategy is created against the aggregator it belongs to (qualified inner-class
        // creation); the null check on the aggregator is implicit in "owner.new".
        return new MapStringTermsAggregator(this.name, this.factories, collectorSource,
            owner -> owner.new SignificantTermsResults(lookup, this.significanceHeuristic, cardinalityUpperBound),
            null, DocValueFormat.RAW, thresholds, termFilter, searchContext, aggregator, Aggregator.SubAggCollectionMode.BREADTH_FIRST, false, cardinalityUpperBound, map);
    }
}
