package raw.inferrer.local.text;

import com.typesafe.scalalogging.Logger;
import com.typesafe.scalalogging.StrictLogging;
import java.io.Reader;
import java.util.Arrays;
import java.util.regex.Matcher;
import raw.inferrer.api.LinesInputFormatDescriptor;
import raw.inferrer.api.SourceAttrType;
import raw.inferrer.api.SourceCollectionType;
import raw.inferrer.api.SourceIntType;
import raw.inferrer.api.SourceRecordType;
import raw.inferrer.api.SourceStringType;
import raw.inferrer.api.TextInputFormatDescriptor;
import raw.inferrer.api.TextInputStreamFormatDescriptor;
import raw.inferrer.local.EncodingInferrer;
import raw.inferrer.local.InferrerErrorHandler;
import raw.inferrer.local.LocalInferrerException;
import raw.inferrer.local.LocalInferrerException$;
import raw.inferrer.local.TextBuffer;
import raw.sources.api.Encoding;
import raw.sources.api.SourceContext;
import raw.sources.bytestream.api.SeekableInputStream;
import raw.utils.RawSettings;
import scala.Array$;
import scala.Function0;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering$Double$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.util.Either;

/* compiled from: TextInferrer.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005\u0005r!B\t\u0013\u0011\u0003Yb!B\u000f\u0013\u0011\u0003q\u0002\"B\u0013\u0002\t\u00031\u0003bB\u0014\u0002\u0005\u0004%I\u0001\u000b\u0005\u0007c\u0005\u0001\u000b\u0011B\u0015\u0007\tu\u0011\u0002A\r\u0005\t\t\u0016\u0011)\u0019!C\n\u000b\"Aa*\u0002B\u0001B\u0003%a\tC\u0003&\u000b\u0011\u0005q\nC\u0004T\u000b\t\u0007I\u0011\u0002+\t\ra+\u0001\u0015!\u0003V\u0011\u001dIVA1A\u0005\niCaAX\u0003!\u0002\u0013Y\u0006\"B0\u0006\t\u0003\u0001\u0007\"B0\u0006\t\u0003Q\b\"CA\b\u000b\t\u0007I\u0011BA\t\u0011!\ty\"\u0002Q\u0001\n\u0005M\u0011\u0001\u0004+fqRLeNZ3se\u0016\u0014(BA\n\u0015\u0003\u0011!X\r\u001f;\u000b\u0005U1\u0012!\u00027pG\u0006d'BA\f\u0019\u0003!IgNZ3se\u0016\u0014(\"A\r\u0002\u0007I\fwo\u0001\u0001\u0011\u0005q\tQ\"\u0001\n\u0003\u0019Q+\u0007\u0010^%oM\u0016\u0014(/\u001a:\u0014\u0005\u0005y\u0002C\u0001\u0011$\u001b\u0005\t#\"\u0001\u0012\u0002\u000bM\u001c\u0017\r\\1\n\u0005\u0011\n#AB!osJ+g-\u0001\u0004=S:LGO\u0010\u000b\u00027\u0005\u0001B+\u0012-U?N\u000bU\n\u0015'F?NK%,R\u000b\u0002SA\u0011!fL\u0007\u0002W)\u0011A&L\u0001\u0005Y\u0006twMC\u0001/\u0003\u0011Q\u0017M^1\n\u0005AZ#AB*ue&tw-A\tU\u000bb#vlU!N!2+ulU%[\u000b\u0002\u001aR!B\u00104oi\u0002\"\u0001N\u001b\u000e\u0003QI!A\u000e\u000b\u0003)%sg-\u001a:sKJ,%O]8s\u0011\u0006tG\r\\3s!\t!\u0004(\u0003\u0002:)\t\u0001RI\\2pI&tw-\u00138gKJ\u0014XM\u001d\t\u0003w\tk\u0011\u0001\u0010\u0006\u0003{y\nAb]2bY\u0006dwnZ4j]\u001eT!a\u0010!\u0002\u0011QL\b/Z:bM\u0016T\u0011!Q\u0001\u0004G>l\u0017BA\"=\u00055\u0019FO]5di2{wmZ5oO\u0006i1o\\;sG\u0016\u001cuN\u001c;fqR,\u0012A\u0012\t\u0003\u000f2k\u0011\u0001\u0013\u0006\u0003\u0013*\u000b1!\u00199j\u0015\tY\u0005$A\u0004t_V\u00148-Z:\n\u00055C%!D*pkJ\u001cWmQ8oi\u0016DH/\u0001\bt_V\u00148-Z\"p]R,\u0007\u0010\u001e\u0011\u0015\u0003A#\"!\u0015*\u0011\u0005q)\u0001\"\u0002#\t\u0001\b1\u0015\u0001C7j]6\u000bGo\u00195\u0016\u0003U\u0003\"\u0001\t,\n\u0005]\u000b#A\u0002#pk\ndW-A\u0005nS:l\u0015\r^2iA\u0005\tB-\u001a4bk2$8+
Y7qY\u0016\u001c\u0016N_3\u0016\u0003m\u0003\"\u0001\t/\n\u0005u\u000b#aA%oi\u0006\u0011B-\u001a4bk2$8+Y7qY\u0016\u001c\u0016N_3!\u0003\u0015IgNZ3s)\u0011\tgm\\<\u0011\u0005\t$W\"A2\u000b\u0005%3\u0012BA3d\u0005}!V\r\u001f;J]B,Ho\u0015;sK\u0006lgi\u001c:nCR$Um]2sSB$xN\u001d\u0005\u0006O6\u0001\r\u0001[\u0001\u0003SN\u0004\"![7\u000e\u0003)T!!S6\u000b\u00051T\u0015A\u00032zi\u0016\u001cHO]3b[&\u0011aN\u001b\u0002\u0014'\u0016,7.\u00192mK&s\u0007/\u001e;TiJ,\u0017-\u001c\u0005\u0006a6\u0001\r!]\u0001\u000e[\u0006L(-Z#oG>$\u0017N\\4\u0011\u0007\u0001\u0012H/\u0003\u0002tC\t1q\n\u001d;j_:\u0004\"aR;\n\u0005YD%\u0001C#oG>$\u0017N\\4\t\u000bal\u0001\u0019A=\u0002\u001f5\f\u0017PY3TC6\u0004H.Z*ju\u0016\u00042\u0001\t:\\)\u0011Yh0!\u0004\u0011\u0005\td\u0018BA?d\u0005e!V\r\u001f;J]B,HOR8s[\u0006$H)Z:de&\u0004Ho\u001c:\t\r}t\u0001\u0019AA\u0001\u0003\u0019\u0011X-\u00193feB!\u00111AA\u0005\u001b\t\t)AC\u0002\u0002\b5\n!![8\n\t\u0005-\u0011Q\u0001\u0002\u0007%\u0016\fG-\u001a:\t\u000bat\u0001\u0019A=\u0002\u0013I,w-\u001a=MSN$XCAA\n!\u0015\u0001\u0013QCA\r\u0013\r\t9\"\t\u0002\u0006\u0003J\u0014\u0018-\u001f\t\u00049\u0005m\u0011bAA\u000f%\tY!+Z4fqR{G+\u001f9f\u0003)\u0011XmZ3y\u0019&\u001cH\u000f\t")
/* loaded from: input_file:raw/inferrer/local/text/TextInferrer.class */
/*
 * Infers the structure of line-oriented text (log files) by testing a sample of lines against a
 * fixed list of known log-line regular expressions. When one expression matches enough of the
 * sampled lines, the text is described as a collection of records whose attributes are that
 * expression's capture groups; otherwise it is described as a plain collection of strings.
 */
public class TextInferrer implements InferrerErrorHandler, EncodingInferrer {
    private final SourceContext sourceContext;
    /** Minimum fraction of sampled lines the best regex must match to be accepted. */
    private final double minMatch;
    /** Sample size used when the caller does not supply one; read from settings in the constructor. */
    private final int defaultSampleSize;
    /** Candidate line formats, tried in array order; on equal match counts the earliest entry wins. */
    private final RegexToType[] regexList;
    // The three fields below are assigned through the generated "_setter_" methods rather than
    // directly in the constructor (presumably from the trait `$init$` calls - confirm), so they
    // cannot be declared final in Java source.
    private RawSettings settings;
    private long raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize;
    private Logger logger;

    // NOTE: the five forwarders below must name the trait explicitly (`Trait.super.method`).
    // An unqualified call would resolve to the override itself and recurse until the stack overflows.

    @Override // raw.inferrer.local.EncodingInferrer
    public Reader getReader(SeekableInputStream seekableInputStream, Encoding encoding) {
        return EncodingInferrer.super.getReader(seekableInputStream, encoding);
    }

    @Override // raw.inferrer.local.EncodingInferrer
    public TextBuffer getTextBuffer(SeekableInputStream seekableInputStream, Option<Encoding> option) {
        return EncodingInferrer.super.getTextBuffer(seekableInputStream, option);
    }

    @Override // raw.inferrer.local.EncodingInferrer
    public Tuple2<Encoding, Object> guessEncoding(SeekableInputStream seekableInputStream) {
        return EncodingInferrer.super.guessEncoding(seekableInputStream);
    }

    @Override // raw.inferrer.local.InferrerErrorHandler
    public <T> Either<String, T> tryInfer(String str, Function0<T> function0) {
        return InferrerErrorHandler.super.tryInfer(str, function0);
    }

    @Override // raw.inferrer.local.InferrerErrorHandler
    public <T> T withErrorHandling(Function0<T> function0) {
        return (T) InferrerErrorHandler.super.withErrorHandling(function0);
    }

    @Override // raw.inferrer.local.EncodingInferrer
    public RawSettings settings() {
        return this.settings;
    }

    @Override // raw.inferrer.local.EncodingInferrer
    public long raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize() {
        return this.raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize;
    }

    @Override // raw.inferrer.local.EncodingInferrer
    public void raw$inferrer$local$EncodingInferrer$_setter_$settings_$eq(RawSettings rawSettings) {
        this.settings = rawSettings;
    }

    @Override // raw.inferrer.local.EncodingInferrer
    public final void raw$inferrer$local$EncodingInferrer$_setter_$raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize_$eq(long j) {
        this.raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize = j;
    }

    public Logger logger() {
        return this.logger;
    }

    public void com$typesafe$scalalogging$StrictLogging$_setter_$logger_$eq(Logger logger) {
        this.logger = logger;
    }

    @Override // raw.inferrer.local.EncodingInferrer
    public SourceContext sourceContext() {
        return this.sourceContext;
    }

    private double minMatch() {
        return this.minMatch;
    }

    /* JADX INFO: Access modifiers changed from: private */
    public int defaultSampleSize() {
        return this.defaultSampleSize;
    }

    /**
     * Infers the text format of a byte stream.
     *
     * <p>Obtains a {@link TextBuffer} for the stream via {@link #getTextBuffer}, runs the
     * reader-based inference on it and always closes the buffer's reader afterwards. The whole
     * operation runs inside {@link #withErrorHandling}.
     *
     * @param seekableInputStream the stream to inspect
     * @param option the encoding to use, if known; passed through to {@link #getTextBuffer}
     * @param option2 optional number of lines to sample (see {@link #infer(Reader, Option)})
     * @return the buffer's encoding and confidence together with the inferred line format
     */
    public TextInputStreamFormatDescriptor infer(SeekableInputStream seekableInputStream, Option<Encoding> option, Option<Object> option2) {
        return (TextInputStreamFormatDescriptor) withErrorHandling(() -> {
            TextBuffer textBuffer = this.getTextBuffer(seekableInputStream, option);
            try {
                return new TextInputStreamFormatDescriptor(textBuffer.encoding(), textBuffer.confidence(), this.infer(textBuffer.reader(), option2));
            } finally {
                textBuffer.reader().close();
            }
        });
    }

    /**
     * Infers the line format of already-decoded text.
     *
     * <p>Reads up to the requested number of lines, counts for every known regex how many lines
     * it matches in full, and picks the regex with the highest count (the earliest one on ties).
     * If the winner matches fewer than {@code minMatch} of the sampled lines, the result is a
     * collection of plain strings with no regex; otherwise it is a collection of records built
     * from the winner's attributes, together with the winner's regex source.
     *
     * @param reader source of the text; not closed by this method
     * @param option optional sample size in lines; falls back to {@link #defaultSampleSize()}
     *     when empty, and a value {@code <= 0} means "no limit"
     * @return the inferred line format
     * @throws LocalInferrerException if not a single line could be read (as thrown inside the
     *     {@link #withErrorHandling} wrapper, which may translate it)
     */
    public TextInputFormatDescriptor infer(Reader reader, Option<Object> option) {
        return (TextInputFormatDescriptor) withErrorHandling(() -> {
            int requestedSize = BoxesRunTime.unboxToInt(option.getOrElse(() -> {
                return this.defaultSampleSize();
            }));
            int lineLimit = requestedSize <= 0 ? Integer.MAX_VALUE : requestedSize;
            TextLineIterator lines = new TextLineIterator(reader);

            // One reusable Matcher per candidate; it is reset for every line instead of reallocated.
            RegexToType[] candidates = this.regexList();
            Matcher[] matchers = new Matcher[candidates.length];
            for (int k = 0; k < candidates.length; k++) {
                matchers[k] = candidates[k].regex().pattern().matcher("");
            }
            double[] matchCounts = new double[candidates.length];

            int linesRead = 0;
            while (lines.hasNext() && linesRead < lineLimit) {
                String line = lines.next();
                linesRead++;
                for (int k = 0; k < matchers.length; k++) {
                    recordMatch(matchers[k], line, matchCounts, k);
                }
            }
            if (linesRead == 0) {
                throw new LocalInferrerException("could not read any line from file", LocalInferrerException$.MODULE$.$lessinit$greater$default$2());
            }

            // Strict '>' keeps the earliest candidate when several share the top count.
            int best = 0;
            for (int k = 1; k < matchCounts.length; k++) {
                if (matchCounts[k] > matchCounts[best]) {
                    best = k;
                }
            }

            if (matchCounts[best] / ((double) linesRead) < this.minMatch()) {
                return new LinesInputFormatDescriptor(new SourceCollectionType(new SourceStringType(false), false), None$.MODULE$, false);
            }
            RegexToType winner = candidates[best];
            // The last argument reports whether unread lines remain after sampling.
            return new LinesInputFormatDescriptor(new SourceCollectionType(new SourceRecordType(winner.atts().toVector(), true), false), new Some(winner.regex().regex()), lines.hasNext());
        });
    }

    private RegexToType[] regexList() {
        return this.regexList;
    }

    /**
     * Synthetic entry point kept for binary compatibility: unpacks a (matcher, index) pair and
     * bumps {@code dArr[index]} when the matcher accepts the whole of {@code str}.
     *
     * @throws MatchError if {@code tuple2} is null
     */
    public static final /* synthetic */ void $anonfun$infer$5(String str, double[] dArr, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        recordMatch((Matcher) tuple2._1(), str, dArr, tuple2._2$mcI$sp());
    }

    /** Increments {@code matchCounts[index]} if {@code matcher}, reset onto {@code line}, matches the entire line. */
    private static void recordMatch(Matcher matcher, String line, double[] matchCounts, int index) {
        matcher.reset(line);
        if (matcher.matches()) {
            matchCounts[index] = matchCounts[index] + 1.0d;
        }
    }

    /** Builds a string-typed attribute with the given name. */
    private static SourceAttrType stringAttr(String name) {
        return new SourceAttrType(name, new SourceStringType(false));
    }

    /** Builds an int-typed attribute with the given name. */
    private static SourceAttrType intAttr(String name) {
        return new SourceAttrType(name, new SourceIntType(false));
    }

    /** Builds a non-empty Scala list holding {@code items} in the given order. */
    private static $colon.colon attrList(SourceAttrType... items) {
        int last = items.length - 1;
        $colon.colon list = new $colon.colon(items[last], Nil$.MODULE$);
        for (int k = last - 1; k >= 0; k--) {
            list = new $colon.colon(items[k], list);
        }
        return list;
    }

    /** Pairs a compiled regex with the attributes its capture groups map to, in group order. */
    private static RegexToType lineFormat(String pattern, $colon.colon atts) {
        return new RegexToType(new StringOps(Predef$.MODULE$.augmentString(pattern)).r(), atts);
    }

    /**
     * Creates an inferrer bound to the given source context, runs the trait initialisers and
     * sets up the match threshold, the default sample size (read from settings) and the list of
     * known line formats.
     *
     * @param sourceContext context exposed through {@link #sourceContext()}
     */
    public TextInferrer(SourceContext sourceContext) {
        this.sourceContext = sourceContext;
        StrictLogging.$init$(this);
        InferrerErrorHandler.$init$(this);
        EncodingInferrer.$init$(this);
        this.minMatch = 0.95d;
        this.defaultSampleSize = settings().getInt(TextInferrer$.MODULE$.raw$inferrer$local$text$TextInferrer$$TEXT_SAMPLE_SIZE());
        // Order matters: when two formats match the same number of lines, the earlier one is chosen.
        this.regexList = new RegexToType[]{
            lineFormat(
                "([-\\w\\d\\.]+) - - \\[(.*)\\] \"(\\w+)\\s+([^\\s]+) ([^\"]+)\"\\s+(\\d+)\\s+(\\d+)",
                attrList(stringAttr("hostname"), stringAttr("timestamp"), stringAttr("method"), stringAttr("url"), stringAttr("version"), intAttr("returned"), intAttr("size"))),
            lineFormat(
                "^(\\d{2}/\\d{2}/\\d{4} \\d{2}:\\d{2}:\\d{2})\\s*(.*)",
                attrList(stringAttr("timestamp"), stringAttr("message"))),
            lineFormat(
                "^(\\d{2}/\\d{2}/\\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d+)\\s*(.*)",
                attrList(stringAttr("timestamp"), stringAttr("message"))),
            lineFormat(
                "^\\[([^\\[\\]]+)\\]\\s+\\[?(\\w+)\\]?:?\\s+(.*)",
                attrList(stringAttr("timestamp"), stringAttr("level"), stringAttr("message"))),
            lineFormat(
                "^(\\w+\\s+\\d\\d?\\s+\\d{2}:\\d{2}:\\d{2}\\.?\\d*)\\s+(.*)",
                attrList(stringAttr("timestamp"), stringAttr("message"))),
            lineFormat(
                "^(\\d{2}:\\d{2}:\\d{2}\\.\\d+)\\s+\\[([-\\w.$#]+)\\]\\s+(\\w+)\\s+([-\\w.$#]+)\\s*-?\\s*(.*)",
                attrList(stringAttr("timestamp"), stringAttr("logger"), stringAttr("level"), stringAttr("class"), stringAttr("message")))
        };
    }
}
