package raw.inferrer.local;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import com.typesafe.scalalogging.StrictLogging;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Locale;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;
import raw.sources.api.Encoding;
import raw.sources.api.ISO_8859_1;
import raw.sources.api.ISO_8859_2;
import raw.sources.api.ISO_8859_9;
import raw.sources.api.SourceContext;
import raw.sources.api.UTF_16;
import raw.sources.api.UTF_16BE;
import raw.sources.api.UTF_16LE;
import raw.sources.api.UTF_8;
import raw.sources.api.WINDOWS_1252;
import raw.sources.bytestream.api.SeekableInputStream;
import raw.utils.RawSettings;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Some;
import scala.Tuple2;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: EncodingInferrer.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005eq!\u0002\b\u0010\u0011\u00031b!\u0002\r\u0010\u0011\u0003I\u0002\"\u0002\u0011\u0002\t\u0003\t\u0003b\u0002\u0012\u0002\u0005\u0004%Ia\t\u0005\u0007Y\u0005\u0001\u000b\u0011\u0002\u0013\u0007\u0011ay\u0001\u0013aA\u0001#5BQ\u0001O\u0003\u0005\u0002eBQ!P\u0003\u0007\u0012yBqaR\u0003C\u0002\u0013E\u0001\nC\u0004P\u000b\t\u0007I\u0011\u0002)\t\u000bQ+A\u0011C+\t\u000b),A\u0011C6\t\rU,A\u0011A\tw\u0011\u0015qX\u0001\"\u0003��\u0003A)enY8eS:<\u0017J\u001c4feJ,'O\u0003\u0002\u0011#\u0005)An\\2bY*\u0011!cE\u0001\tS:4WM\u001d:fe*\tA#A\u0002sC^\u001c\u0001\u0001\u0005\u0002\u0018\u00035\tqB\u0001\tF]\u000e|G-\u001b8h\u0013:4WM\u001d:feN\u0011\u0011A\u0007\t\u00037yi\u0011\u0001\b\u0006\u0002;\u0005)1oY1mC&\u0011q\u0004\b\u0002\u0007\u0003:L(+\u001a4\u0002\rqJg.\u001b;?)\u00051\u0012\u0001H#O\u0007>#\u0015JT$`\t\u0016#Vi\u0011+J\u001f:{&+R!E?NK%,R\u000b\u0002IA\u0011QEK\u0007\u0002M)\u0011q\u0005K\u0001\u0005Y\u0006twMC\u0001*\u0003\u0011Q\u0017M^1\n\u0005-2#AB*ue&tw-A\u000fF\u001d\u000e{E)\u0013(H?\u0012+E+R\"U\u0013>suLU#B\t~\u001b\u0016JW#!'\r)!D\f\t\u0003_Yj\u0011\u0001\r\u0006\u0003cI\nAb]2bY\u0006dwnZ4j]\u001eT!a\r\u001b\u0002\u0011QL\b/Z:bM\u0016T\u0011!N\u0001\u0004G>l\u0017BA\u001c1\u00055\u0019FO]5di2{wmZ5oO\u00061A%\u001b8ji\u0012\"\u0012A\u000f\t\u00037mJ!\u0001\u0010\u000f\u0003\tUs\u0017\u000e^\u0001\u000eg>,(oY3D_:$X\r\u001f;\u0016\u0003}\u0002\"\u0001Q#\u000e\u0003\u0005S!AQ\"\u0002\u0007\u0005\u0004\u0018N\u0003\u0002E'\u000591o\\;sG\u0016\u001c\u0018B\u0001$B\u00055\u0019v.\u001e:dK\u000e{g\u000e^3yi\u0006A1/\u001a;uS:<7/F\u0001J!\tQU*D\u0001L\u0015\ta5#A\u0003vi&d7/\u0003\u0002O\u0017\nY!+Y<TKR$\u0018N\\4t\u0003e)gnY8eS:<G)\u001a;fGRLwN\u001c*fC\u0012\u001c\u0016N_3\u0016\u0003E\u0003\"a\u0007*\n\u0005Mc\"\u0001\u0002'p]\u001e\f\u0011bZ3u%\u0016\fG-\u001a:\u0015\u0007YcV\r\u0005\u0002X56\t\u0001L\u0003\u0002ZQ\u0005\u0011\u0011n\\\u0005\u00037b\u0013aAU3bI\u0016\u0014\b\"B/\u000b\u0001\u0004q\u0016AA5t!\ty6-D\u000
1a\u0015\t\u0011\u0015M\u0003\u0002c\u0007\u0006Q!-\u001f;fgR\u0014X-Y7\n\u0005\u0011\u0004'aE*fK.\f'\r\\3J]B,Ho\u0015;sK\u0006l\u0007\"\u00024\u000b\u0001\u00049\u0017\u0001C3oG>$\u0017N\\4\u0011\u0005\u0001C\u0017BA5B\u0005!)enY8eS:<\u0017!D4fiR+\u0007\u0010\u001e\"vM\u001a,'\u000fF\u0002m_B\u0004\"aF7\n\u00059|!A\u0003+fqR\u0014UO\u001a4fe\")Ql\u0003a\u0001=\")\u0011o\u0003a\u0001e\u0006iQ.Y=cK\u0016s7m\u001c3j]\u001e\u00042aG:h\u0013\t!HD\u0001\u0004PaRLwN\\\u0001\u000eOV,7o]#oG>$\u0017N\\4\u0015\u0005]l\b\u0003B\u000eyOjL!!\u001f\u000f\u0003\rQ+\b\u000f\\33!\tY20\u0003\u0002}9\t\u0019\u0011J\u001c;\t\u000buc\u0001\u0019\u00010\u0002\u001b\u001d,GOQ=uKN\u000bW\u000e\u001d7f)\u0019\t\t!!\u0004\u0002\u0016A)1$a\u0001\u0002\b%\u0019\u0011Q\u0001\u000f\u0003\u000b\u0005\u0013(/Y=\u0011\u0007m\tI!C\u0002\u0002\fq\u0011AAQ=uK\"1Q,\u0004a\u0001\u0003\u001f\u00012aVA\t\u0013\r\t\u0019\u0002\u0017\u0002\f\u0013:\u0004X\u000f^*ue\u0016\fW\u000e\u0003\u0004\u0002\u00185\u0001\rA_\u0001\u000bg\u0006l\u0007\u000f\\3TSj,\u0007")
/* loaded from: input_file:raw/inferrer/local/EncodingInferrer.class */
public interface EncodingInferrer extends StrictLogging {
    /**
     * Stores the value later returned by {@link #settings()}.
     *
     * <p>Invoked from {@code $init$} with {@code sourceContext().settings()}.
     * NOTE(review): the mangled name looks like a Scala-compiler-generated trait field setter;
     * presumably not meant to be called by hand -- confirm against the Scala source.
     *
     * @param rawSettings the settings instance to store
     */
    void raw$inferrer$local$EncodingInferrer$_setter_$settings_$eq(RawSettings rawSettings);

    /**
     * Stores the value later returned by
     * {@link #raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize()}.
     *
     * <p>Invoked from {@code $init$} with the result of {@code settings().getBytes(...)}.
     *
     * @param j the read size to store
     */
    void raw$inferrer$local$EncodingInferrer$_setter_$raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize_$eq(long j);

    /**
     * Supplies the source context; {@code $init$} reads its {@code settings()} to initialise
     * this inferrer.
     *
     * @return the source context provided by the implementing class
     */
    SourceContext sourceContext();

    /**
     * @return the settings stored during {@code $init$}
     */
    RawSettings settings();

    /**
     * The value that {@code guessEncoding} narrows to {@code int} and passes to
     * {@code getByteSample} as the maximum number of bytes to sample.
     *
     * @return the read size stored during {@code $init$}
     */
    long raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize();

    /**
     * Opens a character reader over the stream using the given encoding's charset.
     *
     * <p>For {@code UTF_8}, {@code UTF_16LE} and {@code UTF_16BE} the stream is first wrapped in a
     * {@link BOMInputStream} built with {@code include = false} for the matching byte order mark,
     * so a leading BOM of that kind is not handed to the reader. Every other encoding reads the
     * stream as-is.
     *
     * @param seekableInputStream the byte stream to decode
     * @param encoding            the encoding whose {@code charset()} drives decoding
     * @return a reader decoding the (possibly BOM-filtered) stream
     */
    default Reader getReader(SeekableInputStream seekableInputStream, Encoding encoding) {
        InputStream raw = (InputStream) seekableInputStream;
        InputStream source;
        if (encoding instanceof UTF_8) {
            source = new BOMInputStream(raw, false, ByteOrderMark.UTF_8);
        } else if (encoding instanceof UTF_16LE) {
            source = new BOMInputStream(raw, false, ByteOrderMark.UTF_16LE);
        } else if (encoding instanceof UTF_16BE) {
            source = new BOMInputStream(raw, false, ByteOrderMark.UTF_16BE);
        } else {
            source = raw;
        }
        return new InputStreamReader(source, encoding.charset());
    }

    /**
     * Builds a {@code TextBuffer} over the stream, detecting the encoding when none is supplied.
     *
     * <p>When {@code option} is {@code None}, the encoding is guessed via
     * {@link #guessEncoding(SeekableInputStream)} and the stream is rewound to offset 0 before the
     * reader is created; the detector's confidence is passed on to the buffer. When {@code option}
     * is a {@code Some}, its encoding is used directly and the confidence is reported as 0.
     *
     * @param seekableInputStream the byte stream to read text from
     * @param option              the caller-provided encoding, or {@code None} to auto-detect
     * @return a text buffer holding the reader, the chosen encoding and the confidence value
     * @throws MatchError if {@code option} is neither {@code None} nor a {@code Some}
     */
    default TextBuffer getTextBuffer(SeekableInputStream seekableInputStream, Option<Encoding> option) {
        final Encoding chosenEncoding;
        final int confidence;
        if (None$.MODULE$.equals(option)) {
            // No encoding given: sample the stream, then rewind so the reader starts at the beginning.
            Tuple2<Encoding, Object> guess = guessEncoding(seekableInputStream);
            if (guess == null) {
                throw new MatchError(guess);
            }
            chosenEncoding = guess._1();
            confidence = guess._2$mcI$sp();
            seekableInputStream.seek(0L);
        } else if (option instanceof Some) {
            chosenEncoding = ((Some<Encoding>) option).value();
            confidence = 0;
        } else {
            throw new MatchError(option);
        }
        Reader reader = getReader(seekableInputStream, chosenEncoding);
        return new TextBuffer(reader, chosenEncoding, confidence);
    }

    /**
     * Guesses the character encoding of a stream from a sample of its bytes.
     *
     * <p>Reads up to {@code encodingDetectionReadSize} bytes from the stream and runs ICU's
     * {@link CharsetDetector} over them, with {@code "utf-8"} as the declared-encoding hint.
     * This method does not rewind the stream; callers that go on to read the content must seek
     * back themselves.
     *
     * @param seekableInputStream the stream to sample
     * @return a pair of the detected encoding and the detector's confidence (boxed {@code int})
     * @throws LocalInferrerException if the sample is empty, the detector finds no matching
     *         charset, the confidence is below 10, or the detected charset is not supported
     */
    default Tuple2<Encoding, Object> guessEncoding(SeekableInputStream seekableInputStream) {
        byte[] byteSample = getByteSample((InputStream) seekableInputStream, (int) raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize());
        CharsetDetector charsetDetector = new CharsetDetector();
        charsetDetector.setText(byteSample);
        charsetDetector.setDeclaredEncoding("utf-8");
        CharsetMatch detect = charsetDetector.detect();
        // ICU returns null when no recognizer matches; report that as an inference error, not an NPE.
        if (detect == null) {
            throw new LocalInferrerException("could not detect encoding: no matching charset found", LocalInferrerException$.MODULE$.$lessinit$greater$default$2());
        }

        int confidence = detect.getConfidence();
        String detectedName = detect.getName();
        if (confidence < 10) {
            throw new LocalInferrerException("could not detect encoding: detected charset " + detectedName + " with confidence " + confidence + " (less than 10)", LocalInferrerException$.MODULE$.$lessinit$greater$default$2());
        }
        if (confidence < 50 && logger().underlying().isDebugEnabled()) {
            logger().underlying().debug("Charset detection {} with low confidence: {}", detectedName, confidence);
        }

        // Locale.ROOT keeps the comparison stable on locales with special casing rules
        // (e.g. Turkish, where 'I' would otherwise lowercase to a dotless 'i').
        String charsetName = detectedName.toLowerCase(Locale.ROOT);
        Encoding encoding;
        switch (charsetName) {
            case "utf-8":
                encoding = new UTF_8();
                break;
            case "utf-16be":
                encoding = new UTF_16BE();
                break;
            case "utf-16le":
                encoding = new UTF_16LE();
                break;
            case "utf-16":
                encoding = new UTF_16();
                break;
            case "iso-8859-1":
                encoding = new ISO_8859_1();
                break;
            case "iso-8859-2":
                encoding = new ISO_8859_2();
                break;
            case "iso-8859-9":
                encoding = new ISO_8859_9();
                break;
            case "windows-1252":
                encoding = new WINDOWS_1252();
                break;
            default:
                throw new LocalInferrerException("unsupported charset: " + charsetName, LocalInferrerException$.MODULE$.$lessinit$greater$default$2());
        }
        return new Tuple2<>(encoding, BoxesRunTime.boxToInteger(confidence));
    }

    /**
     * Reads up to {@code i} bytes from the stream, stopping early at end of stream.
     *
     * @param inputStream the stream to read from
     * @param i           the maximum number of bytes to read
     * @return an array holding exactly the bytes read: the full buffer when {@code i} bytes were
     *         available, otherwise a shorter copy
     * @throws LocalInferrerException if no bytes at all could be read
     */
    private default byte[] getByteSample(InputStream inputStream, int i) {
        byte[] buffer = new byte[i];
        int filled = 0;
        // A single read() may return fewer bytes than requested, so keep going until full or EOF.
        while (filled < i) {
            int count = inputStream.read(buffer, filled, i - filled);
            if (count < 0) {
                break;
            }
            filled += count;
        }
        if (filled == 0) {
            throw new LocalInferrerException("input stream appears to be empty", LocalInferrerException$.MODULE$.$lessinit$greater$default$2());
        }
        if (filled < i) {
            // Short read: hand back only the populated prefix.
            byte[] trimmed = new byte[filled];
            System.arraycopy(buffer, 0, trimmed, 0, filled);
            return trimmed;
        }
        return buffer;
    }

    /**
     * Initialises the stored fields of an {@code EncodingInferrer} instance.
     *
     * <p>First stores the settings taken from the instance's source context, then reads the
     * encoding-detection read size from those stored settings and stores it as well.
     *
     * @param encodingInferrer the instance being initialised
     */
    static void $init$(EncodingInferrer encodingInferrer) {
        RawSettings contextSettings = encodingInferrer.sourceContext().settings();
        encodingInferrer.raw$inferrer$local$EncodingInferrer$_setter_$settings_$eq(contextSettings);

        // Read back through settings() so the size comes from the value just stored.
        RawSettings storedSettings = encodingInferrer.settings();
        long readSize = storedSettings.getBytes(EncodingInferrer$.MODULE$.raw$inferrer$local$EncodingInferrer$$ENCODING_DETECTION_READ_SIZE());
        encodingInferrer.raw$inferrer$local$EncodingInferrer$_setter_$raw$inferrer$local$EncodingInferrer$$encodingDetectionReadSize_$eq(readSize);
    }
}
