package ai.topandrey15.reinforcemc.core;

import ai.topandrey15.reinforcemc.ReinforceMC;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/* loaded from: input_file:ai/topandrey15/reinforcemc/core/ExperienceBuffer.class */
/**
 * Fixed-capacity ring buffer of agent transitions (state, action, reward, next state, done).
 *
 * <p>New experiences overwrite the oldest ones once the buffer is full. Besides storage, the
 * buffer keeps running counters (total experiences, summed reward, positive/negative reward
 * counts) that are exposed through {@link #getStats()}.
 *
 * <p>Every method that reads or writes mutable state is {@code synchronized} on this instance.
 */
public class ExperienceBuffer {
    private static final int DEFAULT_BUFFER_SIZE = 5000;
    /** Minimum number of stored experiences before any batch is handed out; also the minimum capacity. */
    private static final int MIN_BUFFER_SIZE_FOR_TRAINING = 16;
    /** Added to {@code |reward|} so that zero-reward experiences keep a non-zero sampling weight. */
    private static final float PRIORITY_EPSILON = 0.01f;
    /** Experiences whose {@code |reward|} exceeds this value are logged at debug level. */
    private static final float SIGNIFICANT_REWARD_THRESHOLD = 0.1f;

    private final Experience[] buffer;
    private final int bufferSize;
    /** Array slot the next experience will be written to. */
    private int currentIndex;
    /** Number of occupied slots, never more than {@link #bufferSize}. */
    private int currentSize;
    private boolean bufferFull;
    private final Random random;
    private long totalExperiences;
    /** Kept as a double so that small rewards are not rounded away once the running sum grows. */
    private double totalReward;
    private int positiveRewards;
    private int negativeRewards;

    /** Plain snapshot of the buffer's counters, produced by {@link ExperienceBuffer#getStats()}. */
    public static class BufferStats {
        public int bufferSize;
        public int currentSize;
        public long totalExperiences;
        public float averageReward;
        public int positiveRewards;
        public int negativeRewards;
        /** Fill ratio {@code currentSize / bufferSize} in the range {@code [0, 1]}. */
        public float utilization;

        @Override
        public String toString() {
            return String.format(
                    "BufferStats{size=%d/%d (%.1f%%), total=%d, avgReward=%.3f, pos/neg=%d/%d}",
                    this.currentSize,
                    this.bufferSize,
                    this.utilization * 100.0f,
                    this.totalExperiences,
                    this.averageReward,
                    this.positiveRewards,
                    this.negativeRewards);
        }
    }

    /**
     * One immutable transition. The state arrays are copied on construction, so later changes to
     * the caller's arrays do not affect the stored experience.
     */
    public static class Experience {
        public final float[] state;
        public final int action;
        public final float reward;
        /** Copy of the successor state, or {@code null} if none was supplied. */
        public final float[] nextState;
        public final boolean done;
        /** Wall-clock creation time from {@link System#currentTimeMillis()}. */
        public final long timestamp;

        /**
         * @param state     state before the action; must not be {@code null}
         * @param action    action identifier
         * @param reward    reward received
         * @param nextState state after the action; may be {@code null}
         * @param done      value stored in {@link #done}
         * @throws NullPointerException if {@code state} is {@code null}
         */
        public Experience(float[] state, int action, float reward, float[] nextState, boolean done) {
            this.state = state.clone();
            this.action = action;
            this.reward = reward;
            this.nextState = nextState != null ? nextState.clone() : null;
            this.done = done;
            this.timestamp = System.currentTimeMillis();
        }

        @Override
        public String toString() {
            return String.format("Experience{action=%d, reward=%.3f, done=%s}", this.action, this.reward, this.done);
        }
    }

    /** Creates a buffer with the default capacity. */
    public ExperienceBuffer() {
        this(DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates a buffer with the requested capacity.
     *
     * @param requestedSize desired capacity; values below the minimum training size are raised to it
     */
    public ExperienceBuffer(int requestedSize) {
        this.currentIndex = 0;
        this.currentSize = 0;
        this.bufferFull = false;
        this.totalExperiences = 0L;
        this.totalReward = 0.0;
        this.positiveRewards = 0;
        this.negativeRewards = 0;
        this.bufferSize = Math.max(requestedSize, MIN_BUFFER_SIZE_FOR_TRAINING);
        this.buffer = new Experience[this.bufferSize];
        this.random = new Random();
        ReinforceMC.LOGGER.info("ExperienceBuffer initialized with size: {}", this.bufferSize);
    }

    /**
     * Stores one transition, overwriting the oldest entry when the buffer is full, and updates the
     * running statistics. A {@code null} state is rejected with a warning and nothing is stored.
     *
     * @param state     state before the action
     * @param action    action identifier
     * @param reward    reward received
     * @param nextState state after the action; may be {@code null}
     * @param done      flag stored with the experience
     */
    public synchronized void addExperience(float[] state, int action, float reward, float[] nextState, boolean done) {
        if (state == null) {
            ReinforceMC.LOGGER.warn("Attempted to add experience with null state");
            return;
        }
        Experience experience = new Experience(state, action, reward, nextState, done);
        this.buffer[this.currentIndex] = experience;
        this.currentIndex = (this.currentIndex + 1) % this.bufferSize;
        if (!this.bufferFull) {
            this.currentSize++;
            if (this.currentSize >= this.bufferSize) {
                this.bufferFull = true;
            }
        }

        this.totalExperiences++;
        this.totalReward += reward;
        if (reward > 0.0f) {
            this.positiveRewards++;
        } else if (reward < 0.0f) {
            this.negativeRewards++;
        }

        if (Math.abs(reward) > SIGNIFICANT_REWARD_THRESHOLD) {
            ReinforceMC.LOGGER.debug("Significant experience added: {}", experience);
        }
    }

    /**
     * Draws distinct experiences uniformly at random.
     *
     * @param batchSize requested number of experiences; capped at the current size, and values
     *                  below zero are treated as zero
     * @return the sampled experiences, or {@code null} if fewer than the minimum training size
     *         are stored
     */
    public synchronized List<Experience> sampleBatch(int batchSize) {
        if (this.currentSize < MIN_BUFFER_SIZE_FOR_TRAINING) {
            return null;
        }
        // Clamp at zero: a negative capacity would make the ArrayList constructor throw.
        int count = Math.max(0, Math.min(batchSize, this.currentSize));
        List<Experience> batch = new ArrayList<>(count);
        boolean[] taken = new boolean[this.currentSize];
        for (int picked = 0; picked < count; picked++) {
            // Rejection sampling: redraw until an index that has not been used yet comes up.
            int candidate;
            do {
                candidate = this.random.nextInt(this.currentSize);
            } while (taken[candidate]);
            taken[candidate] = true;
            batch.add(this.buffer[slotOf(candidate)]);
        }
        return batch;
    }

    /**
     * Draws distinct experiences at random, weighting each by {@code |reward| + 0.01}.
     *
     * @param batchSize requested number of experiences; capped at the current size
     * @return the sampled experiences, or {@code null} if fewer than the minimum training size
     *         are stored or the effective batch size is not positive
     */
    public synchronized List<Experience> samplePrioritizedBatch(int batchSize) {
        if (this.currentSize < MIN_BUFFER_SIZE_FOR_TRAINING) {
            return null;
        }
        int count = Math.min(batchSize, this.currentSize);
        if (count <= 0) {
            ReinforceMC.LOGGER.warn("RACE CONDITION PREVENTED: actualBatchSize={}, returning null to prevent batch_size=0", count);
            return null;
        }

        float[] weights = new float[this.currentSize];
        float weightSum = 0.0f;
        for (int k = 0; k < weights.length; k++) {
            weights[k] = Math.abs(this.buffer[slotOf(k)].reward) + PRIORITY_EPSILON;
            weightSum += weights[k];
        }
        for (int k = 0; k < weights.length; k++) {
            weights[k] /= weightSum;
        }

        List<Experience> batch = new ArrayList<>(count);
        boolean[] taken = new boolean[this.currentSize];
        for (int picked = 0; picked < count; picked++) {
            int chosen = selectWeightedRandom(weights, taken);
            taken[chosen] = true;
            batch.add(this.buffer[slotOf(chosen)]);
        }
        return batch;
    }

    /**
     * Picks one not-yet-excluded index with probability proportional to its weight.
     *
     * @param weights  per-index weights
     * @param excluded indices that must not be returned
     * @return the chosen index; if the cumulative walk selects nothing, the first non-excluded
     *         index, or 0 when every index is excluded
     */
    private int selectWeightedRandom(float[] weights, boolean[] excluded) {
        float remaining = 0.0f;
        for (int k = 0; k < weights.length; k++) {
            if (!excluded[k]) {
                remaining += weights[k];
            }
        }

        float target = this.random.nextFloat() * remaining;
        float cumulative = 0.0f;
        for (int k = 0; k < weights.length; k++) {
            if (!excluded[k]) {
                cumulative += weights[k];
                if (cumulative >= target) {
                    return k;
                }
            }
        }

        // Fallback for the case where the cumulative sum never reaches the target.
        for (int k = 0; k < weights.length; k++) {
            if (!excluded[k]) {
                return k;
            }
        }
        return 0;
    }

    /**
     * Returns the most recently added experiences, newest first.
     *
     * @param count maximum number of experiences to return; values below zero are treated as zero
     * @return a new list, empty when the buffer is empty
     */
    public synchronized List<Experience> getRecentExperiences(int count) {
        if (this.currentSize == 0) {
            return new ArrayList<>();
        }
        // Clamp at zero: a negative capacity would make the ArrayList constructor throw.
        int limit = Math.max(0, Math.min(count, this.currentSize));
        List<Experience> recent = new ArrayList<>(limit);
        for (int back = 0; back < limit; back++) {
            Experience experience = this.buffer[slotBefore(back)];
            if (experience != null) {
                recent.add(experience);
            }
        }
        return recent;
    }

    /** Empties the buffer and resets all running statistics. */
    public synchronized void clear() {
        for (int k = 0; k < this.bufferSize; k++) {
            this.buffer[k] = null;
        }
        this.currentIndex = 0;
        this.currentSize = 0;
        this.bufferFull = false;
        this.totalExperiences = 0L;
        this.totalReward = 0.0;
        this.positiveRewards = 0;
        this.negativeRewards = 0;
        ReinforceMC.LOGGER.info("ExperienceBuffer cleared");
    }

    /** @return {@code true} once at least the minimum training size of experiences is stored */
    public synchronized boolean canTrain() {
        return this.currentSize >= MIN_BUFFER_SIZE_FOR_TRAINING;
    }

    /** @return a fresh snapshot of the buffer's size and reward counters */
    public synchronized BufferStats getStats() {
        BufferStats stats = new BufferStats();
        stats.bufferSize = this.bufferSize;
        stats.currentSize = this.currentSize;
        stats.totalExperiences = this.totalExperiences;
        stats.averageReward = this.totalExperiences > 0
                ? (float) (this.totalReward / this.totalExperiences)
                : 0.0f;
        stats.positiveRewards = this.positiveRewards;
        stats.negativeRewards = this.negativeRewards;
        // Cast before dividing: int / int would truncate the ratio to 0 or 1.
        stats.utilization = (float) this.currentSize / this.bufferSize;
        return stats;
    }

    /**
     * Averages the reward over the most recently added experiences.
     *
     * @param count maximum number of recent experiences to average over
     * @return the mean reward, or {@code 0} when the buffer is empty or {@code count} is not positive
     */
    public synchronized float getRecentRewardTrend(int count) {
        if (this.currentSize == 0) {
            return 0.0f;
        }
        int limit = Math.min(count, this.currentSize);
        if (limit <= 0) {
            return 0.0f;
        }
        float rewardSum = 0.0f;
        for (int back = 0; back < limit; back++) {
            Experience experience = this.buffer[slotBefore(back)];
            if (experience != null) {
                rewardSum += experience.reward;
            }
        }
        return rewardSum / limit;
    }

    /** @return the number of experiences currently stored */
    public synchronized int getCurrentSize() {
        return this.currentSize;
    }

    /** @return the fixed capacity of the buffer */
    public int getBufferSize() {
        return this.bufferSize;
    }

    /** @return {@code true} once the buffer has reached capacity and started overwriting */
    public synchronized boolean isFull() {
        return this.bufferFull;
    }

    /**
     * Maps a logical position (0 = oldest stored experience) to its array slot. Once the buffer
     * has wrapped, the oldest entry sits at the write cursor; before that it sits at slot 0.
     */
    private int slotOf(int logicalIndex) {
        return this.bufferFull ? (this.currentIndex + logicalIndex) % this.bufferSize : logicalIndex;
    }

    /** Array slot of the experience written {@code stepsBack} insertions before the newest one (0 = newest). */
    private int slotBefore(int stepsBack) {
        return (((this.currentIndex - 1) - stepsBack) + this.bufferSize) % this.bufferSize;
    }
}
