package ai.topandrey15.reinforcemc.core;

import ai.topandrey15.reinforcemc.ReinforceMC;
import ai.topandrey15.reinforcemc.action.ActionExecutor;
import ai.topandrey15.reinforcemc.reward.RewardCalculator;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import net.minecraft.client.Minecraft;

/* loaded from: input_file:ai/topandrey15/reinforcemc/core/RLEpisodeManager.class */
/**
 * Bookkeeping for the reinforcement-learning loop: remembers the previous state, the last
 * action and reward, step counters, the accumulated reward, the session start time and how
 * often each named action has been recorded.
 *
 * <p>Scalar values are stored in atomics and the action-usage map is only touched while holding
 * its own monitor, so individual reads and writes are safe from several threads. Updates that
 * span more than one field (for example {@link #resetSession()}) are not performed atomically
 * as a group.
 */
public class RLEpisodeManager {
    /** Number of steps after which {@link #shouldEndEpisode(float)} ends the episode. */
    private static final int MAX_EPISODE_STEPS = 750;
    /** Rewards strictly below this value end the episode. */
    private static final float NEGATIVE_REWARD_LIMIT = -50.0f;
    /** Rewards strictly above this value end the episode. */
    private static final float POSITIVE_REWARD_LIMIT = 100.0f;
    /** Action statistics are logged once every this many recorded actions. */
    private static final int STATS_LOG_INTERVAL = 100;

    /** Name prefix that marks camera ("look") actions. */
    private static final String LOOK_PREFIX = "LOOK_";
    /** Upper bound of the random jitter added to a raw action score. */
    private static final float JITTER_RANGE = 0.1f;
    /** Scale factor and lower bound applied to look-action intensities. */
    private static final float LOOK_SCALE = 0.8f;
    private static final float LOOK_MIN_INTENSITY = 0.05f;
    /** Non-look actions at or below this (jittered) score get intensity zero. */
    private static final float ACTIVATION_THRESHOLD = 0.25f;
    /** Intensity assigned to a non-look action right at the activation threshold. */
    private static final float BASE_INTENSITY = 0.6f;

    private final ActionExecutor actionExecutor;
    private final RewardCalculator rewardCalculator;

    private final AtomicReference<float[]> previousState = new AtomicReference<>(null);
    private final AtomicInteger lastAction = new AtomicInteger(-1);
    private final AtomicReference<Float> lastReward = new AtomicReference<>(0.0f);
    private final AtomicInteger stepCount = new AtomicInteger(0);
    private final AtomicBoolean episodeStarted = new AtomicBoolean(false);
    private final AtomicInteger episodeSteps = new AtomicInteger(0);
    private final AtomicLong totalSteps = new AtomicLong(0);
    private final AtomicReference<Float> totalReward = new AtomicReference<>(0.0f);
    private final AtomicLong sessionStartTime = new AtomicLong(0);
    /** Usage count per action name; every access must hold this map's monitor. */
    private final Map<String, Integer> actionUsageStats = new HashMap<>();
    private final Random diversityRandom = new Random();
    private final AtomicInteger statsLogCounter = new AtomicInteger(0);

    /**
     * Mutable snapshot of training statistics. {@link RLEpisodeManager#getStats()} fills in the
     * step counters, total reward, action count and session duration; every other field keeps
     * its default unless the caller sets it.
     */
    public static class TrainingStats {
        public boolean isTraining = false;
        public boolean isRunning = false;
        public int currentSteps = 0;
        public long totalSteps = 0;
        public float totalReward = 0.0f;
        public float averageReward = 0.0f;
        public int modelTrainingSteps = 0;
        public float currentEpsilon = 1.0f;
        public int episodeSteps = 0;
        /** Session duration in milliseconds ({@link #toString()} prints it in whole seconds). */
        public long sessionDuration = 0;
        public boolean heavyOperationInProgress = false;
        public int availableActions = 0;

        @Override
        public String toString() {
            return String.format(
                    "TrainingStats{training=%s, running=%s, steps=%d/%d, reward=%.2f/%.3f, ε=%.3f, episode=%d, duration=%ds, heavy=%s, actions=%d}",
                    this.isTraining,
                    this.isRunning,
                    this.currentSteps,
                    this.totalSteps,
                    this.totalReward,
                    this.averageReward,
                    this.currentEpsilon,
                    this.episodeSteps,
                    this.sessionDuration / 1000,
                    this.heavyOperationInProgress,
                    this.availableActions);
        }
    }

    /**
     * Creates a manager bound to the given collaborators.
     *
     * @param actionExecutor used to look up action names and counts and to stop all actions
     *                       when an episode is reset
     * @param rewardCalculator its accumulated reward is cleared on every episode reset
     */
    public RLEpisodeManager(ActionExecutor actionExecutor, RewardCalculator rewardCalculator) {
        this.actionExecutor = actionExecutor;
        this.rewardCalculator = rewardCalculator;
        ReinforceMC.LOGGER.info("RLEpisodeManager initialized with episode and statistics management");
    }

    /**
     * Converts the raw score of one action into an execution intensity.
     *
     * <p>A small random jitter (up to {@value #JITTER_RANGE}) is added to the score and the sum
     * is capped at 1. Actions whose name starts with {@code "LOOK_"} get that value scaled by
     * {@value #LOOK_SCALE} and clamped to at least {@value #LOOK_MIN_INTENSITY}. Every other
     * action is off (0) unless the jittered score exceeds {@value #ACTIVATION_THRESHOLD}, in
     * which case it is mapped linearly from {@value #BASE_INTENSITY} upwards and capped at 1.
     *
     * @param i    index of the action in {@code fArr}
     * @param fArr per-action scores; may be {@code null}
     * @return the intensity, or 0 when {@code fArr} is {@code null} or {@code i} is out of range
     */
    public float getActionIntensity(int i, float[] fArr) {
        if (fArr == null || i < 0 || i >= fArr.length) {
            return 0.0f;
        }
        String actionName = this.actionExecutor.getActionName(i);
        float jittered = Math.min(1.0f, fArr[i] + (this.diversityRandom.nextFloat() * JITTER_RANGE));
        // A missing name is treated like any non-look action instead of failing with an NPE.
        if (actionName != null && actionName.startsWith(LOOK_PREFIX)) {
            return Math.min(1.0f, Math.max(LOOK_MIN_INTENSITY, jittered * LOOK_SCALE));
        }
        if (jittered > ACTIVATION_THRESHOLD) {
            float slope = (1.0f - BASE_INTENSITY) / (1.0f - ACTIVATION_THRESHOLD);
            return Math.min(1.0f, BASE_INTENSITY + ((jittered - ACTIVATION_THRESHOLD) * slope));
        }
        return 0.0f;
    }

    /** Counts one use of the named action and logs the distribution every 100th call. */
    private void updateActionStats(String str) {
        synchronized (this.actionUsageStats) {
            this.actionUsageStats.merge(str, 1, Integer::sum);
        }
        if (this.statsLogCounter.incrementAndGet() % STATS_LOG_INTERVAL == 0) {
            logActionDiversityStats();
        }
    }

    /**
     * Logs what share of the recorded actions were look actions (name starts with
     * {@code "LOOK_"}), mouse buttons ({@code "LMB"} / {@code "RMB"}) or anything else, and
     * warns when look actions make up more than 70% or less than 10% of the total. Does nothing
     * when no action has been recorded.
     */
    public void logActionDiversityStats() {
        int lookCount = 0;
        int keyCount = 0;
        int mouseCount = 0;
        // Only the tally happens under the lock; logging is done after releasing it.
        synchronized (this.actionUsageStats) {
            for (Map.Entry<String, Integer> entry : this.actionUsageStats.entrySet()) {
                String name = entry.getKey();
                int uses = entry.getValue();
                if (name != null && name.startsWith(LOOK_PREFIX)) {
                    lookCount += uses;
                } else if ("LMB".equals(name) || "RMB".equals(name)) {
                    mouseCount += uses;
                } else {
                    keyCount += uses;
                }
            }
        }
        int total = lookCount + keyCount + mouseCount;
        if (total == 0) {
            return;
        }
        float lookPercent = (lookCount * 100.0f) / total;
        float keyPercent = (keyCount * 100.0f) / total;
        float mousePercent = (mouseCount * 100.0f) / total;
        // The logger only substitutes bare "{}" placeholders, so numbers are formatted up front.
        ReinforceMC.LOGGER.info("Action Diversity Stats - Look: {}%, Keys: {}%, Mouse: {}% (Total: {})",
                formatOneDecimal(lookPercent), formatOneDecimal(keyPercent), formatOneDecimal(mousePercent),
                Integer.valueOf(total));
        if (lookPercent > 70.0f) {
            ReinforceMC.LOGGER.warn("Action imbalance detected: Camera movement dominates ({}%)",
                    formatOneDecimal(lookPercent));
        } else if (lookPercent < 10.0f) {
            ReinforceMC.LOGGER.warn("Action imbalance detected: Too little camera movement ({}%)",
                    formatOneDecimal(lookPercent));
        }
    }

    /** Formats a value with exactly one decimal place for use in log messages. */
    private static String formatOneDecimal(float value) {
        return String.format("%.1f", value);
    }

    /**
     * Decides whether the current episode should end.
     *
     * @param f the reward to check against the termination limits
     * @return {@code true} when the episode step count has reached the maximum, or when
     *         {@code f} is below -50 or not less than or equal to 100 (which includes NaN)
     */
    public boolean shouldEndEpisode(float f) {
        int steps = this.episodeSteps.get();
        if (steps >= MAX_EPISODE_STEPS) {
            ReinforceMC.LOGGER.debug("Episode ended: max steps reached ({})", Integer.valueOf(steps));
            return true;
        }
        if (f < NEGATIVE_REWARD_LIMIT) {
            ReinforceMC.LOGGER.debug("Episode ended: large negative reward ({})", formatOneDecimal(f));
            return true;
        }
        // Written as "<=" on purpose: a NaN reward fails this test and ends the episode too.
        if (f <= POSITIVE_REWARD_LIMIT) {
            return false;
        }
        ReinforceMC.LOGGER.debug("Episode ended: large positive reward ({})", formatOneDecimal(f));
        return true;
    }

    /**
     * Clears all per-episode state, asks the Minecraft client executor to stop every running
     * action, and resets the reward calculator's accumulated reward.
     */
    public void resetEpisode() {
        this.episodeSteps.set(0);
        this.episodeStarted.set(false);
        this.previousState.set(null);
        // Handed to the client's executor rather than invoked directly on the calling thread.
        Minecraft.func_71410_x().execute(() -> this.actionExecutor.stopAllActions());
        this.rewardCalculator.resetAccumulatedReward();
        this.lastReward.set(0.0f);
        // NOTE(review): totalReward is cleared per episode while totalSteps is only cleared per
        // session - confirm this asymmetry is intended.
        this.totalReward.set(0.0f);
        ReinforceMC.LOGGER.debug("Episode reset - starting fresh with enhanced action diversity");
    }

    /**
     * Starts a new session: zeroes the step counters and total reward, stamps the session start
     * time with the current clock, clears the action-usage statistics and resets the episode.
     */
    public void resetSession() {
        this.stepCount.set(0);
        this.totalSteps.set(0L);
        this.totalReward.set(0.0f);
        this.sessionStartTime.set(System.currentTimeMillis());
        synchronized (this.actionUsageStats) {
            this.actionUsageStats.clear();
        }
        resetEpisode();
        ReinforceMC.LOGGER.info("Episode session and episode reset with action stats cleared");
    }

    /**
     * Marks the episode as started and resets its step counter.
     *
     * @param f value stored as the last reward at the start of the episode
     */
    public void startEpisode(float f) {
        this.episodeStarted.set(true);
        this.lastReward.set(f);
        this.episodeSteps.set(0);
        ReinforceMC.LOGGER.info("Started new RL episode with enhanced action diversity");
    }

    /**
     * Records a new reward sample.
     *
     * @param f  value stored as the last reward
     * @param f2 amount added to the total reward
     */
    public void updateReward(float f, float f2) {
        this.totalReward.updateAndGet(current -> current + f2);
        this.lastReward.set(f);
    }

    /** Adds one to the step counter of the current episode. */
    public void incrementEpisodeSteps() {
        this.episodeSteps.incrementAndGet();
    }

    /**
     * Records the outcome of one step: stores the state and action, increments the current and
     * total step counters and counts the action name in the usage statistics.
     *
     * @param fArr state stored as the previous state (the array reference is kept, not copied)
     * @param i    index stored as the last action
     * @param str  action name counted in the usage statistics
     * @param f    currently unused
     */
    public void updateState(float[] fArr, int i, String str, float f) {
        this.previousState.set(fArr);
        this.lastAction.set(i);
        this.stepCount.incrementAndGet();
        this.totalSteps.incrementAndGet();
        updateActionStats(str);
    }

    /**
     * Builds a statistics snapshot containing the step counters, total reward, the executor's
     * action count and the session duration. All remaining fields keep their defaults.
     *
     * @return a new {@link TrainingStats} instance
     */
    public TrainingStats getStats() {
        TrainingStats snapshot = new TrainingStats();
        snapshot.currentSteps = this.stepCount.get();
        snapshot.totalSteps = this.totalSteps.get();
        snapshot.totalReward = this.totalReward.get();
        snapshot.episodeSteps = this.episodeSteps.get();
        snapshot.availableActions = this.actionExecutor.getActionCount();
        snapshot.sessionDuration = getSessionDuration();
        return snapshot;
    }

    /**
     * @return an independent copy of the per-action usage counts; changing it does not affect
     *         this manager
     */
    public Map<String, Integer> getActionDiversityStats() {
        synchronized (this.actionUsageStats) {
            return new HashMap<>(this.actionUsageStats);
        }
    }

    /** @return the most recently stored state array (not a copy), or {@code null} if none */
    public float[] getPreviousState() {
        return this.previousState.get();
    }

    /** @return the most recently stored action index; -1 until the first one is stored */
    public int getLastAction() {
        return this.lastAction.get();
    }

    /** @return the most recently stored reward */
    public float getLastReward() {
        return this.lastReward.get();
    }

    /** @return {@code true} between {@link #startEpisode(float)} and the next episode reset */
    public boolean isEpisodeStarted() {
        return this.episodeStarted.get();
    }

    /** @return the number of steps recorded since the last session reset */
    public int getCurrentSteps() {
        return this.stepCount.get();
    }

    /** @return the total step counter (also zeroed by a session reset) */
    public long getTotalSteps() {
        return this.totalSteps.get();
    }

    /** @return the reward accumulated since the last episode or session reset */
    public float getTotalReward() {
        return this.totalReward.get();
    }

    /**
     * @return milliseconds elapsed since the session start time was stamped, or 0 when no
     *         session has been started yet
     */
    public long getSessionDuration() {
        long startedAt = this.sessionStartTime.get();
        return startedAt > 0 ? System.currentTimeMillis() - startedAt : 0L;
    }

    /** @return the number of steps counted in the current episode */
    public int getEpisodeSteps() {
        return this.episodeSteps.get();
    }
}
