/*
 * Decompiled with CFR 0.152.
 */
package jrockit.rls;

import java.util.Random;
import jrockit.rls.Action;
import jrockit.rls.ActionSpace;
import jrockit.rls.NonRlsThread;
import jrockit.rls.RlsThread;
import jrockit.rls.State;
import jrockit.rls.StateSpace;
import jrockit.rls.VmAdapter;

/**
 * Learner that keeps a flat table of state-action values ({@code qArray}) together with a
 * parallel table of per-cell visit counters ({@code stateStatistics}), and updates the table
 * from rewards reported by {@link VmAdapter}.
 *
 * <p>Table layout, as established by {@link #init()} and the index arithmetic in
 * {@link #calculateStateActionValue(Action, State)}:
 * <ul>
 *   <li>not generalizing: one slice of {@code getCombinedFeatureStateSize()} cells per action;</li>
 *   <li>generalizing: {@code getStateSize()} cells shared by all actions, then the per-action
 *       slices, then one extra cell per action.</li>
 * </ul>
 *
 * <p>Every instance registers itself in a static registry of at most {@code MAX_NOOF_RLS}
 * entries. The time step is a static field and is therefore shared by all instances; note that
 * {@link #init()} resets it to zero.
 *
 * <p>NOTE(review): nothing in this class synchronizes access to the static state; confirm how
 * the driving threads use it before relying on concurrent access.
 */
public class Rls {
    /** Value reported by {@link #getSleepTime()}. */
    private static final long SLEEP_TIME = 50L;
    /** Capacity of the static instance registry. */
    private static final int MAX_NOOF_RLS = 2;
    /** Registry of every instance constructed so far; filled by the constructor. */
    private static final Rls[] rlsArray = new Rls[MAX_NOOF_RLS];
    /** Number of used slots in {@link #rlsArray}. */
    private static int noofRls = 0;
    private boolean generalizing;
    private boolean useTraining;
    private boolean individualLearning;
    private boolean useRls;
    /** Flat table of learned values; see the class comment for the layout. */
    private double[] qArray;
    /** Number of cells of {@link #qArray} that are in use. */
    private int qSize;
    /** Visit counter for each cell of {@link #qArray}. */
    private double[] stateStatistics;
    private String name;
    private Action[] actions;
    private ActionSpace actionSpace;
    private StateSpace stateSpace;
    private State oldState;
    private Action oldAction;
    private double oldStateActionValue;
    /** Side output of {@link #getBestAction(State, Action[])}: value of the action it returned. */
    private double bestStateActionValue;
    /**
     * Side output of {@link #calculateStateActionValue(Action, State)} when individual learning
     * is on: the visit counter of the last active cell it read.
     */
    private double epsilon;
    private double gamma;
    private Random rand;
    /** Time step shared by all instances. */
    private static double currentTimeStep;

    /**
     * Registers the new instance in the static registry, builds its state and action spaces and
     * calls {@link #init()}.
     *
     * @param useRls             stored flag; in this class it only controls whether
     *                           {@link #printRlsInfo()} dumps the value table
     * @param useTraining        whether the value table is updated and exploration is enabled
     * @param generalizing       selects the table layout (see the class comment)
     * @param individualLearning whether learning and exploration rates are derived from per-cell
     *                           visit counters instead of the shared time step
     * @param stateArg           first argument handed to the {@link StateSpace} constructor
     * @param stateArrayA        second argument handed to the {@link StateSpace} constructor
     * @param stateArrayB        third argument handed to the {@link StateSpace} constructor
     * @param actionArg          first argument handed to the {@link ActionSpace} constructor
     *                           ({@link #changeActions(int[])} passes the array length here)
     * @param actionArray        second argument handed to the {@link ActionSpace} constructor
     * @param name               name reported by {@link #getName()}
     * @throws ArrayIndexOutOfBoundsException if the registry already holds
     *                                        {@code MAX_NOOF_RLS} instances
     */
    public Rls(boolean useRls, boolean useTraining, boolean generalizing, boolean individualLearning,
            int stateArg, int[] stateArrayA, int[] stateArrayB, int actionArg, int[] actionArray, String name) {
        // Same exception type the unchecked array store used to raise, but with a usable message.
        if (noofRls >= MAX_NOOF_RLS) {
            throw new ArrayIndexOutOfBoundsException(
                    "Cannot register more than " + MAX_NOOF_RLS + " Rls instances");
        }
        rlsArray[noofRls] = this;
        ++noofRls;
        this.useRls = useRls;
        this.useTraining = useTraining;
        this.generalizing = generalizing;
        this.individualLearning = individualLearning;
        this.name = name;
        this.stateSpace = new StateSpace(stateArg, stateArrayA, stateArrayB);
        this.actionSpace = new ActionSpace(actionArg, actionArray);
        this.actions = this.actionSpace.getAvailableActions();
        this.init();
    }

    /**
     * Sizes and zeroes the value table and the visit counters, resets the shared time step,
     * and primes {@code oldState}, {@code oldAction} and {@code oldStateActionValue} from a
     * freshly read state.
     */
    public void init() {
        int actionCount = this.actionSpace.getNoofActions();
        if (this.generalizing) {
            this.qSize = this.stateSpace.getStateSize()
                    + actionCount * this.stateSpace.getCombinedFeatureStateSize()
                    + actionCount;
        } else {
            this.qSize = actionCount * this.stateSpace.getCombinedFeatureStateSize();
        }
        // New arrays are zero-filled by the JVM. The table keeps its full size even when
        // training is off: a zero-length table would make every lookup below run out of bounds.
        // No pre-trained values are available in this file, so an untrained instance starts
        // from all zeros.
        this.qArray = new double[this.qSize];
        this.stateStatistics = new double[this.qSize];
        currentTimeStep = 0.0;
        this.gamma = 0.9;
        this.rand = new Random();
        VmAdapter.measure();
        this.oldState = this.initState();
        this.oldAction = this.getBestAction(this.oldState, this.actions);
        this.oldStateActionValue = this.bestStateActionValue;
    }

    /**
     * @return the constant {@code SLEEP_TIME} (50)
     */
    public long getSleepTime() {
        return SLEEP_TIME;
    }

    /** Advances the shared time step by one. */
    public void increaseTimeStep() {
        currentTimeStep += 1.0;
    }

    /**
     * @return the time step shared by all instances
     */
    public static double getCurrentTimeStep() {
        return currentTimeStep;
    }

    /**
     * Halves the shared time step; a value that is not a multiple of two is decremented by one
     * first. A smaller time step yields larger rates from
     * {@link #calculateLearningRate(double)} and {@link #calculateRandom(double)}.
     */
    public void increaseLearning() {
        if (currentTimeStep % 2.0 == 0.0) {
            currentTimeStep = currentTimeStep / 2.0;
        } else {
            currentTimeStep = (currentTimeStep - 1.0) / 2.0;
        }
    }

    /**
     * Returns the learning rate for the given step: {@code 0.1 * exp(-step / c)} up to a
     * cut-off, and zero beyond it.
     *
     * <p>With individual learning the cut-off is 5000 and {@code c} is 3000. Otherwise the
     * cut-off is 50000 (crossing it also switches training off for this instance) and {@code c}
     * depends on the number of state features (1, 2 or 3); any other count logs an error and
     * yields zero.
     *
     * @param step time step or visit count the rate is computed for
     * @return the learning rate
     */
    public double calculateLearningRate(double step) {
        if (this.individualLearning) {
            return step > 5000.0 ? 0.0 : 0.1 * Math.exp(-step / 3000.0);
        }
        if (step > 50000.0) {
            this.useTraining = false;
            return 0.0;
        }
        switch (this.stateSpace.getNoofStateFeatures()) {
            case 1:
                return 0.1 * Math.exp(-step / 30000.0);
            case 2:
                return 0.1 * Math.exp(-step / 50000.0);
            case 3:
                return 0.1 * Math.exp(-step / 40000.0);
            default:
                System.err.println("The number of state features are too many for this learning rate function.");
                return 0.0;
        }
    }

    /**
     * Returns the probability of picking a random action for the given step. Same structure as
     * {@link #calculateLearningRate(double)}, including the side effect of switching training
     * off past step 50000 when individual learning is disabled, but with its own constants.
     *
     * @param step time step or visit count the rate is computed for
     * @return the exploration probability
     */
    public double calculateRandom(double step) {
        if (this.individualLearning) {
            return step > 5000.0 ? 0.0 : 0.4 * Math.exp(-step / 3000.0);
        }
        if (step > 50000.0) {
            this.useTraining = false;
            return 0.0;
        }
        switch (this.stateSpace.getNoofStateFeatures()) {
            case 1:
                return 0.3 * Math.exp(-step / 5000.0);
            case 2:
                return 0.4 * Math.exp(-step / 7000.0);
            case 3:
                return 0.3 * Math.exp(-step / 10000.0);
            default:
                System.err.println("The number of state features are too many for this random function.");
                return 0.0;
        }
    }

    /**
     * Drops the table slice of one action and installs a new action set. Does nothing when
     * generalizing. When the action id is unknown an error is logged and only the action set
     * is replaced.
     *
     * @param n      id of the action whose slice is removed
     * @param nArray argument forwarded to {@link #changeActions(int[])}
     */
    public void reFactorQArray(int n, int[] nArray) {
        if (this.generalizing) {
            return;
        }
        int removedIndex = this.findActionIndex(n);
        if (removedIndex == -1) {
            System.err.println("Action " + n + " is not available.");
        } else {
            int sliceSize = this.stateSpace.getCombinedFeatureStateSize();
            int destination = removedIndex * sliceSize;
            int movedCells = (this.actionSpace.getNoofActions() - removedIndex - 1) * sliceSize;
            // Shift every later slice one slice to the left; arraycopy handles the overlap.
            System.arraycopy(this.qArray, destination + sliceSize, this.qArray, destination, movedCells);
            System.arraycopy(this.stateStatistics, destination + sliceSize, this.stateStatistics, destination, movedCells);
            this.qSize = destination + movedCells;
        }
        this.changeActions(nArray);
    }

    /**
     * Replaces the action space with a new one built from {@code nArray.length} and
     * {@code nArray}, and refreshes the cached action array. The value table is not touched.
     *
     * @param nArray second argument for the new {@link ActionSpace}
     */
    public void changeActions(int[] nArray) {
        this.actionSpace = new ActionSpace(nArray.length, nArray);
        this.actions = this.actionSpace.getAvailableActions();
    }

    /** Prints the name and {@link #printRlsInfo()} output of every registered instance. */
    public static void printAllRls() {
        for (int i = 0; i < noofRls; ++i) {
            Rls registered = rlsArray[i];
            System.out.println(registered.getName());
            registered.printRlsInfo();
            System.out.println(" ");
        }
    }

    /** Forwards to {@code VmAdapter.notifyGcEnded()}. */
    public static void notifyGcEnded() {
        VmAdapter.notifyGcEnded();
    }

    /**
     * @return the name given at construction
     */
    public String getName() {
        return this.name;
    }

    /**
     * Prints the action count, the shared time step and the current learning and exploration
     * rates; when {@code useRls} is set it also prints the used part of the value table.
     *
     * <p>Because the rates are obtained through {@link #calculateLearningRate(double)} and
     * {@link #calculateRandom(double)}, printing carries their side effect of switching
     * training off once the time step exceeds their cut-off.
     */
    public void printRlsInfo() {
        System.out.println("Total number of actions: " + this.actionSpace.getNoofActions());
        System.out.println("Time step: " + currentTimeStep);
        System.out.println("Learning rate: " + this.calculateLearningRate(currentTimeStep));
        System.out.println("Exploring rate: " + this.calculateRandom(currentTimeStep));
        if (!this.useRls) {
            return;
        }
        StringBuilder dump = new StringBuilder("QArray = [");
        for (int i = 0; i < this.qSize; ++i) {
            dump.append(' ').append(this.qArray[i]);
        }
        dump.append(" ]");
        System.out.println(dump.toString());
        System.out.println(" ");
    }

    /**
     * @return a new {@link State} over this instance's state space, passed through
     *         {@code VmAdapter.initState}
     */
    public State initState() {
        return VmAdapter.initState(new State(this.stateSpace));
    }

    /**
     * Refreshes the cached action array from the action space and returns it.
     *
     * @param state not used by this implementation
     * @return the actions currently offered by the action space
     */
    public Action[] getPossibleActions(State state) {
        this.actions = this.actionSpace.getAvailableActions();
        return this.actions;
    }

    /**
     * Runs one learning step: obtains the reward for the previous action, reads a new state,
     * selects the next action, updates the table (only while training) from the previous and
     * the newly selected state-action values, then performs the selected action and advances
     * the shared time step.
     */
    public void sarsa() {
        double reward = this.calculateReward(this.oldAction);
        State currentState = this.initState();
        Action[] candidates = this.getPossibleActions(currentState);
        Action selected = this.getBestAction(currentState, candidates);
        double selectedValue = this.bestStateActionValue;
        if (this.useTraining) {
            this.enhanceQArray(this.oldAction, this.oldStateActionValue, selectedValue, reward);
        }
        this.oldState = currentState;
        this.oldAction = selected;
        this.oldStateActionValue = selectedValue;
        VmAdapter.perform(this.oldAction);
        currentTimeStep += 1.0;
    }

    /**
     * Selects an action for {@code state} and stores its value in
     * {@code bestStateActionValue}.
     *
     * <p>The first {@code getNoofActions()} entries of {@code actionArray} are evaluated in
     * order and the highest-valued one is kept. A uniformly random entry is returned instead
     * when every evaluated value was within 1e-5 of the running best (which starts at 0.0),
     * or, while training, with the probability given by {@link #calculateRandom(double)}.
     *
     * @param state       state the actions are evaluated in
     * @param actionArray candidate actions; must hold at least {@code getNoofActions()} entries
     * @return the selected action
     */
    public Action getBestAction(State state, Action[] actionArray) {
        this.bestStateActionValue = 0.0;
        int tieCount = 0;
        int actionCount = this.actionSpace.getNoofActions();
        Action chosen = new Action(0);
        for (int i = 0; i < actionCount; ++i) {
            Action candidate = actionArray[i];
            double value = this.calculateStateActionValue(candidate, state);
            boolean tie = Math.abs(value - this.bestStateActionValue) < 1.0E-5;
            if (tie) {
                ++tieCount;
            }
            // The first candidate always replaces the placeholder, whatever its value.
            if (tie || value > this.bestStateActionValue || i == 0) {
                this.bestStateActionValue = value;
                chosen = candidate;
            }
        }
        double exploreProbability = 0.0;
        if (this.useTraining) {
            // With individual learning, epsilon holds the visit counter left behind by the
            // last evaluation in the loop above.
            exploreProbability = this.individualLearning
                    ? this.calculateRandom(this.epsilon)
                    : this.calculateRandom(currentTimeStep);
        }
        // Always drawn so the random sequence does not depend on the training flag.
        double draw = this.rand.nextDouble();
        // useTraining is read again on purpose: calculateRandom may just have cleared it.
        boolean explore = this.useTraining && draw < exploreProbability;
        if (explore || tieCount == actionCount) {
            chosen = actionArray[this.rand.nextInt(actionCount)];
            this.bestStateActionValue = this.calculateStateActionValue(chosen, state);
        }
        return chosen;
    }

    /**
     * Sums the table cells that are active for {@code action} in {@code state}.
     *
     * <p>A feature entry counts as active when its value is exactly 1. With one state feature
     * the action's slice is indexed directly by the first {@code tiles[0]} entries; otherwise
     * it is indexed by the pair (entry {@code i} of the first {@code tiles[1]} entries, entry
     * {@code j} of the following {@code tiles[0]} entries). When generalizing, the active
     * shared cells and the action's extra cell are added as well. With individual learning
     * the visit counter of the last active slice cell is stored in {@code epsilon}.
     *
     * <p>If the action is unknown an error is logged and the lookup proceeds without the
     * per-action offset.
     *
     * @param action action to evaluate
     * @param state  state to evaluate it in
     * @return the summed value
     */
    public double calculateStateActionValue(Action action, State state) {
        double value = 0.0;
        // Must start at zero: when not generalizing, the action slices begin at the table start.
        int index = 0;
        int stateSize = this.stateSpace.getStateSize();
        long[] features = state.getStateFeatureValues();
        int[] tiles = this.stateSpace.getTiles();
        if (this.generalizing) {
            // Leaves index at stateSize, i.e. at the start of the per-action slices.
            for (index = 0; index < stateSize; ++index) {
                if (features[index] == 1L) {
                    value += this.qArray[index];
                }
            }
        }
        int featureCount = this.stateSpace.getNoofStateFeatures();
        int innerTiles = 0;
        int outerTiles = 0;
        switch (featureCount) {
            case 3:
            case 2:
                outerTiles = tiles[1];
                // intentional fall-through
            case 1:
                innerTiles = tiles[0];
                break;
            default:
                System.err.println("Error: too few or too many state features in Calculate best state action value in Rls");
        }
        int actionIndex = this.findActionIndex(action.getID());
        if (actionIndex == -1) {
            System.err.println("Action " + action.getID() + " is not available.");
        } else {
            index += actionIndex * this.stateSpace.getCombinedFeatureStateSize();
        }
        if (featureCount == 1) {
            for (int i = 0; i < innerTiles; ++i) {
                if (features[i] == 1L) {
                    if (this.individualLearning) {
                        this.epsilon = this.stateStatistics[index];
                    }
                    value += this.qArray[index];
                }
                ++index;
            }
        } else {
            for (int i = 0; i < outerTiles; ++i) {
                if (features[i] != 1L) {
                    // Skip the whole row belonging to an inactive outer entry.
                    index += innerTiles;
                    continue;
                }
                for (int j = 0; j < innerTiles; ++j) {
                    if (features[outerTiles + j] == 1L) {
                        if (this.individualLearning) {
                            this.epsilon = this.stateStatistics[index];
                        }
                        value += this.qArray[index];
                    }
                    ++index;
                }
            }
        }
        if (this.generalizing) {
            // Skip the slices of the remaining actions, then select this action's extra cell.
            index += (this.actionSpace.getNoofActions() - actionIndex - 1) * this.stateSpace.getCombinedFeatureStateSize();
            index += actionIndex;
            value += this.qArray[index];
        }
        return value;
    }

    /**
     * @param action action to obtain the reward for
     * @return the result of {@code VmAdapter.calculateReward(action)}
     */
    public double calculateReward(Action action) {
        return VmAdapter.calculateReward(action);
    }

    /**
     * Applies one update to every table cell that is active for {@code action} in
     * {@code oldState}. The cells are located exactly as in
     * {@link #calculateStateActionValue(Action, State)}.
     *
     * <p>The error term is {@code reward + gamma * newValue - oldValue}, treated as zero when
     * its magnitude is below 1e-6. Each active cell has its visit counter incremented and
     * receives {@code rate * error}. The rate is the one for the shared time step, except that
     * slice cells use the rate for their own (already incremented) visit counter when
     * individual learning is on.
     *
     * @param action   action whose cells are updated
     * @param oldValue value previously stored for the old state and {@code action}
     * @param newValue value of the newly selected state-action pair
     * @param reward   reward obtained for {@code action}
     */
    public void enhanceQArray(Action action, double oldValue, double newValue, double reward) {
        // Must start at zero: when not generalizing, the action slices begin at the table start.
        int index = 0;
        int stateSize = this.stateSpace.getStateSize();
        int actionCount = this.actionSpace.getNoofActions();
        long[] features = this.oldState.getStateFeatureValues();
        double rawError = reward + this.gamma * newValue - oldValue;
        double error = Math.abs(rawError) < 1.0E-6 ? 0.0 : rawError;
        double sharedRate = this.calculateLearningRate(currentTimeStep);
        if (this.generalizing) {
            // Leaves index at stateSize, i.e. at the start of the per-action slices.
            for (index = 0; index < stateSize; ++index) {
                if (features[index] == 1L) {
                    this.stateStatistics[index] += 1.0;
                    this.qArray[index] += sharedRate * error;
                }
            }
        }
        int[] tiles = this.stateSpace.getTiles();
        int featureCount = this.stateSpace.getNoofStateFeatures();
        int innerTiles = 0;
        int outerTiles = 0;
        switch (featureCount) {
            case 3:
            case 2:
                outerTiles = tiles[1];
                // intentional fall-through
            case 1:
                innerTiles = tiles[0];
                break;
            default:
                System.err.println("Error: too few or too many state features in calculateBestStateActionValue in Rls");
        }
        int actionIndex = this.findActionIndex(action.getID());
        if (actionIndex == -1) {
            System.err.println("Action " + action.getID() + " is not available.");
        } else {
            index += actionIndex * this.stateSpace.getCombinedFeatureStateSize();
        }
        if (featureCount == 1) {
            for (int i = 0; i < innerTiles; ++i) {
                if (features[i] == 1L) {
                    this.updateCell(index, sharedRate, error);
                }
                ++index;
            }
        } else {
            for (int i = 0; i < outerTiles; ++i) {
                if (features[i] != 1L) {
                    // Skip the whole row belonging to an inactive outer entry.
                    index += innerTiles;
                    continue;
                }
                for (int j = 0; j < innerTiles; ++j) {
                    if (features[outerTiles + j] == 1L) {
                        this.updateCell(index, sharedRate, error);
                    }
                    ++index;
                }
            }
        }
        if (this.generalizing) {
            // Skip the slices of the remaining actions, then select this action's extra cell.
            index += (actionCount - actionIndex - 1) * this.stateSpace.getCombinedFeatureStateSize();
            index += actionIndex;
            this.stateStatistics[index] += 1.0;
            this.qArray[index] += sharedRate * error;
        }
    }

    /**
     * Creates one learner named "SuperRls" with training on, generalizing and individual
     * learning off, and starts an {@link RlsThread} for it when {@code bl} is true or a
     * {@link NonRlsThread} otherwise.
     *
     * @param bl forwarded as the {@code useRls} flag and used to pick the thread type
     */
    public static void start(boolean bl) {
        Rls rls = new Rls(bl, true, false, false, 1, new int[]{1}, new int[]{7}, 2, new int[]{0, 1}, "SuperRls");
        if (bl) {
            new RlsThread(rls).start();
        } else {
            new NonRlsThread(rls).start();
        }
    }

    /**
     * Looks an action id up in the cached action array.
     *
     * @param actionId id to search for
     * @return position of the last entry with that id, or -1 when there is none
     */
    private int findActionIndex(int actionId) {
        int found = -1;
        int actionCount = this.actionSpace.getNoofActions();
        for (int i = 0; i < actionCount; ++i) {
            if (this.actions[i].getID() == actionId) {
                found = i;
            }
        }
        return found;
    }

    /**
     * Counts a visit to one slice cell and adds {@code rate * error} to it, where the rate is
     * derived from the cell's own visit counter under individual learning and is
     * {@code sharedRate} otherwise.
     */
    private void updateCell(int index, double sharedRate, double error) {
        this.stateStatistics[index] += 1.0;
        double rate = this.individualLearning
                ? this.calculateLearningRate(this.stateStatistics[index])
                : sharedRate;
        this.qArray[index] += rate * error;
    }
}

