# -*- coding: utf-8 -*-
"""
Created on Thu Jul 15 11:47:43 2021

@author: J. M. Bos
"""
#For calculations
import numpy as np

#For data handling
import pandas as pd
from sklearn.model_selection import train_test_split

#For nearest neighbours, logistic regression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

#For neural networks
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input


#For plotting
import matplotlib.pyplot as plt



###########################
#Preparation steps
###########################

#####
#Import the data from file
#####
def Initialization(filename="dataset.csv"):
    """Read the dataset from a CSV file, dropping the leading counter column.

    Parameters
    ----------
    filename : str, optional
        Path of the CSV file to load (defaults to "dataset.csv" so existing
        callers are unaffected).

    Returns
    -------
    pandas.DataFrame
        The 785 retained columns: the 'label' column plus 784 pixel columns.
    """
    #Columns 1..785 hold the label and the 28x28=784 pixel values;
    #column 0 is a row counter and is deliberately skipped.
    return pd.read_csv(filename, usecols=[i + 1 for i in range(785)])
    


#####
#Create training and test set
#####

def SplitTrainigTest(DataSample, splitpercent, setsize, totalsize=12000):
    """Draw a training and a test set of `setsize` total examples.

    Parameters
    ----------
    DataSample : pandas.DataFrame
        The full dataset to sample from.
    splitpercent : float
        Fraction (0..1) of the drawn examples placed in the TEST set;
        the training set receives the complement.
    setsize : int
        Total number of examples to draw (train + test). Keep <= totalsize.
    totalsize : int, optional
        Number of rows in `DataSample`. Previously hard-coded to 12000;
        now a parameter (default 12000) so the function works for other
        dataset sizes without changing existing call sites.

    Returns
    -------
    (TrainingSample, TestSample, trainingsize, testsize)
        The two DataFrames and their actual row counts.
    """
    #train_test_split interprets fractional sizes relative to the WHOLE
    #dataset, so both fractions are scaled by setsize/totalsize to draw
    #only `setsize` rows in total.
    testsplit = splitpercent * (setsize / totalsize)
    trainsplit = (1.0 - splitpercent) * (setsize / totalsize)
    TrainingSample, TestSample = train_test_split(
        DataSample, test_size=testsplit, train_size=trainsplit)
    trainingsize = TrainingSample.shape[0]
    testsize = TestSample.shape[0]
    return TrainingSample, TestSample, trainingsize, testsize

#Remark: Floating point calculations may result in the training/test set containing one example more or less than expected



###########################
#Classification methods
###########################

#####
#K-Nearest Neighbours algorithm
#####
#Run-time warning: pre-processing step of summation of pixel values is time consuming, small set-size may be preferable.
def KNearestNeighbours(DataSample):
    """Classify sweater vs t-shirt with k-NN on the summed pixel intensity.

    Each image is reduced to a single feature: the sum of its 784 pixel
    values. A k-nearest-neighbours classifier is trained on that feature,
    train/test accuracies are printed, and the user can interactively
    inspect predictions for individual examples.

    Parameters
    ----------
    DataSample : pandas.DataFrame
        Column 0 is 'label' (1 = sweater, otherwise t-shirt); columns
        1..784 are pixel values.
    """
    #Create test and training sets
    setsize = 1200 #KEEP SMALLER THAN 12000.
    splitpercent = 0.8  #Controls the proportion of examples that end up in the test set; the training set is the complement of this.
    TrainingSample, TestSample, trainingsize, testsize = SplitTrainigTest(DataSample, splitpercent, setsize)
    #Reduce every example to one feature: its total pixel intensity.
    #A vectorized row sum replaces the previous per-cell iloc loops,
    #which performed O(rows*784) Python-level operations.
    Ytrain = TrainingSample[['label']].to_numpy()
    XtrainSimple = TrainingSample.iloc[:, 1:785].to_numpy().sum(axis=1).astype(float)
    Ytest = TestSample[['label']].to_numpy()
    XtestSimple = TestSample.iloc[:, 1:785].to_numpy().sum(axis=1).astype(float)
    #Method parameters
    neighbours = 10 #Number of neighbours
    #Run k-nearest neighbours classifier
    knn = KNeighborsClassifier(n_neighbors=neighbours) #Create instance of classifier
    knn.fit(XtrainSimple.reshape(-1,1), Ytrain.ravel())    #Train classifier
    #Report results
    print('Accuracy of K-NN classifier on training set: {:.2f}'
          .format(knn.score(XtrainSimple.reshape(-1,1), Ytrain.ravel())))
    print('Accuracy of K-NN classifier on test set: {:.2f}'
          .format(knn.score(XtestSimple.reshape(-1,1), Ytest.ravel())))
    #Give prediction for individual input:
    while True:
        Choice = input("New example (E) or return to main menu (M): ")
        if Choice == "M" or Choice == "m":
            return
        elif Choice == "E" or Choice == "e":
            NumberInput = AskNumber()%12000  #Wrap into the valid row range
            #Vectorized pixel extraction replaces the per-pixel iloc loop.
            picture = DataSample.iloc[NumberInput, 1:785].to_numpy(dtype=float)
            imgplotsub = plt.imshow(np.reshape(picture, (28,28)), cmap=plt.cm.gray)
            plt.show()
            if DataSample.iloc[NumberInput,0] == 1:
                print("True label: sweater")
            else:
                print("True label: t-shirt")
            #Build the single-feature input (total pixel intensity).
            Xpict = np.array([[picture.sum()]])
            #predict returns an array holding one 0/1 label; index it
            #explicitly instead of comparing the whole array in the `if`.
            if knn.predict(Xpict)[0] >= 0.5:
                print("Predicted label: sweater")
            else:
                print("Predicted label: t-shirt")


#####
#Logistic regression simple
#####
#Run-time warning: pre-processing step of summation of pixel values is time consuming, small set-size may be preferable.
def SimpleLogReg(DataSample):
    """Logistic regression on a single feature: the summed pixel intensity.

    Each image is reduced to the sum of its 784 pixel values; a logistic
    regression classifier is trained on that feature, train/test accuracies
    are printed, and the user can interactively inspect predictions for
    individual examples.

    Parameters
    ----------
    DataSample : pandas.DataFrame
        Column 0 is 'label' (1 = sweater, otherwise t-shirt); columns
        1..784 are pixel values.
    """
    #Create test and training sets
    setsize = 1200 #KEEP SMALLER THAN 12000.
    splitpercent = 0.8  #Controls the proportion of examples that end up in the test set; the training set is the complement of this.
    TrainingSample, TestSample, trainingsize, testsize = SplitTrainigTest(DataSample, splitpercent, setsize)
    #Reduce every example to one feature: its total pixel intensity.
    #A vectorized row sum replaces the previous per-cell iloc loops,
    #which performed O(rows*784) Python-level operations.
    Ytrain = TrainingSample[['label']].to_numpy()
    XtrainSimple = TrainingSample.iloc[:, 1:785].to_numpy().sum(axis=1).astype(float)
    Ytest = TestSample[['label']].to_numpy()
    XtestSimple = TestSample.iloc[:, 1:785].to_numpy().sum(axis=1).astype(float)
    #Do logistic regression
    logregS = LogisticRegression() #Create instance of classifier
    logregS.fit(XtrainSimple.reshape(-1,1), Ytrain.ravel())   #Train classifier
    #Report results
    print('Accuracy of Logistic regression classifier on training set: {:.2f}'
          .format(logregS.score(XtrainSimple.reshape(-1,1), Ytrain.ravel())))
    print('Accuracy of Logistic regression classifier on test set: {:.2f}'
          .format(logregS.score(XtestSimple.reshape(-1,1), Ytest.ravel())))
    #Give prediction for individual input:
    while True:
        Choice = input("New example (E) or return to main menu (M): ")
        if Choice == "M" or Choice == "m":
            return
        elif Choice == "E" or Choice == "e":
            NumberInput = AskNumber()%12000  #Wrap into the valid row range
            #Vectorized pixel extraction replaces the per-pixel iloc loop.
            picture = DataSample.iloc[NumberInput, 1:785].to_numpy(dtype=float)
            imgplotsub = plt.imshow(np.reshape(picture, (28,28)), cmap=plt.cm.gray)
            plt.show()
            if DataSample.iloc[NumberInput,0] == 1:
                print("True label: sweater")
            else:
                print("True label: t-shirt")
            #Build the single-feature input (total pixel intensity).
            Xpict = np.array([[picture.sum()]])
            print("Predicted probability: ", logregS.predict_proba(Xpict))
            #predict returns an array holding one 0/1 label; index it
            #explicitly instead of comparing the whole array in the `if`.
            if logregS.predict(Xpict)[0] >= 0.5:
                print("Predicted label: sweater")
            else:
                print("Predicted label: t-shirt")


#####
#Logistic regression full
#####

def FullLogReg(DataSample):
    """Logistic regression on the full 784-pixel feature vector.

    Trains a logistic regression classifier on the raw pixels, prints
    train/test accuracies, plots the learned per-pixel weights as a 28x28
    image, and lets the user interactively inspect predictions for
    individual examples.

    Parameters
    ----------
    DataSample : pandas.DataFrame
        Column 0 is 'label' (1 = sweater, otherwise t-shirt); columns
        1..784 are pixel values.
    """
    #Create test and training sets
    setsize = 1200 #KEEP SMALLER THAN 12000.
    splitpercent = 0.8  #Controls the proportion of examples that end up in the test set; the training set is the complement of this.
    TrainingSample, TestSample, trainingsize, testsize = SplitTrainigTest(DataSample, splitpercent, setsize)
    Ytrain = TrainingSample[['label']].to_numpy()
    Xtrain = TrainingSample.iloc[0:trainingsize,1:785]
    Ytest = TestSample[['label']].to_numpy()
    Xtest = TestSample.iloc[0:testsize,1:785]
    #Do logistic regression
    logreg = LogisticRegression(max_iter=5000) #Create instance of classifier
    logreg.fit(Xtrain, Ytrain.ravel())   #Train classifier
    #Report results
    print('Accuracy of Logistic regression classifier on training set: {:.2f}'
               .format(logreg.score(Xtrain, Ytrain.ravel())))
    print('Accuracy of Logistic regression classifier on test set: {:.2f}'
               .format(logreg.score(Xtest, Ytest.ravel())))
    imgplotcoef = plt.imshow(np.reshape(logreg.coef_, (28,28)), cmap=plt.cm.gray) #Plot the influence/weight of individual pixels on the outcome
    plt.show()
    #Give prediction for individual input:
    while True:
        Choice = input("New example (E) or return to main menu (M): ")
        if Choice == "M" or Choice == "m":
            return
        elif Choice == "E" or Choice == "e":
            NumberInput = AskNumber()%12000  #Wrap into the valid row range
            #Vectorized pixel extraction replaces the per-pixel iloc loop.
            picture = DataSample.iloc[NumberInput, 1:785].to_numpy(dtype=float)
            imgplotask = plt.imshow(np.reshape(picture, (28,28)), cmap=plt.cm.gray)
            plt.show()
            if DataSample.iloc[NumberInput,0] == 1:
                print("True label: sweater")
            else:
                print("True label: t-shirt")
            #Slice the 1-row example once and reuse it (previously the
            #same iloc slice was recomputed for each call).
            example = DataSample.iloc[NumberInput:NumberInput+1,1:785]
            print("Predicted probability: ", logreg.predict_proba(example))
            #predict returns an array holding one 0/1 label; index it
            #explicitly instead of comparing the whole array in the `if`.
            if logreg.predict(example)[0] >= 0.5:
                print("Predicted label: sweater")
            else:
                print("Predicted label: t-shirt")
    
    
#####
#Neural networks
#####

def NeuralNetwork(DataSample):
    """Train a fully-connected neural network on the raw pixel data.

    Builds a ReLU multilayer perceptron with a sigmoid output via the
    Keras functional API, trains it with binary cross-entropy, reports
    train/test accuracy, and lets the user interactively inspect
    predictions for individual examples.

    Parameters
    ----------
    DataSample : pandas.DataFrame
        Column 0 is 'label' (1 = sweater, otherwise t-shirt); columns
        1..784 are pixel values.
    """
    #Create test and training sets
    setsize = 12000 #KEEP SMALLER THAN 12000. Controls how many data points should be used in total, smaller values give faster run time
    splitpercent = 0.8  #Controls the proportion of examples that end up in the test set; the training set is the complement of this.
    TrainingSample, TestSample, trainingsize, testsize = SplitTrainigTest(DataSample, splitpercent, setsize)
    Ytrain = TrainingSample[['label']].to_numpy()
    Xtrain = TrainingSample.iloc[0:trainingsize,1:785]
    Ytest = TestSample[['label']].to_numpy()
    Xtest = TestSample.iloc[0:testsize,1:785]
    #Set network parameters
    Width=10 #Number of neurons in a single layer
    Layers=4 #Number of layers, set at 1 or higher
    #Construct and train the neural network. Uses the keras functional API model. Activation functions and initialization of parameters are set in the layers, training loss and optimizers at the compile step
    inputs = Input(shape=(784,))
    x = Dense(Width, activation='relu')(inputs) #Network uses ReLU activation in hidden layers
    for layer in range(Layers-1):
        x = Dense(Width, activation='relu')(x)
        #x = Dropout(0.2)(x)  #Activate (remove the first # before code) to add dropout to each layer during training, parameter between 0 and 1 determines the fraction of the inputs to drop
    output = Dense(1, activation='sigmoid')(x) #Network uses sigmoid output
    network = Model(inputs, output)
    network.compile(optimizer = 'adam', loss= 'binary_crossentropy', metrics=['accuracy']) #Training settings for the neural network (training loss and optimizer). The metric accuracy is needed for reporting performance of network
    network.fit(Xtrain,Ytrain, epochs = 100, verbose=0)
    #Plot the network information
    network.summary()
    #Report results
    print('Accuracy of DNN classifier on training set: {:.2f}'
          .format(network.evaluate(Xtrain, Ytrain.ravel())[1]))
    print('Accuracy of DNN classifier on test set: {:.2f}'
          .format(network.evaluate(Xtest, Ytest.ravel())[1]))
    #Give prediction for individual input:
    while True:
        Choice = input("New example (E) or return to main menu (M): ")
        if Choice == "M" or Choice == "m":
            return
        elif Choice == "E" or Choice == "e":
            NumberInput = AskNumber()%12000  #Wrap into the valid row range
            #Vectorized pixel extraction replaces the per-pixel iloc loop.
            picture = DataSample.iloc[NumberInput, 1:785].to_numpy(dtype=float)
            imgplot = plt.imshow(np.reshape(picture, (28,28)), cmap=plt.cm.gray)
            plt.show()
            if DataSample.iloc[NumberInput,0] == 1:
                print("True label: sweater")
            else:
                print("True label: t-shirt")
            #Run the (relatively expensive) forward pass once and reuse
            #the result; previously network.predict was called twice.
            probability = network.predict(DataSample.iloc[NumberInput:NumberInput+1,1:785])
            print("Predicted probability: ", probability)
            if probability[0,0] >= 0.5:
                print("Predicted label: sweater")
            else:
                print("Predicted label: t-shirt")
                
###########################
#Program functions
###########################

##########
# MAIN FUNCTION
##########
def Main ():
    """Program entry point: load the dataset and start the interactive menu."""
    print("Running the main function")
    #Load the CSV data once; the menu passes it on to every classifier.
    sample = Initialization()
    #print(sample) #Activate to show summary of imported data
    Menu(sample)

##########
# MENU
##########
def Menu(DataSample):
    """Interactive top-level menu; dispatches to the chosen classifier.

    Loops until the user presses Q/q. Implemented as a `while` loop rather
    than the previous tail recursion (`Menu(DataSample)` after each choice),
    which could exhaust Python's recursion limit in a long session.

    Parameters
    ----------
    DataSample : pandas.DataFrame
        The full dataset, passed through unchanged to every classifier.
    """
    while True:
        Option = input("To run k-Nearest Neighbours press K, \n to run logistic regression with simple input data press S, \n to run logistic regression with full pixel data press L, \n to run a neural network press N, \n to exit press Q: ")
        if Option == "Q" or Option == "q":
            return
        elif Option == "K" or Option == "k":
            KNearestNeighbours(DataSample)
        elif Option == "S" or Option == "s":
            SimpleLogReg(DataSample)
        elif Option == "L" or Option == "l":
            FullLogReg(DataSample)
        elif Option == "N" or Option == "n":
            NeuralNetwork(DataSample)

##########
# ASK NUMBER
##########
def AskNumber():
    """Prompt repeatedly until the user enters a non-negative decimal integer.

    Returns
    -------
    int
        The parsed number.

    Uses str.isdecimal() instead of str.isnumeric(): isnumeric() also
    accepts characters such as '²' or '½' that int() cannot parse, which
    previously crashed the program with a ValueError.
    """
    while True:
        NumberInput = input("Please enter a valid number: ")
        #Every isdecimal() string is guaranteed to be parseable by int().
        if NumberInput.isdecimal():
            return int(NumberInput)

    
    
##########
# EXECUTING THE FUNCTION
##########
#Run the program only when executed directly, not when imported as a module.
if __name__ == "__main__":
    Main()