#! /usr/bin/env python
import sys
import re
import random
import math

# Command-line synopsis; the main program prints it when too few arguments are given.
usage = "\nusage:   ./neural_net.py train.txt test.txt nLevels D[0] ... D[nLevels] alpha n_iter"

def g(s):
   """
   Non-linear transform applied at each node: the hyperbolic tangent of s.

   math.tanh is used instead of the explicit
   (e^s - e^-s) / (e^s + e^-s) quotient because math.exp raises
   OverflowError once |s| exceeds roughly 709, whereas math.tanh simply
   saturates at -1.0 or 1.0.

   s -- the weighted input sum of a node (a number).
   Returns a float in the range [-1, 1].
   """
   return math.tanh(s)

def g_prime(s):
   """
   Derivative of the non-linear transform applied at each node.

   The derivative of tanh is (2 / (e^s + e^-s))^2.  Multiplying the numerator
   and denominator of the inner quotient by e^-|s| gives the equivalent form
   2 e^-|s| / (1 + e^-2|s|), which only ever evaluates exp of a non-positive
   number and therefore cannot overflow; for very large |s| it underflows
   to 0.0 instead of raising OverflowError.

   s -- the weighted input sum of a node (a number).
   Returns a float in the range [0, 1].
   """
   decay = math.exp(-abs(s))
   temp = 2.0 * decay / (1.0 + decay * decay)
   return temp * temp

class NeuralNetwork:
   """
   Fully connected feed-forward neural network trained with backpropagation.

   nLevels is the number of levels in the network (excluding the input).

   D[L] for L = 0, 1, ..., nLevels is the number of nodes at level L.
   D[0] is the size of the input (level zero is the input vector) and
   D[nLevels] is the size of the output.  train() and classify() only look
   at output node 0, so D[nLevels] is expected to be 1.

   W[L][i][j] is the weight of the connection from node i at level L to
   node j at level L+1.  Every node also receives a bias input whose value
   is fixed at -1; the weight for that bias is stored in W[L][D[L]][j].

   alpha is the step size applied to the gradient in train().

   The node transform g and its derivative g_prime are the module-level
   functions of the same names.
   """

   def __init__(self, nLevels, D, alpha):
      """
      Create the network and fill W with random weights between -1 and 1.

      The random seed is reset to 0 first to simplify testing and grading:
      the same network structure always gets the same initial weights.
      """
      self.nLevels = nLevels
      self.D = D
      self.alpha = alpha
      self.W = []
      random.seed(0)
      for L in range(nLevels):
         # D[L]+1 rows: one per node at level L plus one for the bias term.
         # Rows are generated in order i, then j, so the sequence of random
         # draws (and hence every weight) is reproducible.
         layer = [[random.uniform(-1, 1) for j in range(D[L+1])]
                  for i in range(D[L]+1)]
         self.W.append(layer)

   def _forward(self, example):
      """
      Propagate one example through the network.

      Returns (x, s) where x[L][i] is the output of node i at level L (with
      x[L][D[L]] holding the constant bias input -1) and s[L][j] is the
      weighted input sum of node j at level L.  s[0] is an empty list because
      the input level has no incoming sums.
      """
      D = self.D
      x = []
      for L in range(self.nLevels + 1):
         outputs = [0] * (D[L] + 1)
         outputs[D[L]] = -1     # bias input
         x.append(outputs)
      for i in range(D[0]):
         x[0][i] = example[i]
      s = [[]]
      for L in range(1, self.nLevels + 1):
         weights = self.W[L-1]
         inputs = x[L-1]
         sums = []
         for j in range(D[L]):
            total = 0
            for i in range(D[L-1] + 1):
               total = total + weights[i][j] * inputs[i]
            sums.append(total)
            x[L][j] = g(total)
         s.append(sums)
      return x, s

   def train(self, example, label):
      """
      Given an example vector (list of length D[0]) and the desired output
      label (-1 or 1), update the network weights using backpropagation.

      Only output node 0 contributes to the error.
      """
      nLevels = self.nLevels
      D = self.D
      x, s = self._forward(example)
      # delta[L][j] is the error signal for node j at level L.
      delta = [[] for L in range(nLevels + 1)]
      delta[nLevels] = [(x[nLevels][0] - label) * g_prime(s[nLevels][0])]
      for L in range(nLevels, 1, -1):
         # The extra trailing slot belongs to the bias input; it has no
         # incoming sum, so its delta is left at 0.
         below = [0] * (D[L-1] + 1)
         for i in range(D[L-1]):
            backflow = 0
            for j in range(D[L]):
               backflow = backflow + self.W[L-1][i][j] * delta[L][j]
            below[i] = g_prime(s[L-1][i]) * backflow
         delta[L-1] = below
      # Every delta was computed from the old weights above, so the gradient
      # step can be applied in place without a separate gradient table.
      for L in range(nLevels):
         for i in range(D[L] + 1):
            row = self.W[L][i]
            for j in range(D[L+1]):
               partial = delta[L+1][j] * x[L][i]
               row[j] = row[j] - self.alpha * partial

   def classify(self, example):
      """
      Given a test example (list of length D[0]), compute the classification
      (either -1 or 1) predicted by the neural network.
      """
      x, s = self._forward(example)
      # g is tanh, which has the same sign as its argument, so thresholding
      # the input sum of the top node is the same as thresholding its output.
      if s[self.nLevels][0] < 0:
         return -1
      return 1

   def print_weights(self):
      """
      Output current weights: a 'Level' header per level, then one line per
      source node i listing W[L][i][j] for every j.

      The text is written with sys.stdout.write rather than the print
      statement so the method also runs under Python 3; the spacing
      reproduces what the equivalent comma-separated print statements emit.
      """
      write = sys.stdout.write
      for L in range(self.nLevels):
         write('Level  %s\n' % L)
         for i in range(self.D[L] + 1):
            cells = ['W[ %s ][ %s ][ %s ] = %2.4f   ' % (L, i, j, self.W[L][i][j])
                     for j in range(self.D[L+1])]
            # Each row is terminated by a lone ' ' item before the newline.
            cells.append(' ')
            write(' '.join(cells) + '\n')

   def train_network(self, examples, labels):
      """
      Train the network using the given examples and labels (one pass, in
      order; labels[n] is the label of examples[n]).
      """
      for n in range(len(examples)):
         self.train(examples[n], labels[n])

   def classify_examples(self, examples):
      """
      Classify examples.
      Return a list of predicted labels.
      """
      return [self.classify(example) for example in examples]

def compute_accuracy(true_labels, pred_labels):
   """
   Compute classification accuracy.

   true_labels -- sequence of expected labels.
   pred_labels -- sequence of predicted labels; pred_labels[n] is compared
                  with true_labels[n] for every n in true_labels.

   Returns the fraction of matching positions as a float.  An empty
   true_labels yields 0.0 rather than a ZeroDivisionError.
   """
   n_total = len(true_labels)
   if n_total == 0:
      return 0.0
   n_correct = sum(1 for n in range(n_total)
                   if true_labels[n] == pred_labels[n])
   return float(n_correct) / n_total

def load_data(filename):
   """
   Load a data file.

   The first line of the file is skipped.  Every following non-blank line is
   split on whitespace; all fields but the last are converted to float and
   form one example, and the last field (also converted to float) is its
   label.  Blank lines are ignored.

   Returns the two-element list [examples, labels].
   Raises ValueError if a field cannot be converted to float.
   """
   examples = []
   labels = []
   # The with-block guarantees the file is closed even if parsing fails.
   with open(filename, 'r') as data_file:
      next(data_file, None)    # discard the first line
      for line in data_file:
         fields = line.split()
         if not fields:
            continue
         examples.append([float(value) for value in fields[:-1]])
         labels.append(float(fields[-1]))
   return [examples, labels]

# Main program.
#
# Parses the command line described by `usage`, loads the training and test
# files, trains the network for n_iter passes over the training data, and
# prints the accuracy on both data sets after every 10th pass, followed by
# the final weights.
#
# Every message is built as a single string and passed to print(...) so that
# the output is the same whether print is a statement or a function.
if __name__ == '__main__':
   # parse arguments
   if len(sys.argv) < 6:
      print(usage)
      sys.exit(1)    # a usage error is reported with a non-zero exit status
   trainfile = sys.argv[1]
   testfile = sys.argv[2]
   nLevels = int(sys.argv[3])
   # After nLevels come nLevels+1 layer sizes, then alpha and n_iter; check
   # they are all present instead of failing later with an IndexError.
   if len(sys.argv) < 4 + (nLevels + 1) + 2:
      print(usage)
      sys.exit(1)
   D = []
   for n in range(0, nLevels + 1):
      D.append(int(sys.argv[4 + n]))
   alpha = float(sys.argv[4 + nLevels + 1])
   n_iter = int(sys.argv[4 + nLevels + 2])
   print('Using parameters:')
   print('train file =  %s' % trainfile)
   print('test file =  %s' % testfile)
   print('nLevels =  %s' % nLevels)
   print('D =  %s' % (D,))
   print('alpha =  %s' % alpha)
   print('# iterations =  %s' % n_iter)
   print(' ')
   # load data files
   [train_examples, train_labels] = load_data(trainfile)
   [test_examples,  test_labels]  = load_data(testfile)
   # create neural network
   net = NeuralNetwork(nLevels, D, alpha)
   print('Initial weights:')
   net.print_weights()
   print(' ')
   # compute training and test error
   for epoch in range(0, n_iter):
      net.train_network(train_examples, train_labels)
      # Predictions are only needed on the epochs that are reported, and
      # classifying does not modify the network, so skip it otherwise.
      if (((epoch + 1) % 10) == 0):
         train_labels_pred = net.classify_examples(train_examples)
         test_labels_pred = net.classify_examples(test_examples)
         train_acc = compute_accuracy(train_labels, train_labels_pred)
         test_acc  = compute_accuracy(test_labels,  test_labels_pred)
         print('Iteration  %s Train accuracy = %1.3f Test accuracy = %1.3f'
               % (epoch + 1, train_acc, test_acc))
   # print final weights
   print(' ')
   print('Final learned weights:')
   net.print_weights()