function mocassin_prot(r, mdir, sdir)

% This program determines the Nash equilibrium (through the LP solution) for
% each of the proteins in the primary network. The scoring (distance) tables
% for the proteins should already exist as .txt files labeled 1_mtx.txt,
% 2_mtx.txt, etc. in a single directory.
%
% The program takes the following inputs:
% r = the number of taxa in the network (i.e. how many scoring matrices
%     exist)
% mdir = the directory where the distance matrices are stored
% sdir = the directory where the solution data files are to be stored
%
% mdir and sdir may be given with or without a trailing file separator.
%
% The program outputs matrices (in .mat format) where the first row (minus
% the (1,1) matrix entry) gives the conservation probability vector and the
% first column (minus the (1,1) matrix entry) gives the diversity weight
% vector. The game value is in the (3,3) cell of the matrix. Note that if the
% protein only has one domain then the third row of the matrix will still be
% added. The solution .mat file labeled SolutionK.mat is the solution for the
% distance matrix K_mtx.txt. If there was no solution (the LP didn't
% converge, etc.) then the output .mat file will contain no matrix and will
% have the string variable g = 'no solution'. The solution matrix is also
% stored to a text file named SolutionK.txt.
%
% Only weights above a small threshold are copied into the output matrix, so
% rows that were locked out of the calculation (marked internally with -1)
% appear as 0 in the output.
%
% Author : Brittney (Hinds) Keel
% Date created : 20 Oct 11
% Last modified : 31 Dec 14

% Reuse a parallel pool if one is already open (a bare PARPOOL call errors in
% that case). A pool started here is closed when this function exits, even if
% the loop below throws.
pool = gcp('nocreate');
if isempty(pool)
    pool = parpool; % uses the default cluster profile
    closePool = onCleanup(@() delete(pool)); %#ok<NASGU>
end

parfor h = 1:r
    mocassin_solve_protein(h, mdir, sdir);
end


function mocassin_solve_protein(h, mdir, sdir)
% Solves the LP for protein h and writes SolutionH.mat / SolutionH.txt to sdir.

% load the .txt file that contains the scoring matrix
Table = importdata(fullfile(mdir, sprintf('%d_mtx.txt', h)));

% importdata only returns a struct when the file holds both text and numeric
% data; anything else is treated as "no domains existed on the protein".
if ~isstruct(Table)
    mocassin_write_no_solution(h, sdir, 'no solution');
    return
end

T = Table.data; % the distance matrix

T(:,h) = []; % lock out the reference column (the column that corresponds to
             % the protein whose table we are working with); it is not used
             % in the calculation (no self-loops in the network graph)

[s,t] = size(T);

% Perturb any zero values slightly, as taking the log of 0 would give
% infinite values.
T(T==0) = 1e-40;

% Lock out rows whose entries sum to t*2870 (rows that correspond to domains
% that do not exist in the rest of the proteins).
% NOTE(review): 2870 is presumably the "domain absent" distance value -
% confirm against the code that generates the tables.
% The sum is taken explicitly along dimension 2 so that a table with a single
% remaining column still yields one sum per row.
keep = sum(T, 2) ~= t*2870; % true where the row stays in the calculation

P = T(keep,:); % only the rows that should be used in calculation

% If removing the locked-out rows left nothing, record "no solution".
if isempty(P)
    mocassin_write_no_solution(h, sdir, 'no solution');
    return
end

D = -log(P); % negative log transform converts the distances to similarity scores

[gameval, x, y, checkx] = proteinlinearprogram(D'); % external LP solver

if checkx == 0 % the LP did not converge
    mocassin_write_no_solution(h, sdir, 'no solution - LP could not converge');
    return
end

% The concatenations below need row vectors.
x = x(:).';
y = y(:).';

% Re-insert a -1 placeholder into the row weight vector at the position of
% every locked-out row, so that x lines up with the rows of T again.
for p = 1:s
    if ~keep(p)
        x = [x(1:p-1) -1 x(p:end)];
    end
end

% Re-insert a 0 into the column weight vector at the position of the
% reference protein (its weight to itself is 0).
if h == 1
    y = [0 y];
elseif h == t+1 % the last protein
    y = [y 0];
else
    y = [y(1:h-1) 0 y(h:end)];
end

% Build the output matrix; the (1,1) entry is left as 0.
S = zeros(length(x)+1, length(y)+1);

% Only non-machine-zero weights are copied. The thresholds differ for the
% two vectors, as in the original implementation.
yUse = y > 0.001;
S(1, [false yUse]) = y(yUse);       % column weight vector -> top row of S

xUse = x > 0.000001;                % this also drops the -1 placeholders
S([false xUse], 1) = x(xUse).';     % row weight vector -> first column of S

S(3,3) = gameval; % the (3,3) entry of S holds the game value (grows S if needed)

savefilepath1 = fullfile(sdir, sprintf('Solution%d.mat', h));
parsave(savefilepath1, S); % save the matrix S to the output file

filename1 = fullfile(sdir, sprintf('Solution%d.txt', h));
dlmwrite(filename1, S, 'delimiter', '\t') % write matrix tab delimited


function mocassin_write_no_solution(h, sdir, txtMessage)
% Writes the "no solution" indicator for protein h: the string variable g to
% SolutionH.mat and txtMessage to SolutionH.txt, both in sdir.

filepath = fullfile(sdir, sprintf('Solution%d.mat', h));
g = 'no solution'; % no solution indicator variable
parsave(filepath, g);

filepath2 = fullfile(sdir, sprintf('Solution%d.txt', h));
[fid1, openMsg] = fopen(filepath2, 'wt'); % opens the writable output file
if fid1 == -1
    error('mocassin_prot:cannotOpenOutput', ...
          'Could not open %s for writing: %s', filepath2, openMsg);
end
fprintf(fid1, '%s', txtMessage); % prints the indicator to the .txt output file
fclose(fid1);