function mocassin_prot_iter(r, mdir, sdir1, sdir2)
% MOCASSIN_PROT_ITER  Re-solve the per-protein LP after removing each
% protein's highest-weighted domain from a previous run.
%
% This program determines the Nash equilibrium (through the LP solution) for
% each of the proteins in the network after removing the domain which
% received the highest weight in a previous network generation. The scoring
% (distance) tables for the proteins should already exist as .txt files
% labeled 1_mtx.txt, 2_mtx.txt, etc. in a single directory, and the solution
% files from the previous run should exist as .mat files labeled
% Solution1.mat, Solution2.mat, etc. in a single directory.
%
% Inputs:
%   r     = the number of taxa in the network (i.e. how many scoring
%           matrices exist)
%   mdir  = the directory where the distance matrices are stored
%   sdir1 = the directory where the solution data files from the previous
%           run are stored
%   sdir2 = the directory where the new solution data files are to be stored
%
%   File names are appended to mdir/sdir1/sdir2 by plain concatenation, so
%   each directory string must end with a file separator.
%
% Outputs (written to sdir2, one pair of files per protein K):
%   SolutionK.mat holds a matrix S whose first row (minus the (1,1) entry)
%   gives the conservation probability vector and whose first column (minus
%   the (1,1) entry) gives the diversity weight vector. The game value is in
%   the (3,3) cell of S; if the protein has only one remaining domain the
%   third row is still added to hold it. SolutionK.txt holds the same matrix,
%   tab delimited.
%
%   A protein that is alone in its cluster, or that has no domain left after
%   the removal, gets a 2-by-(r+1) matrix S with a single 1 at S(1,K+1).
%
%   If there was no solution (no usable rows, the LP did not converge, or the
%   previous solution file itself recorded no solution) SolutionK.mat contains
%   no matrix, only the string variable g = 'no solution', and SolutionK.txt
%   contains a short message.
%
% Depends on findgraphclusters and proteinlinearprogram, which are defined
% outside this file.
%
% Author : Brittney Keel
% Date created : 18 June 15
% Last modified :  June 15

% Row-sum value that marks a domain as absent from every other protein in the
% cluster (every entry of the row equals this score).
% NOTE(review): 2870 is taken from the original code; confirm it is the
% "domain absent" score used when the distance tables are generated.
ABSENT_SCORE = 2870;
ZERO_FLOOR   = 1e-40;     % replacement for exact zeros before taking the log
MIN_COL_WT   = 0.001;     % column (protein) weights at or below this are dropped
MIN_ROW_WT   = 0.000001;  % row (domain) weights at or below this are dropped

% First we gather the clusters identified in the previous run of ProDoMO.
[clusters,~,~] = findgraphclusters(r,sdir1);

% For each cluster, we loop through the proteins, removing the domain that
% received the highest weight in the previous LP solution and rerunning the
% LP for only the proteins in the cluster.
for i = 1:length(clusters)
    C = clusters{i}; % the array of protein numbers that belong to the cluster

    % A protein alone in its cluster stays a singleton in the new network.
    if length(C) == 1
        K = C(1);
        mpi_writeSolutionFiles(sdir2, K, mpi_singletonSolution(r, K));
        continue
    end

    members = sort(C); % protein numbers of the cluster in ascending order

    for h = 1:length(C)
        K = C(h);

        % Load the scoring matrix of protein K.
        Table = importdata(strcat(mdir, sprintf('%d_mtx.txt', K)));
        T = Table.data;

        % Load the previous solution. A previous run of this program stores
        % only the variable g (and no S) when it found no solution; in that
        % case there is no weight vector to pick a domain from, so the
        % no-solution marker is carried forward instead of aborting the run.
        DD = load(strcat(sdir1, sprintf('Solution%d.mat', K)));
        if ~isfield(DD, 'S')
            warning('mocassin_prot_iter:noPreviousSolution', ...
                    'Previous solution file for protein %d contains no matrix S; writing no solution.', K);
            mpi_writeNoSolutionFiles(sdir2, K, 'no solution');
            continue
        end
        prevS = DD.S;

        % Diversity weights are the first column without the (1,1) entry.
        div = prevS(:,1);
        div(1) = [];
        [~,I] = max(div);

        T(I,:) = []; % removes domain with highest weight
        [MM,NN] = size(T);

        % If no other domain is left, the protein becomes a singleton
        % cluster in the new network.
        if isempty(T)
            mpi_writeSolutionFiles(sdir2, K, mpi_singletonSolution(r, K));
            continue
        end

        % Keep only the columns of the other proteins in this cluster.
        qq = find(members == K); % position of protein K within the sorted cluster
        others = members;
        others(qq) = [];
        M = T(:,others);
        [s,t] = size(M);

        % Perturb any zero values slightly, as taking the log of 0 would
        % result in infinite values.
        M(M == 0) = ZERO_FLOOR;

        % Lock out rows that correspond to domains that do not exist on any
        % other protein of the cluster (row sum equals t*ABSENT_SCORE).
        % keepRow(p) is true when row p stays in the calculation.
        keepRow = (sum(M, 2) ~= t*ABSENT_SCORE);
        P = M(keepRow, :);

        % If every row was locked out there is nothing to solve: record
        % no solution and move on to the next protein of the cluster.
        if isempty(P)
            mpi_writeNoSolutionFiles(sdir2, K, 'no solution');
            continue
        end

        D = -log(P); % negative log transform: distances become similarity scores

        [gameval, x, y, checkx] = proteinlinearprogram(D'); % solve the LP

        if checkx == 0 % the LP did not converge
            mpi_writeNoSolutionFiles(sdir2, K, 'no solution - LP could not converge');
            continue
        end

        % Re-insert a -1 into the row weight vector at the position of every
        % locked-out row (the -1 indicates that no weight is present).
        % Processing the rows in ascending order keeps the positions aligned.
        % NOTE(review): the concatenations below assume x and y are returned
        % as row vectors -- confirm against proteinlinearprogram.
        lockedRows = find(~keepRow);
        for n = 1:numel(lockedRows)
            p = lockedRows(n);
            x = [x(1:p-1) -1 x(p:end)];
        end

        % Insert a 0 into the column weight vector at the position of
        % protein K itself (the weight of the reference protein to itself
        % is 0). The same expression covers the first and last positions.
        y = [y(1:qq-1) 0 y(qq:end)];

        S = zeros(MM+1, NN+1);

        % Only non-machine-zero weights are written. The (1,1) entry stays 0.
        for p = 1:length(y)
            if y(p) > MIN_COL_WT
                S(1, members(p)+1) = y(p); % column weights go in the top row
            end
        end

        for p = 1:length(x)
            if x(p) > MIN_ROW_WT
                S(p+1, 1) = x(p); % row weights go in the first column
            end
        end

        S(3,3) = gameval; % the (3,3) entry of S is the game value

        mpi_writeSolutionFiles(sdir2, K, S);
    end
end


function S = mpi_singletonSolution(r, K)
% Build the 2-by-(r+1) solution matrix used for a singleton protein K:
% all zeros except a 1 at S(1,K+1).
S = zeros(2, r+1);
S(1, K+1) = 1;


function mpi_writeSolutionFiles(outDir, K, S)
% Save the solution matrix S for protein K as SolutionK.mat (variable S)
% and as tab-delimited SolutionK.txt in outDir.
save(strcat(outDir, sprintf('Solution%d.mat', K)), 'S');
dlmwrite(strcat(outDir, sprintf('Solution%d.txt', K)), S, 'delimiter', '\t')


function mpi_writeNoSolutionFiles(outDir, K, message)
% Record that protein K has no solution: SolutionK.mat gets the string
% variable g = 'no solution' and SolutionK.txt gets the given message.
g = 'no solution'; % variable that indicates no solution
save(strcat(outDir, sprintf('Solution%d.mat', K)), 'g');

txtPath = strcat(outDir, sprintf('Solution%d.txt', K));
[fid, errmsg] = fopen(txtPath, 'wt');
if fid == -1
    % fopen reports failure through the id rather than by raising an error
    error('mocassin_prot_iter:cannotOpenFile', ...
          'Could not open %s for writing: %s', txtPath, errmsg);
end
fprintf(fid, '%s', message);
fclose(fid);