Jacobi-Type Method for Computing Orthogonal Tensor Decompositions

Matlab Codes

 

JacobiCompress Maximizes sums of squares of diagonals or trace of 3-way tensor
solvenbyn_all Solves entire nxnxn cube in all three dimensions using (p,q) pairs
solvenbyn_oneface Solves a single cube orientation
solve2by2 Solves 2x2x2 subproblem
update Alternating least squares for 2x2x2 subproblem in all three dimensions
reduce Solves kron(I,U,V)a=s where s is the (vectorization of) the tensor and has maximum SS or trace
maxdiags_sctheta Solves for best theta1,theta2 that maximizes diagonals of kron(I,U,V)a
perfshuf Shuffles the contents of a vector according to the perfect shuffle


JacobiCompress

function [U,a]=JacobiCompress(U,a,trace,ITS)
% function [U,a]=JacobiCompress(U,a,trace,ITS)
% Maximizes SS diagonals (trace=0) or trace (trace=1) of 
% (U{3}' x U{2}' x U{1}') a
% by iterating a specified number of times (ITS)
%
% INPUTS:
%     a : n^3x1 column vector
%     U : 3x1 cell array
%   ITS : positive integer
% trace : 0 or 1
%
% OUTPUTS:
%     a : n^3x1 column vector
%     U : 3x1 cell array

if nargin==2
   trace=0;
   ITS=3;
elseif nargin==3
   ITS=3;
end

for i=1:ITS
   IT_NUMBER=i;
   
   disp(sprintf('ITERATION #%d',i))

   [SSdiags,SSoffdiags,U,a]=solvenbyn_all(U,a,trace);

   SSdiags_its(i)=SSdiags(end);
   SSoffdiags_its(i)=SSoffdiags(end);

   disp(sprintf('SSdiags for ITERATION #%d=%3.4e',i,SSdiags_its(i)))
   disp(sprintf('SSoffdiags for ITERATION #%d=%3.4e',i,SSoffdiags_its(i)))
   disp(' ')

end


 
solvenbyn_all
function [SSdiags,SSoffdiags,U,a]=solvenbyn_all(U,a,trace,type);
%
% solves entire nxnxn cube in all 3 dimensions using (p,q) pairs
%
% INPUTS:
%      a : n^3x1 vector that represents an nxnxn tensor
%      U : 3x1 cell array containing nxn orthogonal matrices
%   type : is an integer with the following coding
%          0 = no special analyses (default)
%          1 = color plots of the zero-ing affect
%          2 = number of elements less than a prescribed tolerance, EPSILON
%          3 = histogram of the distribution of a
%          4 = plot of the elements of a
%  trace : is an integer with the following coding
%          0 = maximize SS of diagonals
%          1 = maximize trace of diagonals
%
% OUPUTS:
%      SSdiags : length 3*(n(n-1)/2 + 1)-vector that contains the SS of the 
%                diagonals after each sweep of a dimension of the cube 
%                (note that n(n-1)/2 +1 = # of (p,q) pairs in a nxnxn cube)
%	         (SSdiags is 3*number of (p,q) pairs since we do this for
%                each dimension of the cube)
%   SSoffdiags : length 3*(n(n-1)/2 +1)-vector that contains the SS of
%                the offdiagonals after each sweep of a dimension of the cube
%            a : n^3x1 column vector  with SS of diagonals maximized
%            U : 3x1 cell array of nxn orthogonal matrices 
%                that best "diagonalize" a

global CMIN CMAX EPSILON ITS IT_NUMBER
n=ceil(length(a)^(1/3));

if nargin==2
  trace=0;
  type=0;
end
if nargin==3
  type=0;
end

disp(sprintf('TYPE=%d',type))
if trace==0
   disp('maximizing SS squares...')
else
   disp('maximizing trace ...')
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% FIRST FACE (front/back) %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

[SSdiags1,SSoffdiags1,U,a]=solvenbyn_oneface(U,a,trace);

if type==1
  % make color plots to illustrate graphically what happens after
  % each "sweep"
  dummy=colorplots(a,'sweep #1',1);
end

if type==2
  % calculate # elements below epsilon
  num_elem1=length(find(abs(a)<=EPSILON));
end

if type==3
  % make histogram of distribution of a
  dummy=histogram(a,1,IT_NUMBER,ITS);
end

if type==4
  % make a plot of the elements of a:
  figure(2)
  plot(1:length(a),flipud(sort(abs(a))))
  hold on
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% SECOND FACE (top/bottom) %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% shuffle a
A=reshape(a,n,n^2);
a=reshape(A',n^3,1);

% shuffle U's: U3'xU2'xU1' -> U1'xU3'xU2' = W3'xW2'xW1'
W{3}=U{1};
W{2}=U{3};
W{1}=U{2};


% solve this face (top/bottom)
[SSdiags2,SSoffdiags2,W,a]=solvenbyn_oneface(W,a,trace);

% shuffle back a
A=reshape(a,n^2,n);
a=reshape(A',n^3,1);

% shuffle back U's
U{3}=W{2};
U{2}=W{1};
U{1}=W{3};

if type==1
  % make color plots to illustrate graphically what happens after
  % each "sweep"
  dummy=colorplots(a,'sweep #2',1);
end

if type==2
  % calculate # elements below epsilon
  num_elem2=length(find(abs(a)<=EPSILON));
end

if type==3
  % make a histogram of the distribution of elements of a:
  dummy=histogram(a,2,IT_NUMBER,ITS);
end

if type==4
  % make a plot of the elements of a:
  figure(2)
  plot(1:length(a),flipud(sort(abs(a))))
end


%%%%%%%%%%%%%%%%%%%%%%%%%
%%% THIRD FACE (sides) %%
%%%%%%%%%%%%%%%%%%%%%%%%%

% shuffle a
A=reshape(a,n^2,n);
a=reshape(A',n^3,1);

% shuffle U's: U3'xU2'xU1' -> U2'xU1'xU3' = W3'xW2'xW1
W{3}=U{2};
W{2}=U{1};
W{1}=U{3};


% solve this face (sides)
[SSdiags3,SSoffdiags3,W,a]=solvenbyn_oneface(W,a,trace);

% shuffle back a
A=reshape(a,n,n^2);
a=reshape(A',n^3,1);

% shuffle back U's
U{3}=W{1};
U{2}=W{3};
U{1}=W{2};

if type==1
  % make color plots to illustrate graphically what happens after
  % each "sweep"
  dummy=colorplots(a,'sweep #3',1);
end

if type==2
  % calculate # elements below epsilon
  num_elem3=length(find(abs(a)<=EPSILON));
end

if type==3
  % make a histogram of the distribution of elements of a:
  dummy=histogram(a,3,IT_NUMBER,ITS);
end

if type==4
  % make a plot of the elements of a:
  figure(2)
  plot(1:length(a),flipud(sort(abs(a))))
end


if type==2
  num_elem=[num_elem1; num_elem2; num_elem3]
end

SSdiags=[SSdiags1 SSdiags2 SSdiags3];
SSoffdiags=[SSoffdiags1 SSoffdiags2 SSoffdiags3];

solvenbyn_oneface

function [SSd,SSo,U,a]=solvenbyn_oneface(U,a,trace);
% Given a single orientation of the nxnxn cube, the result is
% a = (U{3} x U{2} x U{1}) a
% such that the diagonals of the tensor are maximized.  Also 
% SSo and SSd are the sums of squares of the offdiagonals
% and diagonals, respectively, after solving a subproblem
%
% INPUTS:
%      a : n^3x1 column vector representing nxnxn tensor
%      U : 3x1 cell array of nxn orthogonal matrices
%  trace : 0 or 1 depending of if SS or trace is maximized
%
% let pairs=(n-1)n/2 + 1 = # of possible (p,q) pairs in a nxnxn cube.
%
% OUTPUTS: 
%  SSd : pairsx1 vector 
%  SSo : pairsx1 vector
%    a : n^3x1 column vector 
%    U : 3x1 cell array of nxn orthogonal matrices
%

if nargin<3
   trace=0;
end

global CMIN CMAX VIEW
n=ceil(length(a)^(1/3));

A=zeros(n,n,n);
for i=1:n
  A(:,:,i)=reshape(a((i-1)*n^2+1:i*n^2),n,n);
end

counter=0;  %used only to index the SS totals
for i=1:n-1
  for j=i+1:n

     counter=counter+1;

     % solves the 2x2x2 subproblem

     M1=[A(i,i,i) A(i,j,i); A(j,i,i) A(j,j,i)]; %pick (p,q) pairs
     M2=[A(i,i,j) A(i,j,j); A(j,i,j) A(j,j,j)];
     Mvec=reshape([M1 M2],8,1);
     [V,b]=solve2by2(Mvec,trace);
     
     % updates all the other elements of a with the orthogonal matrices
     % i.e., a=kron(W{3}',kron(W{2}',W{1}'))*a;

     if n>2 

        % below, we create a vector with integers 1:n except i,j
   
        vec=1:n;
        count=1;
        for s=1:n
           if s~=i & s~=j
             newvec(count)=vec(s);
             count=count+1;
           end
        end

        % look at where one index is fixed and other 2 are i,j
        % in this case, the update is by a kronprod of 2 matrices.
     
        for k=newvec

           % first index constant, then A=kron(V{3}',V{2}')*vec(A)
	   % (need to do reshaping because of matlab format)
           G=reshape(A(k,[i j],[i j]),4,1);
	   H = V{2}'*reshape(G,2,2)*V{3};
           A(k,[i,j],i)=H(:,1)';
           A(k,[i j],j)=H(:,2)';

           % second index constant, then A=kron(V{3}',V{1}')*vec(A)
           G = reshape(A([i j],k,[i j]),4,1);
	   H = V{1}'*reshape(G,2,2)*V{3};
           A([i,j],k,i) = H(:,1)';
           A([i j],k,j) = H(:,2)';

           % third index constant, then A=kron(V{2}',V{1}')*vec(A)
           A([i j],[i j],k) = V{1}'*A([i j],[i j],k)*V{2};
        
        end

        % look at where 2 indices are fixed and the other is i or j
        % in this case, the update is just a multiplication by 1 matrix.

        for k=newvec
           for l=newvec

	      % first and second indices constant, then A=V{3}'*vec(A)
	      G = reshape(A(k,l,[i j]),2,1);
	      H = V{3}'*G;
              A(k,l,i)=H(1);
              A(k,l,j)=H(2);
           
	      % second and third indices constant, then A=V{1}'*vec(A)
 	      A([i j],k,l) = V{1}'*A([i j],k,l);
	    
	      % first and third indices constant, then A=V{2}'*vec(A)
 	      H = V{2}'*A(k,[i j],l)';
	      A(k,[i j],l)=H';
 	   end
        end
     end

     % look at where all 3 indices are i,j
     % in this case, the update is given by the output of solve2by2()

     A([i j],[i j],i)=reshape(b(1:4),2,2);
     A([i j],[i j],j)=reshape(b(5:8),2,2);

     % change back to vector format

     a=reshape(A,n^3,1);

     
     % END UPDATE

     % calculates SS of diags and offdiags

     SSdiags=0;
     for k=1:n
        A(:,:,k)=reshape(a((k-1)*n^2+1:k*n^2),n,n);
        SSdiags=SSdiags+A(k,k,k)^2;
     end
     SSoffdiags=sum(a.^2)-SSdiags;

     SSd(counter)=SSdiags;
     SSo(counter)=SSoffdiags;

     % update new U's (multiplied by old U's)

     for k=1:3
       U{k}(:,[i j]) = U{k}(:,[i j])*V{k};
     end

  end
end


solve2by2

function [U,a]=solve2by2(a,trace)
%
% Solves the 2x2x2 subproblem
% a = (U{3} x U{2} x U{1}) a
% where a represents a 2x2x2 tensor and trace determines whether
% we maximize SS or trace
%
% INPUTS:
%     a : 8x1 column vector
% trace : 0 or 1 depending on maximizing SS or trace
%
% OUTPUTS:
%     a : 8x1 column vector
%     U : 3x1 cell array of 2x2 orthogonal matrices
 
if nargin<2
   trace=0;
end

for j=1:3
  U{j}=eye(2);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%
% FIRST FACE (front/back) %
%%%%%%%%%%%%%%%%%%%%%%%%%%%

[U,a]=update(U,a,trace);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SECOND FACE (top/bottom) %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%shuffle a
A=reshape(a,2,4);
a=reshape(A',8,1);

% shuffle U's: U3'xU2'xU1' -> U1'xU3'xU2' = W3'xW2'xW1'
W{3}=U{1};
W{2}=U{3};
W{1}=U{2};

% solve this face (top/bottom)
[W,a]=update(W,a,trace);

% shuffle back a
A=reshape(a,4,2);
a=reshape(A',8,1);

% shuffle back U's
U{3}=W{2};
U{2}=W{1};
U{1}=W{3};

%%%%%%%%%%%%%%%%%%%%%%
% THIRD FACE (sides) %
%%%%%%%%%%%%%%%%%%%%%%

% shuffle a
A=reshape(a,4,2);
a=reshape(A',8,1);

% shuffle U's: U3'xU2'xU1' -> U2'xU1'xU3' = W3'xW2'xW1
W{3}=U{2};
W{2}=U{1};
W{1}=U{3};

% solve this face (sides)
[W,a]=update(W,a,trace);

% shuffle back a
A=reshape(a,2,4);
a=reshape(A',8,1);

% shuffle back U's
U{3}=W{1};
U{2}=W{3};
U{1}=W{2};
   
 

update

function [Utilde,atilde]=update(U,a,trace);
%
% Solves
% atilde = (Utilde{3} x Utilde{2} x Utilde{1}) a
% for the 2x2x2 subproblem by holding one dimension constant and solving an
% optimization problem in the other two dimensions. It is repeated for
% each cube orientation. 
%
% INPUTS:
%     a  : 8x1 column vector
%     U : 3x1 cell array of 2x2 orthogonal matrices
% trace : 0 or 1 (depending if maximize SS or trace)
%
% OUTPUTS:
%   atilde : 8x1 column vector
%   Utilde : 3x1 cell array of 2x2 orthogonal matrices

if nargin<3
   trace=0;
end

for i=1:3

  % shuffle the vector a
  if i==2
    a=a(perfshuf(2,8));
  elseif i==3
    a=a(perfshuf(4,8));
  end

  A1=reshape(a(1:4),2,2);
  A2=reshape(a(5:8),2,2);

  % S1,S2 gives my new a
  [Q1,Q2,S1,S2]=reduce(A1,A2,trace);  %maximizes diagonals

  atilde1=reshape(S1,4,1);
  atilde2=reshape(S2,4,1);
  a=[atilde1;atilde2];

  % unshuffle the vector a and assign matrices
  if i==1
    U{1}=U{1}*Q1;
    U{2}=U{2}*Q2;
  elseif i==2
    a=a(perfshuf(4,8));
     U{3}=U{3}*Q1;
     U{1}=U{1}*Q2;
  elseif i==3
    a=a(perfshuf(2,8));
    U{2}=U{2}*Q1;
    U{3}=U{3}*Q2;
  end

end

atilde=a;
Utilde=U;


reduce
function [Q1,Q2,S1,S2]=reduce(A1,A2,trace);
%
% A1 and A2 are 2x2 matrices consisting of the two facets
% of the 2x2x2 cube a
%
% Q1,Q2 are orthogonal rotation matrices that maximize the
% SS of the A1(1,1) and A2(2,2)
% (trace=0) or Q1,Q2 are orthogonal
% matrices that maximize A1(1,1)+A2(2,2) (trace=1) both found by
% finding the SVD of a specific matrix
%
% S1,S2 are the matrices that corresponding with A1 and A2
% with maximum trace.  In the case of max SS, we only have 1
% since we hold 2 constant and vary 1 in each direction.
%
% Note that Q1'*A1*Q2=S1 and Q1'*A2*Q2=S2 for the trace.
%           Q1*A1=S1 for the SS diags
%
% INPUTS:
%    A1,A2 : 2x2 matrices
%    trace : 0 or 1 (default is 0)
%
% OUTPUTS: 
%    Q1,Q2 : 2x2 orthogonal matrices
%    S1,S2 : 2x2 matrices

if nargin<3
   trace=0;
end

if trace==0

   A=[A1(:,1) A2(:,2)];
   [U,S,V]=svd(A);

   % in our derivation we assumed SVD output had V as
   % a rotation matrix.  If it outputs as a reflection
   % matrix, then switch output:

   if sign(V(1,1))~=sign(V(2,2)) % output is reflection
      V(:,2)=-V(:,2);
      U(:,2)=-U(:,2);
   end

   % form M matrix
   M=[S(2,2)*V(2,1) S(1,1)*V(1,1); S(1,1)*V(2,1) S(2,2)*V(1,1)];
   [Uo,So,Vo]=svd(M);

   maxSS=So(1,1)^2;
   c=Vo(1,2); 
   s=Vo(2,2);

   Z=[c s;-s c];
   Q1=Z*U';
   Q2=eye(2);

   S1=Q1*A1;
   S2=Q1*A2;

else     % maximize using trace

  % decide whether to use both rotation matrices or one of each

  if (A1(1,1)+A2(2,2))>=(A1(1,1)-A2(2,2)) % use rotation
     disp('using rot/rot')
  
     B=[A1(1,1)+A2(2,2) A1(1,2)-A2(2,1);
        A1(2,1)-A2(1,2) A1(2,2)+A2(1,1)];
     [Q1,S,Q2]=svd(B);

     % we assumed in derivation, Q1 and Q2 were rotation 
     % matrices, so convert based on SVD output

     if sign(Q1(1,1))~=sign(Q1(2,2)) % output is reflection
        Q1(:,2)=-Q1(:,2);
     end
     if sign(Q2(1,1))~=sign(Q2(2,2)) % output is reflection
        Q2(:,2)=-Q2(:,2);
     end


  else % use rotation and reflection

     disp('using rot/ref')
     B=[A1(1,1)-A2(2,2) A1(1,2)+A2(2,1);
     A1(2,1)+A2(1,2) A1(2,2)-A2(1,1)];
     [Q1,S,Q2]=svd(B);

     if sign(Q1(1,1))==sign(Q1(2,2)) %output is rotation
        Q1(:,2)=-Q1(:,2);
     end
     if sign(Q2(1,1))~=sign(Q2(2,2)) % output is reflection
        Q2(:,2)=-Q2(:,2);
     end 
  end

S1=Q1'*A1*Q2;
S2=Q1'*A2*Q2;

end

perfshuf
function v = perfshuf(p,n);
% computes the perfect shuffle vector, Pi_{n,nm}x
%
% INPUTS:
%    n,p : integers such that p divides n
%
% OUTPUTS:
%    v : 1xn row vector of integers

m=floor(n/p);
if m*p~=n
   error('perfshuf error: n must be a multiple of p')
end

v=zeros(1,n);
for i=0:m-1
   v( (1:p) + p*i ) = (1+i):m:n;
end