WOW !! MUCH LOVE ! SO WORLD PEACE !
Fond bitcoin pour l'amélioration du site: 1memzGeKS7CB3ECNkzSn2qHwxU6NZoJ8o
  Dogecoin (tips/pourboires): DCLoo9Dd4qECqpMLurdgGnaoqbftj16Nvp


Home | Publier un mémoire | Une page au hasard

 > 

Techniques d'extraction de connaissances appliquées aux données du Web

( Télécharger le fichier original )
par Malika CHARRAD
Ecole Nationale des Sciences de l'Informatique, Université de la Manouba, Tunis - Mastère en informatique, Option : Génies Documentiel et Logiciel 2005
  

précédent sommaire

Bitcoin is a swarm of cyber hornets serving the goddess of wisdom, feeding on the fire of truth, exponentially growing ever smarter, faster, and stronger behind a wall of encrypted energy

som_randinit

%Cette fonction permet l'initialisation des cartes de Kohonen function sMap = som_randinit(D, varargin)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% check arguments

% data

if isstruct(D),

data _name = D.name;

comp_names = D.comp_names;

comp_norm = D.comp_norm;

D = D.data;

struct_mode = 1;

else

data_name = inputname(1);

struct _mode = 0;

end

[dlen dim] = size(D);

% varargin

sMap = [];

sTopol = som_topol_struct;

sTopol.msize = 0;

munits = NaN;

i=1;

while i<=length(varargin),

argok = 1;

if ischar(varargin{i}),

switch varargin {i},

case 'munits', i=i+1; munits = varargin{i}; sTopol.msize = 0; case 'msize', i=i+1; sTopol.msize = varargin{i};

munits = prod(sTopol.msize);

case 'lattice', i=i+1; sTopol.lattice = varargin{i}; case 'shape', i=i+1; sTopol.shape = varargin{i};

case {'som_topol','sTopol','topol'}, i=i+1; sTopol = varargin{i};

case {'som_map','sMap','map'}, i=i+1; sMap = varargin{i}; sTopol = sMap.topol;

case {'hexa','rect'}, sTopol.lattice = varargin{i};

case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i}; otherwise argok=0;

end

elseif isstruct(varargin {i}) & isfield(varargin {i} ,'type'), switch varargin{i} .type,

case 'som_topol',

sTopol = varargin{i};

case 'som_map',

sMap = varargin{i};

sTopol = sMap.topol;

otherwise argok=0;

end

else

argok = 0;

end

if ~argok,

disp(['(som_topol_struct) Ignoring invalid argument #' num2str(i)]);

end

i = i+1;

end

if ~isempty(sMap),

[munits dim2] = size(sMap.codebook);

if dim2 ~= dim, error('Map and data must have the same dimension.'); end

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% create map

% map struct

if ~isempty(sMap),

sMap = som_set(sMap,'topol',sTopol);

else

if ~prod(sTopol.msize),

if isnan(munits),

sTopol = som_topol_struct('data',D,sTopol);

else

sTopol = som_topol_struct('data',D,'munits',munits,sTopol);

end

end

sMap = som_map_struct(dim, sTopol);

end

if struct_mode,

sMap = som_set(sMap,'comp_names',comp_names,'comp_norm',comp_norm);

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% initialization

% train struct

sTrain = som_train_struct('algorithm','randinit');

sTrain = som_set(sTrain,'data_name',data_name);

munits = prod(sMap.topol.msize);

sMap.codebook = rand([munits dim]);

% set interval of each component to correct value

for i = 1:dim,

inds = find(~isnan(D(:,i)) & ~isinf(D(:,i)));

if isempty(inds), mi = 0; ma = 1;

else ma = max(D(inds,i)); mi = min(D(inds,i));

end

sMap.codebook(:,i) = (ma - mi) * sMap.codebook(:,i) + mi;

end

% training struct

sTrain = som_set(sTrain,'time',datestr(now,0));

sMap.trainhist = sTrain;

return;

som_seqtrain

%Cette fonction sert à l'apprentissage des cartes de Kohonen

function [sMap, sTrain] = som_seqtrain(sMap, D, varargin)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Check arguments

error(nargchk(2, Inf, nargin)); % check the number of input arguments

% map

struct_mode = isstruct(sMap);

if struct_mode,

sTopol = sMap.topol;

else

orig_size = size(sMap);

if ndims(sMap) > 2,

si = size(sMap); dim = si(end); msize = si(1 :end-1);

M = reshape(sMap,[prod(msize) dim]);

else

msize = [orig_size(1) 1];

dim = orig_size(2);

end

sMap = som_map_struct(dim,'msize',msize);

sTopol = sMap.topol;

end

[munits dim] = size(sMap.codebook);

% data

if isstruct(D),

data _name = D.name;

D = D.data;

else

data_name = inputname(2);

end

D = D(find(sum(isnan(D),2) < dim),:); % remove empty vectors from the data

[dlen ddim] = size(D); % check input dimension

if dim ~= ddim, error('Map and data input space dimensions disagree.'); end

% varargin

sTrain = som_set('som_train','algorithm','seq','neigh', ...

sMap.neigh,'mask',sMap.mask,'data_name',data_name);

radius = []; alpha = []; tracking = 1; sample_order_type = 'random';

tlen_type = 'epochs';

i=1;

while i<=length(varargin),

argok = 1;

if ischar(varargin{i}),

switch varargin {i},

% argument IDs

case 'msize', i=i+1; sTopol.msize = varargin{i};

case 'lattice', i=i+1; sTopol.lattice = varargin{i};

case 'shape', i=i+1; sTopol.shape = varargin{i};

case 'mask', i=i+1; sTrain.mask = varargin{i};

case 'neigh', i=i+1; sTrain.neigh = varargin{i};

case 'trainlen', i=i+1; sTrain.trainlen = varargin{i};

case 'trainlen_type', i=i+1; tlen_type = varargin{i};

case 'tracking', i=i+1; tracking = varargin{i};

case 'sample_order', i=i+1; sample_order_type = varargin{i};

case 'radius_ini', i=i+1; sTrain.radius_ini = varargin{i};

case 'radius_fin', i=i+1; sTrain.radius_fin = varargin{i};

case 'radius',

i=i+ 1;

l = length(varargin{i});

if l==1,

sTrain.radius_ini = varargin {i};

else

sTrain.radius_ini = varargin {i} (1);

sTrain.radius_fin = varargin {i} (end);

if l>2, radius = varargin{i}; tlen_type = 'samples'; end

end

case 'alpha_type', i=i+1; sTrain.alpha_type = varargin{i};

case 'alpha_ini', i=i+1; sTrain.alpha_ini = varargin{i};

case 'alpha',

i=i+ 1;

sTrain.alpha_ini = varargin {i} (1);

if length(varargin {i} )>1,

alpha = varargin{i}; tlen_type = 'samples';

sTrain.alpha_type = 'user defined';

end

case {'sTrain','train','som_train'}, i=i+1; sTrain = varargin{i};

case {'topol','sTopol','som_topol'},

i=i+ 1;

sTopol = varargin{i};

if prod(sTopol.msize) ~= munits,

error('Given map grid size does not match the codebook size.'); end

% unambiguous values

case {'inv','linear','power'}, sTrain.alpha_type = varargin{i};

case {'hexa','rect'}, sTopol.lattice = varargin{i};

case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i};

case {'gaussian','cutgauss','ep','bubble' }, sTrain.neigh = varargin {i}; case {'epochs','samples'}, tlen_type = varargin{i};

case {'random', 'ordered'}, sample_order_type = varargin{i}; otherwise argok=0;

end

elseif isstruct(varargin {i}) & isfield(varargin {i} ,'type'),

switch varargin{i} (1).type,

case 'som_topol',

sTopol = varargin{i};

if prod(sTopol.msize) ~= munits,

error('Given map grid size does not match the codebook size.');

end

case 'som_train', sTrain = varargin{i};

otherwise argok=0;

end

else

argok = 0;

end

if ~argok,

disp(['(som_seqtrain) Ignoring invalid argument #' num2str(i+2)]);

end

i = i+1;

end

% training length

if ~isempty(radius) | ~isempty(alpha),

lr = length(radius);

la = length(alpha);

if lr>2 | la>1,

tlen_type = 'samples';

if lr> 2 & la<=1, sTrain.trainlen = lr;

elseif lr<=2 & la> 1, sTrain.trainlen = la;

elseif lr==la, sTrain.trainlen = la;

else

error('Mismatch between radius and learning rate vector lengths.')

end

end end

if strcmp(tlen _type,'samples'), sTrain.trainlen = sTrain.trainlen/dlen; end

% check topology

if struct_mode,

if ~strcmp(sTopol.lattice,sMap.topol.lattice) | ...

~strcmp(sTopol. shape,sMap.topol. shape) | ...

any(sTopol.msize ~= sMap.topol.msize),

warning('Changing the original map topology.');

end end

sMap.topol = sTopol;

% complement the training struct

sTrain = som_train_struct(sTrain,sMap,'dlen',dlen);

if isempty(sTrain.mask), sTrain.mask = ones(dim, 1); end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% initialize

M = sMap.codebook;

mask = sTrain.mask;

trainlen = Train.trainlen*dlen;

% neighborhood radius

if length(radius)>2,

radius _type = 'user defined';

else

radius = [sTrain.radius_ini sTrain.radius _fin];

rini = radius(1);

rstep = (radius(end)-radius(1 ))/(trainlen- 1);

radius _type = 'linear';

end

% learning rate

if length(alpha)>1,

sTrain.alpha_type ='user defined';

if length(alpha) ~= trainlen,

error('Trainlen and length of neighborhood radius vector do not match.')

end

if any(isnan(alpha)),

error('NaN is an illegal learning rate.')

end

else

if isempty(alpha), alpha = sTrain.alpha_ini; end

if strcmp(sTrain.alpha_type,'inv'),

% alpha(t) = a / (t+b), where a and b are chosen suitably

% below, they are chosen so that alpha_fin = alpha_ini/100

b = (trainlen - 1) / (100 - 1);

a = b * alpha;

end

end

% initialize random number generator

rand('state',sum( 1 00*clock));

% distance between map units in the output space

% Since in the case of gaussian and ep neighborhood functions, the

% equations utilize squares of the unit distances and in bubble case

% it doesn't matter which is used, the unitdistances and neighborhood

% radiuses are squared.

Ud = som _unit _dists(sTopol).^2;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Action

update_step = 100;

mu_x_1 = ones(munits,1);

samples = ones(update_step, 1);

r = samples;

alfa = samples;

qe = 0;

start = clock;

if tracking > 0, % initialize tracking

track_table = zeros(update_step, 1);

qe = zeros(floor(trainlen/update_step), 1);

end

for t = 1 :trainlen,

% Every update_step, new values for sample indeces, neighborhood

% radius and learning rate are calculated. This could be done

% every step, but this way it is more efficient. Or this could

% be done all at once outside the loop, but it would require much

% more memory.

ind = rem(t,update_step); if ind==0, ind = update_step; end

if ind== 1,

steps = [t:min(trainlen,t+update_step- 1)];

% sample order

switch sample_order_type,

case 'ordered', samples = rem(steps,dlen)+1;

case 'random', samples = ceil(dlen*rand(update_step, 1 )+eps);

end

% neighborhood radius

switch radius_type,

case 'linear', r = rini+(steps-1)*rstep;

case 'user defined', r = radius(steps);

end

r=r.^2; % squared radius (see notes about Ud above)

r(r==0) = eps; % zero radius might cause div-by-zero error

% learning rate

switch sTrain.alpha_type,

case 'linear', alfa = (1-steps/trainlen)*alpha;

case 'inv', alfa = a ./ (b + steps-1);

case 'power', alfa = alpha * (0.005/alpha).^((steps-1)/trainlen);

case 'user defined', alfa = alpha(steps);

end

end

% find BMU

x = D(samples(ind),:); % pick one sample vector

known = ~isnan(x); % its known components

Dx = M(:,known) - x(mu_x_1,known); % each map unit minus the vector

[qerr bmu] = min((Dx.^2)*mask(known)); % minimum distance(^2) and the BMU % tracking

if tracking>0,

track_table(ind) = sqrt(qerr); if ind==update_step,

n = ceil(t/update_step); qe(n) = mean(track_table);

trackplot(M,D,tracking,start,n,qe);

end

end

% neighborhood & learning rate

% notice that the elements Ud and radius have been squared!

% (see notes about Ud above) switch sTrain.neigh,

case 'bubble', h = (Ud(:,bmu)<=r(ind));

case 'gaussian', h = exp(-Ud(:,bmu)/(2*r(ind)));

case 'cutgauss', h = exp(-Ud(:,bmu)/(2*r(ind))) .* (Ud(:,bmu)<=r(ind));

case 'ep', h = (1 -Ud(:,bmu)/r(ind)) .* (Ud(:,bmu)<=r(ind));

end

h = h*alfa(ind);

% update M

M(: ,known) = M(: ,known) - h(: ,ones(sum(known), 1)). *Dx;

end; % for t = 1 :trainlen

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Build / clean up the return arguments if tracking, fprintf(1 ,'\n'); end

% update structures

sTrain = som_set(sTrain,'time',datestr(now,0)); if struct_mode,

sMap = som_set(sMap,'codebook',M,'mask',sTrain.mask,'neigh',sTrain.neigh);

tl = length(sMap.trainhist);

sMap.trainhist(tl+1) = sTrain;

else

sMap = reshape(M,orig_size);

end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% subfunctions

function [] = trackplot(M,D,tracking,start,n,qe) l = length(qe);

elap_t = etime(clock,start);

tot_t = elap_t*l/n;

fprintf( 1 ,'\rTraining: %3 .0f/ %3 .0f s',elap_t,tot_t) switch tracking

case 1,

case 2,

plot(1 :n,qe(1 :n),(n+1):l,qe((n+1):l))

title('Quantization errors for latest samples') drawnow

otherwise,

subplot(2,1,1), plot(1 :n,qe(1 :n),(n+1):l,qe((n+1):l)) title('Quantization error for latest samples');

subplot(2, 1,2), plot(M(:, 1 ),M(:,2),'ro',D(:, 1 ),D(:,2),'b.');

title('First two components of map units (o) and data vectors (+)');

drawnow

end

% end of trackplot

précédent sommaire






Bitcoin is a swarm of cyber hornets serving the goddess of wisdom, feeding on the fire of truth, exponentially growing ever smarter, faster, and stronger behind a wall of encrypted energy








"Enrichissons-nous de nos différences mutuelles "   Paul Valery