# -*- coding: utf-8 -*-""" Structural Clustering Algorithm for Networks"""# Author: Kay Liu <zliu234@uic.edu># License: BSD 2 clauseimportmathimporttimeimportwarningsimporttorchimportnumpyasnpfrom.importDetectorfrom..utilsimportlogger
[docs]classSCAN(Detector):""" Structural Clustering Algorithm for Networks SCAN is a clustering algorithm, which only takes the graph structure without the node features as the input. Note: This model will output detected clusters instead of "outliers" descibed in the original paper. .. note:: This detector is transductive only. Using ``predict`` with unseen data will train the detector from scratch. See :cite:`xu2007scan` for details. Parameters ---------- eps : float, optional Neighborhood threshold. Default: ``.5``. mu : int, optional Minimal size of clusters. Default: ``2``. contamination : float, optional Valid in (0., 0.5). The proportion of outliers in the data set. Used when fitting to define the threshold on the decision function. Default: ``0.1``. verbose : int, optional Verbosity mode. Range in [0, 3]. Larger value for printing out more log information. Default: ``0``. Attributes ---------- decision_score_ : torch.Tensor The outlier scores of the training data. The higher, the more abnormal. Outliers tend to have higher scores. This value is available once the detector is fitted. threshold_ : float The threshold is based on ``contamination``. It is the :math:`N \\times` ``contamination`` most abnormal samples in ``decision_score_``. The threshold is calculated for generating binary outlier labels. label_ : torch.Tensor The binary labels of the training data. 0 stands for inliers and 1 for outliers. It is generated by applying ``threshold_`` on ``decision_score_``. hub_score_ : torch.Tensor The binary hub scores of each node. scatter_score_ : torch.Tensor The binary scatter scores of each node, i.e., the "outlier" scores in the original paper. """def__init__(self,eps=.5,mu=2,contamination=0.1,verbose=0):super(SCAN,self).__init__(contamination=contamination,verbose=verbose)# model paramself.eps=epsself.mu=muself.hub_score_=Noneself.scatter_score_=Nonedefprocess_graph(self,data):pass
def_similarity(self,u,v):u_set=torch.unique(self._neighbors(u))v_set=torch.unique(self._neighbors(v))inter=np.intersect1d(v_set,u_set)iflen(inter)==0:return0# need to account for vertex itself, add 2(1 for each vertex)sim=(len(inter)+2)/(math.sqrt((len(v_set)+1)*(len(u_set)+1)))returnsimdef_neighborhood(self,v):candidates=self._neighbors(v)iflen(candidates)==0:returntorch.empty(0)sim=np.vectorize(self._similarity)(candidates,v)returncandidates[sim>self.eps]def_neighbors(self,v):returnself.edge_index[1][self.edge_index[0]==v]def_if_hub(self,v):neighbors=self._neighbors(v)returnlen(torch.unique(neighbors))>1defdecision_function(self,data,label=None):ifdataisnotNone:warnings.warn("This detector is transductive only. ""Training from scratch with the input data.")self.fit(data,label)returnself.decision_score_