# -*- coding: utf-8 -*-
"""Higher-order Structure based Anomaly Detection on Attributed
Networks (GUIDE)"""
# Author: Kay Liu <zliu234@uic.edu>
# License: BSD 2 clause
import os
import warnings
import torch
import torch.nn.functional as F
from . import DeepDetector
from ..nn import GUIDEBase
class GUIDE(DeepDetector):
    """
    Higher-order Structure based Anomaly Detection on Attributed
    Networks

    GUIDE is an anomaly detector consisting of an attribute graph
    convolutional autoencoder, and a structure graph attentive
    autoencoder (not the same as the graph attention networks). Instead
    of the adjacency matrix, node motif degree is used as input of
    structure autoencoder. The reconstruction mean square errors of the
    autoencoders are defined as structure anomaly score and attribute
    anomaly score, respectively.

    Note: The calculation of node motif degree in preprocessing has
    high time complexity. It may take longer than you expect.

    See :cite:`yuan2021higher` for details.

    Parameters
    ----------
    hid_a : int, optional
        Hidden dimension for attribute. Default: ``64``.
    hid_s : int, optional
        Hidden dimension for structure. Default: ``4``.
    num_layers : int, optional
        Total number of layers in model. Default: ``4``.
    dropout : float, optional
        Dropout rate. Default: ``0.``.
    weight_decay : float, optional
        Weight decay (L2 penalty). Default: ``0.``.
    act : callable activation function or None, optional
        Activation function if not None.
        Default: ``torch.nn.functional.relu``.
    backbone : torch.nn.Module
        The backbone of GUIDE is fixed. Changing this parameter will
        not affect the model; a warning is issued if a value other
        than ``None`` is passed. Default: ``None``.
    alpha : float, optional
        Weight between reconstruction of node feature and structure.
        Default: ``0.5``.
    contamination : float, optional
        The amount of contamination of the dataset in (0., 0.5], i.e.,
        the proportion of outliers in the dataset. Used when fitting to
        define the threshold on the decision function. Default: ``0.1``.
    lr : float, optional
        Learning rate. Default: ``0.004``.
    epoch : int, optional
        Maximum number of training epoch. Default: ``100``.
    gpu : int
        GPU Index, -1 for using CPU. Default: ``-1``.
    batch_size : int, optional
        Minibatch size, 0 for full batch training. Default: ``0``.
    num_neigh : int, optional
        Number of neighbors in sampling, -1 for all neighbors.
        Default: ``-1``.
    graphlet_size : int, optional
        The maximum size of graphlet. Must be ``4`` when
        ``selected_motif`` is ``True``. Default: ``4``.
    selected_motif : bool, optional
        Whether to use selected motif in the paper. Default: ``True``.
    cache_dir : str, optional
        The directory for the node motif degree caching. If ``None``,
        ~/.pygod will be used. Default: ``None``.
    verbose : int, optional
        Verbosity mode. Range in [0, 3]. Larger value for printing out
        more log information. Default: ``0``.
    save_emb : bool, optional
        Whether to save the embedding. Default: ``False``.
    compile_model : bool, optional
        Whether to compile the model with ``torch_geometric.compile``.
        Default: ``False``.
    **kwargs
        Other parameters for the backbone.

    Attributes
    ----------
    decision_score_ : torch.Tensor
        The outlier scores of the training data. Outliers tend to have
        higher scores. This value is available once the detector is
        fitted.
    threshold_ : float
        The threshold is based on ``contamination``. It is the
        :math:`N`*``contamination`` most abnormal samples in
        ``decision_score_``. The threshold is calculated for generating
        binary outlier labels.
    label_ : torch.Tensor
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers. It is generated by applying
        ``threshold_`` on ``decision_score_``.
    emb : torch.Tensor or tuple of torch.Tensor or None
        The learned node hidden embeddings of shape
        :math:`N \\times` ``hid_dim``. Only available when ``save_emb``
        is ``True``. When the detector has not been fitted, ``emb`` is
        ``None``. When the detector has multiple embeddings,
        ``emb`` is a tuple of torch.Tensor.
    """

    def __init__(self,
                 hid_a=64,
                 hid_s=4,
                 num_layers=4,
                 dropout=0.,
                 weight_decay=0.,
                 act=F.relu,
                 backbone=None,
                 alpha=0.5,
                 contamination=0.1,
                 lr=0.004,
                 epoch=100,
                 gpu=-1,
                 batch_size=0,
                 num_neigh=-1,
                 graphlet_size=4,
                 selected_motif=True,
                 cache_dir=None,
                 verbose=0,
                 save_emb=False,
                 compile_model=False,
                 **kwargs):
        # The architecture is fixed, so a user-supplied backbone is only
        # forwarded to the base class and otherwise ignored.
        if backbone is not None:
            warnings.warn("Backbone is not used in GUIDE")

        # The two hidden sizes travel together as a tuple:
        # index 0 is the attribute branch, index 1 the structure branch.
        super().__init__(hid_dim=(hid_a, hid_s),
                         num_layers=num_layers,
                         dropout=dropout,
                         weight_decay=weight_decay,
                         act=act,
                         backbone=backbone,
                         contamination=contamination,
                         lr=lr,
                         epoch=epoch,
                         gpu=gpu,
                         batch_size=batch_size,
                         num_neigh=num_neigh,
                         verbose=verbose,
                         save_emb=save_emb,
                         compile_model=compile_model,
                         **kwargs)

        # Width of the structure input; unknown until ``process_graph``
        # has computed the node motif degree.
        self.dim_s = None
        self.alpha = alpha
        self.graphlet_size = graphlet_size
        if selected_motif:
            # Kept as an assert so callers still see AssertionError.
            assert self.graphlet_size == 4, \
                "Graphlet size is fixed when using selected motif"
        self.selected_motif = selected_motif
        self.verbose = verbose
        self.cache_dir = cache_dir

    def process_graph(self, data):
        """
        Attach the structure input to the graph.

        Calls ``GUIDEBase.calc_gdd`` with the configured cache
        directory, graphlet size and motif selection, stores the result
        on ``data.s`` (modifying ``data`` in place), and records its
        second dimension in ``self.dim_s``.

        Parameters
        ----------
        data : object
            Input graph; it receives a new attribute ``s``.
            Presumably a ``torch_geometric.data.Data`` - confirm
            against the caller.
        """
        data.s = GUIDEBase.calc_gdd(data,
                                    self.cache_dir,
                                    graphlet_size=self.graphlet_size,
                                    selected_motif=self.selected_motif)
        self.dim_s = data.s.shape[1]

    def init_model(self, **kwargs):
        """
        Build the ``GUIDEBase`` network and move it to ``self.device``.

        When ``save_emb`` is set, ``self.emb`` is first initialised to a
        pair of zero tensors with ``self.num_nodes`` rows and
        ``hid_a`` / ``hid_s`` columns respectively.

        Parameters
        ----------
        **kwargs
            Extra keyword arguments forwarded to ``GUIDEBase``.

        Returns
        -------
        model : GUIDEBase
            The freshly constructed model on ``self.device``.
        """
        # NOTE(review): ``num_nodes``, ``in_dim`` and ``device`` are not
        # set in this class; presumably the base class provides them
        # before this method runs - confirm.
        if self.save_emb:
            self.emb = (torch.zeros(self.num_nodes, self.hid_dim[0]),
                        torch.zeros(self.num_nodes, self.hid_dim[1]))

        return GUIDEBase(dim_a=self.in_dim,
                         dim_s=self.dim_s,
                         hid_a=self.hid_dim[0],
                         hid_s=self.hid_dim[1],
                         num_layers=self.num_layers,
                         dropout=self.dropout,
                         act=self.act,
                         **kwargs).to(self.device)

    def forward_model(self, data):
        """
        Run one forward pass and compute the loss and per-row scores.

        Parameters
        ----------
        data : object
            Batch exposing ``x``, ``s``, ``edge_index`` and
            ``batch_size``.

        Returns
        -------
        loss : torch.Tensor
            Mean of the scores returned by ``self.model.loss_func``.
        score : torch.Tensor
            The scores, detached from the graph and moved to CPU.
        """
        batch_size = data.batch_size

        x = data.x.to(self.device)
        s = data.s.to(self.device)
        edge_index = data.edge_index.to(self.device)

        x_, s_ = self.model(x, s, edge_index)

        # Only the first ``batch_size`` rows are scored. Presumably these
        # are the target nodes of a sampled mini-batch and the rest are
        # sampled neighbours - confirm against the data loader.
        score = self.model.loss_func(x[:batch_size],
                                     x_[:batch_size],
                                     s[:batch_size],
                                     s_[:batch_size],
                                     self.alpha)

        loss = torch.mean(score)

        return loss, score.detach().cpu()