Source code for pygod.detector.guide

# -*- coding: utf-8 -*-
"""Higher-order Structure based Anomaly Detection on Attributed
    Networks (GUIDE)"""
# Author: Kay Liu <zliu234@uic.edu>
# License: BSD 2 clause

import os
import warnings

import torch
import torch.nn.functional as F

from . import DeepDetector
from ..nn import GUIDEBase


[docs]class GUIDE(DeepDetector): """ Higher-order Structure based Anomaly Detection on Attributed Networks GUIDE is an anomaly detector consisting of an attribute graph convolutional autoencoder, and a structure graph attentive autoencoder (not the same as the graph attention networks). Instead of the adjacency matrix, node motif degree is used as input of structure autoencoder. The reconstruction mean square error of the autoencoders are defined as structure anomaly score and attribute anomaly score, respectively. Note: The calculation of node motif degree in preprocessing has high time complexity. It may take longer than you expect. See :cite:`yuan2021higher` for details. Parameters ---------- hid_a : int, optional Hidden dimension for attribute. Default: ``64``. hid_s : int, optional Hidden dimension for structure. Default: ``4``. num_layers : int, optional Total number of layers in model. Default: ``4``. dropout : float, optional Dropout rate. Default: ``0.``. weight_decay : float, optional Weight decay (L2 penalty). Default: ``0.``. act : callable activation function or None, optional Activation function if not None. Default: ``torch.nn.functional.relu``. backbone : torch.nn.Module The backbone of GUIDE is fixed. Changing of this parameter will not affect the model. Default: ``None``. alpha : float, optional Weight between reconstruction of node feature and structure. Default: ``0.5``. contamination : float, optional The amount of contamination of the dataset in (0., 0.5], i.e., the proportion of outliers in the dataset. Used when fitting to define the threshold on the decision function. Default: ``0.1``. lr : float, optional Learning rate. Default: ``0.004``. epoch : int, optional Maximum number of training epoch. Default: ``100``. gpu : int GPU Index, -1 for using CPU. Default: ``-1``. batch_size : int, optional Minibatch size, 0 for full batch training. Default: ``0``. num_neigh : int, optional Number of neighbors in sampling, -1 for all neighbors. Default: ``-1``. graphlet_size : int, optional The maximum size of graphlet. Default: ``4``. selected_motif : bool, optional Whether to use selected motif in the paper. Default: ``True``. cache_dir : str, optional The directory for the node motif degree caching. If ``None``, ~/.pygod will be used. Default: ``None``. verbose : int, optional Verbosity mode. Range in [0, 3]. Larger value for printing out more log information. Default: ``0``. save_emb : bool, optional Whether to save the embedding. Default: ``False``. compile_model : bool, optional Whether to compile the model with ``torch_geometric.compile``. Default: ``False``. **kwargs Other parameters for the backbone. Attributes ---------- decision_score_ : torch.Tensor The outlier scores of the training data. Outliers tend to have higher scores. This value is available once the detector is fitted. threshold_ : float The threshold is based on ``contamination``. It is the :math:`N`*``contamination`` most abnormal samples in ``decision_score_``. The threshold is calculated for generating binary outlier labels. label_ : torch.Tensor The binary labels of the training data. 0 stands for inliers and 1 for outliers. It is generated by applying ``threshold_`` on ``decision_score_``. emb : torch.Tensor or tuple of torch.Tensor or None The learned node hidden embeddings of shape :math:`N \\times` ``hid_dim``. Only available when ``save_emb`` is ``True``. When the detector has not been fitted, ``emb`` is ``None``. When the detector has multiple embeddings, ``emb`` is a tuple of torch.Tensor. """ def __init__(self, hid_a=64, hid_s=4, num_layers=4, dropout=0., weight_decay=0., act=F.relu, backbone=None, alpha=0.5, contamination=0.1, lr=0.004, epoch=100, gpu=-1, batch_size=0, num_neigh=-1, graphlet_size=4, selected_motif=True, cache_dir=None, verbose=0, save_emb=False, compile_model=False, **kwargs): if backbone is not None: warnings.warn("Backbone is not used in GUIDE") super(GUIDE, self).__init__(hid_dim=(hid_a, hid_s), num_layers=num_layers, dropout=dropout, weight_decay=weight_decay, act=act, backbone=backbone, contamination=contamination, lr=lr, epoch=epoch, gpu=gpu, batch_size=batch_size, num_neigh=num_neigh, verbose=verbose, save_emb=save_emb, compile_model=compile_model, **kwargs) self.dim_s = None self.alpha = alpha self.graphlet_size = graphlet_size if selected_motif: assert self.graphlet_size == 4, \ "Graphlet size is fixed when using selected motif" self.selected_motif = selected_motif self.verbose = verbose self.cache_dir = cache_dir def process_graph(self, data): data.s = GUIDEBase.calc_gdd(data, self.cache_dir, graphlet_size=self.graphlet_size, selected_motif=self.selected_motif) self.dim_s = data.s.shape[1] def init_model(self, **kwargs): if self.save_emb: self.emb = (torch.zeros(self.num_nodes, self.hid_dim[0]), torch.zeros(self.num_nodes, self.hid_dim[1])) return GUIDEBase(dim_a=self.in_dim, dim_s=self.dim_s, hid_a=self.hid_dim[0], hid_s=self.hid_dim[1], num_layers=self.num_layers, dropout=self.dropout, act=self.act, **kwargs).to(self.device) def forward_model(self, data): batch_size = data.batch_size x = data.x.to(self.device) s = data.s.to(self.device) edge_index = data.edge_index.to(self.device) x_, s_ = self.model(x, s, edge_index) score = self.model.loss_func(x[:batch_size], x_[:batch_size], s[:batch_size], s_[:batch_size], self.alpha) loss = torch.mean(score) return loss, score.detach().cpu()