Source code for pygod.detector.anomalydae

# -*- coding: utf-8 -*-
"""AnomalyDAE: Dual autoencoder for anomaly detection
on attributed networks"""
# Author: Xueying Ding <xding2@andrew.cmu.edu>,
#         Kay Liu <zliu234@uic.edu>
# License: BSD 2 clause
import warnings

import torch
import torch.nn.functional as F

from . import DeepDetector
from ..nn import AnomalyDAEBase


class AnomalyDAE(DeepDetector):
    """
    Dual Autoencoder for Anomaly Detection on Attributed Networks

    AnomalyDAE is an anomaly detector consisting of a structure
    autoencoder and an attribute autoencoder that jointly learn node
    embeddings and attribute embeddings in the latent space. The
    structure autoencoder uses graph attention layers. The
    reconstruction mean squared errors of the two decoders are defined
    as the structure anomaly score and the attribute anomaly score,
    respectively, with two additional penalties that up-weight the
    reconstruction errors on the nonzero entries of the adjacency
    matrix and node attributes. See :cite:`fan2020anomalydae` for
    details.

    Parameters
    ----------
    emb_dim : int, optional
        Embedding dimension of the model. Default: ``64``.
    hid_dim : int, optional
        Hidden dimension of the model. Default: ``64``.
    num_layers : int, optional
        The total number of layers of AnomalyDAE is fixed to 4.
        Changing this parameter will not affect the model.
        Default: ``4``.
    dropout : float, optional
        Dropout rate. Default: ``0.``.
    weight_decay : float, optional
        Weight decay (L2 penalty). Default: ``0.``.
    act : callable activation function or None, optional
        Activation function if not None.
        Default: ``torch.nn.functional.relu``.
    backbone : torch.nn.Module
        The backbone of AnomalyDAE is fixed. Changing this parameter
        will not affect the model. Default: ``None``.
    alpha : float, optional
        Weight balancing the reconstruction of node features and
        structure. Default: ``0.5``.
    theta : float, optional
        The additional penalty on nonzero attribute entries.
        Default: ``1.``.
    eta : float, optional
        The additional penalty on nonzero structure entries.
        Default: ``1.``.
    contamination : float, optional
        The amount of contamination of the dataset in (0., 0.5], i.e.,
        the proportion of outliers in the dataset. Used when fitting to
        define the threshold on the decision function.
        Default: ``0.1``.
    lr : float, optional
        Learning rate. Default: ``0.004``.
    epoch : int, optional
        Maximum number of training epochs. Default: ``100``.
    gpu : int
        GPU index, -1 for using CPU. Default: ``-1``.
    batch_size : int, optional
        Minibatch size, 0 for full batch training. Default: ``0``.
    num_neigh : int, optional
        Number of neighbors in sampling, -1 for all neighbors.
        Default: ``-1``.
    verbose : int, optional
        Verbosity mode. Range in [0, 3]. A larger value prints more
        log information. Default: ``0``.
    save_emb : bool, optional
        Whether to save the embedding. Default: ``False``.
    compile_model : bool, optional
        Whether to compile the model with ``torch_geometric.compile``.
        Default: ``False``.
    **kwargs
        Other parameters for the backbone model.

    Attributes
    ----------
    decision_score_ : torch.Tensor
        The outlier scores of the training data. Outliers tend to have
        higher scores. This value is available once the detector is
        fitted.
    threshold_ : float
        The threshold is based on ``contamination``. It is the
        :math:`N \\times` ``contamination`` most abnormal samples in
        ``decision_score_``. The threshold is calculated for generating
        binary outlier labels.
    label_ : torch.Tensor
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers. It is generated by applying ``threshold_``
        on ``decision_score_``.
    emb : torch.Tensor or tuple of torch.Tensor or None
        The learned node hidden embeddings of shape
        :math:`N \\times` ``hid_dim``. Only available when ``save_emb``
        is ``True``. When the detector has not been fitted, ``emb`` is
        ``None``. When the detector has multiple embeddings, ``emb`` is
        a tuple of torch.Tensor.
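
    Examples
    --------
    A minimal usage sketch; ``data`` is assumed to be a
    ``torch_geometric.data.Data`` graph with node features ``data.x``
    and edge list ``data.edge_index``:

    >>> from pygod.detector import AnomalyDAE
    >>> detector = AnomalyDAE()
    >>> detector.fit(data)
    >>> scores = detector.decision_score_
    >>> labels = detector.label_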
""" def __init__(self, emb_dim=64, hid_dim=64, num_layers=4, dropout=0., weight_decay=0., act=F.relu, backbone=None, alpha=0.5, theta=1., eta=1., contamination=0.1, lr=0.004, epoch=5, gpu=-1, batch_size=0, num_neigh=-1, verbose=0, save_emb=False, compile_model=False, **kwargs): if backbone is not None or num_layers != 4: warnings.warn("Backbone and num_layers are not used in AnomalyDAE") super(AnomalyDAE, self).__init__(hid_dim=hid_dim, num_layers=num_layers, dropout=dropout, weight_decay=weight_decay, act=act, backbone=backbone, contamination=contamination, lr=lr, epoch=epoch, gpu=gpu, batch_size=batch_size, num_neigh=num_neigh, verbose=verbose, save_emb=save_emb, compile_model=compile_model, **kwargs) self.emb_dim = emb_dim self.alpha = alpha self.theta = theta self.eta = eta def process_graph(self, data): AnomalyDAEBase.process_graph(data) def init_model(self, **kwargs): if self.save_emb: self.emb = torch.zeros(self.num_nodes, self.hid_dim) return AnomalyDAEBase(in_dim=self.in_dim, num_nodes=self.num_nodes, emb_dim=self.emb_dim, hid_dim=self.hid_dim, dropout=self.dropout, act=self.act, **kwargs).to(self.device) def forward_model(self, data): batch_size = data.batch_size node_idx = data.n_id x = data.x.to(self.device) s = data.s.to(self.device) edge_index = data.edge_index.to(self.device) x_, s_ = self.model(x, edge_index, batch_size) # positive weight conversion weight = 1 - self.alpha pos_weight_a = self.eta / (1 + self.eta) pos_weight_s = self.theta / (1 + self.theta) score = self.model.loss_func(x[:batch_size], x_[:batch_size], s[:batch_size, node_idx], s_[:batch_size], weight, pos_weight_a, pos_weight_s) loss = torch.mean(score) return loss, score.detach().cpu()