Source code for pygod.detector.conad

# -*- coding: utf-8 -*-
"""Contrastive Attributed Network Anomaly Detection
with Data Augmentation (CONAD)"""
# Author: Zhiming Xu <zhimng.xu@gmail.com>
# License: BSD 2 clause

import torch
import numpy as np
from copy import deepcopy
from torch_geometric.nn import GCN
from torch_geometric.utils import dense_to_sparse

from . import DeepDetector
from ..nn import DOMINANTBase


class CONAD(DeepDetector):
    """
    Contrastive Attributed Network Anomaly Detection

    CONAD is an anomaly detector consisting of a shared graph
    convolutional encoder, a structure reconstruction decoder, and an
    attribute reconstruction decoder. The model is trained with both a
    contrastive loss and a structure/attribute reconstruction loss. The
    reconstruction mean squared errors of the two decoders are defined
    as the structure anomaly score and the attribute anomaly score,
    respectively.

    See :cite:`xu2022contrastive` for details.

    Parameters
    ----------
    hid_dim : int, optional
        Hidden dimension of model. Default: ``64``.
    num_layers : int, optional
        Total number of layers in model. Default: ``4``.
    dropout : float, optional
        Dropout rate. Default: ``0.``.
    weight_decay : float, optional
        Weight decay (L2 penalty). Default: ``0.``.
    act : callable activation function or None, optional
        Activation function if not None.
        Default: ``torch.nn.functional.relu``.
    sigmoid_s : bool, optional
        Whether to use the sigmoid function to scale the reconstructed
        structure. Default: ``False``.
    backbone : torch.nn.Module, optional
        The backbone of the deep detector implemented in PyG.
        Default: ``torch_geometric.nn.GCN``.
    contamination : float, optional
        The amount of contamination of the dataset in (0., 0.5], i.e.,
        the proportion of outliers in the dataset. Used when fitting to
        define the threshold on the decision function.
        Default: ``0.1``.
    lr : float, optional
        Learning rate. Default: ``0.004``.
    epoch : int, optional
        Maximum number of training epochs. Default: ``100``.
    gpu : int, optional
        GPU index, -1 for using CPU. Default: ``-1``.
    batch_size : int, optional
        Minibatch size, 0 for full batch training. Default: ``0``.
    num_neigh : int, optional
        Number of neighbors in sampling, -1 for all neighbors.
        Default: ``-1``.
    weight : float, optional
        Weight between the reconstruction of node features and
        structure. Default: ``0.5``.
    eta : float, optional
        Weight between the contrastive loss and the reconstruction
        loss. Default: ``0.5``.
    margin : float, optional
        Margin in margin ranking loss. Default: ``0.5``.
    r : float, optional
        The rate of augmented anomalies. Default: ``0.2``.
    m : int, optional
        For densely connected nodes, the number of edges to add.
        Default: ``50``.
    k : int, optional
        Same as ``k`` in ``pygod.generator.gen_contextual_outlier``.
        Default: ``50``.
    f : int, optional
        For disproportionate nodes, the scale factor applied on their
        attribute value. Default: ``10``.
    verbose : int, optional
        Verbosity mode. Range in [0, 3]. Larger value for printing out
        more log information. Default: ``0``.
    save_emb : bool, optional
        Whether to save the embedding. Default: ``False``.
    compile_model : bool, optional
        Whether to compile the model with ``torch_geometric.compile``.
        Default: ``False``.
    **kwargs : optional
        Additional arguments for the backbone.

    Attributes
    ----------
    decision_score_ : torch.Tensor
        The outlier scores of the training data. Outliers tend to have
        higher scores. This value is available once the detector is
        fitted.
    threshold_ : float
        The threshold is based on ``contamination``. It is the
        :math:`N \\times` ``contamination`` most abnormal samples in
        ``decision_score_``. The threshold is calculated for generating
        binary outlier labels.
    label_ : torch.Tensor
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers. It is generated by applying ``threshold_``
        on ``decision_score_``.
    emb : torch.Tensor or tuple of torch.Tensor or None
        The learned node hidden embeddings of shape
        :math:`N \\times` ``hid_dim``. Only available when ``save_emb``
        is ``True``.
        When the detector has not been fitted, ``emb`` is ``None``.
        When the detector has multiple embeddings, ``emb`` is a tuple
        of torch.Tensor.
    """

    def __init__(self,
                 hid_dim=64,
                 num_layers=4,
                 dropout=0.,
                 weight_decay=0.,
                 act=torch.nn.functional.relu,
                 sigmoid_s=False,
                 backbone=GCN,
                 contamination=0.1,
                 lr=4e-3,
                 epoch=100,
                 gpu=-1,
                 batch_size=0,
                 num_neigh=-1,
                 weight=0.5,
                 eta=0.5,
                 margin=0.5,
                 r=0.2,
                 m=50,
                 k=50,
                 f=10,
                 verbose=0,
                 save_emb=False,
                 compile_model=False,
                 **kwargs):
        super(CONAD, self).__init__(hid_dim=hid_dim,
                                    num_layers=num_layers,
                                    dropout=dropout,
                                    weight_decay=weight_decay,
                                    act=act,
                                    backbone=backbone,
                                    contamination=contamination,
                                    lr=lr,
                                    epoch=epoch,
                                    gpu=gpu,
                                    batch_size=batch_size,
                                    num_neigh=num_neigh,
                                    verbose=verbose,
                                    save_emb=save_emb,
                                    compile_model=compile_model,
                                    **kwargs)

        # model param
        self.weight = weight
        self.sigmoid_s = sigmoid_s
        self.eta = eta

        # other param
        self.r = r
        self.m = m
        self.k = k
        self.f = f
        self.margin_loss_func = torch.nn.MarginRankingLoss(margin=margin)

    def process_graph(self, data):
        DOMINANTBase.process_graph(data)

    def init_model(self, **kwargs):
        if self.save_emb:
            self.emb = torch.zeros(self.num_nodes, self.hid_dim)

        return DOMINANTBase(in_dim=self.in_dim,
                            hid_dim=self.hid_dim,
                            num_layers=self.num_layers,
                            dropout=self.dropout,
                            act=self.act,
                            sigmoid_s=self.sigmoid_s,
                            backbone=self.backbone,
                            **kwargs).to(self.device)

    def forward_model(self, data):
        batch_size = data.batch_size
        node_idx = data.n_id

        x = data.x.to(self.device)
        s = data.s.to(self.device)
        edge_index = data.edge_index.to(self.device)

        if self.model.training:
            x_aug, edge_index_aug, label_aug = \
                self._data_augmentation(data)
            x_aug = x_aug.to(self.device)
            edge_index_aug = edge_index_aug.to(self.device)
            label_aug = label_aug.to(self.device)

            _, _ = self.model(x_aug, edge_index_aug)
            h_aug = self.model.emb

        x_, s_ = self.model(x, edge_index)
        h = self.model.emb

        score = self.model.loss_func(x[:batch_size],
                                     x_[:batch_size],
                                     s[:batch_size, node_idx],
                                     s_[:batch_size],
                                     self.weight)

        if self.model.training:
            margin_loss = self.margin_loss_func(h, h, h_aug) * label_aug
            loss = self.eta * torch.mean(score) + \
                (1 - self.eta) * torch.mean(margin_loss)
        else:
            loss = torch.mean(score)

        return loss, score.detach().cpu()

    def _data_augmentation(self, data):
        """
        Data augmentation on the input graph. Four types of pseudo
        anomalies will be injected:

        - attribute, deviated
        - attribute, disproportionate
        - structure, high-degree
        - structure, outlying

        Parameters
        ----------
        data : torch_geometric.data.Data
            The input graph data.
        Returns
        -------
        feat_aug : torch.Tensor
            The augmented attribute matrix.
        edge_index_aug : torch.Tensor
            The augmented edge index.
        label_aug : torch.Tensor
            The pseudo anomaly labels used to train contrastive graph
            representations.
        """
        rate = self.r
        num_added_edge = self.m
        surround = self.k
        scale_factor = self.f

        x = data.x
        adj = data.s
        node_idx = data.n_id

        batch_size = adj.shape[0]
        num_nodes = adj.shape[1]

        adj_aug, feat_aug = deepcopy(adj), deepcopy(x)
        label_aug = torch.zeros(batch_size, dtype=torch.int32)

        # nodes drawing prob < rate become pseudo anomalies; the
        # interval [0, rate) is then split among the four anomaly types
        prob = torch.rand(batch_size)
        label_aug[prob < rate] = 1

        # structure, high-degree: replace the node's adjacency row with
        # approximately ``num_added_edge`` random edges
        hd_mask = prob < rate / 4
        n_hd = torch.sum(hd_mask)
        edges_mask = torch.rand(n_hd, num_nodes) < num_added_edge / num_nodes
        adj_aug[hd_mask, :] = edges_mask.float()

        # structure, outlying: drop all edges of the node
        ol_mask = torch.logical_and(rate / 4 <= prob, prob < rate / 2)
        adj_aug[ol_mask, :] = 0

        # attribute, deviated: replace attributes with those of the
        # farthest of ``surround`` randomly chosen candidate nodes
        dv_mask = torch.logical_and(rate / 2 <= prob, prob < rate * 3 / 4)
        feat_c = feat_aug[torch.randperm(batch_size)[:surround]]
        ds = torch.cdist(feat_aug[dv_mask], feat_c)
        feat_aug[dv_mask] = feat_c[torch.argmax(ds, 1)]

        # attribute, disproportionate: scale attributes up or down
        # (both masks are bounded above by ``rate`` so that only
        # pseudo-anomalous nodes are rescaled)
        mul_mask = torch.logical_and(rate * 3 / 4 <= prob,
                                     prob < rate * 7 / 8)
        div_mask = torch.logical_and(rate * 7 / 8 <= prob, prob < rate)
        feat_aug[mul_mask] *= scale_factor
        feat_aug[div_mask] /= scale_factor

        # convert the dense augmented adjacency back to an edge index,
        # mapping global target indices back to batch-local indices
        edge_index_aug = dense_to_sparse(adj_aug)[0]
        inv_idx = torch.zeros(num_nodes, dtype=torch.int64)
        inv_idx[node_idx] = torch.arange(batch_size)
        edge_index_aug[1] = inv_idx[edge_index_aug[1]]

        return feat_aug, edge_index_aug, label_aug
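

# A small sketch of how ``_data_augmentation`` above partitions nodes,
# assuming the default rate ``r = 0.2``: each node draws ``prob ~ U(0, 1)``,
# nodes with ``prob < r`` become pseudo anomalies, and the interval
# ``[0, r)`` is split among the four anomaly types::
#
#     import torch
#
#     r = 0.2
#     prob = torch.rand(10000)
#     hd = prob < r / 4                               # high-degree
#     ol = (r / 4 <= prob) & (prob < r / 2)           # outlying
#     dv = (r / 2 <= prob) & (prob < 3 * r / 4)       # deviated
#     mul = (3 * r / 4 <= prob) & (prob < 7 * r / 8)  # scaled up by f
#     div = (7 * r / 8 <= prob) & (prob < r)          # scaled down by f
#
#     # roughly 5%, 5%, 5%, 2.5%, and 2.5% of nodes, respectively
#     print([m.float().mean().item() for m in (hd, ol, dv, mul, div)])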
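

# A minimal usage sketch, assuming ``pygod`` and ``torch_geometric`` are
# installed; the toy graph and hyperparameters below are illustrative only.
# ``fit``, ``decision_score_``, and ``label_`` are the detector interface
# documented in the class docstring above::
#
#     import torch
#     from torch_geometric.data import Data
#     from pygod.detector import CONAD
#
#     x = torch.randn(100, 16)                      # node attributes
#     edge_index = torch.randint(0, 100, (2, 500))  # random edges
#     graph = Data(x=x, edge_index=edge_index)
#
#     detector = CONAD(hid_dim=32, epoch=10)
#     detector.fit(graph)
#
#     scores = detector.decision_score_  # per-node anomaly scores
#     labels = detector.label_           # thresholded by ``contamination``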