Source code for pygod.detector.gadnr

# -*- coding: utf-8 -*-
"""GAD-NR: Graph Anomaly Detection via Neighborhood Reconstruction (GADNR)
   The code is partially from the original implementation in 
   https://github.com/Graph-COM/GAD-NR"""
# Author: Yingtong Dou <ytongdou@gmail.com>
# License: BSD 2 clause

import time
import warnings
import torch
import torch.nn.functional as F
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import GCN
from torch_geometric import compile

from . import DeepDetector
from ..nn import GADNRBase
from ..utils import logger


class GADNR(DeepDetector):
    """
    The GAD-NR model. See :cite:`roy2023gadnr` for details.

    Parameters
    ----------
    hid_dim : int, optional
        Hidden dimension of model. Default: ``64``.
    num_layers : int, optional
        Total number of layers in the backbone encoder model.
        Default: ``1``.
    deg_dec_layers : int, optional
        The number of layers for the node degree decoder. Default: ``4``.
    fea_dec_layers : int, optional
        The number of layers for the node feature decoder. Default: ``3``.
    backbone : torch.nn.Module, optional
        The backbone of the deep detector implemented in PyG.
        Default: ``torch_geometric.nn.GCN``.
    sample_size : int, optional
        The number of samples for the neighborhood distribution.
        Default: ``2``.
    sample_time : int, optional
        The number of sampling rounds used to remove noise during node
        feature and neighborhood distribution reconstruction.
        Default: ``3``.
    neigh_loss : str, optional
        The neighbor reconstruction loss. ``KL`` represents the KL
        divergence loss, ``W2`` represents the W2 loss. Default: ``KL``.
    lambda_loss1 : float, optional
        The weight of the neighborhood reconstruction loss term.
        Default: ``1e-2``.
    lambda_loss2 : float, optional
        The weight of the node feature reconstruction loss term.
        Default: ``1e-1``.
    lambda_loss3 : float, optional
        The weight of the node degree reconstruction loss term.
        Default: ``8e-1``.
    real_loss : bool, optional
        Whether to use the original loss proposed in the paper as the
        decision score. If ``False``, the proposed weighted decision
        score is used instead. Default: ``True``.
    lr : float, optional
        Learning rate. Default: ``0.01``.
    epoch : int, optional
        Maximum number of training epochs. Default: ``100``.
    dropout : float, optional
        Dropout rate. Default: ``0.``.
    weight_decay : float, optional
        Weight decay (L2 penalty). Default: ``0.0003``.
    act : callable activation function or None, optional
        Activation function if not None.
        Default: ``torch.nn.functional.relu``.
    gpu : int
        GPU Index, -1 for using CPU. Default: ``-1``.
    batch_size : int, optional
        Minibatch size, 0 for full batch training. Default: ``0``.
    num_neigh : int, optional
        Number of neighbors in sampling, -1 for all neighbors.
        Default: ``-1``.
    contamination : float, optional
        The amount of contamination of the dataset in (0., 0.5], i.e.,
        the proportion of outliers in the dataset. Used when fitting to
        define the threshold on the decision function. Default: ``0.1``.
    verbose : int, optional
        Verbosity mode. Range in [0, 3]. Larger value for printing out
        more log information. Default: ``0``.
    save_emb : bool, optional
        Whether to save the embedding. Default: ``False``.
    compile_model : bool, optional
        Whether to compile the model with ``torch_geometric.compile``.
        Default: ``False``.
    **kwargs : optional
        Other parameters for the backbone.

    Attributes
    ----------
    decision_score_ : torch.Tensor
        The outlier scores of the training data. Outliers tend to have
        higher scores. This value is available once the detector is
        fitted.
    threshold_ : float
        The threshold is based on ``contamination``. It is the
        :math:`N` * ``contamination`` most abnormal samples in
        ``decision_score_``. The threshold is calculated for generating
        binary outlier labels.
    label_ : torch.Tensor
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers. It is generated by applying ``threshold_``
        on ``decision_score_``.
    emb : torch.Tensor or tuple of torch.Tensor or None
        The learned node hidden embeddings of shape
        :math:`N \\times` ``hid_dim``. Only available when ``save_emb``
        is ``True``. When the detector has not been fitted, ``emb`` is
        ``None``. When the detector has multiple embeddings, ``emb`` is
        a tuple of torch.Tensor.
    """

    def __init__(self,
                 hid_dim=64,
                 num_layers=1,
                 deg_dec_layers=4,
                 fea_dec_layers=3,
                 backbone=GCN,
                 sample_size=2,
                 sample_time=3,
                 neigh_loss='KL',
                 lambda_loss1=1e-2,
                 lambda_loss2=1e-1,
                 lambda_loss3=8e-1,
                 real_loss=True,
                 lr=1e-2,
                 epoch=100,
                 dropout=0.,
                 weight_decay=3e-4,
                 act=F.relu,
                 gpu=-1,
                 batch_size=0,
                 num_neigh=-1,
                 contamination=0.1,
                 verbose=0,
                 save_emb=False,
                 compile_model=False,
                 **kwargs):
        super(GADNR, self).__init__(hid_dim=hid_dim,
                                    num_layers=num_layers,
                                    dropout=dropout,
                                    weight_decay=weight_decay,
                                    act=act,
                                    backbone=backbone,
                                    contamination=contamination,
                                    lr=lr,
                                    epoch=epoch,
                                    gpu=gpu,
                                    batch_size=batch_size,
                                    num_neigh=num_neigh,
                                    verbose=verbose,
                                    save_emb=save_emb,
                                    compile_model=compile_model,
                                    **kwargs)

        self.encoder_layers = num_layers
        self.deg_dec_layers = deg_dec_layers
        self.fea_dec_layers = fea_dec_layers
        self.sample_size = sample_size
        self.sample_time = sample_time
        self.neigh_loss = neigh_loss
        self.lambda_loss1 = lambda_loss1
        self.lambda_loss2 = lambda_loss2
        self.lambda_loss3 = lambda_loss3
        self.real_loss = real_loss
        self.neighbor_num_list = None
        self.neighbor_dict = None
        self.id_mapping = None
        self.full_batch = None
        self.tot_nodes = 0
        self.verbose = verbose

    def process_graph(self, data):
        if self.batch_size != data.x.shape[0]:
            # mini-batch
            data, neighbor_dict, neighbor_num_list, id_mapping = \
                GADNRBase.process_graph(data, data.input_id.tolist())
        else:
            # full batch
            data, neighbor_dict, neighbor_num_list, id_mapping = \
                GADNRBase.process_graph(data)
            self.tot_nodes = data.x.shape[0]

        self.neighbor_num_list = neighbor_num_list.to(self.device)
        self.neighbor_dict = neighbor_dict
        self.id_mapping = id_mapping

        return data

    def init_model(self, **kwargs):
        if self.save_emb:
            self.emb = torch.zeros(self.num_nodes, self.hid_dim)

        return GADNRBase(in_dim=self.in_dim,
                         hid_dim=self.hid_dim,
                         encoder_layers=self.encoder_layers,
                         deg_dec_layers=self.deg_dec_layers,
                         fea_dec_layers=self.fea_dec_layers,
                         sample_size=self.sample_size,
                         sample_time=self.sample_time,
                         neighbor_num_list=self.neighbor_num_list,
                         tot_nodes=self.tot_nodes,
                         neigh_loss=self.neigh_loss,
                         lambda_loss1=self.lambda_loss1,
                         lambda_loss2=self.lambda_loss2,
                         lambda_loss3=self.lambda_loss3,
                         full_batch=self.full_batch,
                         backbone=self.backbone,
                         device=self.device).to(self.device)

    def forward_model(self, data):
        if not self.full_batch:
            # mini-batch training
            h0, degree_logits, feat_recon_list, neigh_recon_list = \
                self.model(data.x, data.edge_index,
                           data.input_id.tolist(), self.neighbor_dict,
                           self.id_mapping)
        else:
            # full batch training
            h0, degree_logits, feat_recon_list, neigh_recon_list = \
                self.model(data.x, data.edge_index)

        loss, loss_per_node, h_loss, degree_loss, feature_loss = \
            self.model.loss_func(h0,
                                 degree_logits,
                                 feat_recon_list,
                                 neigh_recon_list,
                                 self.neighbor_num_list)

        return loss, loss_per_node.cpu().detach(), h_loss.cpu().detach(), \
            degree_loss.cpu().detach(), feature_loss.cpu().detach()

    def comp_decision_score(self,
                            loss_per_node,
                            h_loss,
                            degree_loss,
                            feature_loss,
                            h_loss_weight,
                            degree_loss_weight,
                            feature_loss_weight):
        """Compute the decision score based on the original loss or the
        weighted loss."""
        if self.real_loss:
            # the original decision score from the loss
            comp_loss = loss_per_node
        else:
            # the weighted decision score
            h_loss_norm = h_loss / (torch.max(h_loss) - torch.min(h_loss))
            degree_loss_norm = degree_loss / \
                (torch.max(degree_loss) - torch.min(degree_loss))
            feature_loss_norm = feature_loss / \
                (torch.max(feature_loss) - torch.min(feature_loss))
            comp_loss = h_loss_weight * h_loss_norm \
                + degree_loss_weight * degree_loss_norm \
                + feature_loss_weight * feature_loss_norm

        return comp_loss
    def fit(self,
            data,
            label=None,
            h_loss_weight=1.0,
            degree_loss_weight=0.,
            feature_loss_weight=2.5,
            loss_step=20):
        """
        Overwrite the base model fit function since GAD-NR uses multiple
        personalized loss functions.

        Parameters
        ----------
        data : torch_geometric.data.Data
            Input graph.
        label : torch.Tensor, optional
            The optional outlier ground truth labels used for testing.
            Default: ``None``.
        h_loss_weight : float, optional
            The weight of the neighborhood reconstruction loss term used
            in the weighted decision score. Default: ``1.0``.
        degree_loss_weight : float, optional
            The weight of the node degree reconstruction loss term used
            in the weighted decision score. Default: ``0.``.
        feature_loss_weight : float, optional
            The weight of the node feature reconstruction loss term used
            in the weighted decision score. Default: ``2.5``.
        loss_step : int, optional
            The epoch interval to update the loss terms. Default: ``20``.
        """
        self.num_nodes, self.in_dim = data.x.shape
        if self.batch_size == 0:
            # full batch training
            self.batch_size = data.x.shape[0]
            data = self.process_graph(data)
            self.full_batch = True
        else:
            # mini-batch training
            loader = NeighborLoader(data,
                                    self.num_neigh,
                                    batch_size=self.batch_size)
            self.full_batch = False

        self.model = self.init_model(**self.kwargs)
        if self.compile_model:
            self.model = compile(self.model)

        # the degree decoder is optimized with its own learning rate
        degree_params = list(map(id, self.model.degree_decoder.parameters()))
        base_params = filter(lambda p: id(p) not in degree_params,
                             self.model.parameters())
        optimizer = torch.optim.Adam(
            [{'params': base_params},
             {'params': self.model.degree_decoder.parameters(), 'lr': 1e-2}],
            lr=self.lr,
            weight_decay=self.weight_decay)

        min_loss = float('inf')
        self.arg_min_loss_per_node = None

        self.model.train()
        self.decision_score_ = torch.zeros(data.x.shape[0])
        for epoch in range(self.epoch):
            start_time = time.time()
            epoch_loss = 0
            epoch_loss_per_node = torch.zeros(data.x.shape[0])
            if epoch % loss_step == 0:
                self.model.lambda_loss2 = self.model.lambda_loss2 + 0.5
                self.model.lambda_loss3 = self.model.lambda_loss3 / 2

            if self.full_batch:
                # full batch training
                loss, loss_per_node, h_loss, degree_loss, feature_loss = \
                    self.forward_model(data)
                comp_loss = self.comp_decision_score(loss_per_node,
                                                     h_loss,
                                                     degree_loss,
                                                     feature_loss,
                                                     h_loss_weight,
                                                     degree_loss_weight,
                                                     feature_loss_weight)
                self.decision_score_ = comp_loss.squeeze(1)
                if self.save_emb:
                    self.emb = self.model.emb.cpu()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                epoch_loss = loss.item() * self.batch_size
                epoch_loss_per_node = loss_per_node.squeeze(1)
            else:
                # mini-batch training
                for sampled_data in loader:
                    batch_size = sampled_data.batch_size
                    node_idx = sampled_data.n_id
                    sampled_data = self.process_graph(sampled_data)

                    loss, loss_per_node, h_loss, degree_loss, feature_loss = \
                        self.forward_model(sampled_data)
                    comp_loss = self.comp_decision_score(loss_per_node,
                                                         h_loss,
                                                         degree_loss,
                                                         feature_loss,
                                                         h_loss_weight,
                                                         degree_loss_weight,
                                                         feature_loss_weight)
                    self.decision_score_[node_idx[:batch_size]] = \
                        comp_loss.squeeze(1)
                    if self.save_emb:
                        self.emb[node_idx[:batch_size]] = \
                            self.model.emb[:batch_size].cpu()

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    epoch_loss += loss.item() * batch_size
                    epoch_loss_per_node[node_idx[:batch_size]] = \
                        loss_per_node.squeeze(1)

            loss_value = epoch_loss / data.x.shape[0]
            if loss_value < min_loss:
                min_loss = loss_value
                self.arg_min_loss_per_node = epoch_loss_per_node

            logger(epoch=epoch,
                   loss=loss_value,
                   score=self.decision_score_,
                   target=label,
                   time=time.time() - start_time,
                   verbose=self.verbose,
                   train=True)

        self._process_decision_score()
        return self
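    # Schedule note (editor's addition): every ``loss_step`` epochs ``fit``
    # shifts the model loss weights toward feature reconstruction. With the
    # defaults (lambda_loss2=1e-1, lambda_loss3=8e-1, loss_step=20):
    #
    #   epoch  0: lambda_loss2 = 0.6, lambda_loss3 = 0.4
    #   epoch 20: lambda_loss2 = 1.1, lambda_loss3 = 0.2
    #   epoch 40: lambda_loss2 = 1.6, lambda_loss3 = 0.1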
    def decision_function(self,
                          data,
                          label=None,
                          h_loss_weight=1.0,
                          degree_loss_weight=0.,
                          feature_loss_weight=2.5):
        """
        Overwrite the decision function from the base model due to the
        unique loss function and decision score from the GAD-NR paper.
        The three loss term weights must be the same as in ``fit`` if
        ``real_loss`` is ``False``.
        """
        if self.full_batch:
            # full batch inference
            assert self.batch_size == data.x.shape[0], \
                "The test data should have the same number of nodes as " \
                "the training data under the full batch mode. To test on " \
                "data with a different number of nodes, please use the " \
                "mini-batch mode."
            data = self.process_graph(data)
        else:
            # mini-batch inference
            loader = NeighborLoader(data,
                                    self.num_neigh,
                                    batch_size=self.batch_size)

        self.model.eval()
        outlier_score = torch.zeros(data.x.shape[0])
        if self.save_emb:
            if type(self.hid_dim) is tuple:
                self.emb = (torch.zeros(data.x.shape[0], self.hid_dim[0]),
                            torch.zeros(data.x.shape[0], self.hid_dim[1]))
            else:
                self.emb = torch.zeros(data.x.shape[0], self.hid_dim)

        start_time = time.time()
        if self.batch_size == data.x.shape[0]:
            # full batch inference
            loss, loss_per_node, h_loss, degree_loss, feature_loss = \
                self.forward_model(data)
            comp_loss = self.comp_decision_score(loss_per_node,
                                                 h_loss,
                                                 degree_loss,
                                                 feature_loss,
                                                 h_loss_weight,
                                                 degree_loss_weight,
                                                 feature_loss_weight)
            outlier_score = comp_loss.squeeze(1)
            if self.save_emb:
                self.emb = self.model.emb.cpu()
        else:
            # mini-batch inference
            for sampled_data in loader:
                batch_size = sampled_data.batch_size
                node_idx = sampled_data.n_id
                sampled_data = self.process_graph(sampled_data)

                loss, loss_per_node, h_loss, degree_loss, feature_loss = \
                    self.forward_model(sampled_data)
                comp_loss = self.comp_decision_score(loss_per_node,
                                                     h_loss,
                                                     degree_loss,
                                                     feature_loss,
                                                     h_loss_weight,
                                                     degree_loss_weight,
                                                     feature_loss_weight)
                outlier_score[node_idx[:batch_size]] = comp_loss.squeeze(1)

        logger(loss=loss.item() / data.x.shape[0],
               score=outlier_score,
               target=label,
               time=time.time() - start_time,
               verbose=self.verbose,
               train=False)

        return outlier_score
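

# Minimal usage sketch (editor's addition, not part of the library source):
# fit the detector on a hypothetical random attributed graph. The toy sizes
# and hyperparameters below are illustrative assumptions only.
if __name__ == "__main__":
    from torch_geometric.data import Data

    num_nodes, num_feat = 100, 16
    toy_data = Data(x=torch.randn(num_nodes, num_feat),
                    edge_index=torch.randint(0, num_nodes, (2, 400)))

    detector = GADNR(hid_dim=32, epoch=10, verbose=1)
    detector.fit(toy_data)                          # full batch training
    scores = detector.decision_function(toy_data)   # per-node anomaly scores

    print(detector.label_[:5])   # binary labels derived from threshold_
    print(scores[:5])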