@comment{
  Conference paper from the proceedings of CBD 2017 (see booktitle).
  Cleanup notes for this entry:
  - the former series field was removed because it repeated booktitle verbatim
    and would be printed twice by styles that output series;
  - the publisher copyright line was moved out of note into the copyright
    field, which standard styles do not print;
  - month and day are kept as exported; they differ from the conference dates
    given in note.
}
@inproceedings{d0d186d9b6a44910a6d4011c2e06d6a0,
  author    = {Fan, Renjie and Yu, Zhiwen and Guo, Bin and Wang, Liang and Yang, Dingqi},
  title     = {Target Distribution Guided Network Sampling},
  booktitle = {Proceedings - 5th International Conference on Advanced Cloud and Big Data, {CBD} 2017},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {374--379},
  year      = {2017},
  month     = sep,
  day       = {6},
  doi       = {10.1109/CBD.2017.71},
  language  = {English},
  keywords  = {Online analytical processing, Social network analysis},
  abstract  = {Studying public users' data on social networks to provide service and prediction for the society has been a widespread and effective way thanks to the rapid raise of social networks. However, users' population structure online is usually different from that of physical world, which may influence the researches significantly. Thus it may become an essential limitation for studies conducted by revealing knowledge from social media data owing to the biased network population structure. Tradition sample approaches are either resources-intensive or data-biased. In this paper, we proposed a target distribution guided sample process to solve the problem of imbalanced user data in the virtual space. We make intervention to the sampling procedure according to the real-Time divergence of the collected sample set against the target distribution, apply theory of homophily to discover the users with matched features and refine the samples with recursive sampling. Experiments show this method is able to successfully constrain samples' overall structure according to the given distribution within a given JS divergence of 0.1 while leaving the unrelated features distributed randomly. Moreover, it takes less times of access to collect a certain number of samples for the method proposed in this paper and thus save time and computer resources.},
  copyright = {{\textcopyright} 2017 IEEE},
  note      = {Conference held 13--16 August 2017},
}