% Conference paper (ICDMW 2023 workshop proceedings) introducing the BLM-17m tweet dataset.
% Names are stored as "Last, First"; case-sensitive words in titles are brace-protected
% so sentence-casing styles keep acronyms and proper nouns intact.
@inproceedings{dcbd4944508e41fd84e8e1438e23dee5,
  author    = {Kemik, Hasan and Ozates, Nusret and Asgari-Chenaghlou, Meysam and Li, Yang and Cambria, Erik},
  title     = {{BLM-17m}: A Large-Scale Dataset for {Black Lives Matter} Topic Detection on {Twitter}},
  booktitle = {Proceedings - 23rd {IEEE} International Conference on Data Mining Workshops, {ICDMW} 2023},
  editor    = {Wang, Jihe and He, Yi and Dinh, Thang N. and Grant, Christan and Qiu, Meikang and Pedrycz, Witold},
  series    = {{IEEE} International Conference on Data Mining Workshops, {ICDMW}},
  publisher = {IEEE Computer Society},
  year      = {2023},
  pages     = {736--743},
  doi       = {10.1109/ICDMW60847.2023.00100},
  language  = {English},
  keywords  = {AI, BlackLivesMatter, BLM, Natural Language Processing, Sentiment Analysis, Social Media},
  abstract  = {Protection of human rights is one of the most important problems of the modern world. In this paper, we construct a Twitter dataset that covers one of the most significant human rights contradiction in recent years which affected the whole world: the George Floyd incident. We propose a labeled dataset for topic detection that contains about 17 million tweets. These Tweets are collected from 25 May 2020 to 21 August 2020, covering about 90 days from the start of the incident. We labeled the dataset by monitoring most trending news topics from global and local newspapers and used TF-IDF and LDA as baselines. We evaluated the results of these two methods with three different k values for precision, recall and F1-score.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 23rd IEEE International Conference on Data Mining Workshops, ICDMW 2023 ; Conference date: 01-12-2023 Through 04-12-2023},
}