Evaluating the Social Impact of Generative AI Systems in Systems and Society.
Irene Solaiman*, Zeerak Talat*, William Agnew, Lama Ahmad, Dylan Baker, Su Lin Blodgett, Canyu Chen, Hal Daumé III, Jesse Dodge, Isabella Duan, Felix Friedrich, Avijit Ghosh, Usman Gohar, Sara Hooker, Yacine Jernite, Ria Kalluri, Alberto Lusoli, Alina Leidinger, Michelle Lin, Xiuzhu Lin, Sasha Luccioni, Jennifer Mickel, Margaret Mitchell, Jessica Newman, Anaelia Ovalle, Marie-Therese Png, Shubham Singh, Andrew Strait, Lukas Struppek, Arjun Subramonian. The Oxford University Press Handbook of Generative AI.
Eds. Philip Hacker, Andreas Engel, Sarah Hammer, Brent Mittelstadt.
Content Moderation. Zeerak Talat. The Aesthetics of Machine Vision: Critical Terms and Ideas.
2025
The Role of Expertise in Effectively Moderating Harmful Social Media Content
Nuredin Ali Abdelkadir, Tianling Yang, Shivani Kapania, Meron Estefanos, Fasica Berhane Gebrekidan, Zecharias Zelalem, Messai Ali, Rishan Berhe, Dylan Baker, Zeerak Talat, Milagros Miceli, Alex Hanna, Timnit Gebru. CHI 2025. BibTeX @inproceedings{Abdelkadir_Role_2025,
  author = {Abdelkadir, Nuredin Ali and Yang, Tianling and Kapania, Shivani and Estefanos, Meron and Gebrekidan, Fasica Berhane and Zelalem, Zecharias and Ali, Messai and Berhe, Rishan and Baker, Dylan and Talat, Zeerak and Miceli, Milagros and Hanna, Alex and Gebru, Timnit},
  title = {The {Role} of {Expertise} in {Effectively} {Moderating} {Harmful} {Social} {Media} {Content}},
  booktitle = {Proceedings of the 2025 {CHI} {Conference} on {Human} {Factors} in {Computing} {Systems}},
  publisher = {ACM},
  address = {Yokohama Japan},
  month = apr,
  year = {2025},
  pages = {1--21},
  isbn = {979-8-4007-1394-1},
  doi = {10.1145/3706598.3714010},
  url = {https://dl.acm.org/doi/10.1145/3706598.3714010},
  urldate = {2025-05-04},
  language = {en},
}
SHADES: Towards a Multilingual Assessment of Stereotypes in Large Language Models
Margaret Mitchell, Giuseppe Attanasio, Ioana Baldini, Miruna Clinciu, Jordan Clive, Pieter Delobelle, Manan Dey, Sil Hamilton, Timm Dill, Jad Doughman, Ritam Dutt, Avijit Ghosh, Jessica Zosa Forde, Carolin Holtermann, Lucie-Aimée Kaffee, Tanmay Laud, Anne Lauscher, Roberto L Lopez-Davila, Maraim Masoud, Nikita Nangia, Anaelia Ovalle, Giada Pistilli, Dragomir Radev, Beatrice Savoldi, Vipul Raheja, Jeremy Qin, Esther Ploeger, Arjun Subramonian, Kaustubh Dhole, Kaiser Sun, Amirbek Djanibekov, Jonibek Mansurov, Kayo Yin, Emilio Villa Cueva, Sagnik Mukherjee, Jerry Huang, Xudong Shen, Jay Gala, Hamdan Al-Ali, Tair Djanibekov, Nurdaulet Mukhituly, Shangrui Nie, Shanya Sharma, Karolina Stanczak, Eliza Szczechla, Tiago Timponi Torrent, Deepak Tunuguntla, Marcelo Viridiano, Oskar van der Wal, Adina Yakefu, Aurélie Névéol, Mike Zhang, Sydney Zink, Zeerak Talat. NAACL 2025. BibTeX @inproceedings{mitchell-etal-2025-shades,
  title = {{SHADES}: Towards a Multilingual Assessment of Stereotypes in Large Language Models},
  author = {Mitchell, Margaret and Attanasio, Giuseppe and Baldini, Ioana and Clinciu, Miruna and Clive, Jordan and Delobelle, Pieter and Dey, Manan and Hamilton, Sil and Dill, Timm and Doughman, Jad and Dutt, Ritam and Ghosh, Avijit and Forde, Jessica Zosa and Holtermann, Carolin and Kaffee, Lucie-Aim{\'e}e and Laud, Tanmay and Lauscher, Anne and Lopez-Davila, Roberto L and Masoud, Maraim and Nangia, Nikita and Ovalle, Anaelia and Pistilli, Giada and Radev, Dragomir and Savoldi, Beatrice and Raheja, Vipul and Qin, Jeremy and Ploeger, Esther and Subramonian, Arjun and Dhole, Kaustubh and Sun, Kaiser and Djanibekov, Amirbek and Mansurov, Jonibek and Yin, Kayo and Cueva, Emilio Villa and Mukherjee, Sagnik and Huang, Jerry and Shen, Xudong and Gala, Jay and Al-Ali, Hamdan and Djanibekov, Tair and Mukhituly, Nurdaulet and Nie, Shangrui and Sharma, Shanya and Stanczak, Karolina and Szczechla, Eliza and Timponi Torrent, Tiago and Tunuguntla, Deepak and Viridiano, Marcelo and van der Wal, Oskar and Yakefu, Adina and N{\'e}v{\'e}ol, Aur{\'e}lie and Zhang, Mike and Zink, Sydney and Talat, Zeerak},
  editor = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month = apr,
  year = {2025},
  address = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2025.naacl-long.600/},
  pages = {11995--12041},
  isbn = {979-8-89176-189-6}
}
Exploring the Limitations of Detecting Machine-Generated Text
Jad Doughman, Osama Mohammed Afzal, Hawau Olamide Toyin, Shady Shehata, Preslav Nakov, Zeerak Talat. COLING 2025. BibTeX @inproceedings{doughman-etal-2025-exploring,
  title = {Exploring the Limitations of Detecting Machine-Generated Text},
  author = {Doughman, Jad and
    Mohammed Afzal, Osama and
    Toyin, Hawau Olamide and
    Shehata, Shady and
    Nakov, Preslav and
    Talat, Zeerak},
  editor = {Rambow, Owen and
    Wanner, Leo and
    Apidianaki, Marianna and
    Al-Khalifa, Hend and
    Di Eugenio, Barbara and
    Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month = jan,
  year = {2025},
  address = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2025.coling-main.288/},
  pages = {4274--4281}
}
The Only Way is Ethics: A Guide to Ethical Research with Large Language Models
Eddie Ungless, Nikolas Vitsakis, Zeerak Talat, James Garforth, Björn Ross, Arno Onken, Atoosa Kasirzadeh, Alexandra Birch. COLING 2025. BibTeX @inproceedings{ungless-etal-2025-way,
  title = {The Only Way is Ethics: A Guide to Ethical Research with Large Language Models},
  author = {Ungless, Eddie L. and
    Vitsakis, Nikolas and
    Talat, Zeerak and
    Garforth, James and
    Ross, Bj{\"o}rn and
    Onken, Arno and
    Kasirzadeh, Atoosa and
    Birch, Alexandra},
  editor = {Rambow, Owen and
    Wanner, Leo and
    Apidianaki, Marianna and
    Al-Khalifa, Hend and
    Di Eugenio, Barbara and
    Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month = jan,
  year = {2025},
  address = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2025.coling-main.603/},
  pages = {8992--9005}
}
2024
A Capabilities Approach to Studying Bias and Harm in Language Technologies
Hellina Hailu Nigatu, Zeerak Talat. WAI Amsterdam 2024. BibTeX @misc{nigatu2024capabilitiesapproachstudyingbias,
  author = {Nigatu, Hellina Hailu and Talat, Zeerak},
  title = {A Capabilities Approach to Studying Bias and Harm in Language Technologies},
  year = {2024},
  eprint = {2411.04298},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {https://arxiv.org/abs/2411.04298},
}
Exploitation All the Way Down: Calling out the Root Cause of Bad Online Experiences for Users of the Majority World
Hellina Hailu Nigatu, Zeerak Talat. Data and AI Governance Coalition at 19th Annual Meeting of Internet Governance Forum. BibTeX @misc{nigatu2024exploitationwaydowncalling,
  author = {Nigatu, Hellina Hailu and Talat, Zeerak},
  title = {Exploitation All the Way Down: Calling out the Root Cause of Bad Online Experiences for Users of the 'Majority World'},
  year = {2024},
  eprint = {2501.14748},
  archivePrefix = {arXiv},
  primaryClass = {cs.CY},
  url = {https://arxiv.org/abs/2501.14748},
}
Ethics Whitepaper: Whitepaper on Ethical Research into Large Language Models
Eddie L. Ungless, Nikolas Vitsakis, Zeerak Talat, James Garforth, Björn Ross, Arno Onken, Atoosa Kasirzadeh, Alexandra Birch. 2024. BibTeX @misc{Ungless_Ethics_2024,
  author = {Ungless, Eddie L. and Vitsakis, Nikolas and Talat, Zeerak and Garforth, James and Ross, Björn and Onken, Arno and Kasirzadeh, Atoosa and Birch, Alexandra},
  title = {Ethics {Whitepaper}: {Whitepaper} on {Ethical} {Research} into {Large} {Language} {Models}},
  shorttitle = {Ethics {Whitepaper}},
  month = oct,
  year = {2024},
  publisher = {arXiv},
  eprint = {2410.19812},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/2410.19812},
  urldate = {2024-11-05},
  abstract = {This whitepaper offers an overview of the ethical considerations surrounding research into or with large language models (LLMs). As LLMs become more integrated into widely used applications, their societal impact increases, bringing important ethical questions to the forefront. With a growing body of work examining the ethical development, deployment, and use of LLMs, this whitepaper provides a comprehensive and practical guide to best practices, designed to help those in research and in industry to uphold the highest ethical standards in their work.},
  keywords = {Computer Science - Computation and Language, Computer Science - Computers and Society},
  annote = {Comment: 47 pages},
}
Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)
Yi-ling Chung, Zeerak Talat, Debora Nozza, Flor Miriam Plaza-del-Arco, Paul Röttger, Aida Mostafazadeh Davani, Agostina Calabrese. WOAH 2024. BibTeX @proceedings{woah-2024-online,
  title = {Proceedings of the 8th Workshop on Online Abuse and Harms ({WOAH} 2024)},
  editor = {Chung, Yi-Ling and
    Talat, Zeerak and
    Nozza, Debora and
    Plaza-del-Arco, Flor Miriam and
    R{\"o}ttger, Paul and
    Mostafazadeh Davani, Aida and
    Calabrese, Agostina},
  month = jun,
  year = {2024},
  address = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2024.woah-1.0/}
}
LLMs produce racist output when prompted in African American English
Su Lin Blodgett, Zeerak Talat. News and Views 2024. BibTeX @article{Blodgett_LLMs_2024,
  author = {Blodgett, Su Lin and Talat, Zeerak},
  title = {{LLMs} produce racist output when prompted in {African} {American} {English}},
  journal = {Nature},
  volume = {633},
  number = {8028},
  pages = {40--41},
  month = sep,
  year = {2024},
  issn = {0028-0836, 1476-4687},
  doi = {10.1038/d41586-024-02527-x},
  url = {https://www.nature.com/articles/d41586-024-02527-x},
  urldate = {2025-05-04},
  language = {en},
  copyright = {https://www.springernature.com/gp/researchers/text-and-data-mining},
}
Metrics for What, Metrics for Whom: Assessing Actionability of Bias Evaluation Metrics in NLP
Pieter Delobelle, Giuseppe Attanasio, Debora Nozza, Su Lin Blodgett, Zeerak Talat. EMNLP 2024. BibTeX @inproceedings{delobelle-etal-2024-metrics,
  title = {Metrics for What, Metrics for Whom: Assessing Actionability of Bias Evaluation Metrics in {NLP}},
  author = {Delobelle, Pieter and
    Attanasio, Giuseppe and
    Nozza, Debora and
    Blodgett, Su Lin and
    Talat, Zeerak},
  editor = {Al-Onaizan, Yaser and
    Bansal, Mohit and
    Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month = nov,
  year = {2024},
  address = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2024.emnlp-main.1207/},
  doi = {10.18653/v1/2024.emnlp-main.1207},
  pages = {21669--21691}
}
Understanding 'Democratization' in NLP and ML Research
Arjun Subramonian, Vagrant Gautam, Dietrich Klakow, Zeerak Talat. EMNLP 2024. BibTeX @inproceedings{subramonian-etal-2024-understanding,
  title = {Understanding {\textquotedblleft}Democratization{\textquotedblright} in {NLP} and {ML} Research},
  author = {Subramonian, Arjun and
    Gautam, Vagrant and
    Klakow, Dietrich and
    Talat, Zeerak},
  editor = {Al-Onaizan, Yaser and
    Bansal, Mohit and
    Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month = nov,
  year = {2024},
  address = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2024.emnlp-main.184/},
  doi = {10.18653/v1/2024.emnlp-main.184},
  pages = {3151--3166}
}
Classist Tools: Social Class Correlates with Performance in NLP
Amanda Cercas Curry, Giuseppe Attanasio, Zeerak Talat, Dirk Hovy. ACL 2024. BibTeX @inproceedings{curry-etal-2024-classist,
  title = {Classist Tools: Social Class Correlates with Performance in {NLP}},
  author = {Cercas Curry, Amanda and
    Attanasio, Giuseppe and
    Talat, Zeerak and
    Hovy, Dirk},
  editor = {Ku, Lun-Wei and
    Martins, Andre and
    Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month = aug,
  year = {2024},
  address = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2024.acl-long.682/},
  doi = {10.18653/v1/2024.acl-long.682},
  pages = {12643--12655}
}
Documenting Geographically and Contextually Diverse Language Data Sources
Angelina McMillan-Major, Francesco De Toni, Zaid Alyafeai, Stella Biderman, Kimbo Chen, Gérard Dupont, Hady Elsahar, Chris Emezue, Alham Fikri Aji, Suzana Ilić, Nurulaqilla Khamis, Colin Leong, Maraim Masoud, Aitor Soroa, Pedro Ortiz Suarez, Daniel van Strien, Zeerak Talat, Yacine Jernite. NEJLT 2024. BibTeX @misc{mcmillan-majorDocumentingGeographicallyContextually2022,
  author = {McMillan-Major, Angelina and Alyafeai, Zaid and Biderman, Stella and Chen, Kimbo and De Toni, Francesco and Dupont, Gérard and Elsahar, Hady and Emezue, Chris and Aji, Alham Fikri and Ilić, Suzana and Khamis, Nurulaqilla and Leong, Colin and Masoud, Maraim and Soroa, Aitor and Suarez, Pedro Ortiz and Talat, Zeerak and van Strien, Daniel and Jernite, Yacine},
  title = {Documenting {Geographically} and {Contextually} {Diverse} {Data} {Sources}: {The} {BigScience} {Catalogue} of {Language} {Data} and {Resources}},
  shorttitle = {Documenting {Geographically} and {Contextually} {Diverse} {Data} {Sources}},
  month = jan,
  year = {2022},
  eprint = {2201.10066},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/2201.10066},
  urldate = {2022-03-07},
  copyright = {All rights reserved},
  abstract = {In recent years, large-scale data collection efforts have prioritized the amount of data collected in order to improve the modeling capabilities of large language models. This prioritization, however, has resulted in concerns with respect to the rights of data subjects represented in data collections, particularly when considering the difficulty in interrogating these collections due to insufficient documentation and tools for analysis. Mindful of these pitfalls, we present our methodology for a documentation-first, human-centered data collection project as part of the BigScience initiative. We identified a geographically diverse set of target language groups (Arabic, Basque, Chinese, Catalan, English, French, Indic languages, Indonesian, Niger-Congo languages, Portuguese, Spanish, and Vietnamese, as well as programming languages) for which to collect metadata on potential data sources. To structure this effort, we developed our online catalogue as a supporting tool for gathering metadata through organized public hackathons. We present our development process; analyses of the resulting resource metadata, including distributions over languages, regions, and resource types; and our lessons learned in this endeavor.},
  annote = {Comment: 8 pages plus appendix and references},
}
AraOffence: Detecting Offensive Speech Across Dialects in Arabic Media
Youssef Nafea, Shady Shehata, Zeerak Talat, Ahmed Aboeitta, Ahmed Sharshar, Preslav Nakov. Interspeech 2024. BibTeX @inproceedings{nafea24_interspeech,
  title = {{AraOffence}: Detecting Offensive Speech Across Dialects in {Arabic} Media},
  author = {Nafea, Youssef and Shehata, Shady and Talat, Zeerak and Aboeitta, Ahmed and Sharshar, Ahmed and Nakov, Preslav},
  year = {2024},
  booktitle = {Interspeech 2024},
  pages = {4303--4307},
  doi = {10.21437/Interspeech.2024-2077},
  issn = {2958-1796},
}
The Perspectivist Paradigm Shift: Assumptions and Challenges of Capturing Human Labels
Eve Fleisig, Su Lin Blodgett, Dan Klein, Zeerak Talat. NAACL 2024. BibTeX @inproceedings{fleisig-etal-2024-perspectivist,
  title = {The Perspectivist Paradigm Shift: Assumptions and Challenges of Capturing Human Labels},
  author = {Fleisig, Eve and
    Blodgett, Su Lin and
    Klein, Dan and
    Talat, Zeerak},
  editor = {Duh, Kevin and
    Gomez, Helena and
    Bethard, Steven},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month = jun,
  year = {2024},
  address = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2024.naacl-long.126/},
  doi = {10.18653/v1/2024.naacl-long.126},
  pages = {2279--2292}
}
Impoverished Language Technology: The Lack of (Social) Class in NLP
Amanda Cercas Curry, Zeerak Talat, Dirk Hovy. LREC-COLING 2024. BibTeX @inproceedings{cercas-curry-etal-2024-impoverished,
  title = {Impoverished Language Technology: The Lack of (Social) Class in {NLP}},
  author = {Cercas Curry, Amanda and
    Talat, Zeerak and
    Hovy, Dirk},
  editor = {Calzolari, Nicoletta and
    Kan, Min-Yen and
    Hoste, Veronique and
    Lenci, Alessandro and
    Sakti, Sakriani and
    Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month = may,
  year = {2024},
  address = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url = {https://aclanthology.org/2024.lrec-main.761/},
  pages = {8675--8682}
}
Subjective Isms? On the Danger of Conflating Hate and Offence in Abusive Language Detection
Amanda Cercas Curry, Gavin Abercrombie, Zeerak Talat. WOAH 2024. BibTeX @inproceedings{cercas-curry-etal-2024-subjective,
  title = {Subjective Isms? On the Danger of Conflating Hate and Offence in Abusive Language Detection},
  author = {Cercas Curry, Amanda and
    Abercrombie, Gavin and
    Talat, Zeerak},
  editor = {Chung, Yi-Ling and
    Talat, Zeerak and
    Nozza, Debora and
    Plaza-del-Arco, Flor Miriam and
    R{\"o}ttger, Paul and
    Mostafazadeh Davani, Aida and
    Calabrese, Agostina},
  booktitle = {Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)},
  month = jun,
  year = {2024},
  address = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2024.woah-1.22/},
  doi = {10.18653/v1/2024.woah-1.22},
  pages = {275--282}
}
Zero-shot Sentiment Analysis in Low-Resource Languages Using a Multilingual Sentiment Lexicon
Fajri Koto, Tilman Beck, Zeerak Talat, Iryna Gurevych, Timothy Baldwin. EACL 2024. BibTeX @inproceedings{koto-etal-2024-zero,
  title = {Zero-shot Sentiment Analysis in Low-Resource Languages Using a Multilingual Sentiment Lexicon},
  author = {Koto, Fajri and
    Beck, Tilman and
    Talat, Zeerak and
    Gurevych, Iryna and
    Baldwin, Timothy},
  editor = {Graham, Yvette and
    Purver, Matthew},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month = mar,
  year = {2024},
  address = {St. Julian's, Malta},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2024.eacl-long.18/},
  pages = {298--320}
}
2023
Thorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing
Lucie-Aimée Kaffee, Arnav Arora, Zeerak Talat, Isabelle Augenstein. EMNLP Findings 2023. BibTeX @inproceedings{kaffee-etal-2023-thorny,
  title = {Thorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing},
  author = {Kaffee, Lucie-Aim{\'e}e and
    Arora, Arnav and
    Talat, Zeerak and
    Augenstein, Isabelle},
  editor = {Bouamor, Houda and
    Pino, Juan and
    Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  month = dec,
  year = {2023},
  address = {Singapore},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2023.findings-emnlp.932/},
  doi = {10.18653/v1/2023.findings-emnlp.932},
  pages = {13977--13998}
}
Mirages. On Anthropomorphism in Dialogue Systems
Gavin Abercrombie, Amanda Cercas Curry, Tanvi Dinkar, Verena Rieser, Zeerak Talat. EMNLP 2023. BibTeX @inproceedings{abercrombie-etal-2023-mirages,
  title = {Mirages. On Anthropomorphism in Dialogue Systems},
  author = {Abercrombie, Gavin and
    Cercas Curry, Amanda and
    Dinkar, Tanvi and
    Rieser, Verena and
    Talat, Zeerak},
  editor = {Bouamor, Houda and
    Pino, Juan and
    Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month = dec,
  year = {2023},
  address = {Singapore},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2023.emnlp-main.290/},
  doi = {10.18653/v1/2023.emnlp-main.290},
  pages = {4776--4790}
}
Bound by the Bounty: Collaboratively Shaping Evaluation Processes for Queer AI Harms
Organizers of Queer in AI, Nathan Dennler, Anaelia Ovalle, Ashwin Singh, Luca Soldaini, Arjun Subramonian, Huy Tu, William Agnew, Avijit Ghosh, Kyra Yee, Irene Font Peradejordi, Zeerak Talat, Mayra Russo, Jess de Jesus de Pinho Pinhal. Proceedings of the Conference on Artificial Intelligence, Ethics, and Society. BibTeX @inproceedings{dennlerBoundBountyCollaboratively2023,
  address = {Montr{\'e}al, QC, Canada},
  title = {Bound by the {Bounty}: {Collaboratively} {Shaping} {Evaluation} {Processes} for {Queer} {AI} {Harms}},
  copyright = {All rights reserved},
  isbn = {979-8-4007-0231-0},
  shorttitle = {Bound by the {Bounty}},
  url = {https://dl.acm.org/doi/10.1145/3600211.3604682},
  doi = {10.1145/3600211.3604682},
  language = {en},
  urldate = {2024-03-13},
  booktitle = {Proceedings of the 2023 {AAAI}/{ACM} {Conference} on {AI}, {Ethics}, and {Society}},
  publisher = {ACM},
  author = {Dennler, Nathan and Ovalle, Anaelia and Singh, Ashwin and Soldaini, Luca and Subramonian, Arjun and Tu, Huy and Agnew, William and Ghosh, Avijit and Yee, Kyra and Peradejordi, Irene Font and Talat, Zeerak and Russo, Mayra and Pinhal, Jess De Jesus De Pinho},
  month = aug,
  year = {2023},
  pages = {375--386},
}
It's Incomprehensible: On Machine Learning and Decoloniality
Abeba Birhane, Zeerak Talat. Handbook of Critical Studies of AI 2023. ed. Simon Lindgren. BibTeX @incollection{birhaneItIncomprehensibleMachine2023,
  title = {It's Incomprehensible: On Machine Learning and Decoloniality},
  copyright = {All rights reserved},
  isbn = {978-1-80392-856-2 978-1-80392-855-5},
  shorttitle = {It's Incomprehensible},
  url = {https://www.elgaronline.com/view/book/9781803928562/book-part-9781803928562-16.xml},
  urldate = {2023-11-23},
  booktitle = {Handbook of {Critical} {Studies} of {Artificial} {Intelligence}},
  publisher = {Edward Elgar Publishing},
  author = {Birhane, Abeba and Talat, Zeerak},
  editor = {Lindgren, Simon},
  month = nov,
  year = {2023},
  doi = {10.4337/9781803928562.00016},
  pages = {128--140},
}
Proceedings of the 7th Workshop on Online Abuse and Harms (WOAH 2023)
Yi-ling Chung, Paul Röttger, Debora Nozza, Zeerak Talat, Aida Mostafazadeh Davani. WOAH 2023. BibTeX @proceedings{woah-2023-online,
  title = {Proceedings of the Seventh Workshop on Online Abuse and Harms ({WOAH})},
  editor = {Chung, Yi-ling and R{\"o}ttger, Paul and Nozza, Debora and Talat, Zeerak and Davani, Aida Mostafazadeh},
  month = jul,
  year = {2023},
  address = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2023.woah-1.0/}
}
Futures for Research on Hate Speech in Online Social Media Platforms
Jaime Lee Kirtz, Zeerak Talat. Challenges and Perspectives of Hate Speech 2023. eds. C. Strippel, S. Paasch-Colberg, M. Emmer & J. Trebbe. BibTeX @incollection{kirtzFuturesResearchHate2023,
  author = {Kirtz, Jaime Lee and Talat, Zeerak},
  title = {Futures for {Research} on {Hate} {Speech} in {Online} {Social} {Media} {Platforms}},
  booktitle = {Challenges and Perspectives of Hate Speech Research},
  editor = {Strippel, Christian and Paasch-Colberg, Sünje and Emmer, Martin and Trebbe, Joachim},
  publisher = {Freie Universität Berlin},
  year = {2023},
  isbn = {9783945681121},
  issn = {2198-7610},
  doi = {10.48541/DCR.V12.27},
  url = {https://www.ssoar.info/ssoar/handle/document/86419},
  urldate = {2023-04-27},
  language = {en},
  copyright = {Creative Commons Attribution 4.0 International},
  abstract = {This chapter provides an overview of the various themes and points of connections between the various chapters in this section and outlines the current limitations as well as the major social and technical issues that still need to be addressed in hate speech detection. In particular, Kirtz and Talat discuss the ways in contexts—from legal contexts such as laws determining data collection methods to sociocultural contexts like annotator knowledge—affect the possibilities for the machine learning pipelines. Along with identifying current issues and limitations, Kirtz and Talat delineate future avenues for hate speech detection research.},
}
A Federated Approach for Hate Speech Detection
Jay Gala, Deep Gandhi, Jash Mehta, Zeerak Talat. EACL 2023. BibTeX @inproceedings{gala-etal-2023-federated,
  title = {A Federated Approach for Hate Speech Detection},
  author = {Gala, Jay and
    Gandhi, Deep and
    Mehta, Jash and
    Talat, Zeerak},
  editor = {Vlachos, Andreas and
    Augenstein, Isabelle},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics},
  month = may,
  year = {2023},
  address = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2023.eacl-main.237/},
  doi = {10.18653/v1/2023.eacl-main.237},
  pages = {3248--3259}
}
[Best Paper Award] Queer In AI: A Case Study in Community-Led Participatory AI
Organisers of Queer in AI. FAccT 2023. BibTeX @inproceedings{queerinaiQueerAICase2023,
  address = {Chicago, IL, USA},
  title = {Queer {In} {AI}: {A} {Case} {Study} in {Community}-{Led} {Participatory} {AI}},
  copyright = {All rights reserved},
  isbn = {979-8-4007-0192-4},
  shorttitle = {Queer {In} {AI}},
  url = {https://dl.acm.org/doi/10.1145/3593013.3594134},
  doi = {10.1145/3593013.3594134},
  language = {en},
  urldate = {2023-07-03},
  booktitle = {2023 {ACM} {Conference} on {Fairness}, {Accountability}, and {Transparency}},
  publisher = {ACM},
  author = {{Organizers of Queer in AI} and Ovalle, Anaelia and Subramonian, Arjun and Singh, Ashwin and Voelcker, Claas and Sutherland, Danica J. and Locatelli, Davide and Breznik, Eva and Klubicka, Filip and Yuan, Hang and J, Hetvi and Zhang, Huan and Shriram, Jaidev and Lehman, Kruno and Soldaini, Luca and Sap, Maarten and Deisenroth, Marc Peter and Pacheco, Maria Leonor and Ryskina, Maria and Mundt, Martin and Agarwal, Milind and Mclean, Nyx and Xu, Pan and Pranav, A and Korpan, Raj and Ray, Ruchira and Mathew, Sarah and Arora, Sarthak and John, St and Anand, Tanvi and Agrawal, Vishakha and Agnew, William and Long, Yanan and Wang, Zijie J. and Talat, Zeerak and Ghosh, Avijit and Dennler, Nathaniel and Noseworthy, Michael and Jha, Sharvani and Baylor, Emi and Joshi, Aditya and Bilenko, Natalia Y. and Mcnamara, Andrew and Gontijo-Lopes, Raphael and Markham, Alex and Dong, Evyn and Kay, Jackie and Saraswat, Manu and Vytla, Nikhil and Stark, Luke},
  month = jun,
  year = {2023},
  keywords = {Machine Learning, Science and Technology Studies, Social media, 1925-1995, 1926-1984, 1983, cite, Computer Vision, Computers and Society, Content Moderation, Deviance, Digital Rhetoric, Feminism, Gender, Programming languages, Social Representations, User-generated Content},
  pages = {1882--1895},
}
2022
Back to the Future: On Potential Histories in NLP
Zeerak Talat, Anne Lauscher. 2022. BibTeX @misc{talatBackFuturePotential2022,
  author = {Talat, Zeerak and Lauscher, Anne},
  title = {Back to the {Future}: {On} {Potential} {Histories} in {NLP}},
  shorttitle = {Back to the {Future}},
  month = oct,
  year = {2022},
  publisher = {arXiv},
  eprint = {2210.06245},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/2210.06245},
  urldate = {2023-01-18},
  copyright = {All rights reserved},
  abstract = {Machine learning and NLP require the construction of datasets to train and fine-tune models. In this context, previous work has demonstrated the sensitivity of these data sets. For instance, potential societal biases in this data are likely to be encoded and to be amplified in the models we deploy. In this work, we draw from developments in the field of history and take a novel perspective on these problems: considering datasets and models through the lens of historical fiction surfaces their political nature, and affords re-configuring how we view the past, such that marginalized discourses are surfaced. Building on such insights, we argue that contemporary methods for machine learning are prejudiced towards dominant and hegemonic histories. Employing the example of neopronouns, we show that by surfacing marginalized histories within contemporary conditions, we can create models that better represent the lived realities of traditionally marginalized and excluded communities.},
}
BLOOM: A 176B-Parameter Open-Access Multilingual Language Model
Teven Le Scao, et al.. 2022. BibTeX @misc{Scao_BLOOM_2023,
title = {{BLOOM}: {A} {176B}-{Parameter} {Open}-{Access} {Multilingual} {Language} {Model}},
copyright = {All rights reserved},
shorttitle = {{BLOOM}},
url = {http://arxiv.org/abs/2211.05100},
abstract = {Large language models (LLMs) have been shown to be able to perform new tasks based on a few demonstrations or natural language instructions. While these capabilities have led to widespread adoption, most LLMs are developed by resource-rich organizations and are frequently kept from the public. As a step towards democratizing this powerful technology, we present BLOOM, a 176B-parameter open-access language model designed and built thanks to a collaboration of hundreds of researchers. BLOOM is a decoder-only Transformer language model that was trained on the ROOTS corpus, a dataset comprising hundreds of sources in 46 natural and 13 programming languages (59 in total). We find that BLOOM achieves competitive performance on a wide variety of benchmarks, with stronger results after undergoing multitask prompted finetuning. To facilitate future research and applications using LLMs, we publicly release our models and code under the Responsible AI License.},
urldate = {2023-07-03},
publisher = {arXiv},
author = {Scao, Teven Le and Fan, Angela and Akiki, Christopher and Pavlick, Ellie and Ilić, Suzana and Hesslow, Daniel and Castagné, Roman and Luccioni, Alexandra Sasha and Yvon, François and Gallé, Matthias and Tow, Jonathan and Rush, Alexander M. and Biderman, Stella and Webson, Albert and Ammanamanchi, Pawan Sasanka and Wang, Thomas and Sagot, Benoît and Muennighoff, Niklas and del Moral, Albert Villanova and Ruwase, Olatunji and Bawden, Rachel and Bekman, Stas and McMillan-Major, Angelina and Beltagy, Iz and Nguyen, Huu and Saulnier, Lucile and Tan, Samson and Suarez, Pedro Ortiz and Sanh, Victor and Laurençon, Hugo and Jernite, Yacine and Launay, Julien and Mitchell, Margaret and Raffel, Colin and Gokaslan, Aaron and Simhi, Adi and Soroa, Aitor and Aji, Alham Fikri and Alfassy, Amit and Rogers, Anna and Nitzav, Ariel Kreisberg and Xu, Canwen and Mou, Chenghao and Emezue, Chris and Klamm, Christopher and Leong, Colin and van Strien, Daniel and Adelani, David Ifeoluwa and Radev, Dragomir and Ponferrada, Eduardo González and Levkovizh, Efrat and Kim, Ethan and Natan, Eyal Bar and De Toni, Francesco and Dupont, Gérard and Kruszewski, Germán and Pistilli, Giada and Elsahar, Hady and Benyamina, Hamza and Tran, Hieu and Yu, Ian and Abdulmumin, Idris and Johnson, Isaac and Gonzalez-Dios, Itziar and de la Rosa, Javier and Chim, Jenny and Dodge, Jesse and Zhu, Jian and Chang, Jonathan and Frohberg, Jörg and Tobing, Joseph and Bhattacharjee, Joydeep and Almubarak, Khalid and Chen, Kimbo and Lo, Kyle and Von Werra, Leandro and Weber, Leon and Phan, Long and allal, Loubna Ben and Tanguy, Ludovic and Dey, Manan and Muñoz, Manuel Romero and Masoud, Maraim and Grandury, María and Šaško, Mario and Huang, Max and Coavoux, Maximin and Singh, Mayank and Jiang, Mike Tian-Jian and Vu, Minh Chien and Jauhar, Mohammad A. 
and Ghaleb, Mustafa and Subramani, Nishant and Kassner, Nora and Khamis, Nurulaqilla and Nguyen, Olivier and Espejel, Omar and de Gibert, Ona and Villegas, Paulo and Henderson, Peter and Colombo, Pierre and Amuok, Priscilla and Lhoest, Quentin and Harliman, Rheza and Bommasani, Rishi and López, Roberto Luis and Ribeiro, Rui and Osei, Salomey and Pyysalo, Sampo and Nagel, Sebastian and Bose, Shamik and Muhammad, Shamsuddeen Hassan and Sharma, Shanya and Longpre, Shayne and Nikpoor, Somaieh and Silberberg, Stanislav and Pai, Suhas and Zink, Sydney and Torrent, Tiago Timponi and Schick, Timo and Thrush, Tristan and Danchev, Valentin and Nikoulina, Vassilina and Laippala, Veronika and Lepercq, Violette and Prabhu, Vrinda and Alyafeai, Zaid and Talat, Zeerak and Raja, Arun and Heinzerling, Benjamin and Si, Chenglei and Taşar, Davut Emre and Salesky, Elizabeth and Mielke, Sabrina J. and Lee, Wilson Y. and Sharma, Abheesht and Santilli, Andrea and Chaffin, Antoine and Stiegler, Arnaud and Datta, Debajyoti and Szczechla, Eliza and Chhablani, Gunjan and Wang, Han and Pandey, Harshit and Strobelt, Hendrik and Fries, Jason Alan and Rozen, Jos and Gao, Leo and Sutawika, Lintang and Bari, M. Saiful and Al-shaibani, Maged S. and Manica, Matteo and Nayak, Nihal and Teehan, Ryan and Albanie, Samuel and Shen, Sheng and Ben-David, Srulik and Bach, Stephen H. 
and Kim, Taewoon and Bers, Tali and Fevry, Thibault and Neeraj, Trishala and Thakker, Urmish and Raunak, Vikas and Tang, Xiangru and Yong, Zheng-Xin and Sun, Zhiqing and Brody, Shaked and Uri, Yallow and Tojarieh, Hadar and Roberts, Adam and Chung, Hyung Won and Tae, Jaesung and Phang, Jason and Press, Ofir and Li, Conglong and Narayanan, Deepak and Bourfoune, Hatim and Casper, Jared and Rasley, Jeff and Ryabinin, Max and Mishra, Mayank and Zhang, Minjia and Shoeybi, Mohammad and Peyrounette, Myriam and Patry, Nicolas and Tazi, Nouamane and Sanseviero, Omar and von Platen, Patrick and Cornette, Pierre and Lavallée, Pierre François and Lacroix, Rémi and Rajbhandari, Samyam and Gandhi, Sanchit and Smith, Shaden and Requena, Stéphane and Patil, Suraj and Dettmers, Tim and Baruwa, Ahmed and Singh, Amanpreet and Cheveleva, Anastasia and Ligozat, Anne-Laure and Subramonian, Arjun and Névéol, Aurélie and Lovering, Charles and Garrette, Dan and Tunuguntla, Deepak and Reiter, Ehud and Taktasheva, Ekaterina and Voloshina, Ekaterina and Bogdanov, Eli and Winata, Genta Indra and Schoelkopf, Hailey and Kalo, Jan-Christoph and Novikova, Jekaterina and Forde, Jessica Zosa and Clive, Jordan and Kasai, Jungo and Kawamura, Ken and Hazan, Liam and Carpuat, Marine and Clinciu, Miruna and Kim, Najoung and Cheng, Newton and Serikov, Oleg and Antverg, Omer and van der Wal, Oskar and Zhang, Rui and Zhang, Ruochen and Gehrmann, Sebastian and Mirkin, Shachar and Pais, Shani and Shavrina, Tatiana and Scialom, Thomas and Yun, Tian and Limisiewicz, Tomasz and Rieser, Verena and Protasov, Vitaly and Mikhailov, Vladislav and Pruksachatkun, Yada and Belinkov, Yonatan and Bamberger, Zachary and Kasner, Zdeněk and Rueda, Alice and Pestana, Amanda and Feizpour, Amir and Khan, Ammar and Faranak, Amy and Santos, Ana and Hevia, Anthony and Unldreaj, Antigona and Aghagol, Arash and Abdollahi, Arezoo and Tammour, Aycha and HajiHosseini, Azadeh and Behroozi, Bahareh and Ajibade, Benjamin and Saxena, 
Bharat and Ferrandis, Carlos Muñoz and McDuff, Daniel and Contractor, Danish and Lansky, David and David, Davis and Kiela, Douwe and Nguyen, Duong A. and Tan, Edward and Baylor, Emi and Ozoani, Ezinwanne and Mirza, Fatima and Ononiwu, Frankline and Rezanejad, Habib and Jones, Hessie and Bhattacharya, Indrani and Solaiman, Irene and Sedenko, Irina and Nejadgholi, Isar and Passmore, Jesse and Seltzer, Josh and Sanz, Julio Bonis and Dutra, Livia and Samagaio, Mairon and Elbadri, Maraim and Mieskes, Margot and Gerchick, Marissa and Akinlolu, Martha and McKenna, Michael and Qiu, Mike and Ghauri, Muhammed and Burynok, Mykola and Abrar, Nafis and Rajani, Nazneen and Elkott, Nour and Fahmy, Nour and Samuel, Olanrewaju and An, Ran and Kromann, Rasmus and Hao, Ryan and Alizadeh, Samira and Shubber, Sarmad and Wang, Silas and Roy, Sourav and Viguier, Sylvain and Le, Thanh and Oyebade, Tobi and Le, Trieu and Yang, Yoyo and Nguyen, Zach and Kashyap, Abhinav Ramesh and Palasciano, Alfredo and Callahan, Alison and Shukla, Anima and Miranda-Escalada, Antonio and Singh, Ayush and Beilharz, Benjamin and Wang, Bo and Brito, Caio and Zhou, Chenxi and Jain, Chirag and Xu, Chuxin and Fourrier, Clémentine and Periñán, Daniel León and Molano, Daniel and Yu, Dian and Manjavacas, Enrique and Barth, Fabio and Fuhrimann, Florian and Altay, Gabriel and Bayrak, Giyaseddin and Burns, Gully and Vrabec, Helena U. and Bello, Imane and Dash, Ishani and Kang, Jihyun and Giorgi, John and Golde, Jonas and Posada, Jose David and Sivaraman, Karthik Rangasai and Bulchandani, Lokesh and Liu, Lu and Shinzato, Luisa and de Bykhovetz, Madeleine Hahn and Takeuchi, Maiko and Pàmies, Marc and Castillo, Maria A. 
and Nezhurina, Marianna and Sänger, Mario and Samwald, Matthias and Cullan, Michael and Weinberg, Michael and De Wolf, Michiel and Mihaljcic, Mina and Liu, Minna and Freidank, Moritz and Kang, Myungsun and Seelam, Natasha and Dahlberg, Nathan and Broad, Nicholas Michio and Muellner, Nikolaus and Fung, Pascale and Haller, Patrick and Chandrasekhar, Ramya and Eisenberg, Renata and Martin, Robert and Canalli, Rodrigo and Su, Rosaline and Su, Ruisi and Cahyawijaya, Samuel and Garda, Samuele and Deshmukh, Shlok S. and Mishra, Shubhanshu and Kiblawi, Sid and Ott, Simon and Sang-aroonsiri, Sinee and Kumar, Srishti and Schweter, Stefan and Bharati, Sushil and Laud, Tanmay and Gigant, Théo and Kainuma, Tomoya and Kusa, Wojciech and Labrak, Yanis and Bajaj, Yash Shailesh and Venkatraman, Yash and Xu, Yifan and Xu, Yingxin and Xu, Yu and Tan, Zhe and Xie, Zhongli and Ye, Zifan and Bras, Mathilde and Belkada, Younes and Wolf, Thomas},
month = jun,
year = {2023},
note = {arXiv:2211.05100 [cs]},
keywords = {Natural Language Processing},
}
A Federated Approach to Predicting Emojis in Hindi Tweets
Deep Gandhi, Jash Mehta, Nirali Parekh, Karan Waghela, Lynette D'Mello, Zeerak Talat. EMNLP 2022. BibTeX @inproceedings{gandhi-etal-2022-federated,
  title = {A Federated Approach to Predicting Emojis in {Hindi} Tweets},
  author = {Gandhi, Deep and
    Mehta, Jash and
    Parekh, Nirali and
    Waghela, Karan and
    D{'}Mello, Lynette and
    Talat, Zeerak},
  editor = {Goldberg, Yoav and
    Kozareva, Zornitsa and
    Zhang, Yue},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month = dec,
  year = {2022},
  address = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2022.emnlp-main.819/},
  doi = {10.18653/v1/2022.emnlp-main.819},
  pages = {11951--11961},
}
Directions for NLP Practices Applied to Online Hate Speech Detection
Paula Fortuna, Monica Dominguez, Leo Wanner, Zeerak Talat. EMNLP 2022. BibTeX @inproceedings{fortuna-etal-2022-directions,
  title = {Directions for {NLP} Practices Applied to Online Hate Speech Detection},
  author = {Fortuna, Paula and
    Dominguez, Monica and
    Wanner, Leo and
    Talat, Zeerak},
  editor = {Goldberg, Yoav and
    Kozareva, Zornitsa and
    Zhang, Yue},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month = dec,
  year = {2022},
  address = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2022.emnlp-main.809/},
  doi = {10.18653/v1/2022.emnlp-main.809},
  pages = {11794--11805},
}
Multilingual HateCheck: Functional Tests for Multilingual Hate Speech Detection Models
Paul Röttger, Haitham Seelawi, Debora Nozza, Zeerak Talat, Bertie Vidgen. WOAH 2022. BibTeX @inproceedings{rottger-etal-2022-multilingual,
  title = {Multilingual {HateCheck}: Functional Tests for Multilingual Hate Speech Detection Models},
  author = {R{\"o}ttger, Paul and
    Seelawi, Haitham and
    Nozza, Debora and
    Talat, Zeerak and
    Vidgen, Bertie},
  editor = {Narang, Kanika and
    Mostafazadeh Davani, Aida and
    Mathias, Lambert and
    Vidgen, Bertie and
    Talat, Zeerak},
  booktitle = {Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)},
  month = jul,
  year = {2022},
  address = {Seattle, Washington (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2022.woah-1.15/},
  doi = {10.18653/v1/2022.woah-1.15},
  pages = {154--169},
}
Data Governance in the Age of Large-Scale Data-Driven Language Technology
Yacine Jernite, Huu Nguyen, Stella Biderman, Anna Rogers, Maraim Masoud, Valentin Danchev, Samson Tan, Alexandra Sasha Luccioni, Nishant Subramani, Isaac Johnson, Gérard Dupont, Jesse Dodge, Kyle Lo, Zeerak Talat, Dragomir Radev, Aaron Gokaslan, Somaieh Nikpoor, Peter Henderson, Rishi Bommasani and Margaret Mitchell. FAccT 2022. BibTeX @inproceedings{10.1145/3531146.3534637,
  author = {Jernite, Yacine and Nguyen, Huu and Biderman, Stella and Rogers, Anna and Masoud, Maraim and Danchev, Valentin and Tan, Samson and Luccioni, Alexandra Sasha and Subramani, Nishant and Johnson, Isaac and Dupont, G{\'e}rard and Dodge, Jesse and Lo, Kyle and Talat, Zeerak and Radev, Dragomir and Gokaslan, Aaron and Nikpoor, Somaieh and Henderson, Peter and Bommasani, Rishi and Mitchell, Margaret},
  title = {Data Governance in the Age of Large-Scale Data-Driven Language Technology},
  year = {2022},
  isbn = {9781450393522},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3531146.3534637},
  doi = {10.1145/3531146.3534637},
  abstract = {The recent emergence and adoption of Machine Learning technology, and specifically of Large Language Models, has drawn attention to the need for systematic and transparent management of language data. This work proposes an approach to global language data governance that attempts to organize data management amongst stakeholders, values, and rights. Our proposal is informed by prior work on distributed governance that accounts for human values and grounded by an international research collaboration that brings together researchers and practitioners from 60 countries. The framework we present is a multi-party international governance structure focused on language data, and incorporating technical and organizational tools needed to support its work.},
  booktitle = {Proceedings of the 2022 ACM Conference on Fairness, Accountability, and Transparency},
  pages = {2206--2222},
  numpages = {17},
  keywords = {data rights, datasets, language data, technology governance},
  location = {Seoul, Republic of Korea},
  series = {FAccT '22},
}
On the Machine Learning of Ethical Judgments from Natural Language
Zeerak Talat, Hagen Blix, Josef Valvoda, Maya Indira Ganesh, Ryan Cotterell, Adina Williams. NAACL 2022. BibTeX @inproceedings{talat-etal-2022-machine,
  title = {On the Machine Learning of Ethical Judgments from Natural Language},
  author = {Talat, Zeerak and
    Blix, Hagen and
    Valvoda, Josef and
    Ganesh, Maya Indira and
    Cotterell, Ryan and
    Williams, Adina},
  editor = {Carpuat, Marine and
    de Marneffe, Marie-Catherine and
    Meza Ruiz, Ivan Vladimir},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  month = jul,
  year = {2022},
  address = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2022.naacl-main.56/},
  doi = {10.18653/v1/2022.naacl-main.56},
  pages = {769--779},
}
You Reap What You Sow: On the Challenges of Bias Evaluation Under Multilingual Settings
Zeerak Talat, Aurélie Névéol, Stella Biderman, Miruna Clinciu, Manan Dey, Shayne Longpre, Alexandra Sasha Luccioni, Maraim Masoud, Margaret Mitchell, Dragomir Radev, Shanya Sharma, Arjun Subramonian, Jaesung Tae, Samson Tan, Deepak Tunuguntla, Oskar van der Wal. BigScience #5 2022. BibTeX @inproceedings{Talat_You_2022,
  address = {virtual+Dublin},
  title = {You reap what you sow: {On} the challenges of bias evaluation under multilingual settings},
  copyright = {All rights reserved},
  url = {https://aclanthology.org/2022.bigscience-1.3},
  abstract = {Evaluating bias, fairness, and social impact in monolingual language models is a difficult task. This challenge is further compounded when language modeling occurs in a multilingual context. Considering the implication of evaluation biases for large multilingual language models, we situate the discussion of bias evaluation within a wider context of social scientific research with computational work. We highlight three dimensions of developing multilingual bias evaluation frameworks: (1) increasing transparency through documentation, (2) expanding targets of bias beyond gender, and (3) addressing cultural differences that exist between languages. We further discuss the power dynamics and consequences of training large language models and recommend that researchers remain cognizant of the ramifications of developing such technologies.},
  booktitle = {Proceedings of {BigScience} episode \#5 -- workshop on challenges \& perspectives in creating large language models},
  publisher = {Association for Computational Linguistics},
  author = {Talat, Zeerak and N{\'e}v{\'e}ol, Aur{\'e}lie and Biderman, Stella and Clinciu, Miruna and Dey, Manan and Longpre, Shayne and Luccioni, Sasha and Masoud, Maraim and Mitchell, Margaret and Radev, Dragomir and Sharma, Shanya and Subramonian, Arjun and Tae, Jaesung and Tan, Samson and Tunuguntla, Deepak and van der Wal, Oskar},
  month = may,
  year = {2022},
  pages = {26--41},
}
Proceedings of the 6th Workshop on Online Abuse and Harms (WOAH 2022)
Kanika Narang, Aida Mostafazadeh Davani, Lambert Mathias, Bertie Vidgen, Zeerak Talat. WOAH 2022. BibTeX @proceedings{woah-2022-online,
  title = {Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)},
  editor = {Narang, Kanika and
    Mostafazadeh Davani, Aida and
    Mathias, Lambert and
    Vidgen, Bertie and
    Talat, Zeerak},
  month = jul,
  year = {2022},
  address = {Seattle, Washington (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2022.woah-1.0/},
}
2021
Disembodied Machine Learning: On the Illusion of Objectivity in NLP
Zeerak Talat, Smarika Lulz, Joachim Bingel, Isabelle Augenstein. 2021. BibTeX @unpublished{talatDisembodiedMachineLearning2021,
  title = {Disembodied {Machine} {Learning}: {On} the {Illusion} of {Objectivity} in {NLP}},
  copyright = {All rights reserved},
  shorttitle = {Disembodied {Machine} {Learning}},
  url = {http://arxiv.org/abs/2101.11974},
  abstract = {Machine Learning seeks to identify and encode bodies of knowledge within provided datasets. However, data encodes subjective content, which determines the possible outcomes of the models trained on it. Because such subjectivity enables marginalisation of parts of society, it is termed (social) `bias' and sought to be removed. In this paper, we contextualise this discourse of bias in the ML community against the subjective choices in the development process. Through a consideration of how choices in data and model development construct subjectivity, or biases that are represented in a model, we argue that addressing and mitigating biases is near-impossible. This is because both data and ML models are objects for which meaning is made in each step of the development pipeline, from data selection over annotation to model training and analysis. Accordingly, we find the prevalent discourse of bias limiting in its ability to address social marginalisation. We recommend to be conscientious of this, and to accept that de-biasing methods only correct for a fraction of biases.},
  urldate = {2021-12-26},
  author = {Talat, Zeerak and Lulz, Smarika and Bingel, Joachim and Augenstein, Isabelle},
  month = jan,
  year = {2021},
  note = {arXiv: 2101.11974},
  keywords = {Computers and Society, Artificial intelligence},
}
A Survey of Race, Racism, and Anti-Racism in NLP
Anjalie Field, Su Lin Blodgett, Zeerak Talat, Yulia Tsvetkov. ACL 2021. BibTeX @inproceedings{field-etal-2021-survey,
  title = {A Survey of Race, Racism, and Anti-Racism in {NLP}},
  author = {Field, Anjalie and
    Blodgett, Su Lin and
    Talat, Zeerak and
    Tsvetkov, Yulia},
  editor = {Zong, Chengqing and
    Xia, Fei and
    Li, Wenjie and
    Navigli, Roberto},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month = aug,
  year = {2021},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2021.acl-long.149/},
  doi = {10.18653/v1/2021.acl-long.149},
  pages = {1905--1925},
  abstract = {Despite inextricable ties between race and language, little work has considered race in NLP research and development. In this work, we survey 79 papers from the ACL anthology that mention race. These papers reveal various types of race-related bias in all stages of NLP model development, highlighting the need for proactive consideration of how NLP systems can uphold racial hierarchies. However, persistent gaps in research on race and NLP remain: race has been siloed as a niche topic and remains ignored in many NLP tasks; most work operationalizes race as a fixed single-dimensional variable with a ground-truth label, which risks reinforcing differences produced by historical racism; and the voices of historically marginalized people are nearly absent in NLP literature. By identifying where and how NLP literature has and has not considered race, especially in comparison to related fields, our work calls for inclusion and racial justice in NLP research practices.},
}
HateCheck: Functional Tests for Hate Speech Detection Models
Paul Röttger, Bertie Vidgen, Dong Nguyen, Zeerak Talat, Helen Margetts, Janet Pierrehumbert. ACL 2021. BibTeX @inproceedings{rottger-etal-2021-hatecheck,
  title = {{HateCheck}: Functional Tests for Hate Speech Detection Models},
  author = {R{\"o}ttger, Paul and
    Vidgen, Bertie and
    Nguyen, Dong and
    Talat, Zeerak and
    Margetts, Helen and
    Pierrehumbert, Janet},
  editor = {Zong, Chengqing and
    Xia, Fei and
    Li, Wenjie and
    Navigli, Roberto},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month = aug,
  year = {2021},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2021.acl-long.4/},
  doi = {10.18653/v1/2021.acl-long.4},
  pages = {41--58},
  abstract = {Detecting online hate is a difficult task that even state-of-the-art models struggle with. Typically, hate speech detection models are evaluated by measuring their performance on held-out test data using metrics such as accuracy and F1 score. However, this approach makes it difficult to identify specific model weak points. It also risks overestimating generalisable model performance due to increasingly well-evidenced systematic gaps and biases in hate speech datasets. To enable more targeted diagnostic insights, we introduce HateCheck, a suite of functional tests for hate speech detection models. We specify 29 model functionalities motivated by a review of previous research and a series of interviews with civil society stakeholders. We craft test cases for each functionality and validate their quality through a structured annotation process. To illustrate HateCheck's utility, we test near-state-of-the-art transformer models as well as two popular commercial models, revealing critical model weaknesses.},
}
Learning from the Worst: Dynamically Generated Datasets to Improve Online Hate Detection
Bertie Vidgen, Tristan Thrush, Zeerak Talat, Douwe Kiela. ACL 2021. BibTeX @inproceedings{Vidgen_Learning_2021,
  address = {Online},
  title = {Learning from the {Worst}: {Dynamically} {Generated} {Datasets} to {Improve} {Online} {Hate} {Detection}},
  shorttitle = {Learning from the {Worst}},
  url = {https://aclanthology.org/2021.acl-long.132},
  doi = {10.18653/v1/2021.acl-long.132},
  language = {en},
  urldate = {2024-03-19},
  booktitle = {Proceedings of the 59th {Annual} {Meeting} of the {Association} for {Computational} {Linguistics} and the 11th {International} {Joint} {Conference} on {Natural} {Language} {Processing} ({Volume} 1: {Long} {Papers})},
  publisher = {Association for Computational Linguistics},
  author = {Vidgen, Bertie and Thrush, Tristan and Talat, Zeerak and Kiela, Douwe},
  year = {2021},
  pages = {1667--1682},
}
Dynabench: Rethinking Benchmarking in NLP
Douwe Kiela, Max Bartolo, Yixin Nie, Divyansh Kaushik, Atticus Geiger, Zhengxuan Wu, Bertie Vidgen, Grusha Prasad, Amanpreet Singh, Pratik Ringshia, Zhiyi Ma, Tristan Thrush, Sebastian Riedel, Zeerak Talat, Pontus Stenetorp, Robin Jia, Mohit Bansal, Christopher Potts, Adina Williams. ACL 2021. BibTeX @inproceedings{kiela-etal-2021-dynabench,
  title = {Dynabench: Rethinking Benchmarking in {NLP}},
  author = {Kiela, Douwe and
    Bartolo, Max and
    Nie, Yixin and
    Kaushik, Divyansh and
    Geiger, Atticus and
    Wu, Zhengxuan and
    Vidgen, Bertie and
    Prasad, Grusha and
    Singh, Amanpreet and
    Ringshia, Pratik and
    Ma, Zhiyi and
    Thrush, Tristan and
    Riedel, Sebastian and
    Talat, Zeerak and
    Stenetorp, Pontus and
    Jia, Robin and
    Bansal, Mohit and
    Potts, Christopher and
    Williams, Adina},
  editor = {Toutanova, Kristina and
    Rumshisky, Anna and
    Zettlemoyer, Luke and
    Hakkani-Tur, Dilek and
    Beltagy, Iz and
    Bethard, Steven and
    Cotterell, Ryan and
    Chakraborty, Tanmoy and
    Zhou, Yichao},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  month = jun,
  year = {2021},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2021.naacl-main.324/},
  doi = {10.18653/v1/2021.naacl-main.324},
  pages = {4110--4124},
  abstract = {We introduce Dynabench, an open-source platform for dynamic dataset creation and model benchmarking. Dynabench runs in a web browser and supports human-and-model-in-the-loop dataset creation: annotators seek to create examples that a target model will misclassify, but that another person will not. In this paper, we argue that Dynabench addresses a critical need in our community: contemporary models quickly achieve outstanding performance on benchmark tasks but nonetheless fail on simple challenge examples and falter in real-world scenarios. With Dynabench, dataset creation, model development, and model assessment can directly inform each other, leading to more robust and informative benchmarks. We report on four initial NLP tasks, illustrating these concepts and highlighting the promise of the platform, and address potential objections to dynamic benchmarking as a new standard for the field.},
}
Findings of the WOAH 5 Shared Task on Fine Grained Hateful Memes Detection
Lambert Mathias, Shaoliang Nie, Aida Mostafazadeh Davani, Douwe Kiela, Vinodkumar Prabhakaran, Bertie Vidgen, Zeerak Talat. WOAH 2021. BibTeX @inproceedings{mathias-etal-2021-findings,
  title = {Findings of the {WOAH} 5 Shared Task on Fine Grained Hateful Memes Detection},
  author = {Mathias, Lambert and
    Nie, Shaoliang and
    Mostafazadeh Davani, Aida and
    Kiela, Douwe and
    Prabhakaran, Vinodkumar and
    Vidgen, Bertie and
    Talat, Zeerak},
  editor = {Mostafazadeh Davani, Aida and
    Kiela, Douwe and
    Mathias, Lambert and
    Vidgen, Bertie and
    Prabhakaran, Vinodkumar and
    Talat, Zeerak},
  booktitle = {Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021)},
  month = aug,
  year = {2021},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2021.woah-1.21/},
  doi = {10.18653/v1/2021.woah-1.21},
  pages = {201--206},
  abstract = {We present the results and main findings of the shared task at WOAH 5 on hateful memes detection. The task include two subtasks relating to distinct challenges in the fine-grained detection of hateful memes: (1) the protected category attacked by the meme and (2) the attack type. 3 teams submitted system description papers. This shared task builds on the hateful memes detection task created by Facebook AI Research in 2020.},
}
'Hold on honey, men at work': A semi-supervised approach to detecting sexism in sitcoms
Smriti Singh, Tanvi Anand, Arijit Ghosh Chowdhury, Zeerak Talat. ACL SRW 2021. BibTeX @inproceedings{singh-etal-2021-hold,
  title = {{\textquotedblleft}Hold on honey, men at work{\textquotedblright}: A semi-supervised approach to detecting sexism in sitcoms},
  author = {Singh, Smriti and
    Anand, Tanvi and
    Ghosh Chowdhury, Arijit and
    Talat, Zeerak},
  editor = {Kabbara, Jad and
    Lin, Haitao and
    Paullada, Amandalynne and
    Vamvas, Jannis},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: Student Research Workshop},
  month = aug,
  year = {2021},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2021.acl-srw.19/},
  doi = {10.18653/v1/2021.acl-srw.19},
  pages = {180--185},
  abstract = {Television shows play an important role in propagating societal norms. Owing to the popularity of the situational comedy (sitcom) genre, it contributes significantly to the overall development of society. In an effort to analyze the content of television shows belonging to this genre, we present a dataset of dialogue turns from popular sitcoms annotated for the presence of sexist remarks. We train a text classification model to detect sexism using domain adaptive learning. We apply the model to our dataset to analyze the evolution of sexist content over the years. We propose a domain-specific semi-supervised architecture for the aforementioned detection of sexism. Through extensive experiments, we show that our model often yields better classification performance over generic deep learning based sentence classification that does not employ domain-specific training. We find that while sexism decreases over time on average, the proportion of sexist dialogue for the most sexist sitcom actually increases. A quantitative analysis along with a detailed error analysis presents the case for our proposed methodology},
}
Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021)
Aida Mostafazadeh Davani, Douwe Kiela, Lambert Mathias, Bertie Vidgen, Vinodkumar Prabhakaran, Zeerak Talat. Proceedings of the 5th Workshop on Online Abuse and Harms. BibTeX @proceedings{woah-2021-online,
  title = {Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021)},
  editor = {Mostafazadeh Davani, Aida and
    Kiela, Douwe and
    Mathias, Lambert and
    Vidgen, Bertie and
    Prabhakaran, Vinodkumar and
    Talat, Zeerak},
  month = aug,
  year = {2021},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2021.woah-1.0/},
}
2020
Leaky academia: digital intimacy and open secrets in times of COVID-19
Nanna Thylstrup, Zeerak Talat, Daniela Agostinho. Identities 2020. BibTeX @misc{Thylstrup_Leaky_2020a,
  title = {Leaky academia: digital intimacy and open secrets in times of {COVID}-19},
  shorttitle = {Leaky academia},
  url = {http://www.identitiesjournal.com/4/post/2020/09/leaky-academia-digital-intimacy-and-open-secrets-in-times-of-covid-19.html},
  abstract = {Explore short essays, think pieces, reflections and field notes from the state of the COVID-19 lockdown that help think through the profound social-political effects of this twenty-first century...},
  language = {en},
  urldate = {2025-05-04},
  howpublished = {Identities Journal Blog},
  author = {Thylstrup, Nanna and Talat, Zeerak and Agostinho, Daniela},
  year = {2020},
}
Detecting East Asian Prejudice on Social Media
Bertie Vidgen, Austin Botelho, David Broniatowski, Ella Guest, Matthew Hall, Helen Margetts, Rebekah Tromble, Zeerak Talat, Scott Hale. WOAH 2020. BibTeX @inproceedings{vidgen-etal-2020-detecting,
  title = {Detecting {East} {Asian} Prejudice on Social Media},
  author = {Vidgen, Bertie and
    Hale, Scott A. and
    Guest, Ella and
    Margetts, Helen and
    Broniatowski, David and
    Talat, Zeerak and
    Botelho, Austin and
    Hall, Matthew and
    Tromble, Rebekah},
  editor = {Akiwowo, Seyi and
    Vidgen, Bertie and
    Prabhakaran, Vinodkumar and
    Talat, Zeerak},
  booktitle = {Proceedings of the Fourth Workshop on Online Abuse and Harms},
  month = nov,
  year = {2020},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2020.alw-1.19/},
  doi = {10.18653/v1/2020.alw-1.19},
  pages = {162--172},
  abstract = {During COVID-19 concerns have heightened about the spread of aggressive and hateful language online, especially hostility directed against East Asia and East Asian people. We report on a new dataset and the creation of a machine learning classifier that categorizes social media posts from Twitter into four classes: Hostility against East Asia, Criticism of East Asia, Meta-discussions of East Asian prejudice, and a neutral class. The classifier achieves a macro-F1 score of 0.83. We then conduct an in-depth ground-up error analysis and show that the model struggles with edge cases and ambiguous content. We provide the 20,000 tweet training dataset (annotated by experienced analysts), which also contains several secondary categories and additional flags. We also provide the 40,000 original annotations (before adjudication), the full codebook, annotations for COVID-19 relevance and East Asian relevance and stance for 1,000 hashtags, and the final model.},
}
Online Abuse and Human Rights: WOAH Satellite Session at RightsCon 2020
Vinodkumar Prabhakaran, Zeerak Talat, Seyi Akiwowo, Bertie Vidgen. WOAH 2020. BibTeX @inproceedings{prabhakaran-etal-2020-online,
  title = {Online Abuse and Human Rights: {WOAH} Satellite Session at {RightsCon} 2020},
  author = {Prabhakaran, Vinodkumar and
    Talat, Zeerak and
    Akiwowo, Seyi and
    Vidgen, Bertie},
  editor = {Akiwowo, Seyi and
    Vidgen, Bertie and
    Prabhakaran, Vinodkumar and
    Talat, Zeerak},
  booktitle = {Proceedings of the Fourth Workshop on Online Abuse and Harms},
  month = nov,
  year = {2020},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2020.alw-1.1/},
  doi = {10.18653/v1/2020.alw-1.1},
  pages = {1--6},
  abstract = {In 2020 The Workshop on Online Abuse and Harms (WOAH) held a satellite panel at RightsCons 2020, an international human rights conference. Our aim was to bridge the gap between human rights scholarship and Natural Language Processing (NLP) research communities in tackling online abuse. We report on the discussions that took place, and present an analysis of four key issues which emerged: Problems in tackling online abuse, Solutions, Meta concerns and the Ecosystem of content moderation and research. We argue there is a pressing need for NLP research communities to engage with human rights perspectives, and identify four key ways in which NLP research into online abuse could immediately be enhanced to create better and more ethical solutions.},
}
Proceedings of the 4th Workshop on Online Abuse and Harms (WOAH 2020)
Seyi Akiwowo, Bertie Vidgen, Vinodkumar Prabhakaran, Zeerak Talat. WOAH 2020. BibTeX @proceedings{alw-2020-online,
  title = {Proceedings of the Fourth Workshop on Online Abuse and Harms},
  editor = {Akiwowo, Seyi and
    Vidgen, Bertie and
    Prabhakaran, Vinodkumar and
    Talat, Zeerak},
  month = nov,
  year = {2020},
  address = {Online},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2020.alw-1.0/},
}
2019
Detecting 'Dirt' and 'Toxicity': Rethinking Content Moderation as Pollution Behaviour
Nanna Bonde Thylstrup, Zeerak Talat. 2019. BibTeX @article{thylstrupDetectingDirtToxicity2020,
  author     = {Thylstrup, Nanna and Talat, Zeerak},
  title      = {Detecting ‘{Dirt}’ and ‘{Toxicity}’: {Rethinking} {Content} {Moderation} as {Pollution} {Behaviour}},
  shorttitle = {Detecting ‘{Dirt}’ and ‘{Toxicity}’},
  journal    = {SSRN Electronic Journal},
  year       = {2020},
  issn       = {1556-5068},
  doi        = {10.2139/ssrn.3709719},
  url        = {https://www.ssrn.com/abstract=3709719},
  urldate    = {2021-12-26},
  language   = {en},
  copyright  = {All rights reserved},
}
Proceedings of the 2019 Workshop on Widening NLP
Amittai Axelrod, Diyi Yang, Rossana Cunha, Samira Shaikh, Zeerak Talat. WiNLP 2019. BibTeX @proceedings{winlp-2019-36,
  title = {Proceedings of the 2019 Workshop on Widening NLP},
  editor = {Axelrod, Amittai and
    Yang, Diyi and
    Cunha, Rossana and
    Shaikh, Samira and
    Talat, Zeerak},
  month = aug,
  year = {2019},
  address = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/W19-3600/}
}
Proceedings of the 3rd Workshop on Abusive Language Online
Sarah T. Roberts, Joel Tetreault, Vinodkumar Prabhakaran, Zeerak Talat. WOAH 2019. BibTeX @proceedings{ws-2019-abusive,
  title = {Proceedings of the Third Workshop on Abusive Language Online},
  editor = {Roberts, Sarah T. and
    Tetreault, Joel and
    Prabhakaran, Vinodkumar and
    Talat, Zeerak},
  month = aug,
  year = {2019},
  address = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/W19-3500/}
}
2018
Proceedings of the 2nd Workshop on Abusive Language Online
Darja Fišer, Ruihong Huang, Vinodkumar Prabhakaran, Rob Voigt, Zeerak Talat, Jacqueline Wernimont. WOAH 2018. BibTeX @proceedings{ws-2018-abusive,
  title = {Proceedings of the 2nd Workshop on Abusive Language Online ({ALW}2)},
  editor = {Fi{\v{s}}er, Darja and
    Huang, Ruihong and
    Prabhakaran, Vinodkumar and
    Voigt, Rob and
    Talat, Zeerak and
    Wernimont, Jacqueline},
  month = oct,
  year = {2018},
  address = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/W18-5100/}
}
Bridging the Gaps: Multi Task Learning for Domain Transfer of Hate Speech Detection Zeerak Talat, James Thorne, Joachim Bingel. Online Harassment 2018. eds. Jennifer Golbeck. BibTeX @incollection{talatBridgingGapsMulti2018,
  author = {Talat, Zeerak and Thorne, James and Bingel, Joachim},
  title = {Bridging the {Gaps}: {Multi} {Task} {Learning} for {Domain} {Transfer} of {Hate} {Speech} {Detection}},
  shorttitle = {Bridging the {Gaps}},
  editor = {Golbeck, Jennifer},
  booktitle = {Online {Harassment}},
  series = {Human--Computer Interaction Series},
  publisher = {Springer International Publishing},
  address = {Cham},
  year = {2018},
  pages = {29--55},
  isbn = {978-3-319-78582-0 978-3-319-78583-7},
  doi = {10.1007/978-3-319-78583-7_3},
  url = {https://link.springer.com/10.1007/978-3-319-78583-7_3},
  urldate = {2022-01-21},
  language = {en},
  copyright = {All rights reserved},
}
2017
Understanding Abuse: A Typology of Abusive Language Detection Subtasks Zeerak Talat, Thomas Davidson, Dana Warmsley and Ingmar Weber. WOAH 2017. BibTeX @inproceedings{talat-etal-2017-understanding,
  title = {Understanding Abuse: A Typology of Abusive Language Detection Subtasks},
  author = {Talat, Zeerak and
    Davidson, Thomas and
    Warmsley, Dana and
    Weber, Ingmar},
  editor = {Talat, Zeerak and
    Chung, Wendy Hui Kyong and
    Hovy, Dirk and
    Tetreault, Joel},
  booktitle = {Proceedings of the First Workshop on Abusive Language Online},
  month = aug,
  year = {2017},
  address = {Vancouver, BC, Canada},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/W17-3012/},
  doi = {10.18653/v1/W17-3012},
  pages = {78--84}
}
Proceedings of the 1st Workshop on Abusive Language Online Zeerak Talat, Wendy Hui Kyong Chung, Dirk Hovy, and Joel Tetreault. WOAH 2017. BibTeX @proceedings{ws-2017-abusive,
  title = {Proceedings of the First Workshop on Abusive Language Online},
  editor = {Talat, Zeerak and
    Chung, Wendy Hui Kyong and
    Hovy, Dirk and
    Tetreault, Joel},
  month = aug,
  year = {2017},
  address = {Vancouver, BC, Canada},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/W17-3000/}
}
2016
Are You a Racist or Am I Seeing Things? Annotator Influence on Hate Speech Detection on Twitter Zeerak Talat. NLP+CSS 2016. BibTeX @inproceedings{talat-2016-racist,
  title = {Are You a Racist or Am {I} Seeing Things? Annotator Influence on Hate Speech Detection on {Twitter}},
  author = {Talat, Zeerak},
  editor = {Bamman, David and
    Doğruöz, A. Seza and
    Eisenstein, Jacob and
    Hovy, Dirk and
    Jurgens, David and
    O{'}Connor, Brendan and
    Oh, Alice and
    Tsur, Oren and
    Volkova, Svitlana},
  booktitle = {Proceedings of the First Workshop on {NLP} and Computational Social Science},
  month = nov,
  year = {2016},
  address = {Austin, Texas},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/W16-5618/},
  doi = {10.18653/v1/W16-5618},
  pages = {138--142}
}
Hateful Symbols or Hateful People? Predictive Features for Hate Speech Detection on Twitter Zeerak Talat, Dirk Hovy. NAACL SRW 2016. BibTeX @inproceedings{talat-hovy-2016-hateful,
  title = {Hateful Symbols or Hateful People? Predictive Features for Hate Speech Detection on {Twitter}},
  author = {Talat, Zeerak and
    Hovy, Dirk},
  editor = {Andreas, Jacob and
    Choi, Eunsol and
    Lazaridou, Angeliki},
  booktitle = {Proceedings of the {NAACL} Student Research Workshop},
  month = jun,
  year = {2016},
  address = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/N16-2013/},
  doi = {10.18653/v1/N16-2013},
  pages = {88--93}
}