There is a newer version of this record available.

Dataset Open Access

SemEval 2019 Task 4 - Hyperpartisan News Detection

Johannes Kiesel; Martin Potthast; Maria Mestre; Rishabh Shukla; Benno Stein; David Corney; Emmanuel Vincent; Payam Adineh


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<!--
  MARC21 (slim namespace) export of record 1406208, the dataset
  "SemEval 2019 Task 4 - Hyperpartisan News Detection".
  Datafields are not sorted by tag number; sibling order is kept exactly as exported.
  Tag meanings noted below follow the visible values; where the values alone do not
  settle the meaning the note is hedged. Confirm against the MARC 21 bibliographic format.
-->
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nmm##2200000uu#4500</leader>
  <!-- 041: language code of the resource. -->
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <!-- 653 (repeated): free-text keywords, one datafield per term. -->
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Hyperpartisan news</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">SemEval</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">SemEval 2019</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">SemEval 2019 Task 4</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Biased news</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">News bias</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Hyperpartisan</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Hyperpartisanship</subfield>
  </datafield>
  <!-- 005: timestamp-like value (looks like YYYYMMDDhhmmss.f); presumably the last modification time. TODO confirm. -->
  <controlfield tag="005">20211213111305.0</controlfield>
  <!-- 001: record identifier; the same number appears in the file URLs, the OAI identifier and the DOI below. -->
  <controlfield tag="001">1406208</controlfield>
  <!-- 711: the associated meeting (full name in a, short name in g, part in n). -->
  <datafield tag="711" ind1=" " ind2=" ">
    <subfield code="g">SemEval-2019</subfield>
    <subfield code="a">International Workshop on Semantic Evaluation 2019</subfield>
    <subfield code="n">Task 4</subfield>
  </datafield>
  <!--
    700 (repeated): authors other than the first author, who is recorded in tag 100.
    Subfield a is the name, subfield u the affiliation; David Corney has no affiliation subfield.
  -->
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Leipzig University</subfield>
    <subfield code="a">Martin Potthast</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Factmata Ltd.</subfield>
    <subfield code="a">Maria Mestre</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Factmata Ltd.</subfield>
    <subfield code="a">Rishabh Shukla</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="a">Benno Stein</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">David Corney</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Factmata Ltd.</subfield>
    <subfield code="a">Emmanuel Vincent</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="a">Payam Adineh</subfield>
  </datafield>
  <!--
    856 (repeated): the files attached to this record.
    Subfield u is the download URL and subfield z the MD5 checksum of the file.
    Subfield s is presumably the file size in bytes. TODO confirm.
  -->
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1381470031</subfield>
    <subfield code="z">md5:c3e85da69f0ec76d30a2c1a0b22d3150</subfield>
    <subfield code="u">https://zenodo.org/record/1406208/files/articles-training-20180831.xml.zip</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">547904088</subfield>
    <subfield code="z">md5:5dd17f5043f130407cf599d585ba4ca9</subfield>
    <subfield code="u">https://zenodo.org/record/1406208/files/articles-validation-20180831.xml.zip</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">2129</subfield>
    <subfield code="z">md5:31e3fb439c98b18cd74a7d936a65b218</subfield>
    <subfield code="u">https://zenodo.org/record/1406208/files/article.xsd</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">29988422</subfield>
    <subfield code="z">md5:7ea315edde4f500b554571f388a2fa46</subfield>
    <subfield code="u">https://zenodo.org/record/1406208/files/ground-truth-training-20180831.xml.zip</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">6984845</subfield>
    <subfield code="z">md5:50fdbd01f9eef4902a0e6fe93a360577</subfield>
    <subfield code="u">https://zenodo.org/record/1406208/files/ground-truth-validation-20180831.xml.zip</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1628</subfield>
    <subfield code="z">md5:81dd0e153d6f78ca10a5599da6aac66e</subfield>
    <subfield code="u">https://zenodo.org/record/1406208/files/ground-truth.xsd</subfield>
  </datafield>
  <!-- 542: access status of the record ("open"). -->
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <!-- 856 without size or checksum: an external link, labelled by subfield y. -->
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="y">Conference website</subfield>
    <subfield code="u">http://alt.qcri.org/semeval2019/</subfield>
  </datafield>
  <!-- 260: a date in YYYY-MM-DD form; presumably the publication date. TODO confirm. -->
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2018-09-03</subfield>
  </datafield>
  <!-- 909 C/O: OAI identifier in subfield o; the subfield p values look like OAI set names. TODO confirm. -->
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire_data</subfield>
    <subfield code="p">user-pan</subfield>
    <subfield code="p">user-webis</subfield>
    <subfield code="o">oai:zenodo.org:1406208</subfield>
  </datafield>
  <!-- 100: first author, with affiliation (u) and an ORCID identifier (0). -->
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">Bauhaus-Universität Weimar</subfield>
    <subfield code="0">(orcid)0000-0002-1617-6508</subfield>
    <subfield code="a">Johannes Kiesel</subfield>
  </datafield>
  <!-- 245: title of the record. -->
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">SemEval 2019 Task 4 - Hyperpartisan News Detection</subfield>
  </datafield>
  <!-- 980 (repeated here and once more near the end): the values repeat the "user-" names from 909 subfield p, so these look like collection markers. TODO confirm. -->
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-pan</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-webis</subfield>
  </datafield>
  <!-- 540: license name (a) and the URL of its legal text (u). -->
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">https://creativecommons.org/licenses/by/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution 4.0 International</subfield>
  </datafield>
  <!-- 650: short license identifier (a) together with the scheme it comes from (2). -->
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <!--
    520: description of the dataset. The subfield holds HTML that is entity-escaped
    as text, so the line breaks and blank lines inside it are part of the value.
    Do not re-indent or pretty-print this element.
  -->
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;Third trial dataset for the SemEval 2019 Task 4: Hyperpartisan News Detection.&lt;/p&gt;

&lt;p&gt;The dataset contains 1 million articles. It is split in training (200,000 left, 400,000 least, 200,000 right) and validation (50,000 left, 100,000 least, 50,000 right), where &lt;strong&gt;no&lt;/strong&gt; publisher that occurs in the training set also occurs in the validation set. All articles are labeled by the overall bias of the publisher as provided by BuzzFeed journalists or MediaBiasFactCheck.com.&lt;/p&gt;

&lt;p&gt;The trial data is not fully cleaned. Due to some encoding error, some characters are replaced by question marks. However, all files are already fully compatible with the XML schema files.&lt;/p&gt;</subfield>
  </datafield>
  <!-- 773 (repeated): related identifiers. Subfield i is the relation type, n the identifier scheme, a the identifier itself. -->
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">url</subfield>
    <subfield code="i">isReferencedBy</subfield>
    <subfield code="a">https://pan.webis.de/semeval19/semeval19-web/</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isVersionOf</subfield>
    <subfield code="a">10.5281/zenodo.1310145</subfield>
  </datafield>
  <!-- 024: DOI of this record (identifier in a, scheme in 2). -->
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.1406208</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <!-- 980 with value "dataset": presumably the resource type of the record. TODO confirm. -->
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">dataset</subfield>
  </datafield>
</record>
19,498 views
14,704 downloads
Statistic | All versions | This version
Views | 19,498 | 8,808
Downloads | 14,704 | 1,895
Data volume | 5.3 TB | 960.0 GB
Unique views | 16,088 | 8,339
Unique downloads | 4,046 | 519

Share

Cite as