<?xml version='1.0' encoding='UTF-8'?>
<codeBook version="1.2.2" ID="KEN_2017-2018_MLCRM_v01_M" xml-lang="en" xmlns="http://www.icpsr.umich.edu/DDI" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.icpsr.umich.edu/DDI http://www.icpsr.umich.edu/DDI/Version1-2-2.xsd">
  <docDscr>
    <citation>
      <titlStmt>
        <titl>
          Road Safety in Kenya
        </titl>
        <subTitl>
          P158664
        </subTitl>
        <altTitl>
          --
        </altTitl>
        <IDNo>
          DDI_KEN_2017-2018_MLCRM_v01_M_WB
        </IDNo>
      </titlStmt>
      <prodStmt>
        <producer abbr="DECDG" affiliation="World Bank" role="Documentation of the study">
          Development Economics Data Group
        </producer>
        <prodDate date="2021-03-24">
          2021-03-24
        </prodDate>
        <software version="4.0.9" date="2013-04-23">
          Nesstar Publisher
        </software>
      </prodStmt>
      <verStmt>
        <version>
          Version 01 (March 2021)
        </version>
      </verStmt>
    </citation>
    <docSrc>
      <titlStmt>
        <titl>
          KEN_2017-2018_MLCRM_v01_M
        </titl>
        <subTitl>
          --
        </subTitl>
      </titlStmt>
      <prodStmt>
        <prodPlac>
          --
        </prodPlac>
      </prodStmt>
    </docSrc>
  </docDscr>
  <stdyDscr>
    <citation>
      <titlStmt>
        <titl>
          Manually Labelled Crash Reports from Ma3Route 2017-2018
        </titl>
        <altTitl>
          MLCRM 2017-2018
        </altTitl>
        <IDNo>
          KEN_2017-2018_MLCRM_v01_M
        </IDNo>
      </titlStmt>
      <rspStmt>
        <AuthEnty>
          World Bank
        </AuthEnty>
      </rspStmt>
      <prodStmt>
        <producer affiliation="Development Impact Evaluation Department, World Bank">
          Sveta Milusheva
        </producer>
        <producer affiliation="Development Impact Evaluation Department, World Bank">
          Robert Marty
        </producer>
        <producer affiliation="Development Impact Evaluation Department, World Bank">
          Guadalupe Bedoya
        </producer>
        <producer affiliation="School of Architecture and Planning, Massachusetts Institute of Technology">
          Sarah Williams
        </producer>
        <producer affiliation="School of Information, University of California, Berkeley">
          Elizabeth Resor
        </producer>
        <producer affiliation="Development Impact Evaluation Department, World Bank">
          Arianna Legovini
        </producer>
        <software version="4.0.9" date="2013-04-23">
          Nesstar Publisher
        </software>
      </prodStmt>
      <distStmt>
        <contact affiliation="World Bank" email="rmarty@worldbank.org">
          Robert Marty
        </contact>
      </distStmt>
      <serStmt URI="KEN_2012-2020_TIMLCRM_v01_M">
        <serInfo>
          An associated "Tweet IDs From Ma3Route 2012-2020" dataset is available at: https://microdata.worldbank.org/index.php/catalog/3820
        </serInfo>
      </serStmt>
      <verStmt>
        <version>
          <![CDATA[- v2.1:  Edited, anonymous dataset for public distribution.]]>
        </version>
        <verResp affiliation=" "/>
      </verStmt>
    </citation>
    <stdyInfo>
      <abstract>
        The purpose of the Tweet IDs and Manually Labelled Crash Reports from Ma3Route 2017-2018 project is to identify tweets from the @Ma3Route twitter handles that report road traffic crash reports. Using the Twitter API, tweets were scraped from Ma3Route, which is a mobile/web/SMS platform that crowdsources transport data and provides users with information on road traffic crash reports as well as traffic, matatu directions, and driving reports.
      </abstract>
      <sumDscr>
        <collDate date="2017-07-01" event="start"/>
        <collDate date="2018-07-31" event="end"/>
        <nation abbr="KEN">
          Kenya
        </nation>
        <geogCover>
          Kenya (primarily Nairobi)
        </geogCover>
        <anlyUnit>
          Road traffic crash reports
        </anlyUnit>
        <universe>
          Tweets reporting road traffic crash reports, scraped from twitter handle @Ma3Route
        </universe>
        <dataKind>
          Observation data/ratings [obs]
        </dataKind>
      </sumDscr>
    </stdyInfo>
    <method>
      <dataColl>
        <collMode>
          Other [oth]
        </collMode>
        <sources/>
        <collSitu>
          Using the Twitter API, tweets were scraped from the twitter handle @Ma3Route. Ma3Route is a mobile/web/SMS platform that crowdsources transport data and provides users with information on traffic, road traffic crash (RTC), matatu directions and driving reports. Users post RTC or traffic information to Ma3Route, where Ma3Route then publishes the post on Twitter. Tweets were obtained in order to identify tweets that reported RTC. Tweets from May 2012 to July 2020 were scraped and a "truth dataset", of tweets manually coded to determine if they reported crash reports and the location of the reported crashes, was generated. Additional information on the data is provided in additional documents found under the 'Documentation' tab.
        </collSitu>
      </dataColl>
    </method>
    <dataAccs>
      <useStmt>
        <confDec required="yes">
          Users of the data agree to keep confidential all data contained in these datasets and to make no attempt to identify, trace or contact any individual whose data is included in these datasets.
        </confDec>
        <citReq>
          <![CDATA[Use of the dataset must be acknowledged using a citation which would include:
- the Identification of the Primary Investigator
- the title of the survey (including country, acronym and year of implementation)
- the survey reference number
- the source and date of download

Example,

World Bank Group. Kenya - Manually Labelled Crash Reports from Ma3Route (MLCRM) 2017-2018. Ref. KEN_2017-2018_MLCRM_v01_M. Dataset downloaded from [url] on [date].]]>
        </citReq>
        <conditions>
          Licensed access
        </conditions>
        <disclaimer>
          The user of the data acknowledges that the original collector of the data, the authorized distributor of the data, and the relevant funding agency bear no responsibility for use of the data or for interpretations or inferences based upon such uses.
        </disclaimer>
      </useStmt>
    </dataAccs>
  </stdyDscr>
  <fileDscr ID="F1" URI="KEN_2017-2018_MLCRM_v01_M.Nesstar?Index=0&amp;Name=twitter_truth">
    <fileTxt>
      <fileName>
        twitter_truth.NSDstat
      </fileName>
      <fileCont>
        Using the Twitter API, tweets were scraped from the twitter handle @Ma3Route. Ma3Route is a mobile/web/SMS platform that crowdsources transport data and provides users with information on traffic, road traffic crash (RTC), matatu directions and driving reports. Users post RTC or traffic information to Ma3Route, where Ma3Route then publishes the post on Twitter. Tweets were obtained in order to identify tweets that reported RTC. This dataset includes a manually labelled dataset of a subset of tweets indicating which tweets report a crash and the location of crashes.
      </fileCont>
      <dimensns>
        <caseQnty>
          9479
        </caseQnty>
        <varQnty>
          9
        </varQnty>
      </dimensns>
      <fileType>
        Nesstar 200801
      </fileType>
    </fileTxt>
  </fileDscr>
  <dataDscr>
    <var ID="V1" name="uid" files="F1" dcml="0" intrvl="contin">
      <location StartPos="1" EndPos="4" width="4" RecSegNo="1"/>
      <labl>
        Unique ID
      </labl>
      <qstn>
        <qstnLit>
          Unique ID
        </qstnLit>
      </qstn>
      <valrng>
        <range UNITS="REAL" min="1" max="9479"/>
      </valrng>
      <sumStat type="vald">
        9479
      </sumStat>
      <sumStat type="invd">
        0
      </sumStat>
      <sumStat type="min">
        1
      </sumStat>
      <sumStat type="max">
        9479
      </sumStat>
      <varFormat type="numeric" schema="other"/>
    </var>
    <var ID="V2" name="tweet_id" files="F1" intrvl="discrete">
      <location StartPos="5" EndPos="23" width="19" RecSegNo="1"/>
      <labl>
        Tweet ID
      </labl>
      <qstn>
        <qstnLit>
          Tweet ID
        </qstnLit>
      </qstn>
      <sumStat type="vald">
        7724
      </sumStat>
      <sumStat type="invd">
        0
      </sumStat>
      <varFormat type="character" schema="other"/>
    </var>
    <var ID="V3" name="created_at" files="F1" dcml="0" intrvl="contin">
      <location StartPos="24" EndPos="36" width="13" RecSegNo="1"/>
      <labl>
        Time Date/Time (EAT)
      </labl>
      <qstn>
        <qstnLit>
          Time Date/Time (EAT)
        </qstnLit>
      </qstn>
      <valrng>
        <range UNITS="REAL" min="1814496118000" max="1848681624000"/>
      </valrng>
      <sumStat type="vald">
        9479
      </sumStat>
      <sumStat type="invd">
        0
      </sumStat>
      <sumStat type="min">
        1814496118000
      </sumStat>
      <sumStat type="max">
        1848681624000
      </sumStat>
      <varFormat type="numeric" schema="other"/>
    </var>
    <var ID="V4" name="crash_report" files="F1" dcml="0" intrvl="discrete">
      <location StartPos="37" EndPos="37" width="1" RecSegNo="1"/>
      <labl>
        Tweet reports crash
      </labl>
      <qstn>
        <qstnLit>
          Tweet reports crash
        </qstnLit>
      </qstn>
      <valrng>
        <range UNITS="REAL" min="0" max="1"/>
      </valrng>
      <sumStat type="vald">
        9479
      </sumStat>
      <sumStat type="invd">
        0
      </sumStat>
      <sumStat type="min">
        0
      </sumStat>
      <sumStat type="max">
        1
      </sumStat>
      <catgry>
        <catValu>
          0
        </catValu>
        <labl>
          No
        </labl>
        <catStat type="freq">
          2878
        </catStat>
      </catgry>
      <catgry>
        <catValu>
          1
        </catValu>
        <labl>
          Yes
        </labl>
        <catStat type="freq">
          6601
        </catStat>
      </catgry>
      <varFormat type="numeric" schema="other"/>
    </var>
    <var ID="V5" name="latitude" files="F1" dcml="0" intrvl="contin">
      <location StartPos="38" EndPos="46" width="9" RecSegNo="1"/>
      <labl>
        Latitude of crash
      </labl>
      <qstn>
        <qstnLit>
          Latitude of crash
        </qstnLit>
      </qstn>
      <valrng>
        <range UNITS="REAL" min="-4.059868" max="1.257331"/>
      </valrng>
      <sumStat type="vald">
        4193
      </sumStat>
      <sumStat type="invd">
        5286
      </sumStat>
      <sumStat type="min">
        -4.06
      </sumStat>
      <sumStat type="max">
        1.257
      </sumStat>
      <varFormat type="numeric" schema="other"/>
    </var>
    <var ID="V6" name="longitude" files="F1" dcml="0" intrvl="contin">
      <location StartPos="47" EndPos="55" width="9" RecSegNo="1"/>
      <labl>
        Longitude of crash
      </labl>
      <qstn>
        <qstnLit>
          Longitude of crash
        </qstnLit>
      </qstn>
      <valrng>
        <range UNITS="REAL" min="34.145808" max="40.171389"/>
      </valrng>
      <sumStat type="vald">
        4191
      </sumStat>
      <sumStat type="invd">
        5288
      </sumStat>
      <sumStat type="min">
        34.146
      </sumStat>
      <sumStat type="max">
        40.171
      </sumStat>
      <varFormat type="numeric" schema="other"/>
    </var>
    <var ID="V7" name="crash_id_c1" files="F1" dcml="0" intrvl="contin">
      <location StartPos="56" EndPos="60" width="5" RecSegNo="1"/>
      <labl>
        Crash ID (from coder 1)
      </labl>
      <qstn>
        <qstnLit>
          Crash ID (from coder 1)
        </qstnLit>
      </qstn>
      <valrng>
        <range UNITS="REAL" min="907" max="41898"/>
      </valrng>
      <sumStat type="vald">
        3796
      </sumStat>
      <sumStat type="invd">
        5683
      </sumStat>
      <sumStat type="min">
        907
      </sumStat>
      <sumStat type="max">
        41898
      </sumStat>
      <varFormat type="numeric" schema="other"/>
    </var>
    <var ID="V8" name="crash_id_c2" files="F1" dcml="0" intrvl="contin">
      <location StartPos="61" EndPos="65" width="5" RecSegNo="1"/>
      <labl>
        Crash ID (from coder 2)
      </labl>
      <qstn>
        <qstnLit>
          Crash ID (from coder 2)
        </qstnLit>
      </qstn>
      <valrng>
        <range UNITS="REAL" min="909" max="47227"/>
      </valrng>
      <sumStat type="vald">
        3674
      </sumStat>
      <sumStat type="invd">
        5805
      </sumStat>
      <sumStat type="min">
        909
      </sumStat>
      <sumStat type="max">
        47227
      </sumStat>
      <varFormat type="numeric" schema="other"/>
    </var>
    <var ID="V9" name="crash_landmark" files="F1" intrvl="discrete">
      <location StartPos="66" EndPos="136" width="71" RecSegNo="1"/>
      <labl>
        Landmark used to geocode
      </labl>
      <qstn>
        <qstnLit>
          Landmark used to geocode
        </qstnLit>
      </qstn>
      <sumStat type="vald">
        4210
      </sumStat>
      <sumStat type="invd">
        0
      </sumStat>
      <varFormat type="character" schema="other"/>
    </var>
  </dataDscr>
</codeBook>
