DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Snippets has posted 5883 posts at DZone. View Full User Profile

Make Dictionary Of US Cities

12.29.2009
| 4457 views |
  • submit to reddit
        
"""
Reads the data from http://www.census.gov/geo/www/gazetteer/places2k.html and finds the cities by state.

It produces a dictionary keyed by two-letter state code. Each value is a list of
dictionaries, one per place, holding the census information for that place.
"""

# Place-type designations that may follow the place name in the name field.
_PLACE_TYPES = ('(balance)', 'city', 'CDP', 'town', 'borough', 'village',
                'comunidad', 'municipality', 'zona urbana')


def process(sline):
    """Parse one fixed-width record of the census places file into a dict.

    The record is sliced by column position:

    * ``state``      -- characters 1-2, the two-letter state code
    * ``city``       -- characters 10-73, the place name with any trailing
      place-type designation removed
    * ``population`` -- characters 74-82, as ``int``
    * ``housing``    -- characters 83-91, as ``int``
    * ``land_area``  -- characters 120-131, as ``float``
    * ``water_area`` -- characters 132-143, as ``float``
      (both areas are presumably square miles, per the Census 2000
      gazetteer record layout -- confirm against the layout document)
    * ``latitude``   -- characters 144-153, as ``float``
    * ``longitude``  -- characters 154-164, as ``float``
    * ``city_type``  -- the designation stripped from the name
      (one of ``_PLACE_TYPES``), or ``''`` when the name carries none

    Raises ``ValueError`` when a numeric field cannot be parsed, which is
    what happens for a blank or truncated line.
    """
    d = {'state': sline[0:2],
         'city': sline[9:73].strip(),
         'population': int(sline[73:82]),
         'housing': int(sline[82:91]),
         'land_area': float(sline[119:131]),
         'water_area': float(sline[131:143]),
         'latitude': float(sline[143:153]),
         'longitude': float(sline[153:164]),
         'city_type': ''}

    name = d['city']
    for place_type in _PLACE_TYPES:
        # Only accept the designation as a trailing, space-separated word.
        # Searching for it anywhere in the name would split names such as
        # "Johnstown borough" at the embedded "town".
        if name.endswith(' ' + place_type):
            d['city'] = name[:-len(place_type)].rstrip()
            d['city_type'] = place_type
            break
    return d


# Script body: download the census places file, group the parsed records
# by state and pickle the resulting dictionary to 'us_cities.pickle'.
import urllib
import codecs
import pickle
from contextlib import closing

try:
    # Python 3 location of urlopen
    from urllib.request import urlopen
except ImportError:
    # Python 2 location of urlopen
    urlopen = urllib.urlopen

print("Reading data...")
# closing() guarantees the HTTP response is released even if reading fails.
with closing(urlopen('http://www.census.gov/tiger/tms/gazetteer/places2k.txt')) as response:
    # The stream is decoded as Latin-1 while it is read.
    text_file = codecs.getreader('latin_1')(response)
    lines = text_file.readlines()

citiesByState = {}
for city_line in lines:
    # readlines() keeps the trailing newline, so a length check never
    # filters anything; test the stripped line to really skip blank lines.
    if city_line.strip():
        thisCity = process(city_line)
        citiesByState.setdefault(thisCity['state'], []).append(thisCity)
print("US census data has been read")

# Pickle it
with open('us_cities.pickle', 'wb') as fp:
    pickle.dump(citiesByState, fp)