# Import modules
import numpy as np
import pandas as pd
import geopandas as gpd
import networkx as nx
import osmnx as ox
import re
from shapely.geometry import Point, LineString, Polygon
import matplotlib.pyplot as plt
from tqdm import tqdm
import multiprocessing as mp
import folium
import itertools
import os
import time
import warnings
import IPython
import requests
from IPython.display import display, clear_output

# Silence every warning category for the rest of the session
# (note: this also hides deprecation notices raised by the libraries imported above)
warnings.filterwarnings("ignore")
# Print one "name==version" line for each global object that exposes a truthy __version__ attribute
print('\n'.join(f'{m.__name__}=={m.__version__}' for m in globals().values() if getattr(m, '__version__', None)))

numpy==1.22.0
pandas==1.3.5
geopandas==0.10.2
networkx==2.6.3
osmnx==1.1.2
re==2.2.1
folium==0.12.1.post1
IPython==8.3.0
requests==2.27.1


# Check working directory (the returned path is shown as the cell output)
os.getcwd()

'/home/jovyan/work/RPr-Kang-2020/procedure/code'


# Use to set work directory properly: if the session is sitting in a folder named
# 'code', climb two levels so the relative ./data paths used below resolve
working_dir_name = os.path.basename(os.getcwd())
if working_dir_name == 'code':
    os.chdir('../../')
os.getcwd()

'/home/jovyan/work/RPr-Kang-2020'


# Read in at risk population data
atrisk_data = gpd.read_file('./data/raw/public/PopData/Illinois_Tract.shp')
atrisk_data.head()


# Read in covid case data
# (the 'cases' column is used exactly as stored in the shapefile; the former
# self-assignment of that column did nothing and has been removed)
covid_data = gpd.read_file('./data/raw/public/PopData/Chicago_ZIPCODE.shp')
covid_data.head()


# Read in hospital data
hospitals = gpd.read_file('./data/raw/public/HospitalData/Chicago_Hospital_Info.shp')
hospitals.head()


# Plot hospital data on an interactive folium map, one circle marker per hospital
m = folium.Map(location=[41.85, -87.65], tiles='cartodbpositron', zoom_start=10)
for _, hospital in hospitals.iterrows():
    folium.CircleMarker(
        location=[hospital['Y'], hospital['X']],
        popup="{}{}\n{}{}\n{}{}".format('Hospital Name: ', hospital['Hospital'],
                                        'ICU Beds: ', hospital['Adult ICU'],
                                        'Ventilators: ', hospital['Total Vent']),
        radius=5,
        color='blue',
        fill=True,
        fill_opacity=0.6,
    ).add_to(m)

# Fixed-position HTML legend. The original snippet was never attached to the map,
# misspelled "height", omitted the "border:" property name and left the <div> open.
legend_html = '''<div style="position: fixed; width: 20%; height: auto;
                            bottom: 10px; left: 10px;
                            border: 2px solid grey; z-index:9999; font-size:14px;
                            background-color: white;
                            ">&nbsp; Legend<br>
                 &nbsp; <span style="color: blue;">&#9679;</span>&nbsp; Hospitals
                 </div>'''
m.get_root().html.add_child(folium.Element(legend_html))

m


# Read in and plot grid file for Chicago
# (this grid is the layer that the catchment values are aggregated onto further below)
grid_file = gpd.read_file('./data/raw/public/GridFile/Chicago_Grid.shp')
grid_file.plot(figsize=(8,8))

<AxesSubplot:>


%%time
# To create a new graph from OpenStreetMap, delete or rename data/raw/private/Chicago_Network_Buffer.graphml
# (if it exists), and set OSM to True
OSM = False
#can make OSM True

# Single definition of where the buffered street network is cached on disk
graphml_path = './data/raw/private/Chicago_Network_Buffer.graphml'

if not os.path.exists(graphml_path):
    # Make sure the target folder exists before anything is written into it
    os.makedirs(os.path.dirname(graphml_path), exist_ok=True)

    # if buffered street network is not saved, and OSM is preferred,
    # generate a new graph from OpenStreetMap and save it
    if OSM:
        print("Loading buffered Chicago road network from OpenStreetMap. Please wait... runtime may exceed 9min...", flush=True)
        G = ox.graph_from_place('Chicago', network_type='drive', buffer_dist=24140.2)
        print("Saving Chicago road network to raw/private/Chicago_Network_Buffer.graphml. Please wait...", flush=True)
        ox.save_graphml(G, graphml_path)
        print("Data saved.")

    # otherwise, download the graph from the OSF project
    else:
        print("Downloading buffered Chicago road network from OSF...", flush=True)
        url = 'https://osf.io/download/z8ery/'
        r = requests.get(url, allow_redirects=True)
        # Fail loudly on an HTTP error instead of saving an error page as the graph file
        r.raise_for_status()
        print("Saving buffered Chicago road network to file...", flush=True)
        with open(graphml_path, 'wb') as graphml_file:
            graphml_file.write(r.content)

# if the buffered street network is saved (previously, or by the step above), load it
if os.path.exists(graphml_path):
    print("Loading buffered Chicago road network from raw/private/Chicago_Network_Buffer.graphml. Please wait...", flush=True)
    G = ox.load_graphml(graphml_path)
    print("Data loaded.")
else:
    print("Error: could not load the road network from file.")

Loading buffered Chicago road network from raw/private/Chicago_Network_Buffer.graphml. Please wait...
Data loaded.
CPU times: user 36.5 s, sys: 1.58 s, total: 38.1 s
Wall time: 38.2 s


%%time
# Draw the loaded street network: small semi-transparent black nodes and thin dark-grey edges on white
ox.plot_graph(G, node_size = 1, bgcolor = 'white', node_color = 'black', edge_color = "#333333", node_alpha = 0.5, edge_linewidth = 0.5)

CPU times: user 46.5 s, sys: 331 ms, total: 46.8 s
Wall time: 46.6 s

(<Figure size 576x576 with 1 Axes>, <AxesSubplot:>)


%%time
# Convert the graph into a node GeoDataFrame and an edge GeoDataFrame
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)

# Count road segments per distinct 'maxspeed' value, then report the edge total
speed_limit_counts = edges['maxspeed'].value_counts()
print(speed_limit_counts)
print(f"{len(edges)} edges in graph")

# can we also visualize highways / roads with higher speed limits to check accuracy?
# the code above converts the graph into an edges geodataframe, which could theoretically be filtered
# by fast road segments and mapped, e.g. in folium

25 mph                        6016
30 mph                        4873
35 mph                        4803
20 mph                        3621
40 mph                        2842
45 mph                        2423
55 mph                         876
60 mph                         293
50 mph                         287
15 mph                         107
70 mph                          79
[40 mph, 45 mph]                54
10 mph                          44
[35 mph, 30 mph]                36
65 mph                          36
[40 mph, 35 mph]                36
[45 mph, 35 mph]                34
[45 mph, 55 mph]                29
45,30                           24
[50 mph, 45 mph]                19
25, east                        14
25                              14
[25 mph, 30 mph]                13
[40 mph, 30 mph]                11
[35 mph, 20 mph]                 6
[25 mph, 35 mph]                 6
[60 mph, 65 mph]                 5
20                               4
[60 mph, 70 mph]                 4
[25 mph, 20 mph]                 4
[55 mph, 65 mph]                 4
[40 mph, 45 mph, 35 mph]         3
[50 mph, 40 mph]                 3
[50 mph, 55 mph]                 3
[55 mph, 60 mph]                 3
5 mph                            2
[50 mph, 45 mph, 55 mph]         2
[25, east, 30 mph]               2
[40 mph, 55 mph, 35 mph]         2
[50 mph, 55 mph, 45, east]       2
[45 mph, 60 mph]                 2
[40 mph, 25 mph, 35 mph]         2
[5 mph, 35 mph]                  2
[40 mph, 15 mph, 30 mph]         2
[60 mph, 55 mph]                 2
[45 mph, 70 mph, 5 mph]          2
[15 mph, 25 mph]                 2
[15 mph, 30 mph]                 2
[45 mph, 30 mph]                 2
[45mph, 45 mph]                  1
[30 mph, 20 mph]                 1
[55 mph, 35 mph]                 1
[45 mph, 15 mph]                 1
[15 mph, 45 mph]                 1
[15 mph, 15]                     1
Name: maxspeed, dtype: int64
384974 edges in graph
CPU times: user 31.4 s, sys: 61.9 ms, total: 31.4 s
Wall time: 31.4 s


# Preview the first rows of the edge GeoDataFrame built from the graph
edges.head()


# # two things about this function:
# # 1) the work to remove nodes is hardly worth it now that OSMnx cleans graphs by default
# # the function is now only pruning < 300 nodes
# # 2) try using the OSMnx speed module for setting speeds, travel times
# # https://osmnx.readthedocs.io/en/stable/user-reference.html#module-osmnx.speed
# # just be careful about units of speed and time!
# # the remainder of this code expects 'time' to be measured in minutes

# def network_setting(network):
#     _nodes_removed = len([n for (n, deg) in network.out_degree() if deg ==0])
#     network.remove_nodes_from([n for (n, deg) in network.out_degree() if deg ==0])
#     for component in list(nx.strongly_connected_components(network)):
#         if len(component)<10:
#             for node in component:
#                 _nodes_removed+=1
#                 network.remove_node(node)
#     for u, v, k, data in tqdm(G.edges(data=True, keys=True),position=0):
#         if 'maxspeed' in data.keys():
#             speed_type = type(data['maxspeed'])
#             if (speed_type==str):
#                 # Add in try/except blocks to catch maxspeed formats that don't fit Kang et al's cases
#                 try:
#                     if len(data['maxspeed'].split(','))==2:
#                         data['maxspeed_fix']=float(data['maxspeed'].split(',')[0])                  
#                     elif data['maxspeed']=='signals':
#                         data['maxspeed_fix']=30.0 # drive speed setting as 35 miles
#                     else:
#                         data['maxspeed_fix']=float(data['maxspeed'].split()[0])
#                 except:
#                     data['maxspeed_fix']=30.0 #miles
#             else:
#                 try:
#                     data['maxspeed_fix']=float(data['maxspeed'][0].split()[0])
#                 except:
#                     data['maxspeed_fix']=30.0 #miles
#         else:
#             data['maxspeed_fix']=30.0 #miles
#         data['maxspeed_meters'] = data['maxspeed_fix']*26.8223 # convert mile per hour to meters per minute
#         data['time'] = float(data['length'])/ data['maxspeed_meters'] # meters / meters per minute = minutes
#     print("Removed {} nodes ({:2.4f}%) from the OSMNX network".format(_nodes_removed, _nodes_removed/float(network.number_of_nodes())))
#     print("Number of nodes: {}".format(network.number_of_nodes()))
#     print("Number of edges: {}".format(network.number_of_edges()))    
#     return(network)


def network_setting(network):
    """Prune poorly connected nodes and attach speeds and travel times to edges.

    The graph is modified in place and also returned.

    Steps:
      1. remove every node whose out-degree is 0;
      2. remove every strongly connected component with fewer than 10 nodes;
      3. call ``ox.speed.add_edge_speeds`` and ``ox.speed.add_edge_travel_times``
         (these add the ``speed_kph`` and ``travel_time`` edge attributes;
         ``travel_time`` is in seconds per the OSMnx documentation -- confirm
         if the OSMnx version changes).

    Parameters
    ----------
    network : networkx.MultiDiGraph
        Street network as produced by OSMnx.

    Returns
    -------
    networkx.MultiDiGraph
        The same graph object, pruned and annotated.
    """
    # Remember the starting size so the share removed is relative to the input graph
    original_node_count = network.number_of_nodes()

    dead_ends = [n for (n, deg) in network.out_degree() if deg == 0]
    network.remove_nodes_from(dead_ends)
    nodes_removed = len(dead_ends)

    # Materialise the components first: the graph is edited inside the loop
    for component in list(nx.strongly_connected_components(network)):
        if len(component) < 10:
            nodes_removed += len(component)
            network.remove_nodes_from(component)

    ox.speed.add_edge_speeds(network)
    ox.speed.add_edge_travel_times(network)

    # Report a true percentage (the earlier message printed a raw fraction with a
    # '%' sign and divided by the node count left *after* pruning)
    percent_removed = 100.0 * nodes_removed / original_node_count if original_node_count else 0.0
    print("Removed {} nodes ({:2.4f}%) from the OSMNX network".format(nodes_removed, percent_removed))
    print("Number of nodes: {}".format(network.number_of_nodes()))
    print("Number of edges: {}".format(network.number_of_edges()))
    return network
    
#ox.speed.add_edge_speeds(G)
#ox.speed.add_edge_speeds(G, agg=np.mean)


%%time
# G, hospitals, grid_file, pop_data = file_import (population_dropdown.value)
G = network_setting(G)
# Attach a shapely Point (built from the node's x/y attributes) to every node,
# which makes constructing catchment area polygons easier later on
for _node_id, node_attrs in G.nodes(data=True):
    node_attrs['geometry'] = Point(node_attrs['x'], node_attrs['y'])
# Modify code to react to processor dropdown (got rid of file_import function)

Removed 315 nodes (0.0022%) from the OSMNX network
Number of nodes: 142777
Number of edges: 384591
CPU times: user 40.7 s, sys: 222 ms, total: 40.9 s
Wall time: 40.9 s


%%time
## Get unique counts for each road network
## (the GeoDataFrames are rebuilt here from the graph returned by network_setting)
# Turn nodes and edges into geodataframes
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)


# Check that osmnx added speeds and travel times to graph
# print(edges['speed_kph'].value_counts())
# print(str(len(edges)) + " edges in graph")
# print(edges['travel_time'].value_counts())

# # Count
# edges['speed_kph'] = edges['speed_kph']*0.621371
# G=ox.graph_from_gdfs(nodes, edges)
# print(edges['speed_kph'].value_counts())
# print(str(len(edges)) + " edges in graph")

37.6     285042
47.8      29182
56.9      27966
60.7       8898
40.2       6991
48.3       4873
56.3       4803
32.2       3618
64.4       2841
72.4       2423
41.5       2405
83.2       2243
88.5        867
53.4        460
96.6        290
80.5        287
89.0        257
29.5        234
67.7        158
46.7        145
24.1        107
112.7        70
68.0         54
16.1         44
64.0         39
60.0         38
52.0         36
80.0         31
25.0         28
104.6        25
45.3         24
76.0         19
44.0         19
56.0         11
36.0          8
48.0          8
84.0          5
72.0          5
92.0          5
100.0         5
20.0          4
32.0          4
104.0         4
96.0          3
53.0          2
71.0          2
69.0          2
8.0           2
45.0          2
40.0          1
19.0          1
Name: speed_kph, dtype: int64
384591 edges in graph
9.7      13538
9.6      11885
19.4      7470
19.3      6568
9.8       6516
         ...  
89.7         1
106.3        1
126.4        1
86.4         1
145.8        1
Name: travel_time, Length: 1214, dtype: int64
CPU times: user 30.8 s, sys: 79.5 ms, total: 30.9 s
Wall time: 30.9 s


def hospital_setting(hospitals, G):
    """Add a 'nearest_osm' column holding each hospital's nearest graph node.

    Parameters
    ----------
    hospitals : GeoDataFrame
        Must contain 'X' and 'Y' columns; they are passed to OSMnx as
        ``[Y, X]``.
    G : networkx graph
        Street network searched with ``ox.get_nearest_node`` (euclidean method).

    Returns
    -------
    GeoDataFrame
        The same ``hospitals`` object with the 'nearest_osm' column set.
    """
    # Look up every hospital first, then assign the column once. The previous
    # element-wise `hospitals['nearest_osm'][i] = ...` was chained indexing,
    # which pandas does not guarantee to write through to the frame.
    nearest_nodes = [
        ox.get_nearest_node(G, [hospitals['Y'][i], hospitals['X'][i]], method='euclidean')
        for i in tqdm(hospitals.index, desc="Find the nearest network node from hospitals", position=0)
    ]
    # dtype=object keeps the column type the original (None-initialised) column had
    hospitals['nearest_osm'] = pd.Series(nearest_nodes, index=hospitals.index, dtype=object)
    print('hospital setting is done')
    return hospitals


def pop_centroid (pop_data, pop_type):
    """Collapse population polygons to centroid points with a code and a count.

    Parameters
    ----------
    pop_data : GeoDataFrame
        Polygon layer. For ``pop_type == "pop"`` it must have 'OverFifty' and
        'GEOID' columns; for ``"covid"`` it must have 'cases' and 'ZCTA5CE10'.
    pop_type : str
        Either "pop" (at-risk population) or "covid" (COVID-19 cases).

    Returns
    -------
    GeoDataFrame
        Columns 'code', 'pop' and 'geometry' (centroid points) on a fresh
        0..n-1 index, in EPSG:4326. Only rows whose count is >= 0 are kept.

    Raises
    ------
    ValueError
        If ``pop_type`` is not one of the two supported values (previously this
        surfaced as an unbound-variable error inside the loop).
    """
    columns_by_type = {
        "pop": ("OverFifty", "GEOID"),
        "covid": ("cases", "ZCTA5CE10"),
    }
    if pop_type not in columns_by_type:
        raise ValueError("pop_type must be 'pop' or 'covid', got {!r}".format(pop_type))
    value_column, code_column = columns_by_type[pop_type]

    pop_data = pop_data.to_crs(epsg=4326)
    # Keep rows with a count of zero or more (this also drops missing values)
    pop_data = pop_data[pop_data[value_column] >= 0]

    # Build the result in one step; appending one row at a time copied the whole
    # frame on every iteration and dominated the run time.
    return gpd.GeoDataFrame(
        {
            'code': pop_data[code_column].to_numpy(),
            'pop': pop_data[value_column].to_numpy(),
        },
        geometry=list(pop_data.centroid),  # centroids are taken on the lon/lat coordinates
        crs=pop_data.crs,
    )


def _convex_hull_frame(points):
    # Wrap the convex hull of all geometries in `points` in a one-row GeoDataFrame.
    hull = gpd.GeoDataFrame(gpd.GeoSeries(points.unary_union.convex_hull))
    return hull.rename(columns={0:'geometry'}).set_geometry('geometry')


def dijkstra_cca_polygons(G, nearest_osm, distances, distance_unit = "travel_time"):

    '''
    Build nested catchment "donut" polygons around one network node.

    Before running: must assign point geometries to street nodes

        # create point geometries for the entire graph
        for node, data in G.nodes(data=True):
            data['geometry']=Point(data['x'], data['y'])

    Parameters
    ----------
    G : networkx graph
        Street network whose nodes carry a 'geometry' attribute.
    nearest_osm : node id
        Source node of the search (e.g. the node nearest a hospital).
    distances : sequence of numbers
        Catchment thresholds in ascending order, expressed in the units of the
        `distance_unit` edge attribute. The last entry bounds the search.
    distance_unit : str
        Edge attribute used as the Dijkstra weight.

    Returns
    -------
    list of GeoDataFrame
        One frame per threshold, innermost first. Each outer frame has the
        next-inner hull cut out of it, so the bands do not overlap.
    '''

    ## SHORTEST-PATH COSTS
    # One search out to the largest threshold; the smaller catchments are subsets of it
    reachable = nx.single_source_dijkstra_path_length(G, nearest_osm, cutoff=distances[-1], weight=distance_unit)

    ## NODE POINTS WITH THEIR COST
    # Point geometry of every reachable node, keyed by node id
    points = gpd.GeoDataFrame(gpd.GeoSeries(nx.get_node_attributes(G.subgraph(reachable), 'geometry')))

    # Join the cost of reaching each node onto its point (both sides are indexed by node id)
    points = points.merge(pd.Series(reachable).to_frame(), left_index=True, right_index=True)

    # Both inputs had a single column named 0, so the merge suffixes them _x / _y
    points = points.rename(columns={'0_x':'geometry','0_y':'z'}).set_geometry('geometry')

    ## CONVEX HULL PER THRESHOLD
    # The thresholds come from `distances`; they used to be hard-coded as 600 and
    # 1200, which silently ignored any other values passed by the caller.
    polygons = [_convex_hull_frame(points[points['z'] <= limit]) for limit in distances]

    # Clip the overlapping distance polygons (create donuts + hole), outermost first
    # so every difference is taken against a still-unclipped inner hull
    for i in reversed(range(1, len(polygons))):
        polygons[i] = gpd.overlay(polygons[i], polygons[i-1], how="difference")

    return polygons


def hospital_measure_acc (_thread_id, hospital, pop_data, distances, weights):
    """Compute one hospital's catchment bands and its resource-to-population ratios.

    Uses the module-level graph ``G``. Every population point that falls within
    a band contributes its 'pop' value multiplied by that band's weight; the
    sum of those contributions is the denominator for both resource ratios.

    Returns ``(_thread_id, [deep copies of the band GeoDataFrames])``; the id is
    also printed as a progress marker.
    """
    # Catchment bands around the hospital's nearest network node
    polygons = dijkstra_cca_polygons(G, hospital['nearest_osm'], distances)

    # Weighted population contributions of every point inside any band
    weighted_pops = []
    for row_label in pop_data.index:
        location = pop_data['geometry'][row_label]
        for k, band in enumerate(polygons):
            # Skip empty frames (cases where the convex hull was not a polygon)
            if len(band) > 0 and location.within(band.iloc[0]["geometry"]):
                weighted_pops.append(pop_data['pop'][row_label]*weights[k])
    total_pop = sum(weighted_pops)

    for i, threshold in enumerate(distances):
        band = polygons[i]
        band['time'] = threshold
        band['total_pop'] = total_pop
        # Divide by the column (not the scalar) so a zero population yields inf rather than raising
        band['hospital_icu_beds'] = float(hospital['Adult ICU'])/band['total_pop']
        band['hospital_vents'] = float(hospital['Total Vent'])/band['total_pop']
        band.crs = { 'init' : 'epsg:4326'}
        polygons[i] = band.to_crs({'init':'epsg:32616'})

    print('{:.0f}'.format(_thread_id), end=" ", flush=True)
    return (_thread_id, [band.copy(deep=True) for band in polygons])


def hospital_acc_unpacker(args):
    """Spread one argument tuple into a hospital_measure_acc call (for use with pool.map)."""
    unpacked = tuple(args)
    return hospital_measure_acc(*unpacked)

# WHERE THE RESULTS ARE POOLED AND THEN REAGGREGATED
def measure_acc_par (hospitals, pop_data, network, distances, weights, num_proc = 4):
    """Compute catchment bands for every hospital in parallel.

    Parameters
    ----------
    hospitals : GeoDataFrame
        One row per hospital; each row is handed to ``hospital_measure_acc``.
    pop_data : GeoDataFrame
        Population points passed through to the worker.
    network : unused
        Kept for interface compatibility; the worker reads the module-level graph.
    distances, weights : sequences
        Catchment thresholds and their weights, passed through to the worker.
    num_proc : int
        Number of worker processes.

    Returns
    -------
    list of GeoDataFrame
        One frame per entry in ``distances``, holding that band for all
        hospitals in hospital order.
    """
    hospital_list = [ hospitals.iloc[i] for i in range(len(hospitals)) ]
    print("Calculating", len(hospital_list), "hospital catchments...\ncompleted number:", end=" ")
    jobs = zip(range(len(hospital_list)), hospital_list, itertools.repeat(pop_data),
               itertools.repeat(distances), itertools.repeat(weights))
    # The context manager releases the worker processes even if a worker raises
    with mp.Pool(processes = num_proc) as pool:
        results = pool.map(hospital_acc_unpacker, jobs)

    # Order by job id only, so the GeoDataFrame payloads are never compared
    results.sort(key=lambda job_result: job_result[0])
    bands_per_hospital = [ bands for _, bands in results ]
    if not bands_per_hospital:
        return [ gpd.GeoDataFrame() for _ in distances ]

    # One concat per band instead of appending hospital by hospital
    return [
        pd.concat([ bands[j] for bands in bands_per_hospital ], sort=False)
        for j in range(len(distances))
    ]


from collections import Counter
def overlap_calc(_id, poly, grid_file, weight, service_type):
    """Credit one catchment's weighted service value to the grid cells it covers.

    Parameters
    ----------
    _id : int
        Job identifier, returned unchanged.
    poly : GeoDataFrame
        Single-row frame for one catchment band; its `service_type` column
        holds the value to distribute.
    grid_file : GeoDataFrame
        Grid with an 'id' column and an 'area' column holding each cell's area.
    weight : float
        Weight applied to the service value for this band.
    service_type : str
        Name of the column in `poly` to read.

    Returns
    -------
    tuple
        ``(_id, Counter)`` mapping grid-cell id to the weighted value, for
        cells with at least 50% of their area inside the catchment. The
        Counter is empty when the service value is None.
    """
    value_dict = Counter()
    service_value = poly.iloc[0][service_type]
    if service_value is None:
        return (_id, value_dict)

    value = float(service_value)*weight
    intersect = gpd.overlay(grid_file, poly, how='intersection')
    # Share of each cell's own area that falls inside the catchment
    covered_share = intersect.area/intersect['area']
    for intersect_id in intersect.loc[covered_share >= 0.5, 'id']:
        # A Counter returns 0 for unseen keys, so no exception handling is needed
        value_dict[intersect_id] += value
    return (_id, value_dict)

def overlap_calc_unpacker(args):
    """Spread one argument tuple into an overlap_calc call (for use with pool.map)."""
    unpacked = tuple(args)
    return overlap_calc(*unpacked)


# from collections import Counter
# def overlap_calc(_id, poly, grid_file, weight, service_type):
#     #writing function for overlap_calc
#     value_dict = Counter()
#     if type(poly.iloc[0][service_type])!=type(None):
#         #identify service areas (with ICU beds or ventilators)
#         value = float(poly[service_type])*weight
#         #weight service areas by catchment area (for a given catchment area eg. 1, 0.68, etc)
#         intersect = gpd.overlay(grid_file, poly, how='intersection')
#         #doing overlay between catchment areas and the hexagons (grid file)
#         intersect['overlapped']= intersect.area
#         #calcuate area of fragments where catchment areas and hexagons intersect
#         intersect['percent'] = intersect['overlapped']/intersect['area']
#         #calculate percentage of how much catchment area is within a hexagon
#        # intersect=intersect[intersect['percent']>=0.5]
#         # finding which hexgons have catchment areas which contribute less than 50%
#         value = [intersect['percent']]*value
#         intersect_region = intersect['id']
#         for intersect_id in intersect_region:
#             try:
#                 value_dict[intersect_id] +=value
#             except:
#                 value_dict[intersect_id] = value
#     return(_id, value_dict)

# def overlap_calc_unpacker(args):
#     return overlap_calc(*args)


def overlapping_function (grid_file, catchments, service_type, weights, num_proc = 4):
    """Aggregate weighted catchment values onto the grid, in parallel.

    Parameters
    ----------
    grid_file : DataFrame / GeoDataFrame
        Grid with an 'id' column. The `service_type` column is (re)created on
        this object and the same object is returned.
    catchments : list of GeoDataFrame
        One frame per distance band; every row is one catchment polygon.
    service_type : str
        Column to aggregate.
    weights : sequence of float
        ``weights[i]`` applies to every row of ``catchments[i]``.
    num_proc : int
        Number of worker processes.

    Returns
    -------
    The input `grid_file`, with `service_type` holding the summed values
    (0 for cells no catchment contributes to).
    """
    grid_file[service_type]=0

    # Flatten the bands into single-row frames, each paired with its band weight
    acc_list = []
    acc_weights = []
    for i, band in enumerate(catchments):
        acc_list.extend(band[j:j+1] for j in range(len(band)))
        acc_weights.extend([weights[i]]*len(band))

    # Nothing to overlay: keep the zero column instead of failing on results[0]
    if not acc_list:
        return grid_file

    jobs = zip(range(len(acc_list)), acc_list, itertools.repeat(grid_file), acc_weights, itertools.repeat(service_type))
    # The context manager releases the worker processes even if a worker raises
    with mp.Pool(processes = num_proc) as pool:
        results = pool.map(overlap_calc_unpacker, jobs)

    # Order by job id only, so the Counter payloads are never compared
    results.sort(key=lambda job_result: job_result[0])
    service_values = results[0][1]
    for _, partial in results[1:]:
        service_values += partial

    # Single lookup pass over the grid instead of one boolean-mask update per cell id
    grid_file[service_type] = grid_file['id'].map(lambda cell_id: service_values.get(cell_id, 0))
    return grid_file


def normalization (result, res):
    """Min-max rescale column `res` of `result` to the range [0, 1], in place.

    Parameters
    ----------
    result : DataFrame / GeoDataFrame
        Table holding the column to rescale; it is modified and returned.
    res : str
        Name of the column.

    Returns
    -------
    The same `result` object. If every value in the column is identical the
    column becomes 0.0 throughout (plain min-max scaling would give 0/0 = NaN).
    Missing values are ignored when finding the minimum and maximum.
    """
    values = result[res]
    # Compute each extreme once, with the vectorised Series reductions
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        result[res] = (values - lowest).astype(float)
    else:
        result[res] = (values - lowest)/span
    return result


def output_map(output_grid, base_map, hospitals, resource):
    """Draw the grid coloured by `resource`, with `base_map` outlines and hospital points on top."""
    grid_style = dict(column=resource, cmap='PuBuGn', figsize=(18,12), legend=True, zorder=1)
    axes = output_grid.plot(**grid_style)
    # Fix the x- and y-axis bounds because it looks like there's a weird
    # point at the bottom left of the map that's messing up our frame (Maja)
    x_bounds = [314000, 370000]
    y_bounds = [540000, 616000]
    axes.set_xlim(x_bounds)
    axes.set_ylim(y_bounds)
    base_map.plot(ax=axes, facecolor="none", edgecolor='gray', lw=0.1)
    hospitals.plot(ax=axes, markersize=10, zorder=1, c='blue')


import ipywidgets
from IPython.display import display

# Choices for each dropdown as (label shown, value returned) pairs
processor_options = [("1", 1), ("2", 2), ("3", 3), ("4", 4)]
population_options = [("Population at Risk", "pop"), ("COVID-19 Patients", "covid")]
resource_options = [("ICU Beds", "hospital_icu_beds"), ("Ventilators", "hospital_vents")]
hospital_options = [("All hospitals", "hospitals"), ("Subset", "hospital_subset")]

# Number of worker processes used by the parallel steps
processor_dropdown = ipywidgets.Dropdown(options=processor_options, value=4, description="Processor: ")

# Which population layer to measure access for
population_dropdown = ipywidgets.Dropdown(options=population_options, value="pop", description="Population: ")

# Which hospital resource to map
resource_dropdown = ipywidgets.Dropdown(options=resource_options, value="hospital_icu_beds", description="Resource: ")

# All hospitals, or a subset
hospital_dropdown = ipywidgets.Dropdown(options=hospital_options, value="hospitals", description="Hospital:")

display(processor_dropdown, population_dropdown, resource_dropdown, hospital_dropdown)

Dropdown(description='Processor: ', index=3, options=(('1', 1), ('2', 2), ('3', 3), ('4', 4)), value=4)

Dropdown(description='Population: ', options=(('Population at Risk', 'pop'), ('COVID-19 Patients', 'covid')), …

Dropdown(description='Resource: ', options=(('ICU Beds', 'hospital_icu_beds'), ('Ventilators', 'hospital_vents…

Dropdown(description='Hospital:', options=(('All hospitals', 'hospitals'), ('Subset', 'hospital_subset')), val…


# Build the centroid layer for whichever population the dropdown selects
selected_population = population_dropdown.value
if selected_population == "pop":
    pop_data = pop_centroid(atrisk_data, selected_population)
elif selected_population == "covid":
    pop_data = pop_centroid(covid_data, selected_population)
distances = [600, 1200, 1800] # Distances in travel time
#distances=[10,20,30] # Distances in travel time
weights = [1.0, 0.68, 0.22] # Weights where weights[0] is applied to distances[0]
# Other weighting options representing different distance decays
# weights1, weights2, weights3 = [1.0, 0.42, 0.09], [1.0, 0.75, 0.5], [1.0, 0.5, 0.1]
# it is surprising how long this function takes just to calculate centroids.
# why not do it with the geopandas/pandas functions rather than iterating through every item?

Pop Centroid File Setting: 100%|██████████| 3121/3121 [03:25<00:00, 15.18it/s]


# Set hospitals according to hospital dropdown:
# the "subset" option keeps only the first row, otherwise every hospital is used
use_subset = hospital_dropdown.value == "hospital_subset"
selected_hospitals = hospitals[:1] if use_subset else hospitals
hospitals = hospital_setting(selected_hospitals, G)
resources = ["hospital_icu_beds", "hospital_vents"] # resources
# this is also slower than it needs to be; if network nodes and hospitals are both
# geopandas data frames, it should be possible to do a much faster spatial join rather than iterating through every hospital

Find the nearest network node from hospitals: 100%|██████████| 66/66 [01:17<00:00,  1.17s/it]

hospital setting is done


# Create point geometries for entire graph
# what is the purpose of the following two lines? Can this be deleted?
# for node, data in G.nodes(data=True):
#     data['geometry']=Point(data['x'], data['y'])

# which hospital to visualize? (row position in `hospitals`)
fighosp = 7

# Create the catchment polygons for the selected hospital.
# .iloc is used so this lookup and the hospital_subset selection below both
# address the same row by position.
poly = dijkstra_cca_polygons(G, hospitals['nearest_osm'].iloc[fighosp], distances)

# Declare the polygons as EPSG:4326 (lon/lat), then reproject to EPSG:32616
for i in range(len(poly)):
    poly[i] = poly[i].set_crs(epsg=4326, allow_override=True).to_crs(epsg=32616)

# Reproject hospitals
# Possible to map from the hospitals data rather than creating hospital_subset?
hospital_subset = hospitals.iloc[[fighosp]].to_crs(epsg=32616)

fig, ax = plt.subplots(figsize=(12,8))

min_10 = poly[0].plot(ax=ax, color="royalblue", label="10 min drive")
min_20 = poly[1].plot(ax=ax, color="cornflowerblue", label="20 min drive")
min_30 = poly[2].plot(ax=ax, color="lightsteelblue", label="30 min drive")

hospital_subset.plot(ax=ax, color="red", legend=True, label = "hospital")

# Add legend
ax.legend()

<matplotlib.legend.Legend at 0x7f810fdbe130>


# Display the list of reprojected catchment GeoDataFrames (one per distance band)
poly

[                                            geometry
 0  POLYGON ((441787.793 4610504.026, 433342.680 4...,
                                             geometry
 0  POLYGON ((433849.512 4600241.594, 427684.513 4...,
                                             geometry
 0  POLYGON ((451766.066 4587286.033, 438932.445 4...]


%%time
# Compute the catchment bands for all selected hospitals, using the number of
# worker processes chosen in the processor dropdown
catchments = measure_acc_par(hospitals, pop_data, G, distances, weights, num_proc=processor_dropdown.value)

Calculating 66 hospital catchments...
completed number: 5 15 0 10 6 1 16 11 2 7 12 17 3 8 18 13 4 9 19 14 20 25 30 35 21 26 31 36 22 27 37 32 28 23 33 38 29 24 34 39 40 45 55 50 41 46 56 51 42 47 57 52 43 48 58 53 44 49 59 54 60 65 61 62 63 64 CPU times: user 2.15 s, sys: 408 ms, total: 2.56 s
Wall time: 1min 48s


%%time
# Drop catchments whose selected resource ratio is infinite, then aggregate
# the remaining catchments onto the grid
selected_resource = resource_dropdown.value
for j, band in enumerate(catchments):
    finite_rows = band[selected_resource] != float('inf')
    catchments[j] = band[finite_rows]
result = overlapping_function(grid_file, catchments, selected_resource, weights, num_proc=processor_dropdown.value)

CPU times: user 5.2 s, sys: 365 ms, total: 5.56 s
Wall time: 17 s


# add weight field to each catchment polygon (weights[i] belongs to catchments[i])
for band_index, band_weight in enumerate(weights):
    catchments[band_index]['weight'] = band_weight
# combine the three sets of catchment polygons into one geodataframe
inner_band, middle_band, outer_band = catchments[0], catchments[1], catchments[2]
geocatchments = pd.concat([inner_band, middle_band, outer_band])
geocatchments


%%time
# set weighted to False for original 50% threshold method
# switch to True for area-weighted overlay
weighted = True

# if the value to be calculated is already in the hexagon grid, delete it
# otherwise, the field name gets a suffix _1 in the overlay step
if resource_dropdown.value in list(grid_file.columns.values):
    grid_file = grid_file.drop(resource_dropdown.value, axis = 1)

# calculate hexagon 'target' areas
grid_file['area'] = grid_file.area

# Intersection overlay of hospital catchments and hexagon grid
print("Intersecting hospital catchments with hexagon grid...")
fragments = gpd.overlay(grid_file, geocatchments, how='intersection')

# Calculate percent coverage of the hexagon by the hospital catchment as
# fragment area / target(hexagon) area
fragments['percent'] = fragments.area / fragments['area']

# if using weighted aggregation...
if weighted:
    print("Calculating area-weighted value...")
    # multiply the service/population ratio by the distance weight and the percent coverage
    fragments['value'] = fragments[resource_dropdown.value] * fragments['weight'] * fragments['percent']

# if using the 50% coverage rule for unweighted aggregation...
else:
    print("Calculating value for hexagons with >=50% overlap...")
    # keep only the fragments with >= 50% coverage by hospital catchment;
    # copy so the new column is written to an independent frame, not a slice
    fragments = fragments[fragments['percent']>=0.5].copy()
    # multiply the service/population ratio by the distance weight
    fragments['value'] = fragments[resource_dropdown.value] * fragments['weight']

# select just the hexagon id and value from the fragments,
# group the fragments by the (hexagon) id,
# and sum the values
print("Summarizing results by hexagon id...")
sum_results = fragments[['id', 'value']].groupby(by = ['id']).sum()

# join the results to the hexagon grid_file based on hexagon id
print("Joining results to hexagons...")
result_new = pd.merge(grid_file, sum_results, how="left", on = "id")

# rename value column name to the resource name
# (rename returns a new frame, so it must be assigned back; previously the
# renamed copy was only displayed and result_new kept its 'value' column)
result_new = result_new.rename(columns = {'value' : resource_dropdown.value})
result_new

Intersecting hospital catchments with hexagon grid...
Calculating area-weighted value...
Summarizing results by hexagon id...
Joining results to hexagons...
CPU times: user 12.2 s, sys: 61.4 ms, total: 12.3 s
Wall time: 12.3 s


%%time
# Rescale the selected resource column of the grid result with min-max normalization
result = normalization (result, resource_dropdown.value)

CPU times: user 2.24 ms, sys: 3 µs, total: 2.25 ms
Wall time: 2.14 ms


# Preview the first rows of the normalized grid result
result.head()


%%time
# Reproject the hospitals and the grid result to EPSG:26971 before mapping.
# The epsg= keyword replaces the deprecated {'init': ...} dictionary form and
# matches how to_crs is called elsewhere in this notebook.
hospitals = hospitals.to_crs(epsg=26971)
result = result.to_crs(epsg=26971)
# pop_data is passed as the outline (base_map) layer
output_map(result, pop_data, hospitals, resource_dropdown.value)

CPU times: user 1.33 s, sys: 157 ms, total: 1.49 s
Wall time: 1.29 s


def output_map_classified(output_grid, hospitals, resource,
                          xlim=(325000, 370000), ylim=(550000, 600000)):
    """Plot a classified choropleth of `resource` with hospitals on top.

    The grid is drawn with five equal-interval classes on the 'Blues'
    colormap and a legend; hospitals are overlaid as black points.

    Parameters
    ----------
    output_grid : object with a geopandas-style ``plot`` method
        Grid holding the `resource` column to map.
    hospitals : object with a geopandas-style ``plot`` method
        Hospital locations drawn above the grid.
    resource : str
        Name of the column in `output_grid` to classify and colour.
    xlim, ylim : pair of numbers or None, optional
        Axis bounds applied to the map. The defaults reproduce the frame
        that was previously hard-coded; pass None to keep the automatic
        extent of the plotted data.

    Returns
    -------
    The axes object returned by ``output_grid.plot``, so callers can
    customise or save the figure.
    """
    ax = output_grid.plot(column=resource,
                          scheme='Equal_Interval',
                          k=5,
                          linewidth=0,
                          cmap='Blues',
                          figsize=(18, 12),
                          legend=True,
                          label="Acc Measure",
                          zorder=1)
    # Explicit bounds are set because a stray point at the bottom left of the
    # map otherwise stretches the frame (Maja)
    if xlim is not None:
        ax.set_xlim(xlim)
    if ylim is not None:
        ax.set_ylim(ylim)
    # zorder=2 keeps the hospital markers above the grid (zorder=1)
    hospitals.plot(ax=ax,
                   markersize=10,
                   zorder=2,
                   color='black',
                   legend=False,
                   )
    return ax


output_map_classified(result, hospitals, resource_dropdown.value)
# save as image with a file name built from the population value and the resource value
# (the "buff_classified_spdLimit" suffix is fixed, not derived from a setting)
plt.savefig('./results/figures/reproduction/{}_{}_buff_classified_spdLimit.png'.format(population_dropdown.value, resource_dropdown.value))

	GEOID	STATEFP	COUNTYFP	TRACTCE	NAMELSAD	Pop	Unnamed_ 0	NAME	OverFifty	TotalPop	geometry
0	17091011700	17	091	011700	Census Tract 117	3688	588	Census Tract 117, Kankakee County, Illinois	1135	3688	POLYGON ((-87.88768 41.13594, -87.88764 41.136...
1	17091011800	17	091	011800	Census Tract 118	2623	220	Census Tract 118, Kankakee County, Illinois	950	2623	POLYGON ((-87.89410 41.14388, -87.89400 41.143...
2	17119400951	17	119	400951	Census Tract 4009.51	5005	2285	Census Tract 4009.51, Madison County, Illinois	2481	5005	POLYGON ((-90.11192 38.70281, -90.11128 38.703...
3	17119400952	17	119	400952	Census Tract 4009.52	3014	2299	Census Tract 4009.52, Madison County, Illinois	1221	3014	POLYGON ((-90.09442 38.72031, -90.09360 38.720...
4	17135957500	17	135	957500	Census Tract 9575	2869	1026	Census Tract 9575, Montgomery County, Illinois	1171	2869	POLYGON ((-89.70369 39.34803, -89.69928 39.348...

	ZCTA5CE10	County	State	Join	ZONE	ZONENAME	FIPS	pop	cases	geometry
0	60660	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	43242	78	POLYGON ((-87.65049 41.99735, -87.65029 41.996...
1	60640	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	69715	117	POLYGON ((-87.64645 41.97965, -87.64565 41.978...
2	60614	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	71308	134	MULTIPOLYGON (((-87.67703 41.91845, -87.67705 ...
3	60712	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	12539	42	MULTIPOLYGON (((-87.76181 42.00465, -87.76156 ...
4	60076	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	31867	114	MULTIPOLYGON (((-87.74782 42.01540, -87.74526 ...

	FID	Hospital	City	ZIP_Code	X	Y	Total_Bed	Adult ICU	Total Vent	geometry
0	2	Methodist Hospital of Chicago	Chicago	60640	-87.671079	41.972800	145	36	12	MULTIPOINT (-87.67108 41.97280)
1	4	Advocate Christ Medical Center	Oak Lawn	60453	-87.732483	41.720281	785	196	64	MULTIPOINT (-87.73248 41.72028)
2	13	Evanston Hospital	Evanston	60201	-87.683288	42.065393	354	89	29	MULTIPOINT (-87.68329 42.06539)
3	24	AMITA Health Adventist Medical Center Hinsdale	Hinsdale	60521	-87.920116	41.805613	261	65	21	MULTIPOINT (-87.92012 41.80561)
4	25	Holy Cross Hospital	Chicago	60629	-87.690841	41.770001	264	66	21	MULTIPOINT (-87.69084 41.77000)

			osmid	oneway	lanes	ref	name	highway	maxspeed	length	geometry	bridge	tunnel	access	junction	width	area	service
u	v	key
738776	768967302	0	[61699092, 918557247]	True	[5, 4]	I 294	Tri-State Tollway	motorway	55 mph	467.708	LINESTRING (-87.68109 41.58525, -87.68096 41.5...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
738920	348225363	0	[61431949, 31298719]	True	5	I 80;I 94	Kingery Expressway	motorway	55 mph	1220.747	LINESTRING (-87.56225 41.57764, -87.55790 41.5...	yes	NaN	NaN	NaN	NaN	NaN	NaN
739113	1875082688	0	60862616	True	2	NaN	NaN	motorway_link	NaN	549.609	LINESTRING (-87.34349 41.56738, -87.34277 41.5...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
739113	739130	0	292493273	True	4	I 80;I 94;US 6	Borman Expressway	motorway	55 mph	1191.046	LINESTRING (-87.34349 41.56738, -87.34104 41.5...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
739117	739113	0	292493271	True	5	I 80;I 94;US 6	Borman Expressway	motorway	55 mph	381.798	LINESTRING (-87.34806 41.56768, -87.34689 41.5...	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	geometry	time	total_pop	hospital_icu_beds	hospital_vents	weight
0	POLYGON ((448183.185 4637565.186, 445181.084 4...	600	774246.84	0.000046	0.000015	1.00
0	POLYGON ((438560.568 4609646.631, 432067.664 4...	600	719649.38	0.000272	0.000089	1.00
0	POLYGON ((444065.785 4649104.166, 442871.481 4...	600	455860.76	0.000195	0.000064	1.00
0	POLYGON ((421468.232 4621045.724, 421031.920 4...	600	718206.62	0.000091	0.000029	1.00
0	POLYGON ((443313.505 4615987.097, 440030.033 4...	600	694887.00	0.000095	0.000030	1.00
...	...	...	...	...	...	...
0	POLYGON ((443223.331 4604956.986, 440431.675 4...	1800	999144.68	0.000027	0.000009	0.22
0	MULTIPOLYGON (((420251.781 4675101.028, 428590...	1800	758913.48	0.000059	0.000018	0.22
0	POLYGON ((415910.447 4618609.875, 409824.239 4...	1800	963567.82	0.000087	0.000028	0.22
0	POLYGON ((444519.744 4602914.274, 438784.832 4...	1800	930027.68	0.000066	0.000022	0.22
0	POLYGON ((416033.726 4607281.060, 413086.072 4...	1800	785422.20	0.000120	0.000038	0.22

Title: Reproduction of Spatial Accessibility of COVID-19 Healthcare Resources in Illinois¶

Original Study Design¶

Original Data¶

Modules¶

Check Directories¶

Load and Visualize Data¶

Population and COVID-19 Cases Data by County¶

Load Hospital Data¶

Generate and Plot Map of Hospitals¶

Load and Plot Hexagon Grids (500-meter resolution)¶

Load the Road Network¶

Plot the Road Network¶

Check speed limit values¶

network_setting function¶

Preprocess the Network using network_setting¶

Re-check speed limit values¶

"Helper" Functions¶

hospital_setting¶

pop_centroid¶

dijkstra_cca_polygons¶

hospital_measure_acc (adjusted to incorporate dijkstra_cca_polygons)¶

measure_acc_par¶

overlap_calc¶

overlapping_function¶

normalization¶

file_import¶

Run the model¶

Process population data¶

Process hospital data¶

Visualize catchment areas for first hospital¶

Calculate hospital catchment areas¶

Calculate accessibility¶

Results & Discussion¶

Accessibility Map¶

Conclusion¶

References¶

	left	top	right	bottom	id	area	geometry	hospital_icu_beds
0	440843.416087	4.638515e+06	441420.766356	4.638015e+06	4158	216506.350946	POLYGON ((440843.416 4638265.403, 440987.754 4...	0.003569
1	440843.416087	4.638015e+06	441420.766356	4.637515e+06	4159	216506.350946	POLYGON ((440843.416 4637765.403, 440987.754 4...	0.003607
2	440843.416087	4.639515e+06	441420.766356	4.639015e+06	4156	216506.350946	POLYGON ((440843.416 4639265.403, 440987.754 4...	0.003651
3	440843.416087	4.639015e+06	441420.766356	4.638515e+06	4157	216506.350946	POLYGON ((440843.416 4638765.403, 440987.754 4...	0.003593
4	440843.416087	4.640515e+06	441420.766356	4.640015e+06	4154	216506.350946	POLYGON ((440843.416 4640265.403, 440987.754 4...	0.003608
...	...	...	...	...	...	...	...	...
3274	440843.416087	4.643015e+06	441420.766356	4.642515e+06	4149	216506.350946	POLYGON ((440843.416 4642765.403, 440987.754 4...	0.003535
3275	440843.416087	4.644515e+06	441420.766356	4.644015e+06	4146	216506.350946	POLYGON ((440843.416 4644265.403, 440987.754 4...	0.003515
3276	440843.416087	4.644015e+06	441420.766356	4.643515e+06	4147	216506.350946	POLYGON ((440843.416 4643765.403, 440987.754 4...	0.003525
3277	440843.416087	4.645515e+06	441420.766356	4.645015e+06	4144	216506.350946	POLYGON ((440843.416 4645265.403, 440987.754 4...	0.003403
3278	440843.416087	4.645015e+06	441420.766356	4.644515e+06	4145	216506.350946	POLYGON ((440843.416 4644765.403, 440987.754 4...	0.003472