import altair as alt
import geopandas as gpd
import pandas as pd
import numpy as np
import hvplot.pandas
import pandas as pd
from matplotlib import pyplot as plt
import holoviews as hv
from shapely.geometry import Polygon
from shapely.geometry import MultiPolygon
import requests
import geoviews as gv
import geoviews.tile_sources as gvts
import folium
from folium import plugins
from shapely.geometry import Point
import xyzservices
import osmnx as ox
import networkx as nx
import pygris
import cenpy
%matplotlib inline
pd.options.display.max_rows = 9999
pd.options.display.max_colwidth = 200
np.seterr(invalid="ignore");Analyzing Parking Spaces in LA
Yutong Jiang, Ray Ma
Final Project, MUSA 550
Introduction
Being the second-largest city in the United States, Los Angeles is known for its heavy vehicle reliance, huge population density, and commuting culture. Downtown LA, where most commuters head every single morning for work, has been a classic case study for urban planning because of its complex, outdated urban layout as well as its high concentration of crime. As thousands of commuters, residents, and visitors travel every day through this complicated neighborhood, street parking plays a crucial role in keeping the area’s ground transportation running. Therefore, being able to identify areas that are safe and accessible is essential for anyone who, for any reason, has to park in downtown LA. This study aims to assist drivers who need to park on the streets of downtown LA by suggesting the most optimal parking areas using a “Recommended Parking Score”. This score takes into consideration not only the safety of the streets, but also the price of available street parking meters. To achieve this, we need to take a comprehensive look at the neighborhood’s demographics, the trend of car-related crimes, as well as the price distribution of parking meters in the area. In the end, we hope to draw insights that might also help planners and policymakers fix some of the problems we identify in our study.
File setup and data collection
The first step of this analysis comprises the essential tasks of loading necessary packages, configuring different APIs for data collection, and managing global environment settings.
Data Wrangling
This step involves gathering data on parking for 2023 and preliminary data cleaning for the large dataset. All geospatial datasets are set to a uniform coordinate reference system, and boundary shapefiles are primed for use in OSM street network API.
# --- Load and prepare the LADOT parking-meter inventory ---
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np

np.seterr(invalid="ignore")

# NOTE(review): machine-specific absolute path — consider a relative path.
# (The original cell read the same CSV twice; one read suffices.)
meters = pd.read_csv('C:\\Users\\raymo\\Documents\\Fall 2024\\MUSA5500 Python\\Final\\data\\LADOT_Metered_Parking_Inventory___Policies_20241222.csv')

# "LatLng" is stored as a "(lat, lng)" string: strip parentheses, then
# split into separate numeric latitude/longitude columns.
meters['LatLng'] = meters['LatLng'].str.strip("()")
meters[['LATITUDE', 'LONGITUDE']] = meters['LatLng'].str.split(', ', expand=True).astype(float)

# Build point geometries (x = longitude, y = latitude), tag them as WGS84,
# then project to Web Mercator for distance work / web-map overlays.
meters = gpd.GeoDataFrame(
    meters,
    geometry=gpd.points_from_xy(meters['LONGITUDE'], meters['LATITUDE']),
    crs='EPSG:4326',
).to_crs('EPSG:3857')

# Parking meters in LA
Here is an overview of all parking meters in the city of Los Angeles and its adjacent cities. We observe that the concentration of parking meters in central Los Angeles is significantly higher than in its surrounding neighborhoods. As shown by the map, most of the parking meters are located in downtown LA, followed by Fairfax and then Koreatown, where the demand for street parking is high. However, this map alone does not provide enough information for us to draw useful conclusions in an urbanized area of such complexity, as the distribution of parking meters might be determined by population, profitability, parking demand, planning, and policy making. Therefore, we need to look at the problem through additional lenses.
# --- Interactive cluster map of every metered space ---
import folium
from folium.plugins import FastMarkerCluster
import xyzservices.providers

# FastMarkerCluster renders tens of thousands of points efficiently
# by clustering them client-side.
coords = meters[["LATITUDE", "LONGITUDE"]].values.tolist()
m = folium.Map(
    location=[34.05, -118.25],  # roughly downtown LA
    zoom_start=12,
    tiles=xyzservices.providers.CartoDB.Positron,
)
FastMarkerCluster(data=coords).add_to(m)
m

# Open Street Map Data
To streamline the workflow with this large dataset, relevant OSM data is refined by excluding highways, where parking is not allowed. This ensures the dataset focuses solely on accessible areas with available parking spaces. A new graph is created and plotted to reflect only the non-highway streets.
import osmnx as ox

# Download the drivable street network for the whole city, with OSMnx's
# topology simplification and the largest connected component only.
city_name = 'Los Angeles, California, USA'
G = ox.graph.graph_from_place(
    city_name,
    network_type='drive',
    simplify=True,
    retain_all=False,
)

# Quick visual sanity check: white edges/nodes on a black background.
ox.plot_graph(
    G,
    bgcolor='k',
    node_color='w',
    node_size=5,
    edge_color='w',
    edge_linewidth=0.5,
)
Number of Parking Meters by Length per Street
The following map shows the number of parking meters by street segment in the city of Los Angeles, with a focus on the downtown LA area. Interestingly, even though we observe most of the parking meters by street in downtown, the segments with higher density seem to be dispersed across the area. In addition to downtown, meters cluster in other areas such as Hollywood, Beverly Hills, Culver City, and around Santa Monica. The closer they are to downtown LA, the more dispersed they seem to be. There are also noticeable segments extending northwest of the city of Los Angeles towards Calabasas and Burbank. Very few segments are present to the east and south of downtown LA.
# --- Count meters per street segment, normalized by segment length ---

# Drop motorways (street parking is not allowed there). The original
# condition `A or (B and C)` reduces exactly to data.get('highway') != 'motorway':
# keep edges with no 'highway' tag, or with any non-motorway tag.
non_highway_edges = [
    (u, v, key)
    for u, v, key, data in G.edges(keys=True, data=True)
    if data.get('highway') != 'motorway'
]
G = G.edge_subgraph(non_highway_edges)
la_edges = ox.graph_to_gdfs(G, edges=True, nodes=False)

# Project the graph to match the meters' CRS (EPSG:3857), then snap each
# meter to its nearest street edge.
G_projected = ox.project_graph(G, to_crs='EPSG:3857')
x_coords = meters['geometry'].x
y_coords = meters['geometry'].y
nearest_edges = ox.distance.nearest_edges(G_projected, X=x_coords, Y=y_coords)

# Tally meters per (u, v) edge.
meters_nodes = pd.DataFrame(nearest_edges, columns=['u', 'v', 'key'])
meters_nodes['Count'] = 1
grouped_counts = meters_nodes.groupby(['u', 'v'])['Count'].sum().reset_index()

# NOTE(review): in recent OSMnx versions la_edges carries a (u, v, key)
# MultiIndex; a reset_index() may be needed before this merge — verify
# against the installed version.
merged_gdf = la_edges.merge(grouped_counts, on=['u', 'v'], how='left')
merged_gdf = merged_gdf[merged_gdf['Count'] > 0]

# Strip OSM attribute columns we do not need for mapping.
columns_to_remove = [
    'u', 'v', 'osmid', 'oneway', 'lanes', 'ref', 'maxspeed',
    'reversed', 'access', 'bridge', 'junction', 'width', 'tunnel'
]
merged_gdf = merged_gdf.drop(columns=columns_to_remove)

# Meters per unit of street length: a density, not a raw count.
merged_gdf['truecount'] = merged_gdf['Count'] / merged_gdf['length']

# Keep short-to-medium segments only (10-100 length units) to avoid
# density artifacts from very short or very long edges.
length_filter = (merged_gdf['length'] >= 10) & (merged_gdf['length'] <= 100)
merged_gdf = merged_gdf[length_filter]

merged_gdf.explore(tiles='cartodbdark_matter', column='truecount')

# Parking Price in LA
Taking a closer look at the street parking prices in the city of Los Angeles, we observe the most expensive parking in downtown LA, which can be as much as six times the price of adjacent areas. This makes sense, since downtown LA gathers the commuters who demand parking the most, making the area the most profitable.
import pandas as pd
import folium
from folium.plugins import MarkerCluster
import xyzservices.providers
# Small sample of metered spaces used to illustrate rate mapping.
data = {
    "SpaceID": ["WW516", "CB3034", "BH398"],
    "MeteredTimeLimit": ["2HR", "2HR", "1HR"],
    "RateRange": ["$1.00", "$1.00 - $6.00", "$1.00"],
    "LATITUDE": [34.060385, 34.047109, 34.045795],
    "LONGITUDE": [-118.304103, -118.245841, -118.21555],
}
meters = pd.DataFrame(data)

# Numeric hour limit from strings like "2HR". Raw string fixes the
# invalid-escape-sequence warning raised by '(\d+)'.
meters['TimeLimit'] = meters['MeteredTimeLimit'].str.extract(r'(\d+)').astype(float)

def extract_rate(rate_range):
    """Return the midpoint of a "$a" or "$a - $b" rate-range string."""
    rates = [float(r.replace("$", "")) for r in rate_range.split(" - ")]
    return sum(rates) / len(rates)

# Single rates map to themselves; ranges map to their average.
meters['RateValue'] = meters['RateRange'].apply(extract_rate)
# Map each sampled meter as a circle scaled by its (midpoint) hourly rate.
m = folium.Map(
    location=[34.05, -118.25],
    zoom_start=12,
    tiles=xyzservices.providers.CartoDB.Positron,
)
for _, row in meters.iterrows():
    folium.CircleMarker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        radius=row['RateValue'] * 5,  # visual scaling factor
        color="blue",
        fill=True,
        fill_opacity=0.6,
        popup=f"Metered Time Limit: {row['MeteredTimeLimit']}<br>Rate Range: {row['RateRange']}<br>Rate Value: ${row['RateValue']:.2f}"
    ).add_to(m)
m

import folium
from folium.plugins import HeatMap
import pandas as pd
# Five sample metered spaces for the heat-map demonstration.
data = {
    "SpaceID": ["WW516", "CB3034", "BH398", "UC8", "CB2345"],
    "LATITUDE": [34.060385, 34.047109, 34.045795, 34.136733, 34.030958],
    "LONGITUDE": [-118.304103, -118.245841, -118.215550, -118.363025, -118.255362],
}
meters = pd.DataFrame(data)

# HeatMap expects a plain list of [lat, lng] pairs.
latlng_cols = ["LATITUDE", "LONGITUDE"]
coordinates = meters[latlng_cols].values.tolist()
# Heat map of the sampled meter locations.
m = folium.Map(location=[34.05, -118.25], zoom_start=12, tiles="CartoDB Positron")
HeatMap(coordinates).add_to(m)
m

import pandas as pd
# LAPD "Crime Data from 2020 to Present" (Socrata CSV endpoint).
# NOTE(review): Socrata endpoints page their results (default ~1,000 rows);
# confirm whether the full dataset is needed and add a $limit if so.
url = "https://data.lacity.org/resource/2nrs-mtv8.csv"
crime_data = pd.read_csv(url)

# Vehicle Stolen Crimes Point
In this section, we will be analyzing car-related crimes in the city of Los Angeles, specifically examining vehicle theft incidents. According to the 2020 crime data, most cars are stolen in central Los Angeles, in addition to relatively smaller clusters around San Fernando and Long Beach.
import pandas as pd
import folium
url = "https://data.lacity.org/resource/2nrs-mtv8.csv"
crime_data = pd.read_csv(url)

# Subset to stolen-vehicle reports with usable coordinates.
# .copy() fixes the SettingWithCopyWarning triggered by assigning into a
# filtered view on the two lines below.
vehicle_stolen_data = crime_data[crime_data['crm_cd_desc'] == "VEHICLE - STOLEN"].copy()
vehicle_stolen_data = vehicle_stolen_data.dropna(subset=['lat', 'lon'])
vehicle_stolen_data['lat'] = pd.to_numeric(vehicle_stolen_data['lat'])
vehicle_stolen_data['lon'] = pd.to_numeric(vehicle_stolen_data['lon'])

# One marker per incident, with address and occurrence date in the popup.
m = folium.Map(location=[34.05, -118.25], zoom_start=12, tiles="CartoDB Positron")
for _, row in vehicle_stolen_data.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=f"Location: {row['location']}<br>Date: {row['date_occ']}"
    ).add_to(m)
m

# Vehicle Stolen Heat Map
This trend can be better represented by a heatmap that captures the locations where cars were stolen. For the purpose of this study, we will be looking at the city of Los Angeles in particular. As shown by the map, the highest numbers of vehicle theft incidents are located right around downtown LA and expand towards its surrounding neighborhoods, such as Culver City, Inglewood, and parts of Pasadena. With downtown LA being the origin of the heat, it is reasonable for us to conclude that downtown LA demands the most attention and is considered the most dangerous when it comes to vehicle-related crimes.
import pandas as pd
import folium
from folium.plugins import HeatMap
url = "https://data.lacity.org/resource/2nrs-mtv8.csv"
crime_data = pd.read_csv(url)

# Same stolen-vehicle subset as above; .copy() prevents the
# SettingWithCopyWarning on the numeric conversions below.
vehicle_stolen_data = crime_data[crime_data['crm_cd_desc'] == "VEHICLE - STOLEN"].copy()
vehicle_stolen_data = vehicle_stolen_data.dropna(subset=['lat', 'lon'])
vehicle_stolen_data['lat'] = pd.to_numeric(vehicle_stolen_data['lat'])
vehicle_stolen_data['lon'] = pd.to_numeric(vehicle_stolen_data['lon'])

heat_data = vehicle_stolen_data[['lat', 'lon']].values.tolist()
m = folium.Map(location=[34.05, -118.25], zoom_start=12, tiles="CartoDB Positron")
HeatMap(heat_data, radius=10).add_to(m)
m

# --- ACS 5-year (2021) demographics via cenpy ---
available = cenpy.explorer.available()
acs = cenpy.remote.APIConnection("ACSDT5Y2021")
# ACS variable codes (names per the rename mapping used later):
variables = [
    "NAME",
    "B19013_001E",  # Median Income
    "B03002_001E",  # Total Population
    "B03002_003E",  # White Population
    "B03002_012E",  # Hispanic Population
    "B08301_001E",  # Total Commuters
    "B08301_010E",  # Public Transit Commuters
]

la_county_code = "037"
ca_state_code = "06"

# Query every block group in every tract of Los Angeles County.
la_inc_data = acs.query(
    cols=variables,
    geo_unit="block group:*",
    geo_filter={"state": ca_state_code, "county": la_county_code, "tract": "*"},
)

# The Census API returns all values as strings; cast data columns to float.
for variable in variables:
    if variable != "NAME":
        la_inc_data[variable] = la_inc_data[variable].astype(float)

la_final = la_inc_data.copy()
# TIGER metadata columns we would discard before mapping.
columns_to_drop = [
    "STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE", "GEOID", "NAMELSAD",
    "MTFCC", "FUNCSTAT", "ALAND", "AWATER", "INTPTLAT", "INTPTLON"
]
# All-or-nothing drop: only drop when every column is present, otherwise
# just report what is missing. (Per the recorded notebook output, the
# cenpy result contains none of these, so this branch printed a warning.)
if all(col in la_final.columns for col in columns_to_drop):
    la_final.drop(columns=columns_to_drop, inplace=True)
else:
    missing_cols = [col for col in columns_to_drop if col not in la_final.columns]
    print(f"Warning: The following columns are missing and cannot be dropped: {missing_cols}")

# Human-readable names for the ACS variable codes.
la_final.rename(columns={
    "B19013_001E": "Median Income",
    "B03002_001E": "Total Population",
    "B03002_003E": "White Population",
    "B03002_012E": "Hispanic Population",
    "B08301_001E": "Total Commuters",
    "B08301_010E": "Public Transit Commuters"
}, inplace=True)

import geopandas as gpd
import pygris

# TIGER/Line block-group geometries for LA County, 2021 vintage.
block_groups = pygris.block_groups(state="CA", county="037", year=2021)

# Align key column names with cenpy's query output for the merge below.
block_groups.rename(
    columns={
        "STATEFP": "state",
        "COUNTYFP": "county",
        "TRACTCE": "tract",
        "BLKGRPCE": "block group"
    },
    inplace=True
)

# Left merge keeps every geometry, even block groups missing ACS values.
la_final_geo = block_groups.merge(la_final, on=["state", "county", "tract", "block group"], how="left")

# BUG FIX: assigning to .crs only relabels the CRS without transforming
# coordinates; reproject instead. (TIGER data is typically NAD83 /
# EPSG:4269 — confirm with block_groups.crs.)
la_final_geo = la_final_geo.to_crs("EPSG:4326")

la_final_geo.explore(
    column="Median Income",
    tiles="cartodbdark_matter",
    legend=True
)
# (notebook output) Using FIPS code '06' for input 'CA'