The City of Chicago Data Portal (https://data.cityofchicago.org/) holds a rich repository of data regarding the city’s infrastructure. Data sets can be downloaded for free and are available, at different organizational levels, in a variety of categories ranging from administration and finance information to sanitation and public safety requests.
Here, we will focus on data sets that contain information about Chicago Public Libraries (CPL), such as location, hours of operation, number of monthly visitors, etc. Public libraries are crucial institutions that provide free access to information and resources for all. They promote lifelong learning, support education, and bridge the digital divide by offering computers and internet access. Libraries serve as community hubs, fostering social connections and cultural engagement. They contribute to democracy by providing unbiased information and resources for civic engagement.
Thus, it is important that they are maintained and that adequate resources are provided, especially to libraries that see many visitors per month. For this project, we are particularly interested in the seasonality behind the use of public libraries. Is there a time of the year during which they experience more visitors (e.g., summer vs. winter)? Has there been a decline or increase in the use of public libraries? And if so, did the COVID-19 pandemic contribute to these changes?
We hope to answer all of these questions within the following exploration.
# Import Libraries
import os
import re
import folium
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from scipy.stats import ttest_ind
from geopy import distance
from shapely.geometry import Point
from pandas_profiling import ProfileReport
from IPython.display import display, HTML
from IPython.display import clear_output
from sklearn.linear_model import LinearRegression
Let's start our investigation by first looking at how public libraries are distributed within the city of Chicago. For that, we will be using the following data:
The data set contains a total of 81 rows. Each row in the data set represents a library location in Chicago and includes the following fields:
Please note that branches may experience short-term closures due to utility interruptions, HVAC problems, and other unforeseen issues. For more up-to-date hours, please see https://chipublib.bibliocommons.com/locations or the Hours & Locations link at the top of any page on https://www.chipublib.org. For current closures, please see https://www.chipublib.org/news/cpl-location-closures.
# Load local copy of the data set
# libraries_df = pd.read_csv("~/Raw_Data/Library_Locations/Libraries_Locations_Contact_Information_and_Usual_Hours_of_Operation.csv")
# Or download a more recent copy from URL
url = "https://data.cityofchicago.org/api/views/x8fc-8rcq/rows.csv?accessType=DOWNLOAD"
libraries_df = pd.read_csv(url)
libraries_df.head()
NAME | HOURS OF OPERATION | ADDRESS | CITY | STATE | ZIP | PHONE | WEBSITE | LOCATION | |
---|---|---|---|---|---|---|---|---|---|
0 | Vodak-East Side | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 3710 E. 106th St. | Chicago | IL | 60617 | (312) 747-5500 | https://www.chipublib.org/locations/71/ | (41.70283443594318, -87.61428978448026) |
1 | Albany Park | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) |
2 | Avalon | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8148 S. Stony Island Ave. | Chicago | IL | 60617 | (312) 747-5234 | https://www.chipublib.org/locations/8/ | (41.746393038286826, -87.5860053710736) |
3 | Brainerd | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 1350 W. 89th St. | Chicago | IL | 60620 | (312) 747-6291 | https://www.chipublib.org/locations/13/ | (41.73244482025524, -87.65772892721816) |
4 | Popular Library at Water Works | Mon. - Thurs., 10-6; Fri. & Sat., 9-5; Sun., 1-5 | 163 E. Pearson St. | Chicago | IL | 60611 | (312) 742-8811 | https://www.chipublib.org/locations/73/ | (41.897484072390675, -87.62337776811282) |
libraries_df.tail()
NAME | HOURS OF OPERATION | ADDRESS | CITY | STATE | ZIP | PHONE | WEBSITE | LOCATION | |
---|---|---|---|---|---|---|---|---|---|
76 | Woodson Regional Library | Mon. - Thurs., 9-8; Fri. & Sat., 9-5; Sun., 1-5 | 9525 S. Halsted St. | Chicago | IL | 60628 | (312) 747-6900 | https://www.chipublib.org/locations/81/ | (41.720694885749005, -87.64304817213312) |
77 | Mayfair | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 4400 W. Lawrence Ave. | Chicago | IL | 60630 | (312) 744-1254 | https://www.chipublib.org/locations/49/ | (41.968242773953044, -87.737968778247) |
78 | Chicago Bee | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 3647 S. State St. | Chicago | IL | 60609 | (312) 747-6872 | https://www.chipublib.org/locations/18/ | (41.82824306445502, -87.6263495444489) |
79 | Uptown | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 929 W. Buena Ave. | Chicago | IL | 60613 | (312) 744-8400 | https://www.chipublib.org/locations/70/ | (41.95832305807637, -87.65424744448335) |
80 | Sherman Park | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 5440 S. Racine Ave. | Chicago | IL | 60609 | (312) 747-0477 | https://www.chipublib.org/locations/64/ | (41.79476901885989, -87.65502837616037) |
# Inspect data set structure
libraries_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 81 entries, 0 to 80 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 NAME 81 non-null object 1 HOURS OF OPERATION 81 non-null object 2 ADDRESS 81 non-null object 3 CITY 81 non-null object 4 STATE 81 non-null object 5 ZIP 81 non-null int64 6 PHONE 81 non-null object 7 WEBSITE 81 non-null object 8 LOCATION 81 non-null object dtypes: int64(1), object(8) memory usage: 5.8+ KB
Note, the 'LOCATION' column values are currently strings in the format of '(latitude, longitude)'. Before they can be used in a mapping function, they need to be converted into a tuple of floats.
For mapping, we will use the Python library folium because it makes it easy to create interactive maps directly from Python, and it integrates well with pandas data structures. The alternative would be to use a JavaScript library like Leaflet directly, but this would require writing JavaScript code and wouldn't integrate as smoothly with our pandas dataframe.
# Define a function to parse location strings into tuples of floats
def parse_location(location_str):
# Remove parentheses from the location string and split it into two parts by the comma
lat_str, lon_str = location_str[1:-1].split(", ")
# Convert the parts to floats and return them as a tuple
return float(lat_str), float(lon_str)
# Remove leading and trailing white spaces from all column names in the dataframe
# This is to prevent issues where a column name can't be found due to unexpected white spaces
libraries_df.columns = libraries_df.columns.str.strip()
# Create a new folium Map object
chicago_map = folium.Map(location = [41.8781, -87.6298], zoom_start = 11)
# Iterate over each row in the library dataframe
for idx, row in libraries_df.iterrows():
# Use the parse_location function to convert the location string into a tuple of floats
location = parse_location(row['LOCATION'])
# Add a marker to the map at the given location
# The tooltip includes the label 'Public Library:' followed by the name of the library (with HTML bold formatting)
tooltip_text = f'<b>Public Library:</b> {row["NAME"]}'
folium.Marker(location, tooltip = tooltip_text).add_to(chicago_map)
# Display the map
# Note, this will only work in an interactive environment like a Jupyter notebook
# If you're running this script from a file, you may need to save the map to an HTML file and open it in a web browser
chicago_map
Determining whether libraries are "equally spaced" can be a bit complex due to the irregular shape of most city boundaries and the fact that population density can vary significantly across a city. However, there are a few different ways we could analyze the distribution of libraries across the city.
One way is to create a heatmap of library locations. This would give us a visual representation of areas with a high density of libraries versus areas with a low density.
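As a quick illustration of this first option, here is a minimal sketch using folium's HeatMap plugin together with the libraries_df and parse_location defined above; with only 81 points the result is coarse, but it still highlights where branches cluster.
# Sketch: heat map of library locations using folium's HeatMap plugin
from folium.plugins import HeatMap
# Create a base map centered on Chicago
heat_map = folium.Map(location = [41.8781, -87.6298], zoom_start = 11)
# HeatMap expects a list of [lat, lon] pairs (an optional weight can be appended to each pair)
heat_data = [list(parse_location(loc)) for loc in libraries_df['LOCATION']]
HeatMap(heat_data, radius = 25).add_to(heat_map)
# Display the heat map (works in an interactive environment like a Jupyter notebook)
heat_map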
Another way is to calculate the nearest neighbor distance for each library, which is the distance to the closest other library. If the libraries were perfectly equally spaced, all these distances would be the same. By calculating these distances and looking at their distribution (e.g., by calculating the mean and standard deviation or creating a histogram), we can get an idea of how equally spaced the libraries are.
# Define a function to calculate distances from each library to all other libraries
def calculate_distances(data):
# Initialize an empty list to hold the minimum distances for each library
distances = []
# Iterate over each row (library) in the data
for idx1, row1 in data.iterrows():
# Initialize an empty list to hold the distances from the current library to all other libraries
library_distances = []
# Parse the location of the current library
location1 = parse_location(row1['LOCATION'])
# Again iterate over each row (library) in the data
for idx2, row2 in data.iterrows():
# If the second library is not the same as the current library
if idx1 != idx2:
# Parse the location of the second library
location2 = parse_location(row2['LOCATION'])
# Calculate the distance between the current library and the second library
dist = distance.distance(location1, location2).miles
# Add the calculated distance to the list of distances for the current library
library_distances.append(dist)
# Find the minimum distance for the current library and add it to the list of minimum distances
distances.append(min(library_distances))
# Return the list of minimum distances
return distances
# Call the function to calculate the minimum distance for each library
min_distances = calculate_distances(libraries_df)
# Calculate the mean (average) of the minimum distances
mean_min_dist = round(sum(min_distances) / len(min_distances), 2)
print(f"Mean minimum distance: {mean_min_dist} miles")
Mean minimum distance: 1.22 miles
# Create a histogram of the minimum distances
plt.hist(min_distances, bins = 5, edgecolor = 'black')
plt.xlabel('Minimum Distance (miles)')
plt.ylabel('Frequency')
plt.title('Histogram of Minimum Distances between Libraries')
plt.show()
The result we got, 1.22 miles, is the average distance to the nearest neighboring library for each library in our data set.
This number by itself doesn't tell us whether the libraries are "too close" or "too far apart", because that depends on many factors including the size and population density of Chicago, and what we consider to be an acceptable distance to a library.
However, it can provide some useful context. For example, if we know that the average block in Chicago is about 0.1 miles long, this result suggests that the average library has another library within a distance of about 1.2 miles (i.e., about 12 blocks).
To better interpret this number, we might want to compare it to other cities, or to some standard of what we believe is a reasonable distance to a library. We might also want to consider other factors, like how this distance varies across different neighborhoods, and whether there are any areas that are particularly underserved by libraries.
For example, we could calculate the standard deviation of these minimum distances to see how much they vary. If the standard deviation is small, it suggests that most libraries are about the same distance from their nearest neighbor. If it's large, it means there's a lot of variation, and some libraries are much further from their nearest neighbor than others.
# Calculate the standard deviation of the minimum distances
std_min_dist = round(np.std(min_distances), 2)
print(f"Standard deviation of minimum distances: {std_min_dist} miles")
Standard deviation of minimum distances: 0.34 miles
The standard deviation of 0.34 miles tells us about the variation or dispersion of the minimum distances between libraries. In other words, it gives us a measure of how much these distances typically deviate from the average minimum distance, which was calculated to be 1.22 miles.
A small standard deviation (relative to the mean) indicates that the data points tend to be very close to the mean. In this context, a small standard deviation would mean that most libraries have their nearest neighboring library at a distance close to the average of 1.22 miles. While a high standard deviation indicates that the data points are spread out over a large range of values. In this context, a large standard deviation would mean that the distances to the nearest neighboring library vary widely from the average of 1.22 miles.
With a standard deviation of 0.34 miles, we can say that while the average distance to the nearest library is 1.22 miles, the distances vary somewhat, and it's not uncommon for a library to have its nearest neighbor anywhere between roughly 0.88 miles (1.22 - 0.34) and 1.56 miles (1.22 + 0.34) away.
However, remember that the standard deviation is just a rough guideline - about 68% of libraries will have their nearest neighbor within this range if the data follows a normal distribution. There may still be some libraries with much larger or smaller distances to their nearest neighbor.
This suggests that while libraries in Chicago are somewhat evenly distributed, there is still some variation, and some areas of the city might be better served than others.
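As a quick sanity check on that 68% rule of thumb, we can count how many libraries actually have their nearest neighbor within one standard deviation of the mean, reusing the min_distances, mean_min_dist, and std_min_dist values computed above.
# Share of libraries whose nearest-neighbor distance falls within one standard deviation of the mean
lower_bound = mean_min_dist - std_min_dist
upper_bound = mean_min_dist + std_min_dist
within_one_std = [d for d in min_distances if lower_bound <= d <= upper_bound]
share_within = len(within_one_std) / len(min_distances)
print(f"Libraries with nearest neighbor between {lower_bound:.2f} and {upper_bound:.2f} miles: {share_within:.0%}")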
Let's continue our investigation by looking at how this distance varies across different neighborhoods, and whether there are any areas that are particularly underserved by libraries by incorporating the following data:
Current community area boundaries in Chicago.
# Create a new folium Map object
chicago_map2 = folium.Map(location = [41.8781, -87.6298], zoom_start = 11)
# Load GeoJSON file with community areas from a local copy
# community_areas_gdf = gpd.read_file("~/Raw_Data/Boundaries_Community_Areas/Boundaries_Community_Areas_Current.geojson")
# Load GeoJSON file with community areas from a URL
community_areas_url = "https://data.cityofchicago.org/api/geospatial/cauq-8yn6?method=export&format=GeoJSON"
community_areas_gdf = gpd.read_file(community_areas_url)
# Make sure 'community' is in the columns of community_areas_gdf
assert 'community' in community_areas_gdf.columns, "'community' column not found in community_areas_gdf"
# Add the community areas layer to the map
community_layer = folium.GeoJson(community_areas_gdf, name = "Community Areas")
# Add tooltips to the community layer using the 'community' column
# The tooltip includes the label 'Community Area:' followed by the name of the community area
tooltips = folium.features.GeoJsonTooltip(fields = ['community'], labels = True, sticky = True,
aliases = ['Community Area: '])
community_layer.add_child(tooltips)
# Add the community layer to the map
community_layer.add_to(chicago_map2)
# Iterate over each row in the library dataframe
for idx, row in libraries_df.iterrows():
# Use the parse_location function to convert the location string into a tuple of floats
location = parse_location(row['LOCATION'])
# Add a marker to the map at the given location
# The tooltip includes the label 'Public Library:' followed by the name of the library (with HTML bold formatting)
tooltip_text = f'<b>Public Library:</b> {row["NAME"]}'
folium.Marker(location, tooltip = tooltip_text).add_to(chicago_map2)
# Display the map
chicago_map2
By adding the community area layer to the map, we can visually discern if there are any community areas that do not have public libraries within their boundaries. Out of 77 community areas, it seems that only 16 of them don't have a public library positioned exactly inside their boundaries; the list includes: O'Hare, Edison Park, Lincoln Park, Belmont Cragin, Hermosa, East Garfield Park, Near South Side, Oakland, Hyde Park, Washington Park, Chatham, Burnside, Calumet Heights, East Side, and Auburn Gresham. However, note that in most of these cases a public library lies right at the boundary between two community areas.
This is not necessarily a bad thing, as most public libraries seem to be evenly dispersed across Chicago. To make the investigation more rigorous, we could select points within each community area (e.g., its centroid) and calculate distances to the nearest public library, as sketched below. Additionally, connecting to an API like Google Maps could give us additional information regarding travel distances and how different modes of transportation might affect them, giving us further insight into which neighborhoods might be underserved by libraries.
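Here is a rough sketch of the centroid idea, reusing parse_location, libraries_df, and community_areas_gdf from above. Note that taking centroids directly in latitude/longitude is only an approximation, and a centroid can occasionally fall outside an irregularly shaped area.
# Sketch: distance from each community area's centroid to the nearest public library
library_points = [parse_location(loc) for loc in libraries_df['LOCATION']]
records = []
for _, area in community_areas_gdf.iterrows():
    # Shapely points store coordinates as (x, y) = (longitude, latitude)
    centroid = area['geometry'].centroid
    centroid_latlon = (centroid.y, centroid.x)
    # Distance (in miles) from the centroid to the closest library
    nearest_miles = min(distance.distance(centroid_latlon, pt).miles for pt in library_points)
    records.append({'COMMUNITY': area['community'], 'MILES_TO_NEAREST_LIBRARY': round(nearest_miles, 2)})
# Community areas whose centers are furthest from a public library
centroid_distances = pd.DataFrame(records).sort_values('MILES_TO_NEAREST_LIBRARY', ascending = False)
centroid_distances.head(10)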
The "HOURS OF OPERATION" field in our data appears to include operating hours for different days of the week in a semi-structured format.
To make better sense of it, we could separate the hours of operation for each day of the week into separate columns. This process involves parsing the text and can be a bit complex because the format isn't perfectly consistent (for example, sometimes there's a space after the comma, and sometimes there isn't). Let's create a function that extracts the hours for each day of the week and applies it to the "HOURS OF OPERATION" column. We'll create a new dataframe that includes these parsed hours, and then merge it with the original dataframe.
Regular expressions allow us to match and extract information from strings based on patterns. The structure of the "HOURS OF OPERATION" strings seems to follow this pattern:
# Regular expression patterns for each day
# Note: each pattern expects the captured hours to be followed by a semicolon, so day ranges
# such as 'Mon. - Thurs.' only assign hours to the explicitly named days, and hours at the very
# end of the string (typically Sunday) are not captured; unmatched days default to an empty string
patterns = {
"MON": r"Mon\..*?,\s*(.*?);",
"TUES": r"Tues\..*?,\s*(.*?);",
"WED": r"Wed\..*?,\s*(.*?);",
"THURS": r"Thurs\..*?,\s*(.*?);",
"FRI": r"Fri\..*?,\s*(.*?);",
"SAT": r"Sat\..*?,\s*(.*?);",
"SUN": r"Sun\..*?,\s*(.*?);",
}
# Function to parse hours of operation using regex
def parse_hours_regex(hours_str):
hours_dict = {}
for day, pattern in patterns.items():
match = re.search(pattern, hours_str)
hours_dict[day] = match.group(1).strip() if match else ""
return hours_dict
# Apply the function to the "HOURS OF OPERATION" column
hours_df = libraries_df['HOURS OF OPERATION'].apply(parse_hours_regex).apply(pd.Series)
# Merge the hours dataframe with the original dataframe
parsed_data = pd.concat([libraries_df, hours_df], axis = 1)
parsed_data.head()
NAME | HOURS OF OPERATION | ADDRESS | CITY | STATE | ZIP | PHONE | WEBSITE | LOCATION | MON | TUES | WED | THURS | FRI | SAT | SUN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Vodak-East Side | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 3710 E. 106th St. | Chicago | IL | 60617 | (312) 747-5500 | https://www.chipublib.org/locations/71/ | (41.70283443594318, -87.61428978448026) | Noon-8 | 10-6 | Noon-8 | 10-6 | 9-5 | 9-5 | |
1 | Albany Park | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 10-6 | Noon-8 | 10-6 | Noon-8 | 9-5 | 9-5 | |
2 | Avalon | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8148 S. Stony Island Ave. | Chicago | IL | 60617 | (312) 747-5234 | https://www.chipublib.org/locations/8/ | (41.746393038286826, -87.5860053710736) | Noon-8 | 10-6 | Noon-8 | 10-6 | 9-5 | 9-5 | |
3 | Brainerd | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 1350 W. 89th St. | Chicago | IL | 60620 | (312) 747-6291 | https://www.chipublib.org/locations/13/ | (41.73244482025524, -87.65772892721816) | 10-6 | Noon-8 | 10-6 | Noon-8 | 9-5 | 9-5 | |
4 | Popular Library at Water Works | Mon. - Thurs., 10-6; Fri. & Sat., 9-5; Sun., 1-5 | 163 E. Pearson St. | Chicago | IL | 60611 | (312) 742-8811 | https://www.chipublib.org/locations/73/ | (41.897484072390675, -87.62337776811282) | 10-6 | 10-6 | 9-5 | 9-5 |
Now that we have separated the hours of operation by day of the week, we can also compute the total hours of operation per week, by first converting the hours of operation into a numerical format. Currently, the hours of operation are stored as strings in a 12-hour clock format, such as "Noon-8" or "10-6".
We'll start by defining a function that converts a string representing a range of hours into a number representing the total hours. This function will handle different ways of representing times, such as "Noon" or "10".
Then, we'll apply this function to the hours of operation for each day, sum up the total hours for each week, and store the result in a new column.
# Function to calculate total hours from a time range string
def calculate_hours(time_range_str):
if not time_range_str:
return 0
# Convert "Noon" to "12"
time_range_str = time_range_str.replace("Noon", "12")
# Split the string into start time and end time
start_str, end_str = time_range_str.split("-")
# Convert start and end times to integers
start = int(start_str.strip())
end = int(end_str.strip())
# If the end time is less than the start time, add 12 to it (to handle times in the evening)
if end < start:
end += 12
# Calculate the total hours
total_hours = end - start
return total_hours
# Apply the function to the hours for each day and sum up the total hours for each week
parsed_data["TOTAL HOURS"] = parsed_data[["MON", "TUES", "WED", "THURS", "FRI", "SAT", "SUN"]].applymap(calculate_hours).sum(axis=1)
parsed_data.head()
NAME | HOURS OF OPERATION | ADDRESS | CITY | STATE | ZIP | PHONE | WEBSITE | LOCATION | MON | TUES | WED | THURS | FRI | SAT | SUN | TOTAL HOURS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Vodak-East Side | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 3710 E. 106th St. | Chicago | IL | 60617 | (312) 747-5500 | https://www.chipublib.org/locations/71/ | (41.70283443594318, -87.61428978448026) | Noon-8 | 10-6 | Noon-8 | 10-6 | 9-5 | 9-5 | 48 | |
1 | Albany Park | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 10-6 | Noon-8 | 10-6 | Noon-8 | 9-5 | 9-5 | 48 | |
2 | Avalon | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8148 S. Stony Island Ave. | Chicago | IL | 60617 | (312) 747-5234 | https://www.chipublib.org/locations/8/ | (41.746393038286826, -87.5860053710736) | Noon-8 | 10-6 | Noon-8 | 10-6 | 9-5 | 9-5 | 48 | |
3 | Brainerd | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 1350 W. 89th St. | Chicago | IL | 60620 | (312) 747-6291 | https://www.chipublib.org/locations/13/ | (41.73244482025524, -87.65772892721816) | 10-6 | Noon-8 | 10-6 | Noon-8 | 9-5 | 9-5 | 48 | |
4 | Popular Library at Water Works | Mon. - Thurs., 10-6; Fri. & Sat., 9-5; Sun., 1-5 | 163 E. Pearson St. | Chicago | IL | 60611 | (312) 742-8811 | https://www.chipublib.org/locations/73/ | (41.897484072390675, -87.62337776811282) | 10-6 | 10-6 | 9-5 | 9-5 | 32 |
Now, let's visualize all this information.
# Convert hours to numeric for plot
hours_numeric_df = hours_df.applymap(calculate_hours)
# Remove Sunday from the hours dataframe since the regex above did not capture any values for it
# (Sunday hours, when present, appear at the end of the string without a trailing semicolon)
hours_numeric_df = hours_numeric_df.drop(columns="SUN")
# Create histograms for hours of operation for each day
plt.figure(figsize = (14, 10))
for i, day in enumerate(["MON", "TUES", "WED", "THURS", "FRI", "SAT"], start = 1):
plt.subplot(2, 3, i)
plt.hist(hours_numeric_df[day], bins = range(0, 13), alpha = 0.7, color = 'skyblue', edgecolor = 'black')
plt.title(f"{day} Hours of Operation")
plt.xlabel("Hours")
plt.ylabel("Frequency")
plt.tight_layout()
plt.show()
# Create histogram for total hours per week
plt.figure(figsize = (10, 6))
plt.hist(parsed_data["TOTAL HOURS"], bins = range(0, 60, 2), alpha = 0.7, color = 'skyblue', edgecolor = 'black')
plt.title("Total Hours of Operation Per Week")
plt.xlabel("Hours")
plt.ylabel("Frequency")
plt.show()
The histograms at the top show the distribution of hours of operation for each day from Monday to Saturday across all libraries. Each bar represents the number of libraries operating for a given number of hours. For example, in the "MON Hours of Operation" histogram, the bar over "8" hours means that there are over 70 libraries that operate for 8 hours on Mondays.
The histogram at the bottom shows the distribution of total hours of operation per week across all libraries. Each bar represents the number of libraries operating for a given total number of hours per week. For example, the tallest bar over "48" hours means that the most common total hours of operation per week is around 48 hours.
Let's dig a little deeper and obtain the names of the public libraries that are open fewer than the most common value of 48 hours per week.
# Filter the dataframe for libraries open less than 48 hours per week
less_than_48 = parsed_data[parsed_data["TOTAL HOURS"] < 48][["NAME", "TOTAL HOURS"]]
# Sort by total hours
less_than_48 = less_than_48.sort_values("TOTAL HOURS")
less_than_48
NAME | TOTAL HOURS | |
---|---|---|
42 | Galewood-Mont Clare | 0 |
4 | Popular Library at Water Works | 32 |
5 | Little Italy | 32 |
13 | Portage-Cragin | 32 |
33 | Archer Heights | 32 |
47 | West Pullman | 32 |
51 | Austin | 32 |
26 | Sulzer Regional Library | 38 |
49 | Legler Regional | 38 |
60 | Harold Washtington Library Center | 38 |
76 | Woodson Regional Library | 38 |
Compared to the rest, it appears that most of these libraries are open for 32 or 38 hours per week, with one library (Galewood-Mont Clare) having a total of 0 hours of operation listed, which might be a data error or indicate that it's currently closed. Based on the list provided at https://www.chipublib.org/news/cpl-location-closures, we can see that Galewood-Mont Clare is currently closed. Also note that the low totals for the regional libraries and Water Works partly reflect the parsing limitations mentioned above (day ranges such as 'Mon. - Thurs.' and trailing Sunday hours are not captured), rather than genuinely shorter schedules.
As it stands, this data set does not provide any additional information that could tell us whether differences in hours of operation, for example, affect the way people utilize Chicago Public Libraries. Next, we will incorporate visitor count data to answer this and other questions relevant to our investigation.
But first, let's save the changes we made for later use.
# Melt the dataframe to long format (long format is better for analysis)
location_long_format = pd.melt(parsed_data, id_vars = ['NAME', 'HOURS OF OPERATION', 'ADDRESS', 'CITY', 'STATE', 'ZIP',
'PHONE', 'WEBSITE', 'LOCATION', 'TOTAL HOURS'],
value_vars = ["MON", "TUES", "WED", "THURS", "FRI", "SAT", "SUN"],
var_name = 'DAY', value_name = 'HOURS')
# Convert hours to numerical format
location_long_format['HOURS'] = location_long_format['HOURS'].apply(calculate_hours)
# Rename specific columns
column_rename_dict = {'TOTAL HOURS': 'TOTAL HOURS WEEK', 'HOURS': 'HOURS OPEN'}
location_long_format = location_long_format.rename(columns=column_rename_dict)
# Specify the saving path for the CSV file
saving_path = '~/Processed_Data/Library_Locations_Processed.csv'
# Save the long format DataFrame to a CSV file
location_long_format.to_csv(saving_path, index = False)
location_long_format.head()
NAME | HOURS OF OPERATION | ADDRESS | CITY | STATE | ZIP | PHONE | WEBSITE | LOCATION | TOTAL HOURS WEEK | DAY | HOURS OPEN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Vodak-East Side | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 3710 E. 106th St. | Chicago | IL | 60617 | (312) 747-5500 | https://www.chipublib.org/locations/71/ | (41.70283443594318, -87.61428978448026) | 48 | MON | 8 |
1 | Albany Park | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 48 | MON | 8 |
2 | Avalon | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8148 S. Stony Island Ave. | Chicago | IL | 60617 | (312) 747-5234 | https://www.chipublib.org/locations/8/ | (41.746393038286826, -87.5860053710736) | 48 | MON | 8 |
3 | Brainerd | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 1350 W. 89th St. | Chicago | IL | 60620 | (312) 747-6291 | https://www.chipublib.org/locations/13/ | (41.73244482025524, -87.65772892721816) | 48 | MON | 8 |
4 | Popular Library at Water Works | Mon. - Thurs., 10-6; Fri. & Sat., 9-5; Sun., 1-5 | 163 E. Pearson St. | Chicago | IL | 60611 | (312) 742-8811 | https://www.chipublib.org/locations/73/ | (41.897484072390675, -87.62337776811282) | 32 | MON | 8 |
We will continue our investigation into Chicago Public Libraries by adding information on the number of visitors that each location experiences per month and year. The Chicago Data Portal has multiple data sets, similarly structured to the one below for 2011.
The data set contains a total of 79 rows. Each row in the data set represents a library location in Chicago and includes the following fields:
This data set provides information on the number of visitors to each library in the Chicago Public Library system, which includes the Harold Washington Library Center, Sulzer and Woodson regional libraries, and over 70 neighborhood branches. Please note that some locations may experience sporadic closures due to facilities upgrades, emergency closures due to heating or air conditioning issues, or area power outages.
An asterisk (*) in the 'LOCATION' name signifies that the count does not reflect the total building visitor count due to the location of the traffic counter. Community room and program traffic are not included in the totals.
# Load the 2011 visitor count data set from URL
visitors_2011 = pd.read_csv("https://data.cityofchicago.org/api/views/xxwy-zyzu/rows.csv?accessType=DOWNLOAD")
visitors_2011.head()
LOCATION | ADDRESS | CITY | ZIP CODE | JANUARY | FEBRUARY | MARCH | APRIL | MAY | JUNE | JULY | AUGUST | SEPTEMBER | OCTOBER | NOVEMBER | DECEMBER | YTD | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Albany Park | 5150 N. Kimball Avenue | CHICAGO | 60625.0 | 9604.0 | 10500.0 | 9050.0 | 9300.0 | 8271.0 | 10984.0 | 9986.0 | 11078.0 | 9453.0 | 10213.0 | 9377.0 | 9609.0 | 117425.0 |
1 | Altgeld | 13281 S. Corliss Avenue | CHICAGO | 60827.0 | 5809.0 | 3899.0 | 5207.0 | 5201.0 | 4494.0 | 5760.0 | 3653.0 | 2414.0 | 4552.0 | 6891.0 | 5698.0 | 5079.0 | 58657.0 |
2 | Archer Heights* | 5055 S. Archer Avenue | CHICAGO | 60632.0 | 9829.0 | 9394.0 | 11342.0 | 11114.0 | 9365.0 | 11247.0 | 10329.0 | 11231.0 | 10373.0 | 11364.0 | 10011.0 | 9054.0 | 124653.0 |
3 | Austin | 5615 W. Race Avenue | CHICAGO | 60644.0 | 6713.0 | 6250.0 | 7054.0 | 9139.0 | 8857.0 | 9586.0 | 8352.0 | 10359.0 | 9151.0 | 10016.0 | 8461.0 | 8368.0 | 102306.0 |
4 | Austin-Irving | 6100 W. Irving Park Road | CHICAGO | 60634.0 | 11556.0 | 9904.0 | 13214.0 | 13064.0 | 10969.0 | 12587.0 | 12596.0 | 13638.0 | 12542.0 | 13286.0 | 11868.0 | 10628.0 | 145852.0 |
visitors_2011.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 81 entries, 0 to 80 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 LOCATION 80 non-null object 1 ADDRESS 79 non-null object 2 CITY 79 non-null object 3 ZIP CODE 79 non-null float64 4 JANUARY 80 non-null float64 5 FEBRUARY 80 non-null float64 6 MARCH 79 non-null float64 7 APRIL 79 non-null float64 8 MAY 79 non-null float64 9 JUNE 78 non-null float64 10 JULY 78 non-null float64 11 AUGUST 77 non-null float64 12 SEPTEMBER 78 non-null float64 13 OCTOBER 78 non-null float64 14 NOVEMBER 78 non-null float64 15 DECEMBER 78 non-null float64 16 YTD 79 non-null float64 dtypes: float64(14), object(3) memory usage: 10.9+ KB
I inspected all the data sets, from 2011 to 2023, and noticed a few things:
These discrepancies make it difficult to analyze the data in aggregate and should be corrected. The code below cleans the data sets in an automated fashion and makes it possible to merge them all together. The format of each data set is also changed from wide to long to accommodate future exploration, particularly time series analysis. Additional changes, such as naming conventions, were also implemented to improve readability and clarity. Note, columns that include location information (address, city, zip code, etc.) were removed, as this information can be incorporated later on from the previous data set (i.e., 'Libraries - Locations, Contact Information, and Usual Hours of Operation').
We will clean local copies of the data sets as it is easier to iterate through them. However, it should be noted that the data for 2023 is incomplete and should be redownloaded at a later date, if you wish to repeat the analysis.
# Get list of all the CSV files within a specific directory
def get_csv_files_in_path(main_path, subdirectory):
# Combine the main path and subdirectory to get the full directory path
path = os.path.join(main_path, subdirectory)
# Check if the given path exists and is a directory
if not os.path.exists(path) or not os.path.isdir(path):
print(f"Invalid path: {path}")
return []
# Get a list of all files in the directory
file_list = os.listdir(path)
# Filter the list to include only CSV files
csv_files = [os.path.join(path, file) for file in file_list if file.lower().endswith('.csv')]
return csv_files
# Example usage
main_path_to_files = "~/Raw_Data"
subdirectory_path = "Visitors_By_Location"
csv_files = get_csv_files_in_path(main_path_to_files, subdirectory_path)
print("CSV files in the specified path:")
for csv_file in csv_files:
print(csv_file)
CSV files in the specified path: /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2023_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2014_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2018_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2015_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2011_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2019_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2013_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2020_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2017_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2021_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2016_Visitors_by_Location.csv /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Raw_Data/Visitors_By_Location/Libraries_2012_Visitors_by_Location.csv
Again, all these files pertain to visitor count data for each library for the years 2011 to 2023. Let's process them!
The following process_csv_files function reads and processes a list of CSV files, applies transformations to the data, adds a 'SEASON' column, saves the processed files, and returns the merged dataframe containing the processed data from all the files.
def process_csv_files(csv_files, new_directory):
processed_dfs = [] # List to store processed dataframes
# Define a function to map month number to season
def month_to_season(month):
if month in [3, 4, 5]:
return 'Spring'
elif month in [6, 7, 8]:
return 'Summer'
elif month in [9, 10, 11]:
return 'Fall'
else:
return 'Winter'
for file_path in csv_files:
# Read the CSV file
df = pd.read_csv(file_path)
# Replace missing values with 0
df = df.fillna(0)
# Define the data types for each column
data_types = {
'JANUARY': int,
'FEBRUARY': int,
'MARCH': int,
'APRIL': int,
'MAY': int,
'JUNE': int,
'JULY': int,
'AUGUST': int,
'SEPTEMBER': int,
'OCTOBER': int,
'NOVEMBER': int,
'DECEMBER': int,
'YTD': int,
}
# Extract the year from the file name using regular expression
match = re.search(r"Libraries_(\d{4})", file_path)
if match:
year = int(match.group(1))
# Remove the 'LOCATION' column for data after 2019
if year > 2019:
df = df.drop(columns = df.columns[df.columns.str.contains(r'(?i)^.*location.*\d*$')])
# Check if the 'ZIP' or 'ZIP CODE' column exists and drop them
if 'ZIP' in df.columns:
df = df.drop(columns = ['ZIP'])
if 'ZIP CODE' in df.columns:
df = df.drop(columns = ['ZIP CODE'])
# Check if the 'LOCATION' or 'BRANCH' column exists
if 'LOCATION' in df.columns:
# Rename the 'LOCATION' column to 'NAME'
df = df.rename(columns = {'LOCATION': 'NAME'})
elif 'BRANCH' in df.columns:
# Rename the 'BRANCH' column to 'NAME'
df = df.rename(columns = {'BRANCH': 'NAME'})
# Check if the 'ADDRESS' column exists
if 'ADDRESS' in df.columns:
# Remove the 'ADDRESS' column
df = df.drop(columns = ['ADDRESS'])
# Check if the 'CITY' column exists
if 'CITY' in df.columns:
# Remove the 'CITY' column
df = df.drop(columns = ['CITY'])
# Apply the specified data types to the DataFrame
df = df.astype(data_types)
# Change column names
df = df.rename(columns = {'JANUARY': 'JAN', 'FEBRUARY': 'FEB', 'MARCH': 'MAR', 'APRIL': 'APR', 'MAY': 'MAY',
'JUNE': 'JUN', 'JULY': 'JUL', 'AUGUST': 'AUG', 'SEPTEMBER': 'SEP', 'OCTOBER': 'OCT',
'NOVEMBER': 'NOV', 'DECEMBER': 'DEC', 'YTD': 'TOTAL_VISITORS_YEAR'})
# Add the 'Year' column
df['YEAR'] = year
# Add a column for PROBLEM_WITH_COUNTER
df['PROBLEM_WITH_COUNTER'] = df['NAME'].str.contains(r'\*', regex = True)
# Remove trailing white spaces from the 'NAME' column
df['NAME'] = df['NAME'].str.strip()
# Remove asterisks from the 'NAME' column
df['NAME'] = df['NAME'].str.replace('\*', '', regex = True)
# Remove rows where 'Name' is empty or equals 'Total'
df = df[df['NAME'].notna()]
df = df[~df['NAME'].str.lower().str.contains('total')]
# Convert from wide to long format
df = df.melt(id_vars = ['NAME', 'YEAR', 'TOTAL_VISITORS_YEAR', 'PROBLEM_WITH_COUNTER'],
var_name = 'MONTH', value_name = 'VISITOR_COUNT')
# Add a column for month number
df['MONTH_NUMBER'] = pd.to_datetime(df['MONTH'], format = '%b', errors = 'coerce').dt.month
# Add the 'SEASON' column
df['SEASON'] = df['MONTH_NUMBER'].apply(month_to_season)
# Rearrange the columns if desired
df = df[['NAME', 'YEAR', 'TOTAL_VISITORS_YEAR', 'PROBLEM_WITH_COUNTER', 'MONTH', 'MONTH_NUMBER', 'SEASON', 'VISITOR_COUNT']]
# Rearrange the rows by name, year, and month number
df = df.sort_values(by = ['NAME', 'YEAR', 'MONTH_NUMBER'])
# Reset the index if desired
df = df.reset_index(drop = True)
# Create the new file name
file_name = os.path.basename(file_path)
new_file_name = os.path.splitext(file_name)[0] + '_long_format.csv'
# Construct the new file path
new_file_path = os.path.join(new_directory, new_file_name)
# Save the DataFrame to a new CSV file
df.to_csv(new_file_path, index = False)
processed_dfs.append(df) # Store the processed DataFrame
# Merge all the processed dataframes
merged_df = pd.concat(processed_dfs, ignore_index = True)
# Rearrange the rows in the merged dataframe by name, year, and month number
merged_df = merged_df.sort_values(by = ['NAME', 'YEAR', 'MONTH_NUMBER'])
# Reset the index of the merged dataframe
merged_df = merged_df.reset_index(drop = True)
# Save the merged dataframe to a separate CSV file
merged_file_path = os.path.join(new_directory, 'Visitors_by_Location_merged_data_long_format.csv')
merged_df.to_csv(merged_file_path, index = False)
print("Done processing csv files!")
print(f"Clean files saved to: {new_directory}.")
return merged_df
# Example usage
new_directory = "~/Processed_Data" # Path where you want the processed files saved
merged_df = process_csv_files(csv_files, new_directory)
Done processing csv files! Clean files saved to: /Users/Jesse/Desktop/Workspace/Tableau/Chicago_Public_Libraries/Processed_Data.
Here's a more detailed summary of what the code above does:

1. Initializes an empty list, processed_dfs, to store the processed dataframes.
2. For each file in the csv_files list: reads the CSV, cleans and standardizes the columns, converts the data from wide to long format, saves it to a new CSV file, and appends the result to the processed_dfs list.
3. Concatenates all the processed dataframes into merged_df.
4. Sorts the merged_df dataframe by 'NAME', 'YEAR', and 'MONTH_NUMBER'.
5. Saves and returns the merged_df dataframe.

Finally, we can also incorporate the previous data set, which includes information about library location, hours of operation, etc.:
# Merge the two data sets on 'NAME' column
merged_full_df = pd.merge(merged_df, location_long_format, on = 'NAME')
# Save the full merged dataframe
merged_full_df.to_csv("~/Processed_Data/Public_Library_Location_Visitor_Count_2011_2023.csv", index = False)
merged_full_df.head()
NAME | YEAR | TOTAL_VISITORS_YEAR | PROBLEM_WITH_COUNTER | MONTH | MONTH_NUMBER | SEASON | VISITOR_COUNT | HOURS OF OPERATION | ADDRESS | CITY | STATE | ZIP | PHONE | WEBSITE | LOCATION | TOTAL HOURS WEEK | DAY | HOURS OPEN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Albany Park | 2011 | 117425 | False | JAN | 1 | Winter | 9604 | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 48 | MON | 8 |
1 | Albany Park | 2011 | 117425 | False | JAN | 1 | Winter | 9604 | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 48 | TUES | 8 |
2 | Albany Park | 2011 | 117425 | False | JAN | 1 | Winter | 9604 | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 48 | WED | 8 |
3 | Albany Park | 2011 | 117425 | False | JAN | 1 | Winter | 9604 | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 48 | THURS | 8 |
4 | Albany Park | 2011 | 117425 | False | JAN | 1 | Winter | 9604 | Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... | 3401 W. Foster Ave. | Chicago | IL | 60625 | (773) 539-5450 | https://www.chipublib.org/locations/3/ | (41.97557881655979, -87.71361314512697) | 48 | FRI | 8 |
merged_full_df.tail()
NAME | YEAR | TOTAL_VISITORS_YEAR | PROBLEM_WITH_COUNTER | MONTH | MONTH_NUMBER | SEASON | VISITOR_COUNT | HOURS OF OPERATION | ADDRESS | CITY | STATE | ZIP | PHONE | WEBSITE | LOCATION | TOTAL HOURS WEEK | DAY | HOURS OPEN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
75175 | Wrightwood-Ashburn | 2023 | 26008 | False | DEC | 12 | Winter | 0 | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8530 S. Kedzie Ave. | Chicago | IL | 60652 | (312) 747-2696 | https://www.chipublib.org/locations/82/ | (41.73795704970435, -87.70221598421591) | 48 | WED | 8 |
75176 | Wrightwood-Ashburn | 2023 | 26008 | False | DEC | 12 | Winter | 0 | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8530 S. Kedzie Ave. | Chicago | IL | 60652 | (312) 747-2696 | https://www.chipublib.org/locations/82/ | (41.73795704970435, -87.70221598421591) | 48 | THURS | 8 |
75177 | Wrightwood-Ashburn | 2023 | 26008 | False | DEC | 12 | Winter | 0 | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8530 S. Kedzie Ave. | Chicago | IL | 60652 | (312) 747-2696 | https://www.chipublib.org/locations/82/ | (41.73795704970435, -87.70221598421591) | 48 | FRI | 8 |
75178 | Wrightwood-Ashburn | 2023 | 26008 | False | DEC | 12 | Winter | 0 | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8530 S. Kedzie Ave. | Chicago | IL | 60652 | (312) 747-2696 | https://www.chipublib.org/locations/82/ | (41.73795704970435, -87.70221598421591) | 48 | SAT | 8 |
75179 | Wrightwood-Ashburn | 2023 | 26008 | False | DEC | 12 | Winter | 0 | Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri... | 8530 S. Kedzie Ave. | Chicago | IL | 60652 | (312) 747-2696 | https://www.chipublib.org/locations/82/ | (41.73795704970435, -87.70221598421591) | 48 | SUN | 0 |
To summarize, our full merged data set contains the following attributes:
Data profiling involves analyzing the contents and structure of a data set to gain insights into its characteristics. Here's an example of how we can perform data profiling on the full merged dataframe to further review it:
def perform_data_profiling(df):
# Display general information about the dataframe
print("Data Profile:")
print("-" * 50)
print(df.info())
print("-" * 50)
# Display summary statistics
print("Summary Statistics:")
print("-" * 50)
print(df.describe())
print("-" * 50)
# Display the first few rows of the dataframe
print("Sample Data:")
print("-" * 50)
print(df.head())
print("-" * 50)
# Display the number of unique values in each column
print("Unique Value Counts:")
print("-" * 50)
for column in df.columns:
unique_values = df[column].nunique()
print(f"{column}: {unique_values} unique values")
print("-" * 50)
# Display missing value information
print("Missing Value Counts:")
print("-" * 50)
print(df.isnull().sum())
print("-" * 50)
# Display data types of each column
print("Data Types:")
print("-" * 50)
print(df.dtypes)
print("-" * 50)
# Display correlations between numeric columns
numeric_columns = df.select_dtypes(include = 'number').columns
if len(numeric_columns) > 1:
print("Correlation Matrix:")
print("-" * 50)
print(df[numeric_columns].corr())
print("-" * 50)
# Perform data profiling on the merged dataframe
perform_data_profiling(merged_full_df)
Data Profile: -------------------------------------------------- <class 'pandas.core.frame.DataFrame'> Int64Index: 75180 entries, 0 to 75179 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 NAME 75180 non-null object 1 YEAR 75180 non-null int64 2 TOTAL_VISITORS_YEAR 75180 non-null int64 3 PROBLEM_WITH_COUNTER 75180 non-null object 4 MONTH 75180 non-null object 5 MONTH_NUMBER 75180 non-null int64 6 SEASON 75180 non-null object 7 VISITOR_COUNT 75180 non-null int64 8 HOURS OF OPERATION 75180 non-null object 9 ADDRESS 75180 non-null object 10 CITY 75180 non-null object 11 STATE 75180 non-null object 12 ZIP 75180 non-null int64 13 PHONE 75180 non-null object 14 WEBSITE 75180 non-null object 15 LOCATION 75180 non-null object 16 TOTAL HOURS WEEK 75180 non-null int64 17 DAY 75180 non-null object 18 HOURS OPEN 75180 non-null int64 19 Z_SCORE 75180 non-null float64 dtypes: float64(1), int64(7), object(12) memory usage: 14.1+ MB None -------------------------------------------------- Summary Statistics: -------------------------------------------------- YEAR TOTAL_VISITORS_YEAR MONTH_NUMBER VISITOR_COUNT \ count 75180.000000 7.518000e+04 75180.000000 75180.000000 mean 2016.737430 8.269647e+04 6.500000 6891.372439 std 3.621391 7.017199e+04 3.452075 6257.785494 min 2011.000000 0.000000e+00 1.000000 0.000000 25% 2014.000000 4.366100e+04 3.750000 3520.000000 50% 2017.000000 7.505200e+04 6.500000 6403.000000 75% 2020.000000 1.096040e+05 9.250000 9338.250000 max 2023.000000 1.405964e+06 12.000000 141696.000000 ZIP TOTAL HOURS WEEK HOURS OPEN Z_SCORE count 75180.000000 75180.000000 75180.000000 75180.000000 mean 60633.706145 46.248045 6.606864 0.597374 std 28.428708 6.669656 3.061526 0.801968 min 60605.000000 0.000000 0.000000 0.000060 25% 60617.000000 48.000000 8.000000 0.214833 50% 60630.000000 48.000000 8.000000 0.467321 75% 60643.000000 48.000000 8.000000 0.857447 max 60827.000000 48.000000 11.000000 21.542049 -------------------------------------------------- Sample Data: -------------------------------------------------- NAME YEAR TOTAL_VISITORS_YEAR PROBLEM_WITH_COUNTER MONTH \ 0 Albany Park 2011 117425 False JAN 1 Albany Park 2011 117425 False JAN 2 Albany Park 2011 117425 False JAN 3 Albany Park 2011 117425 False JAN 4 Albany Park 2011 117425 False JAN MONTH_NUMBER SEASON VISITOR_COUNT \ 0 1 Winter 9604 1 1 Winter 9604 2 1 Winter 9604 3 1 Winter 9604 4 1 Winter 9604 HOURS OF OPERATION ADDRESS \ 0 Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... 3401 W. Foster Ave. 1 Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... 3401 W. Foster Ave. 2 Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... 3401 W. Foster Ave. 3 Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... 3401 W. Foster Ave. 4 Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri... 3401 W. Foster Ave. 
CITY STATE ZIP PHONE \ 0 Chicago IL 60625 (773) 539-5450 1 Chicago IL 60625 (773) 539-5450 2 Chicago IL 60625 (773) 539-5450 3 Chicago IL 60625 (773) 539-5450 4 Chicago IL 60625 (773) 539-5450 WEBSITE \ 0 https://www.chipublib.org/locations/3/ 1 https://www.chipublib.org/locations/3/ 2 https://www.chipublib.org/locations/3/ 3 https://www.chipublib.org/locations/3/ 4 https://www.chipublib.org/locations/3/ LOCATION TOTAL HOURS WEEK DAY \ 0 (41.97557881655979, -87.71361314512697) 48 MON 1 (41.97557881655979, -87.71361314512697) 48 TUES 2 (41.97557881655979, -87.71361314512697) 48 WED 3 (41.97557881655979, -87.71361314512697) 48 THURS 4 (41.97557881655979, -87.71361314512697) 48 FRI HOURS OPEN Z_SCORE 0 8 0.433483 1 8 0.433483 2 8 0.433483 3 8 0.433483 4 8 0.433483 -------------------------------------------------- Unique Value Counts: -------------------------------------------------- NAME: 80 unique values YEAR: 12 unique values TOTAL_VISITORS_YEAR: 885 unique values PROBLEM_WITH_COUNTER: 2 unique values MONTH: 12 unique values MONTH_NUMBER: 12 unique values SEASON: 4 unique values VISITOR_COUNT: 6961 unique values HOURS OF OPERATION: 6 unique values ADDRESS: 80 unique values CITY: 1 unique values STATE: 1 unique values ZIP: 49 unique values PHONE: 80 unique values WEBSITE: 80 unique values LOCATION: 80 unique values TOTAL HOURS WEEK: 4 unique values DAY: 7 unique values HOURS OPEN: 3 unique values Z_SCORE: 6961 unique values -------------------------------------------------- Missing Value Counts: -------------------------------------------------- NAME 0 YEAR 0 TOTAL_VISITORS_YEAR 0 PROBLEM_WITH_COUNTER 0 MONTH 0 MONTH_NUMBER 0 SEASON 0 VISITOR_COUNT 0 HOURS OF OPERATION 0 ADDRESS 0 CITY 0 STATE 0 ZIP 0 PHONE 0 WEBSITE 0 LOCATION 0 TOTAL HOURS WEEK 0 DAY 0 HOURS OPEN 0 Z_SCORE 0 dtype: int64 -------------------------------------------------- Data Types: -------------------------------------------------- NAME object YEAR int64 TOTAL_VISITORS_YEAR int64 PROBLEM_WITH_COUNTER object MONTH object MONTH_NUMBER int64 SEASON object VISITOR_COUNT int64 HOURS OF OPERATION object ADDRESS object CITY object STATE object ZIP int64 PHONE object WEBSITE object LOCATION object TOTAL HOURS WEEK int64 DAY object HOURS OPEN int64 Z_SCORE float64 dtype: object -------------------------------------------------- Correlation Matrix: -------------------------------------------------- YEAR TOTAL_VISITORS_YEAR MONTH_NUMBER \ YEAR 1.000000e+00 -3.583990e-01 4.462930e-17 TOTAL_VISITORS_YEAR -3.583990e-01 1.000000e+00 -7.013253e-17 MONTH_NUMBER 4.462930e-17 -7.013253e-17 1.000000e+00 VISITOR_COUNT -3.349105e-01 9.344625e-01 -1.777759e-02 ZIP -2.902156e-02 -9.906362e-02 -7.209972e-15 TOTAL HOURS WEEK -3.199835e-02 4.678836e-02 6.400582e-17 HOURS OPEN -9.958526e-03 1.456147e-02 3.840921e-17 Z_SCORE 1.401029e-01 6.466537e-01 3.503484e-02 VISITOR_COUNT ZIP TOTAL HOURS WEEK \ YEAR -0.334910 -2.902156e-02 -3.199835e-02 TOTAL_VISITORS_YEAR 0.934463 -9.906362e-02 4.678836e-02 MONTH_NUMBER -0.017778 -7.209972e-15 6.400582e-17 VISITOR_COUNT 1.000000 -9.257124e-02 4.372197e-02 ZIP -0.092571 1.000000e+00 -2.547387e-01 TOTAL HOURS WEEK 0.043722 -2.547387e-01 1.000000e+00 HOURS OPEN 0.013607 -7.927978e-02 3.112200e-01 Z_SCORE 0.655241 -3.663064e-02 -8.461968e-02 HOURS OPEN Z_SCORE YEAR -9.958526e-03 0.140103 TOTAL_VISITORS_YEAR 1.456147e-02 0.646654 MONTH_NUMBER 3.840921e-17 0.035035 VISITOR_COUNT 1.360715e-02 0.655241 ZIP -7.927978e-02 -0.036631 TOTAL HOURS WEEK 3.112200e-01 -0.084620 HOURS OPEN 1.000000e+00 -0.026335 Z_SCORE 
-2.633533e-02 1.000000 --------------------------------------------------
We can also perform data profiling using pandas_profiling. Pandas profiling is a powerful Python tool for exploratory data analysis, providing a quick and comprehensive overview of data sets. It automates the generation of statistical summaries, data visualizations, and insights, saving time and effort for data analysts and scientists. The HTML report generated by pandas profiling facilitates data documentation and easy sharing of analysis results.
# Perform data profiling using pandas_profiling
profile = ProfileReport(merged_full_df, title = 'Pandas Profiling Report: Merged Dataset')
profile.to_file("visitors_by_location_data_profiling_report.html")
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
Export report to file: 0%| | 0/1 [00:00<?, ?it/s]
To open the HTML file generated by pandas profiling in a Jupyter notebook, you can use the IPython.display module, or click on the file to open the document in your browser.
# Specify the file path of the HTML report
html_file = "visitors_by_location_data_profiling_report.html"
# Display the HTML report in the notebook
display(HTML(filename = html_file))
# Clear the displayed output
# clear_output()
The 'Overview' section of the report confirms that the processed data does not have any duplicate rows or missing values/cells, while the rest of the summary statistics and plots give us important information we might want to consider moving forward, such as the negative correlation between the variables 'YEAR' and 'VISITOR_COUNT'.
We can group the data by 'SEASON' and compute the average 'VISITOR_COUNT' for each season. This would tell us whether there's a significant difference in library usage across seasons. Similarly, we can examine monthly trends to capture finer temporal patterns.
# Set the style of the plots
sns.set_style("whitegrid")
# Define a color palette for the seasons
season_colors = {'Spring': '#9ACD32', 'Summer': '#FF0000', 'Fall': '#FFA500', 'Winter': '#87CEEB'}
# Question 1: Is there a time of the year for which they experience more visitors (e.g., summer vs winter)?
seasonal_data_avg = merged_full_df.groupby('SEASON')['VISITOR_COUNT'].mean().reset_index()
# Plot the average visitor count by season
plt.figure(figsize = (8, 6))
sns.barplot(x ='SEASON', y = 'VISITOR_COUNT', data = seasonal_data_avg,
order = ['Spring', 'Summer', 'Fall', 'Winter'],
palette = season_colors)
plt.title('Average Visitor Count by Season')
plt.ylabel('Average Number of Visitors')
plt.show()
The bar chart represents the average visitor count for each season. As we can see, the average number of visitors is highest in the summer and lowest in the winter among Chicago Public Libraries.
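As mentioned earlier, we can also look at finer monthly patterns. The sketch below averages VISITOR_COUNT by month number across all years and libraries in merged_full_df.
# Sketch: average visitor count by month, across all years and libraries
monthly_avg = merged_full_df.groupby('MONTH_NUMBER')['VISITOR_COUNT'].mean().reset_index()
plt.figure(figsize = (10, 5))
sns.lineplot(x = 'MONTH_NUMBER', y = 'VISITOR_COUNT', data = monthly_avg, marker = 'o', color = 'b')
plt.xticks(range(1, 13), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.title('Average Visitor Count by Month')
plt.xlabel('Month')
plt.ylabel('Average Number of Visitors')
plt.show()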
# Define custom colors for each season (using hex codes)
colors = {
'Spring': '#9ACD32', # Pastel green
'Summer': '#FF0000', # Red
'Fall': '#FFA500', # Orange
'Winter': '#87CEEB' # Light blue
}
# Aggregate total visitor counts by year and season (one column per season)
count_by_year_season = merged_full_df.groupby(['YEAR', 'SEASON'])['VISITOR_COUNT'].sum().unstack()
# Plot the counts of visitors by year and season with custom colors
fig, ax = plt.subplots(figsize = (12, 8))
count_by_year_season.plot(kind = 'bar', stacked = True, ax = ax, color = [colors[col] for col in count_by_year_season.columns])
ax.set_title('Number of Visitors to Libraries Each Year by Season')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Visitors')
# Remove scientific notation and set the y-axis labels to regular format
ax.yaxis.set_major_formatter('{:.0f}'.format)
legend = ax.legend(title = 'Seasons')
plt.setp(legend.get_title(), fontsize = 'medium')
plt.xticks(rotation = 45)
plt.tight_layout()
plt.show()
Looking at visitor counts for each season over time highlights not only the patterns mentioned above but also a decrease in the number of visitors over time for each of the seasons.
We can also plot the total visitor count for each year to visualize overall changes in library usage over time.
# Set the style of the plots
sns.set_style("whitegrid")
# Question 2: Has there been a decline or increase in the use of public libraries?
yearly_data = merged_full_df.groupby('YEAR')['VISITOR_COUNT'].sum().reset_index()
# Include years 2011 and 2023 if missing and set their visitor count to 0
yearly_data = yearly_data.set_index('YEAR').reindex(range(2011, 2024)).fillna(0).reset_index()
# Plot the visitor count by year
plt.figure(figsize = (12, 6))
sns.lineplot(x = 'YEAR', y = 'VISITOR_COUNT', data = yearly_data, marker = 'o', color = 'b')
plt.title('Total Visitor Count by Year')
plt.ylabel('Number of Visitors')
# Format y-axis tick labels to show the count directly without scientific notation
plt.ticklabel_format(style = 'plain', axis = 'y')
# Add a vertical line for the start of the COVID-19 pandemic (assumed in January 2020)
plt.axvline(x = 2020, color = 'red', linestyle = '--', label = 'Start of COVID-19 Pandemic')
plt.legend() # Show the legend with the label for the vertical line
plt.xticks(rotation = 45) # Rotate x-axis labels for better readability
plt.tight_layout() # Adjust layout to avoid cropping labels
plt.show()
The line plot above shows the total number of visitors across Chicago Public Libraries each year. There is a clear trend showing an overall decrease in the number of visitors over time, which may be due to various factors such as the rise of digital media and the internet.
We can compare visitor counts before and after 2020 (when the COVID-19 pandemic started to significantly impact public life). We can visualize this using a line plot or a bar chart and confirm the significance of the observed differences using a statistical test.
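Before running the test, a quick side-by-side bar chart of the two period means makes the comparison concrete (a minimal sketch using the same merged_full_df columns as above; the period labels are illustrative):
# Sketch: compare mean monthly visitor counts before 2020 vs. from 2020 onwards
period = np.where(merged_full_df['YEAR'] < 2020, 'Pre-2020', '2020 onwards')
period_means = merged_full_df.groupby(period)['VISITOR_COUNT'].mean()
plt.figure(figsize = (6, 5))
period_means.reindex(['Pre-2020', '2020 onwards']).plot(kind = 'bar', color = ['steelblue', 'indianred'])
plt.title('Average Monthly Visitor Count per Branch: Pre-2020 vs. 2020 Onwards')
plt.ylabel('Average Number of Visitors')
plt.xticks(rotation = 0)
plt.tight_layout()
plt.show()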
An independent samples t-test is appropriate here because it compares the means of two independent groups: visitor counts before the COVID-19 pandemic (pre-COVID) and visitor counts from 2020 onwards (during COVID). The test determines whether there is a statistically significant difference between the average visitor counts in the two periods. The independence assumption is reasonable, since observations in one period are unrelated to those in the other, and with samples this large the t-test is robust to moderate departures from normality. Additionally, passing `equal_var = False` means the two groups are not assumed to have equal variances (i.e., Welch's t-test), which is appropriate for this analysis.
# Question 3: Did the COVID-19 pandemic contribute to any more changes?
# Let's consider years before 2020 (pre-COVID) and from 2020 onwards (during COVID)
pre_covid = merged_full_df[merged_full_df['YEAR'] < 2020]['VISITOR_COUNT']
during_covid = merged_full_df[merged_full_df['YEAR'] >= 2020]['VISITOR_COUNT']
# Calculate the average visitor count
average_pre_covid = pre_covid.mean()
average_during_covid = during_covid.mean()
# Print the average visitor count
print(f'Average visitor count before COVID-19: {average_pre_covid:.0f}')
print(f'Average visitor count during COVID-19: {average_during_covid:.0f}')
# Conduct the independent samples t-test
t_stat, p_value = stats.ttest_ind(pre_covid, during_covid, equal_var = False)
# Print the results
print(f'T-test statistic: {t_stat:.2f}')
print(f'P-value: {p_value:.5f}')
Average visitor count before COVID-19: 8283
Average visitor count during COVID-19: 3092
T-test statistic: 111.10
P-value: 0.00000
The average visitor count before the COVID-19 pandemic was 8283, while during the pandemic it dropped to 3092. The t-test statistic of 111.10 reflects a large difference in means between the two periods, and the p-value, which rounds to 0.00000 at five decimal places, indicates that this difference is statistically significant.
Based on this test, it appears that the COVID-19 pandemic had a significant impact on visitor counts, leading to a considerable reduction in visitors during the pandemic compared to the pre-COVID period. However, we need to account for the overall decreasing trend in library usage when assessing the impact of COVID-19. One way to do this would be to compare the observed visitor count in 2020 and 2021 with the count predicted by a regression line fitted to the pre-2020 data. If the observed count is significantly lower than the predicted count, this would suggest an additional impact of COVID-19.
yearly_visitors = merged_full_df.groupby('YEAR')['TOTAL_VISITORS_YEAR'].sum()
# Restrict to the years before 2020 for fitting the trend line
pre_2020_data = yearly_visitors[yearly_visitors.index < 2020]
# Fit a linear regression to the pre-2020 data
model = LinearRegression()
model.fit(pre_2020_data.index.values.reshape(-1, 1), pre_2020_data.values)
# Predict visitor count for 2020, 2021, 2022, and 2023
predicted_2020 = model.predict(np.array([[2020]]))[0]
predicted_2021 = model.predict(np.array([[2021]]))[0]
predicted_2022 = model.predict(np.array([[2022]]))[0]
predicted_2023 = model.predict(np.array([[2023]]))[0]
# Plot the observed and predicted visitor count
plt.figure(figsize = (10, 6))
sns.lineplot(x = yearly_visitors.index, y = yearly_visitors.values, label = 'Observed')
plt.plot([2020, 2021, 2022, 2023], [predicted_2020, predicted_2021, predicted_2022, predicted_2023],
'r--', label = 'Predicted')
plt.title('Total Visitors per Year')
plt.xlabel('Year')
plt.ylabel('Total Visitors')
plt.legend()
# Disable scientific notation for the y-axis tick labels
plt.ticklabel_format(axis = 'y', style = 'plain', useMathText = False)
plt.show()
The line plot shows the total number of visitors to public libraries each year. The blue line represents the observed visitor count, while the dashed red line represents the count predicted by a regression line fitted to the data from before 2020.
We can see that the observed visitor counts for 2020 and 2021 are significantly lower than the predicted counts. This suggests that the COVID-19 pandemic likely had an additional impact on library usage beyond the overall decreasing trend.
The decrease in visitors during the pandemic could be due to various factors, such as library closures, restrictions on public gatherings, and people avoiding public places to reduce the risk of virus transmission.
Note that this analysis is quite simplified. A more robust analysis might account for other factors that could affect library usage, such as population changes and the availability of digital resources. Additionally, the linear trend line is a very simple model of the pre-2020 trend, and a more complex model might provide a more accurate prediction.
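As one example, a quadratic trend fitted to the same pre-2020 yearly totals gives an alternative baseline (a minimal sketch reusing pre_2020_data and yearly_visitors from above; the second-degree polynomial is purely illustrative, not a recommendation):
# Sketch: quadratic trend fitted to the pre-2020 yearly totals, extrapolated to 2020-2023
years = pre_2020_data.index.values.astype(float)
x_shifted = years - years.min()                    # shift years so the polynomial fit stays well conditioned
quad_coeffs = np.polyfit(x_shifted, pre_2020_data.values, deg = 2)
quad_trend = np.poly1d(quad_coeffs)
future_years = np.array([2020, 2021, 2022, 2023], dtype = float)
quad_predictions = quad_trend(future_years - years.min())
plt.figure(figsize = (10, 6))
plt.plot(yearly_visitors.index, yearly_visitors.values, marker = 'o', label = 'Observed')
plt.plot(future_years, quad_predictions, 'g--', label = 'Quadratic trend (fit to pre-2020 data)')
plt.title('Observed Totals vs. Quadratic Pre-2020 Trend')
plt.xlabel('Year')
plt.ylabel('Total Visitors')
plt.ticklabel_format(axis = 'y', style = 'plain')
plt.legend()
plt.show()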
Here we will use the Z-score method to detect outliers in the 'VISITOR_COUNT' column. We will then remove these outliers from the data set and try to improve our statistical testing.
# Calculate Z-scores
merged_full_df['Z_SCORE'] = np.abs(stats.zscore(merged_full_df['VISITOR_COUNT']))
# Filter out outliers (we define outliers as data points with a Z-score greater than 3)
df_filtered_no_outliers = merged_full_df[merged_full_df['Z_SCORE'] <= 3]
# Drop the 'Z_SCORE' column as we no longer need it
df_filtered_no_outliers = df_filtered_no_outliers.drop('Z_SCORE', axis = 1)
# Segmentation Analysis
# Here we will perform separate analyses for each library
# As an example, let's compare the total visitor count for each library in 2023
df_2023 = df_filtered_no_outliers[df_filtered_no_outliers['YEAR'] == 2023]
visitor_counts_2023 = df_2023.groupby('NAME')['VISITOR_COUNT'].sum().sort_values(ascending = False)
# Display the libraries with the highest and lowest visitor counts in 2023
visitor_counts_2023
NAME
Sulzer Regional Library              709492
Chinatown                            518014
Lincoln Park                         417928
Merlo                                382445
Logan Square                         380919
                                        ...
Sherman Park                          39837
Chicago Bee                           37443
Brainerd                              37079
Harold Washtington Library Center         0
Galewood-Mont Clare                       0
Name: VISITOR_COUNT, Length: 80, dtype: int64
The output above shows the total visitor count for each library in 2023, sorted in descending order. The Sulzer Regional Library had the highest visitor count, while the Galewood-Mont Clare and Harold Washington Library Center had zero visitors.
Please note that these results are based on the filtered dataset where outliers in visitor count (defined as having a Z-score greater than 3) have been removed.
Next, let's repeat the hypothesis test on the outlier-filtered data to assess whether the observed decrease in visitor count from 2020 onwards is statistically significant. We'll again use a t-test, which compares the mean visitor count before and after 2020. The null hypothesis is that the two means are equal; the (two-sided) alternative is that they differ, with the expectation that the mean visitor count after 2020 is lower.
Before we conduct the t-test, let's check the assumptions. The t-test assumes that the data are normally distributed and that the variances of the two groups are equal. Let's check these assumptions first.
# Check the distribution of visitor counts before and after 2020
before_2020 = df_filtered_no_outliers[df_filtered_no_outliers['YEAR'] < 2020]['VISITOR_COUNT']
after_2020 = df_filtered_no_outliers[df_filtered_no_outliers['YEAR'] >= 2020]['VISITOR_COUNT']
# Plot histograms
plt.figure(figsize = (12, 6))
plt.hist(before_2020, bins = 30, alpha = 0.5, label = 'Before 2020')
plt.hist(after_2020, bins = 30, alpha = 0.5, label = 'After 2020')
plt.title('Distribution of Visitor Count Before and After 2020')
plt.xlabel('Visitor Count')
plt.ylabel('Frequency')
plt.legend()
plt.show()
# Perform Levene's test for equal variances
from scipy.stats import levene
stat, p = levene(before_2020, after_2020)
print(f'Levene\'s test for equal variances: p = {p}')
Levene's test for equal variances: p = 3.3231360711195104e-300
The histograms show the distribution of visitor counts before and after 2020. Both distributions are positively skewed, suggesting that the assumption of normality may not hold. However, the t-test is quite robust to violations of this assumption, especially for large sample sizes.
Levene's test for equal variances gives a very small p-value, indicating that the variances of the two groups are significantly different. This violates one of the assumptions of the t-test. In this case, we can use a version of the t-test that does not assume equal variances, known as Welch's t-test.
Let's proceed with Welch's t-test to compare the mean visitor count before and after 2020.
# Perform Welch's t-test
t_stat, p = ttest_ind(before_2020, after_2020, equal_var = False)
# Print the results of the Welch's t-test
print(f'Welch\'s t-test statistic: {t_stat:.2f}')
print(f'p-value: {p:.5f}')
Welch's t-test statistic: 196.75
p-value: 0.00000
Welch's t-test gives a very small p-value, which is less than 0.05. This indicates that we can reject the null hypothesis of equal means, and conclude that the mean visitor count is significantly lower after 2020 compared to before 2020. This provides statistical evidence for a decrease in library usage after 2020, supporting the visual evidence from the regression line plot.
Please note that this result should be interpreted with caution, as the t-test does not prove causality. The observed decrease in library usage could be due to the COVID-19 pandemic, but it could also be influenced by other factors.
Finally, as mentioned earlier, a more complex analysis might involve multiple regression or time-series analysis to model the relationship between visitor count and multiple variables simultaneously, and to handle temporal patterns in the data. However, these types of analysis are quite complex and beyond the scope of this discussion. They would also require additional data that is not available in the current data set.
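For a flavor of what such a model might look like, the sketch below fits an ordinary least squares regression of monthly visitor counts on a linear year trend plus season indicators using statsmodels (already imported as sm). It only uses the 'YEAR', 'SEASON', and 'VISITOR_COUNT' columns assumed to be in merged_full_df, and falls far short of the multivariate or time-series modeling described above; it is an illustrative setup, not a full analysis.
# Sketch: OLS of monthly visitor counts on a year trend plus season dummies
X = pd.get_dummies(merged_full_df[['YEAR', 'SEASON']], columns = ['SEASON'], drop_first = True)
X = sm.add_constant(X.astype(float))            # add an intercept; cast dummy columns to float
y = merged_full_df['VISITOR_COUNT'].astype(float)
ols_model = sm.OLS(y, X).fit()
print(ols_model.summary())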
The following queries could be used to gain additional insights into per-library usage patterns.
merged_full_df.groupby('NAME')['TOTAL_VISITORS_YEAR'].max()
NAME
Albany Park                 159397
Altgeld                      63052
Archer Heights              124653
Austin                      103092
Austin-Irving               145852
                              ...
West Pullman                121069
West Town                   146851
Whitney M. Young, Jr.       128395
Woodson Regional Library    214173
Wrightwood-Ashburn           89580
Name: TOTAL_VISITORS_YEAR, Length: 80, dtype: int64
merged_full_df.groupby('NAME')['TOTAL_VISITORS_YEAR'].min()
NAME
Albany Park                   421
Altgeld                      5666
Archer Heights              19245
Austin                      19049
Austin-Irving               16357
                              ...
West Pullman                15932
West Town                   35757
Whitney M. Young, Jr.           0
Woodson Regional Library    30324
Wrightwood-Ashburn          12274
Name: TOTAL_VISITORS_YEAR, Length: 80, dtype: int64
merged_full_df.groupby(['NAME', 'YEAR'])['VISITOR_COUNT'].agg(['sum', 'mean'])
NAME | YEAR | sum | mean
---|---|---|---
Albany Park | 2011 | 821975 | 9785.416667
  | 2012 | 549395 | 6540.416667
  | 2013 | 2947 | 35.083333
  | 2014 | 371812 | 4426.333333
  | 2015 | 1076257 | 12812.583333
... | ... | ... | ...
Wrightwood-Ashburn | 2018 | 368270 | 4384.166667
  | 2019 | 331471 | 3946.083333
  | 2020 | 149366 | 1778.166667
  | 2021 | 85918 | 1022.833333
  | 2023 | 182056 | 2167.333333

895 rows × 2 columns
merged_full_df.groupby(['NAME', 'MONTH'])['VISITOR_COUNT'].agg(['sum', 'mean'])
NAME | MONTH | sum | mean
---|---|---|---
Albany Park | APR | 637819 | 7593.083333
  | AUG | 676844 | 8057.666667
  | DEC | 630105 | 7501.250000
  | FEB | 642061 | 7643.583333
  | JAN | 673918 | 8022.833333
... | ... | ... | ...
Wrightwood-Ashburn | MAR | 406714 | 4841.833333
  | MAY | 348362 | 4147.166667
  | NOV | 397754 | 4735.166667
  | OCT | 439789 | 5235.583333
  | SEP | 409878 | 4879.500000

960 rows × 2 columns
merged_full_df.groupby(['NAME', 'SEASON'])['VISITOR_COUNT'].agg(['sum', 'mean'])
NAME | SEASON | sum | mean
---|---|---|---
Albany Park | Fall | 2150246 | 8532.722222
  | Spring | 1944299 | 7715.472222
  | Summer | 2027312 | 8044.888889
  | Winter | 1946084 | 7722.555556
Altgeld | Fall | 875259 | 3473.250000
... | ... | ... | ...
Woodson Regional Library | Winter | 770105 | 9167.916667
Wrightwood-Ashburn | Fall | 1247421 | 4950.083333
  | Spring | 1153495 | 4577.361111
  | Summer | 1257305 | 4989.305556
  | Winter | 1150429 | 4565.194444

320 rows × 2 columns
Each of these queries returns a Series or DataFrame indexed by library name (and, where applicable, by year, month, or season), with the aggregated visitor counts as values. The `groupby` method groups the data by the specified columns, and the `agg` method applies one or more aggregating functions to each group. These results could provide a useful starting point for future research.
# Find the library with the highest attendance
highest_attendance_library = merged_full_df.groupby('NAME')['TOTAL_VISITORS_YEAR'].sum().idxmax()
highest_attendance_count = merged_full_df.groupby('NAME')['TOTAL_VISITORS_YEAR'].sum().max()
# Find the library with the lowest attendance
lowest_attendance_library = merged_full_df.groupby('NAME')['TOTAL_VISITORS_YEAR'].sum().idxmin()
lowest_attendance_count = merged_full_df.groupby('NAME')['TOTAL_VISITORS_YEAR'].sum().min()
print("Library with highest attendance:", highest_attendance_library, "with attendance count of", highest_attendance_count)
print("Library with lowest attendance:", lowest_attendance_library, "with attendance count of", lowest_attendance_count)
Library with highest attendance: Harold Washtington Library Center with attendance count of 225055320
Library with lowest attendance: Legler Regional with attendance count of 8639904
The Jupyter notebook provided carries out an extensive analysis of the public libraries in the city of Chicago. The analysis begins with an exploration of the libraries' geographical distribution across the city, achieved by using the Python library `folium` to generate interactive maps. The data used in the analysis includes the libraries' names, hours of operation, addresses, contact information, and geographical locations.
One of the key insights drawn from the spatial analysis is the distribution of libraries across the city. A heatmap of library locations provides a visual representation of areas with a high density of libraries versus areas with a low density. The analysis also explores the nearest neighbor distance for each library, which gives an idea of how evenly spaced the libraries are. The notebook then analyzes the number of monthly visitors to these libraries through both visualization and statistical testing, as well as any changes that might have occurred in attendance over time (2011-2023) due to important events such as the COVID-19 pandemic.
In conclusion, this project is a comprehensive exploration of the public libraries in Chicago, focusing on their spatial distribution and their usage patterns. The findings could help in strategic decision-making related to the placement of new libraries, resource allocation, and understanding the impact of factors like the COVID-19 pandemic on library usage.
For a more robust analysis, we can consider incorporating additional data such as population and temperature.
Population Data: The number of visitors to a library is likely to be influenced by the population of the area it serves. For instance, if the population of a city decreases, we might expect the number of library visitors to decrease as well. To account for this, we could incorporate population data into our analysis. We can obtain this data from the U.S. Census Bureau, which provides population estimates for cities and towns. Once we have this data, we could adjust the visitor counts by the population to get a per capita visitor count, which might provide a more accurate measure of library usage.
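As a rough illustration of the per capita adjustment, the sketch below merges a hypothetical chicago_population table into the visitor data; the table, its column names, and its values are placeholders rather than Census Bureau figures.
# Sketch: per capita visitor counts, using a hypothetical population table
chicago_population = pd.DataFrame({
    'YEAR': [2018, 2019, 2020],                # placeholder years
    'POPULATION': [2700000, 2695000, 2745000]  # placeholder values, not official estimates
})
per_capita_df = merged_full_df.merge(chicago_population, on = 'YEAR', how = 'inner')
per_capita_df['VISITS_PER_1000_RESIDENTS'] = per_capita_df['VISITOR_COUNT'] / per_capita_df['POPULATION'] * 1000
visits_per_capita_by_year = per_capita_df.groupby('YEAR')['VISITS_PER_1000_RESIDENTS'].sum()
print(visits_per_capita_by_year)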
Temperature Data: The weather could also influence library usage. For example, people might be more likely to visit a library if it's too hot or too cold outside. To account for this, we could incorporate historical temperature data into our analysis. This data can be obtained from various sources, such as the National Centers for Environmental Information (NCEI). We could then examine the correlation between temperature and library usage, and include temperature as a factor in our regression model.
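Similarly, a monthly temperature series could be merged in and correlated with visitor counts, as sketched below; the monthly_temps table and its values are hypothetical placeholders rather than NCEI data, and the MONTH codes are assumed to match the three-letter codes used above.
# Sketch: correlation between average monthly temperature and visitor counts,
# using a hypothetical temperature table keyed on YEAR and MONTH
monthly_temps = pd.DataFrame({
    'YEAR': [2019, 2019, 2019],
    'MONTH': ['JAN', 'JUL', 'OCT'],       # placeholder months, matching the MONTH codes above
    'AVG_TEMP_F': [25.0, 76.0, 55.0]      # placeholder temperatures, not NCEI measurements
})
temps_df = merged_full_df.merge(monthly_temps, on = ['YEAR', 'MONTH'], how = 'inner')
print(temps_df['AVG_TEMP_F'].corr(temps_df['VISITOR_COUNT']))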
Incorporating these additional data would involve several steps:
1. Data Collection: We would first need to collect the necessary data. For population data, we could use the U.S. Census Bureau's API to download population estimates for the relevant years and locations. For temperature data, we would need to find a source that provides historical temperature data for the locations we're interested in.
2. Data Cleaning and Preprocessing: The new data would likely require some cleaning and preprocessing. For example, we might need to handle missing values, convert data types, or merge multiple data sources.
3. Exploratory Data Analysis: Before incorporating the new data into our model, we would perform some exploratory data analysis to understand its characteristics and its relationship with library usage.
4. Modeling: We would then incorporate the new data into our regression model. This could be as simple as adding new variables to our model, or it might involve more complex procedures such as feature engineering or model selection.
5. Evaluation and Interpretation: After fitting our model, we would evaluate its performance and interpret the results. This could involve calculating error metrics, checking the model's assumptions, and interpreting the coefficients.
Remember that the availability of data can vary, and the specifics of the analysis would depend on the data that we are able to obtain.